Jaro similarity

From Rosetta Code
Revision as of 23:32, 12 November 2016 by rosettacode>Jolkdarr (→‎{{header|Haxe}}: Anonymous structure.)
Task
Jaro similarity
You are encouraged to solve this task according to the task description, using any language you may know.

The Jaro distance is a measure of similarity between two strings.

The higher the Jaro distance for two strings is, the more similar the strings are.

The score is normalized such that   0   equates to no similarity and   1   is an exact match.


Definition

The Jaro distance     of two given strings     and     is

Where:

  •   is the number of matching characters;
  •   is half the number of transpositions.


Two characters from     and     respectively, are considered matching only if they are the same and not farther than   .

Each character of     is compared with all its matching characters in   .

The number of matching (but different sequence order) characters divided by 2 defines the number of transpositions.


Example

Given the strings     DWAYNE   and     DUANE   we find:


We find a Jaro score of:


Task

Implement the Jaro-distance algorithm and show the distances for each of the following pairs:

  • ("MARTHA", "MARHTA")
  • ("DIXON", "DICKSONX")
  • ("JELLYFISH", "SMELLYFISH")


See also



C

<lang C>#include <stdlib.h>

  1. include <string.h>
  2. include <ctype.h>
  3. include <stdio.h>
  1. define TRUE 1
  2. define FALSE 0
  1. define max(a, b) ((a) > (b) ? (a) : (b))
  2. define min(a, b) ((a) < (b) ? (a) : (b))

double jaro(const char *str1, const char *str2) {

   // length of the strings
   int str1_len = strlen(str1);
   int str2_len = strlen(str2);
   // if both strings are empty return 1
   // if only one of the strings is empty return 0
   if (str1_len == 0) return str2_len == 0 ? 1.0 : 0.0;
   // max distance between two chars to be considered matching
   // floor() is ommitted due to integer division rules
   int match_distance = (int) max(str1_len, str2_len)/2 - 1;
   // arrays of bools that signify if that char in the matching string has a match
   int *str1_matches = calloc(str1_len, sizeof(int));
   int *str2_matches = calloc(str2_len, sizeof(int));
   // number of matches and transpositions
   double matches = 0.0;
   double transpositions = 0.0;
   // find the matches
   for (int i = 0; i < str1_len; i++) {
       // start and end take into account the match distance
       int start = max(0, i - match_distance);
       int end = min(i + match_distance + 1, str2_len);
       for (int k = start; k < end; k++) {
           // if str2 already has a match continue
           if (str2_matches[k]) continue;
           // if str1 and str2 are not
           if (str1[i] != str2[k]) continue;
           // otherwise assume there is a match
           str1_matches[i] = TRUE;
           str2_matches[k] = TRUE;
           matches++;
           break;
       }
   }
   // if there are no matches return 0
   if (matches == 0) {
       free(str1_matches);
       free(str2_matches);
       return 0.0;
   }
   // count transpositions
   int k = 0;
   for (int i = 0; i < str1_len; i++) {
       // if there are no matches in str1 continue
       if (!str1_matches[i]) continue;
       // while there is no match in str2 increment k
       while (!str2_matches[k]) k++;
       // increment transpositions
       if (str1[i] != str2[k]) transpositions++;
       k++;
   }
   // divide the number of transpositions by two as per the algorithm specs
   // this division is valid because the counted transpositions include both
   // instances of the transposed characters.
   transpositions /= 2.0;
   // free the allocated memory
   free(str1_matches);
   free(str2_matches);
   // return the Jaro distance
   return ((matches / str1_len) +
       (matches / str2_len) +
       ((matches - transpositions) / matches)) / 3.0;

}

int main() {

   printf("%f\n", jaro("MARTHA",    "MARHTA"));
   printf("%f\n", jaro("DIXON",     "DICKSONX"));
   printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"));

}</lang>

Output:
0.944444
0.766667
0.896296

C++

Translation of: C

<lang cpp>#include <algorithm>

  1. include <iostream>
  2. include <string>

double jaro(const std::string s1, const std::string s2) {

   const uint l1 = s1.length(), l2 = s2.length();
   if (l1 == 0)
       return l2 == 0 ? 1.0 : 0.0;
   const uint match_distance = std::max(l1, l2) / 2 - 1;
   bool s1_matches[l1];
   bool s2_matches[l2];
   std::fill(s1_matches, s1_matches + l1, false);
   std::fill(s2_matches, s2_matches + l2, false);
   uint matches = 0;
   for (uint i = 0; i < l1; i++)
   {
       const int end = std::min(i + match_distance + 1, l2);
       for (int k = std::max(0u, i - match_distance); k < end; k++)
           if (!s2_matches[k] && s1[i] == s2[k])
           {
               s1_matches[i] = true;
               s2_matches[k] = true;
               matches++;
               break;
           }
   }
   if (matches == 0)
       return 0.0;
   double t = 0.0;
   uint k = 0;
   for (uint i = 0; i < l1; i++)
       if (s1_matches[i])
       {
           while (!s2_matches[k]) k++;
           if (s1[i] != s2[k]) t += 0.5;
           k++;
       }
   const double m = matches;
   return (m / l1 + m / l2 + (m - t) / m) / 3.0;

}

int main() {

   using namespace std;
   cout << jaro("MARTHA", "MARHTA") << endl;
   cout << jaro("DIXON", "DICKSONX") << endl;
   cout << jaro("JELLYFISH", "SMELLYFISH") << endl;
   return 0;

}</lang>

Clojure

<lang clojure> (ns test-project-intellij.core

 (:gen-class))

(defn find-matches [s t]

 " find match locations in the two strings "
 " s_matches is set to true wherever there is a match in t and t_matches is set conversely "
 (let [s_len (count s)
       t_len (count t)
       match_distance (int (- (/ (max s_len t_len) 2) 1))
       matches 0
       transpositions 0
       fn-start (fn [i] (max 0 (- i match_distance)))              ; function to compute starting position
       fn-end (fn [i] (min (+ i match_distance 1) (- t_len 1))) ]  ; function to compute end position
   (loop [i 0
          start (fn-start i)
          end (fn-end i)
          k start
          s_matches (vec (repeat (count s) false))
          t_matches (vec (repeat (count t) false))
          matches 0]
     (if (< i s_len)
       (if (<= k end)
         (if (get t_matches k)
           ; continue with next k
           (recur i start end (inc k) s_matches t_matches matches)
           (if (= (get s i) (get t k))
             ; match a position, so update matches, s_matches, t_matches to reflect match
             (recur (inc i) (fn-start (inc i)) (fn-end (inc i)) (fn-start (inc i)) (assoc s_matches i true) (assoc t_matches k true) (inc matches))
             ; no match so try next k
             (recur i start end (inc k) s_matches t_matches matches)))
         ; End of k iterations, so increment i and set k to start based upon i
         (recur (inc i) (fn-start (inc i)) (fn-end (inc i)) (fn-start (inc i)) s_matches t_matches matches))
       ; End of i iterations
       [matches s_matches t_matches]))))

(defn count-transpositions [s t s_matches t_matches]

 " Utility function to count the number of transpositions "
 (let [s_len (count s)]
   (loop [i 0
          k 0
          transpositions 0]
     (if (< i s_len)
       ; still elements in s (since i < s_len)
       (if (not (get s_matches i nil))
         ; skip to next i since there are no matches in s
         (recur (inc i) k transpositions)
         ; checking for match in t
         (if (not (get t_matches k nil))
           ; keeping looping around as long as there are no matches in t
           (recur i (inc k) transpositions)
           (if (not= (get s i) (get t k))
             ; increment transposition count (if strings don't equal at match location)
             (recur (inc i) (inc k) (inc transpositions))
             ; was a match, so advance i and k without increasing transpositions count
             (recur (inc i) (inc k) transpositions))))
       ; Return count
       transpositions))))

(defn jaro [s t]

 " Main Jaro Distance routine"
 (if (= s t)
   1
   (let [[matches s_matches t_matches]  (find-matches s t)]
     (if (= 0 matches)
       0
       (let [s_len (count s)
             t_len (count t)
             transpositions (count-transpositions s t s_matches t_matches)]
         (float (/ (+ (/ matches s_len) (/ matches t_len) (/ (- matches (/ transpositions 2)) matches)) 3)))))))


(println (jaro "MARTHA" "MARHTA")) (println (jaro "DIXON" "DICKSONX")) (println (jaro "JELLYFISH" "SMELLYFISH")) </lang>

Output:
0.9444444
0.76666665
0.8962963

CoffeeScript

Translation of: C++

<lang coffeescript>jaro = (s1, s2) ->

   l1 = s1.length
   l2 = s2.length
   if l1 == 0 then return if l2 == 0 then 1.0 else 0.0
   match_distance = Math.max(l1, l2) / 2 - 1
   s1_matches = []
   s2_matches = []
   m = 0
   for i in [0...l1]
       end = Math.min(i + match_distance + 1, l2)
       for k in [Math.max(0, i - match_distance)...end]
           if !s2_matches[k] and s1[i] == s2[k]
               s1_matches[i] = true
               s2_matches[k] = true
               m++
               break
   if m == 0
       0.0
   else
       t = 0.0
       k = 0
       for i in [0...l1]
           if s1_matches[i]
               until s2_matches[k] then k++
               if s1[i] != s2[k++] then t += 0.5
       (m / l1 + m / l2 + (m - t) / m) / 3.0

console.log jaro "MARTHA", "MARHTA" console.log jaro "DIXON", "DICKSONX" console.log jaro "JELLYFISH", "SMELLYFISH"</lang>

Output:
0.9444444444444445
0.7666666666666666
0.8962962962962964

D

Translation of: Kotlin

<lang D>auto jaro(in string s1, in string s2) {

   int s1_len = cast(int) s1.length;
   int s2_len = cast(int) s2.length;
   if (s1_len == 0 && s2_len == 0) return 1;
   import std.algorithm.comparison: min, max;
   auto match_distance = max(s1_len, s2_len) / 2 - 1;
   auto s1_matches = new bool[s1_len];
   auto s2_matches = new bool[s2_len];
   int matches = 0;
   for (auto i = 0; i < s1_len; i++) {
       auto start = max(0, i - match_distance);
       auto end = min(i + match_distance + 1, s2_len);
       for (auto j = start; j < end; j++)
           if (!s2_matches[j] && s1[i] == s2[j]) {
               s1_matches[i] = true;
               s2_matches[j] = true;
               matches++;
               break;
           }
   }
   if (matches == 0) return 0;
   auto t = 0.0;
   auto k = 0;
   for (auto i = 0; i < s1_len; i++)
       if (s1_matches[i]) {
           while (!s2_matches[k]) k++;
           if (s1[i] != s2[k++]) t += 0.5;
       }
   double m = matches;
   return (m / s1_len + m / s2_len + (m - t) / m) / 3.0;

}

void main() {

   import std.stdio: writeln;
   writeln(jaro(   "MARTHA",      "MARHTA"));
   writeln(jaro(    "DIXON",    "DICKSONX"));
   writeln(jaro("JELLYFISH",  "SMELLYFISH"));

}</lang>

0.944444
0.766667
0.896296

Elixir

Translation of: Ruby
Works with: Elixir version 1.3

<lang elixir>defmodule Jaro do

 def distance(s, t) when is_binary(s) and is_binary(t), do:
   distance(to_charlist(s), to_charlist(t))
 def distance(x, x), do: 1.0
 def distance(s, t) do
   s_len = length(s)
   t_len = length(t)
   {s_matches, t_matches, matches} = matching(s, t, s_len, t_len)
   if matches == 0 do
     0.0
   else
     {k, transpositions} = transposition(s, t, s_matches, t_matches)
     ((matches / s_len) +
      (matches / t_len) +
      ((matches - transpositions/2) / matches)) / 3
   end
 end
 
 defp matching(s, t, s_len, t_len) do
   match_distance = div(max(s_len, t_len), 2) - 1
   ac0 = {List.duplicate(false, s_len), List.duplicate(false, t_len), 0}
   Enum.reduce(0..s_len-1, ac0, fn i,acc ->
     j_start = max(0, i-match_distance)
     j_end = min(i+match_distance, t_len-1)
     Enum.reduce_while(j_start..j_end, acc, fn j,{sm,tm,m} ->
       if Enum.at(tm, j) or Enum.at(s, i) != Enum.at(t, j) do
         {:cont, {sm, tm, m}}
       else
         {:halt, { List.replace_at(sm, i, true),
                   List.replace_at(tm, j, true),
                   m + 1 }}
       end
     end)
   end)
 end
 
 defp transposition(s, t, s_matches, t_matches) do
   Enum.reduce(0..length(s)-1, {0,0}, fn i,{k,transpositions} ->
     if Enum.at(s_matches, i) do
       k = k + (Enum.drop(t_matches, k)
                |> Enum.take_while(fn matche -> not matche end)
                |> length)
       if Enum.at(s, i) == Enum.at(t, k), do: {k+1, transpositions},
                                        else: {k+1, transpositions+1}
     else
       {k, transpositions}
     end
   end)
 end

end

~w( MARTHA MARHTA

   DIXON     DICKSONX
   JELLYFISH SMELLYFISH )c

|> Enum.chunk(2) |> Enum.each(fn [s,t] ->

    :io.format "jaro(~s, ~s) = ~.10f~n", [inspect(s), inspect(t), Jaro.distance(s, t)]
  end)</lang>
Output:
jaro('MARTHA', 'MARHTA') = 0.9444444444
jaro('DIXON', 'DICKSONX') = 0.7666666667
jaro('JELLYFISH', 'SMELLYFISH') = 0.8962962963

Elixir has a built-in function (String.jaro_distance).

FreeBASIC

<lang freebasic>' version 09-10-2016 ' compile with: fbc -s console

  1. Macro max(x, y)
 IIf((x) > (y), (x), (y))
  1. EndMacro
  1. Macro min(x, y)
 IIf((x) < (y), (x), (y))
  1. EndMacro

Function jaro(word1 As String, word2 As String) As Double

 If Len(word1) > Len(word2) Then Swap word1, word2
 Dim As Long i, j, j1, m, t
 Dim As Long s1 = Len(word1)
 Dim As Long s2 = Len(word2)
 Dim As Long max_dist = s2 \ 2 -1  ' integer division
 For i = 0 To s1 -1 
   If word1[i] = word2[j] Then
     m = m +1
     word2[j] = 32
   Else
     For j1 = max(0, i - max_dist) To min(s2 -1, i + max_dist)
       If word1[i] = word2[j1] Then
         t = t +1
         m = m +1
         word2[j1] = 32
        If j1 > j Then j = j1
       End If
     Next
   End If
   j = j + 1
 Next

 If m = 0 Then Return 0
 t = t \ 2
 Return (m / s1 + m / s2 + ((m - t) / m)) / 3

End Function

' ------=< MAIN >=------

Print Print " jaro (MARTHA, MARHTA) ="; jaro("MARTHA", "MARHTA") Print " jaro (DIXON, DICKSONX) ="; jaro("DIXON", "DICKSONX") Print " jaro (JELLYFISH, SMELLYFISH) ="; jaro("JELLYFISH", "SMELLYFISH")


' empty keyboard buffer While Inkey <> "" : Wend Print : Print "hit any key to end program" Sleep End</lang>

Output:
 jaro (MARTHA,    MARHTA)     = 0.9444444444444444
 jaro (DIXON,     DICKSONX)   = 0.7666666666666667
 jaro (JELLYFISH, SMELLYFISH) = 0.8962962962962963

Go

<lang go>package main

import "fmt"

func jaro(str1, str2 string) float64 {

   if len(str1) == 0 && len(str2) == 0 {
       return 1
   }
   if len(str1) == 0 || len(str2) == 0 {
       return 0
   }
   match_distance := len(str1)
   if len(str2) > match_distance {
       match_distance = len(str2)
   }
   match_distance = match_distance/2 - 1
   str1_matches := make([]bool, len(str1))
   str2_matches := make([]bool, len(str2))
   matches := 0.
   transpositions := 0.
   for i := range str1 {
       start := i - match_distance
       if start < 0 {
           start = 0
       }
       end := i + match_distance + 1
       if end > len(str2) {
           end = len(str2)
       }
       for k := start; k < end; k++ {
           if str2_matches[k] {
               continue
           }
           if str1[i] != str2[k] {
               continue
           }
           str1_matches[i] = true
           str2_matches[k] = true
           matches++
           break
       }
   }
   if matches == 0 {
       return 0
   }
   k := 0
   for i := range str1 {
       if !str1_matches[i] {
           continue
       }
       for !str2_matches[k] {
           k++
       }
       if str1[i] != str2[k] {
           transpositions++
       }
       k++
   }
   transpositions /= 2
   return (matches/float64(len(str1)) +
       matches/float64(len(str2)) +
       (matches-transpositions)/matches) / 3

}

func main() {

   fmt.Printf("%f\n", jaro("MARTHA", "MARHTA"))
   fmt.Printf("%f\n", jaro("DIXON", "DICKSONX"))
   fmt.Printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"))

}</lang>

Output:
0.944444
0.766667
0.896296

Haxe

Translation of: Kotlin
Works with: Neko version 2.1.0

<lang Haxe>class Jaro {

   private static function jaro(s1: String, s2: String): Float {
       var s1_len = s1.length;
       var s2_len = s2.length;
       if (s1_len == 0 && s2_len == 0) return 1;

       var match_distance = Std.int(Math.max(s1_len, s2_len)) / 2 - 1; 
       var matches = { s1: [for(n in 0...s1_len) false], s2: [for(n in 0...s2_len) false] };
       var m = 0;
       for (i in 0...s1_len) {
           var start = Std.int(Math.max(0, i - match_distance));
           var end = Std.int(Math.min(i + match_distance + 1, s2_len));
           for (j in start...end)
               if (!matches.s2[j] && s1.charAt(i) == s2.charAt(j)) {

matches.s1[i] = true; matches.s2[j] = true; m++; break;

               }
       }
       if (m == 0) return 0;

       var k = 0;
       var t = 0.;
       for (i in 0...s1_len)
           if (matches.s1[i]) {
           	while (!matches.s2[k]) k++;
           	if (s1.charAt(i) != s2.charAt(k++)) t += 0.5;
           }

       return (m / s1_len + m / s2_len + (m - t) / m) / 3.0;
   }

   public static function main() {
       Sys.println(jaro(   "MARTHA",      "MARHTA"));
       Sys.println(jaro(    "DIXON",    "DICKSONX"));
       Sys.println(jaro("JELLYFISH",  "SMELLYFISH"));
   }

}</lang>

Output:
0.944444444444445
0.766666666666667
0.896296296296296

J

Implementation:

<lang J>jaro=: dyad define

 d=. ((x >.&# y)%2)-1
 e=. (x =/y) * d >: |x -/&(i.@#) y
 xm=. (+./"1 e)#x
 ym=. (+./"2 e)#y
 m=. xm <.&# ym
 t=. (+/xm ~:&(m&{.) ym)%2
 s1=. #x
 s2=. #y
 ((m%s1)+(m%s2)+(m-t)%m)%3

)</lang>

Task examples:

<lang J> 'MARTHA' jaro 'MARHTA' 0.944444

  'DIXON' jaro 'DICKSONX'

0.766667

  'JELLYFISH' jaro 'SMELLYFISH'

0.896296</lang>

Java

<lang java>public class JaroDistance {

   public static double jaro(String s, String t) {
       int s_len = s.length();
       int t_len = t.length();
       if (s_len == 0 && t_len == 0) return 1;
       int match_distance = Integer.max(s_len, t_len) / 2 - 1;
       boolean[] s_matches = new boolean[s_len];
       boolean[] t_matches = new boolean[t_len];
       int matches = 0;
       int transpositions = 0;
       for (int i = 0; i < s_len; i++) {
           int start = Integer.max(0, i-match_distance);
           int end = Integer.min(i+match_distance+1, t_len);
           for (int j = start; j < end; j++) {
               if (t_matches[j]) continue;
               if (s.charAt(i) != t.charAt(j)) continue;
               s_matches[i] = true;
               t_matches[j] = true;
               matches++;
               break;
           }
       }
       if (matches == 0) return 0;
       int k = 0;
       for (int i = 0; i < s_len; i++) {
           if (!s_matches[i]) continue;
           while (!t_matches[k]) k++;
           if (s.charAt(i) != t.charAt(k)) transpositions++;
           k++;
       }
       return (((double)matches / s_len) +
               ((double)matches / t_len) +
               (((double)matches - transpositions/2.0) / matches)) / 3.0;
   }
   public static void main(String[] args) {
       System.out.println(jaro(   "MARTHA",      "MARHTA"));
       System.out.println(jaro(    "DIXON",    "DICKSONX"));
       System.out.println(jaro("JELLYFISH",  "SMELLYFISH"));
   }

}</lang>

Output:
0.9444444444444445
0.7666666666666666
0.8962962962962964

Kotlin

Translation of: Java

<lang scala>object Jaro {

   fun distance(s1: String, s2: String): Double {
       val s1_len = s1.length
       val s2_len = s2.length
       if (s1_len == 0 && s2_len == 0) return 1.0
       val match_distance = Math.max(s1_len, s2_len) / 2 - 1
       val s1_matches = BooleanArray(s1_len)
       val s2_matches = BooleanArray(s2_len)
       var matches = 0
       for (i in 0..s1_len - 1) {
           val start = Math.max(0, i - match_distance)
           val end = Math.min(i + match_distance + 1, s2_len)
           (start..end - 1).find { j -> !s2_matches[j] && s1[i] == s2[j] } ?. let {
               s1_matches[i] = true
               s2_matches[it] = true
               matches++
           }
       }
       if (matches == 0) return 0.0
       var t = 0.0
       var k = 0
       (0..s1_len - 1).filter { s1_matches[it] }.forEach { i ->
           while (!s2_matches[k]) k++
           if (s1[i] != s2[k]) t += 0.5
           k++
       }
       val m = matches.toDouble()
       return (m / s1_len + m / s2_len + (m - t) / m) / 3.0
   }

}

fun main(args: Array<String>) {

   println(Jaro.distance("MARTHA", "MARHTA"))
   println(Jaro.distance("DIXON", "DICKSONX"))
   println(Jaro.distance("JELLYFISH", "SMELLYFISH"))

}</lang>

PARI/GP

This version was translated from Java and Perl.

Works with: PARI/GP version 2.7.4 and above

<lang parigp> \\Jaro distance between 2 strings s1 and s2. \\ 4/12/16 aev jaroDist(s1,s2)={ my(vt1=Vecsmall(s1),vt2=Vecsmall(s2),n1=#s1,n2=#s2,d,

  md=max(n1,n2)\2-1,cs,ce,mc=0,tr=0,k=1,ds,
  s1m=vector(n1,z,0),s2m=vector(n2,z,0));

if(!n1||!n2, return(0)); for(i=1,n1,

 cs=max(1,i-md);
 ce=min(i+md+1,n2);
 for(j=cs,ce,
   if(s2m[j],next);
   if(vt1[i]!=vt2[j], next);
   mc++; s1m[i]=1; s2m[j]=1; break;
 );\\fend j

);\\fend i if(!mc, return(0)); for(i=1,n1,

 if(!s1m[i], next);
 while(!s2m[k], k++);
 if(vt1[i]!=vt2[k], tr++);
 k++

);\\fend i d=(mc/n1+mc/n2+(mc-tr/2)/mc)/3.0; ds=Strprintf("%.5f",d); print(" *** Jaro distance is: ",ds," for strings: ",s1,", ",s2); return(d); }

{ \\ Testing: jaroDist("MARTHA","MARHTA"); jaroDist("DIXON","DICKSONX"); jaroDist("JELLYFISH","SMELLYFISH"); jaroDist("DWAYNE","DUANE"); } </lang>

Output:
 *** Jaro distance is: 0.94444 for strings: MARTHA, MARHTA
 *** Jaro distance is: 0.76667 for strings: DIXON, DICKSONX
 *** Jaro distance is: 0.89630 for strings: JELLYFISH, SMELLYFISH
 *** Jaro distance is: 0.82222 for strings: DWAYNE, DUANE

Perl

<lang perl>use List::Util qw(min max);

sub jaro {

   my ($s, $t) = @_;
   my $s_len = length($s);
   my $t_len = length($t);
   return 1 if $s_len == 0 and $t_len == 0;
   my $match_distance = int(max($s_len, $t_len) / 2) - 1;
   my @s_matches;
   my @t_matches;
   my @s = split(//, $s);
   my @t = split(//, $t);
   my $matches = 0;
   foreach my $i (0 .. $#s) {
       my $start = max(0, $i - $match_distance);
       my $end = min($i + $match_distance + 1, $t_len);
       foreach my $j ($start .. $end - 1) {
           $t_matches[$j] and next;
           $s[$i] eq $t[$j] or next;
           $s_matches[$i] = 1;
           $t_matches[$j] = 1;
           $matches++;
           last;
       }
   }
   return 0 if $matches == 0;
   my $k              = 0;
   my $transpositions = 0;
   foreach my $i (0 .. $#s) {
       $s_matches[$i] or next;
       until ($t_matches[$k]) { ++$k }
       $s[$i] eq $t[$k] or ++$transpositions;
       ++$k;
   }
   (($matches / $s_len) + ($matches / $t_len) +
       (($matches - $transpositions / 2) / $matches)) / 3;

}

printf("%f\n", jaro("MARTHA", "MARHTA")); printf("%f\n", jaro("DIXON", "DICKSONX")); printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"));</lang>

Output:
0.944444
0.766667
0.896296

Perl 6

Translation of: Perl

<lang perl6>sub jaro ($s, $t) {

   return 1 if $s eq $t;
   my $s_len = + my @s = $s.comb;
   my $t_len = + my @t = $t.comb;
   my $match_distance = ($s_len max $t_len) div 2 - 1;
   my @s_matches;
   my @t_matches;
   my $matches = 0;
   for ^@s -> $i {
       my $start = 0 max $i - $match_distance;
       my $end = $i + $match_distance min $t_len;
       for $start .. $end -> $j {
           @t_matches[$j] and next;
           @s[$i] eq @t[$j] or next;
           @s_matches[$i] = 1;
           @t_matches[$j] = 1;
           $matches++;
           last;
       }
   }
   return 0 if $matches == 0;
   my $k              = 0;
   my $transpositions = 0;
   for ^@s -> $i {
       @s_matches[$i] or next;
       until @t_matches[$k] { ++$k }
       @s[$i] eq @t[$k] or ++$transpositions;
       ++$k;
   }
   ($matches / $s_len + $matches / $t_len +
       (($matches - $transpositions / 2) / $matches)) / 3;

}

printf("%f\n", jaro("MARTHA", "MARHTA")); printf("%f\n", jaro("DIXON", "DICKSONX")); printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"));</lang>

Output:
0.944444
0.766667
0.896296

Python

<lang python>from __future__ import division

def jaro(s, t):

   s_len = len(s)
   t_len = len(t)
   if s_len == 0 and t_len == 0:
       return 1
   match_distance = (max(s_len, t_len) // 2) - 1
   s_matches = [False] * s_len
   t_matches = [False] * t_len
   matches = 0
   transpositions = 0
   for i in range(s_len):
       start = max(0, i-match_distance)
       end = min(i+match_distance+1, t_len)
       for j in range(start, end):
           if t_matches[j]:
               continue
           if s[i] != t[j]:
               continue
           s_matches[i] = True
           t_matches[j] = True
           matches += 1
           break
   if matches == 0:
       return 0
   k = 0
   for i in range(s_len):
       if not s_matches[i]:
           continue
       while not t_matches[k]:
           k += 1
       if s[i] != t[k]:
           transpositions += 1
       k += 1
   return ((matches / s_len) +
           (matches / t_len) +
           ((matches - transpositions/2) / matches)) / 3

for s,t in [( 'MARTHA', 'MARHTA'),

           (    'DIXON',    'DICKSONX'),
           ('JELLYFISH',  'SMELLYFISH')]:
   print("jaro(%r, %r) = %.10f" % (s, t, jaro(s, t)))</lang>
Output:
jaro('MARTHA', 'MARHTA') = 0.9444444444
jaro('DIXON', 'DICKSONX') = 0.7666666667
jaro('JELLYFISH', 'SMELLYFISH') = 0.8962962963

Racket

Translation of: C

(kinda)

Returns an exact value for the Jaro distance.

<lang racket>#lang racket/base

Translation of: C

(require data/bit-vector)

(define (jaro-distance str1 str2)

 (define str1-len (string-length str1))
 (define str2-len (string-length str2))
 (cond
   [(and (zero? str1-len) (zero? str2-len)) 0]
   [(or  (zero? str1-len) (zero? str2-len)) 1]
   [else     
    ;; vectors of bools that signify if that char in the matching string has a match
    (define str1-matches (make-bit-vector str1-len))
    (define str2-matches (make-bit-vector str2-len))
    (define matches
      ;; max distance between two chars to be considered matching
      (let ((match-distance (sub1 (quotient (max str1-len str2-len) 2))))
        (for/fold ((matches 0))
                  ((i (in-range 0 str1-len))
                   (c1 (in-string str1)))
          (define start (max 0 (- i match-distance)))
          (define end (min (+ i match-distance 1) str2-len))        
          (for/fold ((matches matches))
                    ((k (in-range start end))
                     (c2 (in-string str2 start))
                     #:unless (bit-vector-ref str2-matches k) ; if str2 already has a match continue
                     #:when (char=? c1 c2) ; if str1 and str2 are not
                     #:final #t)
            ;; otherwise assume there is a match
            (bit-vector-set! str1-matches i #t)
            (bit-vector-set! str2-matches k #t)
            (add1 matches)))))
    (cond
      [(zero? matches) 0]
      [else                                
       (define-values (transpositions*2 k+)
         (for/fold ((transpositions 0) (k 0))
                   ((i (in-range 0 str1-len))
                    (c1 (in-string str1))
                    (b1 (in-bit-vector str1-matches))
                    ;; if there are no matches in str1 continue
                    #:when b1)
           (define k+ (for/first ((k+ (in-range k str2-len))
                                  (b2 (in-bit-vector str2-matches k))
                                  #:when b2)
                        k+))        
           (values
            (+ transpositions (if (char=? c1 (string-ref str2 k+)) 0 1)) ; increment transpositions
            (add1 k+)))) ;; while there is no match in str2 increment k             
       
       ;; divide the number of transpositions by two as per the algorithm specs
       ;; this division is valid because the counted transpositions include both
       ;; instances of the transposed characters.
       (define transpositions (quotient transpositions*2 2))
       
       ;; return the Jaro distance
       (/ (+ (/ matches str1-len)
             (/ matches str2-len)
             (/ (- matches transpositions) matches))
          3)])]))

(module+ test

 (jaro-distance "MARTHA"    "MARHTA"); 0.944444 
 (exact->inexact (jaro-distance "MARTHA"    "MARHTA")); 0.944444
 (jaro-distance "DIXON"     "DICKSONX"); 0.766667
 (exact->inexact (jaro-distance "DIXON"     "DICKSONX")); 0.766667
 (jaro-distance "JELLYFISH" "SMELLYFISH"); 0.896296
 (exact->inexact (jaro-distance "JELLYFISH" "SMELLYFISH"))); 0.896296</lang>
Output:

The exact->inexact calls in the tests give an inexact, floating point version of the rational values.

17/18
0.9444444444444444
23/30
0.7666666666666667
121/135
0.8962962962962963

REXX

<lang rexx>/*REXX program computes the Jaro distance between two strings (or a list of strings).*/ @.= /*define a default for the @. array. */ parse arg @.1 /*obtain an optional character string. */ if @.1= then do /*nothing specified? Use the defaults.*/

               @.1= 'MARTHA    MARHTA'
               @.2= 'DIXON     DICKSONX'
               @.3= 'JELLYFISH SMELLYFISH'
               @.4= 'DWAYNE    DUANE'
               end
      do j=1  while @.j\==                    /*process all the strings in the list. */
      d=jaroDist(@.j)
      say 'Jaro distance is  '         format(d, , 5)        " for strings: "        @.j
      end   /*j*/                               /* └──── digits past the decimal point.*/

exit /*stick a fork in it, we're all done. */ /*──────────────────────────────────────────────────────────────────────────────────────*/ jaroDist: procedure; arg s.1 s.2 .; L1=length(s.1); L2=length(s.2); m=0

         if L1==0 | L2==0  then return 0        /*check if any string is a null string.*/
         f=max(L1,L2)%2 - 1                     /*calculate furthest distanced allowed.*/
         r.=0                                   /* [↓]  see if the char is near enough.*/
              do k=1  for L1;        p=pos(substr(s.1, k, 1), s.2, max(1, k-f));    r.k=p
              if p\==0 & abs(p-k)<=f then m=m+1 /*if near enough, count it as a match. */
                                     else r.k=0 /*       ··· otherwise, don't count it.*/
              end   /*k*/
         t=0
              do o=1  for L1;        om=o-1
              if pos( substr(s.1, o, 1), s.2)==0  |  r.o==0  |  r.om==0  then iterate
              if r.o<r.om  then t=t+1
              end   /*o*/                       /* [↑]  count number of transpositions.*/
         if m==0  then return 0
                       return (m/L1 + m/L2 + (m-t)/m) / 3</lang>

output   when using the default inputs:

Jaro distance is   0.94444  for strings:  MARTHA    MARHTA
Jaro distance is   0.76667  for strings:  DIXON     DICKSONX
Jaro distance is   0.89630  for strings:  JELLYFISH SMELLYFISH
Jaro distance is   0.82222  for strings:  DWAYNE    DUANE

Ruby

<lang ruby>def jaro(s, t)

   return 1.0 if s == t
   
   s_len = s.size
   t_len = t.size
   match_distance = ([s_len, t_len].max / 2) - 1
   
   s_matches = []
   t_matches = []
   matches = 0.0
   
   s_len.times do |i|
       j_start = [0, i-match_distance].max
       j_end = [i+match_distance, t_len-1].min
       
       (j_start..j_end).each do |j|
           t_matches[j] && next
           s[i] == t[j] || next
           s_matches[i] = true
           t_matches[j] = true
           matches += 1.0
           break
       end
   end
   
   return 0.0 if matches == 0.0
   
   k = 0
   transpositions = 0.0
   s_len.times do |i|
       s_matches[i] || next
       k += 1 until t_matches[k]
       s[i] == t[k] || (transpositions += 1.0)
       k += 1
   end
   
   ((matches / s_len) +
    (matches / t_len) +
    ((matches - transpositions/2.0) / matches)) / 3.0

end

%w(

   MARTHA    MARHTA
   DIXON     DICKSONX
   JELLYFISH SMELLYFISH

).each_slice(2) do |s,t|

   puts "jaro(#{s.inspect}, #{t.inspect}) = #{'%.10f' % jaro(s, t)}"

end</lang>

Output:
jaro("MARTHA", "MARHTA") = 0.9444444444
jaro("DIXON", "DICKSONX") = 0.7666666667
jaro("JELLYFISH", "SMELLYFISH") = 0.8962962963

Rust

Translation of: C++

<lang rust>use std::cmp;

pub fn jaro(s1: &str, s2: &str) -> f64 {

   let s1_len = s1.len();
   let s2_len = s2.len();
   if s1_len == 0 && s2_len == 0 { return 1.0; }
   let match_distance = cmp::max(s1_len, s2_len) / 2 - 1;
   let mut s1_matches = vec![false; s1_len];
   let mut s2_matches = vec![false; s2_len];
   let mut m: isize = 0;
   for i in 0..s1_len {
       let start = cmp::max(0, i as isize - match_distance as isize) as usize;
       let end = cmp::min(i + match_distance + 1, s2_len);
       for j in start..end {
           if !s2_matches[j] && s1.as_bytes()[i] == s2.as_bytes()[j] {
               s1_matches[i] = true;
               s2_matches[j] = true;
               m += 1;
               break;
           }
       }
   }
   if m == 0 { return 0.0; }
   let mut t = 0.0;
   let mut k = 0;
   for i in 0..s1_len {
       if s1_matches[i] {
           while !s2_matches[k] { k += 1; }
           if s1.as_bytes()[i] != s2.as_bytes()[k] { t += 0.5; }
           k += 1;
       }
   }
   let m = m as f64;
   (m / s1_len as f64 + m / s2_len as f64 + (m  - t) / m) / 3.0

}

fn main() {

   let pairs = [("MARTHA", "MARHTA"), ("DIXON", "DICKSONX"), ("JELLYFISH", "SMELLYFISH")];
   for p in pairs.iter() { println!("{}/{} = {}", p.0, p.1, jaro(p.0, p.1)); }

}</lang>

Output:
MARTHA/MARHTA = 0.9444444444444445
DIXON/DICKSONX = 0.7666666666666666
JELLYFISH/SMELLYFISH = 0.8962962962962964

Scala

Translation of: Java

<lang scala>object Jaro extends App {

   def distance(s1: String, s2: String): Double = {
       val s1_len = s1.length
       val s2_len = s2.length
       if (s1_len == 0 && s2_len == 0) return 1.0
       val match_distance = Math.max(s1_len, s2_len) / 2 - 1
       val s1_matches = Array.ofDim[Boolean](s1_len)
       val s2_matches = Array.ofDim[Boolean](s2_len)
       var matches = 0
       for (i <- 0 until s1_len) {
           val start = Math.max(0, i - match_distance)
           val end = Math.min(i + match_distance + 1, s2_len)
           start until end find { j => !s2_matches(j) && s1(i) == s2(j) } match {
               case Some(j) =>
                   s1_matches(i) = true
                   s2_matches(j) = true
                   matches += 1
               case None =>
           }
       }
       if (matches == 0) return 0.0
       var t = 0.0
       var k = 0
       0 until s1_len filter s1_matches foreach { i =>
           while (!s2_matches(k)) k += 1
           if (s1(i) != s2(k)) t += 0.5
           k += 1
       }
       val m = matches.toDouble
       (m / s1_len + m / s2_len + (m - t) / m) / 3.0
   }
   val strings = List(("MARTHA", "MARHTA"), ("DIXON", "DICKSONX"), ("JELLYFISH", "SMELLYFISH"))
   strings.foreach { s => println(distance(s._1, s._2)) }

}</lang>

Sidef

<lang ruby>func jaro(s, t) {

   return 1 if (s == t)
   var s_len = s.len
   var t_len = t.len
   var match_distance = ((::max(s_len, t_len) // 2) - 1)
   var s_matches = []
   var t_matches = []
   var matches = 0
   var transpositions = 0
   for i in ^s_len {
       var start = ::max(0, i-match_distance)
       var end = ::min(i+match_distance, t_len-1)
       for k in (start .. end) {
           t_matches[k] && next
           s[i] == t[k] || next
           s_matches[i] = true
           t_matches[k] = true
           matches++
           break
       }
   }
   return 0 if (matches == 0)
   var k = 0
   for i in ^s_len {
       s_matches[i] || next
       while (!t_matches[k]) { ++k }
       s[i] == t[k] || ++transpositions
       ++k
   }
   ((matches / s_len) +
     (matches / t_len) +
       ((matches - transpositions/2) / matches)) / 3

}

for pair in [

   [%c"MARTHA",    %c"MARHTA"],
   [%c"DIXON",     %c"DICKSONX"],
   [%c"JELLYFISH", %c"SMELLYFISH"],

] {

   say "jaro(#{pair.map{.join.dump}.join(', ')}) = #{'%.10f' % jaro(pair...)}"

}</lang>

Output:
jaro("MARTHA", "MARHTA") = 0.9444444444
jaro("DIXON", "DICKSONX") = 0.7666666667
jaro("JELLYFISH", "SMELLYFISH") = 0.8962962963

zkl

<lang zkl> //-->String of matched characters, ordered fcn _jaro(str1,str2, matchDistance){

  cs:=Sink(String);
  foreach i,c in ([0..].zip(str1)){
     str2.find(c,(0).max(i - matchDistance),i + matchDistance) :
     if(Void!=_) cs.write(c);
  }
  cs.close()

}

fcn jaro(str1,str2){

  s1Len,s2Len,matchDistance := str1.len(), str2.len(), s1Len.max(s2Len)/2 - 1;
  cs12,cs21 := _jaro(str1,str2, matchDistance), _jaro(str2,str1, matchDistance); 
  matches:=cs12.len().toFloat();
  if(not matches) return(0.0);
  transpositions:=cs12.walker().zipWith('!=,cs21).filter().sum(0)/2;
  
  ( matches/s1Len + matches/s2Len +
     ((matches - transpositions)/matches) ) / 3.0

}</lang> <lang zkl>foreach s,t in (T(

    T("MARTHA","MARHTA"), T("DIXON","DICKSONX"), T("JELLYFISH","SMELLYFISH"))){
  println(0'|jaro("%s","%s") = %.10f|.fmt(s,t,jaro(s,t)));

}</lang>

Output:
jaro("MARTHA","MARHTA") = 0.9444444444
jaro("DIXON","DICKSONX") = 0.7666666667
jaro("JELLYFISH","SMELLYFISH") = 0.8962962963