1package smetrics 2 3import ( 4 "math" 5) 6 7// The Jaro distance. The result is 1 for equal strings, and 0 for completely different strings. 8func Jaro(a, b string) float64 { 9 // If both strings are zero-length, they are completely equal, 10 // therefore return 1. 11 if len(a) == 0 && len(b) == 0 { 12 return 1 13 } 14 15 // If one string is zero-length, strings are completely different, 16 // therefore return 0. 17 if len(a) == 0 || len(b) == 0 { 18 return 0 19 } 20 21 // Define the necessary variables for the algorithm. 22 la := float64(len(a)) 23 lb := float64(len(b)) 24 matchRange := int(math.Max(0, math.Floor(math.Max(la, lb)/2.0)-1)) 25 matchesA := make([]bool, len(a)) 26 matchesB := make([]bool, len(b)) 27 var matches float64 = 0 28 29 // Step 1: Matches 30 // Loop through each character of the first string, 31 // looking for a matching character in the second string. 32 for i := 0; i < len(a); i++ { 33 start := int(math.Max(0, float64(i-matchRange))) 34 end := int(math.Min(lb-1, float64(i+matchRange))) 35 36 for j := start; j <= end; j++ { 37 if matchesB[j] { 38 continue 39 } 40 41 if a[i] == b[j] { 42 matchesA[i] = true 43 matchesB[j] = true 44 matches++ 45 break 46 } 47 } 48 } 49 50 // If there are no matches, strings are completely different, 51 // therefore return 0. 52 if matches == 0 { 53 return 0 54 } 55 56 // Step 2: Transpositions 57 // Loop through the matches' arrays, looking for 58 // unaligned matches. Count the number of unaligned matches. 59 unaligned := 0 60 j := 0 61 for i := 0; i < len(a); i++ { 62 if !matchesA[i] { 63 continue 64 } 65 66 for !matchesB[j] { 67 j++ 68 } 69 70 if a[i] != b[j] { 71 unaligned++ 72 } 73 74 j++ 75 } 76 77 // The number of unaligned matches divided by two, is the number of _transpositions_. 78 transpositions := math.Floor(float64(unaligned / 2)) 79 80 // Jaro distance is the average between these three numbers: 81 // 1. matches / length of string A 82 // 2. matches / length of string B 83 // 3. (matches - transpositions/matches) 84 // So, all that divided by three is the final result. 85 return ((matches / la) + (matches / lb) + ((matches - transpositions) / matches)) / 3.0 86} 87