1package smetrics
2
3import (
4	"math"
5)
6
7// The Jaro distance. The result is 1 for equal strings, and 0 for completely different strings.
8func Jaro(a, b string) float64 {
9	// If both strings are zero-length, they are completely equal,
10	// therefore return 1.
11	if len(a) == 0 && len(b) == 0 {
12		return 1
13	}
14
15	// If one string is zero-length, strings are completely different,
16	// therefore return 0.
17	if len(a) == 0 || len(b) == 0 {
18		return 0
19	}
20
21	// Define the necessary variables for the algorithm.
22	la := float64(len(a))
23	lb := float64(len(b))
24	matchRange := int(math.Max(0, math.Floor(math.Max(la, lb)/2.0)-1))
25	matchesA := make([]bool, len(a))
26	matchesB := make([]bool, len(b))
27	var matches float64 = 0
28
29	// Step 1: Matches
30	// Loop through each character of the first string,
31	// looking for a matching character in the second string.
32	for i := 0; i < len(a); i++ {
33		start := int(math.Max(0, float64(i-matchRange)))
34		end := int(math.Min(lb-1, float64(i+matchRange)))
35
36		for j := start; j <= end; j++ {
37			if matchesB[j] {
38				continue
39			}
40
41			if a[i] == b[j] {
42				matchesA[i] = true
43				matchesB[j] = true
44				matches++
45				break
46			}
47		}
48	}
49
50	// If there are no matches, strings are completely different,
51	// therefore return 0.
52	if matches == 0 {
53		return 0
54	}
55
56	// Step 2: Transpositions
57	// Loop through the matches' arrays, looking for
58	// unaligned matches. Count the number of unaligned matches.
59	unaligned := 0
60	j := 0
61	for i := 0; i < len(a); i++ {
62		if !matchesA[i] {
63			continue
64		}
65
66		for !matchesB[j] {
67			j++
68		}
69
70		if a[i] != b[j] {
71			unaligned++
72		}
73
74		j++
75	}
76
77	// The number of unaligned matches divided by two, is the number of _transpositions_.
78	transpositions := math.Floor(float64(unaligned / 2))
79
80	// Jaro distance is the average between these three numbers:
81	// 1. matches / length of string A
82	// 2. matches / length of string B
83	// 3. (matches - transpositions/matches)
84	// So, all that divided by three is the final result.
85	return ((matches / la) + (matches / lb) + ((matches - transpositions) / matches)) / 3.0
86}
87