1// Fuzzy searching allows for flexibly matching a string with partial input,
2// useful for filtering data very quickly based on lightweight user input.
3package fuzzy
4
5import (
6	"unicode"
7	"unicode/utf8"
8)
9
10var noop = func(r rune) rune { return r }
11
12// Match returns true if source matches target using a fuzzy-searching
13// algorithm. Note that it doesn't implement Levenshtein distance (see
14// RankMatch instead), but rather a simplified version where there's no
15// approximation. The method will return true only if each character in the
16// source can be found in the target and occurs after the preceding matches.
17func Match(source, target string) bool {
18	return match(source, target, noop)
19}
20
21// MatchFold is a case-insensitive version of Match.
22func MatchFold(source, target string) bool {
23	return match(source, target, unicode.ToLower)
24}
25
26func match(source, target string, fn func(rune) rune) bool {
27	lenDiff := len(target) - len(source)
28
29	if lenDiff < 0 {
30		return false
31	}
32
33	if lenDiff == 0 && source == target {
34		return true
35	}
36
37Outer:
38	for _, r1 := range source {
39		for i, r2 := range target {
40			if fn(r1) == fn(r2) {
41				target = target[i+utf8.RuneLen(r2):]
42				continue Outer
43			}
44		}
45		return false
46	}
47
48	return true
49}
50
51// Find will return a list of strings in targets that fuzzy matches source.
52func Find(source string, targets []string) []string {
53	return find(source, targets, noop)
54}
55
56// FindFold is a case-insensitive version of Find.
57func FindFold(source string, targets []string) []string {
58	return find(source, targets, unicode.ToLower)
59}
60
61func find(source string, targets []string, fn func(rune) rune) []string {
62	var matches []string
63
64	for _, target := range targets {
65		if match(source, target, fn) {
66			matches = append(matches, target)
67		}
68	}
69
70	return matches
71}
72
73// RankMatch is similar to Match except it will measure the Levenshtein
74// distance between the source and the target and return its result. If there
75// was no match, it will return -1.
76// Given the requirements of match, RankMatch only needs to perform a subset of
77// the Levenshtein calculation, only deletions need be considered, required
78// additions and substitutions would fail the match test.
79func RankMatch(source, target string) int {
80	return rank(source, target, noop)
81}
82
83// RankMatchFold is a case-insensitive version of RankMatch.
84func RankMatchFold(source, target string) int {
85	return rank(source, target, unicode.ToLower)
86}
87
88func rank(source, target string, fn func(rune) rune) int {
89	lenDiff := len(target) - len(source)
90
91	if lenDiff < 0 {
92		return -1
93	}
94
95	if lenDiff == 0 && source == target {
96		return 0
97	}
98
99	runeDiff := 0
100
101Outer:
102	for _, r1 := range source {
103		for i, r2 := range target {
104			if fn(r1) == fn(r2) {
105				target = target[i+utf8.RuneLen(r2):]
106				continue Outer
107			} else {
108				runeDiff++
109			}
110		}
111		return -1
112	}
113
114	// Count up remaining char
115	runeDiff += utf8.RuneCountInString(target)
116
117	return runeDiff
118}
119
120// RankFind is similar to Find, except it will also rank all matches using
121// Levenshtein distance.
122func RankFind(source string, targets []string) Ranks {
123	var r Ranks
124
125	for index, target := range targets {
126		if match(source, target, noop) {
127			distance := LevenshteinDistance(source, target)
128			r = append(r, Rank{source, target, distance, index})
129		}
130	}
131	return r
132}
133
134// RankFindFold is a case-insensitive version of RankFind.
135func RankFindFold(source string, targets []string) Ranks {
136	var r Ranks
137
138	for index, target := range targets {
139		if match(source, target, unicode.ToLower) {
140			distance := LevenshteinDistance(source, target)
141			r = append(r, Rank{source, target, distance, index})
142		}
143	}
144	return r
145}
146
147type Rank struct {
148	// Source is used as the source for matching.
149	Source string
150
151	// Target is the word matched against.
152	Target string
153
154	// Distance is the Levenshtein distance between Source and Target.
155	Distance int
156
157	// Location of Target in original list
158	OriginalIndex int
159}
160
161type Ranks []Rank
162
163func (r Ranks) Len() int {
164	return len(r)
165}
166
167func (r Ranks) Swap(i, j int) {
168	r[i], r[j] = r[j], r[i]
169}
170
171func (r Ranks) Less(i, j int) bool {
172	return r[i].Distance < r[j].Distance
173}
174