1// Fuzzy searching allows for flexibly matching a string with partial input, 2// useful for filtering data very quickly based on lightweight user input. 3package fuzzy 4 5import ( 6 "unicode" 7 "unicode/utf8" 8) 9 10var noop = func(r rune) rune { return r } 11 12// Match returns true if source matches target using a fuzzy-searching 13// algorithm. Note that it doesn't implement Levenshtein distance (see 14// RankMatch instead), but rather a simplified version where there's no 15// approximation. The method will return true only if each character in the 16// source can be found in the target and occurs after the preceding matches. 17func Match(source, target string) bool { 18 return match(source, target, noop) 19} 20 21// MatchFold is a case-insensitive version of Match. 22func MatchFold(source, target string) bool { 23 return match(source, target, unicode.ToLower) 24} 25 26func match(source, target string, fn func(rune) rune) bool { 27 lenDiff := len(target) - len(source) 28 29 if lenDiff < 0 { 30 return false 31 } 32 33 if lenDiff == 0 && source == target { 34 return true 35 } 36 37Outer: 38 for _, r1 := range source { 39 for i, r2 := range target { 40 if fn(r1) == fn(r2) { 41 target = target[i+utf8.RuneLen(r2):] 42 continue Outer 43 } 44 } 45 return false 46 } 47 48 return true 49} 50 51// Find will return a list of strings in targets that fuzzy matches source. 52func Find(source string, targets []string) []string { 53 return find(source, targets, noop) 54} 55 56// FindFold is a case-insensitive version of Find. 57func FindFold(source string, targets []string) []string { 58 return find(source, targets, unicode.ToLower) 59} 60 61func find(source string, targets []string, fn func(rune) rune) []string { 62 var matches []string 63 64 for _, target := range targets { 65 if match(source, target, fn) { 66 matches = append(matches, target) 67 } 68 } 69 70 return matches 71} 72 73// RankMatch is similar to Match except it will measure the Levenshtein 74// distance between the source and the target and return its result. If there 75// was no match, it will return -1. 76// Given the requirements of match, RankMatch only needs to perform a subset of 77// the Levenshtein calculation, only deletions need be considered, required 78// additions and substitutions would fail the match test. 79func RankMatch(source, target string) int { 80 return rank(source, target, noop) 81} 82 83// RankMatchFold is a case-insensitive version of RankMatch. 84func RankMatchFold(source, target string) int { 85 return rank(source, target, unicode.ToLower) 86} 87 88func rank(source, target string, fn func(rune) rune) int { 89 lenDiff := len(target) - len(source) 90 91 if lenDiff < 0 { 92 return -1 93 } 94 95 if lenDiff == 0 && source == target { 96 return 0 97 } 98 99 runeDiff := 0 100 101Outer: 102 for _, r1 := range source { 103 for i, r2 := range target { 104 if fn(r1) == fn(r2) { 105 target = target[i+utf8.RuneLen(r2):] 106 continue Outer 107 } else { 108 runeDiff++ 109 } 110 } 111 return -1 112 } 113 114 // Count up remaining char 115 runeDiff += utf8.RuneCountInString(target) 116 117 return runeDiff 118} 119 120// RankFind is similar to Find, except it will also rank all matches using 121// Levenshtein distance. 122func RankFind(source string, targets []string) Ranks { 123 var r Ranks 124 125 for index, target := range targets { 126 if match(source, target, noop) { 127 distance := LevenshteinDistance(source, target) 128 r = append(r, Rank{source, target, distance, index}) 129 } 130 } 131 return r 132} 133 134// RankFindFold is a case-insensitive version of RankFind. 135func RankFindFold(source string, targets []string) Ranks { 136 var r Ranks 137 138 for index, target := range targets { 139 if match(source, target, unicode.ToLower) { 140 distance := LevenshteinDistance(source, target) 141 r = append(r, Rank{source, target, distance, index}) 142 } 143 } 144 return r 145} 146 147type Rank struct { 148 // Source is used as the source for matching. 149 Source string 150 151 // Target is the word matched against. 152 Target string 153 154 // Distance is the Levenshtein distance between Source and Target. 155 Distance int 156 157 // Location of Target in original list 158 OriginalIndex int 159} 160 161type Ranks []Rank 162 163func (r Ranks) Len() int { 164 return len(r) 165} 166 167func (r Ranks) Swap(i, j int) { 168 r[i], r[j] = r[j], r[i] 169} 170 171func (r Ranks) Less(i, j int) bool { 172 return r[i].Distance < r[j].Distance 173} 174