1 // LICENSE
2 //
3 //   This software is dual-licensed to the public domain and under the following
4 //   license: you are granted a perpetual, irrevocable license to copy, modify,
5 //   publish, and distribute this file as you see fit.
6 
7 #include "config.h"
8 
9 #include "fts_fuzzy_match.hh"
10 
11 namespace fts {
12 
13     // Forward declarations for "private" implementation
14     namespace fuzzy_internal {
15         static bool fuzzy_match_recursive(const char * pattern, const char * str, int & outScore, const char * strBegin,
16                                           uint8_t const * srcMatches,  uint8_t * newMatches,  int maxMatches, int nextMatch,
17                                           int & recursionCount, int recursionLimit);
18     }
19 
20     // Public interface
fuzzy_match_simple(char const * pattern,char const * str)21     bool fuzzy_match_simple(char const * pattern, char const * str) {
22         while (*pattern != '\0' && *str != '\0')  {
23             if (tolower(*pattern) == tolower(*str))
24                 ++pattern;
25             ++str;
26         }
27 
28         return *pattern == '\0' ? true : false;
29     }
30 
fuzzy_match(char const * pattern,char const * str,int & outScore)31     bool fuzzy_match(char const * pattern, char const * str, int & outScore) {
32         uint8_t matches[256];
33         return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
34     }
35 
fuzzy_match(char const * pattern,char const * str,int & outScore,uint8_t * matches,int maxMatches)36     bool fuzzy_match(char const * pattern, char const * str, int & outScore, uint8_t * matches, int maxMatches) {
37         int recursionCount = 0;
38         int recursionLimit = 10;
39 
40         return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit);
41     }
42 
43     // Private implementation
fuzzy_match_recursive(const char * pattern,const char * str,int & outScore,const char * strBegin,uint8_t const * srcMatches,uint8_t * matches,int maxMatches,int nextMatch,int & recursionCount,int recursionLimit)44     static bool fuzzy_internal::fuzzy_match_recursive(const char * pattern, const char * str, int & outScore,
45                                                       const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
46                                                       int nextMatch, int & recursionCount, int recursionLimit)
47     {
48         // Count recursions
49         ++recursionCount;
50         if (recursionCount >= recursionLimit)
51             return false;
52 
53         // Detect end of strings
54         if (*pattern == '\0' || *str == '\0')
55             return false;
56 
57         // Recursion params
58         bool recursiveMatch = false;
59         uint8_t bestRecursiveMatches[256];
60         int bestRecursiveScore = 0;
61 
62         // Loop through pattern and str looking for a match
63         bool first_match = true;
64         while (*pattern != '\0' && *str != '\0') {
65 
66             // Found match
67             if (tolower(*pattern) == tolower(*str)) {
68 
69                 // Supplied matches buffer was too short
70                 if (nextMatch >= maxMatches)
71                     return false;
72 
73                 // "Copy-on-Write" srcMatches into matches
74                 if (first_match && srcMatches) {
75                     memcpy(matches, srcMatches, nextMatch);
76                     first_match = false;
77                 }
78 
79                 // Recursive call that "skips" this match
80                 uint8_t recursiveMatches[256];
81                 int recursiveScore;
82                 if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
83 
84                     // Pick best recursive score
85                     if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
86                         memcpy(bestRecursiveMatches, recursiveMatches, 256);
87                         bestRecursiveScore = recursiveScore;
88                     }
89                     recursiveMatch = true;
90                 }
91 
92                 // Advance
93                 matches[nextMatch++] = (uint8_t)(str - strBegin);
94                 ++pattern;
95             }
96             ++str;
97         }
98 
99         // Determine if full pattern was matched
100         bool matched = *pattern == '\0' ? true : false;
101 
102         // Calculate score
103         if (matched) {
104             const int sequential_bonus = 15;            // bonus for adjacent matches
105             const int separator_bonus = 30;             // bonus if match occurs after a separator
106             const int camel_bonus = 30;                 // bonus if match is uppercase and prev is lower
107             const int first_letter_bonus = 15;          // bonus if the first letter is matched
108 
109             const int leading_letter_penalty = -5;      // penalty applied for every letter in str before the first match
110             const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
111             const int unmatched_letter_penalty = -1;    // penalty for every letter that doesn't matter
112 
113             // Iterate str to end
114             while (*str != '\0')
115                 ++str;
116 
117             // Initialize score
118             outScore = 100;
119 
120             // Apply leading letter penalty
121             int penalty = leading_letter_penalty * matches[0];
122             if (penalty < max_leading_letter_penalty)
123                 penalty = max_leading_letter_penalty;
124             outScore += penalty;
125 
126             // Apply unmatched penalty
127             int unmatched = (int)(str - strBegin) - nextMatch;
128             outScore += unmatched_letter_penalty * unmatched;
129 
130             // Apply ordering bonuses
131             for (int i = 0; i < nextMatch; ++i) {
132                 uint8_t currIdx = matches[i];
133 
134                 if (i > 0) {
135                     uint8_t prevIdx = matches[i - 1];
136 
137                     // Sequential
138                     if (currIdx == (prevIdx + 1))
139                         outScore += sequential_bonus;
140                 }
141 
142                 // Check for bonuses based on neighbor character value
143                 if (currIdx > 0) {
144                     // Camel case
145                     char neighbor = strBegin[currIdx - 1];
146                     char curr = strBegin[currIdx];
147                     if (::islower(neighbor) && ::isupper(curr))
148                         outScore += camel_bonus;
149 
150                     // Separator
151                     bool neighborSeparator = neighbor == '_' || neighbor == ' ';
152                     if (neighborSeparator)
153                         outScore += separator_bonus;
154                 }
155                 else {
156                     // First letter
157                     outScore += first_letter_bonus;
158                 }
159             }
160         }
161 
162         // Return best result
163         if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
164             // Recursive score is better than "this"
165             memcpy(matches, bestRecursiveMatches, maxMatches);
166             outScore = bestRecursiveScore;
167             return true;
168         }
169         else if (matched) {
170             // "this" score is better than recursive
171             return true;
172         }
173         else {
174             // no match
175             return false;
176         }
177     }
178 } // namespace fts
179