1 // LICENSE
2 //
3 //   This software is dual-licensed to the public domain and under the following
4 //   license: you are granted a perpetual, irrevocable license to copy, modify,
5 //   publish, and distribute this file as you see fit.
6 //
7 // VERSION
8 //   0.2.0  (2017-02-18)  Scored matches perform exhaustive search for best score
9 //   0.1.0  (2016-03-28)  Initial release
10 //
11 // AUTHOR
12 //   Forrest Smith
13 //
14 // NOTES
15 //   Compiling
16 //     You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
17 //
18 //   fuzzy_match_simple(...)
19 //     Returns true if each character in pattern is found sequentially within str
20 //
21 //   fuzzy_match(...)
22 //     Returns true if pattern is found AND calculates a score.
23 //     Performs exhaustive search via recursion to find all possible matches and match with highest score.
24 //     Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
25 //     Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
26 //     Uses uint8_t for match indices. Therefore patterns are limited to max_matches characters.
27 //     Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
28 
29 
30 #ifndef FTS_FUZZY_MATCH_H
31 #define FTS_FUZZY_MATCH_H
32 
33 
34 #include <cstdint> // uint8_t
35 #include <ctype.h> // ::tolower, ::toupper
36 #include <cstring> // memcpy
37 
38 #include <cstdio>
39 
40 #include "../Utils/ASCIIFolding.hpp"
41 
42 // Public interface
43 namespace fts {
44 	using 						char_type 	= wchar_t;
45 	using 						pos_type  	= uint16_t;
46 	static constexpr pos_type 	stopper 	= pos_type(-1);
47 	static constexpr int 		max_matches = 255;
48 
49     static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore);
50     static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore, pos_type * matches);
51 }
52 
53 #ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
54 namespace fts {
55 
56     // Forward declarations for "private" implementation
57     namespace fuzzy_internal {
58         static bool fuzzy_match_recursive(const char_type * pattern, const char_type * str, int & outScore, const char_type * const strBegin,
59             pos_type const * srcMatches,  pos_type * newMatches, int nextMatch,
60             int recursionCount, const int recursionLimit);
61         static void copy_matches(pos_type * dst, pos_type const* src);
62     }
63 
64     // Public interface
fuzzy_match(char_type const * pattern,char_type const * str,int & outScore)65     [[maybe_unused]] static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore) {
66         pos_type matches[max_matches + 1]; // with the room for the stopper
67         matches[0] = stopper;
68         return fuzzy_match(pattern, str, outScore, matches);
69     }
70 
fuzzy_match(char_type const * pattern,char_type const * str,int & outScore,pos_type * matches)71     [[maybe_unused]] static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore, pos_type * matches) {
72         int recursionCount = 0;
73         static constexpr int recursionLimit = 10;
74         return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, 0, recursionCount, recursionLimit);
75     }
76 
77     // Private implementation
fuzzy_match_recursive(const char_type * pattern,const char_type * str,int & outScore,const char_type * const strBegin,pos_type const * srcMatches,pos_type * matches,int nextMatch,int recursionCount,const int recursionLimit)78     static bool fuzzy_internal::fuzzy_match_recursive(
79     	// Pattern to match over str.
80     	const char_type * 		pattern,
81     	// Text to match the pattern over.
82     	const char_type * 		str,
83     	// Score of the pattern matching str. Output variable.
84     	int & 					outScore,
85     	// The very start of str, for calculating indices of matches and for calculating matches from the start of the input string.
86         const char_type * const	strBegin,
87         // Matches when entering this function.
88         pos_type const * 		srcMatches,
89         // Output matches.
90         pos_type * 				matches,
91         // Number of matched characters stored in srcMatches when entering this function, also tracking the successive matches.
92         int 					nextMatch,
93         // Recursion count is input / output to track the maximum depth reached.
94         // Was given by reference &recursionCount, see discussion in https://github.com/forrestthewoods/lib_fts/issues/21
95 //        int & 					recursionCount,
96         int				        recursionCount,
97         const int				recursionLimit)
98     {
99         // Count recursions
100         if (++ recursionCount >= recursionLimit)
101             return false;
102 
103         // Detect end of strings
104         if (*pattern == '\0' || *str == '\0')
105             return false;
106 
107         // Recursion params
108         bool recursiveMatch = false;
109         pos_type bestRecursiveMatches[max_matches + 1]; // with the room for the stopper
110         int bestRecursiveScore = 0;
111 
112         // Loop through pattern and str looking for a match
113         bool first_match = true;
114         while (*pattern != '\0' && *str != '\0') {
115 
116         	int  num_matched  = std::towlower(*pattern) == std::towlower(*str) ? 1 : 0;
117             // bool folded_match = false;
118 
119             if (! num_matched) {
120                 wchar_t tmp[4];
121                 wchar_t *end = Slic3r::fold_to_ascii(*str, tmp);
122                 wchar_t *c = tmp;
123                 for (const wchar_t* d = pattern; c != end && *d != 0 && std::towlower(*c) == std::towlower(*d); ++c, ++d);
124                 if (c == end) {
125                     // folded_match = true;
126         			num_matched = end - tmp;
127         		}
128 	        }
129 
130             // Found match
131             if (num_matched) {
132 
133                 // Supplied matches buffer was too short
134                 if (nextMatch + num_matched > max_matches)
135                     return false;
136 
137                 // "Copy-on-Write" srcMatches into matches
138                 if (first_match && srcMatches) {
139                     memcpy(matches, srcMatches, sizeof(pos_type) * (nextMatch + 1)); // including the stopper
140                     first_match = false;
141                 }
142 
143                 // Recursive call that "skips" this match
144                 pos_type recursiveMatches[max_matches + 1]; // with the room for the stopper
145                 int recursiveScore;
146                 if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, nextMatch, recursionCount, recursionLimit)) {
147 
148                     // Pick best recursive score
149                     if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
150                     	copy_matches(bestRecursiveMatches, recursiveMatches);
151                 		bestRecursiveScore = recursiveScore;
152                     }
153                     recursiveMatch = true;
154                 }
155 
156                 // Advance
157                 matches[nextMatch++] = (pos_type)(str - strBegin);
158                 // Write a stopper sign.
159                 matches[nextMatch] = stopper;
160                 // Advance pattern by the number of matched characters (could be more if ASCII folding triggers in).
161                 pattern += num_matched;
162             }
163             ++str;
164         }
165 
166         // Determine if full pattern was matched
167         bool matched = *pattern == '\0';
168 
169         // Calculate score
170         if (matched) {
171             static constexpr int sequential_bonus = 15;            // bonus for adjacent matches
172             static constexpr int separator_bonus = 10/*30*/;             // bonus if match occurs after a separator
173             // static constexpr int camel_bonus = 30;                 // bonus if match is uppercase and prev is lower
174             static constexpr int first_letter_bonus = 15;          // bonus if the first letter is matched
175 
176             static constexpr int leading_letter_penalty = -1/*-5*/;      // penalty applied for every letter in str before the first match
177             static constexpr int max_leading_letter_penalty = -15; // maximum penalty for leading letters
178             static constexpr int unmatched_letter_penalty = -1;    // penalty for every letter that doesn't matter
179 
180             // Iterate str to end
181             while (*str != '\0')
182                 ++str;
183 
184             // Initialize score
185             outScore = 100;
186 
187             // Start of the first group that contains matches[0].
188             const char_type *group_start = strBegin + matches[0];
189             for (const char_type *c = group_start; c >= strBegin && *c != ':'; -- c)
190             	if (*c != ' ' && *c != '\t')
191             		group_start = c;
192 
193             // Apply leading letter penalty or bonus.
194             outScore += matches[0] == int(group_start - strBegin) ?
195             	first_letter_bonus :
196             	std::max((matches[0] - int(group_start - strBegin)) * leading_letter_penalty, max_leading_letter_penalty);
197 
198             // Apply unmatched letters after the end penalty
199 //            outScore += (int(str - group_start) - matches[nextMatch-1] + 1) * unmatched_letter_penalty;
200             // Apply unmatched penalty
201             outScore += (int(str - group_start) - nextMatch) * unmatched_letter_penalty;
202 
203             // Apply ordering bonuses
204             int sequential_state = sequential_bonus;
205             for (int i = 0; i < nextMatch; ++i) {
206                 pos_type currIdx = matches[i];
207 
208                 // Check for bonuses based on neighbor character value
209                 if (currIdx > 0) {
210                     if (i > 0 && currIdx == matches[i - 1] + 1) {
211 	                    // Sequential
212                         outScore += sequential_state;
213                         // Exponential grow of the sequential bonus.
214                     	sequential_state = std::min(5 * sequential_bonus, sequential_state + sequential_state / 3);
215                     } else {
216                     	// Reset the sequential bonus exponential grow.
217                     	sequential_state = sequential_bonus;
218                     }
219 					char_type prev = strBegin[currIdx - 1];
220 /*
221                     // Camel case
222                     if (std::islower(prev) && std::isupper(strBegin[currIdx]))
223                         outScore += camel_bonus;
224 */
225                     // Separator
226                     if (prev == '_' || prev == ' ')
227                         outScore += separator_bonus;
228                 }
229             }
230         }
231 
232         // Return best result
233         if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
234             // Recursive score is better than "this"
235             copy_matches(matches, bestRecursiveMatches);
236             outScore = bestRecursiveScore;
237             return true;
238         }
239         else if (matched) {
240             // "this" score is better than recursive
241             return true;
242         }
243         else {
244             // no match
245             return false;
246         }
247     }
248 
249     // Copy matches up to a stopper.
copy_matches(pos_type * dst,pos_type const * src)250     static void fuzzy_internal::copy_matches(pos_type * dst, pos_type const* src)
251     {
252         while (*src != stopper)
253             *dst++ = *src++;
254         *dst = stopper;
255     }
256 
257 } // namespace fts
258 
259 #endif // FTS_FUZZY_MATCH_IMPLEMENTATION
260 
261 #endif // FTS_FUZZY_MATCH_H
262