1 // LICENSE 2 // 3 // This software is dual-licensed to the public domain and under the following 4 // license: you are granted a perpetual, irrevocable license to copy, modify, 5 // publish, and distribute this file as you see fit. 6 // 7 // VERSION 8 // 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score 9 // 0.1.0 (2016-03-28) Initial release 10 // 11 // AUTHOR 12 // Forrest Smith 13 // 14 // NOTES 15 // Compiling 16 // You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation. 17 // 18 // fuzzy_match_simple(...) 19 // Returns true if each character in pattern is found sequentially within str 20 // 21 // fuzzy_match(...) 22 // Returns true if pattern is found AND calculates a score. 23 // Performs exhaustive search via recursion to find all possible matches and match with highest score. 24 // Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern. 25 // Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") 26 // Uses uint8_t for match indices. Therefore patterns are limited to max_matches characters. 27 // Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning. 28 29 30 #ifndef FTS_FUZZY_MATCH_H 31 #define FTS_FUZZY_MATCH_H 32 33 34 #include <cstdint> // uint8_t 35 #include <ctype.h> // ::tolower, ::toupper 36 #include <cstring> // memcpy 37 38 #include <cstdio> 39 40 #include "../Utils/ASCIIFolding.hpp" 41 42 // Public interface 43 namespace fts { 44 using char_type = wchar_t; 45 using pos_type = uint16_t; 46 static constexpr pos_type stopper = pos_type(-1); 47 static constexpr int max_matches = 255; 48 49 static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore); 50 static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore, pos_type * matches); 51 } 52 53 #ifdef FTS_FUZZY_MATCH_IMPLEMENTATION 54 namespace fts { 55 56 // Forward declarations for "private" implementation 57 namespace fuzzy_internal { 58 static bool fuzzy_match_recursive(const char_type * pattern, const char_type * str, int & outScore, const char_type * const strBegin, 59 pos_type const * srcMatches, pos_type * newMatches, int nextMatch, 60 int recursionCount, const int recursionLimit); 61 static void copy_matches(pos_type * dst, pos_type const* src); 62 } 63 64 // Public interface fuzzy_match(char_type const * pattern,char_type const * str,int & outScore)65 [[maybe_unused]] static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore) { 66 pos_type matches[max_matches + 1]; // with the room for the stopper 67 matches[0] = stopper; 68 return fuzzy_match(pattern, str, outScore, matches); 69 } 70 fuzzy_match(char_type const * pattern,char_type const * str,int & outScore,pos_type * matches)71 [[maybe_unused]] static bool fuzzy_match(char_type const * pattern, char_type const * str, int & outScore, pos_type * matches) { 72 int recursionCount = 0; 73 static constexpr int recursionLimit = 10; 74 return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, 0, recursionCount, recursionLimit); 75 } 76 77 // Private implementation fuzzy_match_recursive(const char_type * pattern,const char_type * str,int & outScore,const char_type * const strBegin,pos_type const * srcMatches,pos_type * matches,int nextMatch,int recursionCount,const int recursionLimit)78 static bool fuzzy_internal::fuzzy_match_recursive( 79 // Pattern to match over str. 80 const char_type * pattern, 81 // Text to match the pattern over. 82 const char_type * str, 83 // Score of the pattern matching str. Output variable. 84 int & outScore, 85 // The very start of str, for calculating indices of matches and for calculating matches from the start of the input string. 86 const char_type * const strBegin, 87 // Matches when entering this function. 88 pos_type const * srcMatches, 89 // Output matches. 90 pos_type * matches, 91 // Number of matched characters stored in srcMatches when entering this function, also tracking the successive matches. 92 int nextMatch, 93 // Recursion count is input / output to track the maximum depth reached. 94 // Was given by reference &recursionCount, see discussion in https://github.com/forrestthewoods/lib_fts/issues/21 95 // int & recursionCount, 96 int recursionCount, 97 const int recursionLimit) 98 { 99 // Count recursions 100 if (++ recursionCount >= recursionLimit) 101 return false; 102 103 // Detect end of strings 104 if (*pattern == '\0' || *str == '\0') 105 return false; 106 107 // Recursion params 108 bool recursiveMatch = false; 109 pos_type bestRecursiveMatches[max_matches + 1]; // with the room for the stopper 110 int bestRecursiveScore = 0; 111 112 // Loop through pattern and str looking for a match 113 bool first_match = true; 114 while (*pattern != '\0' && *str != '\0') { 115 116 int num_matched = std::towlower(*pattern) == std::towlower(*str) ? 1 : 0; 117 // bool folded_match = false; 118 119 if (! num_matched) { 120 wchar_t tmp[4]; 121 wchar_t *end = Slic3r::fold_to_ascii(*str, tmp); 122 wchar_t *c = tmp; 123 for (const wchar_t* d = pattern; c != end && *d != 0 && std::towlower(*c) == std::towlower(*d); ++c, ++d); 124 if (c == end) { 125 // folded_match = true; 126 num_matched = end - tmp; 127 } 128 } 129 130 // Found match 131 if (num_matched) { 132 133 // Supplied matches buffer was too short 134 if (nextMatch + num_matched > max_matches) 135 return false; 136 137 // "Copy-on-Write" srcMatches into matches 138 if (first_match && srcMatches) { 139 memcpy(matches, srcMatches, sizeof(pos_type) * (nextMatch + 1)); // including the stopper 140 first_match = false; 141 } 142 143 // Recursive call that "skips" this match 144 pos_type recursiveMatches[max_matches + 1]; // with the room for the stopper 145 int recursiveScore; 146 if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, nextMatch, recursionCount, recursionLimit)) { 147 148 // Pick best recursive score 149 if (!recursiveMatch || recursiveScore > bestRecursiveScore) { 150 copy_matches(bestRecursiveMatches, recursiveMatches); 151 bestRecursiveScore = recursiveScore; 152 } 153 recursiveMatch = true; 154 } 155 156 // Advance 157 matches[nextMatch++] = (pos_type)(str - strBegin); 158 // Write a stopper sign. 159 matches[nextMatch] = stopper; 160 // Advance pattern by the number of matched characters (could be more if ASCII folding triggers in). 161 pattern += num_matched; 162 } 163 ++str; 164 } 165 166 // Determine if full pattern was matched 167 bool matched = *pattern == '\0'; 168 169 // Calculate score 170 if (matched) { 171 static constexpr int sequential_bonus = 15; // bonus for adjacent matches 172 static constexpr int separator_bonus = 10/*30*/; // bonus if match occurs after a separator 173 // static constexpr int camel_bonus = 30; // bonus if match is uppercase and prev is lower 174 static constexpr int first_letter_bonus = 15; // bonus if the first letter is matched 175 176 static constexpr int leading_letter_penalty = -1/*-5*/; // penalty applied for every letter in str before the first match 177 static constexpr int max_leading_letter_penalty = -15; // maximum penalty for leading letters 178 static constexpr int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter 179 180 // Iterate str to end 181 while (*str != '\0') 182 ++str; 183 184 // Initialize score 185 outScore = 100; 186 187 // Start of the first group that contains matches[0]. 188 const char_type *group_start = strBegin + matches[0]; 189 for (const char_type *c = group_start; c >= strBegin && *c != ':'; -- c) 190 if (*c != ' ' && *c != '\t') 191 group_start = c; 192 193 // Apply leading letter penalty or bonus. 194 outScore += matches[0] == int(group_start - strBegin) ? 195 first_letter_bonus : 196 std::max((matches[0] - int(group_start - strBegin)) * leading_letter_penalty, max_leading_letter_penalty); 197 198 // Apply unmatched letters after the end penalty 199 // outScore += (int(str - group_start) - matches[nextMatch-1] + 1) * unmatched_letter_penalty; 200 // Apply unmatched penalty 201 outScore += (int(str - group_start) - nextMatch) * unmatched_letter_penalty; 202 203 // Apply ordering bonuses 204 int sequential_state = sequential_bonus; 205 for (int i = 0; i < nextMatch; ++i) { 206 pos_type currIdx = matches[i]; 207 208 // Check for bonuses based on neighbor character value 209 if (currIdx > 0) { 210 if (i > 0 && currIdx == matches[i - 1] + 1) { 211 // Sequential 212 outScore += sequential_state; 213 // Exponential grow of the sequential bonus. 214 sequential_state = std::min(5 * sequential_bonus, sequential_state + sequential_state / 3); 215 } else { 216 // Reset the sequential bonus exponential grow. 217 sequential_state = sequential_bonus; 218 } 219 char_type prev = strBegin[currIdx - 1]; 220 /* 221 // Camel case 222 if (std::islower(prev) && std::isupper(strBegin[currIdx])) 223 outScore += camel_bonus; 224 */ 225 // Separator 226 if (prev == '_' || prev == ' ') 227 outScore += separator_bonus; 228 } 229 } 230 } 231 232 // Return best result 233 if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) { 234 // Recursive score is better than "this" 235 copy_matches(matches, bestRecursiveMatches); 236 outScore = bestRecursiveScore; 237 return true; 238 } 239 else if (matched) { 240 // "this" score is better than recursive 241 return true; 242 } 243 else { 244 // no match 245 return false; 246 } 247 } 248 249 // Copy matches up to a stopper. copy_matches(pos_type * dst,pos_type const * src)250 static void fuzzy_internal::copy_matches(pos_type * dst, pos_type const* src) 251 { 252 while (*src != stopper) 253 *dst++ = *src++; 254 *dst = stopper; 255 } 256 257 } // namespace fts 258 259 #endif // FTS_FUZZY_MATCH_IMPLEMENTATION 260 261 #endif // FTS_FUZZY_MATCH_H 262