1 // LICENSE 2 // 3 // This software is dual-licensed to the public domain and under the following 4 // license: you are granted a perpetual, irrevocable license to copy, modify, 5 // publish, and distribute this file as you see fit. 6 7 #include "config.h" 8 9 #include "fts_fuzzy_match.hh" 10 11 namespace fts { 12 13 // Forward declarations for "private" implementation 14 namespace fuzzy_internal { 15 static bool fuzzy_match_recursive(const char * pattern, const char * str, int & outScore, const char * strBegin, 16 uint8_t const * srcMatches, uint8_t * newMatches, int maxMatches, int nextMatch, 17 int & recursionCount, int recursionLimit); 18 } 19 20 // Public interface fuzzy_match_simple(char const * pattern,char const * str)21 bool fuzzy_match_simple(char const * pattern, char const * str) { 22 while (*pattern != '\0' && *str != '\0') { 23 if (tolower(*pattern) == tolower(*str)) 24 ++pattern; 25 ++str; 26 } 27 28 return *pattern == '\0' ? true : false; 29 } 30 fuzzy_match(char const * pattern,char const * str,int & outScore)31 bool fuzzy_match(char const * pattern, char const * str, int & outScore) { 32 uint8_t matches[256]; 33 return fuzzy_match(pattern, str, outScore, matches, sizeof(matches)); 34 } 35 fuzzy_match(char const * pattern,char const * str,int & outScore,uint8_t * matches,int maxMatches)36 bool fuzzy_match(char const * pattern, char const * str, int & outScore, uint8_t * matches, int maxMatches) { 37 int recursionCount = 0; 38 int recursionLimit = 10; 39 40 return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit); 41 } 42 43 // Private implementation fuzzy_match_recursive(const char * pattern,const char * str,int & outScore,const char * strBegin,uint8_t const * srcMatches,uint8_t * matches,int maxMatches,int nextMatch,int & recursionCount,int recursionLimit)44 static bool fuzzy_internal::fuzzy_match_recursive(const char * pattern, const char * str, int & outScore, 45 const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches, 46 int nextMatch, int & recursionCount, int recursionLimit) 47 { 48 // Count recursions 49 ++recursionCount; 50 if (recursionCount >= recursionLimit) 51 return false; 52 53 // Detect end of strings 54 if (*pattern == '\0' || *str == '\0') 55 return false; 56 57 // Recursion params 58 bool recursiveMatch = false; 59 uint8_t bestRecursiveMatches[256]; 60 int bestRecursiveScore = 0; 61 62 // Loop through pattern and str looking for a match 63 bool first_match = true; 64 while (*pattern != '\0' && *str != '\0') { 65 66 // Found match 67 if (tolower(*pattern) == tolower(*str)) { 68 69 // Supplied matches buffer was too short 70 if (nextMatch >= maxMatches) 71 return false; 72 73 // "Copy-on-Write" srcMatches into matches 74 if (first_match && srcMatches) { 75 memcpy(matches, srcMatches, nextMatch); 76 first_match = false; 77 } 78 79 // Recursive call that "skips" this match 80 uint8_t recursiveMatches[256]; 81 int recursiveScore; 82 if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) { 83 84 // Pick best recursive score 85 if (!recursiveMatch || recursiveScore > bestRecursiveScore) { 86 memcpy(bestRecursiveMatches, recursiveMatches, 256); 87 bestRecursiveScore = recursiveScore; 88 } 89 recursiveMatch = true; 90 } 91 92 // Advance 93 matches[nextMatch++] = (uint8_t)(str - strBegin); 94 ++pattern; 95 } 96 ++str; 97 } 98 99 // Determine if full pattern was matched 100 bool matched = *pattern == '\0' ? true : false; 101 102 // Calculate score 103 if (matched) { 104 const int sequential_bonus = 15; // bonus for adjacent matches 105 const int separator_bonus = 30; // bonus if match occurs after a separator 106 const int camel_bonus = 30; // bonus if match is uppercase and prev is lower 107 const int first_letter_bonus = 15; // bonus if the first letter is matched 108 109 const int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match 110 const int max_leading_letter_penalty = -15; // maximum penalty for leading letters 111 const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter 112 113 // Iterate str to end 114 while (*str != '\0') 115 ++str; 116 117 // Initialize score 118 outScore = 100; 119 120 // Apply leading letter penalty 121 int penalty = leading_letter_penalty * matches[0]; 122 if (penalty < max_leading_letter_penalty) 123 penalty = max_leading_letter_penalty; 124 outScore += penalty; 125 126 // Apply unmatched penalty 127 int unmatched = (int)(str - strBegin) - nextMatch; 128 outScore += unmatched_letter_penalty * unmatched; 129 130 // Apply ordering bonuses 131 for (int i = 0; i < nextMatch; ++i) { 132 uint8_t currIdx = matches[i]; 133 134 if (i > 0) { 135 uint8_t prevIdx = matches[i - 1]; 136 137 // Sequential 138 if (currIdx == (prevIdx + 1)) 139 outScore += sequential_bonus; 140 } 141 142 // Check for bonuses based on neighbor character value 143 if (currIdx > 0) { 144 // Camel case 145 char neighbor = strBegin[currIdx - 1]; 146 char curr = strBegin[currIdx]; 147 if (::islower(neighbor) && ::isupper(curr)) 148 outScore += camel_bonus; 149 150 // Separator 151 bool neighborSeparator = neighbor == '_' || neighbor == ' '; 152 if (neighborSeparator) 153 outScore += separator_bonus; 154 } 155 else { 156 // First letter 157 outScore += first_letter_bonus; 158 } 159 } 160 } 161 162 // Return best result 163 if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) { 164 // Recursive score is better than "this" 165 memcpy(matches, bestRecursiveMatches, maxMatches); 166 outScore = bestRecursiveScore; 167 return true; 168 } 169 else if (matched) { 170 // "this" score is better than recursive 171 return true; 172 } 173 else { 174 // no match 175 return false; 176 } 177 } 178 } // namespace fts 179