1 #ifndef GAME_MWDIALOGUE_KEYWORDSEARCH_H 2 #define GAME_MWDIALOGUE_KEYWORDSEARCH_H 3 4 #include <map> 5 #include <cctype> 6 #include <stdexcept> 7 #include <vector> 8 #include <algorithm> // std::reverse 9 10 #include <components/misc/stringops.hpp> 11 12 namespace MWDialogue 13 { 14 15 template <typename string_t, typename value_t> 16 class KeywordSearch 17 { 18 public: 19 20 typedef typename string_t::const_iterator Point; 21 22 struct Match 23 { 24 Point mBeg; 25 Point mEnd; 26 value_t mValue; 27 }; 28 seed(string_t keyword,value_t value)29 void seed (string_t keyword, value_t value) 30 { 31 if (keyword.empty()) 32 return; 33 seed_impl (/*std::move*/ (keyword), /*std::move*/ (value), 0, mRoot); 34 } 35 clear()36 void clear () 37 { 38 mRoot.mChildren.clear (); 39 mRoot.mKeyword.clear (); 40 } 41 containsKeyword(string_t keyword,value_t & value)42 bool containsKeyword (string_t keyword, value_t& value) 43 { 44 typename Entry::childen_t::iterator current; 45 typename Entry::childen_t::iterator next; 46 47 current = mRoot.mChildren.find (Misc::StringUtils::toLower (*keyword.begin())); 48 if (current == mRoot.mChildren.end()) 49 return false; 50 else if (current->second.mKeyword.size() && Misc::StringUtils::ciEqual(current->second.mKeyword, keyword)) 51 { 52 value = current->second.mValue; 53 return true; 54 } 55 56 for (Point i = ++keyword.begin(); i != keyword.end(); ++i) 57 { 58 next = current->second.mChildren.find(Misc::StringUtils::toLower (*i)); 59 if (next == current->second.mChildren.end()) 60 return false; 61 if (Misc::StringUtils::ciEqual(next->second.mKeyword, keyword)) 62 { 63 value = next->second.mValue; 64 return true; 65 } 66 current = next; 67 } 68 return false; 69 } 70 sortMatches(const Match & left,const Match & right)71 static bool sortMatches(const Match& left, const Match& right) 72 { 73 return left.mBeg < right.mBeg; 74 } 75 highlightKeywords(Point beg,Point end,std::vector<Match> & out)76 void highlightKeywords (Point beg, Point end, std::vector<Match>& out) 77 { 78 std::vector<Match> matches; 79 for (Point i = beg; i != end; ++i) 80 { 81 // check if previous character marked start of new word 82 if (i != beg) 83 { 84 Point prev = i; 85 --prev; 86 if(isalpha(*prev)) 87 continue; 88 } 89 90 91 // check first character 92 typename Entry::childen_t::iterator candidate = mRoot.mChildren.find (Misc::StringUtils::toLower (*i)); 93 94 // no match, on to next character 95 if (candidate == mRoot.mChildren.end ()) 96 continue; 97 98 // see how far the match goes 99 Point j = i; 100 101 // some keywords might be longer variations of other keywords, so we definitely need a list of candidates 102 // the first element in the pair is length of the match, i.e. depth from the first character on 103 std::vector< typename std::pair<int, typename Entry::childen_t::iterator> > candidates; 104 105 while ((j + 1) != end) 106 { 107 typename Entry::childen_t::iterator next = candidate->second.mChildren.find (Misc::StringUtils::toLower (*++j)); 108 109 if (next == candidate->second.mChildren.end ()) 110 { 111 if (candidate->second.mKeyword.size() > 0) 112 candidates.push_back(std::make_pair((j-i), candidate)); 113 break; 114 } 115 116 candidate = next; 117 118 if (candidate->second.mKeyword.size() > 0) 119 candidates.push_back(std::make_pair((j-i), candidate)); 120 } 121 122 if (candidates.empty()) 123 continue; // didn't match enough to disambiguate, on to next character 124 125 // shorter candidates will be added to the vector first. however, we want to check against longer candidates first 126 std::reverse(candidates.begin(), candidates.end()); 127 128 for (typename std::vector< std::pair<int, typename Entry::childen_t::iterator> >::iterator it = candidates.begin(); 129 it != candidates.end(); ++it) 130 { 131 candidate = it->second; 132 // try to match the rest of the keyword 133 Point k = i + it->first; 134 typename string_t::const_iterator t = candidate->second.mKeyword.begin () + (k - i); 135 136 137 while (k != end && t != candidate->second.mKeyword.end ()) 138 { 139 if (Misc::StringUtils::toLower (*k) != Misc::StringUtils::toLower (*t)) 140 break; 141 142 ++k, ++t; 143 } 144 145 // didn't match full keyword, try the next candidate 146 if (t != candidate->second.mKeyword.end ()) 147 continue; 148 149 // found a keyword, but there might still be longer keywords that start somewhere _within_ this keyword 150 // we will resolve these overlapping keywords later, choosing the longest one in case of conflict 151 Match match; 152 match.mValue = candidate->second.mValue; 153 match.mBeg = i; 154 match.mEnd = k; 155 matches.push_back(match); 156 break; 157 } 158 } 159 160 // resolve overlapping keywords 161 while (!matches.empty()) 162 { 163 int longestKeywordSize = 0; 164 typename std::vector<Match>::iterator longestKeyword = matches.begin(); 165 for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end(); ++it) 166 { 167 int size = it->mEnd - it->mBeg; 168 if (size > longestKeywordSize) 169 { 170 longestKeywordSize = size; 171 longestKeyword = it; 172 } 173 174 typename std::vector<Match>::iterator next = it; 175 ++next; 176 177 if (next == matches.end()) 178 break; 179 180 if (it->mEnd <= next->mBeg) 181 { 182 break; // no overlap 183 } 184 } 185 186 Match keyword = *longestKeyword; 187 matches.erase(longestKeyword); 188 out.push_back(keyword); 189 // erase anything that overlaps with the keyword we just added to the output 190 for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end();) 191 { 192 if (it->mBeg < keyword.mEnd && it->mEnd > keyword.mBeg) 193 it = matches.erase(it); 194 else 195 ++it; 196 } 197 } 198 199 std::sort(out.begin(), out.end(), sortMatches); 200 } 201 202 private: 203 204 struct Entry 205 { 206 typedef std::map <wchar_t, Entry> childen_t; 207 208 string_t mKeyword; 209 value_t mValue; 210 childen_t mChildren; 211 }; 212 seed_impl(string_t keyword,value_t value,size_t depth,Entry & entry)213 void seed_impl (string_t keyword, value_t value, size_t depth, Entry & entry) 214 { 215 int ch = Misc::StringUtils::toLower (keyword.at (depth)); 216 217 typename Entry::childen_t::iterator j = entry.mChildren.find (ch); 218 219 if (j == entry.mChildren.end ()) 220 { 221 entry.mChildren [ch].mValue = /*std::move*/ (value); 222 entry.mChildren [ch].mKeyword = /*std::move*/ (keyword); 223 } 224 else 225 { 226 if (j->second.mKeyword.size () > 0) 227 { 228 if (keyword == j->second.mKeyword) 229 throw std::runtime_error ("duplicate keyword inserted"); 230 231 value_t pushValue = /*std::move*/ (j->second.mValue); 232 string_t pushKeyword = /*std::move*/ (j->second.mKeyword); 233 234 if (depth >= pushKeyword.size ()) 235 throw std::runtime_error ("unexpected"); 236 237 if (depth+1 < pushKeyword.size()) 238 { 239 seed_impl (/*std::move*/ (pushKeyword), /*std::move*/ (pushValue), depth+1, j->second); 240 j->second.mKeyword.clear (); 241 } 242 } 243 if (depth+1 == keyword.size()) 244 j->second.mKeyword = value; 245 else // depth+1 < keyword.size() 246 seed_impl (/*std::move*/ (keyword), /*std::move*/ (value), depth+1, j->second); 247 } 248 249 } 250 251 Entry mRoot; 252 }; 253 254 } 255 256 #endif 257