1 #ifndef GAME_MWDIALOGUE_KEYWORDSEARCH_H
2 #define GAME_MWDIALOGUE_KEYWORDSEARCH_H
3 
4 #include <map>
5 #include <cctype>
6 #include <stdexcept>
7 #include <vector>
8 #include <algorithm>    // std::reverse
9 
10 #include <components/misc/stringops.hpp>
11 
12 namespace MWDialogue
13 {
14 
15 template <typename string_t, typename value_t>
16 class KeywordSearch
17 {
18 public:
19 
20     typedef typename string_t::const_iterator Point;
21 
22     struct Match
23     {
24         Point mBeg;
25         Point mEnd;
26         value_t mValue;
27     };
28 
seed(string_t keyword,value_t value)29     void seed (string_t keyword, value_t value)
30     {
31         if (keyword.empty())
32             return;
33         seed_impl  (/*std::move*/ (keyword), /*std::move*/ (value), 0, mRoot);
34     }
35 
clear()36     void clear ()
37     {
38         mRoot.mChildren.clear ();
39         mRoot.mKeyword.clear ();
40     }
41 
containsKeyword(string_t keyword,value_t & value)42     bool containsKeyword (string_t keyword, value_t& value)
43     {
44         typename Entry::childen_t::iterator current;
45         typename Entry::childen_t::iterator next;
46 
47         current = mRoot.mChildren.find (Misc::StringUtils::toLower (*keyword.begin()));
48         if (current == mRoot.mChildren.end())
49             return false;
50         else if (current->second.mKeyword.size() && Misc::StringUtils::ciEqual(current->second.mKeyword, keyword))
51         {
52             value = current->second.mValue;
53             return true;
54         }
55 
56         for (Point i = ++keyword.begin(); i != keyword.end(); ++i)
57         {
58             next = current->second.mChildren.find(Misc::StringUtils::toLower (*i));
59             if (next == current->second.mChildren.end())
60                 return false;
61             if (Misc::StringUtils::ciEqual(next->second.mKeyword, keyword))
62             {
63                 value = next->second.mValue;
64                 return true;
65             }
66             current = next;
67         }
68         return false;
69     }
70 
sortMatches(const Match & left,const Match & right)71     static bool sortMatches(const Match& left, const Match& right)
72     {
73         return left.mBeg < right.mBeg;
74     }
75 
highlightKeywords(Point beg,Point end,std::vector<Match> & out)76     void highlightKeywords (Point beg, Point end, std::vector<Match>& out)
77     {
78         std::vector<Match> matches;
79         for (Point i = beg; i != end; ++i)
80         {
81             // check if previous character marked start of new word
82             if (i != beg)
83             {
84                 Point prev = i;
85                 --prev;
86                 if(isalpha(*prev))
87                     continue;
88             }
89 
90 
91             // check first character
92             typename Entry::childen_t::iterator candidate = mRoot.mChildren.find (Misc::StringUtils::toLower (*i));
93 
94             // no match, on to next character
95             if (candidate == mRoot.mChildren.end ())
96                 continue;
97 
98             // see how far the match goes
99             Point j = i;
100 
101             // some keywords might be longer variations of other keywords, so we definitely need a list of candidates
102             // the first element in the pair is length of the match, i.e. depth from the first character on
103             std::vector< typename std::pair<int, typename Entry::childen_t::iterator> > candidates;
104 
105             while ((j + 1) != end)
106             {
107                 typename Entry::childen_t::iterator next = candidate->second.mChildren.find (Misc::StringUtils::toLower (*++j));
108 
109                 if (next == candidate->second.mChildren.end ())
110                 {
111                     if (candidate->second.mKeyword.size() > 0)
112                         candidates.push_back(std::make_pair((j-i), candidate));
113                     break;
114                 }
115 
116                 candidate = next;
117 
118                 if (candidate->second.mKeyword.size() > 0)
119                     candidates.push_back(std::make_pair((j-i), candidate));
120             }
121 
122             if (candidates.empty())
123                 continue; // didn't match enough to disambiguate, on to next character
124 
125             // shorter candidates will be added to the vector first. however, we want to check against longer candidates first
126             std::reverse(candidates.begin(), candidates.end());
127 
128             for (typename std::vector< std::pair<int, typename Entry::childen_t::iterator> >::iterator it = candidates.begin();
129                  it != candidates.end(); ++it)
130             {
131                 candidate = it->second;
132                 // try to match the rest of the keyword
133                 Point k = i + it->first;
134                 typename string_t::const_iterator t = candidate->second.mKeyword.begin () + (k - i);
135 
136 
137                 while (k != end && t != candidate->second.mKeyword.end ())
138                 {
139                     if (Misc::StringUtils::toLower (*k) != Misc::StringUtils::toLower (*t))
140                         break;
141 
142                     ++k, ++t;
143                 }
144 
145                 // didn't match full keyword, try the next candidate
146                 if (t != candidate->second.mKeyword.end ())
147                     continue;
148 
149                 // found a keyword, but there might still be longer keywords that start somewhere _within_ this keyword
150                 // we will resolve these overlapping keywords later, choosing the longest one in case of conflict
151                 Match match;
152                 match.mValue = candidate->second.mValue;
153                 match.mBeg = i;
154                 match.mEnd = k;
155                 matches.push_back(match);
156                 break;
157             }
158         }
159 
160         // resolve overlapping keywords
161         while (!matches.empty())
162         {
163             int longestKeywordSize = 0;
164             typename std::vector<Match>::iterator longestKeyword = matches.begin();
165             for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end(); ++it)
166             {
167                 int size = it->mEnd - it->mBeg;
168                 if (size > longestKeywordSize)
169                 {
170                     longestKeywordSize = size;
171                     longestKeyword = it;
172                 }
173 
174                 typename std::vector<Match>::iterator next = it;
175                 ++next;
176 
177                 if (next == matches.end())
178                     break;
179 
180                 if (it->mEnd <= next->mBeg)
181                 {
182                     break; // no overlap
183                 }
184             }
185 
186             Match keyword = *longestKeyword;
187             matches.erase(longestKeyword);
188             out.push_back(keyword);
189             // erase anything that overlaps with the keyword we just added to the output
190             for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end();)
191             {
192                 if (it->mBeg < keyword.mEnd && it->mEnd > keyword.mBeg)
193                     it = matches.erase(it);
194                 else
195                     ++it;
196             }
197         }
198 
199         std::sort(out.begin(), out.end(), sortMatches);
200     }
201 
202 private:
203 
204     struct Entry
205     {
206         typedef std::map <wchar_t, Entry> childen_t;
207 
208         string_t mKeyword;
209         value_t mValue;
210         childen_t mChildren;
211     };
212 
seed_impl(string_t keyword,value_t value,size_t depth,Entry & entry)213     void seed_impl (string_t keyword, value_t value, size_t depth, Entry  & entry)
214     {
215         int ch = Misc::StringUtils::toLower (keyword.at (depth));
216 
217         typename Entry::childen_t::iterator j = entry.mChildren.find (ch);
218 
219         if (j == entry.mChildren.end ())
220         {
221             entry.mChildren [ch].mValue = /*std::move*/ (value);
222             entry.mChildren [ch].mKeyword = /*std::move*/ (keyword);
223         }
224         else
225         {
226             if (j->second.mKeyword.size () > 0)
227             {
228                 if (keyword == j->second.mKeyword)
229                     throw std::runtime_error ("duplicate keyword inserted");
230 
231                 value_t pushValue = /*std::move*/ (j->second.mValue);
232                 string_t pushKeyword = /*std::move*/ (j->second.mKeyword);
233 
234                 if (depth >= pushKeyword.size ())
235                     throw std::runtime_error ("unexpected");
236 
237                 if (depth+1 < pushKeyword.size())
238                 {
239                     seed_impl (/*std::move*/ (pushKeyword), /*std::move*/ (pushValue), depth+1, j->second);
240                     j->second.mKeyword.clear ();
241                 }
242             }
243             if (depth+1 == keyword.size())
244                 j->second.mKeyword = value;
245             else // depth+1 < keyword.size()
246                 seed_impl (/*std::move*/ (keyword), /*std::move*/ (value), depth+1, j->second);
247         }
248 
249     }
250 
251     Entry mRoot;
252 };
253 
254 }
255 
256 #endif
257