1 // SciTE - Scintilla based Text Editor
2 /** @file StringList.cxx
3 ** Implementation of class holding a list of strings.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <cstdlib>
9 #include <cassert>
10 #include <cstring>
11
12 #include <string>
13 #include <vector>
14 #include <map>
15 #include <set>
16 #include <algorithm>
17 #include <chrono>
18
19 #include "GUI.h"
20 #include "StringList.h"
21 #include "StringHelpers.h"
22
CompareNCaseInsensitive(const char * a,const char * b,size_t len)23 static int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept {
24 while (*a && *b && len) {
25 if (*a != *b) {
26 const char upperA = MakeUpperCase(*a);
27 const char upperB = MakeUpperCase(*b);
28 if (upperA != upperB)
29 return upperA - upperB;
30 }
31 a++;
32 b++;
33 len--;
34 }
35 if (len == 0)
36 return 0;
37 else
38 // Either *a or *b is nul
39 return *a - *b;
40 }
41
42 /**
43 * Creates an array that points into each word in the string and puts \0 terminators
44 * after each word.
45 */
ArrayFromStringList(char * stringList,bool onlyLineEnds=false)46 static std::vector<char *> ArrayFromStringList(char *stringList, bool onlyLineEnds = false) {
47 int prev = '\n';
48 int words = 0;
49 // For rapid determination of whether a character is a separator, build
50 // a look up table.
51 bool wordSeparator[256] = {};
52 wordSeparator[static_cast<unsigned int>('\r')] = true;
53 wordSeparator[static_cast<unsigned int>('\n')] = true;
54 if (!onlyLineEnds) {
55 wordSeparator[static_cast<unsigned int>(' ')] = true;
56 wordSeparator[static_cast<unsigned int>('\t')] = true;
57 }
58 for (int j = 0; stringList[j]; j++) {
59 const int curr = static_cast<unsigned char>(stringList[j]);
60 if (!wordSeparator[curr] && wordSeparator[prev])
61 words++;
62 prev = curr;
63 }
64 std::vector<char *> keywords;
65 const size_t slen = strlen(stringList);
66 if (words) {
67 prev = '\0';
68 for (size_t k = 0; k < slen; k++) {
69 if (!wordSeparator[static_cast<unsigned char>(stringList[k])]) {
70 if (!prev) {
71 keywords.push_back(&stringList[k]);
72 }
73 } else {
74 stringList[k] = '\0';
75 }
76 prev = stringList[k];
77 }
78 }
79 return keywords;
80 }
81
SetFromListText()82 void StringList::SetFromListText() {
83 sorted = false;
84 sortedNoCase = false;
85 words = ArrayFromStringList(&listText[0], onlyLineEnds);
86 wordsNoCase = words;
87 }
88
CmpString(const char * a,const char * b)89 static bool CmpString(const char *a, const char *b) noexcept {
90 return strcmp(a, b) < 0;
91 }
92
CmpStringNoCase(const char * a,const char * b)93 static bool CmpStringNoCase(const char *a, const char *b) noexcept {
94 return CompareNoCase(a, b) < 0;
95 }
96
SortIfNeeded(bool ignoreCase)97 void StringList::SortIfNeeded(bool ignoreCase) {
98 // In both cases, the final empty sentinel element is not sorted.
99 if (ignoreCase) {
100 if (!sortedNoCase) {
101 sortedNoCase = true;
102 std::sort(wordsNoCase.begin(), wordsNoCase.end(), CmpStringNoCase);
103 }
104 } else {
105 if (!sorted) {
106 sorted = true;
107 std::sort(words.begin(), words.end(), CmpString);
108 }
109 }
110 }
111
Clear()112 void StringList::Clear() noexcept {
113 words.clear();
114 wordsNoCase.clear();
115 listText.clear();
116 sorted = false;
117 sortedNoCase = false;
118 }
119
Set(const char * s)120 void StringList::Set(const char *s) {
121 listText.assign(s, s+strlen(s)+1);
122 SetFromListText();
123 }
124
Set(const std::vector<char> & data)125 void StringList::Set(const std::vector<char> &data) {
126 listText.assign(data.begin(), data.end());
127 listText.push_back('\0');
128 SetFromListText();
129 }
130
131 namespace {
132
133 // Functors used to find elements given a prefix
134
135 struct CompareString {
136 size_t searchLen;
CompareString__anonc7f2721d0111::CompareString137 explicit CompareString(size_t searchLen_) noexcept : searchLen(searchLen_) {}
operator ()__anonc7f2721d0111::CompareString138 bool operator()(const char *a, const char *b) const noexcept {
139 return strncmp(a, b, searchLen) < 0;
140 }
141 };
142
143 struct CompareStringInsensitive {
144 size_t searchLen;
CompareStringInsensitive__anonc7f2721d0111::CompareStringInsensitive145 explicit CompareStringInsensitive(size_t searchLen_) noexcept : searchLen(searchLen_) {}
operator ()__anonc7f2721d0111::CompareStringInsensitive146 bool operator()(const char *a, const char *b) const noexcept {
147 return CompareNCaseInsensitive(a, b, searchLen) < 0;
148 }
149 };
150
151 template<typename Compare>
GetMatch(std::vector<char * >::iterator start,std::vector<char * >::iterator end,const char * wordStart,const std::string & wordCharacters,int wordIndex,Compare comp)152 std::string GetMatch(std::vector<char *>::iterator start, std::vector<char *>::iterator end,
153 const char *wordStart, const std::string &wordCharacters, int wordIndex, Compare comp) {
154 std::vector<char *>::iterator elem = std::lower_bound(start, end, wordStart, comp);
155 if (!comp(wordStart, *elem) && !comp(*elem, wordStart)) {
156 // Found a matching element, now move forward wordIndex matching elements
157 for (; elem < end; ++elem) {
158 const char *word = *elem;
159 if (!word[comp.searchLen] || !Contains(wordCharacters, word[comp.searchLen])) {
160 if (wordIndex <= 0) {
161 return std::string(word);
162 }
163 wordIndex--;
164 }
165 }
166 }
167 return std::string();
168 }
169
170 }
171
172 /**
173 * Returns an element (complete) of the StringList array which has
174 * the same beginning as the passed string.
175 * The length of the word to compare is passed too.
176 * Letter case can be ignored or preserved.
177 */
GetNearestWord(const char * wordStart,size_t searchLen,bool ignoreCase,const std::string & wordCharacters,int wordIndex)178 std::string StringList::GetNearestWord(const char *wordStart, size_t searchLen, bool ignoreCase, const std::string &wordCharacters, int wordIndex) {
179 if (words.empty())
180 return std::string();
181 SortIfNeeded(ignoreCase);
182 if (ignoreCase) {
183 return GetMatch(wordsNoCase.begin(), wordsNoCase.end(), wordStart, wordCharacters, wordIndex, CompareStringInsensitive(searchLen));
184 } else { // preserve the letter case
185 return GetMatch(words.begin(), words.end(), wordStart, wordCharacters, wordIndex, CompareString(searchLen));
186 }
187 }
188
189 /**
190 * Find the length of a 'word' which is actually an identifier in a string
191 * which looks like "identifier(..." or "identifier" and where
192 * there may be extra spaces after the identifier that should not be
193 * counted in the length.
194 */
LengthWord(const char * word,char otherSeparator)195 static size_t LengthWord(const char *word, char otherSeparator) noexcept {
196 const char *endWord = nullptr;
197 // Find an otherSeparator
198 if (otherSeparator)
199 endWord = strchr(word, otherSeparator);
200 // Find a '('. If that fails go to the end of the string.
201 if (!endWord)
202 endWord = strchr(word, '(');
203 if (!endWord)
204 endWord = word + strlen(word);
205 assert(endWord);
206 // Last case always succeeds so endWord != 0
207
208 // Drop any space characters.
209 if (endWord > word) {
210 endWord--; // Back from the '(', otherSeparator, or '\0'
211 // Move backwards over any spaces
212 while ((endWord > word) && (IsASpace(*endWord))) {
213 endWord--;
214 }
215 }
216 return endWord - word + 1;
217 }
218
219 template<typename Compare>
GetMatches(std::vector<char * >::iterator start,std::vector<char * >::iterator end,const char * wordStart,char otherSeparator,bool exactLen,Compare comp)220 static std::string GetMatches(std::vector<char *>::iterator start, std::vector<char *>::iterator end, const char *wordStart, char otherSeparator, bool exactLen, Compare comp) {
221 std::string wordList;
222 const size_t wordStartLength = LengthWord(wordStart, otherSeparator);
223 std::vector<char *>::iterator elem = std::lower_bound(start, end, wordStart, comp);
224 // Found a matching element, now accumulate all matches
225 for (; elem < end; ++elem) {
226 if (comp(wordStart, *elem) || comp(*elem, wordStart))
227 break; // Not a match so stop
228 // length of the word part (before the '(' brace) of the api array element
229 const size_t wordlen = LengthWord(*elem, otherSeparator);
230 if (!exactLen || (wordlen == wordStartLength)) {
231 if (wordList.length() > 0)
232 wordList.append(" ", 1);
233 wordList.append(*elem, wordlen);
234 }
235 }
236 return wordList;
237 }
238
239 /**
240 * Returns elements (first words of them) of the StringList array which have
241 * the same beginning as the passed string.
242 * The length of the word to compare is passed too.
243 * Letter case can be ignored or preserved (default).
244 * If there are more words meeting the condition they are returned all of
245 * them in the ascending order separated with spaces.
246 */
GetNearestWords(const char * wordStart,size_t searchLen,bool ignoreCase,char otherSeparator,bool exactLen)247 std::string StringList::GetNearestWords(
248 const char *wordStart,
249 size_t searchLen,
250 bool ignoreCase,
251 char otherSeparator /*= '\0'*/,
252 bool exactLen /*=false*/) {
253
254 if (words.empty())
255 return std::string();
256 SortIfNeeded(ignoreCase);
257 if (ignoreCase) {
258 return GetMatches(wordsNoCase.begin(), wordsNoCase.end(), wordStart, otherSeparator, exactLen, CompareStringInsensitive(searchLen));
259 } else {
260 // Preserve the letter case
261 return GetMatches(words.begin(), words.end(), wordStart, otherSeparator, exactLen, CompareString(searchLen));
262 }
263 }
264