1 // SciTE - Scintilla based Text Editor
2 /** @file StringList.cxx
3  ** Implementation of class holding a list of strings.
4  **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <cstdlib>
9 #include <cassert>
10 #include <cstring>
11 
12 #include <string>
13 #include <vector>
14 #include <map>
15 #include <set>
16 #include <algorithm>
17 #include <chrono>
18 
19 #include "GUI.h"
20 #include "StringList.h"
21 #include "StringHelpers.h"
22 
CompareNCaseInsensitive(const char * a,const char * b,size_t len)23 static int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept {
24 	while (*a && *b && len) {
25 		if (*a != *b) {
26 			const char upperA = MakeUpperCase(*a);
27 			const char upperB = MakeUpperCase(*b);
28 			if (upperA != upperB)
29 				return upperA - upperB;
30 		}
31 		a++;
32 		b++;
33 		len--;
34 	}
35 	if (len == 0)
36 		return 0;
37 	else
38 		// Either *a or *b is nul
39 		return *a - *b;
40 }
41 
42 /**
43  * Creates an array that points into each word in the string and puts \0 terminators
44  * after each word.
45  */
ArrayFromStringList(char * stringList,bool onlyLineEnds=false)46 static std::vector<char *> ArrayFromStringList(char *stringList, bool onlyLineEnds = false) {
47 	int prev = '\n';
48 	int words = 0;
49 	// For rapid determination of whether a character is a separator, build
50 	// a look up table.
51 	bool wordSeparator[256] = {};
52 	wordSeparator[static_cast<unsigned int>('\r')] = true;
53 	wordSeparator[static_cast<unsigned int>('\n')] = true;
54 	if (!onlyLineEnds) {
55 		wordSeparator[static_cast<unsigned int>(' ')] = true;
56 		wordSeparator[static_cast<unsigned int>('\t')] = true;
57 	}
58 	for (int j = 0; stringList[j]; j++) {
59 		const int curr = static_cast<unsigned char>(stringList[j]);
60 		if (!wordSeparator[curr] && wordSeparator[prev])
61 			words++;
62 		prev = curr;
63 	}
64 	std::vector<char *> keywords;
65 	const size_t slen = strlen(stringList);
66 	if (words) {
67 		prev = '\0';
68 		for (size_t k = 0; k < slen; k++) {
69 			if (!wordSeparator[static_cast<unsigned char>(stringList[k])]) {
70 				if (!prev) {
71 					keywords.push_back(&stringList[k]);
72 				}
73 			} else {
74 				stringList[k] = '\0';
75 			}
76 			prev = stringList[k];
77 		}
78 	}
79 	return keywords;
80 }
81 
SetFromListText()82 void StringList::SetFromListText() {
83 	sorted = false;
84 	sortedNoCase = false;
85 	words = ArrayFromStringList(&listText[0], onlyLineEnds);
86 	wordsNoCase = words;
87 }
88 
CmpString(const char * a,const char * b)89 static bool CmpString(const char *a, const char *b) noexcept {
90 	return strcmp(a, b) < 0;
91 }
92 
CmpStringNoCase(const char * a,const char * b)93 static bool CmpStringNoCase(const char *a, const char *b) noexcept {
94 	return CompareNoCase(a, b) < 0;
95 }
96 
SortIfNeeded(bool ignoreCase)97 void StringList::SortIfNeeded(bool ignoreCase) {
98 	// In both cases, the final empty sentinel element is not sorted.
99 	if (ignoreCase) {
100 		if (!sortedNoCase) {
101 			sortedNoCase = true;
102 			std::sort(wordsNoCase.begin(), wordsNoCase.end(), CmpStringNoCase);
103 		}
104 	} else {
105 		if (!sorted) {
106 			sorted = true;
107 			std::sort(words.begin(), words.end(), CmpString);
108 		}
109 	}
110 }
111 
Clear()112 void StringList::Clear() noexcept {
113 	words.clear();
114 	wordsNoCase.clear();
115 	listText.clear();
116 	sorted = false;
117 	sortedNoCase = false;
118 }
119 
Set(const char * s)120 void StringList::Set(const char *s) {
121 	listText.assign(s, s+strlen(s)+1);
122 	SetFromListText();
123 }
124 
Set(const std::vector<char> & data)125 void StringList::Set(const std::vector<char> &data) {
126 	listText.assign(data.begin(), data.end());
127 	listText.push_back('\0');
128 	SetFromListText();
129 }
130 
131 namespace {
132 
133 // Functors used to find elements given a prefix
134 
135 struct CompareString {
136 	size_t searchLen;
CompareString__anonc7f2721d0111::CompareString137 	explicit CompareString(size_t searchLen_) noexcept : searchLen(searchLen_) {}
operator ()__anonc7f2721d0111::CompareString138 	bool operator()(const char *a, const char *b) const noexcept {
139 		return strncmp(a, b, searchLen) < 0;
140 	}
141 };
142 
143 struct CompareStringInsensitive {
144 	size_t searchLen;
CompareStringInsensitive__anonc7f2721d0111::CompareStringInsensitive145 	explicit CompareStringInsensitive(size_t searchLen_) noexcept : searchLen(searchLen_) {}
operator ()__anonc7f2721d0111::CompareStringInsensitive146 	bool operator()(const char *a, const char *b) const noexcept {
147 		return CompareNCaseInsensitive(a, b, searchLen) < 0;
148 	}
149 };
150 
151 template<typename Compare>
GetMatch(std::vector<char * >::iterator start,std::vector<char * >::iterator end,const char * wordStart,const std::string & wordCharacters,int wordIndex,Compare comp)152 std::string GetMatch(std::vector<char *>::iterator start, std::vector<char *>::iterator end,
153 		     const char *wordStart, const std::string &wordCharacters, int wordIndex, Compare comp) {
154 	std::vector<char *>::iterator elem = std::lower_bound(start, end, wordStart, comp);
155 	if (!comp(wordStart, *elem) && !comp(*elem, wordStart)) {
156 		// Found a matching element, now move forward wordIndex matching elements
157 		for (; elem < end; ++elem) {
158 			const char *word = *elem;
159 			if (!word[comp.searchLen] || !Contains(wordCharacters, word[comp.searchLen])) {
160 				if (wordIndex <= 0) {
161 					return std::string(word);
162 				}
163 				wordIndex--;
164 			}
165 		}
166 	}
167 	return std::string();
168 }
169 
170 }
171 
172 /**
173  * Returns an element (complete) of the StringList array which has
174  * the same beginning as the passed string.
175  * The length of the word to compare is passed too.
176  * Letter case can be ignored or preserved.
177  */
GetNearestWord(const char * wordStart,size_t searchLen,bool ignoreCase,const std::string & wordCharacters,int wordIndex)178 std::string StringList::GetNearestWord(const char *wordStart, size_t searchLen, bool ignoreCase, const std::string &wordCharacters, int wordIndex) {
179 	if (words.empty())
180 		return std::string();
181 	SortIfNeeded(ignoreCase);
182 	if (ignoreCase) {
183 		return GetMatch(wordsNoCase.begin(), wordsNoCase.end(), wordStart, wordCharacters, wordIndex, CompareStringInsensitive(searchLen));
184 	} else { // preserve the letter case
185 		return GetMatch(words.begin(), words.end(), wordStart, wordCharacters, wordIndex, CompareString(searchLen));
186 	}
187 }
188 
189 /**
190  * Find the length of a 'word' which is actually an identifier in a string
191  * which looks like "identifier(..." or "identifier" and where
192  * there may be extra spaces after the identifier that should not be
193  * counted in the length.
194  */
LengthWord(const char * word,char otherSeparator)195 static size_t LengthWord(const char *word, char otherSeparator) noexcept {
196 	const char *endWord = nullptr;
197 	// Find an otherSeparator
198 	if (otherSeparator)
199 		endWord = strchr(word, otherSeparator);
200 	// Find a '('. If that fails go to the end of the string.
201 	if (!endWord)
202 		endWord = strchr(word, '(');
203 	if (!endWord)
204 		endWord = word + strlen(word);
205 	assert(endWord);
206 	// Last case always succeeds so endWord != 0
207 
208 	// Drop any space characters.
209 	if (endWord > word) {
210 		endWord--;	// Back from the '(', otherSeparator, or '\0'
211 		// Move backwards over any spaces
212 		while ((endWord > word) && (IsASpace(*endWord))) {
213 			endWord--;
214 		}
215 	}
216 	return endWord - word + 1;
217 }
218 
219 template<typename Compare>
GetMatches(std::vector<char * >::iterator start,std::vector<char * >::iterator end,const char * wordStart,char otherSeparator,bool exactLen,Compare comp)220 static std::string GetMatches(std::vector<char *>::iterator start, std::vector<char *>::iterator end, const char *wordStart, char otherSeparator, bool exactLen, Compare comp) {
221 	std::string wordList;
222 	const size_t wordStartLength = LengthWord(wordStart, otherSeparator);
223 	std::vector<char *>::iterator elem = std::lower_bound(start, end, wordStart, comp);
224 	// Found a matching element, now accumulate all matches
225 	for (; elem < end; ++elem) {
226 		if (comp(wordStart, *elem) || comp(*elem, wordStart))
227 			break;	// Not a match so stop
228 		// length of the word part (before the '(' brace) of the api array element
229 		const size_t wordlen = LengthWord(*elem, otherSeparator);
230 		if (!exactLen || (wordlen == wordStartLength)) {
231 			if (wordList.length() > 0)
232 				wordList.append(" ", 1);
233 			wordList.append(*elem, wordlen);
234 		}
235 	}
236 	return wordList;
237 }
238 
239 /**
240  * Returns elements (first words of them) of the StringList array which have
241  * the same beginning as the passed string.
242  * The length of the word to compare is passed too.
243  * Letter case can be ignored or preserved (default).
244  * If there are more words meeting the condition they are returned all of
245  * them in the ascending order separated with spaces.
246  */
GetNearestWords(const char * wordStart,size_t searchLen,bool ignoreCase,char otherSeparator,bool exactLen)247 std::string StringList::GetNearestWords(
248 	const char *wordStart,
249 	size_t searchLen,
250 	bool ignoreCase,
251 	char otherSeparator /*= '\0'*/,
252 	bool exactLen /*=false*/) {
253 
254 	if (words.empty())
255 		return std::string();
256 	SortIfNeeded(ignoreCase);
257 	if (ignoreCase) {
258 		return GetMatches(wordsNoCase.begin(), wordsNoCase.end(), wordStart, otherSeparator, exactLen, CompareStringInsensitive(searchLen));
259 	} else {
260 		// Preserve the letter case
261 		return GetMatches(words.begin(), words.end(), wordStart, otherSeparator, exactLen, CompareString(searchLen));
262 	}
263 }
264