1 // Scintilla source code edit control
2 /** @file KeyWords.cxx
3  ** Colourise for particular languages.
4  **/
5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 
14 #include <algorithm>
15 
16 #include "WordList.h"
17 
18 #ifdef SCI_NAMESPACE
19 using namespace Scintilla;
20 #endif
21 
/**
 * Split @a wordlist in place into NUL-terminated words and build an index of them.
 *
 * Every separator character in @a wordlist is overwritten with '\0'. Carriage return
 * and line feed always separate words; space and tab also separate words unless
 * @a onlyLineEnds is true.
 *
 * @param wordlist     Writable, NUL-terminated buffer holding the words.
 * @param len          Receives the number of words found.
 * @param onlyLineEnds When true only line ends delimit words.
 * @return A new[]-allocated array of pointers into @a wordlist. It holds *len word
 *         pointers followed by one extra entry that points at the buffer's terminating
 *         NUL, so the entry after the last word is always an empty string.
 */
static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
	// Look up table giving rapid classification of a character as a separator.
	bool isSeparator[256] = {false};
	isSeparator['\r'] = true;
	isSeparator['\n'] = true;
	isSeparator[' '] = !onlyLineEnds;
	isSeparator['\t'] = !onlyLineEnds;

	// First pass: count the words so the pointer array can be sized exactly.
	// The start of the buffer behaves as though it followed a separator.
	int counted = 0;
	bool afterSeparator = true;
	for (const char *scan = wordlist; *scan; scan++) {
		const bool separatorHere = isSeparator[static_cast<unsigned char>(*scan)];
		if (afterSeparator && !separatorHere)
			counted++;
		afterSeparator = separatorHere;
	}

	char **keywords = new char *[counted + 1];
	if (!keywords) {
		*len = 0;
		return keywords;
	}

	// Second pass: terminate each word and remember where it begins.
	// The length is taken before any separator is overwritten.
	const size_t total = strlen(wordlist);
	int stored = 0;
	bool insideWord = false;
	for (size_t pos = 0; pos < total; pos++) {
		if (isSeparator[static_cast<unsigned char>(wordlist[pos])]) {
			wordlist[pos] = '\0';
			insideWord = false;
		} else if (!insideWord) {
			keywords[stored] = &wordlist[pos];
			stored++;
			insideWord = true;
		}
	}
	// Trailing entry: an empty string that marks the end of the array.
	keywords[stored] = &wordlist[total];
	*len = stored;
	return keywords;
}
70 
operator !=(const WordList & other) const71 bool WordList::operator!=(const WordList &other) const {
72 	if (len != other.len)
73 		return true;
74 	for (int i=0; i<len; i++) {
75 		if (strcmp(words[i], other.words[i]) != 0)
76 			return true;
77 	}
78 	return false;
79 }
80 
Clear()81 void WordList::Clear() {
82 	if (words) {
83 		delete []list;
84 		delete []words;
85 	}
86 	words = 0;
87 	list = 0;
88 	len = 0;
89 }
90 
91 #ifdef _MSC_VER
92 
/**
 * Ordering predicate for std::sort: true when string a sorts strictly before string b.
 * strcmp only promises a negative, zero or positive result, so the test must be
 * "less than zero" rather than equality with -1.
 */
static bool cmpWords(const char *a, const char *b) {
	return strcmp(a, b) < 0;
}
96 
97 #else
98 
/**
 * qsort comparison callback for an array of C strings.
 * Each argument points at an array element (a char pointer); the result is the
 * strcmp ordering of the two strings those elements refer to.
 */
static int cmpWords(const void *a, const void *b) {
	const char *const *lhs = static_cast<const char *const *>(a);
	const char *const *rhs = static_cast<const char *const *>(b);
	return strcmp(*lhs, *rhs);
}
102 
SortWordList(char ** words,unsigned int len)103 static void SortWordList(char **words, unsigned int len) {
104 	qsort(reinterpret_cast<void *>(words), len, sizeof(*words), cmpWords);
105 }
106 
107 #endif
108 
Set(const char * s)109 void WordList::Set(const char *s) {
110 	Clear();
111 	list = new char[strlen(s) + 1];
112 	strcpy(list, s);
113 	words = ArrayFromWordList(list, &len, onlyLineEnds);
114 #ifdef _MSC_VER
115 	std::sort(words, words + len, cmpWords);
116 #else
117 	SortWordList(words, len);
118 #endif
119 	for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
120 		starts[k] = -1;
121 	for (int l = len - 1; l >= 0; l--) {
122 		unsigned char indexChar = words[l][0];
123 		starts[indexChar] = l;
124 	}
125 }
126 
127 /** Check whether a string is in the list.
128  * List elements are either exact matches or prefixes.
129  * Prefix elements start with '^' and match all strings that start with the rest of the element
130  * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
131  */
InList(const char * s) const132 bool WordList::InList(const char *s) const {
133 	if (0 == words)
134 		return false;
135 	unsigned char firstChar = s[0];
136 	int j = starts[firstChar];
137 	if (j >= 0) {
138 		while (static_cast<unsigned char>(words[j][0]) == firstChar) {
139 			if (s[1] == words[j][1]) {
140 				const char *a = words[j] + 1;
141 				const char *b = s + 1;
142 				while (*a && *a == *b) {
143 					a++;
144 					b++;
145 				}
146 				if (!*a && !*b)
147 					return true;
148 			}
149 			j++;
150 		}
151 	}
152 	j = starts['^'];
153 	if (j >= 0) {
154 		while (words[j][0] == '^') {
155 			const char *a = words[j] + 1;
156 			const char *b = s;
157 			while (*a && *a == *b) {
158 				a++;
159 				b++;
160 			}
161 			if (!*a)
162 				return true;
163 			j++;
164 		}
165 	}
166 	return false;
167 }
168 
169 /** similar to InList, but word s can be a substring of keyword.
170  * eg. the keyword define is defined as def~ine. This means the word must start
171  * with def to be a keyword, but also defi, defin and define are valid.
172  * The marker is ~ in this case.
173  */
InListAbbreviated(const char * s,const char marker) const174 bool WordList::InListAbbreviated(const char *s, const char marker) const {
175 	if (0 == words)
176 		return false;
177 	unsigned char firstChar = s[0];
178 	int j = starts[firstChar];
179 	if (j >= 0) {
180 		while (static_cast<unsigned char>(words[j][0]) == firstChar) {
181 			bool isSubword = false;
182 			int start = 1;
183 			if (words[j][1] == marker) {
184 				isSubword = true;
185 				start++;
186 			}
187 			if (s[1] == words[j][start]) {
188 				const char *a = words[j] + start;
189 				const char *b = s + 1;
190 				while (*a && *a == *b) {
191 					a++;
192 					if (*a == marker) {
193 						isSubword = true;
194 						a++;
195 					}
196 					b++;
197 				}
198 				if ((!*a || isSubword) && !*b)
199 					return true;
200 			}
201 			j++;
202 		}
203 	}
204 	j = starts['^'];
205 	if (j >= 0) {
206 		while (words[j][0] == '^') {
207 			const char *a = words[j] + 1;
208 			const char *b = s;
209 			while (*a && *a == *b) {
210 				a++;
211 				b++;
212 			}
213 			if (!*a)
214 				return true;
215 			j++;
216 		}
217 	}
218 	return false;
219 }
220