1 // SciTE - Scintilla based Text Editor
2 /** @file PropSet.cxx
3  ** A Java style properties file module.
4  **/
5 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 // Maintain a dictionary of properties
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 
14 #include "Platform.h"
15 
16 #include "PropSet.h"
17 
18 // The comparison and case changing functions here assume ASCII
19 // or extended ASCII such as the normal Windows code page.
20 
MakeUpperCase(char ch)21 static inline char MakeUpperCase(char ch) {
22 	if (ch < 'a' || ch > 'z')
23 		return ch;
24 	else
25 		return static_cast<char>(ch - 'a' + 'A');
26 }
27 
IsLetter(char ch)28 static inline bool IsLetter(char ch) {
29 	return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'));
30 }
31 
IsASpace(unsigned int ch)32 inline bool IsASpace(unsigned int ch) {
33     return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
34 }
35 
CompareCaseInsensitive(const char * a,const char * b)36 int CompareCaseInsensitive(const char *a, const char *b) {
37 	while (*a && *b) {
38 		if (*a != *b) {
39 			char upperA = MakeUpperCase(*a);
40 			char upperB = MakeUpperCase(*b);
41 			if (upperA != upperB)
42 				return upperA - upperB;
43 		}
44 		a++;
45 		b++;
46 	}
47 	// Either *a or *b is nul
48 	return *a - *b;
49 }
50 
CompareNCaseInsensitive(const char * a,const char * b,size_t len)51 int CompareNCaseInsensitive(const char *a, const char *b, size_t len) {
52 	while (*a && *b && len) {
53 		if (*a != *b) {
54 			char upperA = MakeUpperCase(*a);
55 			char upperB = MakeUpperCase(*b);
56 			if (upperA != upperB)
57 				return upperA - upperB;
58 		}
59 		a++;
60 		b++;
61 		len--;
62 	}
63 	if (len == 0)
64 		return 0;
65 	else
66 		// Either *a or *b is nul
67 		return *a - *b;
68 }
69 
EqualCaseInsensitive(const char * a,const char * b)70 bool EqualCaseInsensitive(const char *a, const char *b) {
71 	return 0 == CompareCaseInsensitive(a, b);
72 }
73 
74 // Since the CaseInsensitive functions declared in SString
75 // are implemented here, I will for now put the non-inline
76 // implementations of the SString members here as well, so
77 // that I can quickly see what effect this has.
78 
SString(int i)79 SString::SString(int i) : sizeGrowth(sizeGrowthDefault) {
80 	char number[32];
81 	sprintf(number, "%0d", i);
82 	s = StringAllocate(number);
83 	sSize = sLen = (s) ? strlen(s) : 0;
84 }
85 
SString(double d,int precision)86 SString::SString(double d, int precision) : sizeGrowth(sizeGrowthDefault) {
87 	char number[32];
88 	sprintf(number, "%.*f", precision, d);
89 	s = StringAllocate(number);
90 	sSize = sLen = (s) ? strlen(s) : 0;
91 }
92 
grow(lenpos_t lenNew)93 bool SString::grow(lenpos_t lenNew) {
94 	while (sizeGrowth * 6 < lenNew) {
95 		sizeGrowth *= 2;
96 	}
97 	char *sNew = new char[lenNew + sizeGrowth + 1];
98 	if (sNew) {
99 		if (s) {
100 			memcpy(sNew, s, sLen);
101 			delete []s;
102 		}
103 		s = sNew;
104 		s[sLen] = '\0';
105 		sSize = lenNew + sizeGrowth;
106 	}
107 	return sNew != 0;
108 }
109 
assign(const char * sOther,lenpos_t sSize_)110 SString &SString::assign(const char *sOther, lenpos_t sSize_) {
111 	if (!sOther) {
112 		sSize_ = 0;
113 	} else if (sSize_ == measure_length) {
114 		sSize_ = strlen(sOther);
115 	}
116 	if (sSize > 0 && sSize_ <= sSize) {	// Does not allocate new buffer if the current is big enough
117 		if (s && sSize_) {
118 			memcpy(s, sOther, sSize_);
119 		}
120 		s[sSize_] = '\0';
121 		sLen = sSize_;
122 	} else {
123 		delete []s;
124 		s = StringAllocate(sOther, sSize_);
125 		if (s) {
126 			sSize = sSize_;	// Allow buffer bigger than real string, thus providing space to grow
127 			sLen = sSize_;
128 		} else {
129 			sSize = sLen = 0;
130 		}
131 	}
132 	return *this;
133 }
134 
operator ==(const SString & sOther) const135 bool SString::operator==(const SString &sOther) const {
136 	if ((s == 0) && (sOther.s == 0))
137 		return true;
138 	if ((s == 0) || (sOther.s == 0))
139 		return false;
140 	return strcmp(s, sOther.s) == 0;
141 }
142 
operator ==(const char * sOther) const143 bool SString::operator==(const char *sOther) const {
144 	if ((s == 0) && (sOther == 0))
145 		return true;
146 	if ((s == 0) || (sOther == 0))
147 		return false;
148 	return strcmp(s, sOther) == 0;
149 }
150 
substr(lenpos_t subPos,lenpos_t subLen) const151 SString SString::substr(lenpos_t subPos, lenpos_t subLen) const {
152 	if (subPos >= sLen) {
153 		return SString();					// return a null string if start index is out of bounds
154 	}
155 	if ((subLen == measure_length) || (subPos + subLen > sLen)) {
156 		subLen = sLen - subPos;		// can't substr past end of source string
157 	}
158 	return SString(s, subPos, subPos + subLen);
159 }
160 
lowercase(lenpos_t subPos,lenpos_t subLen)161 SString &SString::lowercase(lenpos_t subPos, lenpos_t subLen) {
162 	if ((subLen == measure_length) || (subPos + subLen > sLen)) {
163 		subLen = sLen - subPos;		// don't apply past end of string
164 	}
165 	for (lenpos_t i = subPos; i < subPos + subLen; i++) {
166 		if (s[i] < 'A' || s[i] > 'Z')
167 			continue;
168 		else
169 			s[i] = static_cast<char>(s[i] - 'A' + 'a');
170 	}
171 	return *this;
172 }
173 
uppercase(lenpos_t subPos,lenpos_t subLen)174 SString &SString::uppercase(lenpos_t subPos, lenpos_t subLen) {
175 	if ((subLen == measure_length) || (subPos + subLen > sLen)) {
176 		subLen = sLen - subPos;		// don't apply past end of string
177 	}
178 	for (lenpos_t i = subPos; i < subPos + subLen; i++) {
179 		if (s[i] < 'a' || s[i] > 'z')
180 			continue;
181 		else
182 			s[i] = static_cast<char>(s[i] - 'a' + 'A');
183 	}
184 	return *this;
185 }
186 
append(const char * sOther,lenpos_t sLenOther,char sep)187 SString &SString::append(const char *sOther, lenpos_t sLenOther, char sep) {
188 	if (!sOther) {
189 		return *this;
190 	}
191 	if (sLenOther == measure_length) {
192 		sLenOther = strlen(sOther);
193 	}
194 	int lenSep = 0;
195 	if (sLen && sep) {	// Only add a separator if not empty
196 		lenSep = 1;
197 	}
198 	lenpos_t lenNew = sLen + sLenOther + lenSep;
199 	// Conservative about growing the buffer: don't do it, unless really needed
200 	if ((lenNew < sSize) || (grow(lenNew))) {
201 		if (lenSep) {
202 			s[sLen] = sep;
203 			sLen++;
204 		}
205 		memcpy(&s[sLen], sOther, sLenOther);
206 		sLen += sLenOther;
207 		s[sLen] = '\0';
208 	}
209 	return *this;
210 }
211 
insert(lenpos_t pos,const char * sOther,lenpos_t sLenOther)212 SString &SString::insert(lenpos_t pos, const char *sOther, lenpos_t sLenOther) {
213 	if (!sOther || pos > sLen) {
214 		return *this;
215 	}
216 	if (sLenOther == measure_length) {
217 		sLenOther = strlen(sOther);
218 	}
219 	lenpos_t lenNew = sLen + sLenOther;
220 	// Conservative about growing the buffer: don't do it, unless really needed
221 	if ((lenNew < sSize) || grow(lenNew)) {
222 		lenpos_t moveChars = sLen - pos + 1;
223 		for (lenpos_t i = moveChars; i > 0; i--) {
224 			s[pos + sLenOther + i - 1] = s[pos + i - 1];
225 		}
226 		memcpy(s + pos, sOther, sLenOther);
227 		sLen = lenNew;
228 	}
229 	return *this;
230 }
231 
232 /**
233  * Remove @a len characters from the @a pos position, included.
234  * Characters at pos + len and beyond replace characters at pos.
235  * If @a len is 0, or greater than the length of the string
236  * starting at @a pos, the string is just truncated at @a pos.
237  */
remove(lenpos_t pos,lenpos_t len)238 void SString::remove(lenpos_t pos, lenpos_t len) {
239 	if (pos >= sLen) {
240 		return;
241 	}
242 	if (len < 1 || pos + len >= sLen) {
243 		s[pos] = '\0';
244 		sLen = pos;
245 	} else {
246 		for (lenpos_t i = pos; i < sLen - len + 1; i++) {
247 			s[i] = s[i+len];
248 		}
249 		sLen -= len;
250 	}
251 }
252 
startswith(const char * prefix)253 bool SString::startswith(const char *prefix) {
254 	lenpos_t lenPrefix = strlen(prefix);
255 	if (lenPrefix > sLen) {
256 		return false;
257 	}
258 	return strncmp(s, prefix, lenPrefix) == 0;
259 }
260 
endswith(const char * suffix)261 bool SString::endswith(const char *suffix) {
262 	lenpos_t lenSuffix = strlen(suffix);
263 	if (lenSuffix > sLen) {
264 		return false;
265 	}
266 	return strncmp(s + sLen - lenSuffix, suffix, lenSuffix) == 0;
267 }
268 
search(const char * sFind,lenpos_t start) const269 int SString::search(const char *sFind, lenpos_t start) const {
270 	if (start < sLen) {
271 		const char *sFound = strstr(s + start, sFind);
272 		if (sFound) {
273 			return sFound - s;
274 		}
275 	}
276 	return -1;
277 }
278 
substitute(char chFind,char chReplace)279 int SString::substitute(char chFind, char chReplace) {
280 	int c = 0;
281 	char *t = s;
282 	while (t) {
283 		t = strchr(t, chFind);
284 		if (t) {
285 			*t = chReplace;
286 			t++;
287 			c++;
288 		}
289 	}
290 	return c;
291 }
292 
substitute(const char * sFind,const char * sReplace)293 int SString::substitute(const char *sFind, const char *sReplace) {
294 	int c = 0;
295 	lenpos_t lenFind = strlen(sFind);
296 	lenpos_t lenReplace = strlen(sReplace);
297 	int posFound = search(sFind);
298 	while (posFound >= 0) {
299 		remove(posFound, lenFind);
300 		insert(posFound, sReplace, lenReplace);
301 		posFound = search(sFind, posFound + lenReplace);
302 		c++;
303 	}
304 	return c;
305 }
306 
StringAllocate(lenpos_t len)307 char *SContainer::StringAllocate(lenpos_t len) {
308 	if (len != measure_length) {
309 		return new char[len + 1];
310 	} else {
311 		return 0;
312 	}
313 }
314 
StringAllocate(const char * s,lenpos_t len)315 char *SContainer::StringAllocate(const char *s, lenpos_t len) {
316 	if (s == 0) {
317 		return 0;
318 	}
319 	if (len == measure_length) {
320 		len = strlen(s);
321 	}
322 	char *sNew = new char[len + 1];
323 	if (sNew) {
324 		memcpy(sNew, s, len);
325 		sNew[len] = '\0';
326 	}
327 	return sNew;
328 }
329 
330 // End SString functions
331 
332 bool PropSet::caseSensitiveFilenames = false;
333 
PropSet()334 PropSet::PropSet() {
335 	superPS = 0;
336 	for (int root = 0; root < hashRoots; root++)
337 		props[root] = 0;
338 }
339 
~PropSet()340 PropSet::~PropSet() {
341 	superPS = 0;
342 	Clear();
343 }
344 
Set(const char * key,const char * val,int lenKey,int lenVal)345 void PropSet::Set(const char *key, const char *val, int lenKey, int lenVal) {
346 	if (!*key)	// Empty keys are not supported
347 		return;
348 	if (lenKey == -1)
349 		lenKey = static_cast<int>(strlen(key));
350 	if (lenVal == -1)
351 		lenVal = static_cast<int>(strlen(val));
352 	unsigned int hash = HashString(key, lenKey);
353 	for (Property *p = props[hash % hashRoots]; p; p = p->next) {
354 		if ((hash == p->hash) &&
355 			((strlen(p->key) == static_cast<unsigned int>(lenKey)) &&
356 				(0 == strncmp(p->key, key, lenKey)))) {
357 			// Replace current value
358 			delete [](p->val);
359 			p->val = StringDup(val, lenVal);
360 			return;
361 		}
362 	}
363 	// Not found
364 	Property *pNew = new Property;
365 	if (pNew) {
366 		pNew->hash = hash;
367 		pNew->key = StringDup(key, lenKey);
368 		pNew->val = StringDup(val, lenVal);
369 		pNew->next = props[hash % hashRoots];
370 		props[hash % hashRoots] = pNew;
371 	}
372 }
373 
Set(const char * keyVal)374 void PropSet::Set(const char *keyVal) {
375 	while (IsASpace(*keyVal))
376 		keyVal++;
377 	const char *endVal = keyVal;
378 	while (*endVal && (*endVal != '\n'))
379 		endVal++;
380 	const char *eqAt = strchr(keyVal, '=');
381 	if (eqAt) {
382 		Set(keyVal, eqAt + 1, eqAt-keyVal, endVal - eqAt - 1);
383 	} else if (*keyVal) {	// No '=' so assume '=1'
384 		Set(keyVal, "1", endVal-keyVal, 1);
385 	}
386 }
387 
Unset(const char * key,int lenKey)388 void PropSet::Unset(const char *key, int lenKey) {
389 	if (!*key)	// Empty keys are not supported
390 		return;
391 	if (lenKey == -1)
392 		lenKey = static_cast<int>(strlen(key));
393 	unsigned int hash = HashString(key, lenKey);
394 	Property *pPrev = NULL;
395 	for (Property *p = props[hash % hashRoots]; p; p = p->next) {
396 		if ((hash == p->hash) &&
397 			((strlen(p->key) == static_cast<unsigned int>(lenKey)) &&
398 				(0 == strncmp(p->key, key, lenKey)))) {
399 			if (pPrev)
400 				pPrev->next = p->next;
401 			else
402 				props[hash % hashRoots] = p->next;
403 			if (p == enumnext)
404 				enumnext = p->next; // Not that anyone should mix enum and Set / Unset.
405 			delete [](p->key);
406 			delete [](p->val);
407 			delete p;
408 			return;
409 		} else {
410 			pPrev = p;
411 		}
412 	}
413 }
414 
SetMultiple(const char * s)415 void PropSet::SetMultiple(const char *s) {
416 	const char *eol = strchr(s, '\n');
417 	while (eol) {
418 		Set(s);
419 		s = eol + 1;
420 		eol = strchr(s, '\n');
421 	}
422 	Set(s);
423 }
424 
Get(const char * key)425 SString PropSet::Get(const char *key) {
426 	unsigned int hash = HashString(key, strlen(key));
427 	for (Property *p = props[hash % hashRoots]; p; p = p->next) {
428 		if ((hash == p->hash) && (0 == strcmp(p->key, key))) {
429 			return p->val;
430 		}
431 	}
432 	if (superPS) {
433 		// Failed here, so try in base property set
434 		return superPS->Get(key);
435 	} else {
436 		return "";
437 	}
438 }
439 
IncludesVar(const char * value,const char * key)440 bool PropSet::IncludesVar(const char *value, const char *key) {
441 	const char *var = strstr(value, "$(");
442 	while (var) {
443 		if (isprefix(var + 2, key) && (var[2 + strlen(key)] == ')')) {
444 			// Found $(key) which would lead to an infinite loop so exit
445 			return true;
446 		}
447 		var = strstr(var + 2, ")");
448 		if (var)
449 			var = strstr(var + 1, "$(");
450 	}
451 	return false;
452 }
453 
454 
455 // There is some inconsistency between GetExpanded("foo") and Expand("$(foo)").
456 // A solution is to keep a stack of variables that have been expanded, so that
457 // recursive expansions can be skipped.  For now I'll just use the C++ stack
458 // for that, through a recursive function and a simple chain of pointers.
459 
460 struct VarChain {
VarChainVarChain461 	VarChain(const char*var_=NULL, const VarChain *link_=NULL): var(var_), link(link_) {}
462 
containsVarChain463 	bool contains(const char *testVar) const {
464 		return (var && (0 == strcmp(var, testVar)))
465 			|| (link && link->contains(testVar));
466 	}
467 
468 	const char *var;
469 	const VarChain *link;
470 };
471 
ExpandAllInPlace(PropSet & props,SString & withVars,int maxExpands,const VarChain & blankVars=VarChain ())472 static int ExpandAllInPlace(PropSet &props, SString &withVars, int maxExpands, const VarChain &blankVars = VarChain()) {
473 	int varStart = withVars.search("$(");
474 	while ((varStart >= 0) && (maxExpands > 0)) {
475 		int varEnd = withVars.search(")", varStart+2);
476 		if (varEnd < 0) {
477 			break;
478 		}
479 
480 		// For consistency, when we see '$(ab$(cde))', expand the inner variable first,
481 		// regardless whether there is actually a degenerate variable named 'ab$(cde'.
482 		int innerVarStart = withVars.search("$(", varStart+2);
483 		while ((innerVarStart > varStart) && (innerVarStart < varEnd)) {
484 			varStart = innerVarStart;
485 			innerVarStart = withVars.search("$(", varStart+2);
486 		}
487 
488 		SString var(withVars.c_str(), varStart + 2, varEnd);
489 		SString val = props.Get(var.c_str());
490 
491 		if (blankVars.contains(var.c_str())) {
492 			val.clear(); // treat blankVar as an empty string (e.g. to block self-reference)
493 		}
494 
495 		if (--maxExpands >= 0) {
496 			maxExpands = ExpandAllInPlace(props, val, maxExpands, VarChain(var.c_str(), &blankVars));
497 		}
498 
499 		withVars.remove(varStart, varEnd-varStart+1);
500 		withVars.insert(varStart, val.c_str(), val.length());
501 
502 		varStart = withVars.search("$(");
503 	}
504 
505 	return maxExpands;
506 }
507 
508 
GetExpanded(const char * key)509 SString PropSet::GetExpanded(const char *key) {
510 	SString val = Get(key);
511 	ExpandAllInPlace(*this, val, 100, VarChain(key));
512 	return val;
513 }
514 
Expand(const char * withVars,int maxExpands)515 SString PropSet::Expand(const char *withVars, int maxExpands) {
516 	SString val = withVars;
517 	ExpandAllInPlace(*this, val, maxExpands);
518 	return val;
519 }
520 
GetInt(const char * key,int defaultValue)521 int PropSet::GetInt(const char *key, int defaultValue) {
522 	SString val = GetExpanded(key);
523 	if (val.length())
524 		return val.value();
525 	return defaultValue;
526 }
527 
isprefix(const char * target,const char * prefix)528 bool isprefix(const char *target, const char *prefix) {
529 	while (*target && *prefix) {
530 		if (*target != *prefix)
531 			return false;
532 		target++;
533 		prefix++;
534 	}
535 	if (*prefix)
536 		return false;
537 	else
538 		return true;
539 }
540 
IsSuffix(const char * target,const char * suffix,bool caseSensitive)541 static bool IsSuffix(const char *target, const char *suffix, bool caseSensitive) {
542 	size_t lentarget = strlen(target);
543 	size_t lensuffix = strlen(suffix);
544 	if (lensuffix > lentarget)
545 		return false;
546 	if (caseSensitive) {
547 		for (int i = static_cast<int>(lensuffix) - 1; i >= 0; i--) {
548 			if (target[i + lentarget - lensuffix] != suffix[i])
549 				return false;
550 		}
551 	} else {
552 	for (int i = static_cast<int>(lensuffix) - 1; i >= 0; i--) {
553 		if (MakeUpperCase(target[i + lentarget - lensuffix]) !=
554 		        MakeUpperCase(suffix[i]))
555 			return false;
556 	}
557 	}
558 	return true;
559 }
560 
GetWild(const char * keybase,const char * filename)561 SString PropSet::GetWild(const char *keybase, const char *filename) {
562 	for (int root = 0; root < hashRoots; root++) {
563 		for (Property *p = props[root]; p; p = p->next) {
564 			if (isprefix(p->key, keybase)) {
565 				char * orgkeyfile = p->key + strlen(keybase);
566 				char *keyfile = NULL;
567 
568 				if (strstr(orgkeyfile, "$(") == orgkeyfile) {
569 					char *cpendvar = strchr(orgkeyfile, ')');
570 					if (cpendvar) {
571 						*cpendvar = '\0';
572 						SString s = GetExpanded(orgkeyfile + 2);
573 						*cpendvar = ')';
574 						keyfile = StringDup(s.c_str());
575 					}
576 				}
577 				char *keyptr = keyfile;
578 
579 				if (keyfile == NULL)
580 					keyfile = orgkeyfile;
581 
582 				for (;;) {
583 					char *del = strchr(keyfile, ';');
584 					if (del == NULL)
585 						del = keyfile + strlen(keyfile);
586 					char delchr = *del;
587 					*del = '\0';
588 					if (*keyfile == '*') {
589 						if (IsSuffix(filename, keyfile + 1, caseSensitiveFilenames)) {
590 							*del = delchr;
591 							delete []keyptr;
592 							return p->val;
593 						}
594 					} else if (0 == strcmp(keyfile, filename)) {
595 						*del = delchr;
596 						delete []keyptr;
597 						return p->val;
598 					}
599 					if (delchr == '\0')
600 						break;
601 					*del = delchr;
602 					keyfile = del + 1;
603 				}
604 				delete []keyptr;
605 
606 				if (0 == strcmp(p->key, keybase)) {
607 					return p->val;
608 				}
609 			}
610 		}
611 	}
612 	if (superPS) {
613 		// Failed here, so try in base property set
614 		return superPS->GetWild(keybase, filename);
615 	} else {
616 		return "";
617 	}
618 }
619 
620 
621 
622 // GetNewExpand does not use Expand as it has to use GetWild with the filename for each
623 // variable reference found.
GetNewExpand(const char * keybase,const char * filename)624 SString PropSet::GetNewExpand(const char *keybase, const char *filename) {
625 	char *base = StringDup(GetWild(keybase, filename).c_str());
626 	char *cpvar = strstr(base, "$(");
627 	int maxExpands = 1000;	// Avoid infinite expansion of recursive definitions
628 	while (cpvar && (maxExpands > 0)) {
629 		char *cpendvar = strchr(cpvar, ')');
630 		if (cpendvar) {
631 			int lenvar = cpendvar - cpvar - 2;  	// Subtract the $()
632 			char *var = StringDup(cpvar + 2, lenvar);
633 			SString val = GetWild(var, filename);
634 			if (0 == strcmp(var, keybase))
635 				val.clear(); // Self-references evaluate to empty string
636 			size_t newlenbase = strlen(base) + val.length() - lenvar;
637 			char *newbase = new char[newlenbase];
638 			strncpy(newbase, base, cpvar - base);
639 			strcpy(newbase + (cpvar - base), val.c_str());
640 			strcpy(newbase + (cpvar - base) + val.length(), cpendvar + 1);
641 			delete []var;
642 			delete []base;
643 			base = newbase;
644 		}
645 		cpvar = strstr(base, "$(");
646 		maxExpands--;
647 	}
648 	SString sret = base;
649 	delete []base;
650 	return sret;
651 }
652 
Clear()653 void PropSet::Clear() {
654 	for (int root = 0; root < hashRoots; root++) {
655 		Property *p = props[root];
656 		while (p) {
657 			Property *pNext = p->next;
658 			p->hash = 0;
659 			delete []p->key;
660 			p->key = 0;
661 			delete []p->val;
662 			p->val = 0;
663 			delete p;
664 			p = pNext;
665 		}
666 		props[root] = 0;
667 	}
668 }
669 
ToString()670 char *PropSet::ToString() {
671 	size_t len=0;
672 	for (int r = 0; r < hashRoots; r++) {
673 		for (Property *p = props[r]; p; p = p->next) {
674 			len += strlen(p->key) + 1;
675 			len += strlen(p->val) + 1;
676 		}
677 	}
678 	if (len == 0)
679 		len = 1;	// Return as empty string
680 	char *ret = new char [len];
681 	if (ret) {
682 		char *w = ret;
683 		for (int root = 0; root < hashRoots; root++) {
684 			for (Property *p = props[root]; p; p = p->next) {
685 				strcpy(w, p->key);
686 				w += strlen(p->key);
687 				*w++ = '=';
688 				strcpy(w, p->val);
689 				w += strlen(p->val);
690 				*w++ = '\n';
691 			}
692 		}
693 		ret[len-1] = '\0';
694 	}
695 	return ret;
696 }
697 
698 /**
699  * Initiate enumeration.
700  */
GetFirst(char ** key,char ** val)701 bool PropSet::GetFirst(char **key, char **val) {
702 	for (int i = 0; i < hashRoots; i++) {
703 		for (Property *p = props[i]; p; p = p->next) {
704 			if (p) {
705 				*key = p->key;
706 				*val = p->val;
707 				enumnext = p->next; // GetNext will begin here ...
708 				enumhash = i;		  // ... in this block
709 				return true;
710 			}
711 		}
712 	}
713 	return false;
714 }
715 
716 /**
717  * Continue enumeration.
718  */
GetNext(char ** key,char ** val)719 bool PropSet::GetNext(char ** key, char ** val) {
720 	bool firstloop = true;
721 
722 	// search begins where we left it : in enumhash block
723 	for (int i = enumhash; i < hashRoots; i++) {
724 		if (!firstloop)
725 			enumnext = props[i]; // Begin with first property in block
726 		// else : begin where we left
727 		firstloop = false;
728 
729 		for (Property *p = enumnext; p; p = p->next) {
730 			if (p) {
731 				*key = p->key;
732 				*val = p->val;
733 				enumnext = p->next; // for GetNext
734 				enumhash = i;
735 				return true;
736 			}
737 		}
738 	}
739 	return false;
740 }
741 
742 /**
743  * Creates an array that points into each word in the string and puts \0 terminators
744  * after each word.
745  */
ArrayFromWordList(char * wordlist,int * len,bool onlyLineEnds=false)746 static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
747 	int prev = '\n';
748 	int words = 0;
749 	// For rapid determination of whether a character is a separator, build
750 	// a look up table.
751 	bool wordSeparator[256];
752 	for (int i=0;i<256; i++) {
753 		wordSeparator[i] = false;
754 	}
755 	wordSeparator['\r'] = true;
756 	wordSeparator['\n'] = true;
757 	if (!onlyLineEnds) {
758 		wordSeparator[' '] = true;
759 		wordSeparator['\t'] = true;
760 	}
761 	for (int j = 0; wordlist[j]; j++) {
762 		int curr = static_cast<unsigned char>(wordlist[j]);
763 		if (!wordSeparator[curr] && wordSeparator[prev])
764 			words++;
765 		prev = curr;
766 	}
767 	char **keywords = new char *[words + 1];
768 	if (keywords) {
769 		words = 0;
770 		prev = '\0';
771 		size_t slen = strlen(wordlist);
772 		for (size_t k = 0; k < slen; k++) {
773 			if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
774 				if (!prev) {
775 					keywords[words] = &wordlist[k];
776 					words++;
777 				}
778 			} else {
779 				wordlist[k] = '\0';
780 			}
781 			prev = wordlist[k];
782 		}
783 		keywords[words] = &wordlist[slen];
784 		*len = words;
785 	} else {
786 		*len = 0;
787 	}
788 	return keywords;
789 }
790 
Clear()791 void WordList::Clear() {
792 	if (words) {
793 		delete []list;
794 		delete []words;
795 		delete []wordsNoCase;
796 	}
797 	words = 0;
798 	wordsNoCase = 0;
799 	list = 0;
800 	len = 0;
801 	sorted = false;
802 	sortedNoCase = false;
803 }
804 
Set(const char * s)805 void WordList::Set(const char *s) {
806 	list = StringDup(s);
807 	sorted = false;
808 	sortedNoCase = false;
809 	words = ArrayFromWordList(list, &len, onlyLineEnds);
810 	wordsNoCase = new char * [len + 1];
811 	memcpy(wordsNoCase, words, (len + 1) * sizeof (*words));
812 }
813 
Allocate(int size)814 char *WordList::Allocate(int size) {
815 	list = new char[size + 1];
816 	list[size] = '\0';
817 	return list;
818 }
819 
SetFromAllocated()820 void WordList::SetFromAllocated() {
821 	sorted = false;
822 	sortedNoCase = false;
823 	words = ArrayFromWordList(list, &len, onlyLineEnds);
824 	wordsNoCase = new char * [len + 1];
825 	memcpy(wordsNoCase, words, (len + 1) * sizeof (*words));
826 }
827 
cmpString(const void * a1,const void * a2)828 int cmpString(const void *a1, const void *a2) {
829 	// Can't work out the correct incantation to use modern casts here
830 	return strcmp(*(char**)(a1), *(char**)(a2));
831 }
832 
cmpStringNoCase(const void * a1,const void * a2)833 int cmpStringNoCase(const void *a1, const void *a2) {
834 	// Can't work out the correct incantation to use modern casts here
835 	return CompareCaseInsensitive(*(char**)(a1), *(char**)(a2));
836 }
837 
SortWordList(char ** words,unsigned int len)838 static void SortWordList(char **words, unsigned int len) {
839 	qsort(reinterpret_cast<void*>(words), len, sizeof(*words),
840 	      cmpString);
841 }
842 
SortWordListNoCase(char ** wordsNoCase,unsigned int len)843 static void SortWordListNoCase(char **wordsNoCase, unsigned int len) {
844 	qsort(reinterpret_cast<void*>(wordsNoCase), len, sizeof(*wordsNoCase),
845 	      cmpStringNoCase);
846 }
847 
InList(const char * s)848 bool WordList::InList(const char *s) {
849 	if (0 == words)
850 		return false;
851 	if (!sorted) {
852 		sorted = true;
853 		SortWordList(words, len);
854 		for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
855 			starts[k] = -1;
856 		for (int l = len - 1; l >= 0; l--) {
857 			unsigned char indexChar = words[l][0];
858 			starts[indexChar] = l;
859 		}
860 	}
861 	unsigned char firstChar = s[0];
862 	int j = starts[firstChar];
863 	if (j >= 0) {
864 		while (words[j][0] == firstChar) {
865 			if (s[1] == words[j][1]) {
866 				const char *a = words[j] + 1;
867 				const char *b = s + 1;
868 				while (*a && *a == *b) {
869 					a++;
870 					b++;
871 				}
872 				if (!*a && !*b)
873 					return true;
874 			}
875 			j++;
876 		}
877 	}
878 	j = starts['^'];
879 	if (j >= 0) {
880 		while (words[j][0] == '^') {
881 			const char *a = words[j] + 1;
882 			const char *b = s;
883 			while (*a && *a == *b) {
884 				a++;
885 				b++;
886 			}
887 			if (!*a)
888 				return true;
889 			j++;
890 		}
891 	}
892 	return false;
893 }
894 
895 /** similar to InList, but word s can be a substring of keyword.
896  * eg. the keyword define is defined as def~ine. This means the word must start
897  * with def to be a keyword, but also defi, defin and define are valid.
898  * The marker is ~ in this case.
899  */
InListAbbreviated(const char * s,const char marker)900 bool WordList::InListAbbreviated(const char *s, const char marker) {
901 	if (0 == words)
902 		return false;
903 	if (!sorted) {
904 		sorted = true;
905 		SortWordList(words, len);
906 		for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
907 			starts[k] = -1;
908 		for (int l = len - 1; l >= 0; l--) {
909 			unsigned char indexChar = words[l][0];
910 			starts[indexChar] = l;
911 		}
912 	}
913 	unsigned char firstChar = s[0];
914 	int j = starts[firstChar];
915 	if (j >= 0) {
916 		while (words[j][0] == firstChar) {
917 			bool isSubword = false;
918 			int start = 1;
919 			if (words[j][1] == marker) {
920 				isSubword = true;
921 				start++;
922 			}
923 			if (s[1] == words[j][start]) {
924 				const char *a = words[j] + start;
925 				const char *b = s + 1;
926 				while (*a && *a == *b) {
927 					a++;
928 					if (*a == marker) {
929 						isSubword = true;
930 						a++;
931 					}
932 					b++;
933 				}
934 				if ((!*a || isSubword) && !*b)
935 					return true;
936 			}
937 			j++;
938 		}
939 	}
940 	j = starts['^'];
941 	if (j >= 0) {
942 		while (words[j][0] == '^') {
943 			const char *a = words[j] + 1;
944 			const char *b = s;
945 			while (*a && *a == *b) {
946 				a++;
947 				b++;
948 			}
949 			if (!*a)
950 				return true;
951 			j++;
952 		}
953 	}
954 	return false;
955 }
956 
957 /**
958  * Returns an element (complete) of the wordlist array which has
959  * the same beginning as the passed string.
960  * The length of the word to compare is passed too.
961  * Letter case can be ignored or preserved (default).
962  */
GetNearestWord(const char * wordStart,int searchLen,bool ignoreCase,SString wordCharacters,int wordIndex)963 const char *WordList::GetNearestWord(const char *wordStart, int searchLen, bool ignoreCase /*= false*/, SString wordCharacters /*='/0' */, int wordIndex /*= -1 */) {
964 	int start = 0; // lower bound of the api array block to search
965 	int end = len - 1; // upper bound of the api array block to search
966 	int pivot; // index of api array element just being compared
967 	int cond; // comparison result (in the sense of strcmp() result)
968 	const char *word; // api array element just being compared
969 
970 	if (0 == words)
971 		return NULL;
972 	if (ignoreCase) {
973 		if (!sortedNoCase) {
974 			sortedNoCase = true;
975 			SortWordListNoCase(wordsNoCase, len);
976 		}
977 		while (start <= end) { // binary searching loop
978 			pivot = (start + end) >> 1;
979 			word = wordsNoCase[pivot];
980 			cond = CompareNCaseInsensitive(wordStart, word, searchLen);
981 			if (!cond) {
982 				// find first word
983 				start = pivot;
984 				while (start > 0 && !CompareNCaseInsensitive(wordStart, wordsNoCase[start-1], searchLen)) {
985 					start--;
986 				}
987 				// find last word
988 				end = pivot;
989 				while (end < len-1 && !CompareNCaseInsensitive(wordStart, wordsNoCase[end+1], searchLen)) {
990 					end++;
991 				}
992 
993 				// Finds first word in a series of equal words
994 				for (pivot = start; pivot <= end; pivot++) {
995 					word = wordsNoCase[pivot];
996 					if (!wordCharacters.contains(word[searchLen])) {
997 						if (wordIndex <= 0) // Checks if a specific index was requested
998 							return word; // result must not be freed with free()
999 						wordIndex--;
1000 					}
1001 				}
1002 				return NULL;
1003 			}
1004 			else if (cond > 0)
1005 				start = pivot + 1;
1006 			else if (cond < 0)
1007 				end = pivot - 1;
1008 		}
1009 	} else { // preserve the letter case
1010 		if (!sorted) {
1011 			sorted = true;
1012 			SortWordList(words, len);
1013 		}
1014 		while (start <= end) { // binary searching loop
1015 			pivot = (start + end) >> 1;
1016 			word = words[pivot];
1017 			cond = strncmp(wordStart, word, searchLen);
1018 			if (!cond) {
1019 				// find first word
1020 				start = pivot;
1021 				while (start > 0 && !strncmp(wordStart, words[start-1], searchLen)) {
1022 					start--;
1023 				}
1024 				// find last word
1025 				end = pivot;
1026 				while (end < len-1 && !strncmp(wordStart, words[end+1], searchLen)) {
1027 					end++;
1028 				}
1029 
1030 				// Finds first word in a series of equal words
1031 				pivot = start;
1032 				while (pivot <= end) {
1033 					word = words[pivot];
1034 					if (!wordCharacters.contains(word[searchLen])) {
1035 						if (wordIndex <= 0) // Checks if a specific index was requested
1036 							return word; // result must not be freed with free()
1037 						wordIndex--;
1038 					}
1039 					pivot++;
1040 				}
1041 				return NULL;
1042 			}
1043 			else if (cond > 0)
1044 				start = pivot + 1;
1045 			else if (cond < 0)
1046 				end = pivot - 1;
1047 		}
1048 	}
1049 	return NULL;
1050 }
1051 
1052 /**
1053  * Find the length of a 'word' which is actually an identifier in a string
1054  * which looks like "identifier(..." or "identifier" and where
1055  * there may be extra spaces after the identifier that should not be
1056  * counted in the length.
1057  */
LengthWord(const char * word,char otherSeparator)1058 static unsigned int LengthWord(const char *word, char otherSeparator) {
1059 	// Find a '('. If that fails go to the end of the string.
1060 	const char *endWord = strchr(word, '(');
1061 	if (!endWord && otherSeparator)
1062 		endWord = strchr(word, otherSeparator);
1063 	if (!endWord)
1064 		endWord = word + strlen(word);
1065 	// Last case always succeeds so endWord != 0
1066 
1067 	// Drop any space characters.
1068 	if (endWord > word) {
1069 		endWord--;	// Back from the '(', otherSeparator, or '\0'
1070 		// Move backwards over any spaces
1071 		while ((endWord > word) && (IsASpace(*endWord))) {
1072 			endWord--;
1073 		}
1074 	}
1075 	return endWord - word;
1076 }
1077 
1078 /**
1079  * Returns elements (first words of them) of the wordlist array which have
1080  * the same beginning as the passed string.
1081  * The length of the word to compare is passed too.
1082  * Letter case can be ignored or preserved (default).
1083  * If there are more words meeting the condition they are returned all of
1084  * them in the ascending order separated with spaces.
1085  *
1086  * NOTE: returned buffer has to be freed with delete[].
1087  */
GetNearestWords(const char * wordStart,int searchLen,bool ignoreCase,char otherSeparator,bool exactLen)1088 char *WordList::GetNearestWords(
1089     const char *wordStart,
1090     int searchLen,
1091     bool ignoreCase /*= false*/,
1092     char otherSeparator /*= '\0'*/,
1093     bool exactLen /*=false*/) {
1094 	unsigned int wordlen; // length of the word part (before the '(' brace) of the api array element
1095 	SString wordsNear;
1096 	wordsNear.setsizegrowth(1000);
1097 	int start = 0; // lower bound of the api array block to search
1098 	int end = len - 1; // upper bound of the api array block to search
1099 	int pivot; // index of api array element just being compared
1100 	int cond; // comparison result (in the sense of strcmp() result)
1101 
1102 	if (0 == words)
1103 		return NULL;
1104 	if (ignoreCase) {
1105 		if (!sortedNoCase) {
1106 			sortedNoCase = true;
1107 			SortWordListNoCase(wordsNoCase, len);
1108 		}
1109 		while (start <= end) { // Binary searching loop
1110 			pivot = (start + end) / 2;
1111 			cond = CompareNCaseInsensitive(wordStart, wordsNoCase[pivot], searchLen);
1112 			if (!cond) {
1113 				// Find first match
1114 				while ((pivot > start) &&
1115 					(0 == CompareNCaseInsensitive(wordStart,
1116 						wordsNoCase[pivot-1], searchLen))) {
1117 					--pivot;
1118 				}
1119 				// Grab each match
1120 				while ((pivot <= end) &&
1121 					(0 == CompareNCaseInsensitive(wordStart,
1122 						wordsNoCase[pivot], searchLen))) {
1123 					wordlen = LengthWord(wordsNoCase[pivot], otherSeparator) + 1;
1124 					++pivot;
1125 					if (exactLen && wordlen != LengthWord(wordStart, otherSeparator) + 1)
1126 						continue;
1127 					wordsNear.append(wordsNoCase[pivot-1], wordlen, ' ');
1128 				}
1129 				return wordsNear.detach();
1130 			} else if (cond < 0) {
1131 				end = pivot - 1;
1132 			} else if (cond > 0) {
1133 				start = pivot + 1;
1134 			}
1135 		}
1136 	} else {	// Preserve the letter case
1137 		if (!sorted) {
1138 			sorted = true;
1139 			SortWordList(words, len);
1140 		}
1141 		while (start <= end) { // Binary searching loop
1142 			pivot = (start + end) / 2;
1143 			cond = strncmp(wordStart, words[pivot], searchLen);
1144 			if (!cond) {
1145 				// Find first match
1146 				while ((pivot > start) &&
1147 					(0 == strncmp(wordStart,
1148 						words[pivot-1], searchLen))) {
1149 					--pivot;
1150 				}
1151 				// Grab each match
1152 				while ((pivot <= end) &&
1153 					(0 == strncmp(wordStart,
1154 						words[pivot], searchLen))) {
1155 					wordlen = LengthWord(words[pivot], otherSeparator) + 1;
1156 					++pivot;
1157 					if (exactLen && wordlen != LengthWord(wordStart, otherSeparator) + 1)
1158 						continue;
1159 					wordsNear.append(words[pivot-1], wordlen, ' ');
1160 				}
1161 				return wordsNear.detach();
1162 			} else if (cond < 0) {
1163 				end = pivot - 1;
1164 			} else if (cond > 0) {
1165 				start = pivot + 1;
1166 			}
1167 		}
1168 	}
1169 	return NULL;
1170 }
1171