1 // Scintilla source code edit control
2 /** @file LexCPP.cxx
3  ** Lexer for C++, C, Java, and JavaScript.
4  ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5  **/
6 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 #include <ctype.h>
15 
16 #include <utility>
17 #include <string>
18 #include <vector>
19 #include <map>
20 #include <algorithm>
21 
22 #include "ILexer.h"
23 #include "Scintilla.h"
24 #include "SciLexer.h"
25 
26 #include "StringCopy.h"
27 #include "WordList.h"
28 #include "LexAccessor.h"
29 #include "Accessor.h"
30 #include "StyleContext.h"
31 #include "CharacterSet.h"
32 #include "LexerModule.h"
33 #include "OptionSet.h"
34 #include "SparseState.h"
35 #include "SubStyles.h"
36 
37 using namespace Scintilla;
38 
39 namespace {
40 	// Use an unnamed namespace to protect the functions and classes from name conflicts
41 
IsSpaceEquiv(int state)42 bool IsSpaceEquiv(int state) noexcept {
43 	return (state <= SCE_C_COMMENTDOC) ||
44 		// including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
45 		(state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
46 		(state == SCE_C_COMMENTDOCKEYWORDERROR);
47 }
48 
49 // Preconditions: sc.currentPos points to a character after '+' or '-'.
50 // The test for pos reaching 0 should be redundant,
51 // and is in only for safety measures.
52 // Limitation: this code will give the incorrect answer for code like
53 // a = b+++/ptn/...
54 // Putting a space between the '++' post-inc operator and the '+' binary op
55 // fixes this, and is highly recommended for readability anyway.
FollowsPostfixOperator(const StyleContext & sc,LexAccessor & styler)56 bool FollowsPostfixOperator(const StyleContext &sc, LexAccessor &styler) {
57 	Sci_Position pos = sc.currentPos;
58 	while (--pos > 0) {
59 		const char ch = styler[pos];
60 		if (ch == '+' || ch == '-') {
61 			return styler[pos - 1] == ch;
62 		}
63 	}
64 	return false;
65 }
66 
followsReturnKeyword(const StyleContext & sc,LexAccessor & styler)67 bool followsReturnKeyword(const StyleContext &sc, LexAccessor &styler) {
68 	// Don't look at styles, so no need to flush.
69 	Sci_Position pos = sc.currentPos;
70 	const Sci_Position currentLine = styler.GetLine(pos);
71 	const Sci_Position lineStartPos = styler.LineStart(currentLine);
72 	while (--pos > lineStartPos) {
73 		const char ch = styler.SafeGetCharAt(pos);
74 		if (ch != ' ' && ch != '\t') {
75 			break;
76 		}
77 	}
78 	const char *retBack = "nruter";
79 	const char *s = retBack;
80 	while (*s
81 		&& pos >= lineStartPos
82 		&& styler.SafeGetCharAt(pos) == *s) {
83 		s++;
84 		pos--;
85 	}
86 	return !*s;
87 }
88 
// True only for the space and horizontal tab characters.
bool IsSpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
92 
// True when every character of s is a space or a tab; an empty string
// therefore also yields true.
bool OnlySpaceOrTab(const std::string &s) noexcept {
	return s.find_first_not_of(" \t") == std::string::npos;
}
100 
// Split text at every occurrence of separator.
// An empty text gives an empty vector; otherwise the result always holds one
// more element than there are separators, so leading, trailing and adjacent
// separators produce empty strings.
std::vector<std::string> StringSplit(const std::string &text, int separator) {
	std::vector<std::string> pieces;
	if (text.empty()) {
		return pieces;
	}
	std::string::const_iterator pieceStart = text.begin();
	for (std::string::const_iterator it = text.begin(); it != text.end(); ++it) {
		if (*it == separator) {
			pieces.emplace_back(pieceStart, it);
			pieceStart = it + 1;
		}
	}
	// Whatever follows the last separator (possibly nothing) is the final piece.
	pieces.emplace_back(pieceStart, text.end());
	return pieces;
}
112 
// Positions of an opening bracket token and of its closing bracket token
// within a vector of token strings.
struct BracketPair {
	using TokenIterator = std::vector<std::string>::iterator;
	TokenIterator itBracket;
	TokenIterator itEndBracket;
};
117 
FindBracketPair(std::vector<std::string> & tokens)118 BracketPair FindBracketPair(std::vector<std::string> &tokens) {
119 	BracketPair bp;
120 	std::vector<std::string>::iterator itTok = std::find(tokens.begin(), tokens.end(), "(");
121 	bp.itBracket = tokens.end();
122 	bp.itEndBracket = tokens.end();
123 	if (itTok != tokens.end()) {
124 		bp.itBracket = itTok;
125 		size_t nest = 0;
126 		while (itTok != tokens.end()) {
127 			if (*itTok == "(") {
128 				nest++;
129 			} else if (*itTok == ")") {
130 				nest--;
131 				if (nest == 0) {
132 					bp.itEndBracket = itTok;
133 					return bp;
134 				}
135 			}
136 			++itTok;
137 		}
138 	}
139 	bp.itBracket = tokens.end();
140 	return bp;
141 }
142 
highlightTaskMarker(StyleContext & sc,LexAccessor & styler,int activity,const WordList & markerList,bool caseSensitive)143 void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
144 		int activity, const WordList &markerList, bool caseSensitive){
145 	if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
146 		const int lengthMarker = 50;
147 		char marker[lengthMarker+1] = "";
148 		const Sci_Position currPos = static_cast<Sci_Position>(sc.currentPos);
149 		int i = 0;
150 		while (i < lengthMarker) {
151 			const char ch = styler.SafeGetCharAt(currPos + i);
152 			if (IsASpace(ch) || isoperator(ch)) {
153 				break;
154 			}
155 			if (caseSensitive)
156 				marker[i] = ch;
157 			else
158 				marker[i] = MakeLowerCase(ch);
159 			i++;
160 		}
161 		marker[i] = '\0';
162 		if (markerList.InList(marker)) {
163 			sc.SetState(SCE_C_TASKMARKER|activity);
164 		}
165 	}
166 }
167 
168 struct EscapeSequence {
169 	int digitsLeft;
170 	CharacterSet setHexDigits;
171 	CharacterSet setOctDigits;
172 	CharacterSet setNoneNumeric;
173 	CharacterSet *escapeSetValid;
EscapeSequence__anonf8af5acf0111::EscapeSequence174 	EscapeSequence() {
175 		digitsLeft = 0;
176 		escapeSetValid = 0;
177 		setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
178 		setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
179 	}
resetEscapeState__anonf8af5acf0111::EscapeSequence180 	void resetEscapeState(int nextChar) {
181 		digitsLeft = 0;
182 		escapeSetValid = &setNoneNumeric;
183 		if (nextChar == 'U') {
184 			digitsLeft = 9;
185 			escapeSetValid = &setHexDigits;
186 		} else if (nextChar == 'u') {
187 			digitsLeft = 5;
188 			escapeSetValid = &setHexDigits;
189 		} else if (nextChar == 'x') {
190 			digitsLeft = 5;
191 			escapeSetValid = &setHexDigits;
192 		} else if (setOctDigits.Contains(nextChar)) {
193 			digitsLeft = 3;
194 			escapeSetValid = &setOctDigits;
195 		}
196 	}
atEscapeEnd__anonf8af5acf0111::EscapeSequence197 	bool atEscapeEnd(int currChar) const {
198 		return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
199 	}
200 };
201 
GetRestOfLine(LexAccessor & styler,Sci_Position start,bool allowSpace)202 std::string GetRestOfLine(LexAccessor &styler, Sci_Position start, bool allowSpace) {
203 	std::string restOfLine;
204 	Sci_Position i =0;
205 	char ch = styler.SafeGetCharAt(start, '\n');
206 	const Sci_Position endLine = styler.LineEnd(styler.GetLine(start));
207 	while (((start+i) < endLine) && (ch != '\r')) {
208 		const char chNext = styler.SafeGetCharAt(start + i + 1, '\n');
209 		if (ch == '/' && (chNext == '/' || chNext == '*'))
210 			break;
211 		if (allowSpace || (ch != ' '))
212 			restOfLine += ch;
213 		i++;
214 		ch = chNext;
215 	}
216 	return restOfLine;
217 }
218 
IsStreamCommentStyle(int style)219 bool IsStreamCommentStyle(int style) noexcept {
220 	return style == SCE_C_COMMENT ||
221 		style == SCE_C_COMMENTDOC ||
222 		style == SCE_C_COMMENTDOCKEYWORD ||
223 		style == SCE_C_COMMENTDOCKEYWORDERROR;
224 }
225 
226 struct PPDefinition {
227 	Sci_Position line;
228 	std::string key;
229 	std::string value;
230 	bool isUndef;
231 	std::string arguments;
PPDefinition__anonf8af5acf0111::PPDefinition232 	PPDefinition(Sci_Position line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, const std::string &arguments_="") :
233 		line(line_), key(key_), value(value_), isUndef(isUndef_), arguments(arguments_) {
234 	}
235 };
236 
// Tracks the state of nested preprocessor conditionals for one line so that
// active and inactive code can be told apart.
// State is kept for the first maximumLevels nesting levels. Deeper levels are
// still counted, so sections stay balanced, but carry no state of their own.
class LinePPState {
	// Number of nesting levels that have a bit in the masks below.
	enum { maximumLevels = 32 };
	// One bit per level; a set bit marks the section at that level as inactive.
	// Unsigned so that using the top bit is well defined.
	unsigned int state;
	// One bit per level; a set bit records that a branch at that level was taken.
	unsigned int ifTaken;
	// Current nesting depth, -1 when outside every conditional.
	int level;
	bool ValidLevel() const noexcept {
		return level >= 0 && level < maximumLevels;
	}
	// Bit for the current level. Outside any conditional this is bit 0, as
	// before. Beyond the tracked depth it is 0 so that no shift by an
	// out-of-range amount is ever evaluated.
	unsigned int maskLevel() const noexcept {
		if (level < 0) {
			return 1U;
		}
		if (level >= maximumLevels) {
			return 0U;
		}
		return 1U << level;
	}
public:
	LinePPState() : state(0), ifTaken(0), level(-1) {
	}
	// True when any enclosing tracked section is inactive.
	bool IsInactive() const noexcept {
		return state != 0;
	}
	// True when a branch of the current level has been taken.
	// Always false beyond the tracked depth.
	bool CurrentIfTaken() const noexcept {
		return (ifTaken & maskLevel()) != 0;
	}
	// Enter a new nested section; on says whether its first branch is active.
	void StartSection(bool on) noexcept {
		level++;
		if (ValidLevel()) {
			if (on) {
				state &= ~maskLevel();
				ifTaken |= maskLevel();
			} else {
				state |= maskLevel();
				ifTaken &= ~maskLevel();
			}
		}
	}
	// Leave the current section, clearing its bits before stepping out.
	void EndSection() noexcept {
		if (ValidLevel()) {
			state &= ~maskLevel();
			ifTaken &= ~maskLevel();
		}
		level--;
	}
	// Flip the current level between active and inactive and record that a
	// branch at this level has been taken.
	void InvertCurrentLevel() noexcept {
		if (ValidLevel()) {
			state ^= maskLevel();
			ifTaken |= maskLevel();
		}
	}
};
286 
287 // Hold the preprocessor state for each line seen.
288 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
289 class PPStates {
290 	std::vector<LinePPState> vlls;
291 public:
ForLine(Sci_Position line) const292 	LinePPState ForLine(Sci_Position line) const {
293 		if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
294 			return vlls[line];
295 		} else {
296 			return LinePPState();
297 		}
298 	}
Add(Sci_Position line,LinePPState lls)299 	void Add(Sci_Position line, LinePPState lls) {
300 		vlls.resize(line+1);
301 		vlls[line] = lls;
302 	}
303 };
304 
305 // An individual named option for use in an OptionSet
306 
// Option values used by LexerCPP, each starting at its default.
struct OptionsCPP {
	bool stylingWithinPreprocessor;
	bool identifiersAllowDollars;
	bool trackPreprocessor;
	bool updatePreprocessor;
	bool verbatimStringsAllowEscapes;
	bool triplequotedStrings;
	bool hashquotedStrings;
	bool backQuotedStrings;
	bool escapeSequence;
	bool fold;
	bool foldSyntaxBased;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldPreprocessor;
	bool foldPreprocessorAtElse;
	bool foldCompact;
	bool foldAtElse;
	// Initialisers are listed in declaration order.
	OptionsCPP() :
		stylingWithinPreprocessor(false),
		identifiersAllowDollars(true),
		trackPreprocessor(true),
		updatePreprocessor(true),
		verbatimStringsAllowEscapes(false),
		triplequotedStrings(false),
		hashquotedStrings(false),
		backQuotedStrings(false),
		escapeSequence(false),
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(),
		foldExplicitEnd(),
		foldExplicitAnywhere(false),
		foldPreprocessor(false),
		foldPreprocessorAtElse(false),
		foldCompact(false),
		foldAtElse(false) {
	}
};
354 
// Descriptions of the word lists, in word list index order, ending with a
// null pointer.
const char *const cppWordLists[] = {
	"Primary keywords and identifiers",
	"Secondary keywords and identifiers",
	"Documentation comment keywords",
	"Global classes and typedefs",
	"Preprocessor definitions",
	"Task marker and error marker keywords",
	nullptr,
};
364 
// Registers every LexerCPP property with the OptionSet base: each call ties a
// property name to a member of OptionsCPP and, for most of them, supplies a
// description. The word list descriptions are registered last.
struct OptionSetCPP : public OptionSet<OptionsCPP> {
	OptionSetCPP() {
		// Lexing properties.
		DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
			"For C++ code, determines whether all preprocessor code is styled in the "
			"preprocessor style (0, the default) or only from the initial # to the end "
			"of the command word(1).");

		DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
			"Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");

		DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
			"Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");

		DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
			"Set to 1 to update preprocessor definitions when #define found.");

		DefineProperty("lexer.cpp.verbatim.strings.allow.escapes", &OptionsCPP::verbatimStringsAllowEscapes,
			"Set to 1 to allow verbatim strings to contain escape sequences.");

		DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
			"Set to 1 to enable highlighting of triple-quoted strings.");

		DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
			"Set to 1 to enable highlighting of hash-quoted strings.");

		DefineProperty("lexer.cpp.backquoted.strings", &OptionsCPP::backQuotedStrings,
			"Set to 1 to enable highlighting of back-quoted raw strings .");

		DefineProperty("lexer.cpp.escape.sequence", &OptionsCPP::escapeSequence,
			"Set to 1 to enable highlighting of escape sequences in strings");

		// Folding properties. "fold" and "fold.compact" are registered
		// without a description.
		DefineProperty("fold", &OptionsCPP::fold);

		DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
			"Set this property to 0 to disable syntax based folding.");

		DefineProperty("fold.comment", &OptionsCPP::foldComment,
			"This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
			"Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
			"at the end of a section that should fold.");

		DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");

		DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
			"Set this property to 0 to disable folding explicit fold points when fold.comment=1.");

		DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
			"The string to use for explicit fold start points, replacing the standard //{.");

		DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
			"The string to use for explicit fold end points, replacing the standard //}.");

		DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");

		DefineProperty("fold.cpp.preprocessor.at.else", &OptionsCPP::foldPreprocessorAtElse,
			"This option enables folding on a preprocessor #else or #endif line of an #if statement.");

		DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
			"This option enables folding preprocessor directives when using the C++ lexer. "
			"Includes C#'s explicit #region and #endregion folding directives.");

		DefineProperty("fold.compact", &OptionsCPP::foldCompact);

		DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
			"This option enables C++ folding on a \"} else {\" line of an if statement.");

		// Descriptions of the word lists.
		DefineWordListSets(cppWordLists);
	}
};
436 
// Zero-terminated list of the styles offered as substyle bases; it is handed
// to the SubStyles member of LexerCPP and returned by GetSubStyleBases().
const char styleSubable[] = {SCE_C_IDENTIFIER, SCE_C_COMMENTDOCKEYWORD, 0};
438 
439 LexicalClass lexicalClasses[] = {
440 	// Lexer Cpp SCLEX_CPP SCE_C_:
441 	0, "SCE_C_DEFAULT", "default", "White space",
442 	1, "SCE_C_COMMENT", "comment", "Comment: /* */.",
443 	2, "SCE_C_COMMENTLINE", "comment line", "Line Comment: //.",
444 	3, "SCE_C_COMMENTDOC", "comment documentation", "Doc comment: block comments beginning with /** or /*!",
445 	4, "SCE_C_NUMBER", "literal numeric", "Number",
446 	5, "SCE_C_WORD", "keyword", "Keyword",
447 	6, "SCE_C_STRING", "literal string", "Double quoted string",
448 	7, "SCE_C_CHARACTER", "literal string character", "Single quoted string",
449 	8, "SCE_C_UUID", "literal uuid", "UUIDs (only in IDL)",
450 	9, "SCE_C_PREPROCESSOR", "preprocessor", "Preprocessor",
451 	10, "SCE_C_OPERATOR", "operator", "Operators",
452 	11, "SCE_C_IDENTIFIER", "identifier", "Identifiers",
453 	12, "SCE_C_STRINGEOL", "error literal string", "End of line where string is not closed",
454 	13, "SCE_C_VERBATIM", "literal string multiline raw", "Verbatim strings for C#",
455 	14, "SCE_C_REGEX", "literal regex", "Regular expressions for JavaScript",
456 	15, "SCE_C_COMMENTLINEDOC", "comment documentation line", "Doc Comment Line: line comments beginning with /// or //!.",
457 	16, "SCE_C_WORD2", "identifier", "Keywords2",
458 	17, "SCE_C_COMMENTDOCKEYWORD", "comment documentation keyword", "Comment keyword",
459 	18, "SCE_C_COMMENTDOCKEYWORDERROR", "error comment documentation keyword", "Comment keyword error",
460 	19, "SCE_C_GLOBALCLASS", "identifier", "Global class",
461 	20, "SCE_C_STRINGRAW", "literal string multiline raw", "Raw strings for C++0x",
462 	21, "SCE_C_TRIPLEVERBATIM", "literal string multiline raw", "Triple-quoted strings for Vala",
463 	22, "SCE_C_HASHQUOTEDSTRING", "literal string", "Hash-quoted strings for Pike",
464 	23, "SCE_C_PREPROCESSORCOMMENT", "comment preprocessor", "Preprocessor stream comment",
465 	24, "SCE_C_PREPROCESSORCOMMENTDOC", "comment preprocessor documentation", "Preprocessor stream doc comment",
466 	25, "SCE_C_USERLITERAL", "literal", "User defined literals",
467 	26, "SCE_C_TASKMARKER", "comment taskmarker", "Task Marker",
468 	27, "SCE_C_ESCAPESEQUENCE", "literal string escapesequence", "Escape sequence",
469 };
470 
471 }
472 
473 class LexerCPP : public ILexerWithMetaData {
474 	bool caseSensitive;
475 	CharacterSet setWord;
476 	CharacterSet setNegationOp;
477 	CharacterSet setArithmethicOp;
478 	CharacterSet setRelOp;
479 	CharacterSet setLogicalOp;
480 	CharacterSet setWordStart;
481 	PPStates vlls;
482 	std::vector<PPDefinition> ppDefineHistory;
483 	WordList keywords;
484 	WordList keywords2;
485 	WordList keywords3;
486 	WordList keywords4;
487 	WordList ppDefinitions;
488 	WordList markerList;
489 	struct SymbolValue {
490 		std::string value;
491 		std::string arguments;
SymbolValueLexerCPP::SymbolValue492 		SymbolValue(const std::string &value_="", const std::string &arguments_="") : value(value_), arguments(arguments_) {
493 		}
operator =LexerCPP::SymbolValue494 		SymbolValue &operator = (const std::string &value_) {
495 			value = value_;
496 			arguments.clear();
497 			return *this;
498 		}
IsMacroLexerCPP::SymbolValue499 		bool IsMacro() const noexcept {
500 			return !arguments.empty();
501 		}
502 	};
503 	typedef std::map<std::string, SymbolValue> SymbolTable;
504 	SymbolTable preprocessorDefinitionsStart;
505 	OptionsCPP options;
506 	OptionSetCPP osCPP;
507 	EscapeSequence escapeSeq;
508 	SparseState<std::string> rawStringTerminators;
509 	enum { activeFlag = 0x40 };
510 	enum { ssIdentifier, ssDocKeyword };
511 	SubStyles subStyles;
512 	std::string returnBuffer;
513 public:
LexerCPP(bool caseSensitive_)514 	explicit LexerCPP(bool caseSensitive_) :
515 		caseSensitive(caseSensitive_),
516 		setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
517 		setNegationOp(CharacterSet::setNone, "!"),
518 		setArithmethicOp(CharacterSet::setNone, "+-/*%"),
519 		setRelOp(CharacterSet::setNone, "=!<>"),
520 		setLogicalOp(CharacterSet::setNone, "|&"),
521 		subStyles(styleSubable, 0x80, 0x40, activeFlag) {
522 	}
~LexerCPP()523 	virtual ~LexerCPP() {
524 	}
Release()525 	void SCI_METHOD Release() override {
526 		delete this;
527 	}
Version() const528 	int SCI_METHOD Version() const override {
529 		return lvMetaData;
530 	}
PropertyNames()531 	const char * SCI_METHOD PropertyNames() override {
532 		return osCPP.PropertyNames();
533 	}
PropertyType(const char * name)534 	int SCI_METHOD PropertyType(const char *name) override {
535 		return osCPP.PropertyType(name);
536 	}
DescribeProperty(const char * name)537 	const char * SCI_METHOD DescribeProperty(const char *name) override {
538 		return osCPP.DescribeProperty(name);
539 	}
540 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
DescribeWordListSets()541 	const char * SCI_METHOD DescribeWordListSets() override {
542 		return osCPP.DescribeWordListSets();
543 	}
544 	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
545 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
546 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
547 
PrivateCall(int,void *)548 	void * SCI_METHOD PrivateCall(int, void *) override {
549 		return 0;
550 	}
551 
LineEndTypesSupported()552 	int SCI_METHOD LineEndTypesSupported() override {
553 		return SC_LINE_END_TYPE_UNICODE;
554 	}
555 
AllocateSubStyles(int styleBase,int numberStyles)556 	int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
557 		return subStyles.Allocate(styleBase, numberStyles);
558 	}
SubStylesStart(int styleBase)559 	int SCI_METHOD SubStylesStart(int styleBase) override {
560 		return subStyles.Start(styleBase);
561 	}
SubStylesLength(int styleBase)562 	int SCI_METHOD SubStylesLength(int styleBase) override {
563 		return subStyles.Length(styleBase);
564 	}
StyleFromSubStyle(int subStyle)565 	int SCI_METHOD StyleFromSubStyle(int subStyle) override {
566 		const int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
567 		const int active = subStyle & activeFlag;
568 		return styleBase | active;
569 	}
PrimaryStyleFromStyle(int style)570 	int SCI_METHOD PrimaryStyleFromStyle(int style) override {
571 		return MaskActive(style);
572 	}
FreeSubStyles()573 	void SCI_METHOD FreeSubStyles() override {
574 		subStyles.Free();
575 	}
SetIdentifiers(int style,const char * identifiers)576 	void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
577 		subStyles.SetIdentifiers(style, identifiers);
578 	}
DistanceToSecondaryStyles()579 	int SCI_METHOD DistanceToSecondaryStyles() override {
580 		return activeFlag;
581 	}
GetSubStyleBases()582 	const char * SCI_METHOD GetSubStyleBases() override {
583 		return styleSubable;
584 	}
NamedStyles()585 	int SCI_METHOD NamedStyles() override {
586 		return std::max(subStyles.LastAllocated() + 1,
587 			static_cast<int>(ELEMENTS(lexicalClasses))) +
588 			activeFlag;
589 	}
NameOfStyle(int style)590 	const char * SCI_METHOD NameOfStyle(int style) override {
591 		if (style >= NamedStyles())
592 			return "";
593 		if (style < static_cast<int>(ELEMENTS(lexicalClasses)))
594 			return lexicalClasses[style].name;
595 		// TODO: inactive and substyles
596 		return "";
597 	}
TagsOfStyle(int style)598 	const char * SCI_METHOD TagsOfStyle(int style) override {
599 		if (style >= NamedStyles())
600 			return "Excess";
601 		returnBuffer.clear();
602 		const int firstSubStyle = subStyles.FirstAllocated();
603 		if (firstSubStyle >= 0) {
604 			const int lastSubStyle = subStyles.LastAllocated();
605 			if (((style >= firstSubStyle) && (style <= (lastSubStyle))) ||
606 				((style >= firstSubStyle + activeFlag) && (style <= (lastSubStyle + activeFlag)))) {
607 				int styleActive = style;
608 				if (style > lastSubStyle) {
609 					returnBuffer = "inactive ";
610 					styleActive -= activeFlag;
611 				}
612 				const int styleMain = StyleFromSubStyle(styleActive);
613 				returnBuffer += lexicalClasses[styleMain].tags;
614 				return returnBuffer.c_str();
615 			}
616 		}
617 		if (style < static_cast<int>(ELEMENTS(lexicalClasses)))
618 			return lexicalClasses[style].tags;
619 		if (style >= activeFlag) {
620 			returnBuffer = "inactive ";
621 			const int styleActive = style - activeFlag;
622 			if (styleActive < static_cast<int>(ELEMENTS(lexicalClasses)))
623 				returnBuffer += lexicalClasses[styleActive].tags;
624 			else
625 				returnBuffer = "";
626 			return returnBuffer.c_str();
627 		}
628 		return "";
629 	}
DescriptionOfStyle(int style)630 	const char * SCI_METHOD DescriptionOfStyle(int style) override {
631 		if (style >= NamedStyles())
632 			return "";
633 		if (style < static_cast<int>(ELEMENTS(lexicalClasses)))
634 			return lexicalClasses[style].description;
635 		// TODO: inactive and substyles
636 		return "";
637 	}
638 
LexerFactoryCPP()639 	static ILexer *LexerFactoryCPP() {
640 		return new LexerCPP(true);
641 	}
LexerFactoryCPPInsensitive()642 	static ILexer *LexerFactoryCPPInsensitive() {
643 		return new LexerCPP(false);
644 	}
MaskActive(int style)645 	static int MaskActive(int style) noexcept {
646 		return style & ~activeFlag;
647 	}
648 	void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
649 	std::vector<std::string> Tokenize(const std::string &expr) const;
650 	bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
651 };
652 
PropertySet(const char * key,const char * val)653 Sci_Position SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
654 	if (osCPP.PropertySet(&options, key, val)) {
655 		if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
656 			setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
657 			if (options.identifiersAllowDollars) {
658 				setWord.Add('$');
659 			}
660 		}
661 		return 0;
662 	}
663 	return -1;
664 }
665 
WordListSet(int n,const char * wl)666 Sci_Position SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
667 	WordList *wordListN = 0;
668 	switch (n) {
669 	case 0:
670 		wordListN = &keywords;
671 		break;
672 	case 1:
673 		wordListN = &keywords2;
674 		break;
675 	case 2:
676 		wordListN = &keywords3;
677 		break;
678 	case 3:
679 		wordListN = &keywords4;
680 		break;
681 	case 4:
682 		wordListN = &ppDefinitions;
683 		break;
684 	case 5:
685 		wordListN = &markerList;
686 		break;
687 	}
688 	Sci_Position firstModification = -1;
689 	if (wordListN) {
690 		WordList wlNew;
691 		wlNew.Set(wl);
692 		if (*wordListN != wlNew) {
693 			wordListN->Set(wl);
694 			firstModification = 0;
695 			if (n == 4) {
696 				// Rebuild preprocessorDefinitions
697 				preprocessorDefinitionsStart.clear();
698 				for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
699 					const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
700 					const char *cpEquals = strchr(cpDefinition, '=');
701 					if (cpEquals) {
702 						std::string name(cpDefinition, cpEquals - cpDefinition);
703 						std::string val(cpEquals+1);
704 						const size_t bracket = name.find('(');
705 						const size_t bracketEnd = name.find(')');
706 						if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
707 							// Macro
708 							std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
709 							name = name.substr(0, bracket);
710 							preprocessorDefinitionsStart[name] = SymbolValue(val, args);
711 						} else {
712 							preprocessorDefinitionsStart[name] = val;
713 						}
714 					} else {
715 						std::string name(cpDefinition);
716 						std::string val("1");
717 						preprocessorDefinitionsStart[name] = val;
718 					}
719 				}
720 			}
721 		}
722 	}
723 	return firstModification;
724 }
725 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)726 void SCI_METHOD LexerCPP::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
727 	LexAccessor styler(pAccess);
728 
729 	CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
730 	CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
731 
732 	CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
733 
734 	setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
735 
736 	CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
737 
738 	if (options.identifiersAllowDollars) {
739 		setWordStart.Add('$');
740 	}
741 
742 	int chPrevNonWhite = ' ';
743 	int visibleChars = 0;
744 	bool lastWordWasUUID = false;
745 	int styleBeforeDCKeyword = SCE_C_DEFAULT;
746 	int styleBeforeTaskMarker = SCE_C_DEFAULT;
747 	bool continuationLine = false;
748 	bool isIncludePreprocessor = false;
749 	bool isStringInPreprocessor = false;
750 	bool inRERange = false;
751 	bool seenDocKeyBrace = false;
752 
753 	Sci_Position lineCurrent = styler.GetLine(startPos);
754 	if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
755       (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
756       (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
757 		// Set continuationLine if last character of previous line is '\'
758 		if (lineCurrent > 0) {
759 			const Sci_Position endLinePrevious = styler.LineEnd(lineCurrent - 1);
760 			if (endLinePrevious > 0) {
761 				continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
762 			}
763 		}
764 	}
765 
766 	// look back to set chPrevNonWhite properly for better regex colouring
767 	if (startPos > 0) {
768 		Sci_Position back = startPos;
769 		while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
770 			;
771 		if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
772 			chPrevNonWhite = styler.SafeGetCharAt(back);
773 		}
774 	}
775 
776 	StyleContext sc(startPos, length, initStyle, styler);
777 	LinePPState preproc = vlls.ForLine(lineCurrent);
778 
779 	bool definitionsChanged = false;
780 
781 	// Truncate ppDefineHistory before current line
782 
783 	if (!options.updatePreprocessor)
784 		ppDefineHistory.clear();
785 
786 	std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(),
787 		[lineCurrent](const PPDefinition &p) { return p.line >= lineCurrent; });
788 	if (itInvalid != ppDefineHistory.end()) {
789 		ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
790 		definitionsChanged = true;
791 	}
792 
793 	SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
794 	for (const PPDefinition &ppDef : ppDefineHistory) {
795 		if (ppDef.isUndef)
796 			preprocessorDefinitions.erase(ppDef.key);
797 		else
798 			preprocessorDefinitions[ppDef.key] = SymbolValue(ppDef.value, ppDef.arguments);
799 	}
800 
801 	std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
802 	SparseState<std::string> rawSTNew(lineCurrent);
803 
804 	int activitySet = preproc.IsInactive() ? activeFlag : 0;
805 
806 	const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_C_IDENTIFIER);
807 	const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_C_COMMENTDOCKEYWORD);
808 
809 	Sci_Position lineEndNext = styler.LineEnd(lineCurrent);
810 
811 	for (; sc.More();) {
812 
813 		if (sc.atLineStart) {
814 			// Using MaskActive() is not needed in the following statement.
815 			// Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
816 			if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
817 				// Prevent SCE_C_STRINGEOL from leaking back to previous line which
818 				// ends with a line continuation by locking in the state up to this position.
819 				sc.SetState(sc.state);
820 			}
821 			if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
822 				sc.SetState(SCE_C_DEFAULT|activitySet);
823 			}
824 			// Reset states to beginning of colourise so no surprises
825 			// if different sets of lines lexed.
826 			visibleChars = 0;
827 			lastWordWasUUID = false;
828 			isIncludePreprocessor = false;
829 			inRERange = false;
830 			if (preproc.IsInactive()) {
831 				activitySet = activeFlag;
832 				sc.SetState(sc.state | activitySet);
833 			}
834 		}
835 
836 		if (sc.atLineEnd) {
837 			lineCurrent++;
838 			lineEndNext = styler.LineEnd(lineCurrent);
839 			vlls.Add(lineCurrent, preproc);
840 			if (rawStringTerminator != "") {
841 				rawSTNew.Set(lineCurrent-1, rawStringTerminator);
842 			}
843 		}
844 
845 		// Handle line continuation generically.
846 		if (sc.ch == '\\') {
847 			if (static_cast<Sci_Position>((sc.currentPos+1)) >= lineEndNext) {
848 				lineCurrent++;
849 				lineEndNext = styler.LineEnd(lineCurrent);
850 				vlls.Add(lineCurrent, preproc);
851 				if (rawStringTerminator != "") {
852 					rawSTNew.Set(lineCurrent-1, rawStringTerminator);
853 				}
854 				sc.Forward();
855 				if (sc.ch == '\r' && sc.chNext == '\n') {
856 					// Even in UTF-8, \r and \n are separate
857 					sc.Forward();
858 				}
859 				continuationLine = true;
860 				sc.Forward();
861 				continue;
862 			}
863 		}
864 
865 		const bool atLineEndBeforeSwitch = sc.atLineEnd;
866 
867 		// Determine if the current state should terminate.
868 		switch (MaskActive(sc.state)) {
869 			case SCE_C_OPERATOR:
870 				sc.SetState(SCE_C_DEFAULT|activitySet);
871 				break;
872 			case SCE_C_NUMBER:
873 				// We accept almost anything because of hex. and number suffixes
874 				if (sc.ch == '_') {
875 					sc.ChangeState(SCE_C_USERLITERAL|activitySet);
876 				} else if (!(setWord.Contains(sc.ch)
877 				   || (sc.ch == '\'')
878 				   || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
879 				                                          sc.chPrev == 'p' || sc.chPrev == 'P')))) {
880 					sc.SetState(SCE_C_DEFAULT|activitySet);
881 				}
882 				break;
883 			case SCE_C_USERLITERAL:
884 				if (!(setWord.Contains(sc.ch)))
885 					sc.SetState(SCE_C_DEFAULT|activitySet);
886 				break;
887 			case SCE_C_IDENTIFIER:
888 				if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
889 					char s[1000];
890 					if (caseSensitive) {
891 						sc.GetCurrent(s, sizeof(s));
892 					} else {
893 						sc.GetCurrentLowered(s, sizeof(s));
894 					}
895 					if (keywords.InList(s)) {
896 						lastWordWasUUID = strcmp(s, "uuid") == 0;
897 						sc.ChangeState(SCE_C_WORD|activitySet);
898 					} else if (keywords2.InList(s)) {
899 						sc.ChangeState(SCE_C_WORD2|activitySet);
900 					} else if (keywords4.InList(s)) {
901 						sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
902 					} else {
903 						int subStyle = classifierIdentifiers.ValueFor(s);
904 						if (subStyle >= 0) {
905 							sc.ChangeState(subStyle|activitySet);
906 						}
907 					}
908 					const bool literalString = sc.ch == '\"';
909 					if (literalString || sc.ch == '\'') {
910 						size_t lenS = strlen(s);
911 						const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
912 						if (raw)
913 							s[lenS--] = '\0';
914 						const bool valid =
915 							(lenS == 0) ||
916 							((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
917 							((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
918 						if (valid) {
919 							if (literalString) {
920 								if (raw) {
921 									// Set the style of the string prefix to SCE_C_STRINGRAW but then change to
922 									// SCE_C_DEFAULT as that allows the raw string start code to run.
923 									sc.ChangeState(SCE_C_STRINGRAW|activitySet);
924 									sc.SetState(SCE_C_DEFAULT|activitySet);
925 								} else {
926 									sc.ChangeState(SCE_C_STRING|activitySet);
927 								}
928 							} else {
929 								sc.ChangeState(SCE_C_CHARACTER|activitySet);
930 							}
931 						} else {
932 							sc.SetState(SCE_C_DEFAULT | activitySet);
933 						}
934 					} else {
935 						sc.SetState(SCE_C_DEFAULT|activitySet);
936 					}
937 				}
938 				break;
939 			case SCE_C_PREPROCESSOR:
940 				if (options.stylingWithinPreprocessor) {
941 					if (IsASpace(sc.ch)) {
942 						sc.SetState(SCE_C_DEFAULT|activitySet);
943 					}
944 				} else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
945 					isStringInPreprocessor = false;
946 				} else if (!isStringInPreprocessor) {
947 					if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
948 						isStringInPreprocessor = true;
949 					} else if (sc.Match('/', '*')) {
950 						if (sc.Match("/**") || sc.Match("/*!")) {
951 							sc.SetState(SCE_C_PREPROCESSORCOMMENTDOC|activitySet);
952 						} else {
953 							sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
954 						}
955 						sc.Forward();	// Eat the *
956 					} else if (sc.Match('/', '/')) {
957 						sc.SetState(SCE_C_DEFAULT|activitySet);
958 					}
959 				}
960 				break;
961 			case SCE_C_PREPROCESSORCOMMENT:
962 			case SCE_C_PREPROCESSORCOMMENTDOC:
963 				if (sc.Match('*', '/')) {
964 					sc.Forward();
965 					sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
966 					continue;	// Without advancing in case of '\'.
967 				}
968 				break;
969 			case SCE_C_COMMENT:
970 				if (sc.Match('*', '/')) {
971 					sc.Forward();
972 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
973 				} else {
974 					styleBeforeTaskMarker = SCE_C_COMMENT;
975 					highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
976 				}
977 				break;
978 			case SCE_C_COMMENTDOC:
979 				if (sc.Match('*', '/')) {
980 					sc.Forward();
981 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
982 				} else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
983 					// Verify that we have the conditions to mark a comment-doc-keyword
984 					if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
985 						styleBeforeDCKeyword = SCE_C_COMMENTDOC;
986 						sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
987 					}
988 				}
989 				break;
990 			case SCE_C_COMMENTLINE:
991 				if (sc.atLineStart && !continuationLine) {
992 					sc.SetState(SCE_C_DEFAULT|activitySet);
993 				} else {
994 					styleBeforeTaskMarker = SCE_C_COMMENTLINE;
995 					highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
996 				}
997 				break;
998 			case SCE_C_COMMENTLINEDOC:
999 				if (sc.atLineStart && !continuationLine) {
1000 					sc.SetState(SCE_C_DEFAULT|activitySet);
1001 				} else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
1002 					// Verify that we have the conditions to mark a comment-doc-keyword
1003 					if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
1004 						styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
1005 						sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
1006 					}
1007 				}
1008 				break;
1009 			case SCE_C_COMMENTDOCKEYWORD:
1010 				if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
1011 					sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
1012 					sc.Forward();
1013 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1014 					seenDocKeyBrace = false;
1015 				} else if (sc.ch == '[' || sc.ch == '{') {
1016 					seenDocKeyBrace = true;
1017 				} else if (!setDoxygen.Contains(sc.ch)
1018 				           && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
1019 					char s[100];
1020 					if (caseSensitive) {
1021 						sc.GetCurrent(s, sizeof(s));
1022 					} else {
1023 						sc.GetCurrentLowered(s, sizeof(s));
1024 					}
1025 					if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
1026 						sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
1027 					} else if (!keywords3.InList(s + 1)) {
1028 						int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
1029 						if (subStyleCDKW >= 0) {
1030 							sc.ChangeState(subStyleCDKW|activitySet);
1031 						} else {
1032 							sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
1033 						}
1034 					}
1035 					sc.SetState(styleBeforeDCKeyword|activitySet);
1036 					seenDocKeyBrace = false;
1037 				}
1038 				break;
1039 			case SCE_C_STRING:
1040 				if (sc.atLineEnd) {
1041 					sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1042 				} else if (isIncludePreprocessor) {
1043 					if (sc.ch == '>') {
1044 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1045 						isIncludePreprocessor = false;
1046 					}
1047 				} else if (sc.ch == '\\') {
1048 					if (options.escapeSequence) {
1049 						sc.SetState(SCE_C_ESCAPESEQUENCE|activitySet);
1050 						escapeSeq.resetEscapeState(sc.chNext);
1051 					}
1052 					sc.Forward(); // Skip all characters after the backslash
1053 				} else if (sc.ch == '\"') {
1054 					if (sc.chNext == '_') {
1055 						sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1056 					} else {
1057 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1058 					}
1059 				}
1060 				break;
1061 			case SCE_C_ESCAPESEQUENCE:
1062 				escapeSeq.digitsLeft--;
1063 				if (!escapeSeq.atEscapeEnd(sc.ch)) {
1064 					break;
1065 				}
1066 				if (sc.ch == '"') {
1067 					sc.SetState(SCE_C_STRING|activitySet);
1068 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1069 				} else if (sc.ch == '\\') {
1070 					escapeSeq.resetEscapeState(sc.chNext);
1071 					sc.Forward();
1072 				} else {
1073 					sc.SetState(SCE_C_STRING|activitySet);
1074 					if (sc.atLineEnd) {
1075 						sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1076 					}
1077 				}
1078 				break;
1079 			case SCE_C_HASHQUOTEDSTRING:
1080 				if (sc.ch == '\\') {
1081 					if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1082 						sc.Forward();
1083 					}
1084 				} else if (sc.ch == '\"') {
1085 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1086 				}
1087 				break;
1088 			case SCE_C_STRINGRAW:
1089 				if (sc.Match(rawStringTerminator.c_str())) {
1090 					for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
1091 						sc.Forward();
1092 					sc.SetState(SCE_C_DEFAULT|activitySet);
1093 					rawStringTerminator = "";
1094 				}
1095 				break;
1096 			case SCE_C_CHARACTER:
1097 				if (sc.atLineEnd) {
1098 					sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1099 				} else if (sc.ch == '\\') {
1100 					if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1101 						sc.Forward();
1102 					}
1103 				} else if (sc.ch == '\'') {
1104 					if (sc.chNext == '_') {
1105 						sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1106 					} else {
1107 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1108 					}
1109 				}
1110 				break;
1111 			case SCE_C_REGEX:
1112 				if (sc.atLineStart) {
1113 					sc.SetState(SCE_C_DEFAULT|activitySet);
1114 				} else if (! inRERange && sc.ch == '/') {
1115 					sc.Forward();
1116 					while ((sc.ch < 0x80) && islower(sc.ch))
1117 						sc.Forward();    // gobble regex flags
1118 					sc.SetState(SCE_C_DEFAULT|activitySet);
1119 				} else if (sc.ch == '\\' && (static_cast<Sci_Position>(sc.currentPos+1) < lineEndNext)) {
1120 					// Gobble up the escaped character
1121 					sc.Forward();
1122 				} else if (sc.ch == '[') {
1123 					inRERange = true;
1124 				} else if (sc.ch == ']') {
1125 					inRERange = false;
1126 				}
1127 				break;
1128 			case SCE_C_STRINGEOL:
1129 				if (sc.atLineStart) {
1130 					sc.SetState(SCE_C_DEFAULT|activitySet);
1131 				}
1132 				break;
1133 			case SCE_C_VERBATIM:
1134 				if (options.verbatimStringsAllowEscapes && (sc.ch == '\\')) {
1135 					sc.Forward(); // Skip all characters after the backslash
1136 				} else if (sc.ch == '\"') {
1137 					if (sc.chNext == '\"') {
1138 						sc.Forward();
1139 					} else {
1140 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1141 					}
1142 				}
1143 				break;
1144 			case SCE_C_TRIPLEVERBATIM:
1145 				if (sc.Match(R"(""")")) {
1146 					while (sc.Match('"')) {
1147 						sc.Forward();
1148 					}
1149 					sc.SetState(SCE_C_DEFAULT|activitySet);
1150 				}
1151 				break;
1152 			case SCE_C_UUID:
1153 				if (sc.atLineEnd || sc.ch == ')') {
1154 					sc.SetState(SCE_C_DEFAULT|activitySet);
1155 				}
1156 				break;
1157 			case SCE_C_TASKMARKER:
1158 				if (isoperator(sc.ch) || IsASpace(sc.ch)) {
1159 					sc.SetState(styleBeforeTaskMarker|activitySet);
1160 					styleBeforeTaskMarker = SCE_C_DEFAULT;
1161 				}
1162 		}
1163 
1164 		if (sc.atLineEnd && !atLineEndBeforeSwitch) {
1165 			// State exit processing consumed characters up to end of line.
1166 			lineCurrent++;
1167 			lineEndNext = styler.LineEnd(lineCurrent);
1168 			vlls.Add(lineCurrent, preproc);
1169 		}
1170 
1171 		// Determine if a new state should be entered.
1172 		if (MaskActive(sc.state) == SCE_C_DEFAULT) {
1173 			if (sc.Match('@', '\"')) {
1174 				sc.SetState(SCE_C_VERBATIM|activitySet);
1175 				sc.Forward();
1176 			} else if (options.triplequotedStrings && sc.Match(R"(""")")) {
1177 				sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
1178 				sc.Forward(2);
1179 			} else if (options.hashquotedStrings && sc.Match('#', '\"')) {
1180 				sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
1181 				sc.Forward();
1182 			} else if (options.backQuotedStrings && sc.Match('`')) {
1183 				sc.SetState(SCE_C_STRINGRAW|activitySet);
1184 				rawStringTerminator = "`";
1185 			} else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1186 				if (lastWordWasUUID) {
1187 					sc.SetState(SCE_C_UUID|activitySet);
1188 					lastWordWasUUID = false;
1189 				} else {
1190 					sc.SetState(SCE_C_NUMBER|activitySet);
1191 				}
1192 			} else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
1193 				if (lastWordWasUUID) {
1194 					sc.SetState(SCE_C_UUID|activitySet);
1195 					lastWordWasUUID = false;
1196 				} else {
1197 					sc.SetState(SCE_C_IDENTIFIER|activitySet);
1198 				}
1199 			} else if (sc.Match('/', '*')) {
1200 				if (sc.Match("/**") || sc.Match("/*!")) {	// Support of Qt/Doxygen doc. style
1201 					sc.SetState(SCE_C_COMMENTDOC|activitySet);
1202 				} else {
1203 					sc.SetState(SCE_C_COMMENT|activitySet);
1204 				}
1205 				sc.Forward();	// Eat the * so it isn't used for the end of the comment
1206 			} else if (sc.Match('/', '/')) {
1207 				if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
1208 					// Support of Qt/Doxygen doc. style
1209 					sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
1210 				else
1211 					sc.SetState(SCE_C_COMMENTLINE|activitySet);
1212 			} else if (sc.ch == '/'
1213 				   && (setOKBeforeRE.Contains(chPrevNonWhite)
1214 				       || followsReturnKeyword(sc, styler))
1215 				   && (!setCouldBePostOp.Contains(chPrevNonWhite)
1216 				       || !FollowsPostfixOperator(sc, styler))) {
1217 				sc.SetState(SCE_C_REGEX|activitySet);	// JavaScript's RegEx
1218 				inRERange = false;
1219 			} else if (sc.ch == '\"') {
1220 				if (sc.chPrev == 'R') {
1221 					styler.Flush();
1222 					if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
1223 						sc.SetState(SCE_C_STRINGRAW|activitySet);
1224 						rawStringTerminator = ")";
1225 						for (Sci_Position termPos = sc.currentPos + 1;; termPos++) {
1226 							const char chTerminator = styler.SafeGetCharAt(termPos, '(');
1227 							if (chTerminator == '(')
1228 								break;
1229 							rawStringTerminator += chTerminator;
1230 						}
1231 						rawStringTerminator += '\"';
1232 					} else {
1233 						sc.SetState(SCE_C_STRING|activitySet);
1234 					}
1235 				} else {
1236 					sc.SetState(SCE_C_STRING|activitySet);
1237 				}
1238 				isIncludePreprocessor = false;	// ensure that '>' won't end the string
1239 			} else if (isIncludePreprocessor && sc.ch == '<') {
1240 				sc.SetState(SCE_C_STRING|activitySet);
1241 			} else if (sc.ch == '\'') {
1242 				sc.SetState(SCE_C_CHARACTER|activitySet);
1243 			} else if (sc.ch == '#' && visibleChars == 0) {
1244 				// Preprocessor commands are alone on their line
1245 				sc.SetState(SCE_C_PREPROCESSOR|activitySet);
1246 				// Skip whitespace between # and preprocessor word
1247 				do {
1248 					sc.Forward();
1249 				} while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
1250 				if (sc.atLineEnd) {
1251 					sc.SetState(SCE_C_DEFAULT|activitySet);
1252 				} else if (sc.Match("include")) {
1253 					isIncludePreprocessor = true;
1254 				} else {
1255 					if (options.trackPreprocessor) {
1256 						if (sc.Match("ifdef") || sc.Match("ifndef")) {
1257 							const bool isIfDef = sc.Match("ifdef");
1258 							const int startRest = isIfDef ? 5 : 6;
1259 							std::string restOfLine = GetRestOfLine(styler, sc.currentPos + startRest + 1, false);
1260 							bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
1261 							preproc.StartSection(isIfDef == foundDef);
1262 						} else if (sc.Match("if")) {
1263 							std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1264 							const bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1265 							preproc.StartSection(ifGood);
1266 						} else if (sc.Match("else")) {
1267 							if (!preproc.CurrentIfTaken()) {
1268 								preproc.InvertCurrentLevel();
1269 								activitySet = preproc.IsInactive() ? activeFlag : 0;
1270 								if (!activitySet)
1271 									sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1272 							} else if (!preproc.IsInactive()) {
1273 								preproc.InvertCurrentLevel();
1274 								activitySet = preproc.IsInactive() ? activeFlag : 0;
1275 								if (!activitySet)
1276 									sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1277 							}
1278 						} else if (sc.Match("elif")) {
1279 							// Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
1280 							if (!preproc.CurrentIfTaken()) {
1281 								// Similar to #if
1282 								std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 4, true);
1283 								const bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1284 								if (ifGood) {
1285 									preproc.InvertCurrentLevel();
1286 									activitySet = preproc.IsInactive() ? activeFlag : 0;
1287 									if (!activitySet)
1288 										sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1289 								}
1290 							} else if (!preproc.IsInactive()) {
1291 								preproc.InvertCurrentLevel();
1292 								activitySet = preproc.IsInactive() ? activeFlag : 0;
1293 								if (!activitySet)
1294 									sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1295 							}
1296 						} else if (sc.Match("endif")) {
1297 							preproc.EndSection();
1298 							activitySet = preproc.IsInactive() ? activeFlag : 0;
1299 							sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1300 						} else if (sc.Match("define")) {
1301 							if (options.updatePreprocessor && !preproc.IsInactive()) {
1302 								std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
1303 								size_t startName = 0;
1304 								while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
1305 									startName++;
1306 								size_t endName = startName;
1307 								while ((endName < restOfLine.length()) && setWord.Contains(static_cast<unsigned char>(restOfLine[endName])))
1308 									endName++;
1309 								std::string key = restOfLine.substr(startName, endName-startName);
1310 								if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
1311 									// Macro
1312 									size_t endArgs = endName;
1313 									while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
1314 										endArgs++;
1315 									std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
1316 									size_t startValue = endArgs+1;
1317 									while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1318 										startValue++;
1319 									std::string value;
1320 									if (startValue < restOfLine.length())
1321 										value = restOfLine.substr(startValue);
1322 									preprocessorDefinitions[key] = SymbolValue(value, args);
1323 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
1324 									definitionsChanged = true;
1325 								} else {
1326 									// Value
1327 									size_t startValue = endName;
1328 									while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1329 										startValue++;
1330 									std::string value = restOfLine.substr(startValue);
1331 									if (OnlySpaceOrTab(value))
1332 										value = "1";	// No value defaults to 1
1333 									preprocessorDefinitions[key] = value;
1334 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
1335 									definitionsChanged = true;
1336 								}
1337 							}
1338 						} else if (sc.Match("undef")) {
1339 							if (options.updatePreprocessor && !preproc.IsInactive()) {
1340 								const std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, false);
1341 								std::vector<std::string> tokens = Tokenize(restOfLine);
1342 								if (tokens.size() >= 1) {
1343 									const std::string key = tokens[0];
1344 									preprocessorDefinitions.erase(key);
1345 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
1346 									definitionsChanged = true;
1347 								}
1348 							}
1349 						}
1350 					}
1351 				}
1352 			} else if (isoperator(sc.ch)) {
1353 				sc.SetState(SCE_C_OPERATOR|activitySet);
1354 			}
1355 		}
1356 
1357 		if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
1358 			chPrevNonWhite = sc.ch;
1359 			visibleChars++;
1360 		}
1361 		continuationLine = false;
1362 		sc.Forward();
1363 	}
1364 	const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
1365 	if (definitionsChanged || rawStringsChanged)
1366 		styler.ChangeLexerState(startPos, startPos + length);
1367 	sc.Complete();
1368 }
1369 
// Store both the current line's fold level and the next line's fold level
// in the level store to make it easy to pick up with each increment
// and to make it possible to adjust the current level for "} else {".
1373 
Fold(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)1374 void SCI_METHOD LexerCPP::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1375 
1376 	if (!options.fold)
1377 		return;
1378 
1379 	LexAccessor styler(pAccess);
1380 
1381 	const Sci_PositionU endPos = startPos + length;
1382 	int visibleChars = 0;
1383 	bool inLineComment = false;
1384 	Sci_Position lineCurrent = styler.GetLine(startPos);
1385 	int levelCurrent = SC_FOLDLEVELBASE;
1386 	if (lineCurrent > 0)
1387 		levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
1388 	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
1389 	int levelMinCurrent = levelCurrent;
1390 	int levelNext = levelCurrent;
1391 	char chNext = styler[startPos];
1392 	int styleNext = MaskActive(styler.StyleAt(startPos));
1393 	int style = MaskActive(initStyle);
1394 	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
1395 	for (Sci_PositionU i = startPos; i < endPos; i++) {
1396 		const char ch = chNext;
1397 		chNext = styler.SafeGetCharAt(i + 1);
1398 		const int stylePrev = style;
1399 		style = styleNext;
1400 		styleNext = MaskActive(styler.StyleAt(i + 1));
1401 		const bool atEOL = i == (lineStartNext-1);
1402 		if ((style == SCE_C_COMMENTLINE) || (style == SCE_C_COMMENTLINEDOC))
1403 			inLineComment = true;
1404 		if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
1405 			if (!IsStreamCommentStyle(stylePrev)) {
1406 				levelNext++;
1407 			} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
1408 				// Comments don't end at end of line and the next character may be unstyled.
1409 				levelNext--;
1410 			}
1411 		}
1412 		if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
1413 			if (userDefinedFoldMarkers) {
1414 				if (styler.Match(i, options.foldExplicitStart.c_str())) {
1415 					levelNext++;
1416 				} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
1417 					levelNext--;
1418 				}
1419 			} else {
1420 				if ((ch == '/') && (chNext == '/')) {
1421 					const char chNext2 = styler.SafeGetCharAt(i + 2);
1422 					if (chNext2 == '{') {
1423 						levelNext++;
1424 					} else if (chNext2 == '}') {
1425 						levelNext--;
1426 					}
1427 				}
1428 			}
1429 		}
1430 		if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
1431 			if (ch == '#') {
1432 				Sci_PositionU j = i + 1;
1433 				while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1434 					j++;
1435 				}
1436 				if (styler.Match(j, "region") || styler.Match(j, "if")) {
1437 					levelNext++;
1438 				} else if (styler.Match(j, "end")) {
1439 					levelNext--;
1440 				}
1441 
1442 				if (options.foldPreprocessorAtElse && (styler.Match(j, "else") || styler.Match(j, "elif"))) {
1443 					levelMinCurrent--;
1444 				}
1445 			}
1446 		}
1447 		if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
1448 			if (ch == '{' || ch == '[' || ch == '(') {
1449 				// Measure the minimum before a '{' to allow
1450 				// folding on "} else {"
1451 				if (options.foldAtElse && levelMinCurrent > levelNext) {
1452 					levelMinCurrent = levelNext;
1453 				}
1454 				levelNext++;
1455 			} else if (ch == '}' || ch == ']' || ch == ')') {
1456 				levelNext--;
1457 			}
1458 		}
1459 		if (!IsASpace(ch))
1460 			visibleChars++;
1461 		if (atEOL || (i == endPos-1)) {
1462 			int levelUse = levelCurrent;
1463 			if ((options.foldSyntaxBased && options.foldAtElse) ||
1464 				(options.foldPreprocessor && options.foldPreprocessorAtElse)
1465 			) {
1466 				levelUse = levelMinCurrent;
1467 			}
1468 			int lev = levelUse | levelNext << 16;
1469 			if (visibleChars == 0 && options.foldCompact)
1470 				lev |= SC_FOLDLEVELWHITEFLAG;
1471 			if (levelUse < levelNext)
1472 				lev |= SC_FOLDLEVELHEADERFLAG;
1473 			if (lev != styler.LevelAt(lineCurrent)) {
1474 				styler.SetLevel(lineCurrent, lev);
1475 			}
1476 			lineCurrent++;
1477 			lineStartNext = styler.LineStart(lineCurrent+1);
1478 			levelCurrent = levelNext;
1479 			levelMinCurrent = levelCurrent;
1480 			if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
1481 				// There is an empty line at end of file so give it same level and empty
1482 				styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1483 			}
1484 			visibleChars = 0;
1485 			inLineComment = false;
1486 		}
1487 	}
1488 }
1489 
EvaluateTokens(std::vector<std::string> & tokens,const SymbolTable & preprocessorDefinitions)1490 void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
1491 
1492 	// Remove whitespace tokens
1493 	tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
1494 
1495 	// Evaluate defined statements to either 0 or 1
1496 	for (size_t i=0; (i+1)<tokens.size();) {
1497 		if (tokens[i] == "defined") {
1498 			const char *val = "0";
1499 			if (tokens[i+1] == "(") {
1500 				if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
1501 					// defined()
1502 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1503 				} else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1504 					// defined(<identifier>)
1505 					SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
1506 					if (it != preprocessorDefinitions.end()) {
1507 						val = "1";
1508 					}
1509 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1510 				} else {
1511 					// Spurious '(' so erase as more likely to result in false
1512 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1513 				}
1514 			} else {
1515 				// defined <identifier>
1516 				SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
1517 				if (it != preprocessorDefinitions.end()) {
1518 					val = "1";
1519 				}
1520 				tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1521 			}
1522 			tokens[i] = val;
1523 		} else {
1524 			i++;
1525 		}
1526 	}
1527 
1528 	// Evaluate identifiers
1529 	const size_t maxIterations = 100;
1530 	size_t iterations = 0;	// Limit number of iterations in case there is a recursive macro.
1531 	for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
1532 		iterations++;
1533 		if (setWordStart.Contains(static_cast<unsigned char>(tokens[i][0]))) {
1534 			SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
1535 			if (it != preprocessorDefinitions.end()) {
1536 				// Tokenize value
1537 				std::vector<std::string> macroTokens = Tokenize(it->second.value);
1538 				if (it->second.IsMacro()) {
1539 					if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
1540 						// Create map of argument name to value
1541 						std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
1542 						std::map<std::string, std::string> arguments;
1543 						size_t arg = 0;
1544 						size_t tok = i+2;
1545 						while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
1546 							if (tokens.at(tok) != ",") {
1547 								arguments[argumentNames.at(arg)] = tokens.at(tok);
1548 								arg++;
1549 							}
1550 							tok++;
1551 						}
1552 
1553 						// Remove invocation
1554 						tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
1555 
1556 						// Substitute values into macro
1557 						macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
1558 
1559 						for (size_t iMacro = 0; iMacro < macroTokens.size();) {
1560 							if (setWordStart.Contains(static_cast<unsigned char>(macroTokens[iMacro][0]))) {
1561 								std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
1562 								if (itFind != arguments.end()) {
1563 									// TODO: Possible that value will be expression so should insert tokenized form
1564 									macroTokens[iMacro] = itFind->second;
1565 								}
1566 							}
1567 							iMacro++;
1568 						}
1569 
1570 						// Insert results back into tokens
1571 						tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1572 
1573 					} else {
1574 						i++;
1575 					}
1576 				} else {
1577 					// Remove invocation
1578 					tokens.erase(tokens.begin() + i);
1579 					// Insert results back into tokens
1580 					tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1581 				}
1582 			} else {
1583 				// Identifier not found and value defaults to zero
1584 				tokens[i] = "0";
1585 			}
1586 		} else {
1587 			i++;
1588 		}
1589 	}
1590 
1591 	// Find bracketed subexpressions and recurse on them
1592 	BracketPair bracketPair = FindBracketPair(tokens);
1593 	while (bracketPair.itBracket != tokens.end()) {
1594 		std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
1595 		EvaluateTokens(inBracket, preprocessorDefinitions);
1596 
1597 		// The insertion is done before the removal because there were failures with the opposite approach
1598 		tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
1599 
1600 		bracketPair = FindBracketPair(tokens);
1601 		tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
1602 
1603 		bracketPair = FindBracketPair(tokens);
1604 	}
1605 
1606 	// Evaluate logical negations
1607 	for (size_t j=0; (j+1)<tokens.size();) {
1608 		if (setNegationOp.Contains(tokens[j][0])) {
1609 			int isTrue = atoi(tokens[j+1].c_str());
1610 			if (tokens[j] == "!")
1611 				isTrue = !isTrue;
1612 			std::vector<std::string>::iterator itInsert =
1613 				tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1614 			tokens.insert(itInsert, isTrue ? "1" : "0");
1615 		} else {
1616 			j++;
1617 		}
1618 	}
1619 
1620 	// Evaluate expressions in precedence order
1621 	enum precedence { precArithmetic, precRelative, precLogical };
1622 	for (int prec=precArithmetic; prec <= precLogical; prec++) {
1623 		// Looking at 3 tokens at a time so end at 2 before end
1624 		for (size_t k=0; (k+2)<tokens.size();) {
1625 			const char chOp = tokens[k+1][0];
1626 			if (
1627 				((prec==precArithmetic) && setArithmethicOp.Contains(chOp)) ||
1628 				((prec==precRelative) && setRelOp.Contains(chOp)) ||
1629 				((prec==precLogical) && setLogicalOp.Contains(chOp))
1630 				) {
1631 				const int valA = atoi(tokens[k].c_str());
1632 				const int valB = atoi(tokens[k+2].c_str());
1633 				int result = 0;
1634 				if (tokens[k+1] == "+")
1635 					result = valA + valB;
1636 				else if (tokens[k+1] == "-")
1637 					result = valA - valB;
1638 				else if (tokens[k+1] == "*")
1639 					result = valA * valB;
1640 				else if (tokens[k+1] == "/")
1641 					result = valA / (valB ? valB : 1);
1642 				else if (tokens[k+1] == "%")
1643 					result = valA % (valB ? valB : 1);
1644 				else if (tokens[k+1] == "<")
1645 					result = valA < valB;
1646 				else if (tokens[k+1] == "<=")
1647 					result = valA <= valB;
1648 				else if (tokens[k+1] == ">")
1649 					result = valA > valB;
1650 				else if (tokens[k+1] == ">=")
1651 					result = valA >= valB;
1652 				else if (tokens[k+1] == "==")
1653 					result = valA == valB;
1654 				else if (tokens[k+1] == "!=")
1655 					result = valA != valB;
1656 				else if (tokens[k+1] == "||")
1657 					result = valA || valB;
1658 				else if (tokens[k+1] == "&&")
1659 					result = valA && valB;
1660 				char sResult[30];
1661 				sprintf(sResult, "%d", result);
1662 				std::vector<std::string>::iterator itInsert =
1663 					tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1664 				tokens.insert(itInsert, sResult);
1665 			} else {
1666 				k++;
1667 			}
1668 		}
1669 	}
1670 }
1671 
Tokenize(const std::string & expr) const1672 std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const {
1673 	// Break into tokens
1674 	std::vector<std::string> tokens;
1675 	const char *cp = expr.c_str();
1676 	while (*cp) {
1677 		std::string word;
1678 		if (setWord.Contains(static_cast<unsigned char>(*cp))) {
1679 			// Identifiers and numbers
1680 			while (setWord.Contains(static_cast<unsigned char>(*cp))) {
1681 				word += *cp;
1682 				cp++;
1683 			}
1684 		} else if (IsSpaceOrTab(*cp)) {
1685 			while (IsSpaceOrTab(*cp)) {
1686 				word += *cp;
1687 				cp++;
1688 			}
1689 		} else if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1690 			word += *cp;
1691 			cp++;
1692 			if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1693 				word += *cp;
1694 				cp++;
1695 			}
1696 		} else if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1697 			word += *cp;
1698 			cp++;
1699 			if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1700 				word += *cp;
1701 				cp++;
1702 			}
1703 		} else {
1704 			// Should handle strings, characters, and comments here
1705 			word += *cp;
1706 			cp++;
1707 		}
1708 		tokens.push_back(word);
1709 	}
1710 	return tokens;
1711 }
1712 
EvaluateExpression(const std::string & expr,const SymbolTable & preprocessorDefinitions)1713 bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
1714 	std::vector<std::string> tokens = Tokenize(expr);
1715 
1716 	EvaluateTokens(tokens, preprocessorDefinitions);
1717 
1718 	// "0" or "" -> false else true
1719 	const bool isFalse = tokens.empty() ||
1720 		((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1721 	return !isFalse;
1722 }
1723 
1724 LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
1725 LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);
1726