1 // Scintilla source code edit control
2 /** @file LexCPP.cxx
3  ** Lexer for C++, C, Java, and JavaScript.
4  ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5  **/
6 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
9 #include <cstdlib>
10 #include <cassert>
11 #include <cstring>
12 
13 #include <utility>
14 #include <string>
15 #include <vector>
16 #include <map>
17 #include <algorithm>
18 #include <iterator>
19 
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23 
24 #include "StringCopy.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SparseState.h"
33 #include "SubStyles.h"
34 
35 using namespace Scintilla;
36 
37 namespace {
38 	// Use an unnamed namespace to protect the functions and classes from name conflicts
39 
IsSpaceEquiv(int state)40 constexpr bool IsSpaceEquiv(int state) noexcept {
41 	return (state <= SCE_C_COMMENTDOC) ||
42 		// including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
43 		(state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
44 		(state == SCE_C_COMMENTDOCKEYWORDERROR);
45 }
46 
47 // Preconditions: sc.currentPos points to a character after '+' or '-'.
48 // The test for pos reaching 0 should be redundant,
49 // and is in only for safety measures.
50 // Limitation: this code will give the incorrect answer for code like
51 // a = b+++/ptn/...
52 // Putting a space between the '++' post-inc operator and the '+' binary op
53 // fixes this, and is highly recommended for readability anyway.
FollowsPostfixOperator(const StyleContext & sc,LexAccessor & styler)54 bool FollowsPostfixOperator(const StyleContext &sc, LexAccessor &styler) {
55 	Sci_Position pos = sc.currentPos;
56 	while (--pos > 0) {
57 		const char ch = styler[pos];
58 		if (ch == '+' || ch == '-') {
59 			return styler[pos - 1] == ch;
60 		}
61 	}
62 	return false;
63 }
64 
followsReturnKeyword(const StyleContext & sc,LexAccessor & styler)65 bool followsReturnKeyword(const StyleContext &sc, LexAccessor &styler) {
66 	// Don't look at styles, so no need to flush.
67 	Sci_Position pos = sc.currentPos;
68 	const Sci_Position currentLine = styler.GetLine(pos);
69 	const Sci_Position lineStartPos = styler.LineStart(currentLine);
70 	while (--pos > lineStartPos) {
71 		const char ch = styler.SafeGetCharAt(pos);
72 		if (ch != ' ' && ch != '\t') {
73 			break;
74 		}
75 	}
76 	const char *retBack = "nruter";
77 	const char *s = retBack;
78 	while (*s
79 		&& pos >= lineStartPos
80 		&& styler.SafeGetCharAt(pos) == *s) {
81 		s++;
82 		pos--;
83 	}
84 	return !*s;
85 }
86 
// True only for the two horizontal blank characters: tab and space.
constexpr bool IsSpaceOrTab(int ch) noexcept {
	return (ch == '\t') || (ch == ' ');
}
90 
OnlySpaceOrTab(const std::string & s)91 bool OnlySpaceOrTab(const std::string &s) noexcept {
92 	for (const char ch : s) {
93 		if (!IsSpaceOrTab(ch))
94 			return false;
95 	}
96 	return true;
97 }
98 
// Splits text at every occurrence of separator.
// Empty text produces no pieces; otherwise the result has one more piece than
// there are separators, so adjacent, leading or trailing separators produce
// empty pieces.
std::vector<std::string> StringSplit(const std::string &text, int separator) {
	std::vector<std::string> pieces;
	if (text.empty()) {
		return pieces;
	}
	std::string current;
	for (const char ch : text) {
		if (ch == separator) {
			// Finish the piece collected so far and start a new, empty one.
			pieces.push_back(std::move(current));
			current.clear();
		} else {
			current.push_back(ch);
		}
	}
	pieces.push_back(std::move(current));
	return pieces;
}
110 
// Result of FindBracketPair: iterators to an opening "(" token and to the
// ")" token that balances it. Both equal the end of the token vector when
// no balanced pair was found.
struct BracketPair {
	std::vector<std::string>::iterator itBracket;
	std::vector<std::string>::iterator itEndBracket;
};

// Locates the first "(" token in tokens and the ")" token that closes it,
// taking nested brackets into account.
// Returns both iterators set to tokens.end() when there is no "(" or when
// the first "(" is never closed.
BracketPair FindBracketPair(std::vector<std::string> &tokens) {
	const std::vector<std::string>::iterator itNone = tokens.end();
	const std::vector<std::string>::iterator itOpen = std::find(tokens.begin(), itNone, "(");

	// depth counts currently open brackets; the scan starts on the first "("
	// so it is at least 1 whenever a ")" is seen.
	size_t depth = 0;
	for (std::vector<std::string>::iterator it = itOpen; it != itNone; ++it) {
		if (*it == "(") {
			++depth;
		} else if (*it == ")") {
			--depth;
			if (depth == 0) {
				return BracketPair{ itOpen, it };
			}
		}
	}
	return BracketPair{ itNone, itNone };
}
140 
highlightTaskMarker(StyleContext & sc,LexAccessor & styler,int activity,const WordList & markerList,bool caseSensitive)141 void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
142 		int activity, const WordList &markerList, bool caseSensitive){
143 	if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
144 		constexpr Sci_PositionU lengthMarker = 50;
145 		char marker[lengthMarker+1] = "";
146 		const Sci_PositionU currPos = sc.currentPos;
147 		Sci_PositionU i = 0;
148 		while (i < lengthMarker) {
149 			const char ch = styler.SafeGetCharAt(currPos + i);
150 			if (IsASpace(ch) || isoperator(ch)) {
151 				break;
152 			}
153 			if (caseSensitive)
154 				marker[i] = ch;
155 			else
156 				marker[i] = MakeLowerCase(ch);
157 			i++;
158 		}
159 		marker[i] = '\0';
160 		if (markerList.InList(marker)) {
161 			sc.SetState(SCE_C_TASKMARKER|activity);
162 		}
163 	}
164 }
165 
166 class EscapeSequence {
167 	const CharacterSet setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
168 	const CharacterSet setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
169 	const CharacterSet setNoneNumeric;
170 	const CharacterSet *escapeSetValid = nullptr;
171 	int digitsLeft = 0;
172 public:
173 	EscapeSequence() = default;
resetEscapeState(int nextChar)174 	void resetEscapeState(int nextChar) {
175 		digitsLeft = 0;
176 		escapeSetValid = &setNoneNumeric;
177 		if (nextChar == 'U') {
178 			digitsLeft = 9;
179 			escapeSetValid = &setHexDigits;
180 		} else if (nextChar == 'u') {
181 			digitsLeft = 5;
182 			escapeSetValid = &setHexDigits;
183 		} else if (nextChar == 'x') {
184 			digitsLeft = 5;
185 			escapeSetValid = &setHexDigits;
186 		} else if (setOctDigits.Contains(nextChar)) {
187 			digitsLeft = 3;
188 			escapeSetValid = &setOctDigits;
189 		}
190 	}
atEscapeEnd(int currChar) const191 	bool atEscapeEnd(int currChar) const {
192 		return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
193 	}
consumeDigit()194 	void consumeDigit() noexcept {
195 		digitsLeft--;
196 	}
197 };
198 
GetRestOfLine(LexAccessor & styler,Sci_Position start,bool allowSpace)199 std::string GetRestOfLine(LexAccessor &styler, Sci_Position start, bool allowSpace) {
200 	std::string restOfLine;
201 	Sci_Position line = styler.GetLine(start);
202 	Sci_Position pos = start;
203 	Sci_Position endLine = styler.LineEnd(line);
204 	char ch = styler.SafeGetCharAt(start, '\n');
205 	while (pos < endLine) {
206 		if (ch == '\\' && ((pos + 1) == endLine)) {
207 			// Continuation line
208 			line++;
209 			pos = styler.LineStart(line);
210 			endLine = styler.LineEnd(line);
211 			ch = styler.SafeGetCharAt(pos, '\n');
212 		} else {
213 			const char chNext = styler.SafeGetCharAt(pos + 1, '\n');
214 			if (ch == '/' && (chNext == '/' || chNext == '*'))
215 				break;
216 			if (allowSpace || (ch != ' ')) {
217 				restOfLine += ch;
218 			}
219 			pos++;
220 			ch = chNext;
221 		}
222 	}
223 	return restOfLine;
224 }
225 
IsStreamCommentStyle(int style)226 constexpr bool IsStreamCommentStyle(int style) noexcept {
227 	return style == SCE_C_COMMENT ||
228 		style == SCE_C_COMMENTDOC ||
229 		style == SCE_C_COMMENTDOCKEYWORD ||
230 		style == SCE_C_COMMENTDOCKEYWORDERROR;
231 }
232 
233 struct PPDefinition {
234 	Sci_Position line;
235 	std::string key;
236 	std::string value;
237 	bool isUndef;
238 	std::string arguments;
PPDefinition__anon4c384e1b0111::PPDefinition239 	PPDefinition(Sci_Position line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, const std::string &arguments_="") :
240 		line(line_), key(key_), value(value_), isUndef(isUndef_), arguments(arguments_) {
241 	}
242 };
243 
// Bit combined with a style number to select the inactive variant of the style.
constexpr int inactiveFlag = 0x40;

// Track the state of preprocessor conditionals to allow showing active and
// inactive code in different styles.
// Only 31 levels of conditional nesting are tracked. Deeper levels are counted
// (so that the matching #endif lines unwind correctly) but never change or
// read the bit masks.
class LinePPState {
	// state is a bit mask with 1 bit per level
	// bit is 1 for level if section inactive, so any bits set = inactive style
	int state = 0;
	// ifTaken is a bit mask with 1 bit per level
	// bit is 1 for level if some branch at this level has been taken
	int ifTaken = 0;
	// level is the nesting level of #if constructs; -1 means outside any #if
	int level = -1;
	static constexpr int maximumNestingLevel = 31;
	// True when level addresses one of the tracked bits.
	bool ValidLevel() const noexcept {
		return level >= 0 && level < maximumNestingLevel;
	}
	// Bit mask selecting the current level.
	// Outside any conditional (level < 0) the mask for level 0 is returned.
	// For levels beyond the tracked range an empty mask is returned: shifting
	// by the unbounded level would be undefined behaviour, and no information
	// is stored for such levels anyway.
	int maskLevel() const noexcept {
		if (level < 0) {
			return 1;
		}
		if (level >= maximumNestingLevel) {
			return 0;
		}
		return 1 << level;
	}
public:
	LinePPState() noexcept = default;
	// No enclosing tracked conditional is inactive.
	bool IsActive() const noexcept {
		return state == 0;
	}
	// At least one enclosing tracked conditional is inactive.
	bool IsInactive() const noexcept {
		return state != 0;
	}
	// inactiveFlag when inactive, otherwise 0; meant to be or-ed into a style.
	int ActiveState() const noexcept {
		return state ? inactiveFlag : 0;
	}
	// True when some branch of the current level has already been taken.
	// Always false for levels deeper than the tracked range.
	bool CurrentIfTaken() const noexcept {
		return (ifTaken & maskLevel()) != 0;
	}
	// Enter a new conditional level; on tells whether its first branch is active.
	void StartSection(bool on) noexcept {
		level++;
		if (ValidLevel()) {
			if (on) {
				state &= ~maskLevel();
				ifTaken |= maskLevel();
			} else {
				state |= maskLevel();
				ifTaken &= ~maskLevel();
			}
		}
	}
	// Leave the current conditional level, clearing its bits.
	void EndSection() noexcept {
		if (ValidLevel()) {
			state &= ~maskLevel();
			ifTaken &= ~maskLevel();
		}
		level--;
	}
	// Flip the active/inactive bit of the current level and mark a branch as taken.
	void InvertCurrentLevel() noexcept {
		if (ValidLevel()) {
			state ^= maskLevel();
			ifTaken |= maskLevel();
		}
	}
};
311 
312 // Hold the preprocessor state for each line seen.
313 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
314 class PPStates {
315 	std::vector<LinePPState> vlls;
316 public:
ForLine(Sci_Position line) const317 	LinePPState ForLine(Sci_Position line) const noexcept {
318 		if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
319 			return vlls[line];
320 		} else {
321 			return LinePPState();
322 		}
323 	}
Add(Sci_Position line,LinePPState lls)324 	void Add(Sci_Position line, LinePPState lls) {
325 		vlls.resize(line+1);
326 		vlls[line] = lls;
327 	}
328 };
329 
330 // An individual named option for use in an OptionSet
331 
// Options used for LexerCPP.
// Each member carries its default value; OptionSetCPP binds the members to
// property names.
struct OptionsCPP {
	bool stylingWithinPreprocessor = false;
	bool identifiersAllowDollars = true;
	bool trackPreprocessor = true;
	bool updatePreprocessor = true;
	bool verbatimStringsAllowEscapes = false;
	bool triplequotedStrings = false;
	bool hashquotedStrings = false;
	bool backQuotedStrings = false;
	bool escapeSequence = false;
	bool fold = false;
	bool foldSyntaxBased = true;
	bool foldComment = false;
	bool foldCommentMultiline = true;
	bool foldCommentExplicit = true;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere = false;
	bool foldPreprocessor = false;
	bool foldPreprocessorAtElse = false;
	bool foldCompact = false;
	bool foldAtElse = false;
	OptionsCPP() = default;
};
379 
// Descriptions of the word lists accepted by the lexer, in word list index
// order, terminated by a null pointer.
const char *const cppWordLists[] = {
	"Primary keywords and identifiers",       // word list 0
	"Secondary keywords and identifiers",     // word list 1
	"Documentation comment keywords",         // word list 2
	"Global classes and typedefs",            // word list 3
	"Preprocessor definitions",               // word list 4
	"Task marker and error marker keywords",  // word list 5
	nullptr,
};
389 
390 struct OptionSetCPP : public OptionSet<OptionsCPP> {
OptionSetCPP__anon4c384e1b0111::OptionSetCPP391 	OptionSetCPP() {
392 		DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
393 			"For C++ code, determines whether all preprocessor code is styled in the "
394 			"preprocessor style (0, the default) or only from the initial # to the end "
395 			"of the command word(1).");
396 
397 		DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
398 			"Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
399 
400 		DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
401 			"Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
402 
403 		DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
404 			"Set to 1 to update preprocessor definitions when #define found.");
405 
406 		DefineProperty("lexer.cpp.verbatim.strings.allow.escapes", &OptionsCPP::verbatimStringsAllowEscapes,
407 			"Set to 1 to allow verbatim strings to contain escape sequences.");
408 
409 		DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
410 			"Set to 1 to enable highlighting of triple-quoted strings.");
411 
412 		DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
413 			"Set to 1 to enable highlighting of hash-quoted strings.");
414 
415 		DefineProperty("lexer.cpp.backquoted.strings", &OptionsCPP::backQuotedStrings,
416 			"Set to 1 to enable highlighting of back-quoted raw strings .");
417 
418 		DefineProperty("lexer.cpp.escape.sequence", &OptionsCPP::escapeSequence,
419 			"Set to 1 to enable highlighting of escape sequences in strings");
420 
421 		DefineProperty("fold", &OptionsCPP::fold);
422 
423 		DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
424 			"Set this property to 0 to disable syntax based folding.");
425 
426 		DefineProperty("fold.comment", &OptionsCPP::foldComment,
427 			"This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
428 			"Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
429 			"at the end of a section that should fold.");
430 
431 		DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
432 			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
433 
434 		DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
435 			"Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
436 
437 		DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
438 			"The string to use for explicit fold start points, replacing the standard //{.");
439 
440 		DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
441 			"The string to use for explicit fold end points, replacing the standard //}.");
442 
443 		DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
444 			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
445 
446 		DefineProperty("fold.cpp.preprocessor.at.else", &OptionsCPP::foldPreprocessorAtElse,
447 			"This option enables folding on a preprocessor #else or #endif line of an #if statement.");
448 
449 		DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
450 			"This option enables folding preprocessor directives when using the C++ lexer. "
451 			"Includes C#'s explicit #region and #endregion folding directives.");
452 
453 		DefineProperty("fold.compact", &OptionsCPP::foldCompact);
454 
455 		DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
456 			"This option enables C++ folding on a \"} else {\" line of an if statement.");
457 
458 		DefineWordListSets(cppWordLists);
459 	}
460 };
461 
462 const char styleSubable[] = {SCE_C_IDENTIFIER, SCE_C_COMMENTDOCKEYWORD, 0};
463 
464 LexicalClass lexicalClasses[] = {
465 	// Lexer Cpp SCLEX_CPP SCE_C_:
466 	0, "SCE_C_DEFAULT", "default", "White space",
467 	1, "SCE_C_COMMENT", "comment", "Comment: /* */.",
468 	2, "SCE_C_COMMENTLINE", "comment line", "Line Comment: //.",
469 	3, "SCE_C_COMMENTDOC", "comment documentation", "Doc comment: block comments beginning with /** or /*!",
470 	4, "SCE_C_NUMBER", "literal numeric", "Number",
471 	5, "SCE_C_WORD", "keyword", "Keyword",
472 	6, "SCE_C_STRING", "literal string", "Double quoted string",
473 	7, "SCE_C_CHARACTER", "literal string character", "Single quoted string",
474 	8, "SCE_C_UUID", "literal uuid", "UUIDs (only in IDL)",
475 	9, "SCE_C_PREPROCESSOR", "preprocessor", "Preprocessor",
476 	10, "SCE_C_OPERATOR", "operator", "Operators",
477 	11, "SCE_C_IDENTIFIER", "identifier", "Identifiers",
478 	12, "SCE_C_STRINGEOL", "error literal string", "End of line where string is not closed",
479 	13, "SCE_C_VERBATIM", "literal string multiline raw", "Verbatim strings for C#",
480 	14, "SCE_C_REGEX", "literal regex", "Regular expressions for JavaScript",
481 	15, "SCE_C_COMMENTLINEDOC", "comment documentation line", "Doc Comment Line: line comments beginning with /// or //!.",
482 	16, "SCE_C_WORD2", "identifier", "Keywords2",
483 	17, "SCE_C_COMMENTDOCKEYWORD", "comment documentation keyword", "Comment keyword",
484 	18, "SCE_C_COMMENTDOCKEYWORDERROR", "error comment documentation keyword", "Comment keyword error",
485 	19, "SCE_C_GLOBALCLASS", "identifier", "Global class",
486 	20, "SCE_C_STRINGRAW", "literal string multiline raw", "Raw strings for C++0x",
487 	21, "SCE_C_TRIPLEVERBATIM", "literal string multiline raw", "Triple-quoted strings for Vala",
488 	22, "SCE_C_HASHQUOTEDSTRING", "literal string", "Hash-quoted strings for Pike",
489 	23, "SCE_C_PREPROCESSORCOMMENT", "comment preprocessor", "Preprocessor stream comment",
490 	24, "SCE_C_PREPROCESSORCOMMENTDOC", "comment preprocessor documentation", "Preprocessor stream doc comment",
491 	25, "SCE_C_USERLITERAL", "literal", "User defined literals",
492 	26, "SCE_C_TASKMARKER", "comment taskmarker", "Task Marker",
493 	27, "SCE_C_ESCAPESEQUENCE", "literal string escapesequence", "Escape sequence",
494 };
495 
496 const int sizeLexicalClasses = static_cast<int>(std::size(lexicalClasses));
497 
498 }
499 
500 class LexerCPP : public ILexer5 {
501 	bool caseSensitive;
502 	CharacterSet setWord;
503 	CharacterSet setNegationOp;
504 	CharacterSet setAddOp;
505 	CharacterSet setMultOp;
506 	CharacterSet setRelOp;
507 	CharacterSet setLogicalOp;
508 	CharacterSet setWordStart;
509 	PPStates vlls;
510 	std::vector<PPDefinition> ppDefineHistory;
511 	WordList keywords;
512 	WordList keywords2;
513 	WordList keywords3;
514 	WordList keywords4;
515 	WordList ppDefinitions;
516 	WordList markerList;
517 	struct SymbolValue {
518 		std::string value;
519 		std::string arguments;
520 		SymbolValue() noexcept = default;
SymbolValueLexerCPP::SymbolValue521 		SymbolValue(const std::string &value_, const std::string &arguments_) : value(value_), arguments(arguments_) {
522 		}
operator =LexerCPP::SymbolValue523 		SymbolValue &operator = (const std::string &value_) {
524 			value = value_;
525 			arguments.clear();
526 			return *this;
527 		}
IsMacroLexerCPP::SymbolValue528 		bool IsMacro() const noexcept {
529 			return !arguments.empty();
530 		}
531 	};
532 	typedef std::map<std::string, SymbolValue> SymbolTable;
533 	SymbolTable preprocessorDefinitionsStart;
534 	OptionsCPP options;
535 	OptionSetCPP osCPP;
536 	EscapeSequence escapeSeq;
537 	SparseState<std::string> rawStringTerminators;
538 	enum { ssIdentifier, ssDocKeyword };
539 	SubStyles subStyles;
540 	std::string returnBuffer;
541 public:
LexerCPP(bool caseSensitive_)542 	explicit LexerCPP(bool caseSensitive_) :
543 		caseSensitive(caseSensitive_),
544 		setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
545 		setNegationOp(CharacterSet::setNone, "!"),
546 		setAddOp(CharacterSet::setNone, "+-"),
547 		setMultOp(CharacterSet::setNone, "*/%"),
548 		setRelOp(CharacterSet::setNone, "=!<>"),
549 		setLogicalOp(CharacterSet::setNone, "|&"),
550 		subStyles(styleSubable, 0x80, 0x40, inactiveFlag) {
551 	}
552 	// Deleted so LexerCPP objects can not be copied.
553 	LexerCPP(const LexerCPP &) = delete;
554 	LexerCPP(LexerCPP &&) = delete;
555 	void operator=(const LexerCPP &) = delete;
556 	void operator=(LexerCPP &&) = delete;
~LexerCPP()557 	virtual ~LexerCPP() {
558 	}
Release()559 	void SCI_METHOD Release() noexcept override {
560 		delete this;
561 	}
Version() const562 	int SCI_METHOD Version() const noexcept override {
563 		return lvRelease5;
564 	}
PropertyNames()565 	const char * SCI_METHOD PropertyNames() override {
566 		return osCPP.PropertyNames();
567 	}
PropertyType(const char * name)568 	int SCI_METHOD PropertyType(const char *name) override {
569 		return osCPP.PropertyType(name);
570 	}
DescribeProperty(const char * name)571 	const char * SCI_METHOD DescribeProperty(const char *name) override {
572 		return osCPP.DescribeProperty(name);
573 	}
574 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
DescribeWordListSets()575 	const char * SCI_METHOD DescribeWordListSets() override {
576 		return osCPP.DescribeWordListSets();
577 	}
578 	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
579 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
580 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
581 
PrivateCall(int,void *)582 	void * SCI_METHOD PrivateCall(int, void *) noexcept override {
583 		return nullptr;
584 	}
585 
LineEndTypesSupported()586 	int SCI_METHOD LineEndTypesSupported() noexcept override {
587 		return SC_LINE_END_TYPE_UNICODE;
588 	}
589 
AllocateSubStyles(int styleBase,int numberStyles)590 	int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
591 		return subStyles.Allocate(styleBase, numberStyles);
592 	}
SubStylesStart(int styleBase)593 	int SCI_METHOD SubStylesStart(int styleBase) override {
594 		return subStyles.Start(styleBase);
595 	}
SubStylesLength(int styleBase)596 	int SCI_METHOD SubStylesLength(int styleBase) override {
597 		return subStyles.Length(styleBase);
598 	}
StyleFromSubStyle(int subStyle)599 	int SCI_METHOD StyleFromSubStyle(int subStyle) override {
600 		const int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
601 		const int inactive = subStyle & inactiveFlag;
602 		return styleBase | inactive;
603 	}
PrimaryStyleFromStyle(int style)604 	int SCI_METHOD PrimaryStyleFromStyle(int style) noexcept override {
605 		return MaskActive(style);
606 	}
FreeSubStyles()607 	void SCI_METHOD FreeSubStyles() override {
608 		subStyles.Free();
609 	}
SetIdentifiers(int style,const char * identifiers)610 	void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
611 		subStyles.SetIdentifiers(style, identifiers);
612 	}
DistanceToSecondaryStyles()613 	int SCI_METHOD DistanceToSecondaryStyles() noexcept override {
614 		return inactiveFlag;
615 	}
GetSubStyleBases()616 	const char * SCI_METHOD GetSubStyleBases() noexcept override {
617 		return styleSubable;
618 	}
NamedStyles()619 	int SCI_METHOD NamedStyles() override {
620 		return std::max(subStyles.LastAllocated() + 1,
621 			sizeLexicalClasses) +
622 			inactiveFlag;
623 	}
NameOfStyle(int style)624 	const char * SCI_METHOD NameOfStyle(int style) override {
625 		if (style >= NamedStyles())
626 			return "";
627 		if (style < sizeLexicalClasses)
628 			return lexicalClasses[style].name;
629 		// TODO: inactive and substyles
630 		return "";
631 	}
TagsOfStyle(int style)632 	const char * SCI_METHOD TagsOfStyle(int style) override {
633 		if (style >= NamedStyles())
634 			return "Excess";
635 		returnBuffer.clear();
636 		const int firstSubStyle = subStyles.FirstAllocated();
637 		if (firstSubStyle >= 0) {
638 			const int lastSubStyle = subStyles.LastAllocated();
639 			if (((style >= firstSubStyle) && (style <= (lastSubStyle))) ||
640 				((style >= firstSubStyle + inactiveFlag) && (style <= (lastSubStyle + inactiveFlag)))) {
641 				int styleActive = style;
642 				if (style > lastSubStyle) {
643 					returnBuffer = "inactive ";
644 					styleActive -= inactiveFlag;
645 				}
646 				const int styleMain = StyleFromSubStyle(styleActive);
647 				returnBuffer += lexicalClasses[styleMain].tags;
648 				return returnBuffer.c_str();
649 			}
650 		}
651 		if (style < sizeLexicalClasses)
652 			return lexicalClasses[style].tags;
653 		if (style >= inactiveFlag) {
654 			returnBuffer = "inactive ";
655 			const int styleActive = style - inactiveFlag;
656 			if (styleActive < sizeLexicalClasses)
657 				returnBuffer += lexicalClasses[styleActive].tags;
658 			else
659 				returnBuffer = "";
660 			return returnBuffer.c_str();
661 		}
662 		return "";
663 	}
DescriptionOfStyle(int style)664 	const char * SCI_METHOD DescriptionOfStyle(int style) override {
665 		if (style >= NamedStyles())
666 			return "";
667 		if (style < sizeLexicalClasses)
668 			return lexicalClasses[style].description;
669 		// TODO: inactive and substyles
670 		return "";
671 	}
672 
673 	// ILexer5 methods
GetName()674 	const char * SCI_METHOD GetName() override {
675 		return caseSensitive ? "cpp" : "cppnocase";
676 	}
GetIdentifier()677 	int SCI_METHOD  GetIdentifier() override {
678 		return caseSensitive ? SCLEX_CPP : SCLEX_CPPNOCASE;
679 	}
680 	const char * SCI_METHOD PropertyGet(const char *key) override;
681 
LexerFactoryCPP()682 	static ILexer5 *LexerFactoryCPP() {
683 		return new LexerCPP(true);
684 	}
LexerFactoryCPPInsensitive()685 	static ILexer5 *LexerFactoryCPPInsensitive() {
686 		return new LexerCPP(false);
687 	}
MaskActive(int style)688 	constexpr static int MaskActive(int style) noexcept {
689 		return style & ~inactiveFlag;
690 	}
691 	void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
692 	std::vector<std::string> Tokenize(const std::string &expr) const;
693 	bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
694 };
695 
PropertySet(const char * key,const char * val)696 Sci_Position SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
697 	if (osCPP.PropertySet(&options, key, val)) {
698 		if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
699 			setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
700 			if (options.identifiersAllowDollars) {
701 				setWord.Add('$');
702 			}
703 		}
704 		return 0;
705 	}
706 	return -1;
707 }
708 
PropertyGet(const char * key)709 const char * SCI_METHOD LexerCPP::PropertyGet(const char *key) {
710 	return osCPP.PropertyGet(key);
711 }
712 
WordListSet(int n,const char * wl)713 Sci_Position SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
714 	WordList *wordListN = nullptr;
715 	switch (n) {
716 	case 0:
717 		wordListN = &keywords;
718 		break;
719 	case 1:
720 		wordListN = &keywords2;
721 		break;
722 	case 2:
723 		wordListN = &keywords3;
724 		break;
725 	case 3:
726 		wordListN = &keywords4;
727 		break;
728 	case 4:
729 		wordListN = &ppDefinitions;
730 		break;
731 	case 5:
732 		wordListN = &markerList;
733 		break;
734 	}
735 	Sci_Position firstModification = -1;
736 	if (wordListN) {
737 		WordList wlNew;
738 		wlNew.Set(wl);
739 		if (*wordListN != wlNew) {
740 			wordListN->Set(wl);
741 			firstModification = 0;
742 			if (n == 4) {
743 				// Rebuild preprocessorDefinitions
744 				preprocessorDefinitionsStart.clear();
745 				for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
746 					const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
747 					const char *cpEquals = strchr(cpDefinition, '=');
748 					if (cpEquals) {
749 						std::string name(cpDefinition, cpEquals - cpDefinition);
750 						std::string val(cpEquals+1);
751 						const size_t bracket = name.find('(');
752 						const size_t bracketEnd = name.find(')');
753 						if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
754 							// Macro
755 							std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
756 							name = name.substr(0, bracket);
757 							preprocessorDefinitionsStart[name] = SymbolValue(val, args);
758 						} else {
759 							preprocessorDefinitionsStart[name] = val;
760 						}
761 					} else {
762 						std::string name(cpDefinition);
763 						std::string val("1");
764 						preprocessorDefinitionsStart[name] = val;
765 					}
766 				}
767 			}
768 		}
769 	}
770 	return firstModification;
771 }
772 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)773 void SCI_METHOD LexerCPP::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
774 	LexAccessor styler(pAccess);
775 
776 	CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
777 	CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
778 
779 	CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
780 
781 	setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
782 
783 	CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
784 
785 	if (options.identifiersAllowDollars) {
786 		setWordStart.Add('$');
787 	}
788 
789 	int chPrevNonWhite = ' ';
790 	int visibleChars = 0;
791 	bool lastWordWasUUID = false;
792 	int styleBeforeDCKeyword = SCE_C_DEFAULT;
793 	int styleBeforeTaskMarker = SCE_C_DEFAULT;
794 	bool continuationLine = false;
795 	bool isIncludePreprocessor = false;
796 	bool isStringInPreprocessor = false;
797 	bool inRERange = false;
798 	bool seenDocKeyBrace = false;
799 
800 	Sci_Position lineCurrent = styler.GetLine(startPos);
801 	if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
802       (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
803       (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
804 		// Set continuationLine if last character of previous line is '\'
805 		if (lineCurrent > 0) {
806 			const Sci_Position endLinePrevious = styler.LineEnd(lineCurrent - 1);
807 			if (endLinePrevious > 0) {
808 				continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
809 			}
810 		}
811 	}
812 
813 	// look back to set chPrevNonWhite properly for better regex colouring
814 	if (startPos > 0) {
815 		Sci_Position back = startPos;
816 		while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
817 			;
818 		if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
819 			chPrevNonWhite = styler.SafeGetCharAt(back);
820 		}
821 	}
822 
823 	StyleContext sc(startPos, length, initStyle, styler);
824 	LinePPState preproc = vlls.ForLine(lineCurrent);
825 
826 	bool definitionsChanged = false;
827 
828 	// Truncate ppDefineHistory before current line
829 
830 	if (!options.updatePreprocessor)
831 		ppDefineHistory.clear();
832 
833 	std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(),
834 		[lineCurrent](const PPDefinition &p) noexcept { return p.line >= lineCurrent; });
835 	if (itInvalid != ppDefineHistory.end()) {
836 		ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
837 		definitionsChanged = true;
838 	}
839 
840 	SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
841 	for (const PPDefinition &ppDef : ppDefineHistory) {
842 		if (ppDef.isUndef)
843 			preprocessorDefinitions.erase(ppDef.key);
844 		else
845 			preprocessorDefinitions[ppDef.key] = SymbolValue(ppDef.value, ppDef.arguments);
846 	}
847 
848 	std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
849 	SparseState<std::string> rawSTNew(lineCurrent);
850 
851 	int activitySet = preproc.ActiveState();
852 
853 	const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_C_IDENTIFIER);
854 	const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_C_COMMENTDOCKEYWORD);
855 
856 	Sci_PositionU lineEndNext = styler.LineEnd(lineCurrent);
857 
858 	for (; sc.More();) {
859 
860 		if (sc.atLineStart) {
861 			// Using MaskActive() is not needed in the following statement.
862 			// Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
863 			if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
864 				// Prevent SCE_C_STRINGEOL from leaking back to previous line which
865 				// ends with a line continuation by locking in the state up to this position.
866 				sc.SetState(sc.state);
867 			}
868 			if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
869 				sc.SetState(SCE_C_DEFAULT|activitySet);
870 			}
871 			// Reset states to beginning of colourise so no surprises
872 			// if different sets of lines lexed.
873 			visibleChars = 0;
874 			lastWordWasUUID = false;
875 			isIncludePreprocessor = false;
876 			inRERange = false;
877 			if (preproc.IsInactive()) {
878 				activitySet = inactiveFlag;
879 				sc.SetState(sc.state | activitySet);
880 			}
881 		}
882 
883 		if (sc.atLineEnd) {
884 			lineCurrent++;
885 			lineEndNext = styler.LineEnd(lineCurrent);
886 			vlls.Add(lineCurrent, preproc);
887 			if (rawStringTerminator != "") {
888 				rawSTNew.Set(lineCurrent-1, rawStringTerminator);
889 			}
890 		}
891 
892 		// Handle line continuation generically.
893 		if (sc.ch == '\\') {
894 			if ((sc.currentPos+1) >= lineEndNext) {
895 				lineCurrent++;
896 				lineEndNext = styler.LineEnd(lineCurrent);
897 				vlls.Add(lineCurrent, preproc);
898 				if (rawStringTerminator != "") {
899 					rawSTNew.Set(lineCurrent-1, rawStringTerminator);
900 				}
901 				sc.Forward();
902 				if (sc.ch == '\r' && sc.chNext == '\n') {
903 					// Even in UTF-8, \r and \n are separate
904 					sc.Forward();
905 				}
906 				continuationLine = true;
907 				sc.Forward();
908 				continue;
909 			}
910 		}
911 
912 		const bool atLineEndBeforeSwitch = sc.atLineEnd;
913 
914 		// Determine if the current state should terminate.
915 		switch (MaskActive(sc.state)) {
916 			case SCE_C_OPERATOR:
917 				sc.SetState(SCE_C_DEFAULT|activitySet);
918 				break;
919 			case SCE_C_NUMBER:
920 				// We accept almost anything because of hex. and number suffixes
921 				if (sc.ch == '_') {
922 					sc.ChangeState(SCE_C_USERLITERAL|activitySet);
923 				} else if (!(setWord.Contains(sc.ch)
924 				   || (sc.ch == '\'')
925 				   || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
926 				                                          sc.chPrev == 'p' || sc.chPrev == 'P')))) {
927 					sc.SetState(SCE_C_DEFAULT|activitySet);
928 				}
929 				break;
930 			case SCE_C_USERLITERAL:
931 				if (!(setWord.Contains(sc.ch)))
932 					sc.SetState(SCE_C_DEFAULT|activitySet);
933 				break;
934 			case SCE_C_IDENTIFIER:
935 				if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
936 					char s[1000];
937 					if (caseSensitive) {
938 						sc.GetCurrent(s, sizeof(s));
939 					} else {
940 						sc.GetCurrentLowered(s, sizeof(s));
941 					}
942 					if (keywords.InList(s)) {
943 						lastWordWasUUID = strcmp(s, "uuid") == 0;
944 						sc.ChangeState(SCE_C_WORD|activitySet);
945 					} else if (keywords2.InList(s)) {
946 						sc.ChangeState(SCE_C_WORD2|activitySet);
947 					} else if (keywords4.InList(s)) {
948 						sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
949 					} else {
950 						int subStyle = classifierIdentifiers.ValueFor(s);
951 						if (subStyle >= 0) {
952 							sc.ChangeState(subStyle|activitySet);
953 						}
954 					}
955 					const bool literalString = sc.ch == '\"';
956 					if (literalString || sc.ch == '\'') {
957 						size_t lenS = strlen(s);
958 						const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
959 						if (raw)
960 							s[lenS--] = '\0';
961 						const bool valid =
962 							(lenS == 0) ||
963 							((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
964 							((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
965 						if (valid) {
966 							if (literalString) {
967 								if (raw) {
968 									// Set the style of the string prefix to SCE_C_STRINGRAW but then change to
969 									// SCE_C_DEFAULT as that allows the raw string start code to run.
970 									sc.ChangeState(SCE_C_STRINGRAW|activitySet);
971 									sc.SetState(SCE_C_DEFAULT|activitySet);
972 								} else {
973 									sc.ChangeState(SCE_C_STRING|activitySet);
974 								}
975 							} else {
976 								sc.ChangeState(SCE_C_CHARACTER|activitySet);
977 							}
978 						} else {
979 							sc.SetState(SCE_C_DEFAULT | activitySet);
980 						}
981 					} else {
982 						sc.SetState(SCE_C_DEFAULT|activitySet);
983 					}
984 				}
985 				break;
986 			case SCE_C_PREPROCESSOR:
987 				if (options.stylingWithinPreprocessor) {
988 					if (IsASpace(sc.ch) || (sc.ch == '(')) {
989 						sc.SetState(SCE_C_DEFAULT|activitySet);
990 					}
991 				} else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
992 					isStringInPreprocessor = false;
993 				} else if (!isStringInPreprocessor) {
994 					if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
995 						isStringInPreprocessor = true;
996 					} else if (sc.Match('/', '*')) {
997 						if (sc.Match("/**") || sc.Match("/*!")) {
998 							sc.SetState(SCE_C_PREPROCESSORCOMMENTDOC|activitySet);
999 						} else {
1000 							sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
1001 						}
1002 						sc.Forward();	// Eat the *
1003 					} else if (sc.Match('/', '/')) {
1004 						sc.SetState(SCE_C_DEFAULT|activitySet);
1005 					}
1006 				}
1007 				break;
1008 			case SCE_C_PREPROCESSORCOMMENT:
1009 			case SCE_C_PREPROCESSORCOMMENTDOC:
1010 				if (sc.Match('*', '/')) {
1011 					sc.Forward();
1012 					sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
1013 					continue;	// Without advancing in case of '\'.
1014 				}
1015 				break;
1016 			case SCE_C_COMMENT:
1017 				if (sc.Match('*', '/')) {
1018 					sc.Forward();
1019 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1020 				} else {
1021 					styleBeforeTaskMarker = SCE_C_COMMENT;
1022 					highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
1023 				}
1024 				break;
1025 			case SCE_C_COMMENTDOC:
1026 				if (sc.Match('*', '/')) {
1027 					sc.Forward();
1028 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1029 				} else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
1030 					// Verify that we have the conditions to mark a comment-doc-keyword
1031 					if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
1032 						styleBeforeDCKeyword = SCE_C_COMMENTDOC;
1033 						sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
1034 					}
1035 				}
1036 				break;
1037 			case SCE_C_COMMENTLINE:
1038 				if (sc.atLineStart && !continuationLine) {
1039 					sc.SetState(SCE_C_DEFAULT|activitySet);
1040 				} else {
1041 					styleBeforeTaskMarker = SCE_C_COMMENTLINE;
1042 					highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
1043 				}
1044 				break;
1045 			case SCE_C_COMMENTLINEDOC:
1046 				if (sc.atLineStart && !continuationLine) {
1047 					sc.SetState(SCE_C_DEFAULT|activitySet);
1048 				} else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
1049 					// Verify that we have the conditions to mark a comment-doc-keyword
1050 					if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
1051 						styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
1052 						sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
1053 					}
1054 				}
1055 				break;
1056 			case SCE_C_COMMENTDOCKEYWORD:
1057 				if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
1058 					sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
1059 					sc.Forward();
1060 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1061 					seenDocKeyBrace = false;
1062 				} else if (sc.ch == '[' || sc.ch == '{') {
1063 					seenDocKeyBrace = true;
1064 				} else if (!setDoxygen.Contains(sc.ch)
1065 				           && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
1066 					char s[100];
1067 					if (caseSensitive) {
1068 						sc.GetCurrent(s, sizeof(s));
1069 					} else {
1070 						sc.GetCurrentLowered(s, sizeof(s));
1071 					}
1072 					if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
1073 						sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
1074 					} else if (!keywords3.InList(s + 1)) {
1075 						int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
1076 						if (subStyleCDKW >= 0) {
1077 							sc.ChangeState(subStyleCDKW|activitySet);
1078 						} else {
1079 							sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
1080 						}
1081 					}
1082 					sc.SetState(styleBeforeDCKeyword|activitySet);
1083 					seenDocKeyBrace = false;
1084 				}
1085 				break;
1086 			case SCE_C_STRING:
1087 				if (sc.atLineEnd) {
1088 					sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1089 				} else if (isIncludePreprocessor) {
1090 					if (sc.ch == '>') {
1091 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1092 						isIncludePreprocessor = false;
1093 					}
1094 				} else if (sc.ch == '\\') {
1095 					if (options.escapeSequence) {
1096 						sc.SetState(SCE_C_ESCAPESEQUENCE|activitySet);
1097 						escapeSeq.resetEscapeState(sc.chNext);
1098 					}
1099 					sc.Forward(); // Skip all characters after the backslash
1100 				} else if (sc.ch == '\"') {
1101 					if (sc.chNext == '_') {
1102 						sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1103 					} else {
1104 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1105 					}
1106 				}
1107 				break;
1108 			case SCE_C_ESCAPESEQUENCE:
1109 				escapeSeq.consumeDigit();
1110 				if (!escapeSeq.atEscapeEnd(sc.ch)) {
1111 					break;
1112 				}
1113 				if (sc.ch == '"') {
1114 					sc.SetState(SCE_C_STRING|activitySet);
1115 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1116 				} else if (sc.ch == '\\') {
1117 					escapeSeq.resetEscapeState(sc.chNext);
1118 					sc.Forward();
1119 				} else {
1120 					sc.SetState(SCE_C_STRING|activitySet);
1121 					if (sc.atLineEnd) {
1122 						sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1123 					}
1124 				}
1125 				break;
1126 			case SCE_C_HASHQUOTEDSTRING:
1127 				if (sc.ch == '\\') {
1128 					if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1129 						sc.Forward();
1130 					}
1131 				} else if (sc.ch == '\"') {
1132 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1133 				}
1134 				break;
1135 			case SCE_C_STRINGRAW:
1136 				if (sc.Match(rawStringTerminator.c_str())) {
1137 					for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
1138 						sc.Forward();
1139 					sc.SetState(SCE_C_DEFAULT|activitySet);
1140 					rawStringTerminator = "";
1141 				}
1142 				break;
1143 			case SCE_C_CHARACTER:
1144 				if (sc.atLineEnd) {
1145 					sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1146 				} else if (sc.ch == '\\') {
1147 					if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1148 						sc.Forward();
1149 					}
1150 				} else if (sc.ch == '\'') {
1151 					if (sc.chNext == '_') {
1152 						sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1153 					} else {
1154 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1155 					}
1156 				}
1157 				break;
1158 			case SCE_C_REGEX:
1159 				if (sc.atLineStart) {
1160 					sc.SetState(SCE_C_DEFAULT|activitySet);
1161 				} else if (!inRERange && sc.ch == '/') {
1162 					sc.Forward();
1163 					while (IsLowerCase(sc.ch))
1164 						sc.Forward();    // gobble regex flags
1165 					sc.SetState(SCE_C_DEFAULT|activitySet);
1166 				} else if (sc.ch == '\\' && ((sc.currentPos+1) < lineEndNext)) {
1167 					// Gobble up the escaped character
1168 					sc.Forward();
1169 				} else if (sc.ch == '[') {
1170 					inRERange = true;
1171 				} else if (sc.ch == ']') {
1172 					inRERange = false;
1173 				}
1174 				break;
1175 			case SCE_C_STRINGEOL:
1176 				if (sc.atLineStart) {
1177 					sc.SetState(SCE_C_DEFAULT|activitySet);
1178 				}
1179 				break;
1180 			case SCE_C_VERBATIM:
1181 				if (options.verbatimStringsAllowEscapes && (sc.ch == '\\')) {
1182 					sc.Forward(); // Skip all characters after the backslash
1183 				} else if (sc.ch == '\"') {
1184 					if (sc.chNext == '\"') {
1185 						sc.Forward();
1186 					} else {
1187 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1188 					}
1189 				}
1190 				break;
1191 			case SCE_C_TRIPLEVERBATIM:
1192 				if (sc.Match(R"(""")")) {
1193 					while (sc.Match('"')) {
1194 						sc.Forward();
1195 					}
1196 					sc.SetState(SCE_C_DEFAULT|activitySet);
1197 				}
1198 				break;
1199 			case SCE_C_UUID:
1200 				if (sc.atLineEnd || sc.ch == ')') {
1201 					sc.SetState(SCE_C_DEFAULT|activitySet);
1202 				}
1203 				break;
1204 			case SCE_C_TASKMARKER:
1205 				if (isoperator(sc.ch) || IsASpace(sc.ch)) {
1206 					sc.SetState(styleBeforeTaskMarker|activitySet);
1207 					styleBeforeTaskMarker = SCE_C_DEFAULT;
1208 				}
1209 		}
1210 
1211 		if (sc.atLineEnd && !atLineEndBeforeSwitch) {
1212 			// State exit processing consumed characters up to end of line.
1213 			lineCurrent++;
1214 			lineEndNext = styler.LineEnd(lineCurrent);
1215 			vlls.Add(lineCurrent, preproc);
1216 		}
1217 
1218 		// Determine if a new state should be entered.
1219 		if (MaskActive(sc.state) == SCE_C_DEFAULT) {
1220 			if (sc.Match('@', '\"')) {
1221 				sc.SetState(SCE_C_VERBATIM|activitySet);
1222 				sc.Forward();
1223 			} else if (options.triplequotedStrings && sc.Match(R"(""")")) {
1224 				sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
1225 				sc.Forward(2);
1226 			} else if (options.hashquotedStrings && sc.Match('#', '\"')) {
1227 				sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
1228 				sc.Forward();
1229 			} else if (options.backQuotedStrings && sc.Match('`')) {
1230 				sc.SetState(SCE_C_STRINGRAW|activitySet);
1231 				rawStringTerminator = "`";
1232 			} else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1233 				if (lastWordWasUUID) {
1234 					sc.SetState(SCE_C_UUID|activitySet);
1235 					lastWordWasUUID = false;
1236 				} else {
1237 					sc.SetState(SCE_C_NUMBER|activitySet);
1238 				}
1239 			} else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
1240 				if (lastWordWasUUID) {
1241 					sc.SetState(SCE_C_UUID|activitySet);
1242 					lastWordWasUUID = false;
1243 				} else {
1244 					sc.SetState(SCE_C_IDENTIFIER|activitySet);
1245 				}
1246 			} else if (sc.Match('/', '*')) {
1247 				if (sc.Match("/**") || sc.Match("/*!")) {	// Support of Qt/Doxygen doc. style
1248 					sc.SetState(SCE_C_COMMENTDOC|activitySet);
1249 				} else {
1250 					sc.SetState(SCE_C_COMMENT|activitySet);
1251 				}
1252 				sc.Forward();	// Eat the * so it isn't used for the end of the comment
1253 			} else if (sc.Match('/', '/')) {
1254 				if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
1255 					// Support of Qt/Doxygen doc. style
1256 					sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
1257 				else
1258 					sc.SetState(SCE_C_COMMENTLINE|activitySet);
1259 			} else if (sc.ch == '/'
1260 				   && (setOKBeforeRE.Contains(chPrevNonWhite)
1261 				       || followsReturnKeyword(sc, styler))
1262 				   && (!setCouldBePostOp.Contains(chPrevNonWhite)
1263 				       || !FollowsPostfixOperator(sc, styler))) {
1264 				sc.SetState(SCE_C_REGEX|activitySet);	// JavaScript's RegEx
1265 				inRERange = false;
1266 			} else if (sc.ch == '\"') {
1267 				if (sc.chPrev == 'R') {
1268 					styler.Flush();
1269 					if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
1270 						sc.SetState(SCE_C_STRINGRAW|activitySet);
1271 						rawStringTerminator = ")";
1272 						for (Sci_Position termPos = sc.currentPos + 1;; termPos++) {
1273 							const char chTerminator = styler.SafeGetCharAt(termPos, '(');
1274 							if (chTerminator == '(')
1275 								break;
1276 							rawStringTerminator += chTerminator;
1277 						}
1278 						rawStringTerminator += '\"';
1279 					} else {
1280 						sc.SetState(SCE_C_STRING|activitySet);
1281 					}
1282 				} else {
1283 					sc.SetState(SCE_C_STRING|activitySet);
1284 				}
1285 				isIncludePreprocessor = false;	// ensure that '>' won't end the string
1286 			} else if (isIncludePreprocessor && sc.ch == '<') {
1287 				sc.SetState(SCE_C_STRING|activitySet);
1288 			} else if (sc.ch == '\'') {
1289 				sc.SetState(SCE_C_CHARACTER|activitySet);
1290 			} else if (sc.ch == '#' && visibleChars == 0) {
1291 				// Preprocessor commands are alone on their line
1292 				sc.SetState(SCE_C_PREPROCESSOR|activitySet);
1293 				// Skip whitespace between # and preprocessor word
1294 				do {
1295 					sc.Forward();
1296 				} while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
1297 				if (sc.atLineEnd) {
1298 					sc.SetState(SCE_C_DEFAULT|activitySet);
1299 				} else if (sc.Match("include")) {
1300 					isIncludePreprocessor = true;
1301 				} else {
1302 					if (options.trackPreprocessor) {
1303 						// If #if is nested too deeply (>31 levels) the active/inactive appearance
1304 						// will stop reflecting the code.
1305 						if (sc.Match("ifdef") || sc.Match("ifndef")) {
1306 							const bool isIfDef = sc.Match("ifdef");
1307 							const int startRest = isIfDef ? 5 : 6;
1308 							std::string restOfLine = GetRestOfLine(styler, sc.currentPos + startRest + 1, false);
1309 							bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
1310 							preproc.StartSection(isIfDef == foundDef);
1311 						} else if (sc.Match("if")) {
1312 							std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1313 							const bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1314 							preproc.StartSection(ifGood);
1315 						} else if (sc.Match("else")) {
1316 							// #else is shown as active if either preceding or following section is active
1317 							// as that means that it contributed to the result.
1318 							if (!preproc.CurrentIfTaken()) {
1319 								// Inactive, may become active if parent scope active
1320 								assert(sc.state == (SCE_C_PREPROCESSOR|inactiveFlag));
1321 								preproc.InvertCurrentLevel();
1322 								activitySet = preproc.ActiveState();
1323 								// If following is active then show "else" as active
1324 								if (!activitySet)
1325 									sc.ChangeState(SCE_C_PREPROCESSOR);
1326 							} else if (preproc.IsActive()) {
1327 								// Active -> inactive
1328 								assert(sc.state == SCE_C_PREPROCESSOR);
1329 								preproc.InvertCurrentLevel();
1330 								activitySet = preproc.ActiveState();
1331 								// Continue to show "else" as active as it ends active section.
1332 							}
1333 						} else if (sc.Match("elif")) {
1334 							// Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
1335 							// #elif is shown as active if either preceding or following section is active
1336 							// as that means that it contributed to the result.
1337 							if (!preproc.CurrentIfTaken()) {
1338 								// Inactive, if expression true then may become active if parent scope active
1339 								assert(sc.state == (SCE_C_PREPROCESSOR|inactiveFlag));
1340 								// Similar to #if
1341 								std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 4, true);
1342 								const bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1343 								if (ifGood) {
1344 									preproc.InvertCurrentLevel();
1345 									activitySet = preproc.ActiveState();
1346 									if (!activitySet)
1347 										sc.ChangeState(SCE_C_PREPROCESSOR);
1348 								}
1349 							} else if (preproc.IsActive()) {
1350 								// Active -> inactive
1351 								assert(sc.state == SCE_C_PREPROCESSOR);
1352 								preproc.InvertCurrentLevel();
1353 								activitySet = preproc.ActiveState();
1354 								// Continue to show "elif" as active as it ends active section.
1355 							}
1356 						} else if (sc.Match("endif")) {
1357 							preproc.EndSection();
1358 							activitySet = preproc.ActiveState();
1359 							sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1360 						} else if (sc.Match("define")) {
1361 							if (options.updatePreprocessor && preproc.IsActive()) {
1362 								std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
1363 								size_t startName = 0;
1364 								while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
1365 									startName++;
1366 								size_t endName = startName;
1367 								while ((endName < restOfLine.length()) && setWord.Contains(restOfLine[endName]))
1368 									endName++;
1369 								std::string key = restOfLine.substr(startName, endName-startName);
1370 								if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
1371 									// Macro
1372 									size_t endArgs = endName;
1373 									while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
1374 										endArgs++;
1375 									std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
1376 									size_t startValue = endArgs+1;
1377 									while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1378 										startValue++;
1379 									std::string value;
1380 									if (startValue < restOfLine.length())
1381 										value = restOfLine.substr(startValue);
1382 									preprocessorDefinitions[key] = SymbolValue(value, args);
1383 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
1384 									definitionsChanged = true;
1385 								} else {
1386 									// Value
1387 									size_t startValue = endName;
1388 									while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1389 										startValue++;
1390 									std::string value = restOfLine.substr(startValue);
1391 									if (OnlySpaceOrTab(value))
1392 										value = "1";	// No value defaults to 1
1393 									preprocessorDefinitions[key] = value;
1394 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
1395 									definitionsChanged = true;
1396 								}
1397 							}
1398 						} else if (sc.Match("undef")) {
1399 							if (options.updatePreprocessor && preproc.IsActive()) {
1400 								const std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, false);
1401 								std::vector<std::string> tokens = Tokenize(restOfLine);
1402 								if (tokens.size() >= 1) {
1403 									const std::string key = tokens[0];
1404 									preprocessorDefinitions.erase(key);
1405 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
1406 									definitionsChanged = true;
1407 								}
1408 							}
1409 						}
1410 					}
1411 				}
1412 			} else if (isoperator(sc.ch)) {
1413 				sc.SetState(SCE_C_OPERATOR|activitySet);
1414 			}
1415 		}
1416 
1417 		if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
1418 			chPrevNonWhite = sc.ch;
1419 			visibleChars++;
1420 		}
1421 		continuationLine = false;
1422 		sc.Forward();
1423 	}
1424 	const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
1425 	if (definitionsChanged || rawStringsChanged)
1426 		styler.ChangeLexerState(startPos, startPos + length);
1427 	sc.Complete();
1428 }
1429 
// Store both the current line's fold level and the next line's in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "} else {".
1433 
Fold(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)1434 void SCI_METHOD LexerCPP::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1435 
1436 	if (!options.fold)
1437 		return;
1438 
1439 	LexAccessor styler(pAccess);
1440 
1441 	const Sci_PositionU endPos = startPos + length;
1442 	int visibleChars = 0;
1443 	bool inLineComment = false;
1444 	Sci_Position lineCurrent = styler.GetLine(startPos);
1445 	int levelCurrent = SC_FOLDLEVELBASE;
1446 	if (lineCurrent > 0)
1447 		levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
1448 	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
1449 	int levelMinCurrent = levelCurrent;
1450 	int levelNext = levelCurrent;
1451 	char chNext = styler[startPos];
1452 	int styleNext = MaskActive(styler.StyleAt(startPos));
1453 	int style = MaskActive(initStyle);
1454 	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
1455 	for (Sci_PositionU i = startPos; i < endPos; i++) {
1456 		const char ch = chNext;
1457 		chNext = styler.SafeGetCharAt(i + 1);
1458 		const int stylePrev = style;
1459 		style = styleNext;
1460 		styleNext = MaskActive(styler.StyleAt(i + 1));
1461 		const bool atEOL = i == (lineStartNext-1);
1462 		if ((style == SCE_C_COMMENTLINE) || (style == SCE_C_COMMENTLINEDOC))
1463 			inLineComment = true;
1464 		if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
1465 			if (!IsStreamCommentStyle(stylePrev)) {
1466 				levelNext++;
1467 			} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
1468 				// Comments don't end at end of line and the next character may be unstyled.
1469 				levelNext--;
1470 			}
1471 		}
1472 		if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
1473 			if (userDefinedFoldMarkers) {
1474 				if (styler.Match(i, options.foldExplicitStart.c_str())) {
1475 					levelNext++;
1476 				} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
1477 					levelNext--;
1478 				}
1479 			} else {
1480 				if ((ch == '/') && (chNext == '/')) {
1481 					const char chNext2 = styler.SafeGetCharAt(i + 2);
1482 					if (chNext2 == '{') {
1483 						levelNext++;
1484 					} else if (chNext2 == '}') {
1485 						levelNext--;
1486 					}
1487 				}
1488 			}
1489 		}
1490 		if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
1491 			if (ch == '#') {
1492 				Sci_PositionU j = i + 1;
1493 				while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1494 					j++;
1495 				}
1496 				if (styler.Match(j, "region") || styler.Match(j, "if")) {
1497 					levelNext++;
1498 				} else if (styler.Match(j, "end")) {
1499 					levelNext--;
1500 				}
1501 
1502 				if (options.foldPreprocessorAtElse && (styler.Match(j, "else") || styler.Match(j, "elif"))) {
1503 					levelMinCurrent--;
1504 				}
1505 			}
1506 		}
1507 		if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
1508 			if (ch == '{' || ch == '[' || ch == '(') {
1509 				// Measure the minimum before a '{' to allow
1510 				// folding on "} else {"
1511 				if (options.foldAtElse && levelMinCurrent > levelNext) {
1512 					levelMinCurrent = levelNext;
1513 				}
1514 				levelNext++;
1515 			} else if (ch == '}' || ch == ']' || ch == ')') {
1516 				levelNext--;
1517 			}
1518 		}
1519 		if (!IsASpace(ch))
1520 			visibleChars++;
1521 		if (atEOL || (i == endPos-1)) {
1522 			int levelUse = levelCurrent;
1523 			if ((options.foldSyntaxBased && options.foldAtElse) ||
1524 				(options.foldPreprocessor && options.foldPreprocessorAtElse)
1525 			) {
1526 				levelUse = levelMinCurrent;
1527 			}
1528 			int lev = levelUse | levelNext << 16;
1529 			if (visibleChars == 0 && options.foldCompact)
1530 				lev |= SC_FOLDLEVELWHITEFLAG;
1531 			if (levelUse < levelNext)
1532 				lev |= SC_FOLDLEVELHEADERFLAG;
1533 			if (lev != styler.LevelAt(lineCurrent)) {
1534 				styler.SetLevel(lineCurrent, lev);
1535 			}
1536 			lineCurrent++;
1537 			lineStartNext = styler.LineStart(lineCurrent+1);
1538 			levelCurrent = levelNext;
1539 			levelMinCurrent = levelCurrent;
1540 			if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
1541 				// There is an empty line at end of file so give it same level and empty
1542 				styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1543 			}
1544 			visibleChars = 0;
1545 			inLineComment = false;
1546 		}
1547 	}
1548 }
1549 
EvaluateTokens(std::vector<std::string> & tokens,const SymbolTable & preprocessorDefinitions)1550 void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
1551 
1552 	// Remove whitespace tokens
1553 	tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
1554 
1555 	// Evaluate defined statements to either 0 or 1
1556 	for (size_t i=0; (i+1)<tokens.size();) {
1557 		if (tokens[i] == "defined") {
1558 			const char *val = "0";
1559 			if (tokens[i+1] == "(") {
1560 				if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
1561 					// defined()
1562 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1563 				} else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1564 					// defined(<identifier>)
1565 					SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
1566 					if (it != preprocessorDefinitions.end()) {
1567 						val = "1";
1568 					}
1569 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1570 				} else {
1571 					// Spurious '(' so erase as more likely to result in false
1572 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1573 				}
1574 			} else {
1575 				// defined <identifier>
1576 				SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
1577 				if (it != preprocessorDefinitions.end()) {
1578 					val = "1";
1579 				}
1580 				tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1581 			}
1582 			tokens[i] = val;
1583 		} else {
1584 			i++;
1585 		}
1586 	}
1587 
1588 	// Evaluate identifiers
1589 	constexpr size_t maxIterations = 100;
1590 	size_t iterations = 0;	// Limit number of iterations in case there is a recursive macro.
1591 	for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
1592 		iterations++;
1593 		if (setWordStart.Contains(tokens[i][0])) {
1594 			SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
1595 			if (it != preprocessorDefinitions.end()) {
1596 				// Tokenize value
1597 				std::vector<std::string> macroTokens = Tokenize(it->second.value);
1598 				if (it->second.IsMacro()) {
1599 					if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
1600 						// Create map of argument name to value
1601 						std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
1602 						std::map<std::string, std::string> arguments;
1603 						size_t arg = 0;
1604 						size_t tok = i+2;
1605 						while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
1606 							if (tokens.at(tok) != ",") {
1607 								arguments[argumentNames.at(arg)] = tokens.at(tok);
1608 								arg++;
1609 							}
1610 							tok++;
1611 						}
1612 
1613 						// Remove invocation
1614 						tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
1615 
1616 						// Substitute values into macro
1617 						macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
1618 
1619 						for (size_t iMacro = 0; iMacro < macroTokens.size();) {
1620 							if (setWordStart.Contains(macroTokens[iMacro][0])) {
1621 								std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
1622 								if (itFind != arguments.end()) {
1623 									// TODO: Possible that value will be expression so should insert tokenized form
1624 									macroTokens[iMacro] = itFind->second;
1625 								}
1626 							}
1627 							iMacro++;
1628 						}
1629 
1630 						// Insert results back into tokens
1631 						tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1632 
1633 					} else {
1634 						i++;
1635 					}
1636 				} else {
1637 					// Remove invocation
1638 					tokens.erase(tokens.begin() + i);
1639 					// Insert results back into tokens
1640 					tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1641 				}
1642 			} else {
1643 				// Identifier not found and value defaults to zero
1644 				tokens[i] = "0";
1645 			}
1646 		} else {
1647 			i++;
1648 		}
1649 	}
1650 
1651 	// Find bracketed subexpressions and recurse on them
1652 	BracketPair bracketPair = FindBracketPair(tokens);
1653 	while (bracketPair.itBracket != tokens.end()) {
1654 		std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
1655 		EvaluateTokens(inBracket, preprocessorDefinitions);
1656 
1657 		// The insertion is done before the removal because there were failures with the opposite approach
1658 		tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
1659 
1660 		bracketPair = FindBracketPair(tokens);
1661 		tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
1662 
1663 		bracketPair = FindBracketPair(tokens);
1664 	}
1665 
1666 	// Evaluate logical negations
1667 	for (size_t j=0; (j+1)<tokens.size();) {
1668 		if (setNegationOp.Contains(tokens[j][0])) {
1669 			int isTrue = atoi(tokens[j+1].c_str());
1670 			if (tokens[j] == "!")
1671 				isTrue = !isTrue;
1672 			std::vector<std::string>::iterator itInsert =
1673 				tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1674 			tokens.insert(itInsert, isTrue ? "1" : "0");
1675 		} else {
1676 			j++;
1677 		}
1678 	}
1679 
1680 	// Evaluate expressions in precedence order
1681 	enum precedence { precMult, precAdd, precRelative
1682 		, precLogical, /* end marker */ precLast };
1683 	for (int prec = precMult; prec < precLast; prec++) {
1684 		// Looking at 3 tokens at a time so end at 2 before end
1685 		for (size_t k=0; (k+2)<tokens.size();) {
1686 			const char chOp = tokens[k+1][0];
1687 			if (
1688 				((prec==precMult) && setMultOp.Contains(chOp)) ||
1689 				((prec==precAdd) && setAddOp.Contains(chOp)) ||
1690 				((prec==precRelative) && setRelOp.Contains(chOp)) ||
1691 				((prec==precLogical) && setLogicalOp.Contains(chOp))
1692 				) {
1693 				const int valA = atoi(tokens[k].c_str());
1694 				const int valB = atoi(tokens[k+2].c_str());
1695 				int result = 0;
1696 				if (tokens[k+1] == "+")
1697 					result = valA + valB;
1698 				else if (tokens[k+1] == "-")
1699 					result = valA - valB;
1700 				else if (tokens[k+1] == "*")
1701 					result = valA * valB;
1702 				else if (tokens[k+1] == "/")
1703 					result = valA / (valB ? valB : 1);
1704 				else if (tokens[k+1] == "%")
1705 					result = valA % (valB ? valB : 1);
1706 				else if (tokens[k+1] == "<")
1707 					result = valA < valB;
1708 				else if (tokens[k+1] == "<=")
1709 					result = valA <= valB;
1710 				else if (tokens[k+1] == ">")
1711 					result = valA > valB;
1712 				else if (tokens[k+1] == ">=")
1713 					result = valA >= valB;
1714 				else if (tokens[k+1] == "==")
1715 					result = valA == valB;
1716 				else if (tokens[k+1] == "!=")
1717 					result = valA != valB;
1718 				else if (tokens[k+1] == "||")
1719 					result = valA || valB;
1720 				else if (tokens[k+1] == "&&")
1721 					result = valA && valB;
1722 				std::vector<std::string>::iterator itInsert =
1723 					tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1724 				tokens.insert(itInsert, std::to_string(result));
1725 			} else {
1726 				k++;
1727 			}
1728 		}
1729 	}
1730 }
1731 
Tokenize(const std::string & expr) const1732 std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const {
1733 	// Break into tokens
1734 	std::vector<std::string> tokens;
1735 	const char *cp = expr.c_str();
1736 	while (*cp) {
1737 		std::string word;
1738 		if (setWord.Contains(*cp)) {
1739 			// Identifiers and numbers
1740 			while (setWord.Contains(*cp)) {
1741 				word += *cp;
1742 				cp++;
1743 			}
1744 		} else if (IsSpaceOrTab(*cp)) {
1745 			while (IsSpaceOrTab(*cp)) {
1746 				word += *cp;
1747 				cp++;
1748 			}
1749 		} else if (setRelOp.Contains(*cp)) {
1750 			word += *cp;
1751 			cp++;
1752 			if (setRelOp.Contains(*cp)) {
1753 				word += *cp;
1754 				cp++;
1755 			}
1756 		} else if (setLogicalOp.Contains(*cp)) {
1757 			word += *cp;
1758 			cp++;
1759 			if (setLogicalOp.Contains(*cp)) {
1760 				word += *cp;
1761 				cp++;
1762 			}
1763 		} else {
1764 			// Should handle strings, characters, and comments here
1765 			word += *cp;
1766 			cp++;
1767 		}
1768 		tokens.push_back(word);
1769 	}
1770 	return tokens;
1771 }
1772 
EvaluateExpression(const std::string & expr,const SymbolTable & preprocessorDefinitions)1773 bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
1774 	std::vector<std::string> tokens = Tokenize(expr);
1775 
1776 	EvaluateTokens(tokens, preprocessorDefinitions);
1777 
1778 	// "0" or "" -> false else true
1779 	const bool isFalse = tokens.empty() ||
1780 		((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1781 	return !isFalse;
1782 }
1783 
// Lexer module objects for this file. Each one ties a lexer identifier
// (SCLEX_CPP / SCLEX_CPPNOCASE) and a language name ("cpp" / "cppnocase")
// to a LexerCPP factory function; both pass the same cppWordLists.
// NOTE(review): the "cppnocase" module uses LexerFactoryCPPInsensitive,
// presumably the case-insensitive variant of the lexer - confirm against the factory.
LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);
1786