1 // Scintilla source code edit control
2 /** @file LexCPP.cxx
3  ** Lexer for C++, C, Java, and JavaScript.
4  ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5  **/
6 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 #include <ctype.h>
15 
16 #include <string>
17 #include <vector>
18 #include <map>
19 #include <algorithm>
20 
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
24 
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SparseState.h"
33 #include "SubStyles.h"
34 
35 #ifdef SCI_NAMESPACE
36 using namespace Scintilla;
37 #endif
38 
39 namespace {
40 	// Use an unnamed namespace to protect the functions and classes from name conflicts
41 
IsSpaceEquiv(int state)42 bool IsSpaceEquiv(int state) {
43 	return (state <= SCE_C_COMMENTDOC) ||
44 		// including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
45 		(state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
46 		(state == SCE_C_COMMENTDOCKEYWORDERROR);
47 }
48 
49 // Preconditions: sc.currentPos points to a character after '+' or '-'.
50 // The test for pos reaching 0 should be redundant,
51 // and is in only for safety measures.
52 // Limitation: this code will give the incorrect answer for code like
53 // a = b+++/ptn/...
54 // Putting a space between the '++' post-inc operator and the '+' binary op
55 // fixes this, and is highly recommended for readability anyway.
FollowsPostfixOperator(StyleContext & sc,LexAccessor & styler)56 bool FollowsPostfixOperator(StyleContext &sc, LexAccessor &styler) {
57 	int pos = (int) sc.currentPos;
58 	while (--pos > 0) {
59 		char ch = styler[pos];
60 		if (ch == '+' || ch == '-') {
61 			return styler[pos - 1] == ch;
62 		}
63 	}
64 	return false;
65 }
66 
followsReturnKeyword(StyleContext & sc,LexAccessor & styler)67 bool followsReturnKeyword(StyleContext &sc, LexAccessor &styler) {
68 	// Don't look at styles, so no need to flush.
69 	int pos = (int) sc.currentPos;
70 	int currentLine = styler.GetLine(pos);
71 	int lineStartPos = styler.LineStart(currentLine);
72 	while (--pos > lineStartPos) {
73 		char ch = styler.SafeGetCharAt(pos);
74 		if (ch != ' ' && ch != '\t') {
75 			break;
76 		}
77 	}
78 	const char *retBack = "nruter";
79 	const char *s = retBack;
80 	while (*s
81 		&& pos >= lineStartPos
82 		&& styler.SafeGetCharAt(pos) == *s) {
83 		s++;
84 		pos--;
85 	}
86 	return !*s;
87 }
88 
// True when ch is a space or a horizontal tab.
bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
92 
// True when every character of s is a space or a tab.
// An empty string therefore also yields true.
bool OnlySpaceOrTab(const std::string &s) {
	return s.find_first_not_of(" \t") == std::string::npos;
}
100 
// Split text into the pieces found between occurrences of separator.
// Empty text gives an empty vector; otherwise the result holds one more
// element than there are separators, and elements may be empty strings.
std::vector<std::string> StringSplit(const std::string &text, int separator) {
	std::vector<std::string> pieces;
	if (text.empty()) {
		return pieces;
	}
	pieces.push_back(std::string());
	for (size_t i = 0; i < text.length(); i++) {
		const char ch = text[i];
		if (ch == separator) {
			// Start collecting the next piece.
			pieces.push_back(std::string());
		} else {
			pieces.back().push_back(ch);
		}
	}
	return pieces;
}
112 
// Positions of an opening "(" token and of the ")" token that balances it.
// FindBracketPair sets both members to the token list's end() when no
// balanced pair exists.
struct BracketPair {
	std::vector<std::string>::iterator itBracket;
	std::vector<std::string>::iterator itEndBracket;
};

// Locate the first "(" token in tokens together with its matching ")",
// taking nested brackets into account.
BracketPair FindBracketPair(std::vector<std::string> &tokens) {
	typedef std::vector<std::string>::iterator TokenIterator;

	// Start from the "nothing found" answer.
	BracketPair result;
	result.itBracket = tokens.end();
	result.itEndBracket = tokens.end();

	const TokenIterator opener = std::find(tokens.begin(), tokens.end(), "(");
	size_t depth = 0;
	for (TokenIterator cursor = opener; cursor != tokens.end(); ++cursor) {
		if (*cursor == "(") {
			depth++;
		} else if (*cursor == ")") {
			// The scan starts on a "(", so depth is at least 1 here.
			if (--depth == 0) {
				result.itBracket = opener;
				result.itEndBracket = cursor;
				return result;
			}
		}
	}
	// Either there is no "(" at all or it is never closed.
	return result;
}
142 
highlightTaskMarker(StyleContext & sc,LexAccessor & styler,int activity,WordList & markerList,bool caseSensitive)143 void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
144 		int activity, WordList &markerList, bool caseSensitive){
145 	if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
146 		const int lengthMarker = 50;
147 		char marker[lengthMarker+1];
148 		int currPos = (int) sc.currentPos;
149 		int i = 0;
150 		while (i < lengthMarker) {
151 			char ch = styler.SafeGetCharAt(currPos + i);
152 			if (IsASpace(ch) || isoperator(ch)) {
153 				break;
154 			}
155 			if (caseSensitive)
156 				marker[i] = ch;
157 			else
158 				marker[i] = static_cast<char>(tolower(ch));
159 			i++;
160 		}
161 		marker[i] = '\0';
162 		if (markerList.InList(marker)) {
163 			sc.SetState(SCE_C_TASKMARKER|activity);
164 		}
165 	}
166 }
167 
168 struct EscapeSequence {
169 	int digitsLeft;
170 	CharacterSet setHexDigits;
171 	CharacterSet setOctDigits;
172 	CharacterSet setNoneNumeric;
173 	CharacterSet *escapeSetValid;
EscapeSequence__anon5b491b7d0111::EscapeSequence174 	EscapeSequence() {
175 		digitsLeft = 0;
176 		escapeSetValid = 0;
177 		setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
178 		setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
179 	}
resetEscapeState__anon5b491b7d0111::EscapeSequence180 	void resetEscapeState(int nextChar) {
181 		digitsLeft = 0;
182 		escapeSetValid = &setNoneNumeric;
183 		if (nextChar == 'U') {
184 			digitsLeft = 9;
185 			escapeSetValid = &setHexDigits;
186 		} else if (nextChar == 'u') {
187 			digitsLeft = 5;
188 			escapeSetValid = &setHexDigits;
189 		} else if (nextChar == 'x') {
190 			digitsLeft = 5;
191 			escapeSetValid = &setHexDigits;
192 		} else if (setOctDigits.Contains(nextChar)) {
193 			digitsLeft = 3;
194 			escapeSetValid = &setOctDigits;
195 		}
196 	}
atEscapeEnd__anon5b491b7d0111::EscapeSequence197 	bool atEscapeEnd(int currChar) const {
198 		return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
199 	}
200 };
201 
GetRestOfLine(LexAccessor & styler,int start,bool allowSpace)202 std::string GetRestOfLine(LexAccessor &styler, int start, bool allowSpace) {
203 	std::string restOfLine;
204 	int i =0;
205 	char ch = styler.SafeGetCharAt(start, '\n');
206 	int endLine = styler.LineEnd(styler.GetLine(start));
207 	while (((start+i) < endLine) && (ch != '\r')) {
208 		char chNext = styler.SafeGetCharAt(start + i + 1, '\n');
209 		if (ch == '/' && (chNext == '/' || chNext == '*'))
210 			break;
211 		if (allowSpace || (ch != ' '))
212 			restOfLine += ch;
213 		i++;
214 		ch = chNext;
215 	}
216 	return restOfLine;
217 }
218 
IsStreamCommentStyle(int style)219 bool IsStreamCommentStyle(int style) {
220 	return style == SCE_C_COMMENT ||
221 		style == SCE_C_COMMENTDOC ||
222 		style == SCE_C_COMMENTDOCKEYWORD ||
223 		style == SCE_C_COMMENTDOCKEYWORDERROR;
224 }
225 
// One preprocessor definition event recorded while lexing.
struct PPDefinition {
	// Line on which the event was seen.
	int line;
	// Name of the symbol.
	std::string key;
	// Replacement text of the symbol.
	std::string value;
	// True when the entry removes the symbol rather than defining it.
	bool isUndef;
	// Parameter list text for a macro; empty otherwise.
	std::string arguments;

	PPDefinition(int line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, std::string arguments_="") :
		line(line_),
		key(key_),
		value(value_),
		isUndef(isUndef_),
		arguments(arguments_) {
	}
};
236 
// Tracks the state of nested preprocessor conditionals for one line so that
// inactive code can be recognised.
// Only the first 31 nesting levels are recorded. Deeper levels, and
// unbalanced directives that push the level below zero, are counted but
// otherwise ignored.
class LinePPState {
	// Bit mask with one bit per level: a set bit marks that level's section
	// as inactive, so any set bit means the line is inactive.
	int state;
	// Bit mask with one bit per level: a set bit means a branch of the
	// conditional at that level has already been taken.
	int ifTaken;
	// Current nesting level; -1 when outside every conditional.
	int level;
	// Levels are limited to 0..30 so that (1 << level) never reaches the
	// sign bit of a 32-bit int.
	enum { maximumNestingLevel = 31 };
	bool ValidLevel() const {
		return level >= 0 && level < maximumNestingLevel;
	}
	// Mask for the current level, or 0 when the level cannot be recorded.
	// Shifting by a negative or too-large count is undefined behaviour, so
	// the level is validated here as well as by the callers.
	int maskLevel() const {
		return ValidLevel() ? (1 << level) : 0;
	}
public:
	LinePPState() : state(0), ifTaken(0), level(-1) {
	}
	// True when any enclosing recorded section is inactive.
	bool IsInactive() const {
		return state != 0;
	}
	// True when a branch at the current level has already been taken.
	// Always false at a level that is not recorded, e.g. for a stray
	// #else with no matching #if.
	bool CurrentIfTaken() const {
		return (ifTaken & maskLevel()) != 0;
	}
	// Enter a new conditional level; `on` says whether its first section
	// is active.
	void StartSection(bool on) {
		level++;
		if (ValidLevel()) {
			if (on) {
				state &= ~maskLevel();
				ifTaken |= maskLevel();
			} else {
				state |= maskLevel();
				ifTaken &= ~maskLevel();
			}
		}
	}
	// Leave the current conditional level, clearing its bits.
	void EndSection() {
		if (ValidLevel()) {
			state &= ~maskLevel();
			ifTaken &= ~maskLevel();
		}
		level--;
	}
	// Flip the active/inactive state of the current level and record that a
	// branch at this level has been taken.
	void InvertCurrentLevel() {
		if (ValidLevel()) {
			state ^= maskLevel();
			ifTaken |= maskLevel();
		}
	}
};
282 
283 // Hold the preprocessor state for each line seen.
284 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
285 class PPStates {
286 	std::vector<LinePPState> vlls;
287 public:
ForLine(int line) const288 	LinePPState ForLine(int line) const {
289 		if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
290 			return vlls[line];
291 		} else {
292 			return LinePPState();
293 		}
294 	}
Add(int line,LinePPState lls)295 	void Add(int line, LinePPState lls) {
296 		vlls.resize(line+1);
297 		vlls[line] = lls;
298 	}
299 };
300 
301 // An individual named option for use in an OptionSet
302 
// Options used for LexerCPP.
// Each member is bound to a named property by OptionSetCPP; the constructor
// supplies the value used until that property is set.
struct OptionsCPP {
	bool stylingWithinPreprocessor;
	bool identifiersAllowDollars;
	bool trackPreprocessor;
	bool updatePreprocessor;
	bool triplequotedStrings;
	bool hashquotedStrings;
	bool backQuotedStrings;
	bool escapeSequence;
	bool fold;
	bool foldSyntaxBased;
	bool foldComment;
	bool foldCommentMultiline;
	bool foldCommentExplicit;
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere;
	bool foldPreprocessor;
	bool foldCompact;
	bool foldAtElse;

	OptionsCPP() :
		stylingWithinPreprocessor(false),
		identifiersAllowDollars(true),
		trackPreprocessor(true),
		updatePreprocessor(true),
		triplequotedStrings(false),
		hashquotedStrings(false),
		backQuotedStrings(false),
		escapeSequence(false),
		fold(false),
		foldSyntaxBased(true),
		foldComment(false),
		foldCommentMultiline(true),
		foldCommentExplicit(true),
		foldExplicitStart(""),
		foldExplicitEnd(""),
		foldExplicitAnywhere(false),
		foldPreprocessor(false),
		foldCompact(false),
		foldAtElse(false) {
	}
};
346 
// Descriptions of the word lists, in the order of the list numbers handled
// by LexerCPP::WordListSet. The array is terminated by a null pointer.
const char *const cppWordLists[] = {
	"Primary keywords and identifiers",       // 0
	"Secondary keywords and identifiers",     // 1
	"Documentation comment keywords",         // 2
	"Global classes and typedefs",            // 3
	"Preprocessor definitions",               // 4
	"Task marker and error marker keywords",  // 5
	0,
};
356 
357 struct OptionSetCPP : public OptionSet<OptionsCPP> {
OptionSetCPP__anon5b491b7d0111::OptionSetCPP358 	OptionSetCPP() {
359 		DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
360 			"For C++ code, determines whether all preprocessor code is styled in the "
361 			"preprocessor style (0, the default) or only from the initial # to the end "
362 			"of the command word(1).");
363 
364 		DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
365 			"Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
366 
367 		DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
368 			"Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
369 
370 		DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
371 			"Set to 1 to update preprocessor definitions when #define found.");
372 
373 		DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
374 			"Set to 1 to enable highlighting of triple-quoted strings.");
375 
376 		DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
377 			"Set to 1 to enable highlighting of hash-quoted strings.");
378 
379 		DefineProperty("lexer.cpp.backquoted.strings", &OptionsCPP::backQuotedStrings,
380 			"Set to 1 to enable highlighting of back-quoted raw strings .");
381 
382 		DefineProperty("lexer.cpp.escape.sequence", &OptionsCPP::escapeSequence,
383 			"Set to 1 to enable highlighting of escape sequences in strings");
384 
385 		DefineProperty("fold", &OptionsCPP::fold);
386 
387 		DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
388 			"Set this property to 0 to disable syntax based folding.");
389 
390 		DefineProperty("fold.comment", &OptionsCPP::foldComment,
391 			"This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
392 			"Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
393 			"at the end of a section that should fold.");
394 
395 		DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
396 			"Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
397 
398 		DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
399 			"Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
400 
401 		DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
402 			"The string to use for explicit fold start points, replacing the standard //{.");
403 
404 		DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
405 			"The string to use for explicit fold end points, replacing the standard //}.");
406 
407 		DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
408 			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
409 
410 		DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
411 			"This option enables folding preprocessor directives when using the C++ lexer. "
412 			"Includes C#'s explicit #region and #endregion folding directives.");
413 
414 		DefineProperty("fold.compact", &OptionsCPP::foldCompact);
415 
416 		DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
417 			"This option enables C++ folding on a \"} else {\" line of an if statement.");
418 
419 		DefineWordListSets(cppWordLists);
420 	}
421 };
422 
423 const char styleSubable[] = {SCE_C_IDENTIFIER, SCE_C_COMMENTDOCKEYWORD, 0};
424 
425 }
426 
427 class LexerCPP : public ILexerWithSubStyles {
428 	bool caseSensitive;
429 	CharacterSet setWord;
430 	CharacterSet setNegationOp;
431 	CharacterSet setArithmethicOp;
432 	CharacterSet setRelOp;
433 	CharacterSet setLogicalOp;
434 	CharacterSet setWordStart;
435 	PPStates vlls;
436 	std::vector<PPDefinition> ppDefineHistory;
437 	WordList keywords;
438 	WordList keywords2;
439 	WordList keywords3;
440 	WordList keywords4;
441 	WordList ppDefinitions;
442 	WordList markerList;
443 	struct SymbolValue {
444 		std::string value;
445 		std::string arguments;
SymbolValueLexerCPP::SymbolValue446 		SymbolValue(const std::string &value_="", const std::string &arguments_="") : value(value_), arguments(arguments_) {
447 		}
operator =LexerCPP::SymbolValue448 		SymbolValue &operator = (const std::string &value_) {
449 			value = value_;
450 			arguments.clear();
451 			return *this;
452 		}
IsMacroLexerCPP::SymbolValue453 		bool IsMacro() const {
454 			return !arguments.empty();
455 		}
456 	};
457 	typedef std::map<std::string, SymbolValue> SymbolTable;
458 	SymbolTable preprocessorDefinitionsStart;
459 	OptionsCPP options;
460 	OptionSetCPP osCPP;
461 	EscapeSequence escapeSeq;
462 	SparseState<std::string> rawStringTerminators;
463 	enum { activeFlag = 0x40 };
464 	enum { ssIdentifier, ssDocKeyword };
465 	SubStyles subStyles;
466 public:
LexerCPP(bool caseSensitive_)467 	explicit LexerCPP(bool caseSensitive_) :
468 		caseSensitive(caseSensitive_),
469 		setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
470 		setNegationOp(CharacterSet::setNone, "!"),
471 		setArithmethicOp(CharacterSet::setNone, "+-/*%"),
472 		setRelOp(CharacterSet::setNone, "=!<>"),
473 		setLogicalOp(CharacterSet::setNone, "|&"),
474 		subStyles(styleSubable, 0x80, 0x40, activeFlag) {
475 	}
~LexerCPP()476 	virtual ~LexerCPP() {
477 	}
Release()478 	void SCI_METHOD Release() {
479 		delete this;
480 	}
Version() const481 	int SCI_METHOD Version() const {
482 		return lvSubStyles;
483 	}
PropertyNames()484 	const char * SCI_METHOD PropertyNames() {
485 		return osCPP.PropertyNames();
486 	}
PropertyType(const char * name)487 	int SCI_METHOD PropertyType(const char *name) {
488 		return osCPP.PropertyType(name);
489 	}
DescribeProperty(const char * name)490 	const char * SCI_METHOD DescribeProperty(const char *name) {
491 		return osCPP.DescribeProperty(name);
492 	}
493 	int SCI_METHOD PropertySet(const char *key, const char *val);
DescribeWordListSets()494 	const char * SCI_METHOD DescribeWordListSets() {
495 		return osCPP.DescribeWordListSets();
496 	}
497 	int SCI_METHOD WordListSet(int n, const char *wl);
498 	void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
499 	void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
500 
PrivateCall(int,void *)501 	void * SCI_METHOD PrivateCall(int, void *) {
502 		return 0;
503 	}
504 
LineEndTypesSupported()505 	int SCI_METHOD LineEndTypesSupported() {
506 		return SC_LINE_END_TYPE_UNICODE;
507 	}
508 
AllocateSubStyles(int styleBase,int numberStyles)509 	int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) {
510 		return subStyles.Allocate(styleBase, numberStyles);
511 	}
SubStylesStart(int styleBase)512 	int SCI_METHOD SubStylesStart(int styleBase) {
513 		return subStyles.Start(styleBase);
514 	}
SubStylesLength(int styleBase)515 	int SCI_METHOD SubStylesLength(int styleBase) {
516 		return subStyles.Length(styleBase);
517 	}
StyleFromSubStyle(int subStyle)518 	int SCI_METHOD StyleFromSubStyle(int subStyle) {
519 		int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
520 		int active = subStyle & activeFlag;
521 		return styleBase | active;
522 	}
PrimaryStyleFromStyle(int style)523 	int SCI_METHOD PrimaryStyleFromStyle(int style) {
524 		return MaskActive(style);
525  	}
FreeSubStyles()526 	void SCI_METHOD FreeSubStyles() {
527 		subStyles.Free();
528 	}
SetIdentifiers(int style,const char * identifiers)529 	void SCI_METHOD SetIdentifiers(int style, const char *identifiers) {
530 		subStyles.SetIdentifiers(style, identifiers);
531 	}
DistanceToSecondaryStyles()532 	int SCI_METHOD DistanceToSecondaryStyles() {
533 		return activeFlag;
534 	}
GetSubStyleBases()535 	const char * SCI_METHOD GetSubStyleBases() {
536 		return styleSubable;
537 	}
538 
LexerFactoryCPP()539 	static ILexer *LexerFactoryCPP() {
540 		return new LexerCPP(true);
541 	}
LexerFactoryCPPInsensitive()542 	static ILexer *LexerFactoryCPPInsensitive() {
543 		return new LexerCPP(false);
544 	}
MaskActive(int style)545 	static int MaskActive(int style) {
546 		return style & ~activeFlag;
547 	}
548 	void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
549 	std::vector<std::string> Tokenize(const std::string &expr) const;
550 	bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
551 };
552 
PropertySet(const char * key,const char * val)553 int SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
554 	if (osCPP.PropertySet(&options, key, val)) {
555 		if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
556 			setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
557 			if (options.identifiersAllowDollars) {
558 				setWord.Add('$');
559 			}
560 		}
561 		return 0;
562 	}
563 	return -1;
564 }
565 
WordListSet(int n,const char * wl)566 int SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
567 	WordList *wordListN = 0;
568 	switch (n) {
569 	case 0:
570 		wordListN = &keywords;
571 		break;
572 	case 1:
573 		wordListN = &keywords2;
574 		break;
575 	case 2:
576 		wordListN = &keywords3;
577 		break;
578 	case 3:
579 		wordListN = &keywords4;
580 		break;
581 	case 4:
582 		wordListN = &ppDefinitions;
583 		break;
584 	case 5:
585 		wordListN = &markerList;
586 		break;
587 	}
588 	int firstModification = -1;
589 	if (wordListN) {
590 		WordList wlNew;
591 		wlNew.Set(wl);
592 		if (*wordListN != wlNew) {
593 			wordListN->Set(wl);
594 			firstModification = 0;
595 			if (n == 4) {
596 				// Rebuild preprocessorDefinitions
597 				preprocessorDefinitionsStart.clear();
598 				for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
599 					const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
600 					const char *cpEquals = strchr(cpDefinition, '=');
601 					if (cpEquals) {
602 						std::string name(cpDefinition, cpEquals - cpDefinition);
603 						std::string val(cpEquals+1);
604 						size_t bracket = name.find('(');
605 						size_t bracketEnd = name.find(')');
606 						if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
607 							// Macro
608 							std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
609 							name = name.substr(0, bracket);
610 							preprocessorDefinitionsStart[name] = SymbolValue(val, args);
611 						} else {
612 							preprocessorDefinitionsStart[name] = val;
613 						}
614 					} else {
615 						std::string name(cpDefinition);
616 						std::string val("1");
617 						preprocessorDefinitionsStart[name] = val;
618 					}
619 				}
620 			}
621 		}
622 	}
623 	return firstModification;
624 }
625 
626 // Functor used to truncate history
627 struct After {
628 	int line;
AfterAfter629 	explicit After(int line_) : line(line_) {}
operator ()After630 	bool operator()(PPDefinition &p) const {
631 		return p.line > line;
632 	}
633 };
634 
Lex(unsigned int startPos,int length,int initStyle,IDocument * pAccess)635 void SCI_METHOD LexerCPP::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
636 	LexAccessor styler(pAccess);
637 
638 	CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
639 	CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
640 
641 	CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
642 
643 	setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
644 
645 	CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
646 
647 	if (options.identifiersAllowDollars) {
648 		setWordStart.Add('$');
649 	}
650 
651 	int chPrevNonWhite = ' ';
652 	int visibleChars = 0;
653 	bool lastWordWasUUID = false;
654 	int styleBeforeDCKeyword = SCE_C_DEFAULT;
655 	int styleBeforeTaskMarker = SCE_C_DEFAULT;
656 	bool continuationLine = false;
657 	bool isIncludePreprocessor = false;
658 	bool isStringInPreprocessor = false;
659 	bool inRERange = false;
660 	bool seenDocKeyBrace = false;
661 
662 	int lineCurrent = styler.GetLine(startPos);
663 	if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
664       (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
665       (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
666 		// Set continuationLine if last character of previous line is '\'
667 		if (lineCurrent > 0) {
668 			int endLinePrevious = styler.LineEnd(lineCurrent - 1);
669 			if (endLinePrevious > 0) {
670 				continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
671 			}
672 		}
673 	}
674 
675 	// look back to set chPrevNonWhite properly for better regex colouring
676 	if (startPos > 0) {
677 		int back = startPos;
678 		while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
679 			;
680 		if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
681 			chPrevNonWhite = styler.SafeGetCharAt(back);
682 		}
683 	}
684 
685 	StyleContext sc(startPos, length, initStyle, styler, static_cast<unsigned char>(0xff));
686 	LinePPState preproc = vlls.ForLine(lineCurrent);
687 
688 	bool definitionsChanged = false;
689 
690 	// Truncate ppDefineHistory before current line
691 
692 	if (!options.updatePreprocessor)
693 		ppDefineHistory.clear();
694 
695 	std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(), After(lineCurrent-1));
696 	if (itInvalid != ppDefineHistory.end()) {
697 		ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
698 		definitionsChanged = true;
699 	}
700 
701 	SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
702 	for (std::vector<PPDefinition>::iterator itDef = ppDefineHistory.begin(); itDef != ppDefineHistory.end(); ++itDef) {
703 		if (itDef->isUndef)
704 			preprocessorDefinitions.erase(itDef->key);
705 		else
706 			preprocessorDefinitions[itDef->key] = SymbolValue(itDef->value, itDef->arguments);
707 	}
708 
709 	std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
710 	SparseState<std::string> rawSTNew(lineCurrent);
711 
712 	int activitySet = preproc.IsInactive() ? activeFlag : 0;
713 
714 	const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_C_IDENTIFIER);
715 	const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_C_COMMENTDOCKEYWORD);
716 
717 	int lineEndNext = styler.LineEnd(lineCurrent);
718 
719 	for (; sc.More();) {
720 
721 		if (sc.atLineStart) {
722 			// Using MaskActive() is not needed in the following statement.
723 			// Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
724 			if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
725 				// Prevent SCE_C_STRINGEOL from leaking back to previous line which
726 				// ends with a line continuation by locking in the state up to this position.
727 				sc.SetState(sc.state);
728 			}
729 			if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
730 				sc.SetState(SCE_C_DEFAULT|activitySet);
731 			}
732 			// Reset states to beginning of colourise so no surprises
733 			// if different sets of lines lexed.
734 			visibleChars = 0;
735 			lastWordWasUUID = false;
736 			isIncludePreprocessor = false;
737 			inRERange = false;
738 			if (preproc.IsInactive()) {
739 				activitySet = activeFlag;
740 				sc.SetState(sc.state | activitySet);
741 			}
742 		}
743 
744 		if (sc.atLineEnd) {
745 			lineCurrent++;
746 			lineEndNext = styler.LineEnd(lineCurrent);
747 			vlls.Add(lineCurrent, preproc);
748 			if (rawStringTerminator != "") {
749 				rawSTNew.Set(lineCurrent-1, rawStringTerminator);
750 			}
751 		}
752 
753 		// Handle line continuation generically.
754 		if (sc.ch == '\\') {
755 			if (static_cast<int>((sc.currentPos+1)) >= lineEndNext) {
756 				lineCurrent++;
757 				lineEndNext = styler.LineEnd(lineCurrent);
758 				vlls.Add(lineCurrent, preproc);
759 				sc.Forward();
760 				if (sc.ch == '\r' && sc.chNext == '\n') {
761 					// Even in UTF-8, \r and \n are separate
762 					sc.Forward();
763 				}
764 				continuationLine = true;
765 				sc.Forward();
766 				continue;
767 			}
768 		}
769 
770 		const bool atLineEndBeforeSwitch = sc.atLineEnd;
771 
772 		// Determine if the current state should terminate.
773 		switch (MaskActive(sc.state)) {
774 			case SCE_C_OPERATOR:
775 				sc.SetState(SCE_C_DEFAULT|activitySet);
776 				break;
777 			case SCE_C_NUMBER:
778 				// We accept almost anything because of hex. and number suffixes
779 				if (sc.ch == '_') {
780 					sc.ChangeState(SCE_C_USERLITERAL|activitySet);
781 				} else if (!(setWord.Contains(sc.ch)
782 				   || (sc.ch == '\'')
783 				   || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
784 				                                          sc.chPrev == 'p' || sc.chPrev == 'P')))) {
785 					sc.SetState(SCE_C_DEFAULT|activitySet);
786 				}
787 				break;
788 			case SCE_C_USERLITERAL:
789 				if (!(setWord.Contains(sc.ch)))
790 					sc.SetState(SCE_C_DEFAULT|activitySet);
791 				break;
792 			case SCE_C_IDENTIFIER:
793 				if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
794 					char s[1000];
795 					if (caseSensitive) {
796 						sc.GetCurrent(s, sizeof(s));
797 					} else {
798 						sc.GetCurrentLowered(s, sizeof(s));
799 					}
800 					if (keywords.InList(s)) {
801 						lastWordWasUUID = strcmp(s, "uuid") == 0;
802 						sc.ChangeState(SCE_C_WORD|activitySet);
803 					} else if (keywords2.InList(s)) {
804 						sc.ChangeState(SCE_C_WORD2|activitySet);
805 					} else if (keywords4.InList(s)) {
806 						sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
807 					} else {
808 						int subStyle = classifierIdentifiers.ValueFor(s);
809 						if (subStyle >= 0) {
810 							sc.ChangeState(subStyle|activitySet);
811 						}
812 					}
813 					const bool literalString = sc.ch == '\"';
814 					if (literalString || sc.ch == '\'') {
815 						size_t lenS = strlen(s);
816 						const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
817 						if (raw)
818 							s[lenS--] = '\0';
819 						bool valid =
820 							(lenS == 0) ||
821 							((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
822 							((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
823 						if (valid) {
824 							if (literalString) {
825 								if (raw) {
826 									// Set the style of the string prefix to SCE_C_STRINGRAW but then change to
827 									// SCE_C_DEFAULT as that allows the raw string start code to run.
828 									sc.ChangeState(SCE_C_STRINGRAW|activitySet);
829 									sc.SetState(SCE_C_DEFAULT|activitySet);
830 								} else {
831 									sc.ChangeState(SCE_C_STRING|activitySet);
832 								}
833 							} else {
834 								sc.ChangeState(SCE_C_CHARACTER|activitySet);
835 							}
836 						} else {
837 							sc.SetState(SCE_C_DEFAULT | activitySet);
838 						}
839 					} else {
840 						sc.SetState(SCE_C_DEFAULT|activitySet);
841 					}
842 				}
843 				break;
844 			case SCE_C_PREPROCESSOR:
845 				if (options.stylingWithinPreprocessor) {
846 					if (IsASpace(sc.ch)) {
847 						sc.SetState(SCE_C_DEFAULT|activitySet);
848 					}
849 				} else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
850 					isStringInPreprocessor = false;
851 				} else if (!isStringInPreprocessor) {
852 					if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
853 						isStringInPreprocessor = true;
854 					} else if (sc.Match('/', '*')) {
855 						if (sc.Match("/**") || sc.Match("/*!")) {
856 							sc.SetState(SCE_C_PREPROCESSORCOMMENTDOC|activitySet);
857 						} else {
858 							sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
859 						}
860 						sc.Forward();	// Eat the *
861 					} else if (sc.Match('/', '/')) {
862 						sc.SetState(SCE_C_DEFAULT|activitySet);
863 					}
864 				}
865 				break;
866 			case SCE_C_PREPROCESSORCOMMENT:
867 			case SCE_C_PREPROCESSORCOMMENTDOC:
868 				if (sc.Match('*', '/')) {
869 					sc.Forward();
870 					sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
871 					continue;	// Without advancing in case of '\'.
872 				}
873 				break;
874 			case SCE_C_COMMENT:
875 				if (sc.Match('*', '/')) {
876 					sc.Forward();
877 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
878 				} else {
879 					styleBeforeTaskMarker = SCE_C_COMMENT;
880 					highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
881 				}
882 				break;
883 			case SCE_C_COMMENTDOC:
884 				if (sc.Match('*', '/')) {
885 					sc.Forward();
886 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
887 				} else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
888 					// Verify that we have the conditions to mark a comment-doc-keyword
889 					if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
890 						styleBeforeDCKeyword = SCE_C_COMMENTDOC;
891 						sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
892 					}
893 				}
894 				break;
895 			case SCE_C_COMMENTLINE:
896 				if (sc.atLineStart && !continuationLine) {
897 					sc.SetState(SCE_C_DEFAULT|activitySet);
898 				} else {
899 					styleBeforeTaskMarker = SCE_C_COMMENTLINE;
900 					highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
901 				}
902 				break;
903 			case SCE_C_COMMENTLINEDOC:
904 				if (sc.atLineStart && !continuationLine) {
905 					sc.SetState(SCE_C_DEFAULT|activitySet);
906 				} else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
907 					// Verify that we have the conditions to mark a comment-doc-keyword
908 					if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
909 						styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
910 						sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
911 					}
912 				}
913 				break;
914 			case SCE_C_COMMENTDOCKEYWORD:
915 				if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
916 					sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
917 					sc.Forward();
918 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
919 					seenDocKeyBrace = false;
920 				} else if (sc.ch == '[' || sc.ch == '{') {
921 					seenDocKeyBrace = true;
922 				} else if (!setDoxygen.Contains(sc.ch)
923 				           && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
924 					char s[100];
925 					if (caseSensitive) {
926 						sc.GetCurrent(s, sizeof(s));
927 					} else {
928 						sc.GetCurrentLowered(s, sizeof(s));
929 					}
930 					if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
931 						sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
932 					} else if (!keywords3.InList(s + 1)) {
933 						int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
934 						if (subStyleCDKW >= 0) {
935 							sc.ChangeState(subStyleCDKW|activitySet);
936 						} else {
937 							sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
938 						}
939 					}
940 					sc.SetState(styleBeforeDCKeyword|activitySet);
941 					seenDocKeyBrace = false;
942 				}
943 				break;
944 			case SCE_C_STRING:
945 				if (sc.atLineEnd) {
946 					sc.ChangeState(SCE_C_STRINGEOL|activitySet);
947 				} else if (isIncludePreprocessor) {
948 					if (sc.ch == '>') {
949 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
950 						isIncludePreprocessor = false;
951 					}
952 				} else if (sc.ch == '\\') {
953 					if (options.escapeSequence) {
954 						sc.SetState(SCE_C_ESCAPESEQUENCE|activitySet);
955 						escapeSeq.resetEscapeState(sc.chNext);
956 					}
957 					sc.Forward(); // Skip all characters after the backslash
958 				} else if (sc.ch == '\"') {
959 					if (sc.chNext == '_') {
960 						sc.ChangeState(SCE_C_USERLITERAL|activitySet);
961 					} else {
962 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
963 					}
964 				}
965 				break;
966 			case SCE_C_ESCAPESEQUENCE:
967 				escapeSeq.digitsLeft--;
968 				if (!escapeSeq.atEscapeEnd(sc.ch)) {
969 					break;
970 				}
971 				if (sc.ch == '"') {
972 					sc.SetState(SCE_C_STRING|activitySet);
973 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
974 				} else if (sc.ch == '\\') {
975 					escapeSeq.resetEscapeState(sc.chNext);
976 					sc.Forward();
977 				} else {
978 					sc.SetState(SCE_C_STRING|activitySet);
979 					if (sc.atLineEnd) {
980 						sc.ChangeState(SCE_C_STRINGEOL|activitySet);
981 					}
982 				}
983 				break;
984 			case SCE_C_HASHQUOTEDSTRING:
985 				if (sc.ch == '\\') {
986 					if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
987 						sc.Forward();
988 					}
989 				} else if (sc.ch == '\"') {
990 					sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
991 				}
992 				break;
993 			case SCE_C_STRINGRAW:
994 				if (sc.Match(rawStringTerminator.c_str())) {
995 					for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
996 						sc.Forward();
997 					sc.SetState(SCE_C_DEFAULT|activitySet);
998 					rawStringTerminator = "";
999 				}
1000 				break;
1001 			case SCE_C_CHARACTER:
1002 				if (sc.atLineEnd) {
1003 					sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1004 				} else if (sc.ch == '\\') {
1005 					if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1006 						sc.Forward();
1007 					}
1008 				} else if (sc.ch == '\'') {
1009 					if (sc.chNext == '_') {
1010 						sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1011 					} else {
1012 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1013 					}
1014 				}
1015 				break;
1016 			case SCE_C_REGEX:
1017 				if (sc.atLineStart) {
1018 					sc.SetState(SCE_C_DEFAULT|activitySet);
1019 				} else if (! inRERange && sc.ch == '/') {
1020 					sc.Forward();
1021 					while ((sc.ch < 0x80) && islower(sc.ch))
1022 						sc.Forward();    // gobble regex flags
1023 					sc.SetState(SCE_C_DEFAULT|activitySet);
1024 				} else if (sc.ch == '\\' && (static_cast<int>(sc.currentPos+1) < lineEndNext)) {
1025 					// Gobble up the escaped character
1026 					sc.Forward();
1027 				} else if (sc.ch == '[') {
1028 					inRERange = true;
1029 				} else if (sc.ch == ']') {
1030 					inRERange = false;
1031 				}
1032 				break;
1033 			case SCE_C_STRINGEOL:
1034 				if (sc.atLineStart) {
1035 					sc.SetState(SCE_C_DEFAULT|activitySet);
1036 				}
1037 				break;
1038 			case SCE_C_VERBATIM:
1039 				if (sc.ch == '\"') {
1040 					if (sc.chNext == '\"') {
1041 						sc.Forward();
1042 					} else {
1043 						sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1044 					}
1045 				}
1046 				break;
1047 			case SCE_C_TRIPLEVERBATIM:
1048 				if (sc.Match("\"\"\"")) {
1049 					while (sc.Match('"')) {
1050 						sc.Forward();
1051 					}
1052 					sc.SetState(SCE_C_DEFAULT|activitySet);
1053 				}
1054 				break;
1055 			case SCE_C_UUID:
1056 				if (sc.atLineEnd || sc.ch == ')') {
1057 					sc.SetState(SCE_C_DEFAULT|activitySet);
1058 				}
1059 				break;
1060 			case SCE_C_TASKMARKER:
1061 				if (isoperator(sc.ch) || IsASpace(sc.ch)) {
1062 					sc.SetState(styleBeforeTaskMarker|activitySet);
1063 					styleBeforeTaskMarker = SCE_C_DEFAULT;
1064 				}
1065 		}
1066 
1067 		if (sc.atLineEnd && !atLineEndBeforeSwitch) {
1068 			// State exit processing consumed characters up to end of line.
1069 			lineCurrent++;
1070 			lineEndNext = styler.LineEnd(lineCurrent);
1071 			vlls.Add(lineCurrent, preproc);
1072 		}
1073 
1074 		// Determine if a new state should be entered.
1075 		if (MaskActive(sc.state) == SCE_C_DEFAULT) {
1076 			if (sc.Match('@', '\"')) {
1077 				sc.SetState(SCE_C_VERBATIM|activitySet);
1078 				sc.Forward();
1079 			} else if (options.triplequotedStrings && sc.Match("\"\"\"")) {
1080 				sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
1081 				sc.Forward(2);
1082 			} else if (options.hashquotedStrings && sc.Match('#', '\"')) {
1083 				sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
1084 				sc.Forward();
1085 			} else if (options.backQuotedStrings && sc.Match('`')) {
1086 				sc.SetState(SCE_C_STRINGRAW|activitySet);
1087 				rawStringTerminator = "`";
1088 				sc.Forward();
1089 			} else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1090 				if (lastWordWasUUID) {
1091 					sc.SetState(SCE_C_UUID|activitySet);
1092 					lastWordWasUUID = false;
1093 				} else {
1094 					sc.SetState(SCE_C_NUMBER|activitySet);
1095 				}
1096 			} else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
1097 				if (lastWordWasUUID) {
1098 					sc.SetState(SCE_C_UUID|activitySet);
1099 					lastWordWasUUID = false;
1100 				} else {
1101 					sc.SetState(SCE_C_IDENTIFIER|activitySet);
1102 				}
1103 			} else if (sc.Match('/', '*')) {
1104 				if (sc.Match("/**") || sc.Match("/*!")) {	// Support of Qt/Doxygen doc. style
1105 					sc.SetState(SCE_C_COMMENTDOC|activitySet);
1106 				} else {
1107 					sc.SetState(SCE_C_COMMENT|activitySet);
1108 				}
1109 				sc.Forward();	// Eat the * so it isn't used for the end of the comment
1110 			} else if (sc.Match('/', '/')) {
1111 				if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
1112 					// Support of Qt/Doxygen doc. style
1113 					sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
1114 				else
1115 					sc.SetState(SCE_C_COMMENTLINE|activitySet);
1116 			} else if (sc.ch == '/'
1117 				   && (setOKBeforeRE.Contains(chPrevNonWhite)
1118 				       || followsReturnKeyword(sc, styler))
1119 				   && (!setCouldBePostOp.Contains(chPrevNonWhite)
1120 				       || !FollowsPostfixOperator(sc, styler))) {
1121 				sc.SetState(SCE_C_REGEX|activitySet);	// JavaScript's RegEx
1122 				inRERange = false;
1123 			} else if (sc.ch == '\"') {
1124 				if (sc.chPrev == 'R') {
1125 					styler.Flush();
1126 					if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
1127 						sc.SetState(SCE_C_STRINGRAW|activitySet);
1128 						rawStringTerminator = ")";
1129 						for (int termPos = sc.currentPos + 1;; termPos++) {
1130 							char chTerminator = styler.SafeGetCharAt(termPos, '(');
1131 							if (chTerminator == '(')
1132 								break;
1133 							rawStringTerminator += chTerminator;
1134 						}
1135 						rawStringTerminator += '\"';
1136 					} else {
1137 						sc.SetState(SCE_C_STRING|activitySet);
1138 					}
1139 				} else {
1140 					sc.SetState(SCE_C_STRING|activitySet);
1141 				}
1142 				isIncludePreprocessor = false;	// ensure that '>' won't end the string
1143 			} else if (isIncludePreprocessor && sc.ch == '<') {
1144 				sc.SetState(SCE_C_STRING|activitySet);
1145 			} else if (sc.ch == '\'') {
1146 				sc.SetState(SCE_C_CHARACTER|activitySet);
1147 			} else if (sc.ch == '#' && visibleChars == 0) {
1148 				// Preprocessor commands are alone on their line
1149 				sc.SetState(SCE_C_PREPROCESSOR|activitySet);
1150 				// Skip whitespace between # and preprocessor word
1151 				do {
1152 					sc.Forward();
1153 				} while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
1154 				if (sc.atLineEnd) {
1155 					sc.SetState(SCE_C_DEFAULT|activitySet);
1156 				} else if (sc.Match("include")) {
1157 					isIncludePreprocessor = true;
1158 				} else {
1159 					if (options.trackPreprocessor) {
1160 						if (sc.Match("ifdef") || sc.Match("ifndef")) {
1161 							bool isIfDef = sc.Match("ifdef");
1162 							int i = isIfDef ? 5 : 6;
1163 							std::string restOfLine = GetRestOfLine(styler, sc.currentPos + i + 1, false);
1164 							bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
1165 							preproc.StartSection(isIfDef == foundDef);
1166 						} else if (sc.Match("if")) {
1167 							std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1168 							bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1169 							preproc.StartSection(ifGood);
1170 						} else if (sc.Match("else")) {
1171 							if (!preproc.CurrentIfTaken()) {
1172 								preproc.InvertCurrentLevel();
1173 								activitySet = preproc.IsInactive() ? activeFlag : 0;
1174 								if (!activitySet)
1175 									sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1176 							} else if (!preproc.IsInactive()) {
1177 								preproc.InvertCurrentLevel();
1178 								activitySet = preproc.IsInactive() ? activeFlag : 0;
1179 								if (!activitySet)
1180 									sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1181 							}
1182 						} else if (sc.Match("elif")) {
1183 							// Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
1184 							if (!preproc.CurrentIfTaken()) {
1185 								// Similar to #if
1186 								std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1187 								bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1188 								if (ifGood) {
1189 									preproc.InvertCurrentLevel();
1190 									activitySet = preproc.IsInactive() ? activeFlag : 0;
1191 									if (!activitySet)
1192 										sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1193 								}
1194 							} else if (!preproc.IsInactive()) {
1195 								preproc.InvertCurrentLevel();
1196 								activitySet = preproc.IsInactive() ? activeFlag : 0;
1197 								if (!activitySet)
1198 									sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1199 							}
1200 						} else if (sc.Match("endif")) {
1201 							preproc.EndSection();
1202 							activitySet = preproc.IsInactive() ? activeFlag : 0;
1203 							sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1204 						} else if (sc.Match("define")) {
1205 							if (options.updatePreprocessor && !preproc.IsInactive()) {
1206 								std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
1207 								size_t startName = 0;
1208 								while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
1209 									startName++;
1210 								size_t endName = startName;
1211 								while ((endName < restOfLine.length()) && setWord.Contains(static_cast<unsigned char>(restOfLine[endName])))
1212 									endName++;
1213 								std::string key = restOfLine.substr(startName, endName-startName);
1214 								if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
1215 									// Macro
1216 									size_t endArgs = endName;
1217 									while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
1218 										endArgs++;
1219 									std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
1220 									size_t startValue = endArgs+1;
1221 									while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1222 										startValue++;
1223 									std::string value;
1224 									if (startValue < restOfLine.length())
1225 										value = restOfLine.substr(startValue);
1226 									preprocessorDefinitions[key] = SymbolValue(value, args);
1227 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
1228 									definitionsChanged = true;
1229 								} else {
1230 									// Value
1231 									size_t startValue = endName;
1232 									while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1233 										startValue++;
1234 									std::string value = restOfLine.substr(startValue);
1235 									preprocessorDefinitions[key] = value;
1236 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
1237 									definitionsChanged = true;
1238 								}
1239 							}
1240 						} else if (sc.Match("undef")) {
1241 							if (options.updatePreprocessor && !preproc.IsInactive()) {
1242 								std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, true);
1243 								std::vector<std::string> tokens = Tokenize(restOfLine);
1244 								std::string key;
1245 								if (tokens.size() >= 1) {
1246 									key = tokens[0];
1247 									preprocessorDefinitions.erase(key);
1248 									ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
1249 									definitionsChanged = true;
1250 								}
1251 							}
1252 						}
1253 					}
1254 				}
1255 			} else if (isoperator(sc.ch)) {
1256 				sc.SetState(SCE_C_OPERATOR|activitySet);
1257 			}
1258 		}
1259 
1260 		if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
1261 			chPrevNonWhite = sc.ch;
1262 			visibleChars++;
1263 		}
1264 		continuationLine = false;
1265 		sc.Forward();
1266 	}
1267 	const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
1268 	if (definitionsChanged || rawStringsChanged)
1269 		styler.ChangeLexerState(startPos, startPos + length);
1270 	sc.Complete();
1271 }
1272 
// Store both the current line's fold level and the next line's fold level in
// the level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "} else {".
1276 
/**
 * Compute fold levels for the lines covering [startPos, startPos + length) and
 * write them through a LexAccessor built on pAccess.
 *
 * Returns immediately when options.fold is off. Each stored value combines the
 * level used for the line itself with the level for the following line shifted
 * left by 16 bits, plus SC_FOLDLEVELWHITEFLAG / SC_FOLDLEVELHEADERFLAG where
 * applicable.
 *
 * @param startPos  First document position to examine.
 * @param length    Number of positions to examine.
 * @param initStyle Style treated as preceding the first character (it becomes
 *                  stylePrev on the first iteration).
 * @param pAccess   Document interface wrapped by the LexAccessor.
 */
void SCI_METHOD LexerCPP::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {

	if (!options.fold)
		return;

	LexAccessor styler(pAccess);

	unsigned int endPos = startPos + length;
	int visibleChars = 0;
	bool inLineComment = false;
	int lineCurrent = styler.GetLine(startPos);
	int levelCurrent = SC_FOLDLEVELBASE;
	// Resume from the "next line" level recorded (in the high half) for the previous line.
	if (lineCurrent > 0)
		levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
	unsigned int lineStartNext = styler.LineStart(lineCurrent+1);
	// Lowest level seen on the line before a '{'; used for the line when foldAtElse is on.
	int levelMinCurrent = levelCurrent;
	int levelNext = levelCurrent;
	char chNext = styler[startPos];
	int styleNext = MaskActive(styler.StyleAt(startPos));
	int style = MaskActive(initStyle);
	// Custom explicit fold markers are only used when both start and end strings are set.
	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
	for (unsigned int i = startPos; i < endPos; i++) {
		// Slide the one-character / one-style look-behind and look-ahead window.
		char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		int stylePrev = style;
		style = styleNext;
		styleNext = MaskActive(styler.StyleAt(i + 1));
		// True when i is the last position of the current line.
		bool atEOL = i == (lineStartNext-1);
		// Once a line comment style is seen, it stays flagged until the line is flushed.
		if ((style == SCE_C_COMMENTLINE) || (style == SCE_C_COMMENTLINEDOC))
			inLineComment = true;
		// Stream comments: open a fold on entering the comment style, close on leaving it.
		if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
			if (!IsStreamCommentStyle(stylePrev)) {
				levelNext++;
			} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
				// Comments don't end at end of line and the next character may be unstyled.
				levelNext--;
			}
		}
		// Explicit fold markers: either the user-defined start/end strings or the
		// built-in "//{" and "//}" sequences.
		if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
			if (userDefinedFoldMarkers) {
				if (styler.Match(i, options.foldExplicitStart.c_str())) {
					levelNext++;
				} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
					levelNext--;
				}
			} else {
				if ((ch == '/') && (chNext == '/')) {
					char chNext2 = styler.SafeGetCharAt(i + 2);
					if (chNext2 == '{') {
						levelNext++;
					} else if (chNext2 == '}') {
						levelNext--;
					}
				}
			}
		}
		// Preprocessor folding: inspect the word that follows '#' (after spaces/tabs).
		if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
			if (ch == '#') {
				unsigned int j = i + 1;
				while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
					j++;
				}
				// NOTE(review): Match presumably compares only the given text at j, so "if"
				// would also cover ifdef/ifndef and "end" would cover endif/endregion - confirm.
				if (styler.Match(j, "region") || styler.Match(j, "if")) {
					levelNext++;
				} else if (styler.Match(j, "end")) {
					levelNext--;
				}
			}
		}
		// Syntax-based folding on braces styled as operators.
		if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
			if (ch == '{') {
				// Measure the minimum before a '{' to allow
				// folding on "} else {"
				if (levelMinCurrent > levelNext) {
					levelMinCurrent = levelNext;
				}
				levelNext++;
			} else if (ch == '}') {
				levelNext--;
			}
		}
		if (!IsASpace(ch))
			visibleChars++;
		// Flush the level for this line at its end, or at the last position of the range.
		if (atEOL || (i == endPos-1)) {
			int levelUse = levelCurrent;
			if (options.foldSyntaxBased && options.foldAtElse) {
				levelUse = levelMinCurrent;
			}
			// Pack this line's level with the next line's level shifted into the upper bits.
			int lev = levelUse | levelNext << 16;
			if (visibleChars == 0 && options.foldCompact)
				lev |= SC_FOLDLEVELWHITEFLAG;
			// The line is a fold header when the level rises after it.
			if (levelUse < levelNext)
				lev |= SC_FOLDLEVELHEADERFLAG;
			// Only write when the value actually changes.
			if (lev != styler.LevelAt(lineCurrent)) {
				styler.SetLevel(lineCurrent, lev);
			}
			lineCurrent++;
			lineStartNext = styler.LineStart(lineCurrent+1);
			levelCurrent = levelNext;
			levelMinCurrent = levelCurrent;
			if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
				// There is an empty line at end of file so give it same level and empty
				styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
			}
			// Reset the per-line state for the next line.
			visibleChars = 0;
			inLineComment = false;
		}
	}
}
1386 
EvaluateTokens(std::vector<std::string> & tokens,const SymbolTable & preprocessorDefinitions)1387 void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
1388 
1389 	// Remove whitespace tokens
1390 	tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
1391 
1392 	// Evaluate defined statements to either 0 or 1
1393 	for (size_t i=0; (i+1)<tokens.size();) {
1394 		if (tokens[i] == "defined") {
1395 			const char *val = "0";
1396 			if (tokens[i+1] == "(") {
1397 				if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
1398 					// defined()
1399 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1400 				} else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1401 					// defined(<identifier>)
1402 					SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
1403 					if (it != preprocessorDefinitions.end()) {
1404 						val = "1";
1405 					}
1406 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1407 				} else {
1408 					// Spurious '(' so erase as more likely to result in false
1409 					tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1410 				}
1411 			} else {
1412 				// defined <identifier>
1413 				SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
1414 				if (it != preprocessorDefinitions.end()) {
1415 					val = "1";
1416 				}
1417 			}
1418 			tokens[i] = val;
1419 		} else {
1420 			i++;
1421 		}
1422 	}
1423 
1424 	// Evaluate identifiers
1425 	const size_t maxIterations = 100;
1426 	size_t iterations = 0;	// Limit number of iterations in case there is a recursive macro.
1427 	for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
1428 		iterations++;
1429 		if (setWordStart.Contains(static_cast<unsigned char>(tokens[i][0]))) {
1430 			SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
1431 			if (it != preprocessorDefinitions.end()) {
1432 				// Tokenize value
1433 				std::vector<std::string> macroTokens = Tokenize(it->second.value);
1434 				if (it->second.IsMacro()) {
1435 					if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
1436 						// Create map of argument name to value
1437 						std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
1438 						std::map<std::string, std::string> arguments;
1439 						size_t arg = 0;
1440 						size_t tok = i+2;
1441 						while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
1442 							if (tokens.at(tok) != ",") {
1443 								arguments[argumentNames.at(arg)] = tokens.at(tok);
1444 								arg++;
1445 							}
1446 							tok++;
1447 						}
1448 
1449 						// Remove invocation
1450 						tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
1451 
1452 						// Substitute values into macro
1453 						macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
1454 
1455 						for (size_t iMacro = 0; iMacro < macroTokens.size();) {
1456 							if (setWordStart.Contains(static_cast<unsigned char>(macroTokens[iMacro][0]))) {
1457 								std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
1458 								if (itFind != arguments.end()) {
1459 									// TODO: Possible that value will be expression so should insert tokenized form
1460 									macroTokens[iMacro] = itFind->second;
1461 								}
1462 							}
1463 							iMacro++;
1464 						}
1465 
1466 						// Insert results back into tokens
1467 						tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1468 
1469 					} else {
1470 						i++;
1471 					}
1472 				} else {
1473 					// Remove invocation
1474 					tokens.erase(tokens.begin() + i);
1475 					// Insert results back into tokens
1476 					tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1477 				}
1478 			} else {
1479 				// Identifier not found
1480 				tokens.erase(tokens.begin() + i);
1481 			}
1482 		} else {
1483 			i++;
1484 		}
1485 	}
1486 
1487 	// Find bracketed subexpressions and recurse on them
1488 	BracketPair bracketPair = FindBracketPair(tokens);
1489 	while (bracketPair.itBracket != tokens.end()) {
1490 		std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
1491 		EvaluateTokens(inBracket, preprocessorDefinitions);
1492 
1493 		// The insertion is done before the removal because there were failures with the opposite approach
1494 		tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
1495 
1496 		bracketPair = FindBracketPair(tokens);
1497 		tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
1498 
1499 		bracketPair = FindBracketPair(tokens);
1500 	}
1501 
1502 	// Evaluate logical negations
1503 	for (size_t j=0; (j+1)<tokens.size();) {
1504 		if (setNegationOp.Contains(tokens[j][0])) {
1505 			int isTrue = atoi(tokens[j+1].c_str());
1506 			if (tokens[j] == "!")
1507 				isTrue = !isTrue;
1508 			std::vector<std::string>::iterator itInsert =
1509 				tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1510 			tokens.insert(itInsert, isTrue ? "1" : "0");
1511 		} else {
1512 			j++;
1513 		}
1514 	}
1515 
1516 	// Evaluate expressions in precedence order
1517 	enum precedence { precArithmetic, precRelative, precLogical };
1518 	for (int prec=precArithmetic; prec <= precLogical; prec++) {
1519 		// Looking at 3 tokens at a time so end at 2 before end
1520 		for (size_t k=0; (k+2)<tokens.size();) {
1521 			char chOp = tokens[k+1][0];
1522 			if (
1523 				((prec==precArithmetic) && setArithmethicOp.Contains(chOp)) ||
1524 				((prec==precRelative) && setRelOp.Contains(chOp)) ||
1525 				((prec==precLogical) && setLogicalOp.Contains(chOp))
1526 				) {
1527 				int valA = atoi(tokens[k].c_str());
1528 				int valB = atoi(tokens[k+2].c_str());
1529 				int result = 0;
1530 				if (tokens[k+1] == "+")
1531 					result = valA + valB;
1532 				else if (tokens[k+1] == "-")
1533 					result = valA - valB;
1534 				else if (tokens[k+1] == "*")
1535 					result = valA * valB;
1536 				else if (tokens[k+1] == "/")
1537 					result = valA / (valB ? valB : 1);
1538 				else if (tokens[k+1] == "%")
1539 					result = valA % (valB ? valB : 1);
1540 				else if (tokens[k+1] == "<")
1541 					result = valA < valB;
1542 				else if (tokens[k+1] == "<=")
1543 					result = valA <= valB;
1544 				else if (tokens[k+1] == ">")
1545 					result = valA > valB;
1546 				else if (tokens[k+1] == ">=")
1547 					result = valA >= valB;
1548 				else if (tokens[k+1] == "==")
1549 					result = valA == valB;
1550 				else if (tokens[k+1] == "!=")
1551 					result = valA != valB;
1552 				else if (tokens[k+1] == "||")
1553 					result = valA || valB;
1554 				else if (tokens[k+1] == "&&")
1555 					result = valA && valB;
1556 				char sResult[30];
1557 				sprintf(sResult, "%d", result);
1558 				std::vector<std::string>::iterator itInsert =
1559 					tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1560 				tokens.insert(itInsert, sResult);
1561 			} else {
1562 				k++;
1563 			}
1564 		}
1565 	}
1566 }
1567 
Tokenize(const std::string & expr) const1568 std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const {
1569 	// Break into tokens
1570 	std::vector<std::string> tokens;
1571 	const char *cp = expr.c_str();
1572 	while (*cp) {
1573 		std::string word;
1574 		if (setWord.Contains(static_cast<unsigned char>(*cp))) {
1575 			// Identifiers and numbers
1576 			while (setWord.Contains(static_cast<unsigned char>(*cp))) {
1577 				word += *cp;
1578 				cp++;
1579 			}
1580 		} else if (IsSpaceOrTab(*cp)) {
1581 			while (IsSpaceOrTab(*cp)) {
1582 				word += *cp;
1583 				cp++;
1584 			}
1585 		} else if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1586 			word += *cp;
1587 			cp++;
1588 			if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1589 				word += *cp;
1590 				cp++;
1591 			}
1592 		} else if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1593 			word += *cp;
1594 			cp++;
1595 			if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1596 				word += *cp;
1597 				cp++;
1598 			}
1599 		} else {
1600 			// Should handle strings, characters, and comments here
1601 			word += *cp;
1602 			cp++;
1603 		}
1604 		tokens.push_back(word);
1605 	}
1606 	return tokens;
1607 }
1608 
EvaluateExpression(const std::string & expr,const SymbolTable & preprocessorDefinitions)1609 bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
1610 	std::vector<std::string> tokens = Tokenize(expr);
1611 
1612 	EvaluateTokens(tokens, preprocessorDefinitions);
1613 
1614 	// "0" or "" -> false else true
1615 	bool isFalse = tokens.empty() ||
1616 		((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1617 	return !isFalse;
1618 }
1619 
// Lexer module objects for this lexer: "cpp" is built by LexerFactoryCPP and
// "cppnocase" by LexerFactoryCPPInsensitive. Both use the cppWordLists descriptions.
LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);
1622