// Scintilla source code edit control
/** @file LexCPP.cxx
 ** Lexer for C++, C, Java, and JavaScript.
 ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
 **/
// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <cstdlib>
10 #include <cassert>
11 #include <cstring>
12
13 #include <utility>
14 #include <string>
15 #include <vector>
16 #include <map>
17 #include <algorithm>
18 #include <iterator>
19
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 #include "SciLexer.h"
23
24 #include "StringCopy.h"
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SparseState.h"
33 #include "SubStyles.h"
34
35 using namespace Scintilla;
36
37 namespace {
38 // Use an unnamed namespace to protect the functions and classes from name conflicts
39
IsSpaceEquiv(int state)40 constexpr bool IsSpaceEquiv(int state) noexcept {
41 return (state <= SCE_C_COMMENTDOC) ||
42 // including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
43 (state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
44 (state == SCE_C_COMMENTDOCKEYWORDERROR);
45 }
46
47 // Preconditions: sc.currentPos points to a character after '+' or '-'.
48 // The test for pos reaching 0 should be redundant,
49 // and is in only for safety measures.
50 // Limitation: this code will give the incorrect answer for code like
51 // a = b+++/ptn/...
52 // Putting a space between the '++' post-inc operator and the '+' binary op
53 // fixes this, and is highly recommended for readability anyway.
FollowsPostfixOperator(const StyleContext & sc,LexAccessor & styler)54 bool FollowsPostfixOperator(const StyleContext &sc, LexAccessor &styler) {
55 Sci_Position pos = sc.currentPos;
56 while (--pos > 0) {
57 const char ch = styler[pos];
58 if (ch == '+' || ch == '-') {
59 return styler[pos - 1] == ch;
60 }
61 }
62 return false;
63 }
64
followsReturnKeyword(const StyleContext & sc,LexAccessor & styler)65 bool followsReturnKeyword(const StyleContext &sc, LexAccessor &styler) {
66 // Don't look at styles, so no need to flush.
67 Sci_Position pos = sc.currentPos;
68 const Sci_Position currentLine = styler.GetLine(pos);
69 const Sci_Position lineStartPos = styler.LineStart(currentLine);
70 while (--pos > lineStartPos) {
71 const char ch = styler.SafeGetCharAt(pos);
72 if (ch != ' ' && ch != '\t') {
73 break;
74 }
75 }
76 const char *retBack = "nruter";
77 const char *s = retBack;
78 while (*s
79 && pos >= lineStartPos
80 && styler.SafeGetCharAt(pos) == *s) {
81 s++;
82 pos--;
83 }
84 return !*s;
85 }
86
// True when ch is a space or a horizontal tab; every other value gives false.
constexpr bool IsSpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
90
OnlySpaceOrTab(const std::string & s)91 bool OnlySpaceOrTab(const std::string &s) noexcept {
92 for (const char ch : s) {
93 if (!IsSpaceOrTab(ch))
94 return false;
95 }
96 return true;
97 }
98
// Split text into the pieces found between occurrences of separator.
// Empty text gives an empty vector; otherwise there is always one more piece
// than there are separators, so leading, trailing and adjacent separators
// produce empty pieces.
std::vector<std::string> StringSplit(const std::string &text, int separator) {
	std::vector<std::string> pieces;
	if (text.empty()) {
		return pieces;
	}
	// Start the first piece; each separator met opens the next one.
	pieces.emplace_back();
	for (std::string::const_iterator it = text.begin(); it != text.end(); ++it) {
		if (*it != separator) {
			pieces.back().push_back(*it);
			continue;
		}
		pieces.emplace_back();
	}
	return pieces;
}
110
// Positions of an opening "(" token and of the ")" token that closes it.
// Both members equal the end of the token vector when no pair was found.
struct BracketPair {
	std::vector<std::string>::iterator itBracket;
	std::vector<std::string>::iterator itEndBracket;
};

// Locate the first "(" token in tokens together with its matching ")",
// taking nested brackets into account.
// When there is no "(" or it is never closed, both iterators of the result
// are tokens.end().
BracketPair FindBracketPair(std::vector<std::string> &tokens) {
	using TokenIterator = std::vector<std::string>::iterator;
	const TokenIterator last = tokens.end();
	const TokenIterator opening = std::find(tokens.begin(), last, "(");

	// The scan begins on the opening bracket itself, so depth is at least 1
	// before any ")" can decrement it.
	size_t depth = 0;
	for (TokenIterator cursor = opening; cursor != last; ++cursor) {
		if (*cursor == "(") {
			++depth;
		} else if (*cursor == ")") {
			--depth;
			if (depth == 0) {
				return BracketPair{opening, cursor};
			}
		}
	}
	return BracketPair{last, last};
}
140
highlightTaskMarker(StyleContext & sc,LexAccessor & styler,int activity,const WordList & markerList,bool caseSensitive)141 void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
142 int activity, const WordList &markerList, bool caseSensitive){
143 if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
144 constexpr Sci_PositionU lengthMarker = 50;
145 char marker[lengthMarker+1] = "";
146 const Sci_PositionU currPos = sc.currentPos;
147 Sci_PositionU i = 0;
148 while (i < lengthMarker) {
149 const char ch = styler.SafeGetCharAt(currPos + i);
150 if (IsASpace(ch) || isoperator(ch)) {
151 break;
152 }
153 if (caseSensitive)
154 marker[i] = ch;
155 else
156 marker[i] = MakeLowerCase(ch);
157 i++;
158 }
159 marker[i] = '\0';
160 if (markerList.InList(marker)) {
161 sc.SetState(SCE_C_TASKMARKER|activity);
162 }
163 }
164 }
165
166 class EscapeSequence {
167 const CharacterSet setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
168 const CharacterSet setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
169 const CharacterSet setNoneNumeric;
170 const CharacterSet *escapeSetValid = nullptr;
171 int digitsLeft = 0;
172 public:
173 EscapeSequence() = default;
resetEscapeState(int nextChar)174 void resetEscapeState(int nextChar) {
175 digitsLeft = 0;
176 escapeSetValid = &setNoneNumeric;
177 if (nextChar == 'U') {
178 digitsLeft = 9;
179 escapeSetValid = &setHexDigits;
180 } else if (nextChar == 'u') {
181 digitsLeft = 5;
182 escapeSetValid = &setHexDigits;
183 } else if (nextChar == 'x') {
184 digitsLeft = 5;
185 escapeSetValid = &setHexDigits;
186 } else if (setOctDigits.Contains(nextChar)) {
187 digitsLeft = 3;
188 escapeSetValid = &setOctDigits;
189 }
190 }
atEscapeEnd(int currChar) const191 bool atEscapeEnd(int currChar) const {
192 return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
193 }
consumeDigit()194 void consumeDigit() noexcept {
195 digitsLeft--;
196 }
197 };
198
GetRestOfLine(LexAccessor & styler,Sci_Position start,bool allowSpace)199 std::string GetRestOfLine(LexAccessor &styler, Sci_Position start, bool allowSpace) {
200 std::string restOfLine;
201 Sci_Position line = styler.GetLine(start);
202 Sci_Position pos = start;
203 Sci_Position endLine = styler.LineEnd(line);
204 char ch = styler.SafeGetCharAt(start, '\n');
205 while (pos < endLine) {
206 if (ch == '\\' && ((pos + 1) == endLine)) {
207 // Continuation line
208 line++;
209 pos = styler.LineStart(line);
210 endLine = styler.LineEnd(line);
211 ch = styler.SafeGetCharAt(pos, '\n');
212 } else {
213 const char chNext = styler.SafeGetCharAt(pos + 1, '\n');
214 if (ch == '/' && (chNext == '/' || chNext == '*'))
215 break;
216 if (allowSpace || (ch != ' ')) {
217 restOfLine += ch;
218 }
219 pos++;
220 ch = chNext;
221 }
222 }
223 return restOfLine;
224 }
225
IsStreamCommentStyle(int style)226 constexpr bool IsStreamCommentStyle(int style) noexcept {
227 return style == SCE_C_COMMENT ||
228 style == SCE_C_COMMENTDOC ||
229 style == SCE_C_COMMENTDOCKEYWORD ||
230 style == SCE_C_COMMENTDOCKEYWORDERROR;
231 }
232
233 struct PPDefinition {
234 Sci_Position line;
235 std::string key;
236 std::string value;
237 bool isUndef;
238 std::string arguments;
PPDefinition__anon4c384e1b0111::PPDefinition239 PPDefinition(Sci_Position line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, const std::string &arguments_="") :
240 line(line_), key(key_), value(value_), isUndef(isUndef_), arguments(arguments_) {
241 }
242 };
243
// Bit OR-ed into a style number to mark text inside an inactive preprocessor
// section; ActiveState() returns either this value or 0.
constexpr int inactiveFlag = 0x40;

class LinePPState {
	// Track the state of preprocessor conditionals to allow showing active and inactive
	// code in different styles.
	// Only works up to 31 levels of conditional nesting; deeper levels are
	// counted but otherwise ignored.

	// state is a bit mask with 1 bit per level
	// bit is 1 for level if section inactive, so any bits set = inactive style
	int state = 0;
	// ifTaken is a bit mask with 1 bit per level
	// bit is 1 for level if some branch at this level has been taken
	int ifTaken = 0;
	// level is the nesting level of #if constructs, -1 when outside all of them
	int level = -1;
	static constexpr int maximumNestingLevel = 31;
	// True when level has a bit of its own in the two masks.
	bool ValidLevel() const noexcept {
		return level >= 0 && level < maximumNestingLevel;
	}
	// Mask selecting the bit of the current level.
	// Outside any conditional (level < 0) the mask is 1, as it always was.
	// For levels too deep to be tracked the mask is empty: shifting by such
	// a level would be undefined behaviour, and no bit is ever stored for it.
	int maskLevel() const noexcept {
		if (level < 0) {
			return 1;
		}
		if (level >= maximumNestingLevel) {
			return 0;
		}
		return 1 << level;
	}
public:
	LinePPState() noexcept = default;
	// No enclosing tracked section is inactive.
	bool IsActive() const noexcept {
		return state == 0;
	}
	// At least one enclosing tracked section is inactive.
	bool IsInactive() const noexcept {
		return state != 0;
	}
	// Style modifier for the current position: inactiveFlag or 0.
	int ActiveState() const noexcept {
		return state ? inactiveFlag : 0;
	}
	// Has some branch of the current conditional been taken already?
	// Always false for levels beyond the tracked depth.
	bool CurrentIfTaken() const noexcept {
		return (ifTaken & maskLevel()) != 0;
	}
	// Enter a new conditional whose first branch is active when on is true.
	void StartSection(bool on) noexcept {
		level++;
		if (ValidLevel()) {
			if (on) {
				state &= ~maskLevel();
				ifTaken |= maskLevel();
			} else {
				state |= maskLevel();
				ifTaken &= ~maskLevel();
			}
		}
	}
	// Leave the current conditional, clearing its bits.
	void EndSection() noexcept {
		if (ValidLevel()) {
			state &= ~maskLevel();
			ifTaken &= ~maskLevel();
		}
		level--;
	}
	// Flip the current level between active and inactive and remember that
	// a branch has been taken at this level.
	void InvertCurrentLevel() noexcept {
		if (ValidLevel()) {
			state ^= maskLevel();
			ifTaken |= maskLevel();
		}
	}
};
311
312 // Hold the preprocessor state for each line seen.
313 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
314 class PPStates {
315 std::vector<LinePPState> vlls;
316 public:
ForLine(Sci_Position line) const317 LinePPState ForLine(Sci_Position line) const noexcept {
318 if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
319 return vlls[line];
320 } else {
321 return LinePPState();
322 }
323 }
Add(Sci_Position line,LinePPState lls)324 void Add(Sci_Position line, LinePPState lls) {
325 vlls.resize(line+1);
326 vlls[line] = lls;
327 }
328 };
329
330 // An individual named option for use in an OptionSet
331
// Options used for LexerCPP.
// Every member carries its default value; OptionSetCPP binds each one to a
// property name.
struct OptionsCPP {
	// Lexing behaviour
	bool stylingWithinPreprocessor = false;
	bool identifiersAllowDollars = true;
	bool trackPreprocessor = true;
	bool updatePreprocessor = true;
	bool verbatimStringsAllowEscapes = false;
	bool triplequotedStrings = false;
	bool hashquotedStrings = false;
	bool backQuotedStrings = false;
	bool escapeSequence = false;
	// Folding behaviour
	bool fold = false;
	bool foldSyntaxBased = true;
	bool foldComment = false;
	bool foldCommentMultiline = true;
	bool foldCommentExplicit = true;
	// Both explicit fold strings start out empty.
	std::string foldExplicitStart;
	std::string foldExplicitEnd;
	bool foldExplicitAnywhere = false;
	bool foldPreprocessor = false;
	bool foldPreprocessorAtElse = false;
	bool foldCompact = false;
	bool foldAtElse = false;
};
379
// Descriptions of the word lists understood by the lexer, ended by a null
// pointer.
// NOTE(review): the position of each entry appears to match the index n
// handled by LexerCPP::WordListSet - confirm against DefineWordListSets.
const char *const cppWordLists[] = {
	"Primary keywords and identifiers",        // 0
	"Secondary keywords and identifiers",      // 1
	"Documentation comment keywords",          // 2
	"Global classes and typedefs",             // 3
	"Preprocessor definitions",                // 4
	"Task marker and error marker keywords",   // 5
	nullptr,
};
389
390 struct OptionSetCPP : public OptionSet<OptionsCPP> {
OptionSetCPP__anon4c384e1b0111::OptionSetCPP391 OptionSetCPP() {
392 DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
393 "For C++ code, determines whether all preprocessor code is styled in the "
394 "preprocessor style (0, the default) or only from the initial # to the end "
395 "of the command word(1).");
396
397 DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
398 "Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
399
400 DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
401 "Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
402
403 DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
404 "Set to 1 to update preprocessor definitions when #define found.");
405
406 DefineProperty("lexer.cpp.verbatim.strings.allow.escapes", &OptionsCPP::verbatimStringsAllowEscapes,
407 "Set to 1 to allow verbatim strings to contain escape sequences.");
408
409 DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
410 "Set to 1 to enable highlighting of triple-quoted strings.");
411
412 DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
413 "Set to 1 to enable highlighting of hash-quoted strings.");
414
415 DefineProperty("lexer.cpp.backquoted.strings", &OptionsCPP::backQuotedStrings,
416 "Set to 1 to enable highlighting of back-quoted raw strings .");
417
418 DefineProperty("lexer.cpp.escape.sequence", &OptionsCPP::escapeSequence,
419 "Set to 1 to enable highlighting of escape sequences in strings");
420
421 DefineProperty("fold", &OptionsCPP::fold);
422
423 DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
424 "Set this property to 0 to disable syntax based folding.");
425
426 DefineProperty("fold.comment", &OptionsCPP::foldComment,
427 "This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
428 "Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
429 "at the end of a section that should fold.");
430
431 DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
432 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
433
434 DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
435 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
436
437 DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
438 "The string to use for explicit fold start points, replacing the standard //{.");
439
440 DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
441 "The string to use for explicit fold end points, replacing the standard //}.");
442
443 DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
444 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
445
446 DefineProperty("fold.cpp.preprocessor.at.else", &OptionsCPP::foldPreprocessorAtElse,
447 "This option enables folding on a preprocessor #else or #endif line of an #if statement.");
448
449 DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
450 "This option enables folding preprocessor directives when using the C++ lexer. "
451 "Includes C#'s explicit #region and #endregion folding directives.");
452
453 DefineProperty("fold.compact", &OptionsCPP::foldCompact);
454
455 DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
456 "This option enables C++ folding on a \"} else {\" line of an if statement.");
457
458 DefineWordListSets(cppWordLists);
459 }
460 };
461
462 const char styleSubable[] = {SCE_C_IDENTIFIER, SCE_C_COMMENTDOCKEYWORD, 0};
463
464 LexicalClass lexicalClasses[] = {
465 // Lexer Cpp SCLEX_CPP SCE_C_:
466 0, "SCE_C_DEFAULT", "default", "White space",
467 1, "SCE_C_COMMENT", "comment", "Comment: /* */.",
468 2, "SCE_C_COMMENTLINE", "comment line", "Line Comment: //.",
469 3, "SCE_C_COMMENTDOC", "comment documentation", "Doc comment: block comments beginning with /** or /*!",
470 4, "SCE_C_NUMBER", "literal numeric", "Number",
471 5, "SCE_C_WORD", "keyword", "Keyword",
472 6, "SCE_C_STRING", "literal string", "Double quoted string",
473 7, "SCE_C_CHARACTER", "literal string character", "Single quoted string",
474 8, "SCE_C_UUID", "literal uuid", "UUIDs (only in IDL)",
475 9, "SCE_C_PREPROCESSOR", "preprocessor", "Preprocessor",
476 10, "SCE_C_OPERATOR", "operator", "Operators",
477 11, "SCE_C_IDENTIFIER", "identifier", "Identifiers",
478 12, "SCE_C_STRINGEOL", "error literal string", "End of line where string is not closed",
479 13, "SCE_C_VERBATIM", "literal string multiline raw", "Verbatim strings for C#",
480 14, "SCE_C_REGEX", "literal regex", "Regular expressions for JavaScript",
481 15, "SCE_C_COMMENTLINEDOC", "comment documentation line", "Doc Comment Line: line comments beginning with /// or //!.",
482 16, "SCE_C_WORD2", "identifier", "Keywords2",
483 17, "SCE_C_COMMENTDOCKEYWORD", "comment documentation keyword", "Comment keyword",
484 18, "SCE_C_COMMENTDOCKEYWORDERROR", "error comment documentation keyword", "Comment keyword error",
485 19, "SCE_C_GLOBALCLASS", "identifier", "Global class",
486 20, "SCE_C_STRINGRAW", "literal string multiline raw", "Raw strings for C++0x",
487 21, "SCE_C_TRIPLEVERBATIM", "literal string multiline raw", "Triple-quoted strings for Vala",
488 22, "SCE_C_HASHQUOTEDSTRING", "literal string", "Hash-quoted strings for Pike",
489 23, "SCE_C_PREPROCESSORCOMMENT", "comment preprocessor", "Preprocessor stream comment",
490 24, "SCE_C_PREPROCESSORCOMMENTDOC", "comment preprocessor documentation", "Preprocessor stream doc comment",
491 25, "SCE_C_USERLITERAL", "literal", "User defined literals",
492 26, "SCE_C_TASKMARKER", "comment taskmarker", "Task Marker",
493 27, "SCE_C_ESCAPESEQUENCE", "literal string escapesequence", "Escape sequence",
494 };
495
496 const int sizeLexicalClasses = static_cast<int>(std::size(lexicalClasses));
497
498 }
499
500 class LexerCPP : public ILexer5 {
501 bool caseSensitive;
502 CharacterSet setWord;
503 CharacterSet setNegationOp;
504 CharacterSet setAddOp;
505 CharacterSet setMultOp;
506 CharacterSet setRelOp;
507 CharacterSet setLogicalOp;
508 CharacterSet setWordStart;
509 PPStates vlls;
510 std::vector<PPDefinition> ppDefineHistory;
511 WordList keywords;
512 WordList keywords2;
513 WordList keywords3;
514 WordList keywords4;
515 WordList ppDefinitions;
516 WordList markerList;
517 struct SymbolValue {
518 std::string value;
519 std::string arguments;
520 SymbolValue() noexcept = default;
SymbolValueLexerCPP::SymbolValue521 SymbolValue(const std::string &value_, const std::string &arguments_) : value(value_), arguments(arguments_) {
522 }
operator =LexerCPP::SymbolValue523 SymbolValue &operator = (const std::string &value_) {
524 value = value_;
525 arguments.clear();
526 return *this;
527 }
IsMacroLexerCPP::SymbolValue528 bool IsMacro() const noexcept {
529 return !arguments.empty();
530 }
531 };
532 typedef std::map<std::string, SymbolValue> SymbolTable;
533 SymbolTable preprocessorDefinitionsStart;
534 OptionsCPP options;
535 OptionSetCPP osCPP;
536 EscapeSequence escapeSeq;
537 SparseState<std::string> rawStringTerminators;
538 enum { ssIdentifier, ssDocKeyword };
539 SubStyles subStyles;
540 std::string returnBuffer;
541 public:
LexerCPP(bool caseSensitive_)542 explicit LexerCPP(bool caseSensitive_) :
543 caseSensitive(caseSensitive_),
544 setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
545 setNegationOp(CharacterSet::setNone, "!"),
546 setAddOp(CharacterSet::setNone, "+-"),
547 setMultOp(CharacterSet::setNone, "*/%"),
548 setRelOp(CharacterSet::setNone, "=!<>"),
549 setLogicalOp(CharacterSet::setNone, "|&"),
550 subStyles(styleSubable, 0x80, 0x40, inactiveFlag) {
551 }
552 // Deleted so LexerCPP objects can not be copied.
553 LexerCPP(const LexerCPP &) = delete;
554 LexerCPP(LexerCPP &&) = delete;
555 void operator=(const LexerCPP &) = delete;
556 void operator=(LexerCPP &&) = delete;
~LexerCPP()557 virtual ~LexerCPP() {
558 }
Release()559 void SCI_METHOD Release() noexcept override {
560 delete this;
561 }
Version() const562 int SCI_METHOD Version() const noexcept override {
563 return lvRelease5;
564 }
PropertyNames()565 const char * SCI_METHOD PropertyNames() override {
566 return osCPP.PropertyNames();
567 }
PropertyType(const char * name)568 int SCI_METHOD PropertyType(const char *name) override {
569 return osCPP.PropertyType(name);
570 }
DescribeProperty(const char * name)571 const char * SCI_METHOD DescribeProperty(const char *name) override {
572 return osCPP.DescribeProperty(name);
573 }
574 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
DescribeWordListSets()575 const char * SCI_METHOD DescribeWordListSets() override {
576 return osCPP.DescribeWordListSets();
577 }
578 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
579 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
580 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
581
PrivateCall(int,void *)582 void * SCI_METHOD PrivateCall(int, void *) noexcept override {
583 return nullptr;
584 }
585
LineEndTypesSupported()586 int SCI_METHOD LineEndTypesSupported() noexcept override {
587 return SC_LINE_END_TYPE_UNICODE;
588 }
589
AllocateSubStyles(int styleBase,int numberStyles)590 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
591 return subStyles.Allocate(styleBase, numberStyles);
592 }
SubStylesStart(int styleBase)593 int SCI_METHOD SubStylesStart(int styleBase) override {
594 return subStyles.Start(styleBase);
595 }
SubStylesLength(int styleBase)596 int SCI_METHOD SubStylesLength(int styleBase) override {
597 return subStyles.Length(styleBase);
598 }
StyleFromSubStyle(int subStyle)599 int SCI_METHOD StyleFromSubStyle(int subStyle) override {
600 const int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
601 const int inactive = subStyle & inactiveFlag;
602 return styleBase | inactive;
603 }
PrimaryStyleFromStyle(int style)604 int SCI_METHOD PrimaryStyleFromStyle(int style) noexcept override {
605 return MaskActive(style);
606 }
FreeSubStyles()607 void SCI_METHOD FreeSubStyles() override {
608 subStyles.Free();
609 }
SetIdentifiers(int style,const char * identifiers)610 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
611 subStyles.SetIdentifiers(style, identifiers);
612 }
DistanceToSecondaryStyles()613 int SCI_METHOD DistanceToSecondaryStyles() noexcept override {
614 return inactiveFlag;
615 }
GetSubStyleBases()616 const char * SCI_METHOD GetSubStyleBases() noexcept override {
617 return styleSubable;
618 }
NamedStyles()619 int SCI_METHOD NamedStyles() override {
620 return std::max(subStyles.LastAllocated() + 1,
621 sizeLexicalClasses) +
622 inactiveFlag;
623 }
NameOfStyle(int style)624 const char * SCI_METHOD NameOfStyle(int style) override {
625 if (style >= NamedStyles())
626 return "";
627 if (style < sizeLexicalClasses)
628 return lexicalClasses[style].name;
629 // TODO: inactive and substyles
630 return "";
631 }
TagsOfStyle(int style)632 const char * SCI_METHOD TagsOfStyle(int style) override {
633 if (style >= NamedStyles())
634 return "Excess";
635 returnBuffer.clear();
636 const int firstSubStyle = subStyles.FirstAllocated();
637 if (firstSubStyle >= 0) {
638 const int lastSubStyle = subStyles.LastAllocated();
639 if (((style >= firstSubStyle) && (style <= (lastSubStyle))) ||
640 ((style >= firstSubStyle + inactiveFlag) && (style <= (lastSubStyle + inactiveFlag)))) {
641 int styleActive = style;
642 if (style > lastSubStyle) {
643 returnBuffer = "inactive ";
644 styleActive -= inactiveFlag;
645 }
646 const int styleMain = StyleFromSubStyle(styleActive);
647 returnBuffer += lexicalClasses[styleMain].tags;
648 return returnBuffer.c_str();
649 }
650 }
651 if (style < sizeLexicalClasses)
652 return lexicalClasses[style].tags;
653 if (style >= inactiveFlag) {
654 returnBuffer = "inactive ";
655 const int styleActive = style - inactiveFlag;
656 if (styleActive < sizeLexicalClasses)
657 returnBuffer += lexicalClasses[styleActive].tags;
658 else
659 returnBuffer = "";
660 return returnBuffer.c_str();
661 }
662 return "";
663 }
DescriptionOfStyle(int style)664 const char * SCI_METHOD DescriptionOfStyle(int style) override {
665 if (style >= NamedStyles())
666 return "";
667 if (style < sizeLexicalClasses)
668 return lexicalClasses[style].description;
669 // TODO: inactive and substyles
670 return "";
671 }
672
673 // ILexer5 methods
GetName()674 const char * SCI_METHOD GetName() override {
675 return caseSensitive ? "cpp" : "cppnocase";
676 }
GetIdentifier()677 int SCI_METHOD GetIdentifier() override {
678 return caseSensitive ? SCLEX_CPP : SCLEX_CPPNOCASE;
679 }
680 const char * SCI_METHOD PropertyGet(const char *key) override;
681
LexerFactoryCPP()682 static ILexer5 *LexerFactoryCPP() {
683 return new LexerCPP(true);
684 }
LexerFactoryCPPInsensitive()685 static ILexer5 *LexerFactoryCPPInsensitive() {
686 return new LexerCPP(false);
687 }
MaskActive(int style)688 constexpr static int MaskActive(int style) noexcept {
689 return style & ~inactiveFlag;
690 }
691 void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
692 std::vector<std::string> Tokenize(const std::string &expr) const;
693 bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
694 };
695
PropertySet(const char * key,const char * val)696 Sci_Position SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
697 if (osCPP.PropertySet(&options, key, val)) {
698 if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
699 setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
700 if (options.identifiersAllowDollars) {
701 setWord.Add('$');
702 }
703 }
704 return 0;
705 }
706 return -1;
707 }
708
PropertyGet(const char * key)709 const char * SCI_METHOD LexerCPP::PropertyGet(const char *key) {
710 return osCPP.PropertyGet(key);
711 }
712
WordListSet(int n,const char * wl)713 Sci_Position SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
714 WordList *wordListN = nullptr;
715 switch (n) {
716 case 0:
717 wordListN = &keywords;
718 break;
719 case 1:
720 wordListN = &keywords2;
721 break;
722 case 2:
723 wordListN = &keywords3;
724 break;
725 case 3:
726 wordListN = &keywords4;
727 break;
728 case 4:
729 wordListN = &ppDefinitions;
730 break;
731 case 5:
732 wordListN = &markerList;
733 break;
734 }
735 Sci_Position firstModification = -1;
736 if (wordListN) {
737 WordList wlNew;
738 wlNew.Set(wl);
739 if (*wordListN != wlNew) {
740 wordListN->Set(wl);
741 firstModification = 0;
742 if (n == 4) {
743 // Rebuild preprocessorDefinitions
744 preprocessorDefinitionsStart.clear();
745 for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
746 const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
747 const char *cpEquals = strchr(cpDefinition, '=');
748 if (cpEquals) {
749 std::string name(cpDefinition, cpEquals - cpDefinition);
750 std::string val(cpEquals+1);
751 const size_t bracket = name.find('(');
752 const size_t bracketEnd = name.find(')');
753 if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
754 // Macro
755 std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
756 name = name.substr(0, bracket);
757 preprocessorDefinitionsStart[name] = SymbolValue(val, args);
758 } else {
759 preprocessorDefinitionsStart[name] = val;
760 }
761 } else {
762 std::string name(cpDefinition);
763 std::string val("1");
764 preprocessorDefinitionsStart[name] = val;
765 }
766 }
767 }
768 }
769 }
770 return firstModification;
771 }
772
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)773 void SCI_METHOD LexerCPP::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
774 LexAccessor styler(pAccess);
775
776 CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
777 CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
778
779 CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
780
781 setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
782
783 CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
784
785 if (options.identifiersAllowDollars) {
786 setWordStart.Add('$');
787 }
788
789 int chPrevNonWhite = ' ';
790 int visibleChars = 0;
791 bool lastWordWasUUID = false;
792 int styleBeforeDCKeyword = SCE_C_DEFAULT;
793 int styleBeforeTaskMarker = SCE_C_DEFAULT;
794 bool continuationLine = false;
795 bool isIncludePreprocessor = false;
796 bool isStringInPreprocessor = false;
797 bool inRERange = false;
798 bool seenDocKeyBrace = false;
799
800 Sci_Position lineCurrent = styler.GetLine(startPos);
801 if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
802 (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
803 (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
804 // Set continuationLine if last character of previous line is '\'
805 if (lineCurrent > 0) {
806 const Sci_Position endLinePrevious = styler.LineEnd(lineCurrent - 1);
807 if (endLinePrevious > 0) {
808 continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
809 }
810 }
811 }
812
813 // look back to set chPrevNonWhite properly for better regex colouring
814 if (startPos > 0) {
815 Sci_Position back = startPos;
816 while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
817 ;
818 if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
819 chPrevNonWhite = styler.SafeGetCharAt(back);
820 }
821 }
822
823 StyleContext sc(startPos, length, initStyle, styler);
824 LinePPState preproc = vlls.ForLine(lineCurrent);
825
826 bool definitionsChanged = false;
827
828 // Truncate ppDefineHistory before current line
829
830 if (!options.updatePreprocessor)
831 ppDefineHistory.clear();
832
833 std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(),
834 [lineCurrent](const PPDefinition &p) noexcept { return p.line >= lineCurrent; });
835 if (itInvalid != ppDefineHistory.end()) {
836 ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
837 definitionsChanged = true;
838 }
839
840 SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
841 for (const PPDefinition &ppDef : ppDefineHistory) {
842 if (ppDef.isUndef)
843 preprocessorDefinitions.erase(ppDef.key);
844 else
845 preprocessorDefinitions[ppDef.key] = SymbolValue(ppDef.value, ppDef.arguments);
846 }
847
848 std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
849 SparseState<std::string> rawSTNew(lineCurrent);
850
851 int activitySet = preproc.ActiveState();
852
853 const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_C_IDENTIFIER);
854 const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_C_COMMENTDOCKEYWORD);
855
856 Sci_PositionU lineEndNext = styler.LineEnd(lineCurrent);
857
858 for (; sc.More();) {
859
860 if (sc.atLineStart) {
861 // Using MaskActive() is not needed in the following statement.
862 // Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
863 if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
864 // Prevent SCE_C_STRINGEOL from leaking back to previous line which
865 // ends with a line continuation by locking in the state up to this position.
866 sc.SetState(sc.state);
867 }
868 if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
869 sc.SetState(SCE_C_DEFAULT|activitySet);
870 }
871 // Reset states to beginning of colourise so no surprises
872 // if different sets of lines lexed.
873 visibleChars = 0;
874 lastWordWasUUID = false;
875 isIncludePreprocessor = false;
876 inRERange = false;
877 if (preproc.IsInactive()) {
878 activitySet = inactiveFlag;
879 sc.SetState(sc.state | activitySet);
880 }
881 }
882
883 if (sc.atLineEnd) {
884 lineCurrent++;
885 lineEndNext = styler.LineEnd(lineCurrent);
886 vlls.Add(lineCurrent, preproc);
887 if (rawStringTerminator != "") {
888 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
889 }
890 }
891
892 // Handle line continuation generically.
893 if (sc.ch == '\\') {
894 if ((sc.currentPos+1) >= lineEndNext) {
895 lineCurrent++;
896 lineEndNext = styler.LineEnd(lineCurrent);
897 vlls.Add(lineCurrent, preproc);
898 if (rawStringTerminator != "") {
899 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
900 }
901 sc.Forward();
902 if (sc.ch == '\r' && sc.chNext == '\n') {
903 // Even in UTF-8, \r and \n are separate
904 sc.Forward();
905 }
906 continuationLine = true;
907 sc.Forward();
908 continue;
909 }
910 }
911
912 const bool atLineEndBeforeSwitch = sc.atLineEnd;
913
914 // Determine if the current state should terminate.
915 switch (MaskActive(sc.state)) {
916 case SCE_C_OPERATOR:
917 sc.SetState(SCE_C_DEFAULT|activitySet);
918 break;
919 case SCE_C_NUMBER:
920 // We accept almost anything because of hex. and number suffixes
921 if (sc.ch == '_') {
922 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
923 } else if (!(setWord.Contains(sc.ch)
924 || (sc.ch == '\'')
925 || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
926 sc.chPrev == 'p' || sc.chPrev == 'P')))) {
927 sc.SetState(SCE_C_DEFAULT|activitySet);
928 }
929 break;
930 case SCE_C_USERLITERAL:
931 if (!(setWord.Contains(sc.ch)))
932 sc.SetState(SCE_C_DEFAULT|activitySet);
933 break;
934 case SCE_C_IDENTIFIER:
935 if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
936 char s[1000];
937 if (caseSensitive) {
938 sc.GetCurrent(s, sizeof(s));
939 } else {
940 sc.GetCurrentLowered(s, sizeof(s));
941 }
942 if (keywords.InList(s)) {
943 lastWordWasUUID = strcmp(s, "uuid") == 0;
944 sc.ChangeState(SCE_C_WORD|activitySet);
945 } else if (keywords2.InList(s)) {
946 sc.ChangeState(SCE_C_WORD2|activitySet);
947 } else if (keywords4.InList(s)) {
948 sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
949 } else {
950 int subStyle = classifierIdentifiers.ValueFor(s);
951 if (subStyle >= 0) {
952 sc.ChangeState(subStyle|activitySet);
953 }
954 }
955 const bool literalString = sc.ch == '\"';
956 if (literalString || sc.ch == '\'') {
957 size_t lenS = strlen(s);
958 const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
959 if (raw)
960 s[lenS--] = '\0';
961 const bool valid =
962 (lenS == 0) ||
963 ((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
964 ((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
965 if (valid) {
966 if (literalString) {
967 if (raw) {
968 // Set the style of the string prefix to SCE_C_STRINGRAW but then change to
969 // SCE_C_DEFAULT as that allows the raw string start code to run.
970 sc.ChangeState(SCE_C_STRINGRAW|activitySet);
971 sc.SetState(SCE_C_DEFAULT|activitySet);
972 } else {
973 sc.ChangeState(SCE_C_STRING|activitySet);
974 }
975 } else {
976 sc.ChangeState(SCE_C_CHARACTER|activitySet);
977 }
978 } else {
979 sc.SetState(SCE_C_DEFAULT | activitySet);
980 }
981 } else {
982 sc.SetState(SCE_C_DEFAULT|activitySet);
983 }
984 }
985 break;
986 case SCE_C_PREPROCESSOR:
987 if (options.stylingWithinPreprocessor) {
988 if (IsASpace(sc.ch) || (sc.ch == '(')) {
989 sc.SetState(SCE_C_DEFAULT|activitySet);
990 }
991 } else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
992 isStringInPreprocessor = false;
993 } else if (!isStringInPreprocessor) {
994 if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
995 isStringInPreprocessor = true;
996 } else if (sc.Match('/', '*')) {
997 if (sc.Match("/**") || sc.Match("/*!")) {
998 sc.SetState(SCE_C_PREPROCESSORCOMMENTDOC|activitySet);
999 } else {
1000 sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
1001 }
1002 sc.Forward(); // Eat the *
1003 } else if (sc.Match('/', '/')) {
1004 sc.SetState(SCE_C_DEFAULT|activitySet);
1005 }
1006 }
1007 break;
1008 case SCE_C_PREPROCESSORCOMMENT:
1009 case SCE_C_PREPROCESSORCOMMENTDOC:
1010 if (sc.Match('*', '/')) {
1011 sc.Forward();
1012 sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
1013 continue; // Without advancing in case of '\'.
1014 }
1015 break;
1016 case SCE_C_COMMENT:
1017 if (sc.Match('*', '/')) {
1018 sc.Forward();
1019 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1020 } else {
1021 styleBeforeTaskMarker = SCE_C_COMMENT;
1022 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
1023 }
1024 break;
1025 case SCE_C_COMMENTDOC:
1026 if (sc.Match('*', '/')) {
1027 sc.Forward();
1028 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1029 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
1030 // Verify that we have the conditions to mark a comment-doc-keyword
1031 if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
1032 styleBeforeDCKeyword = SCE_C_COMMENTDOC;
1033 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
1034 }
1035 }
1036 break;
1037 case SCE_C_COMMENTLINE:
1038 if (sc.atLineStart && !continuationLine) {
1039 sc.SetState(SCE_C_DEFAULT|activitySet);
1040 } else {
1041 styleBeforeTaskMarker = SCE_C_COMMENTLINE;
1042 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
1043 }
1044 break;
1045 case SCE_C_COMMENTLINEDOC:
1046 if (sc.atLineStart && !continuationLine) {
1047 sc.SetState(SCE_C_DEFAULT|activitySet);
1048 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
1049 // Verify that we have the conditions to mark a comment-doc-keyword
1050 if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
1051 styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
1052 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
1053 }
1054 }
1055 break;
1056 case SCE_C_COMMENTDOCKEYWORD:
1057 if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
1058 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
1059 sc.Forward();
1060 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1061 seenDocKeyBrace = false;
1062 } else if (sc.ch == '[' || sc.ch == '{') {
1063 seenDocKeyBrace = true;
1064 } else if (!setDoxygen.Contains(sc.ch)
1065 && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
1066 char s[100];
1067 if (caseSensitive) {
1068 sc.GetCurrent(s, sizeof(s));
1069 } else {
1070 sc.GetCurrentLowered(s, sizeof(s));
1071 }
1072 if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
1073 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
1074 } else if (!keywords3.InList(s + 1)) {
1075 int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
1076 if (subStyleCDKW >= 0) {
1077 sc.ChangeState(subStyleCDKW|activitySet);
1078 } else {
1079 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
1080 }
1081 }
1082 sc.SetState(styleBeforeDCKeyword|activitySet);
1083 seenDocKeyBrace = false;
1084 }
1085 break;
1086 case SCE_C_STRING:
1087 if (sc.atLineEnd) {
1088 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1089 } else if (isIncludePreprocessor) {
1090 if (sc.ch == '>') {
1091 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1092 isIncludePreprocessor = false;
1093 }
1094 } else if (sc.ch == '\\') {
1095 if (options.escapeSequence) {
1096 sc.SetState(SCE_C_ESCAPESEQUENCE|activitySet);
1097 escapeSeq.resetEscapeState(sc.chNext);
1098 }
1099 sc.Forward(); // Skip all characters after the backslash
1100 } else if (sc.ch == '\"') {
1101 if (sc.chNext == '_') {
1102 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1103 } else {
1104 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1105 }
1106 }
1107 break;
1108 case SCE_C_ESCAPESEQUENCE:
1109 escapeSeq.consumeDigit();
1110 if (!escapeSeq.atEscapeEnd(sc.ch)) {
1111 break;
1112 }
1113 if (sc.ch == '"') {
1114 sc.SetState(SCE_C_STRING|activitySet);
1115 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1116 } else if (sc.ch == '\\') {
1117 escapeSeq.resetEscapeState(sc.chNext);
1118 sc.Forward();
1119 } else {
1120 sc.SetState(SCE_C_STRING|activitySet);
1121 if (sc.atLineEnd) {
1122 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1123 }
1124 }
1125 break;
1126 case SCE_C_HASHQUOTEDSTRING:
1127 if (sc.ch == '\\') {
1128 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1129 sc.Forward();
1130 }
1131 } else if (sc.ch == '\"') {
1132 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1133 }
1134 break;
1135 case SCE_C_STRINGRAW:
1136 if (sc.Match(rawStringTerminator.c_str())) {
1137 for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
1138 sc.Forward();
1139 sc.SetState(SCE_C_DEFAULT|activitySet);
1140 rawStringTerminator = "";
1141 }
1142 break;
1143 case SCE_C_CHARACTER:
1144 if (sc.atLineEnd) {
1145 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1146 } else if (sc.ch == '\\') {
1147 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1148 sc.Forward();
1149 }
1150 } else if (sc.ch == '\'') {
1151 if (sc.chNext == '_') {
1152 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1153 } else {
1154 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1155 }
1156 }
1157 break;
1158 case SCE_C_REGEX:
1159 if (sc.atLineStart) {
1160 sc.SetState(SCE_C_DEFAULT|activitySet);
1161 } else if (!inRERange && sc.ch == '/') {
1162 sc.Forward();
1163 while (IsLowerCase(sc.ch))
1164 sc.Forward(); // gobble regex flags
1165 sc.SetState(SCE_C_DEFAULT|activitySet);
1166 } else if (sc.ch == '\\' && ((sc.currentPos+1) < lineEndNext)) {
1167 // Gobble up the escaped character
1168 sc.Forward();
1169 } else if (sc.ch == '[') {
1170 inRERange = true;
1171 } else if (sc.ch == ']') {
1172 inRERange = false;
1173 }
1174 break;
1175 case SCE_C_STRINGEOL:
1176 if (sc.atLineStart) {
1177 sc.SetState(SCE_C_DEFAULT|activitySet);
1178 }
1179 break;
1180 case SCE_C_VERBATIM:
1181 if (options.verbatimStringsAllowEscapes && (sc.ch == '\\')) {
1182 sc.Forward(); // Skip all characters after the backslash
1183 } else if (sc.ch == '\"') {
1184 if (sc.chNext == '\"') {
1185 sc.Forward();
1186 } else {
1187 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1188 }
1189 }
1190 break;
1191 case SCE_C_TRIPLEVERBATIM:
1192 if (sc.Match(R"(""")")) {
1193 while (sc.Match('"')) {
1194 sc.Forward();
1195 }
1196 sc.SetState(SCE_C_DEFAULT|activitySet);
1197 }
1198 break;
1199 case SCE_C_UUID:
1200 if (sc.atLineEnd || sc.ch == ')') {
1201 sc.SetState(SCE_C_DEFAULT|activitySet);
1202 }
1203 break;
1204 case SCE_C_TASKMARKER:
1205 if (isoperator(sc.ch) || IsASpace(sc.ch)) {
1206 sc.SetState(styleBeforeTaskMarker|activitySet);
1207 styleBeforeTaskMarker = SCE_C_DEFAULT;
1208 }
1209 }
1210
1211 if (sc.atLineEnd && !atLineEndBeforeSwitch) {
1212 // State exit processing consumed characters up to end of line.
1213 lineCurrent++;
1214 lineEndNext = styler.LineEnd(lineCurrent);
1215 vlls.Add(lineCurrent, preproc);
1216 }
1217
1218 // Determine if a new state should be entered.
1219 if (MaskActive(sc.state) == SCE_C_DEFAULT) {
1220 if (sc.Match('@', '\"')) {
1221 sc.SetState(SCE_C_VERBATIM|activitySet);
1222 sc.Forward();
1223 } else if (options.triplequotedStrings && sc.Match(R"(""")")) {
1224 sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
1225 sc.Forward(2);
1226 } else if (options.hashquotedStrings && sc.Match('#', '\"')) {
1227 sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
1228 sc.Forward();
1229 } else if (options.backQuotedStrings && sc.Match('`')) {
1230 sc.SetState(SCE_C_STRINGRAW|activitySet);
1231 rawStringTerminator = "`";
1232 } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1233 if (lastWordWasUUID) {
1234 sc.SetState(SCE_C_UUID|activitySet);
1235 lastWordWasUUID = false;
1236 } else {
1237 sc.SetState(SCE_C_NUMBER|activitySet);
1238 }
1239 } else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
1240 if (lastWordWasUUID) {
1241 sc.SetState(SCE_C_UUID|activitySet);
1242 lastWordWasUUID = false;
1243 } else {
1244 sc.SetState(SCE_C_IDENTIFIER|activitySet);
1245 }
1246 } else if (sc.Match('/', '*')) {
1247 if (sc.Match("/**") || sc.Match("/*!")) { // Support of Qt/Doxygen doc. style
1248 sc.SetState(SCE_C_COMMENTDOC|activitySet);
1249 } else {
1250 sc.SetState(SCE_C_COMMENT|activitySet);
1251 }
1252 sc.Forward(); // Eat the * so it isn't used for the end of the comment
1253 } else if (sc.Match('/', '/')) {
1254 if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
1255 // Support of Qt/Doxygen doc. style
1256 sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
1257 else
1258 sc.SetState(SCE_C_COMMENTLINE|activitySet);
1259 } else if (sc.ch == '/'
1260 && (setOKBeforeRE.Contains(chPrevNonWhite)
1261 || followsReturnKeyword(sc, styler))
1262 && (!setCouldBePostOp.Contains(chPrevNonWhite)
1263 || !FollowsPostfixOperator(sc, styler))) {
1264 sc.SetState(SCE_C_REGEX|activitySet); // JavaScript's RegEx
1265 inRERange = false;
1266 } else if (sc.ch == '\"') {
1267 if (sc.chPrev == 'R') {
1268 styler.Flush();
1269 if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
1270 sc.SetState(SCE_C_STRINGRAW|activitySet);
1271 rawStringTerminator = ")";
1272 for (Sci_Position termPos = sc.currentPos + 1;; termPos++) {
1273 const char chTerminator = styler.SafeGetCharAt(termPos, '(');
1274 if (chTerminator == '(')
1275 break;
1276 rawStringTerminator += chTerminator;
1277 }
1278 rawStringTerminator += '\"';
1279 } else {
1280 sc.SetState(SCE_C_STRING|activitySet);
1281 }
1282 } else {
1283 sc.SetState(SCE_C_STRING|activitySet);
1284 }
1285 isIncludePreprocessor = false; // ensure that '>' won't end the string
1286 } else if (isIncludePreprocessor && sc.ch == '<') {
1287 sc.SetState(SCE_C_STRING|activitySet);
1288 } else if (sc.ch == '\'') {
1289 sc.SetState(SCE_C_CHARACTER|activitySet);
1290 } else if (sc.ch == '#' && visibleChars == 0) {
1291 // Preprocessor commands are alone on their line
1292 sc.SetState(SCE_C_PREPROCESSOR|activitySet);
1293 // Skip whitespace between # and preprocessor word
1294 do {
1295 sc.Forward();
1296 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
1297 if (sc.atLineEnd) {
1298 sc.SetState(SCE_C_DEFAULT|activitySet);
1299 } else if (sc.Match("include")) {
1300 isIncludePreprocessor = true;
1301 } else {
1302 if (options.trackPreprocessor) {
1303 // If #if is nested too deeply (>31 levels) the active/inactive appearance
1304 // will stop reflecting the code.
1305 if (sc.Match("ifdef") || sc.Match("ifndef")) {
1306 const bool isIfDef = sc.Match("ifdef");
1307 const int startRest = isIfDef ? 5 : 6;
1308 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + startRest + 1, false);
1309 bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
1310 preproc.StartSection(isIfDef == foundDef);
1311 } else if (sc.Match("if")) {
1312 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1313 const bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1314 preproc.StartSection(ifGood);
1315 } else if (sc.Match("else")) {
1316 // #else is shown as active if either preceding or following section is active
1317 // as that means that it contributed to the result.
1318 if (!preproc.CurrentIfTaken()) {
1319 // Inactive, may become active if parent scope active
1320 assert(sc.state == (SCE_C_PREPROCESSOR|inactiveFlag));
1321 preproc.InvertCurrentLevel();
1322 activitySet = preproc.ActiveState();
1323 // If following is active then show "else" as active
1324 if (!activitySet)
1325 sc.ChangeState(SCE_C_PREPROCESSOR);
1326 } else if (preproc.IsActive()) {
1327 // Active -> inactive
1328 assert(sc.state == SCE_C_PREPROCESSOR);
1329 preproc.InvertCurrentLevel();
1330 activitySet = preproc.ActiveState();
1331 // Continue to show "else" as active as it ends active section.
1332 }
1333 } else if (sc.Match("elif")) {
1334 // Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
1335 // #elif is shown as active if either preceding or following section is active
1336 // as that means that it contributed to the result.
1337 if (!preproc.CurrentIfTaken()) {
1338 // Inactive, if expression true then may become active if parent scope active
1339 assert(sc.state == (SCE_C_PREPROCESSOR|inactiveFlag));
1340 // Similar to #if
1341 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 4, true);
1342 const bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1343 if (ifGood) {
1344 preproc.InvertCurrentLevel();
1345 activitySet = preproc.ActiveState();
1346 if (!activitySet)
1347 sc.ChangeState(SCE_C_PREPROCESSOR);
1348 }
1349 } else if (preproc.IsActive()) {
1350 // Active -> inactive
1351 assert(sc.state == SCE_C_PREPROCESSOR);
1352 preproc.InvertCurrentLevel();
1353 activitySet = preproc.ActiveState();
1354 // Continue to show "elif" as active as it ends active section.
1355 }
1356 } else if (sc.Match("endif")) {
1357 preproc.EndSection();
1358 activitySet = preproc.ActiveState();
1359 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1360 } else if (sc.Match("define")) {
1361 if (options.updatePreprocessor && preproc.IsActive()) {
1362 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
1363 size_t startName = 0;
1364 while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
1365 startName++;
1366 size_t endName = startName;
1367 while ((endName < restOfLine.length()) && setWord.Contains(restOfLine[endName]))
1368 endName++;
1369 std::string key = restOfLine.substr(startName, endName-startName);
1370 if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
1371 // Macro
1372 size_t endArgs = endName;
1373 while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
1374 endArgs++;
1375 std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
1376 size_t startValue = endArgs+1;
1377 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1378 startValue++;
1379 std::string value;
1380 if (startValue < restOfLine.length())
1381 value = restOfLine.substr(startValue);
1382 preprocessorDefinitions[key] = SymbolValue(value, args);
1383 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
1384 definitionsChanged = true;
1385 } else {
1386 // Value
1387 size_t startValue = endName;
1388 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1389 startValue++;
1390 std::string value = restOfLine.substr(startValue);
1391 if (OnlySpaceOrTab(value))
1392 value = "1"; // No value defaults to 1
1393 preprocessorDefinitions[key] = value;
1394 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
1395 definitionsChanged = true;
1396 }
1397 }
1398 } else if (sc.Match("undef")) {
1399 if (options.updatePreprocessor && preproc.IsActive()) {
1400 const std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, false);
1401 std::vector<std::string> tokens = Tokenize(restOfLine);
1402 if (tokens.size() >= 1) {
1403 const std::string key = tokens[0];
1404 preprocessorDefinitions.erase(key);
1405 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
1406 definitionsChanged = true;
1407 }
1408 }
1409 }
1410 }
1411 }
1412 } else if (isoperator(sc.ch)) {
1413 sc.SetState(SCE_C_OPERATOR|activitySet);
1414 }
1415 }
1416
1417 if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
1418 chPrevNonWhite = sc.ch;
1419 visibleChars++;
1420 }
1421 continuationLine = false;
1422 sc.Forward();
1423 }
1424 const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
1425 if (definitionsChanged || rawStringsChanged)
1426 styler.ChangeLexerState(startPos, startPos + length);
1427 sc.Complete();
1428 }
1429
// Store both the current line's fold level and the next line's in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "} else {".
1433
Fold(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)1434 void SCI_METHOD LexerCPP::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
1435
1436 if (!options.fold)
1437 return;
1438
1439 LexAccessor styler(pAccess);
1440
1441 const Sci_PositionU endPos = startPos + length;
1442 int visibleChars = 0;
1443 bool inLineComment = false;
1444 Sci_Position lineCurrent = styler.GetLine(startPos);
1445 int levelCurrent = SC_FOLDLEVELBASE;
1446 if (lineCurrent > 0)
1447 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
1448 Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
1449 int levelMinCurrent = levelCurrent;
1450 int levelNext = levelCurrent;
1451 char chNext = styler[startPos];
1452 int styleNext = MaskActive(styler.StyleAt(startPos));
1453 int style = MaskActive(initStyle);
1454 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
1455 for (Sci_PositionU i = startPos; i < endPos; i++) {
1456 const char ch = chNext;
1457 chNext = styler.SafeGetCharAt(i + 1);
1458 const int stylePrev = style;
1459 style = styleNext;
1460 styleNext = MaskActive(styler.StyleAt(i + 1));
1461 const bool atEOL = i == (lineStartNext-1);
1462 if ((style == SCE_C_COMMENTLINE) || (style == SCE_C_COMMENTLINEDOC))
1463 inLineComment = true;
1464 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
1465 if (!IsStreamCommentStyle(stylePrev)) {
1466 levelNext++;
1467 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
1468 // Comments don't end at end of line and the next character may be unstyled.
1469 levelNext--;
1470 }
1471 }
1472 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
1473 if (userDefinedFoldMarkers) {
1474 if (styler.Match(i, options.foldExplicitStart.c_str())) {
1475 levelNext++;
1476 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
1477 levelNext--;
1478 }
1479 } else {
1480 if ((ch == '/') && (chNext == '/')) {
1481 const char chNext2 = styler.SafeGetCharAt(i + 2);
1482 if (chNext2 == '{') {
1483 levelNext++;
1484 } else if (chNext2 == '}') {
1485 levelNext--;
1486 }
1487 }
1488 }
1489 }
1490 if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
1491 if (ch == '#') {
1492 Sci_PositionU j = i + 1;
1493 while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1494 j++;
1495 }
1496 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1497 levelNext++;
1498 } else if (styler.Match(j, "end")) {
1499 levelNext--;
1500 }
1501
1502 if (options.foldPreprocessorAtElse && (styler.Match(j, "else") || styler.Match(j, "elif"))) {
1503 levelMinCurrent--;
1504 }
1505 }
1506 }
1507 if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
1508 if (ch == '{' || ch == '[' || ch == '(') {
1509 // Measure the minimum before a '{' to allow
1510 // folding on "} else {"
1511 if (options.foldAtElse && levelMinCurrent > levelNext) {
1512 levelMinCurrent = levelNext;
1513 }
1514 levelNext++;
1515 } else if (ch == '}' || ch == ']' || ch == ')') {
1516 levelNext--;
1517 }
1518 }
1519 if (!IsASpace(ch))
1520 visibleChars++;
1521 if (atEOL || (i == endPos-1)) {
1522 int levelUse = levelCurrent;
1523 if ((options.foldSyntaxBased && options.foldAtElse) ||
1524 (options.foldPreprocessor && options.foldPreprocessorAtElse)
1525 ) {
1526 levelUse = levelMinCurrent;
1527 }
1528 int lev = levelUse | levelNext << 16;
1529 if (visibleChars == 0 && options.foldCompact)
1530 lev |= SC_FOLDLEVELWHITEFLAG;
1531 if (levelUse < levelNext)
1532 lev |= SC_FOLDLEVELHEADERFLAG;
1533 if (lev != styler.LevelAt(lineCurrent)) {
1534 styler.SetLevel(lineCurrent, lev);
1535 }
1536 lineCurrent++;
1537 lineStartNext = styler.LineStart(lineCurrent+1);
1538 levelCurrent = levelNext;
1539 levelMinCurrent = levelCurrent;
1540 if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
1541 // There is an empty line at end of file so give it same level and empty
1542 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1543 }
1544 visibleChars = 0;
1545 inLineComment = false;
1546 }
1547 }
1548 }
1549
EvaluateTokens(std::vector<std::string> & tokens,const SymbolTable & preprocessorDefinitions)1550 void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
1551
1552 // Remove whitespace tokens
1553 tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
1554
1555 // Evaluate defined statements to either 0 or 1
1556 for (size_t i=0; (i+1)<tokens.size();) {
1557 if (tokens[i] == "defined") {
1558 const char *val = "0";
1559 if (tokens[i+1] == "(") {
1560 if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
1561 // defined()
1562 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1563 } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1564 // defined(<identifier>)
1565 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
1566 if (it != preprocessorDefinitions.end()) {
1567 val = "1";
1568 }
1569 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1570 } else {
1571 // Spurious '(' so erase as more likely to result in false
1572 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1573 }
1574 } else {
1575 // defined <identifier>
1576 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
1577 if (it != preprocessorDefinitions.end()) {
1578 val = "1";
1579 }
1580 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1581 }
1582 tokens[i] = val;
1583 } else {
1584 i++;
1585 }
1586 }
1587
1588 // Evaluate identifiers
1589 constexpr size_t maxIterations = 100;
1590 size_t iterations = 0; // Limit number of iterations in case there is a recursive macro.
1591 for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
1592 iterations++;
1593 if (setWordStart.Contains(tokens[i][0])) {
1594 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
1595 if (it != preprocessorDefinitions.end()) {
1596 // Tokenize value
1597 std::vector<std::string> macroTokens = Tokenize(it->second.value);
1598 if (it->second.IsMacro()) {
1599 if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
1600 // Create map of argument name to value
1601 std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
1602 std::map<std::string, std::string> arguments;
1603 size_t arg = 0;
1604 size_t tok = i+2;
1605 while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
1606 if (tokens.at(tok) != ",") {
1607 arguments[argumentNames.at(arg)] = tokens.at(tok);
1608 arg++;
1609 }
1610 tok++;
1611 }
1612
1613 // Remove invocation
1614 tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
1615
1616 // Substitute values into macro
1617 macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
1618
1619 for (size_t iMacro = 0; iMacro < macroTokens.size();) {
1620 if (setWordStart.Contains(macroTokens[iMacro][0])) {
1621 std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
1622 if (itFind != arguments.end()) {
1623 // TODO: Possible that value will be expression so should insert tokenized form
1624 macroTokens[iMacro] = itFind->second;
1625 }
1626 }
1627 iMacro++;
1628 }
1629
1630 // Insert results back into tokens
1631 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1632
1633 } else {
1634 i++;
1635 }
1636 } else {
1637 // Remove invocation
1638 tokens.erase(tokens.begin() + i);
1639 // Insert results back into tokens
1640 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1641 }
1642 } else {
1643 // Identifier not found and value defaults to zero
1644 tokens[i] = "0";
1645 }
1646 } else {
1647 i++;
1648 }
1649 }
1650
1651 // Find bracketed subexpressions and recurse on them
1652 BracketPair bracketPair = FindBracketPair(tokens);
1653 while (bracketPair.itBracket != tokens.end()) {
1654 std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
1655 EvaluateTokens(inBracket, preprocessorDefinitions);
1656
1657 // The insertion is done before the removal because there were failures with the opposite approach
1658 tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
1659
1660 bracketPair = FindBracketPair(tokens);
1661 tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
1662
1663 bracketPair = FindBracketPair(tokens);
1664 }
1665
1666 // Evaluate logical negations
1667 for (size_t j=0; (j+1)<tokens.size();) {
1668 if (setNegationOp.Contains(tokens[j][0])) {
1669 int isTrue = atoi(tokens[j+1].c_str());
1670 if (tokens[j] == "!")
1671 isTrue = !isTrue;
1672 std::vector<std::string>::iterator itInsert =
1673 tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1674 tokens.insert(itInsert, isTrue ? "1" : "0");
1675 } else {
1676 j++;
1677 }
1678 }
1679
1680 // Evaluate expressions in precedence order
1681 enum precedence { precMult, precAdd, precRelative
1682 , precLogical, /* end marker */ precLast };
1683 for (int prec = precMult; prec < precLast; prec++) {
1684 // Looking at 3 tokens at a time so end at 2 before end
1685 for (size_t k=0; (k+2)<tokens.size();) {
1686 const char chOp = tokens[k+1][0];
1687 if (
1688 ((prec==precMult) && setMultOp.Contains(chOp)) ||
1689 ((prec==precAdd) && setAddOp.Contains(chOp)) ||
1690 ((prec==precRelative) && setRelOp.Contains(chOp)) ||
1691 ((prec==precLogical) && setLogicalOp.Contains(chOp))
1692 ) {
1693 const int valA = atoi(tokens[k].c_str());
1694 const int valB = atoi(tokens[k+2].c_str());
1695 int result = 0;
1696 if (tokens[k+1] == "+")
1697 result = valA + valB;
1698 else if (tokens[k+1] == "-")
1699 result = valA - valB;
1700 else if (tokens[k+1] == "*")
1701 result = valA * valB;
1702 else if (tokens[k+1] == "/")
1703 result = valA / (valB ? valB : 1);
1704 else if (tokens[k+1] == "%")
1705 result = valA % (valB ? valB : 1);
1706 else if (tokens[k+1] == "<")
1707 result = valA < valB;
1708 else if (tokens[k+1] == "<=")
1709 result = valA <= valB;
1710 else if (tokens[k+1] == ">")
1711 result = valA > valB;
1712 else if (tokens[k+1] == ">=")
1713 result = valA >= valB;
1714 else if (tokens[k+1] == "==")
1715 result = valA == valB;
1716 else if (tokens[k+1] == "!=")
1717 result = valA != valB;
1718 else if (tokens[k+1] == "||")
1719 result = valA || valB;
1720 else if (tokens[k+1] == "&&")
1721 result = valA && valB;
1722 std::vector<std::string>::iterator itInsert =
1723 tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1724 tokens.insert(itInsert, std::to_string(result));
1725 } else {
1726 k++;
1727 }
1728 }
1729 }
1730 }
1731
Tokenize(const std::string & expr) const1732 std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const {
1733 // Break into tokens
1734 std::vector<std::string> tokens;
1735 const char *cp = expr.c_str();
1736 while (*cp) {
1737 std::string word;
1738 if (setWord.Contains(*cp)) {
1739 // Identifiers and numbers
1740 while (setWord.Contains(*cp)) {
1741 word += *cp;
1742 cp++;
1743 }
1744 } else if (IsSpaceOrTab(*cp)) {
1745 while (IsSpaceOrTab(*cp)) {
1746 word += *cp;
1747 cp++;
1748 }
1749 } else if (setRelOp.Contains(*cp)) {
1750 word += *cp;
1751 cp++;
1752 if (setRelOp.Contains(*cp)) {
1753 word += *cp;
1754 cp++;
1755 }
1756 } else if (setLogicalOp.Contains(*cp)) {
1757 word += *cp;
1758 cp++;
1759 if (setLogicalOp.Contains(*cp)) {
1760 word += *cp;
1761 cp++;
1762 }
1763 } else {
1764 // Should handle strings, characters, and comments here
1765 word += *cp;
1766 cp++;
1767 }
1768 tokens.push_back(word);
1769 }
1770 return tokens;
1771 }
1772
EvaluateExpression(const std::string & expr,const SymbolTable & preprocessorDefinitions)1773 bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
1774 std::vector<std::string> tokens = Tokenize(expr);
1775
1776 EvaluateTokens(tokens, preprocessorDefinitions);
1777
1778 // "0" or "" -> false else true
1779 const bool isFalse = tokens.empty() ||
1780 ((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1781 return !isFalse;
1782 }
1783
// Lexer module objects for LexerCPP: "cpp" is created through LexerFactoryCPP and
// "cppnocase" through LexerFactoryCPPInsensitive. Both use the cppWordLists descriptions.
LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);
1786