1 // Scintilla source code edit control
2 /** @file LexCPP.cxx
3 ** Lexer for C++, C, Java, and JavaScript.
4 ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
5 **/
6 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 #include <ctype.h>
15
16 #include <string>
17 #include <vector>
18 #include <map>
19 #include <algorithm>
20
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
24
25 #include "WordList.h"
26 #include "LexAccessor.h"
27 #include "Accessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "SparseState.h"
33 #include "SubStyles.h"
34
35 #ifdef SCI_NAMESPACE
36 using namespace Scintilla;
37 #endif
38
39 namespace {
40 // Use an unnamed namespace to protect the functions and classes from name conflicts
41
IsSpaceEquiv(int state)42 bool IsSpaceEquiv(int state) {
43 return (state <= SCE_C_COMMENTDOC) ||
44 // including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
45 (state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
46 (state == SCE_C_COMMENTDOCKEYWORDERROR);
47 }
48
49 // Preconditions: sc.currentPos points to a character after '+' or '-'.
50 // The test for pos reaching 0 should be redundant,
51 // and is in only for safety measures.
52 // Limitation: this code will give the incorrect answer for code like
53 // a = b+++/ptn/...
54 // Putting a space between the '++' post-inc operator and the '+' binary op
55 // fixes this, and is highly recommended for readability anyway.
FollowsPostfixOperator(StyleContext & sc,LexAccessor & styler)56 bool FollowsPostfixOperator(StyleContext &sc, LexAccessor &styler) {
57 int pos = (int) sc.currentPos;
58 while (--pos > 0) {
59 char ch = styler[pos];
60 if (ch == '+' || ch == '-') {
61 return styler[pos - 1] == ch;
62 }
63 }
64 return false;
65 }
66
followsReturnKeyword(StyleContext & sc,LexAccessor & styler)67 bool followsReturnKeyword(StyleContext &sc, LexAccessor &styler) {
68 // Don't look at styles, so no need to flush.
69 int pos = (int) sc.currentPos;
70 int currentLine = styler.GetLine(pos);
71 int lineStartPos = styler.LineStart(currentLine);
72 while (--pos > lineStartPos) {
73 char ch = styler.SafeGetCharAt(pos);
74 if (ch != ' ' && ch != '\t') {
75 break;
76 }
77 }
78 const char *retBack = "nruter";
79 const char *s = retBack;
80 while (*s
81 && pos >= lineStartPos
82 && styler.SafeGetCharAt(pos) == *s) {
83 s++;
84 pos--;
85 }
86 return !*s;
87 }
88
// Return true when ch is a space or a horizontal tab, false for anything else.
bool IsSpaceOrTab(int ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
92
// Return true when s consists solely of spaces and tabs.
// An empty string has no other characters and therefore also yields true.
bool OnlySpaceOrTab(const std::string &s) {
    // npos means no character outside the set {' ', '\t'} was found.
    return s.find_first_not_of(" \t") == std::string::npos;
}
100
// Split text at every occurrence of separator.
// text:      string to split.
// separator: character value that ends one piece and starts the next.
// Returns the pieces in order. Empty input gives an empty vector; otherwise
// there is always one more piece than there are separators, so leading,
// trailing and adjacent separators produce empty strings.
std::vector<std::string> StringSplit(const std::string &text, int separator) {
    std::vector<std::string> pieces;
    if (text.empty()) {
        return pieces;
    }
    pieces.push_back(std::string());
    for (std::string::size_type i = 0; i < text.size(); ++i) {
        const char c = text[i];
        if (c == separator) {
            // Start a new, initially empty, piece.
            pieces.push_back(std::string());
        } else {
            pieces.back().push_back(c);
        }
    }
    return pieces;
}
112
// Result of FindBracketPair: iterators to an opening "(" token and to the
// ")" token that closes it. Both are the end iterator when no pair was found.
struct BracketPair {
    std::vector<std::string>::iterator itBracket;
    std::vector<std::string>::iterator itEndBracket;
};

// Locate the first "(" token and the ")" token that balances it, taking
// nested brackets into account.
// tokens: token list to search; the returned iterators point into it.
// Returns a BracketPair with both iterators set to tokens.end() when there is
// no "(" or when the first "(" is never closed.
BracketPair FindBracketPair(std::vector<std::string> &tokens) {
    typedef std::vector<std::string>::iterator TokenIt;

    BracketPair result;
    result.itBracket = tokens.end();
    result.itEndBracket = tokens.end();

    const TokenIt itOpen = std::find(tokens.begin(), tokens.end(), "(");
    size_t depth = 0;
    // The walk starts on the opening bracket itself, so depth is at least 1
    // before any ")" can be seen.
    for (TokenIt it = itOpen; it != tokens.end(); ++it) {
        if (*it == "(") {
            ++depth;
        } else if (*it == ")") {
            --depth;
            if (depth == 0) {
                result.itBracket = itOpen;
                result.itEndBracket = it;
                break;
            }
        }
    }
    return result;
}
142
highlightTaskMarker(StyleContext & sc,LexAccessor & styler,int activity,WordList & markerList,bool caseSensitive)143 void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
144 int activity, WordList &markerList, bool caseSensitive){
145 if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
146 const int lengthMarker = 50;
147 char marker[lengthMarker+1];
148 int currPos = (int) sc.currentPos;
149 int i = 0;
150 while (i < lengthMarker) {
151 char ch = styler.SafeGetCharAt(currPos + i);
152 if (IsASpace(ch) || isoperator(ch)) {
153 break;
154 }
155 if (caseSensitive)
156 marker[i] = ch;
157 else
158 marker[i] = static_cast<char>(tolower(ch));
159 i++;
160 }
161 marker[i] = '\0';
162 if (markerList.InList(marker)) {
163 sc.SetState(SCE_C_TASKMARKER|activity);
164 }
165 }
166 }
167
168 struct EscapeSequence {
169 int digitsLeft;
170 CharacterSet setHexDigits;
171 CharacterSet setOctDigits;
172 CharacterSet setNoneNumeric;
173 CharacterSet *escapeSetValid;
EscapeSequence__anon5b491b7d0111::EscapeSequence174 EscapeSequence() {
175 digitsLeft = 0;
176 escapeSetValid = 0;
177 setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
178 setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
179 }
resetEscapeState__anon5b491b7d0111::EscapeSequence180 void resetEscapeState(int nextChar) {
181 digitsLeft = 0;
182 escapeSetValid = &setNoneNumeric;
183 if (nextChar == 'U') {
184 digitsLeft = 9;
185 escapeSetValid = &setHexDigits;
186 } else if (nextChar == 'u') {
187 digitsLeft = 5;
188 escapeSetValid = &setHexDigits;
189 } else if (nextChar == 'x') {
190 digitsLeft = 5;
191 escapeSetValid = &setHexDigits;
192 } else if (setOctDigits.Contains(nextChar)) {
193 digitsLeft = 3;
194 escapeSetValid = &setOctDigits;
195 }
196 }
atEscapeEnd__anon5b491b7d0111::EscapeSequence197 bool atEscapeEnd(int currChar) const {
198 return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
199 }
200 };
201
GetRestOfLine(LexAccessor & styler,int start,bool allowSpace)202 std::string GetRestOfLine(LexAccessor &styler, int start, bool allowSpace) {
203 std::string restOfLine;
204 int i =0;
205 char ch = styler.SafeGetCharAt(start, '\n');
206 int endLine = styler.LineEnd(styler.GetLine(start));
207 while (((start+i) < endLine) && (ch != '\r')) {
208 char chNext = styler.SafeGetCharAt(start + i + 1, '\n');
209 if (ch == '/' && (chNext == '/' || chNext == '*'))
210 break;
211 if (allowSpace || (ch != ' '))
212 restOfLine += ch;
213 i++;
214 ch = chNext;
215 }
216 return restOfLine;
217 }
218
IsStreamCommentStyle(int style)219 bool IsStreamCommentStyle(int style) {
220 return style == SCE_C_COMMENT ||
221 style == SCE_C_COMMENTDOC ||
222 style == SCE_C_COMMENTDOCKEYWORD ||
223 style == SCE_C_COMMENTDOCKEYWORDERROR;
224 }
225
// One preprocessor definition event recorded while lexing.
struct PPDefinition {
    int line;               // Line on which the definition was seen
    std::string key;        // Symbol name
    std::string value;      // Replacement text
    bool isUndef;           // True when the entry removes the symbol instead of defining it
    std::string arguments;  // Argument list text; empty when there is none

    // isUndef_ defaults to false and arguments_ to the empty string.
    PPDefinition(int line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, std::string arguments_="") :
        line(line_),
        key(key_),
        value(value_),
        isUndef(isUndef_),
        arguments(arguments_) {
    }
};
236
// Preprocessor conditional state at a line, held as one bit per nesting level.
// Only levels 0..31 are tracked; deeper or unmatched levels are counted in
// 'level' but never touch the bit sets.
class LinePPState {
    // Bit n set: the section at nesting level n is inactive.
    int state;
    // Bit n set: some branch at nesting level n has already been taken.
    int ifTaken;
    // Current nesting depth; -1 when outside any conditional section.
    int level;
    bool ValidLevel() const {
        return level >= 0 && level < 32;
    }
    // Bit mask for the current level, or 0 when the level is not tracked.
    // Shifting by a negative count or by the width of int or more is
    // undefined, so invalid levels are rejected before the shift. The shift
    // is done on an unsigned value so that level 31 does not overflow a
    // signed int.
    int maskLevel() const {
        if (!ValidLevel()) {
            return 0;
        }
        return static_cast<int>(1u << level);
    }
public:
    LinePPState() : state(0), ifTaken(0), level(-1) {
    }
    // True when any tracked level is currently inactive.
    bool IsInactive() const {
        return state != 0;
    }
    // True when a branch of the current level has been taken. False when the
    // current level is not tracked (outside any section or nested too deep).
    bool CurrentIfTaken() const {
        return (ifTaken & maskLevel()) != 0;
    }
    // Enter a new conditional section. 'on' says whether its first branch is
    // active: an active branch is marked as taken, an inactive one is not.
    void StartSection(bool on) {
        level++;
        if (ValidLevel()) {
            if (on) {
                state &= ~maskLevel();
                ifTaken |= maskLevel();
            } else {
                state |= maskLevel();
                ifTaken &= ~maskLevel();
            }
        }
    }
    // Leave the current section, clearing its bits, and drop one level.
    void EndSection() {
        if (ValidLevel()) {
            state &= ~maskLevel();
            ifTaken &= ~maskLevel();
        }
        level--;
    }
    // Flip the active/inactive bit of the current level and record that a
    // branch of this level has been taken.
    void InvertCurrentLevel() {
        if (ValidLevel()) {
            state ^= maskLevel();
            ifTaken |= maskLevel();
        }
    }
};
282
283 // Hold the preprocessor state for each line seen.
284 // Currently one entry per line but could become sparse with just one entry per preprocessor line.
285 class PPStates {
286 std::vector<LinePPState> vlls;
287 public:
ForLine(int line) const288 LinePPState ForLine(int line) const {
289 if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
290 return vlls[line];
291 } else {
292 return LinePPState();
293 }
294 }
Add(int line,LinePPState lls)295 void Add(int line, LinePPState lls) {
296 vlls.resize(line+1);
297 vlls[line] = lls;
298 }
299 };
300
301 // An individual named option for use in an OptionSet
302
// Options used for LexerCPP
// Each member is bound to a named property by OptionSetCPP; the constructor
// supplies the value used until that property is set.
struct OptionsCPP {
    bool stylingWithinPreprocessor;
    bool identifiersAllowDollars;
    bool trackPreprocessor;
    bool updatePreprocessor;
    bool triplequotedStrings;
    bool hashquotedStrings;
    bool backQuotedStrings;
    bool escapeSequence;
    bool fold;
    bool foldSyntaxBased;
    bool foldComment;
    bool foldCommentMultiline;
    bool foldCommentExplicit;
    std::string foldExplicitStart;
    std::string foldExplicitEnd;
    bool foldExplicitAnywhere;
    bool foldPreprocessor;
    bool foldCompact;
    bool foldAtElse;

    // Initialisers are listed in declaration order; the two strings start empty.
    OptionsCPP() :
        stylingWithinPreprocessor(false),
        identifiersAllowDollars(true),
        trackPreprocessor(true),
        updatePreprocessor(true),
        triplequotedStrings(false),
        hashquotedStrings(false),
        backQuotedStrings(false),
        escapeSequence(false),
        fold(false),
        foldSyntaxBased(true),
        foldComment(false),
        foldCommentMultiline(true),
        foldCommentExplicit(true),
        foldExplicitStart(),
        foldExplicitEnd(),
        foldExplicitAnywhere(false),
        foldPreprocessor(false),
        foldCompact(false),
        foldAtElse(false) {
    }
};
346
// Descriptions of the word lists accepted by the lexer, terminated by a null
// entry. The position of each description is the list number that
// LexerCPP::WordListSet receives as 'n'.
const char *const cppWordLists[] = {
    "Primary keywords and identifiers",       // 0
    "Secondary keywords and identifiers",     // 1
    "Documentation comment keywords",         // 2
    "Global classes and typedefs",            // 3
    "Preprocessor definitions",               // 4
    "Task marker and error marker keywords",  // 5
    0,
};
356
357 struct OptionSetCPP : public OptionSet<OptionsCPP> {
OptionSetCPP__anon5b491b7d0111::OptionSetCPP358 OptionSetCPP() {
359 DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
360 "For C++ code, determines whether all preprocessor code is styled in the "
361 "preprocessor style (0, the default) or only from the initial # to the end "
362 "of the command word(1).");
363
364 DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
365 "Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
366
367 DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
368 "Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
369
370 DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
371 "Set to 1 to update preprocessor definitions when #define found.");
372
373 DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
374 "Set to 1 to enable highlighting of triple-quoted strings.");
375
376 DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
377 "Set to 1 to enable highlighting of hash-quoted strings.");
378
379 DefineProperty("lexer.cpp.backquoted.strings", &OptionsCPP::backQuotedStrings,
380 "Set to 1 to enable highlighting of back-quoted raw strings .");
381
382 DefineProperty("lexer.cpp.escape.sequence", &OptionsCPP::escapeSequence,
383 "Set to 1 to enable highlighting of escape sequences in strings");
384
385 DefineProperty("fold", &OptionsCPP::fold);
386
387 DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
388 "Set this property to 0 to disable syntax based folding.");
389
390 DefineProperty("fold.comment", &OptionsCPP::foldComment,
391 "This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
392 "Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
393 "at the end of a section that should fold.");
394
395 DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
396 "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
397
398 DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
399 "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
400
401 DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
402 "The string to use for explicit fold start points, replacing the standard //{.");
403
404 DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
405 "The string to use for explicit fold end points, replacing the standard //}.");
406
407 DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
408 "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
409
410 DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
411 "This option enables folding preprocessor directives when using the C++ lexer. "
412 "Includes C#'s explicit #region and #endregion folding directives.");
413
414 DefineProperty("fold.compact", &OptionsCPP::foldCompact);
415
416 DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
417 "This option enables C++ folding on a \"} else {\" line of an if statement.");
418
419 DefineWordListSets(cppWordLists);
420 }
421 };
422
423 const char styleSubable[] = {SCE_C_IDENTIFIER, SCE_C_COMMENTDOCKEYWORD, 0};
424
425 }
426
427 class LexerCPP : public ILexerWithSubStyles {
428 bool caseSensitive;
429 CharacterSet setWord;
430 CharacterSet setNegationOp;
431 CharacterSet setArithmethicOp;
432 CharacterSet setRelOp;
433 CharacterSet setLogicalOp;
434 CharacterSet setWordStart;
435 PPStates vlls;
436 std::vector<PPDefinition> ppDefineHistory;
437 WordList keywords;
438 WordList keywords2;
439 WordList keywords3;
440 WordList keywords4;
441 WordList ppDefinitions;
442 WordList markerList;
443 struct SymbolValue {
444 std::string value;
445 std::string arguments;
SymbolValueLexerCPP::SymbolValue446 SymbolValue(const std::string &value_="", const std::string &arguments_="") : value(value_), arguments(arguments_) {
447 }
operator =LexerCPP::SymbolValue448 SymbolValue &operator = (const std::string &value_) {
449 value = value_;
450 arguments.clear();
451 return *this;
452 }
IsMacroLexerCPP::SymbolValue453 bool IsMacro() const {
454 return !arguments.empty();
455 }
456 };
457 typedef std::map<std::string, SymbolValue> SymbolTable;
458 SymbolTable preprocessorDefinitionsStart;
459 OptionsCPP options;
460 OptionSetCPP osCPP;
461 EscapeSequence escapeSeq;
462 SparseState<std::string> rawStringTerminators;
463 enum { activeFlag = 0x40 };
464 enum { ssIdentifier, ssDocKeyword };
465 SubStyles subStyles;
466 public:
LexerCPP(bool caseSensitive_)467 explicit LexerCPP(bool caseSensitive_) :
468 caseSensitive(caseSensitive_),
469 setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
470 setNegationOp(CharacterSet::setNone, "!"),
471 setArithmethicOp(CharacterSet::setNone, "+-/*%"),
472 setRelOp(CharacterSet::setNone, "=!<>"),
473 setLogicalOp(CharacterSet::setNone, "|&"),
474 subStyles(styleSubable, 0x80, 0x40, activeFlag) {
475 }
~LexerCPP()476 virtual ~LexerCPP() {
477 }
Release()478 void SCI_METHOD Release() {
479 delete this;
480 }
Version() const481 int SCI_METHOD Version() const {
482 return lvSubStyles;
483 }
PropertyNames()484 const char * SCI_METHOD PropertyNames() {
485 return osCPP.PropertyNames();
486 }
PropertyType(const char * name)487 int SCI_METHOD PropertyType(const char *name) {
488 return osCPP.PropertyType(name);
489 }
DescribeProperty(const char * name)490 const char * SCI_METHOD DescribeProperty(const char *name) {
491 return osCPP.DescribeProperty(name);
492 }
493 int SCI_METHOD PropertySet(const char *key, const char *val);
DescribeWordListSets()494 const char * SCI_METHOD DescribeWordListSets() {
495 return osCPP.DescribeWordListSets();
496 }
497 int SCI_METHOD WordListSet(int n, const char *wl);
498 void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
499 void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
500
PrivateCall(int,void *)501 void * SCI_METHOD PrivateCall(int, void *) {
502 return 0;
503 }
504
LineEndTypesSupported()505 int SCI_METHOD LineEndTypesSupported() {
506 return SC_LINE_END_TYPE_UNICODE;
507 }
508
AllocateSubStyles(int styleBase,int numberStyles)509 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) {
510 return subStyles.Allocate(styleBase, numberStyles);
511 }
SubStylesStart(int styleBase)512 int SCI_METHOD SubStylesStart(int styleBase) {
513 return subStyles.Start(styleBase);
514 }
SubStylesLength(int styleBase)515 int SCI_METHOD SubStylesLength(int styleBase) {
516 return subStyles.Length(styleBase);
517 }
StyleFromSubStyle(int subStyle)518 int SCI_METHOD StyleFromSubStyle(int subStyle) {
519 int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
520 int active = subStyle & activeFlag;
521 return styleBase | active;
522 }
PrimaryStyleFromStyle(int style)523 int SCI_METHOD PrimaryStyleFromStyle(int style) {
524 return MaskActive(style);
525 }
FreeSubStyles()526 void SCI_METHOD FreeSubStyles() {
527 subStyles.Free();
528 }
SetIdentifiers(int style,const char * identifiers)529 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) {
530 subStyles.SetIdentifiers(style, identifiers);
531 }
DistanceToSecondaryStyles()532 int SCI_METHOD DistanceToSecondaryStyles() {
533 return activeFlag;
534 }
GetSubStyleBases()535 const char * SCI_METHOD GetSubStyleBases() {
536 return styleSubable;
537 }
538
LexerFactoryCPP()539 static ILexer *LexerFactoryCPP() {
540 return new LexerCPP(true);
541 }
LexerFactoryCPPInsensitive()542 static ILexer *LexerFactoryCPPInsensitive() {
543 return new LexerCPP(false);
544 }
MaskActive(int style)545 static int MaskActive(int style) {
546 return style & ~activeFlag;
547 }
548 void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
549 std::vector<std::string> Tokenize(const std::string &expr) const;
550 bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
551 };
552
PropertySet(const char * key,const char * val)553 int SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
554 if (osCPP.PropertySet(&options, key, val)) {
555 if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
556 setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
557 if (options.identifiersAllowDollars) {
558 setWord.Add('$');
559 }
560 }
561 return 0;
562 }
563 return -1;
564 }
565
WordListSet(int n,const char * wl)566 int SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
567 WordList *wordListN = 0;
568 switch (n) {
569 case 0:
570 wordListN = &keywords;
571 break;
572 case 1:
573 wordListN = &keywords2;
574 break;
575 case 2:
576 wordListN = &keywords3;
577 break;
578 case 3:
579 wordListN = &keywords4;
580 break;
581 case 4:
582 wordListN = &ppDefinitions;
583 break;
584 case 5:
585 wordListN = &markerList;
586 break;
587 }
588 int firstModification = -1;
589 if (wordListN) {
590 WordList wlNew;
591 wlNew.Set(wl);
592 if (*wordListN != wlNew) {
593 wordListN->Set(wl);
594 firstModification = 0;
595 if (n == 4) {
596 // Rebuild preprocessorDefinitions
597 preprocessorDefinitionsStart.clear();
598 for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
599 const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
600 const char *cpEquals = strchr(cpDefinition, '=');
601 if (cpEquals) {
602 std::string name(cpDefinition, cpEquals - cpDefinition);
603 std::string val(cpEquals+1);
604 size_t bracket = name.find('(');
605 size_t bracketEnd = name.find(')');
606 if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
607 // Macro
608 std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
609 name = name.substr(0, bracket);
610 preprocessorDefinitionsStart[name] = SymbolValue(val, args);
611 } else {
612 preprocessorDefinitionsStart[name] = val;
613 }
614 } else {
615 std::string name(cpDefinition);
616 std::string val("1");
617 preprocessorDefinitionsStart[name] = val;
618 }
619 }
620 }
621 }
622 }
623 return firstModification;
624 }
625
626 // Functor used to truncate history
627 struct After {
628 int line;
AfterAfter629 explicit After(int line_) : line(line_) {}
operator ()After630 bool operator()(PPDefinition &p) const {
631 return p.line > line;
632 }
633 };
634
Lex(unsigned int startPos,int length,int initStyle,IDocument * pAccess)635 void SCI_METHOD LexerCPP::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
636 LexAccessor styler(pAccess);
637
638 CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
639 CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
640
641 CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
642
643 setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
644
645 CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
646
647 if (options.identifiersAllowDollars) {
648 setWordStart.Add('$');
649 }
650
651 int chPrevNonWhite = ' ';
652 int visibleChars = 0;
653 bool lastWordWasUUID = false;
654 int styleBeforeDCKeyword = SCE_C_DEFAULT;
655 int styleBeforeTaskMarker = SCE_C_DEFAULT;
656 bool continuationLine = false;
657 bool isIncludePreprocessor = false;
658 bool isStringInPreprocessor = false;
659 bool inRERange = false;
660 bool seenDocKeyBrace = false;
661
662 int lineCurrent = styler.GetLine(startPos);
663 if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
664 (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
665 (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
666 // Set continuationLine if last character of previous line is '\'
667 if (lineCurrent > 0) {
668 int endLinePrevious = styler.LineEnd(lineCurrent - 1);
669 if (endLinePrevious > 0) {
670 continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
671 }
672 }
673 }
674
675 // look back to set chPrevNonWhite properly for better regex colouring
676 if (startPos > 0) {
677 int back = startPos;
678 while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
679 ;
680 if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
681 chPrevNonWhite = styler.SafeGetCharAt(back);
682 }
683 }
684
685 StyleContext sc(startPos, length, initStyle, styler, static_cast<unsigned char>(0xff));
686 LinePPState preproc = vlls.ForLine(lineCurrent);
687
688 bool definitionsChanged = false;
689
690 // Truncate ppDefineHistory before current line
691
692 if (!options.updatePreprocessor)
693 ppDefineHistory.clear();
694
695 std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(), After(lineCurrent-1));
696 if (itInvalid != ppDefineHistory.end()) {
697 ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
698 definitionsChanged = true;
699 }
700
701 SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
702 for (std::vector<PPDefinition>::iterator itDef = ppDefineHistory.begin(); itDef != ppDefineHistory.end(); ++itDef) {
703 if (itDef->isUndef)
704 preprocessorDefinitions.erase(itDef->key);
705 else
706 preprocessorDefinitions[itDef->key] = SymbolValue(itDef->value, itDef->arguments);
707 }
708
709 std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
710 SparseState<std::string> rawSTNew(lineCurrent);
711
712 int activitySet = preproc.IsInactive() ? activeFlag : 0;
713
714 const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_C_IDENTIFIER);
715 const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_C_COMMENTDOCKEYWORD);
716
717 int lineEndNext = styler.LineEnd(lineCurrent);
718
719 for (; sc.More();) {
720
721 if (sc.atLineStart) {
722 // Using MaskActive() is not needed in the following statement.
723 // Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
724 if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
725 // Prevent SCE_C_STRINGEOL from leaking back to previous line which
726 // ends with a line continuation by locking in the state up to this position.
727 sc.SetState(sc.state);
728 }
729 if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
730 sc.SetState(SCE_C_DEFAULT|activitySet);
731 }
732 // Reset states to beginning of colourise so no surprises
733 // if different sets of lines lexed.
734 visibleChars = 0;
735 lastWordWasUUID = false;
736 isIncludePreprocessor = false;
737 inRERange = false;
738 if (preproc.IsInactive()) {
739 activitySet = activeFlag;
740 sc.SetState(sc.state | activitySet);
741 }
742 }
743
744 if (sc.atLineEnd) {
745 lineCurrent++;
746 lineEndNext = styler.LineEnd(lineCurrent);
747 vlls.Add(lineCurrent, preproc);
748 if (rawStringTerminator != "") {
749 rawSTNew.Set(lineCurrent-1, rawStringTerminator);
750 }
751 }
752
753 // Handle line continuation generically.
754 if (sc.ch == '\\') {
755 if (static_cast<int>((sc.currentPos+1)) >= lineEndNext) {
756 lineCurrent++;
757 lineEndNext = styler.LineEnd(lineCurrent);
758 vlls.Add(lineCurrent, preproc);
759 sc.Forward();
760 if (sc.ch == '\r' && sc.chNext == '\n') {
761 // Even in UTF-8, \r and \n are separate
762 sc.Forward();
763 }
764 continuationLine = true;
765 sc.Forward();
766 continue;
767 }
768 }
769
770 const bool atLineEndBeforeSwitch = sc.atLineEnd;
771
772 // Determine if the current state should terminate.
773 switch (MaskActive(sc.state)) {
774 case SCE_C_OPERATOR:
775 sc.SetState(SCE_C_DEFAULT|activitySet);
776 break;
777 case SCE_C_NUMBER:
778 // We accept almost anything because of hex. and number suffixes
779 if (sc.ch == '_') {
780 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
781 } else if (!(setWord.Contains(sc.ch)
782 || (sc.ch == '\'')
783 || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
784 sc.chPrev == 'p' || sc.chPrev == 'P')))) {
785 sc.SetState(SCE_C_DEFAULT|activitySet);
786 }
787 break;
788 case SCE_C_USERLITERAL:
789 if (!(setWord.Contains(sc.ch)))
790 sc.SetState(SCE_C_DEFAULT|activitySet);
791 break;
792 case SCE_C_IDENTIFIER:
793 if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
794 char s[1000];
795 if (caseSensitive) {
796 sc.GetCurrent(s, sizeof(s));
797 } else {
798 sc.GetCurrentLowered(s, sizeof(s));
799 }
800 if (keywords.InList(s)) {
801 lastWordWasUUID = strcmp(s, "uuid") == 0;
802 sc.ChangeState(SCE_C_WORD|activitySet);
803 } else if (keywords2.InList(s)) {
804 sc.ChangeState(SCE_C_WORD2|activitySet);
805 } else if (keywords4.InList(s)) {
806 sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
807 } else {
808 int subStyle = classifierIdentifiers.ValueFor(s);
809 if (subStyle >= 0) {
810 sc.ChangeState(subStyle|activitySet);
811 }
812 }
813 const bool literalString = sc.ch == '\"';
814 if (literalString || sc.ch == '\'') {
815 size_t lenS = strlen(s);
816 const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
817 if (raw)
818 s[lenS--] = '\0';
819 bool valid =
820 (lenS == 0) ||
821 ((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
822 ((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
823 if (valid) {
824 if (literalString) {
825 if (raw) {
826 // Set the style of the string prefix to SCE_C_STRINGRAW but then change to
827 // SCE_C_DEFAULT as that allows the raw string start code to run.
828 sc.ChangeState(SCE_C_STRINGRAW|activitySet);
829 sc.SetState(SCE_C_DEFAULT|activitySet);
830 } else {
831 sc.ChangeState(SCE_C_STRING|activitySet);
832 }
833 } else {
834 sc.ChangeState(SCE_C_CHARACTER|activitySet);
835 }
836 } else {
837 sc.SetState(SCE_C_DEFAULT | activitySet);
838 }
839 } else {
840 sc.SetState(SCE_C_DEFAULT|activitySet);
841 }
842 }
843 break;
844 case SCE_C_PREPROCESSOR:
845 if (options.stylingWithinPreprocessor) {
846 if (IsASpace(sc.ch)) {
847 sc.SetState(SCE_C_DEFAULT|activitySet);
848 }
849 } else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
850 isStringInPreprocessor = false;
851 } else if (!isStringInPreprocessor) {
852 if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
853 isStringInPreprocessor = true;
854 } else if (sc.Match('/', '*')) {
855 if (sc.Match("/**") || sc.Match("/*!")) {
856 sc.SetState(SCE_C_PREPROCESSORCOMMENTDOC|activitySet);
857 } else {
858 sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
859 }
860 sc.Forward(); // Eat the *
861 } else if (sc.Match('/', '/')) {
862 sc.SetState(SCE_C_DEFAULT|activitySet);
863 }
864 }
865 break;
866 case SCE_C_PREPROCESSORCOMMENT:
867 case SCE_C_PREPROCESSORCOMMENTDOC:
868 if (sc.Match('*', '/')) {
869 sc.Forward();
870 sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
871 continue; // Without advancing in case of '\'.
872 }
873 break;
874 case SCE_C_COMMENT:
875 if (sc.Match('*', '/')) {
876 sc.Forward();
877 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
878 } else {
879 styleBeforeTaskMarker = SCE_C_COMMENT;
880 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
881 }
882 break;
883 case SCE_C_COMMENTDOC:
884 if (sc.Match('*', '/')) {
885 sc.Forward();
886 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
887 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
888 // Verify that we have the conditions to mark a comment-doc-keyword
889 if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
890 styleBeforeDCKeyword = SCE_C_COMMENTDOC;
891 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
892 }
893 }
894 break;
895 case SCE_C_COMMENTLINE:
896 if (sc.atLineStart && !continuationLine) {
897 sc.SetState(SCE_C_DEFAULT|activitySet);
898 } else {
899 styleBeforeTaskMarker = SCE_C_COMMENTLINE;
900 highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
901 }
902 break;
903 case SCE_C_COMMENTLINEDOC:
904 if (sc.atLineStart && !continuationLine) {
905 sc.SetState(SCE_C_DEFAULT|activitySet);
906 } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
907 // Verify that we have the conditions to mark a comment-doc-keyword
908 if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
909 styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
910 sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
911 }
912 }
913 break;
914 case SCE_C_COMMENTDOCKEYWORD:
915 if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
916 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
917 sc.Forward();
918 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
919 seenDocKeyBrace = false;
920 } else if (sc.ch == '[' || sc.ch == '{') {
921 seenDocKeyBrace = true;
922 } else if (!setDoxygen.Contains(sc.ch)
923 && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
924 char s[100];
925 if (caseSensitive) {
926 sc.GetCurrent(s, sizeof(s));
927 } else {
928 sc.GetCurrentLowered(s, sizeof(s));
929 }
930 if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
931 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
932 } else if (!keywords3.InList(s + 1)) {
933 int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
934 if (subStyleCDKW >= 0) {
935 sc.ChangeState(subStyleCDKW|activitySet);
936 } else {
937 sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
938 }
939 }
940 sc.SetState(styleBeforeDCKeyword|activitySet);
941 seenDocKeyBrace = false;
942 }
943 break;
944 case SCE_C_STRING:
945 if (sc.atLineEnd) {
946 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
947 } else if (isIncludePreprocessor) {
948 if (sc.ch == '>') {
949 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
950 isIncludePreprocessor = false;
951 }
952 } else if (sc.ch == '\\') {
953 if (options.escapeSequence) {
954 sc.SetState(SCE_C_ESCAPESEQUENCE|activitySet);
955 escapeSeq.resetEscapeState(sc.chNext);
956 }
957 sc.Forward(); // Skip all characters after the backslash
958 } else if (sc.ch == '\"') {
959 if (sc.chNext == '_') {
960 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
961 } else {
962 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
963 }
964 }
965 break;
966 case SCE_C_ESCAPESEQUENCE:
967 escapeSeq.digitsLeft--;
968 if (!escapeSeq.atEscapeEnd(sc.ch)) {
969 break;
970 }
971 if (sc.ch == '"') {
972 sc.SetState(SCE_C_STRING|activitySet);
973 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
974 } else if (sc.ch == '\\') {
975 escapeSeq.resetEscapeState(sc.chNext);
976 sc.Forward();
977 } else {
978 sc.SetState(SCE_C_STRING|activitySet);
979 if (sc.atLineEnd) {
980 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
981 }
982 }
983 break;
984 case SCE_C_HASHQUOTEDSTRING:
985 if (sc.ch == '\\') {
986 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
987 sc.Forward();
988 }
989 } else if (sc.ch == '\"') {
990 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
991 }
992 break;
993 case SCE_C_STRINGRAW:
994 if (sc.Match(rawStringTerminator.c_str())) {
995 for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
996 sc.Forward();
997 sc.SetState(SCE_C_DEFAULT|activitySet);
998 rawStringTerminator = "";
999 }
1000 break;
1001 case SCE_C_CHARACTER:
1002 if (sc.atLineEnd) {
1003 sc.ChangeState(SCE_C_STRINGEOL|activitySet);
1004 } else if (sc.ch == '\\') {
1005 if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
1006 sc.Forward();
1007 }
1008 } else if (sc.ch == '\'') {
1009 if (sc.chNext == '_') {
1010 sc.ChangeState(SCE_C_USERLITERAL|activitySet);
1011 } else {
1012 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1013 }
1014 }
1015 break;
1016 case SCE_C_REGEX:
1017 if (sc.atLineStart) {
1018 sc.SetState(SCE_C_DEFAULT|activitySet);
1019 } else if (! inRERange && sc.ch == '/') {
1020 sc.Forward();
1021 while ((sc.ch < 0x80) && islower(sc.ch))
1022 sc.Forward(); // gobble regex flags
1023 sc.SetState(SCE_C_DEFAULT|activitySet);
1024 } else if (sc.ch == '\\' && (static_cast<int>(sc.currentPos+1) < lineEndNext)) {
1025 // Gobble up the escaped character
1026 sc.Forward();
1027 } else if (sc.ch == '[') {
1028 inRERange = true;
1029 } else if (sc.ch == ']') {
1030 inRERange = false;
1031 }
1032 break;
1033 case SCE_C_STRINGEOL:
1034 if (sc.atLineStart) {
1035 sc.SetState(SCE_C_DEFAULT|activitySet);
1036 }
1037 break;
1038 case SCE_C_VERBATIM:
1039 if (sc.ch == '\"') {
1040 if (sc.chNext == '\"') {
1041 sc.Forward();
1042 } else {
1043 sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
1044 }
1045 }
1046 break;
1047 case SCE_C_TRIPLEVERBATIM:
1048 if (sc.Match("\"\"\"")) {
1049 while (sc.Match('"')) {
1050 sc.Forward();
1051 }
1052 sc.SetState(SCE_C_DEFAULT|activitySet);
1053 }
1054 break;
1055 case SCE_C_UUID:
1056 if (sc.atLineEnd || sc.ch == ')') {
1057 sc.SetState(SCE_C_DEFAULT|activitySet);
1058 }
1059 break;
1060 case SCE_C_TASKMARKER:
1061 if (isoperator(sc.ch) || IsASpace(sc.ch)) {
1062 sc.SetState(styleBeforeTaskMarker|activitySet);
1063 styleBeforeTaskMarker = SCE_C_DEFAULT;
1064 }
1065 }
1066
1067 if (sc.atLineEnd && !atLineEndBeforeSwitch) {
1068 // State exit processing consumed characters up to end of line.
1069 lineCurrent++;
1070 lineEndNext = styler.LineEnd(lineCurrent);
1071 vlls.Add(lineCurrent, preproc);
1072 }
1073
1074 // Determine if a new state should be entered.
1075 if (MaskActive(sc.state) == SCE_C_DEFAULT) {
1076 if (sc.Match('@', '\"')) {
1077 sc.SetState(SCE_C_VERBATIM|activitySet);
1078 sc.Forward();
1079 } else if (options.triplequotedStrings && sc.Match("\"\"\"")) {
1080 sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
1081 sc.Forward(2);
1082 } else if (options.hashquotedStrings && sc.Match('#', '\"')) {
1083 sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
1084 sc.Forward();
1085 } else if (options.backQuotedStrings && sc.Match('`')) {
1086 sc.SetState(SCE_C_STRINGRAW|activitySet);
1087 rawStringTerminator = "`";
1088 sc.Forward();
1089 } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1090 if (lastWordWasUUID) {
1091 sc.SetState(SCE_C_UUID|activitySet);
1092 lastWordWasUUID = false;
1093 } else {
1094 sc.SetState(SCE_C_NUMBER|activitySet);
1095 }
1096 } else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
1097 if (lastWordWasUUID) {
1098 sc.SetState(SCE_C_UUID|activitySet);
1099 lastWordWasUUID = false;
1100 } else {
1101 sc.SetState(SCE_C_IDENTIFIER|activitySet);
1102 }
1103 } else if (sc.Match('/', '*')) {
1104 if (sc.Match("/**") || sc.Match("/*!")) { // Support of Qt/Doxygen doc. style
1105 sc.SetState(SCE_C_COMMENTDOC|activitySet);
1106 } else {
1107 sc.SetState(SCE_C_COMMENT|activitySet);
1108 }
1109 sc.Forward(); // Eat the * so it isn't used for the end of the comment
1110 } else if (sc.Match('/', '/')) {
1111 if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
1112 // Support of Qt/Doxygen doc. style
1113 sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
1114 else
1115 sc.SetState(SCE_C_COMMENTLINE|activitySet);
1116 } else if (sc.ch == '/'
1117 && (setOKBeforeRE.Contains(chPrevNonWhite)
1118 || followsReturnKeyword(sc, styler))
1119 && (!setCouldBePostOp.Contains(chPrevNonWhite)
1120 || !FollowsPostfixOperator(sc, styler))) {
1121 sc.SetState(SCE_C_REGEX|activitySet); // JavaScript's RegEx
1122 inRERange = false;
1123 } else if (sc.ch == '\"') {
1124 if (sc.chPrev == 'R') {
1125 styler.Flush();
1126 if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
1127 sc.SetState(SCE_C_STRINGRAW|activitySet);
1128 rawStringTerminator = ")";
1129 for (int termPos = sc.currentPos + 1;; termPos++) {
1130 char chTerminator = styler.SafeGetCharAt(termPos, '(');
1131 if (chTerminator == '(')
1132 break;
1133 rawStringTerminator += chTerminator;
1134 }
1135 rawStringTerminator += '\"';
1136 } else {
1137 sc.SetState(SCE_C_STRING|activitySet);
1138 }
1139 } else {
1140 sc.SetState(SCE_C_STRING|activitySet);
1141 }
1142 isIncludePreprocessor = false; // ensure that '>' won't end the string
1143 } else if (isIncludePreprocessor && sc.ch == '<') {
1144 sc.SetState(SCE_C_STRING|activitySet);
1145 } else if (sc.ch == '\'') {
1146 sc.SetState(SCE_C_CHARACTER|activitySet);
1147 } else if (sc.ch == '#' && visibleChars == 0) {
1148 // Preprocessor commands are alone on their line
1149 sc.SetState(SCE_C_PREPROCESSOR|activitySet);
1150 // Skip whitespace between # and preprocessor word
1151 do {
1152 sc.Forward();
1153 } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
1154 if (sc.atLineEnd) {
1155 sc.SetState(SCE_C_DEFAULT|activitySet);
1156 } else if (sc.Match("include")) {
1157 isIncludePreprocessor = true;
1158 } else {
1159 if (options.trackPreprocessor) {
1160 if (sc.Match("ifdef") || sc.Match("ifndef")) {
1161 bool isIfDef = sc.Match("ifdef");
1162 int i = isIfDef ? 5 : 6;
1163 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + i + 1, false);
1164 bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
1165 preproc.StartSection(isIfDef == foundDef);
1166 } else if (sc.Match("if")) {
1167 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1168 bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1169 preproc.StartSection(ifGood);
1170 } else if (sc.Match("else")) {
1171 if (!preproc.CurrentIfTaken()) {
1172 preproc.InvertCurrentLevel();
1173 activitySet = preproc.IsInactive() ? activeFlag : 0;
1174 if (!activitySet)
1175 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1176 } else if (!preproc.IsInactive()) {
1177 preproc.InvertCurrentLevel();
1178 activitySet = preproc.IsInactive() ? activeFlag : 0;
1179 if (!activitySet)
1180 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1181 }
1182 } else if (sc.Match("elif")) {
1183 // Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
1184 if (!preproc.CurrentIfTaken()) {
1185 // Similar to #if
1186 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
1187 bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
1188 if (ifGood) {
1189 preproc.InvertCurrentLevel();
1190 activitySet = preproc.IsInactive() ? activeFlag : 0;
1191 if (!activitySet)
1192 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1193 }
1194 } else if (!preproc.IsInactive()) {
1195 preproc.InvertCurrentLevel();
1196 activitySet = preproc.IsInactive() ? activeFlag : 0;
1197 if (!activitySet)
1198 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1199 }
1200 } else if (sc.Match("endif")) {
1201 preproc.EndSection();
1202 activitySet = preproc.IsInactive() ? activeFlag : 0;
1203 sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
1204 } else if (sc.Match("define")) {
1205 if (options.updatePreprocessor && !preproc.IsInactive()) {
1206 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
1207 size_t startName = 0;
1208 while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
1209 startName++;
1210 size_t endName = startName;
1211 while ((endName < restOfLine.length()) && setWord.Contains(static_cast<unsigned char>(restOfLine[endName])))
1212 endName++;
1213 std::string key = restOfLine.substr(startName, endName-startName);
1214 if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
1215 // Macro
1216 size_t endArgs = endName;
1217 while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
1218 endArgs++;
1219 std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
1220 size_t startValue = endArgs+1;
1221 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1222 startValue++;
1223 std::string value;
1224 if (startValue < restOfLine.length())
1225 value = restOfLine.substr(startValue);
1226 preprocessorDefinitions[key] = SymbolValue(value, args);
1227 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
1228 definitionsChanged = true;
1229 } else {
1230 // Value
1231 size_t startValue = endName;
1232 while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
1233 startValue++;
1234 std::string value = restOfLine.substr(startValue);
1235 preprocessorDefinitions[key] = value;
1236 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
1237 definitionsChanged = true;
1238 }
1239 }
1240 } else if (sc.Match("undef")) {
1241 if (options.updatePreprocessor && !preproc.IsInactive()) {
1242 std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, true);
1243 std::vector<std::string> tokens = Tokenize(restOfLine);
1244 std::string key;
1245 if (tokens.size() >= 1) {
1246 key = tokens[0];
1247 preprocessorDefinitions.erase(key);
1248 ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
1249 definitionsChanged = true;
1250 }
1251 }
1252 }
1253 }
1254 }
1255 } else if (isoperator(sc.ch)) {
1256 sc.SetState(SCE_C_OPERATOR|activitySet);
1257 }
1258 }
1259
1260 if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
1261 chPrevNonWhite = sc.ch;
1262 visibleChars++;
1263 }
1264 continuationLine = false;
1265 sc.Forward();
1266 }
1267 const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
1268 if (definitionsChanged || rawStringsChanged)
1269 styler.ChangeLexerState(startPos, startPos + length);
1270 sc.Complete();
1271 }
1272
// Store both the current line's fold level and the next line's in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "} else {".
1276
Fold(unsigned int startPos,int length,int initStyle,IDocument * pAccess)1277 void SCI_METHOD LexerCPP::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
1278
1279 if (!options.fold)
1280 return;
1281
1282 LexAccessor styler(pAccess);
1283
1284 unsigned int endPos = startPos + length;
1285 int visibleChars = 0;
1286 bool inLineComment = false;
1287 int lineCurrent = styler.GetLine(startPos);
1288 int levelCurrent = SC_FOLDLEVELBASE;
1289 if (lineCurrent > 0)
1290 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
1291 unsigned int lineStartNext = styler.LineStart(lineCurrent+1);
1292 int levelMinCurrent = levelCurrent;
1293 int levelNext = levelCurrent;
1294 char chNext = styler[startPos];
1295 int styleNext = MaskActive(styler.StyleAt(startPos));
1296 int style = MaskActive(initStyle);
1297 const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
1298 for (unsigned int i = startPos; i < endPos; i++) {
1299 char ch = chNext;
1300 chNext = styler.SafeGetCharAt(i + 1);
1301 int stylePrev = style;
1302 style = styleNext;
1303 styleNext = MaskActive(styler.StyleAt(i + 1));
1304 bool atEOL = i == (lineStartNext-1);
1305 if ((style == SCE_C_COMMENTLINE) || (style == SCE_C_COMMENTLINEDOC))
1306 inLineComment = true;
1307 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
1308 if (!IsStreamCommentStyle(stylePrev)) {
1309 levelNext++;
1310 } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
1311 // Comments don't end at end of line and the next character may be unstyled.
1312 levelNext--;
1313 }
1314 }
1315 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
1316 if (userDefinedFoldMarkers) {
1317 if (styler.Match(i, options.foldExplicitStart.c_str())) {
1318 levelNext++;
1319 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
1320 levelNext--;
1321 }
1322 } else {
1323 if ((ch == '/') && (chNext == '/')) {
1324 char chNext2 = styler.SafeGetCharAt(i + 2);
1325 if (chNext2 == '{') {
1326 levelNext++;
1327 } else if (chNext2 == '}') {
1328 levelNext--;
1329 }
1330 }
1331 }
1332 }
1333 if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
1334 if (ch == '#') {
1335 unsigned int j = i + 1;
1336 while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1337 j++;
1338 }
1339 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1340 levelNext++;
1341 } else if (styler.Match(j, "end")) {
1342 levelNext--;
1343 }
1344 }
1345 }
1346 if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
1347 if (ch == '{') {
1348 // Measure the minimum before a '{' to allow
1349 // folding on "} else {"
1350 if (levelMinCurrent > levelNext) {
1351 levelMinCurrent = levelNext;
1352 }
1353 levelNext++;
1354 } else if (ch == '}') {
1355 levelNext--;
1356 }
1357 }
1358 if (!IsASpace(ch))
1359 visibleChars++;
1360 if (atEOL || (i == endPos-1)) {
1361 int levelUse = levelCurrent;
1362 if (options.foldSyntaxBased && options.foldAtElse) {
1363 levelUse = levelMinCurrent;
1364 }
1365 int lev = levelUse | levelNext << 16;
1366 if (visibleChars == 0 && options.foldCompact)
1367 lev |= SC_FOLDLEVELWHITEFLAG;
1368 if (levelUse < levelNext)
1369 lev |= SC_FOLDLEVELHEADERFLAG;
1370 if (lev != styler.LevelAt(lineCurrent)) {
1371 styler.SetLevel(lineCurrent, lev);
1372 }
1373 lineCurrent++;
1374 lineStartNext = styler.LineStart(lineCurrent+1);
1375 levelCurrent = levelNext;
1376 levelMinCurrent = levelCurrent;
1377 if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
1378 // There is an empty line at end of file so give it same level and empty
1379 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
1380 }
1381 visibleChars = 0;
1382 inLineComment = false;
1383 }
1384 }
1385 }
1386
EvaluateTokens(std::vector<std::string> & tokens,const SymbolTable & preprocessorDefinitions)1387 void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
1388
1389 // Remove whitespace tokens
1390 tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
1391
1392 // Evaluate defined statements to either 0 or 1
1393 for (size_t i=0; (i+1)<tokens.size();) {
1394 if (tokens[i] == "defined") {
1395 const char *val = "0";
1396 if (tokens[i+1] == "(") {
1397 if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
1398 // defined()
1399 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
1400 } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
1401 // defined(<identifier>)
1402 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
1403 if (it != preprocessorDefinitions.end()) {
1404 val = "1";
1405 }
1406 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
1407 } else {
1408 // Spurious '(' so erase as more likely to result in false
1409 tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
1410 }
1411 } else {
1412 // defined <identifier>
1413 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
1414 if (it != preprocessorDefinitions.end()) {
1415 val = "1";
1416 }
1417 }
1418 tokens[i] = val;
1419 } else {
1420 i++;
1421 }
1422 }
1423
1424 // Evaluate identifiers
1425 const size_t maxIterations = 100;
1426 size_t iterations = 0; // Limit number of iterations in case there is a recursive macro.
1427 for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
1428 iterations++;
1429 if (setWordStart.Contains(static_cast<unsigned char>(tokens[i][0]))) {
1430 SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
1431 if (it != preprocessorDefinitions.end()) {
1432 // Tokenize value
1433 std::vector<std::string> macroTokens = Tokenize(it->second.value);
1434 if (it->second.IsMacro()) {
1435 if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
1436 // Create map of argument name to value
1437 std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
1438 std::map<std::string, std::string> arguments;
1439 size_t arg = 0;
1440 size_t tok = i+2;
1441 while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
1442 if (tokens.at(tok) != ",") {
1443 arguments[argumentNames.at(arg)] = tokens.at(tok);
1444 arg++;
1445 }
1446 tok++;
1447 }
1448
1449 // Remove invocation
1450 tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
1451
1452 // Substitute values into macro
1453 macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
1454
1455 for (size_t iMacro = 0; iMacro < macroTokens.size();) {
1456 if (setWordStart.Contains(static_cast<unsigned char>(macroTokens[iMacro][0]))) {
1457 std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
1458 if (itFind != arguments.end()) {
1459 // TODO: Possible that value will be expression so should insert tokenized form
1460 macroTokens[iMacro] = itFind->second;
1461 }
1462 }
1463 iMacro++;
1464 }
1465
1466 // Insert results back into tokens
1467 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1468
1469 } else {
1470 i++;
1471 }
1472 } else {
1473 // Remove invocation
1474 tokens.erase(tokens.begin() + i);
1475 // Insert results back into tokens
1476 tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
1477 }
1478 } else {
1479 // Identifier not found
1480 tokens.erase(tokens.begin() + i);
1481 }
1482 } else {
1483 i++;
1484 }
1485 }
1486
1487 // Find bracketed subexpressions and recurse on them
1488 BracketPair bracketPair = FindBracketPair(tokens);
1489 while (bracketPair.itBracket != tokens.end()) {
1490 std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
1491 EvaluateTokens(inBracket, preprocessorDefinitions);
1492
1493 // The insertion is done before the removal because there were failures with the opposite approach
1494 tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
1495
1496 bracketPair = FindBracketPair(tokens);
1497 tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
1498
1499 bracketPair = FindBracketPair(tokens);
1500 }
1501
1502 // Evaluate logical negations
1503 for (size_t j=0; (j+1)<tokens.size();) {
1504 if (setNegationOp.Contains(tokens[j][0])) {
1505 int isTrue = atoi(tokens[j+1].c_str());
1506 if (tokens[j] == "!")
1507 isTrue = !isTrue;
1508 std::vector<std::string>::iterator itInsert =
1509 tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
1510 tokens.insert(itInsert, isTrue ? "1" : "0");
1511 } else {
1512 j++;
1513 }
1514 }
1515
1516 // Evaluate expressions in precedence order
1517 enum precedence { precArithmetic, precRelative, precLogical };
1518 for (int prec=precArithmetic; prec <= precLogical; prec++) {
1519 // Looking at 3 tokens at a time so end at 2 before end
1520 for (size_t k=0; (k+2)<tokens.size();) {
1521 char chOp = tokens[k+1][0];
1522 if (
1523 ((prec==precArithmetic) && setArithmethicOp.Contains(chOp)) ||
1524 ((prec==precRelative) && setRelOp.Contains(chOp)) ||
1525 ((prec==precLogical) && setLogicalOp.Contains(chOp))
1526 ) {
1527 int valA = atoi(tokens[k].c_str());
1528 int valB = atoi(tokens[k+2].c_str());
1529 int result = 0;
1530 if (tokens[k+1] == "+")
1531 result = valA + valB;
1532 else if (tokens[k+1] == "-")
1533 result = valA - valB;
1534 else if (tokens[k+1] == "*")
1535 result = valA * valB;
1536 else if (tokens[k+1] == "/")
1537 result = valA / (valB ? valB : 1);
1538 else if (tokens[k+1] == "%")
1539 result = valA % (valB ? valB : 1);
1540 else if (tokens[k+1] == "<")
1541 result = valA < valB;
1542 else if (tokens[k+1] == "<=")
1543 result = valA <= valB;
1544 else if (tokens[k+1] == ">")
1545 result = valA > valB;
1546 else if (tokens[k+1] == ">=")
1547 result = valA >= valB;
1548 else if (tokens[k+1] == "==")
1549 result = valA == valB;
1550 else if (tokens[k+1] == "!=")
1551 result = valA != valB;
1552 else if (tokens[k+1] == "||")
1553 result = valA || valB;
1554 else if (tokens[k+1] == "&&")
1555 result = valA && valB;
1556 char sResult[30];
1557 sprintf(sResult, "%d", result);
1558 std::vector<std::string>::iterator itInsert =
1559 tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
1560 tokens.insert(itInsert, sResult);
1561 } else {
1562 k++;
1563 }
1564 }
1565 }
1566 }
1567
Tokenize(const std::string & expr) const1568 std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const {
1569 // Break into tokens
1570 std::vector<std::string> tokens;
1571 const char *cp = expr.c_str();
1572 while (*cp) {
1573 std::string word;
1574 if (setWord.Contains(static_cast<unsigned char>(*cp))) {
1575 // Identifiers and numbers
1576 while (setWord.Contains(static_cast<unsigned char>(*cp))) {
1577 word += *cp;
1578 cp++;
1579 }
1580 } else if (IsSpaceOrTab(*cp)) {
1581 while (IsSpaceOrTab(*cp)) {
1582 word += *cp;
1583 cp++;
1584 }
1585 } else if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1586 word += *cp;
1587 cp++;
1588 if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
1589 word += *cp;
1590 cp++;
1591 }
1592 } else if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1593 word += *cp;
1594 cp++;
1595 if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
1596 word += *cp;
1597 cp++;
1598 }
1599 } else {
1600 // Should handle strings, characters, and comments here
1601 word += *cp;
1602 cp++;
1603 }
1604 tokens.push_back(word);
1605 }
1606 return tokens;
1607 }
1608
EvaluateExpression(const std::string & expr,const SymbolTable & preprocessorDefinitions)1609 bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
1610 std::vector<std::string> tokens = Tokenize(expr);
1611
1612 EvaluateTokens(tokens, preprocessorDefinitions);
1613
1614 // "0" or "" -> false else true
1615 bool isFalse = tokens.empty() ||
1616 ((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
1617 return !isFalse;
1618 }
1619
// LexerModule objects for the two variants implemented in this file. Each pairs a
// lexer identifier with a factory function and a language name, and both share
// cppWordLists: SCLEX_CPP ("cpp") uses LexerFactoryCPP and SCLEX_CPPNOCASE
// ("cppnocase") uses LexerFactoryCPPInsensitive.
LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);
1622