1 // Scintilla source code edit control
2 /** @file LexBash.cxx
3 ** Lexer for Bash.
4 **/
5 // Copyright 2004-2012 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
8
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14
15 #include <string>
16 #include <vector>
17 #include <map>
18
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
23 #include "StringCopy.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 #include "SubStyles.h"
31 #include "DefaultLexer.h"
32
33 using namespace Scintilla;
34
35 #define HERE_DELIM_MAX 256
36
37 // define this if you want 'invalid octals' to be marked as errors
38 // usually, this is not a good idea, permissive lexing is better
39 #undef PEDANTIC_OCTAL
40
41 #define BASH_BASE_ERROR 65
42 #define BASH_BASE_DECIMAL 66
43 #define BASH_BASE_HEX 67
44 #ifdef PEDANTIC_OCTAL
45 #define BASH_BASE_OCTAL 68
46 #define BASH_BASE_OCTAL_ERROR 69
47 #endif
48
49 // state constants for parts of a bash command segment
50 #define BASH_CMD_BODY 0
51 #define BASH_CMD_START 1
52 #define BASH_CMD_WORD 2
53 #define BASH_CMD_TEST 3
54 #define BASH_CMD_ARITH 4
55 #define BASH_CMD_DELIM 5
56
57 // state constants for nested delimiter pairs, used by
58 // SCE_SH_STRING and SCE_SH_BACKTICKS processing
59 #define BASH_DELIM_LITERAL 0
60 #define BASH_DELIM_STRING 1
61 #define BASH_DELIM_CSTRING 2
62 #define BASH_DELIM_LSTRING 3
63 #define BASH_DELIM_COMMAND 4
64 #define BASH_DELIM_BACKTICK 5
65
66 #define BASH_DELIM_STACK_MAX 7
67
68 namespace {
69
translateBashDigit(int ch)70 inline int translateBashDigit(int ch) {
71 if (ch >= '0' && ch <= '9') {
72 return ch - '0';
73 } else if (ch >= 'a' && ch <= 'z') {
74 return ch - 'a' + 10;
75 } else if (ch >= 'A' && ch <= 'Z') {
76 return ch - 'A' + 36;
77 } else if (ch == '@') {
78 return 62;
79 } else if (ch == '_') {
80 return 63;
81 }
82 return BASH_BASE_ERROR;
83 }
84
getBashNumberBase(char * s)85 inline int getBashNumberBase(char *s) {
86 int i = 0;
87 int base = 0;
88 while (*s) {
89 base = base * 10 + (*s++ - '0');
90 i++;
91 }
92 if (base > 64 || i > 2) {
93 return BASH_BASE_ERROR;
94 }
95 return base;
96 }
97
// Return the closing delimiter that pairs with an opening bracket character.
// Characters that are not one of ( [ { < are their own closing delimiter.
int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
105
GlobScan(StyleContext & sc)106 int GlobScan(StyleContext &sc) {
107 // forward scan for zsh globs, disambiguate versus bash arrays
108 // complex expressions may still fail, e.g. unbalanced () '' "" etc
109 int c, sLen = 0;
110 int pCount = 0;
111 int hash = 0;
112 while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
113 if (IsASpace(c)) {
114 return 0;
115 } else if (c == '\'' || c == '\"') {
116 if (hash != 2) return 0;
117 } else if (c == '#' && hash == 0) {
118 hash = (sLen == 1) ? 2:1;
119 } else if (c == '(') {
120 pCount++;
121 } else if (c == ')') {
122 if (pCount == 0) {
123 if (hash) return sLen;
124 return 0;
125 }
126 pCount--;
127 }
128 }
129 return 0;
130 }
131
IsCommentLine(Sci_Position line,LexAccessor & styler)132 bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
133 Sci_Position pos = styler.LineStart(line);
134 Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
135 for (Sci_Position i = pos; i < eol_pos; i++) {
136 char ch = styler[i];
137 if (ch == '#')
138 return true;
139 else if (ch != ' ' && ch != '\t')
140 return false;
141 }
142 return false;
143 }
144
// Option values for the Bash lexer; they are bound to property names
// by OptionSetBash.
struct OptionsBash {
	bool fold;		// off by default
	bool foldComment;	// off by default
	bool foldCompact;	// on by default

	OptionsBash() :
		fold(false),
		foldComment(false),
		foldCompact(true) {
	}
};
156
// Null-terminated list of word list descriptions; slot 0 is the keyword list.
const char * const bashWordListDesc[] = {
	"Keywords",
	nullptr
};
161
// Binds the property names "fold", "fold.comment" and "fold.compact" to the
// corresponding OptionsBash members and registers the word list descriptions.
struct OptionSetBash : public OptionSet<OptionsBash> {
	OptionSetBash() {
		DefineProperty("fold", &OptionsBash::fold);

		DefineProperty("fold.comment", &OptionsBash::foldComment);

		DefineProperty("fold.compact", &OptionsBash::foldCompact);

		DefineWordListSets(bashWordListDesc);
	}
};
173
// Zero-terminated list of the base styles handed to SubStyles and returned by
// GetSubStyleBases(): identifiers and scalar variables.
const char styleSubable[] = { SCE_SH_IDENTIFIER, SCE_SH_SCALAR, 0 };
175
176 LexicalClass lexicalClasses[] = {
177 // Lexer Bash SCLEX_BASH SCE_SH_:
178 0, "SCE_SH_DEFAULT", "default", "White space",
179 1, "SCE_SH_ERROR", "error", "Error",
180 2, "SCE_SH_COMMENTLINE", "comment line", "Line comment: #",
181 3, "SCE_SH_NUMBER", "literal numeric", "Number",
182 4, "SCE_SH_WORD", "keyword", "Keyword",
183 5, "SCE_SH_STRING", "literal string", "String",
184 6, "SCE_SH_CHARACTER", "literal string", "Single quoted string",
185 7, "SCE_SH_OPERATOR", "operator", "Operators",
186 8, "SCE_SH_IDENTIFIER", "identifier", "Identifiers",
187 9, "SCE_SH_SCALAR", "identifier", "Scalar variable",
188 10, "SCE_SH_PARAM", "identifier", "Parameter",
189 11, "SCE_SH_BACKTICKS", "literal string", "Backtick quoted command",
190 12, "SCE_SH_HERE_DELIM", "operator", "Heredoc delimiter",
191 13, "SCE_SH_HERE_Q", "literal string", "Heredoc quoted string",
192 };
193
194 }
195
196 class LexerBash : public DefaultLexer {
197 WordList keywords;
198 OptionsBash options;
199 OptionSetBash osBash;
200 enum { ssIdentifier, ssScalar };
201 SubStyles subStyles;
202 public:
LexerBash()203 LexerBash() :
204 DefaultLexer("bash", SCLEX_BASH, lexicalClasses, ELEMENTS(lexicalClasses)),
205 subStyles(styleSubable, 0x80, 0x40, 0) {
206 }
~LexerBash()207 virtual ~LexerBash() {
208 }
Release()209 void SCI_METHOD Release() override {
210 delete this;
211 }
Version() const212 int SCI_METHOD Version() const override {
213 return lvRelease5;
214 }
PropertyNames()215 const char * SCI_METHOD PropertyNames() override {
216 return osBash.PropertyNames();
217 }
PropertyType(const char * name)218 int SCI_METHOD PropertyType(const char* name) override {
219 return osBash.PropertyType(name);
220 }
DescribeProperty(const char * name)221 const char * SCI_METHOD DescribeProperty(const char *name) override {
222 return osBash.DescribeProperty(name);
223 }
224 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
PropertyGet(const char * key)225 const char * SCI_METHOD PropertyGet(const char* key) override {
226 return osBash.PropertyGet(key);
227 }
DescribeWordListSets()228 const char * SCI_METHOD DescribeWordListSets() override {
229 return osBash.DescribeWordListSets();
230 }
231 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
232 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
233 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
234
PrivateCall(int,void *)235 void * SCI_METHOD PrivateCall(int, void *) override {
236 return 0;
237 }
238
AllocateSubStyles(int styleBase,int numberStyles)239 int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
240 return subStyles.Allocate(styleBase, numberStyles);
241 }
SubStylesStart(int styleBase)242 int SCI_METHOD SubStylesStart(int styleBase) override {
243 return subStyles.Start(styleBase);
244 }
SubStylesLength(int styleBase)245 int SCI_METHOD SubStylesLength(int styleBase) override {
246 return subStyles.Length(styleBase);
247 }
StyleFromSubStyle(int subStyle)248 int SCI_METHOD StyleFromSubStyle(int subStyle) override {
249 const int styleBase = subStyles.BaseStyle(subStyle);
250 return styleBase;
251 }
PrimaryStyleFromStyle(int style)252 int SCI_METHOD PrimaryStyleFromStyle(int style) override {
253 return style;
254 }
FreeSubStyles()255 void SCI_METHOD FreeSubStyles() override {
256 subStyles.Free();
257 }
SetIdentifiers(int style,const char * identifiers)258 void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
259 subStyles.SetIdentifiers(style, identifiers);
260 }
DistanceToSecondaryStyles()261 int SCI_METHOD DistanceToSecondaryStyles() override {
262 return 0;
263 }
GetSubStyleBases()264 const char *SCI_METHOD GetSubStyleBases() override {
265 return styleSubable;
266 }
267
LexerFactoryBash()268 static ILexer5 *LexerFactoryBash() {
269 return new LexerBash();
270 }
271 };
272
PropertySet(const char * key,const char * val)273 Sci_Position SCI_METHOD LexerBash::PropertySet(const char *key, const char *val) {
274 if (osBash.PropertySet(&options, key, val)) {
275 return 0;
276 }
277 return -1;
278 }
279
WordListSet(int n,const char * wl)280 Sci_Position SCI_METHOD LexerBash::WordListSet(int n, const char *wl) {
281 WordList *wordListN = 0;
282 switch (n) {
283 case 0:
284 wordListN = &keywords;
285 break;
286 }
287 Sci_Position firstModification = -1;
288 if (wordListN) {
289 WordList wlNew;
290 wlNew.Set(wl);
291 if (*wordListN != wlNew) {
292 wordListN->Set(wl);
293 firstModification = 0;
294 }
295 }
296 return firstModification;
297 }
298
// Style the text from startPos for length characters.
//
// The requested start is first moved back to the beginning of a line whose
// stored line state is BASH_CMD_START (or to line 0), and the incoming
// initStyle is replaced by SCE_SH_DEFAULT, so lexing always resumes at the
// start of a command segment. While lexing, a per-line state is stored with
// SetLineState() to support that backtracking on later calls.
//
// Two pieces of state drive the loop: sc.state (the style of the element
// being scanned) and cmdState (which part of a command segment we are in,
// one of the BASH_CMD_* constants).
void SCI_METHOD LexerBash::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
	// fixed word lists: command delimiters and shell construct keywords
	WordList cmdDelimiter, bashStruct, bashStruct_in;
	cmdDelimiter.Set("| || |& & && ; ;; ( ) { }");
	bashStruct.Set("if elif fi while until else then do done esac eval");
	bashStruct_in.Set("for case select");

	CharacterSet setWordStart(CharacterSet::setAlpha, "_");
	// note that [+-] are often parts of identifiers in shell scripts
	CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
	CharacterSet setMetaCharacter(CharacterSet::setNone, "|&;()<> \t\r\n");
	setMetaCharacter.Add(0);
	CharacterSet setBashOperator(CharacterSet::setNone, "^&%()-+=|{}[]:;>,*/<?!.~@");
	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
	CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
	// setHereDoc: characters that may start an unquoted here-doc delimiter
	// setHereDoc2: characters collected as part of the delimiter
	CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!%*,./:?@[]^`{}~");
	CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!%*,./:=?@[]^`{}~");
	// characters after '<<' that make it a left shift rather than a here-doc
	CharacterSet setLeftShift(CharacterSet::setDigits, "$");

	class HereDocCls {	// Class to manage HERE document elements
	public:
		int State;		// 0: '<<' encountered
		// 1: collect the delimiter
		// 2: here doc text (lines after the delimiter)
		int Quote;		// the char after '<<'
		bool Quoted;		// true if the delimiter is quoted with ' or "
		bool Indent;		// indented delimiter (for <<-)
		int DelimiterLength;	// strlen(Delimiter)
		char Delimiter[HERE_DELIM_MAX];	// the Delimiter
		HereDocCls() {
			State = 0;
			Quote = 0;
			Quoted = false;
			Indent = 0;
			DelimiterLength = 0;
			Delimiter[0] = '\0';
		}
		// Add one character and keep the buffer NUL-terminated.
		// There is no bounds check here; the caller stops collecting once
		// DelimiterLength reaches HERE_DELIM_MAX - 1.
		void Append(int ch) {
			Delimiter[DelimiterLength++] = static_cast<char>(ch);
			Delimiter[DelimiterLength] = '\0';
		}
		~HereDocCls() {
		}
	};
	HereDocCls HereDoc;

	class QuoteCls {	// Class to manage quote pairs (simplified vs LexPerl)
	public:
		int Count;	// number of currently open Up delimiters
		int Up, Down;	// opening delimiter and its matching closer
		QuoteCls() {
			Count = 0;
			Up = '\0';
			Down = '\0';
		}
		void Open(int u) {
			Count++;
			Up = u;
			Down = opposite(Up);
		}
		void Start(int u) {
			Count = 0;
			Open(u);
		}
	};
	QuoteCls Quote;

	class QuoteStackCls {	// Class to manage quote pairs that nest
	public:
		int Count;	// open-delimiter count at the current level
		int Up, Down;	// delimiters of the current level
		int Style;	// BASH_DELIM_* kind of the current level
		int Depth;	// levels pushed
		int CountStack[BASH_DELIM_STACK_MAX];
		int UpStack [BASH_DELIM_STACK_MAX];
		int StyleStack[BASH_DELIM_STACK_MAX];
		QuoteStackCls() {
			Count = 0;
			Up = '\0';
			Down = '\0';
			Style = 0;
			Depth = 0;
		}
		// Begin a new outermost delimited section; Depth is left untouched.
		void Start(int u, int s) {
			Count = 1;
			Up = u;
			Down = opposite(Up);
			Style = s;
		}
		// Save the current level and enter a nested one.
		// Silently does nothing when the stack is already full.
		void Push(int u, int s) {
			if (Depth >= BASH_DELIM_STACK_MAX)
				return;
			CountStack[Depth] = Count;
			UpStack [Depth] = Up;
			StyleStack[Depth] = Style;
			Depth++;
			Count = 1;
			Up = u;
			Down = opposite(Up);
			Style = s;
		}
		// Return to the enclosing level; does nothing when no level is pushed.
		void Pop(void) {
			if (Depth <= 0)
				return;
			Depth--;
			Count = CountStack[Depth];
			Up = UpStack [Depth];
			Style = StyleStack[Depth];
			Down = opposite(Up);
		}
		~QuoteStackCls() {
		}
	};
	QuoteStackCls QuoteStack;

	const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_SH_IDENTIFIER);
	const WordClassifier &classifierScalars = subStyles.Classifier(SCE_SH_SCALAR);

	int numBase = 0;	// base of the number being scanned (BASH_BASE_* or a literal base)
	int digit;
	Sci_PositionU endPos = startPos + length;
	int cmdState = BASH_CMD_START;
	int testExprType = 0;	// 0: 'test' keyword, 1: [[ ... ]], 2: [ ... ]
	LexAccessor styler(pAccess);

	// Always backtracks to the start of a line that is not a continuation
	// of the previous line (i.e. start of a bash command segment)
	Sci_Position ln = styler.GetLine(startPos);
	if (ln > 0 && startPos == static_cast<Sci_PositionU>(styler.LineStart(ln)))
		ln--;
	for (;;) {
		startPos = styler.LineStart(ln);
		if (ln == 0 || styler.GetLineState(ln) == BASH_CMD_START)
			break;
		ln--;
	}
	// the caller's initStyle is ignored since lexing restarts at a segment start
	initStyle = SCE_SH_DEFAULT;

	StyleContext sc(startPos, endPos - startPos, initStyle, styler);

	for (; sc.More(); sc.Forward()) {

		// handle line continuation, updates per-line stored state
		if (sc.atLineStart) {
			ln = styler.GetLine(sc.currentPos);
			if (sc.state == SCE_SH_STRING
				|| sc.state == SCE_SH_BACKTICKS
				|| sc.state == SCE_SH_CHARACTER
				|| sc.state == SCE_SH_HERE_Q
				|| sc.state == SCE_SH_COMMENTLINE
				|| sc.state == SCE_SH_PARAM) {
				// force backtrack while retaining cmdState
				styler.SetLineState(ln, BASH_CMD_BODY);
			} else {
				if (ln > 0) {
					if ((sc.GetRelative(-3) == '\\' && sc.GetRelative(-2) == '\r' && sc.chPrev == '\n')
						|| sc.GetRelative(-2) == '\\') {	// handle '\' line continuation
						// retain last line's state
					} else
						cmdState = BASH_CMD_START;
				}
				styler.SetLineState(ln, cmdState);
			}
		}

		// controls change of cmdState at the end of a non-whitespace element
		// states BODY|TEST|ARITH persist until the end of a command segment
		// state WORD persists, but ends with 'in' or 'do' construct keywords
		int cmdStateNew = BASH_CMD_BODY;
		if (cmdState == BASH_CMD_TEST || cmdState == BASH_CMD_ARITH || cmdState == BASH_CMD_WORD)
			cmdStateNew = cmdState;
		int stylePrev = sc.state;

		// Determine if the current state should terminate.
		switch (sc.state) {
			case SCE_SH_OPERATOR:
				sc.SetState(SCE_SH_DEFAULT);
				if (cmdState == BASH_CMD_DELIM)		// if command delimiter, start new command
					cmdStateNew = BASH_CMD_START;
				else if (sc.chPrev == '\\')			// propagate command state if line continued
					cmdStateNew = cmdState;
				break;
			case SCE_SH_WORD:
				// "." never used in Bash variable names but used in file names
				if (!setWord.Contains(sc.ch)) {
					char s[500];
					char s2[10];
					sc.GetCurrent(s, sizeof(s));
					// style to fall back to when the word is not a keyword here;
					// a matching sub-style takes precedence over SCE_SH_IDENTIFIER
					int identifierStyle = SCE_SH_IDENTIFIER;
					int subStyle = classifierIdentifiers.ValueFor(s);
					if (subStyle >= 0) {
						identifierStyle = subStyle;
					}
					// allow keywords ending in a whitespace or command delimiter
					s2[0] = static_cast<char>(sc.ch);
					s2[1] = '\0';
					bool keywordEnds = IsASpace(sc.ch) || cmdDelimiter.InList(s2);
					// 'in' or 'do' may be construct keywords
					if (cmdState == BASH_CMD_WORD) {
						if (strcmp(s, "in") == 0 && keywordEnds)
							cmdStateNew = BASH_CMD_BODY;
						else if (strcmp(s, "do") == 0 && keywordEnds)
							cmdStateNew = BASH_CMD_START;
						else
							sc.ChangeState(identifierStyle);
						sc.SetState(SCE_SH_DEFAULT);
						break;
					}
					// a 'test' keyword starts a test expression
					if (strcmp(s, "test") == 0) {
						if (cmdState == BASH_CMD_START && keywordEnds) {
							cmdStateNew = BASH_CMD_TEST;
							testExprType = 0;
						} else
							sc.ChangeState(identifierStyle);
					}
					// detect bash construct keywords
					else if (bashStruct.InList(s)) {
						if (cmdState == BASH_CMD_START && keywordEnds)
							cmdStateNew = BASH_CMD_START;
						else
							sc.ChangeState(identifierStyle);
					}
					// 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
					else if (bashStruct_in.InList(s)) {
						if (cmdState == BASH_CMD_START && keywordEnds)
							cmdStateNew = BASH_CMD_WORD;
						else
							sc.ChangeState(identifierStyle);
					}
					// disambiguate option items and file test operators
					else if (s[0] == '-') {
						if (cmdState != BASH_CMD_TEST)
							sc.ChangeState(identifierStyle);
					}
					// disambiguate keywords and identifiers
					else if (cmdState != BASH_CMD_START
						|| !(keywords.InList(s) && keywordEnds)) {
						sc.ChangeState(identifierStyle);
					}
					sc.SetState(SCE_SH_DEFAULT);
				}
				break;
			case SCE_SH_IDENTIFIER:
				if (sc.chPrev == '\\' || !setWord.Contains(sc.ch) ||
					(cmdState == BASH_CMD_ARITH && !setWordStart.Contains(sc.ch))) {
					char s[500];
					sc.GetCurrent(s, sizeof(s));
					int subStyle = classifierIdentifiers.ValueFor(s);
					if (subStyle >= 0) {
						sc.ChangeState(subStyle);
					}
					if (sc.chPrev == '\\') {	// for escaped chars
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else {
						sc.SetState(SCE_SH_DEFAULT);
					}
				}
				break;
			case SCE_SH_NUMBER:
				// each branch below breaks out while the number continues
				digit = translateBashDigit(sc.ch);
				if (numBase == BASH_BASE_DECIMAL) {
					if (sc.ch == '#') {
						// base#number: the digits so far are the base
						char s[10];
						sc.GetCurrent(s, sizeof(s));
						numBase = getBashNumberBase(s);
						if (numBase != BASH_BASE_ERROR)
							break;
					} else if (IsADigit(sc.ch))
						break;
				} else if (numBase == BASH_BASE_HEX) {
					if (IsADigit(sc.ch, 16))
						break;
#ifdef PEDANTIC_OCTAL
				} else if (numBase == BASH_BASE_OCTAL ||
						   numBase == BASH_BASE_OCTAL_ERROR) {
					if (digit <= 7)
						break;
					if (digit <= 9) {
						numBase = BASH_BASE_OCTAL_ERROR;
						break;
					}
#endif
				} else if (numBase == BASH_BASE_ERROR) {
					if (digit <= 9)
						break;
				} else {	// DD#DDDD number style handling
					if (digit != BASH_BASE_ERROR) {
						if (numBase <= 36) {
							// case-insensitive if base<=36
							if (digit >= 36) digit -= 26;
						}
						if (digit < numBase)
							break;
						if (digit <= 9) {
							numBase = BASH_BASE_ERROR;
							break;
						}
					}
				}
				// fallthrough when number is at an end or error
				if (numBase == BASH_BASE_ERROR
#ifdef PEDANTIC_OCTAL
					|| numBase == BASH_BASE_OCTAL_ERROR
#endif
				) {
					sc.ChangeState(SCE_SH_ERROR);
				}
				sc.SetState(SCE_SH_DEFAULT);
				break;
			case SCE_SH_COMMENTLINE:
				// a backslash just before the line end keeps the comment style going
				if (sc.atLineEnd && sc.chPrev != '\\') {
					sc.SetState(SCE_SH_DEFAULT);
				}
				break;
			case SCE_SH_HERE_DELIM:
				// From Bash info:
				// ---------------
				// Specifier format is: <<[-]WORD
				// Optional '-' is for removal of leading tabs from here-doc.
				// Whitespace acceptable after <<[-] operator
				//
				if (HereDoc.State == 0) { // '<<' encountered
					HereDoc.Quote = sc.chNext;
					HereDoc.Quoted = false;
					HereDoc.DelimiterLength = 0;
					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
					if (sc.chNext == '\'' || sc.chNext == '\"') {	// a quoted here-doc delimiter (' or ")
						sc.Forward();
						HereDoc.Quoted = true;
						HereDoc.State = 1;
					} else if (setHereDoc.Contains(sc.chNext) ||
					           (sc.chNext == '=' && cmdState != BASH_CMD_ARITH)) {
						// an unquoted here-doc delimiter, no special handling
						HereDoc.State = 1;
					} else if (sc.chNext == '<') {	// HERE string <<<
						sc.Forward();
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else if (IsASpace(sc.chNext)) {
						// eat whitespace
					} else if (setLeftShift.Contains(sc.chNext) ||
					           (sc.chNext == '=' && cmdState == BASH_CMD_ARITH)) {
						// left shift <<$var or <<= cases
						sc.ChangeState(SCE_SH_OPERATOR);
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else {
						// symbols terminates; deprecated zero-length delimiter
						HereDoc.State = 1;
					}
				} else if (HereDoc.State == 1) { // collect the delimiter
					// * if single quoted, there's no escape
					// * if double quoted, there are \\ and \" escapes
					if ((HereDoc.Quote == '\'' && sc.ch != HereDoc.Quote) ||
					    (HereDoc.Quoted && sc.ch != HereDoc.Quote && sc.ch != '\\') ||
					    (HereDoc.Quote != '\'' && sc.chPrev == '\\') ||
					    (setHereDoc2.Contains(sc.ch))) {
						HereDoc.Append(sc.ch);
					} else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) {	// closing quote => end of delimiter
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else if (sc.ch == '\\') {
						if (HereDoc.Quoted && sc.chNext != HereDoc.Quote && sc.chNext != '\\') {
							// in quoted prefixes only \ and the quote eat the escape
							HereDoc.Append(sc.ch);
						} else {
							// skip escape prefix
						}
					} else if (!HereDoc.Quoted) {
						sc.SetState(SCE_SH_DEFAULT);
					}
					// stop before Append() could write past the end of Delimiter
					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {	// force blowup
						sc.SetState(SCE_SH_ERROR);
						HereDoc.State = 0;
					}
				}
				break;
			case SCE_SH_HERE_Q:
				// HereDoc.State == 2
				// Each here-doc line is consumed whole and compared with the delimiter.
				if (sc.atLineStart) {
					sc.SetState(SCE_SH_HERE_Q);
					int prefixws = 0;
					while (sc.ch == '\t' && !sc.atLineEnd) {	// tabulation prefix
						sc.Forward();
						prefixws++;
					}
					// restart the segment so that the leading tabs are not
					// part of the text compared with the delimiter
					if (prefixws > 0)
						sc.SetState(SCE_SH_HERE_Q);
					while (!sc.atLineEnd) {
						sc.Forward();
					}
					char s[HERE_DELIM_MAX];
					sc.GetCurrent(s, sizeof(s));
					if (sc.LengthCurrent() == 0) {  // '' or "" delimiters
						if ((prefixws == 0 || HereDoc.Indent) &&
							HereDoc.Quoted && HereDoc.DelimiterLength == 0)
							sc.SetState(SCE_SH_DEFAULT);
						break;
					}
					// drop a trailing CR so CRLF line ends compare equal
					if (s[strlen(s) - 1] == '\r')
						s[strlen(s) - 1] = '\0';
					if (strcmp(HereDoc.Delimiter, s) == 0) {
						if ((prefixws == 0) ||	// indentation rule
							(prefixws > 0 && HereDoc.Indent)) {
							sc.SetState(SCE_SH_DEFAULT);
							break;
						}
					}
				}
				break;
			case SCE_SH_SCALAR:	// variable names
				if (!setParam.Contains(sc.ch)) {
					char s[500];
					sc.GetCurrent(s, sizeof(s));
					int subStyle = classifierScalars.ValueFor(&s[1]); // skip the $
					if (subStyle >= 0) {
						sc.ChangeState(subStyle);
					}
					if (sc.LengthCurrent() == 1) {
						// Special variable: $(, $_ etc.
						sc.ForwardSetState(SCE_SH_DEFAULT);
					} else {
						sc.SetState(SCE_SH_DEFAULT);
					}
				}
				break;
			case SCE_SH_STRING:	// delimited styles, can nest
			case SCE_SH_BACKTICKS:
				if (sc.ch == '\\' && QuoteStack.Up != '\\') {
					// skip the escaped character, except inside a literal '...'
					if (QuoteStack.Style != BASH_DELIM_LITERAL)
						sc.Forward();
				} else if (sc.ch == QuoteStack.Down) {
					QuoteStack.Count--;
					if (QuoteStack.Count == 0) {
						// leave a nested level, or end the whole section
						if (QuoteStack.Depth > 0) {
							QuoteStack.Pop();
						} else
							sc.ForwardSetState(SCE_SH_DEFAULT);
					}
				} else if (sc.ch == QuoteStack.Up) {
					QuoteStack.Count++;
				} else {
					if (QuoteStack.Style == BASH_DELIM_STRING ||
						QuoteStack.Style == BASH_DELIM_LSTRING
					) {	// do nesting for "string", $"locale-string"
						if (sc.ch == '`') {
							QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
						} else if (sc.ch == '$' && sc.chNext == '(') {
							sc.Forward();
							QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
						}
					} else if (QuoteStack.Style == BASH_DELIM_COMMAND ||
							   QuoteStack.Style == BASH_DELIM_BACKTICK
					) {	// do nesting for $(command), `command`
						if (sc.ch == '\'') {
							QuoteStack.Push(sc.ch, BASH_DELIM_LITERAL);
						} else if (sc.ch == '\"') {
							QuoteStack.Push(sc.ch, BASH_DELIM_STRING);
						} else if (sc.ch == '`') {
							QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
						} else if (sc.ch == '$') {
							if (sc.chNext == '\'') {
								sc.Forward();
								QuoteStack.Push(sc.ch, BASH_DELIM_CSTRING);
							} else if (sc.chNext == '\"') {
								sc.Forward();
								QuoteStack.Push(sc.ch, BASH_DELIM_LSTRING);
							} else if (sc.chNext == '(') {
								sc.Forward();
								QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
							}
						}
					}
				}
				break;
			case SCE_SH_PARAM: // ${parameter}
				if (sc.ch == '\\' && Quote.Up != '\\') {
					sc.Forward();
				} else if (sc.ch == Quote.Down) {
					Quote.Count--;
					if (Quote.Count == 0) {
						sc.ForwardSetState(SCE_SH_DEFAULT);
					}
				} else if (sc.ch == Quote.Up) {
					Quote.Count++;
				}
				break;
			case SCE_SH_CHARACTER: // singly-quoted strings
				if (sc.ch == Quote.Down) {
					Quote.Count--;
					if (Quote.Count == 0) {
						sc.ForwardSetState(SCE_SH_DEFAULT);
					}
				}
				break;
		}

		// Must check end of HereDoc state 1 before default state is handled
		if (HereDoc.State == 1 && sc.atLineEnd) {
			// Begin of here-doc (the line after the here-doc delimiter):
			// Lexically, the here-doc starts from the next line after the <<, but the
			// first line of here-doc seem to follow the style of the last EOL sequence
			HereDoc.State = 2;
			if (HereDoc.Quoted) {
				if (sc.state == SCE_SH_HERE_DELIM) {
					// Missing quote at end of string! Syntax error in bash 4.3
					// Mark this bit as an error, do not colour any here-doc
					sc.ChangeState(SCE_SH_ERROR);
					sc.SetState(SCE_SH_DEFAULT);
				} else {
					// quoted delimiter (' or "): the delimiter state has already ended
					sc.SetState(SCE_SH_HERE_Q);
				}
			} else if (HereDoc.DelimiterLength == 0) {
				// no delimiter, illegal (but '' and "" are legal)
				sc.ChangeState(SCE_SH_ERROR);
				sc.SetState(SCE_SH_DEFAULT);
			} else {
				sc.SetState(SCE_SH_HERE_Q);
			}
		}

		// update cmdState about the current command segment
		if (stylePrev != SCE_SH_DEFAULT && sc.state == SCE_SH_DEFAULT) {
			cmdState = cmdStateNew;
		}
		// Determine if a new state should be entered.
		if (sc.state == SCE_SH_DEFAULT) {
			if (sc.ch == '\\') {
				// Bash can escape any non-newline as a literal
				sc.SetState(SCE_SH_IDENTIFIER);
				if (sc.chNext == '\r' || sc.chNext == '\n')
					sc.SetState(SCE_SH_OPERATOR);
			} else if (IsADigit(sc.ch)) {
				sc.SetState(SCE_SH_NUMBER);
				numBase = BASH_BASE_DECIMAL;
				if (sc.ch == '0') {	// hex,octal
					if (sc.chNext == 'x' || sc.chNext == 'X') {
						numBase = BASH_BASE_HEX;
						sc.Forward();
					} else if (IsADigit(sc.chNext)) {
						// without PEDANTIC_OCTAL a leading-zero number is
						// scanned with the permissive hex digit rules
#ifdef PEDANTIC_OCTAL
						numBase = BASH_BASE_OCTAL;
#else
						numBase = BASH_BASE_HEX;
#endif
					}
				}
			} else if (setWordStart.Contains(sc.ch)) {
				sc.SetState(SCE_SH_WORD);
			} else if (sc.ch == '#') {
				// '#' starts a comment only at the document start or after a
				// metacharacter, and not directly after a word or identifier
				if (stylePrev != SCE_SH_WORD && stylePrev != SCE_SH_IDENTIFIER &&
					(sc.currentPos == 0 || setMetaCharacter.Contains(sc.chPrev))) {
					sc.SetState(SCE_SH_COMMENTLINE);
				} else {
					sc.SetState(SCE_SH_WORD);
				}
				// handle some zsh features within arithmetic expressions only
				if (cmdState == BASH_CMD_ARITH) {
					if (sc.chPrev == '[') {	// [#8] [##8] output digit setting
						sc.SetState(SCE_SH_WORD);
						if (sc.chNext == '#') {
							sc.Forward();
						}
					} else if (sc.Match("##^") && IsUpperCase(sc.GetRelative(3))) {	// ##^A
						sc.SetState(SCE_SH_IDENTIFIER);
						sc.Forward(3);
					} else if (sc.chNext == '#' && !IsASpace(sc.GetRelative(2))) {	// ##a
						sc.SetState(SCE_SH_IDENTIFIER);
						sc.Forward(2);
					} else if (setWordStart.Contains(sc.chNext)) {	// #name
						sc.SetState(SCE_SH_IDENTIFIER);
					}
				}
			} else if (sc.ch == '\"') {
				sc.SetState(SCE_SH_STRING);
				QuoteStack.Start(sc.ch, BASH_DELIM_STRING);
			} else if (sc.ch == '\'') {
				sc.SetState(SCE_SH_CHARACTER);
				Quote.Start(sc.ch);
			} else if (sc.ch == '`') {
				sc.SetState(SCE_SH_BACKTICKS);
				QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
			} else if (sc.ch == '$') {
				if (sc.Match("$((")) {
					sc.SetState(SCE_SH_OPERATOR);	// handle '((' later
					continue;
				}
				// provisionally a scalar; the next character may turn it into
				// a ${param}, $'...', $"...", $(...) or $`...` section
				sc.SetState(SCE_SH_SCALAR);
				sc.Forward();
				if (sc.ch == '{') {
					sc.ChangeState(SCE_SH_PARAM);
					Quote.Start(sc.ch);
				} else if (sc.ch == '\'') {
					sc.ChangeState(SCE_SH_STRING);
					QuoteStack.Start(sc.ch, BASH_DELIM_CSTRING);
				} else if (sc.ch == '"') {
					sc.ChangeState(SCE_SH_STRING);
					QuoteStack.Start(sc.ch, BASH_DELIM_LSTRING);
				} else if (sc.ch == '(') {
					sc.ChangeState(SCE_SH_BACKTICKS);
					QuoteStack.Start(sc.ch, BASH_DELIM_COMMAND);
				} else if (sc.ch == '`') {	// $` seen in a configure script, valid?
					sc.ChangeState(SCE_SH_BACKTICKS);
					QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
				} else {
					continue;	// scalar has no delimiter pair
				}
			} else if (sc.Match('<', '<')) {
				sc.SetState(SCE_SH_HERE_DELIM);
				HereDoc.State = 0;
				if (sc.GetRelative(2) == '-') {	// <<- indent case
					HereDoc.Indent = true;
					sc.Forward();
				} else {
					HereDoc.Indent = false;
				}
			} else if (sc.ch == '-'	&&	// one-char file test operators
					   setSingleCharOp.Contains(sc.chNext) &&
					   !setWord.Contains(sc.GetRelative(2)) &&
					   IsASpace(sc.chPrev)) {
				sc.SetState(SCE_SH_WORD);
				sc.Forward();
			} else if (setBashOperator.Contains(sc.ch)) {
				char s[10];
				bool isCmdDelim = false;
				sc.SetState(SCE_SH_OPERATOR);
				// globs have no whitespace, do not appear in arithmetic expressions
				if (cmdState != BASH_CMD_ARITH && sc.ch == '(' && sc.chNext != '(') {
					int i = GlobScan(sc);
					if (i > 1) {
						sc.SetState(SCE_SH_IDENTIFIER);
						sc.Forward(i);
						continue;
					}
				}
				// handle opening delimiters for test/arithmetic expressions - ((,[[,[
				if (cmdState == BASH_CMD_START
				 || cmdState == BASH_CMD_BODY) {
					if (sc.Match('(', '(')) {
						cmdState = BASH_CMD_ARITH;
						sc.Forward();
					} else if (sc.Match('[', '[') && IsASpace(sc.GetRelative(2))) {
						cmdState = BASH_CMD_TEST;
						testExprType = 1;
						sc.Forward();
					} else if (sc.ch == '[' && IsASpace(sc.chNext)) {
						cmdState = BASH_CMD_TEST;
						testExprType = 2;
					}
				}
				// special state -- for ((x;y;z)) in ... looping
				if (cmdState == BASH_CMD_WORD && sc.Match('(', '(')) {
					cmdState = BASH_CMD_ARITH;
					sc.Forward();
					continue;
				}
				// handle command delimiters in command START|BODY|WORD state, also TEST if 'test'
				if (cmdState == BASH_CMD_START
				 || cmdState == BASH_CMD_BODY
				 || cmdState == BASH_CMD_WORD
				 || (cmdState == BASH_CMD_TEST && testExprType == 0)) {
					// try the two-character delimiter first, then the single character
					s[0] = static_cast<char>(sc.ch);
					if (setBashOperator.Contains(sc.chNext)) {
						s[1] = static_cast<char>(sc.chNext);
						s[2] = '\0';
						isCmdDelim = cmdDelimiter.InList(s);
						if (isCmdDelim)
							sc.Forward();
					}
					if (!isCmdDelim) {
						s[1] = '\0';
						isCmdDelim = cmdDelimiter.InList(s);
					}
					if (isCmdDelim) {
						cmdState = BASH_CMD_DELIM;
						continue;
					}
				}
				// handle closing delimiters for test/arithmetic expressions - )),]],]
				if (cmdState == BASH_CMD_ARITH && sc.Match(')', ')')) {
					cmdState = BASH_CMD_BODY;
					sc.Forward();
				} else if (cmdState == BASH_CMD_TEST && IsASpace(sc.chPrev)) {
					if (sc.Match(']', ']') && testExprType == 1) {
						sc.Forward();
						cmdState = BASH_CMD_BODY;
					} else if (sc.ch == ']' && testExprType == 2) {
						cmdState = BASH_CMD_BODY;
					}
				}
			}
		}// sc.state
	}
	sc.Complete();
	// the range ended inside a here-doc: mark the lexer state as changed
	// from here to the end of the document
	if (sc.state == SCE_SH_HERE_Q) {
		styler.ChangeLexerState(sc.currentPos, styler.Length());
	}
	sc.Complete();
}
996
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)997 void SCI_METHOD LexerBash::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) {
998 if(!options.fold)
999 return;
1000
1001 LexAccessor styler(pAccess);
1002
1003 Sci_PositionU endPos = startPos + length;
1004 int visibleChars = 0;
1005 int skipHereCh = 0;
1006 Sci_Position lineCurrent = styler.GetLine(startPos);
1007 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1008 int levelCurrent = levelPrev;
1009 char chNext = styler[startPos];
1010 int styleNext = styler.StyleAt(startPos);
1011 char word[8] = { '\0' }; // we're not interested in long words anyway
1012 unsigned int wordlen = 0;
1013 for (Sci_PositionU i = startPos; i < endPos; i++) {
1014 char ch = chNext;
1015 chNext = styler.SafeGetCharAt(i + 1);
1016 int style = styleNext;
1017 styleNext = styler.StyleAt(i + 1);
1018 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1019 // Comment folding
1020 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1021 {
1022 if (!IsCommentLine(lineCurrent - 1, styler)
1023 && IsCommentLine(lineCurrent + 1, styler))
1024 levelCurrent++;
1025 else if (IsCommentLine(lineCurrent - 1, styler)
1026 && !IsCommentLine(lineCurrent + 1, styler))
1027 levelCurrent--;
1028 }
1029 if (style == SCE_SH_WORD) {
1030 if ((wordlen + 1) < sizeof(word))
1031 word[wordlen++] = ch;
1032 if (styleNext != style) {
1033 word[wordlen] = '\0';
1034 wordlen = 0;
1035 if (strcmp(word, "if") == 0 || strcmp(word, "case") == 0 || strcmp(word, "do") == 0) {
1036 levelCurrent++;
1037 } else if (strcmp(word, "fi") == 0 || strcmp(word, "esac") == 0 || strcmp(word, "done") == 0) {
1038 levelCurrent--;
1039 }
1040 }
1041 }
1042 if (style == SCE_SH_OPERATOR) {
1043 if (ch == '{') {
1044 levelCurrent++;
1045 } else if (ch == '}') {
1046 levelCurrent--;
1047 }
1048 }
1049 // Here Document folding
1050 if (style == SCE_SH_HERE_DELIM) {
1051 if (ch == '<' && chNext == '<') {
1052 if (styler.SafeGetCharAt(i + 2) == '<') {
1053 skipHereCh = 1;
1054 } else {
1055 if (skipHereCh == 0) {
1056 levelCurrent++;
1057 } else {
1058 skipHereCh = 0;
1059 }
1060 }
1061 }
1062 } else if (style == SCE_SH_HERE_Q && styler.StyleAt(i+1) == SCE_SH_DEFAULT) {
1063 levelCurrent--;
1064 }
1065 if (atEOL) {
1066 int lev = levelPrev;
1067 if (visibleChars == 0 && options.foldCompact)
1068 lev |= SC_FOLDLEVELWHITEFLAG;
1069 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1070 lev |= SC_FOLDLEVELHEADERFLAG;
1071 if (lev != styler.LevelAt(lineCurrent)) {
1072 styler.SetLevel(lineCurrent, lev);
1073 }
1074 lineCurrent++;
1075 levelPrev = levelCurrent;
1076 visibleChars = 0;
1077 }
1078 if (!isspacechar(ch))
1079 visibleChars++;
1080 }
1081 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1082 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1083 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1084 }
1085
// Lexer module object for Bash: ties SCLEX_BASH and the name "bash" to the
// LexerBash factory function and the word list descriptions.
LexerModule lmBash(SCLEX_BASH, LexerBash::LexerFactoryBash, "bash", bashWordListDesc);
1087