1 // Scintilla source code edit control
2 /** @file LexBash.cxx
3  ** Lexer for Bash.
4  **/
5 // Copyright 2004-2012 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 
15 #include <string>
16 #include <vector>
17 #include <map>
18 
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 
23 #include "StringCopy.h"
24 #include "WordList.h"
25 #include "LexAccessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "OptionSet.h"
30 #include "SubStyles.h"
31 #include "DefaultLexer.h"
32 
33 using namespace Scintilla;
34 
// Size in bytes of the buffers that hold a here-doc delimiter
// (HereDocCls::Delimiter and the per-line comparison buffer in Lex).
#define HERE_DELIM_MAX			256

// define this if you want 'invalid octals' to be marked as errors
// usually, this is not a good idea, permissive lexing is better
#undef PEDANTIC_OCTAL

// Special values stored in the lexer's numBase variable while a number is
// being scanned. getBashNumberBase() only returns bases up to 64, so these
// markers start at 65 and cannot be confused with a real base.
#define BASH_BASE_ERROR			65
#define BASH_BASE_DECIMAL		66
#define BASH_BASE_HEX			67
#ifdef PEDANTIC_OCTAL
#define BASH_BASE_OCTAL			68
#define	BASH_BASE_OCTAL_ERROR	69
#endif

// state constants for parts of a bash command segment
// (values of cmdState in Lex; also saved as the per-line state)
#define	BASH_CMD_BODY			0
#define BASH_CMD_START			1
#define BASH_CMD_WORD			2
#define BASH_CMD_TEST			3
#define BASH_CMD_ARITH			4
#define BASH_CMD_DELIM			5

// state constants for nested delimiter pairs, used by
// SCE_SH_STRING and SCE_SH_BACKTICKS processing
// (LITERAL: '...', STRING: "...", CSTRING: $'...', LSTRING: $"...",
//  COMMAND: $(...), BACKTICK: `...`)
#define BASH_DELIM_LITERAL		0
#define BASH_DELIM_STRING		1
#define BASH_DELIM_CSTRING		2
#define BASH_DELIM_LSTRING		3
#define BASH_DELIM_COMMAND		4
#define BASH_DELIM_BACKTICK		5

// Number of enclosing delimiter levels QuoteStackCls can save
#define BASH_DELIM_STACK_MAX	7
67 
68 namespace {
69 
translateBashDigit(int ch)70 inline int translateBashDigit(int ch) {
71 	if (ch >= '0' && ch <= '9') {
72 		return ch - '0';
73 	} else if (ch >= 'a' && ch <= 'z') {
74 		return ch - 'a' + 10;
75 	} else if (ch >= 'A' && ch <= 'Z') {
76 		return ch - 'A' + 36;
77 	} else if (ch == '@') {
78 		return 62;
79 	} else if (ch == '_') {
80 		return 63;
81 	}
82 	return BASH_BASE_ERROR;
83 }
84 
getBashNumberBase(char * s)85 inline int getBashNumberBase(char *s) {
86 	int i = 0;
87 	int base = 0;
88 	while (*s) {
89 		base = base * 10 + (*s++ - '0');
90 		i++;
91 	}
92 	if (base > 64 || i > 2) {
93 		return BASH_BASE_ERROR;
94 	}
95 	return base;
96 }
97 
// Return the closing partner of an opening bracket character.
// Characters that are not one of ( [ { < are returned unchanged, so a
// quote character closes itself.
int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
105 
GlobScan(StyleContext & sc)106 int GlobScan(StyleContext &sc) {
107 	// forward scan for zsh globs, disambiguate versus bash arrays
108 	// complex expressions may still fail, e.g. unbalanced () '' "" etc
109 	int c, sLen = 0;
110 	int pCount = 0;
111 	int hash = 0;
112 	while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
113 		if (IsASpace(c)) {
114 			return 0;
115 		} else if (c == '\'' || c == '\"') {
116 			if (hash != 2) return 0;
117 		} else if (c == '#' && hash == 0) {
118 			hash = (sLen == 1) ? 2:1;
119 		} else if (c == '(') {
120 			pCount++;
121 		} else if (c == ')') {
122 			if (pCount == 0) {
123 				if (hash) return sLen;
124 				return 0;
125 			}
126 			pCount--;
127 		}
128 	}
129 	return 0;
130 }
131 
IsCommentLine(Sci_Position line,LexAccessor & styler)132 bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
133 	Sci_Position pos = styler.LineStart(line);
134 	Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
135 	for (Sci_Position i = pos; i < eol_pos; i++) {
136 		char ch = styler[i];
137 		if (ch == '#')
138 			return true;
139 		else if (ch != ' ' && ch != '\t')
140 			return false;
141 	}
142 	return false;
143 }
144 
// Options that control the Bash lexer; all of them relate to folding.
// Defaults: fold off, fold.comment off, fold.compact on.
struct OptionsBash {
	bool fold;
	bool foldComment;
	bool foldCompact;

	OptionsBash() :
		fold(false),
		foldComment(false),
		foldCompact(true) {
	}
};
156 
// Descriptions of the word lists this lexer accepts; the list ends with a
// null pointer.
const char * const bashWordListDesc[] = { "Keywords", nullptr };
161 
162 struct OptionSetBash : public OptionSet<OptionsBash> {
OptionSetBash__anone8497e820111::OptionSetBash163 	OptionSetBash() {
164 		DefineProperty("fold", &OptionsBash::fold);
165 
166 		DefineProperty("fold.comment", &OptionsBash::foldComment);
167 
168 		DefineProperty("fold.compact", &OptionsBash::foldCompact);
169 
170 		DefineWordListSets(bashWordListDesc);
171 	}
172 };
173 
174 const char styleSubable[] = { SCE_SH_IDENTIFIER, SCE_SH_SCALAR, 0 };
175 
176 LexicalClass lexicalClasses[] = {
177 	// Lexer Bash SCLEX_BASH SCE_SH_:
178 	0, "SCE_SH_DEFAULT", "default", "White space",
179 	1, "SCE_SH_ERROR", "error", "Error",
180 	2, "SCE_SH_COMMENTLINE", "comment line", "Line comment: #",
181 	3, "SCE_SH_NUMBER", "literal numeric", "Number",
182 	4, "SCE_SH_WORD", "keyword", "Keyword",
183 	5, "SCE_SH_STRING", "literal string", "String",
184 	6, "SCE_SH_CHARACTER", "literal string", "Single quoted string",
185 	7, "SCE_SH_OPERATOR", "operator", "Operators",
186 	8, "SCE_SH_IDENTIFIER", "identifier", "Identifiers",
187 	9, "SCE_SH_SCALAR", "identifier", "Scalar variable",
188 	10, "SCE_SH_PARAM", "identifier", "Parameter",
189 	11, "SCE_SH_BACKTICKS", "literal string", "Backtick quoted command",
190 	12, "SCE_SH_HERE_DELIM", "operator", "Heredoc delimiter",
191 	13, "SCE_SH_HERE_Q", "literal string", "Heredoc quoted string",
192 };
193 
194 }
195 
196 class LexerBash : public DefaultLexer {
197 	WordList keywords;
198 	OptionsBash options;
199 	OptionSetBash osBash;
200 	enum { ssIdentifier, ssScalar };
201 	SubStyles subStyles;
202 public:
LexerBash()203 	LexerBash() :
204 		DefaultLexer("bash", SCLEX_BASH, lexicalClasses, ELEMENTS(lexicalClasses)),
205 		subStyles(styleSubable, 0x80, 0x40, 0) {
206 	}
~LexerBash()207 	virtual ~LexerBash() {
208 	}
Release()209 	void SCI_METHOD Release() override {
210 		delete this;
211 	}
Version() const212 	int SCI_METHOD Version() const override {
213 		return lvIdentity;
214 	}
PropertyNames()215 	const char * SCI_METHOD PropertyNames() override {
216 		return osBash.PropertyNames();
217 	}
PropertyType(const char * name)218 	int SCI_METHOD PropertyType(const char* name) override {
219 		return osBash.PropertyType(name);
220 	}
DescribeProperty(const char * name)221 	const char * SCI_METHOD DescribeProperty(const char *name) override {
222 		return osBash.DescribeProperty(name);
223 	}
224 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
PropertyGet(const char * key)225 	const char * SCI_METHOD PropertyGet(const char* key) override {
226 		return osBash.PropertyGet(key);
227 	}
DescribeWordListSets()228 	const char * SCI_METHOD DescribeWordListSets() override {
229 		return osBash.DescribeWordListSets();
230 	}
231 	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
232 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
233 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
234 
PrivateCall(int,void *)235 	void * SCI_METHOD PrivateCall(int, void *) override {
236 		return 0;
237 	}
238 
AllocateSubStyles(int styleBase,int numberStyles)239 	int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
240 		return subStyles.Allocate(styleBase, numberStyles);
241 	}
SubStylesStart(int styleBase)242 	int SCI_METHOD SubStylesStart(int styleBase) override {
243 		return subStyles.Start(styleBase);
244 	}
SubStylesLength(int styleBase)245 	int SCI_METHOD SubStylesLength(int styleBase) override {
246 		return subStyles.Length(styleBase);
247 	}
StyleFromSubStyle(int subStyle)248 	int SCI_METHOD StyleFromSubStyle(int subStyle) override {
249 		const int styleBase = subStyles.BaseStyle(subStyle);
250 		return styleBase;
251 	}
PrimaryStyleFromStyle(int style)252 	int SCI_METHOD PrimaryStyleFromStyle(int style) override {
253 		return style;
254 	}
FreeSubStyles()255 	void SCI_METHOD FreeSubStyles() override {
256 		subStyles.Free();
257 	}
SetIdentifiers(int style,const char * identifiers)258 	void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
259 		subStyles.SetIdentifiers(style, identifiers);
260 	}
DistanceToSecondaryStyles()261 	int SCI_METHOD DistanceToSecondaryStyles() override {
262 		return 0;
263 	}
GetSubStyleBases()264 	const char *SCI_METHOD GetSubStyleBases() override {
265 		return styleSubable;
266 	}
267 
LexerFactoryBash()268 	static ILexer *LexerFactoryBash() {
269 		return new LexerBash();
270 	}
271 };
272 
PropertySet(const char * key,const char * val)273 Sci_Position SCI_METHOD LexerBash::PropertySet(const char *key, const char *val) {
274 	if (osBash.PropertySet(&options, key, val)) {
275 		return 0;
276 	}
277 	return -1;
278 }
279 
WordListSet(int n,const char * wl)280 Sci_Position SCI_METHOD LexerBash::WordListSet(int n, const char *wl) {
281 	WordList *wordListN = 0;
282 	switch (n) {
283 	case 0:
284 		wordListN = &keywords;
285 		break;
286 	}
287 	Sci_Position firstModification = -1;
288 	if (wordListN) {
289 		WordList wlNew;
290 		wlNew.Set(wl);
291 		if (*wordListN != wlNew) {
292 			wordListN->Set(wl);
293 			firstModification = 0;
294 		}
295 	}
296 	return firstModification;
297 }
298 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)299 void SCI_METHOD LexerBash::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
300 	WordList cmdDelimiter, bashStruct, bashStruct_in;
301 	cmdDelimiter.Set("| || |& & && ; ;; ( ) { }");
302 	bashStruct.Set("if elif fi while until else then do done esac eval");
303 	bashStruct_in.Set("for case select");
304 
305 	CharacterSet setWordStart(CharacterSet::setAlpha, "_");
306 	// note that [+-] are often parts of identifiers in shell scripts
307 	CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
308 	CharacterSet setMetaCharacter(CharacterSet::setNone, "|&;()<> \t\r\n");
309 	setMetaCharacter.Add(0);
310 	CharacterSet setBashOperator(CharacterSet::setNone, "^&%()-+=|{}[]:;>,*/<?!.~@");
311 	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
312 	CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
313 	CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!%*,./:?@[]^`{}~");
314 	CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!%*,./:=?@[]^`{}~");
315 	CharacterSet setLeftShift(CharacterSet::setDigits, "$");
316 
317 	class HereDocCls {	// Class to manage HERE document elements
318 	public:
319 		int State;		// 0: '<<' encountered
320 		// 1: collect the delimiter
321 		// 2: here doc text (lines after the delimiter)
322 		int Quote;		// the char after '<<'
323 		bool Quoted;		// true if Quote in ('\'','"','`')
324 		bool Indent;		// indented delimiter (for <<-)
325 		int DelimiterLength;	// strlen(Delimiter)
326 		char Delimiter[HERE_DELIM_MAX];	// the Delimiter
327 		HereDocCls() {
328 			State = 0;
329 			Quote = 0;
330 			Quoted = false;
331 			Indent = 0;
332 			DelimiterLength = 0;
333 			Delimiter[0] = '\0';
334 		}
335 		void Append(int ch) {
336 			Delimiter[DelimiterLength++] = static_cast<char>(ch);
337 			Delimiter[DelimiterLength] = '\0';
338 		}
339 		~HereDocCls() {
340 		}
341 	};
342 	HereDocCls HereDoc;
343 
344 	class QuoteCls {	// Class to manage quote pairs (simplified vs LexPerl)
345 		public:
346 		int Count;
347 		int Up, Down;
348 		QuoteCls() {
349 			Count = 0;
350 			Up    = '\0';
351 			Down  = '\0';
352 		}
353 		void Open(int u) {
354 			Count++;
355 			Up    = u;
356 			Down  = opposite(Up);
357 		}
358 		void Start(int u) {
359 			Count = 0;
360 			Open(u);
361 		}
362 	};
363 	QuoteCls Quote;
364 
365 	class QuoteStackCls {	// Class to manage quote pairs that nest
366 		public:
367 		int Count;
368 		int Up, Down;
369 		int Style;
370 		int Depth;			// levels pushed
371 		int CountStack[BASH_DELIM_STACK_MAX];
372 		int UpStack   [BASH_DELIM_STACK_MAX];
373 		int StyleStack[BASH_DELIM_STACK_MAX];
374 		QuoteStackCls() {
375 			Count = 0;
376 			Up    = '\0';
377 			Down  = '\0';
378 			Style = 0;
379 			Depth = 0;
380 		}
381 		void Start(int u, int s) {
382 			Count = 1;
383 			Up    = u;
384 			Down  = opposite(Up);
385 			Style = s;
386 		}
387 		void Push(int u, int s) {
388 			if (Depth >= BASH_DELIM_STACK_MAX)
389 				return;
390 			CountStack[Depth] = Count;
391 			UpStack   [Depth] = Up;
392 			StyleStack[Depth] = Style;
393 			Depth++;
394 			Count = 1;
395 			Up    = u;
396 			Down  = opposite(Up);
397 			Style = s;
398 		}
399 		void Pop(void) {
400 			if (Depth <= 0)
401 				return;
402 			Depth--;
403 			Count = CountStack[Depth];
404 			Up    = UpStack   [Depth];
405 			Style = StyleStack[Depth];
406 			Down  = opposite(Up);
407 		}
408 		~QuoteStackCls() {
409 		}
410 	};
411 	QuoteStackCls QuoteStack;
412 
413 	const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_SH_IDENTIFIER);
414 	const WordClassifier &classifierScalars = subStyles.Classifier(SCE_SH_SCALAR);
415 
416 	int numBase = 0;
417 	int digit;
418 	Sci_PositionU endPos = startPos + length;
419 	int cmdState = BASH_CMD_START;
420 	int testExprType = 0;
421 	LexAccessor styler(pAccess);
422 
423 	// Always backtracks to the start of a line that is not a continuation
424 	// of the previous line (i.e. start of a bash command segment)
425 	Sci_Position ln = styler.GetLine(startPos);
426 	if (ln > 0 && startPos == static_cast<Sci_PositionU>(styler.LineStart(ln)))
427 		ln--;
428 	for (;;) {
429 		startPos = styler.LineStart(ln);
430 		if (ln == 0 || styler.GetLineState(ln) == BASH_CMD_START)
431 			break;
432 		ln--;
433 	}
434 	initStyle = SCE_SH_DEFAULT;
435 
436 	StyleContext sc(startPos, endPos - startPos, initStyle, styler);
437 
438 	for (; sc.More(); sc.Forward()) {
439 
440 		// handle line continuation, updates per-line stored state
441 		if (sc.atLineStart) {
442 			ln = styler.GetLine(sc.currentPos);
443 			if (sc.state == SCE_SH_STRING
444 			 || sc.state == SCE_SH_BACKTICKS
445 			 || sc.state == SCE_SH_CHARACTER
446 			 || sc.state == SCE_SH_HERE_Q
447 			 || sc.state == SCE_SH_COMMENTLINE
448 			 || sc.state == SCE_SH_PARAM) {
449 				// force backtrack while retaining cmdState
450 				styler.SetLineState(ln, BASH_CMD_BODY);
451 			} else {
452 				if (ln > 0) {
453 					if ((sc.GetRelative(-3) == '\\' && sc.GetRelative(-2) == '\r' && sc.chPrev == '\n')
454 					 || sc.GetRelative(-2) == '\\') {	// handle '\' line continuation
455 						// retain last line's state
456 					} else
457 						cmdState = BASH_CMD_START;
458 				}
459 				styler.SetLineState(ln, cmdState);
460 			}
461 		}
462 
463 		// controls change of cmdState at the end of a non-whitespace element
464 		// states BODY|TEST|ARITH persist until the end of a command segment
465 		// state WORD persist, but ends with 'in' or 'do' construct keywords
466 		int cmdStateNew = BASH_CMD_BODY;
467 		if (cmdState == BASH_CMD_TEST || cmdState == BASH_CMD_ARITH || cmdState == BASH_CMD_WORD)
468 			cmdStateNew = cmdState;
469 		int stylePrev = sc.state;
470 
471 		// Determine if the current state should terminate.
472 		switch (sc.state) {
473 			case SCE_SH_OPERATOR:
474 				sc.SetState(SCE_SH_DEFAULT);
475 				if (cmdState == BASH_CMD_DELIM)		// if command delimiter, start new command
476 					cmdStateNew = BASH_CMD_START;
477 				else if (sc.chPrev == '\\')			// propagate command state if line continued
478 					cmdStateNew = cmdState;
479 				break;
480 			case SCE_SH_WORD:
481 				// "." never used in Bash variable names but used in file names
482 				if (!setWord.Contains(sc.ch)) {
483 					char s[500];
484 					char s2[10];
485 					sc.GetCurrent(s, sizeof(s));
486 					int identifierStyle = SCE_SH_IDENTIFIER;
487 					int subStyle = classifierIdentifiers.ValueFor(s);
488 					if (subStyle >= 0) {
489 						identifierStyle = subStyle;
490 					}
491 					// allow keywords ending in a whitespace or command delimiter
492 					s2[0] = static_cast<char>(sc.ch);
493 					s2[1] = '\0';
494 					bool keywordEnds = IsASpace(sc.ch) || cmdDelimiter.InList(s2);
495 					// 'in' or 'do' may be construct keywords
496 					if (cmdState == BASH_CMD_WORD) {
497 						if (strcmp(s, "in") == 0 && keywordEnds)
498 							cmdStateNew = BASH_CMD_BODY;
499 						else if (strcmp(s, "do") == 0 && keywordEnds)
500 							cmdStateNew = BASH_CMD_START;
501 						else
502 							sc.ChangeState(identifierStyle);
503 						sc.SetState(SCE_SH_DEFAULT);
504 						break;
505 					}
506 					// a 'test' keyword starts a test expression
507 					if (strcmp(s, "test") == 0) {
508 						if (cmdState == BASH_CMD_START && keywordEnds) {
509 							cmdStateNew = BASH_CMD_TEST;
510 							testExprType = 0;
511 						} else
512 							sc.ChangeState(identifierStyle);
513 					}
514 					// detect bash construct keywords
515 					else if (bashStruct.InList(s)) {
516 						if (cmdState == BASH_CMD_START && keywordEnds)
517 							cmdStateNew = BASH_CMD_START;
518 						else
519 							sc.ChangeState(identifierStyle);
520 					}
521 					// 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
522 					else if (bashStruct_in.InList(s)) {
523 						if (cmdState == BASH_CMD_START && keywordEnds)
524 							cmdStateNew = BASH_CMD_WORD;
525 						else
526 							sc.ChangeState(identifierStyle);
527 					}
528 					// disambiguate option items and file test operators
529 					else if (s[0] == '-') {
530 						if (cmdState != BASH_CMD_TEST)
531 							sc.ChangeState(identifierStyle);
532 					}
533 					// disambiguate keywords and identifiers
534 					else if (cmdState != BASH_CMD_START
535 						  || !(keywords.InList(s) && keywordEnds)) {
536 						sc.ChangeState(identifierStyle);
537 					}
538 					sc.SetState(SCE_SH_DEFAULT);
539 				}
540 				break;
541 			case SCE_SH_IDENTIFIER:
542 				if (sc.chPrev == '\\' || !setWord.Contains(sc.ch) ||
543 					  (cmdState == BASH_CMD_ARITH && !setWordStart.Contains(sc.ch))) {
544 					char s[500];
545 					sc.GetCurrent(s, sizeof(s));
546 					int subStyle = classifierIdentifiers.ValueFor(s);
547 					if (subStyle >= 0) {
548 						sc.ChangeState(subStyle);
549 					}
550 					if (sc.chPrev == '\\') {	// for escaped chars
551 						sc.ForwardSetState(SCE_SH_DEFAULT);
552 					} else {
553 						sc.SetState(SCE_SH_DEFAULT);
554 					}
555 				}
556 				break;
557 			case SCE_SH_NUMBER:
558 				digit = translateBashDigit(sc.ch);
559 				if (numBase == BASH_BASE_DECIMAL) {
560 					if (sc.ch == '#') {
561 						char s[10];
562 						sc.GetCurrent(s, sizeof(s));
563 						numBase = getBashNumberBase(s);
564 						if (numBase != BASH_BASE_ERROR)
565 							break;
566 					} else if (IsADigit(sc.ch))
567 						break;
568 				} else if (numBase == BASH_BASE_HEX) {
569 					if (IsADigit(sc.ch, 16))
570 						break;
571 #ifdef PEDANTIC_OCTAL
572 				} else if (numBase == BASH_BASE_OCTAL ||
573 						   numBase == BASH_BASE_OCTAL_ERROR) {
574 					if (digit <= 7)
575 						break;
576 					if (digit <= 9) {
577 						numBase = BASH_BASE_OCTAL_ERROR;
578 						break;
579 					}
580 #endif
581 				} else if (numBase == BASH_BASE_ERROR) {
582 					if (digit <= 9)
583 						break;
584 				} else {	// DD#DDDD number style handling
585 					if (digit != BASH_BASE_ERROR) {
586 						if (numBase <= 36) {
587 							// case-insensitive if base<=36
588 							if (digit >= 36) digit -= 26;
589 						}
590 						if (digit < numBase)
591 							break;
592 						if (digit <= 9) {
593 							numBase = BASH_BASE_ERROR;
594 							break;
595 						}
596 					}
597 				}
598 				// fallthrough when number is at an end or error
599 				if (numBase == BASH_BASE_ERROR
600 #ifdef PEDANTIC_OCTAL
601 					|| numBase == BASH_BASE_OCTAL_ERROR
602 #endif
603 				) {
604 					sc.ChangeState(SCE_SH_ERROR);
605 				}
606 				sc.SetState(SCE_SH_DEFAULT);
607 				break;
608 			case SCE_SH_COMMENTLINE:
609 				if (sc.atLineEnd && sc.chPrev != '\\') {
610 					sc.SetState(SCE_SH_DEFAULT);
611 				}
612 				break;
613 			case SCE_SH_HERE_DELIM:
614 				// From Bash info:
615 				// ---------------
616 				// Specifier format is: <<[-]WORD
617 				// Optional '-' is for removal of leading tabs from here-doc.
618 				// Whitespace acceptable after <<[-] operator
619 				//
620 				if (HereDoc.State == 0) { // '<<' encountered
621 					HereDoc.Quote = sc.chNext;
622 					HereDoc.Quoted = false;
623 					HereDoc.DelimiterLength = 0;
624 					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
625 					if (sc.chNext == '\'' || sc.chNext == '\"') {	// a quoted here-doc delimiter (' or ")
626 						sc.Forward();
627 						HereDoc.Quoted = true;
628 						HereDoc.State = 1;
629 					} else if (setHereDoc.Contains(sc.chNext) ||
630 					           (sc.chNext == '=' && cmdState != BASH_CMD_ARITH)) {
631 						// an unquoted here-doc delimiter, no special handling
632 						HereDoc.State = 1;
633 					} else if (sc.chNext == '<') {	// HERE string <<<
634 						sc.Forward();
635 						sc.ForwardSetState(SCE_SH_DEFAULT);
636 					} else if (IsASpace(sc.chNext)) {
637 						// eat whitespace
638 					} else if (setLeftShift.Contains(sc.chNext) ||
639 					           (sc.chNext == '=' && cmdState == BASH_CMD_ARITH)) {
640 						// left shift <<$var or <<= cases
641 						sc.ChangeState(SCE_SH_OPERATOR);
642 						sc.ForwardSetState(SCE_SH_DEFAULT);
643 					} else {
644 						// symbols terminates; deprecated zero-length delimiter
645 						HereDoc.State = 1;
646 					}
647 				} else if (HereDoc.State == 1) { // collect the delimiter
648 					// * if single quoted, there's no escape
649 					// * if double quoted, there are \\ and \" escapes
650 					if ((HereDoc.Quote == '\'' && sc.ch != HereDoc.Quote) ||
651 					    (HereDoc.Quoted && sc.ch != HereDoc.Quote && sc.ch != '\\') ||
652 					    (HereDoc.Quote != '\'' && sc.chPrev == '\\') ||
653 					    (setHereDoc2.Contains(sc.ch))) {
654 						HereDoc.Append(sc.ch);
655 					} else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) {	// closing quote => end of delimiter
656 						sc.ForwardSetState(SCE_SH_DEFAULT);
657 					} else if (sc.ch == '\\') {
658 						if (HereDoc.Quoted && sc.chNext != HereDoc.Quote && sc.chNext != '\\') {
659 							// in quoted prefixes only \ and the quote eat the escape
660 							HereDoc.Append(sc.ch);
661 						} else {
662 							// skip escape prefix
663 						}
664 					} else if (!HereDoc.Quoted) {
665 						sc.SetState(SCE_SH_DEFAULT);
666 					}
667 					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {	// force blowup
668 						sc.SetState(SCE_SH_ERROR);
669 						HereDoc.State = 0;
670 					}
671 				}
672 				break;
673 			case SCE_SH_HERE_Q:
674 				// HereDoc.State == 2
675 				if (sc.atLineStart) {
676 					sc.SetState(SCE_SH_HERE_Q);
677 					int prefixws = 0;
678 					while (sc.ch == '\t' && !sc.atLineEnd) {	// tabulation prefix
679 						sc.Forward();
680 						prefixws++;
681 					}
682 					if (prefixws > 0)
683 						sc.SetState(SCE_SH_HERE_Q);
684 					while (!sc.atLineEnd) {
685 						sc.Forward();
686 					}
687 					char s[HERE_DELIM_MAX];
688 					sc.GetCurrent(s, sizeof(s));
689 					if (sc.LengthCurrent() == 0) {  // '' or "" delimiters
690 						if ((prefixws == 0 || HereDoc.Indent) &&
691 							HereDoc.Quoted && HereDoc.DelimiterLength == 0)
692 							sc.SetState(SCE_SH_DEFAULT);
693 						break;
694 					}
695 					if (s[strlen(s) - 1] == '\r')
696 						s[strlen(s) - 1] = '\0';
697 					if (strcmp(HereDoc.Delimiter, s) == 0) {
698 						if ((prefixws == 0) ||	// indentation rule
699 							(prefixws > 0 && HereDoc.Indent)) {
700 							sc.SetState(SCE_SH_DEFAULT);
701 							break;
702 						}
703 					}
704 				}
705 				break;
706 			case SCE_SH_SCALAR:	// variable names
707 				if (!setParam.Contains(sc.ch)) {
708 					char s[500];
709 					sc.GetCurrent(s, sizeof(s));
710 					int subStyle = classifierScalars.ValueFor(&s[1]); // skip the $
711 					if (subStyle >= 0) {
712 						sc.ChangeState(subStyle);
713 					}
714 					if (sc.LengthCurrent() == 1) {
715 						// Special variable: $(, $_ etc.
716 						sc.ForwardSetState(SCE_SH_DEFAULT);
717 					} else {
718 						sc.SetState(SCE_SH_DEFAULT);
719 					}
720 				}
721 				break;
722 			case SCE_SH_STRING:	// delimited styles, can nest
723 			case SCE_SH_BACKTICKS:
724 				if (sc.ch == '\\' && QuoteStack.Up != '\\') {
725 					if (QuoteStack.Style != BASH_DELIM_LITERAL)
726 						sc.Forward();
727 				} else if (sc.ch == QuoteStack.Down) {
728 					QuoteStack.Count--;
729 					if (QuoteStack.Count == 0) {
730 						if (QuoteStack.Depth > 0) {
731 							QuoteStack.Pop();
732 						} else
733 							sc.ForwardSetState(SCE_SH_DEFAULT);
734 					}
735 				} else if (sc.ch == QuoteStack.Up) {
736 					QuoteStack.Count++;
737 				} else {
738 					if (QuoteStack.Style == BASH_DELIM_STRING ||
739 						QuoteStack.Style == BASH_DELIM_LSTRING
740 					) {	// do nesting for "string", $"locale-string"
741 						if (sc.ch == '`') {
742 							QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
743 						} else if (sc.ch == '$' && sc.chNext == '(') {
744 							sc.Forward();
745 							QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
746 						}
747 					} else if (QuoteStack.Style == BASH_DELIM_COMMAND ||
748 							   QuoteStack.Style == BASH_DELIM_BACKTICK
749 					) {	// do nesting for $(command), `command`
750 						if (sc.ch == '\'') {
751 							QuoteStack.Push(sc.ch, BASH_DELIM_LITERAL);
752 						} else if (sc.ch == '\"') {
753 							QuoteStack.Push(sc.ch, BASH_DELIM_STRING);
754 						} else if (sc.ch == '`') {
755 							QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
756 						} else if (sc.ch == '$') {
757 							if (sc.chNext == '\'') {
758 								sc.Forward();
759 								QuoteStack.Push(sc.ch, BASH_DELIM_CSTRING);
760 							} else if (sc.chNext == '\"') {
761 								sc.Forward();
762 								QuoteStack.Push(sc.ch, BASH_DELIM_LSTRING);
763 							} else if (sc.chNext == '(') {
764 								sc.Forward();
765 								QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
766 							}
767 						}
768 					}
769 				}
770 				break;
771 			case SCE_SH_PARAM: // ${parameter}
772 				if (sc.ch == '\\' && Quote.Up != '\\') {
773 					sc.Forward();
774 				} else if (sc.ch == Quote.Down) {
775 					Quote.Count--;
776 					if (Quote.Count == 0) {
777 						sc.ForwardSetState(SCE_SH_DEFAULT);
778 					}
779 				} else if (sc.ch == Quote.Up) {
780 					Quote.Count++;
781 				}
782 				break;
783 			case SCE_SH_CHARACTER: // singly-quoted strings
784 				if (sc.ch == Quote.Down) {
785 					Quote.Count--;
786 					if (Quote.Count == 0) {
787 						sc.ForwardSetState(SCE_SH_DEFAULT);
788 					}
789 				}
790 				break;
791 		}
792 
793 		// Must check end of HereDoc state 1 before default state is handled
794 		if (HereDoc.State == 1 && sc.atLineEnd) {
			// Begin of here-doc (the line after the here-doc delimiter):
			// Lexically, the here-doc starts from the next line after the <<, but the
			// first line of here-doc seems to follow the style of the last EOL sequence
798 			HereDoc.State = 2;
799 			if (HereDoc.Quoted) {
800 				if (sc.state == SCE_SH_HERE_DELIM) {
801 					// Missing quote at end of string! Syntax error in bash 4.3
802 					// Mark this bit as an error, do not colour any here-doc
803 					sc.ChangeState(SCE_SH_ERROR);
804 					sc.SetState(SCE_SH_DEFAULT);
805 				} else {
806 					// HereDoc.Quote always == '\''
807 					sc.SetState(SCE_SH_HERE_Q);
808 				}
809 			} else if (HereDoc.DelimiterLength == 0) {
810 				// no delimiter, illegal (but '' and "" are legal)
811 				sc.ChangeState(SCE_SH_ERROR);
812 				sc.SetState(SCE_SH_DEFAULT);
813 			} else {
814 				sc.SetState(SCE_SH_HERE_Q);
815 			}
816 		}
817 
818 		// update cmdState about the current command segment
819 		if (stylePrev != SCE_SH_DEFAULT && sc.state == SCE_SH_DEFAULT) {
820 			cmdState = cmdStateNew;
821 		}
822 		// Determine if a new state should be entered.
823 		if (sc.state == SCE_SH_DEFAULT) {
824 			if (sc.ch == '\\') {
825 				// Bash can escape any non-newline as a literal
826 				sc.SetState(SCE_SH_IDENTIFIER);
827 				if (sc.chNext == '\r' || sc.chNext == '\n')
828 					sc.SetState(SCE_SH_OPERATOR);
829 			} else if (IsADigit(sc.ch)) {
830 				sc.SetState(SCE_SH_NUMBER);
831 				numBase = BASH_BASE_DECIMAL;
832 				if (sc.ch == '0') {	// hex,octal
833 					if (sc.chNext == 'x' || sc.chNext == 'X') {
834 						numBase = BASH_BASE_HEX;
835 						sc.Forward();
836 					} else if (IsADigit(sc.chNext)) {
837 #ifdef PEDANTIC_OCTAL
838 						numBase = BASH_BASE_OCTAL;
839 #else
840 						numBase = BASH_BASE_HEX;
841 #endif
842 					}
843 				}
844 			} else if (setWordStart.Contains(sc.ch)) {
845 				sc.SetState(SCE_SH_WORD);
846 			} else if (sc.ch == '#') {
847 				if (stylePrev != SCE_SH_WORD && stylePrev != SCE_SH_IDENTIFIER &&
848 					(sc.currentPos == 0 || setMetaCharacter.Contains(sc.chPrev))) {
849 					sc.SetState(SCE_SH_COMMENTLINE);
850 				} else {
851 					sc.SetState(SCE_SH_WORD);
852 				}
853 				// handle some zsh features within arithmetic expressions only
854 				if (cmdState == BASH_CMD_ARITH) {
855 					if (sc.chPrev == '[') {	// [#8] [##8] output digit setting
856 						sc.SetState(SCE_SH_WORD);
857 						if (sc.chNext == '#') {
858 							sc.Forward();
859 						}
860 					} else if (sc.Match("##^") && IsUpperCase(sc.GetRelative(3))) {	// ##^A
861 						sc.SetState(SCE_SH_IDENTIFIER);
862 						sc.Forward(3);
863 					} else if (sc.chNext == '#' && !IsASpace(sc.GetRelative(2))) {	// ##a
864 						sc.SetState(SCE_SH_IDENTIFIER);
865 						sc.Forward(2);
866 					} else if (setWordStart.Contains(sc.chNext)) {	// #name
867 						sc.SetState(SCE_SH_IDENTIFIER);
868 					}
869 				}
870 			} else if (sc.ch == '\"') {
871 				sc.SetState(SCE_SH_STRING);
872 				QuoteStack.Start(sc.ch, BASH_DELIM_STRING);
873 			} else if (sc.ch == '\'') {
874 				sc.SetState(SCE_SH_CHARACTER);
875 				Quote.Start(sc.ch);
876 			} else if (sc.ch == '`') {
877 				sc.SetState(SCE_SH_BACKTICKS);
878 				QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
879 			} else if (sc.ch == '$') {
880 				if (sc.Match("$((")) {
881 					sc.SetState(SCE_SH_OPERATOR);	// handle '((' later
882 					continue;
883 				}
884 				sc.SetState(SCE_SH_SCALAR);
885 				sc.Forward();
886 				if (sc.ch == '{') {
887 					sc.ChangeState(SCE_SH_PARAM);
888 					Quote.Start(sc.ch);
889 				} else if (sc.ch == '\'') {
890 					sc.ChangeState(SCE_SH_STRING);
891 					QuoteStack.Start(sc.ch, BASH_DELIM_CSTRING);
892 				} else if (sc.ch == '"') {
893 					sc.ChangeState(SCE_SH_STRING);
894 					QuoteStack.Start(sc.ch, BASH_DELIM_LSTRING);
895 				} else if (sc.ch == '(') {
896 					sc.ChangeState(SCE_SH_BACKTICKS);
897 					QuoteStack.Start(sc.ch, BASH_DELIM_COMMAND);
898 				} else if (sc.ch == '`') {	// $` seen in a configure script, valid?
899 					sc.ChangeState(SCE_SH_BACKTICKS);
900 					QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
901 				} else {
902 					continue;	// scalar has no delimiter pair
903 				}
904 			} else if (sc.Match('<', '<')) {
905 				sc.SetState(SCE_SH_HERE_DELIM);
906 				HereDoc.State = 0;
907 				if (sc.GetRelative(2) == '-') {	// <<- indent case
908 					HereDoc.Indent = true;
909 					sc.Forward();
910 				} else {
911 					HereDoc.Indent = false;
912 				}
913 			} else if (sc.ch == '-'	&&	// one-char file test operators
914 					   setSingleCharOp.Contains(sc.chNext) &&
915 					   !setWord.Contains(sc.GetRelative(2)) &&
916 					   IsASpace(sc.chPrev)) {
917 				sc.SetState(SCE_SH_WORD);
918 				sc.Forward();
919 			} else if (setBashOperator.Contains(sc.ch)) {
920 				char s[10];
921 				bool isCmdDelim = false;
922 				sc.SetState(SCE_SH_OPERATOR);
923 				// globs have no whitespace, do not appear in arithmetic expressions
924 				if (cmdState != BASH_CMD_ARITH && sc.ch == '(' && sc.chNext != '(') {
925 					int i = GlobScan(sc);
926 					if (i > 1) {
927 						sc.SetState(SCE_SH_IDENTIFIER);
928 						sc.Forward(i);
929 						continue;
930 					}
931 				}
932 				// handle opening delimiters for test/arithmetic expressions - ((,[[,[
933 				if (cmdState == BASH_CMD_START
934 				 || cmdState == BASH_CMD_BODY) {
935 					if (sc.Match('(', '(')) {
936 						cmdState = BASH_CMD_ARITH;
937 						sc.Forward();
938 					} else if (sc.Match('[', '[') && IsASpace(sc.GetRelative(2))) {
939 						cmdState = BASH_CMD_TEST;
940 						testExprType = 1;
941 						sc.Forward();
942 					} else if (sc.ch == '[' && IsASpace(sc.chNext)) {
943 						cmdState = BASH_CMD_TEST;
944 						testExprType = 2;
945 					}
946 				}
947 				// special state -- for ((x;y;z)) in ... looping
948 				if (cmdState == BASH_CMD_WORD && sc.Match('(', '(')) {
949 					cmdState = BASH_CMD_ARITH;
950 					sc.Forward();
951 					continue;
952 				}
953 				// handle command delimiters in command START|BODY|WORD state, also TEST if 'test'
954 				if (cmdState == BASH_CMD_START
955 				 || cmdState == BASH_CMD_BODY
956 				 || cmdState == BASH_CMD_WORD
957 				 || (cmdState == BASH_CMD_TEST && testExprType == 0)) {
958 					s[0] = static_cast<char>(sc.ch);
959 					if (setBashOperator.Contains(sc.chNext)) {
960 						s[1] = static_cast<char>(sc.chNext);
961 						s[2] = '\0';
962 						isCmdDelim = cmdDelimiter.InList(s);
963 						if (isCmdDelim)
964 							sc.Forward();
965 					}
966 					if (!isCmdDelim) {
967 						s[1] = '\0';
968 						isCmdDelim = cmdDelimiter.InList(s);
969 					}
970 					if (isCmdDelim) {
971 						cmdState = BASH_CMD_DELIM;
972 						continue;
973 					}
974 				}
975 				// handle closing delimiters for test/arithmetic expressions - )),]],]
976 				if (cmdState == BASH_CMD_ARITH && sc.Match(')', ')')) {
977 					cmdState = BASH_CMD_BODY;
978 					sc.Forward();
979 				} else if (cmdState == BASH_CMD_TEST && IsASpace(sc.chPrev)) {
980 					if (sc.Match(']', ']') && testExprType == 1) {
981 						sc.Forward();
982 						cmdState = BASH_CMD_BODY;
983 					} else if (sc.ch == ']' && testExprType == 2) {
984 						cmdState = BASH_CMD_BODY;
985 					}
986 				}
987 			}
988 		}// sc.state
989 	}
990 	sc.Complete();
991 	if (sc.state == SCE_SH_HERE_Q) {
992 		styler.ChangeLexerState(sc.currentPos, styler.Length());
993 	}
994 	sc.Complete();
995 }
996 
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)997 void SCI_METHOD LexerBash::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) {
998 	if(!options.fold)
999 		return;
1000 
1001 	LexAccessor styler(pAccess);
1002 
1003 	Sci_PositionU endPos = startPos + length;
1004 	int visibleChars = 0;
1005 	int skipHereCh = 0;
1006 	Sci_Position lineCurrent = styler.GetLine(startPos);
1007 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1008 	int levelCurrent = levelPrev;
1009 	char chNext = styler[startPos];
1010 	int styleNext = styler.StyleAt(startPos);
1011 	char word[8] = { '\0' }; // we're not interested in long words anyway
1012 	unsigned int wordlen = 0;
1013 	for (Sci_PositionU i = startPos; i < endPos; i++) {
1014 		char ch = chNext;
1015 		chNext = styler.SafeGetCharAt(i + 1);
1016 		int style = styleNext;
1017 		styleNext = styler.StyleAt(i + 1);
1018 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1019 		// Comment folding
1020 		if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler))
1021 		{
1022 			if (!IsCommentLine(lineCurrent - 1, styler)
1023 				&& IsCommentLine(lineCurrent + 1, styler))
1024 				levelCurrent++;
1025 			else if (IsCommentLine(lineCurrent - 1, styler)
1026 					 && !IsCommentLine(lineCurrent + 1, styler))
1027 				levelCurrent--;
1028 		}
1029 		if (style == SCE_SH_WORD) {
1030 			if ((wordlen + 1) < sizeof(word))
1031 				word[wordlen++] = ch;
1032 			if (styleNext != style) {
1033 				word[wordlen] = '\0';
1034 				wordlen = 0;
1035 				if (strcmp(word, "if") == 0 || strcmp(word, "case") == 0 || strcmp(word, "do") == 0) {
1036 					levelCurrent++;
1037 				} else if (strcmp(word, "fi") == 0 || strcmp(word, "esac") == 0 || strcmp(word, "done") == 0) {
1038 					levelCurrent--;
1039 				}
1040 			}
1041 		}
1042 		if (style == SCE_SH_OPERATOR) {
1043 			if (ch == '{') {
1044 				levelCurrent++;
1045 			} else if (ch == '}') {
1046 				levelCurrent--;
1047 			}
1048 		}
1049 		// Here Document folding
1050 		if (style == SCE_SH_HERE_DELIM) {
1051 			if (ch == '<' && chNext == '<') {
1052 				if (styler.SafeGetCharAt(i + 2) == '<') {
1053 					skipHereCh = 1;
1054 				} else {
1055 					if (skipHereCh == 0) {
1056 						levelCurrent++;
1057 					} else {
1058 						skipHereCh = 0;
1059 					}
1060 				}
1061 			}
1062 		} else if (style == SCE_SH_HERE_Q && styler.StyleAt(i+1) == SCE_SH_DEFAULT) {
1063 			levelCurrent--;
1064 		}
1065 		if (atEOL) {
1066 			int lev = levelPrev;
1067 			if (visibleChars == 0 && options.foldCompact)
1068 				lev |= SC_FOLDLEVELWHITEFLAG;
1069 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
1070 				lev |= SC_FOLDLEVELHEADERFLAG;
1071 			if (lev != styler.LevelAt(lineCurrent)) {
1072 				styler.SetLevel(lineCurrent, lev);
1073 			}
1074 			lineCurrent++;
1075 			levelPrev = levelCurrent;
1076 			visibleChars = 0;
1077 		}
1078 		if (!isspacechar(ch))
1079 			visibleChars++;
1080 	}
1081 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1082 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1083 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1084 }
1085 
// Lexer module object for Bash: built from the SCLEX_BASH identifier, the
// LexerBash::LexerFactoryBash factory function, the module name "bash" and
// the bashWordListDesc word list descriptions.
LexerModule lmBash(SCLEX_BASH, LexerBash::LexerFactoryBash, "bash", bashWordListDesc);
1087