1 // Scintilla source code edit control
2 /** @file LexBash.cxx
3  ** Lexer for Bash.
4  **/
5 // Copyright 2004-2012 by Neil Hodgson <neilh@scintilla.org>
6 // Adapted from LexPerl by Kein-Hong Man 2004
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
18 
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
25 
26 #ifdef SCI_NAMESPACE
27 using namespace Scintilla;
28 #endif
29 
// Size of the buffer that holds a here-doc delimiter (terminating NUL included).
enum { HERE_DELIM_MAX = 256 };

// define this if you want 'invalid octals' to be marked as errors
// usually, this is not a good idea, permissive lexing is better
#undef PEDANTIC_OCTAL

// Markers stored in the number-base variable while a number is being lexed.
// They sit above 64, the largest base getBashNumberBase() accepts, so a marker
// can be told apart from an explicit base parsed from the BASE#DIGITS notation.
enum {
	BASH_BASE_ERROR = 65,
	BASH_BASE_DECIMAL = 66,
	BASH_BASE_HEX = 67
#ifdef PEDANTIC_OCTAL
	,
	BASH_BASE_OCTAL = 68,
	BASH_BASE_OCTAL_ERROR = 69
#endif
};

// state constants for parts of a bash command segment
enum {
	BASH_CMD_BODY = 0,
	BASH_CMD_START = 1,
	BASH_CMD_WORD = 2,
	BASH_CMD_TEST = 3,
	BASH_CMD_ARITH = 4,
	BASH_CMD_DELIM = 5
};

// state constants for nested delimiter pairs, used by
// SCE_SH_STRING and SCE_SH_BACKTICKS processing
enum {
	BASH_DELIM_LITERAL = 0,
	BASH_DELIM_STRING = 1,
	BASH_DELIM_CSTRING = 2,
	BASH_DELIM_LSTRING = 3,
	BASH_DELIM_COMMAND = 4,
	BASH_DELIM_BACKTICK = 5
};

// Maximum number of nested delimiter levels that can be saved while lexing
// SCE_SH_STRING / SCE_SH_BACKTICKS.
enum { BASH_DELIM_STACK_MAX = 7 };
62 
translateBashDigit(int ch)63 static inline int translateBashDigit(int ch) {
64 	if (ch >= '0' && ch <= '9') {
65 		return ch - '0';
66 	} else if (ch >= 'a' && ch <= 'z') {
67 		return ch - 'a' + 10;
68 	} else if (ch >= 'A' && ch <= 'Z') {
69 		return ch - 'A' + 36;
70 	} else if (ch == '@') {
71 		return 62;
72 	} else if (ch == '_') {
73 		return 63;
74 	}
75 	return BASH_BASE_ERROR;
76 }
77 
getBashNumberBase(char * s)78 static inline int getBashNumberBase(char *s) {
79 	int i = 0;
80 	int base = 0;
81 	while (*s) {
82 		base = base * 10 + (*s++ - '0');
83 		i++;
84 	}
85 	if (base > 64 || i > 2) {
86 		return BASH_BASE_ERROR;
87 	}
88 	return base;
89 }
90 
// Return the closing delimiter that pairs with an opening bracket character.
// Characters that are not one of ( [ { < are returned unchanged, so a quote
// character is its own closing delimiter.
static int opposite(int ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}
98 
GlobScan(StyleContext & sc)99 static int GlobScan(StyleContext &sc) {
100 	// forward scan for a glob-like (...), no whitespace allowed
101 	int c, sLen = 0;
102 	while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
103 		if (IsASpace(c)) {
104 			return 0;
105 		} else if (c == ')') {
106 			return sLen;
107 		}
108 	}
109 	return 0;
110 }
111 
ColouriseBashDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)112 static void ColouriseBashDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
113 							 WordList *keywordlists[], Accessor &styler) {
114 
115 	WordList &keywords = *keywordlists[0];
116 	WordList cmdDelimiter, bashStruct, bashStruct_in;
117 	cmdDelimiter.Set("| || |& & && ; ;; ( ) { }");
118 	bashStruct.Set("if elif fi while until else then do done esac eval");
119 	bashStruct_in.Set("for case select");
120 
121 	CharacterSet setWordStart(CharacterSet::setAlpha, "_");
122 	// note that [+-] are often parts of identifiers in shell scripts
123 	CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
124 	CharacterSet setMetaCharacter(CharacterSet::setNone, "|&;()<> \t\r\n");
125 	setMetaCharacter.Add(0);
126 	CharacterSet setBashOperator(CharacterSet::setNone, "^&%()-+=|{}[]:;>,*/<?!.~@");
127 	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
128 	CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
129 	CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!%*,./:?@[]^`{}~");
130 	CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!%*,./:=?@[]^`{}~");
131 	CharacterSet setLeftShift(CharacterSet::setDigits, "$");
132 
133 	class HereDocCls {	// Class to manage HERE document elements
134 	public:
135 		int State;		// 0: '<<' encountered
136 		// 1: collect the delimiter
137 		// 2: here doc text (lines after the delimiter)
138 		int Quote;		// the char after '<<'
139 		bool Quoted;		// true if Quote in ('\'','"','`')
140 		bool Indent;		// indented delimiter (for <<-)
141 		int DelimiterLength;	// strlen(Delimiter)
142 		char Delimiter[HERE_DELIM_MAX];	// the Delimiter
143 		HereDocCls() {
144 			State = 0;
145 			Quote = 0;
146 			Quoted = false;
147 			Indent = 0;
148 			DelimiterLength = 0;
149 			Delimiter[0] = '\0';
150 		}
151 		void Append(int ch) {
152 			Delimiter[DelimiterLength++] = static_cast<char>(ch);
153 			Delimiter[DelimiterLength] = '\0';
154 		}
155 		~HereDocCls() {
156 		}
157 	};
158 	HereDocCls HereDoc;
159 
160 	class QuoteCls {	// Class to manage quote pairs (simplified vs LexPerl)
161 		public:
162 		int Count;
163 		int Up, Down;
164 		QuoteCls() {
165 			Count = 0;
166 			Up    = '\0';
167 			Down  = '\0';
168 		}
169 		void Open(int u) {
170 			Count++;
171 			Up    = u;
172 			Down  = opposite(Up);
173 		}
174 		void Start(int u) {
175 			Count = 0;
176 			Open(u);
177 		}
178 	};
179 	QuoteCls Quote;
180 
181 	class QuoteStackCls {	// Class to manage quote pairs that nest
182 		public:
183 		int Count;
184 		int Up, Down;
185 		int Style;
186 		int Depth;			// levels pushed
187 		int CountStack[BASH_DELIM_STACK_MAX];
188 		int UpStack   [BASH_DELIM_STACK_MAX];
189 		int StyleStack[BASH_DELIM_STACK_MAX];
190 		QuoteStackCls() {
191 			Count = 0;
192 			Up    = '\0';
193 			Down  = '\0';
194 			Style = 0;
195 			Depth = 0;
196 		}
197 		void Start(int u, int s) {
198 			Count = 1;
199 			Up    = u;
200 			Down  = opposite(Up);
201 			Style = s;
202 		}
203 		void Push(int u, int s) {
204 			if (Depth >= BASH_DELIM_STACK_MAX)
205 				return;
206 			CountStack[Depth] = Count;
207 			UpStack   [Depth] = Up;
208 			StyleStack[Depth] = Style;
209 			Depth++;
210 			Count = 1;
211 			Up    = u;
212 			Down  = opposite(Up);
213 			Style = s;
214 		}
215 		void Pop(void) {
216 			if (Depth <= 0)
217 				return;
218 			Depth--;
219 			Count = CountStack[Depth];
220 			Up    = UpStack   [Depth];
221 			Style = StyleStack[Depth];
222 			Down  = opposite(Up);
223 		}
224 		~QuoteStackCls() {
225 		}
226 	};
227 	QuoteStackCls QuoteStack;
228 
229 	int numBase = 0;
230 	int digit;
231 	Sci_PositionU endPos = startPos + length;
232 	int cmdState = BASH_CMD_START;
233 	int testExprType = 0;
234 
235 	// Always backtracks to the start of a line that is not a continuation
236 	// of the previous line (i.e. start of a bash command segment)
237 	Sci_Position ln = styler.GetLine(startPos);
238 	if (ln > 0 && startPos == static_cast<Sci_PositionU>(styler.LineStart(ln)))
239 		ln--;
240 	for (;;) {
241 		startPos = styler.LineStart(ln);
242 		if (ln == 0 || styler.GetLineState(ln) == BASH_CMD_START)
243 			break;
244 		ln--;
245 	}
246 	initStyle = SCE_SH_DEFAULT;
247 
248 	StyleContext sc(startPos, endPos - startPos, initStyle, styler);
249 
250 	for (; sc.More(); sc.Forward()) {
251 
252 		// handle line continuation, updates per-line stored state
253 		if (sc.atLineStart) {
254 			ln = styler.GetLine(sc.currentPos);
255 			if (sc.state == SCE_SH_STRING
256 			 || sc.state == SCE_SH_BACKTICKS
257 			 || sc.state == SCE_SH_CHARACTER
258 			 || sc.state == SCE_SH_HERE_Q
259 			 || sc.state == SCE_SH_COMMENTLINE
260 			 || sc.state == SCE_SH_PARAM) {
261 				// force backtrack while retaining cmdState
262 				styler.SetLineState(ln, BASH_CMD_BODY);
263 			} else {
264 				if (ln > 0) {
265 					if ((sc.GetRelative(-3) == '\\' && sc.GetRelative(-2) == '\r' && sc.chPrev == '\n')
266 					 || sc.GetRelative(-2) == '\\') {	// handle '\' line continuation
267 						// retain last line's state
268 					} else
269 						cmdState = BASH_CMD_START;
270 				}
271 				styler.SetLineState(ln, cmdState);
272 			}
273 		}
274 
275 		// controls change of cmdState at the end of a non-whitespace element
276 		// states BODY|TEST|ARITH persist until the end of a command segment
277 		// state WORD persist, but ends with 'in' or 'do' construct keywords
278 		int cmdStateNew = BASH_CMD_BODY;
279 		if (cmdState == BASH_CMD_TEST || cmdState == BASH_CMD_ARITH || cmdState == BASH_CMD_WORD)
280 			cmdStateNew = cmdState;
281 		int stylePrev = sc.state;
282 
283 		// Determine if the current state should terminate.
284 		switch (sc.state) {
285 			case SCE_SH_OPERATOR:
286 				sc.SetState(SCE_SH_DEFAULT);
287 				if (cmdState == BASH_CMD_DELIM)		// if command delimiter, start new command
288 					cmdStateNew = BASH_CMD_START;
289 				else if (sc.chPrev == '\\')			// propagate command state if line continued
290 					cmdStateNew = cmdState;
291 				break;
292 			case SCE_SH_WORD:
293 				// "." never used in Bash variable names but used in file names
294 				if (!setWord.Contains(sc.ch)) {
295 					char s[500];
296 					char s2[10];
297 					sc.GetCurrent(s, sizeof(s));
298 					// allow keywords ending in a whitespace or command delimiter
299 					s2[0] = static_cast<char>(sc.ch);
300 					s2[1] = '\0';
301 					bool keywordEnds = IsASpace(sc.ch) || cmdDelimiter.InList(s2);
302 					// 'in' or 'do' may be construct keywords
303 					if (cmdState == BASH_CMD_WORD) {
304 						if (strcmp(s, "in") == 0 && keywordEnds)
305 							cmdStateNew = BASH_CMD_BODY;
306 						else if (strcmp(s, "do") == 0 && keywordEnds)
307 							cmdStateNew = BASH_CMD_START;
308 						else
309 							sc.ChangeState(SCE_SH_IDENTIFIER);
310 						sc.SetState(SCE_SH_DEFAULT);
311 						break;
312 					}
313 					// a 'test' keyword starts a test expression
314 					if (strcmp(s, "test") == 0) {
315 						if (cmdState == BASH_CMD_START && keywordEnds) {
316 							cmdStateNew = BASH_CMD_TEST;
317 							testExprType = 0;
318 						} else
319 							sc.ChangeState(SCE_SH_IDENTIFIER);
320 					}
321 					// detect bash construct keywords
322 					else if (bashStruct.InList(s)) {
323 						if (cmdState == BASH_CMD_START && keywordEnds)
324 							cmdStateNew = BASH_CMD_START;
325 						else
326 							sc.ChangeState(SCE_SH_IDENTIFIER);
327 					}
328 					// 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
329 					else if (bashStruct_in.InList(s)) {
330 						if (cmdState == BASH_CMD_START && keywordEnds)
331 							cmdStateNew = BASH_CMD_WORD;
332 						else
333 							sc.ChangeState(SCE_SH_IDENTIFIER);
334 					}
335 					// disambiguate option items and file test operators
336 					else if (s[0] == '-') {
337 						if (cmdState != BASH_CMD_TEST)
338 							sc.ChangeState(SCE_SH_IDENTIFIER);
339 					}
340 					// disambiguate keywords and identifiers
341 					else if (cmdState != BASH_CMD_START
342 						  || !(keywords.InList(s) && keywordEnds)) {
343 						sc.ChangeState(SCE_SH_IDENTIFIER);
344 					}
345 					sc.SetState(SCE_SH_DEFAULT);
346 				}
347 				break;
348 			case SCE_SH_IDENTIFIER:
349 				if (sc.chPrev == '\\') {	// for escaped chars
350 					sc.ForwardSetState(SCE_SH_DEFAULT);
351 				} else if (!setWord.Contains(sc.ch)) {
352 					sc.SetState(SCE_SH_DEFAULT);
353 				} else if (cmdState == BASH_CMD_ARITH && !setWordStart.Contains(sc.ch)) {
354 					sc.SetState(SCE_SH_DEFAULT);
355 				}
356 				break;
357 			case SCE_SH_NUMBER:
358 				digit = translateBashDigit(sc.ch);
359 				if (numBase == BASH_BASE_DECIMAL) {
360 					if (sc.ch == '#') {
361 						char s[10];
362 						sc.GetCurrent(s, sizeof(s));
363 						numBase = getBashNumberBase(s);
364 						if (numBase != BASH_BASE_ERROR)
365 							break;
366 					} else if (IsADigit(sc.ch))
367 						break;
368 				} else if (numBase == BASH_BASE_HEX) {
369 					if (IsADigit(sc.ch, 16))
370 						break;
371 #ifdef PEDANTIC_OCTAL
372 				} else if (numBase == BASH_BASE_OCTAL ||
373 						   numBase == BASH_BASE_OCTAL_ERROR) {
374 					if (digit <= 7)
375 						break;
376 					if (digit <= 9) {
377 						numBase = BASH_BASE_OCTAL_ERROR;
378 						break;
379 					}
380 #endif
381 				} else if (numBase == BASH_BASE_ERROR) {
382 					if (digit <= 9)
383 						break;
384 				} else {	// DD#DDDD number style handling
385 					if (digit != BASH_BASE_ERROR) {
386 						if (numBase <= 36) {
387 							// case-insensitive if base<=36
388 							if (digit >= 36) digit -= 26;
389 						}
390 						if (digit < numBase)
391 							break;
392 						if (digit <= 9) {
393 							numBase = BASH_BASE_ERROR;
394 							break;
395 						}
396 					}
397 				}
398 				// fallthrough when number is at an end or error
399 				if (numBase == BASH_BASE_ERROR
400 #ifdef PEDANTIC_OCTAL
401 					|| numBase == BASH_BASE_OCTAL_ERROR
402 #endif
403 				) {
404 					sc.ChangeState(SCE_SH_ERROR);
405 				}
406 				sc.SetState(SCE_SH_DEFAULT);
407 				break;
408 			case SCE_SH_COMMENTLINE:
409 				if (sc.atLineEnd && sc.chPrev != '\\') {
410 					sc.SetState(SCE_SH_DEFAULT);
411 				}
412 				break;
413 			case SCE_SH_HERE_DELIM:
414 				// From Bash info:
415 				// ---------------
416 				// Specifier format is: <<[-]WORD
417 				// Optional '-' is for removal of leading tabs from here-doc.
418 				// Whitespace acceptable after <<[-] operator
419 				//
420 				if (HereDoc.State == 0) { // '<<' encountered
421 					HereDoc.Quote = sc.chNext;
422 					HereDoc.Quoted = false;
423 					HereDoc.DelimiterLength = 0;
424 					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
425 					if (sc.chNext == '\'' || sc.chNext == '\"') {	// a quoted here-doc delimiter (' or ")
426 						sc.Forward();
427 						HereDoc.Quoted = true;
428 						HereDoc.State = 1;
429 					} else if (setHereDoc.Contains(sc.chNext) ||
430 					           (sc.chNext == '=' && cmdState != BASH_CMD_ARITH)) {
431 						// an unquoted here-doc delimiter, no special handling
432 						HereDoc.State = 1;
433 					} else if (sc.chNext == '<') {	// HERE string <<<
434 						sc.Forward();
435 						sc.ForwardSetState(SCE_SH_DEFAULT);
436 					} else if (IsASpace(sc.chNext)) {
437 						// eat whitespace
438 					} else if (setLeftShift.Contains(sc.chNext) ||
439 					           (sc.chNext == '=' && cmdState == BASH_CMD_ARITH)) {
440 						// left shift <<$var or <<= cases
441 						sc.ChangeState(SCE_SH_OPERATOR);
442 						sc.ForwardSetState(SCE_SH_DEFAULT);
443 					} else {
444 						// symbols terminates; deprecated zero-length delimiter
445 						HereDoc.State = 1;
446 					}
447 				} else if (HereDoc.State == 1) { // collect the delimiter
448 					// * if single quoted, there's no escape
449 					// * if double quoted, there are \\ and \" escapes
450 					if ((HereDoc.Quote == '\'' && sc.ch != HereDoc.Quote) ||
451 					    (HereDoc.Quoted && sc.ch != HereDoc.Quote && sc.ch != '\\') ||
452 					    (HereDoc.Quote != '\'' && sc.chPrev == '\\') ||
453 					    (setHereDoc2.Contains(sc.ch))) {
454 						HereDoc.Append(sc.ch);
455 					} else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) {	// closing quote => end of delimiter
456 						sc.ForwardSetState(SCE_SH_DEFAULT);
457 					} else if (sc.ch == '\\') {
458 						if (HereDoc.Quoted && sc.chNext != HereDoc.Quote && sc.chNext != '\\') {
459 							// in quoted prefixes only \ and the quote eat the escape
460 							HereDoc.Append(sc.ch);
461 						} else {
462 							// skip escape prefix
463 						}
464 					} else if (!HereDoc.Quoted) {
465 						sc.SetState(SCE_SH_DEFAULT);
466 					}
467 					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {	// force blowup
468 						sc.SetState(SCE_SH_ERROR);
469 						HereDoc.State = 0;
470 					}
471 				}
472 				break;
473 			case SCE_SH_HERE_Q:
474 				// HereDoc.State == 2
475 				if (sc.atLineStart) {
476 					sc.SetState(SCE_SH_HERE_Q);
477 					int prefixws = 0;
478 					while (sc.ch == '\t' && !sc.atLineEnd) {	// tabulation prefix
479 						sc.Forward();
480 						prefixws++;
481 					}
482 					if (prefixws > 0)
483 						sc.SetState(SCE_SH_HERE_Q);
484 					while (!sc.atLineEnd) {
485 						sc.Forward();
486 					}
487 					char s[HERE_DELIM_MAX];
488 					sc.GetCurrent(s, sizeof(s));
489 					if (sc.LengthCurrent() == 0) {  // '' or "" delimiters
490 						if ((prefixws == 0 || HereDoc.Indent) &&
491 							HereDoc.Quoted && HereDoc.DelimiterLength == 0)
492 							sc.SetState(SCE_SH_DEFAULT);
493 						break;
494 					}
495 					if (s[strlen(s) - 1] == '\r')
496 						s[strlen(s) - 1] = '\0';
497 					if (strcmp(HereDoc.Delimiter, s) == 0) {
498 						if ((prefixws == 0) ||	// indentation rule
499 							(prefixws > 0 && HereDoc.Indent)) {
500 							sc.SetState(SCE_SH_DEFAULT);
501 							break;
502 						}
503 					}
504 				}
505 				break;
506 			case SCE_SH_SCALAR:	// variable names
507 				if (!setParam.Contains(sc.ch)) {
508 					if (sc.LengthCurrent() == 1) {
509 						// Special variable: $(, $_ etc.
510 						sc.ForwardSetState(SCE_SH_DEFAULT);
511 					} else {
512 						sc.SetState(SCE_SH_DEFAULT);
513 					}
514 				}
515 				break;
516 			case SCE_SH_STRING:	// delimited styles, can nest
517 			case SCE_SH_BACKTICKS:
518 				if (sc.ch == '\\' && QuoteStack.Up != '\\') {
519 					if (QuoteStack.Style != BASH_DELIM_LITERAL)
520 						sc.Forward();
521 				} else if (sc.ch == QuoteStack.Down) {
522 					QuoteStack.Count--;
523 					if (QuoteStack.Count == 0) {
524 						if (QuoteStack.Depth > 0) {
525 							QuoteStack.Pop();
526 						} else
527 							sc.ForwardSetState(SCE_SH_DEFAULT);
528 					}
529 				} else if (sc.ch == QuoteStack.Up) {
530 					QuoteStack.Count++;
531 				} else {
532 					if (QuoteStack.Style == BASH_DELIM_STRING ||
533 						QuoteStack.Style == BASH_DELIM_LSTRING
534 					) {	// do nesting for "string", $"locale-string"
535 						if (sc.ch == '`') {
536 							QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
537 						} else if (sc.ch == '$' && sc.chNext == '(') {
538 							sc.Forward();
539 							QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
540 						}
541 					} else if (QuoteStack.Style == BASH_DELIM_COMMAND ||
542 							   QuoteStack.Style == BASH_DELIM_BACKTICK
543 					) {	// do nesting for $(command), `command`
544 						if (sc.ch == '\'') {
545 							QuoteStack.Push(sc.ch, BASH_DELIM_LITERAL);
546 						} else if (sc.ch == '\"') {
547 							QuoteStack.Push(sc.ch, BASH_DELIM_STRING);
548 						} else if (sc.ch == '`') {
549 							QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
550 						} else if (sc.ch == '$') {
551 							if (sc.chNext == '\'') {
552 								sc.Forward();
553 								QuoteStack.Push(sc.ch, BASH_DELIM_CSTRING);
554 							} else if (sc.chNext == '\"') {
555 								sc.Forward();
556 								QuoteStack.Push(sc.ch, BASH_DELIM_LSTRING);
557 							} else if (sc.chNext == '(') {
558 								sc.Forward();
559 								QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
560 							}
561 						}
562 					}
563 				}
564 				break;
565 			case SCE_SH_PARAM: // ${parameter}
566 				if (sc.ch == '\\' && Quote.Up != '\\') {
567 					sc.Forward();
568 				} else if (sc.ch == Quote.Down) {
569 					Quote.Count--;
570 					if (Quote.Count == 0) {
571 						sc.ForwardSetState(SCE_SH_DEFAULT);
572 					}
573 				} else if (sc.ch == Quote.Up) {
574 					Quote.Count++;
575 				}
576 				break;
577 			case SCE_SH_CHARACTER: // singly-quoted strings
578 				if (sc.ch == Quote.Down) {
579 					Quote.Count--;
580 					if (Quote.Count == 0) {
581 						sc.ForwardSetState(SCE_SH_DEFAULT);
582 					}
583 				}
584 				break;
585 		}
586 
587 		// Must check end of HereDoc state 1 before default state is handled
588 		if (HereDoc.State == 1 && sc.atLineEnd) {
589 			// Begin of here-doc (the line after the here-doc delimiter):
590 			// Lexically, the here-doc starts from the next line after the >>, but the
591 			// first line of here-doc seem to follow the style of the last EOL sequence
592 			HereDoc.State = 2;
593 			if (HereDoc.Quoted) {
594 				if (sc.state == SCE_SH_HERE_DELIM) {
595 					// Missing quote at end of string! Syntax error in bash 4.3
596 					// Mark this bit as an error, do not colour any here-doc
597 					sc.ChangeState(SCE_SH_ERROR);
598 					sc.SetState(SCE_SH_DEFAULT);
599 				} else {
600 					// HereDoc.Quote always == '\''
601 					sc.SetState(SCE_SH_HERE_Q);
602 				}
603 			} else if (HereDoc.DelimiterLength == 0) {
604 				// no delimiter, illegal (but '' and "" are legal)
605 				sc.ChangeState(SCE_SH_ERROR);
606 				sc.SetState(SCE_SH_DEFAULT);
607 			} else {
608 				sc.SetState(SCE_SH_HERE_Q);
609 			}
610 		}
611 
612 		// update cmdState about the current command segment
613 		if (stylePrev != SCE_SH_DEFAULT && sc.state == SCE_SH_DEFAULT) {
614 			cmdState = cmdStateNew;
615 		}
616 		// Determine if a new state should be entered.
617 		if (sc.state == SCE_SH_DEFAULT) {
618 			if (sc.ch == '\\') {
619 				// Bash can escape any non-newline as a literal
620 				sc.SetState(SCE_SH_IDENTIFIER);
621 				if (sc.chNext == '\r' || sc.chNext == '\n')
622 					sc.SetState(SCE_SH_OPERATOR);
623 			} else if (IsADigit(sc.ch)) {
624 				sc.SetState(SCE_SH_NUMBER);
625 				numBase = BASH_BASE_DECIMAL;
626 				if (sc.ch == '0') {	// hex,octal
627 					if (sc.chNext == 'x' || sc.chNext == 'X') {
628 						numBase = BASH_BASE_HEX;
629 						sc.Forward();
630 					} else if (IsADigit(sc.chNext)) {
631 #ifdef PEDANTIC_OCTAL
632 						numBase = BASH_BASE_OCTAL;
633 #else
634 						numBase = BASH_BASE_HEX;
635 #endif
636 					}
637 				}
638 			} else if (setWordStart.Contains(sc.ch)) {
639 				sc.SetState(SCE_SH_WORD);
640 			} else if (sc.ch == '#') {
641 				if (stylePrev != SCE_SH_WORD && stylePrev != SCE_SH_IDENTIFIER &&
642 					(sc.currentPos == 0 || setMetaCharacter.Contains(sc.chPrev))) {
643 					sc.SetState(SCE_SH_COMMENTLINE);
644 				} else {
645 					sc.SetState(SCE_SH_WORD);
646 				}
647 				// handle some zsh features within arithmetic expressions only
648 				if (cmdState == BASH_CMD_ARITH) {
649 					if (sc.chPrev == '[') {	// [#8] [##8] output digit setting
650 						sc.SetState(SCE_SH_WORD);
651 						if (sc.chNext == '#') {
652 							sc.Forward();
653 						}
654 					} else if (sc.Match("##^") && IsUpperCase(sc.GetRelative(3))) {	// ##^A
655 						sc.SetState(SCE_SH_IDENTIFIER);
656 						sc.Forward(3);
657 					} else if (sc.chNext == '#' && !IsASpace(sc.GetRelative(2))) {	// ##a
658 						sc.SetState(SCE_SH_IDENTIFIER);
659 						sc.Forward(2);
660 					} else if (setWordStart.Contains(sc.chNext)) {	// #name
661 						sc.SetState(SCE_SH_IDENTIFIER);
662 					}
663 				}
664 			} else if (sc.ch == '\"') {
665 				sc.SetState(SCE_SH_STRING);
666 				QuoteStack.Start(sc.ch, BASH_DELIM_STRING);
667 			} else if (sc.ch == '\'') {
668 				sc.SetState(SCE_SH_CHARACTER);
669 				Quote.Start(sc.ch);
670 			} else if (sc.ch == '`') {
671 				sc.SetState(SCE_SH_BACKTICKS);
672 				QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
673 			} else if (sc.ch == '$') {
674 				if (sc.Match("$((")) {
675 					sc.SetState(SCE_SH_OPERATOR);	// handle '((' later
676 					continue;
677 				}
678 				sc.SetState(SCE_SH_SCALAR);
679 				sc.Forward();
680 				if (sc.ch == '{') {
681 					sc.ChangeState(SCE_SH_PARAM);
682 					Quote.Start(sc.ch);
683 				} else if (sc.ch == '\'') {
684 					sc.ChangeState(SCE_SH_STRING);
685 					QuoteStack.Start(sc.ch, BASH_DELIM_CSTRING);
686 				} else if (sc.ch == '"') {
687 					sc.ChangeState(SCE_SH_STRING);
688 					QuoteStack.Start(sc.ch, BASH_DELIM_LSTRING);
689 				} else if (sc.ch == '(') {
690 					sc.ChangeState(SCE_SH_BACKTICKS);
691 					QuoteStack.Start(sc.ch, BASH_DELIM_COMMAND);
692 				} else if (sc.ch == '`') {	// $` seen in a configure script, valid?
693 					sc.ChangeState(SCE_SH_BACKTICKS);
694 					QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
695 				} else {
696 					continue;	// scalar has no delimiter pair
697 				}
698 			} else if (sc.Match('<', '<')) {
699 				sc.SetState(SCE_SH_HERE_DELIM);
700 				HereDoc.State = 0;
701 				if (sc.GetRelative(2) == '-') {	// <<- indent case
702 					HereDoc.Indent = true;
703 					sc.Forward();
704 				} else {
705 					HereDoc.Indent = false;
706 				}
707 			} else if (sc.ch == '-'	&&	// one-char file test operators
708 					   setSingleCharOp.Contains(sc.chNext) &&
709 					   !setWord.Contains(sc.GetRelative(2)) &&
710 					   IsASpace(sc.chPrev)) {
711 				sc.SetState(SCE_SH_WORD);
712 				sc.Forward();
713 			} else if (setBashOperator.Contains(sc.ch)) {
714 				char s[10];
715 				bool isCmdDelim = false;
716 				sc.SetState(SCE_SH_OPERATOR);
717 				// globs have no whitespace, do not appear in arithmetic expressions
718 				if (cmdState != BASH_CMD_ARITH && sc.ch == '(' && sc.chNext != '(') {
719 					int i = GlobScan(sc);
720 					if (i > 1) {
721 						sc.SetState(SCE_SH_IDENTIFIER);
722 						sc.Forward(i);
723 						continue;
724 					}
725 				}
726 				// handle opening delimiters for test/arithmetic expressions - ((,[[,[
727 				if (cmdState == BASH_CMD_START
728 				 || cmdState == BASH_CMD_BODY) {
729 					if (sc.Match('(', '(')) {
730 						cmdState = BASH_CMD_ARITH;
731 						sc.Forward();
732 					} else if (sc.Match('[', '[') && IsASpace(sc.GetRelative(2))) {
733 						cmdState = BASH_CMD_TEST;
734 						testExprType = 1;
735 						sc.Forward();
736 					} else if (sc.ch == '[' && IsASpace(sc.chNext)) {
737 						cmdState = BASH_CMD_TEST;
738 						testExprType = 2;
739 					}
740 				}
741 				// special state -- for ((x;y;z)) in ... looping
742 				if (cmdState == BASH_CMD_WORD && sc.Match('(', '(')) {
743 					cmdState = BASH_CMD_ARITH;
744 					sc.Forward();
745 					continue;
746 				}
747 				// handle command delimiters in command START|BODY|WORD state, also TEST if 'test'
748 				if (cmdState == BASH_CMD_START
749 				 || cmdState == BASH_CMD_BODY
750 				 || cmdState == BASH_CMD_WORD
751 				 || (cmdState == BASH_CMD_TEST && testExprType == 0)) {
752 					s[0] = static_cast<char>(sc.ch);
753 					if (setBashOperator.Contains(sc.chNext)) {
754 						s[1] = static_cast<char>(sc.chNext);
755 						s[2] = '\0';
756 						isCmdDelim = cmdDelimiter.InList(s);
757 						if (isCmdDelim)
758 							sc.Forward();
759 					}
760 					if (!isCmdDelim) {
761 						s[1] = '\0';
762 						isCmdDelim = cmdDelimiter.InList(s);
763 					}
764 					if (isCmdDelim) {
765 						cmdState = BASH_CMD_DELIM;
766 						continue;
767 					}
768 				}
769 				// handle closing delimiters for test/arithmetic expressions - )),]],]
770 				if (cmdState == BASH_CMD_ARITH && sc.Match(')', ')')) {
771 					cmdState = BASH_CMD_BODY;
772 					sc.Forward();
773 				} else if (cmdState == BASH_CMD_TEST && IsASpace(sc.chPrev)) {
774 					if (sc.Match(']', ']') && testExprType == 1) {
775 						sc.Forward();
776 						cmdState = BASH_CMD_BODY;
777 					} else if (sc.ch == ']' && testExprType == 2) {
778 						cmdState = BASH_CMD_BODY;
779 					}
780 				}
781 			}
782 		}// sc.state
783 	}
784 	sc.Complete();
785 	if (sc.state == SCE_SH_HERE_Q) {
786 		styler.ChangeLexerState(sc.currentPos, styler.Length());
787 	}
788 	sc.Complete();
789 }
790 
IsCommentLine(Sci_Position line,Accessor & styler)791 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
792 	Sci_Position pos = styler.LineStart(line);
793 	Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
794 	for (Sci_Position i = pos; i < eol_pos; i++) {
795 		char ch = styler[i];
796 		if (ch == '#')
797 			return true;
798 		else if (ch != ' ' && ch != '\t')
799 			return false;
800 	}
801 	return false;
802 }
803 
FoldBashDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)804 static void FoldBashDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
805 						Accessor &styler) {
806 	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
807 	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
808 	Sci_PositionU endPos = startPos + length;
809 	int visibleChars = 0;
810 	int skipHereCh = 0;
811 	Sci_Position lineCurrent = styler.GetLine(startPos);
812 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
813 	int levelCurrent = levelPrev;
814 	char chNext = styler[startPos];
815 	int styleNext = styler.StyleAt(startPos);
816 	for (Sci_PositionU i = startPos; i < endPos; i++) {
817 		char ch = chNext;
818 		chNext = styler.SafeGetCharAt(i + 1);
819 		int style = styleNext;
820 		styleNext = styler.StyleAt(i + 1);
821 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
822 		// Comment folding
823 		if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
824 		{
825 			if (!IsCommentLine(lineCurrent - 1, styler)
826 				&& IsCommentLine(lineCurrent + 1, styler))
827 				levelCurrent++;
828 			else if (IsCommentLine(lineCurrent - 1, styler)
829 					 && !IsCommentLine(lineCurrent + 1, styler))
830 				levelCurrent--;
831 		}
832 		if (style == SCE_SH_OPERATOR) {
833 			if (ch == '{') {
834 				levelCurrent++;
835 			} else if (ch == '}') {
836 				levelCurrent--;
837 			}
838 		}
839 		// Here Document folding
840 		if (style == SCE_SH_HERE_DELIM) {
841 			if (ch == '<' && chNext == '<') {
842 				if (styler.SafeGetCharAt(i + 2) == '<') {
843 					skipHereCh = 1;
844 				} else {
845 					if (skipHereCh == 0) {
846 						levelCurrent++;
847 					} else {
848 						skipHereCh = 0;
849 					}
850 				}
851 			}
852 		} else if (style == SCE_SH_HERE_Q && styler.StyleAt(i+1) == SCE_SH_DEFAULT) {
853 			levelCurrent--;
854 		}
855 		if (atEOL) {
856 			int lev = levelPrev;
857 			if (visibleChars == 0 && foldCompact)
858 				lev |= SC_FOLDLEVELWHITEFLAG;
859 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
860 				lev |= SC_FOLDLEVELHEADERFLAG;
861 			if (lev != styler.LevelAt(lineCurrent)) {
862 				styler.SetLevel(lineCurrent, lev);
863 			}
864 			lineCurrent++;
865 			levelPrev = levelCurrent;
866 			visibleChars = 0;
867 		}
868 		if (!isspacechar(ch))
869 			visibleChars++;
870 	}
871 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
872 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
873 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
874 }
875 
// Descriptions of the word lists used by this lexer; the array ends with a 0 entry.
// The single list, "Keywords", corresponds to keywordlists[0] read by ColouriseBashDoc.
static const char * const bashWordListDesc[] = {
	"Keywords",
	0
};
880 
// Lexer module object for Bash: constructed with the SCLEX_BASH id, the colouriser,
// the name "bash", the folder and the word list descriptions defined in this file.
LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc);
882