// Scintilla source code edit control
2 /** @file LexTCMD.cxx
3  ** Lexer for Take Command / TCC batch scripts (.bat, .btm, .cmd).
4  **/
5 // Written by Rex Conn (rconn [at] jpsoft [dot] com)
6 // based on the CMD lexer
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 #include <ctype.h>
15 
16 #include "ILexer.h"
17 #include "Scintilla.h"
18 #include "SciLexer.h"
19 
20 #include "WordList.h"
21 #include "LexAccessor.h"
22 #include "Accessor.h"
23 #include "StyleContext.h"
24 #include "CharacterSet.h"
25 #include "LexerModule.h"
26 
27 using namespace Scintilla;
28 
29 
IsAlphabetic(int ch)30 static bool IsAlphabetic(int ch) {
31 	return IsASCII(ch) && isalpha(ch);
32 }
33 
AtEOL(Accessor & styler,Sci_PositionU i)34 static inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
35 	return (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
36 }
37 
// Tests for BATCH Operators.
// Returns true when ch is one of:  = + > < | & ! ? * ( )
static bool IsBOperator(char ch) {
	switch (ch) {
	case '=':
	case '+':
	case '>':
	case '<':
	case '|':
	case '&':
	case '!':
	case '?':
	case '*':
	case '(':
	case ')':
		return true;
	default:
		return false;
	}
}
42 
// Tests for BATCH Separators.
// Returns true when ch is one of:  \ . ; space tab [ ] " ' /
static bool IsBSeparator(char ch) {
	// Plain character table (no terminating NUL, so '\0' never matches).
	static const char separators[] = { '\\', '.', ';', ' ', '\t', '[', ']', '\"', '\'', '/' };
	for (size_t k = 0; k < sizeof(separators); k++) {
		if (separators[k] == ch) {
			return true;
		}
	}
	return false;
}
47 
// Find length of CMD FOR variable with modifier (%~...) or return 0.
//
// wordBuffer must be NUL-terminated. Two prefixes are recognised at the start
// of wordBuffer: "%~" (2 characters) and "%%~" (3 characters). Every modifier
// letter that follows (A D F N P S T X Z, in either case) is included in the
// count; scanning stops at the first other character or at the terminator.
//
// Returns the number of characters covered by the prefix plus modifier
// letters, or 0 when wordBuffer does not start with one of the prefixes.
static unsigned int GetBatchVarLen( const char *wordBuffer )
{
	if ( wordBuffer[0] != '%' )
		return 0;

	unsigned int nLength;
	if ( wordBuffer[1] == '~' )
		nLength = 2;	// skip "%~"
	else if (( wordBuffer[1] == '%' ) && ( wordBuffer[2] == '~' ))
		nLength = 3;	// skip "%%~" (previously only 1 was skipped, so the scan stopped on the second '%')
	else
		return 0;

	for ( ; ( wordBuffer[nLength] ); nLength++ ) {

		// <ctype.h> functions require a value representable as unsigned char
		switch ( toupper(static_cast<unsigned char>(wordBuffer[nLength])) ) {
		case 'A':
			// file attributes
		case 'D':
			// drive letter only
		case 'F':
			// fully qualified path name
		case 'N':
			// filename only
		case 'P':
			// path only
		case 'S':
			// short name
		case 'T':
			// date / time of file
		case 'X':
			// file extension only
		case 'Z':
			// file size
			break;
		default:
			// first non-modifier character ends the variable prefix
			return nLength;
		}
	}

	return nLength;
}
91 
92 
ColouriseTCMDLine(char * lineBuffer,Sci_PositionU lengthLine,Sci_PositionU startLine,Sci_PositionU endPos,WordList * keywordlists[],Accessor & styler)93 static void ColouriseTCMDLine( char *lineBuffer, Sci_PositionU lengthLine, Sci_PositionU startLine, Sci_PositionU endPos, WordList *keywordlists[], Accessor &styler)
94 {
95 	Sci_PositionU offset = 0;	// Line Buffer Offset
96 	char wordBuffer[260];		// Word Buffer - large to catch long paths
97 	Sci_PositionU wbl;			// Word Buffer Length
98 	Sci_PositionU wbo;			// Word Buffer Offset - also Special Keyword Buffer Length
99 	WordList &keywords = *keywordlists[0];      // Internal Commands
100 //	WordList &keywords2 = *keywordlists[1];     // Aliases (optional)
101 	bool isDelayedExpansion = 1;				// !var!
102 
103 	bool continueProcessing = true;	// Used to toggle Regular Keyword Checking
104 	// Special Keywords are those that allow certain characters without whitespace after the command
105 	// Examples are: cd. cd\ echo: echo. path=
106 	bool inString = false; // Used for processing while ""
107 	// Special Keyword Buffer used to determine if the first n characters is a Keyword
108 	char sKeywordBuffer[260] = "";	// Special Keyword Buffer
109 	bool sKeywordFound;		// Exit Special Keyword for-loop if found
110 
111 	// Skip leading whitespace
112 	while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
113 		offset++;
114 	}
115 	// Colorize Default Text
116 	styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT);
117 
118 	if ( offset >= lengthLine )
119 		return;
120 
121 	// Check for Fake Label (Comment) or Real Label - return if found
122 	if (lineBuffer[offset] == ':') {
123 		if (lineBuffer[offset + 1] == ':') {
124 			// Colorize Fake Label (Comment) - :: is the same as REM
125 			styler.ColourTo(endPos, SCE_TCMD_COMMENT);
126 		} else {
127 			// Colorize Real Label
128 			styler.ColourTo(endPos, SCE_TCMD_LABEL);
129 		}
130 		return;
131 
132 	// Check for Comment - return if found
133 	} else if (( CompareNCaseInsensitive(lineBuffer+offset, "rem", 3) == 0 ) && (( lineBuffer[offset+3] == 0 ) || ( isspace(lineBuffer[offset+3] )))) {
134 			styler.ColourTo(endPos, SCE_TCMD_COMMENT);
135 			return;
136 
137 	// Check for Drive Change (Drive Change is internal command) - return if found
138 	} else if ((IsAlphabetic(lineBuffer[offset])) &&
139 		(lineBuffer[offset + 1] == ':') &&
140 		((isspacechar(lineBuffer[offset + 2])) ||
141 		(((lineBuffer[offset + 2] == '\\')) &&
142 		(isspacechar(lineBuffer[offset + 3]))))) {
143 		// Colorize Regular Keyword
144 		styler.ColourTo(endPos, SCE_TCMD_WORD);
145 		return;
146 	}
147 
148 	// Check for Hide Command (@ECHO OFF/ON)
149 	if (lineBuffer[offset] == '@') {
150 		styler.ColourTo(startLine + offset, SCE_TCMD_HIDE);
151 		offset++;
152 	}
153 	// Skip whitespace
154 	while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
155 		offset++;
156 	}
157 
158 	// Read remainder of line word-at-a-time or remainder-of-word-at-a-time
159 	while (offset < lengthLine) {
160 		if (offset > startLine) {
161 			// Colorize Default Text
162 			styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT);
163 		}
164 		// Copy word from Line Buffer into Word Buffer
165 		wbl = 0;
166 		for (; offset < lengthLine && ( wbl < 260 ) && !isspacechar(lineBuffer[offset]); wbl++, offset++) {
167 			wordBuffer[wbl] = static_cast<char>(tolower(lineBuffer[offset]));
168 		}
169 		wordBuffer[wbl] = '\0';
170 		wbo = 0;
171 
172 		// Check for Separator
173 		if (IsBSeparator(wordBuffer[0])) {
174 
175 			// Reset Offset to re-process remainder of word
176 			offset -= (wbl - 1);
177 			// Colorize Default Text
178 			styler.ColourTo(startLine + offset - 1, SCE_BAT_DEFAULT);
179 
180 			if (wordBuffer[0] == '"')
181 				inString = !inString;
182 
183 		// Check for Regular expression
184 		} else if (( wordBuffer[0] == ':' ) && ( wordBuffer[1] == ':' ) && (continueProcessing)) {
185 
186 			// Colorize Regular exoressuin
187 			styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT);
188 			// No need to Reset Offset
189 
190 		// Check for Labels in text (... :label)
191 		} else if (wordBuffer[0] == ':' && isspacechar(lineBuffer[offset - wbl - 1])) {
192 			// Colorize Default Text
193 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT);
194 			// Colorize Label
195 			styler.ColourTo(startLine + offset - 1, SCE_TCMD_CLABEL);
196 			// No need to Reset Offset
197 		// Check for delayed expansion Variable (!x...!)
198 		} else if (isDelayedExpansion && wordBuffer[0] == '!') {
199 			// Colorize Default Text
200 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT);
201 			wbo++;
202 			// Search to end of word for second !
203 			while ((wbo < wbl) && (wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) && (!IsBSeparator(wordBuffer[wbo]))) {
204 				wbo++;
205 			}
206 			if (wordBuffer[wbo] == '!') {
207 				wbo++;
208 				// Colorize Environment Variable
209 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_EXPANSION);
210 			} else {
211 				wbo = 1;
212 				// Colorize Symbol
213 				styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_DEFAULT);
214 			}
215 
216 			// Reset Offset to re-process remainder of word
217 			offset -= (wbl - wbo);
218 
219 		// Check for Regular Keyword in list
220 		} else if ((keywords.InList(wordBuffer)) &&	(!inString) && (continueProcessing)) {
221 
222 			// ECHO, PATH, and PROMPT require no further Regular Keyword Checking
223 			if ((CompareCaseInsensitive(wordBuffer, "echo") == 0) ||
224 			  (CompareCaseInsensitive(sKeywordBuffer, "echos") == 0) ||
225 			  (CompareCaseInsensitive(sKeywordBuffer, "echoerr") == 0) ||
226 			  (CompareCaseInsensitive(sKeywordBuffer, "echoserr") == 0) ||
227 			  (CompareCaseInsensitive(wordBuffer, "path") == 0) ||
228 			  (CompareCaseInsensitive(wordBuffer, "prompt") == 0)) {
229 				continueProcessing = false;
230 			}
231 
232 			// Colorize Regular keyword
233 			styler.ColourTo(startLine + offset - 1, SCE_TCMD_WORD);
234 			// No need to Reset Offset
235 
236 		} else if ((wordBuffer[0] != '%') && (wordBuffer[0] != '!') && (!IsBOperator(wordBuffer[0])) &&	(!inString) && (continueProcessing)) {
237 
238 			// a few commands accept "illegal" syntax -- cd\, echo., etc.
239 			sscanf( wordBuffer, "%[^.<>|&=\\/]", sKeywordBuffer );
240 			sKeywordFound = false;
241 
242 			if ((CompareCaseInsensitive(sKeywordBuffer, "echo") == 0) ||
243 			  (CompareCaseInsensitive(sKeywordBuffer, "echos") == 0) ||
244 			  (CompareCaseInsensitive(sKeywordBuffer, "echoerr") == 0) ||
245 			  (CompareCaseInsensitive(sKeywordBuffer, "echoserr") == 0) ||
246 			  (CompareCaseInsensitive(sKeywordBuffer, "cd") == 0) ||
247 			  (CompareCaseInsensitive(sKeywordBuffer, "path") == 0) ||
248 			  (CompareCaseInsensitive(sKeywordBuffer, "prompt") == 0)) {
249 
250 				// no further Regular Keyword Checking
251 				continueProcessing = false;
252 				sKeywordFound = true;
253 				wbo = (Sci_PositionU)strlen( sKeywordBuffer );
254 
255 				// Colorize Special Keyword as Regular Keyword
256 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_WORD);
257 				// Reset Offset to re-process remainder of word
258 				offset -= (wbl - wbo);
259 			}
260 
261 			// Check for Default Text
262 			if (!sKeywordFound) {
263 				wbo = 0;
264 				// Read up to %, Operator or Separator
265 				while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!isDelayedExpansion || wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) &&	(!IsBSeparator(wordBuffer[wbo]))) {
266 					wbo++;
267 				}
268 				// Colorize Default Text
269 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_DEFAULT);
270 				// Reset Offset to re-process remainder of word
271 				offset -= (wbl - wbo);
272 			}
273 
274 		// Check for Argument  (%n), Environment Variable (%x...%) or Local Variable (%%a)
275 		} else if (wordBuffer[0] == '%') {
276 			unsigned int varlen;
277 			unsigned int n = 1;
278 			// Colorize Default Text
279 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT);
280 			wbo++;
281 
282 			// check for %[nn] syntax
283 			if ( wordBuffer[1] == '[' ) {
284 				n++;
285 				while ((n < wbl) && (wordBuffer[n] != ']')) {
286 					n++;
287 				}
288 				if ( wordBuffer[n] == ']' )
289 					n++;
290 				goto ColorizeArg;
291 			}
292 
293 			// Search to end of word for second % or to the first terminator (can be a long path)
294 			while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!IsBOperator(wordBuffer[wbo])) && (!IsBSeparator(wordBuffer[wbo]))) {
295 				wbo++;
296 			}
297 
298 			// Check for Argument (%n) or (%*)
299 			if (((isdigit(wordBuffer[1])) || (wordBuffer[1] == '*')) && (wordBuffer[wbo] != '%')) {
300 				while (( wordBuffer[n] ) && ( strchr( "%0123456789*#$", wordBuffer[n] ) != NULL ))
301 					n++;
302 ColorizeArg:
303 				// Colorize Argument
304 				styler.ColourTo(startLine + offset - 1 - (wbl - n), SCE_TCMD_IDENTIFIER);
305 				// Reset Offset to re-process remainder of word
306 				offset -= (wbl - n);
307 
308 			// Check for Variable with modifiers (%~...)
309 			} else if ((varlen = GetBatchVarLen(wordBuffer)) != 0) {
310 
311 				// Colorize Variable
312 				styler.ColourTo(startLine + offset - 1 - (wbl - varlen), SCE_TCMD_IDENTIFIER);
313 				// Reset Offset to re-process remainder of word
314 				offset -= (wbl - varlen);
315 
316 			// Check for Environment Variable (%x...%)
317 			} else if (( wordBuffer[1] ) && ( wordBuffer[1] != '%')) {
318 				if ( wordBuffer[wbo] == '%' )
319 					wbo++;
320 
321 				// Colorize Environment Variable
322 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_ENVIRONMENT);
323 				// Reset Offset to re-process remainder of word
324 				offset -= (wbl - wbo);
325 
326 			// Check for Local Variable (%%a)
327 			} else if (	(wbl > 2) && (wordBuffer[1] == '%') && (wordBuffer[2] != '%') && (!IsBOperator(wordBuffer[2])) && (!IsBSeparator(wordBuffer[2]))) {
328 
329 				n = 2;
330 				while (( wordBuffer[n] ) && (!IsBOperator(wordBuffer[n])) && (!IsBSeparator(wordBuffer[n])))
331 					n++;
332 
333 				// Colorize Local Variable
334 				styler.ColourTo(startLine + offset - 1 - (wbl - n), SCE_TCMD_IDENTIFIER);
335 				// Reset Offset to re-process remainder of word
336 				offset -= (wbl - n);
337 
338 			// Check for %%
339 			} else if ((wbl > 1) && (wordBuffer[1] == '%')) {
340 
341 				// Colorize Symbols
342 				styler.ColourTo(startLine + offset - 1 - (wbl - 2), SCE_TCMD_DEFAULT);
343 				// Reset Offset to re-process remainder of word
344 				offset -= (wbl - 2);
345 			} else {
346 
347 				// Colorize Symbol
348 				styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_DEFAULT);
349 				// Reset Offset to re-process remainder of word
350 				offset -= (wbl - 1);
351 			}
352 
353 		// Check for Operator
354 		} else if (IsBOperator(wordBuffer[0])) {
355 			// Colorize Default Text
356 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT);
357 
358 			// Check for Pipe, compound, or conditional Operator
359 			if ((wordBuffer[0] == '|') || (wordBuffer[0] == '&')) {
360 
361 				// Colorize Pipe Operator
362 				styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_OPERATOR);
363 				// Reset Offset to re-process remainder of word
364 				offset -= (wbl - 1);
365 				continueProcessing = true;
366 
367 			// Check for Other Operator
368 			} else {
369 				// Check for > Operator
370 				if ((wordBuffer[0] == '>') || (wordBuffer[0] == '<')) {
371 					// Turn Keyword and External Command / Program checking back on
372 					continueProcessing = true;
373 				}
374 				// Colorize Other Operator
375 				if (!inString || !(wordBuffer[0] == '(' || wordBuffer[0] == ')'))
376 					styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_OPERATOR);
377 				// Reset Offset to re-process remainder of word
378 				offset -= (wbl - 1);
379 			}
380 
381 		// Check for Default Text
382 		} else {
383 			// Read up to %, Operator or Separator
384 			while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!isDelayedExpansion || wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) &&	(!IsBSeparator(wordBuffer[wbo]))) {
385 				wbo++;
386 			}
387 			// Colorize Default Text
388 			styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_DEFAULT);
389 			// Reset Offset to re-process remainder of word
390 			offset -= (wbl - wbo);
391 		}
392 
393 		// Skip whitespace - nothing happens if Offset was Reset
394 		while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
395 			offset++;
396 		}
397 	}
398 	// Colorize Default Text for remainder of line - currently not lexed
399 	styler.ColourTo(endPos, SCE_TCMD_DEFAULT);
400 }
401 
ColouriseTCMDDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * keywordlists[],Accessor & styler)402 static void ColouriseTCMDDoc( Sci_PositionU startPos, Sci_Position length, int /*initStyle*/, WordList *keywordlists[], Accessor &styler )
403 {
404 	char lineBuffer[16384];
405 
406 	styler.StartAt(startPos);
407 	styler.StartSegment(startPos);
408 	Sci_PositionU linePos = 0;
409 	Sci_PositionU startLine = startPos;
410 	for (Sci_PositionU i = startPos; i < startPos + length; i++) {
411 		lineBuffer[linePos++] = styler[i];
412 		if (AtEOL(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
413 			// End of line (or of line buffer) met, colourise it
414 			lineBuffer[linePos] = '\0';
415 			ColouriseTCMDLine(lineBuffer, linePos, startLine, i, keywordlists, styler);
416 			linePos = 0;
417 			startLine = i + 1;
418 		}
419 	}
420 	if (linePos > 0) {	// Last line does not have ending characters
421 		lineBuffer[linePos] = '\0';
422 		ColouriseTCMDLine(lineBuffer, linePos, startLine, startPos + length - 1, keywordlists, styler);
423 	}
424 }
425 
426 // Convert string to upper case
StrUpr(char * s)427 static void StrUpr(char *s) {
428 	while (*s) {
429 		*s = MakeUpperCase(*s);
430 		s++;
431 	}
432 }
433 
434 // Folding support (for DO, IFF, SWITCH, TEXT, and command groups)
FoldTCMDDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)435 static void FoldTCMDDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler)
436 {
437 	Sci_Position line = styler.GetLine(startPos);
438 	int level = styler.LevelAt(line);
439 	int levelIndent = 0;
440 	Sci_PositionU endPos = startPos + length;
441 	char s[16] = "";
442 
443     char chPrev = styler.SafeGetCharAt(startPos - 1);
444 
445 	// Scan for ( and )
446 	for (Sci_PositionU i = startPos; i < endPos; i++) {
447 
448 		int c = styler.SafeGetCharAt(i, '\n');
449 		int style = styler.StyleAt(i);
450         bool bLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
451 
452 		if (style == SCE_TCMD_OPERATOR) {
453 			// CheckFoldPoint
454 			if (c == '(') {
455 				levelIndent += 1;
456 			} else if (c == ')') {
457 				levelIndent -= 1;
458 			}
459 		}
460 
461         if (( bLineStart ) && ( style == SCE_TCMD_WORD )) {
462             for (Sci_PositionU j = 0; j < 10; j++) {
463                 if (!iswordchar(styler[i + j])) {
464                     break;
465                 }
466                 s[j] = styler[i + j];
467                 s[j + 1] = '\0';
468             }
469 
470 			StrUpr( s );
471             if ((strcmp(s, "DO") == 0) || (strcmp(s, "IFF") == 0) || (strcmp(s, "SWITCH") == 0) || (strcmp(s, "TEXT") == 0)) {
472                 levelIndent++;
473             } else if ((strcmp(s, "ENDDO") == 0) || (strcmp(s, "ENDIFF") == 0) || (strcmp(s, "ENDSWITCH") == 0) || (strcmp(s, "ENDTEXT") == 0)) {
474                 levelIndent--;
475             }
476         }
477 
478 		if (c == '\n') { // line end
479 				if (levelIndent > 0) {
480 						level |= SC_FOLDLEVELHEADERFLAG;
481 				}
482 				if (level != styler.LevelAt(line))
483 						styler.SetLevel(line, level);
484 				level += levelIndent;
485 				if ((level & SC_FOLDLEVELNUMBERMASK) < SC_FOLDLEVELBASE)
486 						level = SC_FOLDLEVELBASE;
487 				line++;
488 				// reset state
489 				levelIndent = 0;
490 				level &= ~SC_FOLDLEVELHEADERFLAG;
491 				level &= ~SC_FOLDLEVELWHITEFLAG;
492 		}
493 
494 		chPrev = c;
495 	}
496 }
497 
// Descriptions of the keyword lists used by this lexer, in index order.
// Index 0 ("Internal Commands") is the list ColouriseTCMDLine() reads as
// keywordlists[0]; index 1 ("Aliases") is referenced there only by a
// commented-out line.
// NOTE(review): the trailing 0 is presumably the end-of-list marker expected
// by LexerModule - confirm against its declaration.
static const char *const tcmdWordListDesc[] = {
	"Internal Commands",
	"Aliases",
	0
};

// Lexer module object for SCLEX_TCMD, named "tcmd", constructed with the
// colouriser and folder defined in this file and the descriptions above.
LexerModule lmTCMD(SCLEX_TCMD, ColouriseTCMDDoc, "tcmd", FoldTCMDDoc, tcmdWordListDesc);
505