// Scintilla source code edit control
2 /** @file LexTCMD.cxx
3  ** Lexer for Take Command / TCC batch scripts (.bat, .btm, .cmd).
4  **/
5 // Written by Rex Conn (rconn [at] jpsoft [dot] com)
6 // based on the CMD lexer
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 #include <ctype.h>
15 
16 #include "ILexer.h"
17 #include "Scintilla.h"
18 #include "SciLexer.h"
19 
20 #include "WordList.h"
21 #include "LexAccessor.h"
22 #include "Accessor.h"
23 #include "StyleContext.h"
24 #include "CharacterSet.h"
25 #include "LexerModule.h"
26 
27 #ifdef SCI_NAMESPACE
28 using namespace Scintilla;
29 #endif
30 
31 
IsAlphabetic(int ch)32 static bool IsAlphabetic(int ch) {
33 	return IsASCII(ch) && isalpha(ch);
34 }
35 
AtEOL(Accessor & styler,Sci_PositionU i)36 static inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
37 	return (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
38 }
39 
// Tests for BATCH Operators:  = + > < | & ! ? * ( )
static bool IsBOperator(char ch) {
	switch (ch) {
	case '=':
	case '+':
	case '>':
	case '<':
	case '|':
	case '&':
	case '!':
	case '?':
	case '*':
	case '(':
	case ')':
		return true;
	default:
		return false;
	}
}
44 
// Tests for BATCH Separators:  \ . ; space tab [ ] " ' /
static bool IsBSeparator(char ch) {
	switch (ch) {
	case '\\':
	case '.':
	case ';':
	case ' ':
	case '\t':
	case '[':
	case ']':
	case '\"':
	case '\'':
	case '/':
		return true;
	default:
		return false;
	}
}
49 
// Find length of CMD FOR variable with modifier (%~... or %%~...) or return 0.
//
// wordBuffer must be NUL-terminated.  The returned length covers the leading
// "%~" or "%%~" plus every directly following modifier letter
// (A, D, F, N, P, S, T, X, Z - case insensitive).  Counting stops at the first
// character that is not a modifier letter.  0 is returned when wordBuffer does
// not start with "%~" or "%%~".
static unsigned int GetBatchVarLen( char *wordBuffer )
{
	unsigned int nLength = 0;

	if ( wordBuffer[0] != '%' )
		return 0;

	if ( wordBuffer[1] == '~' )
		nLength = 2;		// skip "%~"
	else if (( wordBuffer[1] == '%' ) && ( wordBuffer[2] == '~' ))
		nLength = 3;		// skip "%%~" so the scan starts on the first modifier letter
	else
		return 0;

	for ( ; ( wordBuffer[nLength] ); nLength++ ) {

		// toupper() requires a value representable as unsigned char (or EOF),
		// so bytes >= 0x80 must not be passed as a negative char
		switch ( toupper(static_cast<unsigned char>(wordBuffer[nLength])) ) {
		case 'A':
			// file attributes
		case 'D':
			// drive letter only
		case 'F':
			// fully qualified path name
		case 'N':
			// filename only
		case 'P':
			// path only
		case 'S':
			// short name
		case 'T':
			// date / time of file
		case 'X':
			// file extension only
		case 'Z':
			// file size
			break;
		default:
			// first non-modifier character ends the variable
			return nLength;
		}
	}

	return nLength;
}
93 
94 
ColouriseTCMDLine(char * lineBuffer,Sci_PositionU lengthLine,Sci_PositionU startLine,Sci_PositionU endPos,WordList * keywordlists[],Accessor & styler)95 static void ColouriseTCMDLine( char *lineBuffer, Sci_PositionU lengthLine, Sci_PositionU startLine, Sci_PositionU endPos, WordList *keywordlists[], Accessor &styler)
96 {
97 	Sci_PositionU offset = 0;	// Line Buffer Offset
98 	char wordBuffer[260];		// Word Buffer - large to catch long paths
99 	Sci_PositionU wbl;			// Word Buffer Length
100 	Sci_PositionU wbo;			// Word Buffer Offset - also Special Keyword Buffer Length
101 	WordList &keywords = *keywordlists[0];      // Internal Commands
102 //	WordList &keywords2 = *keywordlists[1];     // Aliases (optional)
103 	bool isDelayedExpansion = 1;				// !var!
104 
105 	bool continueProcessing = true;	// Used to toggle Regular Keyword Checking
106 	// Special Keywords are those that allow certain characters without whitespace after the command
107 	// Examples are: cd. cd\ echo: echo. path=
108 	bool inString = false; // Used for processing while ""
109 	// Special Keyword Buffer used to determine if the first n characters is a Keyword
110 	char sKeywordBuffer[260] = "";	// Special Keyword Buffer
111 	bool sKeywordFound;		// Exit Special Keyword for-loop if found
112 
113 	// Skip leading whitespace
114 	while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
115 		offset++;
116 	}
117 	// Colorize Default Text
118 	styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT);
119 
120 	if ( offset >= lengthLine )
121 		return;
122 
123 	// Check for Fake Label (Comment) or Real Label - return if found
124 	if (lineBuffer[offset] == ':') {
125 		if (lineBuffer[offset + 1] == ':') {
126 			// Colorize Fake Label (Comment) - :: is the same as REM
127 			styler.ColourTo(endPos, SCE_TCMD_COMMENT);
128 		} else {
129 			// Colorize Real Label
130 			styler.ColourTo(endPos, SCE_TCMD_LABEL);
131 		}
132 		return;
133 
134 	// Check for Comment - return if found
135 	} else if (( CompareNCaseInsensitive(lineBuffer+offset, "rem", 3) == 0 ) && (( lineBuffer[offset+3] == 0 ) || ( isspace(lineBuffer[offset+3] )))) {
136 			styler.ColourTo(endPos, SCE_TCMD_COMMENT);
137 			return;
138 
139 	// Check for Drive Change (Drive Change is internal command) - return if found
140 	} else if ((IsAlphabetic(lineBuffer[offset])) &&
141 		(lineBuffer[offset + 1] == ':') &&
142 		((isspacechar(lineBuffer[offset + 2])) ||
143 		(((lineBuffer[offset + 2] == '\\')) &&
144 		(isspacechar(lineBuffer[offset + 3]))))) {
145 		// Colorize Regular Keyword
146 		styler.ColourTo(endPos, SCE_TCMD_WORD);
147 		return;
148 	}
149 
150 	// Check for Hide Command (@ECHO OFF/ON)
151 	if (lineBuffer[offset] == '@') {
152 		styler.ColourTo(startLine + offset, SCE_TCMD_HIDE);
153 		offset++;
154 	}
155 	// Skip whitespace
156 	while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
157 		offset++;
158 	}
159 
160 	// Read remainder of line word-at-a-time or remainder-of-word-at-a-time
161 	while (offset < lengthLine) {
162 		if (offset > startLine) {
163 			// Colorize Default Text
164 			styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT);
165 		}
166 		// Copy word from Line Buffer into Word Buffer
167 		wbl = 0;
168 		for (; offset < lengthLine && ( wbl < 260 ) && !isspacechar(lineBuffer[offset]); wbl++, offset++) {
169 			wordBuffer[wbl] = static_cast<char>(tolower(lineBuffer[offset]));
170 		}
171 		wordBuffer[wbl] = '\0';
172 		wbo = 0;
173 
174 		// Check for Separator
175 		if (IsBSeparator(wordBuffer[0])) {
176 
177 			// Reset Offset to re-process remainder of word
178 			offset -= (wbl - 1);
179 			// Colorize Default Text
180 			styler.ColourTo(startLine + offset - 1, SCE_BAT_DEFAULT);
181 
182 			if (wordBuffer[0] == '"')
183 				inString = !inString;
184 
185 		// Check for Regular expression
186 		} else if (( wordBuffer[0] == ':' ) && ( wordBuffer[1] == ':' ) && (continueProcessing)) {
187 
188 			// Colorize Regular exoressuin
189 			styler.ColourTo(startLine + offset - 1, SCE_TCMD_DEFAULT);
190 			// No need to Reset Offset
191 
192 		// Check for Labels in text (... :label)
193 		} else if (wordBuffer[0] == ':' && isspacechar(lineBuffer[offset - wbl - 1])) {
194 			// Colorize Default Text
195 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT);
196 			// Colorize Label
197 			styler.ColourTo(startLine + offset - 1, SCE_TCMD_CLABEL);
198 			// No need to Reset Offset
199 		// Check for delayed expansion Variable (!x...!)
200 		} else if (isDelayedExpansion && wordBuffer[0] == '!') {
201 			// Colorize Default Text
202 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT);
203 			wbo++;
204 			// Search to end of word for second !
205 			while ((wbo < wbl) && (wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) && (!IsBSeparator(wordBuffer[wbo]))) {
206 				wbo++;
207 			}
208 			if (wordBuffer[wbo] == '!') {
209 				wbo++;
210 				// Colorize Environment Variable
211 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_EXPANSION);
212 			} else {
213 				wbo = 1;
214 				// Colorize Symbol
215 				styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_DEFAULT);
216 			}
217 
218 			// Reset Offset to re-process remainder of word
219 			offset -= (wbl - wbo);
220 
221 		// Check for Regular Keyword in list
222 		} else if ((keywords.InList(wordBuffer)) &&	(!inString) && (continueProcessing)) {
223 
224 			// ECHO, PATH, and PROMPT require no further Regular Keyword Checking
225 			if ((CompareCaseInsensitive(wordBuffer, "echo") == 0) ||
226 			  (CompareCaseInsensitive(sKeywordBuffer, "echos") == 0) ||
227 			  (CompareCaseInsensitive(sKeywordBuffer, "echoerr") == 0) ||
228 			  (CompareCaseInsensitive(sKeywordBuffer, "echoserr") == 0) ||
229 			  (CompareCaseInsensitive(wordBuffer, "path") == 0) ||
230 			  (CompareCaseInsensitive(wordBuffer, "prompt") == 0)) {
231 				continueProcessing = false;
232 			}
233 
234 			// Colorize Regular keyword
235 			styler.ColourTo(startLine + offset - 1, SCE_TCMD_WORD);
236 			// No need to Reset Offset
237 
238 		} else if ((wordBuffer[0] != '%') && (wordBuffer[0] != '!') && (!IsBOperator(wordBuffer[0])) &&	(!inString) && (continueProcessing)) {
239 
240 			// a few commands accept "illegal" syntax -- cd\, echo., etc.
241 			sscanf( wordBuffer, "%[^.<>|&=\\/]", sKeywordBuffer );
242 			sKeywordFound = false;
243 
244 			if ((CompareCaseInsensitive(sKeywordBuffer, "echo") == 0) ||
245 			  (CompareCaseInsensitive(sKeywordBuffer, "echos") == 0) ||
246 			  (CompareCaseInsensitive(sKeywordBuffer, "echoerr") == 0) ||
247 			  (CompareCaseInsensitive(sKeywordBuffer, "echoserr") == 0) ||
248 			  (CompareCaseInsensitive(sKeywordBuffer, "cd") == 0) ||
249 			  (CompareCaseInsensitive(sKeywordBuffer, "path") == 0) ||
250 			  (CompareCaseInsensitive(sKeywordBuffer, "prompt") == 0)) {
251 
252 				// no further Regular Keyword Checking
253 				continueProcessing = false;
254 				sKeywordFound = true;
255 				wbo = (Sci_PositionU)strlen( sKeywordBuffer );
256 
257 				// Colorize Special Keyword as Regular Keyword
258 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_WORD);
259 				// Reset Offset to re-process remainder of word
260 				offset -= (wbl - wbo);
261 			}
262 
263 			// Check for Default Text
264 			if (!sKeywordFound) {
265 				wbo = 0;
266 				// Read up to %, Operator or Separator
267 				while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!isDelayedExpansion || wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) &&	(!IsBSeparator(wordBuffer[wbo]))) {
268 					wbo++;
269 				}
270 				// Colorize Default Text
271 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_DEFAULT);
272 				// Reset Offset to re-process remainder of word
273 				offset -= (wbl - wbo);
274 			}
275 
276 		// Check for Argument  (%n), Environment Variable (%x...%) or Local Variable (%%a)
277 		} else if (wordBuffer[0] == '%') {
278 			unsigned int varlen;
279 			unsigned int n = 1;
280 			// Colorize Default Text
281 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT);
282 			wbo++;
283 
284 			// check for %[nn] syntax
285 			if ( wordBuffer[1] == '[' ) {
286 				n++;
287 				while ((n < wbl) && (wordBuffer[n] != ']')) {
288 					n++;
289 				}
290 				if ( wordBuffer[n] == ']' )
291 					n++;
292 				goto ColorizeArg;
293 			}
294 
295 			// Search to end of word for second % or to the first terminator (can be a long path)
296 			while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!IsBOperator(wordBuffer[wbo])) && (!IsBSeparator(wordBuffer[wbo]))) {
297 				wbo++;
298 			}
299 
300 			// Check for Argument (%n) or (%*)
301 			if (((isdigit(wordBuffer[1])) || (wordBuffer[1] == '*')) && (wordBuffer[wbo] != '%')) {
302 				while (( wordBuffer[n] ) && ( strchr( "%0123456789*#$", wordBuffer[n] ) != NULL ))
303 					n++;
304 ColorizeArg:
305 				// Colorize Argument
306 				styler.ColourTo(startLine + offset - 1 - (wbl - n), SCE_TCMD_IDENTIFIER);
307 				// Reset Offset to re-process remainder of word
308 				offset -= (wbl - n);
309 
310 			// Check for Variable with modifiers (%~...)
311 			} else if ((varlen = GetBatchVarLen(wordBuffer)) != 0) {
312 
313 				// Colorize Variable
314 				styler.ColourTo(startLine + offset - 1 - (wbl - varlen), SCE_TCMD_IDENTIFIER);
315 				// Reset Offset to re-process remainder of word
316 				offset -= (wbl - varlen);
317 
318 			// Check for Environment Variable (%x...%)
319 			} else if (( wordBuffer[1] ) && ( wordBuffer[1] != '%')) {
320 				if ( wordBuffer[wbo] == '%' )
321 					wbo++;
322 
323 				// Colorize Environment Variable
324 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_ENVIRONMENT);
325 				// Reset Offset to re-process remainder of word
326 				offset -= (wbl - wbo);
327 
328 			// Check for Local Variable (%%a)
329 			} else if (	(wbl > 2) && (wordBuffer[1] == '%') && (wordBuffer[2] != '%') && (!IsBOperator(wordBuffer[2])) && (!IsBSeparator(wordBuffer[2]))) {
330 
331 				n = 2;
332 				while (( wordBuffer[n] ) && (!IsBOperator(wordBuffer[n])) && (!IsBSeparator(wordBuffer[n])))
333 					n++;
334 
335 				// Colorize Local Variable
336 				styler.ColourTo(startLine + offset - 1 - (wbl - n), SCE_TCMD_IDENTIFIER);
337 				// Reset Offset to re-process remainder of word
338 				offset -= (wbl - n);
339 
340 			// Check for %%
341 			} else if ((wbl > 1) && (wordBuffer[1] == '%')) {
342 
343 				// Colorize Symbols
344 				styler.ColourTo(startLine + offset - 1 - (wbl - 2), SCE_TCMD_DEFAULT);
345 				// Reset Offset to re-process remainder of word
346 				offset -= (wbl - 2);
347 			} else {
348 
349 				// Colorize Symbol
350 				styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_DEFAULT);
351 				// Reset Offset to re-process remainder of word
352 				offset -= (wbl - 1);
353 			}
354 
355 		// Check for Operator
356 		} else if (IsBOperator(wordBuffer[0])) {
357 			// Colorize Default Text
358 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_TCMD_DEFAULT);
359 
360 			// Check for Pipe, compound, or conditional Operator
361 			if ((wordBuffer[0] == '|') || (wordBuffer[0] == '&')) {
362 
363 				// Colorize Pipe Operator
364 				styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_OPERATOR);
365 				// Reset Offset to re-process remainder of word
366 				offset -= (wbl - 1);
367 				continueProcessing = true;
368 
369 			// Check for Other Operator
370 			} else {
371 				// Check for > Operator
372 				if ((wordBuffer[0] == '>') || (wordBuffer[0] == '<')) {
373 					// Turn Keyword and External Command / Program checking back on
374 					continueProcessing = true;
375 				}
376 				// Colorize Other Operator
377 				if (!inString || !(wordBuffer[0] == '(' || wordBuffer[0] == ')'))
378 					styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_TCMD_OPERATOR);
379 				// Reset Offset to re-process remainder of word
380 				offset -= (wbl - 1);
381 			}
382 
383 		// Check for Default Text
384 		} else {
385 			// Read up to %, Operator or Separator
386 			while ((wbo < wbl) && (wordBuffer[wbo] != '%') && (!isDelayedExpansion || wordBuffer[wbo] != '!') && (!IsBOperator(wordBuffer[wbo])) &&	(!IsBSeparator(wordBuffer[wbo]))) {
387 				wbo++;
388 			}
389 			// Colorize Default Text
390 			styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_TCMD_DEFAULT);
391 			// Reset Offset to re-process remainder of word
392 			offset -= (wbl - wbo);
393 		}
394 
395 		// Skip whitespace - nothing happens if Offset was Reset
396 		while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
397 			offset++;
398 		}
399 	}
400 	// Colorize Default Text for remainder of line - currently not lexed
401 	styler.ColourTo(endPos, SCE_TCMD_DEFAULT);
402 }
403 
ColouriseTCMDDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * keywordlists[],Accessor & styler)404 static void ColouriseTCMDDoc( Sci_PositionU startPos, Sci_Position length, int /*initStyle*/, WordList *keywordlists[], Accessor &styler )
405 {
406 	char lineBuffer[16384];
407 
408 	styler.StartAt(startPos);
409 	styler.StartSegment(startPos);
410 	Sci_PositionU linePos = 0;
411 	Sci_PositionU startLine = startPos;
412 	for (Sci_PositionU i = startPos; i < startPos + length; i++) {
413 		lineBuffer[linePos++] = styler[i];
414 		if (AtEOL(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
415 			// End of line (or of line buffer) met, colourise it
416 			lineBuffer[linePos] = '\0';
417 			ColouriseTCMDLine(lineBuffer, linePos, startLine, i, keywordlists, styler);
418 			linePos = 0;
419 			startLine = i + 1;
420 		}
421 	}
422 	if (linePos > 0) {	// Last line does not have ending characters
423 		lineBuffer[linePos] = '\0';
424 		ColouriseTCMDLine(lineBuffer, linePos, startLine, startPos + length - 1, keywordlists, styler);
425 	}
426 }
427 
428 // Convert string to upper case
StrUpr(char * s)429 static void StrUpr(char *s) {
430 	while (*s) {
431 		*s = MakeUpperCase(*s);
432 		s++;
433 	}
434 }
435 
436 // Folding support (for DO, IFF, SWITCH, TEXT, and command groups)
FoldTCMDDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)437 static void FoldTCMDDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler)
438 {
439 	Sci_Position line = styler.GetLine(startPos);
440 	int level = styler.LevelAt(line);
441 	int levelIndent = 0;
442 	Sci_PositionU endPos = startPos + length;
443 	char s[16] = "";
444 
445     char chPrev = styler.SafeGetCharAt(startPos - 1);
446 
447 	// Scan for ( and )
448 	for (Sci_PositionU i = startPos; i < endPos; i++) {
449 
450 		int c = styler.SafeGetCharAt(i, '\n');
451 		int style = styler.StyleAt(i);
452         bool bLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
453 
454 		if (style == SCE_TCMD_OPERATOR) {
455 			// CheckFoldPoint
456 			if (c == '(') {
457 				levelIndent += 1;
458 			} else if (c == ')') {
459 				levelIndent -= 1;
460 			}
461 		}
462 
463         if (( bLineStart ) && ( style == SCE_TCMD_WORD )) {
464             for (Sci_PositionU j = 0; j < 10; j++) {
465                 if (!iswordchar(styler[i + j])) {
466                     break;
467                 }
468                 s[j] = styler[i + j];
469                 s[j + 1] = '\0';
470             }
471 
472 			StrUpr( s );
473             if ((strcmp(s, "DO") == 0) || (strcmp(s, "IFF") == 0) || (strcmp(s, "SWITCH") == 0) || (strcmp(s, "TEXT") == 0)) {
474                 levelIndent++;
475             } else if ((strcmp(s, "ENDDO") == 0) || (strcmp(s, "ENDIFF") == 0) || (strcmp(s, "ENDSWITCH") == 0) || (strcmp(s, "ENDTEXT") == 0)) {
476                 levelIndent--;
477             }
478         }
479 
480 		if (c == '\n') { // line end
481 				if (levelIndent > 0) {
482 						level |= SC_FOLDLEVELHEADERFLAG;
483 				}
484 				if (level != styler.LevelAt(line))
485 						styler.SetLevel(line, level);
486 				level += levelIndent;
487 				if ((level & SC_FOLDLEVELNUMBERMASK) < SC_FOLDLEVELBASE)
488 						level = SC_FOLDLEVELBASE;
489 				line++;
490 				// reset state
491 				levelIndent = 0;
492 				level &= ~SC_FOLDLEVELHEADERFLAG;
493 				level &= ~SC_FOLDLEVELWHITEFLAG;
494 		}
495 
496 		chPrev = c;
497 	}
498 }
499 
500 static const char *const tcmdWordListDesc[] = {
501 	"Internal Commands",
502 	"Aliases",
503 	0
504 };
505 
506 LexerModule lmTCMD(SCLEX_TCMD, ColouriseTCMDDoc, "tcmd", FoldTCMDDoc, tcmdWordListDesc);
507