1 // Scintilla source code edit control
2 /** @file LexBatch.cxx
3  ** Lexer for batch files.
4  **/
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
14 
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
18 
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
25 
26 using namespace Scintilla;
27 
// Reports whether ch is one of the ASCII decimal digits '0'..'9'.
static bool Is0To9(char ch) {
	const bool outsideDigits = (ch < '0') || (ch > '9');
	return !outsideDigits;
}
31 
IsAlphabetic(int ch)32 static bool IsAlphabetic(int ch) {
33 	return IsASCII(ch) && isalpha(ch);
34 }
35 
AtEOL(Accessor & styler,Sci_PositionU i)36 static inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
37 	return (styler[i] == '\n') ||
38 	       ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
39 }
40 
// Reports whether ch is one of the characters this lexer treats as a
// BATCH operator: = + > < | ? *
static bool IsBOperator(char ch) {
	switch (ch) {
	case '=':
	case '+':
	case '>':
	case '<':
	case '|':
	case '?':
	case '*':
		return true;
	default:
		return false;
	}
}
46 
// Reports whether ch is one of the characters this lexer treats as a
// BATCH separator: backslash, dot, semicolon, double quote, single quote
// or forward slash.
static bool IsBSeparator(char ch) {
	static const char separators[] = { '\\', '.', ';', '\"', '\'', '/' };
	for (size_t k = 0; k < sizeof(separators); k++) {
		if (separators[k] == ch) {
			return true;
		}
	}
	return false;
}
52 
ColouriseBatchLine(char * lineBuffer,Sci_PositionU lengthLine,Sci_PositionU startLine,Sci_PositionU endPos,WordList * keywordlists[],Accessor & styler)53 static void ColouriseBatchLine(
54     char *lineBuffer,
55     Sci_PositionU lengthLine,
56     Sci_PositionU startLine,
57     Sci_PositionU endPos,
58     WordList *keywordlists[],
59     Accessor &styler) {
60 
61 	Sci_PositionU offset = 0;	// Line Buffer Offset
62 	Sci_PositionU cmdLoc;		// External Command / Program Location
63 	char wordBuffer[81];		// Word Buffer - large to catch long paths
64 	Sci_PositionU wbl;		// Word Buffer Length
65 	Sci_PositionU wbo;		// Word Buffer Offset - also Special Keyword Buffer Length
66 	WordList &keywords = *keywordlists[0];      // Internal Commands
67 	WordList &keywords2 = *keywordlists[1];     // External Commands (optional)
68 
69 	// CHOICE, ECHO, GOTO, PROMPT and SET have Default Text that may contain Regular Keywords
70 	//   Toggling Regular Keyword Checking off improves readability
71 	// Other Regular Keywords and External Commands / Programs might also benefit from toggling
72 	//   Need a more robust algorithm to properly toggle Regular Keyword Checking
73 	bool continueProcessing = true;	// Used to toggle Regular Keyword Checking
74 	// Special Keywords are those that allow certain characters without whitespace after the command
75 	// Examples are: cd. cd\ md. rd. dir| dir> echo: echo. path=
76 	// Special Keyword Buffer used to determine if the first n characters is a Keyword
77 	char sKeywordBuffer[10];	// Special Keyword Buffer
78 	bool sKeywordFound;		// Exit Special Keyword for-loop if found
79 
80 	// Skip initial spaces
81 	while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
82 		offset++;
83 	}
84 	// Colorize Default Text
85 	styler.ColourTo(startLine + offset - 1, SCE_BAT_DEFAULT);
86 	// Set External Command / Program Location
87 	cmdLoc = offset;
88 
89 	// Check for Fake Label (Comment) or Real Label - return if found
90 	if (lineBuffer[offset] == ':') {
91 		if (lineBuffer[offset + 1] == ':') {
92 			// Colorize Fake Label (Comment) - :: is similar to REM, see http://content.techweb.com/winmag/columns/explorer/2000/21.htm
93 			styler.ColourTo(endPos, SCE_BAT_COMMENT);
94 		} else {
95 			// Colorize Real Label
96 			styler.ColourTo(endPos, SCE_BAT_LABEL);
97 		}
98 		return;
99 	// Check for Drive Change (Drive Change is internal command) - return if found
100 	} else if ((IsAlphabetic(lineBuffer[offset])) &&
101 		(lineBuffer[offset + 1] == ':') &&
102 		((isspacechar(lineBuffer[offset + 2])) ||
103 		(((lineBuffer[offset + 2] == '\\')) &&
104 		(isspacechar(lineBuffer[offset + 3]))))) {
105 		// Colorize Regular Keyword
106 		styler.ColourTo(endPos, SCE_BAT_WORD);
107 		return;
108 	}
109 
110 	// Check for Hide Command (@ECHO OFF/ON)
111 	if (lineBuffer[offset] == '@') {
112 		styler.ColourTo(startLine + offset, SCE_BAT_HIDE);
113 		offset++;
114 	}
115 	// Skip next spaces
116 	while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
117 		offset++;
118 	}
119 
120 	// Read remainder of line word-at-a-time or remainder-of-word-at-a-time
121 	while (offset < lengthLine) {
122 		if (offset > startLine) {
123 			// Colorize Default Text
124 			styler.ColourTo(startLine + offset - 1, SCE_BAT_DEFAULT);
125 		}
126 		// Copy word from Line Buffer into Word Buffer
127 		wbl = 0;
128 		for (; offset < lengthLine && wbl < 80 &&
129 		        !isspacechar(lineBuffer[offset]); wbl++, offset++) {
130 			wordBuffer[wbl] = static_cast<char>(tolower(lineBuffer[offset]));
131 		}
132 		wordBuffer[wbl] = '\0';
133 		wbo = 0;
134 
135 		// Check for Comment - return if found
136 		if (CompareCaseInsensitive(wordBuffer, "rem") == 0) {
137 			styler.ColourTo(endPos, SCE_BAT_COMMENT);
138 			return;
139 		}
140 		// Check for Separator
141 		if (IsBSeparator(wordBuffer[0])) {
142 			// Check for External Command / Program
143 			if ((cmdLoc == offset - wbl) &&
144 				((wordBuffer[0] == ':') ||
145 				(wordBuffer[0] == '\\') ||
146 				(wordBuffer[0] == '.'))) {
147 				// Reset Offset to re-process remainder of word
148 				offset -= (wbl - 1);
149 				// Colorize External Command / Program
150 				if (!keywords2) {
151 					styler.ColourTo(startLine + offset - 1, SCE_BAT_COMMAND);
152 				} else if (keywords2.InList(wordBuffer)) {
153 					styler.ColourTo(startLine + offset - 1, SCE_BAT_COMMAND);
154 				} else {
155 					styler.ColourTo(startLine + offset - 1, SCE_BAT_DEFAULT);
156 				}
157 				// Reset External Command / Program Location
158 				cmdLoc = offset;
159 			} else {
160 				// Reset Offset to re-process remainder of word
161 				offset -= (wbl - 1);
162 				// Colorize Default Text
163 				styler.ColourTo(startLine + offset - 1, SCE_BAT_DEFAULT);
164 			}
165 		// Check for Regular Keyword in list
166 		} else if ((keywords.InList(wordBuffer)) &&
167 			(continueProcessing)) {
168 			// ECHO, GOTO, PROMPT and SET require no further Regular Keyword Checking
169 			if ((CompareCaseInsensitive(wordBuffer, "echo") == 0) ||
170 				(CompareCaseInsensitive(wordBuffer, "goto") == 0) ||
171 				(CompareCaseInsensitive(wordBuffer, "prompt") == 0) ||
172 				(CompareCaseInsensitive(wordBuffer, "set") == 0)) {
173 				continueProcessing = false;
174 			}
175 			// Identify External Command / Program Location for ERRORLEVEL, and EXIST
176 			if ((CompareCaseInsensitive(wordBuffer, "errorlevel") == 0) ||
177 				(CompareCaseInsensitive(wordBuffer, "exist") == 0)) {
178 				// Reset External Command / Program Location
179 				cmdLoc = offset;
180 				// Skip next spaces
181 				while ((cmdLoc < lengthLine) &&
182 					(isspacechar(lineBuffer[cmdLoc]))) {
183 					cmdLoc++;
184 				}
185 				// Skip comparison
186 				while ((cmdLoc < lengthLine) &&
187 					(!isspacechar(lineBuffer[cmdLoc]))) {
188 					cmdLoc++;
189 				}
190 				// Skip next spaces
191 				while ((cmdLoc < lengthLine) &&
192 					(isspacechar(lineBuffer[cmdLoc]))) {
193 					cmdLoc++;
194 				}
195 			// Identify External Command / Program Location for CALL, DO, LOADHIGH and LH
196 			} else if ((CompareCaseInsensitive(wordBuffer, "call") == 0) ||
197 				(CompareCaseInsensitive(wordBuffer, "do") == 0) ||
198 				(CompareCaseInsensitive(wordBuffer, "loadhigh") == 0) ||
199 				(CompareCaseInsensitive(wordBuffer, "lh") == 0)) {
200 				// Reset External Command / Program Location
201 				cmdLoc = offset;
202 				// Skip next spaces
203 				while ((cmdLoc < lengthLine) &&
204 					(isspacechar(lineBuffer[cmdLoc]))) {
205 					cmdLoc++;
206 				}
207 			}
208 			// Colorize Regular keyword
209 			styler.ColourTo(startLine + offset - 1, SCE_BAT_WORD);
210 			// No need to Reset Offset
211 		// Check for Special Keyword in list, External Command / Program, or Default Text
212 		} else if ((wordBuffer[0] != '%') &&
213 				   (wordBuffer[0] != '!') &&
214 			(!IsBOperator(wordBuffer[0])) &&
215 			(continueProcessing)) {
216 			// Check for Special Keyword
217 			//     Affected Commands are in Length range 2-6
218 			//     Good that ERRORLEVEL, EXIST, CALL, DO, LOADHIGH, and LH are unaffected
219 			sKeywordFound = false;
220 			for (Sci_PositionU keywordLength = 2; keywordLength < wbl && keywordLength < 7 && !sKeywordFound; keywordLength++) {
221 				wbo = 0;
222 				// Copy Keyword Length from Word Buffer into Special Keyword Buffer
223 				for (; wbo < keywordLength; wbo++) {
224 					sKeywordBuffer[wbo] = static_cast<char>(wordBuffer[wbo]);
225 				}
226 				sKeywordBuffer[wbo] = '\0';
227 				// Check for Special Keyword in list
228 				if ((keywords.InList(sKeywordBuffer)) &&
229 					((IsBOperator(wordBuffer[wbo])) ||
230 					(IsBSeparator(wordBuffer[wbo])))) {
231 					sKeywordFound = true;
232 					// ECHO requires no further Regular Keyword Checking
233 					if (CompareCaseInsensitive(sKeywordBuffer, "echo") == 0) {
234 						continueProcessing = false;
235 					}
236 					// Colorize Special Keyword as Regular Keyword
237 					styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_BAT_WORD);
238 					// Reset Offset to re-process remainder of word
239 					offset -= (wbl - wbo);
240 				}
241 			}
242 			// Check for External Command / Program or Default Text
243 			if (!sKeywordFound) {
244 				wbo = 0;
245 				// Check for External Command / Program
246 				if (cmdLoc == offset - wbl) {
247 					// Read up to %, Operator or Separator
248 					while ((wbo < wbl) &&
249 						(wordBuffer[wbo] != '%') &&
250 						(wordBuffer[wbo] != '!') &&
251 						(!IsBOperator(wordBuffer[wbo])) &&
252 						(!IsBSeparator(wordBuffer[wbo]))) {
253 						wbo++;
254 					}
255 					// Reset External Command / Program Location
256 					cmdLoc = offset - (wbl - wbo);
257 					// Reset Offset to re-process remainder of word
258 					offset -= (wbl - wbo);
259 					// CHOICE requires no further Regular Keyword Checking
260 					if (CompareCaseInsensitive(wordBuffer, "choice") == 0) {
261 						continueProcessing = false;
262 					}
263 					// Check for START (and its switches) - What follows is External Command \ Program
264 					if (CompareCaseInsensitive(wordBuffer, "start") == 0) {
265 						// Reset External Command / Program Location
266 						cmdLoc = offset;
267 						// Skip next spaces
268 						while ((cmdLoc < lengthLine) &&
269 							(isspacechar(lineBuffer[cmdLoc]))) {
270 							cmdLoc++;
271 						}
272 						// Reset External Command / Program Location if command switch detected
273 						if (lineBuffer[cmdLoc] == '/') {
274 							// Skip command switch
275 							while ((cmdLoc < lengthLine) &&
276 								(!isspacechar(lineBuffer[cmdLoc]))) {
277 								cmdLoc++;
278 							}
279 							// Skip next spaces
280 							while ((cmdLoc < lengthLine) &&
281 								(isspacechar(lineBuffer[cmdLoc]))) {
282 								cmdLoc++;
283 							}
284 						}
285 					}
286 					// Colorize External Command / Program
287 					if (!keywords2) {
288 						styler.ColourTo(startLine + offset - 1, SCE_BAT_COMMAND);
289 					} else if (keywords2.InList(wordBuffer)) {
290 						styler.ColourTo(startLine + offset - 1, SCE_BAT_COMMAND);
291 					} else {
292 						styler.ColourTo(startLine + offset - 1, SCE_BAT_DEFAULT);
293 					}
294 					// No need to Reset Offset
295 				// Check for Default Text
296 				} else {
297 					// Read up to %, Operator or Separator
298 					while ((wbo < wbl) &&
299 						(wordBuffer[wbo] != '%') &&
300 						(wordBuffer[wbo] != '!') &&
301 						(!IsBOperator(wordBuffer[wbo])) &&
302 						(!IsBSeparator(wordBuffer[wbo]))) {
303 						wbo++;
304 					}
305 					// Colorize Default Text
306 					styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_BAT_DEFAULT);
307 					// Reset Offset to re-process remainder of word
308 					offset -= (wbl - wbo);
309 				}
310 			}
311 		// Check for Argument  (%n), Environment Variable (%x...%) or Local Variable (%%a)
312 		} else if (wordBuffer[0] == '%') {
313 			// Colorize Default Text
314 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_BAT_DEFAULT);
315 			wbo++;
316 			// Search to end of word for second % (can be a long path)
317 			while ((wbo < wbl) &&
318 				(wordBuffer[wbo] != '%') &&
319 				(!IsBOperator(wordBuffer[wbo])) &&
320 				(!IsBSeparator(wordBuffer[wbo]))) {
321 				wbo++;
322 			}
323 			// Check for Argument (%n) or (%*)
324 			if (((Is0To9(wordBuffer[1])) || (wordBuffer[1] == '*')) &&
325 				(wordBuffer[wbo] != '%')) {
326 				// Check for External Command / Program
327 				if (cmdLoc == offset - wbl) {
328 					cmdLoc = offset - (wbl - 2);
329 				}
330 				// Colorize Argument
331 				styler.ColourTo(startLine + offset - 1 - (wbl - 2), SCE_BAT_IDENTIFIER);
332 				// Reset Offset to re-process remainder of word
333 				offset -= (wbl - 2);
334 			// Check for Expanded Argument (%~...) / Variable (%%~...)
335 			} else if (((wbl > 1) && (wordBuffer[1] == '~')) ||
336 				((wbl > 2) && (wordBuffer[1] == '%') && (wordBuffer[2] == '~'))) {
337 				// Check for External Command / Program
338 				if (cmdLoc == offset - wbl) {
339 					cmdLoc = offset - (wbl - wbo);
340 				}
341 				// Colorize Expanded Argument / Variable
342 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_BAT_IDENTIFIER);
343 				// Reset Offset to re-process remainder of word
344 				offset -= (wbl - wbo);
345 			// Check for Environment Variable (%x...%)
346 			} else if ((wordBuffer[1] != '%') &&
347 				(wordBuffer[wbo] == '%')) {
348 				wbo++;
349 				// Check for External Command / Program
350 				if (cmdLoc == offset - wbl) {
351 					cmdLoc = offset - (wbl - wbo);
352 				}
353 				// Colorize Environment Variable
354 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_BAT_IDENTIFIER);
355 				// Reset Offset to re-process remainder of word
356 				offset -= (wbl - wbo);
357 			// Check for Local Variable (%%a)
358 			} else if (
359 				(wbl > 2) &&
360 				(wordBuffer[1] == '%') &&
361 				(wordBuffer[2] != '%') &&
362 				(!IsBOperator(wordBuffer[2])) &&
363 				(!IsBSeparator(wordBuffer[2]))) {
364 				// Check for External Command / Program
365 				if (cmdLoc == offset - wbl) {
366 					cmdLoc = offset - (wbl - 3);
367 				}
368 				// Colorize Local Variable
369 				styler.ColourTo(startLine + offset - 1 - (wbl - 3), SCE_BAT_IDENTIFIER);
370 				// Reset Offset to re-process remainder of word
371 				offset -= (wbl - 3);
372 			}
373 		// Check for Environment Variable (!x...!)
374 		} else if (wordBuffer[0] == '!') {
375 			// Colorize Default Text
376 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_BAT_DEFAULT);
377 			wbo++;
378 			// Search to end of word for second ! (can be a long path)
379 			while ((wbo < wbl) &&
380 				(wordBuffer[wbo] != '!') &&
381 				(!IsBOperator(wordBuffer[wbo])) &&
382 				(!IsBSeparator(wordBuffer[wbo]))) {
383 				wbo++;
384 			}
385 			if (wordBuffer[wbo] == '!') {
386 				wbo++;
387 				// Check for External Command / Program
388 				if (cmdLoc == offset - wbl) {
389 					cmdLoc = offset - (wbl - wbo);
390 				}
391 				// Colorize Environment Variable
392 				styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_BAT_IDENTIFIER);
393 				// Reset Offset to re-process remainder of word
394 				offset -= (wbl - wbo);
395 			}
396 		// Check for Operator
397 		} else if (IsBOperator(wordBuffer[0])) {
398 			// Colorize Default Text
399 			styler.ColourTo(startLine + offset - 1 - wbl, SCE_BAT_DEFAULT);
400 			// Check for Comparison Operator
401 			if ((wordBuffer[0] == '=') && (wordBuffer[1] == '=')) {
402 				// Identify External Command / Program Location for IF
403 				cmdLoc = offset;
404 				// Skip next spaces
405 				while ((cmdLoc < lengthLine) &&
406 					(isspacechar(lineBuffer[cmdLoc]))) {
407 					cmdLoc++;
408 				}
409 				// Colorize Comparison Operator
410 				styler.ColourTo(startLine + offset - 1 - (wbl - 2), SCE_BAT_OPERATOR);
411 				// Reset Offset to re-process remainder of word
412 				offset -= (wbl - 2);
413 			// Check for Pipe Operator
414 			} else if (wordBuffer[0] == '|') {
415 				// Reset External Command / Program Location
416 				cmdLoc = offset - wbl + 1;
417 				// Skip next spaces
418 				while ((cmdLoc < lengthLine) &&
419 					(isspacechar(lineBuffer[cmdLoc]))) {
420 					cmdLoc++;
421 				}
422 				// Colorize Pipe Operator
423 				styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_BAT_OPERATOR);
424 				// Reset Offset to re-process remainder of word
425 				offset -= (wbl - 1);
426 			// Check for Other Operator
427 			} else {
428 				// Check for > Operator
429 				if (wordBuffer[0] == '>') {
430 					// Turn Keyword and External Command / Program checking back on
431 					continueProcessing = true;
432 				}
433 				// Colorize Other Operator
434 				styler.ColourTo(startLine + offset - 1 - (wbl - 1), SCE_BAT_OPERATOR);
435 				// Reset Offset to re-process remainder of word
436 				offset -= (wbl - 1);
437 			}
438 		// Check for Default Text
439 		} else {
440 			// Read up to %, Operator or Separator
441 			while ((wbo < wbl) &&
442 				(wordBuffer[wbo] != '%') &&
443 				(wordBuffer[wbo] != '!') &&
444 				(!IsBOperator(wordBuffer[wbo])) &&
445 				(!IsBSeparator(wordBuffer[wbo]))) {
446 				wbo++;
447 			}
448 			// Colorize Default Text
449 			styler.ColourTo(startLine + offset - 1 - (wbl - wbo), SCE_BAT_DEFAULT);
450 			// Reset Offset to re-process remainder of word
451 			offset -= (wbl - wbo);
452 		}
453 		// Skip next spaces - nothing happens if Offset was Reset
454 		while ((offset < lengthLine) && (isspacechar(lineBuffer[offset]))) {
455 			offset++;
456 		}
457 	}
458 	// Colorize Default Text for remainder of line - currently not lexed
459 	styler.ColourTo(endPos, SCE_BAT_DEFAULT);
460 }
461 
ColouriseBatchDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * keywordlists[],Accessor & styler)462 static void ColouriseBatchDoc(
463     Sci_PositionU startPos,
464     Sci_Position length,
465     int /*initStyle*/,
466     WordList *keywordlists[],
467     Accessor &styler) {
468 
469 	char lineBuffer[1024];
470 
471 	styler.StartAt(startPos);
472 	styler.StartSegment(startPos);
473 	Sci_PositionU linePos = 0;
474 	Sci_PositionU startLine = startPos;
475 	for (Sci_PositionU i = startPos; i < startPos + length; i++) {
476 		lineBuffer[linePos++] = styler[i];
477 		if (AtEOL(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
478 			// End of line (or of line buffer) met, colourise it
479 			lineBuffer[linePos] = '\0';
480 			ColouriseBatchLine(lineBuffer, linePos, startLine, i, keywordlists, styler);
481 			linePos = 0;
482 			startLine = i + 1;
483 		}
484 	}
485 	if (linePos > 0) {	// Last line does not have ending characters
486 		lineBuffer[linePos] = '\0';
487 		ColouriseBatchLine(lineBuffer, linePos, startLine, startPos + length - 1,
488 		                   keywordlists, styler);
489 	}
490 }
491 
// Descriptions of the keyword lists used by this lexer, in the same order
// as they are indexed in ColouriseBatchLine() (keywordlists[0] and
// keywordlists[1]). The array is terminated by a 0 entry.
static const char *const batchWordListDesc[] = {
	"Internal Commands",
	"External Commands",
	0
};
497 
// Lexer module object for SCLEX_BATCH, named "batch": it is given
// ColouriseBatchDoc as its colouring function and batchWordListDesc as its
// keyword list descriptions. The fourth argument is 0 - presumably the
// (absent) folding function; confirm against the LexerModule constructor.
LexerModule lmBatch(SCLEX_BATCH, ColouriseBatchDoc, "batch", 0, batchWordListDesc);
499