1 // Scintilla source code edit control
2 /** @file LexErrorList.cxx
3  ** Lexer for error lists. Used for the output pane in SciTE.
4  **/
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
14 
15 #include "ILexer.h"
16 #include "Scintilla.h"
17 #include "SciLexer.h"
18 
19 #include "WordList.h"
20 #include "LexAccessor.h"
21 #include "Accessor.h"
22 #include "StyleContext.h"
23 #include "CharacterSet.h"
24 #include "LexerModule.h"
25 
26 #ifdef SCI_NAMESPACE
27 using namespace Scintilla;
28 #endif
29 
// Report whether the text at haystack begins with the characters of needle.
// An empty needle matches any haystack.
static bool strstart(const char *haystack, const char *needle) {
	const size_t prefixLength = strlen(needle);
	const int difference = strncmp(haystack, needle, prefixLength);
	return difference == 0;
}
33 
// True for the ASCII decimal digits '0' through '9'.
static bool Is0To9(char ch) {
	if (ch < '0')
		return false;
	return ch <= '9';
}
37 
// True for the ASCII digits '1' through '9'; '0' is deliberately excluded.
static bool Is1To9(char ch) {
	if (ch < '1')
		return false;
	return ch <= '9';
}
41 
IsAlphabetic(int ch)42 static bool IsAlphabetic(int ch) {
43 	return IsASCII(ch) && isalpha(ch);
44 }
45 
AtEOL(Accessor & styler,Sci_PositionU i)46 static inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
47 	return (styler[i] == '\n') ||
48 	       ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
49 }
50 
RecogniseErrorListLine(const char * lineBuffer,Sci_PositionU lengthLine,Sci_Position & startValue)51 static int RecogniseErrorListLine(const char *lineBuffer, Sci_PositionU lengthLine, Sci_Position &startValue) {
52 	if (lineBuffer[0] == '>') {
53 		// Command or return status
54 		return SCE_ERR_CMD;
55 	} else if (lineBuffer[0] == '<') {
56 		// Diff removal.
57 		return SCE_ERR_DIFF_DELETION;
58 	} else if (lineBuffer[0] == '!') {
59 		return SCE_ERR_DIFF_CHANGED;
60 	} else if (lineBuffer[0] == '+') {
61 		if (strstart(lineBuffer, "+++ ")) {
62 			return SCE_ERR_DIFF_MESSAGE;
63 		} else {
64 			return SCE_ERR_DIFF_ADDITION;
65 		}
66 	} else if (lineBuffer[0] == '-') {
67 		if (strstart(lineBuffer, "--- ")) {
68 			return SCE_ERR_DIFF_MESSAGE;
69 		} else {
70 			return SCE_ERR_DIFF_DELETION;
71 		}
72 	} else if (strstart(lineBuffer, "cf90-")) {
73 		// Absoft Pro Fortran 90/95 v8.2 error and/or warning message
74 		return SCE_ERR_ABSF;
75 	} else if (strstart(lineBuffer, "fortcom:")) {
76 		// Intel Fortran Compiler v8.0 error/warning message
77 		return SCE_ERR_IFORT;
78 	} else if (strstr(lineBuffer, "File \"") && strstr(lineBuffer, ", line ")) {
79 		return SCE_ERR_PYTHON;
80 	} else if (strstr(lineBuffer, " in ") && strstr(lineBuffer, " on line ")) {
81 		return SCE_ERR_PHP;
82 	} else if ((strstart(lineBuffer, "Error ") ||
83 	            strstart(lineBuffer, "Warning ")) &&
84 	           strstr(lineBuffer, " at (") &&
85 	           strstr(lineBuffer, ") : ") &&
86 	           (strstr(lineBuffer, " at (") < strstr(lineBuffer, ") : "))) {
87 		// Intel Fortran Compiler error/warning message
88 		return SCE_ERR_IFC;
89 	} else if (strstart(lineBuffer, "Error ")) {
90 		// Borland error message
91 		return SCE_ERR_BORLAND;
92 	} else if (strstart(lineBuffer, "Warning ")) {
93 		// Borland warning message
94 		return SCE_ERR_BORLAND;
95 	} else if (strstr(lineBuffer, "at line ") &&
96 	        (strstr(lineBuffer, "at line ") < (lineBuffer + lengthLine)) &&
97 	           strstr(lineBuffer, "file ") &&
98 	           (strstr(lineBuffer, "file ") < (lineBuffer + lengthLine))) {
99 		// Lua 4 error message
100 		return SCE_ERR_LUA;
101 	} else if (strstr(lineBuffer, " at ") &&
102 	        (strstr(lineBuffer, " at ") < (lineBuffer + lengthLine)) &&
103 	           strstr(lineBuffer, " line ") &&
104 	           (strstr(lineBuffer, " line ") < (lineBuffer + lengthLine)) &&
105 	        (strstr(lineBuffer, " at ") + 4 < (strstr(lineBuffer, " line ")))) {
106 		// perl error message:
107 		// <message> at <file> line <line>
108 		return SCE_ERR_PERL;
109 	} else if ((memcmp(lineBuffer, "   at ", 6) == 0) &&
110 	           strstr(lineBuffer, ":line ")) {
111 		// A .NET traceback
112 		return SCE_ERR_NET;
113 	} else if (strstart(lineBuffer, "Line ") &&
114 	           strstr(lineBuffer, ", file ")) {
115 		// Essential Lahey Fortran error message
116 		return SCE_ERR_ELF;
117 	} else if (strstart(lineBuffer, "line ") &&
118 	           strstr(lineBuffer, " column ")) {
119 		// HTML tidy style: line 42 column 1
120 		return SCE_ERR_TIDY;
121 	} else if (strstart(lineBuffer, "\tat ") &&
122 	           strstr(lineBuffer, "(") &&
123 	           strstr(lineBuffer, ".java:")) {
124 		// Java stack back trace
125 		return SCE_ERR_JAVA_STACK;
126 	} else if (strstart(lineBuffer, "In file included from ") ||
127 	           strstart(lineBuffer, "                 from ")) {
128 		// GCC showing include path to following error
129 		return SCE_ERR_GCC_INCLUDED_FROM;
130 	} else if (strstr(lineBuffer, "warning LNK")) {
131 		// Microsoft linker warning:
132 		// {<object> : } warning LNK9999
133 		return SCE_ERR_MS;
134 	} else {
135 		// Look for one of the following formats:
136 		// GCC: <filename>:<line>:<message>
137 		// Microsoft: <filename>(<line>) :<message>
138 		// Common: <filename>(<line>): warning|error|note|remark|catastrophic|fatal
139 		// Common: <filename>(<line>) warning|error|note|remark|catastrophic|fatal
140 		// Microsoft: <filename>(<line>,<column>)<message>
141 		// CTags: <identifier>\t<filename>\t<message>
142 		// Lua 5 traceback: \t<filename>:<line>:<message>
143 		// Lua 5.1: <exe>: <filename>:<line>:<message>
144 		bool initialTab = (lineBuffer[0] == '\t');
145 		bool initialColonPart = false;
146 		bool canBeCtags = !initialTab;	// For ctags must have an identifier with no spaces then a tab
147 		enum { stInitial,
148 			stGccStart, stGccDigit, stGccColumn, stGcc,
149 			stMsStart, stMsDigit, stMsBracket, stMsVc, stMsDigitComma, stMsDotNet,
150 			stCtagsStart, stCtagsFile, stCtagsStartString, stCtagsStringDollar, stCtags,
151 			stUnrecognized
152 		} state = stInitial;
153 		for (Sci_PositionU i = 0; i < lengthLine; i++) {
154 			char ch = lineBuffer[i];
155 			char chNext = ' ';
156 			if ((i + 1) < lengthLine)
157 				chNext = lineBuffer[i + 1];
158 			if (state == stInitial) {
159 				if (ch == ':') {
160 					// May be GCC, or might be Lua 5 (Lua traceback same but with tab prefix)
161 					if ((chNext != '\\') && (chNext != '/') && (chNext != ' ')) {
162 						// This check is not completely accurate as may be on
163 						// GTK+ with a file name that includes ':'.
164 						state = stGccStart;
165 					} else if (chNext == ' ') { // indicates a Lua 5.1 error message
166 						initialColonPart = true;
167 					}
168 				} else if ((ch == '(') && Is1To9(chNext) && (!initialTab)) {
169 					// May be Microsoft
170 					// Check against '0' often removes phone numbers
171 					state = stMsStart;
172 				} else if ((ch == '\t') && canBeCtags) {
173 					// May be CTags
174 					state = stCtagsStart;
175 				} else if (ch == ' ') {
176 					canBeCtags = false;
177 				}
178 			} else if (state == stGccStart) {	// <filename>:
179 				state = Is0To9(ch) ? stGccDigit : stUnrecognized;
180 			} else if (state == stGccDigit) {	// <filename>:<line>
181 				if (ch == ':') {
182 					state = stGccColumn;	// :9.*: is GCC
183 					startValue = i + 1;
184 				} else if (!Is0To9(ch)) {
185 					state = stUnrecognized;
186 				}
187 			} else if (state == stGccColumn) {	// <filename>:<line>:<column>
188 				if (!Is0To9(ch)) {
189 					state = stGcc;
190 					if (ch == ':')
191 						startValue = i + 1;
192 					break;
193 				}
194 			} else if (state == stMsStart) {	// <filename>(
195 				state = Is0To9(ch) ? stMsDigit : stUnrecognized;
196 			} else if (state == stMsDigit) {	// <filename>(<line>
197 				if (ch == ',') {
198 					state = stMsDigitComma;
199 				} else if (ch == ')') {
200 					state = stMsBracket;
201 				} else if ((ch != ' ') && !Is0To9(ch)) {
202 					state = stUnrecognized;
203 				}
204 			} else if (state == stMsBracket) {	// <filename>(<line>)
205 				if ((ch == ' ') && (chNext == ':')) {
206 					state = stMsVc;
207 				} else if ((ch == ':' && chNext == ' ') || (ch == ' ')) {
208 					// Possibly Delphi.. don't test against chNext as it's one of the strings below.
209 					char word[512];
210 					Sci_PositionU j, chPos;
211 					unsigned numstep;
212 					chPos = 0;
213 					if (ch == ' ')
214 						numstep = 1; // ch was ' ', handle as if it's a delphi errorline, only add 1 to i.
215 					else
216 						numstep = 2; // otherwise add 2.
217 					for (j = i + numstep; j < lengthLine && IsAlphabetic(lineBuffer[j]) && chPos < sizeof(word) - 1; j++)
218 						word[chPos++] = lineBuffer[j];
219 					word[chPos] = 0;
220 					if (!CompareCaseInsensitive(word, "error") || !CompareCaseInsensitive(word, "warning") ||
221 						!CompareCaseInsensitive(word, "fatal") || !CompareCaseInsensitive(word, "catastrophic") ||
222 						!CompareCaseInsensitive(word, "note") || !CompareCaseInsensitive(word, "remark")) {
223 						state = stMsVc;
224 					} else {
225 						state = stUnrecognized;
226 					}
227 				} else {
228 					state = stUnrecognized;
229 				}
230 			} else if (state == stMsDigitComma) {	// <filename>(<line>,
231 				if (ch == ')') {
232 					state = stMsDotNet;
233 					break;
234 				} else if ((ch != ' ') && !Is0To9(ch)) {
235 					state = stUnrecognized;
236 				}
237 			} else if (state == stCtagsStart) {
238 				if (ch == '\t') {
239 					state = stCtagsFile;
240 				}
241 			} else if (state == stCtagsFile) {
242 				if ((lineBuffer[i - 1] == '\t') &&
243 				        ((ch == '/' && chNext == '^') || Is0To9(ch))) {
244 					state = stCtags;
245 					break;
246 				} else if ((ch == '/') && (chNext == '^')) {
247 					state = stCtagsStartString;
248 				}
249 			} else if ((state == stCtagsStartString) && ((lineBuffer[i] == '$') && (lineBuffer[i + 1] == '/'))) {
250 				state = stCtagsStringDollar;
251 				break;
252 			}
253 		}
254 		if (state == stGcc) {
255 			return initialColonPart ? SCE_ERR_LUA : SCE_ERR_GCC;
256 		} else if ((state == stMsVc) || (state == stMsDotNet)) {
257 			return SCE_ERR_MS;
258 		} else if ((state == stCtagsStringDollar) || (state == stCtags)) {
259 			return SCE_ERR_CTAG;
260 		} else if (initialColonPart && strstr(lineBuffer, ": warning C")) {
261 			// Microsoft warning without line number
262 			// <filename>: warning C9999
263 			return SCE_ERR_MS;
264 		} else {
265 			return SCE_ERR_DEFAULT;
266 		}
267 	}
268 }
269 
270 #define CSI "\033["
271 
272 namespace {
273 
SequenceEnd(int ch)274 bool SequenceEnd(int ch) {
275 	return (ch == 0) || ((ch >= '@') && (ch <= '~'));
276 }
277 
StyleFromSequence(const char * seq)278 int StyleFromSequence(const char *seq) {
279 	int bold = 0;
280 	int colour = 0;
281 	while (!SequenceEnd(*seq)) {
282 		if (Is0To9(*seq)) {
283 			int base = *seq - '0';
284 			if (Is0To9(seq[1])) {
285 				base = base * 10;
286 				base += seq[1] - '0';
287 				seq++;
288 			}
289 			if (base == 0) {
290 				colour = 0;
291 				bold = 0;
292 			}
293 			else if (base == 1) {
294 				bold = 1;
295 			}
296 			else if (base >= 30 && base <= 37) {
297 				colour = base - 30;
298 			}
299 		}
300 		seq++;
301 	}
302 	return SCE_ERR_ES_BLACK + bold * 8 + colour;
303 }
304 
305 }
306 
ColouriseErrorListLine(char * lineBuffer,Sci_PositionU lengthLine,Sci_PositionU endPos,Accessor & styler,bool valueSeparate,bool escapeSequences)307 static void ColouriseErrorListLine(
308     char *lineBuffer,
309     Sci_PositionU lengthLine,
310     Sci_PositionU endPos,
311     Accessor &styler,
312 	bool valueSeparate,
313 	bool escapeSequences) {
314 	Sci_Position startValue = -1;
315 	int style = RecogniseErrorListLine(lineBuffer, lengthLine, startValue);
316 	if (escapeSequences && strstr(lineBuffer, CSI)) {
317 		const int startPos = endPos - lengthLine;
318 		const char *linePortion = lineBuffer;
319 		int startPortion = startPos;
320 		int portionStyle = style;
321 		while (const char *startSeq = strstr(linePortion, CSI)) {
322 			if (startSeq > linePortion) {
323 				styler.ColourTo(startPortion + static_cast<int>(startSeq - linePortion), portionStyle);
324 			}
325 			const char *endSeq = startSeq + 2;
326 			while (!SequenceEnd(*endSeq))
327 				endSeq++;
328 			const int endSeqPosition = startPortion + static_cast<int>(endSeq - linePortion) + 1;
329 			switch (*endSeq) {
330 			case 0:
331 				styler.ColourTo(endPos, SCE_ERR_ESCSEQ_UNKNOWN);
332 				return;
333 			case 'm':	// Colour command
334 				styler.ColourTo(endSeqPosition, SCE_ERR_ESCSEQ);
335 				portionStyle = StyleFromSequence(startSeq+2);
336 				break;
337 			case 'K':	// Erase to end of line -> ignore
338 				styler.ColourTo(endSeqPosition, SCE_ERR_ESCSEQ);
339 				break;
340 			default:
341 				styler.ColourTo(endSeqPosition, SCE_ERR_ESCSEQ_UNKNOWN);
342 				portionStyle = style;
343 			}
344 			startPortion = endSeqPosition;
345 			linePortion = endSeq + 1;
346 		}
347 		styler.ColourTo(endPos, portionStyle);
348 	} else {
349 		if (valueSeparate && (startValue >= 0)) {
350 			styler.ColourTo(endPos - (lengthLine - startValue), style);
351 			styler.ColourTo(endPos, SCE_ERR_VALUE);
352 		} else {
353 			styler.ColourTo(endPos, style);
354 		}
355 	}
356 }
357 
ColouriseErrorListDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)358 static void ColouriseErrorListDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler) {
359 	char lineBuffer[10000];
360 	styler.StartAt(startPos);
361 	styler.StartSegment(startPos);
362 	Sci_PositionU linePos = 0;
363 
364 	// property lexer.errorlist.value.separate
365 	//	For lines in the output pane that are matches from Find in Files or GCC-style
366 	//	diagnostics, style the path and line number separately from the rest of the
367 	//	line with style 21 used for the rest of the line.
368 	//	This allows matched text to be more easily distinguished from its location.
369 	bool valueSeparate = styler.GetPropertyInt("lexer.errorlist.value.separate", 0) != 0;
370 
371 	// property lexer.errorlist.escape.sequences
372 	//	Set to 1 to interpret escape sequences.
373 	const bool escapeSequences = styler.GetPropertyInt("lexer.errorlist.escape.sequences") != 0;
374 
375 	for (Sci_PositionU i = startPos; i < startPos + length; i++) {
376 		lineBuffer[linePos++] = styler[i];
377 		if (AtEOL(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
378 			// End of line (or of line buffer) met, colourise it
379 			lineBuffer[linePos] = '\0';
380 			ColouriseErrorListLine(lineBuffer, linePos, i, styler, valueSeparate, escapeSequences);
381 			linePos = 0;
382 		}
383 	}
384 	if (linePos > 0) {	// Last line does not have ending characters
385 		lineBuffer[linePos] = '\0';
386 		ColouriseErrorListLine(lineBuffer, linePos, startPos + length - 1, styler, valueSeparate, escapeSequences);
387 	}
388 }
389 
390 static const char *const emptyWordListDesc[] = {
391 	0
392 };
393 
394 LexerModule lmErrorList(SCLEX_ERRORLIST, ColouriseErrorListDoc, "errorlist", 0, emptyWordListDesc);
395