1 // Copyright (c) Microsoft. All rights reserved.
2 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 
4 using System;
5 using System.IO;
6 using System.Collections;
7 using System.Diagnostics;
8 using System.Globalization;
9 using System.Text.RegularExpressions;
10 
11 namespace Microsoft.Build.Shared
12 {
13     /// <summary>
14     /// Functions for dealing with the specially formatted errors returned by
15     /// build tools.
16     /// </summary>
17     /// <remarks>
18     /// Various tools produce and consume CanonicalErrors in various formats.
19     ///
20     /// DEVENV Format When Clicking on Items in the Output Window
21     /// (taken from env\msenv\core\findutil.cpp ParseLocation function)
22     ///
23     ///      v:\dir\file.ext (loc) : msg
24     ///      \\server\share\dir\file.ext(loc):msg
25     ///      url
26     ///
27     ///      loc:
28     ///      (line)
29     ///      (line-line)
30     ///      (line,col)
31     ///      (line,col-col)
32     ///      (line,col,len)
33     ///      (line,col,line,col)
34     ///
35     /// DevDiv Build Process
36     /// (taken from tools\devdiv2.def)
37     ///
38     ///      To echo warnings and errors to the build console, the
39     ///      "description block" must be recognized by build. To do this,
40     ///      add a $(ECHO_COMPILING_COMMAND) or $(ECHO_PROCESSING_COMMAND)
41     ///      to the first line of the description block, e.g.
42     ///
43     ///          $(ECHO_COMPILING_CMD) Resgen_$&lt;
44     ///
45     ///      Errors must have the format:
46     ///
47     ///          &lt;text&gt; : error [num]: &lt;msg&gt;
48     ///
49     ///      Warnings must have the format:
50     ///
51     ///          &lt;text&gt; : warning [num]: &lt;msg&gt;
52     /// </remarks>
53     internal static class CanonicalError
54     {
55         // Defines the main pattern for matching messages.
56         private static readonly Lazy<Regex> s_originCategoryCodeTextExpression = new Lazy<Regex>(
57             () => new Regex
58                 (
59                 // Beginning of line and any amount of whitespace.
60                 @"^\s*"
61                     // Match a [optional project number prefix 'ddd>'], single letter + colon + remaining filename, or
62                     // string with no colon followed by a colon.
63                 + @"(((?<ORIGIN>(((\d+>)?[a-zA-Z]?:[^:]*)|([^:]*))):)"
64                     // Origin may also be empty. In this case there's no trailing colon.
65                 + "|())"
66                     // Match the empty string or a string without a colon that ends with a space
67                 + "(?<SUBCATEGORY>(()|([^:]*? )))"
68                     // Match 'error' or 'warning'.
69                 + @"(?<CATEGORY>(error|warning))"
70                     // Match anything starting with a space that's not a colon/space, followed by a colon.
71                     // Error code is optional in which case "error"/"warning" can be followed immediately by a colon.
72                 + @"( \s*(?<CODE>[^: ]*))?\s*:"
73                     // Whatever's left on this line, including colons.
74                 + "(?<TEXT>.*)$",
75                 RegexOptions.IgnoreCase | RegexOptions.Compiled
76                 ));
77 
78 		private static readonly Lazy<Regex> s_originCategoryCodeTextExpression2 = new Lazy<Regex>(
79             () => new Regex
80                 (
81                 @"^\s*(?<ORIGIN>(?<FILENAME>.*):(?<LOCATION>(?<LINE>[0-9]*):(?<COLUMN>[0-9]*))):(?<CATEGORY> error| warning):(?<TEXT>.*)",
82                 RegexOptions.IgnoreCase | RegexOptions.Compiled
83             ));
84 
85         // Matches and extracts filename and location from an 'origin' element.
86         private static readonly Lazy<Regex> s_filenameLocationFromOrigin = new Lazy<Regex>(
87             () => new Regex
88                 (
89                 "^" // Beginning of line
90                 + @"(\d+>)?" // Optional ddd> project number prefix
91                 + "(?<FILENAME>.*)" // Match anything.
92                 + @"\(" // Find a parenthesis.
93                 + @"(?<LOCATION>[\,,0-9,-]*)" // Match any combination of numbers and ',' and '-'
94                 + @"\)\s*" // Find the closing paren then any amount of spaces.
95                 + "$", // End-of-line
96                 RegexOptions.IgnoreCase | RegexOptions.Compiled
97                 ));
98 
99         // Matches location that is a simple number.
100         private static readonly Lazy<Regex> s_lineFromLocation = new Lazy<Regex>(
101             () => new Regex // Example: line
102                 (
103                 "^" // Beginning of line
104                 + "(?<LINE>[0-9]*)" // Match any number.
105                 + "$", // End-of-line
106                 RegexOptions.IgnoreCase | RegexOptions.Compiled
107                 ));
108 
109         // Matches location that is a range of lines.
110         private static readonly Lazy<Regex> s_lineLineFromLocation = new Lazy<Regex>(
111             () => new Regex // Example: line-line
112                 (
113                 "^" // Beginning of line
114                 + "(?<LINE>[0-9]*)" // Match any number.
115                 + "-" // Dash
116                 + "(?<ENDLINE>[0-9]*)" // Match any number.
117                 + "$", // End-of-line
118                 RegexOptions.IgnoreCase | RegexOptions.Compiled
119                 ));
120 
121         // Matches location that is a line and column
122         private static readonly Lazy<Regex> s_lineColFromLocation = new Lazy<Regex>(
123             () => new Regex // Example: line,col
124                 (
125                 "^" // Beginning of line
126                 + "(?<LINE>[0-9]*)" // Match any number.
127                 + "," // Comma
128                 + "(?<COLUMN>[0-9]*)" // Match any number.
129                 + "$", // End-of-line
130                 RegexOptions.IgnoreCase | RegexOptions.Compiled
131                 ));
132 
133         // Matches location that is a line and column-range
134         private static readonly Lazy<Regex> s_lineColColFromLocation = new Lazy<Regex>(
135             () => new Regex // Example: line,col-col
136                 (
137                 "^" // Beginning of line
138                 + "(?<LINE>[0-9]*)" // Match any number.
139                 + "," // Comma
140                 + "(?<COLUMN>[0-9]*)" // Match any number.
141                 + "-" // Dash
142                 + "(?<ENDCOLUMN>[0-9]*)" // Match any number.
143                 + "$", // End-of-line
144                 RegexOptions.IgnoreCase | RegexOptions.Compiled
145                 ));
146 
147         // Matches location that is line,col,line,col
148         private static readonly Lazy<Regex> s_lineColLineColFromLocation = new Lazy<Regex>(
149             () => new Regex // Example: line,col,line,col
150                 (
151                 "^" // Beginning of line
152                 + "(?<LINE>[0-9]*)" // Match any number.
153                 + "," // Comma
154                 + "(?<COLUMN>[0-9]*)" // Match any number.
155                 + "," // Dash
156                 + "(?<ENDLINE>[0-9]*)" // Match any number.
157                 + "," // Dash
158                 + "(?<ENDCOLUMN>[0-9]*)" // Match any number.
159                 + "$", // End-of-line
160                 RegexOptions.IgnoreCase | RegexOptions.Compiled
161                 ));
162 
163         /// <summary>
164         /// Represents the parts of a decomposed canonical message.
165         /// </summary>
166         internal sealed class Parts
167         {
168             /// <summary>
169             /// Defines the error category\severity level.
170             /// </summary>
171             internal enum Category
172             {
173                 Warning,
174                 Error
175             }
176 
177             /// <summary>
178             /// Value used for unspecified line and column numbers, which are 1-relative.
179             /// </summary>
180             internal const int numberNotSpecified = 0;
181 
182             /// <summary>
183             /// Initializes a new instance of the <see cref="Parts"/> class.
184             /// </summary>
Parts()185             internal Parts()
186             {
187             }
188 
189             /// <summary>
190             /// Name of the file or tool (not localized)
191             /// </summary>
192             internal string origin;
193 
194             /// <summary>
195             /// The line number.
196             /// </summary>
197             internal int line = Parts.numberNotSpecified;
198 
199             /// <summary>
200             /// The column number.
201             /// </summary>
202             internal int column = Parts.numberNotSpecified;
203 
204             /// <summary>
205             /// The ending line number.
206             /// </summary>
207             internal int endLine = Parts.numberNotSpecified;
208 
209             /// <summary>
210             /// The ending column number.
211             /// </summary>
212             internal int endColumn = Parts.numberNotSpecified;
213 
214             /// <summary>
215             /// The category/severity level
216             /// </summary>
217             internal Category category;
218 
219             /// <summary>
220             /// The sub category (localized)
221             /// </summary>
222             internal string subcategory;
223 
224             /// <summary>
225             /// The error code (not localized)
226             /// </summary>
227             internal string code;
228 
229             /// <summary>
230             /// The error message text (localized)
231             /// </summary>
232             internal string text;
233 
234 #if NEVER
ToString()235             internal new string ToString()
236             {
237                 return String.Format
238                 (
239                      "Origin='{0}'\n"
240                     +"Filename='{1}'\n"
241                     +"Line='{2}'\n"
242                     +"Column='{3}'\n"
243                     +"EndLine='{4}'\n"
244                     +"EndColumn='{5}'\n"
245                     +"Category='{6}'\n"
246                     +"Subcategory='{7}'\n"
247                     +"Text='{8}'\n"
248                     , origin, line, column, endLine, endColumn, category.ToString(), subcategory, code, text
249                 );
250 
251             }
252 #endif
253         }
254 
255         /// <summary>
256         /// A small custom int conversion method that treats invalid entries as missing (0). This is done to work around tools
257         /// that don't fully conform to the canonical message format - we still want to salvage what we can from the message.
258         /// </summary>
259         /// <param name="value"></param>
260         /// <returns>'value' converted to int or 0 if it can't be parsed or is negative</returns>
ConvertToIntWithDefault(string value)261         private static int ConvertToIntWithDefault(string value)
262         {
263             int result;
264             bool success = int.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out result);
265 
266             if (!success || (result < 0))
267             {
268                 result = CanonicalError.Parts.numberNotSpecified;
269             }
270 
271             return result;
272         }
273 
274         /// <summary>
275         /// Decompose an error or warning message into constituent parts. If the message isn't in the canonical form, return null.
276         /// </summary>
277         /// <remarks>This method is thread-safe, because the Regex class is thread-safe (per MSDN).</remarks>
278         /// <param name="message"></param>
279         /// <returns>Decomposed canonical message, or null.</returns>
Parse(string message)280         internal static Parts Parse(string message)
281         {
282             // An unusually long string causes pathologically slow Regex back-tracking.
283             // To avoid that, only scan the first 400 characters. That's enough for
284             // the longest possible prefix: MAX_PATH, plus a huge subcategory string, and an error location.
285             // After the regex is done, we can append the overflow.
286             string messageOverflow = String.Empty;
287             if (message.Length > 400)
288             {
289                 messageOverflow = message.Substring(400);
290                 message = message.Substring(0, 400);
291             }
292 
293             // If a tool has a large amount of output that isn't an error or warning (eg., "dir /s %hugetree%")
294             // the regex below is slow. It's faster to pre-scan for "warning" and "error"
295             // and bail out if neither are present.
296             if (message.IndexOf("warning", StringComparison.OrdinalIgnoreCase) == -1 &&
297                 message.IndexOf("error", StringComparison.OrdinalIgnoreCase) == -1)
298             {
299                 return null;
300             }
301 
302             Parts parsedMessage = new Parts();
303 
304             // First, split the message into three parts--Origin, Category, Code, Text.
305             // Example,
306             //      Main.cs(17,20):Command line warning CS0168: The variable 'foo' is declared but never used
307             //      -------------- ------------ ------- ------  ----------------------------------------------
308             //      Origin         SubCategory  Cat.    Code    Text
309             //
310             // To accommodate absolute filenames in Origin, tolerate a colon in the second position
311             // as long as its preceded by a letter.
312             //
313             // Localization Note:
314             //  Even in foreign-language versions of tools, the category field needs to be in English.
315             //  Also, if origin is a tool name, then that needs to be in English.
316             //
317             //  Here's an example from the Japanese version of CL.EXE:
318             //   cl : ???? ??? warning D4024 : ?????????? 'AssemblyInfo.cs' ?????????????????? ???????????
319             //
320             //  Here's an example from the Japanese version of LINK.EXE:
321             //   AssemblyInfo.cpp : fatal error LNK1106: ???????????? ??????????????: 0x6580 ??????????
322             //
323             Match match = s_originCategoryCodeTextExpression.Value.Match(message);
324             string category;
325             if (!match.Success)
326             {
327                 // try again with the Clang/GCC matcher
328                 // Example,
329                 //       err.cpp:6:3: error: use of undeclared identifier 'force_an_error'
330                 //       -----------  -----  ---------------------------------------------
331                 //       Origin       Cat.   Text
332                 match = s_originCategoryCodeTextExpression2.Value.Match(message);
333                 if (!match.Success)
334                 {
335                     return null;
336                 }
337 
338                 category = match.Groups["CATEGORY"].Value.Trim();
339                 if (0 == String.Compare(category, "error", StringComparison.OrdinalIgnoreCase))
340                 {
341                     parsedMessage.category = Parts.Category.Error;
342                 }
343                 else if (0 == String.Compare(category, "warning", StringComparison.OrdinalIgnoreCase))
344                 {
345                     parsedMessage.category = Parts.Category.Warning;
346                 }
347                 else
348                 {
349                     // Not an error\warning message.
350                     return null;
351                 }
352                 parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
353                 parsedMessage.column = ConvertToIntWithDefault(match.Groups["COLUMN"].Value.Trim());
354                 parsedMessage.text = (match.Groups["TEXT"].Value + messageOverflow).Trim();
355                 parsedMessage.origin = match.Groups["FILENAME"].Value.Trim();
356 
357                 string[] explodedText = parsedMessage.text.Split(new char[] {'\''}, StringSplitOptions.RemoveEmptyEntries);
358                 if (explodedText.Length > 0)
359                 {
360                     parsedMessage.code = "G" + explodedText[0].GetHashCode().ToString("X8");
361                 }
362                 else
363                 {
364                     parsedMessage.code = "G00000000";
365                 }
366 
367                 return parsedMessage;
368             }
369 
370             string origin = match.Groups["ORIGIN"].Value.Trim();
371             category = match.Groups["CATEGORY"].Value.Trim();
372             parsedMessage.code = match.Groups["CODE"].Value.Trim();
373             parsedMessage.text = (match.Groups["TEXT"].Value + messageOverflow).Trim();
374             parsedMessage.subcategory = match.Groups["SUBCATEGORY"].Value.Trim();
375 
376             // Next, see if category is something that is recognized.
377             if (0 == String.Compare(category, "error", StringComparison.OrdinalIgnoreCase))
378             {
379                 parsedMessage.category = Parts.Category.Error;
380             }
381             else if (0 == String.Compare(category, "warning", StringComparison.OrdinalIgnoreCase))
382             {
383                 parsedMessage.category = Parts.Category.Warning;
384             }
385             else
386             {
387                 // Not an error\warning message.
388                 return null;
389             }
390 
391             // Origin is not a simple file, but it still could be of the form,
392             //  foo.cpp(location)
393             match = s_filenameLocationFromOrigin.Value.Match(origin);
394 
395             if (match.Success)
396             {
397                 // The origin is in the form,
398                 //  foo.cpp(location)
399                 // Assume the filename exists, but don't verify it. What else could it be?
400                 string location = match.Groups["LOCATION"].Value.Trim();
401                 parsedMessage.origin = match.Groups["FILENAME"].Value.Trim();
402 
403                 // Now, take apart the location. It can be one of these:
404                 //      loc:
405                 //      (line)
406                 //      (line-line)
407                 //      (line,col)
408                 //      (line,col-col)
409                 //      (line,col,len)
410                 //      (line,col,line,col)
411                 if (location.Length > 0)
412                 {
413                     match = s_lineFromLocation.Value.Match(location);
414                     if (match.Success)
415                     {
416                         parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
417                     }
418                     else
419                     {
420                         match = s_lineLineFromLocation.Value.Match(location);
421                         if (match.Success)
422                         {
423                             parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
424                             parsedMessage.endLine = ConvertToIntWithDefault(match.Groups["ENDLINE"].Value.Trim());
425                         }
426                         else
427                         {
428                             match = s_lineColFromLocation.Value.Match(location);
429                             if (match.Success)
430                             {
431                                 parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
432                                 parsedMessage.column = ConvertToIntWithDefault(match.Groups["COLUMN"].Value.Trim());
433                             }
434                             else
435                             {
436                                 match = s_lineColColFromLocation.Value.Match(location);
437                                 if (match.Success)
438                                 {
439                                     parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
440                                     parsedMessage.column = ConvertToIntWithDefault(match.Groups["COLUMN"].Value.Trim());
441                                     parsedMessage.endColumn = ConvertToIntWithDefault(match.Groups["ENDCOLUMN"].Value.Trim());
442                                 }
443                                 else
444                                 {
445                                     match = s_lineColLineColFromLocation.Value.Match(location);
446                                     if (match.Success)
447                                     {
448                                         parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
449                                         parsedMessage.column = ConvertToIntWithDefault(match.Groups["COLUMN"].Value.Trim());
450                                         parsedMessage.endLine = ConvertToIntWithDefault(match.Groups["ENDLINE"].Value.Trim());
451                                         parsedMessage.endColumn = ConvertToIntWithDefault(match.Groups["ENDCOLUMN"].Value.Trim());
452                                     }
453                                 }
454                             }
455                         }
456                     }
457                 }
458             }
459             else
460             {
461                 // The origin does not fit the filename(location) pattern.
462                 parsedMessage.origin = origin;
463             }
464 
465             return parsedMessage;
466         }
467     }
468 }
469