// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
using System.IO;
using System.Collections;
using System.Diagnostics;
using System.Globalization;
using System.Text.RegularExpressions;

namespace Microsoft.Build.Shared
{
    /// <summary>
    /// Functions for dealing with the specially formatted errors returned by
    /// build tools.
    /// </summary>
    /// <remarks>
    /// Various tools produce and consume CanonicalErrors in various formats.
    ///
    /// DEVENV Format When Clicking on Items in the Output Window
    /// (taken from env\msenv\core\findutil.cpp ParseLocation function)
    ///
    ///      v:\dir\file.ext (loc) : msg
    ///      \\server\share\dir\file.ext(loc):msg
    ///      url
    ///
    ///      loc:
    ///      (line)
    ///      (line-line)
    ///      (line,col)
    ///      (line,col-col)
    ///      (line,col,len)
    ///      (line,col,line,col)
    ///
    /// DevDiv Build Process
    /// (taken from tools\devdiv2.def)
    ///
    ///      To echo warnings and errors to the build console, the
    ///      "description block" must be recognized by build. To do this,
    ///      add a $(ECHO_COMPILING_COMMAND) or $(ECHO_PROCESSING_COMMAND)
    ///      to the first line of the description block, e.g.
    ///
    ///          $(ECHO_COMPILING_CMD) Resgen_$<
    ///
    ///      Errors must have the format:
    ///
    ///          <text> : error [num]: <msg>
    ///
    ///      Warnings must have the format:
    ///
    ///          <text> : warning [num]: <msg>
    /// </remarks>
    internal static class CanonicalError
    {
        /// <summary>
        /// Maximum number of leading characters of a message that are handed to the regular
        /// expressions. Anything beyond this is re-appended to the parsed text afterwards.
        /// </summary>
        private const int MaxScannedMessageLength = 400;

        // Defines the main pattern for matching messages.
        private static readonly Lazy<Regex> s_originCategoryCodeTextExpression = new Lazy<Regex>(
            () => new Regex
                (
                // Beginning of line and any amount of whitespace.
                @"^\s*"
                // Match a [optional project number prefix 'ddd>'], single letter + colon + remaining filename, or
                // string with no colon followed by a colon.
                + @"(((?<ORIGIN>(((\d+>)?[a-zA-Z]?:[^:]*)|([^:]*))):)"
                // Origin may also be empty. In this case there's no trailing colon.
                + "|())"
                // Match the empty string or a string without a colon that ends with a space
                + "(?<SUBCATEGORY>(()|([^:]*? )))"
                // Match 'error' or 'warning'.
                + @"(?<CATEGORY>(error|warning))"
                // Match anything starting with a space that's not a colon/space, followed by a colon.
                // Error code is optional in which case "error"/"warning" can be followed immediately by a colon.
                + @"( \s*(?<CODE>[^: ]*))?\s*:"
                // Whatever's left on this line, including colons.
                + "(?<TEXT>.*)$",
                RegexOptions.IgnoreCase | RegexOptions.Compiled
                ));

        // Fallback pattern for Clang/GCC style messages: file:line:column: error|warning: text
        private static readonly Lazy<Regex> s_originCategoryCodeTextExpression2 = new Lazy<Regex>(
            () => new Regex
                (
                @"^\s*(?<ORIGIN>(?<FILENAME>.*):(?<LOCATION>(?<LINE>[0-9]*):(?<COLUMN>[0-9]*))):(?<CATEGORY> error| warning):(?<TEXT>.*)",
                RegexOptions.IgnoreCase | RegexOptions.Compiled
                ));

        // Matches and extracts filename and location from an 'origin' element.
        private static readonly Lazy<Regex> s_filenameLocationFromOrigin = new Lazy<Regex>(
            () => new Regex
                (
                "^"                              // Beginning of line
                + @"(\d+>)?"                     // Optional ddd> project number prefix
                + "(?<FILENAME>.*)"              // Match anything.
                + @"\("                          // Find a parenthesis.
                + @"(?<LOCATION>[\,,0-9,-]*)"    // Match any combination of digits, ',' and '-'
                + @"\)\s*"                       // Find the closing paren then any amount of spaces.
                + "$",                           // End-of-line
                RegexOptions.IgnoreCase | RegexOptions.Compiled
                ));

        // Matches location that is a simple number.
        private static readonly Lazy<Regex> s_lineFromLocation = new Lazy<Regex>(
            () => new Regex                      // Example: line
                (
                "^"                              // Beginning of line
                + "(?<LINE>[0-9]*)"              // Match any number.
                + "$",                           // End-of-line
                RegexOptions.IgnoreCase | RegexOptions.Compiled
                ));

        // Matches location that is a range of lines.
        private static readonly Lazy<Regex> s_lineLineFromLocation = new Lazy<Regex>(
            () => new Regex                      // Example: line-line
                (
                "^"                              // Beginning of line
                + "(?<LINE>[0-9]*)"              // Match any number.
                + "-"                            // Dash
                + "(?<ENDLINE>[0-9]*)"           // Match any number.
                + "$",                           // End-of-line
                RegexOptions.IgnoreCase | RegexOptions.Compiled
                ));

        // Matches location that is a line and column
        private static readonly Lazy<Regex> s_lineColFromLocation = new Lazy<Regex>(
            () => new Regex                      // Example: line,col
                (
                "^"                              // Beginning of line
                + "(?<LINE>[0-9]*)"              // Match any number.
                + ","                            // Comma
                + "(?<COLUMN>[0-9]*)"            // Match any number.
                + "$",                           // End-of-line
                RegexOptions.IgnoreCase | RegexOptions.Compiled
                ));

        // Matches location that is a line and column-range
        private static readonly Lazy<Regex> s_lineColColFromLocation = new Lazy<Regex>(
            () => new Regex                      // Example: line,col-col
                (
                "^"                              // Beginning of line
                + "(?<LINE>[0-9]*)"              // Match any number.
                + ","                            // Comma
                + "(?<COLUMN>[0-9]*)"            // Match any number.
                + "-"                            // Dash
                + "(?<ENDCOLUMN>[0-9]*)"         // Match any number.
                + "$",                           // End-of-line
                RegexOptions.IgnoreCase | RegexOptions.Compiled
                ));

        // Matches location that is line,col,line,col
        private static readonly Lazy<Regex> s_lineColLineColFromLocation = new Lazy<Regex>(
            () => new Regex                      // Example: line,col,line,col
                (
                "^"                              // Beginning of line
                + "(?<LINE>[0-9]*)"              // Match any number.
                + ","                            // Comma
                + "(?<COLUMN>[0-9]*)"            // Match any number.
                + ","                            // Comma
                + "(?<ENDLINE>[0-9]*)"           // Match any number.
                + ","                            // Comma
                + "(?<ENDCOLUMN>[0-9]*)"         // Match any number.
                + "$",                           // End-of-line
                RegexOptions.IgnoreCase | RegexOptions.Compiled
                ));

        /// <summary>
        /// Represents the parts of a decomposed canonical message.
        /// </summary>
        internal sealed class Parts
        {
            /// <summary>
            /// Defines the error category\severity level.
            /// </summary>
            internal enum Category
            {
                Warning,
                Error
            }

            /// <summary>
            /// Value used for unspecified line and column numbers, which are 1-relative.
            /// </summary>
            internal const int numberNotSpecified = 0;

            /// <summary>
            /// Initializes a new instance of the <see cref="Parts"/> class.
            /// </summary>
            internal Parts()
            {
            }

            /// <summary>
            /// Name of the file or tool (not localized)
            /// </summary>
            internal string origin;

            /// <summary>
            /// The line number.
            /// </summary>
            internal int line = Parts.numberNotSpecified;

            /// <summary>
            /// The column number.
            /// </summary>
            internal int column = Parts.numberNotSpecified;

            /// <summary>
            /// The ending line number.
            /// </summary>
            internal int endLine = Parts.numberNotSpecified;

            /// <summary>
            /// The ending column number.
            /// </summary>
            internal int endColumn = Parts.numberNotSpecified;

            /// <summary>
            /// The category/severity level
            /// </summary>
            internal Category category;

            /// <summary>
            /// The sub category (localized)
            /// </summary>
            internal string subcategory;

            /// <summary>
            /// The error code (not localized)
            /// </summary>
            internal string code;

            /// <summary>
            /// The error message text (localized)
            /// </summary>
            internal string text;

#if NEVER
            // Debugging aid only; excluded from compilation. Each label names the argument it prints.
            internal new string ToString()
            {
                return String.Format
                    (
                        "Origin='{0}'\n"
                        +"Line='{1}'\n"
                        +"Column='{2}'\n"
                        +"EndLine='{3}'\n"
                        +"EndColumn='{4}'\n"
                        +"Category='{5}'\n"
                        +"Subcategory='{6}'\n"
                        +"Code='{7}'\n"
                        +"Text='{8}'\n"
                        , origin, line, column, endLine, endColumn, category.ToString(), subcategory, code, text
                    );

            }
#endif
        }

        /// <summary>
        /// A small custom int conversion method that treats invalid entries as missing (0). This is done to work around tools
        /// that don't fully conform to the canonical message format - we still want to salvage what we can from the message.
        /// </summary>
        /// <param name="value">Text to convert; may be empty because the location patterns accept zero digits.</param>
        /// <returns>'value' converted to int or 0 if it can't be parsed or is negative</returns>
        private static int ConvertToIntWithDefault(string value)
        {
            int result;
            bool success = int.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out result);

            if (!success || (result < 0))
            {
                result = CanonicalError.Parts.numberNotSpecified;
            }

            return result;
        }

        /// <summary>
        /// Maps the textual category ("error" or "warning", compared case-insensitively) to its enum value.
        /// </summary>
        /// <param name="category">Already trimmed category text captured by one of the message patterns.</param>
        /// <param name="result">The matching category when the method returns true; otherwise the enum default.</param>
        /// <returns>true if the text is a recognized category; otherwise false.</returns>
        private static bool TryParseCategory(string category, out Parts.Category result)
        {
            if (String.Equals(category, "error", StringComparison.OrdinalIgnoreCase))
            {
                result = Parts.Category.Error;
                return true;
            }

            if (String.Equals(category, "warning", StringComparison.OrdinalIgnoreCase))
            {
                result = Parts.Category.Warning;
                return true;
            }

            // Not an error\warning message.
            result = default(Parts.Category);
            return false;
        }

        /// <summary>
        /// Attempts to decompose a message using the Clang/GCC layout.
        /// </summary>
        /// <remarks>
        /// Example,
        ///      err.cpp:6:3: error: use of undeclared identifier 'force_an_error'
        ///      -----------  -----  ---------------------------------------------
        ///      Origin       Cat.   Text
        /// </remarks>
        /// <param name="message">The (possibly truncated) message to scan.</param>
        /// <param name="messageOverflow">Characters cut off from the end of the message; appended to the text.</param>
        /// <returns>Decomposed message, or null if the message does not fit this layout.</returns>
        private static Parts ParseClangGccMessage(string message, string messageOverflow)
        {
            Match match = s_originCategoryCodeTextExpression2.Value.Match(message);
            if (!match.Success)
            {
                return null;
            }

            Parts.Category parsedCategory;
            if (!TryParseCategory(match.Groups["CATEGORY"].Value.Trim(), out parsedCategory))
            {
                return null;
            }

            Parts parsedMessage = new Parts();
            parsedMessage.category = parsedCategory;
            parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
            parsedMessage.column = ConvertToIntWithDefault(match.Groups["COLUMN"].Value.Trim());
            parsedMessage.text = (match.Groups["TEXT"].Value + messageOverflow).Trim();
            parsedMessage.origin = match.Groups["FILENAME"].Value.Trim();

            // These messages carry no error code, so one is synthesized from the text that precedes
            // the first single-quoted fragment.
            // NOTE(review): String.GetHashCode is not guaranteed to be stable across processes,
            // platforms or runtime versions, so the synthesized code may differ between runs.
            string[] explodedText = parsedMessage.text.Split(new char[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);
            if (explodedText.Length > 0)
            {
                parsedMessage.code = "G" + explodedText[0].GetHashCode().ToString("X8");
            }
            else
            {
                parsedMessage.code = "G00000000";
            }

            return parsedMessage;
        }

        /// <summary>
        /// Fills in the line/column fields of <paramref name="parsedMessage"/> from a location string.
        /// </summary>
        /// <remarks>
        /// The patterns are tried in this order and the first match wins:
        ///      line
        ///      line-line
        ///      line,col
        ///      line,col-col
        ///      line,col,line,col
        /// A location that matches none of them (for example "line,col,len") leaves all fields unspecified.
        /// </remarks>
        /// <param name="location">The text found between the parentheses of the origin.</param>
        /// <param name="parsedMessage">The message whose location fields are updated.</param>
        private static void ParseLocation(string location, Parts parsedMessage)
        {
            if (location.Length == 0)
            {
                return;
            }

            Match match = s_lineFromLocation.Value.Match(location);
            if (match.Success)
            {
                parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
                return;
            }

            match = s_lineLineFromLocation.Value.Match(location);
            if (match.Success)
            {
                parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
                parsedMessage.endLine = ConvertToIntWithDefault(match.Groups["ENDLINE"].Value.Trim());
                return;
            }

            match = s_lineColFromLocation.Value.Match(location);
            if (match.Success)
            {
                parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
                parsedMessage.column = ConvertToIntWithDefault(match.Groups["COLUMN"].Value.Trim());
                return;
            }

            match = s_lineColColFromLocation.Value.Match(location);
            if (match.Success)
            {
                parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
                parsedMessage.column = ConvertToIntWithDefault(match.Groups["COLUMN"].Value.Trim());
                parsedMessage.endColumn = ConvertToIntWithDefault(match.Groups["ENDCOLUMN"].Value.Trim());
                return;
            }

            match = s_lineColLineColFromLocation.Value.Match(location);
            if (match.Success)
            {
                parsedMessage.line = ConvertToIntWithDefault(match.Groups["LINE"].Value.Trim());
                parsedMessage.column = ConvertToIntWithDefault(match.Groups["COLUMN"].Value.Trim());
                parsedMessage.endLine = ConvertToIntWithDefault(match.Groups["ENDLINE"].Value.Trim());
                parsedMessage.endColumn = ConvertToIntWithDefault(match.Groups["ENDCOLUMN"].Value.Trim());
            }
        }

        /// <summary>
        /// Decompose an error or warning message into constituent parts. If the message isn't in the canonical form, return null.
        /// </summary>
        /// <remarks>This method is thread-safe, because the Regex class is thread-safe (per MSDN).</remarks>
        /// <param name="message">The tool output line to decompose. A null message is treated as non-canonical.</param>
        /// <returns>Decomposed canonical message, or null.</returns>
        internal static Parts Parse(string message)
        {
            // A missing message cannot be canonical; report that the same way as any other non-match
            // instead of failing on the length check below.
            if (message == null)
            {
                return null;
            }

            // An unusually long string causes pathologically slow Regex back-tracking.
            // To avoid that, only scan the first 400 characters. That's enough for
            // the longest possible prefix: MAX_PATH, plus a huge subcategory string, and an error location.
            // After the regex is done, we can append the overflow.
            string messageOverflow = String.Empty;
            if (message.Length > MaxScannedMessageLength)
            {
                messageOverflow = message.Substring(MaxScannedMessageLength);
                message = message.Substring(0, MaxScannedMessageLength);
            }

            // If a tool has a large amount of output that isn't an error or warning (eg., "dir /s %hugetree%")
            // the regex below is slow. It's faster to pre-scan for "warning" and "error"
            // and bail out if neither are present.
            if (message.IndexOf("warning", StringComparison.OrdinalIgnoreCase) == -1 &&
                message.IndexOf("error", StringComparison.OrdinalIgnoreCase) == -1)
            {
                return null;
            }

            // First, split the message into its parts--Origin, SubCategory, Category, Code, Text.
            // Example,
            //      Main.cs(17,20):Command line warning CS0168: The variable 'foo' is declared but never used
            //      -------------- ------------ ------- ------  ----------------------------------------------
            //      Origin         SubCategory  Cat.    Code    Text
            //
            // To accommodate absolute filenames in Origin, tolerate a colon in the second position
            // as long as its preceded by a letter.
            //
            // Localization Note:
            //  Even in foreign-language versions of tools, the category field needs to be in English.
            //  Also, if origin is a tool name, then that needs to be in English.
            //
            //  Here's an example from the Japanese version of CL.EXE:
            //   cl : ???? ??? warning D4024 : ?????????? 'AssemblyInfo.cs' ?????????????????? ???????????
            //
            //  Here's an example from the Japanese version of LINK.EXE:
            //   AssemblyInfo.cpp : fatal error LNK1106: ???????????? ??????????????: 0x6580 ??????????
            //
            Match match = s_originCategoryCodeTextExpression.Value.Match(message);
            if (!match.Success)
            {
                // try again with the Clang/GCC matcher
                return ParseClangGccMessage(message, messageOverflow);
            }

            // Next, see if category is something that is recognized.
            Parts.Category parsedCategory;
            if (!TryParseCategory(match.Groups["CATEGORY"].Value.Trim(), out parsedCategory))
            {
                return null;
            }

            string origin = match.Groups["ORIGIN"].Value.Trim();

            Parts parsedMessage = new Parts();
            parsedMessage.category = parsedCategory;
            parsedMessage.code = match.Groups["CODE"].Value.Trim();
            parsedMessage.text = (match.Groups["TEXT"].Value + messageOverflow).Trim();
            parsedMessage.subcategory = match.Groups["SUBCATEGORY"].Value.Trim();

            // Origin is not a simple file, but it still could be of the form,
            //      foo.cpp(location)
            Match originMatch = s_filenameLocationFromOrigin.Value.Match(origin);
            if (originMatch.Success)
            {
                // The origin is in the form,
                //      foo.cpp(location)
                // Assume the filename exists, but don't verify it. What else could it be?
                parsedMessage.origin = originMatch.Groups["FILENAME"].Value.Trim();

                // Now, take apart the location.
                ParseLocation(originMatch.Groups["LOCATION"].Value.Trim(), parsedMessage);
            }
            else
            {
                // The origin does not fit the filename(location) pattern.
                parsedMessage.origin = origin;
            }

            return parsedMessage;
        }
    }
}