#!/usr/bin/env python

##===--- iwyu_test_util.py - include-what-you-use test framework ----------===##
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##

"""Utilities for writing tests for IWYU.

This script has been tested with python 2.7, 3.1.3 and 3.2.
In order to support all of these platforms there are a few unusual constructs:
 * print statements require parentheses
 * standard output must be decoded as utf-8
 * range() must be used in place of xrange()
 * _PortableNext() is used to obtain next iterator value

There is more detail on some of these issues at:
http://diveintopython3.org/porting-code-to-python-3-with-2to3.html
"""

__author__ = 'wan@google.com (Zhanyong Wan)'

import difflib
import operator
import os
import re
import subprocess
import sys

# These are the warning/error lines that iwyu.cc produces when --verbose >= 3
_EXPECTED_DIAGNOSTICS_RE = re.compile(r'^\s*//\s*IWYU:\s*(.*)$')
_ACTUAL_DIAGNOSTICS_RE = re.compile(r'^(.*?):(\d+):\d+:\s*'
                                    r'(?:warning|error|fatal error):\s*(.*)$')

# This is the final summary output that iwyu.cc produces when --verbose >= 1
# The summary for a given source file should appear in that source file,
# surrounded by '/**** IWYU_SUMMARY' and '***** IWYU_SUMMARY */'.
_EXPECTED_SUMMARY_START_RE = re.compile(r'/\*+\s*IWYU_SUMMARY')
_EXPECTED_SUMMARY_END_RE = re.compile(r'\**\s*IWYU_SUMMARY\s*\*+/')
_ACTUAL_SUMMARY_START_RE = re.compile(r'^(.*?) should add these lines:$')
_ACTUAL_SUMMARY_END_RE = re.compile(r'^---$')
_ACTUAL_REMOVAL_LIST_START_RE = re.compile(r'.* should remove these lines:$')
_NODIFFS_RE = re.compile(r'^\((.*?) has correct #includes/fwd-decls\)$')


def _PortableNext(iterator):
    """Advances `iterator` by one item and returns that item.

    Raises:
      StopIteration: if the iterator is already exhausted.
    """
    if hasattr(iterator, 'next'):
        return iterator.next()  # Python 2.4-2.6
    return next(iterator)  # Python 3


def _Which(program, paths):
    """Searches specified paths for program.

    Arguments:
      program: file name of the executable; '.exe' is appended on win32
               when it is not already present.
      paths: directories to search, in priority order.

    Returns:
      The first existing '<path>/<program>' file, or None if there is none.
    """
    if sys.platform == 'win32' and not program.lower().endswith('.exe'):
        program += '.exe'

    for path in paths:
        candidate = os.path.join(os.path.normpath(path), program)
        if os.path.isfile(candidate):
            return candidate

    return None


_IWYU_PATH = None
# Use .get() so that importing this module does not fail when PATH is unset.
_SYSTEM_PATHS = [p.strip('"')
                 for p in os.environ.get("PATH", "").split(os.pathsep)]
_IWYU_PATHS = [
    '../../../../Debug+Asserts/bin',
    '../../../../Release+Asserts/bin',
    '../../../../Release/bin',
    '../../../../build/Debug+Asserts/bin',
    '../../../../build/Release+Asserts/bin',
    '../../../../build/Release/bin',
    # Linux/Mac OS X default out-of-tree paths.
    '../../../../../build/Debug+Asserts/bin',
    '../../../../../build/Release+Asserts/bin',
    '../../../../../build/Release/bin',
    # Windows default out-of-tree paths.
    '../../../../../build/bin/Debug',
    '../../../../../build/bin/Release',
    '../../../../../build/bin/MinSizeRel',
    '../../../../../build/bin/RelWithDebInfo',
]


def SetIwyuPath(iwyu_path):
    """Set the path to the IWYU executable under test.
    """
    global _IWYU_PATH
    _IWYU_PATH = iwyu_path


def _GetIwyuPath():
    """Returns the path to IWYU or raises IOError if it cannot be found."""
    global _IWYU_PATH

    if not _IWYU_PATH:
        # Well-known build output directories take priority over PATH.
        iwyu_paths = _IWYU_PATHS + _SYSTEM_PATHS
        _IWYU_PATH = _Which('include-what-you-use', iwyu_paths)
        if not _IWYU_PATH:
            raise IOError('Failed to locate IWYU.\nSearched\n %s' %
                          '\n '.join(iwyu_paths))

    return _IWYU_PATH


def _ShellQuote(arg):
    """Wraps `arg` in double quotes if it contains a space."""
    if ' ' in arg:
        arg = '"' + arg + '"'
    return arg


def _GetCommandOutput(command):
    """Runs `command` in a shell and returns its output as a list of lines.

    stderr is merged into stdout.  Every returned line keeps its line
    terminator, with the platform line separator normalized to '\\n'.
    """
    # NOTE: shell=True is intentional; callers pass a single command string
    # and rely on the shell to split it into arguments.  Only use this with
    # trusted input.
    p = subprocess.Popen(command,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    stdout, _ = p.communicate()
    lines = stdout.decode("utf-8").splitlines(True)
    return [line.replace(os.linesep, '\n') for line in lines]


def _GetMatchingLines(regex, file_names):
    """Returns a map: file location => string matching `regex`.

    File location is a tuple (file_name, line number starting from 1)."""

    loc_to_line = {}
    for file_name in file_names:
        with open(file_name) as fileobj:
            for line_num, line in enumerate(fileobj, 1):
                m = regex.match(line)
                if m:
                    loc_to_line[file_name, line_num] = m.group()
    return loc_to_line


def _GetExpectedDiagnosticRegexes(spec_loc_to_line):
    """Returns a map: source file location => list of regexes for that line.

    A run of consecutive '// IWYU:' spec lines applies to the first line
    after the run.
    """

    # Maps a source file line location to a list of regexes for diagnostics
    # that should be generated for that line.
    expected_diagnostic_regexes = {}
    regexes = []
    for loc in sorted(spec_loc_to_line.keys()):
        line = spec_loc_to_line[loc]
        m = _EXPECTED_DIAGNOSTICS_RE.match(line.strip())
        assert m is not None, "Input should contain only matching lines."
        regex = m.group(1)
        if not regex:
            # Allow the regex to be omitted if we are uninterested in the
            # diagnostic message.
            regex = r'.*'
        regexes.append(re.compile(regex))
        # Do we have a spec on the next line?
        path, line_num = loc
        next_line_loc = path, line_num + 1
        if next_line_loc not in spec_loc_to_line:
            expected_diagnostic_regexes[next_line_loc] = regexes
            regexes = []

    return expected_diagnostic_regexes


def _GetActualDiagnostics(actual_output):
    """Returns a map: source file location => list of diagnostics on that line.

    The elements of the list are unique and sorted."""

    messages_by_loc = {}
    for line in actual_output:
        m = _ACTUAL_DIAGNOSTICS_RE.match(line.strip())
        if m:
            path, line_num, message = m.groups()
            loc = path, int(line_num)
            messages_by_loc.setdefault(loc, set()).add(message)

    actual_diagnostics = {}
    for loc, messages in messages_by_loc.items():
        actual_diagnostics[loc] = sorted(messages)

    return actual_diagnostics


def _StripCommentFromLine(line):
    """Removes the "// ..." comment at the end of the given line."""

    # The greedy '.*' makes this cut at the last '//' on the line.
    m = re.match(r'(.*)//', line)
    if m:
        return m.group(1).strip() + '\n'
    return line


def _NormalizeSummaryLineNumbers(line):
    """Replaces the line numbers in a '// lines <number>-<number>' comment.

    Because line numbers in the source code often change, it's a pain to
    keep the '// lines <number>-<number>' comments accurate in our
    'golden' output.  Instead, we normalize these iwyu comments to just
    say how many line numbers are listed by mapping the output to
    '// lines XX-XX' (for one-line spans) or '// lines XX-XX+<number>'.
    For instance, '// lines 12-12' would map to '// lines XX-XX', while
    '// lines 12-14' would map to '// lines XX-XX+2'.

    Arguments:
      line: the line to be normalized.

    Returns:
      A new line with the '// lines' comment, if any, normalized as
      described above.  If no '// lines' comment is present, returns
      the original line.
    """
    m = re.search(r'// lines ([0-9]+)-([0-9]+)', line)
    if not m:
        return line
    if m.group(1) == m.group(2):
        return line[:m.start()] + '// lines XX-XX\n'
    num_lines = int(m.group(2)) - int(m.group(1))
    return line[:m.start()] + '// lines XX-XX+%d\n' % num_lines


def _NormalizeSummaryLine(line):
    """Alphabetically sorts the symbols in the '// for XXX, YYY, ZZZ' comments.

    Most iwyu summary lines have the form
       #include <foo.h>  // for XXX, YYY, ZZZ
    XXX, YYY, ZZZ are symbols that this file uses from foo.h.  They are
    sorted in frequency order, but that changes so often as the test is
    augmented, that it's impractical to test.  We just sort the symbols
    alphabetically and compare that way.  This means we never test the
    frequency ordering here, but that's a small price to pay for easier
    testing development.

    We also always move the '// for' comment to be exactly two spaces
    after the '#include' text.  Again, this means we don't test the
    indenting correctly (though iwyu_output_test.cc does), but allows us
    to rename filenames without having to reformat each test.  This is
    particularly important when opensourcing, since the filenames will
    be different in opensource-land than they are inside google.

    Arguments:
      line: one line of the summary output

    Returns:
      A normalized form of 'line', with the 'why' symbols sorted and
      whitespace before the 'why' comment collapsed.
    """
    m = re.match(r'(.*?)\s* // for (.*)', line)
    if not m:
        return line
    symbols = sorted(m.group(2).strip().split(', '))
    # Exactly two spaces before the comment, as documented above; expected
    # summaries are compared verbatim against this output.
    return '%s  // for %s\n' % (m.group(1), ', '.join(symbols))


def _GetExpectedSummaries(files):
    """Returns a map: source file => list of iwyu summary lines.

    The summary of a file is the text between its IWYU_SUMMARY start and
    end markers, with '//' comment lines dropped and leading/trailing
    blank lines removed.  Files without a summary get no entry.
    """

    expected_summaries = {}
    for f in files:
        in_summary = False
        with open(f) as fh:
            for line in fh:
                if _EXPECTED_SUMMARY_START_RE.match(line):
                    in_summary = True
                    expected_summaries[f] = []
                elif _EXPECTED_SUMMARY_END_RE.match(line):
                    in_summary = False
                elif re.match(r'^\s*//', line):
                    pass  # ignore comment lines
                elif in_summary:
                    expected_summaries[f].append(line)

    # Get rid of blank lines at the beginning and end of the each summary.
    for summary in expected_summaries.values():
        while summary and summary[-1] == '\n':
            summary.pop()
        while summary and summary[0] == '\n':
            summary.pop(0)

    return expected_summaries


def _GetActualSummaries(output):
    """Returns a map: source file => list of iwyu summary lines."""

    actual_summaries = {}
    file_being_summarized = None
    # Are we in the "should add these lines" section?
    in_addition_section = False
    for line in output:
        # For files with no diffs, we print a different (one-line) summary.
        m = _NODIFFS_RE.match(line)
        if m:
            actual_summaries[m.group(1)] = [line]
            continue

        m = _ACTUAL_SUMMARY_START_RE.match(line)
        if m:
            file_being_summarized = m.group(1)
            in_addition_section = True
            actual_summaries[file_being_summarized] = [line]
        elif _ACTUAL_SUMMARY_END_RE.match(line):
            file_being_summarized = None
        elif file_being_summarized:
            if _ACTUAL_REMOVAL_LIST_START_RE.match(line):
                in_addition_section = False
            # Replace any line numbers in comments with something more stable.
            line = _NormalizeSummaryLineNumbers(line)
            if in_addition_section:
                # Each #include in the "should add" list will appear later in
                # the full include list.  There's no need to verify its symbol
                # list twice.  Therefore we remove the symbol list here for
                # easy test maintenance.
                line = _StripCommentFromLine(line)
            else:
                line = _NormalizeSummaryLine(line)
            actual_summaries[file_being_summarized].append(line)

    return actual_summaries


def _VerifyDiagnosticsAtLoc(loc_str, regexes, diagnostics):
    """Verify the diagnostics at the given location; return a list of failures.

    Every diagnostic must be matched by exactly one regex and every regex
    must match exactly one diagnostic.  Each failure message is prefixed
    with `loc_str`.
    """

    # Find out which regexes match a diagnostic and vice versa.
    matching_regexes = [[] for unused_i in range(len(diagnostics))]
    matched_diagnostics = [[] for unused_i in range(len(regexes))]
    for (r_index, regex) in enumerate(regexes):
        for (d_index, diagnostic) in enumerate(diagnostics):
            if regex.search(diagnostic):
                matching_regexes[d_index].append(r_index)
                matched_diagnostics[r_index].append(d_index)

    failure_messages = []

    # Collect unmatched diagnostics and multiply matched diagnostics.
    for (d_index, r_indexes) in enumerate(matching_regexes):
        if not r_indexes:
            failure_messages.append('Unexpected diagnostic:\n%s\n'
                                    % diagnostics[d_index])
        elif len(r_indexes) > 1:
            failure_messages.append(
                'The diagnostic message:\n%s\n'
                'matches multiple regexes:\n%s'
                % (diagnostics[d_index],
                   '\n'.join([regexes[r_index].pattern
                              for r_index in r_indexes])))

    # Collect unmatched regexes and regexes with multiple matches.
    for (r_index, d_indexes) in enumerate(matched_diagnostics):
        if not d_indexes:
            failure_messages.append('Unmatched regex:\n%s\n'
                                    % regexes[r_index].pattern)
        elif len(d_indexes) > 1:
            failure_messages.append(
                'The regex:\n%s\n'
                'matches multiple diagnostics:\n%s'
                % (regexes[r_index].pattern,
                   '\n'.join([diagnostics[d_index]
                              for d_index in d_indexes])))

    return ['%s %s' % (loc_str, message) for message in failure_messages]


def _CompareExpectedAndActualDiagnostics(expected_diagnostic_regexes,
                                         actual_diagnostics):
    """Verify that the diagnostics are as expected; return a list of failures."""

    failures = []
    for loc in sorted(set(actual_diagnostics.keys()) |
                      set(expected_diagnostic_regexes.keys())):
        # Find all regexes and actual diagnostics for the given location.
        regexes = expected_diagnostic_regexes.get(loc, [])
        diagnostics = actual_diagnostics.get(loc, [])
        failures += _VerifyDiagnosticsAtLoc('\n%s:%s:' % loc, regexes,
                                            diagnostics)

    return failures


def _CompareExpectedAndActualSummaries(expected_summaries, actual_summaries):
    """Verify that the summaries are as expected; return a list of failures."""

    failures = []
    for loc in sorted(set(actual_summaries.keys()) |
                      set(expected_summaries.keys())):
        this_failure = difflib.unified_diff(expected_summaries.get(loc, []),
                                            actual_summaries.get(loc, []))
        try:
            # read past the 'what files are this' header
            _PortableNext(this_failure)
        except StopIteration:
            continue  # empty diff
        failures.append('\n')
        failures.append('Unexpected summary diffs for %s:\n' % loc)
        failures.extend(this_failure)
        failures.append('---\n')
    return failures


def TestIwyuOnRelativeFile(test_case, cc_file, cpp_files_to_check,
                           iwyu_flags=None, clang_flags=None, verbose=False):
    """Checks running IWYU on the given .cc file.

    Args:
      test_case: A googletest.TestCase instance.
      cc_file: The name of the file to test, relative to the current dir.
      cpp_files_to_check: A list of filenames for the files
                          to check the diagnostics on, relative to the
                          current dir.
      iwyu_flags: Extra command-line flags to pass to iwyu.
      clang_flags: Extra command-line flags to pass to clang, for example
                   "-std=c++11".
      verbose: Whether to display verbose output.
    """
    iwyu_flags = iwyu_flags or []  # Make sure iwyu_flags is a list.
    clang_flags = clang_flags or []  # Make sure this is a list

    # Require verbose level 3 so that we can verify the individual diagnostics.
    # We allow the level to be overridden by the IWYU_VERBOSE environment
    # variable, or by iwyu_flags, for easy debugging.  (We put the
    # envvar-based flag first, so user flags can override it later.)
    iwyu_flags = (['--verbose=%s' % os.getenv('IWYU_VERBOSE', '3')]
                  + iwyu_flags)

    # clang reads iwyu flags after the -Xiwyu clang flag: '-Xiwyu --verbose=6'
    iwyu_flags = ['-Xiwyu ' + flag for flag in iwyu_flags]

    # TODO(csilvers): verify that has exit-status 0.
    cmd = '%s %s %s %s' % (
        _ShellQuote(_GetIwyuPath()),
        ' '.join(iwyu_flags),
        ' '.join(clang_flags),
        cc_file)
    if verbose:
        print('>>> Running %s' % cmd)
    output = _GetCommandOutput(cmd)
    print(''.join(output))
    sys.stdout.flush()  # don't commingle this output with the failure output

    expected_diagnostics = _GetMatchingLines(
        _EXPECTED_DIAGNOSTICS_RE, cpp_files_to_check)
    failures = _CompareExpectedAndActualDiagnostics(
        _GetExpectedDiagnosticRegexes(expected_diagnostics),
        _GetActualDiagnostics(output))

    # Also figure out if the end-of-parsing suggestions match up.
    failures += _CompareExpectedAndActualSummaries(
        _GetExpectedSummaries(cpp_files_to_check),
        _GetActualSummaries(output))

    test_case.assertTrue(not failures, ''.join(failures))