1#!/usr/bin/env python
2
3##===--- iwyu_test_util.py - include-what-you-use test framework ----------===##
4#
5#                     The LLVM Compiler Infrastructure
6#
7# This file is distributed under the University of Illinois Open Source
8# License. See LICENSE.TXT for details.
9#
10##===----------------------------------------------------------------------===##
11
12"""Utilities for writing tests for IWYU.
13
14This script has been tested with python 2.7, 3.1.3 and 3.2.
15In order to support all of these platforms there are a few unusual constructs:
16 * print statements require parentheses
17 * standard output must be decoded as utf-8
18 * range() must be used in place of xrange()
19 * _PortableNext() is used to obtain next iterator value
20
21There is more detail on some of these issues at:
22http://diveintopython3.org/porting-code-to-python-3-with-2to3.html
23"""
24
25__author__ = 'wan@google.com (Zhanyong Wan)'
26
27import difflib
28import operator
29import os
30import re
31import subprocess
32import sys
33
# These are the warning/error lines that iwyu.cc produces when --verbose >= 3
# An expected diagnostic is written in a test source as a '// IWYU: <text>'
# comment line (leading whitespace allowed); group 1 is the text following
# 'IWYU:', which may be empty.
_EXPECTED_DIAGNOSTICS_RE = re.compile(r'^\s*//\s*IWYU:\s*(.*)$')
# An actual diagnostic has the form
# '<path>:<line>:<column>: warning|error|fatal error: <message>'; the groups
# are (path, line number, message).  The column is matched but not captured.
_ACTUAL_DIAGNOSTICS_RE = re.compile(r'^(.*?):(\d+):\d+:\s*'
                                    r'(?:warning|error|fatal error):\s*(.*)$')

# This is the final summary output that iwyu.cc produces when --verbose >= 1
# The summary for a given source file should appear in that source file,
# surrounded by '/**** IWYU_SUMMARY' and '***** IWYU_SUMMARY */'.
_EXPECTED_SUMMARY_START_RE = re.compile(r'/\*+\s*IWYU_SUMMARY')
_EXPECTED_SUMMARY_END_RE = re.compile(r'\**\s*IWYU_SUMMARY\s*\*+/')
# An actual summary begins with '<file> should add these lines:' (group 1 is
# the file name) and is terminated by a line consisting of '---'.
_ACTUAL_SUMMARY_START_RE = re.compile(r'^(.*?) should add these lines:$')
_ACTUAL_SUMMARY_END_RE = re.compile(r'^---$')
# First line of the removal list inside an actual summary.
_ACTUAL_REMOVAL_LIST_START_RE = re.compile(r'.* should remove these lines:$')
# One-line summary of the form '(<file> has correct #includes/fwd-decls)';
# group 1 is the file name.
_NODIFFS_RE = re.compile(r'^\((.*?) has correct #includes/fwd-decls\)$')
48
49
def _PortableNext(iterator):
  """Advances `iterator` by one step and returns the value it produced.

  Python 3 iterators implement __next__() and are advanced through the
  next() builtin; old Python 2 iterators only offer a next() method.

  Arguments:
    iterator: the iterator to advance.

  Returns:
    The next value produced by the iterator.

  Raises:
    StopIteration: if the iterator is exhausted.
  """
  # Check for __next__ first so that a Python 3 object which merely happens
  # to have an unrelated 'next' attribute is still advanced correctly.
  if hasattr(iterator, '__next__'):
    return next(iterator)    # Python 3
  return iterator.next()     # Python 2
55
56
def _Which(program, paths):
  """Returns the first existing file '<path>/<program>', or None.

  On Windows, '.exe' is appended to `program` unless the name already ends
  with it (compared case-insensitively).

  Arguments:
    program: file name of the executable to look for.
    paths: directories to probe, in order; each is normalized first.

  Returns:
    The joined path of the first candidate that is a regular file, or None
    if no directory contains one.
  """
  needs_exe_suffix = (sys.platform == 'win32' and
                      not program.lower().endswith('.exe'))
  if needs_exe_suffix:
    program = program + '.exe'

  candidates = (os.path.join(os.path.normpath(directory), program)
                for directory in paths)
  for candidate in candidates:
    if os.path.isfile(candidate):
      return candidate
  return None
68
69
# Cached path of the IWYU executable under test; None until it has been set
# explicitly or located by searching the directories below.
_IWYU_PATH = None
# Directories from the PATH environment variable, with surrounding double
# quotes stripped.  PATH may be unset (e.g. in a scrubbed environment); treat
# that as "no system directories" rather than raising KeyError while this
# module is being imported.
_SYSTEM_PATHS = ([p.strip('"') for p in os.environ['PATH'].split(os.pathsep)]
                 if 'PATH' in os.environ else [])
# Relative build-output directories that are probed before _SYSTEM_PATHS.
# Being relative, they are resolved against the current working directory.
_IWYU_PATHS = [
    '../../../../Debug+Asserts/bin',
    '../../../../Release+Asserts/bin',
    '../../../../Release/bin',
    '../../../../build/Debug+Asserts/bin',
    '../../../../build/Release+Asserts/bin',
    '../../../../build/Release/bin',
    # Linux/Mac OS X default out-of-tree paths.
    '../../../../../build/Debug+Asserts/bin',
    '../../../../../build/Release+Asserts/bin',
    '../../../../../build/Release/bin',
    # Windows default out-of-tree paths.
    '../../../../../build/bin/Debug',
    '../../../../../build/bin/Release',
    '../../../../../build/bin/MinSizeRel',
    '../../../../../build/bin/RelWithDebInfo',
    ]
89
90
def SetIwyuPath(iwyu_path):
  """Set the path to the IWYU executable under test.

  Stores `iwyu_path` in the module-level _IWYU_PATH.  _GetIwyuPath() returns
  that value unchanged whenever it is non-empty, so no directory search is
  performed afterwards.

  Args:
    iwyu_path: Path to the IWYU executable to use.
  """
  global _IWYU_PATH
  _IWYU_PATH = iwyu_path
96
97
def _GetIwyuPath():
  """Returns the path to the IWYU executable, locating it on first use.

  A non-empty module-level _IWYU_PATH is returned as-is.  Otherwise the
  directories in _IWYU_PATHS followed by _SYSTEM_PATHS are searched for
  'include-what-you-use' and the result is cached in _IWYU_PATH.

  Returns:
    The path to the IWYU executable.

  Raises:
    IOError: if none of the searched directories contains the executable.
  """
  global _IWYU_PATH

  if _IWYU_PATH:
    return _IWYU_PATH   # Set explicitly or found by an earlier call.

  search_dirs = _IWYU_PATHS + _SYSTEM_PATHS
  _IWYU_PATH = _Which('include-what-you-use', search_dirs)
  if _IWYU_PATH:
    return _IWYU_PATH

  raise IOError('Failed to locate IWYU.\nSearched\n %s' %
                '\n '.join(search_dirs))
110
111
def _ShellQuote(arg):
  """Wraps `arg` in double quotes if it contains a space.

  Arguments:
    arg: a command-line argument string.

  Returns:
    `arg` surrounded by double quotes when it contains at least one space
    character; otherwise `arg` unchanged.  No other characters are escaped.
  """
  return '"' + arg + '"' if ' ' in arg else arg
116
117
def _GetCommandOutput(command):
  """Runs `command` through the shell and returns its output as lines.

  Standard error is merged into standard output.  The captured bytes are
  decoded as utf-8.

  Arguments:
    command: the command line to execute, as a single string.

  Returns:
    A list of output lines with line endings kept; occurrences of
    os.linesep in each line are replaced by a newline character.
  """
  process = subprocess.Popen(command,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
  raw_output = process.communicate()[0]
  text = raw_output.decode("utf-8")
  return [piece.replace(os.linesep, '\n') for piece in text.splitlines(True)]
127
128
129def _GetMatchingLines(regex, file_names):
130  """Returns a map: file location => string matching `regex`.
131
132  File location is a tuple (file_name, line number starting from 1)."""
133
134  loc_to_line = {}
135  for file_name in file_names:
136    with open(file_name) as fileobj:
137      for line_num, line in enumerate(fileobj):
138        m = regex.match(line)
139        if m:
140          loc_to_line[file_name, line_num + 1] = m.group()
141  return loc_to_line
142
143
def _GetExpectedDiagnosticRegexes(spec_loc_to_line):
  """Returns a map: source file location => list of regexes for that line.

  Each spec line carries one regex.  A run of spec lines on consecutive line
  numbers of the same file applies to the first line after the run, so the
  regexes of the whole run are attached to that location.

  Arguments:
    spec_loc_to_line: a map (file_name, line number) => spec line text.
      Every line must match _EXPECTED_DIAGNOSTICS_RE.

  Returns:
    A map (file_name, line number) => list of compiled regexes for the
    diagnostics expected on that line.
  """
  regexes_by_target = {}
  pending = []  # Regexes gathered from the current run of spec lines.
  for path, line_num in sorted(spec_loc_to_line):
    spec = _EXPECTED_DIAGNOSTICS_RE.match(
        spec_loc_to_line[path, line_num].strip())
    assert spec is not None, "Input should contain only matching lines."
    # An omitted regex means the diagnostic message is of no interest, so
    # anything is accepted.
    pending.append(re.compile(spec.group(1) or r'.*'))

    target = (path, line_num + 1)
    if target in spec_loc_to_line:
      continue  # The run continues on the next line.
    regexes_by_target[target] = pending
    pending = []

  return regexes_by_target
169
170
def _GetActualDiagnostics(actual_output):
  """Returns a map: source file location => list of diagnostics on that line.

  Lines that do not match _ACTUAL_DIAGNOSTICS_RE are ignored.

  Arguments:
    actual_output: the lines of output to scan.

  Returns:
    A map (path, line number) => list of diagnostic messages reported for
    that location.  The elements of each list are unique and sorted.
  """
  messages_by_loc = {}
  for raw_line in actual_output:
    parsed = _ACTUAL_DIAGNOSTICS_RE.match(raw_line.strip())
    if not parsed:
      continue
    path, line_num, message = parsed.groups()
    # A set drops repeated messages for the same location.
    messages_by_loc.setdefault((path, int(line_num)), set()).add(message)

  return dict((loc, sorted(messages))
              for loc, messages in messages_by_loc.items())
189
190
def _StripCommentFromLine(line):
  """Removes the trailing "// ..." comment from the given line.

  Arguments:
    line: the line to process.

  Returns:
    The text before the last '//' on the line, stripped of surrounding
    whitespace and terminated by a newline.  If the line contains no '//',
    it is returned unchanged.
  """
  found = re.match(r'(.*)//', line)
  if found is None:
    return line
  return found.group(1).strip() + '\n'
199
200
def _NormalizeSummaryLineNumbers(line):
  """Replaces the comment '// lines <number>-<number>' with a stable form.

  Line numbers in the test sources change often, which would make it a pain
  to keep '// lines <number>-<number>' comments accurate in the 'golden'
  output.  The comment is therefore rewritten so that it only records the
  size of the span: '// lines XX-XX' when both numbers are written
  identically, '// lines XX-XX+<difference>' otherwise.  For instance,
  '// lines 12-12' maps to '// lines XX-XX', while '// lines 12-14' maps to
  '// lines XX-XX+2'.

  Arguments:
    line: the line to be normalized.

  Returns:
    A new line with the '// lines' comment normalized as described above;
    everything from the comment onward is replaced and the result ends in a
    newline.  If no '// lines' comment is present, returns the original
    line.
  """
  found = re.search('// lines ([0-9]+)-([0-9]+)', line)
  if found is None:
    return line

  prefix = line[:found.start()]
  first, last = found.group(1), found.group(2)
  if first != last:
    return prefix + '// lines XX-XX+%d\n' % (int(last) - int(first))
  return prefix + '// lines XX-XX\n'
228
229
def _NormalizeSummaryLine(line):
  """Alphabetically sorts the symbols in a '// for XXX, YYY, ZZZ' comment.

  Most iwyu summary lines have the form
     #include <foo.h>   // for XXX, YYY, ZZZ
  where XXX, YYY, ZZZ are the symbols this file uses from foo.h.  They are
  emitted in frequency order, which changes so often as a test is augmented
  that it is impractical to test.  Sorting the symbols alphabetically gives
  up testing the frequency ordering here in exchange for easier test
  development.

  The '// for' comment is also moved to exactly two spaces after the
  preceding text.  Indentation is thus not tested here either (though
  iwyu_output_test.cc does), but files can be renamed without having to
  reformat each test.

  Arguments:
    line: one line of the summary output

  Returns:
    A normalized form of 'line', with the 'why' symbols sorted and the
    whitespace before the 'why' comment collapsed.  Lines without a
    '  // for' comment are returned unchanged.
  """
  found = re.match(r'(.*?)\s*  // for (.*)', line)
  if found is None:
    return line
  directive, why = found.groups()
  ordered_symbols = sorted(why.strip().split(', '))
  return '%s  // for %s\n' % (directive, ', '.join(ordered_symbols))
262
263
def _GetExpectedSummaries(files):
  """Returns a map: source file => list of iwyu summary lines.

  The expected summary of a file is the text between a line matching
  _EXPECTED_SUMMARY_START_RE and the following line matching
  _EXPECTED_SUMMARY_END_RE.  Lines starting with '//' are skipped, and blank
  lines at the beginning and end of each summary are dropped.  Files without
  a summary section do not appear in the result.

  Arguments:
    files: names of the source files to scan.

  Returns:
    A map file name => list of summary lines (line endings kept).
  """
  expected_summaries = {}
  for f in files:
    in_summary = False
    # 'with' guarantees the file is closed even if reading it raises.
    with open(f) as fh:
      for line in fh:
        if _EXPECTED_SUMMARY_START_RE.match(line):
          in_summary = True
          # A new start marker begins a fresh summary for this file.
          expected_summaries[f] = []
        elif _EXPECTED_SUMMARY_END_RE.match(line):
          in_summary = False
        elif re.match(r'^\s*//', line):
          pass   # ignore comment lines
        elif in_summary:
          expected_summaries[f].append(line)

  # Get rid of blank lines at the beginning and end of each summary.
  for lines in expected_summaries.values():
    start, end = 0, len(lines)
    while end > start and lines[end - 1] == '\n':
      end -= 1
    while start < end and lines[start] == '\n':
      start += 1
    # Trim in place; the tail goes first so that `start` stays valid.
    del lines[end:]
    del lines[:start]

  return expected_summaries
291
292
def _GetActualSummaries(output):
  """Returns a map: source file => list of iwyu summary lines.

  Arguments:
    output: the lines of output to scan.

  Returns:
    A map file name => list of summary lines for that file.  The first
    element is the unmodified line that opened the summary (or, for a file
    without diffs, the single 'has correct #includes' line); the remaining
    lines are normalized.
  """
  summaries = {}
  current_file = None
  in_addition_section = False  # Inside the "should add these lines" section?
  for line in output:
    # A file with no diffs gets a different, one-line summary.
    nodiffs = _NODIFFS_RE.match(line)
    if nodiffs:
      summaries[nodiffs.group(1)] = [line]
      continue

    start = _ACTUAL_SUMMARY_START_RE.match(line)
    if start:
      current_file = start.group(1)
      in_addition_section = True
      summaries[current_file] = [line]
      continue

    if _ACTUAL_SUMMARY_END_RE.match(line):
      current_file = None
      continue

    if not current_file:
      continue  # Not inside any summary.

    if _ACTUAL_REMOVAL_LIST_START_RE.match(line):
      in_addition_section = False
    # Swap line numbers in comments for something more stable.
    line = _NormalizeSummaryLineNumbers(line)
    if in_addition_section:
      # Every #include of the "should add" list shows up again in the full
      # include list, so its symbol list need not be verified twice; it is
      # dropped here for easy test maintenance.
      line = _StripCommentFromLine(line)
    else:
      line = _NormalizeSummaryLine(line)
    summaries[current_file].append(line)

  return summaries
329
330
def _VerifyDiagnosticsAtLoc(loc_str, regexes, diagnostics):
  """Verify the diagnostics at the given location; return a list of failures.

  Every diagnostic is expected to be matched (via search) by exactly one
  regex and every regex to match exactly one diagnostic.

  Arguments:
    loc_str: text that is prepended to each failure message.
    regexes: compiled regexes expected to match at this location.
    diagnostics: diagnostic messages actually reported at this location.

  Returns:
    A list of failure messages: first those about the diagnostics
    (unexpected, or matched by several regexes), then those about the
    regexes (unmatched, or matching several diagnostics).
  """
  # hits[r][d] tells whether regex r matches diagnostic d.
  hits = [[regex.search(diagnostic) is not None for diagnostic in diagnostics]
          for regex in regexes]

  problems = []

  # Diagnostics matched by no regex or by more than one.
  for d_index, diagnostic in enumerate(diagnostics):
    patterns = [regex.pattern
                for r_index, regex in enumerate(regexes)
                if hits[r_index][d_index]]
    if not patterns:
      problems.append('Unexpected diagnostic:\n%s\n'
                      % diagnostic)
    elif len(patterns) > 1:
      problems.append(
          'The diagnostic message:\n%s\n'
          'matches multiple regexes:\n%s'
          % (diagnostic, '\n'.join(patterns)))

  # Regexes matching no diagnostic or more than one.
  for regex, row in zip(regexes, hits):
    matched = [diagnostic
               for diagnostic, hit in zip(diagnostics, row) if hit]
    if not matched:
      problems.append('Unmatched regex:\n%s\n'
                      % regex.pattern)
    elif len(matched) > 1:
      problems.append(
          'The regex:\n%s\n'
          'matches multiple diagnostics:\n%s'
          % (regex.pattern, '\n'.join(matched)))

  return ['%s %s' % (loc_str, problem) for problem in problems]
370
371
def _CompareExpectedAndActualDiagnostics(expected_diagnostic_regexes,
                                         actual_diagnostics):
  """Verify that the diagnostics are as expected; return a list of failures.

  Arguments:
    expected_diagnostic_regexes: a map location => list of compiled regexes.
    actual_diagnostics: a map location => list of diagnostic messages.
      Locations are (path, line number) tuples.

  Returns:
    The failure messages for all locations present in either map, visited
    in sorted order.
  """
  all_locs = set(actual_diagnostics) | set(expected_diagnostic_regexes)

  failures = []
  for loc in sorted(all_locs):
    # A location missing from one side contributes an empty list there.
    failures.extend(_VerifyDiagnosticsAtLoc(
        '\n%s:%s:' % loc,
        expected_diagnostic_regexes.get(loc, []),
        actual_diagnostics.get(loc, [])))
  return failures
385
386
def _CompareExpectedAndActualSummaries(expected_summaries, actual_summaries):
  """Verify that the summaries are as expected; return a list of failures.

  Arguments:
    expected_summaries: a map file name => list of expected summary lines.
    actual_summaries: a map file name => list of actual summary lines.

  Returns:
    A list of text fragments describing, for every file whose expected and
    actual summaries differ, the unified diff between them.  Empty if all
    summaries agree.
  """
  failures = []
  all_files = set(actual_summaries) | set(expected_summaries)
  for name in sorted(all_files):
    diff = difflib.unified_diff(expected_summaries.get(name, []),
                                actual_summaries.get(name, []))
    try:
      # Consume the first header line of the diff; an empty diff has none.
      _PortableNext(diff)
    except StopIteration:
      continue                # The summaries are identical.
    failures.append('\n')
    failures.append('Unexpected summary diffs for %s:\n' % name)
    failures.extend(diff)
    failures.append('---\n')
  return failures
404
405
def TestIwyuOnRelativeFile(test_case, cc_file, cpp_files_to_check,
                           iwyu_flags=None, clang_flags=None, verbose=False):
  """Checks running IWYU on the given .cc file.

  Runs IWYU on `cc_file`, prints its output, and compares the reported
  diagnostics and summaries against the expectations embedded in
  `cpp_files_to_check`.  Any mismatch is reported through
  test_case.assertTrue().

  Args:
    test_case: A googletest.TestCase instance.
    cc_file: The name of the file to test, relative to the current dir.
    cpp_files_to_check: A list of filenames for the files
              to check the diagnostics on, relative to the current dir.
    iwyu_flags: Extra command-line flags to pass to iwyu.
    clang_flags: Extra command-line flags to pass to clang, for example
              "-std=c++11".
    verbose: Whether to display verbose output.
  """
  user_iwyu_flags = iwyu_flags or []  # None means "no extra flags".
  clang_args = clang_flags or []

  # Verbose level 3 is required to verify the individual diagnostics.  The
  # level can be overridden by the IWYU_VERBOSE environment variable, or by
  # iwyu_flags, for easy debugging.  (The envvar-based flag goes first, so
  # that user flags can override it later.)
  verbose_flag = '--verbose=%s' % os.getenv('IWYU_VERBOSE', '3')

  # clang reads iwyu flags after the -Xiwyu clang flag: '-Xiwyu --verbose=6'
  xiwyu_args = ['-Xiwyu ' + flag for flag in [verbose_flag] + user_iwyu_flags]

  # TODO(csilvers): verify that has exit-status 0.
  cmd = '%s %s %s %s' % (
    _ShellQuote(_GetIwyuPath()),
    ' '.join(xiwyu_args),
    ' '.join(clang_args),
    cc_file)
  if verbose:
    print('>>> Running %s' % cmd)
  output = _GetCommandOutput(cmd)
  print(''.join(output))
  sys.stdout.flush()      # don't commingle this output with the failure output

  # Per-line diagnostics.
  spec_lines = _GetMatchingLines(_EXPECTED_DIAGNOSTICS_RE, cpp_files_to_check)
  expected_regexes = _GetExpectedDiagnosticRegexes(spec_lines)
  actual_diagnostics = _GetActualDiagnostics(output)
  failures = _CompareExpectedAndActualDiagnostics(expected_regexes,
                                                  actual_diagnostics)

  # End-of-parsing suggestions.
  expected_summaries = _GetExpectedSummaries(cpp_files_to_check)
  actual_summaries = _GetActualSummaries(output)
  failures += _CompareExpectedAndActualSummaries(expected_summaries,
                                                 actual_summaries)

  test_case.assertTrue(not failures, ''.join(failures))
456