1#!/usr/local/bin/python3.8
2
3##===--- iwyu_tool.py -----------------------------------------------------===##
4#
5#                     The LLVM Compiler Infrastructure
6#
7# This file is distributed under the University of Illinois Open Source
8# License. See LICENSE.TXT for details.
9#
10##===----------------------------------------------------------------------===##
11
12""" Driver to consume a Clang compilation database and invoke IWYU.
13
14Example usage with CMake:
15
16  # Unix systems
17  $ mkdir build && cd build
18  $ CC="clang" CXX="clang++" cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ...
19  $ iwyu_tool.py -p .
20
21  # Windows systems
22  $ mkdir build && cd build
23  $ cmake -DCMAKE_CXX_COMPILER="%VCINSTALLDIR%/bin/cl.exe" \
24    -DCMAKE_C_COMPILER="%VCINSTALLDIR%/VC/bin/cl.exe" \
25    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
26    -G Ninja ...
27  $ python iwyu_tool.py -p .
28
29See iwyu_tool.py -h for more details on command-line arguments.
30"""
31from __future__ import print_function
32import os
33import re
34import sys
35import json
36import time
37import shlex
38import argparse
39import tempfile
40import subprocess
41
42
# Patterns recognizing the structural lines of IWYU's textual report.
# Group 1 of each of the first four patterns is the file being reported on.
CORRECT_RE = re.compile(r'^\((.*?) has correct #includes/fwd-decls\)$')
SHOULD_ADD_RE = re.compile(r'^(.*?) should add these lines:$')
SHOULD_REMOVE_RE = re.compile(r'^(.*?) should remove these lines:$')
FULL_LIST_RE = re.compile(r'The full include-list for (.*?):$')
END_RE = re.compile(r'^---$')
# Group 1 is the line to remove, group 2 the first line number of its range.
LINES_RE = re.compile(r'^- (.*?)  // lines ([0-9]+)-[0-9]+$')


# Parser states used by clang_formatter: which section of the report the
# current line belongs to.
GENERAL, ADD, REMOVE, LIST = range(4)


def clang_formatter(output):
    """ Process iwyu's output into something clang-like.

    Walks the report line by line, tracking the current section as a
    (state, filename) pair:

    - "has correct #includes" lines become a ``file:1:1: note``.
    - Every line of a "should add" section becomes a ``file:1:1: error``
      followed by the line to add.
    - Every line of a "should remove" section becomes a
      ``file:<line>:1: error`` followed by the line to remove. If the line
      carries no ``// lines N-M`` suffix, line 1 is reported and the raw
      line is echoed unchanged.
    - Lines of the "full include-list" section are dropped.
    - Blank lines are dropped; anything outside a section is passed through.

    Returns the formatted lines joined with os.linesep.
    """
    formatted = []

    state = (GENERAL, None)
    for line in output.splitlines():
        match = CORRECT_RE.match(line)
        if match:
            formatted.append('%s:1:1: note: #includes/fwd-decls are correct' %
                             match.group(1))
            continue
        match = SHOULD_ADD_RE.match(line)
        if match:
            state = (ADD, match.group(1))
            continue
        match = SHOULD_REMOVE_RE.match(line)
        if match:
            state = (REMOVE, match.group(1))
            continue
        match = FULL_LIST_RE.match(line)
        if match:
            state = (LIST, match.group(1))
        elif END_RE.match(line):
            state = (GENERAL, None)
        elif not line.strip():
            continue
        elif state[0] == GENERAL:
            formatted.append(line)
        elif state[0] == ADD:
            formatted.append('%s:1:1: error: add the following line' % state[1])
            formatted.append(line)
        elif state[0] == REMOVE:
            match = LINES_RE.match(line)
            if match:
                removed, line_no = match.group(1), match.group(2)
            else:
                # No line-range suffix to parse: fall back to line 1 and
                # echo the raw line rather than dereferencing a None match.
                removed, line_no = line, '1'
            formatted.append('%s:%s:1: error: remove the following line' %
                             (state[1], line_no))
            formatted.append(removed)

    return os.linesep.join(formatted)
93
94
# Formatter selected when -o/--output-format is not given.
DEFAULT_FORMAT = 'iwyu'
# Maps each --output-format choice to a callable that takes IWYU's raw
# output and returns the text to print.
FORMATTERS = dict(
    iwyu=lambda output: output,  # pass IWYU's report through untouched
    clang=clang_formatter,
)
100
101
if not sys.platform.startswith('win'):
    def normcase(s):
        """ Return s unchanged; comparisons stay case-sensitive here. """
        return s
else:
    def normcase(s):
        """ Return s lower-cased for case-insensitive match on Windows. """
        return s.lower()
109
110
def is_subpath_of(path, parent):
    """ Tell whether path is parent itself or lies somewhere beneath it.

    Both arguments are expected to already be canonicalized with
    os.path.realpath; they are compared after passing through normcase.
    """
    candidate = normcase(path)
    ancestor = normcase(parent)

    if candidate == ancestor:
        return True

    if candidate.startswith(ancestor):
        # A shared textual prefix is not enough: /a/b/c/file is a prefix of
        # /a/b/c/file.cpp without being its parent. The part of the path
        # following the parent (minus any trailing separator on the parent)
        # has to begin with a path separator, as it does for /a/b/c and
        # /a/b/c/.
        remainder = candidate[len(ancestor.rstrip(os.path.sep)):]
        return remainder.startswith(os.path.sep)

    return False
131
132
def is_msvc_driver(compile_command):
    """ Tell whether compile_command names an MSVC CL-style driver.

    Two endings qualify, tested after normcase:
    - 'cl.exe'   -- native MSVC compiler or clang-cl.exe
    - 'clang-cl' -- cross clang-cl on non-Windows
    """
    cl_style_endings = ('cl.exe', 'clang-cl')
    return normcase(compile_command).endswith(cl_style_endings)
146
147
def win_split(cmdline):
    """ Minimal implementation of shlex.split for Windows following
    https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft.aspx.

    Returns the list of arguments found in the cmdline string. Arguments
    are separated by unquoted spaces or tabs; double quotes group text and
    are removed; backslashes are literal unless they precede a double
    quote. Arguments that end up empty are not reported.
    """
    def split_iter(cmdline):
        in_quotes = False
        backslashes = 0
        arg = ''
        for c in cmdline:
            if c == '\\':
                # MSDN: Backslashes are interpreted literally, unless they
                # immediately precede a double quotation mark.
                # Buffer them until we know what comes next.
                backslashes += 1
            elif c == '"':
                # Quotes can either be an escaped quote or the start of a quoted
                # string. Paraphrasing MSDN:
                # Before quotes, place one backslash in the arg for every pair
                # of leading backslashes. If the number of backslashes is odd,
                # retain the double quotation mark, otherwise interpret it as a
                # string delimiter and switch state.
                arg += '\\' * (backslashes // 2)
                if backslashes % 2 == 1:
                    arg += c
                else:
                    in_quotes = not in_quotes
                backslashes = 0
            elif c in (' ', '\t') and not in_quotes:
                # MSDN: Arguments are delimited by white space, which is either
                # a space or a tab [but only outside of a string].
                # Flush backslashes and return arg buffered so far, unless
                # empty.
                arg += '\\' * backslashes
                if arg:
                    yield arg
                    arg = ''
                backslashes = 0
            else:
                # Flush buffered backslashes and append.
                arg += '\\' * backslashes
                arg += c
                backslashes = 0

        # End of input: flush backslashes that are still buffered *before*
        # testing for emptiness, so a final argument made up solely of
        # backslashes is not lost.
        arg += '\\' * backslashes
        if arg:
            yield arg

    return list(split_iter(cmdline))
195
196
def split_command(cmdstr):
    """ Break a command-line string into its argument list, honoring the
    quoting rules of the platform the script runs on.
    """
    # shlex.split does not work for Windows command-lines, so Windows is
    # special-cased to our own implementation.
    on_windows = sys.platform.startswith('win')
    splitter = win_split if on_windows else shlex.split
    return splitter(cmdstr)
207
208
def find_include_what_you_use():
    """ Locate the IWYU executable.

    The IWYU_BINARY environment variable, when set, is returned as-is.
    Otherwise the directory containing this script is searched first,
    followed by every directory on PATH; the first regular file with the
    expected name wins and its os.path.realpath is returned. Returns None
    when nothing is found.
    """
    override = os.environ.get('IWYU_BINARY')
    if override is not None:
        return override

    # TODO: Investigate using shutil.which when Python 2 has passed away.
    suffix = '.exe' if sys.platform.startswith('win') else ''
    executable_name = 'include-what-you-use' + suffix

    candidate_dirs = [os.path.dirname(__file__)]
    candidate_dirs.extend(os.environ.get('PATH', '').split(os.pathsep))

    for directory in candidate_dirs:
        candidate = os.path.join(directory, executable_name)
        if os.path.isfile(candidate):
            return os.path.realpath(candidate)

    return None


# Resolved once at import time; None when no executable could be located.
IWYU_EXECUTABLE = find_include_what_you_use()
231
232
class Process(object):
    """ Manages an IWYU process in flight.

    Wraps a subprocess.Popen-like object together with the file its
    stdout+stderr are written to, and caches the decoded output once read.
    """
    def __init__(self, proc, outfile):
        self.proc = proc
        self.outfile = outfile
        # None until get_output has read the file; an empty string is a
        # valid, already-read result.
        self.output = None

    def poll(self):
        """ Return the exit code if the process has completed, None otherwise.
        """
        return self.proc.poll()

    def get_output(self):
        """ Return stdout+stderr output of the process.

        This call blocks until the process is complete, then returns the
        output decoded as UTF-8. The output file is read and closed on the
        first call only; later calls return the cached text.
        """
        # Compare against None rather than truthiness: a process that
        # printed nothing yields '', and re-reading would then touch the
        # already-closed file.
        if self.output is None:
            self.proc.wait()
            self.outfile.seek(0)
            self.output = self.outfile.read().decode("utf-8")
            self.outfile.close()

        return self.output

    @classmethod
    def start(cls, invocation):
        """ Start a Process for the invocation and capture stdout+stderr.

        invocation must provide a command list and a cwd attribute. Any
        exception raised while launching the subprocess is propagated after
        the temporary output file has been closed.
        """
        outfile = tempfile.TemporaryFile(prefix='iwyu')
        try:
            process = subprocess.Popen(
                invocation.command,
                cwd=invocation.cwd,
                stdout=outfile,
                stderr=subprocess.STDOUT)
        except Exception:
            # Don't leak the temporary file when the launch fails.
            outfile.close()
            raise
        return cls(process, outfile)
268
269
# Programs that may prefix the real compiler in a compile command and are
# dropped before the command is rewritten for IWYU.
KNOWN_COMPILER_WRAPPERS = frozenset(("ccache",))
273
274
class Invocation(object):
    """ Holds arguments of an IWYU invocation.

    command is the full argument list (executable first) and cwd the
    directory the command is to be run in.
    """
    def __init__(self, command, cwd):
        self.command = command
        self.cwd = cwd

    def __str__(self):
        return ' '.join(self.command)

    @classmethod
    def from_compile_command(cls, entry, extra_args):
        """ Parse a JSON compilation database entry into new Invocation.

        entry is one compilation database object; its 'arguments' list is
        preferred over its 'command' string, and its 'directory' becomes
        the working directory. extra_args are inserted between the IWYU
        executable and the original compiler arguments.

        Raises ValueError if the entry has neither 'arguments' nor
        'command', or if no compiler is left once a leading compiler
        wrapper has been removed.
        """
        if 'arguments' in entry:
            # arguments is a command-line in list form.
            command = entry['arguments']
        elif 'command' in entry:
            # command is a command-line in string form, split to list.
            command = split_command(entry['command'])
        else:
            raise ValueError('Invalid compilation database entry: %s' % entry)

        # Compare the basename so that a wrapper recorded with its full
        # path (e.g. /usr/bin/ccache) is recognized as well as a bare name.
        if command and os.path.basename(command[0]) in KNOWN_COMPILER_WRAPPERS:
            # Remove the compiler wrapper from the command.
            command = command[1:]

        if not command:
            # Nothing left to act as the compiler.
            raise ValueError('Invalid compilation database entry: %s' % entry)

        # Rewrite the compile command for IWYU
        compile_command, compile_args = command[0], command[1:]
        if is_msvc_driver(compile_command):
            # If the compiler is cl-compatible, let IWYU be cl-compatible.
            extra_args = ['--driver-mode=cl'] + extra_args

        command = [IWYU_EXECUTABLE] + extra_args + compile_args
        return cls(command, entry['directory'])

    def start(self, verbose):
        """ Run invocation and collect output.

        When verbose is true the command line is echoed to stderr first.
        Returns the Process tracking the launched command.
        """
        if verbose:
            print('# %s' % self, file=sys.stderr)

        return Process.start(self)
315
316
def fixup_compilation_db(compilation_db):
    """ Rewrite every entry's 'file' into a canonical absolute path.

    Entries are modified in place and the same list is returned.
    """
    for entry in compilation_db:
        source = entry['file']

        # A relative file is interpreted against the entry's directory,
        # when the entry has one.
        if 'directory' in entry and not os.path.isabs(source):
            source = os.path.join(entry['directory'], source)

        # Expand remaining relative paths and symlinks.
        entry['file'] = os.path.realpath(source)

    return compilation_db
328
329
def slice_compilation_db(compilation_db, selection):
    """ Narrow the compilation database down to the selected paths.

    An empty selection returns compilation_db itself. Otherwise a new list
    is built holding, for each selected path in turn, the entries whose
    'file' is that path or lies beneath it. Selected paths that do not
    exist on disk, or that match no entry, produce a warning on stderr and
    contribute nothing.
    """
    if not selection:
        return compilation_db

    # Canonicalize selection paths to match compilation database.
    wanted_paths = list(map(os.path.realpath, selection))

    sliced = []
    for wanted in wanted_paths:
        if not os.path.exists(wanted):
            print('warning: \'%s\' not found on disk.' % wanted,
                  file=sys.stderr)
            continue

        matches = [entry for entry in compilation_db
                   if is_subpath_of(entry['file'], wanted)]
        if matches:
            sliced.extend(matches)
        else:
            print('warning: \'%s\' not found in compilation database.' % wanted,
                  file=sys.stderr)

    return sliced
353
354
def execute(invocations, verbose, formatter, jobs):
    """ Launch processes described by invocations.

    Each invocation is started with start(verbose); the output of every
    finished process is passed through formatter and printed. With one job
    the invocations run strictly one after another; otherwise up to jobs
    processes are kept in flight and polled for completion.

    A jobs value below 1 is treated as 1: without that, no process would
    ever be scheduled and the polling loop would never terminate.
    """
    jobs = max(1, jobs)

    if jobs == 1:
        for invocation in invocations:
            print(formatter(invocation.start(verbose).get_output()))
        return

    pending = []
    while invocations or pending:
        # Collect completed IWYU processes and print results.
        still_running = []
        for proc in pending:
            if proc.poll() is None:
                still_running.append(proc)
            else:
                print(formatter(proc.get_output()))
        pending = still_running

        # Schedule new processes if there's room.
        capacity = jobs - len(pending)
        pending.extend(i.start(verbose) for i in invocations[:capacity])
        invocations = invocations[capacity:]

        # Yield CPU.
        time.sleep(0.0001)
377
378
def main(compilation_db_path, source_files, verbose, formatter, jobs,
         extra_args):
    """ Entry point.

    Loads the compilation database at compilation_db_path (a directory is
    taken to contain 'compile_commands.json'), canonicalizes it, narrows it
    to source_files, and runs one IWYU invocation per remaining entry with
    extra_args forwarded to IWYU.

    Returns 1 after printing an error to stderr when the IWYU executable
    is missing or the database cannot be read or parsed; otherwise returns
    the result of execute().
    """

    if not IWYU_EXECUTABLE:
        print('error: include-what-you-use executable not found',
              file=sys.stderr)
        return 1

    try:
        if os.path.isdir(compilation_db_path):
            compilation_db_path = os.path.join(compilation_db_path,
                                               'compile_commands.json')

        # Read compilation db from disk.
        compilation_db_path = os.path.realpath(compilation_db_path)
        with open(compilation_db_path, 'r') as fileobj:
            compilation_db = json.load(fileobj)
    except (IOError, ValueError) as why:
        # IOError covers an unreadable file; ValueError covers malformed
        # JSON (json's decode errors are ValueError instances).
        print('error: failed to parse compilation database: %s' % why,
              file=sys.stderr)
        return 1

    compilation_db = fixup_compilation_db(compilation_db)
    compilation_db = slice_compilation_db(compilation_db, source_files)

    # Transform compilation db entries into a list of IWYU invocations.
    invocations = [
        Invocation.from_compile_command(e, extra_args) for e in compilation_db
    ]

    return execute(invocations, verbose, formatter, jobs)
411
412
def _bootstrap(sys_argv):
    """ Parse the command line and hand the result over to main().

    sys_argv is the full argument vector including the program name.
    Everything after a '--' argument is forwarded to IWYU untouched; the
    rest is parsed by argparse. Returns whatever main() returns.
    """
    parser = argparse.ArgumentParser(
        description='Include-what-you-use compilation database driver.',
        epilog='Assumes include-what-you-use is available on the PATH.')

    # This hackery is necessary to add the forwarded IWYU args to the
    # usage and help strings: the parser's own formatting methods are
    # replaced by wrappers around the stock ones.
    stock_format_usage = parser.format_usage
    stock_format_help = parser.format_help

    def usage_with_iwyu_args():
        """ Stock usage string with the forwarded-args hint appended. """
        return stock_format_usage().rstrip() + ' -- [<IWYU args>]' + os.linesep

    def help_with_iwyu_args():
        """ Stock help text whose first line is the adjusted usage. """
        lines = stock_format_help().splitlines()
        lines[0] = parser.format_usage().rstrip()
        return os.linesep.join(lines) + os.linesep

    parser.format_usage = usage_with_iwyu_args
    parser.format_help = help_with_iwyu_args

    # Declare the driver's own arguments.
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Print IWYU commands')
    parser.add_argument('-o', '--output-format', type=str,
                        choices=FORMATTERS.keys(), default=DEFAULT_FORMAT,
                        help='Output format (default: %s)' % DEFAULT_FORMAT)
    parser.add_argument('-j', '--jobs', type=int, default=1,
                        help='Number of concurrent subprocesses')
    parser.add_argument('-p', metavar='<build-path>', required=True,
                        help='Compilation database path', dest='dbpath')
    parser.add_argument('source', nargs='*',
                        help=('Zero or more source files (or directories) to '
                              'run IWYU on. Defaults to all in compilation '
                              'database.'))

    # Split around the first '--' into driver args and IWYU args.
    cmdline = sys_argv[1:]
    if '--' in cmdline:
        separator = cmdline.index('--')
        argv = cmdline[:separator]
        extra_args = cmdline[separator + 1:]
    else:
        argv = cmdline
        extra_args = []

    args = parser.parse_args(argv)
    selected_formatter = FORMATTERS[args.output_format]

    return main(args.dbpath, args.source, args.verbose,
                selected_formatter, args.jobs, extra_args)
470
471
# Run the driver only when executed as a script; the value returned by
# _bootstrap is handed to sys.exit as the process exit status.
if __name__ == '__main__':
    sys.exit(_bootstrap(sys.argv))
474