#!/usr/local/bin/python3.8

##===--- iwyu_tool.py -----------------------------------------------------===##
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##

""" Driver to consume a Clang compilation database and invoke IWYU.

Example usage with CMake:

  # Unix systems
  $ mkdir build && cd build
  $ CC="clang" CXX="clang++" cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ...
  $ iwyu_tool.py -p .

  # Windows systems
  $ mkdir build && cd build
  $ cmake -DCMAKE_CXX_COMPILER="%VCINSTALLDIR%/bin/cl.exe" \
          -DCMAKE_C_COMPILER="%VCINSTALLDIR%/VC/bin/cl.exe" \
          -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
          -G Ninja ...
  $ python iwyu_tool.py -p .

See iwyu_tool.py -h for more details on command-line arguments.
"""
from __future__ import print_function
import os
import re
import sys
import json
import time
import shlex
import argparse
import tempfile
import subprocess


# Patterns recognizing the structural lines of IWYU's textual report.
CORRECT_RE = re.compile(r'^\((.*?) has correct #includes/fwd-decls\)$')
SHOULD_ADD_RE = re.compile(r'^(.*?) should add these lines:$')
SHOULD_REMOVE_RE = re.compile(r'^(.*?) should remove these lines:$')
FULL_LIST_RE = re.compile(r'The full include-list for (.*?):$')
END_RE = re.compile(r'^---$')
# Any run of whitespace is accepted before the trailing '// lines N-M' comment
# so the captured include line never carries trailing blanks.
LINES_RE = re.compile(r'^- (.*?)\s+// lines ([0-9]+)-[0-9]+$')


# Parser states used by clang_formatter.
GENERAL, ADD, REMOVE, LIST = range(4)


def clang_formatter(output):
    """ Process iwyu's output into something clang-like.

    output is the raw text printed by one IWYU invocation. Returns a single
    string of diagnostics joined with os.linesep:

      - '<file>:1:1: note: ...' for files whose includes are correct,
      - '<file>:1:1: error: add the following line' followed by the line,
      - '<file>:<line>:1: error: remove the following line' followed by the
        line to remove.

    Lines in the 'full include-list' section are dropped; lines outside any
    section are passed through unchanged.
    """
    formatted = []

    # state is (section kind, filename the section refers to).
    state = (GENERAL, None)
    for line in output.splitlines():
        match = CORRECT_RE.match(line)
        if match:
            formatted.append('%s:1:1: note: #includes/fwd-decls are correct' %
                             match.group(1))
            continue
        match = SHOULD_ADD_RE.match(line)
        if match:
            state = (ADD, match.group(1))
            continue
        match = SHOULD_REMOVE_RE.match(line)
        if match:
            state = (REMOVE, match.group(1))
            continue
        match = FULL_LIST_RE.match(line)
        if match:
            state = (LIST, match.group(1))
        elif END_RE.match(line):
            state = (GENERAL, None)
        elif not line.strip():
            continue
        elif state[0] == GENERAL:
            formatted.append(line)
        elif state[0] == ADD:
            formatted.append('%s:1:1: error: add the following line' % state[1])
            formatted.append(line)
        elif state[0] == REMOVE:
            match = LINES_RE.match(line)
            line_no = match.group(2) if match else '1'
            formatted.append('%s:%s:1: error: remove the following line' %
                             (state[1], line_no))
            # A line without the expected '// lines N-M' suffix is reported
            # verbatim instead of crashing on a missing match.
            formatted.append(match.group(1) if match else line)

    return os.linesep.join(formatted)


DEFAULT_FORMAT = 'iwyu'
FORMATTERS = {
    'iwyu': lambda output: output,
    'clang': clang_formatter
}


if sys.platform.startswith('win'):
    # Case-insensitive match on Windows
    def normcase(s):
        """ Return s lower-cased, for case-insensitive comparison. """
        return s.lower()
else:
    def normcase(s):
        """ Return s unchanged; comparisons are case-sensitive here. """
        return s


def is_subpath_of(path, parent):
    """ Return True if path is equal to or fully contained within parent.

    Assumes both paths are canonicalized with os.path.realpath.
    """
    parent = normcase(parent)
    path = normcase(path)

    if path == parent:
        return True

    if not path.startswith(parent):
        return False

    # Now we know parent is a prefix of path, but they only share lineage if the
    # difference between them starts with a path separator, e.g. /a/b/c/file
    # is not a parent of /a/b/c/file.cpp, but /a/b/c and /a/b/c/ are.
    parent = parent.rstrip(os.path.sep)
    suffix = path[len(parent):]
    return suffix.startswith(os.path.sep)


def is_msvc_driver(compile_command):
    """ Return True if compile_command matches an MSVC CL-style driver. """
    compile_command = normcase(compile_command)

    if compile_command.endswith('cl.exe'):
        # Native MSVC compiler or clang-cl.exe
        return True

    if compile_command.endswith('clang-cl'):
        # Cross clang-cl on non-Windows
        return True

    return False


def win_split(cmdline):
    """ Minimal implementation of shlex.split for Windows following
    https://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft.aspx.

    Returns the list of arguments found in the cmdline string. Empty
    arguments are not emitted.
    """
    def split_iter(cmdline):
        """ Yield the arguments of cmdline one at a time. """
        in_quotes = False
        backslashes = 0
        arg = ''
        for c in cmdline:
            if c == '\\':
                # MSDN: Backslashes are interpreted literally, unless they
                # immediately precede a double quotation mark.
                # Buffer them until we know what comes next.
                backslashes += 1
            elif c == '"':
                # Quotes can either be an escaped quote or the start of a quoted
                # string. Paraphrasing MSDN:
                # Before quotes, place one backslash in the arg for every pair
                # of leading backslashes. If the number of backslashes is odd,
                # retain the double quotation mark, otherwise interpret it as a
                # string delimiter and switch state.
                arg += '\\' * (backslashes // 2)
                if backslashes % 2 == 1:
                    arg += c
                else:
                    in_quotes = not in_quotes
                backslashes = 0
            elif c in (' ', '\t') and not in_quotes:
                # MSDN: Arguments are delimited by white space, which is either
                # a space or a tab [but only outside of a string].
                # Flush backslashes and return arg buffered so far, unless
                # empty.
                arg += '\\' * backslashes
                if arg:
                    yield arg
                    arg = ''
                backslashes = 0
            else:
                # Flush buffered backslashes and append.
                arg += '\\' * backslashes
                arg += c
                backslashes = 0

        # Flush backslashes *before* testing for emptiness, so a final argument
        # made up solely of backslashes is not lost.
        arg += '\\' * backslashes
        if arg:
            yield arg

    return list(split_iter(cmdline))


def split_command(cmdstr):
    """ Split a command string into a list, respecting shell quoting. """
    if sys.platform.startswith('win'):
        # shlex.split does not work for Windows command-lines, so special-case
        # to our own implementation.
        cmd = win_split(cmdstr)
    else:
        cmd = shlex.split(cmdstr)

    return cmd


def find_include_what_you_use():
    """ Find IWYU executable and return its full pathname.

    The IWYU_BINARY environment variable, when set, is returned as-is.
    Otherwise the directory of this script and then every PATH entry are
    searched. Returns None if no executable is found.
    """
    if 'IWYU_BINARY' in os.environ:
        return os.environ.get('IWYU_BINARY')

    # TODO: Investigate using shutil.which when Python 2 has passed away.
    executable_name = 'include-what-you-use'
    if sys.platform.startswith('win'):
        executable_name += '.exe'

    search_path = [os.path.dirname(__file__)]
    search_path += os.environ.get('PATH', '').split(os.pathsep)

    for dirpath in search_path:
        full = os.path.join(dirpath, executable_name)
        if os.path.isfile(full):
            return os.path.realpath(full)

    return None


IWYU_EXECUTABLE = find_include_what_you_use()


class Process(object):
    """ Manages an IWYU process in flight """
    def __init__(self, proc, outfile):
        # proc is the subprocess.Popen object; outfile is the file its
        # stdout+stderr is redirected to.
        self.proc = proc
        self.outfile = outfile
        # Decoded output, cached by get_output; None until first read.
        self.output = None

    def poll(self):
        """ Return the exit code if the process has completed, None otherwise.
        """
        return self.proc.poll()

    def get_output(self):
        """ Return stdout+stderr output of the process.

        This call blocks until the process is complete, then returns the output.
        The output file is read and closed on the first call; later calls
        return the cached text.
        """
        # Compare against None rather than truthiness: an empty output is a
        # valid cached result, and the file is already closed by then.
        if self.output is None:
            self.proc.wait()
            self.outfile.seek(0)
            self.output = self.outfile.read().decode("utf-8")
            self.outfile.close()

        return self.output

    @classmethod
    def start(cls, invocation):
        """ Start a Process for the invocation and capture stdout+stderr. """
        outfile = tempfile.TemporaryFile(prefix='iwyu')
        process = subprocess.Popen(
            invocation.command,
            cwd=invocation.cwd,
            stdout=outfile,
            stderr=subprocess.STDOUT)
        return cls(process, outfile)


# Executables that merely wrap the real compiler and must be skipped to find
# the actual compiler in a compile command.
KNOWN_COMPILER_WRAPPERS = frozenset([
    "ccache"
])


class Invocation(object):
    """ Holds arguments of an IWYU invocation. """
    def __init__(self, command, cwd):
        # command is the argument list to execute; cwd is the directory to
        # run it in.
        self.command = command
        self.cwd = cwd

    def __str__(self):
        return ' '.join(self.command)

    @classmethod
    def from_compile_command(cls, entry, extra_args):
        """ Parse a JSON compilation database entry into new Invocation.

        entry is a compilation database dict holding either 'arguments' (list)
        or 'command' (string), plus 'directory'. extra_args is a list of
        arguments placed between the IWYU executable and the compiler
        arguments.

        Raises ValueError if the entry has no usable command line.
        """
        if 'arguments' in entry:
            # arguments is a command-line in list form.
            command = entry['arguments']
        elif 'command' in entry:
            # command is a command-line in string form, split to list.
            command = split_command(entry['command'])
        else:
            raise ValueError('Invalid compilation database entry: %s' % entry)

        # Compare the basename so wrappers given with a full path
        # (e.g. /usr/bin/ccache) are recognized as well.
        if command and os.path.basename(command[0]) in KNOWN_COMPILER_WRAPPERS:
            # Remove the compiler wrapper from the command.
            command = command[1:]

        if not command:
            raise ValueError('Invalid compilation database entry: %s' % entry)

        # Rewrite the compile command for IWYU
        compile_command, compile_args = command[0], command[1:]
        if is_msvc_driver(compile_command):
            # If the compiler is cl-compatible, let IWYU be cl-compatible.
            extra_args = ['--driver-mode=cl'] + extra_args

        command = [IWYU_EXECUTABLE] + extra_args + compile_args
        return cls(command, entry['directory'])

    def start(self, verbose):
        """ Run invocation and collect output.

        When verbose is true, the command line is echoed to stderr first.
        Returns the started Process.
        """
        if verbose:
            print('# %s' % self, file=sys.stderr)

        return Process.start(self)


def fixup_compilation_db(compilation_db):
    """ Canonicalize paths in JSON compilation database.

    Each entry's 'file' is rewritten in place to an absolute real path.
    Returns the same compilation_db object.
    """
    for entry in compilation_db:
        # Convert relative paths to absolute ones if possible, based on the
        # entry's directory.
        if 'directory' in entry and not os.path.isabs(entry['file']):
            entry['file'] = os.path.join(entry['directory'], entry['file'])

        # Expand relative paths and symlinks
        entry['file'] = os.path.realpath(entry['file'])

    return compilation_db


def slice_compilation_db(compilation_db, selection):
    """ Return a new compilation database reduced to the paths in selection.

    An empty selection returns compilation_db unchanged. Selected paths that
    do not exist on disk, or match no entry, produce a warning on stderr and
    are skipped.
    """
    if not selection:
        return compilation_db

    # Canonicalize selection paths to match compilation database.
    selection = [os.path.realpath(p) for p in selection]

    new_db = []
    for path in selection:
        if not os.path.exists(path):
            print('warning: \'%s\' not found on disk.' % path, file=sys.stderr)
            continue

        found = [e for e in compilation_db if is_subpath_of(e['file'], path)]
        if not found:
            print('warning: \'%s\' not found in compilation database.' % path,
                  file=sys.stderr)
            continue

        new_db.extend(found)

    return new_db


def execute(invocations, verbose, formatter, jobs):
    """ Launch processes described by invocations.

    invocations is a list of Invocation objects, formatter a callable mapping
    raw IWYU output to the text to print, and jobs the maximum number of
    concurrently running processes. Values of jobs below 1 are treated as 1.
    """
    # A non-positive job count would leave no capacity to ever schedule a
    # process, so the loop below would spin forever.
    jobs = max(1, jobs)

    if jobs == 1:
        for invocation in invocations:
            print(formatter(invocation.start(verbose).get_output()))
        return

    pending = []
    while invocations or pending:
        # Collect completed IWYU processes and print results.
        complete = [proc for proc in pending if proc.poll() is not None]
        for proc in complete:
            pending.remove(proc)
            print(formatter(proc.get_output()))

        # Schedule new processes if there's room.
        capacity = jobs - len(pending)
        pending.extend(i.start(verbose) for i in invocations[:capacity])
        invocations = invocations[capacity:]

        # Yield CPU.
        time.sleep(0.0001)


def main(compilation_db_path, source_files, verbose, formatter, jobs,
         extra_args):
    """ Entry point.

    Loads the compilation database at compilation_db_path (a file, or a
    directory containing compile_commands.json), restricts it to source_files
    if any are given, and runs IWYU on every remaining entry.

    Returns 1 if the IWYU executable or the compilation database cannot be
    used, otherwise the result of execute().
    """

    if not IWYU_EXECUTABLE:
        print('error: include-what-you-use executable not found',
              file=sys.stderr)
        return 1

    try:
        if os.path.isdir(compilation_db_path):
            compilation_db_path = os.path.join(compilation_db_path,
                                               'compile_commands.json')

        # Read compilation db from disk.
        compilation_db_path = os.path.realpath(compilation_db_path)
        with open(compilation_db_path, 'r') as fileobj:
            compilation_db = json.load(fileobj)
    except (IOError, ValueError) as why:
        # json signals malformed content with ValueError (JSONDecodeError is a
        # subclass), so report it the same way as an unreadable file.
        print('error: failed to parse compilation database: %s' % why,
              file=sys.stderr)
        return 1

    compilation_db = fixup_compilation_db(compilation_db)
    compilation_db = slice_compilation_db(compilation_db, source_files)

    # Transform compilation db entries into a list of IWYU invocations.
    invocations = [
        Invocation.from_compile_command(e, extra_args) for e in compilation_db
    ]

    return execute(invocations, verbose, formatter, jobs)


def _bootstrap(sys_argv):
    """ Parse arguments and dispatch to main().

    sys_argv is the full argument vector including the program name.
    Everything after a literal '--' is forwarded to IWYU untouched.
    """

    # This hackery is necessary to add the forwarded IWYU args to the
    # usage and help strings.
    def customize_usage(parser):
        """ Rewrite the parser's format_usage. """
        original_format_usage = parser.format_usage
        parser.format_usage = lambda: original_format_usage().rstrip() + \
                              ' -- [<IWYU args>]' + os.linesep

    def customize_help(parser):
        """ Rewrite the parser's format_help. """
        original_format_help = parser.format_help

        def custom_help():
            """ Customized help string, calls the adjusted format_usage. """
            helpmsg = original_format_help()
            helplines = helpmsg.splitlines()
            helplines[0] = parser.format_usage().rstrip()
            return os.linesep.join(helplines) + os.linesep

        parser.format_help = custom_help

    # Parse arguments.
    parser = argparse.ArgumentParser(
        description='Include-what-you-use compilation database driver.',
        epilog='Assumes include-what-you-use is available on the PATH.')
    customize_usage(parser)
    customize_help(parser)

    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Print IWYU commands')
    parser.add_argument('-o', '--output-format', type=str,
                        choices=FORMATTERS.keys(), default=DEFAULT_FORMAT,
                        help='Output format (default: %s)' % DEFAULT_FORMAT)
    parser.add_argument('-j', '--jobs', type=int, default=1,
                        help='Number of concurrent subprocesses')
    parser.add_argument('-p', metavar='<build-path>', required=True,
                        help='Compilation database path', dest='dbpath')
    parser.add_argument('source', nargs='*',
                        help=('Zero or more source files (or directories) to '
                              'run IWYU on. Defaults to all in compilation '
                              'database.'))

    def partition_args(argv):
        """ Split around '--' into driver args and IWYU args. """
        try:
            double_dash = argv.index('--')
            return argv[:double_dash], argv[double_dash+1:]
        except ValueError:
            return argv, []
    argv, extra_args = partition_args(sys_argv[1:])
    args = parser.parse_args(argv)

    return main(args.dbpath, args.source, args.verbose,
                FORMATTERS[args.output_format], args.jobs, extra_args)


if __name__ == '__main__':
    sys.exit(_bootstrap(sys.argv))