#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Wrapper script to help run clang tools across Chromium code.

How to use run_tool.py:
If you want to run a clang tool across all Chromium code:
run_tool.py --tool <tool> -p <path/to/compiledb>

If you want to include all files mentioned in the compilation database
(this will also include generated files, unlike the previous command):
run_tool.py --tool <tool> -p <path/to/compiledb> --all

If you want to run the clang tool across only chrome/browser and
content/browser:
run_tool.py --tool <tool> -p <path/to/compiledb> chrome/browser content/browser

Please see docs/clang_tool_refactoring.md for more information, which documents
the entire automated refactoring flow in Chromium.

Why use run_tool.py (instead of running a clang tool directly):
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost. Additionally, if the work is simply sharded across multiple cores by
running multiple RefactoringTools, problems arise when they attempt to rewrite a
file at the same time.

run_tool.py will
1) run multiple instances of clang tool in parallel
2) gather stdout from clang tool invocations
3) "atomically" forward #2 to stdout

Output of run_tool.py can be piped into extract_edits.py and then into
apply_edits.py. These tools will extract individual edits and apply them to the
source files. These tools assume the clang tool emits the edits in the
following format:
    ...
    ==== BEGIN EDITS ====
    r:::<file path>:::<offset>:::<length>:::<replacement text>
    r:::<file path>:::<offset>:::<length>:::<replacement text>
    ...etc...
    ==== END EDITS ====
    ...

extract_edits.py extracts only lines between BEGIN/END EDITS markers
apply_edits.py reads edit lines from stdin and applies the edits
"""

from __future__ import print_function

import argparse
import bisect
from collections import namedtuple
import functools
import json
import multiprocessing
import os
import os.path
import re
import subprocess
import shlex
import sys

script_dir = os.path.dirname(os.path.realpath(__file__))
tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
sys.path.insert(0, tool_dir)

from clang import compile_db


# One compile database entry: the directory the command runs in, the source
# file it compiles, and the full compiler command line.
CompDBEntry = namedtuple('CompDBEntry', ['directory', 'filename', 'command'])

# Matches the "linker input unused" warning lines that are stripped from the
# stderr of every tool invocation.
_UNUSED_LINKER_INPUT_WARNING_RE = re.compile(
    r"^warning: .*'linker' input unused \[-Wunused-command-line-argument\]\n",
    flags=re.MULTILINE)


def _ToText(data):
  """Returns |data| as a native string.

  subprocess pipes yield bytes under Python 3 but str under Python 2. The rest
  of this script mixes the output with str paths and str regexes, so bytes are
  decoded here (undecodable bytes are replaced rather than raising).

  Args:
    data: Output read from a subprocess pipe.

  Returns:
    |data| unchanged if it is already a native string, otherwise its UTF-8
    decoding.
  """
  if isinstance(data, bytes) and not isinstance(data, str):
    return data.decode('utf-8', 'replace')
  return data


def _PruneGitFiles(git_files, paths):
  """Prunes the list of files from git to include only those that are either in
  |paths| or start with one item in |paths|.

  Args:
    git_files: List of all repository files. Not modified.
    paths: Prefix filter for the returned paths. May contain multiple entries,
        and the contents should be absolute paths.

  Returns:
    Pruned list of files, in sorted order.
  """
  if not git_files:
    return []
  sorted_files = sorted(git_files)
  pruned_list = []
  git_index = 0
  # Both sequences are walked in sorted order, so each search can start where
  # the previous prefix stopped matching.
  for path in sorted(paths):
    # Every file having |path| as a prefix sorts at or after |path| itself, in
    # one contiguous run starting at the insertion point.
    least = bisect.bisect_left(sorted_files, path, git_index)
    while (least < len(sorted_files) and
           sorted_files[least].startswith(path)):
      pruned_list.append(sorted_files[least])
      least += 1
    git_index = least

  return pruned_list


def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository if |paths| includes prefix
  path filters or is empty. All complete filenames in |paths| are also included
  in the output.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
        None is treated like an empty list.

  Returns:
    List of real (symlink-resolved, absolute) file paths.
  """
  partial_paths = []
  files = []
  for p in paths or []:
    real_path = os.path.realpath(p)
    if os.path.isfile(real_path):
      files.append(real_path)
    else:
      partial_paths.append(real_path)
  # git is only consulted when there is a prefix to expand, or when no filter
  # was given at all (in which case every file in the repository is returned).
  if partial_paths or not files:
    git = 'git.bat' if sys.platform == 'win32' else 'git'
    command = subprocess.Popen([git, 'ls-files'], stdout=subprocess.PIPE)
    output, _ = command.communicate()
    git_files = [os.path.realpath(p) for p in _ToText(output).splitlines()]
    if partial_paths:
      git_files = _PruneGitFiles(git_files, partial_paths)
    files.extend(git_files)
  return files


def _GetEntriesFromCompileDB(build_directory, source_filenames):
  """Gets the list of files and args mentioned in the compilation database.

  Args:
    build_directory: Directory that contains the compile database.
    source_filenames: If not None, only include entries for the given list of
        filenames.

  Returns:
    List of CompDBEntry tuples, one per matching compile database entry.
  """
  filenames_set = None if source_filenames is None else set(source_filenames)
  return [
      CompDBEntry(entry['directory'], entry['file'], entry['command'])
      for entry in compile_db.Read(build_directory)
      # Entries are matched on the real path of the file, resolved relative to
      # the entry's own directory.
      if filenames_set is None or os.path.realpath(
          os.path.join(entry['directory'], entry['file'])) in filenames_set
  ]


def _UpdateCompileCommandsIfNeeded(compile_commands, files_list,
                                   target_os=None):
  """Filters compile database to only include required files, and makes it
  more clang-tool friendly on Windows.

  Args:
    compile_commands: List of the contents of compile database.
    files_list: List of required files for processing. Can be None to specify
        no filtering.
    target_os: Forwarded unchanged to
        compile_db.ProcessCompileDatabaseIfNeeded.

  Returns:
    List of the contents of the compile database after processing.
  """
  # Filtering by |files_list| is only done when running on Windows.
  if sys.platform == 'win32' and files_list:
    relative_paths = {os.path.relpath(f) for f in files_list}
    filtered_compile_commands = [
        entry for entry in compile_commands
        if os.path.relpath(
            os.path.join(entry['directory'], entry['file'])) in relative_paths
    ]
  else:
    filtered_compile_commands = compile_commands

  return compile_db.ProcessCompileDatabaseIfNeeded(filtered_compile_commands,
                                                   target_os)


def _ExecuteTool(toolname, tool_args, build_directory, compdb_entry):
  """Executes the clang tool.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Name of the clang tool to execute.
    tool_args: Arguments to be passed to the clang tool. Can be None.
    build_directory: Directory that contains the compile database.
    compdb_entry: The file and args to run the clang tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it.

    If status is True, then the generated output is stored with the key
    "stdout_text" in the dictionary.

    In both cases the filename and the output from stderr are associated with
    the keys "filename" and "stderr_text" respectively.
  """
  args = [toolname, compdb_entry.filename]
  if tool_args:
    args.extend(tool_args)

  args.append('--')
  args.extend([
      a for a in shlex.split(compdb_entry.command,
                             posix=(sys.platform != 'win32'))
      # 'command' contains the full command line, including the input
      # source file itself. We need to filter it out otherwise it's
      # passed to the tool twice - once directly and once via
      # the compile args.
      if a != compdb_entry.filename
      # /showIncludes is used by Ninja to track header file dependencies on
      # Windows. We don't need to do this here, and it results in lots of spam
      # and a massive log file, so we strip it.
      and a != '/showIncludes' and a != '/showIncludes:user'
      # -MMD has the same purpose on non-Windows. It may have a corresponding
      # '-MF <filename>', which we strip below.
      and a != '-MMD'
  ])

  # Drop the first '-MF' together with the filename argument that follows it.
  if '-MF' in args:
    mf_index = args.index('-MF')
    del args[mf_index:mf_index + 2]

  # shlex.split escapes double quotes in non-Posix mode, so we need to strip
  # them back.
  if sys.platform == 'win32':
    args = [a.replace('\\"', '"') for a in args]
  command = subprocess.Popen(
      args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=build_directory)
  stdout_text, stderr_text = command.communicate()
  # The pipes yield bytes under Python 3; convert so the str regex below and
  # the sys.stdout/sys.stderr writes in the dispatcher work.
  stdout_text = _ToText(stdout_text)
  stderr_text = _UNUSED_LINKER_INPUT_WARNING_RE.sub('', _ToText(stderr_text))

  if command.returncode != 0:
    return {
        'status': False,
        'filename': compdb_entry.filename,
        'stderr_text': stderr_text,
    }
  return {
      'status': True,
      'filename': compdb_entry.filename,
      'stdout_text': stdout_text,
      'stderr_text': stderr_text,
  }


class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, tool_args, build_directory, compdb_entries):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      tool_args: Arguments to be passed to the tool. Can be None.
      build_directory: Directory that contains the compile database.
      compdb_entries: The files and args to run the tool over.
    """
    self.__toolname = toolname
    self.__tool_args = tool_args
    self.__build_directory = build_directory
    self.__compdb_entries = compdb_entries
    self.__success_count = 0
    self.__failed_count = 0

  @property
  def failed_count(self):
    """Number of entries for which the tool exited with a non-zero status."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over every entry in a process pool, handling each result
    in the parent process as it arrives (in completion order)."""
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname, self.__tool_args,
                            self.__build_directory),
          self.__compdb_entries)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # On success every task has already completed, so terminating is
      # harmless; on error it stops the workers instead of leaking them.
      pool.terminate()
      pool.join()
    sys.stderr.write('\n')

  def __ProcessResult(self, result):
    """Handles result processing.

    Successful output is forwarded to stdout in one write per file; diagnostics
    and progress go to stderr.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      sys.stdout.write(result['stdout_text'])
      sys.stderr.write(result['stderr_text'])
    else:
      self.__failed_count += 1
      sys.stderr.write('\nFailed to process %s\n' % result['filename'])
      sys.stderr.write(result['stderr_text'])
      sys.stderr.write('\n')
    done_count = self.__success_count + self.__failed_count
    total_count = len(self.__compdb_entries)
    percentage = (float(done_count) / total_count) * 100
    # Only output progress for every 100th entry, to make log files easier to
    # inspect.
    if done_count % 100 == 0 or done_count == total_count:
      sys.stderr.write(
          'Processed %d files with %s tool (%d failures) [%.2f%%]\r' %
          (done_count, self.__toolname, self.__failed_count, percentage))


def main():
  """Parses the command line, selects compile database entries and runs the
  tool over them.

  Returns:
    0 on success, -1 if the tool directory does not exist, otherwise the
    negated number of files that failed (clamped so that the process exit
    status can never wrap around to 0).
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--options-file',
      help='optional file to read options from')
  args, argv = parser.parse_known_args()
  if args.options_file:
    # When given, the file's whitespace-separated contents replace the
    # remaining command line arguments.
    with open(args.options_file) as options_file:
      argv = options_file.read().split()

  parser.add_argument('--tool', required=True, help='clang tool to run')
  parser.add_argument('--all', action='store_true')
  parser.add_argument(
      '--generate-compdb',
      action='store_true',
      help='regenerate the compile database before running the tool')
  parser.add_argument(
      '--shard',
      metavar='<n>-of-<count>',
      help='only process the n-th (1-based) of <count> shards of the entries')
  parser.add_argument(
      '-p',
      required=True,
      help='path to the directory that contains the compile database')
  parser.add_argument(
      '--target_os',
      choices=['android', 'chromeos', 'ios', 'linux', 'nacl', 'mac', 'win'],
      help='Target OS - see `gn help target_os`. Set to "win" when ' +
      'cross-compiling Windows from Linux or another host')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  parser.add_argument(
      '--tool-arg', nargs='?', action='append',
      help='optional arguments passed to the tool')
  parser.add_argument(
      '--tool-path', nargs='?',
      help='optional path to the tool directory')
  args = parser.parse_args(argv)

  # Validate --shard up front so that a typo is reported before any expensive
  # work is done.
  shard_number = None
  shard_count = None
  if args.shard:
    match = re.match(r'(\d+)-of-(\d+)$', args.shard)
    if not match:
      parser.error('--shard must have the form <n>-of-<count>, got "%s"' %
                   args.shard)
    # Input is 1-based, but modular arithmetic is 0-based.
    shard_number = int(match.group(1)) - 1
    shard_count = int(match.group(2))
    if not 0 <= shard_number < shard_count:
      parser.error('--shard requires 1 <= n <= count, got "%s"' % args.shard)

  if args.tool_path:
    tool_path = os.path.abspath(args.tool_path)
  else:
    tool_path = os.path.abspath(os.path.join(
        os.path.dirname(__file__),
        '../../../third_party/llvm-build/Release+Asserts/bin'))
  if not os.path.exists(tool_path):
    sys.stderr.write('tool not found: %s\n' % tool_path)
    return -1

  if args.all:
    # Reading source files is postponed to after possible regeneration of
    # compile_commands.json.
    source_filenames = None
  else:
    git_filenames = set(_GetFilesFromGit(args.path_filter))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
    source_filenames = [f
                        for f in git_filenames
                        if os.path.splitext(f)[1] in extensions]

  if args.generate_compdb:
    compile_commands = compile_db.GenerateWithNinja(args.p)
    compile_commands = _UpdateCompileCommandsIfNeeded(compile_commands,
                                                      source_filenames,
                                                      args.target_os)
    with open(os.path.join(args.p, 'compile_commands.json'), 'w') as f:
      f.write(json.dumps(compile_commands, indent=2))

  # A set drops duplicate entries from the compile database.
  compdb_entries = set(_GetEntriesFromCompileDB(args.p, source_filenames))

  if shard_count is not None:
    total_length = len(compdb_entries)
    # Sorting gives a stable order, so entry i goes to shard i % count.
    compdb_entries = [
        f for i, f in enumerate(sorted(compdb_entries))
        if i % shard_count == shard_number
    ]
    print('Shard %d-of-%d will process %d entries out of %d' %
          (shard_number + 1, shard_count, len(compdb_entries), total_length))

  dispatcher = _CompilerDispatcher(os.path.join(tool_path, args.tool),
                                   args.tool_arg,
                                   args.p,
                                   compdb_entries)
  dispatcher.Run()
  # POSIX keeps only the low 8 bits of the exit status, so an unclamped count
  # of 256 failures would be reported as success.
  return -min(dispatcher.failed_count, 255)


if __name__ == '__main__':
  sys.exit(main())