1#!/usr/bin/env python
2# Copyright (c) 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Wrapper script to help run clang tools across Chromium code.
6
How to use run_tool.py:
If you want to run a clang tool across all Chromium code:
run_tool.py --tool <tool> -p <path/to/compiledb>

If you want to include all files mentioned in the compilation database
(this will also include generated files, unlike the previous command):
run_tool.py --tool <tool> -p <path/to/compiledb> --all

If you want to run the clang tool across only chrome/browser and
content/browser:
run_tool.py --tool <tool> -p <path/to/compiledb> chrome/browser content/browser
18
19Please see docs/clang_tool_refactoring.md for more information, which documents
20the entire automated refactoring flow in Chromium.
21
22Why use run_tool.py (instead of running a clang tool directly):
23The clang tool implementation doesn't take advantage of multiple cores, and if
24it fails mysteriously in the middle, all the generated replacements will be
25lost. Additionally, if the work is simply sharded across multiple cores by
26running multiple RefactoringTools, problems arise when they attempt to rewrite a
27file at the same time.
28
29run_tool.py will
301) run multiple instances of clang tool in parallel
312) gather stdout from clang tool invocations
323) "atomically" forward #2 to stdout
33
34Output of run_tool.py can be piped into extract_edits.py and then into
35apply_edits.py. These tools will extract individual edits and apply them to the
36source files. These tools assume the clang tool emits the edits in the
37following format:
38    ...
39    ==== BEGIN EDITS ====
40    r:::<file path>:::<offset>:::<length>:::<replacement text>
41    r:::<file path>:::<offset>:::<length>:::<replacement text>
42    ...etc...
43    ==== END EDITS ====
44    ...
45
46extract_edits.py extracts only lines between BEGIN/END EDITS markers
47apply_edits.py reads edit lines from stdin and applies the edits
48"""
49
50from __future__ import print_function
51
52import argparse
53from collections import namedtuple
54import functools
55import json
56import multiprocessing
57import os
58import os.path
59import re
60import subprocess
61import shlex
62import sys
63
# Put the sibling ../pylib directory (resolved relative to this script's real
# location) at the front of sys.path. The `clang` import below is expected to
# be found there, so it has to come after the sys.path update.
script_dir = os.path.dirname(os.path.realpath(__file__))
tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
sys.path.insert(0, tool_dir)

from clang import compile_db
69
70
# One compile database record: the directory the command runs in, the source
# file it compiles, and the full compiler command line.
CompDBEntry = namedtuple('CompDBEntry', 'directory filename command')
72
def _PruneGitFiles(git_files, paths):
  """Prunes the list of files from git to include only those that are either in
  |paths| or start with one item in |paths|.

  Matching is a plain string-prefix test, so a filter of '/src/foo' also keeps
  '/src/foobar.cc'.

  Args:
    git_files: List of all repository files. Not modified.
    paths: Prefix filter for the returned paths. May contain multiple entries,
        and the contents should be absolute paths.

  Returns:
    Pruned list of files, in sorted order.
  """
  # Imported locally so this function does not depend on the module's import
  # block.
  import bisect

  if not git_files:
    return []
  # Work on a sorted copy so the caller's list is left untouched.
  sorted_files = sorted(git_files)
  pruned_list = []
  search_start = 0
  for path in sorted(paths):
    # Every file that has |path| as a prefix sorts at or after |path| itself,
    # so the matches form a contiguous run starting at the insertion point.
    index = bisect.bisect_left(sorted_files, path, search_start)
    while (index < len(sorted_files) and
           sorted_files[index].startswith(path)):
      pruned_list.append(sorted_files[index])
      index += 1
    # Filters are visited in sorted order, so later ones can only match files
    # further along; this also keeps nested filters from adding duplicates.
    search_start = index

  return pruned_list
108
109
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository if |paths| includes prefix
  path filters or is empty. All complete filenames in |paths| are also included
  in the output.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
        None is treated the same as an empty list (no filtering).

  Returns:
    List of real (symlink-resolved) file paths: first the entries of |paths|
    that are existing files, then the files reported by `git ls-files`
    (restricted to the prefix filters when there are any).
  """
  partial_paths = []
  files = []
  for p in paths or []:
    real_path = os.path.realpath(p)
    if os.path.isfile(real_path):
      files.append(real_path)
    else:
      # Not an existing file, so it is used as a prefix filter.
      partial_paths.append(real_path)
  # git is only consulted when there is a prefix to expand or when nothing was
  # requested explicitly.
  if partial_paths or not files:
    git = 'git.bat' if sys.platform == 'win32' else 'git'
    command = subprocess.Popen([git, 'ls-files'], stdout=subprocess.PIPE)
    output, _ = command.communicate()
    # On Python 3 the pipe yields bytes; convert to str so the resulting paths
    # can be compared against the (str) prefix filters.
    if not isinstance(output, str):
      output = output.decode('utf-8')
    git_files = [os.path.realpath(p) for p in output.splitlines()]
    if partial_paths:
      git_files = _PruneGitFiles(git_files, partial_paths)
    files.extend(git_files)
  return files
140
141
def _GetEntriesFromCompileDB(build_directory, source_filenames):
  """Collects the compile database records as CompDBEntry tuples.

  Args:
    build_directory: Directory that contains the compile database.
    source_filenames: Optional list of filenames. When it is not None, only
      records whose directory + file, resolved with os.path.realpath, appear
      in it are returned. When it is None, every record is returned.

  Returns:
    List of CompDBEntry tuples, in the order the database yields them.
  """
  wanted = set(source_filenames) if source_filenames is not None else None

  entries = []
  for record in compile_db.Read(build_directory):
    if wanted is not None:
      resolved = os.path.realpath(
          os.path.join(record['directory'], record['file']))
      if resolved not in wanted:
        continue
    entries.append(
        CompDBEntry(record['directory'], record['file'], record['command']))
  return entries
158
159
def _UpdateCompileCommandsIfNeeded(compile_commands, files_list,
                                   target_os=None):
  """Narrows the compile database on Windows, then post-processes it.

  The narrowing step only happens when running on win32 with a non-empty
  |files_list|; otherwise the database is passed through unchanged. Either
  way the result is handed to compile_db.ProcessCompileDatabaseIfNeeded.

  Args:
    compile_commands: List of the contents of compile database.
    files_list: List of required files for processing. Can be None to specify
      no filtering.
    target_os: Forwarded as-is to compile_db.ProcessCompileDatabaseIfNeeded.
  Returns:
    List of the contents of the compile database after processing.
  """
  needs_filtering = sys.platform == 'win32' and files_list
  if not needs_filtering:
    selected = compile_commands
  else:
    # Compare paths relative to the current directory on both sides.
    wanted = set(os.path.relpath(name) for name in files_list)
    selected = []
    for record in compile_commands:
      joined = os.path.join(record['directory'], record['file'])
      if os.path.relpath(joined) in wanted:
        selected.append(record)

  return compile_db.ProcessCompileDatabaseIfNeeded(selected, target_os)
185
186
def _ToText(data):
  """Returns |data| as a native str, decoding it first if it is bytes."""
  if isinstance(data, str):
    return data
  return data.decode('utf-8', 'replace')


def _ExecuteTool(toolname, tool_args, build_directory, compdb_entry):
  """Executes the clang tool.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Name of the clang tool to execute.
    tool_args: Arguments to be passed to the clang tool. Can be None.
    build_directory: Directory that contains the compile database. The tool is
      run with this as its working directory.
    compdb_entry: The file and args to run the clang tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it. It always also holds "filename" and "stderr_text" (the
    tool's stderr with unused-linker-input warnings removed).

    If status is True (the tool exited with 0), the generated output is
    additionally stored with the key "stdout_text".
  """
  # Compile-command arguments that must not be forwarded to the tool:
  # - 'command' contains the full command line, including the input source
  #   file itself. It is filtered out, otherwise it would be passed to the
  #   tool twice - once directly and once via the compile args.
  # - /showIncludes is used by Ninja to track header file dependencies on
  #   Windows. It is not needed here and results in lots of spam and a massive
  #   log file.
  # - -MMD has the same purpose on non-Windows. It may have a corresponding
  #   '-MF <filename>', which is stripped below.
  dropped = (compdb_entry.filename, '/showIncludes', '/showIncludes:user',
             '-MMD')
  compile_args = [
      a for a in shlex.split(compdb_entry.command,
                             posix=(sys.platform != 'win32'))
      if a not in dropped
  ]
  # Drop the first '-MF' together with its filename operand. Only the compile
  # arguments are searched, so the tool's own arguments are never touched.
  if '-MF' in compile_args:
    mf_index = compile_args.index('-MF')
    del compile_args[mf_index:mf_index + 2]

  args = [toolname, compdb_entry.filename]
  if tool_args:
    args.extend(tool_args)
  args.append('--')
  args.extend(compile_args)

  # shlex.split escapes double quotes in non-Posix mode, so we need to strip
  # them back.
  if sys.platform == 'win32':
    args = [a.replace('\\"', '"') for a in args]
  command = subprocess.Popen(
      args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=build_directory)
  stdout_bytes, stderr_bytes = command.communicate()
  # The pipes yield bytes on Python 3; convert to text so the str regex below
  # and the callers' sys.stdout/sys.stderr writes work.
  stdout_text = _ToText(stdout_bytes)
  stderr_text = _ToText(stderr_bytes)
  stderr_text = re.sub(
      r"^warning: .*'linker' input unused \[-Wunused-command-line-argument\]\n",
      "", stderr_text, flags=re.MULTILINE)

  result = {
      'status': command.returncode == 0,
      'filename': compdb_entry.filename,
      'stderr_text': stderr_text,
  }
  if result['status']:
    result['stdout_text'] = stdout_text
  return result
261
262
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, tool_args, build_directory, compdb_entries):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      tool_args: Arguments to be passed to the tool. Can be None.
      build_directory: Directory that contains the compile database.
      compdb_entries: The files and args to run the tool over. Must support
        len() as well as iteration.
    """
    self.__toolname = toolname
    self.__tool_args = tool_args
    self.__build_directory = build_directory
    self.__compdb_entries = compdb_entries
    self.__success_count = 0
    self.__failed_count = 0

  @property
  def failed_count(self):
    """Number of entries for which the tool reported failure so far."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over every entry in a worker pool and reports results.

    Results are handled in completion order, not in input order.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname, self.__tool_args,
                            self.__build_directory),
          self.__compdb_entries)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Always shut the workers down, including when result processing raises.
      # On the normal path every result has already been consumed here.
      pool.terminate()
      pool.join()
    sys.stderr.write('\n')

  def __ProcessResult(self, result):
    """Handles result processing.

    On success the tool's stdout is forwarded to stdout and its stderr to
    stderr; on failure the filename and stderr are reported on stderr.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      sys.stdout.write(result['stdout_text'])
      sys.stderr.write(result['stderr_text'])
    else:
      self.__failed_count += 1
      sys.stderr.write('\nFailed to process %s\n' % result['filename'])
      sys.stderr.write(result['stderr_text'])
      sys.stderr.write('\n')
    total_count = len(self.__compdb_entries)
    done_count = self.__success_count + self.__failed_count
    percentage = (float(done_count) / total_count) * 100
    # Only output progress for every 100th entry, to make log files easier to
    # inspect.
    if done_count % 100 == 0 or done_count == total_count:
      sys.stderr.write(
          'Processed %d files with %s tool (%d failures) [%.2f%%]\r' %
          (done_count, self.__toolname, self.__failed_count, percentage))
320
321
def _ParseShard(shard):
  """Parses a '<n>-of-<count>' shard spec.

  Args:
    shard: The --shard command line value, e.g. '2-of-8'. <n> is 1-based.

  Returns:
    A (shard_index, shard_count) tuple where shard_index is 0-based.

  Raises:
    ValueError: if the spec is malformed or not 1 <= n <= count.
  """
  match = re.match(r'(\d+)-of-(\d+)$', shard)
  if not match:
    raise ValueError('expected <n>-of-<count>, got %r' % shard)
  shard_number = int(match.group(1))
  shard_count = int(match.group(2))
  if not 1 <= shard_number <= shard_count:
    raise ValueError('shard number must be between 1 and the shard count, '
                     'got %r' % shard)
  # Input is 1-based, but modular arithmetic is 0-based.
  return shard_number - 1, shard_count


def main():
  """Parses the command line and runs the clang tool over the selected files.

  Returns:
    0 if every entry was processed successfully, otherwise a negative number:
    -1 if the tool directory does not exist, else minus the number of failed
    entries (capped at -255).
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--options-file',
      help='optional file to read options from')
  args, argv = parser.parse_known_args()
  if args.options_file:
    # The options file replaces all other command line arguments.
    with open(args.options_file) as options_file:
      argv = options_file.read().split()

  parser.add_argument('--tool', required=True, help='clang tool to run')
  parser.add_argument('--all', action='store_true')
  parser.add_argument(
      '--generate-compdb',
      action='store_true',
      help='regenerate the compile database before running the tool')
  parser.add_argument(
      '--shard',
      metavar='<n>-of-<count>')
  parser.add_argument(
      '-p',
      required=True,
      help='path to the directory that contains the compile database')
  parser.add_argument(
      '--target_os',
      choices=['android', 'chromeos', 'ios', 'linux', 'nacl', 'mac', 'win'],
      help='Target OS - see `gn help target_os`. Set to "win" when ' +
      'cross-compiling Windows from Linux or another host')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  parser.add_argument(
      '--tool-arg', nargs='?', action='append',
      help='optional arguments passed to the tool')
  parser.add_argument(
      '--tool-path', nargs='?',
      help='optional path to the tool directory')
  args = parser.parse_args(argv)

  # Validate --shard up front so a typo fails fast with a usage error instead
  # of a traceback after the compile database has been read.
  shard_index = shard_count = None
  if args.shard:
    try:
      shard_index, shard_count = _ParseShard(args.shard)
    except ValueError as error:
      parser.error('invalid --shard value: %s' % error)

  if args.tool_path:
    tool_path = os.path.abspath(args.tool_path)
  else:
    tool_path = os.path.abspath(os.path.join(
          os.path.dirname(__file__),
          '../../../third_party/llvm-build/Release+Asserts/bin'))
  if not os.path.exists(tool_path):
    sys.stderr.write('tool not found: %s\n' % tool_path)
    return -1

  if args.all:
    # Reading source files is postponed to after possible regeneration of
    # compile_commands.json.
    source_filenames = None
  else:
    git_filenames = set(_GetFilesFromGit(args.path_filter))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
    source_filenames = [f
                        for f in git_filenames
                        if os.path.splitext(f)[1] in extensions]

  if args.generate_compdb:
    compile_commands = compile_db.GenerateWithNinja(args.p)
    compile_commands = _UpdateCompileCommandsIfNeeded(compile_commands,
                                                      source_filenames,
                                                      args.target_os)
    with open(os.path.join(args.p, 'compile_commands.json'), 'w') as f:
      f.write(json.dumps(compile_commands, indent=2))

  # A set drops duplicate database entries.
  compdb_entries = set(_GetEntriesFromCompileDB(args.p, source_filenames))

  if shard_count is not None:
    total_length = len(compdb_entries)
    # Sorting gives every shard the same view of the entry order, so the
    # shards partition the entries without overlap.
    compdb_entries = [
        f for i, f in enumerate(sorted(compdb_entries))
        if i % shard_count == shard_index
    ]
    # Report the shard with the same 1-based number the user passed in.
    print('Shard %d-of-%d will process %d entries out of %d' %
          (shard_index + 1, shard_count, len(compdb_entries), total_length))

  dispatcher = _CompilerDispatcher(os.path.join(tool_path, args.tool),
                                   args.tool_arg,
                                   args.p,
                                   compdb_entries)
  dispatcher.Run()
  # POSIX exit statuses are taken modulo 256, so an uncapped count that is a
  # multiple of 256 would be reported as success.
  return -min(dispatcher.failed_count, 255)
412
413
# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
416