1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===------------------------------------------------------------------------===#
10
11r"""
12clang-format git integration
13============================
14
15This file provides a clang-format integration for git. Put it somewhere in your
16path and ensure that it is executable. Then, "git clang-format" will invoke
17clang-format on the changes in current files or a specific commit.
18
19For further details, run:
20git clang-format -h
21
22Requires Python 2.7 or Python 3
23"""
24
25from __future__ import absolute_import, division, print_function
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# Usage line handed to argparse.ArgumentParser(usage=...) in main().
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description handed to argparse.ArgumentParser(description=...) in
# main(); it is printed verbatim because main() selects
# RawDescriptionHelpFormatter.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which the output of clang-format is
# saved.  This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: `start` is the first line number and `count` is the
# number of lines in the run (see extract_lines()).
Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
  """Parse the command line and run clang-format on the changed lines.

  Steps, in order:
    1. Read defaults from the git configuration.
    2. Split the command line into commits and files.
    3. Compute the changed line ranges and drop files with a disallowed
       extension as well as symlinks.
    4. Build an "old" tree and a clang-formatted "new" tree.
    5. Either print the diff between the trees (--diff) or apply the new tree
       to the working directory.

  Exits through die() when more than two commits are given, or when two
  commits are given without --diff."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
      'cu', 'cuh',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      'json',  # Json
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level is used below.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Reject too many commits first; otherwise this check could never trigger
  # because any count above two is also above one.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) > 1 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    # Remember every file in the diff so the filtered-out ones can be reported.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  filter_symlinks(changed_lines)
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print(
        'Ignoring changes in the following files (wrong extension or symlink):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)
  if not changed_lines:
    if opts.verbose >= 0:
      print('no modified files to format')
    return
  if len(commits) > 1:
    # Two commits: format the file contents as they are in the second commit.
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    # Zero or one commit: format the files in the working directory.
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('    %s' % filename)
186
187
def load_git_config(non_string_options=None):
  """Read `git config --list` into a dictionary and return it.

  Every value is kept as a string, except for options named in
  `non_string_options`: that dictionary maps an option name (in lower case) to
  either "--bool" or "--int", and such options are re-read through
  `git config <flag> <name>`."""
  typed_options = {} if non_string_options is None else non_string_options
  settings = {}
  listing = run('git', 'config', '--list', '--null')
  for record in listing.split('\0'):
    if not record:
      continue
    # With --null, the name and the value are separated by a newline.
    key, separator, setting = record.partition('\n')
    if not separator:
      # A setting with no '=' ('\n' with --null) is implicitly 'true'
      setting = 'true'
    if key in typed_options:
      setting = run('git', 'config', typed_options[key], key)
    settings[key] = setting
  return settings
209
210
def interpret_args(args, dash_dash, default_commit):
  """Split `args` of the form "[commits] [--] [files]" into (commits, files).

  The caller must already have moved "--" and everything after it out of
  `args` and into `dash_dash`.

  When `dash_dash` is non-empty, everything in `args` is treated as a commit
  and must name a commit or tag; the files are whatever follows the "--".
  Otherwise the arguments are examined from left to right: leading revisions
  become commits and the rest become files.  A list holding `default_commit`
  is used whenever no commit was given."""
  if dash_dash:
    commits = args if args else [default_commit]
    for candidate in commits:
      kind = get_object_type(candidate)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % candidate)
      else:
        die("'%s' is a %s, but a commit was expected" % (candidate, kind))
    return commits, dash_dash[1:]

  if not args:
    return [default_commit], []

  # Peel revisions off the front of `args`; what is left over are files.
  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
247
248
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False).

  Dies if `value` is a git object that is neither a commit nor a tag."""
  # If `value` is ambiguous (neither a commit nor a file), the following
  # command will die with an appropriate error message.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
261
262
def get_object_type(value):
  """Return the type reported by `git cat-file -t` for `value`.

  Returns None when the command exits with a non-zero status, i.e. when
  `value` is not a valid git object."""
  proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's complaint off the terminal.
  out, _ = proc.communicate()
  return convert_string(out.strip()) if proc.returncode == 0 else None
272
273
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output through extract_lines().

  Exits with status 2 if the diff process fails."""
  proc = compute_diff(commits, files)
  ranges_by_file = extract_lines(proc.stdout)
  proc.stdout.close()
  if proc.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return ranges_by_file
284
285
def compute_diff(commits, files):
  """Start the git process that produces the diff for `commits`.

  Returns the subprocess object; its `stdout` file object yields a patch.
  With a single commit, `git diff-index` is used; with more than one,
  `git diff-tree` is used.  The patch is restricted to `files` when that list
  is non-empty, and is generated with zero context lines."""
  subcommand = 'diff-tree' if len(commits) > 1 else 'diff-index'
  prefix = ['git', subcommand, '-p', '-U0']
  argv = prefix + commits + ['--']
  argv.extend(files)
  proc = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # Nothing is ever written to the child, so close its stdin right away.
  proc.stdin.close()
  return proc
301
302
def extract_lines(patch_file):
  """Collect the changed line ranges from `patch_file`.

  `patch_file` is an iterable of patch lines that must have been produced with
  ``-U0``, meaning unidiff format with zero lines of context.

  Returns a dict mapping each filename to a list of `Range`s, one per hunk
  that leaves at least one line in the new version of the file."""
  file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
  hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
  ranges_by_file = {}
  for raw_line in patch_file:
    text = convert_string(raw_line)
    header = file_header.search(text)
    if header:
      # Remember which file the following hunks belong to.
      filename = header.group(1).rstrip('\r\n')
    hunk = hunk_header.search(text)
    if not hunk:
      continue
    first_line = int(hunk.group(1))
    # An omitted count in a hunk header means exactly one line.
    length = int(hunk.group(3)) if hunk.group(3) else 1
    if length > 0:
      ranges_by_file.setdefault(filename, []).append(Range(first_line, length))
  return ranges_by_file
327
328
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `dictionary` is modified in place; its keys are '/'-separated file paths.
  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  The empty string stands for "no extension at all".
  The comparison is case-insensitive with respect to the filename."""
  allowed_extensions = frozenset(allowed_extensions)
  for filename in list(dictionary):
    # Only the last path component can carry the extension; a dot in a
    # directory name (e.g. 'build.d/Makefile') must not be mistaken for one.
    basename = filename.rsplit('/', 1)[-1]
    _, dot, extension = basename.rpartition('.')
    if dot:
      keep = extension.lower() in allowed_extensions
    else:
      keep = '' in allowed_extensions
    if not keep:
      del dictionary[filename]
341
342
def filter_symlinks(dictionary):
  """Remove, in place, every key of `dictionary` that names a symlink."""
  # Collect first so the dictionary is not modified while it is iterated.
  links = [path for path in dictionary if os.path.islink(path)]
  for path in links:
    del dictionary[path]
348
349
def cd_to_toplevel():
  """Make the top level of the git repository the current directory."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
354
355
def create_tree_from_workdir(filenames):
  """Build a git tree out of the working-directory copies of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
361
362
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Format every file in `changed_lines` and store the results in a git tree.

  `changed_lines` maps a filename to the line ranges to format.  When
  `revision` is given, file contents and modes are taken from that revision;
  otherwise they are taken from the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  def file_mode(filename):
    # Returns the file's mode as an octal string.
    if revision:
      ls_tree_cmd = ['git', 'ls-tree',
                     '%s:%s' % (revision, os.path.dirname(filename)),
                     os.path.basename(filename)]
      ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
      listing = ls_tree.communicate()[0]
      # The mode is the first whitespace-separated field of the listing.
      numeric_mode = int(listing.split()[0], 8)
    else:
      numeric_mode = os.stat(filename).st_mode
    text = oct(numeric_mode)
    # Adjust python3 octal format so that it matches what git expects
    if text.startswith('0o'):
      text = '0' + text[2:]
    return text

  def index_entries():
    # Yields one "<mode> <blob>\t<filename>" record per formatted file.
    for filename, line_ranges in changed_lines.items():
      mode = file_mode(filename)
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  return create_tree(index_entries(), '--index-info')
394
395
def create_tree(input_lines, mode):
  """Build a tree object from `input_lines` and return its object ID.

  With mode '--stdin', `input_lines` must be an iterable of filenames.  With
  mode '--index-info', it must be an iterable of records suitable for "git
  update-index --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".
  Any other mode is invalid.

  The records are fed NUL-terminated to `git update-index` while a temporary
  index file is in effect, and the tree is then written from that index."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    for record in input_lines:
      updater.stdin.write(to_bytes('%s\0' % record))
    updater.stdin.close()
    status = updater.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(cmd))
    return run('git', 'write-tree')
414
415
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None.

  `line_ranges` is an iterable of (start_line, line_count) pairs; each one is
  passed to clang-format as a `-lines=` option.  `binary` is the clang-format
  executable to run and `style`, if set, is passed through as `-style=`.

  Dies if the executable cannot be found or if any of the involved processes
  exits with a non-zero status.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  # -lines takes an inclusive START:END pair, hence the "- 1".
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # The content comes from `git cat-file` through a pipe, so clang-format is
    # only told the file's name instead of being given a path to read.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    # clang-format reads the working-directory file itself; its stdin is a
    # pipe that is closed below without anything being written to it.
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      # Swap the PIPE marker for the actual file object so it can be closed.
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close this process's handle on clang-format's input: either git_show's
  # stdout or clang-format's own stdin pipe.
  clang_format_stdin.close()
  # Pipe the formatted output straight into `git hash-object`.
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  # Check the pipeline from its last stage back to its first.
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(stdout).rstrip('\r\n')
464
465
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index.

  The temporary index is created from `tree` (see create_temporary_index()).
  On exit, the previous value of GIT_INDEX_FILE is restored (or the variable
  is unset if it had no value) and the temporary file is deleted."""
  index_path = create_temporary_index(tree)
  saved_value = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if saved_value is not None:
      os.environ['GIT_INDEX_FILE'] = saved_value
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(index_path)
481
482
def create_temporary_index(tree=None):
  """Write a temporary index file inside the git directory; return its path.

  The index is populated from `tree` when one is given; with `tree` left as
  None, an empty index is created."""
  index_path = os.path.join(run('git', 'rev-parse', '--git-dir'),
                            temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output=' + index_path, source)
  return index_path
494
495
def print_diff(old_tree, new_tree):
  """Show the user the diff from `old_tree` to `new_tree` on stdout."""
  # The porcelain 'diff' is preferred over the plumbing 'diff-tree' here: the
  # output is meant for a human, and only the porcelain command does nice
  # things like color and pagination.
  #
  # The filter keeps modified files only.  `new_tree` holds nothing but the
  # files that were modified, so without the filter every unmodified file
  # would show up as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
507
508
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if there are local unstaged changes in the affected
  files and not `force`.  If `patch_mode`, runs `git checkout --patch` to
  select hunks interactively; otherwise the files are checked out from a
  temporary index created from `new_tree`.

  Returns the list of files that differ between `old_tree` and `new_tree`."""
  changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    # The '--' keeps a filename that starts with '-' from being parsed as an
    # option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
                'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
539
540
def run(*args, **kwargs):
  """Run the command `args` and return its standard output as a string.

  Keyword arguments:
    stdin:   data sent to the command's standard input (default: nothing).
             Text is encoded as UTF-8 before being written to the pipe.
    verbose: if true (the default), prefix diagnostics with the command line.
    strip:   if true (the default), strip trailing newlines from the output.

  Anything the command prints to stderr is forwarded to this process's
  stderr.  If the command exits with a non-zero status, this process exits
  with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  # The pipes are opened in binary mode, so text input has to be encoded.
  if stdin is not None:
    stdin = to_bytes(stdin)
  stdout, stderr = p.communicate(input=stdin)

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)

  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)
567
568
def die(message):
  """Print `message` to stderr, prefixed with "error:", and exit with 2."""
  print('error: %s' % (message,), file=sys.stderr)
  raise SystemExit(2)
572
573
def to_bytes(str_input):
    """Return `str_input` as binary data, encoding text as UTF-8.

    Input that already is `bytes` is returned unchanged."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
579
580
def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A `str` is returned unchanged.  Python 3 `bytes` are decoded from UTF-8.
    Anything else (a Python 2 `unicode` object) is encoded to UTF-8, which
    yields a Python 2 `str`."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3, where bytes and str are distinct types
        # and bytes has no encode() method.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
585
586
def convert_string(bytes_input):
    """Convert `bytes_input` to a native string, decoding it as UTF-8.

    If the input cannot be decoded, either because it has no `decode` method
    or because it is not valid UTF-8, `str(bytes_input)` is returned instead."""
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        return str(bytes_input)
594
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
597