#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""

from __future__ import absolute_import, division, print_function
import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
 clangFormat.binary
 clangFormat.commit
 clangFormat.extensions
 clangFormat.style
'''

# Name of the temporary index file in which save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


Range = collections.namedtuple('Range', 'start, count')


def commit_count_error(commits, diff):
    """Return an error message if `commits` cannot be processed, else None.

    At most two commits are accepted, and two commits are only accepted
    when `diff` is true (i.e. --diff was given)."""
    # Check the upper bound first so that three or more commits always get
    # the accurate message, whether or not --diff was passed.
    if len(commits) > 2:
        return 'at most two commits allowed; %d given' % len(commits)
    if len(commits) == 2 and not diff:
        return '--diff is required when two commits are given'
    return None


def main():
    """Parse the command line, run clang-format on the changed lines, and
    either print the resulting diff or apply it to the working directory."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
        'cu', 'cuh',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
        'json',  # Json
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present. However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help=('if specified, only consider differences in these '
                         'files'))
    opts = p.parse_args(argv)

    # Each -q cancels one -v, so `verbose` may end up negative.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    error = commit_count_error(commits, opts.diff)
    if error is not None:
        die(error)
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        # Remember every file in the diff so the filtered-out ones can be
        # reported below.
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    # The paths in the computed diff are relative to the top level of the
    # repository, so we must cd there before accessing those files.
    cd_to_toplevel()
    filter_symlinks(changed_lines)
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files '
                  '(wrong extension or symlink):')
            for filename in ignored_files:
                print(' %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print(' %s' % filename)
    if not changed_lines:
        if opts.verbose >= 0:
            print('no modified files to format')
        return
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print(' %s' % filename)


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless in `non_string_options`,
    which is a dictionary mapping option name (in lower case) to either
    "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if entry:
            if '\n' in entry:
                name, value = entry.split('\n', 1)
            else:
                # A setting with no '=' ('\n' with --null) is implicitly
                # 'true'
                name = entry
                value = 'true'
            if name in non_string_options:
                # Let git itself convert the value to the requested type.
                value = run('git', 'config', non_string_options[name], name)
            out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits. Otherwise, the arguments are
    checked from left to right if they are commits or files. If commits are
    not given, a list with `default_commit` is used.

    `args` itself is not modified."""
    if dash_dash:
        if len(args) == 0:
            commits = [default_commit]
        else:
            commits = args
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        files = dash_dash[1:]
    elif args:
        commits = []
        # Work on a copy so the caller's list is left untouched.
        remaining = list(args)
        while remaining:
            if not disambiguate_revision(remaining[0]):
                break
            commits.append(remaining.pop(0))
        if not commits:
            commits = [default_commit]
        files = remaining
    else:
        commits = [default_commit]
        files = []
    return commits, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())


def compute_diff_and_extract_lines(commits, files):
    """Calls compute_diff() followed by extract_lines().

    Exits with status 2 if the diff command fails."""
    diff_process = compute_diff(commits, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commits, files):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and the first commit if a
    single one was specified, or the difference between both specified
    commits, filtered on `files` (if non-empty). Zero context lines are used
    in the patch."""
    git_tool = 'diff-index'
    if len(commits) > 1:
        git_tool = 'diff-tree'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    The input must have been produced with ``-U0``, meaning unidiff format
    with zero lines of context. The return value is a dict mapping filename
    to a list of line `Range`s.

    Hunks that add no lines (a count of 0) are omitted, and so are hunk
    headers that appear before any "+++" file header."""
    file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
    hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
    matches = {}
    filename = None
    for line in patch_file:
        line = convert_string(line)
        match = file_header.search(line)
        if match:
            filename = match.group(1).rstrip('\r\n')
        match = hunk_header.search(line)
        if match and filename is not None:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. If it contains the empty string, filenames without
    any period are kept as well."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys because entries are deleted.
    for filename in list(dictionary.keys()):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 and '' in allowed_extensions:
            continue
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def filter_symlinks(dictionary):
    """Delete every key in `dictionary` that is a symlink."""
    # Iterate over a snapshot of the keys because entries are deleted.
    for filename in list(dictionary.keys()):
        if os.path.islink(filename):
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps each filename to the list of line `Range`s to
    format. If `revision` is given, file modes and contents are read from
    that revision; otherwise they are read from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            if revision:
                git_metadata_cmd = [
                    'git', 'ls-tree',
                    '%s:%s' % (revision, os.path.dirname(filename)),
                    os.path.basename(filename)]
                git_metadata = subprocess.Popen(git_metadata_cmd,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE)
                stdout = git_metadata.communicate()[0]
                fields = stdout.split()
                # No output means the file is not present in `revision`.
                if git_metadata.returncode != 0 or not fields:
                    die('`%s` failed' % ' '.join(git_metadata_cmd))
                mode_bits = int(fields[0], 8)
            else:
                mode_bits = os.stat(filename).st_mode
            # Octal digits with a single leading zero, the same on Python 2
            # and Python 3 (whose oct() would produce a '0o' prefix).
            mode = '0%o' % mode_bits
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames. If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other
    mode is invalid.

    Returns the object ID of the created tree."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # With -z, update-index expects NUL-terminated records.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.extend(['-style=' + style])
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    if revision:
        # The content is piped in from git, so clang-format has to be told
        # the filename explicitly.
        clang_format_cmd.extend(['-assume-filename=' + filename])
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.extend([filename])
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in. Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree,
                           new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`. If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between the two trees."""
    diff_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree, new_tree)
    # Splitting empty output would give [''], so drop empty names.
    changed_files = [name for name in diff_output.rstrip('\0').split('\0')
                     if name]
    if not force and changed_files:
        # '--' keeps a filename starting with '-' from being read as an
        # option.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree". Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin: data to send to the command's standard input (default: none).
      verbose: if true (the default), report anything the command printed to
        stderr, and a non-zero exit status, on stderr.
      strip: if true (the default), strip trailing newlines from the output.

    Exits with status 2 if the command returns a non-zero status."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary, so text input must be encoded first.
    stdout, stderr = p.communicate(input=to_bytes(stdin))

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)


def die(message):
    """Print `message` to stderr as an error and exit with status 2."""
    print('error:', message, file=sys.stderr)
    sys.exit(2)


def to_bytes(str_input):
    """Return `str_input` as bytes, encoding it as UTF-8 if necessary."""
    # Encode to UTF-8 to get binary data.
    if isinstance(str_input, bytes):
        return str_input
    return str_input.encode('utf-8')


def to_string(bytes_input):
    """Return `bytes_input` as a `str`, encoding it as UTF-8 if it is not one
    already."""
    if isinstance(bytes_input, str):
        return bytes_input
    return bytes_input.encode('utf-8')


def convert_string(bytes_input):
    """Return `bytes_input` decoded from UTF-8 as a `str`.

    Falls back to `str(bytes_input)` if the input cannot be decoded."""
    try:
        return to_string(bytes_input.decode('utf-8'))
    except AttributeError:  # 'str' object has no attribute 'decode'.
        return str(bytes_input)
    except UnicodeError:
        return str(bytes_input)


if __name__ == '__main__':
    main()