1#!/usr/bin/env python 2""" 3A script that provides: 41. Ability to grab binaries where possible from LLVM. 52. Ability to download binaries from MongoDB cache for clang-format. 63. Validates clang-format is the right version. 74. Has support for checking which files are to be checked. 85. Supports validating and updating a set of files to the right coding style. 9""" 10from __future__ import print_function, absolute_import 11 12import difflib 13import glob 14import os 15import re 16import shutil 17import string 18import subprocess 19import sys 20import tarfile 21import tempfile 22import threading 23import urllib2 24from distutils import spawn 25from optparse import OptionParser 26from multiprocessing import cpu_count 27 28# Get relative imports to work when the package is not installed on the PYTHONPATH. 29if __name__ == "__main__" and __package__ is None: 30 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(os.path.realpath(__file__))))) 31 32from buildscripts.linter import git 33from buildscripts.linter import parallel 34 35############################################################################## 36# 37# Constants for clang-format 38# 39# 40 41# Expected version of clang-format 42CLANG_FORMAT_VERSION = "3.8.0" 43CLANG_FORMAT_SHORT_VERSION = "3.8" 44 45# Name of clang-format as a binary 46CLANG_FORMAT_PROGNAME = "clang-format" 47 48# URL location of the "cached" copy of clang-format to download 49# for users which do not have clang-format installed 50CLANG_FORMAT_HTTP_LINUX_CACHE = "https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-3.8-rhel55.tar.gz" 51 52CLANG_FORMAT_HTTP_DARWIN_CACHE = "https://s3.amazonaws.com/boxes.10gen.com/build/clang%2Bllvm-3.8.0-x86_64-apple-darwin.tar.xz" 53 54# Path in the tarball to the clang-format binary 55CLANG_FORMAT_SOURCE_TAR_BASE = string.Template("clang+llvm-$version-$tar_path/bin/" + CLANG_FORMAT_PROGNAME) 56 57############################################################################## 58def callo(args): 59 """Call a program, and capture its output 60 """ 61 return subprocess.check_output(args) 62 63def get_tar_path(version, tar_path): 64 """ Get the path to clang-format in the llvm tarball 65 """ 66 return CLANG_FORMAT_SOURCE_TAR_BASE.substitute( 67 version=version, 68 tar_path=tar_path) 69 70def extract_clang_format(tar_path): 71 # Extract just the clang-format binary 72 # On OSX, we shell out to tar because tarfile doesn't support xz compression 73 if sys.platform == 'darwin': 74 subprocess.call(['tar', '-xzf', tar_path, '*clang-format*']) 75 # Otherwise we use tarfile because some versions of tar don't support wildcards without 76 # a special flag 77 else: 78 tarfp = tarfile.open(tar_path) 79 for name in tarfp.getnames(): 80 if name.endswith('clang-format'): 81 tarfp.extract(name) 82 tarfp.close() 83 84def get_clang_format_from_cache_and_extract(url, tarball_ext): 85 """Get clang-format from mongodb's cache 86 and extract the tarball 87 """ 88 dest_dir = tempfile.gettempdir() 89 temp_tar_file = os.path.join(dest_dir, "temp.tar" + tarball_ext) 90 91 # Download from file 92 print("Downloading clang-format %s from %s, saving to %s" % (CLANG_FORMAT_VERSION, 93 url, temp_tar_file)) 94 95 # Retry download up to 5 times. 96 num_tries = 5 97 for attempt in range(num_tries): 98 try: 99 resp = urllib2.urlopen(url) 100 with open(temp_tar_file, 'wb') as f: 101 f.write(resp.read()) 102 break 103 except urllib2.URLError: 104 if attempt == num_tries - 1: 105 raise 106 continue 107 108 extract_clang_format(temp_tar_file) 109 110def get_clang_format_from_darwin_cache(dest_file): 111 """Download clang-format from llvm.org, unpack the tarball, 112 and put clang-format in the specified place 113 """ 114 get_clang_format_from_cache_and_extract(CLANG_FORMAT_HTTP_DARWIN_CACHE, ".xz") 115 116 # Destination Path 117 shutil.move(get_tar_path(CLANG_FORMAT_VERSION, "x86_64-apple-darwin"), dest_file) 118 119def get_clang_format_from_linux_cache(dest_file): 120 """Get clang-format from mongodb's cache 121 """ 122 get_clang_format_from_cache_and_extract(CLANG_FORMAT_HTTP_LINUX_CACHE, ".gz") 123 124 # Destination Path 125 shutil.move("build/bin/clang-format", dest_file) 126 127class ClangFormat(object): 128 """Class encapsulates finding a suitable copy of clang-format, 129 and linting/formating an individual file 130 """ 131 def __init__(self, path, cache_dir): 132 self.path = None 133 clang_format_progname_ext = "" 134 135 if sys.platform == "win32": 136 clang_format_progname_ext += ".exe" 137 138 # Check the clang-format the user specified 139 if path is not None: 140 if os.path.isfile(path): 141 self.path = path 142 else: 143 print("WARNING: Could not find clang-format %s" % (path)) 144 145 # Check the environment variable 146 if "MONGO_CLANG_FORMAT" in os.environ: 147 self.path = os.environ["MONGO_CLANG_FORMAT"] 148 149 if self.path and not self._validate_version(): 150 self.path = None 151 152 # Check the users' PATH environment variable now 153 if self.path is None: 154 # Check for various versions staring with binaries with version specific suffixes in the 155 # user's path 156 programs = [ 157 CLANG_FORMAT_PROGNAME + "-" + CLANG_FORMAT_VERSION, 158 CLANG_FORMAT_PROGNAME + "-" + CLANG_FORMAT_SHORT_VERSION, 159 CLANG_FORMAT_PROGNAME, 160 ] 161 162 if sys.platform == "win32": 163 for i in range(len(programs)): 164 programs[i] += '.exe' 165 166 for program in programs: 167 self.path = spawn.find_executable(program) 168 169 if self.path: 170 if not self._validate_version(): 171 self.path = None 172 else: 173 break 174 175 # If Windows, try to grab it from Program Files 176 # Check both native Program Files and WOW64 version 177 if sys.platform == "win32": 178 programfiles = [ 179 os.environ["ProgramFiles"], 180 os.environ["ProgramFiles(x86)"], 181 ] 182 183 for programfile in programfiles: 184 win32bin = os.path.join(programfile, "LLVM\\bin\\clang-format.exe") 185 if os.path.exists(win32bin): 186 self.path = win32bin 187 break 188 189 # Have not found it yet, download it from the web 190 if self.path is None: 191 if not os.path.isdir(cache_dir): 192 os.makedirs(cache_dir) 193 194 self.path = os.path.join(cache_dir, CLANG_FORMAT_PROGNAME + "-" + CLANG_FORMAT_VERSION + clang_format_progname_ext) 195 196 # Download a new version if the cache is empty or stale 197 if not os.path.isfile(self.path) or not self._validate_version(): 198 if sys.platform.startswith("linux"): 199 get_clang_format_from_linux_cache(self.path) 200 elif sys.platform == "darwin": 201 get_clang_format_from_darwin_cache(self.path) 202 else: 203 print("ERROR: clang-format.py does not support downloading clang-format " + 204 " on this platform, please install clang-format " + CLANG_FORMAT_VERSION) 205 206 # Validate we have the correct version 207 # We only can fail here if the user specified a clang-format binary and it is the wrong 208 # version 209 if not self._validate_version(): 210 print("ERROR: exiting because of previous warning.") 211 sys.exit(1) 212 213 self.print_lock = threading.Lock() 214 215 def _validate_version(self): 216 """Validate clang-format is the expected version 217 """ 218 cf_version = callo([self.path, "--version"]) 219 220 if CLANG_FORMAT_VERSION in cf_version: 221 return True 222 223 print("WARNING: clang-format found in path, but incorrect version found at " + 224 self.path + " with version: " + cf_version) 225 226 return False 227 228 def _lint(self, file_name, print_diff): 229 """Check the specified file has the correct format 230 """ 231 with open(file_name, 'rb') as original_text: 232 original_file = original_text.read() 233 234 # Get formatted file as clang-format would format the file 235 formatted_file = callo([self.path, "--style=file", file_name]) 236 237 if original_file != formatted_file: 238 if print_diff: 239 original_lines = original_file.splitlines() 240 formatted_lines = formatted_file.splitlines() 241 result = difflib.unified_diff(original_lines, formatted_lines) 242 243 # Take a lock to ensure diffs do not get mixed when printed to the screen 244 with self.print_lock: 245 print("ERROR: Found diff for " + file_name) 246 print("To fix formatting errors, run %s --style=file -i %s" % 247 (self.path, file_name)) 248 for line in result: 249 print(line.rstrip()) 250 251 return False 252 253 return True 254 255 def lint(self, file_name): 256 """Check the specified file has the correct format 257 """ 258 return self._lint(file_name, print_diff=True) 259 260 def format(self, file_name): 261 """Update the format of the specified file 262 """ 263 if self._lint(file_name, print_diff=False): 264 return True 265 266 # Update the file with clang-format 267 formatted = not subprocess.call([self.path, "--style=file", "-i", file_name]) 268 269 # Version 3.8 generates files like foo.cpp~RF83372177.TMP when it formats foo.cpp 270 # on Windows, we must clean these up 271 if sys.platform == "win32": 272 glob_pattern = file_name + "*.TMP" 273 for fglob in glob.glob(glob_pattern): 274 os.unlink(fglob) 275 276 return formatted 277 278files_re = re.compile('\\.(h|cpp|js)$') 279 280def is_interesting_file(file_name): 281 """"Return true if this file should be checked 282 """ 283 return ((file_name.startswith("jstests") or file_name.startswith("src")) 284 and not file_name.startswith("src/third_party/") 285 and not file_name.startswith("src/mongo/gotools/")) and files_re.search(file_name) 286 287def get_list_from_lines(lines): 288 """"Convert a string containing a series of lines into a list of strings 289 """ 290 return [line.rstrip() for line in lines.splitlines()] 291 292def _get_build_dir(): 293 """Get the location of the scons' build directory in case we need to download clang-format 294 """ 295 return os.path.join(git.get_base_dir(), "build") 296 297def _lint_files(clang_format, files): 298 """Lint a list of files with clang-format 299 """ 300 clang_format = ClangFormat(clang_format, _get_build_dir()) 301 302 lint_clean = parallel.parallel_process([os.path.abspath(f) for f in files], clang_format.lint) 303 304 if not lint_clean: 305 print("ERROR: Code Style does not match coding style") 306 sys.exit(1) 307 308def lint_patch(clang_format, infile): 309 """Lint patch command entry point 310 """ 311 files = git.get_files_to_check_from_patch(infile, is_interesting_file) 312 313 # Patch may have files that we do not want to check which is fine 314 if files: 315 _lint_files(clang_format, files) 316 317def lint(clang_format): 318 """Lint files command entry point 319 """ 320 files = git.get_files_to_check([], is_interesting_file) 321 322 _lint_files(clang_format, files) 323 324 return True 325 326def lint_all(clang_format): 327 """Lint files command entry point based on working tree 328 """ 329 files = git.get_files_to_check_working_tree(is_interesting_file) 330 331 _lint_files(clang_format, files) 332 333 return True 334 335def _format_files(clang_format, files): 336 """Format a list of files with clang-format 337 """ 338 clang_format = ClangFormat(clang_format, _get_build_dir()) 339 340 format_clean = parallel.parallel_process([os.path.abspath(f) for f in files], 341 clang_format.format) 342 343 if not format_clean: 344 print("ERROR: failed to format files") 345 sys.exit(1) 346 347def format_func(clang_format): 348 """Format files command entry point 349 """ 350 files = git.get_files_to_check([], is_interesting_file) 351 352 _format_files(clang_format, files) 353 354def reformat_branch(clang_format, commit_prior_to_reformat, commit_after_reformat): 355 """Reformat a branch made before a clang-format run 356 """ 357 clang_format = ClangFormat(clang_format, _get_build_dir()) 358 359 if os.getcwd() != git.get_base_dir(): 360 raise ValueError("reformat-branch must be run from the repo root") 361 362 if not os.path.exists("buildscripts/clang_format.py"): 363 raise ValueError("reformat-branch is only supported in the mongo repo") 364 365 repo = git.Repo(git.get_base_dir()) 366 367 # Validate that user passes valid commits 368 if not repo.is_commit(commit_prior_to_reformat): 369 raise ValueError("Commit Prior to Reformat '%s' is not a valid commit in this repo" % 370 commit_prior_to_reformat) 371 372 if not repo.is_commit(commit_after_reformat): 373 raise ValueError("Commit After Reformat '%s' is not a valid commit in this repo" % 374 commit_after_reformat) 375 376 if not repo.is_ancestor(commit_prior_to_reformat, commit_after_reformat): 377 raise ValueError(("Commit Prior to Reformat '%s' is not a valid ancestor of Commit After" + 378 " Reformat '%s' in this repo") % (commit_prior_to_reformat, commit_after_reformat)) 379 380 # Validate the user is on a local branch that has the right merge base 381 if repo.is_detached(): 382 raise ValueError("You must not run this script in a detached HEAD state") 383 384 # Validate the user has no pending changes 385 if repo.is_working_tree_dirty(): 386 raise ValueError("Your working tree has pending changes. You must have a clean working tree before proceeding.") 387 388 merge_base = repo.get_merge_base(commit_prior_to_reformat) 389 390 if not merge_base == commit_prior_to_reformat: 391 raise ValueError("Please rebase to '%s' and resolve all conflicts before running this script" % (commit_prior_to_reformat)) 392 393 # We assume the target branch is master, it could be a different branch if needed for testing 394 merge_base = repo.get_merge_base("master") 395 396 if not merge_base == commit_prior_to_reformat: 397 raise ValueError("This branch appears to already have advanced too far through the merge process") 398 399 # Everything looks good so lets start going through all the commits 400 branch_name = repo.get_branch_name() 401 new_branch = "%s-reformatted" % branch_name 402 403 if repo.does_branch_exist(new_branch): 404 raise ValueError("The branch '%s' already exists. Please delete the branch '%s', or rename the current branch." % (new_branch, new_branch)) 405 406 commits = get_list_from_lines(repo.log(["--reverse", "--pretty=format:%H", "%s..HEAD" % commit_prior_to_reformat])) 407 408 previous_commit_base = commit_after_reformat 409 410 files_match = re.compile('\\.(h|cpp|js)$') 411 412 # Go through all the commits the user made on the local branch and migrate to a new branch 413 # that is based on post_reformat commits instead 414 for commit_hash in commits: 415 repo.checkout(["--quiet", commit_hash]) 416 417 deleted_files = [] 418 419 # Format each of the files by checking out just a single commit from the user's branch 420 commit_files = get_list_from_lines(repo.diff(["HEAD~", "--name-only"])) 421 422 for commit_file in commit_files: 423 424 # Format each file needed if it was not deleted 425 if not os.path.exists(commit_file): 426 print("Skipping file '%s' since it has been deleted in commit '%s'" % ( 427 commit_file, commit_hash)) 428 deleted_files.append(commit_file) 429 continue 430 431 if files_match.search(commit_file): 432 clang_format.format(commit_file) 433 else: 434 print("Skipping file '%s' since it is not a file clang_format should format" % 435 commit_file) 436 437 # Check if anything needed reformatting, and if so amend the commit 438 if not repo.is_working_tree_dirty(): 439 print ("Commit %s needed no reformatting" % commit_hash) 440 else: 441 repo.commit(["--all", "--amend", "--no-edit"]) 442 443 # Rebase our new commit on top the post-reformat commit 444 previous_commit = repo.rev_parse(["HEAD"]) 445 446 # Checkout the new branch with the reformatted commits 447 # Note: we will not name as a branch until we are done with all commits on the local branch 448 repo.checkout(["--quiet", previous_commit_base]) 449 450 # Copy each file from the reformatted commit on top of the post reformat 451 diff_files = get_list_from_lines(repo.diff(["%s~..%s" % (previous_commit, previous_commit), 452 "--name-only"])) 453 454 for diff_file in diff_files: 455 # If the file was deleted in the commit we are reformatting, we need to delete it again 456 if diff_file in deleted_files: 457 repo.rm([diff_file]) 458 continue 459 460 # The file has been added or modified, continue as normal 461 file_contents = repo.show(["%s:%s" % (previous_commit, diff_file)]) 462 463 root_dir = os.path.dirname(diff_file) 464 if root_dir and not os.path.exists(root_dir): 465 os.makedirs(root_dir) 466 467 with open(diff_file, "w+") as new_file: 468 new_file.write(file_contents) 469 470 repo.add([diff_file]) 471 472 # Create a new commit onto clang-formatted branch 473 repo.commit(["--reuse-message=%s" % previous_commit]) 474 475 previous_commit_base = repo.rev_parse(["HEAD"]) 476 477 # Create a new branch to mark the hashes we have been using 478 repo.checkout(["-b", new_branch]) 479 480 print("reformat-branch is done running.\n") 481 print("A copy of your branch has been made named '%s', and formatted with clang-format.\n" % new_branch) 482 print("The original branch has been left unchanged.") 483 print("The next step is to rebase the new branch on 'master'.") 484 485 486def usage(): 487 """Print usage 488 """ 489 print("clang-format.py supports 5 commands [ lint, lint-all, lint-patch, format, reformat-branch].") 490 491def main(): 492 """Main entry point 493 """ 494 parser = OptionParser() 495 parser.add_option("-c", "--clang-format", type="string", dest="clang_format") 496 497 (options, args) = parser.parse_args(args=sys.argv) 498 499 if len(args) > 1: 500 command = args[1] 501 502 if command == "lint": 503 lint(options.clang_format) 504 elif command == "lint-all": 505 lint_all(options.clang_format) 506 elif command == "lint-patch": 507 lint_patch(options.clang_format, args[2:]) 508 elif command == "format": 509 format_func(options.clang_format) 510 elif command == "reformat-branch": 511 512 if len(args) < 3: 513 print("ERROR: reformat-branch takes two parameters: commit_prior_to_reformat commit_after_reformat") 514 return 515 516 reformat_branch(options.clang_format, args[2], args[3]) 517 else: 518 usage() 519 else: 520 usage() 521 522if __name__ == "__main__": 523 main() 524