1#!/usr/bin/env python
2"""
A script that:
1. Grabs clang-format binaries from LLVM where possible.
2. Downloads clang-format binaries from the MongoDB cache.
3. Validates that clang-format is the right version.
4. Determines which files are to be checked.
5. Validates and updates a set of files to the right coding style.
9"""
10from __future__ import print_function, absolute_import
11
12import difflib
13import glob
14import os
15import re
16import shutil
17import string
18import subprocess
19import sys
20import tarfile
21import tempfile
22import threading
23import urllib2
24from distutils import spawn
25from optparse import OptionParser
26from multiprocessing import cpu_count
27
# Get relative imports to work when the package is not installed on the PYTHONPATH.
# When this file is executed directly as a script, append the parent of the directory
# containing this file (presumably the repository root) to sys.path, so that the
# `buildscripts` package imported below can be found.
if __name__ == "__main__" and __package__ is None:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(os.path.realpath(__file__)))))
31
32from buildscripts.linter import git
33from buildscripts.linter import parallel
34
35##############################################################################
36#
37# Constants for clang-format
38#
39#
40
# Expected version of clang-format.
# The full version is matched against the output of `clang-format --version`; both the
# full and the short version are used as suffixes when searching the PATH for binaries
# named like `clang-format-3.8.0` or `clang-format-3.8`.
CLANG_FORMAT_VERSION = "3.8.0"
CLANG_FORMAT_SHORT_VERSION = "3.8"

# Name of clang-format as a binary
CLANG_FORMAT_PROGNAME = "clang-format"

# URL location of the "cached" copy of clang-format to download
# for users who do not have clang-format installed.
# Linux: a gzip-compressed tarball.
CLANG_FORMAT_HTTP_LINUX_CACHE = "https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-3.8-rhel55.tar.gz"

# OSX: an xz-compressed clang+llvm tarball.
CLANG_FORMAT_HTTP_DARWIN_CACHE = "https://s3.amazonaws.com/boxes.10gen.com/build/clang%2Bllvm-3.8.0-x86_64-apple-darwin.tar.xz"

# Path in the tarball to the clang-format binary.
# `$version` and `$tar_path` are filled in by get_tar_path().
CLANG_FORMAT_SOURCE_TAR_BASE = string.Template("clang+llvm-$version-$tar_path/bin/" + CLANG_FORMAT_PROGNAME)
56
57##############################################################################
def callo(args):
    """Run the command given by the argument list and return what it wrote to stdout.

    Raises subprocess.CalledProcessError if the command exits with a non-zero status.
    """
    captured_output = subprocess.check_output(args)
    return captured_output
62
def get_tar_path(version, tar_path):
    """Build the path of the clang-format binary inside the llvm tarball.

    Fills the `$version` and `$tar_path` placeholders of CLANG_FORMAT_SOURCE_TAR_BASE.
    """
    placeholders = {"version": version, "tar_path": tar_path}
    return CLANG_FORMAT_SOURCE_TAR_BASE.substitute(placeholders)
69
def extract_clang_format(tar_path):
    """Extract just the clang-format binary from the tarball at tar_path.

    Members are extracted relative to the current working directory, keeping the
    directory layout stored in the archive.

    Raises subprocess.CalledProcessError if the external tar command fails (OSX only).
    """
    # On OSX, we shell out to tar because tarfile doesn't support xz compression
    if sys.platform == 'darwin':
        # Use plain '-xf' so tar detects the compression itself: the OSX tarball is
        # xz-compressed, so forcing gzip with '-z' is wrong.
        # check_call makes a failed extraction fail here instead of later, when the
        # caller tries to move a file that was never extracted.
        subprocess.check_call(['tar', '-xf', tar_path, '*clang-format*'])
        return

    # Otherwise we use tarfile because some versions of tar don't support wildcards without
    # a special flag
    with tarfile.open(tar_path) as tarfp:
        for name in tarfp.getnames():
            if name.endswith('clang-format'):
                tarfp.extract(name)
83
def get_clang_format_from_cache_and_extract(url, tarball_ext):
    """Download the clang-format tarball at `url` and extract clang-format from it.

    The tarball is saved as "temp.tar" + tarball_ext in the system temporary directory
    and then handed to extract_clang_format().

    Raises urllib2.URLError if the download still fails on the last of 5 attempts.
    """
    dest_dir = tempfile.gettempdir()
    temp_tar_file = os.path.join(dest_dir, "temp.tar" + tarball_ext)

    # Download from file
    print("Downloading clang-format %s from %s, saving to %s" % (CLANG_FORMAT_VERSION,
            url, temp_tar_file))

    # Retry download up to 5 times.
    num_tries = 5
    for attempt in range(num_tries):
        try:
            resp = urllib2.urlopen(url)
            try:
                # Stream to disk instead of holding the whole tarball in memory.
                # Opening with 'wb' truncates anything left by a failed earlier attempt.
                with open(temp_tar_file, 'wb') as f:
                    shutil.copyfileobj(resp, f)
            finally:
                # Always release the connection, even when the copy fails part way.
                resp.close()
            break
        except urllib2.URLError:
            # Out of retries: let the caller see the failure.
            if attempt == num_tries - 1:
                raise

    extract_clang_format(temp_tar_file)
109
def get_clang_format_from_darwin_cache(dest_file):
    """Download the OSX tarball from CLANG_FORMAT_HTTP_DARWIN_CACHE, unpack clang-format,
    and move the binary to dest_file
    """
    get_clang_format_from_cache_and_extract(CLANG_FORMAT_HTTP_DARWIN_CACHE, ".xz")

    # Location of the binary inside the directory tree the tarball was extracted to
    extracted_binary = get_tar_path(CLANG_FORMAT_VERSION, "x86_64-apple-darwin")
    shutil.move(extracted_binary, dest_file)
118
def get_clang_format_from_linux_cache(dest_file):
    """Download the Linux tarball from CLANG_FORMAT_HTTP_LINUX_CACHE, unpack clang-format,
    and move the binary to dest_file
    """
    get_clang_format_from_cache_and_extract(CLANG_FORMAT_HTTP_LINUX_CACHE, ".gz")

    # Location of the binary, relative to the current directory, after extraction
    extracted_binary = "build/bin/clang-format"
    shutil.move(extracted_binary, dest_file)
126
class ClangFormat(object):
    """Class encapsulates finding a suitable copy of clang-format,
    and linting/formatting an individual file
    """
    def __init__(self, path, cache_dir):
        """Locate a clang-format binary of the expected version.

        Candidates are considered in this order:
          1. `path`, when it is not None and names an existing file.
          2. The MONGO_CLANG_FORMAT environment variable; when set to a non-empty value
             it replaces `path`, and is discarded if it is the wrong version.
          3. Version-suffixed and plain clang-format binaries on the user's PATH.
          4. On Windows, the LLVM install under the Program Files directories.
          5. A copy cached in `cache_dir`, downloaded on Linux and OSX when it is
             missing or stale.

        Exits the process with status 1 when no binary of the expected version is found.
        """
        self.path = None
        clang_format_progname_ext = ""

        if sys.platform == "win32":
            clang_format_progname_ext += ".exe"

        # Check the clang-format the user specified
        if path is not None:
            if os.path.isfile(path):
                self.path = path
            else:
                print("WARNING: Could not find clang-format %s" % (path))

        # Check the environment variable.
        # An empty value is ignored so it cannot clobber the path with "".
        env_path = os.environ.get("MONGO_CLANG_FORMAT")
        if env_path:
            self.path = env_path

            if not self._validate_version():
                self.path = None

        # Check the users' PATH environment variable now
        if self.path is None:
            # Check for various versions starting with binaries with version specific suffixes
            # in the user's path
            programs = [
                CLANG_FORMAT_PROGNAME + "-" + CLANG_FORMAT_VERSION,
                CLANG_FORMAT_PROGNAME + "-" + CLANG_FORMAT_SHORT_VERSION,
                CLANG_FORMAT_PROGNAME,
            ]

            for program in programs:
                self.path = spawn.find_executable(program + clang_format_progname_ext)

                if self.path:
                    if self._validate_version():
                        break
                    self.path = None

        # If Windows, try to grab it from Program Files
        # Check both native Program Files and WOW64 version.
        # Only done when nothing was found yet, so an already validated binary is never
        # replaced by an unvalidated one.
        if self.path is None and sys.platform == "win32":
            for env_name in ("ProgramFiles", "ProgramFiles(x86)"):
                # "ProgramFiles(x86)" may not be defined, so do not index os.environ directly
                programfile = os.environ.get(env_name)
                if not programfile:
                    continue

                win32bin = os.path.join(programfile, "LLVM\\bin\\clang-format.exe")
                if os.path.exists(win32bin):
                    self.path = win32bin
                    break

        # Have not found it yet, download it from the web
        if self.path is None:
            if not os.path.isdir(cache_dir):
                os.makedirs(cache_dir)

            self.path = os.path.join(cache_dir, CLANG_FORMAT_PROGNAME + "-" + CLANG_FORMAT_VERSION + clang_format_progname_ext)

            # Download a new version if the cache is empty or stale
            if not os.path.isfile(self.path) or not self._validate_version():
                if sys.platform.startswith("linux"):
                    get_clang_format_from_linux_cache(self.path)
                elif sys.platform == "darwin":
                    get_clang_format_from_darwin_cache(self.path)
                else:
                    print("ERROR: clang-format.py does not support downloading clang-format " +
                        " on this platform, please install clang-format " + CLANG_FORMAT_VERSION)
                    # Nothing usable exists at self.path, so stop here rather than
                    # trying to execute a missing binary below.
                    sys.exit(1)

        # Validate we have the correct version
        # We only can fail here if the user specified a clang-format binary and it is the wrong
        # version
        if not self._validate_version():
            print("ERROR: exiting because of previous warning.")
            sys.exit(1)

        # Serializes diff output when lint() is called from several threads
        self.print_lock = threading.Lock()

    def _validate_version(self):
        """Validate clang-format is the expected version

        Runs `self.path --version` and returns True when CLANG_FORMAT_VERSION appears in
        the output. Returns False, after printing a warning, when the version does not
        match or the binary cannot be run at all.
        """
        try:
            cf_version = callo([self.path, "--version"])
        except (OSError, subprocess.CalledProcessError) as err:
            # A missing, non-executable or crashing binary is as unusable as one with the
            # wrong version; report it and let the caller try the next candidate.
            print("WARNING: could not run clang-format at %s: %s" % (self.path, err))
            return False

        if CLANG_FORMAT_VERSION in cf_version:
            return True

        print("WARNING: clang-format found in path, but incorrect version found at " +
                self.path + " with version: " + cf_version)

        return False

    def _lint(self, file_name, print_diff):
        """Check the specified file has the correct format

        Returns True when the file content equals clang-format's output for it,
        otherwise False. When print_diff is true, a unified diff between the two is
        printed for a file that differs.
        """
        with open(file_name, 'rb') as original_text:
            original_file = original_text.read()

        # Get formatted file as clang-format would format the file
        formatted_file = callo([self.path, "--style=file", file_name])

        if original_file != formatted_file:
            if print_diff:
                original_lines = original_file.splitlines()
                formatted_lines = formatted_file.splitlines()
                result = difflib.unified_diff(original_lines, formatted_lines)

                # Take a lock to ensure diffs do not get mixed when printed to the screen
                with self.print_lock:
                    print("ERROR: Found diff for " + file_name)
                    print("To fix formatting errors, run %s --style=file -i %s" %
                            (self.path, file_name))
                    for line in result:
                        print(line.rstrip())

            return False

        return True

    def lint(self, file_name):
        """Check the specified file has the correct format, printing a diff if it does not
        """
        return self._lint(file_name, print_diff=True)

    def format(self, file_name):
        """Update the format of the specified file

        Returns True when the file was already formatted or clang-format exited with
        status zero after rewriting it in place, otherwise False.
        """
        if self._lint(file_name, print_diff=False):
            return True

        # Update the file with clang-format; an exit status of zero means success
        formatted = not subprocess.call([self.path, "--style=file", "-i", file_name])

        # Version 3.8 generates files like foo.cpp~RF83372177.TMP when it formats foo.cpp
        # on Windows, we must clean these up
        if sys.platform == "win32":
            glob_pattern = file_name + "*.TMP"
            for fglob in glob.glob(glob_pattern):
                os.unlink(fglob)

        return formatted
277
# Matches the file extensions that are checked: .h, .cpp and .js
files_re = re.compile('\\.(h|cpp|js)$')

def is_interesting_file(file_name):
    """Return a true value if this file should be checked

    A file qualifies when its name starts with "jstests" or "src", it is not under
    "src/third_party/" or "src/mongo/gotools/", and its extension matches files_re.
    """
    in_checked_tree = file_name.startswith(("jstests", "src"))
    is_excluded = file_name.startswith(("src/third_party/", "src/mongo/gotools/"))
    return (in_checked_tree and not is_excluded) and files_re.search(file_name)
286
def get_list_from_lines(lines):
    """Split a string containing a series of lines into a list of strings,
    removing trailing whitespace from each line
    """
    stripped_lines = []
    for raw_line in lines.splitlines():
        stripped_lines.append(raw_line.rstrip())
    return stripped_lines
291
def _get_build_dir():
    """Return the "build" directory under the git base directory, used as the cache
    location in case we need to download clang-format
    """
    repo_root = git.get_base_dir()
    return os.path.join(repo_root, "build")
296
def _lint_files(clang_format, files):
    """Lint a list of files with clang-format, exiting with status 1 if any file fails
    """
    linter = ClangFormat(clang_format, _get_build_dir())
    absolute_paths = [os.path.abspath(f) for f in files]

    if parallel.parallel_process(absolute_paths, linter.lint):
        return

    print("ERROR: Code Style does not match coding style")
    sys.exit(1)
307
def lint_patch(clang_format, infile):
    """Entry point for the lint-patch command
    """
    candidates = git.get_files_to_check_from_patch(infile, is_interesting_file)

    # Patch may have files that we do not want to check which is fine
    if not candidates:
        return

    _lint_files(clang_format, candidates)
316
def lint(clang_format):
    """Entry point for the lint command; returns True when linting did not exit
    """
    _lint_files(clang_format, git.get_files_to_check([], is_interesting_file))
    return True
325
def lint_all(clang_format):
    """Entry point for the lint-all command, which lints files based on the working tree;
    returns True when linting did not exit
    """
    _lint_files(clang_format, git.get_files_to_check_working_tree(is_interesting_file))
    return True
334
def _format_files(clang_format, files):
    """Format a list of files with clang-format, exiting with status 1 on failure
    """
    formatter = ClangFormat(clang_format, _get_build_dir())
    absolute_paths = [os.path.abspath(f) for f in files]

    if parallel.parallel_process(absolute_paths, formatter.format):
        return

    print("ERROR: failed to format files")
    sys.exit(1)
346
def format_func(clang_format):
    """Entry point for the format command
    """
    _format_files(clang_format, git.get_files_to_check([], is_interesting_file))
353
def reformat_branch(clang_format, commit_prior_to_reformat, commit_after_reformat):
    """Reformat a branch made before a clang-format run

    Replays every commit in `commit_prior_to_reformat..HEAD` on top of
    `commit_after_reformat`, running clang-format on the .h/.cpp/.js files each commit
    touches, and finally names the result "<current branch>-reformatted".

    clang_format: path to a clang-format binary, or None; passed to ClangFormat.
    commit_prior_to_reformat: the commit just before the reformat commit.
    commit_after_reformat: the reformat commit itself.

    Raises ValueError when a precondition fails: wrong working directory, not the mongo
    repo, invalid or unrelated commits, detached HEAD, dirty working tree, a branch that
    is not rebased onto `commit_prior_to_reformat`, or an already existing target branch.
    """
    clang_format = ClangFormat(clang_format, _get_build_dir())

    # The relative paths used below only make sense from the repo root
    if os.getcwd() != git.get_base_dir():
        raise ValueError("reformat-branch must be run from the repo root")

    if not os.path.exists("buildscripts/clang_format.py"):
        raise ValueError("reformat-branch is only supported in the mongo repo")

    repo = git.Repo(git.get_base_dir())

    # Validate that user passes valid commits
    if not repo.is_commit(commit_prior_to_reformat):
        raise ValueError("Commit Prior to Reformat '%s' is not a valid commit in this repo" %
                commit_prior_to_reformat)

    if not repo.is_commit(commit_after_reformat):
        raise ValueError("Commit After Reformat '%s' is not a valid commit in this repo" %
                commit_after_reformat)

    if not repo.is_ancestor(commit_prior_to_reformat, commit_after_reformat):
        raise ValueError(("Commit Prior to Reformat '%s' is not a valid ancestor of Commit After" +
                " Reformat '%s' in this repo") % (commit_prior_to_reformat, commit_after_reformat))

    # Validate the user is on a local branch that has the right merge base
    if repo.is_detached():
        raise ValueError("You must not run this script in a detached HEAD state")

    # Validate the user has no pending changes
    if repo.is_working_tree_dirty():
        raise ValueError("Your working tree has pending changes. You must have a clean working tree before proceeding.")

    # The branch must already be rebased onto the commit just before the reformat
    merge_base = repo.get_merge_base(commit_prior_to_reformat)

    if not merge_base == commit_prior_to_reformat:
        raise ValueError("Please rebase to '%s' and resolve all conflicts before running this script" % (commit_prior_to_reformat))

    # We assume the target branch is master, it could be a different branch if needed for testing
    merge_base = repo.get_merge_base("master")

    if not merge_base == commit_prior_to_reformat:
        raise ValueError("This branch appears to already have advanced too far through the merge process")

    # Everything looks good so lets start going through all the commits
    branch_name = repo.get_branch_name()
    new_branch = "%s-reformatted" % branch_name

    if repo.does_branch_exist(new_branch):
        raise ValueError("The branch '%s' already exists. Please delete the branch '%s', or rename the current branch." % (new_branch, new_branch))

    # Hashes of the user's commits; "--reverse" presumably yields them oldest first
    commits = get_list_from_lines(repo.log(["--reverse", "--pretty=format:%H", "%s..HEAD" % commit_prior_to_reformat]))

    # Tip of the rewritten history; starts at the reformat commit and advances by one
    # commit per loop iteration
    previous_commit_base = commit_after_reformat

    # Same pattern as the module-level files_re: only .h, .cpp and .js files are formatted
    files_match = re.compile('\\.(h|cpp|js)$')

    # Go through all the commits the user made on the local branch and migrate to a new branch
    # that is based on post_reformat commits instead
    for commit_hash in commits:
        repo.checkout(["--quiet", commit_hash])

        # Files this commit removed; they have to be removed again on the new history
        deleted_files = []

        # Format each of the files by checking out just a single commit from the user's branch
        commit_files = get_list_from_lines(repo.diff(["HEAD~", "--name-only"]))

        for commit_file in commit_files:

            # Format each file needed if it was not deleted
            if not os.path.exists(commit_file):
                print("Skipping file '%s' since it has been deleted in commit '%s'" % (
                        commit_file, commit_hash))
                deleted_files.append(commit_file)
                continue

            if files_match.search(commit_file):
                clang_format.format(commit_file)
            else:
                print("Skipping file '%s' since it is not a file clang_format should format" %
                        commit_file)

        # Check if anything needed reformatting, and if so amend the commit
        if not repo.is_working_tree_dirty():
            print ("Commit %s needed no reformatting" % commit_hash)
        else:
            repo.commit(["--all", "--amend", "--no-edit"])

        # Rebase our new commit on top the post-reformat commit
        previous_commit = repo.rev_parse(["HEAD"])

        # Checkout the new branch with the reformatted commits
        # Note: we will not name as a branch until we are done with all commits on the local branch
        repo.checkout(["--quiet", previous_commit_base])

        # Copy each file from the reformatted commit on top of the post reformat
        diff_files = get_list_from_lines(repo.diff(["%s~..%s" % (previous_commit, previous_commit),
            "--name-only"]))

        for diff_file in diff_files:
            # If the file was deleted in the commit we are reformatting, we need to delete it again
            if diff_file in deleted_files:
                repo.rm([diff_file])
                continue

            # The file has been added or modified, continue as normal
            file_contents = repo.show(["%s:%s" % (previous_commit, diff_file)])

            # The file may live in a directory that does not exist on the new history yet
            root_dir = os.path.dirname(diff_file)
            if root_dir and not os.path.exists(root_dir):
                os.makedirs(root_dir)

            with open(diff_file, "w+") as new_file:
                new_file.write(file_contents)

            repo.add([diff_file])

        # Create a new commit onto clang-formatted branch
        repo.commit(["--reuse-message=%s" % previous_commit])

        # The commit just created is the base for the next replayed commit
        previous_commit_base = repo.rev_parse(["HEAD"])

    # Create a new branch to mark the hashes we have been using
    repo.checkout(["-b", new_branch])

    print("reformat-branch is done running.\n")
    print("A copy of your branch has been made named '%s', and formatted with clang-format.\n" % new_branch)
    print("The original branch has been left unchanged.")
    print("The next step is to rebase the new branch on 'master'.")
484
485
def usage():
    """Print the list of supported commands
    """
    help_text = "clang-format.py supports 5 commands [ lint, lint-all, lint-patch, format, reformat-branch]."
    print(help_text)
490
def main():
    """Main entry point

    Parses sys.argv and dispatches to the function for the requested command:
    lint, lint-all, lint-patch, format or reformat-branch. Prints the usage message
    when the command is missing or unknown.
    """
    parser = OptionParser()
    parser.add_option("-c", "--clang-format", type="string", dest="clang_format")

    # sys.argv is passed whole, so args[0] is the script name and args[1] the command
    (options, args) = parser.parse_args(args=sys.argv)

    if len(args) <= 1:
        usage()
        return

    command = args[1]

    if command == "lint":
        lint(options.clang_format)
    elif command == "lint-all":
        lint_all(options.clang_format)
    elif command == "lint-patch":
        lint_patch(options.clang_format, args[2:])
    elif command == "format":
        format_func(options.clang_format)
    elif command == "reformat-branch":
        # Both commits are required: args must hold the script name, the command and
        # the two commits, i.e. at least 4 entries, before args[3] may be read.
        if len(args) < 4:
            print("ERROR: reformat-branch takes two parameters: commit_prior_to_reformat commit_after_reformat")
            return

        reformat_branch(options.clang_format, args[2], args[3])
    else:
        usage()
521
522if __name__ == "__main__":
523    main()
524