1#!/usr/bin/env python
2#
3# ======- git-llvm - LLVM Git Help Integration ---------*- python -*--========#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==------------------------------------------------------------------------==#
10
11"""
12git-llvm integration
13====================
14
15This file provides integration for git.
16"""
17
18from __future__ import print_function
19import argparse
20import collections
21import os
22import re
23import shutil
24import subprocess
25import sys
26import time
27assert sys.version_info >= (2, 7)
28
# Select the dict-iteration helper that matches the running interpreter.
if hasattr(dict, 'iteritems'):
    # Python 2
    def iteritems(d):
        """Return an iterator over the (key, value) pairs of dict d."""
        return d.iteritems()
else:
    # Python 3
    def iteritems(d):
        """Return an iterator over the (key, value) pairs of dict d."""
        return iter(d.items())
39
40try:
41    # Python 3
42    from shlex import quote
43except ImportError:
44    # Python 2
45    from pipes import quote
46
47# It's *almost* a straightforward mapping from the monorepo to svn...
# Most subprojects live in svn under a directory of the same name.
LLVM_MONOREPO_SVN_MAPPING = dict(
    (project, project + '/branches/release_90')
    for project in (
        'clang-tools-extra',
        'compiler-rt',
        'debuginfo-tests',
        'dragonegg',
        'klee',
        'libclc',
        'libcxx',
        'libcxxabi',
        'libunwind',
        'lld',
        'lldb',
        'llgo',
        'llvm',
        'openmp',
        'parallel-libs',
        'polly',
        'pstl',
    )
)
# The exceptions: clang is stored as 'cfe', and files at the top level of the
# git repository (key '') go to 'monorepo-root'.
LLVM_MONOREPO_SVN_MAPPING['clang'] = 'cfe/branches/release_90'
LLVM_MONOREPO_SVN_MAPPING[''] = 'monorepo-root/branches/release_90'
72
# Standalone git repositories, keyed by git repository name and mapped to
# their trunk path in svn.
SPLIT_REPO_NAMES = dict(
    ('llvm-' + project, project + '/trunk')
    for project in ('www', 'zorg', 'test-suite', 'lnt'))
75
# Set from the -v/--verbose command-line flag; enables log_verbose() output.
VERBOSE = False
# Set from the -q/--quiet command-line flag; silences log() output.
QUIET = False
# Cached writable handle on os.devnull, opened on demand by get_dev_null().
dev_null_fd = None
79
80
def eprint(*args, **kwargs):
    """Print to stderr; takes the same arguments as print() except `file`."""
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
83
84
def log(*args, **kwargs):
    """Print to stdout like print(), unless quiet mode (QUIET) is on."""
    if not QUIET:
        print(*args, **kwargs)
89
90
def log_verbose(*args, **kwargs):
    """Print to stdout like print(), but only in verbose mode (VERBOSE)."""
    if VERBOSE:
        print(*args, **kwargs)
95
96
def die(msg):
    """Report msg on stderr, then terminate the process with exit status 1."""
    eprint(msg)
    raise SystemExit(1)
100
101
def split_first_path_component(d):
    """Split a git path into its first component and the rest.

    Returns a two-element list [first, rest] when d contains a slash, and
    the tuple (d, None) when it does not.
    """
    # Git paths are expected to use forward slashes, even on Windows.
    first, slash, rest = d.partition('/')
    if not slash:
        return (d, None)
    return [first, rest]
108
109
def get_dev_null():
    """Return a shared writable handle on os.devnull, opened on first use.

    The handle is cached in the module-level dev_null_fd and is used by
    shell() to discard a command's stderr.
    """
    global dev_null_fd
    if dev_null_fd is not None:
        return dev_null_fd
    dev_null_fd = open(os.devnull, 'w')
    return dev_null_fd
116
117
def shell(cmd, strip=True, cwd=None, stdin=None, die_on_failure=True,
          ignore_errors=False, text=True):
    """Run a command and return its standard output.

    Args:
        cmd: Command and arguments, as a list of strings.
        strip: If true, remove trailing CR/LF characters from the output.
        cwd: Directory to run the command in (None for the current one).
        stdin: Data to send to the command's standard input, or None.
        die_on_failure: If true, exit the process with status 2 when the
            command fails; otherwise raise RuntimeError.
        ignore_errors: If true, discard stderr and return stdout even when
            the command exits with a non-zero status.
        text: If true, exchange text (universal newlines) with the command;
            if false, stdin and stdout are raw bytes.

    Returns:
        The command's standard output (str in text mode, bytes otherwise).

    Raises:
        RuntimeError: The command failed and die_on_failure is false.
    """
    # Escape args when logging for easy repro.
    quoted_cmd = ' '.join(quote(arg) for arg in cmd)
    log_verbose('Running in %s: %s' % (cwd, quoted_cmd))

    # Silence errors if requested, otherwise capture them for reporting.
    err_pipe = get_dev_null() if ignore_errors else subprocess.PIPE

    start = time.time()
    p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=err_pipe,
                         stdin=subprocess.PIPE,
                         universal_newlines=text)
    stdout, stderr = p.communicate(input=stdin)
    elapsed = time.time() - start

    log_verbose('Command took %0.1fs' % elapsed)

    # With text=False on Python 3, stderr arrives as bytes and would be
    # printed as a b'...' repr. Decode it so diagnostics stay readable. (On
    # Python 2, bytes is str, so nothing is decoded there.)
    if stderr and not isinstance(stderr, str):
        stderr = stderr.decode('utf-8', 'replace')

    if p.returncode == 0 or ignore_errors:
        if stderr and not ignore_errors:
            eprint('`%s` printed to stderr:' % quoted_cmd)
            eprint(stderr.rstrip())
        if strip:
            if text:
                stdout = stdout.rstrip('\r\n')
            else:
                stdout = stdout.rstrip(b'\r\n')
        if VERBOSE:
            for l in stdout.splitlines():
                log_verbose("STDOUT: %s" % l)
        return stdout

    err_msg = '`%s` returned %s' % (quoted_cmd, p.returncode)
    eprint(err_msg)
    if stderr:
        eprint(stderr.rstrip())
    if die_on_failure:
        sys.exit(2)
    raise RuntimeError(err_msg)
158
159
def git(*cmd, **kwargs):
    """Run `git` with the given arguments through shell(); return its output.

    Keyword arguments are forwarded to shell() unchanged.
    """
    argv = ['git']
    argv.extend(cmd)
    return shell(argv, **kwargs)
162
163
def svn(cwd, *cmd, **kwargs):
    """Run `svn` with the given arguments in directory cwd through shell().

    Keyword arguments are forwarded to shell() unchanged; returns its output.
    """
    argv = ['svn']
    argv.extend(cmd)
    return shell(argv, cwd=cwd, **kwargs)
166
167
def program_exists(cmd):
    """Return True if an executable file named cmd is found on PATH.

    On Windows, '.exe' is appended to cmd when it is missing. Returns False
    when the PATH environment variable is not set.
    """
    if sys.platform == 'win32' and not cmd.endswith('.exe'):
        cmd += '.exe'
    search_path = os.environ.get('PATH')
    if search_path is None:
        # No PATH at all (stripped-down environment): nothing to search.
        return False
    for path in search_path.split(os.pathsep):
        candidate = os.path.join(path, cmd)
        # os.access(X_OK) is also true for searchable directories, so insist
        # on a regular file to avoid false positives.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return True
    return False
175
176
def get_default_rev_range():
    """Return the rev range from the upstream merge base up to HEAD.

    The upstream is the branch tracked by the current branch (as set by
    `git branch --set-upstream-to`, see
    http://serverfault.com/a/352236/38694), or origin/master when the
    current branch tracks nothing.
    """
    head_ref = git('rev-parse', '--symbolic-full-name', 'HEAD')
    tracked = git('for-each-ref', '--format=%(upstream:short)', head_ref)
    upstream_branch = tracked or 'origin/master'

    # The newest common ancestor of HEAD and the upstream branch starts the
    # range.
    merge_base = git('merge-base', 'HEAD', upstream_branch)
    return '%s..' % merge_base
189
190
def get_revs_to_push(rev_range):
    """Return the abbreviated hashes in rev_range, oldest first.

    When rev_range is empty, get_default_rev_range() supplies it. Exits via
    die() if the range contains no revisions.
    """
    if not rev_range:
        rev_range = get_default_rev_range()
    # `git show` is used instead of a plumbing command because it accepts
    # both single revs (HEAD^) and ranges (foo..bar) the way we want.
    listing = git('show', '--reverse', '--quiet', '--pretty=%h', rev_range)
    revs = listing.splitlines()
    if revs:
        return revs
    die('Nothing to push: No revs in range %s.' % rev_range)
    return revs
202
203
def clean_svn(svn_repo):
    """Revert local changes in svn_repo and delete its unversioned files.

    Exits via die() if a path reported by `svn status` does not lie strictly
    inside the staging directory.
    """
    svn(svn_repo, 'revert', '-R', '.')

    abs_svn_repo = os.path.abspath(svn_repo)
    # Compare against the directory path *with* a trailing separator, so that
    # a sibling such as "<staging>-other" (or the staging dir itself) does not
    # pass the containment check.
    repo_prefix = os.path.join(abs_svn_repo, '')

    # Unfortunately it appears there's no svn equivalent for git clean, so we
    # have to do it ourselves.
    for line in svn(svn_repo, 'status', '--no-ignore').split('\n'):
        # Only unversioned entries ('?') are removed.
        if not line.startswith('?'):
            continue
        filename = line[1:].strip()
        filepath = os.path.abspath(os.path.join(svn_repo, filename))
        # Safety check that the directory we are about to delete is
        # actually within our svn staging dir.
        if not filepath.startswith(repo_prefix):
            die("Path to clean (%s) is not in svn staging dir (%s)"
                % (filepath, abs_svn_repo))

        if os.path.isdir(filepath):
            shutil.rmtree(filepath)
        else:
            os.remove(filepath)
225
226
def svn_init(svn_root):
    """Make sure the svn staging checkout exists at svn_root.

    If the path does not exist, create it and do an empty-depth checkout of
    the llvm-project svn repository into it. Exits via die() if svn_root is
    not a directory afterwards.
    """
    already_there = os.path.exists(svn_root)
    if not already_there:
        log('Creating svn staging directory: (%s)' % (svn_root))
        os.makedirs(svn_root)
        checkout_url = 'https://llvm.org/svn/llvm-project/'
        svn(svn_root, 'checkout', '--depth=empty', checkout_url, '.')
        log("svn staging area ready in '%s'" % svn_root)
    if os.path.isdir(svn_root):
        return
    die("Can't initialize svn staging dir (%s)" % svn_root)
236
237
def fix_eol_style_native(rev, svn_sr_path, files):
    """Fix line endings before applying patches with Unix endings

    SVN on Windows will check out files with CRLF for files with the
    svn:eol-style property set to "native". This breaks `git apply`, which
    typically works with Unix-line ending patches. Work around the problem here
    by doing a dos2unix up front for files with svn:eol-style set to "native".
    SVN will not commit a mass line ending re-doing because it detects the line
    ending format for files with this property.

    Args:
        rev: The git revision being pushed (currently unused).
        svn_sr_path: Path of the subproject inside the svn checkout.
        files: Paths, relative to svn_sr_path, touched by the revision.
    """
    # Skip files that don't exist in SVN yet.
    files = [f for f in files if os.path.exists(os.path.join(svn_sr_path, f))]
    if not files:
        # Nothing to inspect. Without targets, 'svn propget' would implicitly
        # query the directory itself, which is never what we want.
        return
    # Use ignore_errors because 'svn propget' prints errors if the file doesn't
    # have the named property. There doesn't seem to be a way to suppress that.
    eol_props = svn(svn_sr_path, 'propget', 'svn:eol-style', *files,
                    ignore_errors=True)
    crlf_files = []
    if len(files) == 1:
        # No need to split propget output on ' - ' when we have one file.
        # NOTE(review): this branch also accepts 'CRLF' while the multi-file
        # branch below only accepts 'native' -- confirm which is intended.
        if eol_props.strip() in ['native', 'CRLF']:
            crlf_files = files
    else:
        for eol_prop in eol_props.split('\n'):
            # Remove spare CR.
            eol_prop = eol_prop.strip('\r')
            if not eol_prop:
                continue
            # Each line is split at the last ' - ' into (file, property).
            prop_parts = eol_prop.rsplit(' - ', 1)
            if len(prop_parts) != 2:
                eprint("unable to parse svn propget line:")
                eprint(eol_prop)
                continue
            (f, eol_style) = prop_parts
            if eol_style == 'native':
                crlf_files.append(f)
    if crlf_files:
        # Reformat all files with native SVN line endings to Unix format. SVN
        # knows files with native line endings are text files. It will commit
        # just the diff, and not a mass line ending change.
        shell(['dos2unix'] + crlf_files, ignore_errors=True, cwd=svn_sr_path)
278
279
def split_subrepo(f, git_to_svn_mapping):
    """Split a path into (subproject, rest-of-path).

    The first path component is the subproject when it is a key of
    git_to_svn_mapping; otherwise the result is ('', full-path).
    """
    subproject, remainder = split_first_path_component(f)
    if subproject not in git_to_svn_mapping:
        return '', f
    return subproject, remainder
290
291
def get_all_parent_dirs(name):
    """Return every ancestor directory of the path name, nearest first.

    For 'a/b/c' this is ['a/b', 'a']; a bare file name yields []. For an
    absolute path the list ends with the root directory.
    """
    parts = []
    head = os.path.dirname(name)
    while head:
        parts.append(head)
        parent = os.path.dirname(head)
        if parent == head:
            # Reached the filesystem root, whose parent is itself; stop here
            # instead of looping forever.
            break
        head = parent
    return parts
299
300
def svn_push_one_rev(svn_repo, rev, git_to_svn_mapping, dry_run):
    """Replay one git commit onto the svn staging checkout and commit it.

    Args:
        svn_repo: Path of the svn staging checkout.
        rev: The git revision to push.
        git_to_svn_mapping: Dict mapping a top-level git directory ('' for
            the repository root) to its path inside the svn checkout.
        dry_run: If true, stage the change in svn but do not commit it.

    Raises:
        RuntimeError: The git revision touches no files.
    """
    # An empty diff-tree output splits into [''], so drop blank entries to
    # keep the emptiness check below meaningful.
    diff_tree = git('diff-tree', '--no-commit-id', '--name-only', '-r', rev)
    files = [f for f in diff_tree.split('\n') if f]
    if not files:
        raise RuntimeError('Empty diff for rev %s?' % rev)

    # Split files by subrepo
    subrepo_files = collections.defaultdict(list)
    for f in files:
        subrepo, remainder = split_subrepo(f, git_to_svn_mapping)
        subrepo_files[subrepo].append(remainder)

    # Refuse to work on top of a dirty staging checkout.
    status = svn(svn_repo, 'status', '--no-ignore')
    if status:
        die("Can't push git rev %s because svn status is not empty:\n%s" %
            (rev, status))

    svn_dirs_to_update = set()
    for sr, sr_files in iteritems(subrepo_files):
        svn_sr_path = git_to_svn_mapping[sr]
        for f in sr_files:
            svn_dirs_to_update.add(
                os.path.dirname(os.path.join(svn_sr_path, f)))

    # We also need to svn update any parent directories which are not yet
    # present
    parent_dirs = set()
    for d in svn_dirs_to_update:
        parent_dirs.update(get_all_parent_dirs(d))
    missing_parent_dirs = set(
        d for d in parent_dirs
        if not os.path.exists(os.path.join(svn_repo, d)))
    svn_dirs_to_update.update(missing_parent_dirs)

    # Sort by length to ensure that the parent directories are passed to svn
    # before child directories.
    sorted_dirs_to_update = sorted(svn_dirs_to_update, key=len)

    # SVN update only in the affected directories.
    svn(svn_repo, 'update', '--depth=files', *sorted_dirs_to_update)

    for sr, sr_files in iteritems(subrepo_files):
        svn_sr_path = os.path.join(svn_repo, git_to_svn_mapping[sr])
        if os.name == 'nt':
            fix_eol_style_native(rev, svn_sr_path, sr_files)
        # We use text=False (and pass '--binary') so that we can get an exact
        # diff that can be passed as-is to 'git apply' without any line ending,
        # encoding, or other mangling.
        diff = git('show', '--binary', rev, '--',
                   *(os.path.join(sr, f) for f in sr_files),
                   strip=False, text=False)
        # git is the only thing that can handle its own patches...
        # Strip the a/ or b/ prefix, plus the subproject directory when the
        # patch is applied inside a subproject.
        if sr == '':
            prefix_strip = '-p1'
        else:
            prefix_strip = '-p2'
        try:
            shell(['git', 'apply', prefix_strip, '-'], cwd=svn_sr_path,
                  stdin=diff, die_on_failure=False, text=False)
        except RuntimeError:
            eprint("Patch doesn't apply: maybe you should try `git pull -r` "
                   "first?")
            sys.exit(2)

    status_lines = svn(svn_repo, 'status', '--no-ignore').split('\n')

    # Schedule new (unversioned or ignored) files for addition and missing
    # files for removal.
    for l in (l for l in status_lines if l.startswith(('?', 'I'))):
        svn(svn_repo, 'add', '--no-ignore', l[1:].strip())
    for l in (l for l in status_lines if l.startswith('!')):
        svn(svn_repo, 'remove', l[1:].strip())

    # Now we're ready to commit.
    commit_msg = git('show', '--pretty=%B', '--quiet', rev)
    if not dry_run:
        commit_args = ['commit', '-m', commit_msg]
        # Only pass --force-interactive to svn clients that advertise it.
        if '--force-interactive' in svn(svn_repo, 'commit', '--help'):
            commit_args.append('--force-interactive')
        log(svn(svn_repo, *commit_args))
        log('Committed %s to svn.' % rev)
    else:
        log("Would have committed %s to svn, if this weren't a dry run." % rev)
382
383
def cmd_push(args):
    '''Push changes back to SVN: this is extracted from Justin Lebar's script
    available here: https://github.com/jlebar/llvm-repo-tools/

    Note: a current limitation is that git does not track file rename, so they
    will show up in SVN as delete+add.
    '''
    # NOTE: the docstring above doubles as the argparse description of the
    # 'push' subcommand, so it is user-facing text.
    # args is the parsed argparse namespace; rev_range and dry_run are read.

    # Get the git root
    git_root = git('rev-parse', '--show-toplevel')
    if not os.path.isdir(git_root):
        die("Can't find git root dir")

    # Push from the root of the git repo
    os.chdir(git_root)

    # Get the remote URL, and check if it's one of the standalone repos.
    git_remote_url = git('remote', 'get-url', 'origin')
    # Remove a literal '.git' suffix. str.rstrip('.git') would instead strip
    # any trailing run of the characters '.', 'g', 'i' and 't', mangling names
    # such as 'llvm-zorg' and 'llvm-lnt'.
    git_remote_url = git_remote_url.rstrip('/')
    if git_remote_url.endswith('.git'):
        git_remote_url = git_remote_url[:-len('.git')]
    git_remote_url = git_remote_url.rstrip('/')
    git_remote_repo_name = git_remote_url.rsplit('/', 1)[-1]
    split_repo_path = SPLIT_REPO_NAMES.get(git_remote_repo_name)
    if split_repo_path:
        git_to_svn_mapping = {'': split_repo_path}
    else:
        # Default to the monorepo mapping
        git_to_svn_mapping = LLVM_MONOREPO_SVN_MAPPING

    # We need a staging area for SVN, let's hide it in the .git directory.
    dot_git_dir = git('rev-parse', '--git-common-dir')
    # Not all versions of git support --git-common-dir and just print the
    # unknown command back. If this happens, fall back to --git-dir
    if dot_git_dir == '--git-common-dir':
        dot_git_dir = git('rev-parse', '--git-dir')

    svn_root = os.path.join(dot_git_dir, 'llvm-upstream-svn')
    svn_init(svn_root)

    rev_range = args.rev_range
    dry_run = args.dry_run
    revs = get_revs_to_push(rev_range)
    log('%sPushing %d %s commit%s:\n%s' %
        ('[DryRun] ' if dry_run else '', len(revs),
         'split-repo (%s)' % split_repo_path
         if split_repo_path else 'monorepo',
         's' if len(revs) != 1 else '',
         '\n'.join('  ' + git('show', '--oneline', '--quiet', c)
                   for c in revs)))
    for r in revs:
        # Start every revision from a pristine staging checkout.
        clean_svn(svn_root)
        svn_push_one_rev(svn_root, r, git_to_svn_mapping, dry_run)
433
434
def lookup_llvm_svn_id(git_commit_hash):
    """Return the svn revision number recorded in a git commit message.

    The revision is taken from a line of the form 'llvm-svn: NNNNNN' (5 to 7
    digits). Exits via die() when the commit message is empty or contains no
    such line.
    """
    # Use --format=%b to get the raw commit message, without any extra
    # whitespace.
    commit_msg = git('log', '-1', '--format=%b', git_commit_hash,
                     ignore_errors=True)
    if not commit_msg:
        die("Can't find git commit " + git_commit_hash)
    # If a commit has multiple "llvm-svn:" lines (e.g. if the commit is
    # reverting/quoting a previous commit), choose the last one, which should
    # be the authoritative one.
    svn_ids = re.findall(r'^llvm-svn: (\d{5,7})$', commit_msg, re.MULTILINE)
    if svn_ids:
        return int(svn_ids[-1])
    die("Can't find svn revision in git commit " + git_commit_hash)
453
454
def cmd_svn_lookup(args):
    '''Find the SVN revision id for a given git commit hash.

    This is identified by 'llvm-svn: NNNNNN' in the git commit message.'''
    # The docstring above is also shown as the 'svn-lookup' help description.

    # Locate the top of the git checkout; all commands run from there.
    toplevel = git('rev-parse', '--show-toplevel')
    if not os.path.isdir(toplevel):
        die("Can't find git root dir")
    os.chdir(toplevel)

    # Print the revision in the usual rNNNNNN form.
    svn_id = lookup_llvm_svn_id(args.git_commit_hash)
    log('r' + str(svn_id))
468
469
def git_hash_by_svn_rev(svn_rev):
    '''Find the git hash for a given svn revision.

    This check is paranoid: 'llvm-svn: NNNNNN' could exist on its own line
    somewhere else in the commit message. Look in the full log message to see
    if it's actually on the last line.

    Since this check is expensive (we're searching every single commit), limit
    to the past 10k commits (about 5 months).

    Exits via die() when no commit, or more than one commit, matches.
    '''
    grep_output = git(
        'log', '--format=%H', '--grep', '^llvm-svn: %d$' % svn_rev,
        'HEAD~10000...HEAD')
    # Empty output splits into [''], so drop blank entries. Otherwise the
    # lookup below would die with a misleading "Can't find git commit"
    # message instead of reporting that nothing matched.
    possible_hashes = [h for h in grep_output.split('\n') if h]
    matching_hashes = [h for h in possible_hashes
                       if lookup_llvm_svn_id(h) == svn_rev]
    if len(matching_hashes) > 1:
        die("svn revision r%d has ambiguous commits: %s" % (
            svn_rev, ', '.join(matching_hashes)))
    elif len(matching_hashes) < 1:
        die("svn revision r%d matches no commits" % svn_rev)
    return matching_hashes[0]
491
492
def cmd_revert(args):
    '''Revert a commit by either SVN id (rNNNNNN) or git hash. This also
    populates the git commit message with both the SVN revision and git hash of
    the change being reverted.'''
    # NOTE: the docstring above doubles as the argparse description of the
    # 'revert' subcommand, so it is user-facing text.
    # args is the parsed argparse namespace; revision and dry_run are read.

    # Get the git root
    git_root = git('rev-parse', '--show-toplevel')
    if not os.path.isdir(git_root):
        die("Can't find git root dir")

    # Run commands from the root
    os.chdir(git_root)

    # Check for a client branch first.
    open_files = git('status', '-uno', '-s', '--porcelain')
    if open_files:
        die("Found open files. Please stash and then revert.\n" + open_files)

    # If the revision looks like rNNNNNN, use that. Otherwise, look for it in
    # the git commit.
    svn_match = re.match(r'^r(\d{5,7})$', args.revision)
    if svn_match:
        # If the revision looks like rNNNNNN, use that as the svn revision, and
        # grep through git commits to find which one corresponds to that svn
        # revision.
        svn_rev = int(svn_match.group(1))
        git_hash = git_hash_by_svn_rev(svn_rev)
    else:
        # Otherwise, this looks like a git hash, so we just need to grab the
        # svn revision from the end of the commit message.  Get the actual git
        # hash in case the revision is something like "HEAD~1"
        git_hash = git('rev-parse', '--verify', args.revision + '^{commit}')
        svn_rev = lookup_llvm_svn_id(git_hash)

    # Subject line of the commit being reverted.
    msg = git('log', '-1', '--format=%s', git_hash)

    log_verbose('Ready to revert r%d (%s): "%s"' % (svn_rev, git_hash, msg))

    revert_args = ['revert', '--no-commit', git_hash]
    # TODO: Running --edit doesn't seem to work, with errors that stdin is not
    # a tty.
    commit_args = [
        'commit', '-m', 'Revert ' + msg,
        '-m', 'This reverts r%d (git commit %s)' % (svn_rev, git_hash)]
    if args.dry_run:
        # The trailing space after "a" matters: the adjacent literals are
        # concatenated, and without it the message read "...weren't adry run".
        log("Would have run the following commands, if this weren't a "
            "dry run:\n"
            '1) git %s\n2) git %s' % (
                ' '.join(quote(arg) for arg in revert_args),
                ' '.join(quote(arg) for arg in commit_args)))
        return

    git(*revert_args)
    commit_log = git(*commit_args)

    log('Created revert of r%d: %s' % (svn_rev, commit_log))
    log("Run 'git llvm push -n' to inspect your changes and "
        "run 'git llvm push' when ready")
551
552
# Script entry point: build the command-line parser, record the verbosity
# flags in the module globals, and dispatch to the chosen subcommand.
if __name__ == '__main__':
    if not program_exists('svn'):
        die('error: git-llvm needs svn command, but svn is not installed.')

    argv = sys.argv[1:]
    p = argparse.ArgumentParser(
        prog='git llvm', formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    subcommands = p.add_subparsers(title='subcommands',
                                   description='valid subcommands',
                                   help='additional help')
    verbosity_group = p.add_mutually_exclusive_group()
    verbosity_group.add_argument('-q', '--quiet', action='store_true',
                                 help='print less information')
    verbosity_group.add_argument('-v', '--verbose', action='store_true',
                                 help='print more information')

    parser_push = subcommands.add_parser(
        'push', description=cmd_push.__doc__,
        help='push changes back to the LLVM SVN repository')
    parser_push.add_argument(
        '-n',
        '--dry-run',
        dest='dry_run',
        action='store_true',
        help='Do everything other than commit to svn.  Leaves junk in the svn '
        'repo, so probably will not work well if you try to commit more '
        'than one rev.')
    parser_push.add_argument(
        'rev_range',
        metavar='GIT_REVS',
        type=str,
        nargs='?',
        help="revs to push (default: everything not in the branch's "
        'upstream, or not in origin/master if the branch lacks '
        'an explicit upstream)')
    parser_push.set_defaults(func=cmd_push)

    parser_revert = subcommands.add_parser(
        'revert', description=cmd_revert.__doc__,
        help='Revert a commit locally.')
    parser_revert.add_argument(
        'revision',
        help='Revision to revert. Can either be an SVN revision number '
        "(rNNNNNN) or a git commit hash (anything that doesn't look "
        'like an SVN revision number).')
    parser_revert.add_argument(
        '-n',
        '--dry-run',
        dest='dry_run',
        action='store_true',
        help='Do everything other than perform a revert. Prints the git '
        'revert command it would have run.')
    parser_revert.set_defaults(func=cmd_revert)

    parser_svn_lookup = subcommands.add_parser(
        'svn-lookup', description=cmd_svn_lookup.__doc__,
        help='Find the llvm-svn revision for a given commit.')
    parser_svn_lookup.add_argument(
        'git_commit_hash',
        help='git_commit_hash for which we will look up the svn revision id.')
    parser_svn_lookup.set_defaults(func=cmd_svn_lookup)

    args = p.parse_args(argv)
    VERBOSE = args.verbose
    QUIET = args.quiet

    # On Python 3 subparsers are optional by default, so running without a
    # subcommand leaves 'func' unset. Report a usage error instead of crashing
    # with an AttributeError.
    if not hasattr(args, 'func'):
        p.error('a subcommand is required')

    # Dispatch to the right subcommand
    args.func(args)
622