1#!/usr/bin/env python3
2
3# Measure the performance of a list of revsets against multiple revisions
4# defined by parameter. Checkout one by one and run perfrevset with every
5# revset in the list to benchmark its performance.
6#
7# You should run this from the root of your mercurial repository.
8#
9# call with --help for details
10
11from __future__ import absolute_import, print_function
12import math
13import optparse  # cannot use argparse, python 2.7 only
14import os
15import re
16import subprocess
17import sys
18
# Variants benchmarked for every revset when --variants is not given.
# 'plain' means the revset is used unchanged; any other entry is a
# '+'-separated chain of function names that applyvariants() wraps around
# the revset, innermost first (e.g. 'reverse+first' -> first(reverse(...))).
DEFAULTVARIANTS = [
    'plain',
    'min',
    'max',
    'first',
    'last',
    'reverse',
    'reverse+first',
    'reverse+last',
    'sort',
    'sort+first',
    'sort+last',
]
32
33
def check_output(*args, **kwargs):
    """run a command and return what it wrote to stdout

    All arguments are forwarded to subprocess.Popen. stdout and stderr are
    captured through pipes unless the caller supplies its own values for
    them (stderr=None lets the child inherit our stderr).

    Raise subprocess.CalledProcessError if the command exits with a
    non-zero status. The captured stdout is attached to the exception as
    <output> (and the captured stderr, if any, as <stderr>) so callers can
    report what the command printed before failing.
    """
    kwargs.setdefault('stderr', subprocess.PIPE)
    kwargs.setdefault('stdout', subprocess.PIPE)
    proc = subprocess.Popen(*args, **kwargs)
    output, error = proc.communicate()
    if proc.returncode != 0:
        # The command may be given positionally or as the 'args' keyword,
        # and may be a sequence or an already-formatted string.
        cmd = args[0] if args else kwargs.get('args')
        if isinstance(cmd, (list, tuple)):
            cmd = ' '.join(cmd)
        exc = subprocess.CalledProcessError(proc.returncode, cmd)
        # Set as attributes (not constructor arguments) so this also works
        # on Python versions whose CalledProcessError takes no output.
        exc.output = output
        exc.stderr = error
        raise exc
    return output
42
43
def update(rev):
    """check out <rev> in the working copy and rebuild the local hg

    Exit the script with the failing command's return code if either the
    checkout or the build fails.
    """
    rebuild = ['make', 'local']
    try:
        subprocess.check_call(['hg', 'update', '--quiet', '--check', str(rev)])
        # stdout is captured (and dropped) while stderr is inherited, so
        # only errors and warnings from the build reach the terminal.
        check_output(rebuild, stderr=None)
    except subprocess.CalledProcessError as failure:
        print('update to revision %s failed, aborting' % rev, file=sys.stderr)
        sys.exit(failure.returncode)
54
55
def hg(cmd, repo=None):
    """run a command with the locally built ./hg and the perf extension

    <cmd> is the list of command + arguments,
    <repo> is an optional repository path passed through -R.

    Return the combined stdout/stderr of the command."""
    perfext = 'extensions.perf=' + os.path.join(contribdir, 'perf.py')
    invocation = ['./hg']
    if repo is not None:
        invocation.extend(['-R', repo])
    invocation.extend(['--config', perfext])
    invocation.extend(cmd)
    return check_output(invocation, stderr=subprocess.STDOUT)
70
71
def perf(revset, target=None, contexts=False):
    """benchmark one revset with perfrevset and return the parsed timings

    <target> is an optional repository to benchmark against and <contexts>
    adds --contexts to the perfrevset call. Return None (after reporting on
    stderr) when the hg command itself fails."""
    command = ['perfrevset']
    if contexts:
        command.append('--contexts')
    # '--' keeps a revset starting with '-' from being read as an option
    command.extend(['--', revset])
    try:
        return parseoutput(hg(command, repo=target))
    except subprocess.CalledProcessError as failure:
        print(
            'abort: cannot run revset benchmark: %s' % failure.cmd,
            file=sys.stderr,
        )
        captured = getattr(failure, 'output', None)  # no output before 2.7
        if captured is None:
            captured = '(no output)'
        print(captured, file=sys.stderr)
        return None
91
92
# Matches the timing line found in the perfrevset output, e.g.
#   ! wall 0.000015 comb 0.000000 user 0.000000 sys 0.000000 (best of 100)
# Groups, in order: wall, comb, user, sys, count.
outputre = re.compile(
    br'! wall (\d+\.\d+) comb (\d+\.\d+) user (\d+\.\d+) '
    br'sys (\d+\.\d+) \(best of (\d+)\)'
)


def parseoutput(output):
    """parse a textual output into a dict

    <output> is the raw (bytes) output of a perf command. The returned dict
    has float values for 'wall', 'comb', 'user' and 'sys', and the number
    of runs as an int under 'count'.

    We cannot just use json because we want to compare with old
    versions of Mercurial that may not support json output.

    Print the offending output and exit with status 1 if no timing line
    can be found.
    """
    match = outputre.search(output)
    if not match:
        print('abort: invalid output:', file=sys.stderr)
        print(output, file=sys.stderr)
        sys.exit(1)
    # group numbers follow the order of the fields in the timing line
    return {
        'wall': float(match.group(1)),
        'comb': float(match.group(2)),
        'user': float(match.group(3)),
        'sys': float(match.group(4)),
        'count': int(match.group(5)),
    }
117
118
def printrevision(rev):
    """write a one-line description of <rev> to stdout

    The line starts with "Revision " and is completed by `hg log`."""
    template = '{if(tags, " ({tags})")} {rev}:{node|short}: {desc|firstline}\n'
    sys.stdout.write("Revision ")
    # flush first so our prefix lands before the output of the hg child
    sys.stdout.flush()
    subprocess.check_call(
        ['hg', 'log', '--rev', str(rev), '--template', template]
    )
133
134
def idxwidth(nbidx):
    """return the max width of number used for index

    <nbidx> is the number of entries; indexes run from 0 to nbidx - 1, so
    the result is the number of digits of nbidx - 1 (at least 1).

    This is similar to log10(nbidx), but we use custom code here
    because we start with zero and we'd rather not deal with all the
    extra rounding business that log10 would imply.
    """
    highest = nbidx - 1  # starts at 0
    width = 0
    # '> 0' rather than truthiness: with nbidx == 0, highest is -1 and
    # -1 // 10 stays -1 forever.
    while highest > 0:
        width += 1
        highest //= 10
    return max(width, 1)
150
151
def getfactor(main, other, field, sensitivity=0.05):
    """compute how <other> compares to <main> for the value stored at 'field'

    The result is other[field] / main[field]; a missing <main> (None)
    counts as a ratio of 1. None is returned instead of the ratio when it
    lies strictly within <sensitivity> of 1, i.e. the variation is
    insignificant."""
    if main is None:
        ratio = 1
    else:
        ratio = other[field] / main[field]
    if 1 - sensitivity < ratio < 1 + sensitivity:
        return None
    return ratio
164
165
def formatfactor(factor):
    """format a factor into a 4 char string

     22%
    156%
    x2.4
     x23
    x789
    x1x4
    x5x7

    None (no significant factor) gives four spaces. Factors below 2 are
    shown as a percentage, larger ones as a multiplier. From 1000 up the
    form is x<digit>x<exponent>: leading digit times ten to the exponent
    (x5x7 is about 5 * 10**7).
    """
    if factor is None:
        return '    '
    elif factor < 2:
        return '%3i%%' % (factor * 100)
    elif factor < 10:
        return 'x%3.1f' % factor
    elif factor < 1000:
        return '%4s' % ('x%i' % factor)
    else:
        # Peel decimal digits off with integer arithmetic; this avoids any
        # floating point rounding in the exponent.
        mantissa = int(factor)
        order = 0
        while mantissa >= 10:
            mantissa //= 10
            order += 1
        return 'x%ix%i' % (mantissa, order)
191
192
def formattiming(value):
    """render a timing on 8 characters, trimming decimals when required"""
    if not value < 10 ** 7:
        # too large to fit (a 4+ month run): print the integer part as is
        return '%i' % value
    microprecise = '%.6f' % value
    return microprecise[:8]
200
201
_marker = object()


def printresult(variants, idx, data, maxidx, verbose=False, reference=_marker):
    """print one result row on stdout

    <data> maps each variant to its timing dict, or to None if the run
    failed. When <reference> is given (even as None) a relative-factor
    column follows each wall time; with <verbose> the comb, user, sys and
    count values are printed as well."""
    rowfmt = '%%0%ii) %%s' % idxwidth(maxidx)
    relative = reference is not _marker

    cells = []
    for name in variants:
        timing = data[name]
        if timing is None:
            cells.extend(['error   ', ' ' * 4])
            continue
        cells.append(formattiming(timing['wall']))
        if relative:
            if reference is None:
                ratio = None
            else:
                ratio = getfactor(reference[name], timing, 'wall')
            cells.append(formatfactor(ratio))
        if verbose:
            for key in ('comb', 'user', 'sys'):
                cells.append(formattiming(timing[key]))
            cells.append('%6d' % timing['count'])
    print(rowfmt % (idx, ' '.join(cells)))
227
228
def printheader(variants, maxidx, verbose=False, relative=False):
    """print the column titles matching the rows printed by printresult

    Variant names longer than 8 characters are shortened to their first
    and last three characters joined by '..'; an empty name is shown as
    'iter'."""
    columns = [' ' * (idxwidth(maxidx) + 1)]
    for name in variants:
        title = name or 'iter'
        if len(title) > 8:
            title = title[:3] + '..' + title[-3:]
        columns.append('%-8s' % title)
        if relative:
            # blank title above the relative-factor column
            columns.append('    ')
        if verbose:
            columns.extend('%-8s' % extra for extra in ('comb', 'user', 'sys'))
            columns.append('%6s' % 'count')
    print(' '.join(columns))
245
246
def getrevs(spec):
    """get the list of rev matched by a revset

    Revision numbers are returned as text strings. Exit the script with
    hg's return code if the revset cannot be resolved."""
    try:
        out = check_output(['hg', 'log', '--template={rev}\n', '--rev', spec])
    except subprocess.CalledProcessError as exc:
        print("abort, can't get revision from %s" % spec, file=sys.stderr)
        sys.exit(exc.returncode)
    # Pipes yield bytes on Python 3; callers build command lines with
    # str(rev), which would turn b'12' into "b'12'". Revision numbers are
    # plain digits, so ASCII decoding is enough.
    if not isinstance(out, str):
        out = out.decode('ascii')
    return out.split()
255
256
def applyvariants(revset, variant):
    """wrap <revset> in the functions named by <variant>

    <variant> is a '+'-separated list of function names applied from left
    to right (so the last one ends up outermost); the special variant
    'plain' leaves the revset untouched."""
    if variant != 'plain':
        for funcname in variant.split('+'):
            revset = '%s(%s)' % (funcname, revset)
    return revset
263
264
# Command line definition. Positional arguments are the revisions (revsets)
# to check out and benchmark against; at least one is required.
helptext = """This script will run multiple variants of provided revsets using
different revisions in your mercurial repository. After the benchmark are run
summary output is provided. Use it to demonstrate speed improvements or pin
point regressions. Revsets to run are specified in a file (or from stdin), one
revsets per line. Line starting with '#' will be ignored, allowing insertion of
comments."""
parser = optparse.OptionParser(
    usage="usage: %prog [options] <revs>", description=helptext
)
parser.add_option(
    "-f",
    "--file",
    help="read revset from FILE (stdin if omitted)",
    metavar="FILE",
)
parser.add_option("-R", "--repo", help="run benchmark on REPO", metavar="REPO")

parser.add_option(
    "-v",
    "--verbose",
    action='store_true',
    help="display all timing data (not just best total time)",
)

# Each variant is turned into function calls wrapped around the revset, so
# the example must name real revset functions ('sort', not 'sorted').
parser.add_option(
    "",
    "--variants",
    default=','.join(DEFAULTVARIANTS),
    help="comma separated list of variant to test "
    "(eg: plain,min,sort) (plain = no modification)",
)
parser.add_option(
    '',
    '--contexts',
    action='store_true',
    help='obtain changectx from results instead of integer revs',
)

(options, args) = parser.parse_args()

# Without any revision there is nothing to benchmark against.
if not args:
    parser.print_help()
    sys.exit(255)
308
# the directory where both this script and the perf.py extension live.
contribdir = os.path.dirname(__file__)

# Read the revsets to benchmark, one per line, from --file or from stdin.
# Lines starting with '#' are comments and blank lines are dropped.
revsetsfile = sys.stdin
if options.file:
    revsetsfile = open(options.file)
try:
    revsets = [l.strip() for l in revsetsfile if not l.startswith('#')]
finally:
    # close the file we opened ourselves, but leave stdin alone
    if revsetsfile is not sys.stdin:
        revsetsfile.close()
revsets = [l for l in revsets if l]
318
# List the revsets with the index used to identify them in the tables.
separator = "----------------------------"
print("Revsets to benchmark")
print(separator)
for position, expression in enumerate(revsets):
    print("%i) %s" % (position, expression))
print(separator)
print()

# Resolve every command line argument into concrete revision numbers.
revs = []
for spec in args:
    revs += getrevs(spec)

variants = options.variants.split(',')

# results[i][j][variant] is the timing dict (or None on error) measured at
# revision i for revset j.
results = []
for rev in revs:
    print(separator)
    printrevision(rev)
    print(separator)
    update(rev)
    revresults = []
    results.append(revresults)
    printheader(variants, len(revsets), verbose=options.verbose)
    for position, expression in enumerate(revsets):
        timings = {}
        for variant in variants:
            timings[variant] = perf(
                applyvariants(expression, variant),
                target=options.repo,
                contexts=options.contexts,
            )
        revresults.append(timings)
        printresult(
            variants, position, timings, len(revsets), verbose=options.verbose
        )
        # show each row as soon as it is measured
        sys.stdout.flush()
    print(separator)
355
356
# Final report: the same timings, regrouped by revset so that successive
# revisions can be compared line by line.
print("\n\nResult by revset\n================\n")

print('Revision:')
for position, rev in enumerate(revs):
    sys.stdout.write('%i) ' % position)
    # flush so the index lands before the output of the hg child process
    sys.stdout.flush()
    printrevision(rev)

print("\n")

nbrevisions = len(results)
for ridx, rset in enumerate(revsets):
    print("revset #%i: %s" % (ridx, rset))
    printheader(variants, nbrevisions, verbose=options.verbose, relative=True)
    # each revision is compared to the one printed just before it; the
    # first row has nothing to be compared with
    previous = None
    for position, perrevision in enumerate(results):
        current = perrevision[ridx]
        printresult(
            variants,
            position,
            current,
            nbrevisions,
            verbose=options.verbose,
            reference=previous,
        )
        previous = current
    print()
390