1#!/usr/bin/env python
2
3# Licensed to the Apache Software Foundation (ASF) under one
4# or more contributor license agreements.  See the NOTICE file
5# distributed with this work for additional information
6# regarding copyright ownership.  The ASF licenses this file
7# to you under the Apache License, Version 2.0 (the
8# "License"); you may not use this file except in compliance
9# with the License.  You may obtain a copy of the License at
10#
11#   http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing,
14# software distributed under the License is distributed on an
15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16# KIND, either express or implied.  See the License for the
17# specific language governing permissions and limitations
18# under the License.
19
20"""Usage: benchmark.py run|list|compare|show|chart <selection> ...
21
22SELECTING TIMINGS -- B@R,LxS
23
24In the subcommands below, a timings selection consists of a string with up to
25four elements:
26  <branch>@<revision>,<levels>x<spread>
27abbreviated as:
28  B@R,LxS
29
30<branch> is a label of an svn branch, e.g. "1.7.x".
31<revision> is the last-changed-revision of above branch.
32<levels> is the number of directory levels created in the benchmark.
33<spread> is the number of child trees spreading off each dir level.
34
<branch> and <revision> are simply used for labeling. When doing the actual
test runs, you should enter labels matching the selected --svn-bin-dir.
Later, you can select runs individually by using these labels.
38
39For <revision>, you can provide special keywords:
40- 'each' has the same effect as entering each available revision number that
41  is on record in the db in a separate timings selection.
42- 'last' is the same as 'each', but shows only the last 10 revisions. 'last'
43  can be combined with a number, e.g. 'last12'.
44
45For all subcommands except 'run', you can omit some or all of the elements of
46a timings selection to combine all available timings sets. Try that out with
47the 'list' subcommand.
48
49Examples:
50  benchmark.py run 1.7.x@12345,5x5
51  benchmark.py show trunk@12345
52  benchmark.py compare 1.7.0,1x100 trunk@each,1x100
53  benchmark.py chart compare 1.7.0,5x5 trunk@last12,5x5
54
55
56RUN BENCHMARKS
57
58  benchmark.py run B@R,LxS [N] [options]
59
60Test data is added to an sqlite database created automatically, by default
61'benchmark.db' in the current working directory. To specify a different path,
62use option -f <path_to_db>.
63
64If <N> is provided, the run is repeated N times.
65
66<levels> and <spread> control the way the tested working copy is structured:
67  <levels>: number of directory levels to create.
68  <spread>: number of files and subdirectories created in each dir.
69
70
71LIST WHAT IS ON RECORD
72
73  benchmark.py list [B@R,LxS]
74
75Find entries in the database for the given constraints. Any arguments can
76be omitted. (To select only a rev, start with a '@', like '@123'; to select
77only spread, start with an 'x', like "x100".)
78
79Call without arguments to get a listing of all available constraints.
80
81
82COMPARE TIMINGS
83
84  benchmark.py compare B@R,LxS B@R,LxS [B@R,LxS [...]]
85
86Compare any number of timings sets to the first provided set (in text mode).
87For example:
88  benchmark.py compare 1.7.0 trunk@1349903
89    Compare the total timings of all combined '1.7.0' branch runs to
90    all combined runs of 'trunk'-at-revision-1349903.
91  benchmark.py compare 1.7.0,5x5 trunk@1349903,5x5
92    Same as above, but only compare the working copy types with 5 levels
93    and a spread of 5.
94
95Use the -c option to limit comparison to specific command names.
96
97
98SHOW TIMINGS
99
100  benchmark.py show B@R,LxS [B@R,LxS [...]]
101
102Print out a summary of the timings selected from the given constraints.
103
104
105GENERATE CHARTS
106
107  benchmark.py chart compare B@R,LxS B@R,LxS [ B@R,LxS ... ]
108
109Produce a bar chart that compares any number of sets of timings.  Like with
110the plain 'compare' command, the first set is taken as a reference point for
111100% and +-0 seconds. Each following dataset produces a set of labeled bar
112charts, grouped by svn command names. At least two timings sets must be
113provided.
114
115Use the -c option to limit comparison to specific command names.
116
117
118EXAMPLES
119
120# Run 3 benchmarks on svn 1.7.0 with 5 dir levels and 5 files and subdirs for
121# each level (spread). Timings are saved in ./benchmark.db.
122# Provide label '1.7.0' and its Last-Changed-Rev for later reference.
123./benchmark.py run --svn-bin-dir ~/svn-prefix/1.7.0/bin 1.7.0@1181106,5x5 3
124
125# Record 3 benchmark runs on trunk, again naming its Last-Changed-Rev.
126# (You may also set your $PATH instead of using --svn-bin-dir.)
127./benchmark.py run --svn-bin-dir ~/svn-prefix/trunk/bin trunk@1352725,5x5 3
128
129# Work with the results of above two runs
130./benchmark.py list
131./benchmark.py compare 1.7.0 trunk
132./benchmark.py show 1.7.0 trunk
133./benchmark.py chart compare 1.7.0 trunk
134./benchmark.py chart compare 1.7.0 trunk -c "update,commit,TOTAL RUN"
135
136# Rebuild r1352598, run it and chart improvements since 1.7.0.
137svn up -r1352598 ~/src/trunk
138make -C ~/src/trunk dist-clean install
139export PATH="$HOME/svn-prefix/trunk/bin:$PATH"
140which svn
141./benchmark.py run trunk@1352598,5x5 3
142./benchmark.py chart compare 1.7.0 trunk@1352598 trunk@1352725 -o chart.svg
143
144
145GLOBAL OPTIONS"""
146
147import os
148import time
149import datetime
150import sqlite3
151import optparse
152import tempfile
153import subprocess
154import random
155import shutil
156import stat
157import string
158from copy import copy
159
# svn subcommands that are never timed (checked in Run.tic()).
IGNORE_COMMANDS = ('--version', )
# Pseudo command name under which the duration of a whole run is recorded.
TOTAL_RUN = 'TOTAL RUN'

# Short alias for joining path components.
j = os.path.join
164
def bail(msg=None):
  """Terminate the program with exit status 1.

  If MSG is given (and not empty), it is printed to stdout first.

  Raises SystemExit(1).
  """
  if msg:
    print(msg)
  # Raise SystemExit directly instead of calling exit(): that helper is
  # installed by the 'site' module for interactive use and is not guaranteed
  # to exist (e.g. with 'python -S').
  raise SystemExit(1)
169
def time_str():
  """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
  timestamp_format = '%Y-%m-%d %H:%M:%S'
  return time.strftime(timestamp_format, time.localtime())
172
def timedelta_to_seconds(td):
  """Convert the datetime.timedelta TD into a float number of seconds."""
  # Same summation order as ever: (seconds + microseconds) first, days last.
  sub_day_seconds = float(td.seconds) + float(td.microseconds) / (10**6)
  whole_day_seconds = td.days * 24 * 60 * 60
  return sub_day_seconds + whole_day_seconds
177
def run_cmd(cmd, stdin=None, shell=False, verbose=False):
  """Run CMD, wait for it to finish and return (stdout, stderr) as text.

  CMD is an argument list, or a single command string if SHELL is true.
  STDIN, if given, is a text string fed to the command's standard input.

  The command line is echoed when the global options.verbose is set.  The
  command's stdout is printed only if VERBOSE is true; any stderr output is
  always printed.

  Raises OSError if the command cannot be started.
  """
  if options.verbose:
    printable_cmd = cmd if shell else ' '.join(cmd)
    print('CMD:', printable_cmd)

  stdin_arg = subprocess.PIPE if stdin else None

  # universal_newlines=True opens the pipes in text mode, so that the output
  # is a text string on Python 3 as well.  Callers split it on '\n', which
  # fails on bytes.
  p = subprocess.Popen(cmd,
                       stdin=stdin_arg,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE,
                       shell=shell,
                       universal_newlines=True)
  stdout, stderr = p.communicate(input=stdin)

  if verbose and stdout:
    print("STDOUT: [[[\n%s]]]" % stdout)
  # stderr is reported regardless of VERBOSE.
  if stderr:
    print("STDERR: [[[\n%s]]]" % stderr)

  return stdout, stderr
205
206
# Number of basenames handed out so far by next_unique_basename().
_next_unique_basename_count = 0

def next_unique_basename(prefix):
  """Return '<PREFIX>_<n>', where n is a process-wide running counter.

  The counter is shared by all prefixes and starts at 1.
  """
  global _next_unique_basename_count
  serial = _next_unique_basename_count + 1
  _next_unique_basename_count = serial
  return prefix + '_' + str(serial)
213
214
# (factor, suffix) pairs, largest factor first.
si_units = [
    (1000 ** 5, 'P'),
    (1000 ** 4, 'T'),
    (1000 ** 3, 'G'),
    (1000 ** 2, 'M'),
    (1000 ** 1, 'K'),
    (1000 ** 0, ''),
    ]
def n_label(n):
    """Abbreviate the number N with an SI suffix, e.g. 1500 -> '1K'.

    (Adapted from hurry.filesize.)  The amount is truncated to an integer.
    Numbers smaller than every factor are labelled with the last unit.
    """
    # Default to the last (smallest) unit, then look for the first unit,
    # from the largest down, that N reaches.
    factor, suffix = si_units[-1]
    for unit_factor, unit_suffix in si_units:
        if n >= unit_factor:
            factor, suffix = unit_factor, unit_suffix
            break
    amount = int(n/factor)
    # A suffix may be a (singular, plural) pair.
    if isinstance(suffix, tuple):
        singular, multiple = suffix
        suffix = singular if amount == 1 else multiple
    return '%s%s' % (amount, suffix)
236
237
def split_arg_once(l_r, sep):
  """Split L_R at the first occurrence of SEP and return (left, right).

  A side that is missing or empty is returned as None.  If L_R itself is
  None or empty, (None, None) is returned.  Only the first SEP splits the
  string; any further occurrences stay in the right-hand part.
  """
  if not l_r:
    return (None, None)
  # partition() never raises, unlike a two-way unpack of split(), which
  # fails with ValueError when SEP occurs more than once.
  l, _, r = l_r.partition(sep)
  return (l or None, r or None)
251
# Separators of a timings selection 'B@R,LxS', in the order: between branch
# and revision, before the levels/spread part, between levels and spread.
RUN_KIND_SEPARATORS=('@', ',', 'x')

class RunKind:
  """One timings selection 'B@R,LxS', split into its four elements.

  Attributes: branch and revision (strings), levels and spread (ints).
  Any element left out of the selection string is None.
  """

  def __init__(self, b_r_l_s):
    rev_sep, wc_sep, spread_sep = RUN_KIND_SEPARATORS
    branch_rev, levels_spread = split_arg_once(b_r_l_s, wc_sep)
    self.branch, self.revision = split_arg_once(branch_rev, rev_sep)
    self.levels, self.spread = split_arg_once(levels_spread, spread_sep)
    if self.levels:
      self.levels = int(self.levels)
    if self.spread:
      self.spread = int(self.spread)

  def label(self):
    """Return the selection as a string again, leaving out unset elements."""
    rev_sep, wc_sep, spread_sep = RUN_KIND_SEPARATORS
    text = ''
    if self.branch:
      text += self.branch
    if self.revision:
      text += rev_sep + self.revision
    if self.levels or self.spread:
      text += wc_sep
      if self.levels:
        text += str(self.levels)
      if self.spread:
        text += spread_sep + str(self.spread)
    return text

  def args(self):
    """Return the tuple (branch, revision, levels, spread)."""
    return (self.branch, self.revision, self.levels, self.spread)
280
281
def parse_timings_selections(db, *args):
  """Parse the timings selection strings ARGS into a list of RunKinds.

  A selection whose revision is the keyword 'each' is expanded into one
  RunKind per revision that the database DB has on record for the rest of
  the selection.  'last' and 'last<N>' do the same, but keep only the final
  N (default 10) entries of the sorted revision list.
  """
  run_kinds = []

  for arg in args:
    run_kind = RunKind(arg)
    revision = run_kind.revision

    if revision == 'each':
      limit = None
    elif revision and revision.startswith('last'):
      count_str = revision[4:]
      limit = int(count_str) if count_str else 10
    else:
      # A plain selection, taken as it is.
      run_kinds.append(run_kind)
      continue

    # Keyword revision: look up the matching revisions without constraining
    # the revision itself.
    run_kind.revision = None
    revisions = TimingQuery(db, run_kind).get_sorted_revisions()
    if limit is not None:
      revisions = revisions[-limit:]
    for found_revision in revisions:
      revision_run_kind = copy(run_kind)
      revision_run_kind.revision = found_revision
      run_kinds.append(revision_run_kind)

  return run_kinds
311
def parse_one_timing_selection(db, *args):
  """Parse ARGS like parse_timings_selections(), but insist on one result.

  Returns the single resulting RunKind.  If the selections expand to no or
  to several RunKinds, bails out with an error message.
  """
  run_kinds = parse_timings_selections(db, *args)
  if len(run_kinds) != 1:
    # Join the argument strings themselves.  Unpacking ARGS into join()
    # would join the characters of a single argument, or raise TypeError
    # for more than one argument.
    bail("I need exactly one timings identifier, not '%s'"
         % (' '.join(args)))
  return run_kinds[0]
318
319
320
321
# The only characters that filesystem_safe_string() lets through.
PATHNAME_VALID_CHARS = "-_.,@" + string.ascii_letters + string.digits
def filesystem_safe_string(s):
  """Return S with every character not in PATHNAME_VALID_CHARS removed."""
  kept = []
  for char in s:
    if char in PATHNAME_VALID_CHARS:
      kept.append(char)
  return ''.join(kept)
325
def do_div(ref, val):
  """Return VAL / REF as a float, or 0.0 if REF is zero or None."""
  if not ref:
    return 0.0
  return float(val) / float(ref)
331
def do_diff(ref, val):
  """Return VAL - REF as a float."""
  value = float(val)
  reference = float(ref)
  return value - reference
334
335
336# ------------------------- database -------------------------
337
class TimingsDb:
  """The sqlite database that holds all benchmark timings.

  Opens (and thereby creates, if necessary) the database file at DB_PATH and
  makes sure the tables exist.  The open connection is available as .conn.
  """

  def __init__(self, db_path):
    self.db_path = db_path
    self.conn = sqlite3.connect(db_path)
    self.ensure_tables_created()

  def ensure_tables_created(self):
    """Create the tables, unless the 'batch' table already exists."""
    c = self.conn.cursor()
    # try/finally so that the cursor is closed on the early return as well.
    try:
      c.execute("""SELECT name FROM sqlite_master WHERE type='table' AND
                name='batch'""")
      if c.fetchone():
        # exists
        return

      print('Creating database tables.')
      c.executescript('''
          CREATE TABLE batch (
            batch_id INTEGER PRIMARY KEY AUTOINCREMENT,
            started TEXT,
            ended TEXT
          );

          CREATE TABLE run_kind (
            run_kind_id INTEGER PRIMARY KEY AUTOINCREMENT,
            branch TEXT NOT NULL,
            revision TEXT NOT NULL,
            wc_levels INTEGER,
            wc_spread INTEGER,
            UNIQUE(branch, revision, wc_levels, wc_spread)
          );

          CREATE TABLE run (
            run_id INTEGER PRIMARY KEY AUTOINCREMENT,
            batch_id INTEGER NOT NULL REFERENCES batch(batch_id),
            run_kind_id INTEGER NOT NULL REFERENCES run_kind(run_kind_id),
            started TEXT,
            ended TEXT,
            aborted INTEGER
          );

          CREATE TABLE timings (
            run_id INTEGER NOT NULL REFERENCES run(run_id),
            command TEXT NOT NULL,
            sequence INTEGER,
            timing REAL
          );'''
        )
      self.conn.commit()
    finally:
      c.close()
388
389
class Batch:
  """A series of benchmark runs, recorded as one row of the 'batch' table.

  Creating a Batch inserts the row with the current time as start time; the
  new row's id is kept in .id.  Call done() to record the end time.
  """

  def __init__(self, db):
    self.db = db
    self.started = time_str()
    connection = db.conn
    cursor = connection.cursor()
    cursor.execute("INSERT INTO batch (started) values (?)", (self.started,))
    connection.commit()
    self.id = cursor.lastrowid
    cursor.close()

  def done(self):
    """Store the current time as this batch's end time."""
    connection = self.db.conn
    cursor = connection.cursor()
    end_time_and_id = (time_str(), self.id)
    cursor.execute("""
        UPDATE batch
        SET ended = ?
        WHERE batch_id = ?""",
        end_time_and_id)
    connection.commit()
    cursor.close()
410
class Run:
  """A single benchmark run, collecting the timings of its commands.

  Creating a Run looks up the run_kind row matching RUN_KIND (inserting it
  if there is none yet) and inserts a new row into the 'run' table for the
  given BATCH; the new row's id is kept in .id.

  Timings are gathered in memory with tic()/toc()/remember_timing() and
  written to the database by submit_timings().
  """

  def __init__(self, batch, run_kind):
    self.batch = batch
    connection = self.batch.db.conn
    cursor = connection.cursor()

    cursor.execute("""
        SELECT run_kind_id FROM run_kind
        WHERE branch = ?
          AND revision = ?
          AND wc_levels = ?
          AND wc_spread = ?""",
        run_kind.args())
    known_kind = cursor.fetchone()
    if not known_kind:
      # first run of this kind: register it
      cursor.execute("""
          INSERT INTO run_kind (branch, revision, wc_levels, wc_spread)
          VALUES (?, ?, ?, ?)""",
          run_kind.args())
      connection.commit()
      kind_id = cursor.lastrowid
    else:
      kind_id = known_kind[0]

    self.started = time_str()

    cursor.execute("""
        INSERT INTO run
          (batch_id, run_kind_id, started)
        VALUES
          (?, ?, ?)""",
        (self.batch.id, kind_id, self.started))
    connection.commit()
    self.id = cursor.lastrowid
    cursor.close()

    self.tic_at = None
    self.current_command = None
    self.timings = []

  def tic(self, command):
    """Start timing COMMAND, after finishing any timing still under way.

    Commands listed in IGNORE_COMMANDS are not timed at all.
    """
    if command not in IGNORE_COMMANDS:
      self.toc()
      self.current_command = command
      self.tic_at = datetime.datetime.now()

  def toc(self):
    """Stop the timing under way, if any, and remember its duration."""
    if self.current_command and self.tic_at:
      elapsed = datetime.datetime.now() - self.tic_at
      self.remember_timing(self.current_command,
                           timedelta_to_seconds(elapsed))
    self.current_command = None
    self.tic_at = None

  def remember_timing(self, command, seconds):
    """Append a timing of SECONDS for COMMAND to the in-memory list."""
    self.timings.append((command, seconds))

  def submit_timings(self):
    """Write all remembered timings to the 'timings' table.

    Each row gets a sequence number, counting from 1 in recording order.
    """
    connection = self.batch.db.conn
    cursor = connection.cursor()
    print('submitting...')

    rows = [(self.id, command, sequence, seconds)
            for sequence, (command, seconds) in enumerate(self.timings, 1)]
    cursor.executemany("""
      INSERT INTO timings
        (run_id, command, sequence, timing)
      VALUES
        (?, ?, ?, ?)""",
      rows)

    connection.commit()
    cursor.close()

  def done(self, aborted=False):
    """Store the current time as end time, and the ABORTED flag."""
    connection = self.batch.db.conn
    cursor = connection.cursor()
    cursor.execute("""
        UPDATE run
        SET ended = ?, aborted = ?
        WHERE run_id = ?""",
        (time_str(), aborted, self.id))
    connection.commit()
    cursor.close()
493
494
class TimingQuery:
  """Queries on the timings recorded in DB that match the RunKind RUN_KIND.

  Every element of RUN_KIND that is set (branch, revision, levels, spread)
  becomes an equality constraint on the run_kind table; unset elements
  match anything.  Only runs recorded with aborted = 0 are considered.

  All queries go to the database passed to the constructor.
  """

  def __init__(self, db, run_kind):
    # Keep the database that was handed in; the query methods must not
    # depend on a module-level 'db' happening to exist.
    self.db = db
    self.cursor = db.conn.cursor()
    self.constraints = []
    self.values = []
    self.timings = None
    self.FROM_WHERE = """
         FROM batch AS b,
              timings AS t,
              run AS r,
              run_kind as k
         WHERE
              t.run_id = r.run_id
              AND k.run_kind_id = r.run_kind_id
              AND b.batch_id = r.batch_id
              AND r.aborted = 0
         """
    self.append_constraint('k.branch', run_kind.branch)
    self.each_revision = False
    if run_kind.revision == 'each':
      self.each_revision = True
    else:
      self.append_constraint('k.revision', run_kind.revision)
    self.append_constraint('k.wc_levels', run_kind.levels)
    self.append_constraint('k.wc_spread', run_kind.spread)
    self.label = run_kind.label()

  def append_constraint(self, column_name, val):
    """Require COLUMN_NAME to equal VAL; a false VAL adds no constraint."""
    if val:
      self.constraints.append('AND %s = ?' % column_name)
      self.values.append(val)

  def remove_last_constraint(self):
    """Drop the most recently appended constraint and its value."""
    del self.constraints[-1]
    del self.values[-1]

  def get_sorted_X(self, x, n=1):
    """Return the distinct values of the column expression X, sorted by X.

    With N == 1, a flat list of values is returned; otherwise the list of
    row tuples as fetched.
    """
    query = ['SELECT DISTINCT %s' % x,
             self.FROM_WHERE ]
    query.extend(self.constraints)
    query.append('ORDER BY %s' % x)
    c = self.db.conn.cursor()
    try:
      c.execute(' '.join(query), self.values)
      if n == 1:
        return [tpl[0] for tpl in c.fetchall()]
      else:
        return c.fetchall()
    finally:
      c.close()

  def get_sorted_command_names(self):
    """Return the sorted list of command names that have timings."""
    return self.get_sorted_X('t.command')

  def get_sorted_branches(self):
    """Return the sorted list of branch labels on record."""
    return self.get_sorted_X('k.branch')

  def get_sorted_revisions(self):
    """Return the sorted list of revision labels on record.

    NOTE(review): the ordering is done by the database on 'k.revision';
    if that column holds text, '999' sorts after '1000' -- confirm that
    this is acceptable for the 'last<N>' selection.
    """
    return self.get_sorted_X('k.revision')

  def get_sorted_levels_spread(self):
    """Return the sorted list of (levels, spread) tuples on record."""
    return self.get_sorted_X('k.wc_levels,k.wc_spread', n = 2)

  def count_runs_batches(self):
    """Return the tuple (number of runs, number of batches) that match."""
    query = ["""SELECT
                  count(DISTINCT r.run_id),
                  count(DISTINCT b.batch_id)""",
             self.FROM_WHERE ]
    query.extend(self.constraints)
    c = self.db.conn.cursor()
    try:
      c.execute(' '.join(query), self.values)
      return c.fetchone()
    finally:
      c.close()

  def get_command_timings(self, command):
    """Return the tuple (count, min, max, avg) of COMMAND's timings."""
    query = ["""SELECT
                  count(t.timing),
                  min(t.timing),
                  max(t.timing),
                  avg(t.timing)""",
             self.FROM_WHERE ]
    # The command constraint is only temporary; it is removed again below.
    self.append_constraint('t.command', command)
    try:
      query.extend(self.constraints)
      c = self.db.conn.cursor()
      try:
        c.execute(' '.join(query), self.values)
        return c.fetchone()
      finally:
        c.close()
    finally:
      self.remove_last_constraint()

  def get_timings(self):
    """Return a dict mapping each command name to (count, min, max, avg).

    A non-empty result is cached and returned again by later calls.
    """
    if self.timings:
      return self.timings
    self.timings = {}
    for command_name in self.get_sorted_command_names():
      self.timings[command_name] = self.get_command_timings(command_name)
    return self.timings
598
599
600# ------------------------------------------------------------ run tests
601
602
def perform_run(batch, run_kind,
                svn_bin, svnadmin_bin, verbose):
  """Perform one benchmark run and record its timings in the database.

  BATCH is the Batch this run belongs to, RUN_KIND the RunKind labelling
  it; RUN_KIND's levels and spread determine the shape of the tested tree.
  SVN_BIN and SVNADMIN_BIN are the paths of the binaries to run.  If
  VERBOSE is true, every svn command line and its output are printed.

  A repository and two working copies are created in a fresh temporary
  directory, and a fixed sequence of svn commands is run on them, each of
  which is timed.  The random number generator is seeded with 0, so that
  every run requests the same modifications.  The temporary directory is
  removed afterwards.

  Returns the 'aborted' flag, which is False whenever the command sequence
  ran to its end.  If something raises on the way, the run is recorded as
  aborted and the exception propagates.
  """

  run = Run(batch, run_kind)

  def as_text(data):
    """Return subprocess output DATA as a text string ('' for None)."""
    if data is None:
      return ''
    if isinstance(data, str):
      return data
    return data.decode('utf-8', 'replace')

  def create_tree(in_dir, _levels, _spread):
    try:
      os.mkdir(in_dir)
    except OSError:
      # best effort, e.g. the directory may exist already
      pass

    for i in range(_spread):
      # files
      fn = j(in_dir, next_unique_basename('file'))
      with open(fn, 'w') as f:
        f.write('This is %s\n' % fn)

      # dirs
      if (_levels > 1):
        dn = j(in_dir, next_unique_basename('dir'))
        create_tree(dn, _levels - 1, _spread)

  def svn(*args):
    """Run svn with ARGS, timed under the name of the first argument."""
    name = args[0]

    cmd = [ svn_bin ]
    cmd.extend( list(args) )
    if verbose:
      print('svn cmd:', ' '.join(cmd))

    run.tic(name)
    try:
      # The pipes stay in binary mode, so that no decoding happens inside
      # the timed section; as_text() converts the output where needed.
      p = subprocess.Popen(cmd,
                           stdin=None,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE,
                           shell=False)
      stdout,stderr = p.communicate()
    except OSError:
      stdout = stderr = None
    finally:
      run.toc()

    if verbose:
      if (stdout):
        print("STDOUT: [[[\n%s]]]" % as_text(stdout))
      if (stderr):
        print("STDERR: [[[\n%s]]]" % as_text(stderr))

    return stdout,stderr


  def add(*args):
    return svn('add', *args)

  def ci(*args):
    return svn('commit', '-mm', *args)

  def up(*args):
    return svn('update', *args)

  def st(*args):
    return svn('status', *args)

  def info(*args):
    return svn('info', *args)

  _chars = [chr(x) for x in range(ord('a'), ord('z') +1)]

  def randstr(len=8):
    return ''.join( [random.choice(_chars) for i in range(len)] )

  def _copy(path):
    dest = next_unique_basename(path + '_copied')
    svn('copy', path, dest)

  def _move(path):
    dest = path + '_moved'
    svn('move', path, dest)

  def _propmod(path):
    so, se = svn('proplist', path)
    # The first output line is skipped; the others are taken as property
    # names.  as_text() also copes with svn having failed to start
    # (so is None then).
    propnames = [line.strip()
                 for line in as_text(so).strip().split('\n')[1:]]

    # modify?
    if len(propnames):
      # '//': a list index must be an integer, also on Python 3.
      svn('ps', propnames[len(propnames) // 2], randstr(), path)

    # del?
    if len(propnames) > 1:
      svn('propdel', propnames[len(propnames) // 2], path)

  def _propadd(path):
    # set a new one.
    svn('propset', randstr(), randstr(), path)

  def _mod(path):
    if os.path.isdir(path):
      _propmod(path)
      return

    with open(path, 'a') as f:
      f.write('\n%s\n' % randstr())

  def _add(path):
    if os.path.isfile(path):
      return _mod(path)

    if random.choice((True, False)):
      # create a dir
      svn('mkdir', j(path, next_unique_basename('new_dir')))
    else:
      # create a file
      new_path = j(path, next_unique_basename('new_file'))
      with open(new_path, 'w') as f:
        f.write(randstr())
      svn('add', new_path)

  def _del(path):
    svn('delete', path)

  _mod_funcs = (_mod, _add, _propmod, _propadd, )#_copy,) # _move, _del)

  def modify_tree(in_dir, fraction):
    child_names = os.listdir(in_dir)
    for child_name in child_names:
      if child_name[0] == '.':
        continue
      if random.random() < fraction:
        path = j(in_dir, child_name)
        random.choice(_mod_funcs)(path)

    for child_name in child_names:
      if child_name[0] == '.': continue
      path = j(in_dir, child_name)
      if os.path.isdir(path):
        modify_tree(path, fraction)

  def propadd_tree(in_dir, fraction):
    for child_name in os.listdir(in_dir):
      if child_name[0] == '.': continue
      path = j(in_dir, child_name)
      if random.random() < fraction:
        _propadd(path)
      if os.path.isdir(path):
        propadd_tree(path, fraction)


  def rmtree_onerror(func, path, exc_info):
    """Error handler for ``shutil.rmtree``.

    If the error is due to an access error (read only file)
    it attempts to add write permission and then retries.

    If the error is for another reason it re-raises the error.

    Usage : ``shutil.rmtree(path, onerror=onerror)``
    """
    if not os.access(path, os.W_OK):
      # Is the error an access error ?
      os.chmod(path, stat.S_IWUSR)
      func(path)
    else:
      raise

  base = tempfile.mkdtemp()

  # ensure identical modifications for every run
  random.seed(0)

  aborted = True

  try:
    repos = j(base, 'repos')
    repos = repos.replace('\\', '/')
    wc = j(base, 'wc')
    wc2 = j(base, 'wc2')

    if repos.startswith('/'):
      file_url = 'file://%s' % repos
    else:
      file_url = 'file:///%s' % repos

    print('\nRunning svn benchmark in', base)
    print('dir levels: %s; new files and dirs per leaf: %s' %(
          run_kind.levels, run_kind.spread))

    started = datetime.datetime.now()

    try:
      run_cmd([svnadmin_bin, 'create', repos])
      svn('checkout', file_url, wc)

      trunk = j(wc, 'trunk')
      create_tree(trunk, run_kind.levels, run_kind.spread)
      add(trunk)
      st(wc)
      ci(wc)
      up(wc)
      propadd_tree(trunk, 0.05)
      ci(wc)
      up(wc)
      st(wc)
      info('-R', wc)

      trunk_url = file_url + '/trunk'
      branch_url = file_url + '/branch'

      svn('copy', '-mm', trunk_url, branch_url)
      st(wc)

      up(wc)
      st(wc)
      info('-R', wc)

      svn('checkout', trunk_url, wc2)
      st(wc2)
      modify_tree(wc2, 0.5)
      st(wc2)
      ci(wc2)
      up(wc2)
      up(wc)

      svn('switch', branch_url, wc2)
      modify_tree(wc2, 0.5)
      st(wc2)
      info('-R', wc2)
      ci(wc2)
      up(wc2)
      up(wc)

      modify_tree(trunk, 0.5)
      st(wc)
      ci(wc)
      up(wc2)
      up(wc)

      svn('merge', '--accept=postpone', trunk_url, wc2)
      st(wc2)
      info('-R', wc2)
      svn('resolve', '--accept=mine-conflict', wc2)
      st(wc2)
      svn('resolved', '-R', wc2)
      st(wc2)
      info('-R', wc2)
      ci(wc2)
      up(wc2)
      up(wc)

      svn('merge', '--accept=postpone', '--reintegrate', branch_url, trunk)
      st(wc)
      svn('resolve', '--accept=mine-conflict', wc)
      st(wc)
      svn('resolved', '-R', wc)
      st(wc)
      ci(wc)
      up(wc2)
      up(wc)

      svn('delete', j(wc, 'branch'))
      ci(wc)
      up(wc)

      aborted = False

    finally:
      stopped = datetime.datetime.now()
      print('\nDone with svn benchmark in', (stopped - started))

      # The duration of the whole run is recorded even for an aborted run.
      run.remember_timing(TOTAL_RUN,
                        timedelta_to_seconds(stopped - started))
  finally:
    run.done(aborted)
    run.submit_timings()
    shutil.rmtree(base, onerror=rmtree_onerror)

  return aborted
889
890
891# ---------------------------------------------------------------------
892
893
def cmdline_run(db, options, run_kind_str, N=1):
  """The 'run' subcommand: perform N benchmark runs in one batch.

  RUN_KIND_STR is the timings selection 'B@R,LxS' labelling the runs; all
  four of its elements must be given.  N is the number of runs, as a number
  or a string.  The svn and svnadmin binaries are taken from
  OPTIONS.svn_bin_dir; both are checked to be runnable first.

  Bails out if the selection is incomplete or a binary cannot be run.
  """
  run_kind = parse_one_timing_selection(db, run_kind_str)

  # Levels and spread shape the tested tree, and branch and revision must
  # not be NULL in the database.  Refuse here, rather than fail somewhere
  # in the middle of the first run.
  if None in run_kind.args():
    bail("To run benchmarks, all of B@R,LxS must be given, not '%s'"
         % run_kind_str)

  N = int(N)

  print('Hi, going to run a Subversion benchmark series of %d runs...' % N)
  print('Label is %s' % run_kind.label())

  # can we run the svn binaries?
  svn_bin = j(options.svn_bin_dir, 'svn')
  svnadmin_bin = j(options.svn_bin_dir, 'svnadmin')

  for b in (svn_bin, svnadmin_bin):
    try:
      so,se = run_cmd([b, '--version'])
    except OSError:
      # e.g. the binary does not exist or is not executable
      so = None
    if not so:
      bail("Can't run %s" % b)

    # The output may arrive as bytes; it is split as text below.
    if not isinstance(so, str):
      so = so.decode('utf-8', 'replace')

    print(', '.join([s.strip() for s in so.split('\n')[:2]]))

  batch = Batch(db)

  for i in range(N):
    print('Run %d of %d' % (i + 1, N))
    perform_run(batch, run_kind,
                svn_bin, svnadmin_bin, options.verbose)

  batch.done()
921
922
def cmdline_list(db, options, *args):
  """The 'list' subcommand: print what is on record for each selection.

  For every timings selection in ARGS, prints the constraints given and the
  command names, branches, revisions and levels x spread kinds found in DB
  for them, followed by the number of matching runs and batches.  A list
  is left out if it only repeats the constraint that was given.
  """
  for run_kind in parse_timings_selections(db, *args):

    given = (('branch', run_kind.branch),
             ('revision', run_kind.revision),
             ('levels', run_kind.levels),
             ('spread', run_kind.spread))
    constraints = ['  %s = %s' % (name, val) for name, val in given if val]
    if constraints:
      print('For\n', '\n'.join(constraints))
    print('I found:')

    found = TimingQuery(db, run_kind)

    cmd_names = found.get_sorted_command_names()
    if cmd_names:
      print('\n%d command names:\n ' % len(cmd_names), '\n  '.join(cmd_names))

    branches = found.get_sorted_branches()
    if len(branches) > 1 or (branches and branches[0] != run_kind.branch):
      print('\n%d branches:\n ' % len(branches), '\n  '.join(branches))

    revisions = found.get_sorted_revisions()
    if len(revisions) > 1 or (revisions
                              and revisions[0] != run_kind.revision):
      print('\n%d revisions:\n ' % len(revisions), '\n  '.join(revisions))

    wc_kinds = found.get_sorted_levels_spread()
    wanted_wc_kind = (run_kind.levels, run_kind.spread)
    if len(wc_kinds) > 1 or (wc_kinds and wc_kinds[0] != wanted_wc_kind):
      wc_kind_labels = [ ('%dx%d' % (l, s)) for l,s in wc_kinds ]
      print('\n%d kinds of levels x spread:\n ' % len(wc_kinds),
            '\n  '.join(wc_kind_labels))

    print("\n%d runs in %d batches.\n" % (found.count_runs_batches()))
962
963
def cmdline_show(db, options, *run_kind_strings):
  """The 'show' subcommand: print a table of timings for each selection.

  Every line shows the number of timings and their minimum, maximum and
  average in seconds for one command.  If OPTIONS.command_names is set,
  only those commands are shown.
  """
  for run_kind in parse_timings_selections(db, *run_kind_strings):
    q = TimingQuery(db, run_kind)
    timings = q.get_timings()

    lines = [
      'Timings for %s' % run_kind.label(),
      '   N    min     max     avg   operation  (unit is seconds)',
      ]

    for command_name in q.get_sorted_command_names():
      selected = (not options.command_names
                  or command_name in options.command_names)
      if selected:
        n, tmin, tmax, tavg = timings[command_name]
        row = (n_label(n), tmin, tmax, tavg, command_name)
        lines.append('%4s %7.2f %7.2f %7.2f  %s' % row)

    print('\n'.join(lines))
987
988
def cmdline_compare(db, options, *args):
  """The 'compare' subcommand: compare timings sets to the first one.

  ARGS are timings selections.  The first resulting set is the reference;
  for each further set a table is printed, showing for every command that
  has timings in both sets the factor and the difference in seconds of the
  average (with OPTIONS.verbose also of the minimum and maximum).
  OPTIONS.command_names, if set, limits the commands shown.

  Bails out if there are fewer than two sets, or no reference timings.
  """
  run_kinds = parse_timings_selections(db, *args)
  if len(run_kinds) < 2:
    bail("Need at least two sets of timings to compare.")

  left_kind = run_kinds[0]
  leftq = TimingQuery(db, left_kind)
  left = leftq.get_timings()
  if not left:
    bail("No timings for %s" % left_kind.label())

  def factor_and_delta(ref, val):
    # e.g. '   1.23| +0.450'
    return '%7.2f|%+7.3f' % (do_div(ref, val), do_diff(ref, val))

  for right_kind in run_kinds[1:]:
    right = TimingQuery(db, right_kind).get_timings()
    if not right:
      print("No timings for %s" % right_kind.label())
      continue

    s = ['Compare %s to %s' % (right_kind.label(), left_kind.label())]

    verbose = options.verbose
    if verbose:
      s.append('       N        min              max              avg         operation')
    else:
      s.append('       N        avg         operation')

    # only commands that have timings on both sides can be compared
    command_names = [name for name in leftq.get_sorted_command_names()
                     if name in right]
    if options.command_names:
      command_names = [name for name in command_names
                       if name in options.command_names]

    for command_name in command_names:
      left_N, left_min, left_max, left_avg = left[command_name]
      right_N, right_min, right_max, right_avg = right[command_name]

      N_str = '%s/%s' % (n_label(left_N), n_label(right_N))
      avg_str = factor_and_delta(left_avg, right_avg)

      if verbose:
        min_str = factor_and_delta(left_min, right_min)
        max_str = factor_and_delta(left_max, right_max)
        s.append('%9s %-16s %-16s %-16s  %s' % (N_str, min_str, max_str,
                                                avg_str, command_name))
      else:
        s.append('%9s %-16s  %s' % (N_str, avg_str, command_name))

    s.append(
      '(legend: "1.23|+0.45" means: slower by factor 1.23 and by 0.45 seconds;')
    s.append(' factor < 1 and seconds < 0 means \'%s\' is faster.'
             % right_kind.label())
    s.append(' "2/3" means: \'%s\' has 2 timings on record, the other has 3.)'
             % left_kind.label())

    print('\n'.join(s))
1055
1056
1057# ------------------------------------------------------- charts
1058
def cmdline_chart_compare(db, options, *args):
  """Write a bar chart comparing the first timings selection to the others.

  The first selection in ARGS that has timings on record is the baseline.
  For every further selection that has timings, and for each command name
  that selection has timings for, one bar is drawn in each of two plots:
  the change of the average runtime in percent, and the change in seconds.

  DB is passed on to parse_timings_selections() and TimingQuery().
  OPTIONS supplies:
    command_names -- if set, only command names found in it are charted,
    chart_path    -- output file path; if unset, a name of the form
                     'compare_<labels>.svg' is derived from the labels,
    title         -- if set, printed at the top of the chart.
  ARGS are the timings selection strings.

  Bails out if fewer than two selections have timings on record, or if no
  command name is left to chart.
  """
  import matplotlib
  # Agg is a non-interactive backend; the chart is only written to a file.
  matplotlib.use('Agg')
  import numpy as np
  import matplotlib.pyplot as plt

  labels = []
  timing_sets = []
  command_names = None

  run_kinds = parse_timings_selections(db, *args)

  # iterate the timings selections and accumulate data
  for run_kind in run_kinds:
    query = TimingQuery(db, run_kind)
    timings = query.get_timings()
    if not timings:
      print("No timings for %s" % run_kind.label())
      continue
    labels.append(run_kind.label())
    timing_sets.append(timings)

    # it only makes sense to compare those commands that have timings
    # in the first selection, because that is the one everything else
    # is compared to. Remember the first selection's command names.
    if not command_names:
      command_names = query.get_sorted_command_names()

  if len(timing_sets) < 2:
    bail("Not enough timings")

  if options.command_names:
    command_names = [name for name in command_names
                     if name in options.command_names]

  # With nothing left to chart, the layout math below would divide by zero.
  if not command_names:
    bail("No command names to chart")

  chart_path = options.chart_path
  if not chart_path:
    chart_path = 'compare_' + '_'.join(
      [ filesystem_safe_string(l) for l in labels ]
      ) + '.svg'

  N = len(command_names)
  # Lay out for at least two bars per group, even with a single comparison.
  M = max(len(timing_sets) - 1, 2)

  group_positions = np.arange(N)  # the y locations for the groups
  dist = 1. / (1. + M)
  height = (1. - dist) / M     # the height of the bars
  barheight = height * (1.0 - dist)

  fig = plt.figure(figsize=(12, 5 + 0.2*N*M))
  plot1 = fig.add_subplot(121)
  plot2 = fig.add_subplot(122)

  # The baseline everything else is compared to.
  left = timing_sets[0]

  # Iterate timing sets. Each loop produces one bar for each command name
  # group.
  for label_i, (label, right) in enumerate(zip(labels[1:], timing_sets[1:]),
                                           1):
    # Vertical offset of this selection's bar within each command group.
    ofs = (dist + height) / 2. + height * (label_i - 1)

    for cmd_i, command_name in enumerate(command_names):
      if command_name not in right:
        #skip
        continue

      left_N, left_min, left_max, left_avg = left[command_name]
      right_N, right_min, right_max, right_avg = right[command_name]

      div_avg = 100. * (do_div(left_avg, right_avg) - 1.0)
      # Green for "not slower than the baseline", red for "slower".
      if div_avg <= 0:
        col = '#55dd55'
      else:
        col = '#dd5555'

      diff_val = do_diff(left_avg, right_avg)

      y = float(cmd_i) + ofs

      plot1.barh((y, ),
                 (div_avg, ),
                 barheight,
                 color=col, edgecolor='white')
      plot1.text(0., y + height/2.,
                 '%s %+5.1f%%' % (label, div_avg),
                 ha='right', va='center', size='small',
                 rotation=0, family='monospace')

      plot2.barh((y, ),
                 (diff_val, ),
                 barheight,
                 color=col, edgecolor='white')
      plot2.text(0., y + height/2.,
                 '%s %+6.2fs' % (label, diff_val),
                 ha='right', va='center', size='small',
                 rotation=0, family='monospace')


  for p in (plot1, plot2):
    xlim = list(p.get_xlim())
    if xlim[1] < 10.:
      xlim[1] = 10.
    # make sure the zero line is far enough right so that the annotations
    # fit inside the chart. About half the width should suffice.
    if xlim[0] > -xlim[1]:
      xlim[0] = -xlim[1]
    p.set_xlim(*xlim)
    p.set_xticks((0,))
    p.set_yticks(group_positions + (height / 2.))
    p.set_yticklabels(())
    # Upper limit first: the first command name ends up at the top.
    p.set_ylim((len(command_names), 0))
    p.grid()

  plot1.set_xticklabels(('+-0%',), rotation=0)
  plot1.set_title('Average runtime change from %s in %%' % labels[0],
                  size='medium')

  plot2.set_xticklabels(('+-0s',), rotation=0)
  plot2.set_title('Average runtime change from %s in seconds' % labels[0],
                  size='medium')

  margin = 1./(2 + N*M)
  titlemargin = 0
  if options.title:
    titlemargin = margin * 1.5

  fig.subplots_adjust(left=0.005, right=0.995, wspace=0.3, bottom=margin,
                      top=1.0-margin-titlemargin)

  ystep = (1.0 - 2.*margin - titlemargin) / len(command_names)

  # Between the two plots, name each command group and state the baseline's
  # average time for it.
  for idx,command_name in enumerate(command_names):
    ypos=1.0 - margin - titlemargin - ystep/M - ystep * idx
    plt.figtext(0.5, ypos,
                command_name,
                ha='center', va='top',
                size='medium', weight='bold')
    plt.figtext(0.5, ypos - ystep/(M+1),
                '%s\n= %.2fs' % (
                  labels[0], left[command_name][3]),
                ha='center', va='top',
                size='small')

  if options.title:
    plt.figtext(0.5, 1. - titlemargin/2, options.title, ha='center',
                va='center', weight='bold')

  plt.savefig(chart_path)
  print('wrote chart file:', chart_path)
1219
1220
1221# ------------------------------------------------------------ main
1222
1223
1224# Custom option formatter, keeping newlines in the description.
1225# adapted from:
1226# http://groups.google.com/group/comp.lang.python/msg/09f28e26af0699b1
1227import textwrap
1228class IndentedHelpFormatterWithNL(optparse.IndentedHelpFormatter):
1229  def format_description(self, description):
1230    if not description: return ""
1231    desc_width = self.width - self.current_indent
1232    indent = " "*self.current_indent
1233    bits = description.split('\n')
1234    formatted_bits = [
1235      textwrap.fill(bit,
1236        desc_width,
1237        initial_indent=indent,
1238        subsequent_indent=indent)
1239      for bit in bits]
1240    result = "\n".join(formatted_bits) + "\n"
1241    return result
1242
if __name__ == '__main__':
  # Command line entry point: parse the options, open the timings database
  # and dispatch to the function implementing the requested sub-command.
  parser = optparse.OptionParser(formatter=IndentedHelpFormatterWithNL())
  # -h is automatically added.
  ### should probably expand the help for that. and see about -?
  parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                    help='Verbose operation')
  parser.add_option('-b', '--svn-bin-dir', action='store', dest='svn_bin_dir',
                    default='',
                    help='Specify directory to find Subversion binaries in')
  parser.add_option('-f', '--db-path', action='store', dest='db_path',
                    default='benchmark.db',
                    help='Specify path to SQLite database file')
  parser.add_option('-o', '--chart-path', action='store', dest='chart_path',
                    help='Supply a path for chart output.')
  parser.add_option('-c', '--command-names', action='store',
                    dest='command_names',
                    help='Comma separated list of command names to limit to.')
  parser.add_option('-t', '--title', action='store',
                    dest='title',
                    help='For charts, a title to print in the chart graphics.')

  # The module docstring doubles as the help text; the usage line is
  # blanked because the docstring already begins with one.
  parser.set_description(__doc__)
  parser.set_usage('')


  options, args = parser.parse_args()

  def usage(msg=None):
    """Print the help text, then MSG if given, and bail out."""
    parser.print_help()
    if msg:
      print("")
      print(msg)
    bail()

  # there should be at least one arg left: the sub-command
  if not args:
    usage('No command argument supplied.')

  cmd = args[0]
  del args[0]

  db = TimingsDb(options.db_path)

  if cmd == 'run':
    if len(args) < 1 or len(args) > 2:
      usage()
    cmdline_run(db, options, *args)

  elif cmd == 'compare':
    if len(args) < 2:
      usage()
    cmdline_compare(db, options, *args)

  elif cmd == 'list':
    cmdline_list(db, options, *args)

  elif cmd == 'show':
    cmdline_show(db, options, *args)

  elif cmd == 'chart':
    # Any prefix of 'compare' selects the comparison chart. Check that the
    # chart type argument is present first: 'chart' given on its own must
    # show the usage rather than fail with an IndexError.
    if args and 'compare'.startswith(args[0]):
      cmdline_chart_compare(db, options, *args[1:])
    else:
      usage()

  else:
    usage('Unknown subcommand argument: %s' % cmd)
1310