1#!/usr/bin/env python
2#
3#
4# Licensed to the Apache Software Foundation (ASF) under one
5# or more contributor license agreements.  See the NOTICE file
6# distributed with this work for additional information
7# regarding copyright ownership.  The ASF licenses this file
8# to you under the Apache License, Version 2.0 (the
9# "License"); you may not use this file except in compliance
10# with the License.  You may obtain a copy of the License at
11#
12#   http://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing,
15# software distributed under the License is distributed on an
16# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17# KIND, either express or implied.  See the License for the
18# specific language governing permissions and limitations
19# under the License.
20#
21#
22
23## See the usage() function for operating instructions. ##
24
25import re
26try:
27  # Python >=2.6
28  from functools import reduce
29except ImportError:
30  # Python <2.6
31  pass
32import sys
33import operator
34
# Matches one trail record of the form
#   (txn_body, filename, lineno, txn): ops
# where txn is 0 or 1 and ops is the remainder of the line.  Raw strings
# are used so the regex backslash escapes are not interpreted (and warned
# about) as invalid Python string escapes.
_re_trail = re.compile(r'\((?P<txn_body>[a-z_]*), (?P<filename>[a-z_\-./]*), (?P<lineno>[0-9]*), (?P<txn>0|1)\): (?P<ops>.*)')
# Matches a single "(word, word)" pair inside the ops part of a record.
_re_table_op = re.compile(r'\(([a-z]*), ([a-z]*)\)')

# Horizontal rule written above and below each report heading.
_separator = '------------------------------------------------------------\n'
39
def parse_trails_log(infile):
  """Parse trail debugging output read from INFILE.

  Return a list of (txn_body, trail) tuples, one for each input line
  whose txn flag is 1.  txn_body is a (name, filename, lineno) tuple and
  trail is the list of (word, word) pairs found on the line, in reverse
  order of appearance.

  A line that does not match the expected format is reported on stderr
  and ends the program with exit status 1.  A line without any pairs
  only produces a warning on stderr.
  """
  parsed = []
  for number, text in enumerate(infile.readlines(), 1):
    match = _re_trail.match(text)
    if match is None:
      sys.stderr.write('Invalid input, line %u:\n%s\n' % (number, text))
      sys.exit(1)

    ### We're not interested in trails that don't use txns at this point.
    if int(match.group('txn')) == 0:
      continue

    body_key = (match.group('txn_body'), match.group('filename'),
                int(match.group('lineno')))
    # Keep the pairs in the opposite order from how they were logged.
    ops = _re_table_op.findall(match.group('ops'))[::-1]

    if not ops:
      sys.stderr.write('Warning!  Empty trail at line %u:\n%s' % (number, text))

    parsed.append((body_key, ops))

  return parsed
68
69
def output_summary(trails, outfile):
  """Write overall trail statistics to OUTFILE.

  TRAILS is a sequence of (txn_body, trail) tuples; only the length of
  each trail is used.  The report lists the number of trails, the total
  number of ops, and the maximum, median and average ops per trail.

  An empty TRAILS sequence yields a report of zeros instead of raising.
  """
  ops = sorted(len(trail) for (_txn_body, trail) in trails)

  total_trails = len(ops)
  total_ops = sum(ops)
  if total_trails:
    max_ops = ops[-1]
    # Floor division: a float is not a valid list index on Python 3.
    # For an even count this selects the upper of the two middle values.
    median_ops = ops[total_trails // 2]
    average_ops = float(total_ops) / total_trails
  else:
    # Nothing to summarize; avoid indexing an empty list and dividing
    # by zero.
    max_ops = median_ops = 0
    average_ops = 0.0

  outfile.write(_separator)
  outfile.write('Summary\n')
  outfile.write(_separator)
  outfile.write('Total number of trails: %10i\n' % total_trails)
  outfile.write('Total number of ops:    %10i\n' % total_ops)
  outfile.write('max ops/trail:          %10i\n' % max_ops)
  outfile.write('median ops/trail:       %10i\n' % median_ops)
  outfile.write('average ops/trail:      %10.2f\n' % average_ops)
  outfile.write('\n')
91
92
# custom compare function
def _freqtable_cmp(a_b, c_d):
  """Compare two (item, frequency) pairs for ordering a frequency table.

  Pairs are ordered by descending frequency and, for equal frequencies,
  by ascending item.  Returns -1, 0 or 1 in the manner of cmp().
  """
  (a, b) = a_b
  (c, d) = c_d
  # (x > y) - (x < y) is the portable spelling of cmp(x, y), which does
  # not exist on Python 3.  The result is kept in its own variable so the
  # unpacked item 'c' is still available for the tie-break below.
  result = (d > b) - (d < b)
  if not result:
    result = (a > c) - (a < c)
  return result
101
def list_frequencies(list):
  """
  Given a list, return a list composed of (item, frequency) tuples,
  sorted by descending frequency and, for equal frequencies, by
  ascending item.

  Items must be hashable (they are used as dictionary keys) and mutually
  orderable (so that ties can be broken).
  """

  # NOTE: the parameter is named 'list' (kept so existing callers keep
  # working), which shadows the builtin inside this function; the builtin
  # list() therefore must not be called here.
  counter = {}
  for item in list:
    counter[item] = counter.get(item, 0) + 1

  # sorted() builds the result list itself and takes a key function,
  # which works on Python 2 and 3 alike (a positional cmp argument to
  # sort() does not).  Negating the count gives the descending order.
  return sorted(counter.items(), key=lambda pair: (-pair[1], pair[0]))
116
117
def output_trail_length_frequencies(trails, outfile):
  """Write a table of how often each trail length occurs to OUTFILE.

  TRAILS is a sequence of (txn_body, trail) tuples.  Each table row
  shows a trail length, the number of trails of that length, and that
  number as a percentage of all trails.
  """
  lengths = [len(trail) for (_body, trail) in trails]
  trail_count = len(lengths)
  table = list_frequencies(lengths)

  for heading in (_separator, 'Trail length frequencies\n', _separator,
                  'ops/trail   frequency   percentage\n'):
    outfile.write(heading)

  for (length, count) in table:
    percentage = float(count) * 100 / trail_count
    outfile.write('%4i         %6i       %5.2f\n' % (length, count, percentage))

  outfile.write('\n')
134
135
def output_trail(outfile, trail, column = 0):
  """Write TRAIL to OUTFILE as a comma-separated, line-wrapped list.

  An empty trail is written as '<empty>' on a line of its own.
  Otherwise the ops are written in order, and a new line is started
  whenever adding the next op would push the current line past
  75 - COLUMN characters (the separator itself is not counted).
  Continuation lines are indented by COLUMN spaces, and a blank line
  follows the trail.
  """
  if not trail:
    outfile.write('<empty>\n')
    return

  width = 75 - column
  indent = ' ' * column

  current = str(trail[0])
  for op in trail[1:]:
    piece = str(op)
    if len(current) + len(piece) <= width:
      # Still fits: keep accumulating on the current line.
      current = ', '.join((current, piece))
      continue
    # Flush the full line and start the next one in the trail column.
    outfile.write(current + ',\n')
    outfile.write(indent)
    current = piece

  outfile.write(current + '\n')
  outfile.write('\n')
155
156
def output_trail_frequencies(trails, outfile):
  """Write a table of how often each distinct trail occurs to OUTFILE.

  TRAILS is a sequence of (txn_body, trail) tuples, where txn_body is a
  (name, file, line) tuple.  Two entries count as the same trail when
  both the txn_body and the sequence of ops are equal.  For every
  distinct entry the report shows where it came from, its frequency,
  that frequency as a percentage of all trails, its length, and the
  trail itself.
  """
  trail_count = len(trails)

  # Lists cannot be used as dictionary keys, so each trail is frozen
  # into a tuple before the entries are counted.
  hashable = [(body, tuple(trail)) for (body, trail) in trails]
  table = list_frequencies(hashable)

  outfile.write(_separator)
  outfile.write('Trail frequencies\n')
  outfile.write(_separator)
  outfile.write('frequency   percentage   ops/trail   trail\n')

  for (entry, count) in table:
    ((body_name, source_file, source_line), ops) = entry
    percentage = float(count) * 100 / trail_count
    outfile.write('-- %s - %s:%u --\n' % (body_name, source_file, source_line))
    outfile.write('%6i        %5.2f       %4i       '
                  % (count, percentage, len(ops)))
    # Wrapped lines of the trail are indented by 37 columns.
    output_trail(outfile, ops, 37)
176
177
def output_txn_body_frequencies(trails, outfile):
  """Write a table of how often each txn_body occurs to OUTFILE.

  TRAILS is a sequence of (txn_body, trail) tuples, where txn_body is a
  (name, file, line) tuple; the trails themselves are ignored.  Each
  row shows a frequency, that frequency as a percentage of all trails,
  and the txn_body it belongs to.
  """
  body_keys = [body for (body, _trail) in trails]
  trail_count = len(trails)
  table = list_frequencies(body_keys)

  for heading in (_separator, 'txn_body frequencies\n', _separator,
                  'frequency   percentage   txn_body\n'):
    outfile.write(heading)

  for (body, count) in table:
    (body_name, source_file, source_line) = body
    percentage = float(count) * 100 / trail_count
    row = (count, percentage, body_name, source_file, source_line)
    outfile.write('%6i        %5.2f       %s - %s:%u\n' % row)
194
195
def usage(pgm):
  """Write operating instructions to stderr.

  PGM is the program name to show in the message.
  """
  w = sys.stderr.write
  w("%s: a program for analyzing Subversion trail usage statistics.\n" % pgm)
  w("\n")
  w("Usage:\n")
  w("\n")
  w("   Compile Subversion with -DSVN_FS__TRAIL_DEBUG, which will cause it\n")
  w("   to print trail statistics to stderr.  Save the stats to a file,\n")
  w("   invoke %s on the file, and ponder the output.\n" % pgm)
  w("\n")
206
207
if __name__ == '__main__':
  # Accept at most one argument: the file holding the trail statistics.
  if len(sys.argv) > 2:
    sys.stderr.write("Error: too many arguments\n\n")
    usage(sys.argv[0])
    sys.exit(1)

  # Without an argument the statistics are read from standard input.
  if len(sys.argv) == 1:
    infile = sys.stdin
  else:
    try:
      infile = open(sys.argv[1])
    except IOError:
      sys.stderr.write("Error: unable to open '%s'\n\n" % sys.argv[1])
      usage(sys.argv[0])
      sys.exit(1)

  try:
    trails = parse_trails_log(infile)
  finally:
    # Close the file we opened ourselves, even when parsing exits early
    # on invalid input; standard input is left alone.
    if infile is not sys.stdin:
      infile.close()

  # Emit every report, in order, on standard output.
  output_summary(trails, sys.stdout)
  output_trail_length_frequencies(trails, sys.stdout)
  output_trail_frequencies(trails, sys.stdout)
  output_txn_body_frequencies(trails, sys.stdout)
230