#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#

## See the usage() function for operating instructions. ##

import re
try:
    # Python >=2.6
    from functools import reduce
except ImportError:
    # Python <2.6
    pass
import sys
import operator

# One log line: "(txn_body, filename, lineno, txn): ops".  Raw strings keep
# the regex escapes (\( \) \-) from being read as (invalid) string escapes.
_re_trail = re.compile(r'\((?P<txn_body>[a-z_]*), (?P<filename>[a-z_\-./]*), (?P<lineno>[0-9]*), (?P<txn>0|1)\): (?P<ops>.*)')
# One "(table, op)" pair inside the ops part of a log line.
_re_table_op = re.compile(r'\(([a-z]*), ([a-z]*)\)')

_separator = '------------------------------------------------------------\n'


def parse_trails_log(infile):
    """Parse a trail debug log into a list of trails.

    INFILE is an iterable of text lines (e.g. an open file).  Every line
    must match _re_trail; on the first line that does not, a message is
    written to stderr and the process exits with status 1.

    Lines whose txn flag is 0 are skipped.  For each remaining line the
    result holds a tuple (txn_body, trail) where txn_body is
    (name, filename, lineno) and trail is the list of (table, op) string
    pairs found on the line, in reverse order of appearance.  A line with
    no ops produces a warning on stderr but is still included.
    """
    trails = []
    for lineno, line in enumerate(infile, 1):
        m = _re_trail.match(line)
        if not m:
            sys.stderr.write('Invalid input, line %u:\n%s\n' % (lineno, line))
            sys.exit(1)

        if not int(m.group('txn')):
            ### We're not interested in trails that don't use txns at this point.
            continue

        txn_body = (m.group('txn_body'), m.group('filename'),
                    int(m.group('lineno')))
        trail = _re_table_op.findall(m.group('ops'))
        trail.reverse()

        if not trail:
            sys.stderr.write('Warning! Empty trail at line %u:\n%s'
                             % (lineno, line))

        trails.append((txn_body, trail))

    return trails


def output_summary(trails, outfile):
    """Write overall statistics about TRAILS to OUTFILE.

    Reports the number of trails, the total number of ops, and the
    maximum, median and average number of ops per trail.  The median is
    the element at index len/2 (rounded down) of the sorted lengths.
    An empty TRAILS list is reported as all zeroes instead of crashing.
    """
    ops = sorted(len(trail) for (txn_body, trail) in trails)

    total_trails = len(ops)
    total_ops = sum(ops)
    if total_trails:
        max_ops = ops[-1]
        # Floor division: a float index is an error on Python 3.
        median_ops = ops[total_trails // 2]
        average_ops = float(total_ops) / total_trails
    else:
        max_ops = 0
        median_ops = 0
        average_ops = 0.0

    outfile.write(_separator)
    outfile.write('Summary\n')
    outfile.write(_separator)
    outfile.write('Total number of trails: %10i\n' % total_trails)
    outfile.write('Total number of ops: %10i\n' % total_ops)
    outfile.write('max ops/trail: %10i\n' % max_ops)
    outfile.write('median ops/trail: %10i\n' % median_ops)
    outfile.write('average ops/trail: %10.2f\n' % average_ops)
    outfile.write('\n')


# custom compare function
def _freqtable_cmp(a_b, c_d):
    """Compare two (item, frequency) pairs.

    Orders by frequency, highest first; pairs with equal frequency are
    ordered by item, lowest first.  Returns a negative, zero or positive
    integer in the manner of the Python 2 cmp() builtin, which is no
    longer available on Python 3.
    """
    (a, b) = a_b
    (c, d) = c_d
    result = (d > b) - (d < b)
    if not result:
        result = (a > c) - (a < c)
    return result


def list_frequencies(list):
    """
    Given a list, return a list composed of (item, frequency)
    in sorted order

    The result is ordered by descending frequency; items with the same
    frequency are ordered ascending.  Items must be hashable and mutually
    comparable.
    """
    # NOTE: the parameter name shadows the builtin list(); it is kept for
    # interface compatibility, so the builtin must not be called in here.
    counter = {}
    for item in list:
        counter[item] = counter.get(item, 0) + 1

    # Same ordering as _freqtable_cmp, expressed as a sort key.
    return sorted(counter.items(), key=lambda pair: (-pair[1], pair[0]))


def output_trail_length_frequencies(trails, outfile):
    """Write to OUTFILE how often each trail length occurs in TRAILS.

    One row per distinct length: the length, the number of trails having
    it, and that number as a percentage of all trails.
    """
    ops = [len(trail) for (txn_body, trail) in trails]

    total_trails = len(ops)
    frequencies = list_frequencies(ops)

    outfile.write(_separator)
    outfile.write('Trail length frequencies\n')
    outfile.write(_separator)
    outfile.write('ops/trail frequency percentage\n')
    for (r, f) in frequencies:
        p = float(f) * 100 / total_trails
        outfile.write('%4i %6i %5.2f\n' % (r, f, p))
    outfile.write('\n')


def output_trail(outfile, trail, column = 0):
    """Write the ops of TRAIL to OUTFILE as a wrapped, comma-separated list.

    COLUMN is the width already used on the current output line; the
    first line is written as-is and continuation lines are indented by
    COLUMN spaces.  A line is broken when appending the next op would
    push its text past 75 - COLUMN characters.  An empty trail is written
    as '<empty>'.
    """
    ### Output the trail itself, in its own column

    if not trail:
        outfile.write('<empty>\n')
        return

    line = str(trail[0])
    for op in trail[1:]:
        op_str = str(op)
        if len(line) + len(op_str) > 75 - column:
            outfile.write('%s,\n' % line)
            outfile.write(' ' * column)
            line = op_str
        else:
            line = line + ', ' + op_str
    outfile.write('%s\n' % line)

    outfile.write('\n')


def output_trail_frequencies(trails, outfile):
    """Write to OUTFILE how often each distinct (txn_body, trail) occurs.

    For every distinct combination, prints the txn_body location, the
    number of occurrences, the percentage of all trails, the trail length
    and the trail itself.
    """
    total_trails = len(trails)

    # Lists are not hashable; freeze each trail so it can be counted.
    ttrails = [(txn_body, tuple(trail)) for (txn_body, trail) in trails]

    frequencies = list_frequencies(ttrails)

    outfile.write(_separator)
    outfile.write('Trail frequencies\n')
    outfile.write(_separator)
    outfile.write('frequency percentage ops/trail trail\n')
    for (((txn_body, filename, lineno), trail), f) in frequencies:
        p = float(f) * 100 / total_trails
        outfile.write('-- %s - %s:%u --\n' % (txn_body, filename, lineno))
        outfile.write('%6i %5.2f %4i ' % (f, p, len(trail)))
        output_trail(outfile, trail, 37)


def output_txn_body_frequencies(trails, outfile):
    """Write to OUTFILE how often each txn_body occurs in TRAILS.

    One row per distinct (name, filename, lineno): the number of trails,
    the percentage of all trails, and the txn_body location.
    """
    bodies = [txn_body for (txn_body, trail) in trails]

    total_trails = len(trails)
    frequencies = list_frequencies(bodies)

    outfile.write(_separator)
    outfile.write('txn_body frequencies\n')
    outfile.write(_separator)
    outfile.write('frequency percentage txn_body\n')
    for ((txn_body, filename, lineno), f) in frequencies:
        p = float(f) * 100 / total_trails
        outfile.write('%6i %5.2f %s - %s:%u\n'
                      % (f, p, txn_body, filename, lineno))


def usage(pgm):
    """Write operating instructions for the program named PGM to stderr."""
    w = sys.stderr.write
    w("%s: a program for analyzing Subversion trail usage statistics.\n" % pgm)
    w("\n")
    w("Usage:\n")
    w("\n")
    w(" Compile Subversion with -DSVN_FS__TRAIL_DEBUG, which will cause it\n")
    w(" to print trail statistics to stderr. Save the stats to a file,\n")
    w(" invoke %s on the file, and ponder the output.\n" % pgm)
    w("\n")


if __name__ == '__main__':
    if len(sys.argv) > 2:
        sys.stderr.write("Error: too many arguments\n\n")
        usage(sys.argv[0])
        sys.exit(1)

    if len(sys.argv) == 1:
        # No file argument: read the log from standard input.
        trails = parse_trails_log(sys.stdin)
    else:
        try:
            infile = open(sys.argv[1])
        except (IOError):
            sys.stderr.write("Error: unable to open '%s'\n\n" % sys.argv[1])
            usage(sys.argv[0])
            sys.exit(1)
        # Close the log even if parsing exits early on invalid input.
        try:
            trails = parse_trails_log(infile)
        finally:
            infile.close()

    output_summary(trails, sys.stdout)
    output_trail_length_frequencies(trails, sys.stdout)
    output_trail_frequencies(trails, sys.stdout)
    output_txn_body_frequencies(trails, sys.stdout)