#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#

"""Normalize the metadata of a Subversion dumpfile into a diffable summary.

For every revision found in the dumpfile, the node paths touched by that
revision are printed in sorted order, each followed by its headers (also
sorted).  File and property contents are skipped, never printed.
"""

import sys
import re

# A single "Name: value" header line.
header_re = re.compile(r'^([^:]*): ?(.*)$')


class NodePath:
    """The headers recorded for one node path within a revision."""

    def __init__(self, path, headers):
        """Remember PATH and its HEADERS (a dict of header name -> value)."""
        self.path = path
        self.headers = headers

    def dump(self):
        """Print the path, then each of its headers sorted by name."""
        print((' ' * 3) + self.path)
        for header in sorted(self.headers):
            print((' ' * 6) + header + ': ' + self.headers[header])


def dump_revision(rev, nodepaths):
    """Print revision REV and every NodePath in NODEPATHS, sorted by path.

    NODEPATHS maps a path to its NodePath object.  Progress messages are
    written to stderr so that stdout carries only the normalized output.
    """
    sys.stderr.write('* Normalizing revision ' + rev + '...')
    print('Revision ' + rev)
    for path in sorted(nodepaths):
        nodepaths[path].dump()
    sys.stderr.write('done\n')


def _as_text(line):
    """Return LINE as text, decoding it when it is a distinct bytes object.

    On Python 2 `bytes` is `str`, so the line is returned untouched there;
    on Python 3 a line read from a binary stream is decoded as UTF-8.
    """
    if bytes is not str and isinstance(line, bytes):
        return line.decode('utf-8', 'replace')
    return line


def parse_header_block(fp):
    """Read one block of "Name: value" headers from FP.

    FP may be a binary or a text stream.  Reading stops at the first blank
    line or at end of file.

    Returns a tuple (HEADERS, EOF): HEADERS is a dict of the headers read,
    and EOF is 1 when end of file was reached, 0 when a blank line ended
    the block.

    Raises Exception if a non-blank line is not a valid header line.
    """
    headers = {}
    while True:
        line = fp.readline()
        # readline() yields '' or b'' at end of file; both are falsy, so
        # this works regardless of the mode FP was opened in.
        if not line:
            return headers, 1
        line = line.strip()
        if not line:
            return headers, 0
        matches = header_re.match(_as_text(line))
        if not matches:
            raise Exception('Malformed header block')
        headers[matches.group(1)] = matches.group(2)


def _skip_content(fp, headers, *length_headers):
    """Read and discard the content announced by LENGTH_HEADERS in HEADERS.

    A length header that is absent counts as zero.
    """
    total = sum(int(headers.get(name, 0)) for name in length_headers)
    fp.read(total)


def parse_file(fp):
    """Parse the dumpfile FP and print a normalized summary per revision.

    Each revision is printed (via dump_revision) once the next revision
    header block, or end of file, is reached.

    Raises Exception if, after the first revision, a non-empty header block
    is found that describes neither a revision nor a node.
    """
    nodepaths = {}
    current_rev = None

    while True:
        # Parse a block of headers
        headers, eof = parse_header_block(fp)

        # This is a revision header block
        if 'Revision-number' in headers:

            # If there was a previous revision, dump it
            if current_rev:
                dump_revision(current_rev, nodepaths)

            # Reset the data for this revision
            current_rev = headers['Revision-number']
            nodepaths = {}

            # Skip the contents
            _skip_content(fp, headers, 'Prop-content-length')

        # This is a node header block
        elif 'Node-path' in headers:

            # Make a new NodePath object, and add it to the
            # dictionary thereof
            path = headers['Node-path']
            nodepaths[path] = NodePath(path, headers)

            # Skip the content
            _skip_content(fp, headers,
                          'Text-content-length', 'Prop-content-length')

        # Not a revision, not a node -- if we've already seen at least
        # one revision block, we are in an errorful state.
        elif current_rev and headers:
            raise Exception('Header block from outta nowhere')

        if eof:
            if current_rev:
                dump_revision(current_rev, nodepaths)
            break


def usage():
    """Print the usage message and exit with status 0."""
    print('Usage: ' + sys.argv[0] + ' [DUMPFILE]')
    print('')
    print('Reads a Subversion dumpfile from DUMPFILE (or, if not provided,')
    print('from stdin) and normalizes the metadata contained therein,')
    print('printing summarized and sorted information. This is useful for')
    print('generating data about dumpfiles in a diffable fashion.')
    sys.exit(0)


def main():
    """Normalize the dumpfile named on the command line, or stdin."""
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()
        # Binary mode: the content-length headers count bytes.
        with open(sys.argv[1], 'rb') as fp:
            parse_file(fp)
    else:
        # Prefer the underlying binary stream when there is one (Python 3)
        # so that content lengths are applied to bytes, not characters.
        parse_file(getattr(sys.stdin, 'buffer', sys.stdin))


if __name__ == '__main__':
    main()