1#!/usr/bin/env python
2#
3#
4# Licensed to the Apache Software Foundation (ASF) under one
5# or more contributor license agreements.  See the NOTICE file
6# distributed with this work for additional information
7# regarding copyright ownership.  The ASF licenses this file
8# to you under the Apache License, Version 2.0 (the
9# "License"); you may not use this file except in compliance
10# with the License.  You may obtain a copy of the License at
11#
12#   http://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing,
15# software distributed under the License is distributed on an
16# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17# KIND, either express or implied.  See the License for the
18# specific language governing permissions and limitations
19# under the License.
20#
21#
22
23import sys
24import re
25
26header_re = re.compile(r'^([^:]*): ?(.*)$')
27
28class NodePath:
29    def __init__(self, path, headers):
30        self.path = path
31        self.headers = headers
32
33    def dump(self):
34        print((' ' * 3) + self.path)
35        headers = sorted(self.headers.keys())
36        for header in headers:
37            print((' ' * 6) + header + ': ' + self.headers[header])
38
39
40def dump_revision(rev, nodepaths):
41    sys.stderr.write('* Normalizing revision ' + rev + '...')
42    print('Revision ' + rev)
43    paths = sorted(nodepaths.keys())
44    for path in paths:
45        nodepath = nodepaths[path]
46        nodepath.dump()
47    sys.stderr.write('done\n')
48
49
50
51def parse_header_block(fp):
52    headers = {}
53    while True:
54        line = fp.readline()
55        if line == '':
56            return headers, 1
57        line = line.strip()
58        if line == '':
59            return headers, 0
60        matches = header_re.match(line)
61        if not matches:
62            raise Exception('Malformed header block')
63        headers[matches.group(1)] = matches.group(2)
64
65
66def parse_file(fp):
67    nodepaths = {}
68    current_rev = None
69
70    while True:
71        # Parse a block of headers
72        headers, eof = parse_header_block(fp)
73
74        # This is a revision header block
75        if 'Revision-number' in headers:
76
77            # If there was a previous revision, dump it
78            if current_rev:
79                dump_revision(current_rev, nodepaths)
80
81            # Reset the data for this revision
82            current_rev = headers['Revision-number']
83            nodepaths = {}
84
85            # Skip the contents
86            prop_len = headers.get('Prop-content-length', 0)
87            fp.read(int(prop_len))
88
89        # This is a node header block
90        elif 'Node-path' in headers:
91
92            # Make a new NodePath object, and add it to the
93            # dictionary thereof
94            path = headers['Node-path']
95            node = NodePath(path, headers)
96            nodepaths[path] = node
97
98            # Skip the content
99            text_len = headers.get('Text-content-length', 0)
100            prop_len = headers.get('Prop-content-length', 0)
101            fp.read(int(text_len) + int(prop_len))
102
103        # Not a revision, not a node -- if we've already seen at least
104        # one revision block, we are in an errorful state.
105        elif current_rev and len(headers.keys()):
106            raise Exception('Header block from outta nowhere')
107
108        if eof:
109            if current_rev:
110                dump_revision(current_rev, nodepaths)
111            break
112
113def usage():
114    print('Usage: ' + sys.argv[0] + ' [DUMPFILE]')
115    print('')
116    print('Reads a Subversion dumpfile from DUMPFILE (or, if not provided,')
117    print('from stdin) and normalizes the metadata contained therein,')
118    print('printing summarized and sorted information.  This is useful for')
119    print('generating data about dumpfiles in a diffable fashion.')
120    sys.exit(0)
121
122def main():
123    if len(sys.argv) > 1:
124        if sys.argv[1] == '--help':
125            usage()
126        fp = open(sys.argv[1], 'rb')
127    else:
128        fp = sys.stdin
129    parse_file(fp)
130
131
132if __name__ == '__main__':
133    main()
134
135
136
137
138