#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#

# See usage() for details, or run with --help option.
#
#          .-------------------------------------------------.
#          |  "An ad hoc format deserves an ad hoc parser."  |
#          `-------------------------------------------------'
#
# Some Subversion project log messages include parseable data to help
# track who's contributing what.  The exact syntax is described in
# http://subversion.apache.org/docs/community-guide/conventions.html#crediting,
# but here's an example, indented by three spaces, i.e., the "Patch by:"
# starts at the beginning of a line:
#
#    Patch by: David Anderson <david.anderson@calixo.net>
#              <justin@erenkrantz.com>
#              me
#    (I wrote the regression tests.)
#    Found by: Phineas T. Phinder <phtph@ph1nderz.com>
#    Suggested by: Snosbig Q. Ptermione <sqptermione@example.com>
#    Review by: Justin Erenkrantz <justin@erenkrantz.com>
#               rooneg
#    (They caught an off-by-one error in the main loop.)
#
# This is a pathological example, but it shows all the things we might
# need to parse.  We need to:
#
#   - Detect the officially-approved "WORD by: " fields.
#   - Grab every name (one per line) in each field.
#   - Handle names in various formats, unifying where possible.
#   - Expand "me" to the committer name for this revision.
#   - Associate a parenthetical aside following a field with that field.
#
# NOTES: You might be wondering, why not take 'svn log --xml' input?
# Well, that would be the Right Thing to do, but in practice this was
# a lot easier to whip up for straight 'svn log' output.  I'd have no
# objection to it being rewritten to take XML input.

import functools
import os
import sys
import re
import getopt
try:
    my_getopt = getopt.gnu_getopt
except AttributeError:
    my_getopt = getopt.getopt
try:
    # Python >=3.0
    from urllib.parse import quote as urllib_parse_quote
except ImportError:
    # Python <3.0
    from urllib import quote as urllib_parse_quote


# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = 'WARNING'
error_prefix = 'ERROR'


def complain(msg, fatal=False):
    """Print MSG as a warning, or if FATAL is true, print it as an error
    and exit."""
    if fatal:
        prefix = error_prefix
    else:
        prefix = warning_prefix
    sys.stderr.write('%s: %s\n' % (prefix, msg))
    if fatal:
        sys.exit(1)


def html_spam_guard(addr, entities_only=False):
    """Return a spam-protected version of email ADDR that renders the
    same in HTML as the original address.  If ENTITIES_ONLY, use a less
    thorough mangling scheme involving entities only, avoiding the use
    of tags."""
    if entities_only:
        template = "&#%d;"
    else:
        template = "<span>&#%d;</span>"
    return "".join([template % ord(ch) for ch in addr])


def escape_html(str):
    """Return an HTML-escaped version of STR."""
    # '&' must be replaced first, otherwise the ampersands introduced by
    # the other two replacements would be escaped a second time.
    return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


# Matches an already HTML-escaped "<...@...>" email address.
_spam_guard_in_html_block_re = re.compile(r'&lt;([^&]*@[^&]*)&gt;')


def _spam_guard_in_html_block_func(m):
    """Return the spam-guarded replacement for one matched address."""
    return "&lt;%s&gt;" % html_spam_guard(m.group(1))


def spam_guard_in_html_block(str):
    """Take a block of HTML data, and run html_spam_guard() on parts of it.

    STR is expected to be HTML-escaped already (see escape_html()), so the
    addresses to protect appear as "&lt;user@host&gt;"."""
    return _spam_guard_in_html_block_re.sub(_spam_guard_in_html_block_func,
                                            str)


def html_header(title, page_heading=None, highlight_targets=False):
    """Return an HTML file header as a string.  TITLE and PAGE_HEADING
    parameters are expected to already be HTML-escaped if needed.  If
    PAGE_HEADING is not given, TITLE is used for it.  If HIGHLIGHT_TARGETS
    is true, then include a style header that causes anchor targets to be
    surrounded by a red border when they are jumped to."""
    if not page_heading:
        page_heading = title
    s = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n'
    s += ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
    s += '<html><head>\n'
    s += '<meta http-equiv="Content-Type"'
    s += ' content="text/html; charset=UTF-8" />\n'
    if highlight_targets:
        s += '<style type="text/css">\n'
        s += ':target { border: 2px solid red; }\n'
        s += '</style>\n'
    s += '<title>%s</title>\n' % title
    s += '</head>\n\n'
    # The CSS property for the text colour is "color"; "text-color" does
    # not exist and is ignored by browsers.
    s += '<body style="color: black; background-color: white">\n\n'
    s += '<h1 style="text-align: center">%s</h1>\n\n' % page_heading
    s += '<hr />\n\n'
    return s


def html_footer():
    """Return the closing tags matching html_header()."""
    return '\n</body>\n</html>\n'


class Contributor(object):
    """A person credited in one or more log messages."""

    # Map contributor names to contributor instances, so that there
    # exists exactly one instance associated with a given name.
    # Fold names with email addresses.  That is, if we see someone
    # listed first with just an email address, but later with a real
    # name and that same email address together, we create only one
    # instance, and store it under both the email and the real name.
    all_contributors = { }

    def __init__(self, username, real_name, email):
        """Instantiate a contributor.  Don't use this to generate a
        Contributor for an external caller, though, use .get() instead."""
        self.real_name = real_name
        self.username = username
        self.email = email
        self.is_committer = False       # Assume not until hear otherwise.
        self.is_full_committer = False  # Assume not until hear otherwise.
        # Map verbs (e.g., "Patch", "Suggested", "Review") to lists of
        # LogMessage objects.  For example, the log messages stored under
        # "Patch" represent all the revisions for which this contributor
        # contributed a patch.
        self.activities = { }

    def add_activity(self, field_name, log):
        """Record that this contributor was active in FIELD_NAME in LOG.
        Recording the same LOG twice under one FIELD_NAME has no effect."""
        logs = self.activities.setdefault(field_name, [ ])
        if log not in logs:
            logs.append(log)

    @staticmethod
    def get(username, real_name, email):
        """If this contributor is already registered, just return it;
        otherwise, register it then return it.  Hint: use parse() to
        generate the arguments."""
        c = None
        for key in username, real_name, email:
            if key and key in Contributor.all_contributors:
                c = Contributor.all_contributors[key]
                break
        # If we didn't get a Contributor, create one now.
        if not c:
            c = Contributor(username, real_name, email)
        # If we know identifying information that the Contributor lacks,
        # then give it to the Contributor now.
        if username:
            if not c.username:
                c.username = username
            Contributor.all_contributors[username] = c
        if real_name:
            if not c.real_name:
                c.real_name = real_name
            Contributor.all_contributors[real_name] = c
        if email:
            if not c.email:
                c.email = email
            Contributor.all_contributors[email] = c
        # This Contributor has never been in better shape; return it.
        return c

    def score(self):
        """Return a contribution score for this contributor."""
        # Right now we count a patch as 2, anything else as 1.
        score = 0
        for activity, logs in self.activities.items():
            if activity == 'Patch':
                score += len(logs) * 2
            else:
                score += len(logs)
        return score

    def score_str(self):
        """Return a contribution score HTML string for this contributor,
        e.g. "2 patches, 1 non-patch".  The string is empty if there are
        no recorded activities."""
        patch_score = 0
        other_score = 0
        for activity, logs in self.activities.items():
            if activity == 'Patch':
                patch_score += len(logs)
            else:
                other_score += len(logs)
        parts = [ ]
        if patch_score == 1:
            parts.append("1 patch")
        elif patch_score > 1:
            parts.append("%d patches" % patch_score)
        if other_score == 1:
            parts.append("1 non-patch")
        elif other_score > 1:
            parts.append("%d non-patches" % other_score)
        return ", ".join(parts)

    def __cmp__(self, other):
        """Three-way comparison (only consulted by Python 2), ordering
        contributors the same way as sort_key()."""
        # Python 3 has no cmp() builtin, so spell the comparison out.
        a = self.sort_key()
        b = other.sort_key()
        return (a > b) - (a < b)

    def sort_key(self):
        """Return a key for sorted(): full committers go last, and within
        each group higher scores come first, with ties broken by name."""
        # The score is negated so that an ascending sort lists the most
        # active contributors first.
        return (self.is_full_committer, -self.score(), self.big_name())

    @staticmethod
    def parse(name):
        """Parse NAME, which can be

           - A committer username, or
           - A space-separated real name, or
           - A space-separated real name followed by an email address in
             angle brackets, or
           - Just an email address in angle brackets.

        (The email address may have '@' disguised as '{_AT_}'.)

        Return a tuple of (committer_username, real_name, email_address)
        any of which can be None if not available in NAME.  If NAME is
        empty or all whitespace, all three are None."""
        username = None
        real_name = None
        email = None
        name_components = name.split()
        if not name_components:
            return username, real_name, email
        last = name_components[-1]
        if len(name_components) == 1:
            name = last  # Effectively, name = name.strip()
            if name[0] == '<' and name[-1] == '>':
                email = name[1:-1]
            elif name.find('@') != -1 or name.find('{_AT_}') != -1:
                email = name
            else:
                username = name
        elif last[0] == '<' and last[-1] == '>':
            real_name = ' '.join(name_components[0:-1])
            email = last[1:-1]
        else:
            real_name = ' '.join(name_components)

        if email is not None:
            # We unobfuscate here and work with the '@' internally, since
            # we'll obfuscate it again (differently) before writing it out.
            email = email.replace('{_AT_}', '@')

        return username, real_name, email

    def canonical_name(self):
        """Return a canonical name for this contributor.  The canonical
        name may or may not be based on the contributor's actual email
        address.

        The canonical name will not contain filename-unsafe characters.

        This method is guaranteed to return the same canonical name every
        time only if no further contributions are recorded from this
        contributor after the first call.  This is because a contribution
        may bring a new form of the contributor's name, one which affects
        the algorithm used to construct canonical names."""
        retval = None
        if self.username:
            retval = self.username
        elif self.email:
            # Take some rudimentary steps to shorten the email address, to
            # make it more manageable.  If this is ever discovered to result
            # in collisions, we can always just use to the full address.
            try:
                at_posn = self.email.index('@')
                first_dot_after_at = self.email.index('.', at_posn)
                retval = self.email[0:first_dot_after_at]
            except ValueError:
                retval = self.email
        elif self.real_name:
            # Last resort: construct canonical name based on real name.
            retval = ''.join(self.real_name.lower().split(' '))
        if retval is None:
            # complain() exits the program when called as fatal.
            complain('Unable to construct a canonical name for Contributor.',
                     True)
        return urllib_parse_quote(retval, safe="!#$&'()+,;<=>@[]^`{}~")

    def big_name(self, html=False, html_eo=False):
        """Return as complete a name as possible for this contributor.
        If HTML, then call html_spam_guard() on email addresses.
        If HTML_EO, then do the same, but specifying entities_only mode."""
        html = html or html_eo
        name_bits = []
        if self.real_name:
            if html:
                name_bits.append(escape_html(self.real_name))
            else:
                name_bits.append(self.real_name)
        if self.email:
            if not self.real_name and not self.username:
                name_bits.append(self.email)
            elif html:
                # The surrounding angle brackets must be entities, or they
                # would be taken for markup.
                name_bits.append("&lt;%s&gt;"
                                 % html_spam_guard(self.email, html_eo))
            else:
                name_bits.append("<%s>" % self.email)
        if self.username:
            if not self.real_name and not self.email:
                name_bits.append(self.username)
            else:
                name_bits.append("(%s)" % self.username)
        return " ".join(name_bits)

    def __str__(self):
        s = 'CONTRIBUTOR: '
        s += self.big_name()
        s += "\ncanonical name: '%s'" % self.canonical_name()
        if len(self.activities) > 0:
            s += '\n   '
        for activity, logs in self.activities.items():
            s += '[%s:' % activity
            for log in logs:
                s += ' %s' % log.revision
            s += ']'
        return s

    def html_out(self, revision_url_pattern, filename):
        """Create an HTML file named FILENAME, showing all the revisions in
        which this contributor was active.  If REVISION_URL_PATTERN is
        true, it is interpolated with each revision number (without the
        leading 'r') to make the revision a link."""
        sorted_activities = sorted(self.activities.keys())
        unique_logs = { }

        # 'with' guarantees the file is closed even if writing fails.
        with open(filename, 'w') as out:
            out.write(html_header(self.big_name(html_eo=True),
                                  self.big_name(html=True), True))

            # A one-row table of activity names, above a row listing the
            # revisions recorded under each activity.
            out.write('<div class="h2" id="activities" title="activities">\n\n')
            out.write('<table border="1">\n')
            out.write('<tr>\n')
            for activity in sorted_activities:
                out.write('<td>%s</td>\n\n' % activity)
            out.write('</tr>\n')
            out.write('<tr>\n')
            for activity in sorted_activities:
                out.write('<td>\n')
                links = [ ]
                for log in self.activities[activity]:
                    links.append('<a href="#%s">%s</a>'
                                 % (log.revision, log.revision))
                    unique_logs[log] = True
                out.write(',\n'.join(links))
                out.write('</td>\n')
            out.write('</tr>\n')
            out.write('</table>\n\n')
            out.write('</div>\n\n')

            # The full text of every log message referenced above.
            sorted_logs = sorted(unique_logs.keys(), key=LogMessage.sort_key)
            for log in sorted_logs:
                out.write('<hr />\n')
                out.write('<div class="h3" id="%s" title="%s">\n'
                          % (log.revision, log.revision))
                out.write('<pre>\n')
                if revision_url_pattern:
                    revision_url = revision_url_pattern % log.revision[1:]
                    revision = '<a href="%s">%s</a>' \
                               % (escape_html(revision_url), log.revision)
                else:
                    revision = log.revision
                out.write('<b>%s | %s | %s</b>\n\n'
                          % (revision,
                             escape_html(log.committer),
                             escape_html(log.date)))
                out.write(spam_guard_in_html_block(escape_html(log.message)))
                out.write('</pre>\n')
                out.write('</div>\n\n')
            out.write('<hr />\n')

            out.write(html_footer())


class Field(object):
    """One field in one log message."""

    def __init__(self, name, alias = None):
        # The name of this field (e.g., "Patch", "Review", etc).
        self.name = name
        # An alias for the name of this field (e.g., "Reviewed").
        self.alias = alias
        # A list of contributor objects, in the order in which they were
        # encountered in the field.
        self.contributors = [ ]
        # Any parenthesized asides immediately following the field.  The
        # parentheses and trailing newline are left on.  In theory, this
        # supports concatenation of consecutive asides.  In practice, the
        # parser only detects the first one anyway, because additional
        # ones are very uncommon and furthermore by that point one should
        # probably be looking at the full log message.
        self.addendum = ''

    def add_contributor(self, contributor):
        """Append CONTRIBUTOR to this field's list of contributors."""
        self.contributors.append(contributor)

    def add_endum(self, addendum):
        """Append the text ADDENDUM to this field's addendum."""
        self.addendum += addendum

    def __str__(self):
        s = 'FIELD: %s (%d contributors)\n' % (self.name,
                                               len(self.contributors))
        for contributor in self.contributors:
            s += str(contributor) + '\n'
        s += self.addendum
        return s


class LogMessage(object):
    """One revision's log message, with the fields parsed out of it."""

    # Maps revision strings (e.g., "r12345") onto LogMessage instances,
    # holding all the LogMessage instances ever created.
    all_logs = { }
    # Keep track of youngest rev.
    max_revnum = 0

    def __init__(self, revision, committer, date):
        """Instantiate a log message.  All arguments are strings,
        including REVISION, which should retain its leading 'r'.
        Seeing the same REVISION twice is a fatal error."""
        self.revision = revision
        self.committer = committer
        self.date = date
        self.message = ''
        # Map field names (e.g., "Patch", "Review", "Suggested") onto
        # Field objects.
        self.fields = { }
        if revision in LogMessage.all_logs:
            complain("Revision '%s' seen more than once" % revision, True)
        LogMessage.all_logs[revision] = self
        rev_as_number = int(revision[1:])
        if rev_as_number > LogMessage.max_revnum:
            LogMessage.max_revnum = rev_as_number

    def add_field(self, field):
        """Store FIELD under its name, replacing any earlier field of the
        same name."""
        self.fields[field.name] = field

    def accum(self, line):
        """Accumulate one more line of raw message."""
        self.message += line

    def __cmp__(self, other):
        """Compare two log messages by revision number, for sort().
        Return -1, 0 or 1 depending on whether a > b, a == b, or a < b.
        Note that this is reversed from normal sorting behavior, but it's
        what we want for reverse chronological ordering of revisions.
        Only Python 2 consults this method."""
        a = self.sort_key()
        b = other.sort_key()
        return (a > b) - (a < b)

    def sort_key(self):
        """Return a key for sorted() that puts the youngest revision first."""
        # Negated so that an ascending sort yields reverse chronological
        # order, matching __cmp__().
        return -int(self.revision[1:])

    def __str__(self):
        s = '=' * 15
        header = ' LOG: %s | %s ' % (self.revision, self.committer)
        s += header
        s += '=' * 15
        s += '\n'
        for field_name in self.fields.keys():
            s += str(self.fields[field_name]) + '\n'
        s += '-' * 15
        s += '-' * len(header)
        s += '-' * 15
        s += '\n'
        return s



### Code to parse the logs. ##

log_separator = '-' * 72 + '\n'
log_header_re = re.compile(
    r'^(r[0-9]+) \| ([^|]+) \| ([^|]+) \| ([0-9]+)[^0-9]')
field_re = re.compile(
    r'^(Patch|Review(ed)?|Suggested|Found|Inspired|Tested|Reported) by:'
    r'\s*\S.*$')
field_aliases = {
    'Reviewed' : 'Review',
    'Reported' : 'Found',
}
parenthetical_aside_re = re.compile(r'^\s*\(.*\)\s*$')


def graze(input):
    """Parse 'svn log' (or 'svn log -v') output from the open file INPUT.

    Create a LogMessage for every log entry and record an activity on a
    Contributor for every name listed in a "WORD by:" field.  If a log
    entry credits nobody with a patch, the committer is credited with
    one, unless the committer is '(no author)'.

    Two consecutive separator lines, or a line after a separator that is
    not a log header, terminate the program with an error."""
    just_saw_separator = False

    while True:
        line = input.readline()
        if line == '':
            break
        if line == log_separator:
            if just_saw_separator:
                sys.stderr.write('Two separators in a row.\n')
                sys.exit(1)
            just_saw_separator = True
            continue
        if not just_saw_separator:
            # Text outside of any log entry; nothing to do with it.
            continue

        m = log_header_re.match(line)
        if not m:
            sys.stderr.write('Could not match log message header.\n')
            sys.stderr.write('Line was:\n')
            sys.stderr.write("'%s'\n" % line)
            sys.exit(1)
        log = LogMessage(m.group(1), m.group(2), m.group(3))
        num_lines = int(m.group(4))
        just_saw_separator = False
        saw_patch = False

        # Handle 'svn log -v' by waiting for the blank line.  Stop at
        # end of input too, so truncated input cannot loop forever.
        line = input.readline()
        while line != '\n' and line != '':
            line = input.readline()

        # Parse the log message.
        field = None
        while num_lines > 0:
            line = input.readline()
            log.accum(line)
            m = field_re.match(line)
            # If M matched, we're on the first line of a field.  Parse
            # the field, and any fields that directly follow it.
            while m:
                if field is None:
                    ident = m.group(1)
                    if ident in field_aliases:
                        field = Field(field_aliases[ident], ident)
                    else:
                        field = Field(ident)
                # Each line begins either with "WORD by:", or with
                # whitespace.
                in_field_re = re.compile(
                    r'^(' + re.escape(field.alias or field.name)
                    + r' by:\s+|\s+)([^\s(].*)')
                m = in_field_re.match(line)
                if m is None:
                    # The line looks like a field but carries no name we
                    # can parse (e.g. no space after the colon).  Skip it
                    # rather than crash.
                    complain("Error matching: %s" % line.rstrip('\n'))
                    if field.contributors:
                        log.add_field(field)
                    field = None
                    break
                user, real, email = Contributor.parse(m.group(2))
                if user == 'me':
                    user = log.committer
                c = Contributor.get(user, real, email)
                c.add_activity(field.name, log)
                if field.name == 'Patch':
                    saw_patch = True
                field.add_contributor(c)
                line = input.readline()
                if line == log_separator:
                    # If the log message doesn't end with its own
                    # newline (that is, there's the newline added by the
                    # svn client, but no further newline), then just move
                    # on to the next log entry.
                    just_saw_separator = True
                    num_lines = 0
                    log.add_field(field)
                    field = None
                    break
                log.accum(line)
                num_lines -= 1
                m = in_field_re.match(line)
                if not m:
                    # The current field is over.  The line either starts
                    # another field (handled by the next iteration), is
                    # an aside belonging to this field, or is ordinary
                    # message text.
                    m = field_re.match(line)
                    if not m and parenthetical_aside_re.match(line):
                        field.add_endum(line)
                    log.add_field(field)
                    field = None
            num_lines -= 1
        if not saw_patch and log.committer != '(no author)':
            c = Contributor.get(log.committer, None, None)
            c.add_activity('Patch', log)


index_introduction = '''
<p>The following list of contributors and their contributions is meant
to help us keep track of whom to consider for commit access. The list
was generated from "svn log" output by <a
href="http://svn.apache.org/repos/asf/subversion/trunk/tools/dev/contribulyze.py"
>contribulyze.py</a>, which looks for log messages that use the <a
href="http://subversion.apache.org/docs/community-guide/conventions.html#crediting"
>special contribution format</a>.</p>

<p><i>Please do not use this list as a generic guide to who has
contributed what to Subversion!</i> It omits existing <a
href="http://svn.apache.org/repos/asf/subversion/trunk/COMMITTERS"
>full committers</a>, for example, because they are irrelevant to our
search for new committers. Also, it merely counts changes, it does
not evaluate them. To truly understand what someone has contributed,
you have to read their changes in detail. This page can only assist
human judgement, not substitute for it.</p>

'''


def drop(revision_url_pattern):
    """Write index.html and one detail/NAME.html per listed contributor
    into the current directory.  REVISION_URL_PATTERN is passed on to
    Contributor.html_out()."""
    # Output the data.
    #
    # The data structures are all linked up nicely to one another.  You
    # can get all the LogMessages, and each LogMessage contains all the
    # Contributors involved with that commit; likewise, each Contributor
    # points back to all the LogMessages it contributed to.
    #
    # However, the HTML output is pretty simple right now.  It doesn't take
    # full advantage of all that cross-linking.  For each contributor, we
    # just create a file listing all the revisions contributed to; and we
    # build a master index of all contributors, each name being a link to
    # that contributor's individual file.  Much more is possible... but
    # let's just get this up and running first.
    #
    # (You could print out all log messages by iterating over
    # LogMessage.all_logs, if you wanted to.)

    detail_subdir = "detail"
    if not os.path.exists(detail_subdir):
        os.mkdir(detail_subdir)

    # Sorting alphabetically is acceptable, but even better would be to
    # sort by number of contributions, so the most active people appear at
    # the top -- that way we know whom to look at first for commit access
    # proposals.
    sorted_contributors = sorted(Contributor.all_contributors.values(),
                                 key = Contributor.sort_key)

    with open('index.html', 'w') as index:
        index.write(html_header('Contributors as of r%d'
                                % LogMessage.max_revnum))
        index.write(index_introduction)
        index.write('<ol>\n')
        # The same contributor appears under multiple keys, so uniquify.
        seen_contributors = { }
        for c in sorted_contributors:
            if c in seen_contributors:
                continue
            seen_contributors[c] = True
            if c.score() <= 0:
                continue
            if c.is_full_committer:
                # Don't even bother to print out full committers.  They are
                # a distraction from the purposes for which we're here.
                continue
            committerness = ''
            if c.is_committer:
                committerness = '&nbsp;(partial&nbsp;committer)'
            urlpath = "%s/%s.html" % (detail_subdir, c.canonical_name())
            fname = os.path.join(detail_subdir,
                                 "%s.html" % c.canonical_name())
            index.write('<li><p><a href="%s">%s</a>&nbsp;[%s]%s</p></li>\n'
                        % (urllib_parse_quote(urlpath),
                           c.big_name(html=True),
                           c.score_str(), committerness))
            c.html_out(revision_url_pattern, fname)
        index.write('</ol>\n')
        index.write(html_footer())


def process_committers(committers):
    """Read from open file handle COMMITTERS, which should be in
    the same format as the Subversion 'COMMITTERS' file.  Create
    Contributor objects based on the contents.

    Nothing is created if the 'Blanket commit access:' heading is never
    found.  Lines containing '@' that cannot be parsed are reported with
    a warning and skipped."""
    # Skip ahead to the list of full committers, giving up at end of file.
    line = committers.readline()
    while line and line != 'Blanket commit access:\n':
        line = committers.readline()
    in_full_committers = True
    matcher = re.compile(r'(\S+)\s+([^\(\)]+)\s+(\([^()]+\)){0,1}')
    line = committers.readline()
    while line:
        # Every @-sign we see after this point indicates a committer line...
        if line == 'Commit access for specific areas:\n':
            in_full_committers = False
        # ...except in the "dormant committers" area, which comes last anyway.
        if line == 'Committers who have asked to be listed as dormant:\n':
            in_full_committers = True
        elif line.find('@') >= 0:
            m = matcher.match(line.lstrip())
            if m is None:
                complain("Could not parse committer line: %s"
                         % line.rstrip('\n'))
            else:
                user = m.group(1)
                real_and_email = m.group(2).strip()
                ignored, real, email = Contributor.parse(real_and_email)
                c = Contributor.get(user, real, email)
                c.is_committer = True
                c.is_full_committer = in_full_committers
        line = committers.readline()


def usage():
    """Print the usage message to standard output."""
    print('USAGE: %s [-C COMMITTERS_FILE] < SVN_LOG_OR_LOG-V_OUTPUT' \
          % os.path.basename(sys.argv[0]))
    print('')
    print('Create HTML files in the current directory, rooted at index.html,')
    print('in which you can browse to see who contributed what.')
    print('')
    print('The log input should use the contribution-tracking format defined')
    print('in http://subversion.apache.org/docs/community-guide/conventions.html#crediting.')
    print('')
    print('Options:')
    print('')
    print('  -h, -H, -?, --help   Print this usage message and exit')
    print('  -C FILE              Use FILE as the COMMITTERS file')
    print('  -U URL               Use URL as a Python interpolation pattern to')
    print('                       generate URLs to link revisions to some kind')
    print('                       of web-based viewer (e.g. ViewCVS).  The')
    print('                       interpolation pattern should contain exactly')
    print('                       one format specifier, \'%s\', which will be')
    print('                       replaced with the revision number.')
    print('')


def main():
    """Parse the command line, read the log from standard input and write
    the HTML output."""
    try:
        opts, args = my_getopt(sys.argv[1:], 'C:U:hH?', [ 'help' ])
    except getopt.GetoptError as e:
        complain(str(e) + '\n\n')
        usage()
        sys.exit(1)

    # Parse options.
    revision_url_pattern = None
    for opt, value in opts:
        if opt in ('--help', '-h', '-H', '-?'):
            usage()
            sys.exit(0)
        elif opt == '-C':
            with open(value) as committers:
                process_committers(committers)
        elif opt == '-U':
            revision_url_pattern = value

    # Gather the data.
    graze(sys.stdin)

    # Output the data.
    drop(revision_url_pattern)


if __name__ == '__main__':
    main()