1#!/usr/bin/env python
2#
3#
4# Licensed to the Apache Software Foundation (ASF) under one
5# or more contributor license agreements.  See the NOTICE file
6# distributed with this work for additional information
7# regarding copyright ownership.  The ASF licenses this file
8# to you under the Apache License, Version 2.0 (the
9# "License"); you may not use this file except in compliance
10# with the License.  You may obtain a copy of the License at
11#
12#   http://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing,
15# software distributed under the License is distributed on an
16# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17# KIND, either express or implied.  See the License for the
18# specific language governing permissions and limitations
19# under the License.
20#
21#
22
23# See usage() for details, or run with --help option.
24#
25#          .-------------------------------------------------.
26#          |  "An ad hoc format deserves an ad hoc parser."  |
27#          `-------------------------------------------------'
28#
29# Some Subversion project log messages include parseable data to help
30# track who's contributing what.  The exact syntax is described in
31# http://subversion.apache.org/docs/community-guide/conventions.html#crediting,
32# but here's an example, indented by three spaces, i.e., the "Patch by:"
33# starts at the beginning of a line:
34#
35#    Patch by: David Anderson <david.anderson@calixo.net>
36#              <justin@erenkrantz.com>
37#              me
38#    (I wrote the regression tests.)
39#    Found by: Phineas T. Phinder <phtph@ph1nderz.com>
40#    Suggested by: Snosbig Q. Ptermione <sqptermione@example.com>
41#    Review by: Justin Erenkrantz <justin@erenkrantz.com>
42#               rooneg
43#    (They caught an off-by-one error in the main loop.)
44#
45# This is a pathological example, but it shows all the things we might
46# need to parse.  We need to:
47#
48#   - Detect the officially-approved "WORD by: " fields.
49#   - Grab every name (one per line) in each field.
50#   - Handle names in various formats, unifying where possible.
51#   - Expand "me" to the committer name for this revision.
52#   - Associate a parenthetical aside following a field with that field.
53#
54# NOTES: You might be wondering, why not take 'svn log --xml' input?
55# Well, that would be the Right Thing to do, but in practice this was
56# a lot easier to whip up for straight 'svn log' output.  I'd have no
57# objection to it being rewritten to take XML input.
58
59import functools
60import os
61import sys
62import re
63import getopt
64try:
65  my_getopt = getopt.gnu_getopt
66except AttributeError:
67  my_getopt = getopt.getopt
68try:
69  # Python >=3.0
70  from urllib.parse import quote as urllib_parse_quote
71except ImportError:
72  # Python <3.0
73  from urllib import quote as urllib_parse_quote
74
75
# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = 'WARNING'
error_prefix = 'ERROR'

def complain(msg, fatal=False):
  """Report MSG on stderr, as a warning or as a fatal error.

  MSG is written on a line of its own, preceded by warning_prefix (or
  by error_prefix if FATAL is true), a colon and a space.  If FATAL is
  true the process then exits with status 1, so the call does not
  return."""
  # Use the module-level prefixes rather than repeating the literals, so
  # that the two cannot drift apart.
  prefix = error_prefix if fatal else warning_prefix
  sys.stderr.write('%s: %s\n' % (prefix, msg))
  if fatal:
    sys.exit(1)
90
91
def html_spam_guard(addr, entities_only=False):
  """Return email address ADDR obfuscated for use in an HTML page.

  Every character of ADDR is replaced by its decimal character
  reference, so the result renders the same in HTML as the original
  address.  Unless ENTITIES_ONLY is true, each reference is also
  wrapped in a <span> element of its own; with ENTITIES_ONLY the
  result contains no tags at all."""
  template = "&#%d;" if entities_only else "<span>&#%d;</span>"
  return "".join([template % ord(ch) for ch in addr])
104
105
def escape_html(str):
  """Return STR with '&', '<' and '>' replaced by the corresponding
  HTML entity references."""
  # '&' has to be handled first; otherwise the ampersands introduced by
  # the other two substitutions would themselves be escaped.
  escaped = str
  for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
    escaped = escaped.replace(char, entity)
  return escaped
109
110
# Matches "&lt;...&gt;" where the enclosed text contains an '@' and no
# '&' -- the form an angle-bracketed address has once HTML-escaped.
_spam_guard_in_html_block_re = re.compile(r'&lt;([^&]*@[^&]*)&gt;')

def _spam_guard_in_html_block_func(m):
  """Return the replacement for match M: the same escaped brackets
  around the html_spam_guard()ed form of the text between them."""
  guarded = html_spam_guard(m.group(1))
  return "&lt;" + guarded + "&gt;"

def spam_guard_in_html_block(str):
  """Return HTML block STR with html_spam_guard() applied to the text
  inside every '&lt;...@...&gt;' span it contains."""
  return _spam_guard_in_html_block_re.sub(_spam_guard_in_html_block_func, str)
118
def html_header(title, page_heading=None, highlight_targets=False):
  """Return the opening part of an HTML page, up to and including the
  <h1> heading and the horizontal rule below it.

  TITLE goes into the <title> element and PAGE_HEADING into the <h1>
  element; both are expected to already be HTML-escaped if needed.
  If PAGE_HEADING is empty or None, TITLE is used for the heading too.
  If HIGHLIGHT_TARGETS is true, include a style sheet that causes
  anchor targets to be surrounded by a red border when they are jumped
  to."""
  if not page_heading:
    page_heading = title
  parts = [
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n',
    ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n',
    '<html><head>\n',
    '<meta http-equiv="Content-Type"',
    ' content="text/html; charset=UTF-8" />\n',
  ]
  if highlight_targets:
    parts.append('<style type="text/css">\n')
    parts.append(':target { border: 2px solid red; }\n')
    parts.append('</style>\n')
  parts.append('<title>%s</title>\n' % title)
  parts.append('</head>\n\n')
  # The CSS property that sets the foreground colour is "color"; CSS has
  # no "text-color" property, so browsers would ignore such a declaration.
  parts.append('<body style="color: black; background-color: white">\n\n')
  parts.append('<h1 style="text-align: center">%s</h1>\n\n' % page_heading)
  parts.append('<hr />\n\n')
  return ''.join(parts)
141
142
def html_footer():
  """Return the closing </body> and </html> tags of an HTML page,
  preceded by a blank separator and each followed by a newline."""
  closing_tags = ('</body>', '</html>')
  return '\n' + ''.join([tag + '\n' for tag in closing_tags])
145
146
class Contributor(object):
  """A person credited with contributions, known by any combination of
  committer username, real name and email address.

  Obtain instances through Contributor.get(), which keeps exactly one
  instance per person in the all_contributors registry."""

  # Map contributor names to contributor instances, so that there
  # exists exactly one instance associated with a given name.
  # Fold names with email addresses.  That is, if we see someone
  # listed first with just an email address, but later with a real
  # name and that same email address together, we create only one
  # instance, and store it under both the email and the real name.
  all_contributors = { }

  def __init__(self, username, real_name, email):
    """Instantiate a contributor.  Don't use this to generate a
    Contributor for an external caller, though, use .get() instead."""
    self.real_name = real_name
    self.username  = username
    self.email     = email
    self.is_committer = False       # Assume not until hear otherwise.
    self.is_full_committer = False  # Assume not until hear otherwise.
    # Map verbs (e.g., "Patch", "Suggested", "Review") to lists of
    # LogMessage objects.  For example, the log messages stored under
    # "Patch" represent all the revisions for which this contributor
    # contributed a patch.
    self.activities = { }

  def add_activity(self, field_name, log):
    """Record that this contributor was active in FIELD_NAME in LOG.
    Recording the same LOG twice under one FIELD_NAME has no effect."""
    logs = self.activities.setdefault(field_name, [ ])
    if log not in logs:
      logs.append(log)

  @staticmethod
  def get(username, real_name, email):
    """If this contributor is already registered, just return it;
    otherwise, register it then return it.  Hint: use parse() to
    generate the arguments.

    A contributor counts as registered if any non-empty one of
    USERNAME, REAL_NAME and EMAIL is already a key of all_contributors.
    Afterwards the returned instance is registered under every
    non-empty argument."""
    c = None
    for key in username, real_name, email:
      if key and key in Contributor.all_contributors:
        c = Contributor.all_contributors[key]
        break
    # If we didn't get a Contributor, create one now.
    if c is None:
      c = Contributor(username, real_name, email)
    # If we know identifying information that the Contributor lacks,
    # then give it to the Contributor now.
    if username:
      if not c.username:
        c.username = username
      Contributor.all_contributors[username] = c
    if real_name:
      if not c.real_name:
        c.real_name = real_name
      Contributor.all_contributors[real_name] = c
    if email:
      if not c.email:
        c.email = email
      Contributor.all_contributors[email] = c
    # This Contributor has never been in better shape; return it.
    return c

  def score(self):
    """Return a contribution score for this contributor."""
    # Right now we count a patch as 2, anything else as 1.
    score = 0
    for activity, logs in self.activities.items():
      if activity == 'Patch':
        score += 2 * len(logs)
      else:
        score += len(logs)
    return score

  def score_str(self):
    """Return a contribution score HTML string for this contributor,
    e.g. "2&nbsp;patches,&nbsp;1&nbsp;non-patch".  The string is empty
    if no activity has been recorded."""
    patch_count = 0
    other_count = 0
    for activity, logs in self.activities.items():
      if activity == 'Patch':
        patch_count += len(logs)
      else:
        other_count += len(logs)

    def describe(count, singular, plural):
      # Return '' for nothing, else the count followed by the right noun.
      if count == 0:
        return ""
      if count == 1:
        return "1&nbsp;%s" % singular
      return "%d&nbsp;%s" % (count, plural)

    pieces = [describe(patch_count, 'patch', 'patches'),
              describe(other_count, 'non-patch', 'non-patches')]
    return ",&nbsp;".join([piece for piece in pieces if piece])

  def __cmp__(self, other):
    """Python 2 comparison hook; orders contributors as sort_key() does.
    Return -1, 0 or 1.  (Python 3 never calls this; sort there with
    key=Contributor.sort_key.)"""
    # Spelled out without cmp(), which does not exist in Python 3.
    mine, theirs = self.sort_key(), other.sort_key()
    return (mine > theirs) - (mine < theirs)

  def sort_key(self):
    """Return a key that sorts non-full-committers before full
    committers, then higher scores before lower ones, then by
    big_name()."""
    # The score is negated so that an ascending sort puts the most
    # active contributors first.
    return (self.is_full_committer, -self.score(), self.big_name())

  @staticmethod
  def parse(name):
    """Parse NAME, which can be

       - A committer username, or
       - A space-separated real name, or
       - A space-separated real name followed by an email address in
           angle brackets, or
       - Just an email address in angle brackets.

     (The email address may have '@' disguised as '{_AT_}'.)

     Return a tuple of (committer_username, real_name, email_address)
     any of which can be None if not available in NAME.  If NAME is
     empty or all whitespace, all three are None."""
    username  = None
    real_name = None
    email     = None
    name_components = name.split()
    if not name_components:
      # Nothing to parse; indexing below would fail on an empty list.
      return username, real_name, email
    last = name_components[-1]
    bracketed = (last[0] == '<' and last[-1] == '>')
    if len(name_components) == 1:
      if bracketed:
        email = last[1:-1]
      elif '@' in last or '{_AT_}' in last:
        email = last
      else:
        username = last
    elif bracketed:
      real_name = ' '.join(name_components[:-1])
      email = last[1:-1]
    else:
      real_name = ' '.join(name_components)

    if email is not None:
      # We unobfuscate here and work with the '@' internally, since
      # we'll obfuscate it again (differently) before writing it out.
      email = email.replace('{_AT_}', '@')

    return username, real_name, email

  def canonical_name(self):
    """Return a canonical name for this contributor.  The canonical
    name may or may not be based on the contributor's actual email
    address.

    The canonical name will not contain filename-unsafe characters.

    This method is guaranteed to return the same canonical name every
    time only if no further contributions are recorded from this
    contributor after the first call.  This is because a contribution
    may bring a new form of the contributor's name, one which affects
    the algorithm used to construct canonical names.

    If the contributor has no username, email or real name at all,
    report a fatal error through complain()."""
    retval = None
    if self.username:
      retval = self.username
    elif self.email:
      # Take some rudimentary steps to shorten the email address, to
      # make it more manageable.  If this is ever discovered to result
      # in collisions, we can always just use the full address.
      try:
        at_posn = self.email.index('@')
        first_dot_after_at = self.email.index('.', at_posn)
        retval = self.email[0:first_dot_after_at]
      except ValueError:
        retval = self.email
    elif self.real_name:
      # Last resort: construct canonical name based on real name.
      retval = ''.join(self.real_name.lower().split(' '))
    if retval is None:
      complain('Unable to construct a canonical name for Contributor.', True)
    return urllib_parse_quote(retval, safe="!#$&'()+,;<=>@[]^`{}~")

  def big_name(self, html=False, html_eo=False):
    """Return as complete a name as possible for this contributor.
    If HTML, then call html_spam_guard() on email addresses.
    If HTML_EO, then do the same, but specifying entities_only mode."""
    html = html or html_eo
    name_bits = []
    if self.real_name:
      if html:
        name_bits.append(escape_html(self.real_name))
      else:
        name_bits.append(self.real_name)
    if self.email:
      if not self.real_name and not self.username:
        name_bits.append(self.email)
      elif html:
        name_bits.append("&lt;%s&gt;" % html_spam_guard(self.email, html_eo))
      else:
        name_bits.append("<%s>" % self.email)
    if self.username:
      if not self.real_name and not self.email:
        name_bits.append(self.username)
      else:
        name_bits.append("(%s)" % self.username)
    return " ".join(name_bits)

  def __str__(self):
    """Return a plain-text summary: the big name, the canonical name,
    and the revisions recorded under each activity."""
    parts = ['CONTRIBUTOR: ', self.big_name(),
             "\ncanonical name: '%s'" % self.canonical_name()]
    if len(self.activities) > 0:
      parts.append('\n   ')
    for activity, logs in self.activities.items():
      parts.append('[%s:' % activity)
      for log in logs:
        parts.append(' %s' % log.revision)
      parts.append(']')
    return ''.join(parts)

  def html_out(self, revision_url_pattern, filename):
    """Create an HTML file named FILENAME, showing all the revisions in which
    this contributor was active.

    If REVISION_URL_PATTERN is true, it is interpolated with each
    revision number (the revision string minus its first character)
    to make that revision's heading a link."""
    sorted_activities = sorted(self.activities.keys())
    unique_logs = set()

    # The 'with' block closes the file even if a write fails part way.
    with open(filename, 'w') as out:
      out.write(html_header(self.big_name(html_eo=True),
                            self.big_name(html=True), True))

      # A two-row table: activity names, then the revisions under each.
      out.write('<div class="h2" id="activities" title="activities">\n\n')
      out.write('<table border="1">\n')
      out.write('<tr>\n')
      for activity in sorted_activities:
        out.write('<td>%s</td>\n\n' % activity)
      out.write('</tr>\n')
      out.write('<tr>\n')
      for activity in sorted_activities:
        logs = self.activities[activity]
        out.write('<td>\n')
        out.write(',\n'.join(['<a href="#%s">%s</a>'
                              % (log.revision, log.revision)
                              for log in logs]))
        unique_logs.update(logs)
        out.write('</td>\n')
      out.write('</tr>\n')
      out.write('</table>\n\n')
      out.write('</div>\n\n')

      # Then every log message involved, once each.
      for log in sorted(unique_logs, key=LogMessage.sort_key):
        out.write('<hr />\n')
        out.write('<div class="h3" id="%s" title="%s">\n' % (log.revision,
                                                             log.revision))
        out.write('<pre>\n')
        if revision_url_pattern:
          revision_url = revision_url_pattern % log.revision[1:]
          revision = '<a href="%s">%s</a>' \
              % (escape_html(revision_url), log.revision)
        else:
          revision = log.revision
        out.write('<b>%s | %s | %s</b>\n\n' % (revision,
                                               escape_html(log.committer),
                                               escape_html(log.date)))
        out.write(spam_guard_in_html_block(escape_html(log.message)))
        out.write('</pre>\n')
        out.write('</div>\n\n')
      out.write('<hr />\n')

      out.write(html_footer())
427
428
class Field:
  """One field in one log message."""
  def __init__(self, name, alias = None):
    # Canonical field name, e.g. "Patch" or "Review".
    self.name = name
    # Alternative spelling under which the field appeared in the log
    # message (e.g. "Reviewed"), or None.
    self.alias = alias
    # Contributor objects, in the order they were added.
    self.contributors = [ ]
    # Text of any parenthesized asides immediately following the
    # field, stored exactly as given to add_endum().  Several asides
    # would simply be concatenated here.
    self.addendum = ''
  def add_contributor(self, contributor):
    """Append CONTRIBUTOR to this field's list of contributors."""
    self.contributors.append(contributor)
  def add_endum(self, addendum):
    """Append the text ADDENDUM to this field's addendum."""
    self.addendum = self.addendum + addendum
  def __str__(self):
    """Return a summary line, then one line per contributor, then the
    addendum."""
    pieces = ['FIELD: %s (%d contributors)\n'
              % (self.name, len(self.contributors))]
    pieces.extend([str(person) + '\n' for person in self.contributors])
    pieces.append(self.addendum)
    return ''.join(pieces)
456
457
class LogMessage(object):
  """One revision's log entry: revision, committer, date, raw message
  text, and the contribution fields parsed from the message."""

  # Maps revision strings (e.g., "r12345") onto LogMessage instances,
  # holding all the LogMessage instances ever created.
  all_logs = { }
  # Keep track of youngest rev.
  max_revnum = 0
  def __init__(self, revision, committer, date):
    """Instantiate a log message.  All arguments are strings,
    including REVISION, which should retain its leading 'r'.

    The new instance is registered in all_logs, and max_revnum is
    raised if REVISION is younger.  Seeing the same REVISION twice is
    reported as a fatal error through complain()."""
    self.revision = revision
    self.committer = committer
    self.date = date
    self.message = ''
    # Map field names (e.g., "Patch", "Review", "Suggested") onto
    # Field objects.
    self.fields = { }
    if revision in LogMessage.all_logs:
      complain("Revision '%s' seen more than once" % revision, True)
    LogMessage.all_logs[revision] = self
    rev_as_number = int(revision[1:])
    if rev_as_number > LogMessage.max_revnum:
      LogMessage.max_revnum = rev_as_number
  def add_field(self, field):
    """Store FIELD under its name, replacing any field of that name."""
    self.fields[field.name] = field
  def accum(self, line):
    """Accumulate one more line of raw message."""
    self.message += line

  def __cmp__(self, other):
    """Compare two log messages by revision number, for sort().
    Return -1, 0 or 1 depending on whether a > b, a == b, or a < b.
    Note that this is reversed from normal sorting behavior, but it's
    what we want for reverse chronological ordering of revisions.
    (Python 3 never calls this; sort there with
    key=LogMessage.sort_key.)"""
    mine, theirs = self.sort_key(), other.sort_key()
    return (mine > theirs) - (mine < theirs)

  def sort_key(self):
    """Return a key that sorts log messages in reverse chronological
    order, i.e. youngest revision first, the same order __cmp__()
    defines."""
    # Negated so that an ascending sort yields the highest revision
    # number first.
    return -int(self.revision[1:])

  def __str__(self):
    """Return a plain-text dump: a header line with revision and
    committer, each field, and a closing rule as wide as the header."""
    header = ' LOG: %s | %s ' % (self.revision, self.committer)
    pieces = ['=' * 15, header, '=' * 15, '\n']
    for field in self.fields.values():
      pieces.append(str(field) + '\n')
    pieces.append('-' * (15 + len(header) + 15))
    pieces.append('\n')
    return ''.join(pieces)
513
514
515
### Code to parse the logs. ##

# The patterns below are raw strings: sequences such as "\|" and "\s"
# are not valid escapes in an ordinary string literal, and recent
# Python versions warn about them.

# The line of dashes found between log entries in 'svn log' output.
log_separator = '-' * 72 + '\n'
# First line of a log entry.  Groups: revision (with its leading 'r'),
# committer, date, and the number of lines in the log message.
log_header_re = re.compile(
                r'^(r[0-9]+) \| ([^|]+) \| ([^|]+) \| ([0-9]+)[^0-9]')
# First line of a contribution field; group 1 is the word before " by:".
field_re = re.compile(
           r'^(Patch|Review(ed)?|Suggested|Found|Inspired|Tested|Reported) by:'
           r'\s*\S.*$')
# Alternative field words, mapped to the field name they are stored under.
field_aliases = {
  'Reviewed' : 'Review',
  'Reported' : 'Found',
}
# A line consisting of one parenthesized remark.
parenthetical_aside_re = re.compile(r'^\s*\(.*\)\s*$')
529
def graze(input):
  """Parse 'svn log' or 'svn log -v' output read from file object INPUT.

  For every log entry a LogMessage is created and given the raw
  message text.  Every name listed in a "WORD by:" field of the message
  is turned into a Contributor (with "me" standing for the committer)
  and credited with that field's activity for the revision.  If a
  message has no "Patch by:" field, the committer is credited with the
  patch, unless the committer is '(no author)'.

  Malformed input -- two separators in a row, an unparseable header
  line, or a field line whose names cannot be extracted -- is reported
  on stderr and ends the process with exit status 1."""
  just_saw_separator = False

  while True:
    line = input.readline()
    if line == '':
      break
    if line == log_separator:
      if just_saw_separator:
        sys.stderr.write('Two separators in a row.\n')
        sys.exit(1)
      just_saw_separator = True
      continue
    if not just_saw_separator:
      # Only the line right after a separator starts a log entry;
      # anything else seen at this level is skipped.
      continue

    m = log_header_re.match(line)
    if not m:
      sys.stderr.write('Could not match log message header.\n')
      sys.stderr.write('Line was:\n')
      sys.stderr.write("'%s'\n" % line)
      sys.exit(1)
    log = LogMessage(m.group(1), m.group(2), m.group(3))
    num_lines = int(m.group(4))
    just_saw_separator = False
    saw_patch = False
    # Handle 'svn log -v' by waiting for the blank line.  Also stop at
    # end of input: readline() keeps returning '' there, so waiting for
    # '\n' alone would never finish on truncated input.
    line = input.readline()
    while line != '\n' and line != '':
      line = input.readline()
    # Parse the log message.
    field = None
    while num_lines > 0:
      line = input.readline()
      log.accum(line)
      m = field_re.match(line)
      # A true M means LINE is the first line of a field.  Parse the
      # field; the loop continues for as long as the following line
      # carries another name or starts another field.
      while m:
        if not field:
          ident = m.group(1)
          if ident in field_aliases:
            field = Field(field_aliases[ident], ident)
          else:
            field = Field(ident)
        # Each line begins either with "WORD by:", or with whitespace.
        in_field_re = re.compile(r'^('
                                 + (field.alias or field.name)
                                 + r' by:\s+|\s+)([^\s(].*)')
        m = in_field_re.match(line)
        if m is None:
          # The line looked like a field but no name can be extracted
          # from it.  There is no match object to continue with, so
          # stop here instead of failing on m.group() below.
          sys.stderr.write("Error matching: %s\n" % (line))
          sys.exit(1)
        user, real, email = Contributor.parse(m.group(2))
        if user == 'me':
          user = log.committer
        c = Contributor.get(user, real, email)
        c.add_activity(field.name, log)
        if (field.name == 'Patch'):
          saw_patch = True
        field.add_contributor(c)
        line = input.readline()
        if line == log_separator:
          # If the log message doesn't end with its own
          # newline (that is, there's the newline added by the
          # svn client, but no further newline), then just move
          # on to the next log entry.  The field is complete at this
          # point, so store it before leaving.
          log.add_field(field)
          just_saw_separator = True
          num_lines = 0
          break
        log.accum(line)
        num_lines -= 1
        m = in_field_re.match(line)
        if not m:
          # Not another name for this field: the field is finished.
          # LINE may start a new field (then the loop goes round again
          # with a fresh Field) or be an aside belonging to this one.
          m = field_re.match(line)
          if not m:
            aside_match = parenthetical_aside_re.match(line)
            if aside_match:
              field.add_endum(line)
          log.add_field(field)
          field = None
      num_lines -= 1
    if not saw_patch and log.committer != '(no author)':
      c = Contributor.get(log.committer, None, None)
      c.add_activity('Patch', log)
616
# HTML written to index.html between the page header and the list of
# contributors.
index_introduction = '''
<p>The following list of contributors and their contributions is meant
to help us keep track of whom to consider for commit access.  The list
was generated from "svn&nbsp;log" output by <a
href="http://svn.apache.org/repos/asf/subversion/trunk/tools/dev/contribulyze.py"
>contribulyze.py</a>, which looks for log messages that use the <a
href="http://subversion.apache.org/docs/community-guide/conventions.html#crediting"
>special contribution format</a>.</p>

<p><i>Please do not use this list as a generic guide to who has
contributed what to Subversion!</i> It omits existing <a
href="http://svn.apache.org/repos/asf/subversion/trunk/COMMITTERS"
>full committers</a>, for example, because they are irrelevant to our
search for new committers.  Also, it merely counts changes, it does
not evaluate them.  To truly understand what someone has contributed,
you have to read their changes in detail.  This page can only assist
human judgement, not substitute for it.</p>

'''

def drop(revision_url_pattern):
  """Write the gathered data as HTML files in the current directory.

  Creates 'index.html', listing every contributor who has a positive
  score and is not a full committer, and one page per listed
  contributor in the 'detail' subdirectory (created if missing).
  REVISION_URL_PATTERN is passed on to Contributor.html_out()."""
  # The data structures are all linked up nicely to one another.  You
  # can get all the LogMessages, and each LogMessage contains all the
  # Contributors involved with that commit; likewise, each Contributor
  # points back to all the LogMessages it contributed to.
  #
  # However, the HTML output is pretty simple right now.  It doesn't
  # take full advantage of all that cross-linking.  For each
  # contributor, we just create a file listing all the revisions
  # contributed to; and we build a master index of all contributors,
  # each name being a link to that contributor's individual file.  Much
  # more is possible... but let's just get this up and running first.
  #
  # (To print out all log messages while debugging, iterate over
  # LogMessage.all_logs.values().)

  detail_subdir = "detail"
  if not os.path.exists(detail_subdir):
    os.mkdir(detail_subdir)

  # The index lists contributors in the order Contributor.sort_key
  # defines.
  sorted_contributors = sorted(Contributor.all_contributors.values(),
                               key = Contributor.sort_key)
  # The same contributor appears under multiple keys, so uniquify.
  seen_contributors = set()

  # The 'with' block closes index.html even if writing a page fails.
  with open('index.html', 'w') as index:
    index.write(html_header('Contributors as of r%d' % LogMessage.max_revnum))
    index.write(index_introduction)
    index.write('<ol>\n')
    for c in sorted_contributors:
      if c in seen_contributors:
        continue
      seen_contributors.add(c)
      if c.score() <= 0:
        continue
      if c.is_full_committer:
        # Don't even bother to print out full committers.  They are
        # a distraction from the purposes for which we're here.
        continue
      committerness = ''
      if c.is_committer:
        committerness = '&nbsp;(partial&nbsp;committer)'
      page_name = "%s.html" % c.canonical_name()
      urlpath = "%s/%s" % (detail_subdir, page_name)
      fname = os.path.join(detail_subdir, page_name)
      index.write('<li><p><a href="%s">%s</a>&nbsp;[%s]%s</p></li>\n'
                  % (urllib_parse_quote(urlpath),
                     c.big_name(html=True),
                     c.score_str(), committerness))
      c.html_out(revision_url_pattern, fname)
    index.write('</ol>\n')
    index.write(html_footer())
695
696
def process_committers(committers):
  """Read from open file handle COMMITTERS, which should be in
  the same format as the Subversion 'COMMITTERS' file.  Create
  Contributor objects based on the contents.

  Everything up to the 'Blanket commit access:' line is skipped.  After
  it, every line containing an '@' is taken as a committer entry: the
  first word is the username and the rest, up to an optional
  parenthesized remark, is the real name and email address.  The
  resulting contributors are marked as committers, and as full
  committers except between the 'Commit access for specific areas:'
  and 'Committers who have asked to be listed as dormant:' lines.

  If the 'Blanket commit access:' line is missing, or a committer line
  cannot be parsed, a warning is written to stderr and the file or
  line is skipped."""
  # Look for the start of the committers list.  Stop at end of file as
  # well: readline() keeps returning '' there, so waiting for the
  # header alone would never finish on a file that lacks it.
  line = committers.readline()
  while line and line != 'Blanket commit access:\n':
    line = committers.readline()
  if not line:
    sys.stderr.write("WARNING: no 'Blanket commit access:' line found in"
                     " the COMMITTERS file\n")
    return
  in_full_committers = True
  matcher = re.compile(r'(\S+)\s+([^\(\)]+)\s+(\([^()]+\)){0,1}')
  line = committers.readline()
  while line:
    # Every @-sign we see after this point indicates a committer line...
    if line == 'Commit access for specific areas:\n':
      in_full_committers = False
    # ...except in the "dormant committers" area, which comes last anyway.
    if line == 'Committers who have asked to be listed as dormant:\n':
      in_full_committers = True
    elif line.find('@') >= 0:
      m = matcher.match(line.lstrip())
      if m is None:
        # Contains an '@' but is not of the form "username name...";
        # there is nothing to extract, so skip it.
        sys.stderr.write('WARNING: cannot parse COMMITTERS line: %s\n'
                         % line.rstrip('\n'))
      else:
        user = m.group(1)
        real_and_email = m.group(2).strip()
        ignored, real, email = Contributor.parse(real_and_email)
        c = Contributor.get(user, real, email)
        c.is_committer = True
        c.is_full_committer = in_full_committers
    line = committers.readline()
724
725
def usage():
  """Print the command-line help text, one line at a time, to standard
  output.  The program name shown is the basename of sys.argv[0]."""
  program = os.path.basename(sys.argv[0])
  help_lines = (
    'USAGE: %s [-C COMMITTERS_FILE] < SVN_LOG_OR_LOG-V_OUTPUT' % program,
    '',
    'Create HTML files in the current directory, rooted at index.html,',
    'in which you can browse to see who contributed what.',
    '',
    'The log input should use the contribution-tracking format defined',
    'in http://subversion.apache.org/docs/community-guide/conventions.html#crediting.',
    '',
    'Options:',
    '',
    '  -h, -H, -?, --help   Print this usage message and exit',
    '  -C FILE              Use FILE as the COMMITTERS file',
    '  -U URL               Use URL as a Python interpolation pattern to',
    '                       generate URLs to link revisions to some kind',
    '                       of web-based viewer (e.g. ViewCVS).  The',
    '                       interpolation pattern should contain exactly',
    "                       one format specifier, '%s', which will be",
    '                       replaced with the revision number.',
    '',
  )
  for text in help_lines:
    print(text)
747
748
def main():
  """Run the program: parse the command line, read the COMMITTERS file
  if -C was given, read 'svn log' output from standard input, and
  write the HTML pages.

  Exits with status 1 after printing the usage message if the options
  cannot be parsed, and with status 0 after printing it on request."""
  try:
    opts, args = my_getopt(sys.argv[1:], 'C:U:hH?', [ 'help' ])
  except getopt.GetoptError as e:
    complain(str(e) + '\n\n')
    usage()
    sys.exit(1)

  # Parse options.
  revision_url_pattern = None
  for opt, value in opts:
    if opt in ('--help', '-h', '-H', '-?'):
      usage()
      sys.exit(0)
    elif opt == '-C':
      # Close the COMMITTERS file as soon as it has been read, instead
      # of leaving the handle open for the rest of the run.
      with open(value) as committers:
        process_committers(committers)
    elif opt == '-U':
      revision_url_pattern = value

  # Gather the data.
  graze(sys.stdin)

  # Output the data.
  drop(revision_url_pattern)

if __name__ == '__main__':
  main()
776