1#!/usr/local/bin/python3.83
2# -*-python-*-
3#
4# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved.
5# Copyright (C) 2000 Curt Hagenlocher <curt@hagenlocher.org>
6#
7# By using this file, you agree to the terms and conditions set forth in
8# the LICENSE.html file which can be found at the top level of the ViewVC
9# distribution or at http://viewvc.org/license-1.html.
10#
11# For more information, visit http://viewvc.org/
12#
13# -----------------------------------------------------------------------
14#
15# blame.py: Annotate each line of a CVS file with its author,
16#           revision #, date, etc.
17#
18# -----------------------------------------------------------------------
19#
20# This file is based on the cvsblame.pl portion of the Bonsai CVS tool,
21# developed by Steve Lamm for Netscape Communications Corporation.  More
22# information about Bonsai can be found at
23#    http://www.mozilla.org/bonsai.html
24#
25# cvsblame.pl, in turn, was based on Scott Furman's cvsblame script
26#
27# -----------------------------------------------------------------------
28
29import re
30import time
31import math
32from . import rcsparse
33import vclib
34
35class CVSParser(rcsparse.Sink):
36  # Precompiled regular expressions
37  trunk_rev   = re.compile(b'^[0-9]+\\.[0-9]+$')
38  last_branch = re.compile(b'(.*)\\.[0-9]+')
39  is_branch   = re.compile(b'^(.*)\\.0\\.([0-9]+)$')
40  d_command   = re.compile(br'^d(\d+)\s(\d+)')
41  a_command   = re.compile(br'^a(\d+)\s(\d+)')
42
43  SECONDS_PER_DAY = 86400
44
45  def __init__(self, encoding):
46    self.Reset()
47    self.encoding = encoding
48
49  def Reset(self):
50    self.last_revision = {}
51    self.prev_revision = {}
52    self.revision_author = {}
53    self.next_delta = {}
54    self.prev_delta = {}
55    self.tag_revision = {}
56    self.timestamp = {}
57    self.revision_log = {}
58    self.revision_deltatext = {}
59    self.revision_map = []    # map line numbers to revisions
60    self.lines_added  = {}
61    self.lines_removed = {}
62
63  # Map a tag to a numerical revision number.  The tag can be a symbolic
64  # branch tag, a symbolic revision tag, or an ordinary numerical
65  # revision number.
66  def map_tag_to_revision(self, tag_or_revision):
67    try:
68      revision = self.tag_revision[tag_or_revision]
69      match = self.is_branch.match(revision)
70      if match:
71        branch = match.group(1) + b'.' + match.group(2)
72        if self.last_revision.get(branch):
73          return self.last_revision[branch]
74        else:
75          return match.group(1)
76      else:
77        return revision
78    except:
79      return b''
80
81  # Construct an ordered list of ancestor revisions to the given
82  # revision, starting with the immediate ancestor and going back
83  # to the primordial revision (1.1).
84  #
85  # Note: The generated path does not traverse the tree the same way
86  #       that the individual revision deltas do.  In particular,
87  #       the path traverses the tree "backwards" on branches.
88  def ancestor_revisions(self, revision):
89    ancestors = []
90    revision = self.prev_revision.get(revision)
91    while revision:
92      ancestors.append(revision)
93      revision = self.prev_revision.get(revision)
94
95    return ancestors
96
97  # Split deltatext specified by rev to each line.
98  def deltatext_split(self, rev):
99    lines = self.revision_deltatext[rev].split(b'\n')
100    if lines[-1] == b'':
101      del lines[-1]
102    return lines
103
104  # Extract the given revision from the digested RCS file.
105  # (Essentially the equivalent of cvs up -rXXX)
106  def extract_revision(self, revision):
107    path = []
108    add_lines_remaining = 0
109    start_line = 0
110    count = 0
111    while revision:
112      path.append(revision)
113      revision = self.prev_delta.get(revision)
114    path.reverse()
115    path = path[1:]  # Get rid of head revision
116
117    text = self.deltatext_split(self.head_revision)
118
119    # Iterate, applying deltas to previous revision
120    for revision in path:
121      adjust = 0
122      diffs = self.deltatext_split(revision)
123      self.lines_added[revision]   = 0
124      self.lines_removed[revision] = 0
125      lines_added_now = 0
126      lines_removed_now = 0
127
128      for command in diffs:
129        dmatch = self.d_command.match(command)
130        amatch = self.a_command.match(command)
131        if add_lines_remaining > 0:
132          # Insertion lines from a prior "a" command
133          text.insert(start_line + adjust, command)
134          add_lines_remaining = add_lines_remaining - 1
135          adjust = adjust + 1
136        elif dmatch:
137          # "d" - Delete command
138          start_line = int(dmatch.group(1))
139          count      = int(dmatch.group(2))
140          begin = start_line + adjust - 1
141          del text[begin:begin + count]
142          adjust = adjust - count
143          lines_removed_now = lines_removed_now + count
144        elif amatch:
145          # "a" - Add command
146          start_line = int(amatch.group(1))
147          count      = int(amatch.group(2))
148          add_lines_remaining = count
149          lines_added_now = lines_added_now + count
150        else:
151          raise RuntimeError('Error parsing diff commands')
152
153      self.lines_added[revision]   = self.lines_added[revision]   + lines_added_now
154      self.lines_removed[revision] = self.lines_removed[revision] + lines_removed_now
155    return text
156
157  def set_head_revision(self, revision):
158    self.head_revision = revision
159
160  def set_principal_branch(self, branch_name):
161    self.principal_branch = branch_name
162
163  def define_tag(self, name, revision):
164    # Create an associate array that maps from tag name to
165    # revision number and vice-versa.
166    self.tag_revision[name] = revision
167
168  def set_comment(self, comment):
169    self.file_description = comment
170
171  def set_description(self, description):
172    self.rcs_file_description = description
173
174  # Construct dicts that represent the topology of the RCS tree
175  # and other arrays that contain info about individual revisions.
176  #
177  # The following dicts are created, keyed by revision number:
178  #   self.timestamp         -- seconds since 12:00 AM, Jan 1, 1970 GMT
179  #   self.revision_author   -- e.g. b"tom"
180  #   self.prev_revision     -- revision number of previous *ancestor* in RCS tree.
181  #                             Traversal of this array occurs in the direction
182  #                             of the primordial (1.1) revision.
183  #   self.prev_delta        -- revision number of previous revision which forms
184  #                             the basis for the edit commands in this revision.
185  #                             This causes the tree to be traversed towards the
186  #                             trunk when on a branch, and towards the latest trunk
187  #                             revision when on the trunk.
188  #   self.next_delta        -- revision number of next "delta".  Inverts prev_delta.
189  #
190  # Also creates self.last_revision, keyed by a branch revision number, which
191  # indicates the latest revision on a given branch,
192  #   e.g. self.last_revision{b"1.2.8"} == 1.2.8.5
193  def define_revision(self, revision, timestamp, author, state,
194                      branches, next):
195    self.tag_revision[revision] = revision
196    branch = self.last_branch.match(revision).group(1)
197    self.last_revision[branch] = revision
198
199    self.timestamp[revision] = timestamp
200
201    # save author
202    self.revision_author[revision] = author
203
204    # ignore the state
205
206    # process the branch information
207    for branch in branches:
208      self.prev_revision[branch] = revision
209      self.next_delta[revision] = branch
210      self.prev_delta[branch] = revision
211
212    # process the "next revision" information
213    if next:
214      self.next_delta[revision] = next
215      self.prev_delta[next] = revision
216      is_trunk_revision = self.trunk_rev.match(revision) is not None
217      if is_trunk_revision:
218        self.prev_revision[revision] = next
219      else:
220        self.prev_revision[next] = revision
221
222  # Construct associative arrays containing info about individual revisions.
223  #
224  # The following associative arrays are created, keyed by revision number:
225  #   revision_log        -- log message
226  #   revision_deltatext  -- Either the complete text of the revision,
227  #                          in the case of the head revision, or the
228  #                          encoded delta between this revision and another.
229  #                          The delta is either with respect to the successor
230  #                          revision if this revision is on the trunk or
231  #                          relative to its immediate predecessor if this
232  #                          revision is on a branch.
233  def set_revision_info(self, revision, log, text):
234    self.revision_log[revision] = log
235    self.revision_deltatext[revision] = text
236
237  def parse_cvs_file(self, rcs_pathname, opt_rev = None,
238                     opt_m_timestamp = None):
239    # Args in:  opt_rev - requested revision in bytes type
240    #           opt_m - time since modified
241    # Args out: revision_map
242    #           timestamp
243    #           revision_deltatext
244
245    # CheckHidden(rcs_pathname)
246    try:
247      rcsfile = open(rcs_pathname, 'rb')
248    except:
249      raise RuntimeError(('error: %s appeared to be under CVS control, ' +
250              'but the RCS file is inaccessible.') % rcs_pathname)
251
252    rcsparse.parse(rcsfile, self)
253    rcsfile.close()
254
255    if opt_rev in [None, b'', b'HEAD']:
256      # Explicitly specified topmost revision in tree
257      revision = self.head_revision
258    else:
259      # Symbolic tag or specific revision number specified.
260      revision = self.map_tag_to_revision(opt_rev)
261      if revision == b'':
262        raise RuntimeError('error: -r: No such revision: '
263                           + opt_rev.decode(self.encoding))
264
265    # The primordial revision is not always 1.1!  Go find it.
266    primordial = revision
267    while self.prev_revision.get(primordial):
268      primordial = self.prev_revision[primordial]
269
270    # Don't display file at all, if -m option is specified and no
271    # changes have been made in the specified file.
272    if opt_m_timestamp and self.timestamp[revision] < opt_m_timestamp:
273      return b''
274
275    # Figure out how many lines were in the primordial, i.e. version 1.1,
276    # check-in by moving backward in time from the head revision to the
277    # first revision.
278    line_count = 0
279    if self.revision_deltatext.get(self.head_revision):
280      tmp_array = self.deltatext_split(self.head_revision)
281      line_count = len(tmp_array)
282
283    skip = 0
284
285    rev = self.prev_revision.get(self.head_revision)
286    while rev:
287      diffs = self.deltatext_split(rev)
288      for command in diffs:
289        dmatch = self.d_command.match(command)
290        amatch = self.a_command.match(command)
291        if skip > 0:
292          # Skip insertion lines from a prior "a" command
293          skip = skip - 1
294        elif dmatch:
295          # "d" - Delete command
296          start_line = int(dmatch.group(1))
297          count      = int(dmatch.group(2))
298          line_count = line_count - count
299        elif amatch:
300          # "a" - Add command
301          start_line = int(amatch.group(1))
302          count      = int(amatch.group(2))
303          skip       = count
304          line_count = line_count + count
305        else:
306          raise RuntimeError('error: illegal RCS file')
307
308      rev = self.prev_revision.get(rev)
309
310    # Now, play the delta edit commands *backwards* from the primordial
311    # revision forward, but rather than applying the deltas to the text of
312    # each revision, apply the changes to an array of revision numbers.
313    # This creates a "revision map" -- an array where each element
314    # represents a line of text in the given revision but contains only
315    # the revision number in which the line was introduced rather than
316    # the line text itself.
317    #
318    # Note: These are backward deltas for revisions on the trunk and
319    # forward deltas for branch revisions.
320
321    # Create initial revision map for primordial version.
322    self.revision_map = [primordial] * line_count
323
324    ancestors = [revision, ] + self.ancestor_revisions(revision)
325    ancestors = ancestors[:-1]  # Remove b"1.1"
326    last_revision = primordial
327    ancestors.reverse()
328    for revision in ancestors:
329      is_trunk_revision = self.trunk_rev.match(revision) is not None
330
331      if is_trunk_revision:
332        diffs = self.deltatext_split(last_revision)
333
334        # Revisions on the trunk specify deltas that transform a
335        # revision into an earlier revision, so invert the translation
336        # of the 'diff' commands.
337        for command in diffs:
338          if skip > 0:
339            skip = skip - 1
340          else:
341            dmatch = self.d_command.match(command)
342            amatch = self.a_command.match(command)
343            if dmatch:
344              start_line = int(dmatch.group(1))
345              count      = int(dmatch.group(2))
346              temp = []
347              while count > 0:
348                temp.append(revision)
349                count = count - 1
350              self.revision_map = (self.revision_map[:start_line - 1] +
351                      temp + self.revision_map[start_line - 1:])
352            elif amatch:
353              start_line = int(amatch.group(1))
354              count      = int(amatch.group(2))
355              del self.revision_map[start_line:start_line + count]
356              skip = count
357            else:
358              raise RuntimeError('Error parsing diff commands')
359
360      else:
361        # Revisions on a branch are arranged backwards from those on
362        # the trunk.  They specify deltas that transform a revision
363        # into a later revision.
364        adjust = 0
365        diffs = self.deltatext_split(revision)
366        for command in diffs:
367          if skip > 0:
368            skip = skip - 1
369          else:
370            dmatch = self.d_command.match(command)
371            amatch = self.a_command.match(command)
372            if dmatch:
373              start_line = int(dmatch.group(1))
374              count      = int(dmatch.group(2))
375              adj_begin  = start_line + adjust - 1
376              adj_end    = start_line + adjust - 1 + count
377              del self.revision_map[adj_begin:adj_end]
378              adjust = adjust - count
379            elif amatch:
380              start_line = int(amatch.group(1))
381              count      = int(amatch.group(2))
382              skip = count
383              temp = []
384              while count > 0:
385                temp.append(revision)
386                count = count - 1
387              self.revision_map = (self.revision_map[:start_line + adjust] +
388                      temp + self.revision_map[start_line + adjust:])
389              adjust = adjust + skip
390            else:
391              raise RuntimeError('Error parsing diff commands')
392
393      last_revision = revision
394
395    return revision
396
397
398class BlameSource:
399  def __init__(self, rcs_file, opt_rev=None, include_text=False,
400               encoding='utf-8'):
401    # Parse the CVS file
402    parser = CVSParser(encoding)
403    revision = parser.parse_cvs_file(rcs_file, opt_rev.encode(encoding))
404    count = len(parser.revision_map)
405    lines = parser.extract_revision(revision)
406    if len(lines) != count:
407      raise RuntimeError('Internal consistency error')
408
409    # set up some state variables
410    self.revision = revision
411    self.lines = lines
412    self.num_lines = count
413    self.parser = parser
414    self.include_text = include_text
415    self.encoding = encoding
416
417    # keep track of where we are during an iteration
418    self.idx = -1
419    self.last = None
420
421  def __getitem__(self, idx):
422    if idx == self.idx:
423      return self.last
424    if idx >= self.num_lines:
425      raise IndexError("No more annotations")
426    if idx != self.idx + 1:
427      raise BlameSequencingError()
428
429    # Get the line and metadata for it.
430    rev = self.parser.revision_map[idx]
431    prev_rev = self.parser.prev_revision.get(rev)
432    line_number = idx + 1
433    author = self.parser.revision_author[rev]
434    thisline = self.lines[idx]
435    if not self.include_text:
436      thisline = None
437    ### TODO:  Put a real date in here.
438    item = vclib.Annotation(thisline, line_number, self._to_str(rev),
439                            self._to_str(prev_rev), self._to_str(author), None)
440    self.last = item
441    self.idx = idx
442    return item
443
444  def _to_str(self, b):
445    if isinstance(b, bytes):
446      return b.decode(self.encoding, 'backslashreplace')
447    return b
448
449class BlameSequencingError(Exception):
450  pass
451