1#!/usr/local/bin/python3.83 2# -*-python-*- 3# 4# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved. 5# Copyright (C) 2000 Curt Hagenlocher <curt@hagenlocher.org> 6# 7# By using this file, you agree to the terms and conditions set forth in 8# the LICENSE.html file which can be found at the top level of the ViewVC 9# distribution or at http://viewvc.org/license-1.html. 10# 11# For more information, visit http://viewvc.org/ 12# 13# ----------------------------------------------------------------------- 14# 15# blame.py: Annotate each line of a CVS file with its author, 16# revision #, date, etc. 17# 18# ----------------------------------------------------------------------- 19# 20# This file is based on the cvsblame.pl portion of the Bonsai CVS tool, 21# developed by Steve Lamm for Netscape Communications Corporation. More 22# information about Bonsai can be found at 23# http://www.mozilla.org/bonsai.html 24# 25# cvsblame.pl, in turn, was based on Scott Furman's cvsblame script 26# 27# ----------------------------------------------------------------------- 28 29import re 30import time 31import math 32from . import rcsparse 33import vclib 34 35class CVSParser(rcsparse.Sink): 36 # Precompiled regular expressions 37 trunk_rev = re.compile(b'^[0-9]+\\.[0-9]+$') 38 last_branch = re.compile(b'(.*)\\.[0-9]+') 39 is_branch = re.compile(b'^(.*)\\.0\\.([0-9]+)$') 40 d_command = re.compile(br'^d(\d+)\s(\d+)') 41 a_command = re.compile(br'^a(\d+)\s(\d+)') 42 43 SECONDS_PER_DAY = 86400 44 45 def __init__(self, encoding): 46 self.Reset() 47 self.encoding = encoding 48 49 def Reset(self): 50 self.last_revision = {} 51 self.prev_revision = {} 52 self.revision_author = {} 53 self.next_delta = {} 54 self.prev_delta = {} 55 self.tag_revision = {} 56 self.timestamp = {} 57 self.revision_log = {} 58 self.revision_deltatext = {} 59 self.revision_map = [] # map line numbers to revisions 60 self.lines_added = {} 61 self.lines_removed = {} 62 63 # Map a tag to a numerical revision number. The tag can be a symbolic 64 # branch tag, a symbolic revision tag, or an ordinary numerical 65 # revision number. 66 def map_tag_to_revision(self, tag_or_revision): 67 try: 68 revision = self.tag_revision[tag_or_revision] 69 match = self.is_branch.match(revision) 70 if match: 71 branch = match.group(1) + b'.' + match.group(2) 72 if self.last_revision.get(branch): 73 return self.last_revision[branch] 74 else: 75 return match.group(1) 76 else: 77 return revision 78 except: 79 return b'' 80 81 # Construct an ordered list of ancestor revisions to the given 82 # revision, starting with the immediate ancestor and going back 83 # to the primordial revision (1.1). 84 # 85 # Note: The generated path does not traverse the tree the same way 86 # that the individual revision deltas do. In particular, 87 # the path traverses the tree "backwards" on branches. 88 def ancestor_revisions(self, revision): 89 ancestors = [] 90 revision = self.prev_revision.get(revision) 91 while revision: 92 ancestors.append(revision) 93 revision = self.prev_revision.get(revision) 94 95 return ancestors 96 97 # Split deltatext specified by rev to each line. 98 def deltatext_split(self, rev): 99 lines = self.revision_deltatext[rev].split(b'\n') 100 if lines[-1] == b'': 101 del lines[-1] 102 return lines 103 104 # Extract the given revision from the digested RCS file. 105 # (Essentially the equivalent of cvs up -rXXX) 106 def extract_revision(self, revision): 107 path = [] 108 add_lines_remaining = 0 109 start_line = 0 110 count = 0 111 while revision: 112 path.append(revision) 113 revision = self.prev_delta.get(revision) 114 path.reverse() 115 path = path[1:] # Get rid of head revision 116 117 text = self.deltatext_split(self.head_revision) 118 119 # Iterate, applying deltas to previous revision 120 for revision in path: 121 adjust = 0 122 diffs = self.deltatext_split(revision) 123 self.lines_added[revision] = 0 124 self.lines_removed[revision] = 0 125 lines_added_now = 0 126 lines_removed_now = 0 127 128 for command in diffs: 129 dmatch = self.d_command.match(command) 130 amatch = self.a_command.match(command) 131 if add_lines_remaining > 0: 132 # Insertion lines from a prior "a" command 133 text.insert(start_line + adjust, command) 134 add_lines_remaining = add_lines_remaining - 1 135 adjust = adjust + 1 136 elif dmatch: 137 # "d" - Delete command 138 start_line = int(dmatch.group(1)) 139 count = int(dmatch.group(2)) 140 begin = start_line + adjust - 1 141 del text[begin:begin + count] 142 adjust = adjust - count 143 lines_removed_now = lines_removed_now + count 144 elif amatch: 145 # "a" - Add command 146 start_line = int(amatch.group(1)) 147 count = int(amatch.group(2)) 148 add_lines_remaining = count 149 lines_added_now = lines_added_now + count 150 else: 151 raise RuntimeError('Error parsing diff commands') 152 153 self.lines_added[revision] = self.lines_added[revision] + lines_added_now 154 self.lines_removed[revision] = self.lines_removed[revision] + lines_removed_now 155 return text 156 157 def set_head_revision(self, revision): 158 self.head_revision = revision 159 160 def set_principal_branch(self, branch_name): 161 self.principal_branch = branch_name 162 163 def define_tag(self, name, revision): 164 # Create an associate array that maps from tag name to 165 # revision number and vice-versa. 166 self.tag_revision[name] = revision 167 168 def set_comment(self, comment): 169 self.file_description = comment 170 171 def set_description(self, description): 172 self.rcs_file_description = description 173 174 # Construct dicts that represent the topology of the RCS tree 175 # and other arrays that contain info about individual revisions. 176 # 177 # The following dicts are created, keyed by revision number: 178 # self.timestamp -- seconds since 12:00 AM, Jan 1, 1970 GMT 179 # self.revision_author -- e.g. b"tom" 180 # self.prev_revision -- revision number of previous *ancestor* in RCS tree. 181 # Traversal of this array occurs in the direction 182 # of the primordial (1.1) revision. 183 # self.prev_delta -- revision number of previous revision which forms 184 # the basis for the edit commands in this revision. 185 # This causes the tree to be traversed towards the 186 # trunk when on a branch, and towards the latest trunk 187 # revision when on the trunk. 188 # self.next_delta -- revision number of next "delta". Inverts prev_delta. 189 # 190 # Also creates self.last_revision, keyed by a branch revision number, which 191 # indicates the latest revision on a given branch, 192 # e.g. self.last_revision{b"1.2.8"} == 1.2.8.5 193 def define_revision(self, revision, timestamp, author, state, 194 branches, next): 195 self.tag_revision[revision] = revision 196 branch = self.last_branch.match(revision).group(1) 197 self.last_revision[branch] = revision 198 199 self.timestamp[revision] = timestamp 200 201 # save author 202 self.revision_author[revision] = author 203 204 # ignore the state 205 206 # process the branch information 207 for branch in branches: 208 self.prev_revision[branch] = revision 209 self.next_delta[revision] = branch 210 self.prev_delta[branch] = revision 211 212 # process the "next revision" information 213 if next: 214 self.next_delta[revision] = next 215 self.prev_delta[next] = revision 216 is_trunk_revision = self.trunk_rev.match(revision) is not None 217 if is_trunk_revision: 218 self.prev_revision[revision] = next 219 else: 220 self.prev_revision[next] = revision 221 222 # Construct associative arrays containing info about individual revisions. 223 # 224 # The following associative arrays are created, keyed by revision number: 225 # revision_log -- log message 226 # revision_deltatext -- Either the complete text of the revision, 227 # in the case of the head revision, or the 228 # encoded delta between this revision and another. 229 # The delta is either with respect to the successor 230 # revision if this revision is on the trunk or 231 # relative to its immediate predecessor if this 232 # revision is on a branch. 233 def set_revision_info(self, revision, log, text): 234 self.revision_log[revision] = log 235 self.revision_deltatext[revision] = text 236 237 def parse_cvs_file(self, rcs_pathname, opt_rev = None, 238 opt_m_timestamp = None): 239 # Args in: opt_rev - requested revision in bytes type 240 # opt_m - time since modified 241 # Args out: revision_map 242 # timestamp 243 # revision_deltatext 244 245 # CheckHidden(rcs_pathname) 246 try: 247 rcsfile = open(rcs_pathname, 'rb') 248 except: 249 raise RuntimeError(('error: %s appeared to be under CVS control, ' + 250 'but the RCS file is inaccessible.') % rcs_pathname) 251 252 rcsparse.parse(rcsfile, self) 253 rcsfile.close() 254 255 if opt_rev in [None, b'', b'HEAD']: 256 # Explicitly specified topmost revision in tree 257 revision = self.head_revision 258 else: 259 # Symbolic tag or specific revision number specified. 260 revision = self.map_tag_to_revision(opt_rev) 261 if revision == b'': 262 raise RuntimeError('error: -r: No such revision: ' 263 + opt_rev.decode(self.encoding)) 264 265 # The primordial revision is not always 1.1! Go find it. 266 primordial = revision 267 while self.prev_revision.get(primordial): 268 primordial = self.prev_revision[primordial] 269 270 # Don't display file at all, if -m option is specified and no 271 # changes have been made in the specified file. 272 if opt_m_timestamp and self.timestamp[revision] < opt_m_timestamp: 273 return b'' 274 275 # Figure out how many lines were in the primordial, i.e. version 1.1, 276 # check-in by moving backward in time from the head revision to the 277 # first revision. 278 line_count = 0 279 if self.revision_deltatext.get(self.head_revision): 280 tmp_array = self.deltatext_split(self.head_revision) 281 line_count = len(tmp_array) 282 283 skip = 0 284 285 rev = self.prev_revision.get(self.head_revision) 286 while rev: 287 diffs = self.deltatext_split(rev) 288 for command in diffs: 289 dmatch = self.d_command.match(command) 290 amatch = self.a_command.match(command) 291 if skip > 0: 292 # Skip insertion lines from a prior "a" command 293 skip = skip - 1 294 elif dmatch: 295 # "d" - Delete command 296 start_line = int(dmatch.group(1)) 297 count = int(dmatch.group(2)) 298 line_count = line_count - count 299 elif amatch: 300 # "a" - Add command 301 start_line = int(amatch.group(1)) 302 count = int(amatch.group(2)) 303 skip = count 304 line_count = line_count + count 305 else: 306 raise RuntimeError('error: illegal RCS file') 307 308 rev = self.prev_revision.get(rev) 309 310 # Now, play the delta edit commands *backwards* from the primordial 311 # revision forward, but rather than applying the deltas to the text of 312 # each revision, apply the changes to an array of revision numbers. 313 # This creates a "revision map" -- an array where each element 314 # represents a line of text in the given revision but contains only 315 # the revision number in which the line was introduced rather than 316 # the line text itself. 317 # 318 # Note: These are backward deltas for revisions on the trunk and 319 # forward deltas for branch revisions. 320 321 # Create initial revision map for primordial version. 322 self.revision_map = [primordial] * line_count 323 324 ancestors = [revision, ] + self.ancestor_revisions(revision) 325 ancestors = ancestors[:-1] # Remove b"1.1" 326 last_revision = primordial 327 ancestors.reverse() 328 for revision in ancestors: 329 is_trunk_revision = self.trunk_rev.match(revision) is not None 330 331 if is_trunk_revision: 332 diffs = self.deltatext_split(last_revision) 333 334 # Revisions on the trunk specify deltas that transform a 335 # revision into an earlier revision, so invert the translation 336 # of the 'diff' commands. 337 for command in diffs: 338 if skip > 0: 339 skip = skip - 1 340 else: 341 dmatch = self.d_command.match(command) 342 amatch = self.a_command.match(command) 343 if dmatch: 344 start_line = int(dmatch.group(1)) 345 count = int(dmatch.group(2)) 346 temp = [] 347 while count > 0: 348 temp.append(revision) 349 count = count - 1 350 self.revision_map = (self.revision_map[:start_line - 1] + 351 temp + self.revision_map[start_line - 1:]) 352 elif amatch: 353 start_line = int(amatch.group(1)) 354 count = int(amatch.group(2)) 355 del self.revision_map[start_line:start_line + count] 356 skip = count 357 else: 358 raise RuntimeError('Error parsing diff commands') 359 360 else: 361 # Revisions on a branch are arranged backwards from those on 362 # the trunk. They specify deltas that transform a revision 363 # into a later revision. 364 adjust = 0 365 diffs = self.deltatext_split(revision) 366 for command in diffs: 367 if skip > 0: 368 skip = skip - 1 369 else: 370 dmatch = self.d_command.match(command) 371 amatch = self.a_command.match(command) 372 if dmatch: 373 start_line = int(dmatch.group(1)) 374 count = int(dmatch.group(2)) 375 adj_begin = start_line + adjust - 1 376 adj_end = start_line + adjust - 1 + count 377 del self.revision_map[adj_begin:adj_end] 378 adjust = adjust - count 379 elif amatch: 380 start_line = int(amatch.group(1)) 381 count = int(amatch.group(2)) 382 skip = count 383 temp = [] 384 while count > 0: 385 temp.append(revision) 386 count = count - 1 387 self.revision_map = (self.revision_map[:start_line + adjust] + 388 temp + self.revision_map[start_line + adjust:]) 389 adjust = adjust + skip 390 else: 391 raise RuntimeError('Error parsing diff commands') 392 393 last_revision = revision 394 395 return revision 396 397 398class BlameSource: 399 def __init__(self, rcs_file, opt_rev=None, include_text=False, 400 encoding='utf-8'): 401 # Parse the CVS file 402 parser = CVSParser(encoding) 403 revision = parser.parse_cvs_file(rcs_file, opt_rev.encode(encoding)) 404 count = len(parser.revision_map) 405 lines = parser.extract_revision(revision) 406 if len(lines) != count: 407 raise RuntimeError('Internal consistency error') 408 409 # set up some state variables 410 self.revision = revision 411 self.lines = lines 412 self.num_lines = count 413 self.parser = parser 414 self.include_text = include_text 415 self.encoding = encoding 416 417 # keep track of where we are during an iteration 418 self.idx = -1 419 self.last = None 420 421 def __getitem__(self, idx): 422 if idx == self.idx: 423 return self.last 424 if idx >= self.num_lines: 425 raise IndexError("No more annotations") 426 if idx != self.idx + 1: 427 raise BlameSequencingError() 428 429 # Get the line and metadata for it. 430 rev = self.parser.revision_map[idx] 431 prev_rev = self.parser.prev_revision.get(rev) 432 line_number = idx + 1 433 author = self.parser.revision_author[rev] 434 thisline = self.lines[idx] 435 if not self.include_text: 436 thisline = None 437 ### TODO: Put a real date in here. 438 item = vclib.Annotation(thisline, line_number, self._to_str(rev), 439 self._to_str(prev_rev), self._to_str(author), None) 440 self.last = item 441 self.idx = idx 442 return item 443 444 def _to_str(self, b): 445 if isinstance(b, bytes): 446 return b.decode(self.encoding, 'backslashreplace') 447 return b 448 449class BlameSequencingError(Exception): 450 pass 451