1# -*-python-*- 2# 3# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved. 4# 5# By using this file, you agree to the terms and conditions set forth in 6# the LICENSE.html file which can be found at the top level of the ViewVC 7# distribution or at http://viewvc.org/license-1.html. 8# 9# For more information, visit http://viewvc.org/ 10# 11# ----------------------------------------------------------------------- 12 13"Version Control lib driver for locally accessible cvs-repositories." 14 15import vclib 16import vcauth 17import os 18import os.path 19import sys 20import stat 21import re 22import time 23import calendar 24import subprocess 25import vclib.ccvs 26import functools 27 28# Python 3: workaround for cmp() 29def cmp(a, b): 30 return (a > b) - (a < b) 31 32def enc_decode(s, encoding='utf-8'): 33 if s is None: 34 return None 35 return s.decode(encoding, 'surrogateescape') 36 37def _path_join(path_parts): 38 return '/'.join(path_parts) 39 40class BaseCVSRepository(vclib.Repository): 41 def __init__(self, name, rootpath, authorizer, utilities, encoding): 42 if not os.path.isdir(rootpath): 43 raise vclib.ReposNotFound(name) 44 45 self.name = name 46 self.rootpath = rootpath 47 self.auth = authorizer 48 self.utilities = utilities 49 self.encoding = encoding 50 51 # See if this repository is even viewable, authz-wise. 52 if not vclib.check_root_access(self): 53 raise vclib.ReposNotFound(name) 54 55 def open(self): 56 # See if a universal read access determination can be made. 57 if self.auth and self.auth.check_universal_access(self.name) == 1: 58 self.auth = None 59 60 def rootname(self): 61 return self.name 62 63 def rootpath(self): 64 return self.rootpath 65 66 def roottype(self): 67 return vclib.CVS 68 69 def authorizer(self): 70 return self.auth 71 72 def itemtype(self, path_parts, rev): 73 basepath = self._getpath(path_parts) 74 kind = None 75 if os.path.isdir(basepath): 76 kind = vclib.DIR 77 elif os.path.isfile(basepath + ',v'): 78 kind = vclib.FILE 79 else: 80 atticpath = self._getpath(self._atticpath(path_parts)) 81 if os.path.isfile(atticpath + ',v'): 82 kind = vclib.FILE 83 if not kind: 84 raise vclib.ItemNotFound(path_parts) 85 if not vclib.check_path_access(self, path_parts, kind, rev): 86 raise vclib.ItemNotFound(path_parts) 87 return kind 88 89 def itemprops(self, path_parts, rev): 90 self.itemtype(path_parts, rev) # does auth-check 91 return {} # CVS doesn't support properties 92 93 def listdir(self, path_parts, rev, options): 94 if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check 95 raise vclib.Error("Path '%s' is not a directory." 96 % (_path_join(path_parts))) 97 98 # Only RCS files (*,v) and subdirs are returned. 99 data = [ ] 100 full_name = self._getpath(path_parts) 101 for file in os.listdir(full_name): 102 name = None 103 kind, errors = _check_path(os.path.join(full_name, file)) 104 if kind == vclib.FILE: 105 if file[-2:] == ',v': 106 name = file[:-2] 107 elif kind == vclib.DIR: 108 if file != 'Attic' and file != 'CVS': # CVS directory is for fileattr 109 name = file 110 else: 111 name = file 112 if not name: 113 continue 114 if vclib.check_path_access(self, path_parts + [name], kind, rev): 115 data.append(CVSDirEntry(name, kind, errors, 0)) 116 117 full_name = os.path.join(full_name, 'Attic') 118 if os.path.isdir(full_name): 119 for file in os.listdir(full_name): 120 name = None 121 kind, errors = _check_path(os.path.join(full_name, file)) 122 if kind == vclib.FILE: 123 if file[-2:] == ',v': 124 name = file[:-2] 125 elif kind != vclib.DIR: 126 name = file 127 if not name: 128 continue 129 if vclib.check_path_access(self, path_parts + [name], kind, rev): 130 data.append(CVSDirEntry(name, kind, errors, 1)) 131 132 return data 133 134 def _getpath(self, path_parts): 135 return os.path.join(*((self.rootpath,) + tuple(path_parts))) 136 137 def _atticpath(self, path_parts): 138 return path_parts[:-1] + ['Attic'] + path_parts[-1:] 139 140 def rcsfile(self, path_parts, root=0, v=1): 141 "Return path to RCS file" 142 143 ret_parts = path_parts 144 ret_file = self._getpath(ret_parts) 145 if not os.path.isfile(ret_file + ',v'): 146 ret_parts = self._atticpath(path_parts) 147 ret_file = self._getpath(ret_parts) 148 if not os.path.isfile(ret_file + ',v'): 149 raise vclib.ItemNotFound(path_parts) 150 if root: 151 ret = ret_file 152 else: 153 ret = _path_join(ret_parts) 154 if v: 155 ret = ret + ",v" 156 return ret 157 158 def isexecutable(self, path_parts, rev): 159 if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check 160 raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts))) 161 rcsfile = self.rcsfile(path_parts, 1) 162 return os.access(rcsfile, os.X_OK) 163 164 def filesize(self, path_parts, rev): 165 if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check 166 raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts))) 167 return -1 168 169 170class BinCVSRepository(BaseCVSRepository): 171 def _get_tip_revision(self, rcs_file, rev=None): 172 """Get the (basically) youngest revision (filtered by REV).""" 173 args = rcs_file, 174 fp = self.rcs_popen('rlog', args, True, False) 175 filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp) 176 revs = [] 177 while not eof: 178 revision, eof = _parse_log_entry(fp) 179 if revision: 180 revs.append(revision) 181 revs = _file_log(revs, tags, lockinfo, default_branch, rev) 182 if revs: 183 return revs[-1] 184 return None 185 186 def openfile(self, path_parts, rev, options): 187 """see vclib.Repository.openfile docstring 188 189 Option values recognized by this implementation: 190 191 cvs_oldkeywords 192 boolean. true to use the original keyword substitution values. 193 """ 194 if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check 195 raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts))) 196 if not rev or rev == 'HEAD' or rev == 'MAIN': 197 rev_flag = '-p' 198 else: 199 rev_flag = '-p' + rev 200 if options.get('cvs_oldkeywords', 0): 201 kv_flag = '-ko' 202 else: 203 kv_flag = '-kkv' 204 full_name = self.rcsfile(path_parts, root=1, v=0) 205 used_rlog = 0 206 tip_rev = None # used only if we have to fallback to using rlog 207 fp = self.rcs_popen('co', (kv_flag, rev_flag, full_name)) 208 try: 209 filename, revision = _parse_co_header(fp, self.encoding) 210 except COMissingRevision: 211 # We got a "revision X.Y.Z absent" error from co. This could be 212 # because we were asked to find a tip of a branch, which co 213 # doesn't seem to handle. So we do rlog-gy stuff to figure out 214 # which revision the tip of the branch currently maps to. 215 ### TODO: Only do this when 'rev' is a branch symbol name? 216 if not used_rlog: 217 tip_rev = self._get_tip_revision(full_name + ',v', rev) 218 used_rlog = 1 219 if not tip_rev: 220 raise vclib.Error("Unable to find valid revision") 221 fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name)) 222 filename, revision = _parse_co_header(fp, self.encodig) 223 224 if filename is None: 225 # CVSNT's co exits without any output if a dead revision is requested. 226 # Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190 227 # As a workaround, we invoke rlog to find the first non-dead revision 228 # that precedes it and check out that revision instead. Of course, 229 # if we've already invoked rlog above, we just reuse its output. 230 if not used_rlog: 231 tip_rev = self._get_tip_revision(full_name + ',v', rev) 232 used_rlog = 1 233 if not (tip_rev and tip_rev.undead): 234 raise vclib.Error( 235 'Could not find non-dead revision preceding "%s"' % rev) 236 fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string, full_name)) 237 filename, revision = _parse_co_header(fp, self.encoding) 238 239 if filename is None: 240 raise vclib.Error('Missing output from co (filename = "%s")' % full_name) 241 242 if not _paths_eq(filename, full_name): 243 raise vclib.Error( 244 'The filename from co ("%s") did not match (expected "%s")' 245 % (filename, full_name)) 246 247 return fp, revision 248 249 def dirlogs(self, path_parts, rev, entries, options): 250 """see vclib.Repository.dirlogs docstring 251 252 rev can be a tag name or None. if set only information from revisions 253 matching the tag will be retrieved 254 255 Option values recognized by this implementation: 256 257 cvs_subdirs 258 boolean. true to fetch logs of the most recently modified file in each 259 subdirectory 260 261 Option values returned by this implementation: 262 263 cvs_tags, cvs_branches 264 lists of tag and branch names encountered in the directory 265 """ 266 if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check 267 raise vclib.Error("Path '%s' is not a directory." 268 % (_path_join(path_parts))) 269 270 subdirs = options.get('cvs_subdirs', 0) 271 entries_to_fetch = [] 272 for entry in entries: 273 if vclib.check_path_access(self, path_parts + [entry.name], None, rev): 274 entries_to_fetch.append(entry) 275 alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs) 276 branches = options['cvs_branches'] = [] 277 tags = options['cvs_tags'] = [] 278 for name, rev in alltags.items(): 279 if Tag(None, rev).is_branch: 280 branches.append(name) 281 else: 282 tags.append(name) 283 284 def itemlog(self, path_parts, rev, sortby, first, limit, options): 285 """see vclib.Repository.itemlog docstring 286 287 rev parameter can be a revision number, a branch number, a tag name, 288 or None. If None, will return information about all revisions, otherwise, 289 will only return information about the specified revision or branch. 290 291 Option values recognized by this implementation: 292 293 cvs_pass_rev 294 boolean, default false. set to true to pass rev parameter as -r 295 argument to rlog, this is more efficient but causes less 296 information to be returned 297 298 Option values returned by this implementation: 299 300 cvs_tags 301 dictionary of Tag objects for all tags encountered 302 """ 303 304 if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check 305 raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts))) 306 307 # Invoke rlog 308 rcsfile = self.rcsfile(path_parts, 1) 309 if rev and options.get('cvs_pass_rev', 0): 310 args = '-r' + rev, rcsfile 311 else: 312 args = rcsfile, 313 314 fp = self.rcs_popen('rlog', args, True, False) 315 filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp) 316 317 # Retrieve revision objects 318 revs = [] 319 while not eof: 320 revision, eof = _parse_log_entry(fp) 321 if revision: 322 revs.append(revision) 323 324 filtered_revs = _file_log(revs, tags, lockinfo, default_branch, rev) 325 326 options['cvs_tags'] = tags 327 if sortby == vclib.SORTBY_DATE: 328 filtered_revs.sort(key=functools.cmp_to_key(_logsort_date_cmp)) 329 elif sortby == vclib.SORTBY_REV: 330 filtered_revs.sort(key=functools.cmp_to_key(_logsort_rev_cmp)) 331 332 if len(filtered_revs) < first: 333 return [] 334 if limit: 335 return filtered_revs[first:first+limit] 336 return filtered_revs 337 338 def rcs_popen(self, rcs_cmd, rcs_args, is_text=False, capture_err=True): 339 # as we use this function as "r" mode only, we don't care stdin 340 # to communicate child process. 341 if self.utilities.cvsnt: 342 cmd = self.utilities.cvsnt 343 args = ['rcsfile', rcs_cmd] 344 args.extend(list(rcs_args)) 345 else: 346 cmd = os.path.join(self.utilities.rcs_dir, rcs_cmd) 347 args = rcs_args 348 stderr = subprocess.STDOUT if capture_err else subprocess.DEVNULL 349 if is_text: 350 proc = subprocess.Popen([cmd] + list(args), bufsize = -1, 351 stdout=subprocess.PIPE, 352 stderr=stderr, 353 encoding=self.encoding, 354 errors='surrogateescape', 355 close_fds=(sys.platform != "win32")) 356 else: 357 proc = subprocess.Popen([cmd] + list(args), bufsize = -1, 358 stdout=subprocess.PIPE, 359 stderr=stderr, 360 close_fds=(sys.platform != "win32")) 361 return proc.stdout 362 363 def annotate(self, path_parts, rev=None, include_text=False): 364 if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check 365 raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts))) 366 367 from vclib.ccvs import blame 368 source = blame.BlameSource(self.rcsfile(path_parts, 1), rev, 369 include_text, self.encoding) 370 return source, source.revision 371 372 def revinfo(self, rev): 373 raise vclib.UnsupportedFeature 374 375 def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}): 376 """see vclib.Repository.rawdiff docstring 377 378 Option values recognized by this implementation: 379 380 ignore_keyword_subst - boolean, ignore keyword substitution 381 """ 382 if self.itemtype(path_parts1, rev1) != vclib.FILE: # does auth-check 383 raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts1))) 384 if self.itemtype(path_parts2, rev2) != vclib.FILE: # does auth-check 385 raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts2))) 386 387 args = vclib._diff_args(type, options) 388 if options.get('ignore_keyword_subst', 0): 389 args.append('-kk') 390 391 rcsfile = self.rcsfile(path_parts1, 1) 392 if path_parts1 != path_parts2: 393 raise NotImplementedError("cannot diff across paths in cvs") 394 args.extend(['-r' + rev1, '-r' + rev2, rcsfile]) 395 396 fp = self.rcs_popen('rcsdiff', args, True) 397 398 # Eat up the non-GNU-diff-y headers. 399 while 1: 400 line = fp.readline() 401 if not line or line[0:5] == 'diff ': 402 break 403 return fp 404 405 406class CVSDirEntry(vclib.DirEntry): 407 def __init__(self, name, kind, errors, in_attic, absent=0): 408 vclib.DirEntry.__init__(self, name, kind, errors) 409 self.in_attic = in_attic 410 self.absent = absent # meaning, no revisions found on requested tag 411 412class Revision(vclib.Revision): 413 def __init__(self, revstr, date=None, author=None, dead=None, 414 changed=None, log=None): 415 vclib.Revision.__init__(self, _revision_tuple(revstr), revstr, 416 date, author, changed, log, None, None) 417 self.dead = dead 418 419class Tag: 420 def __init__(self, name, revstr): 421 self.name = name 422 self.number = _tag_tuple(revstr) 423 self.is_branch = len(self.number) % 2 == 1 or not self.number 424 425 426# ====================================================================== 427# Functions for dealing with Revision and Tag objects 428 429def _logsort_date_cmp(rev1, rev2): 430 # sort on date; secondary on revision number 431 return -cmp(rev1.date, rev2.date) or -cmp(rev1.number, rev2.number) 432 433def _logsort_rev_cmp(rev1, rev2): 434 # sort highest revision first 435 return -cmp(rev1.number, rev2.number) 436 437def _match_revs_tags(revlist, taglist): 438 """Match up a list of Revision objects with a list of Tag objects 439 440 Sets the following properties on each Revision in revlist: 441 "tags" 442 list of non-branch tags which refer to this revision 443 example: if revision is 1.2.3.4, tags is a list of all 1.2.3.4 tags 444 445 "branches" 446 list of branch tags which refer to this revision's branch 447 example: if revision is 1.2.3.4, branches is a list of all 1.2.3 tags 448 449 "branch_points" 450 list of branch tags which branch off of this revision 451 example: if revision is 1.2, it's a list of tags like 1.2.3 and 1.2.4 452 453 "prev" 454 reference to the previous revision, possibly None 455 example: if revision is 1.2.3.4, prev is 1.2.3.3 456 457 "next" 458 reference to next revision, possibly None 459 example: if revision is 1.2.3.4, next is 1.2.3.5 460 461 "parent" 462 reference to revision this one branches off of, possibly None 463 example: if revision is 1.2.3.4, parent is 1.2 464 465 "undead" 466 If the revision is dead, then this is a reference to the first 467 previous revision which isn't dead, otherwise it's a reference 468 to itself. If all the previous revisions are dead it's None. 469 470 "branch_number" 471 tuple representing branch number or empty tuple if on trunk 472 example: if revision is 1.2.3.4, branch_number is (1, 2, 3) 473 474 Each tag in taglist gets these properties set: 475 "co_rev" 476 reference to revision that would be retrieved if tag were checked out 477 478 "branch_rev" 479 reference to revision branched off of, only set for branch tags 480 example: if tag is 1.2.3, branch_rev points to 1.2 revision 481 482 "aliases" 483 list of tags that have the same number 484 """ 485 486 # map of branch numbers to lists of corresponding branch Tags 487 branch_dict = {} 488 489 # map of revision numbers to lists of non-branch Tags 490 tag_dict = {} 491 492 # map of revision numbers to lists of branch Tags 493 branch_point_dict = {} 494 495 # toss tags into "branch_dict", "tag_dict", and "branch_point_dict" 496 # set "aliases" property and default "co_rev" and "branch_rev" values 497 for tag in taglist: 498 tag.co_rev = None 499 if tag.is_branch: 500 tag.branch_rev = None 501 _dict_list_add(branch_point_dict, tag.number[:-1], tag) 502 tag.aliases = _dict_list_add(branch_dict, tag.number, tag) 503 else: 504 tag.aliases = _dict_list_add(tag_dict, tag.number, tag) 505 506 # sort the revisions so the loop below can work properly 507 revlist.sort() 508 509 # array of the most recently encountered revision objects indexed by depth 510 history = [] 511 512 # loop through revisions, setting properties and storing state in "history" 513 for rev in revlist: 514 depth = len(rev.number) // 2 - 1 515 516 # set "prev" and "next" properties 517 rev.prev = rev.next = None 518 if depth < len(history): 519 prev = history[depth] 520 if prev and (depth == 0 or rev.number[:-1] == prev.number[:-1]): 521 rev.prev = prev 522 prev.next = rev 523 524 # set "parent" 525 rev.parent = None 526 if depth and depth <= len(history): 527 parent = history[depth-1] 528 if parent and parent.number == rev.number[:-2]: 529 rev.parent = history[depth-1] 530 531 # set "undead" 532 if rev.dead: 533 prev = rev.prev or rev.parent 534 rev.undead = prev and prev.undead 535 else: 536 rev.undead = rev 537 538 # set "tags" and "branch_points" 539 rev.tags = tag_dict.get(rev.number, []) 540 rev.branch_points = branch_point_dict.get(rev.number, []) 541 542 # set "branches" and "branch_number" 543 if rev.prev: 544 rev.branches = rev.prev.branches 545 rev.branch_number = rev.prev.branch_number 546 else: 547 rev.branch_number = depth and rev.number[:-1] or () 548 try: 549 rev.branches = branch_dict[rev.branch_number] 550 except KeyError: 551 rev.branches = [] 552 553 # set "co_rev" and "branch_rev" 554 for tag in rev.tags: 555 tag.co_rev = rev 556 557 for tag in rev.branch_points: 558 tag.co_rev = rev 559 tag.branch_rev = rev 560 561 # This loop only needs to be run for revisions at the heads of branches, 562 # but for the simplicity's sake, it actually runs for every revision on 563 # a branch. The later revisions overwrite values set by the earlier ones. 564 for branch in rev.branches: 565 branch.co_rev = rev 566 567 # end of outer loop, store most recent revision in "history" array 568 while len(history) <= depth: 569 history.append(None) 570 history[depth] = rev 571 572def _add_tag(tag_name, revision): 573 """Create a new tag object and associate it with a revision""" 574 if revision: 575 tag = Tag(tag_name, revision.string) 576 tag.aliases = revision.tags 577 revision.tags.append(tag) 578 else: 579 tag = Tag(tag_name, None) 580 tag.aliases = [] 581 tag.co_rev = revision 582 tag.is_branch = 0 583 return tag 584 585def _remove_tag(tag): 586 """Remove a tag's associations""" 587 tag.aliases.remove(tag) 588 if tag.is_branch and tag.branch_rev: 589 tag.branch_rev.branch_points.remove(tag) 590 591def _revision_tuple(revision_string): 592 """convert a revision number into a tuple of integers""" 593 t = tuple(map(int, revision_string.split('.'))) 594 if len(t) % 2 == 0: 595 return t 596 raise ValueError 597 598def _tag_tuple(revision_string): 599 """convert a revision number or branch number into a tuple of integers""" 600 if revision_string: 601 t = [int(x) for x in revision_string.split('.')] 602 l = len(t) 603 if l == 1: 604 return () 605 if l > 2 and t[-2] == 0 and l % 2 == 0: 606 del t[-2] 607 return tuple(t) 608 return () 609 610def _dict_list_add(dict, idx, elem): 611 try: 612 list = dict[idx] 613 except KeyError: 614 list = dict[idx] = [elem] 615 else: 616 list.append(elem) 617 return list 618 619 620# ====================================================================== 621# Functions for parsing output from RCS utilities 622 623 624class COMalformedOutput(vclib.Error): 625 pass 626class COMissingRevision(vclib.Error): 627 pass 628 629### suck up other warnings in _re_co_warning? 630_re_co_filename = re.compile(br'^(.*),v\s+-->\s+(?:(?:standard output)|(?:stdout))\s*\n?$') 631_re_co_warning = re.compile(br'^.*co: .*,v: warning: Unknown phrases like .*\n$') 632_re_co_missing_rev = re.compile(br'^.*co: .*,v: revision.*absent\n$') 633_re_co_side_branches = re.compile(br'^.*co: .*,v: no side branches present for [\d\.]+\n$') 634_re_co_revision = re.compile(br'^revision\s+([\d\.]+)\s*\n$') 635 636def _parse_co_header(fp, encoding='utf-8'): 637 """Parse RCS co header. 638 639 fp is a file (pipe) opened for reading the co standard error stream. 640 641 Returns: (filename, revision) or (None, None) if output is empty 642 """ 643 644 # Python 3: in this context, fp is raw mode. 645 646 # header from co: 647 # 648 #/home/cvsroot/mod_dav/dav_shared_stub.c,v --> standard output 649 #revision 1.1 650 # 651 # Sometimes, the following line might occur at line 2: 652 #co: INSTALL,v: warning: Unknown phrases like `permissions ...;' are present. 653 654 # parse the output header 655 filename = None 656 657 # look for a filename in the first line (if there is a first line). 658 line = fp.readline() 659 if not line: 660 return None, None 661 match = _re_co_filename.match(line) 662 if not match: 663 raise COMalformedOutput("Unable to find filename in co output stream") 664 filename = match.group(1) 665 666 # look through subsequent lines for a revision. we might encounter 667 # some ignorable or problematic lines along the way. 668 while 1: 669 line = fp.readline() 670 if not line: 671 break 672 # look for a revision. 673 match = _re_co_revision.match(line) 674 if match: 675 return enc_decode(filename, encoding), enc_decode(match.group(1), encoding) 676 elif _re_co_missing_rev.match(line) or _re_co_side_branches.match(line): 677 raise COMissingRevision("Got missing revision error from co output stream") 678 elif _re_co_warning.match(line): 679 pass 680 else: 681 break 682 683 raise COMalformedOutput("Unable to find revision in co output stream") 684 685# if your rlog doesn't use 77 '=' characters, then this must change 686LOG_END_MARKER = '=' * 77 + '\n' 687ENTRY_END_MARKER = '-' * 28 + '\n' 688 689_EOF_FILE = 'end of file entries' # no more entries for this RCS file 690_EOF_LOG = 'end of log' # hit the true EOF on the pipe 691_EOF_ERROR = 'error message found' # rlog issued an error 692 693# rlog error messages look like 694# 695# rlog: filename/goes/here,v: error message 696# rlog: filename/goes/here,v:123: error message 697# 698# so we should be able to match them with a regex like 699# 700# ^rlog\: (.*)(?:\:\d+)?\: (.*)$ 701# 702# But for some reason the windows version of rlog omits the "rlog: " prefix 703# for the first error message when the standard error stream has been 704# redirected to a file or pipe. (the prefix is present in subsequent errors 705# and when rlog is run from the console). So the expression below is more 706# complicated 707_re_log_error = re.compile(r'^(?:rlog\: )*(.*,v)(?:\:\d+)?\: (.*)$') 708 709# CVSNT error messages look like: 710# cvs rcsfile: `C:/path/to/file,v' does not appear to be a valid rcs file 711# cvs [rcsfile aborted]: C:/path/to/file,v: No such file or directory 712# cvs [rcsfile aborted]: cannot open C:/path/to/file,v: Permission denied 713_re_cvsnt_error = re.compile(r'^(?:cvs rcsfile\: |cvs \[rcsfile aborted\]: )' 714 r'(?:\`(.*,v)\' |cannot open (.*,v)\: |(.*,v)\: |)' 715 r'(.*)$') 716 717def _parse_log_header(fp): 718 """Parse and RCS/CVS log header. 719 720 fp is a file (pipe) opened for reading the log information. 721 722 On entry, fp should point to the start of a log entry. 723 On exit, fp will have consumed the separator line between the header and 724 the first revision log. 725 726 If there is no revision information (e.g. the "-h" switch was passed to 727 rlog), then fp will consumed the file separator line on exit. 728 729 Returns: filename, default branch, tag dictionary, lock dictionary, 730 rlog error message, and eof flag 731 """ 732 733 filename = head = branch = msg = "" 734 taginfo = { } # tag name => number 735 lockinfo = { } # revision => locker 736 state = 0 # 0 = base, 1 = parsing symbols, 2 = parsing locks 737 eof = None 738 739 while 1: 740 line = fp.readline() 741 if not line: 742 # the true end-of-file 743 eof = _EOF_LOG 744 break 745 746 if state == 1: 747 if line[0] == '\t': 748 [ tag, rev ] = [x.strip() for x in line.split(':')] 749 taginfo[tag] = rev 750 else: 751 # oops. this line isn't tag info. stop parsing tags. 752 state = 0 753 754 if state == 2: 755 if line[0] == '\t': 756 [ locker, rev ] = [x.strip() for x in line.split(':')] 757 lockinfo[rev] = locker 758 else: 759 # oops. this line isn't lock info. stop parsing tags. 760 state = 0 761 762 if state == 0: 763 if line[:9] == 'RCS file:': 764 filename = line[10:-1] 765 elif line[:5] == 'head:': 766 head = line[6:-1] 767 elif line[:7] == 'branch:': 768 branch = line[8:-1] 769 elif line[:6] == 'locks:': 770 # start parsing the lock information 771 state = 2 772 elif line[:14] == 'symbolic names': 773 # start parsing the tag information 774 state = 1 775 elif line == ENTRY_END_MARKER: 776 # end of the headers 777 break 778 elif line == LOG_END_MARKER: 779 # end of this file's log information 780 eof = _EOF_FILE 781 break 782 else: 783 error = _re_cvsnt_error.match(line) 784 if error: 785 p1, p2, p3, msg = error.groups() 786 filename = p1 or p2 or p3 787 if not filename: 788 raise vclib.Error("Could not get filename from CVSNT error:\n%s" 789 % line) 790 eof = _EOF_ERROR 791 break 792 793 error = _re_log_error.match(line) 794 if error: 795 filename, msg = error.groups() 796 if msg[:30] == 'warning: Unknown phrases like ': 797 # don't worry about this warning. it can happen with some RCS 798 # files that have unknown fields in them (e.g. "permissions 644;" 799 continue 800 eof = _EOF_ERROR 801 break 802 803 return filename, branch, taginfo, lockinfo, msg, eof 804 805_re_log_info = re.compile(r'^date:\s+([^;]+);' 806 r'\s+author:\s+([^;]+);' 807 r'\s+state:\s+([^;]+);' 808 r'(\s+lines:\s+([0-9\s+-]+);?)?' 809 r'(\s+commitid:\s+([a-zA-Z0-9]+))?\n$') 810### _re_rev should be updated to extract the "locked" flag 811_re_rev = re.compile(r'^revision\s+([0-9.]+).*') 812def _parse_log_entry(fp): 813 """Parse a single log entry. 814 815 On entry, fp should point to the first line of the entry (the "revision" 816 line). 817 On exit, fp will have consumed the log separator line (dashes) or the 818 end-of-file marker (equals). 819 820 Returns: Revision object and eof flag (see _EOF_*) 821 """ 822 rev = None 823 line = fp.readline() 824 if not line: 825 return None, _EOF_LOG 826 if line == LOG_END_MARKER: 827 # Needed because some versions of RCS precede LOG_END_MARKER 828 # with ENTRY_END_MARKER 829 return None, _EOF_FILE 830 if line[:8] == 'revision': 831 match = _re_rev.match(line) 832 if not match: 833 return None, _EOF_LOG 834 rev = match.group(1) 835 836 line = fp.readline() 837 if not line: 838 return None, _EOF_LOG 839 match = _re_log_info.match(line) 840 841 eof = None 842 log = '' 843 while 1: 844 line = fp.readline() 845 if not line: 846 # true end-of-file 847 eof = _EOF_LOG 848 break 849 if line[:9] == 'branches:': 850 continue 851 if line == ENTRY_END_MARKER: 852 break 853 if line == LOG_END_MARKER: 854 # end of this file's log information 855 eof = _EOF_FILE 856 break 857 858 log = log + line 859 860 if not rev or not match: 861 # there was a parsing error 862 return None, eof 863 864 # parse out a time tuple for the local time 865 tm = vclib.ccvs.cvs_strptime(match.group(1)) 866 867 # rlog seems to assume that two-digit years are 1900-based (so, "04" 868 # comes out as "1904", not "2004"). 869 EPOCH = 1970 870 if tm[0] < EPOCH: 871 tm = list(tm) 872 if (tm[0] - 1900) < 70: 873 tm[0] = tm[0] + 100 874 if tm[0] < EPOCH: 875 raise ValueError('invalid year') 876 date = calendar.timegm(tm) 877 878 return Revision(rev, date, 879 # author, state, lines changed 880 match.group(2), match.group(3) == "dead", match.group(5), 881 log), eof 882 883def _skip_file(fp): 884 "Skip the rest of a file's log information." 885 while 1: 886 line = fp.readline() 887 if not line: 888 break 889 if line == LOG_END_MARKER: 890 break 891 892def _paths_eq(path1, path2): 893 "See if two path strings are the same" 894 # This function is neccessary because CVSNT (since version 2.0.29) 895 # converts paths passed as arguments to use upper case drive 896 # letter and forward slashes 897 return os.path.normcase(path1) == os.path.normcase(path2) 898 899 900# ====================================================================== 901# Functions for interpreting and manipulating log information 902 903def _file_log(revs, taginfo, lockinfo, cur_branch, filter): 904 """Augment list of Revisions and a dictionary of Tags""" 905 906 # Add artificial ViewVC tag MAIN. If the file has a default branch, then 907 # MAIN acts like a branch tag pointing to that branch. Otherwise MAIN acts 908 # like a branch tag that points to the trunk. (Note: A default branch is 909 # just a branch number specified in an RCS file that tells CVS and RCS 910 # what branch to use for checkout and update operations by default, when 911 # there's no revision argument or sticky branch to override it. Default 912 # branches get set by "cvs import" to point to newly created vendor 913 # branches. Sometimes they are also set manually with "cvs admin -b") 914 taginfo['MAIN'] = cur_branch 915 916 # Create tag objects 917 for name, num in taginfo.items(): 918 taginfo[name] = Tag(name, num) 919 tags = list(taginfo.values()) 920 921 # Set view_tag to a Tag object in order to filter results. We can filter by 922 # revision number or branch number 923 if filter: 924 try: 925 view_tag = Tag(None, filter) 926 except ValueError: 927 view_tag = None 928 else: 929 tags.append(view_tag) 930 931 # Match up tags and revisions 932 _match_revs_tags(revs, tags) 933 934 # Match up lockinfo and revision 935 for rev in revs: 936 rev.lockinfo = lockinfo.get(rev.string) 937 938 # Add artificial ViewVC tag HEAD, which acts like a non-branch tag pointing 939 # at the latest revision on the MAIN branch. The HEAD revision doesn't have 940 # anything to do with the "head" revision number specified in the RCS file 941 # and in rlog output. HEAD refers to the revision that the CVS and RCS co 942 # commands will check out by default, whereas the "head" field just refers 943 # to the highest revision on the trunk. 944 taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev) 945 946 # Determine what revisions to return 947 if filter: 948 # If view_tag isn't set, it means filter is not a valid revision or 949 # branch number. Check taginfo to see if filter is set to a valid tag 950 # name. If so, filter by that tag, otherwise raise an error. 951 if not view_tag: 952 try: 953 view_tag = taginfo[filter] 954 except KeyError: 955 raise vclib.Error('Invalid tag or revision number "%s"' % filter) 956 filtered_revs = [ ] 957 958 # only include revisions on the tag branch or it's parent branches 959 if view_tag.is_branch: 960 branch = view_tag.number 961 elif len(view_tag.number) > 2: 962 branch = view_tag.number[:-1] 963 else: 964 branch = () 965 966 # for a normal tag, include all tag revision and all preceding revisions. 967 # for a branch tag, include revisions on branch, branch point revision, 968 # and all preceding revisions 969 for rev in revs: 970 if (rev.number == view_tag.number 971 or rev.branch_number == view_tag.number 972 or (rev.number < view_tag.number 973 and rev.branch_number == branch[:len(rev.branch_number)])): 974 filtered_revs.append(rev) 975 976 # get rid of the view_tag if it was only created for filtering 977 if view_tag.name is None: 978 _remove_tag(view_tag) 979 else: 980 filtered_revs = revs 981 982 return filtered_revs 983 984def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs): 985 alltags = { # all the tags seen in the files of this dir 986 'MAIN' : '', 987 'HEAD' : '1.1' 988 } 989 990 entries_idx = 0 991 entries_len = len(entries) 992 max_args = 100 993 994 while 1: 995 chunk = [] 996 997 while len(chunk) < max_args and entries_idx < entries_len: 998 entry = entries[entries_idx] 999 path = _log_path(entry, repos._getpath(dir_path_parts), get_dirs) 1000 if path: 1001 entry.path = path 1002 entry.idx = entries_idx 1003 chunk.append(entry) 1004 1005 # set properties even if we don't retrieve logs 1006 entry.rev = entry.date = entry.author = None 1007 entry.dead = entry.log = entry.lockinfo = None 1008 1009 entries_idx = entries_idx + 1 1010 1011 if not chunk: 1012 return alltags 1013 1014 args = [] 1015 if not view_tag: 1016 # NOTE: can't pass tag on command line since a tag may contain "-" 1017 # we'll search the output for the appropriate revision 1018 # fetch the latest revision on the default branch 1019 args.append('-r') 1020 args.extend([x.path for x in chunk]) 1021 rlog = repos.rcs_popen('rlog', args, True) 1022 1023 # consume each file found in the resulting log 1024 chunk_idx = 0 1025 while chunk_idx < len(chunk): 1026 file = chunk[chunk_idx] 1027 filename, default_branch, taginfo, lockinfo, msg, eof \ 1028 = _parse_log_header(rlog) 1029 1030 if eof == _EOF_LOG: 1031 # the rlog output ended early. this can happen on errors that rlog 1032 # thinks are so serious that it stops parsing the current file and 1033 # refuses to parse any of the files that come after it. one of the 1034 # errors that triggers this obnoxious behavior looks like: 1035 # 1036 # rlog: c:\cvsroot\dir\file,v:8: unknown expand mode u 1037 # rlog aborted 1038 1039 # if current file has errors, restart on the next one 1040 if file.errors: 1041 chunk_idx = chunk_idx + 1 1042 if chunk_idx < len(chunk): 1043 entries_idx = chunk[chunk_idx].idx 1044 break 1045 1046 # otherwise just error out 1047 raise vclib.Error('Rlog output ended early. Expected RCS file "%s"' 1048 % file.path) 1049 1050 # if rlog filename doesn't match current file and we already have an 1051 # error message about this file, move on to the next file 1052 while not (file and _paths_eq(file.path, filename)): 1053 if file and file.errors: 1054 chunk_idx = chunk_idx + 1 1055 file = chunk_idx < len(chunk) and chunk[chunk_idx] or None 1056 continue 1057 1058 raise vclib.Error('Error parsing rlog output. Expected RCS file %s' 1059 ', found %s' % (file and file.path, filename)) 1060 1061 # if we get an rlog error message, restart loop without advancing 1062 # chunk_idx cause there might be more output about the same file 1063 if eof == _EOF_ERROR: 1064 file.errors.append("rlog error: %s" % msg) 1065 continue 1066 1067 tag = None 1068 if view_tag == 'MAIN' or view_tag == 'HEAD': 1069 tag = Tag(None, default_branch) 1070 elif view_tag in taginfo: 1071 tag = Tag(None, taginfo[view_tag]) 1072 elif view_tag and (eof != _EOF_FILE): 1073 # the tag wasn't found, so skip this file (unless we already 1074 # know there's nothing left of it to read) 1075 _skip_file(rlog) 1076 eof = _EOF_FILE 1077 1078 # we don't care about the specific values -- just the keys and whether 1079 # the values point to branches or revisions. this the fastest way to 1080 # merge the set of keys and keep values that allow us to make the 1081 # distinction between branch tags and normal tags 1082 alltags.update(taginfo) 1083 1084 # read all of the log entries until we find the revision we want 1085 wanted_entry = None 1086 while not eof: 1087 1088 # fetch one of the log entries 1089 entry, eof = _parse_log_entry(rlog) 1090 1091 if not entry: 1092 # parsing error 1093 break 1094 1095 # A perfect match is a revision on the branch being viewed or 1096 # a revision having the tag being viewed or any revision 1097 # when nothing is being viewed. When there's a perfect match 1098 # we set the wanted_entry value and break out of the loop. 1099 # An imperfect match is a revision at the branch point of a 1100 # branch being viewed. When there's an imperfect match we 1101 # also set the wanted_entry value but keep looping in case 1102 # something better comes along. 1103 perfect = not tag or entry.number == tag.number or \ 1104 (len(entry.number) == 2 and not tag.number) or \ 1105 entry.number[:-1] == tag.number 1106 if perfect or entry.number == tag.number[:-1]: 1107 wanted_entry = entry 1108 if perfect: 1109 break 1110 1111 if wanted_entry: 1112 file.rev = wanted_entry.string 1113 file.date = wanted_entry.date 1114 file.author = wanted_entry.author 1115 file.dead = file.kind == vclib.FILE and wanted_entry.dead 1116 file.absent = 0 1117 file.log = wanted_entry.log 1118 file.lockinfo = lockinfo.get(file.rev) 1119 # suppress rlog errors if we find a usable revision in the end 1120 del file.errors[:] 1121 elif file.kind == vclib.FILE: 1122 file.dead = 0 1123 #file.errors.append("No revisions exist on %s" % (view_tag or "MAIN")) 1124 file.absent = 1 1125 1126 # done with this file now, skip the rest of this file's revisions 1127 if not eof: 1128 _skip_file(rlog) 1129 1130 # end of while loop, advance index 1131 chunk_idx = chunk_idx + 1 1132 1133 rlog.close() 1134 1135def _log_path(entry, dirpath, getdirs): 1136 path = name = None 1137 if not entry.errors: 1138 if entry.kind == vclib.FILE: 1139 path = entry.in_attic and 'Attic' or '' 1140 name = entry.name 1141 elif entry.kind == vclib.DIR and getdirs: 1142 entry.newest_file = _newest_file(os.path.join(dirpath, entry.name)) 1143 if entry.newest_file: 1144 path = entry.name 1145 name = entry.newest_file 1146 1147 if name: 1148 return os.path.join(dirpath, path, name + ',v') 1149 return None 1150 1151 1152# ====================================================================== 1153# Functions for dealing with the filesystem 1154 1155if sys.platform == "win32": 1156 def _check_path(path): 1157 kind = None 1158 errors = [] 1159 1160 if os.path.isfile(path): 1161 kind = vclib.FILE 1162 elif os.path.isdir(path): 1163 kind = vclib.DIR 1164 else: 1165 errors.append("error: path is not a file or directory") 1166 1167 if not os.access(path, os.R_OK): 1168 errors.append("error: path is not accessible") 1169 1170 return kind, errors 1171 1172else: 1173 _uid = os.getuid() 1174 _gid = os.getgid() 1175 1176 def _check_path(pathname): 1177 try: 1178 info = os.stat(pathname) 1179 except os.error as e: 1180 return None, ["stat error: %s" % e] 1181 1182 kind = None 1183 errors = [] 1184 1185 mode = info[stat.ST_MODE] 1186 isdir = stat.S_ISDIR(mode) 1187 isreg = stat.S_ISREG(mode) 1188 if isreg or isdir: 1189 # 1190 # Quick version of access() where we use existing stat() data. 1191 # 1192 # This might not be perfect -- the OS may return slightly different 1193 # results for some bizarre reason. However, we make a good show of 1194 # "can I read this file/dir?" by checking the various perm bits. 1195 # 1196 # NOTE: if the UID matches, then we must match the user bits -- we 1197 # cannot defer to group or other bits. Similarly, if the GID matches, 1198 # then we must have read access in the group bits. 1199 # 1200 # If the UID or GID don't match, we need to check the 1201 # results of an os.access() call, in case the web server process 1202 # is in the group that owns the directory. 1203 # 1204 if isdir: 1205 mask = stat.S_IROTH | stat.S_IXOTH 1206 else: 1207 mask = stat.S_IROTH 1208 1209 if info[stat.ST_UID] == _uid: 1210 if ((mode >> 6) & mask) != mask: 1211 errors.append("error: path is not accessible to user %i" % _uid) 1212 elif info[stat.ST_GID] == _gid: 1213 if ((mode >> 3) & mask) != mask: 1214 errors.append("error: path is not accessible to group %i" % _gid) 1215 # If the process running the web server is a member of 1216 # the group stat.ST_GID access may be granted. 1217 # so the fall back to os.access is needed to figure this out. 1218 elif (mode & mask) != mask: 1219 if not os.access(pathname, isdir and (os.R_OK | os.X_OK) or os.R_OK): 1220 errors.append("error: path is not accessible") 1221 1222 if isdir: 1223 kind = vclib.DIR 1224 else: 1225 kind = vclib.FILE 1226 1227 else: 1228 errors.append("error: path is not a file or directory") 1229 1230 return kind, errors 1231 1232def _newest_file(dirpath): 1233 """Find the last modified RCS file in a directory""" 1234 newest_file = None 1235 newest_time = 0 1236 1237 ### FIXME: This sucker is leaking unauthorized paths! ### 1238 1239 for subfile in os.listdir(dirpath): 1240 ### filter CVS locks? stale NFS handles? 1241 if subfile[-2:] != ',v': 1242 continue 1243 path = os.path.join(dirpath, subfile) 1244 info = os.stat(path) 1245 if not stat.S_ISREG(info[stat.ST_MODE]): 1246 continue 1247 if info[stat.ST_MTIME] > newest_time: 1248 kind, verboten = _check_path(path) 1249 if kind == vclib.FILE and not verboten: 1250 newest_file = subfile[:-2] 1251 newest_time = info[stat.ST_MTIME] 1252 1253 return newest_file 1254