1# -*-python-*-
2#
3# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved.
4#
5# By using this file, you agree to the terms and conditions set forth in
6# the LICENSE.html file which can be found at the top level of the ViewVC
7# distribution or at http://viewvc.org/license-1.html.
8#
9# For more information, visit http://viewvc.org/
10#
11# -----------------------------------------------------------------------
12
13"Version Control lib driver for locally accessible cvs-repositories."
14
15import vclib
16import vcauth
17import os
18import os.path
19import sys
20import stat
21import re
22import time
23import calendar
24import subprocess
25import vclib.ccvs
26import functools
27
28# Python 3: workaround for cmp()
def cmp(a, b):
  """Three-way comparison standing in for the Python 2 builtin cmp().

  Returns 1 when A > B, -1 when A < B, and 0 otherwise.
  """
  is_greater = a > b
  is_less = a < b
  return is_greater - is_less
31
def enc_decode(s, encoding='utf-8'):
  """Decode the bytes S with ENCODING; None is passed through unchanged.

  The 'surrogateescape' error handler is used, so bytes that are not
  valid in ENCODING do not raise.
  """
  return None if s is None else s.decode(encoding, 'surrogateescape')
36
37def _path_join(path_parts):
38  return '/'.join(path_parts)
39
class BaseCVSRepository(vclib.Repository):
  """Base class for drivers of locally accessible CVS repositories.

  Items are resolved by inspecting the directory tree below the repository
  root: a directory is a directory, and a file is an RCS file named
  "<name>,v" found either in place or in the sibling "Attic" directory.
  """

  def __init__(self, name, rootpath, authorizer, utilities, encoding):
    """Record the repository settings and verify that it is viewable.

    Raises vclib.ReposNotFound if ROOTPATH is not a directory, or if
    vclib.check_root_access() reports that the root is not accessible.
    """
    if not os.path.isdir(rootpath):
      raise vclib.ReposNotFound(name)

    self.name = name
    self.rootpath = rootpath
    self.auth = authorizer
    self.utilities = utilities
    self.encoding = encoding

    # See if this repository is even viewable, authz-wise.
    if not vclib.check_root_access(self):
      raise vclib.ReposNotFound(name)

  def open(self):
    """Drop the authorizer when it grants universal read access."""
    # See if a universal read access determination can be made.
    if self.auth and self.auth.check_universal_access(self.name) == 1:
      self.auth = None

  def rootname(self):
    """Return the repository name given to the constructor."""
    return self.name

  def rootpath(self):
    """Return the repository root path.

    NOTE(review): __init__ stores the root location in an instance
    attribute that is also called "rootpath".  Instance attributes win
    over methods on attribute lookup, so `repo.rootpath` evaluates to the
    path string and this method cannot be reached through an instance.
    It is left as-is because _getpath() reads the attribute.
    """
    return self.rootpath

  def roottype(self):
    """Return vclib.CVS."""
    return vclib.CVS

  def authorizer(self):
    """Return the authorizer in use (None once open() has dropped it)."""
    return self.auth

  def itemtype(self, path_parts, rev):
    """Return vclib.DIR or vclib.FILE for the item at PATH_PARTS.

    Raises vclib.ItemNotFound if there is neither a directory nor an RCS
    file (in place or in the Attic) for the path, or if
    vclib.check_path_access() denies access to it.
    """
    basepath = self._getpath(path_parts)
    kind = None
    if os.path.isdir(basepath):
      kind = vclib.DIR
    elif os.path.isfile(basepath + ',v'):
      kind = vclib.FILE
    else:
      atticpath = self._getpath(self._atticpath(path_parts))
      if os.path.isfile(atticpath + ',v'):
        kind = vclib.FILE
    if not kind:
      raise vclib.ItemNotFound(path_parts)
    if not vclib.check_path_access(self, path_parts, kind, rev):
      raise vclib.ItemNotFound(path_parts)
    return kind

  def itemprops(self, path_parts, rev):
    """Return an empty dictionary after validating the item."""
    self.itemtype(path_parts, rev)  # does auth-check
    return {}  # CVS doesn't support properties

  def listdir(self, path_parts, rev, options):
    """Return a list of CVSDirEntry objects for the directory PATH_PARTS.

    Entries found in the directory itself come first (in_attic == 0),
    followed by entries found in its "Attic" subdirectory (in_attic == 1).
    Raises vclib.Error if the path is not a directory.
    """
    if self.itemtype(path_parts, rev) != vclib.DIR:  # does auth-check
      raise vclib.Error("Path '%s' is not a directory."
                        % (_path_join(path_parts)))

    # Only RCS files (*,v) and subdirs are returned.
    full_name = self._getpath(path_parts)
    data = self._scan_rcs_dir(full_name, path_parts, rev, 0)

    attic_name = os.path.join(full_name, 'Attic')
    if os.path.isdir(attic_name):
      data.extend(self._scan_rcs_dir(attic_name, path_parts, rev, 1))

    return data

  def _scan_rcs_dir(self, dirpath, path_parts, rev, in_attic):
    """Return CVSDirEntry objects for the accessible items in DIRPATH.

    DIRPATH is the filesystem directory to read; PATH_PARTS is the
    repository path the resulting entries are reported under.  IN_ATTIC
    is stored on each entry, and when true subdirectories are skipped.
    """
    entries = []
    for fname in os.listdir(dirpath):
      name = None
      kind, errors = _check_path(os.path.join(dirpath, fname))
      if kind == vclib.FILE:
        # only RCS files count; strip the ",v" suffix
        if fname.endswith(',v'):
          name = fname[:-2]
      elif kind == vclib.DIR:
        # CVS directory is for fileattr
        if not in_attic and fname != 'Attic' and fname != 'CVS':
          name = fname
      else:
        # kind could not be determined; report the entry under its raw name
        name = fname
      if not name:
        continue
      if vclib.check_path_access(self, path_parts + [name], kind, rev):
        entries.append(CVSDirEntry(name, kind, errors, in_attic))
    return entries

  def _getpath(self, path_parts):
    """Return the filesystem path of PATH_PARTS below the repository root."""
    return os.path.join(*((self.rootpath,) + tuple(path_parts)))

  def _atticpath(self, path_parts):
    """Return PATH_PARTS with 'Attic' inserted before the last component."""
    return path_parts[:-1] + ['Attic'] + path_parts[-1:]

  def rcsfile(self, path_parts, root=0, v=1):
    """Return path to RCS file

    The file is looked for in place first and then in the Attic.  If ROOT
    is true the result is a filesystem path that includes the repository
    root; otherwise it is the repository-relative path joined with '/'.
    If V is true the ",v" suffix is appended.  Raises vclib.ItemNotFound
    if no RCS file exists in either location.
    """

    ret_parts = path_parts
    ret_file = self._getpath(ret_parts)
    if not os.path.isfile(ret_file + ',v'):
      ret_parts = self._atticpath(path_parts)
      ret_file = self._getpath(ret_parts)
      if not os.path.isfile(ret_file + ',v'):
        raise vclib.ItemNotFound(path_parts)
    if root:
      ret = ret_file
    else:
      ret = _path_join(ret_parts)
    if v:
      ret = ret + ",v"
    return ret

  def isexecutable(self, path_parts, rev):
    """Return whether the RCS file for PATH_PARTS is executable (os.X_OK).

    Raises vclib.Error if the path is not a file.
    """
    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
    rcsfile = self.rcsfile(path_parts, 1)
    return os.access(rcsfile, os.X_OK)

  def filesize(self, path_parts, rev):
    """Validate that PATH_PARTS is a file and return -1.

    No size is computed here.  Raises vclib.Error if the path is not a
    file.
    """
    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
    return -1
169
class BinCVSRepository(BaseCVSRepository):
  """CVS repository driver that shells out to the RCS utilities.

  File contents, logs and diffs are obtained by running "co", "rlog" and
  "rcsdiff" (or CVSNT's "rcsfile" subcommand) through rcs_popen().
  """

  def _get_tip_revision(self, rcs_file, rev=None):
    """Get the (basically) youngest revision (filtered by REV).

    Runs rlog on RCS_FILE, parses every log entry, and returns the last
    Revision that _file_log() yields, or None if it yields nothing.
    """
    args = rcs_file,
    fp = self.rcs_popen('rlog', args, True, False)
    filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp)
    revs = []
    while not eof:
      revision, eof = _parse_log_entry(fp)
      if revision:
        revs.append(revision)
    revs = _file_log(revs, tags, lockinfo, default_branch, rev)
    if revs:
      return revs[-1]
    return None

  def openfile(self, path_parts, rev, options):
    """see vclib.Repository.openfile docstring

    Option values recognized by this implementation:

      cvs_oldkeywords
        boolean. true to use the original keyword substitution values.

    Returns a (file object, revision string) pair; the file object is the
    co output pipe positioned after the header lines.  Raises vclib.Error
    if the path is not a file, if no usable revision can be found, or if
    the co header is missing or names a different file.
    """
    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
    if not rev or rev == 'HEAD' or rev == 'MAIN':
      rev_flag = '-p'
    else:
      rev_flag = '-p' + rev
    if options.get('cvs_oldkeywords', 0):
      kv_flag = '-ko'
    else:
      kv_flag = '-kkv'
    full_name = self.rcsfile(path_parts, root=1, v=0)
    used_rlog = 0
    tip_rev = None  # used only if we have to fallback to using rlog
    fp = self.rcs_popen('co', (kv_flag, rev_flag, full_name))
    try:
      filename, revision = _parse_co_header(fp, self.encoding)
    except COMissingRevision:
      # We got a "revision X.Y.Z absent" error from co.  This could be
      # because we were asked to find a tip of a branch, which co
      # doesn't seem to handle.  So we do rlog-gy stuff to figure out
      # which revision the tip of the branch currently maps to.
      ### TODO: Only do this when 'rev' is a branch symbol name?
      if not used_rlog:
        tip_rev = self._get_tip_revision(full_name + ',v', rev)
        used_rlog = 1
      if not tip_rev:
        raise vclib.Error("Unable to find valid revision")
      # NOTE(review): this retry (and the one below) does not pass kv_flag;
      # confirm whether the keyword substitution mode should carry over.
      fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name))
      # Fixed: this used to read the misspelled attribute "self.encodig",
      # which raised AttributeError whenever this fallback path was taken.
      filename, revision = _parse_co_header(fp, self.encoding)

    if filename is None:
      # CVSNT's co exits without any output if a dead revision is requested.
      # Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190
      # As a workaround, we invoke rlog to find the first non-dead revision
      # that precedes it and check out that revision instead.  Of course,
      # if we've already invoked rlog above, we just reuse its output.
      if not used_rlog:
        tip_rev = self._get_tip_revision(full_name + ',v', rev)
        used_rlog = 1
      if not (tip_rev and tip_rev.undead):
        raise vclib.Error(
          'Could not find non-dead revision preceding "%s"' % rev)
      fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string, full_name))
      filename, revision = _parse_co_header(fp, self.encoding)

    if filename is None:
      raise vclib.Error('Missing output from co (filename = "%s")' % full_name)

    if not _paths_eq(filename, full_name):
      raise vclib.Error(
        'The filename from co ("%s") did not match (expected "%s")'
        % (filename, full_name))

    return fp, revision

  def dirlogs(self, path_parts, rev, entries, options):
    """see vclib.Repository.dirlogs docstring

    rev can be a tag name or None. if set only information from revisions
    matching the tag will be retrieved

    Option values recognized by this implementation:

      cvs_subdirs
        boolean. true to fetch logs of the most recently modified file in each
        subdirectory

    Option values returned by this implementation:

      cvs_tags, cvs_branches
        lists of tag and branch names encountered in the directory
    """
    if self.itemtype(path_parts, rev) != vclib.DIR:  # does auth-check
      raise vclib.Error("Path '%s' is not a directory."
                        % (_path_join(path_parts)))

    subdirs = options.get('cvs_subdirs', 0)
    # only fetch logs for the entries the authorizer lets us see
    entries_to_fetch = []
    for entry in entries:
      if vclib.check_path_access(self, path_parts + [entry.name], None, rev):
        entries_to_fetch.append(entry)
    alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs)
    branches = options['cvs_branches'] = []
    tags = options['cvs_tags'] = []
    # a distinct loop name keeps the REV parameter from being overwritten
    for name, tag_rev in alltags.items():
      if Tag(None, tag_rev).is_branch:
        branches.append(name)
      else:
        tags.append(name)

  def itemlog(self, path_parts, rev, sortby, first, limit, options):
    """see vclib.Repository.itemlog docstring

    rev parameter can be a revision number, a branch number, a tag name,
    or None. If None, will return information about all revisions, otherwise,
    will only return information about the specified revision or branch.

    Option values recognized by this implementation:

      cvs_pass_rev
        boolean, default false. set to true to pass rev parameter as -r
        argument to rlog, this is more efficient but causes less
        information to be returned

    Option values returned by this implementation:

      cvs_tags
        dictionary of Tag objects for all tags encountered
    """

    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))

    # Invoke rlog
    rcsfile = self.rcsfile(path_parts, 1)
    if rev and options.get('cvs_pass_rev', 0):
      args = '-r' + rev, rcsfile
    else:
      args = rcsfile,

    fp = self.rcs_popen('rlog', args, True, False)
    filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp)

    # Retrieve revision objects
    revs = []
    while not eof:
      revision, eof = _parse_log_entry(fp)
      if revision:
        revs.append(revision)

    filtered_revs = _file_log(revs, tags, lockinfo, default_branch, rev)

    options['cvs_tags'] = tags
    if sortby == vclib.SORTBY_DATE:
      filtered_revs.sort(key=functools.cmp_to_key(_logsort_date_cmp))
    elif sortby == vclib.SORTBY_REV:
      filtered_revs.sort(key=functools.cmp_to_key(_logsort_rev_cmp))

    if len(filtered_revs) < first:
      return []
    if limit:
      return filtered_revs[first:first+limit]
    # NOTE(review): without a LIMIT the whole list is returned and FIRST is
    # not applied -- confirm that this is intended.
    return filtered_revs

  def rcs_popen(self, rcs_cmd, rcs_args, is_text=False, capture_err=True):
    """Run the RCS utility RCS_CMD with RCS_ARGS and return its stdout pipe.

    With CVSNT configured the command is run as "<cvsnt> rcsfile RCS_CMD
    ..."; otherwise RCS_CMD is looked up in the configured RCS directory.
    If IS_TEXT is true the pipe is opened in text mode using the
    repository encoding with the 'surrogateescape' error handler,
    otherwise it yields bytes.  If CAPTURE_ERR is true the child's stderr
    is merged into the returned stream, otherwise it is discarded.
    """
    # as we use this function as "r" mode only, we don't care stdin
    # to communicate child process.
    if self.utilities.cvsnt:
      cmd = self.utilities.cvsnt
      args = ['rcsfile', rcs_cmd]
      args.extend(list(rcs_args))
    else:
      cmd = os.path.join(self.utilities.rcs_dir, rcs_cmd)
      args = rcs_args
    popen_kwargs = {
      'bufsize': -1,
      'stdout': subprocess.PIPE,
      'stderr': subprocess.STDOUT if capture_err else subprocess.DEVNULL,
      'close_fds': (sys.platform != "win32"),
    }
    if is_text:
      popen_kwargs['encoding'] = self.encoding
      popen_kwargs['errors'] = 'surrogateescape'
    proc = subprocess.Popen([cmd] + list(args), **popen_kwargs)
    return proc.stdout

  def annotate(self, path_parts, rev=None, include_text=False):
    """Return a (blame source, revision) pair for the file at PATH_PARTS.

    Raises vclib.Error if the path is not a file.
    """
    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))

    from vclib.ccvs import blame
    source = blame.BlameSource(self.rcsfile(path_parts, 1), rev,
                               include_text, self.encoding)
    return source, source.revision

  def revinfo(self, rev):
    """Not supported by this driver; raises vclib.UnsupportedFeature."""
    raise vclib.UnsupportedFeature

  def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options=None):
    """see vclib.Repository.rawdiff docstring

    Option values recognized by this implementation:

      ignore_keyword_subst - boolean, ignore keyword substitution

    Returns the rcsdiff output pipe (text mode) positioned just after the
    first line that starts with "diff ".  Raises vclib.Error if either
    path is not a file and NotImplementedError if the two paths differ.
    """
    # a None default avoids sharing one dictionary between calls
    if options is None:
      options = {}
    if self.itemtype(path_parts1, rev1) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts1)))
    if self.itemtype(path_parts2, rev2) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts2)))

    args = vclib._diff_args(type, options)
    if options.get('ignore_keyword_subst', 0):
      args.append('-kk')

    rcsfile = self.rcsfile(path_parts1, 1)
    if path_parts1 != path_parts2:
      raise NotImplementedError("cannot diff across paths in cvs")
    args.extend(['-r' + rev1, '-r' + rev2, rcsfile])

    fp = self.rcs_popen('rcsdiff', args, True)

    # Eat up the non-GNU-diff-y headers.
    while 1:
      line = fp.readline()
      if not line or line[0:5] == 'diff ':
        break
    return fp
405
class CVSDirEntry(vclib.DirEntry):
  """Directory entry extended with CVS-specific Attic and absence flags."""

  def __init__(self, name, kind, errors, in_attic, absent=0):
    super().__init__(name, kind, errors)
    # "absent" means that no revisions were found on the requested tag
    self.absent = absent
    self.in_attic = in_attic
411
class Revision(vclib.Revision):
  """A single RCS revision; adds the "dead" state flag to vclib.Revision."""

  def __init__(self, revstr, date=None, author=None, dead=None,
               changed=None, log=None):
    # the numeric tuple form of the revision string is what gets compared
    number = _revision_tuple(revstr)
    super().__init__(number, revstr, date, author, changed, log, None, None)
    self.dead = dead
418
class Tag:
  """A symbolic name bound to a revision number or a branch number."""

  def __init__(self, name, revstr):
    self.name = name
    number = _tag_tuple(revstr)
    self.number = number
    # an empty number, or one with an odd count of components, is a branch
    self.is_branch = not number or len(number) % 2 == 1
424
425
426# ======================================================================
427# Functions for dealing with Revision and Tag objects
428
def _logsort_date_cmp(rev1, rev2):
  """Comparator that puts later dates first, then higher revision numbers."""
  by_date = -cmp(rev1.date, rev2.date)
  if by_date:
    return by_date
  # same date: fall back to the revision number
  return -cmp(rev1.number, rev2.number)
432
def _logsort_rev_cmp(rev1, rev2):
  """Comparator that puts the highest revision number first."""
  ascending = cmp(rev1.number, rev2.number)
  return -ascending
436
def _match_revs_tags(revlist, taglist):
  """Match up a list of Revision objects with a list of Tag objects

  REVLIST is sorted in place as a side effect; nothing is returned.

  Sets the following properties on each Revision in revlist:
    "tags"
      list of non-branch tags which refer to this revision
      example: if revision is 1.2.3.4, tags is a list of all 1.2.3.4 tags

    "branches"
      list of branch tags which refer to this revision's branch
      example: if revision is 1.2.3.4, branches is a list of all 1.2.3 tags

    "branch_points"
      list of branch tags which branch off of this revision
      example: if revision is 1.2, it's a list of tags like 1.2.3 and 1.2.4

    "prev"
      reference to the previous revision, possibly None
      example: if revision is 1.2.3.4, prev is 1.2.3.3

    "next"
      reference to next revision, possibly None
      example: if revision is 1.2.3.4, next is 1.2.3.5

    "parent"
      reference to revision this one branches off of, possibly None
      example: if revision is 1.2.3.4, parent is 1.2

    "undead"
      If the revision is dead, then this is a reference to the first
      previous revision which isn't dead, otherwise it's a reference
      to itself. If all the previous revisions are dead it's None.

    "branch_number"
      tuple representing branch number or empty tuple if on trunk
      example: if revision is 1.2.3.4, branch_number is (1, 2, 3)

  Each tag in taglist gets these properties set:
    "co_rev"
      reference to revision that would be retrieved if tag were checked out

    "branch_rev"
      reference to revision branched off of, only set for branch tags
      example: if tag is 1.2.3, branch_rev points to 1.2 revision

    "aliases"
      list of tags that have the same number
  """

  # map of branch numbers to lists of corresponding branch Tags
  branch_dict = {}

  # map of revision numbers to lists of non-branch Tags
  tag_dict = {}

  # map of revision numbers to lists of branch Tags
  branch_point_dict = {}

  # toss tags into "branch_dict", "tag_dict", and "branch_point_dict"
  # set "aliases" property and default "co_rev" and "branch_rev" values
  for tag in taglist:
    tag.co_rev = None
    if tag.is_branch:
      tag.branch_rev = None
      # a branch numbered 1.2.3 sprouts from revision 1.2, hence number[:-1]
      _dict_list_add(branch_point_dict, tag.number[:-1], tag)
      tag.aliases = _dict_list_add(branch_dict, tag.number, tag)
    else:
      tag.aliases = _dict_list_add(tag_dict, tag.number, tag)

  # sort the revisions so the loop below can work properly
  revlist.sort()

  # array of the most recently encountered revision objects indexed by depth
  history = []

  # loop through revisions, setting properties and storing state in "history"
  for rev in revlist:
    # depth is 0 for two-component numbers (e.g. 1.2), 1 for four-component
    # numbers (e.g. 1.2.3.4), and so on
    depth = len(rev.number) // 2 - 1

    # set "prev" and "next" properties
    rev.prev = rev.next = None
    if depth < len(history):
      prev = history[depth]
      # at depth 0 any earlier revision is a predecessor; deeper down the
      # two revisions must also share everything but the last component
      if prev and (depth == 0 or rev.number[:-1] == prev.number[:-1]):
        rev.prev = prev
        prev.next = rev

    # set "parent"
    rev.parent = None
    if depth and depth <= len(history):
      parent = history[depth-1]
      # the parent's number is this revision's number minus its last two
      # components
      if parent and parent.number == rev.number[:-2]:
        rev.parent = history[depth-1]

    # set "undead"
    if rev.dead:
      prev = rev.prev or rev.parent
      rev.undead = prev and prev.undead
    else:
      rev.undead = rev

    # set "tags" and "branch_points"
    rev.tags = tag_dict.get(rev.number, [])
    rev.branch_points = branch_point_dict.get(rev.number, [])

    # set "branches" and "branch_number"
    if rev.prev:
      # same branch as the predecessor, so its values are reused
      rev.branches = rev.prev.branches
      rev.branch_number = rev.prev.branch_number
    else:
      rev.branch_number = depth and rev.number[:-1] or ()
      try:
        rev.branches = branch_dict[rev.branch_number]
      except KeyError:
        rev.branches = []

    # set "co_rev" and "branch_rev"
    for tag in rev.tags:
      tag.co_rev = rev

    for tag in rev.branch_points:
      tag.co_rev = rev
      tag.branch_rev = rev

    # This loop only needs to be run for revisions at the heads of branches,
    # but for the simplicity's sake, it actually runs for every revision on
    # a branch. The later revisions overwrite values set by the earlier ones.
    for branch in rev.branches:
      branch.co_rev = rev

    # end of outer loop, store most recent revision in "history" array
    while len(history) <= depth:
      history.append(None)
    history[depth] = rev
571
def _add_tag(tag_name, revision):
  """Build a non-branch Tag named TAG_NAME and attach it to REVISION.

  With a false REVISION the tag is created unattached and gets its own
  empty alias list.  The new Tag is returned.
  """
  if not revision:
    new_tag = Tag(tag_name, None)
    new_tag.aliases = []
  else:
    new_tag = Tag(tag_name, revision.string)
    # the tag shares the revision's tag list as its alias list
    shared_tags = revision.tags
    new_tag.aliases = shared_tags
    shared_tags.append(new_tag)
  new_tag.co_rev = revision
  new_tag.is_branch = 0
  return new_tag
584
585def _remove_tag(tag):
586  """Remove a tag's associations"""
587  tag.aliases.remove(tag)
588  if tag.is_branch and tag.branch_rev:
589    tag.branch_rev.branch_points.remove(tag)
590
591def _revision_tuple(revision_string):
592  """convert a revision number into a tuple of integers"""
593  t = tuple(map(int, revision_string.split('.')))
594  if len(t) % 2 == 0:
595    return t
596  raise ValueError
597
598def _tag_tuple(revision_string):
599  """convert a revision number or branch number into a tuple of integers"""
600  if revision_string:
601    t = [int(x) for x in revision_string.split('.')]
602    l = len(t)
603    if l == 1:
604      return ()
605    if l > 2 and t[-2] == 0 and l % 2 == 0:
606      del t[-2]
607    return tuple(t)
608  return ()
609
610def _dict_list_add(dict, idx, elem):
611  try:
612    list = dict[idx]
613  except KeyError:
614    list = dict[idx] = [elem]
615  else:
616    list.append(elem)
617  return list
618
619
620# ======================================================================
621# Functions for parsing output from RCS utilities
622
623
class COMalformedOutput(vclib.Error):
  """The co output lacked the expected filename or revision header line."""
class COMissingRevision(vclib.Error):
  """The co output reported an absent revision or missing side branches."""
628
### suck up other warnings in _re_co_warning?
# Patterns for the header lines emitted by co.  These are bytes patterns;
# _parse_co_header() matches them against the lines it reads.

# "<path>,v  -->  standard output" (or "stdout"); group 1 is the path
# without the ",v" suffix.
_re_co_filename = re.compile(br'^(.*),v\s+-->\s+(?:(?:standard output)|(?:stdout))\s*\n?$')
# "Unknown phrases like ..." warning line; _parse_co_header() skips it.
_re_co_warning = re.compile(br'^.*co: .*,v: warning: Unknown phrases like .*\n$')
# "revision ... absent" error line.
_re_co_missing_rev = re.compile(br'^.*co: .*,v: revision.*absent\n$')
# "no side branches present for <number>" error line.
_re_co_side_branches = re.compile(br'^.*co: .*,v: no side branches present for [\d\.]+\n$')
# "revision <number>" line; group 1 is the revision number.
_re_co_revision = re.compile(br'^revision\s+([\d\.]+)\s*\n$')
635
def _parse_co_header(fp, encoding='utf-8'):
  """Parse the header that RCS co emits ahead of the file contents.

  fp is a binary file (pipe) positioned at the start of the co output.
  The header looks like this:

    /home/cvsroot/mod_dav/dav_shared_stub.c,v  -->  standard output
    revision 1.1

  and a warning such as the following may show up between the two lines:

    co: INSTALL,v: warning: Unknown phrases like `permissions ...;' are present.

  Returns: (filename, revision), both decoded with ENCODING, or
  (None, None) if the output is empty.

  Raises COMalformedOutput if the filename or the revision line cannot
  be found, and COMissingRevision if co reports a missing revision.
  """

  # the first line, if there is one, has to name the file
  first_line = fp.readline()
  if not first_line:
    return None, None
  name_match = _re_co_filename.match(first_line)
  if name_match is None:
    raise COMalformedOutput("Unable to find filename in co output stream")
  raw_filename = name_match.group(1)

  # scan the following lines for the revision, stepping over ignorable
  # warnings and stopping at the first line that is not recognized
  while True:
    line = fp.readline()
    if not line:
      break
    rev_match = _re_co_revision.match(line)
    if rev_match:
      return (enc_decode(raw_filename, encoding),
              enc_decode(rev_match.group(1), encoding))
    if _re_co_missing_rev.match(line) or _re_co_side_branches.match(line):
      raise COMissingRevision("Got missing revision error from co output stream")
    if not _re_co_warning.match(line):
      break

  raise COMalformedOutput("Unable to find revision in co output stream")
684
# Line that ends the log of one RCS file in rlog output.
# if your rlog doesn't use 77 '=' characters, then this must change
LOG_END_MARKER = '=' * 77 + '\n'
# Line of 28 '-' characters that ends the header and each revision entry.
ENTRY_END_MARKER = '-' * 28 + '\n'

# Values of the "eof" flag returned by _parse_log_header() and
# _parse_log_entry(); both use None when none of these conditions was hit.
_EOF_FILE = 'end of file entries'       # no more entries for this RCS file
_EOF_LOG = 'end of log'                 # hit the true EOF on the pipe
_EOF_ERROR = 'error message found'      # rlog issued an error
692
# rlog error messages look like
#
#   rlog: filename/goes/here,v: error message
#   rlog: filename/goes/here,v:123: error message
#
# so we should be able to match them with a regex like
#
#   ^rlog\: (.*)(?:\:\d+)?\: (.*)$
#
# But for some reason the windows version of rlog omits the "rlog: " prefix
# for the first error message when the standard error stream has been
# redirected to a file or pipe. (the prefix is present in subsequent errors
# and when rlog is run from the console). So the expression below is more
# complicated
#
# Group 1 is the RCS file name (ending in ",v"), group 2 the message.
_re_log_error = re.compile(r'^(?:rlog\: )*(.*,v)(?:\:\d+)?\: (.*)$')

# CVSNT error messages look like:
# cvs rcsfile: `C:/path/to/file,v' does not appear to be a valid rcs file
# cvs [rcsfile aborted]: C:/path/to/file,v: No such file or directory
# cvs [rcsfile aborted]: cannot open C:/path/to/file,v: Permission denied
#
# Groups 1-3 are the alternative places where the file name can appear (at
# most one of them matches); group 4 is the rest of the message.
_re_cvsnt_error = re.compile(r'^(?:cvs rcsfile\: |cvs \[rcsfile aborted\]: )'
                             r'(?:\`(.*,v)\' |cannot open (.*,v)\: |(.*,v)\: |)'
                             r'(.*)$')
716
def _parse_log_header(fp):
  """Parse an RCS/CVS log header.

  fp is a file (pipe) opened for reading the log information.

  On entry, fp should point to the start of a log entry.
  On exit, fp will have consumed the separator line between the header and
  the first revision log.

  If there is no revision information (e.g. the "-h" switch was passed to
  rlog), then fp will have consumed the file separator line on exit.

  Returns: filename, default branch, tag dictionary, lock dictionary,
  rlog error message, and eof flag
  """

  filename = head = branch = msg = ""
  taginfo = {}    # tag name => number
  lockinfo = {}   # revision => locker
  state = 0       # 0 = base, 1 = parsing symbols, 2 = parsing locks
  eof = None

  while True:
    line = fp.readline()
    if not line:
      # the true end-of-file
      eof = _EOF_LOG
      break

    if state != 0:
      if line.startswith('\t'):
        # an indented "left: right" pair belonging to the current section
        left, right = [field.strip() for field in line.split(':')]
        if state == 1:
          taginfo[left] = right
        else:
          lockinfo[right] = left
        continue
      # oops. this line doesn't belong to the section any more; leave the
      # section and treat the line as a regular header line below.
      state = 0

    if line.startswith('RCS file:'):
      filename = line[10:-1]
    elif line.startswith('head:'):
      head = line[6:-1]
    elif line.startswith('branch:'):
      branch = line[8:-1]
    elif line.startswith('locks:'):
      # start parsing the lock information
      state = 2
    elif line.startswith('symbolic names'):
      # start parsing the tag information
      state = 1
    elif line == ENTRY_END_MARKER:
      # end of the headers
      break
    elif line == LOG_END_MARKER:
      # end of this file's log information
      eof = _EOF_FILE
      break
    else:
      cvsnt_error = _re_cvsnt_error.match(line)
      if cvsnt_error:
        name1, name2, name3, msg = cvsnt_error.groups()
        filename = name1 or name2 or name3
        if not filename:
          raise vclib.Error("Could not get filename from CVSNT error:\n%s"
                             % line)
        eof = _EOF_ERROR
        break

      rlog_error = _re_log_error.match(line)
      if rlog_error:
        filename, msg = rlog_error.groups()
        if msg.startswith('warning: Unknown phrases like '):
          # don't worry about this warning. it can happen with some RCS
          # files that have unknown fields in them (e.g. "permissions 644;"
          continue
        eof = _EOF_ERROR
        break

  return filename, branch, taginfo, lockinfo, msg, eof
804
# The "date: ...; author: ...; state: ...;" line of a log entry.
# Group 1 is the date, group 2 the author, group 3 the state, group 5 the
# optional "lines:" value and group 7 the optional commitid.
_re_log_info = re.compile(r'^date:\s+([^;]+);'
                          r'\s+author:\s+([^;]+);'
                          r'\s+state:\s+([^;]+);'
                          r'(\s+lines:\s+([0-9\s+-]+);?)?'
                          r'(\s+commitid:\s+([a-zA-Z0-9]+))?\n$')
### _re_rev should be updated to extract the "locked" flag
# The "revision <number>" line of a log entry; group 1 is the number.
_re_rev = re.compile(r'^revision\s+([0-9.]+).*')
def _parse_log_entry(fp):
  """Parse a single log entry.

  On entry, fp should point to the first line of the entry (the "revision"
  line).
  On exit, fp will have consumed the log separator line (dashes) or the
  end-of-file marker (equals).

  Returns: Revision object and eof flag (see _EOF_*).  The Revision is
  None when the entry could not be parsed.
  """
  line = fp.readline()
  if not line:
    return None, _EOF_LOG
  if line == LOG_END_MARKER:
    # Needed because some versions of RCS precede LOG_END_MARKER
    # with ENTRY_END_MARKER
    return None, _EOF_FILE

  rev = None
  info = None
  if line[:8] == 'revision':
    rev_match = _re_rev.match(line)
    if not rev_match:
      return None, _EOF_LOG
    rev = rev_match.group(1)

    # the date/author/state line follows the revision line
    line = fp.readline()
    if not line:
      return None, _EOF_LOG
    info = _re_log_info.match(line)

  # collect the log message up to the next separator
  eof = None
  log_lines = []
  while True:
    line = fp.readline()
    if not line:
      # true end-of-file
      eof = _EOF_LOG
      break
    if line[:9] == 'branches:':
      continue
    if line == ENTRY_END_MARKER:
      break
    if line == LOG_END_MARKER:
      # end of this file's log information
      eof = _EOF_FILE
      break
    log_lines.append(line)
  log = ''.join(log_lines)

  if not rev or not info:
    # there was a parsing error
    return None, eof

  # parse out a time tuple for the local time
  tm = vclib.ccvs.cvs_strptime(info.group(1))

  # rlog seems to assume that two-digit years are 1900-based (so, "04"
  # comes out as "1904", not "2004").
  EPOCH = 1970
  if tm[0] < EPOCH:
    tm = list(tm)
    if tm[0] - 1900 < 70:
      tm[0] += 100
    if tm[0] < EPOCH:
      raise ValueError('invalid year')
  date = calendar.timegm(tm)

  author = info.group(2)
  is_dead = info.group(3) == "dead"
  lines_changed = info.group(5)
  return Revision(rev, date, author, is_dead, lines_changed, log), eof
882
def _skip_file(fp):
  "Read and discard lines from fp through the end of this file's log."
  # stop at end-of-file or once the end-of-log marker has been consumed
  line = fp.readline()
  while line and line != LOG_END_MARKER:
    line = fp.readline()
891
892def _paths_eq(path1, path2):
893  "See if two path strings are the same"
894  # This function is neccessary because CVSNT (since version 2.0.29)
895  # converts paths passed as arguments to use upper case drive
896  # letter and forward slashes
897  return os.path.normcase(path1) == os.path.normcase(path2)
898
899
900# ======================================================================
901# Functions for interpreting and manipulating log information
902
def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
  """Cross-link revisions, tags and locks; optionally filter the revisions.

  REVS is a list of Revision objects.  TAGINFO maps tag names to tag
  numbers; it is modified in place so that every value becomes a Tag
  object, and the artificial tags 'MAIN' and 'HEAD' are added to it.
  LOCKINFO is looked up by revision string to set each revision's
  `lockinfo` attribute.  CUR_BRANCH is the number stored under 'MAIN'.
  FILTER, when true, is a revision number, branch number or tag name
  used to restrict the result.

  Returns REVS itself when FILTER is false, otherwise a new list holding
  the matching revisions.  Raises vclib.Error when FILTER is neither a
  valid number nor a known tag name.
  """

  # Artificial ViewVC tag MAIN.  When the file has a default branch, MAIN
  # behaves like a branch tag for that branch; otherwise it behaves like
  # a branch tag for the trunk.  (A default branch is simply a branch
  # number recorded in the RCS file that tells CVS and RCS which branch
  # to use for checkout and update when no revision argument or sticky
  # branch overrides it.  "cvs import" sets it to the newly created
  # vendor branch; it can also be set by hand with "cvs admin -b".)
  taginfo['MAIN'] = cur_branch

  # Swap every raw tag number for a Tag object.
  for tag_name in list(taginfo):
    taginfo[tag_name] = Tag(tag_name, taginfo[tag_name])
  tags = list(taginfo.values())

  # When filtering, first try to interpret FILTER as a revision or
  # branch number by building an anonymous Tag from it.
  view_tag = None
  if filter:
    try:
      view_tag = Tag(None, filter)
    except ValueError:
      # Not a number; FILTER is resolved as a tag name further down.
      pass
    else:
      tags.append(view_tag)

  # Match up tags and revisions.
  _match_revs_tags(revs, tags)

  # Attach lock information to each revision.
  for revision in revs:
    revision.lockinfo = lockinfo.get(revision.string)

  # Artificial ViewVC tag HEAD: a non-branch tag on the latest revision
  # of the MAIN branch.  It is unrelated to the "head" revision number
  # stored in the RCS file and shown by rlog: HEAD is the revision that
  # the CVS and RCS co commands check out by default, whereas "head" is
  # just the highest revision on the trunk.
  taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev)

  if not filter:
    return revs

  # No usable anonymous tag means FILTER was not a valid revision or
  # branch number, so it has to be the name of a known tag.
  if not view_tag:
    try:
      view_tag = taginfo[filter]
    except KeyError:
      raise vclib.Error('Invalid tag or revision number "%s"' % filter)

  # Only revisions on the tag's branch or on its parent branches are kept.
  target = view_tag.number
  if view_tag.is_branch:
    branch = target
  elif len(target) > 2:
    branch = target[:-1]
  else:
    branch = ()

  # For a normal tag keep the tagged revision and everything preceding
  # it.  For a branch tag keep the revisions on the branch, the branch
  # point revision, and everything preceding that.
  filtered_revs = [
    revision for revision in revs
    if (revision.number == target
        or revision.branch_number == target
        or (revision.number < target
            and revision.branch_number
                == branch[:len(revision.branch_number)]))
  ]

  # Drop the anonymous tag again if it only existed for filtering.
  if view_tag.name is None:
    _remove_tag(view_tag)

  return filtered_revs
983
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
  """Fill in log information on ENTRIES from the output of rlog.

  REPOS provides _getpath() and rcs_popen().  DIR_PATH_PARTS identifies
  the directory that holds ENTRIES.  VIEW_TAG, when set, is the name of
  the tag or branch whose revision should be reported for each file;
  otherwise rlog is invoked with '-r'.  GET_DIRS is forwarded to
  _log_path().

  Every entry gets `rev`, `date`, `author`, `dead`, `log` and `lockinfo`
  attributes; they stay None when no log could be retrieved.  Entries
  are handed to rlog in batches of at most 100 RCS paths.

  Returns a dictionary of all tags seen in the parsed files, plus
  'MAIN' and 'HEAD'.

  Raises vclib.Error when the rlog output ends early for a file that
  has no recorded errors, or when it names an unexpected RCS file.  The
  rlog pipe is closed in either case.
  """
  alltags = {           # all the tags seen in the files of this dir
    'MAIN' : '',
    'HEAD' : '1.1'
    }

  entries_idx = 0
  entries_len = len(entries)
  max_args = 100

  while 1:
    chunk = []

    while len(chunk) < max_args and entries_idx < entries_len:
      entry = entries[entries_idx]
      path = _log_path(entry, repos._getpath(dir_path_parts), get_dirs)
      if path:
        entry.path = path
        entry.idx = entries_idx
        chunk.append(entry)

      # set properties even if we don't retrieve logs
      entry.rev = entry.date = entry.author = None
      entry.dead = entry.log = entry.lockinfo = None

      entries_idx = entries_idx + 1

    if not chunk:
      return alltags

    args = []
    if not view_tag:
      # NOTE: can't pass tag on command line since a tag may contain "-"
      #       we'll search the output for the appropriate revision
      # fetch the latest revision on the default branch
      args.append('-r')
    args.extend([x.path for x in chunk])
    rlog = repos.rcs_popen('rlog', args, True)

    # Make sure the pipe is closed even when parsing raises; otherwise
    # every vclib.Error raised below would leak the open rlog stream.
    try:
      # consume each file found in the resulting log
      chunk_idx = 0
      while chunk_idx < len(chunk):
        file = chunk[chunk_idx]
        filename, default_branch, taginfo, lockinfo, msg, eof \
          = _parse_log_header(rlog)

        if eof == _EOF_LOG:
          # the rlog output ended early. this can happen on errors that rlog
          # thinks are so serious that it stops parsing the current file and
          # refuses to parse any of the files that come after it. one of the
          # errors that triggers this obnoxious behavior looks like:
          #
          # rlog: c:\cvsroot\dir\file,v:8: unknown expand mode u
          # rlog aborted

          # if current file has errors, restart on the next one
          if file.errors:
            chunk_idx = chunk_idx + 1
            if chunk_idx < len(chunk):
              # rewind the outer loop so the remaining entries of this
              # chunk are passed to a fresh rlog invocation
              entries_idx = chunk[chunk_idx].idx
            break

          # otherwise just error out
          raise vclib.Error('Rlog output ended early. Expected RCS file "%s"'
                            % file.path)

        # if rlog filename doesn't match current file and we already have an
        # error message about this file, move on to the next file
        while not (file and _paths_eq(file.path, filename)):
          if file and file.errors:
            chunk_idx = chunk_idx + 1
            file = chunk_idx < len(chunk) and chunk[chunk_idx] or None
            continue

          raise vclib.Error('Error parsing rlog output. Expected RCS file %s'
                            ', found %s' % (file and file.path, filename))

        # if we get an rlog error message, restart loop without advancing
        # chunk_idx cause there might be more output about the same file
        if eof == _EOF_ERROR:
          file.errors.append("rlog error: %s" % msg)
          continue

        tag = None
        if view_tag == 'MAIN' or view_tag == 'HEAD':
          tag = Tag(None, default_branch)
        elif view_tag in taginfo:
          tag = Tag(None, taginfo[view_tag])
        elif view_tag and (eof != _EOF_FILE):
          # the tag wasn't found, so skip this file (unless we already
          # know there's nothing left of it to read)
          _skip_file(rlog)
          eof = _EOF_FILE

        # we don't care about the specific values -- just the keys and
        # whether the values point to branches or revisions. this is the
        # fastest way to merge the set of keys and keep values that allow
        # us to make the distinction between branch tags and normal tags
        alltags.update(taginfo)

        # read all of the log entries until we find the revision we want
        wanted_entry = None
        while not eof:

          # fetch one of the log entries
          entry, eof = _parse_log_entry(rlog)

          if not entry:
            # parsing error
            break

          # A perfect match is a revision on the branch being viewed or
          # a revision having the tag being viewed or any revision
          # when nothing is being viewed. When there's a perfect match
          # we set the wanted_entry value and break out of the loop.
          # An imperfect match is a revision at the branch point of a
          # branch being viewed. When there's an imperfect match we
          # also set the wanted_entry value but keep looping in case
          # something better comes along.
          perfect = not tag or entry.number == tag.number or       \
                    (len(entry.number) == 2 and not tag.number) or \
                    entry.number[:-1] == tag.number
          if perfect or entry.number == tag.number[:-1]:
            wanted_entry = entry
            if perfect:
              break

        if wanted_entry:
          file.rev = wanted_entry.string
          file.date = wanted_entry.date
          file.author = wanted_entry.author
          file.dead = file.kind == vclib.FILE and wanted_entry.dead
          file.absent = 0
          file.log = wanted_entry.log
          file.lockinfo = lockinfo.get(file.rev)
          # suppress rlog errors if we find a usable revision in the end
          del file.errors[:]
        elif file.kind == vclib.FILE:
          file.dead = 0
          #file.errors.append("No revisions exist on %s" % (view_tag or "MAIN"))
          file.absent = 1

        # done with this file now, skip the rest of this file's revisions
        if not eof:
          _skip_file(rlog)

        # end of while loop, advance index
        chunk_idx = chunk_idx + 1
    finally:
      rlog.close()
1134
1135def _log_path(entry, dirpath, getdirs):
1136  path = name = None
1137  if not entry.errors:
1138    if entry.kind == vclib.FILE:
1139      path = entry.in_attic and 'Attic' or ''
1140      name = entry.name
1141    elif entry.kind == vclib.DIR and getdirs:
1142      entry.newest_file = _newest_file(os.path.join(dirpath, entry.name))
1143      if entry.newest_file:
1144        path = entry.name
1145        name = entry.newest_file
1146
1147  if name:
1148    return os.path.join(dirpath, path, name + ',v')
1149  return None
1150
1151
1152# ======================================================================
1153# Functions for dealing with the filesystem
1154
1155if sys.platform == "win32":
1156  def _check_path(path):
1157    kind = None
1158    errors = []
1159
1160    if os.path.isfile(path):
1161      kind = vclib.FILE
1162    elif os.path.isdir(path):
1163      kind = vclib.DIR
1164    else:
1165      errors.append("error: path is not a file or directory")
1166
1167    if not os.access(path, os.R_OK):
1168      errors.append("error: path is not accessible")
1169
1170    return kind, errors
1171
1172else:
1173  _uid = os.getuid()
1174  _gid = os.getgid()
1175
1176  def _check_path(pathname):
1177    try:
1178      info = os.stat(pathname)
1179    except os.error as e:
1180      return None, ["stat error: %s" % e]
1181
1182    kind = None
1183    errors = []
1184
1185    mode = info[stat.ST_MODE]
1186    isdir = stat.S_ISDIR(mode)
1187    isreg = stat.S_ISREG(mode)
1188    if isreg or isdir:
1189      #
1190      # Quick version of access() where we use existing stat() data.
1191      #
1192      # This might not be perfect -- the OS may return slightly different
1193      # results for some bizarre reason. However, we make a good show of
1194      # "can I read this file/dir?" by checking the various perm bits.
1195      #
1196      # NOTE: if the UID matches, then we must match the user bits -- we
1197      # cannot defer to group or other bits. Similarly, if the GID matches,
1198      # then we must have read access in the group bits.
1199      #
1200      # If the UID or GID don't match, we need to check the
1201      # results of an os.access() call, in case the web server process
1202      # is in the group that owns the directory.
1203      #
1204      if isdir:
1205        mask = stat.S_IROTH | stat.S_IXOTH
1206      else:
1207        mask = stat.S_IROTH
1208
1209      if info[stat.ST_UID] == _uid:
1210        if ((mode >> 6) & mask) != mask:
1211          errors.append("error: path is not accessible to user %i" % _uid)
1212      elif info[stat.ST_GID] == _gid:
1213        if ((mode >> 3) & mask) != mask:
1214          errors.append("error: path is not accessible to group %i" % _gid)
1215      # If the process running the web server is a member of
1216      # the group stat.ST_GID access may be granted.
1217      # so the fall back to os.access is needed to figure this out.
1218      elif (mode & mask) != mask:
1219        if not os.access(pathname, isdir and (os.R_OK | os.X_OK) or os.R_OK):
1220          errors.append("error: path is not accessible")
1221
1222      if isdir:
1223        kind = vclib.DIR
1224      else:
1225        kind = vclib.FILE
1226
1227    else:
1228      errors.append("error: path is not a file or directory")
1229
1230    return kind, errors
1231
1232def _newest_file(dirpath):
1233  """Find the last modified RCS file in a directory"""
1234  newest_file = None
1235  newest_time = 0
1236
1237  ### FIXME:  This sucker is leaking unauthorized paths! ###
1238
1239  for subfile in os.listdir(dirpath):
1240    ### filter CVS locks? stale NFS handles?
1241    if subfile[-2:] != ',v':
1242      continue
1243    path = os.path.join(dirpath, subfile)
1244    info = os.stat(path)
1245    if not stat.S_ISREG(info[stat.ST_MODE]):
1246      continue
1247    if info[stat.ST_MTIME] > newest_time:
1248      kind, verboten = _check_path(path)
1249      if kind == vclib.FILE and not verboten:
1250        newest_file = subfile[:-2]
1251        newest_time = info[stat.ST_MTIME]
1252
1253  return newest_file
1254