1# -*-python-*-
2#
3# Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved.
4#
5# By using this file, you agree to the terms and conditions set forth in
6# the LICENSE.html file which can be found at the top level of the ViewVC
7# distribution or at http://viewvc.org/license-1.html.
8#
9# For more information, visit http://viewvc.org/
10#
11# -----------------------------------------------------------------------
12
13"Version Control lib driver for locally accessible Subversion repositories"
14
15import sys
16import vclib
17import os
18import os.path
19import time
20import tempfile
21import re
22from io import StringIO
23from urllib.parse import quote as _quote
24from svn import fs, repos, core, client, delta
25
# Alias `long` to `int`.
long = int


### Verify that we have an acceptable version of Subversion.
MIN_SUBVERSION_VERSION = (1, 14, 0)
HAS_SUBVERSION_VERSION = (core.SVN_VER_MAJOR,
                          core.SVN_VER_MINOR,
                          core.SVN_VER_PATCH)
if HAS_SUBVERSION_VERSION < MIN_SUBVERSION_VERSION:
  # Render both version tuples as dotted strings for the error message.
  needs_ver = '.'.join(map(str, MIN_SUBVERSION_VERSION))
  found_ver = '.'.join(map(str, HAS_SUBVERSION_VERSION))
  raise Exception("Subversion version %s is required (%s found)"
                  % (needs_ver, found_ver))
39
40
41def _allow_all(root, path, pool):
42  """Generic authz_read_func that permits access to all paths"""
43  return 1
44
45def _to_str(bs):
46  """Convert Subversion internal path objects represented in bytes into str"""
47  if bs is None:
48    return bs
49  return bs.decode('utf-8', 'surrogateescape')
50
51def _path_parts(path):
52  """Return a list of PATH's components (using '/' as the delimiter).
53  PATH may be of type str or bytes, and the returned value will carry
54  the same type."""
55  splitchar = isinstance(path, bytes) and b'/' or '/'
56  return [p for p in path.split(splitchar) if p]
57
def _cleanup_path(path):
  """Return PATH with empty components removed.

  The non-empty components of PATH are rejoined with a single '/',
  so the result has no leading, trailing or doubled separators.
  PATH may be str or bytes; the result carries the same type."""
  delimiter = b'/' if isinstance(path, bytes) else '/'
  components = _path_parts(path)
  return delimiter.join(components)
64
def _fs_path_join(base, relative):
  """Join BASE and RELATIVE with '/' and return the cleaned-up result.

  BASE and RELATIVE must be of the same type, either both str or both
  bytes; the separator is chosen to match BASE so that bytes paths
  can be joined as well.  The result carries that same type."""
  delimiter = b'/' if isinstance(base, bytes) else '/'
  return _cleanup_path(base + delimiter + relative)
67
68
# Python 3 workaround for cmp()
def cmp(a, b):
  """Three-way comparison: return 1 if A > B, -1 if A < B, else 0."""
  greater = a > b
  lesser = a < b
  return greater - lesser
72
73def _compare_paths(path1, path2):
74  path1_len = len (path1);
75  path2_len = len (path2);
76  min_len = min(path1_len, path2_len)
77  i = 0
78
79  # Are the paths exactly the same?
80  if path1 == path2:
81    return 0
82
83  # Skip past common prefix
84  while (i < min_len) and (path1[i] == path2[i]):
85    i = i + 1
86
87  # Children of paths are greater than their parents, but less than
88  # greater siblings of their parents
89  char1 = '\0'
90  char2 = '\0'
91  if (i < path1_len):
92    char1 = path1[i]
93  if (i < path2_len):
94    char2 = path2[i]
95
96  if (char1 == '/') and (i == path2_len):
97    return 1
98  if (char2 == '/') and (i == path1_len):
99    return -1
100  if (i < path1_len) and (char1 == '/'):
101    return -1
102  if (i < path2_len) and (char2 == '/'):
103    return 1
104
105  # Common prefix was skipped above, next character is compared to
106  # determine order
107  return cmp(char1, char2)
108
109
110def _rev2optrev(rev):
111  assert isinstance(rev, int)
112  rt = core.svn_opt_revision_t()
113  rt.kind = core.svn_opt_revision_number
114  rt.value.number = rev
115  return rt
116
117
def _rootpath2url(rootpath, path):
  """Build a canonicalized file:// URL for PATH inside the repository
  located at the local filesystem path ROOTPATH.

  ROOTPATH is made absolute and both it and PATH are URL-quoted.  When
  ROOTPATH carries a drive specifier, the drive is emitted unquoted
  after 'file:///'."""
  drive, tail = os.path.splitdrive(os.path.abspath(rootpath))
  if os.sep != '/':
    tail = tail.replace(os.sep, '/')
  quoted_root = _quote(tail)
  quoted_path = _quote(path)
  prefix = ('file:///' + drive) if drive else 'file://'
  return core.svn_path_canonicalize(prefix + quoted_root + '/' + quoted_path)
130
131
132# Return a stringfied copy of a bytestring Subversion property
133# (versioned or unversioned) VALUE if possible; otherwise return the
134# original bytestring.
135def _normalize_property_value(value, encoding_hint=None):
136  try:
137    value = value.decode('utf-8')
138  except UnicodeDecodeError:
139    if encoding_hint:
140      try:
141        value = value.decode(encoding_hint)
142      except UnicodeDecodeError:
143        pass
144  return value
145
146
147# Given raw bytestring Subversion property (versioned or unversioned)
148# NAME and VALUE, return a 2-tuple of the same but readied for Python
149# 3 usage.  If NAME can't be stringfied (that is, converted to a
150# Unicode string), both the returned NAME and VALUE will be None.
151# Otherwise, NAME will be a Unicode string and VALUE will be a Unicode
152# string of it could be stringified or a bytestring if it couldn't.
153def _normalize_property(name, value, encoding_hint=None):
154  try:
155    name = name.decode('utf-8')
156  except UnicodeDecodeError:
157    return None, None
158  value = _normalize_property_value(value, encoding_hint)
159  return name, value
160
161
162# Given a dictionary REVPROPS of revision properties, pull special
163# ones out of them and return a 4-tuple containing the log message,
164# the author, the date (converted from the date string property), and
165# a dictionary of any/all other revprops.
166def _split_revprops(revprops, encoding_hint=None):
167  if not revprops:
168    return None, None, None, {}
169  msg = author = date = None
170  other_props = {}
171  for prop in revprops:
172    pname, pval = _normalize_property(prop, revprops[prop], encoding_hint)
173    if pname == core.SVN_PROP_REVISION_LOG.decode('utf-8'):
174      msg = pval
175    elif pname == core.SVN_PROP_REVISION_AUTHOR.decode('utf-8'):
176      author = pval
177    elif pname == core.SVN_PROP_REVISION_DATE.decode('utf-8'):
178      date = _datestr_to_date(pval)
179    elif pname is not None:
180      other_props[pname] = pval
181  return msg, author, date, other_props
182
183
184def _datestr_to_date(datestr):
185  try:
186    return core.svn_time_from_cstring(datestr) // 1000000
187  except:
188    return None
189
190
class Revision(vclib.Revision):
  """Hold state for each revision's log entry, extended with the
  filename and copy-source information of the entry."""

  def __init__(self, rev, date, author, msg, size, lockinfo,
               filename, copy_path, copy_rev):
    # The base class receives REV both as given and as str(rev); the
    # slot following AUTHOR is always passed as None.
    vclib.Revision.__init__(self, rev, str(rev), date, author, None,
                            msg, size, lockinfo)
    self.filename, self.copy_path, self.copy_rev = \
      filename, copy_path, copy_rev
200
201
class NodeHistory:
  """An iterable object that returns 2-item [revision, path] lists
  describing locations along a node's change history, ordered from
  youngest to oldest.

  add_history() has the signature of a history callback (see its use
  with repos.svn_repos_history() in this file).  Items are collected
  in the `histories` list and exposed through __getitem__."""

  def __init__(self, fs_ptr, show_all_logs, limit=0):
    """FS_PTR is the filesystem to query when filtering.  If
    SHOW_ALL_LOGS is false, only revisions in which the path itself
    changed (or one of its parents was copied) are recorded.  LIMIT,
    if non-zero, is the number of items after which collection is
    stopped."""
    self.histories = []
    self.fs_ptr = fs_ptr
    self.show_all_logs = show_all_logs
    self.oldest_rev = None
    self.limit = limit

  def add_history(self, path, revision, pool):
    """Record PATH@REVISION as a history location.

    Raises core.SubversionException with SVN_ERR_CEASE_INVOCATION
    once LIMIT items have been collected."""
    if not self.oldest_rev:
      self.oldest_rev = revision
    else:
      assert(revision < self.oldest_rev)

    # If filtering, only add the path and revision to the histories
    # list if they were actually changed in this revision (where
    # change means the path itself was changed, or one of its parents
    # was copied).  This is useful for omitting bubble-up directory
    # changes.
    if not self.show_all_logs and not self._changed_in(path, revision):
      return
    self.histories.append([revision, _cleanup_path(path)])
    if self.limit and len(self.histories) == self.limit:
      raise core.SubversionException("", core.SVN_ERR_CEASE_INVOCATION)

  def _changed_in(self, path, revision):
    """Return True if PATH was itself changed in REVISION, or if one
    of its parent paths was changed there as a copy."""
    rev_root = fs.revision_root(self.fs_ptr, revision)
    changed_paths = fs.paths_changed(rev_root)
    if path in changed_paths:
      return True

    # Look for a copied parent.  The separator must match the type of
    # PATH: bytes.rfind() raises TypeError when given a str.
    separator = b'/' if isinstance(path, bytes) else '/'
    test_path = path
    while True:
      off = test_path.rfind(separator)
      if off < 0:
        return False
      test_path = test_path[:off]
      if test_path in changed_paths:
        copyfrom_rev, copyfrom_path = fs.copied_from(rev_root, test_path)
        if copyfrom_rev >= 0 and copyfrom_path:
          return True

  def __getitem__(self, idx):
    return self.histories[idx]
251
def _get_last_history_rev(fsroot, path):
  """Return the revision reported by the first history location of
  PATH in FSROOT (the result of one fs.history_prev() step)."""
  node_hist = fs.history_prev(fs.node_history(fsroot, path), 0)
  _location_path, location_rev = fs.history_location(node_hist)
  return location_rev
257
def temp_checkout(svnrepos, path, rev):
  """Check out file revision to temporary file.

  The contents of PATH in revision REV of SVNREPOS are written to a
  newly created temporary file, whose name is returned.  The file is
  not deleted here.  If anything fails along the way, the temporary
  file is removed before the exception propagates, so that failed
  checkouts do not leave files behind."""
  fd, temp = tempfile.mkstemp()
  try:
    with os.fdopen(fd, 'wb') as fp:
      root = svnrepos._getroot(rev)
      stream = fs.file_contents(root, path)
      try:
        while 1:
          chunk = core.svn_stream_read(stream, core.SVN_STREAM_CHUNK_SIZE)
          if not chunk:
            break
          fp.write(chunk)
      finally:
        core.svn_stream_close(stream)
  except BaseException:
    # The file handle is already closed here; drop the partial file.
    try:
      os.remove(temp)
    except OSError:
      pass
    raise
  return temp
276
class FileContentsPipe:
  """File-like reader over the contents of PATH in the Subversion
  filesystem root ROOT.

  Data is handled as bytes: readline() appends b'\\n' to what the
  stream returns, and read() joins the stream's hunks as bytes."""

  def __init__(self, root, path):
    self.readable = True
    self._stream = fs.file_contents(root, path)
    self._eof = 0

  def read(self, len=None):
    """Read up to LEN bytes, or everything that remains if LEN is
    None.  Returns None once end-of-file has been flagged."""
    chunk = None
    if not self._eof:
      if len is None:
        # Accumulate as bytes.  Writing the stream's hunks into a
        # text buffer such as StringIO raises TypeError on Python 3.
        hunks = []
        while 1:
          hunk = core.svn_stream_read(self._stream, 8192)
          if not hunk:
            break
          hunks.append(hunk)
        chunk = b''.join(hunks)
      else:
        chunk = core.svn_stream_read(self._stream, len)
    if not chunk:
      self._eof = 1
    return chunk

  def readline(self):
    """Read one line, including its b'\\n' terminator unless the
    stream ended first.  Returns None once end-of-file has been
    flagged."""
    chunk = None
    if not self._eof:
      chunk, self._eof = core.svn_stream_readline(self._stream, b'\n')
      if not self._eof:
        chunk = chunk + b'\n'
    if not chunk:
      self._eof = 1
    return chunk

  def readlines(self):
    """Return the list of all remaining lines, as given by readline()."""
    lines = []
    while True:
      line = self.readline()
      if not line:
        break
      lines.append(line)
    return lines

  def close(self):
    """Close the underlying Subversion stream."""
    return core.svn_stream_close(self._stream)

  def eof(self):
    """Return the end-of-file flag."""
    return self._eof
328
329
class BlameSource:
  """Sequence of vclib.Annotation items, one per blamed line of a
  file, which must be read strictly in order.

  All blame data is gathered by client.blame2() in the constructor."""

  def __init__(self, local_url, rev, first_rev, include_text, config_dir,
               encoding):
    """Run blame on LOCAL_URL at REV over the range FIRST_REV..REV.

    INCLUDE_TEXT controls whether line text is kept in the
    annotations; ENCODING is used to decode author names; CONFIG_DIR
    is the Subversion runtime configuration directory.

    Raises vclib.NonTextualFileContents if Subversion reports the
    file as binary."""
    self.idx = -1
    self.first_rev = first_rev
    self.blame_data = []
    self.include_text = include_text
    self.encoding = encoding

    ctx = client.svn_client_create_context()
    core.svn_config_ensure(config_dir)
    ctx.config = core.svn_config_get_config(config_dir)
    ctx.auth_baton = core.svn_auth_open([])
    try:
      ### TODO: Is this use of FIRST_REV always what we want?  Should we
      ### pass 1 here instead and do filtering later?
      client.blame2(local_url, _rev2optrev(rev), _rev2optrev(first_rev),
                    _rev2optrev(rev), self._blame_cb, ctx)
    except core.SubversionException as e:
      if e.apr_err == core.SVN_ERR_CLIENT_IS_BINARY_FILE:
        raise vclib.NonTextualFileContents
      raise

  def _blame_cb(self, line_no, rev, author, date, text, pool):
    """Blame receiver: append one vclib.Annotation per blamed line."""
    prev_rev = None
    if rev > self.first_rev:
      prev_rev = rev - 1
    if not self.include_text:
      text = None
    if author is not None:
      # Decode with 'backslashreplace' directly.  The previously
      # attempted 'xmlcharrefreplace' handler only supports encoding:
      # on undecodable input it raised, and a bare `except:` then fell
      # back to 'backslashreplace' anyway, so the outcome is the same.
      author = author.decode(self.encoding, 'backslashreplace')
    self.blame_data.append(vclib.Annotation(text, line_no + 1, rev,
                                            prev_rev, author, None))

  def __getitem__(self, idx):
    """Return item IDX, which must directly follow the previously
    requested index; otherwise raise BlameSequencingError."""
    if idx != self.idx + 1:
      raise BlameSequencingError()
    self.idx = idx
    return self.blame_data[idx]
372
373
class BlameSequencingError(Exception):
  """Raised by BlameSource.__getitem__ when items are requested out
  of sequence."""
376
377
class SVNChangedPath(vclib.ChangedPath):
  """Wrapper around vclib.ChangedPath which handles path splitting.

  PATH and BASE_PATH are given as '/'-delimited paths (or as a false
  value, which is treated as the empty path) and are handed to the
  base class as lists of path components."""

  def __init__(self, path, rev, pathtype, base_path, base_rev,
               action, copied, text_changed, props_changed):
    split_path = _path_parts(path if path else '')
    split_base_path = _path_parts(base_path if base_path else '')
    vclib.ChangedPath.__init__(self, split_path, rev, pathtype,
                               split_base_path, base_rev, action,
                               copied, text_changed, props_changed)
388
389
390class LocalSubversionRepository(vclib.Repository):
391  def __init__(self, name, rootpath, authorizer, utilities, config_dir,
392               encoding):
393    if not (os.path.isdir(rootpath) \
394            and os.path.isfile(os.path.join(rootpath, 'format'))):
395      raise vclib.ReposNotFound(name)
396
397    # Initialize some stuff.
398    self.rootpath = rootpath
399    self.name = name
400    self.auth = authorizer
401    self.diff_cmd = utilities.diff or 'diff'
402    self.config_dir = config_dir or None
403    self.encoding = encoding
404
405    # See if this repository is even viewable, authz-wise.
406    if not vclib.check_root_access(self):
407      raise vclib.ReposNotFound(name)
408
409  def open(self):
410    # Open the repository and init some other variables.
411    self.repos = repos.svn_repos_open(self.rootpath)
412    self.fs_ptr = repos.svn_repos_fs(self.repos)
413    self.youngest = fs.youngest_rev(self.fs_ptr)
414    self._fsroots = {}
415    self._revinfo_cache = {}
416
417    # See if a universal read access determination can be made.
418    if self.auth and self.auth.check_universal_access(self.name) == 1:
419      self.auth = None
420
421  def rootname(self):
422    return self.name
423
424  def rootpath(self):
425    return self.rootpath
426
427  def roottype(self):
428    return vclib.SVN
429
430  def authorizer(self):
431    return self.auth
432
433  def itemtype(self, path_parts, rev):
434    rev = self._getrev(rev)
435    basepath = self._getpath(path_parts)
436    pathtype = self._gettype(basepath, rev)
437    if pathtype is None:
438      raise vclib.ItemNotFound(path_parts)
439    if not vclib.check_path_access(self, path_parts, pathtype, rev):
440      raise vclib.ItemNotFound(path_parts)
441    return pathtype
442
443  def openfile(self, path_parts, rev, options):
444    path = self._getpath(path_parts)
445    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
446      raise vclib.Error("Path '%s' is not a file." % path)
447    rev = self._getrev(rev)
448    fsroot = self._getroot(rev)
449    revision = str(_get_last_history_rev(fsroot, path))
450    fp = FileContentsPipe(fsroot, path)
451    return fp, revision
452
453  def listdir(self, path_parts, rev, options):
454    path = self._getpath(path_parts)
455    if self.itemtype(path_parts, rev) != vclib.DIR:  # does auth-check
456      raise vclib.Error("Path '%s' is not a directory." % path)
457    rev = self._getrev(rev)
458    fsroot = self._getroot(rev)
459    dirents = fs.dir_entries(fsroot, path)
460    entries = [ ]
461    for entry in dirents.values():
462      if entry.kind == core.svn_node_dir:
463        kind = vclib.DIR
464      elif entry.kind == core.svn_node_file:
465        kind = vclib.FILE
466      ent_path = _to_str(entry.name)
467      if vclib.check_path_access(self,
468                                 path_parts + [ent_path], kind, rev):
469        entries.append(vclib.DirEntry(ent_path, kind))
470    return entries
471
472  def dirlogs(self, path_parts, rev, entries, options):
473    path = self._getpath(path_parts)
474    if self.itemtype(path_parts, rev) != vclib.DIR:  # does auth-check
475      raise vclib.Error("Path '%s' is not a directory." % path)
476    fsroot = self._getroot(self._getrev(rev))
477    rev = self._getrev(rev)
478    for entry in entries:
479      ent_path = entry.name
480      entry_path_parts = path_parts + [ent_path]
481      if not vclib.check_path_access(self, entry_path_parts, entry.kind, rev):
482        continue
483      path = self._getpath(entry_path_parts)
484      entry_rev = _get_last_history_rev(fsroot, path)
485      date, author, msg, revprops, changes = self._revinfo(entry_rev)
486      entry.rev = str(entry_rev)
487      entry.date = date
488      entry.author = author
489      entry.log = msg
490      if entry.kind == vclib.FILE:
491        entry.size = fs.file_length(fsroot, path)
492      lock = fs.get_lock(self.fs_ptr, path)
493      entry.lockinfo = lock and _to_str(lock.owner) or None
494
495  def itemlog(self, path_parts, rev, sortby, first, limit, options):
496    """see vclib.Repository.itemlog docstring
497
498    Option values recognized by this implementation
499
500      svn_show_all_dir_logs
501        boolean, default false. if set for a directory path, will include
502        revisions where files underneath the directory have changed
503
504      svn_cross_copies
505        boolean, default false. if set for a path created by a copy, will
506        include revisions from before the copy
507
508      svn_latest_log
509        boolean, default false. if set will return only newest single log
510        entry
511    """
512    assert sortby == vclib.SORTBY_DEFAULT or sortby == vclib.SORTBY_REV
513
514    path = self._getpath(path_parts)
515    path_type = self.itemtype(path_parts, rev)  # does auth-check
516    rev = self._getrev(rev)
517    revs = []
518    lockinfo = None
519
520    # See if this path is locked.
521    try:
522      lock = fs.get_lock(self.fs_ptr, path)
523      if lock:
524        lockinfo = _to_str(lock.owner)
525    except NameError:
526      pass
527
528    # If our caller only wants the latest log, we'll invoke
529    # _log_helper for just the one revision.  Otherwise, we go off
530    # into history-fetching mode.  ### TODO: we could stand to have a
531    # 'limit' parameter here as numeric cut-off for the depth of our
532    # history search.
533    if options.get('svn_latest_log', 0):
534      revision = self._log_helper(path, rev, lockinfo)
535      if revision:
536        revision.prev = None
537        revs.append(revision)
538    else:
539      history = self._get_history(path, rev, path_type, first + limit, options)
540      if len(history) < first:
541        history = []
542      if limit:
543        history = history[first:first+limit]
544
545      for hist_rev, hist_path in history:
546        revision = self._log_helper(hist_path, hist_rev, lockinfo)
547        if revision:
548          # If we have unreadable copyfrom data, obscure it.
549          if revision.copy_path is not None:
550            cp_parts = _path_parts(revision.copy_path)
551            if not vclib.check_path_access(self, cp_parts, path_type,
552                                           revision.copy_rev):
553              revision.copy_path = revision.copy_rev = None
554          revision.prev = None
555          if len(revs):
556            revs[-1].prev = revision
557          revs.append(revision)
558    return revs
559
560  def itemprops(self, path_parts, rev):
561    path = self._getpath(path_parts)
562    path_type = self.itemtype(path_parts, rev)  # does auth-check
563    rev = self._getrev(rev)
564    fsroot = self._getroot(rev)
565    proptable = fs.node_proplist(fsroot, path)
566    propdict = {}
567    for pname in proptable.keys():
568      pvalue = proptable[pname]
569      pname, pvalue = _normalize_property(pname, pvalue, self.encoding)
570      if pname:
571        propdict[pname] = pvalue
572    return propdict
573
574  def annotate(self, path_parts, rev, include_text=False):
575    path = self._getpath(path_parts)
576    path_type = self.itemtype(path_parts, rev)  # does auth-check
577    if path_type != vclib.FILE:
578      raise vclib.Error("Path '%s' is not a file." % path)
579    rev = self._getrev(rev)
580    fsroot = self._getroot(rev)
581    history = self._get_history(path, rev, path_type, 0,
582                                {'svn_cross_copies': 1})
583    youngest_rev, youngest_path = history[0]
584    oldest_rev, oldest_path = history[-1]
585    source = BlameSource(_rootpath2url(self.rootpath, path), youngest_rev,
586                         oldest_rev, include_text, self.config_dir,
587                         self.encoding)
588    return source, youngest_rev
589
590  def revinfo(self, rev):
591    return self._revinfo(rev, 1)
592
593  def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}):
594    p1 = self._getpath(path_parts1)
595    p2 = self._getpath(path_parts2)
596    r1 = self._getrev(rev1)
597    r2 = self._getrev(rev2)
598    if not vclib.check_path_access(self, path_parts1, vclib.FILE, rev1):
599      raise vclib.ItemNotFound(path_parts1)
600    if not vclib.check_path_access(self, path_parts2, vclib.FILE, rev2):
601      raise vclib.ItemNotFound(path_parts2)
602
603    args = vclib._diff_args(type, options)
604
605    def _date_from_rev(rev):
606      date, author, msg, revprops, changes = self._revinfo(rev)
607      return date
608
609    try:
610      temp1 = temp_checkout(self, p1, r1)
611      temp2 = temp_checkout(self, p2, r2)
612      info1 = p1, _date_from_rev(r1), r1
613      info2 = p2, _date_from_rev(r2), r2
614      return vclib._diff_fp(temp1, temp2, info1, info2, self.diff_cmd, args)
615    except core.SubversionException as e:
616      if e.apr_err == core.SVN_ERR_FS_NOT_FOUND:
617        raise vclib.InvalidRevision
618      raise
619
620  def isexecutable(self, path_parts, rev):
621    props = self.itemprops(path_parts, rev) # does authz-check
622    return core.SVN_PROP_EXECUTABLE in props
623
624  def filesize(self, path_parts, rev):
625    path = self._getpath(path_parts)
626    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
627      raise vclib.Error("Path '%s' is not a file." % path)
628    fsroot = self._getroot(self._getrev(rev))
629    return fs.file_length(fsroot, path)
630
631  ##--- helpers ---##
632
633  def _revinfo(self, rev, include_changed_paths=0):
634    """Internal-use, cache-friendly revision information harvester."""
635
636    def _get_changed_paths(fsroot):
637      """Return a 3-tuple: found_readable, found_unreadable, changed_paths."""
638      editor = repos.ChangeCollector(self.fs_ptr, fsroot)
639      e_ptr, e_baton = delta.make_editor(editor)
640      repos.svn_repos_replay(fsroot, e_ptr, e_baton)
641      changedpaths = {}
642      changes = editor.get_changes()
643
644      # Copy the Subversion changes into a new hash, checking
645      # authorization and converting them into ChangedPath objects.
646      found_readable = found_unreadable = 0
647      for path in changes.keys():
648        spath = _to_str(path)
649        change = changes[path]
650        if change.path:
651          change.path = _cleanup_path(change.path)
652        if change.base_path:
653          change.base_path = _cleanup_path(change.base_path)
654        is_copy = 0
655        if not hasattr(change, 'action'): # new to subversion 1.4.0
656          action = vclib.MODIFIED
657          if not change.path:
658            action = vclib.DELETED
659          elif change.added:
660            action = vclib.ADDED
661            replace_check_path = path
662            if change.base_path and change.base_rev:
663              replace_check_path = change.base_path
664            if replace_check_path in changedpaths \
665               and changedpaths[replace_check_path].action == vclib.DELETED:
666              action = vclib.REPLACED
667        else:
668          if change.action == repos.CHANGE_ACTION_ADD:
669            action = vclib.ADDED
670          elif change.action == repos.CHANGE_ACTION_DELETE:
671            action = vclib.DELETED
672          elif change.action == repos.CHANGE_ACTION_REPLACE:
673            action = vclib.REPLACED
674          else:
675            action = vclib.MODIFIED
676        if (action == vclib.ADDED or action == vclib.REPLACED) \
677           and change.base_path \
678           and change.base_rev:
679          is_copy = 1
680        if change.item_kind == core.svn_node_dir:
681          pathtype = vclib.DIR
682        elif change.item_kind == core.svn_node_file:
683          pathtype = vclib.FILE
684        else:
685          pathtype = None
686
687        parts = _path_parts(spath)
688        if vclib.check_path_access(self, parts, pathtype, rev):
689          if is_copy and change.base_path and (change.base_path != path):
690            parts = _path_parts(_to_str(change.base_path))
691            if not vclib.check_path_access(self, parts, pathtype,
692                                           change.base_rev):
693              is_copy = 0
694              change.base_path = None
695              change.base_rev = None
696              found_unreadable = 1
697          if change.base_path:
698            base_path = _to_str(change.base_path)
699          else:
700            base_path = None
701          changedpaths[spath] = SVNChangedPath(spath, rev, pathtype,
702                                               base_path,
703                                               change.base_rev, action,
704                                               is_copy, change.text_changed,
705                                               change.prop_changes)
706          found_readable = 1
707        else:
708          found_unreadable = 1
709      return found_readable, found_unreadable, list(changedpaths.values())
710
711    def _get_change_copyinfo(fsroot, path, change):
712      # If we know the copyfrom info, return it...
713      if hasattr(change, 'copyfrom_known') and change.copyfrom_known:
714        copyfrom_path = change.copyfrom_path
715        copyfrom_rev = change.copyfrom_rev
716      # ...otherwise, if this change could be a copy (that is, it
717      # contains an add action), query the copyfrom info ...
718      elif (change.change_kind == fs.path_change_add or
719            change.change_kind == fs.path_change_replace):
720        copyfrom_rev, copyfrom_path = fs.copied_from(fsroot, path)
721      # ...else, there's no copyfrom info.
722      else:
723        copyfrom_rev = core.SVN_INVALID_REVNUM
724        copyfrom_path = None
725      return copyfrom_path, copyfrom_rev
726
727    def _simple_auth_check(fsroot):
728      """Return a 2-tuple: found_readable, found_unreadable."""
729      found_unreadable = found_readable = 0
730      if hasattr(fs, 'paths_changed2'):
731        changes = fs.paths_changed2(fsroot)
732      else:
733        changes = fs.paths_changed(fsroot)
734      paths = list(changes.keys())
735      for path in paths:
736        change = changes[path]
737        pathtype = None
738        if hasattr(change, 'node_kind'):
739          if change.node_kind == core.svn_node_file:
740            pathtype = vclib.FILE
741          elif change.node_kind == core.svn_node_dir:
742            pathtype = vclib.DIR
743        parts = _path_parts(path)
744        if pathtype is None:
745          # Figure out the pathtype so we can query the authz subsystem.
746          if change.change_kind == fs.path_change_delete:
747            # Deletions are annoying, because they might be underneath
748            # copies (make their previous location non-trivial).
749            prev_parts = parts
750            prev_rev = rev - 1
751            parent_parts = parts[:-1]
752            while parent_parts:
753              parent_path = '/' + self._getpath(parent_parts)
754              parent_change = changes.get(parent_path)
755              if not (parent_change and \
756                      (parent_change.change_kind == fs.path_change_add or
757                       parent_change.change_kind == fs.path_change_replace)):
758                del(parent_parts[-1])
759                continue
760              copyfrom_path, copyfrom_rev = \
761                _get_change_copyinfo(fsroot, parent_path, parent_change)
762              if copyfrom_path:
763                prev_rev = copyfrom_rev
764                prev_parts = _path_parts(copyfrom_path) + \
765                             parts[len(parent_parts):]
766                break
767              del(parent_parts[-1])
768            pathtype = self._gettype(self._getpath(prev_parts), prev_rev)
769          else:
770            pathtype = self._gettype(self._getpath(parts), rev)
771        if vclib.check_path_access(self, parts, pathtype, rev):
772          found_readable = 1
773          copyfrom_path, copyfrom_rev = \
774            _get_change_copyinfo(fsroot, path, change)
775          if copyfrom_path and copyfrom_path != path:
776            parts = _path_parts(copyfrom_path)
777            if not vclib.check_path_access(self, parts, pathtype,
778                                           copyfrom_rev):
779              found_unreadable = 1
780        else:
781          found_unreadable = 1
782        if found_readable and found_unreadable:
783          break
784      return found_readable, found_unreadable
785
786    def _revinfo_helper(rev, include_changed_paths):
787      # Get the revision property info.  (Would use
788      # editor.get_root_props(), but something is broken there...)
789      revprops = fs.revision_proplist(self.fs_ptr, rev)
790      msg, author, date, revprops = _split_revprops(revprops)
791
792      # The iterfaces that use this function expect string values.
793      if isinstance(msg, bytes):
794        msg = _to_str(msg)
795      if isinstance(author, bytes):
796        author = _to_str(author) or pval
797
798      # Optimization: If our caller doesn't care about the changed
799      # paths, and we don't need them to do authz determinations, let's
800      # get outta here.
801      if self.auth is None and not include_changed_paths:
802        return date, author, msg, revprops, None
803
804      # If we get here, then we either need the changed paths because we
805      # were asked for them, or we need them to do authorization checks.
806      #
807      # If we only need them for authorization checks, though, we
808      # won't bother generating fully populated ChangedPath items (the
809      # cost is too great).
810      fsroot = self._getroot(rev)
811      if include_changed_paths:
812        found_readable, found_unreadable, changedpaths = \
813          _get_changed_paths(fsroot)
814      else:
815        changedpaths = None
816        found_readable, found_unreadable = _simple_auth_check(fsroot)
817
818      # Filter our metadata where necessary, and return the requested data.
819      if found_unreadable:
820        msg = None
821        if not found_readable:
822          author = None
823          date = None
824      return date, author, msg, revprops, changedpaths
825
826    # Consult the revinfo cache first.  If we don't have cached info,
827    # or our caller wants changed paths and we don't have those for
828    # this revision, go do the real work.
829    rev = self._getrev(rev)
830    cached_info = self._revinfo_cache.get(rev)
831    if not cached_info \
832       or (include_changed_paths and cached_info[4] is None):
833      cached_info = _revinfo_helper(rev, include_changed_paths)
834      self._revinfo_cache[rev] = cached_info
835    return tuple(cached_info)
836
837  def _log_helper(self, path, rev, lockinfo):
838    rev_root = fs.revision_root(self.fs_ptr, rev)
839    copyfrom_rev, copyfrom_path = fs.copied_from(rev_root, path)
840    date, author, msg, revprops, changes = self._revinfo(rev)
841    if fs.is_file(rev_root, path):
842      size = fs.file_length(rev_root, path)
843    else:
844      size = None
845    if copyfrom_path:
846      copyfrom_path = _cleanup_path(_to_str(copyfrom_path))
847    else:
848      copyfrom_path = None
849    return Revision(rev, date, author, msg, size,
850                    lockinfo, path, copyfrom_path, copyfrom_rev)
851
852  def _get_history(self, path, rev, path_type, limit=0, options={}):
853    if self.youngest == 0:
854      return []
855
856    rev_paths = []
857    fsroot = self._getroot(rev)
858    show_all_logs = options.get('svn_show_all_dir_logs', 0)
859    if not show_all_logs:
860      # See if the path is a file or directory.
861      kind = fs.check_path(fsroot, path)
862      if kind is core.svn_node_file:
863        show_all_logs = 1
864
865    # Instantiate a NodeHistory collector object, and use it to collect
866    # history items for PATH@REV.
867    history = NodeHistory(self.fs_ptr, show_all_logs, limit)
868    try:
869      repos.svn_repos_history(self.fs_ptr, path, history.add_history,
870                              1, rev, options.get('svn_cross_copies', 0))
871    except core.SubversionException as e:
872      if e.apr_err != core.SVN_ERR_CEASE_INVOCATION:
873        raise
874
875    # Now, iterate over those history items, checking for changes of
876    # location, pruning as necessitated by authz rules.
877    for hist_rev, hist_path in history:
878      hist_path = _to_str(hist_path)
879      path_parts = _path_parts(hist_path)
880      if not vclib.check_path_access(self, path_parts, path_type, hist_rev):
881        break
882      rev_paths.append([hist_rev, hist_path])
883    return rev_paths
884
885  def _getpath(self, path_parts):
886    return '/'.join(path_parts)
887
888  def _getrev(self, rev):
889    if rev is None or rev == 'HEAD':
890      return self.youngest
891    try:
892      if type(rev) == type(''):
893        while rev[0] == 'r':
894          rev = rev[1:]
895      rev = int(rev)
896    except:
897      raise vclib.InvalidRevision(rev)
898    if (rev < 0) or (rev > self.youngest):
899      raise vclib.InvalidRevision(rev)
900    return rev
901
902  def _getroot(self, rev):
903    try:
904      return self._fsroots[rev]
905    except KeyError:
906      r = self._fsroots[rev] = fs.revision_root(self.fs_ptr, rev)
907      return r
908
909  def _gettype(self, path, rev):
910    # Similar to itemtype(), but without the authz check.  Returns
911    # None for missing paths.
912    try:
913      kind = fs.check_path(self._getroot(rev), path)
914    except:
915      return None
916    if kind == core.svn_node_dir:
917      return vclib.DIR
918    if kind == core.svn_node_file:
919      return vclib.FILE
920    return None
921
922  ##--- custom ---##
923
924  def get_youngest_revision(self):
925    return self.youngest
926
927  def get_location(self, path, rev, old_rev):
928    try:
929      results = repos.svn_repos_trace_node_locations(self.fs_ptr, path,
930                                                     rev, [old_rev], _allow_all)
931    except core.SubversionException as e:
932      if e.apr_err == core.SVN_ERR_FS_NOT_FOUND:
933        raise vclib.ItemNotFound(path)
934      raise
935    try:
936      old_path = results[old_rev]
937    except KeyError:
938      raise vclib.ItemNotFound(path)
939
940    return _cleanup_path(_to_str(old_path))
941
942  def created_rev(self, full_name, rev):
943    return fs.node_created_rev(self._getroot(rev), full_name)
944
945  def last_rev(self, path, peg_revision, limit_revision=None):
946    """Given PATH, known to exist in PEG_REVISION, find the youngest
947    revision older than, or equal to, LIMIT_REVISION in which path
948    exists.  Return that revision, and the path at which PATH exists in
949    that revision."""
950
951    # Here's the plan, man.  In the trivial case (where PEG_REVISION is
952    # the same as LIMIT_REVISION), this is a no-brainer.  If
953    # LIMIT_REVISION is older than PEG_REVISION, we can use Subversion's
954    # history tracing code to find the right location.  If, however,
955    # LIMIT_REVISION is younger than PEG_REVISION, we suffer from
956    # Subversion's lack of forward history searching.  Our workaround,
957    # ugly as it may be, involves a binary search through the revisions
958    # between PEG_REVISION and LIMIT_REVISION to find our last live
959    # revision.
960    peg_revision = self._getrev(peg_revision)
961    limit_revision = self._getrev(limit_revision)
962    try:
963      if peg_revision == limit_revision:
964        return peg_revision, path
965      elif peg_revision > limit_revision:
966        fsroot = self._getroot(peg_revision)
967        history = fs.node_history(fsroot, path)
968        while history:
969          path, peg_revision = fs.history_location(history)
970          if peg_revision <= limit_revision:
971            return max(peg_revision, limit_revision), _cleanup_path(path)
972          history = fs.history_prev(history, 1)
973        return peg_revision, _cleanup_path(path)
974      else:
975        orig_id = fs.node_id(self._getroot(peg_revision), path)
976        while peg_revision != limit_revision:
977          mid = (peg_revision + 1 + limit_revision) // 2
978          try:
979            mid_id = fs.node_id(self._getroot(mid), path)
980          except core.SubversionException as e:
981            if e.apr_err == core.SVN_ERR_FS_NOT_FOUND:
982              cmp = -1
983            else:
984              raise
985          else:
986            ### Not quite right.  Need a comparison function that only returns
987            ### true when the two nodes are the same copy, not just related.
988            cmp = fs.compare_ids(orig_id, mid_id)
989
990          if cmp in (0, 1):
991            peg_revision = mid
992          else:
993            limit_revision = mid - 1
994
995        return peg_revision, path
996    finally:
997      pass
998
999  def get_symlink_target(self, path_parts, rev):
1000    """Return the target of the symbolic link versioned at PATH_PARTS
1001    in REV, or None if that object is not a symlink."""
1002
1003    path = self._getpath(path_parts)
1004    rev = self._getrev(rev)
1005    path_type = self.itemtype(path_parts, rev)  # does auth-check
1006    fsroot = self._getroot(rev)
1007
1008    # Symlinks must be files with the svn:special property set on them
1009    # and with file contents which read "link SOME_PATH".
1010    if path_type != vclib.FILE:
1011      return None
1012    props = fs.node_proplist(fsroot, path)
1013    if core.SVN_PROP_SPECIAL not in props:
1014      return None
1015    pathspec = ''
1016    ### FIXME: We're being a touch sloppy here, only checking the first line
1017    ### of the file.
1018    stream = fs.file_contents(fsroot, path)
1019    try:
1020      pathspec, eof = core.svn_stream_readline(stream, b'\n')
1021    finally:
1022      core.svn_stream_close(stream)
1023    if pathspec[:5] != 'link ':
1024      return None
1025    return pathspec[5:]
1026