1#
2#  wc.py: functions for interacting with a Subversion working copy
3#
4#  Subversion is a tool for revision control.
5#  See http://subversion.tigris.org for more information.
6#
7# ====================================================================
8#    Licensed to the Apache Software Foundation (ASF) under one
9#    or more contributor license agreements.  See the NOTICE file
10#    distributed with this work for additional information
11#    regarding copyright ownership.  The ASF licenses this file
12#    to you under the Apache License, Version 2.0 (the
13#    "License"); you may not use this file except in compliance
14#    with the License.  You may obtain a copy of the License at
15#
16#      http://www.apache.org/licenses/LICENSE-2.0
17#
18#    Unless required by applicable law or agreed to in writing,
19#    software distributed under the License is distributed on an
20#    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21#    KIND, either express or implied.  See the License for the
22#    specific language governing permissions and limitations
23#    under the License.
24######################################################################
25
26import os
27import sys
28import re
29import logging
30import pprint
31import io
32
33if sys.version_info[0] >= 3:
34  # Python >=3.0
35  from io import StringIO
36  from urllib.parse import quote as urllib_quote
37else:
38  # Python <3.0
39  from cStringIO import StringIO
40  from urllib import quote as urllib_quote
41
42import svntest
43
44logger = logging.getLogger()
45
46
47#
48# 'status -v' output looks like this:
49#
50#      "%c%c%c%c%c%c%c %c   %6s   %6s %-12s %s\n"
51#
52# (Taken from 'print_status' in subversion/svn/status.c.)
53#
54# Here are the parameters.  The middle number or string in parens is the
55# match.group(), followed by a brief description of the field:
56#
57#    - text status           (1)  (single letter)
58#    - prop status           (1)  (single letter)
59#    - wc-lockedness flag    (2)  (single letter: "L" or " ")
60#    - copied flag           (3)  (single letter: "+" or " ")
61#    - switched flag         (4)  (single letter: "S", "X" or " ")
62#    - repos lock status     (5)  (single letter: "K", "O", "B", "T", " ")
63#    - tree conflict flag    (6)  (single letter: "C" or " ")
64#
65#    [one space]
66#
67#    - out-of-date flag      (7)  (single letter: "*" or " ")
68#
69#    [three spaces]
70#
71#    - working revision ('wc_rev') (either digits or "-", "?" or " ")
72#
73#    [one space]
74#
75#    - last-changed revision      (either digits or "?" or " ")
76#
77#    [one space]
78#
79#    - last author                (optional string of non-whitespace
80#                                  characters)
81#
82#    [spaces]
83#
84#    - path              ('path') (string of characters until newline)
85#
86# Working revision, last-changed revision, and last author are whitespace
87# only if the item is missing.
88#
89_re_parse_status = re.compile('^([?!MACDRUGXI_~ ][MACDRUG_ ])'
90                              '([L ])'
91                              '([+ ])'
92                              '([SX ])'
93                              '([KOBT ])'
94                              '([C ]) '
95                              '([* ]) +'
96                              '((?P<wc_rev>\d+|-|\?) +(\d|-|\?)+ +(\S+) +)?'
97                              '(?P<path>.+)$')
98
99_re_parse_status_ex = re.compile('^      ('
100               '(  \> moved (from (?P<moved_from>.+)|to (?P<moved_to>.*)))'
101              '|(  \> swapped places with (?P<swapped_with>.+).*)'
102              '|(\>   (?P<tc>.+))'
103  ')$')
104
105_re_parse_skipped = re.compile("^(Skipped[^']*) '(.+)'( --.*)?\n")
106
107_re_parse_summarize = re.compile("^([MAD ][M ])      (.+)\n")
108
109_re_parse_checkout = re.compile('^([RMAGCUDE_ B][MAGCUDE_ ])'
110                                '([B ])'
111                                '([CAUD ])\s+'
112                                '(.+)')
113_re_parse_co_skipped = re.compile('^(Restored|Skipped|Removed external)'
114                                  '\s+\'(.+)\'(( --|: ).*)?')
115_re_parse_co_restored = re.compile('^(Restored)\s+\'(.+)\'')
116
117# Lines typically have a verb followed by whitespace then a path.
118_re_parse_commit_ext = re.compile('^(([A-Za-z]+( [a-z]+)*)) \'(.+)\'( --.*)?')
119_re_parse_commit = re.compile('^(\w+(  \(bin\))?)\s+(.+)')
120
121#rN: eids 0 15 branches 4
122_re_parse_eid_header = re.compile('^r(-1|[0-9]+): eids ([0-9]+) ([0-9]+) '
123                                  'branches ([0-9]+)$')
124# B0.2 root-eid 3
125_re_parse_eid_branch = re.compile('^(B[0-9.]+) root-eid ([0-9]+) num-eids ([0-9]+)( from [^ ]*)?$')
126_re_parse_eid_merge_history = re.compile('merge-history: merge-ancestors ([0-9]+)')
127# e4: normal 6 C
128_re_parse_eid_ele = re.compile('^e([0-9]+): (none|normal|subbranch) '
129                               '(-1|[0-9]+) (.*)$')
130
class State:
  """Describes an existing or expected state of a working copy.

  The primary metaphor here is a dictionary of paths mapping to instances
  of StateItem, which describe each item in a working copy.

  Note: the paths should be *relative* to the root of the working copy,
  using '/' for the separator (see to_relpath()), and the root of the
  working copy is identified by the empty path: ''.
  """

  def __init__(self, wc_dir, desc):
    """Create a State rooted at WC_DIR using the specified description.

    DESC must be a dictionary mapping path -> StateItem.  It is stored by
    reference, not copied.
    """
    assert isinstance(desc, dict)

    self.wc_dir = wc_dir
    self.desc = desc      # dictionary: path -> StateItem

  def add(self, more_desc):
    """Add more state items into the State.

    MORE_DESC is a dictionary mapping path -> StateItem; entries for paths
    that are already present are overwritten.
    """
    assert isinstance(more_desc, dict)

    self.desc.update(more_desc)

  def add_state(self, parent, state, strict=False):
    """Import state items from a State object, reparent the items to PARENT.

    If STRICT is true, PARENT is simply prepended to each path with no
    separator.  Otherwise the root item ('') of STATE is stored at PARENT
    and every other item at PARENT + '/' + path.
    """
    assert isinstance(state, State)

    for path, item in state.desc.items():
      if strict:
        path = parent + path
      elif path == '':
        path = parent
      else:
        path = parent + '/' + path
      self.desc[path] = item

  def remove(self, *paths):
    "Remove PATHS from the state (the paths must exist)."
    for path in paths:
      del self.desc[to_relpath(path)]

  def remove_subtree(self, *paths):
    """Remove PATHS recursively from the state.

    Each path and everything below it is dropped.  A path that matches
    nothing is silently ignored.
    """
    for subtree_path in paths:
      subtree_path = to_relpath(subtree_path)
      # Iterate over a snapshot, since entries are deleted while looping.
      for path, item in svntest.main.ensure_list(self.desc.items()):
        if path == subtree_path or path[:len(subtree_path) + 1] == subtree_path + '/':
          del self.desc[path]

  def copy(self, new_root=None):
    """Make a deep copy of self.  If NEW_ROOT is not None, then set the
    copy's wc_dir to NEW_ROOT instead of to self's wc_dir."""
    desc = { }
    for path, item in self.desc.items():
      desc[path] = item.copy()
    if new_root is None:
      new_root = self.wc_dir
    return State(new_root, desc)

  def tweak(self, *args, **kw):
    """Tweak the items' values.

    Each argument in ARGS is the path of a StateItem that already exists in
    this State. Each keyword argument in KW is a modifiable property of
    StateItem.

    The general form of this method is .tweak([paths...,] key=value...). If
    one or more paths are provided, then those items' values are
    modified.  If no paths are given, then all items are modified.

    Raises KeyError (with a descriptive message) if a given path is not
    present in this State.
    """
    if args:
      for path in args:
        try:
          path_ref = self.desc[to_relpath(path)]
        except KeyError as e:
          # Replace the bare key with a message naming the offending path.
          e.args = ["Path '%s' not present in WC state descriptor" % path]
          raise
        path_ref.tweak(**kw)
    else:
      for item in self.desc.values():
        item.tweak(**kw)

  def tweak_some(self, filter, **kw):
    """Tweak the items for which the filter returns true.

    FILTER is called as filter(path, item) for every item and its result is
    used as a truth value; KW is passed on to StateItem.tweak() for each
    accepted item.
    """
    for path, item in self.desc.items():
      # FILTER is the caller's predicate (it shadows the builtin), so its
      # result must be tested directly rather than wrapped in list().
      if filter(path, item):
        item.tweak(**kw)

  def rename(self, moves):
    """Change the path of some items.

    MOVES is a dictionary mapping source path to destination
    path. Children move with moved parents.  All subtrees are moved in
    reverse depth order to temporary storage before being moved in
    depth order to the final location.  This allows nested moves.

    """
    temp = {}
    # Phase 1: detach every moved subtree, visiting sources in reverse
    # sorted order so that children are detached before their parents.
    for src in sorted(moves, reverse=True):
      temp[src] = {}
      for path, item in svntest.main.ensure_list(self.desc.items()):
        if path == src or path[:len(src) + 1] == src + '/':
          temp[src][path] = item
          del self.desc[path]
    # Phase 2: re-attach the subtrees, ordered by destination path.
    for src, dst in sorted(moves.items(), key=lambda pair: pair[1]):
      for path, item in temp[src].items():
        if path == src:
          new_path = dst
        else:
          new_path = dst + path[len(src):]
        self.desc[new_path] = item

  def subtree(self, subtree_path):
    """Return a State object which is a deep copy of the sub-tree
    beneath SUBTREE_PATH (which is assumed to be rooted at the tree of
    this State object's WC_DIR).  Exclude SUBTREE_PATH itself."""
    desc = { }
    for path, item in self.desc.items():
      if path[:len(subtree_path) + 1] == subtree_path + '/':
        desc[path[len(subtree_path) + 1:]] = item.copy()
    return State(self.wc_dir, desc)

  def write_to_disk(self, target_dir):
    """Construct a directory structure on disk, matching our state.

    WARNING: any StateItem that does not have contents (.contents is None)
    is assumed to be a directory.
    """
    if not os.path.exists(target_dir):
      os.makedirs(target_dir)

    for path, item in self.desc.items():
      fullpath = os.path.join(target_dir, path)
      if item.contents is None:
        # a directory
        if not os.path.exists(fullpath):
          os.makedirs(fullpath)
      else:
        # a file

        # ensure its directory exists
        dirpath = os.path.dirname(fullpath)
        if not os.path.exists(dirpath):
          os.makedirs(dirpath)

        # write out the file contents now
        svntest.main.file_write(fullpath, item.contents, 'wb')

  def normalize(self):
    """Return a "normalized" version of self.

    A normalized version has the following characteristics:

      * wc_dir == ''
      * paths use forward slashes
      * paths are relative

    If self is already normalized, then it is returned. Otherwise, a
    new State is constructed with (shallow) references to self's
    StateItem instances.  Items carrying move information are the
    exception: they are copied so that their moved_from/moved_to paths can
    be rebased without modifying self's items.

    If the caller needs a fully disjoint State, then use .copy() on
    the result.
    """
    if self.wc_dir == '':
      return self

    base = to_relpath(os.path.normpath(self.wc_dir))

    desc = {repos_join(base, path): item
            for path, item in self.desc.items()}

    # Snapshot the items; entries of DESC are replaced inside the loop.
    for path, item in list(desc.items()):
      if item.moved_from or item.moved_to:
        i = item.copy()

        if i.moved_from:
          i.moved_from = to_relpath(os.path.normpath(
                                        repos_join(base, i.moved_from)))
        if i.moved_to:
          i.moved_to = to_relpath(os.path.normpath(
                                        repos_join(base, i.moved_to)))

        desc[path] = i

    return State('', desc)

  def compare(self, other):
    """Compare this State against an OTHER State.

    Three new set objects will be returned: CHANGED, UNIQUE_SELF, and
    UNIQUE_OTHER. These contain paths of StateItems that are different
    between SELF and OTHER, paths of items unique to SELF, and paths
    of item that are unique to OTHER, respectively.
    """
    assert isinstance(other, State)

    norm_self = self.normalize()
    norm_other = other.normalize()

    # fast-path the easy case
    if norm_self == norm_other:
      fs = frozenset()
      return fs, fs, fs

    paths_self = set(norm_self.desc.keys())
    paths_other = set(norm_other.desc.keys())
    changed = set()
    for path in paths_self.intersection(paths_other):
      if norm_self.desc[path] != norm_other.desc[path]:
        changed.add(path)

    return changed, paths_self - paths_other, paths_other - paths_self

  def compare_and_display(self, label, other):
    """Compare this State against an OTHER State, and display differences.

    Information about one difference between the two states is displayed
    through display_nodes() or default_singleton_handler(). LABEL will be
    used in the display. SELF is the "expected" state, and OTHER is the
    "actual" state.

    If any changes are detected/displayed, then SVNTreeUnequal is raised.
    """
    norm_self = self.normalize()
    norm_other = other.normalize()

    changed, unique_self, unique_other = norm_self.compare(norm_other)
    if not changed and not unique_self and not unique_other:
      return

    # Use the shortest path as a way to find the "root-most" affected node.
    if changed:
      path = min(changed, key=len)
      display_nodes(label, path, norm_self.desc[path], norm_other.desc[path])
    elif unique_self:
      # Present in the expected state only, i.e. missing from the actual one.
      path = min(unique_self, key=len)
      default_singleton_handler('actual ' + label, path, norm_self.desc[path])
    elif unique_other:
      # Present in the actual state only, i.e. missing from the expected one.
      path = min(unique_other, key=len)
      default_singleton_handler('expected ' + label, path,
                                norm_other.desc[path])

    raise svntest.tree.SVNTreeUnequal

  def tweak_for_entries_compare(self):
    """Adjust this State in place for comparison with entries-based data.

    Removes items and clears or replaces item attributes as described by
    the comments on each step below.
    """
    # Work on a snapshot, since items are deleted from self.desc below.
    for path, item in self.desc.copy().items():
      if item.status and path in self.desc:
        # If this is an unversioned tree-conflict, remove it.
        # These are only in their parents' THIS_DIR, they don't have entries.
        if item.status[0] in '!?' and item.treeconflict == 'C' and \
                                      item.entry_status is None:
          del self.desc[path]
        # Normal externals are not stored in the parent wc, drop the root
        # and everything in these working copies
        elif item.status == 'X ' or item.prev_status == 'X ':
          del self.desc[path]
          for p, i in self.desc.copy().items():
            if p.startswith(path + '/'):
              del self.desc[p]
        elif item.entry_kind == 'file':
          # A file has no descendants in svn_wc_entry_t
          for p, i in self.desc.copy().items():
            if p.startswith(path + '/'):
              del self.desc[p]
        else:
          # when reading the entry structures, we don't examine for text or
          # property mods, so clear those flags. we also do not examine the
          # filesystem, so we cannot detect missing or obstructed files.
          if item.status[0] in 'M!~':
            item.status = ' ' + item.status[1]
          if item.status[1] == 'M':
            item.status = item.status[0] + ' '
          # under wc-ng terms, we may report a different revision than the
          # backwards-compatible code should report. if there is a special
          # value for compatibility, then use it.
          if item.entry_rev is not None:
            item.wc_rev = item.entry_rev
            item.entry_rev = None
          # status might vary as well, e.g. when a directory is missing
          if item.entry_status is not None:
            item.status = item.entry_status
            item.entry_status = None
          if item.entry_copied is not None:
            item.copied = item.entry_copied
            item.entry_copied = None
      if item.writelocked:
        # we don't contact the repository, so our only information is what
        # is in the working copy. 'K' means we have one and it matches the
        # repos. 'O' means we don't have one but the repos says the item
        # is locked by us, elsewhere. 'T' means we have one, and the repos
        # has one, but it is now owned by somebody else. 'B' means we have
        # one, but the repos does not.
        #
        # for each case of "we have one", set the writelocked state to 'K',
        # and clear it to None for the others. this will match what is
        # generated when we examine our working copy state.
        if item.writelocked in 'TB':
          item.writelocked = 'K'
        elif item.writelocked == 'O':
          item.writelocked = None
      # Move, switched-root and tree-conflict details are cleared for every
      # item, whether or not the branches above touched it.
      item.moved_from = None
      item.moved_to = None
      if path == '':
        item.switched = None
      item.treeconflict = None

  def old_tree(self):
    "Return an old-style tree (for compatibility purposes)."
    nodelist = [ ]
    for path, item in self.desc.items():
      nodelist.append(item.as_node_tuple(os.path.join(self.wc_dir, path)))

    tree = svntest.tree.build_generic_tree(nodelist)
    # Disabled round-trip check, kept as a debugging aid.
    if 0:
      check = tree.as_state()
      if self != check:
        logger.warning(pprint.pformat(self.desc))
        logger.warning(pprint.pformat(check.desc))
        # STATE -> TREE -> STATE is lossy.
        # In many cases, TREE -> STATE -> TREE is not.
        # Even though our conversion from a TREE has lost some information, we
        # may be able to verify that our lesser-STATE produces the same TREE.
        svntest.tree.compare_trees('mismatch', tree, check.old_tree())

    return tree

  def __str__(self):
    "Return the string form of the old-style tree for this State."
    return str(self.old_tree())

  def __eq__(self, other):
    "Two States are equal when their normalized descriptions are equal."
    if not isinstance(other, State):
      return False
    norm_self = self.normalize()
    norm_other = other.normalize()
    return norm_self.desc == norm_other.desc

  def __ne__(self, other):
    return not self.__eq__(other)

  @classmethod
  def from_status(cls, lines, wc_dir=None):
    """Create a State object from 'svn status' output.

    LINES is an iterable of output lines.  WC_DIR (default '') is only used
    to resolve '../'-style paths in "moved from/to" lines; the returned
    State always has wc_dir ''.

    Lines starting with 'DBG:' are skipped.  A line that is not a status
    line, or whose last-changed revision column is '-', is only inspected
    for move information, which is attached to the most recently parsed
    item.
    """

    def not_space(value):
      "Return VALUE, or None if VALUE is empty or a single space."
      if value and value != ' ':
        return value
      return None

    def parse_move(path, wc_dir):
      "Return the relpath form of a move source/target PATH."
      if path.startswith('../'):
        # ../ style paths are relative from the status root
        return to_relpath(os.path.normpath(repos_join(wc_dir, path)))
      else:
        # Other paths are just relative from cwd
        return to_relpath(path)

    if not wc_dir:
      wc_dir = ''

    desc = { }
    last = None   # the StateItem created for the latest status line
    for line in lines:
      if line.startswith('DBG:'):
        continue

      match = _re_parse_status.search(line)
      if not match or match.group(10) == '-':

        ex_match = _re_parse_status_ex.search(line)

        if ex_match:
          if ex_match.group('moved_from'):
            path = to_relpath(ex_match.group('moved_from'))
            last.tweak(moved_from = parse_move(path, wc_dir))
          elif ex_match.group('moved_to'):
            path = to_relpath(ex_match.group('moved_to'))
            last.tweak(moved_to = parse_move(path, wc_dir))
          elif ex_match.group('swapped_with'):
            # A swap is recorded as a move in both directions.
            path = to_relpath(ex_match.group('swapped_with'))
            last.tweak(moved_to = parse_move(path, wc_dir))
            last.tweak(moved_from = parse_move(path, wc_dir))

          # Parse TC description?

        # ignore non-matching lines, or items that only exist on repos
        continue

      prev_status = None
      prev_treeconflict = None

      path = to_relpath(match.group('path'))
      if path == '.':
        path = ''
      if path in desc:
        # The same path was reported before; remember what it said.
        prev_status = desc[path].status
        prev_treeconflict = desc[path].treeconflict

      item = StateItem(status=match.group(1),
                       locked=not_space(match.group(2)),
                       copied=not_space(match.group(3)),
                       switched=not_space(match.group(4)),
                       writelocked=not_space(match.group(5)),
                       treeconflict=not_space(match.group(6)),
                       wc_rev=not_space(match.group('wc_rev')),
                       prev_status=prev_status,
                       prev_treeconflict=prev_treeconflict
                       )
      desc[path] = item
      last = item

    return cls('', desc)

  @classmethod
  def from_skipped(cls, lines):
    """Create a State object from 'Skipped' lines.

    Each matching line yields an item whose verb is the text before the
    quoted path, with any surrounding ':' stripped.  Lines starting with
    'DBG:' and non-matching lines are ignored.
    """

    desc = { }
    for line in lines:
      if line.startswith('DBG:'):
        continue

      match = _re_parse_skipped.search(line)
      if match:
        desc[to_relpath(match.group(2))] = StateItem(
          verb=(match.group(1).strip(':')))

    return cls('', desc)

  @classmethod
  def from_summarize(cls, lines):
    """Create a State object from 'svn diff --summarize' lines.

    Each matching line yields an item whose status is the two leading
    status columns.  Lines starting with 'DBG:' and non-matching lines are
    ignored.
    """

    desc = { }
    for line in lines:
      if line.startswith('DBG:'):
        continue

      match = _re_parse_summarize.search(line)
      if match:
        desc[to_relpath(match.group(2))] = StateItem(status=match.group(1))

    return cls('', desc)

  @classmethod
  def from_checkout(cls, lines, include_skipped=True):
    """Create a State object from 'svn checkout' lines.

    Status lines yield items with a status (and tree conflict flag, if
    any).  If INCLUDE_SKIPPED is true, 'Restored', 'Skipped' and 'Removed
    external' lines yield items with that verb; otherwise only 'Restored'
    lines do.  When a path occurs more than once, the later item records
    the earlier item's status, verb and tree conflict flag in its prev_*
    attributes.
    """

    if include_skipped:
      re_extra = _re_parse_co_skipped
    else:
      re_extra = _re_parse_co_restored

    desc = { }

    def previous(path):
      "Return (status, verb, treeconflict) of the item already at PATH."
      if path in desc:
        earlier = desc[path]
        return earlier.status, earlier.verb, earlier.treeconflict
      return None, None, None

    for line in lines:
      if line.startswith('DBG:'):
        continue

      match = _re_parse_checkout.search(line)
      if match:
        if match.group(3) != ' ':
          treeconflict = match.group(3)
        else:
          treeconflict = None
        path = to_relpath(match.group(4))
        prev_status, prev_verb, prev_treeconflict = previous(path)

        desc[path] = StateItem(status=match.group(1),
                               treeconflict=treeconflict,
                               prev_status=prev_status,
                               prev_verb=prev_verb,
                               prev_treeconflict=prev_treeconflict)
      else:
        match = re_extra.search(line)
        if match:
          path = to_relpath(match.group(2))
          prev_status, prev_verb, prev_treeconflict = previous(path)

          desc[path] = StateItem(verb=match.group(1),
                                 prev_status=prev_status,
                                 prev_verb=prev_verb,
                                 prev_treeconflict=prev_treeconflict)

    return cls('', desc)

  @classmethod
  def from_commit(cls, lines):
    """Create a State object from 'svn commit' lines.

    Lines starting with 'DBG:', 'Transmitting' or 'Committing transaction'
    are skipped.  Lines with a quoted path are tried first; remaining lines
    are parsed as "<verb> <path>".
    """

    desc = { }
    for line in lines:
      if line.startswith('DBG:') or line.startswith('Transmitting'):
        continue

      if line.startswith('Committing transaction'):
        continue

      match = _re_parse_commit_ext.search(line)
      if match:
        desc[to_relpath(match.group(4))] = StateItem(verb=match.group(1))
        continue

      match = _re_parse_commit.search(line)
      if match:
        desc[to_relpath(match.group(3))] = StateItem(verb=match.group(1))

    return cls('', desc)

  @classmethod
  def from_wc(cls, base, load_props=False, ignore_svn=True,
              keep_eol_style=False):
    """Create a State object from a working copy.

    Walks the tree at BASE, building a State based on the actual files
    and directories found. If LOAD_PROPS is True, then the properties
    will be loaded for all nodes (Very Expensive!). If IGNORE_SVN is
    True, then the .svn subdirectories will be excluded from the State.

    If KEEP_EOL_STYLE is set, don't let Python normalize the EOL when
    reading working copy contents as text files.  It has no effect on
    binary files.

    Files are read as UTF-8 text; a file that cannot be decoded that way
    is read as bytes instead.
    """
    if not base:
      # we're going to walk the base, and the OS wants "."
      base = '.'

    desc = { }
    dot_svn = svntest.main.get_admin_name()

    # newline='' disables EOL translation; None is io.open()'s default.
    if keep_eol_style:
      newline = ''
    else:
      newline = None

    for dirpath, dirs, files in os.walk(base):
      parent = path_to_key(dirpath, base)
      if ignore_svn and dot_svn in dirs:
        # Pruning DIRS in place stops os.walk() from descending into it.
        dirs.remove(dot_svn)
      for name in dirs + files:
        node = os.path.join(dirpath, name)
        if os.path.isfile(node):
          try:
            with io.open(node, 'r', newline=newline,
                         encoding='utf-8') as fp:
              contents = fp.read()
            if not isinstance(contents, str):
              # Python 2: contents is read as an unicode object,
              # but we expect it is a str.
              contents = contents.encode()
          except UnicodeError:
            # If the file contains non UTF-8 character, we treat its
            # content as binary represented as a bytes object.
            with open(node, 'rb') as fp:
              contents = fp.read()
        else:
          contents = None
        desc[repos_join(parent, name)] = StateItem(contents=contents)

    if load_props:
      paths = [os.path.join(base, to_ospath(p)) for p in desc.keys()]
      paths.append(base)
      all_props = svntest.tree.get_props(paths)
      for node, props in all_props.items():
        if node == base:
          desc['.'] = StateItem(props=props)
        else:
          if base == '.':
            # 'svn proplist' strips './' from the paths. put it back on.
            node = os.path.join('.', node)
          desc[path_to_key(node, base)].props = props

    return cls('', desc)

  @classmethod
  def from_entries(cls, base):
    """Create a State object from a working copy, via the old "entries" API.

    Walks the tree at BASE, building a State based on the information
    provided by the old entries API, as accessed via the 'entries-dump'
    program.
    """
    if not base:
      # we're going to walk the base, and the OS wants "."
      base = '.'

    if os.path.isfile(base):
      # a few tests run status on a single file. quick-and-dirty this. we
      # really should analyze the entry (similar to below) to be general.
      dirpath, basename = os.path.split(base)
      entries = svntest.main.run_entriesdump(dirpath)
      return cls('', {
          to_relpath(base): StateItem.from_entry(entries[basename]),
          })

    desc = { }
    dump_data = svntest.main.run_entriesdump_tree(base)

    if not dump_data:
      # Probably 'svn status' run on an actual only node
      # ### Improve!
      return cls('', desc)

    dirent_join = repos_join
    if len(base) == 2 and base[1:]==':' and sys.platform=='win32':
      # We have a win32 drive relative path... Auch. Fix joining
      def drive_join(a, b):
        if len(a) == 2:
          return a+b
        else:
          return repos_join(a,b)
      dirent_join = drive_join

    # Sorted, so that a directory is handled before anything below it and
    # its URL is available when its children are checked for switches.
    for parent, entries in sorted(dump_data.items()):

      parent_url = entries[''].url

      for name, entry in entries.items():
        # if the entry is marked as DELETED *and* it is something other than
        # schedule-add, then skip it. we can add a new node "over" where a
        # DELETED node lives.
        if entry.deleted and entry.schedule != 1:
          continue
        # entries that are ABSENT don't show up in status
        if entry.absent:
          continue
        # entries that are User Excluded don't show up in status
        if entry.depth == -1:
          continue
        if name and entry.kind == 2:
          # stub subdirectory. leave a "missing" StateItem in here. note
          # that we can't put the status as "! " because that gets tweaked
          # out of our expected tree.
          item = StateItem(status='  ', wc_rev='?')
          desc[dirent_join(parent, name)] = item
          continue
        item = StateItem.from_entry(entry)
        if name:
          desc[dirent_join(parent, name)] = item
          implied_url = repos_join(parent_url, svn_uri_quote(name))
        else:
          item._url = entry.url  # attach URL to directory StateItems
          desc[parent] = item

          grandpa, this_name = repos_split(parent)
          if grandpa in desc:
            implied_url = repos_join(desc[grandpa]._url,
                                     svn_uri_quote(this_name))
          else:
            implied_url = None

        # An entry whose URL differs from parent-URL + name is switched.
        if implied_url and implied_url != entry.url:
          item.switched = 'S'

        if entry.file_external:
          item.switched = 'X'

    return cls('', desc)

  @classmethod
  def from_eids(cls, lines):
    """Create a State object from lines describing branches and elements.

    Element lines ("eN: <kind> <parent-eid> <name>") are collected per
    branch; a branch line ("B... root-eid ...") starts a new branch.  Each
    element whose kind is not 'none' becomes a StateItem(eid=N) keyed by
    <branch-id>/<path-within-branch>.
    """

    # Need to read all elements in a branch before we can construct
    # the full path to an element.
    # For the full path we use <branch-id>/<path-within-branch>.

    def eid_path(eids, eid):
      "Return the path of element EID within its branch."
      ele = eids[eid]
      if ele[0] == '-1':
        # no parent: this is the branch root
        return ele[1]
      parent_path = eid_path(eids, ele[0])
      if parent_path == '':
        return ele[1]
      return parent_path + '/' + ele[1]

    def eid_full_path(eids, eid, branch_id):
      "Return the path of element EID prefixed with BRANCH_ID."
      path = eid_path(eids, eid)
      if path == '':
        return branch_id
      return branch_id + '/' + path

    def add_to_desc(eids, desc, branch_id):
      "Add one StateItem to DESC for each element in EIDS."
      for k, v in eids.items():
        desc[eid_full_path(eids, k, branch_id)] = StateItem(eid=k)

    branch_id = None
    eids = {}     # eid -> [parent eid, name] for the current branch
    desc = {}
    for line in lines:

      match = _re_parse_eid_ele.search(line)
      if match and match.group(2) != 'none':
        eid = match.group(1)
        parent_eid = match.group(3)
        path = match.group(4)
        if path == '.':
          path = ''
        eids[eid] = [parent_eid, path]

      match = _re_parse_eid_branch.search(line)
      if match:
        if branch_id:
          # A new branch starts: flush the elements of the previous one.
          add_to_desc(eids, desc, branch_id)
          eids = {}
        branch_id = match.group(1)

      match = _re_parse_eid_merge_history.search(line)
      if match:
        ### TODO: store the merge history
        pass

    # Flush the elements of the last branch.
    add_to_desc(eids, desc, branch_id)

    return cls('', desc)
863
864
class StateItem:
  """Describes an individual item within a working copy.

  Note that the location of this item is not specified. An external
  mechanism, such as the State class, will provide location information
  for each item.
  """

  def __init__(self, contents=None, props=None,
               status=None, verb=None, wc_rev=None, entry_kind=None,
               entry_rev=None, entry_status=None, entry_copied=None,
               locked=None, copied=None, switched=None, writelocked=None,
               treeconflict=None, moved_from=None, moved_to=None,
               prev_status=None, prev_verb=None, prev_treeconflict=None,
               eid=None):
    """Initialize the item; every argument is stored as the attribute
    of the same name.

    PROPS defaults to a new empty dictionary.  WC_REV and EID are
    converted to strings when they are not None.
    """
    # provide an empty prop dict if it wasn't provided
    if props is None:
      props = { }

    ### keep/make these ints one day?
    if wc_rev is not None:
      wc_rev = str(wc_rev)
    if eid is not None:
      eid = str(eid)

    # Any attribute can be None if not relevant, unless otherwise stated.

    # A string of content (if the node is a file).
    self.contents = contents
    # A dictionary mapping prop name to prop value; never None.
    self.props = props
    # A two-character string from the first two columns of 'svn status'.
    self.status = status
    self.prev_status = prev_status
    # The action word such as 'Adding' printed by commands like 'svn update'.
    self.verb = verb
    self.prev_verb = prev_verb
    # The base revision number of the node in the WC, as a string.
    self.wc_rev = wc_rev
    # If 'file' specifies that the node is a file, and as such has no
    # svn_wc_entry_t descendants.
    # (Previously the ENTRY_KIND argument was accepted but discarded.)
    self.entry_kind = entry_kind
    # These will be set when we expect the wc_rev/status to differ from those
    # found in the entries code.
    self.entry_rev = entry_rev
    self.entry_status = entry_status
    self.entry_copied = entry_copied
    # For the following attributes, the value is the status character of that
    # field from 'svn status', except using value None instead of status ' '.
    self.locked = locked
    self.copied = copied
    self.switched = switched
    self.writelocked = writelocked
    # Value 'C', 'A', 'D' or ' ', or None as an expected status meaning
    # 'do not check'.
    self.treeconflict = treeconflict
    self.prev_treeconflict = prev_treeconflict
    # Relative paths to the move locations
    self.moved_from = moved_from
    self.moved_to = moved_to
    # The element id, as a string.
    self.eid = eid

  def copy(self):
    """Return a copy of self.

    The copy gets its own props dictionary (a copy of the original
    dictionary); every other attribute value is shared with self.
    """
    new = StateItem()
    vars(new).update(vars(self))
    new.props = self.props.copy()
    return new

  def tweak(self, **kw):
    """Set each attribute named by a keyword argument to the given value.

    Values for 'wc_rev' and 'eid' are converted to strings unless None.
    """
    for name, value in kw.items():
      # Refine the revision args (for now) to ensure they are strings.
      if value is not None and name in ('wc_rev', 'eid'):
        value = str(value)
      setattr(self, name, value)

  def __eq__(self, other):
    """Return True if OTHER is a StateItem with equal attributes.

    Attributes whose names start with '_' or 'entry_' are ignored.  An
    item with status 'A ' and wc_rev '0' is compared as if its wc_rev
    were '-'.
    """
    if not isinstance(other, StateItem):
      return False

    def comparable(item):
      atts = {k: v for k, v in vars(item).items()
              if not k.startswith(('_', 'entry_'))}
      if item.wc_rev == '0' and item.status == 'A ':
        atts['wc_rev'] = '-'
      return atts

    return comparable(self) == comparable(other)

  def __ne__(self, other):
    """Return the negation of __eq__()."""
    return not self.__eq__(other)

  def as_node_tuple(self, path):
    """Return the tuple (normalized PATH, contents, props, atts).

    ATTS is a dictionary holding each of the status-like attributes
    listed below whose value is not None.
    """
    names = ('status', 'prev_status', 'verb', 'prev_verb', 'wc_rev',
             'locked', 'copied', 'switched', 'writelocked',
             'treeconflict', 'prev_treeconflict',
             'moved_from', 'moved_to', 'eid')
    atts = { }
    for name in names:
      value = getattr(self, name)
      if value is not None:
        atts[name] = value

    return (os.path.normpath(path), self.contents, self.props, atts)

  @classmethod
  def from_entry(cls, entry):
    """Create an item from ENTRY.

    Reads ENTRY's schedule, conflict_old, prejfile, locked, copied,
    revision and lock_token attributes and derives the status, wc_rev,
    locked, copied and writelocked values from them.  'switched' is
    always left as None.
    """
    status = '  '
    if entry.schedule == 1:  # svn_wc_schedule_add
      status = 'A '
    elif entry.schedule == 2:  # svn_wc_schedule_delete
      status = 'D '
    elif entry.schedule == 3:  # svn_wc_schedule_replace
      status = 'R '
    elif entry.conflict_old:
      ### I'm assuming we only need to check one, rather than all conflict_*
      status = 'C '

    ### is this sufficient? guessing here w/o investigation.
    if entry.prejfile:
      status = status[0] + 'C'

    if entry.locked:
      locked = 'L'
    else:
      locked = None

    if entry.copied:
      wc_rev = '-'
      copied = '+'
    else:
      if entry.revision == -1:
        wc_rev = '?'
      else:
        wc_rev = entry.revision
      copied = None

    ### figure out switched
    switched = None

    if entry.lock_token:
      writelocked = 'K'
    else:
      writelocked = None

    return cls(status=status,
               wc_rev=wc_rev,
               locked=locked,
               copied=copied,
               switched=switched,
               writelocked=writelocked,
               )
1039
1040
if os.sep != '/':
  # The native separator differs from '/', so real conversion is needed.
  def to_relpath(path):
    """Return PATH but with all native path separators changed to '/'."""
    return '/'.join(path.split(os.sep))
  def to_ospath(path):
    """Return PATH but with each '/' changed to the native path separator."""
    return os.sep.join(path.split('/'))
else:
  # The native separator already is '/': both conversions are the identity.
  to_relpath = to_ospath = lambda path: path
1050
1051
def path_to_key(path, base):
  """Return PATH expressed relative to BASE, using '/' separators.

  PATH and BASE are absolute paths and PATH must be BASE itself or lie
  under it; when the two are equal the result is the empty string."""
  if path == base:
    return ''

  # Special path format on Windows:
  #  'C:/' is a valid root which includes its separator ('C:/file')
  #  'C:'  is a valid root which isn't followed by a separator ('C:file')
  #
  # Such a base needs no separator between it and the relpath; any other
  # base does, so account for one here.
  if not base.endswith((os.sep, '/', ':')):
    base = base + os.sep

  assert path.startswith(base), "'%s' is not a prefix of '%s'" % (base, path)
  remainder = path[len(base):]
  return to_relpath(remainder)
1072
1073
def repos_split(repos_relpath):
  """Split a repos path at its last '/' into (directory, basename).

  A path without any '/' yields an empty directory part."""
  dirname, _, basename = repos_relpath.rpartition('/')
  return dirname, basename
1080
1081
def repos_join(base, path):
  """Join two repos paths with a single '/' between them.

  If either part is empty the other one is returned unchanged, and no
  '/' is added when BASE already ends with one.  This generally works
  for URLs too."""
  if base == '':
    return path
  if path == '':
    return base

  separator = '' if base.endswith('/') else '/'
  return base + separator + path
1092
1093
def svn_uri_quote(url):
  """Return URL percent-encoded, leaving svn's "safe" characters alone."""
  # svn defines a different set of "safe" characters than Python does, so
  # we need to avoid escaping them. see subr/path.c:uri_char_validity[]
  svn_safe_chars = "!$&'()*+,-./:=@_~"
  return urllib_quote(url, svn_safe_chars)
1098
1099
1100# ------------
1101
def python_sqlite_can_read_wc():
  """Check if the Python builtin is capable enough to peek into wc.db.

  Returns the result of svntest.main.python_sqlite_can_read_our_wc_db()."""
  can_read = svntest.main.python_sqlite_can_read_our_wc_db()
  return can_read
1105
def open_wc_db(local_path):
  """Open the SQLite DB for the WC path LOCAL_PATH.

     Starting at LOCAL_PATH and moving up one parent directory at a time,
     try to connect to '<dir>/<admin-dir>/wc.db' until a connection
     succeeds.

     Return (DB object, WC root path, WC relpath of LOCAL_PATH); the
     relpath uses '/' separators.  Raise svntest.Failure if no DB could
     be opened in LOCAL_PATH or any of its parents."""
  dot_svn = svntest.main.get_admin_name()
  root_path = local_path
  relpath = ''

  while True:
    db_path = os.path.join(root_path, dot_svn, 'wc.db')
    try:
      db = svntest.sqlite3.connect(db_path)
      break
    except Exception:
      # No DB could be opened here; try the parent directory instead.
      # Deliberately not a bare 'except:', so that KeyboardInterrupt and
      # SystemExit are not swallowed by the search.
      pass
    head, tail = os.path.split(root_path)
    if head == root_path:
      # Splitting no longer shortens the path: there is no parent left.
      raise svntest.Failure("No DB for " + local_path)
    root_path = head
    # Prepend the component just stripped off to the relpath built so far.
    relpath = os.path.join(tail, relpath).replace(os.path.sep, '/').rstrip('/')

  return db, root_path, relpath
1126
1127# ------------
1128
def text_base_path(file_path):
  """Return the path to the text-base file for the versioned file
     FILE_PATH.

     The pristine file is located by the 'Checksum' value reported by
     svntest.actions.run_and_parse_info() for FILE_PATH.  Raise
     svntest.Failure if no such pristine file exists."""

  info = svntest.actions.run_and_parse_info(file_path)[0]

  checksum = info['Checksum']
  db, root_path, relpath = open_wc_db(file_path)
  # Only the WC root and relpath are needed here; close the connection
  # right away instead of leaving it open.
  db.close()

  # Calculate single DB location
  dot_svn = svntest.main.get_admin_name()
  fn = os.path.join(root_path, dot_svn, 'pristine', checksum[0:2], checksum)

  # For SVN_WC__VERSION < 29
  if os.path.isfile(fn):
    return fn

  # For SVN_WC__VERSION >= 29
  if os.path.isfile(fn + ".svn-base"):
    return fn + ".svn-base"

  raise svntest.Failure("No pristine text for " + relpath)
1151
def sqlite_stmt(wc_root_path, stmt):
  """Execute STMT on the SQLite wc.db in WC_ROOT_PATH and return the
     results.

     The results are all rows fetched from the cursor.  The database
     connection is closed before returning."""

  db = open_wc_db(wc_root_path)[0]
  try:
    c = db.cursor()
    c.execute(stmt)
    # fetchall() collects every row, so the connection can be closed
    # afterwards without losing the results.
    return c.fetchall()
  finally:
    db.close()
1160
def sqlite_exec(wc_root_path, stmt):
  """Execute STMT on the SQLite wc.db in WC_ROOT_PATH and commit the
     change.

     Nothing is returned.  The database connection is closed afterwards,
     whether or not the statement succeeded."""

  db = open_wc_db(wc_root_path)[0]
  try:
    c = db.cursor()
    c.execute(stmt)
    db.commit()
  finally:
    db.close()
1169
1170
1171# ------------
1172### probably toss these at some point. or major rework. or something.
1173### just bootstrapping some changes for now.
1174#
1175
def item_to_node(path, item):
  """Return the deepest node of the generic tree built for ITEM at PATH.

  The tree is built from ITEM's node tuple alone; it is then descended
  through its single chain of children down to the node without
  children."""
  node_tuple = item.as_node_tuple(path)
  node = svntest.tree.build_generic_tree([node_tuple])
  while node.children:
    # Built from one item, so every level must have exactly one child.
    assert len(node.children) == 1
    node = node.children[0]
  return node
1182
1183### yanked from tree.compare_trees()
def display_nodes(label, path, expected, actual):
  """Log a warning that shows two differing nodes, expected and actual.

  EXPECTED and ACTUAL are items that are converted to tree nodes for
  PATH with item_to_node(); LABEL names the tree they belong to and is
  used in the message only."""
  expected = item_to_node(path, expected)
  actual = item_to_node(path, actual)

  o = StringIO()
  try:
    o.write("=============================================================\n")
    o.write("Expected '%s' and actual '%s' in %s tree are different!\n"
                  % (expected.name, actual.name, label))
    o.write("=============================================================\n")
    o.write("EXPECTED NODE TO BE:\n")
    o.write("=============================================================\n")
    expected.pprint(o)
    o.write("=============================================================\n")
    o.write("ACTUAL NODE FOUND:\n")
    o.write("=============================================================\n")
    actual.pprint(o)

    # Logger.warn() is a deprecated alias; warning() is the supported name.
    logger.warning(o.getvalue())
  finally:
    o.close()
1204
1205### yanked from tree.py
def default_singleton_handler(description, path, item):
  """Log a warning about a node missing from a tree, then raise.

  ITEM is converted to a tree node for PATH with item_to_node();
  DESCRIPTION names the tree the node could not be found in.  The node
  is logged in pretty-printed form and svntest.tree.SVNTreeUnequal is
  always raised."""
  node = item_to_node(path, item)
  # Logger.warn() is a deprecated alias; warning() is the supported name.
  logger.warning("Couldn't find node '%s' in %s tree" % (node.name, description))
  o = StringIO()
  try:
    node.pprint(o)
    logger.warning(o.getvalue())
  finally:
    o.close()
  raise svntest.tree.SVNTreeUnequal
1214