1# -*- coding: utf-8 -*-
2
3# virtual_path.py --- Classes used to manipulate slash-separated virtual paths
4#
5# Copyright (C) 2018  Florent Rougon
6#
7# This program is free software; you can redistribute it and/or
8# modify it under the terms of the GNU General Public License as
9# published by the Free Software Foundation; either version 2 of the
10# License, or (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful, but
13# WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15# General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20
21"""Module containing the VirtualPath and MutableVirtualPath classes."""
22
23import pathlib
24
25
class VirtualPath:
    """Class used to represent virtual paths using the slash separator.

    This class always uses the slash ('/') as the separator between
    components. For terrasync.py, the root path '/' corresponds to the
    repository root, regardless of where it is stored (hard drive,
    remote server, etc.).

    Note: because of this, the class is not supposed to be used directly
          for filesystem accesses, since some root directory or
          protocol://server/root-dir prefix would have to be prepended
          to provide reasonably useful functionality. This is why the
          paths managed by this class are said to be virtual. This also
          implies that even in Python 3.6 or later, this class should
          *not* inherit from os.PathLike.

    Whenever a given feature exists in pathlib.PurePath, this class
    replicates the corresponding pathlib.PurePath API, but using
    mixedCaseStyle instead of underscore_style (the latter being used
    for every method of pathlib.PurePath). Of course, types are adapted:
    for instance, methods of this class often return a VirtualPath
    instance, whereas the corresponding pathlib.PurePath methods would
    return a pathlib.PurePath instance.

    Instances are hashable; equality (== and !=) requires both operands
    to be of exactly the same type, whereas the ordering operators and
    samePath() accept any instance of VirtualPath or of a subclass.

    """

    def __init__(self, p):
        """Initialize the instance from a string.

        p -- string representing the virtual path; it is normalized
             with normalizeStringPath() before being stored

        """
        # Once this function exits, self._path *must not be changed* anymore
        # (doing so would violate the contract for a hashable object: the
        # hash must not change once the object has been constructed).
        self._path = self.normalizeStringPath(p)
        # This check could of course be skipped if it is found to really affect
        # performance.
        self._check()

    def __str__(self):
        """Return a string representation of the path in self.

        The return value:
          - always starts with a '/';
          - never ends with a '/' except if it is exactly '/' (i.e.,
            the root virtual path).

        """
        return self._path

    def asPosix(self):
        """Return a string representation of the path in self.

        This method returns str(self), it is only present for
        compatibility with pathlib.PurePath.

        """
        return str(self)

    def __repr__(self):
        # The module name is taken from this module's __name__, therefore the
        # result depends on how the module was imported or run.
        return "{}.{}({!r})".format(__name__, type(self).__name__, self._path)

    # The four ordering operators allow sorting with instances of VirtualPath,
    # or of any subclass. Note that the == operator (__eq__()) and therefore
    # also != are stricter with respect to typing.
    def __lt__(self, other):
        if not isinstance(other, VirtualPath):
            return NotImplemented

        return self._path < other._path

    def __le__(self, other):
        if not isinstance(other, VirtualPath):
            return NotImplemented

        return self._path <= other._path

    def __eq__(self, other):
        # The types must be the same, therefore a VirtualPath never compares
        # equal to a MutableVirtualPath with the == operator. For such
        # comparisons, use the samePath() method. If __eq__() (and thus
        # necessarily __hash__()) were more lax about typing, adding
        # VirtualPath instances and instances of hashable subclasses of
        # VirtualPath with the same _path to a set or frozenset would lead to
        # unintuitive behavior, since they would all be considered equal.
        return type(self) == type(other) and self._path == other._path

    def __ne__(self, other):
        # Must be the exact logical negation of __eq__(): objects of
        # different types are *not equal*, hence 'a != b' has to be True for
        # them (it would be inconsistent for both 'a == b' and 'a != b' to be
        # False).
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __gt__(self, other):
        if not isinstance(other, VirtualPath):
            return NotImplemented

        return self._path > other._path

    def __ge__(self, other):
        if not isinstance(other, VirtualPath):
            return NotImplemented

        return self._path >= other._path

    def __hash__(self):
        # Be strict about typing, as for __eq__().
        return hash((type(self), self._path))

    def samePath(self, other):
        """Compare the path with another instance, possibly of a subclass.

        other -- instance of VirtualPath, or of a subclass of
                 VirtualPath

        Return True if both objects hold the same path string, False
        otherwise. Contrary to the == operator, the exact types of
        'self' and 'other' are not taken into account. TypeError is
        raised if 'other' is not an instance of VirtualPath.

        """
        if not isinstance(other, VirtualPath):
            raise TypeError("{obj!r} is of type {klass}, which is neither "
                            "VirtualPath nor a subclass thereof"
                            .format(obj=other, klass=type(other).__name__))

        return self._path == other._path

    def _check(self):
        """Run consistency checks on self.

        Assert that the stored path starts with exactly one slash and
        doesn't end with a slash, unless it is the root path '/'.

        """
        assert (self._path.startswith('/') and not self._path.startswith('//')
                and (self._path == '/' or not self._path.endswith('/'))), \
                repr(self._path)

    @classmethod
    def normalizeStringPath(cls, path):
        """Normalize a string representing a virtual path.

        path -- input path (string)

        Return a string that always starts with a slash, never contains
        consecutive slashes and only ends with a slash if it's the root
        virtual path ('/').

        If 'path' doesn't start with a slash ('/'), it is considered
        relative to the root. This implies that if 'path' is the empty
        string, the return value is '/'.

        """
        if not path.startswith('/'):
            # / is the "virtual root" of the TerraSync repository
            path = '/' + path
        elif path.startswith('//') and not path.startswith('///'):
            # Nasty special case. As allowed (but not mandated!) by POSIX[1],
            # in pathlib.PurePosixPath('//some/path'), no collapsing happens[2].
            # This is only the case for exactly *two* *leading* slashes.
            # [1] http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11
            # [2] https://www.python.org/dev/peps/pep-0428/#construction
            path = path[1:]

        return pathlib.PurePosixPath(path).as_posix()

    def __truediv__(self, s):
        """Path concatenation with the '/' operator.

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir". It must
        neither start nor end with a '/' (this is checked with an
        assertion).

        Return a new instance of type(self).

        """
        assert not (s.startswith('/') or s.endswith('/')), repr(s)

        # The root path already ends with the separator
        separator = '' if self._path == '/' else '/'
        return type(self)(self._path + separator + s)

    def joinpath(self, *args):
        """Combine 'self' with each given string argument in turn.

        Each argument should be of the form "foo", "foo/bar",
        "foo/bar/baz", etc. Return the corresponding instance of
        type(self).

        >>> p = VirtualPath("/foo").joinpath("bar", "baz", "quux/zoot")
        >>> str(p)
        '/foo/bar/baz/quux/zoot'

        """
        return self / '/'.join(args)

    @property
    def name(self):
        """Return a string representing the final path component.

        The result is the empty string for the root path '/'.

        >>> p = VirtualPath("/foo/bar/baz")
        >>> p.name
        'baz'

        """
        pos = self._path.rfind('/')
        assert pos != -1, (pos, self._path)

        return self._path[pos+1:]

    @property
    def parts(self):
        """Return a tuple containing the path's components.

        The first element is always '/'.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.parts
        ('/', 'usr', 'bin', 'python3')

        """
        if self._path == "/":
            return ('/',)

        # Skip the leading slash before splitting
        return ('/',) + tuple(self._path[1:].split('/'))

    def generateParents(self):
        """Generator function for the parents of the path.

        Yield instances of type(self), from the closest ancestor up to
        the root path '/'. Nothing is yielded if 'self' is the root
        path. See the 'parents' property for details.

        """
        # Work on a snapshot so that the sequence of ancestors stays
        # consistent, whatever happens to 'self' while we are suspended.
        path = self._path
        if path == '/':
            return

        assert path.startswith('/'), repr(path)

        # Each slash found at a strictly positive index ends one ancestor.
        pos = path.rfind('/')
        while pos > 0:
            yield type(self)(path[:pos])
            pos = path.rfind('/', 0, pos)

        assert pos == 0, pos
        yield type(self)('/')

    @property
    def parents(self):
        """The path ancestors.

        Return an immutable sequence providing access to the logical
        ancestors of the path.

        >>> p = VirtualPath('/foo/bar/baz')
        >>> len(p.parents)
        3
        >>> p.parents[0]
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> p.parents[1]
        terrasync.virtual_path.VirtualPath('/foo')
        >>> p.parents[2]
        terrasync.virtual_path.VirtualPath('/')

        """
        return tuple(self.generateParents())

    @property
    def parent(self):
        """The logical parent of the path.

        The parent of the root path is the root path itself. The return
        value is an instance of type(self).

        >>> p = VirtualPath('/foo/bar/baz')
        >>> p.parent
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> q = VirtualPath('/')
        >>> q.parent
        terrasync.virtual_path.VirtualPath('/')

        """
        pos = self._path.rfind('/')
        assert pos >= 0, pos

        # pos == 0 means the only slash is the leading one: parent is the root
        return type(self)(self._path[:pos] if pos > 0 else '/')

    @property
    def suffix(self):
        """The extension of the final component, if any.

        The suffix starts at the last '.' of the 'name' part, wherever
        that dot is located (including at the very beginning or at the
        very end of the name).

        >>> VirtualPath('/my/library/setup.py').suffix
        '.py'
        >>> VirtualPath('/my/library.tar.gz').suffix
        '.gz'
        >>> VirtualPath('/my/library').suffix
        ''

        """
        name = self.name
        pos = name.rfind('.')
        return name[pos:] if pos != -1 else ''

    @property
    def suffixes(self):
        """A list of the path's extensions.

        Every '.' found in the 'name' part starts a new element.

        >>> VirtualPath('/my/library/setup.py').suffixes
        ['.py']
        >>> VirtualPath('/my/library.tar.gz').suffixes
        ['.tar', '.gz']
        >>> VirtualPath('/my/library').suffixes
        []

        """
        name = self.name
        found = []              # suffixes, collected from right to left
        end = len(name)
        pos = name.rfind('.', 0, end)

        while pos != -1:
            found.append(name[pos:end])
            end = pos
            pos = name.rfind('.', 0, end)

        found.reverse()         # restore the left-to-right order
        return found

    @property
    def stem(self):
        """The final path component, without its suffix.

        This is the 'name' part up to, but not including its last '.'
        (the whole name if it contains no '.').

        >>> VirtualPath('/my/library.tar.gz').stem
        'library.tar'
        >>> VirtualPath('/my/library.tar').stem
        'library'
        >>> VirtualPath('/my/library').stem
        'library'
        >>> VirtualPath('/').stem
        ''

        """
        name = self.name
        pos = name.rfind('.')

        return name if pos == -1 else name[:pos]

    def asRelative(self):
        """Return the virtual path without its leading '/'.

        The return value is a string.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.asRelative()
        'usr/bin/python3'

        >>> VirtualPath('').asRelative()
        ''
        >>> VirtualPath('/').asRelative()
        ''

        """
        assert self._path.startswith('/'), repr(self._path)
        return self._path[1:]

    def relativeTo(self, other):
        """Return the portion of this path that follows 'other'.

        other -- string representing a virtual path (it is normalized
                 with normalizeStringPath()), or instance of VirtualPath
                 or of a subclass

        The return value is a string; it is empty if 'other' designates
        the same path as 'self'. If the operation is impossible, i.e.,
        if 'other' is neither the same path as 'self' nor one of its
        ancestors, ValueError is raised.

        >>> VirtualPath('/etc/passwd').relativeTo('/')
        'etc/passwd'
        >>> VirtualPath('/etc/passwd').relativeTo('/etc')
        'passwd'
        >>> VirtualPath('/etc').relativeTo('/etc')
        ''

        """
        if isinstance(other, VirtualPath):
            normedOther = other._path # already normalized
        else:
            normedOther = self.normalizeStringPath(other)

        if normedOther == '/':
            return self._path[1:]
        elif self._path == normedOther:
            # Same result as for the root path relative to itself
            return ''
        elif self._path.startswith(normedOther + '/'):
            # Requiring the slash after the prefix makes sure that 'other'
            # ends at a component boundary ('/foo/ba' must not match
            # '/foo/bar').
            return self._path[len(normedOther) + 1:]

        raise ValueError("{!r} does not start with '{}'".format(self, other))

    def withName(self, newName):
        """Return a new VirtualPath instance with the 'name' part changed.

        newName -- string, new final component; it must neither start
                   nor end with a '/', otherwise ValueError is raised

        The return value is always a plain VirtualPath, even when
        'self' is an instance of a subclass.

        If the original path is '/' (which doesn't have a name in the
        sense of the 'name' property), ValueError is raised.

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withName('setup.py')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/setup.py')

        """
        if self._path == '/':
            raise ValueError("{!r} has an empty name".format(self))

        pos = self._path.rfind('/')
        assert pos != -1, (pos, self._path)

        if newName.startswith('/'):
            raise ValueError("{!r} starts with a '/'".format(newName))
        elif newName.endswith('/'):
            raise ValueError("{!r} ends with a '/'".format(newName))

        # self._path[:pos] is the empty string for a direct child of the
        # root; the constructor normalizes it to '/'.
        return VirtualPath(self._path[:pos]) / newName

    def withSuffix(self, newSuffix):
        """Return a new VirtualPath instance with the suffix changed.

        If the original path doesn't have a suffix, the new suffix is
        appended:

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withSuffix('.bz2')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/pathlib.tar.bz2')
        >>> p = VirtualPath('/foobar/README')
        >>> p.withSuffix('.txt')
        terrasync.virtual_path.VirtualPath('/foobar/README.txt')

        If 'self' is the root virtual path ('/') or 'newSuffix' doesn't
        start with '.', ValueError is raised.

        The new path is built with withName(), therefore the return
        value is always a plain VirtualPath.

        """
        if not newSuffix.startswith('.'):
            raise ValueError("new suffix {!r} doesn't start with '.'"
                             .format(newSuffix))

        name = self.name
        if not name:
            raise ValueError("{!r} has an empty 'name' part".format(self))

        pos = name.rfind('.')
        # Without any '.' in the name, there is no suffix to strip: append
        base = name if pos == -1 else name[:pos]

        return self.withName(base + newSuffix)
456
class MutableVirtualPath(VirtualPath):

    """Variant of VirtualPath that supports in-place modification.

    Instances of this class behave like VirtualPath objects, except
    that path components can be appended in-place using the /=
    operator. Since the path held by an instance may thus change during
    its lifetime, instances are deliberately not hashable: they can't
    be used as dictionary keys, nor be stored in a set or frozenset.

    """

    # Setting __hash__ to None is how a class is declared unhashable
    __hash__ = None

    def _normalize(self):
        """Replace self._path with its normalized form."""
        self._path = self.normalizeStringPath(self._path)

    def __itruediv__(self, s):
        """Append path components in-place (the '/=' operator).

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir". It must
        neither start nor end with a '/' (checked with an assertion).

        Return 'self', as required for an in-place operator.

        """
        # Sanity check of the current state; it could be dropped, should it
        # ever turn out to be a performance problem.
        self._check()
        assert not s.startswith('/') and not s.endswith('/'), repr(s)

        # No separator is needed after the root path, which already ends
        # with a slash.
        separator = '' if self._path == '/' else '/'
        self._path = self._path + separator + s

        # Collapse multiple slashes, remove trailing '/' except if the whole
        # path is '/', etc.
        self._normalize()

        return self
496
497
if __name__ == "__main__":
    # Run the doctests embedded in the docstrings of this module when the
    # file is executed as a script. For full test coverage, use the unittest
    # framework instead (it is set up to automatically run all doctests from
    # this module!).
    #
    # Hint: 'python3 -m unittest discover' from the TerraSync directory
    #       should do the trick.
    #
    # NOTE(review): VirtualPath.__repr__() embeds this module's __name__,
    # whereas several doctest examples show a 'terrasync.virtual_path.'
    # prefix; those examples presumably only match when the module is
    # imported under that name -- to be confirmed.
    import doctest
    doctest.testmod()
507