# -*- coding: utf-8 -*-

# virtual_path.py --- Classes used to manipulate slash-separated virtual paths
#
# Copyright (C) 2018  Florent Rougon
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

"""Module containing the VirtualPath and MutableVirtualPath classes."""

import pathlib


class VirtualPath:
    """Class used to represent virtual paths using the slash separator.

    This class always uses the slash ('/') as the separator between
    components. For terrasync.py, the root path '/' corresponds to the
    repository root, regardless of where it is stored (hard drive,
    remote server, etc.).

    Note: because of this, the class is not supposed to be used directly
          for filesystem accesses, since some root directory or
          protocol://server/root-dir prefix would have to be prepended
          to provide reasonably useful functionality. This is why the
          paths managed by this class are said to be virtual. This also
          implies that even in Python 3.6 or later, this class should
          *not* inherit from os.PathLike.

    Whenever a given feature exists in pathlib.PurePath, this class
    replicates the corresponding pathlib.PurePath API, but using
    mixedCaseStyle instead of underscore_style (the latter being used
    for every method of pathlib.PurePath). Of course, types are adapted:
    for instance, methods of this class often return a VirtualPath
    instance, whereas the corresponding pathlib.PurePath methods would
    return a pathlib.PurePath instance.

    """
    def __init__(self, p):
        """Initialize the instance from the string 'p'.

        'p' is normalized with normalizeStringPath(); in particular, a
        path that doesn't start with '/' is considered relative to the
        virtual root.

        """
        # Once this function exits, self._path *must not be changed* anymore
        # (doing so would violate the contract for a hashable object: the
        # hash must not change once the object has been constructed).
        self._path = self.normalizeStringPath(p)
        # This check could of course be skipped if it is found to really affect
        # performance.
        self._check()

    def __str__(self):
        """Return a string representation of the path in self.

        The return value:
          - always starts with a '/';
          - never ends with a '/' except if it is exactly '/' (i.e.,
            the root virtual path).

        """
        return self._path

    def asPosix(self):
        """Return a string representation of the path in self.

        This method returns str(self), it is only present for
        compatibility with pathlib.PurePath.

        """
        return str(self)

    def __repr__(self):
        """Return 'module.ClassName(path)' using the actual class of self."""
        return "{}.{}({!r})".format(__name__, type(self).__name__, self._path)

    def __lt__(self, other):
        """Order by path string; 'other' may be of any VirtualPath subclass."""
        # Allow sorting with instances of VirtualPath, or of any subclass. Note
        # that the == operator (__eq__()) and therefore also != are stricter
        # with respect to typing.
        if isinstance(other, VirtualPath):
            return self._path < other._path
        else:
            return NotImplemented

    def __le__(self, other):
        """Order by path string; 'other' may be of any VirtualPath subclass."""
        if isinstance(other, VirtualPath):
            return self._path <= other._path
        else:
            return NotImplemented

    def __eq__(self, other):
        """Return True if 'other' has exactly the same type and path."""
        # The types must be the same, therefore a VirtualPath never compares
        # equal to a MutableVirtualPath with the == operator. For such
        # comparisons, use the samePath() method. If __eq__() (and thus
        # necessarily __hash__()) were more lax about typing, adding
        # VirtualPath instances and instances of hashable subclasses of
        # VirtualPath with the same _path to a set or frozenset would lead to
        # unintuitive behavior, since they would all be considered equal.
        return type(self) == type(other) and self._path == other._path

    def __ne__(self, other):
        """Return the exact negation of __eq__()."""
        # Objects of different types are never equal (see __eq__()), hence
        # they must always compare as "not equal". The 'or' short-circuits, so
        # other._path is only read when 'other' has the same type as 'self'.
        return type(self) != type(other) or self._path != other._path

    def __gt__(self, other):
        """Order by path string; 'other' may be of any VirtualPath subclass."""
        if isinstance(other, VirtualPath):
            return self._path > other._path
        else:
            return NotImplemented

    def __ge__(self, other):
        """Order by path string; 'other' may be of any VirtualPath subclass."""
        if isinstance(other, VirtualPath):
            return self._path >= other._path
        else:
            return NotImplemented

    def __hash__(self):
        """Hash on both the type and the path, consistently with __eq__()."""
        # Be strict about typing, as for __eq__().
        return hash((type(self), self._path))

    def samePath(self, other):
        """Compare the path with another instance, possibly of a subclass.

        other -- instance of VirtualPath, or of a subclass of
                 VirtualPath

        Return True if both objects hold the same path, regardless of
        their exact types. Raise TypeError if 'other' is not an instance
        of VirtualPath.

        """
        if isinstance(other, VirtualPath):
            return self._path == other._path
        else:
            raise TypeError("{obj!r} is of type {klass}, which is neither "
                            "VirtualPath nor a subclass thereof"
                            .format(obj=other, klass=type(other).__name__))

    def _check(self):
        """Run consistency checks on self."""
        # The stored path must start with exactly one '/' and must not end
        # with '/' unless it is the root virtual path.
        assert (self._path.startswith('/') and not self._path.startswith('//')
                and (self._path == '/' or not self._path.endswith('/'))), \
                repr(self._path)

    @classmethod
    def normalizeStringPath(cls, path):
        """Normalize a string representing a virtual path.

        path -- input path (string)

        Return a string that always starts with a slash, never contains
        consecutive slashes and only ends with a slash if it's the root
        virtual path ('/').

        If 'path' doesn't start with a slash ('/'), it is considered
        relative to the root. This implies that if 'path' is the empty
        string, the return value is '/'.

        """
        if not path.startswith('/'):
            # / is the “virtual root” of the TerraSync repository
            path = '/' + path
        elif path.startswith('//') and not path.startswith('///'):
            # Nasty special case. As allowed (but not mandated!) by POSIX[1],
            # in pathlib.PurePosixPath('//some/path'), no collapsing happens[2].
            # This is only the case for exactly *two* *leading* slashes.
            # [1] http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11
            # [2] https://www.python.org/dev/peps/pep-0428/#construction
            path = path[1:]

        return pathlib.PurePosixPath(path).as_posix()

    def __truediv__(self, s):
        """Path concatenation with the '/' operator.

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir".

        Return a new instance of type(self).

        """
        assert not (s.startswith('/') or s.endswith('/')), repr(s)

        if self._path == '/':
            return type(self)(self._path + s)
        else:
            return type(self)(self._path + '/' + s)

    def joinpath(self, *args):
        """Combine 'self' with each given string argument in turn.

        Each argument should be of the form "foo", "foo/bar",
        "foo/bar/baz", etc. Return the corresponding instance of
        type(self).

        >>> p = VirtualPath("/foo").joinpath("bar", "baz", "quux/zoot")
        >>> str(p)
        '/foo/bar/baz/quux/zoot'

        """
        return self / '/'.join(args)

    @property
    def name(self):
        """Return a string representing the final path component.

        >>> p = VirtualPath("/foo/bar/baz")
        >>> p.name
        'baz'

        """
        pos = self._path.rfind('/')
        assert pos != -1, (pos, self._path)

        return self._path[pos+1:]

    @property
    def parts(self):
        """Return a tuple containing the path’s components.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.parts
        ('/', 'usr', 'bin', 'python3')

        """
        if self._path == "/":
            return ('/',)
        else:
            # Skip the leading slash before splitting
            return ('/',) + tuple(self._path[1:].split('/'))

    def generateParents(self):
        """Generator function for the parents of the path.

        See the 'parents' property for details.

        """
        if self._path == '/':
            return

        assert self._path.startswith('/'), repr(self._path)
        prevPos = len(self._path)

        # Walk the slashes from right to left; each one that isn't the
        # leading slash delimits a (non-root) ancestor.
        while True:
            pos = self._path.rfind('/', 0, prevPos)

            if pos > 0:
                yield type(self)(self._path[:pos])
                prevPos = pos
            else:
                assert pos == 0, pos
                break

        # The root virtual path is always the last ancestor
        yield type(self)('/')

    @property
    def parents(self):
        """The path ancestors.

        Return an immutable sequence providing access to the logical
        ancestors of the path.

        >>> p = VirtualPath('/foo/bar/baz')
        >>> len(p.parents)
        3
        >>> p.parents[0]
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> p.parents[1]
        terrasync.virtual_path.VirtualPath('/foo')
        >>> p.parents[2]
        terrasync.virtual_path.VirtualPath('/')

        """
        return tuple(self.generateParents())

    @property
    def parent(self):
        """The logical parent of the path.

        >>> p = VirtualPath('/foo/bar/baz')
        >>> p.parent
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> q = VirtualPath('/')
        >>> q.parent
        terrasync.virtual_path.VirtualPath('/')

        """
        pos = self._path.rfind('/')
        assert pos >= 0, pos

        if pos == 0:
            return type(self)('/')
        else:
            return type(self)(self._path[:pos])

    @property
    def suffix(self):
        """The extension of the final component, if any.

        >>> VirtualPath('/my/library/setup.py').suffix
        '.py'
        >>> VirtualPath('/my/library.tar.gz').suffix
        '.gz'
        >>> VirtualPath('/my/library').suffix
        ''

        """
        name = self.name
        pos = name.rfind('.')
        return name[pos:] if pos != -1 else ''

    @property
    def suffixes(self):
        """A list of the path’s extensions.

        >>> VirtualPath('/my/library/setup.py').suffixes
        ['.py']
        >>> VirtualPath('/my/library.tar.gz').suffixes
        ['.tar', '.gz']
        >>> VirtualPath('/my/library').suffixes
        []

        """
        name = self.name
        prevPos = len(name)
        extensions = []

        # The dots are found from right to left, so the extensions are
        # collected in reverse order and put back in order at the end.
        while True:
            pos = name.rfind('.', 0, prevPos)
            if pos == -1:
                break

            extensions.append(name[pos:prevPos])
            prevPos = pos

        extensions.reverse()
        return extensions

    @property
    def stem(self):
        """The final path component, without its suffix.

        >>> VirtualPath('/my/library.tar.gz').stem
        'library.tar'
        >>> VirtualPath('/my/library.tar').stem
        'library'
        >>> VirtualPath('/my/library').stem
        'library'
        >>> VirtualPath('/').stem
        ''

        """
        name = self.name
        pos = name.rfind('.')

        return name if pos == -1 else name[:pos]

    def asRelative(self):
        """Return the virtual path without its leading '/'.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.asRelative()
        'usr/bin/python3'

        >>> VirtualPath('').asRelative()
        ''
        >>> VirtualPath('/').asRelative()
        ''

        """
        assert self._path.startswith('/'), repr(self._path)
        return self._path[1:]

    def relativeTo(self, other):
        """Return the portion of this path that follows 'other'.

        'other' is a string; it is normalized with
        normalizeStringPath() before the comparison.

        The return value is a string. If the operation is impossible,
        ValueError is raised. This includes the case where 'other' is
        not the root and designates the same path as 'self'.

        >>> VirtualPath('/etc/passwd').relativeTo('/')
        'etc/passwd'
        >>> VirtualPath('/etc/passwd').relativeTo('/etc')
        'passwd'

        """
        normedOther = self.normalizeStringPath(other)

        if normedOther == '/':
            return self._path[1:]
        elif self._path.startswith(normedOther):
            rest = self._path[len(normedOther):]

            # Requiring a '/' here ensures that 'other' ends at a component
            # boundary ('/etc' is not a prefix of '/etcetera' in this sense).
            if rest.startswith('/'):
                return rest[1:]

        raise ValueError("{!r} does not start with '{}'".format(self, other))

    def withName(self, newName):
        """Return a new VirtualPath instance with the 'name' part changed.

        If the original path is '/' (which doesn’t have a name in the
        sense of the 'name' property), ValueError is raised. The same
        exception is raised if 'newName' starts or ends with a '/'.

        Note: the return value is always built with the VirtualPath
        class itself, even when 'self' is an instance of a subclass.

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withName('setup.py')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/setup.py')

        """
        if self._path == '/':
            raise ValueError("{!r} has an empty name".format(self))
        else:
            pos = self._path.rfind('/')
            assert pos != -1, (pos, self._path)

            if newName.startswith('/'):
                raise ValueError("{!r} starts with a '/'".format(newName))
            elif newName.endswith('/'):
                raise ValueError("{!r} ends with a '/'".format(newName))
            else:
                return VirtualPath(self._path[:pos]) / newName

    def withSuffix(self, newSuffix):
        """Return a new VirtualPath instance with the suffix changed.

        If the original path doesn’t have a suffix, the new suffix is
        appended:

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withSuffix('.bz2')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/pathlib.tar.bz2')
        >>> p = VirtualPath('/foobar/README')
        >>> p.withSuffix('.txt')
        terrasync.virtual_path.VirtualPath('/foobar/README.txt')

        If 'self' is the root virtual path ('/') or 'newSuffix' doesn't
        start with '.', ValueError is raised.

        """
        if not newSuffix.startswith('.'):
            raise ValueError("new suffix {!r} doesn't start with '.'"
                             .format(newSuffix))

        name = self.name
        if not name:
            raise ValueError("{!r} has an empty 'name' part".format(self))

        pos = name.rfind('.')

        if pos == -1:
            return self.withName(name + newSuffix)      # append suffix
        else:
            return self.withName(name[:pos] + newSuffix)  # replace suffix


class MutableVirtualPath(VirtualPath):

    """Mutable subclass of VirtualPath.

    Contrary to VirtualPath objects, instances of this class can be
    modified in-place with the /= operator, in order to append path
    components. The price to pay for this advantage is that they can't
    be used as dictionary keys or as elements of a set or frozenset,
    because they are not hashable.

    """

    __hash__ = None             # ensure the type is not hashable

    def _normalize(self):
        """Normalize self._path in-place using normalizeStringPath()."""
        self._path = self.normalizeStringPath(self._path)

    def __itruediv__(self, s):
        """Path concatenation with the '/=' operator.

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir".

        The instance is modified in-place and returned.

        """
        # This check could of course be skipped if it is found to really affect
        # performance.
        self._check()
        assert not (s.startswith('/') or s.endswith('/')), repr(s)

        if self._path == '/':
            self._path += s
        else:
            self._path += '/' + s

        # Collapse multiple slashes, remove trailing '/' except if the whole
        # path is '/', etc.
        self._normalize()

        return self


if __name__ == "__main__":
    # The doctest setup below works, but for full test coverage, use the
    # unittest framework (it is set up to automatically run all doctests from
    # this module!).
    #
    # Hint: 'python3 -m unittest discover' from the TerraSync directory
    #       should do the trick.
    import doctest
    doctest.testmod()