
# Copyright 2015-2016 Jaap Karssenberg <jaap.karssenberg@gmail.com>

'''Base classes for filesystem and storage implementation'''

import os
import re
import hashlib
import contextlib

import logging

logger = logging.getLogger('zim.newfs')


from . import FS_SUPPORT_NON_LOCAL_FILE_SHARES

from zim.errors import Error
from zim.parsing import url_encode, url_decode


# Raw strings: "\w" is not a valid escape sequence in a normal string literal
is_url_re = re.compile(r'^\w{2,}:/')
is_share_re = re.compile(r'^\\\\\w')


if os.name == 'nt':
    SEP = '\\'  # os.path.sep can still be "/" under msys
    _EOL = 'dos'
else:
    SEP = os.path.sep
    _EOL = 'unix'


class FileNotFoundError(Error):
    '''Error raised when a file or folder does not exist.
    The offending path (or path object) is available as C{file}.
    '''

    # TODO - description and translation

    def __init__(self, path):
        self.file = path
        # Accept both path objects (with a "path" attribute) and strings
        path = path.path if hasattr(path, 'path') else path
        Error.__init__(self, 'No such file or folder: %s' % path)


class FileExistsError(Error):
    '''Error raised when a file or folder already exists.
    The offending path (or path object) is available as C{file}.
    '''

    # TODO - description and translation

    def __init__(self, path):
        self.file = path
        path = path.path if hasattr(path, 'path') else path
        Error.__init__(self, 'File or folder already exists: %s' % path)


class FileUnicodeError(Error):
    '''Error raised when there is an issue decoding the file contents.
    Typically due to different encoding where UTF-8 is expected.
    '''

    def __init__(self, file, error):
        '''Constructor
        @param file: file object, must have a C{path} attribute
        @param error: the underlying decoding error, its string
        representation is appended to the description
        '''
        self.file = file
        self.error = error
        self.msg = _('Could not read: %s') % file.path
            # T: message for FileUnicodeError (%s is the file name)
        self.description = _('This usually means the file contains invalid characters')
            # T: message for FileUnicodeError
        self.description += '\n\n' + _('Details') + ':\n' + str(error)
            # T: label for detailed error


class FileChangedError(Error):
    '''Error raised when a file changed on disk unexpectedly.
    The offending path (or path object) is available as C{file}.
    '''

    # TODO - description and translation

    def __init__(self, path):
        self.file = path
        path = path.path if hasattr(path, 'path') else path
        Error.__init__(self, 'File changed on disk: %s' % path)


class FileNotWritableError(Error):
    '''Error raised when there is no permission to write a file.
    The offending path (or path object) is available as C{file}.
    '''

    # TODO - description and translation

    def __init__(self, path):
        self.file = path
        path = path.path if hasattr(path, 'path') else path
        Error.__init__(self, 'No permission to write file: %s' % path)


class FolderNotEmptyError(Error):
    '''Error raised when a folder is not empty.
    The offending path (or path object) is available as C{file}.
    '''

    # TODO - description and translation

    def __init__(self, path):
        self.file = path  # consistent with the other error classes here
        path = path.path if hasattr(path, 'path') else path
        Error.__init__(self, 'Folder not empty: %s' % path)


def _split_file_url(url):
    '''Split a "file:" or "smb:" URL in path names
    @param url: the URL as string, backslashes are treated as "/"
    @returns: a 2-tuple of a list of (url decoded) path names and a
    boolean that is C{True} when the URL refers to a share
    @raises ValueError: if the URL scheme is not "file" or "smb"
    '''
    scheme, path = url.replace('\\', '/').split(':/', 1)
    if scheme not in ('file', 'smb'):
        raise ValueError('Not a file URL: %s' % url)

    if path.startswith('/localhost/'):  # exact 2 '/' before 'localhost'
        path = path[11:]  # strip "/localhost/"
        isshare = False
    elif scheme == 'smb' or re.match(r'^/\w', path):  # exact 2 '/' before host name
        isshare = True
    else:
        isshare = False  # either 'file:/' or 'file:///'

    return url_decode(path).strip('/').split('/'), isshare


def _splitnormpath(path, force_rel=False):
    '''Split and normalize a path
    @param path: either a string (file path or file URL) or a list
    or tuple of path names
    @param force_rel: when C{True} a string is always handled as a
    relative path, so no URL, "~" or root handling is done
    @returns: a tuple of path names with "." and ".." resolved where
    possible
    @raises ValueError: if no names are left after normalization
    '''
    # Takes either string or list of names and returns a normalized tuple
    # Keeps leading "/" or "\\" to distinguish absolute paths
    # Split must be robust for both "/" and "\" pathseperators regardless of
    # the os we are running on !
    if isinstance(path, str) and not force_rel:
        if is_url_re.match(path):
            makeroot = True
            path, makeshare = _split_file_url(path)
        else:
            if path.startswith('~'):
                makeroot = True
                path = _os_expanduser(path)
            else:
                makeroot = path.startswith('/')
            makeshare = is_share_re.match(path) is not None  # exact 2 "\"
            path = re.split(r'[/\\]+', path.strip('/\\'))
    else:
        makeshare = False
        makeroot = False
        if isinstance(path, str):
            path = re.split(r'[/\\]+', path.strip('/\\'))

    names = []
    for name in path:
        if name == '.' and names:
            pass  # "." only kept when it is the first name
        elif name == '..':
            if names and names[-1] != '..':
                names.pop()
            else:
                # Can not go further up, keep ".." and no longer
                # treat the result as starting at the root
                names.append(name)
                makeroot = False
        else:
            names.append(name)

    if not names:
        raise ValueError('path reduces to empty string')
    elif makeshare:
        names[0] = '\\\\' + names[0]  # UNC host needs leading "\\"
    elif makeroot and os.name != 'nt' and not names[0].startswith('/'):
        names[0] = '/' + names[0]

    return tuple(names)


if os.name == 'nt':
    def _joinabspath(names):
        '''Join path names to an absolute path string
        @raises ValueError: if the first name is not a drive letter
        or UNC host
        '''
        # first element must be either drive letter or UNC host
        if not re.match(r'^(\w:|\\\\\w)', names[0]):
            raise ValueError('Not an absolute path: %s' % '\\'.join(names))
        else:
            return '\\'.join(names)  # Don't rely on SEP here, msys sets it to '/'

    def _joinuri(names):
        '''Join path names to a "file:" URI
        @raises ValueError: if the first name is not a drive letter
        or UNC host
        '''
        # first element must be either drive letter or UNC host
        if not re.match(r'^(\w:|\\\\\w)', names[0]):
            raise ValueError('Not an absolute path: %s' % '\\'.join(names))
        elif re.match(r'^\w:$', names[0]):  # Drive letter - e.g. file:///C:/foo
            return 'file:///' + names[0] + '/' + url_encode('/'.join(names[1:]))
        elif re.match(r'^\\\\\w+$', names[0]):  # UNC path - e.g. file://host/share
            return 'file://' + url_encode(names[0].strip('\\') + '/' + '/'.join(names[1:]))

else:
    def _joinabspath(names):
        '''Join path names to an absolute path string
        @raises ValueError: if the first name does not start with
        either "/" or two backslashes
        '''
        if names[0].startswith('\\\\'):
            return '\\'.join(names)  # Windows share drive
        elif names[0].startswith('/'):
            return '/'.join(names)
        else:
            raise ValueError('Not an absolute path: %s' % '/'.join(names))

    def _joinuri(names):
        '''Join path names to a "file:" URI'''
        if names[0][0] == '/':
            return 'file://' + url_encode('/'.join(names))
        else:
            return 'file:///' + url_encode('/'.join(names))


def _os_expanduser(path):
    '''Expand a path starting with "~" or "~user"
    @param path: path string, must start with "~"
    @returns: the expanded path as string
    '''
    # Force usage of $HOME (especially on windows) instead of default logic
    # in os.path.expanduser
    # This depends on us setting HOME correctly based on USERPROFILE or similar
    assert path.startswith('~')
    home = os.environ['HOME']
    parts = path.replace('\\', '/').strip('/').split('/')
    if parts[0] == '~':
        return SEP.join([home] + parts[1:])
    else:  # ~user
        path = os.path.expanduser(path)
        if path.startswith('~'):
            # fallback - use a sibling folder of our own home folder
            homedir = os.path.dirname(home)
            return SEP.join([homedir, parts[0][1:]] + parts[1:])
        else:
            return path


def is_abs_filepath(string):
    '''Check whether C{string} can be parsed as an absolute file path
    @returns: C{True} if splitting and joining the path does not
    raise C{ValueError}, C{False} otherwise
    '''
    try:
        _joinabspath(_splitnormpath(string))
    except ValueError:
        return False
    else:
        return True


class FilePath(object):
    '''Class to represent filesystem paths and the base class for all
    file and folder objects. Contains methods for file path manipulation.

    File paths should always be absolute paths and can e.g. not start
    with "../" or "./". On windows they should always start with either
    a drive letter or a share drive. On unix they should start at the
    root of the filesystem.

    Paths can be handled either as strings representing a local file
    path ("/" or "\\" separated), strings representing a file uri
    ("file:///" or "smb://") or list of path names.
    '''

    __slots__ = ('path', 'pathnames', 'islocal')

    def __init__(self, path):
        '''Constructor
        @param path: a string, a list or tuple of path names, or
        another L{FilePath} object
        @raises TypeError: if C{path} is of any other type
        @raises ValueError: if C{path} is not an absolute path
        '''
        if isinstance(path, (tuple, list, str)):
            self.pathnames = _splitnormpath(path)
            self.path = _joinabspath(self.pathnames)
        elif isinstance(path, FilePath):
            self.pathnames = path.pathnames
            self.path = path.path
        else:
            raise TypeError('Cannot convert %r to a FilePath' % path)

        # Share drives are marked by a first name starting with "\\"
        self.islocal = not self.pathnames[0].startswith('\\\\')

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.path)

    def __str__(self):
        return self.path

    def __eq__(self, other):
        return isinstance(other, self.__class__) and other.path == self.path

    def serialize_zim_config(self):
        '''Returns the file path as string for serializing the object'''
        return self.userpath

    @classmethod
    def new_from_zim_config(klass, string):
        '''Returns a new object based on the string representation for
        that path
        '''
        return klass(string)

    @property
    def uri(self):
        '''The path as "file:" URI'''
        return _joinuri(self.pathnames)

    @property
    def basename(self):
        '''The last name of the path'''
        return self.pathnames[-1]

    @property
    def dirname(self):
        '''The path without the last name as string, or C{None} if
        the path consists of a single name
        '''
        if len(self.pathnames) >= 2:
            return _joinabspath(self.pathnames[:-1])
        else:
            return None

    @property
    def userpath(self):
        '''The path as string, written relative to "~" when it is
        below the home folder
        '''
        if self.ischild(_HOME):
            return '~' + SEP + self.relpath(_HOME)
        else:
            return self.path

    def get_childpath(self, path):
        '''Returns a C{FilePath} for a path below this path
        @param path: relative path as string or list of names
        @raises ValueError: if C{path} does not resolve to a path
        below this path
        '''
        assert path
        names = _splitnormpath(path, force_rel=True)
        if not names or names[0] == '..':
            raise ValueError('Relative path not below parent: %s' % path)
        return FilePath(self.pathnames + names)

    def get_abspath(self, path):
        '''Returns a C{FilePath} for C{path} where C{path} can be
        either an absolute path or a path relative to this path
        (either upward or downward - use L{get_childpath()} to only
        get child paths).
        '''
        try:
            return FilePath(path)
        except ValueError:
            # Not an absolute path
            names = _splitnormpath(path)
            return FilePath(self.pathnames + names)

    def ischild(self, parent):
        '''Check whether this path is below C{parent}
        @param parent: a L{FilePath} object
        @returns: C{True} if this path is strictly longer than
        C{parent} and starts with the names of C{parent}
        '''
        names = parent.pathnames
        return len(names) < len(self.pathnames) \
            and self.pathnames[:len(names)] == names

    def relpath(self, start, allowupward=False):
        '''Get the path relative to another path
        @param start: a L{FilePath} object
        @param allowupward: if C{True} and this path is not below
        C{start} the result is prefixed with ".." for each level
        between C{start} and the common parent
        @returns: the relative path as string
        @raises ValueError: if C{start} is not a parent of this path,
        or with C{allowupward} if there is no common parent
        '''
        if allowupward and not self.ischild(start):
            parent = self.commonparent(start)
            if parent is None:
                raise ValueError('No common parent between %s and %s' % (self.path, start.path))
            relpath = self.relpath(parent)
            level_up = len(start.pathnames) - len(parent.pathnames)
            return (('..' + SEP) * level_up) + relpath
        else:
            names = start.pathnames
            if not self.pathnames[:len(names)] == names:
                raise ValueError('Not a parent path: %s' % start.path)
            return SEP.join(self.pathnames[len(names):])

    def commonparent(self, other):
        '''Get the deepest path that is shared with C{other}
        @param other: a L{FilePath} object
        @returns: a L{FilePath} object, or C{None} if the first
        names differ or no differing name is found
        '''
        if self.pathnames[0] != other.pathnames[0]:
            return None  # also prevent other drives and other shares
        elif self.ischild(other):
            return other
        elif other.ischild(self):
            return self
        else:
            for i in range(1, len(self.pathnames)):
                if self.pathnames[:i + 1] != other.pathnames[:i + 1]:
                    return FilePath(self.pathnames[:i])


_HOME = FilePath('~')


class FSObjectMeta(type):
    '''This meta class allows implementing wrappers for file and folder objects
    with C{isinstance()} checking the wrapped class as well as the wrapper.
    Main use case is filtered version of folder object where e.g.
    C{isinstance(folder, LocalFolder)} is used to check whether the underlying
    resources exist external to the application.
    '''

    def __instancecheck__(cls, instance):
        if instance.__class__ == cls or issubclass(instance.__class__, cls):
            return True
        elif hasattr(instance, '_inner_fs_object') and isinstance(instance._inner_fs_object, cls):
            # Wrapper object - check the wrapped object instead
            return True
        else:
            return False


class FSObjectBase(FilePath, metaclass=FSObjectMeta):
    '''Base class for L{File} and L{Folder}'''

    def __init__(self, path, watcher=None):
        '''Constructor
        @param path: anything accepted by L{FilePath}
        @param watcher: optional object, stored as C{watcher}
        @raises ValueError: if the path is not local and
        C{FS_SUPPORT_NON_LOCAL_FILE_SHARES} is false
        '''
        FilePath.__init__(self, path)
        if not FS_SUPPORT_NON_LOCAL_FILE_SHARES and not self.islocal:
            raise ValueError('File system does not support non-local files')

        self.watcher = watcher

    def isequal(self, other):
        '''Check file paths are equal based on stat results (inode
        number etc.). Intended to detect when two files or dirs are the
        same on case-insensitive filesystems. Does not explicitly check
        the content is the same.
        @param other: an other L{FilePath} object
        @returns: C{True} when the two paths are one and the same file
        '''
        raise NotImplementedError

    def parent(self):
        raise NotImplementedError

    def ctime(self):
        raise NotImplementedError

    def mtime(self):
        raise NotImplementedError

    def exists(self):
        raise NotImplementedError

    def iswritable(self):
        raise NotImplementedError

    def touch(self):
        raise NotImplementedError

    def moveto(self, other):
        raise NotImplementedError

    def copyto(self, other):
        raise NotImplementedError

    def _set_mtime(self, mtime):
        raise NotImplementedError

    def _moveto(self, other):
        '''Generic move implemented as copy followed by remove'''
        # Pass both objects as separate arguments, one per "%s"
        logger.debug('Cross FS type move %s --> %s', self, other)
        self._copyto(other)
        self.remove()

    def remove(self, cleanup=True):
        raise NotImplementedError

    def _cleanup(self):
        '''Try to remove the parent folder, ignoring C{ValueError}
        and L{FolderNotEmptyError}
        '''
        try:
            self.parent().remove()
        except (ValueError, FolderNotEmptyError):
            pass


class Folder(FSObjectBase):
    '''Base class for folder implementations. Cannot be instantiated
    directly; use one of the subclasses instead. Main use outside of
    this module is to check C{isinstance(object, Folder)}.
    '''

    def __init__(self, path):
        raise NotImplementedError('This class is not meant to be instantiated directly')

    def __iter__(self):
        names = self.list_names()
        return self._object_iter(names, True, True)

    def list_files(self):
        '''Like iterating the folder, but requests files only'''
        names = self.list_names()
        return self._object_iter(names, True, False)

    def list_folders(self):
        '''Like iterating the folder, but requests folders only'''
        names = self.list_names()
        return self._object_iter(names, False, True)

    def _object_iter(self, names, showfile, showdir):
        raise NotImplementedError

    def list_names(self, include_hidden=False):
        raise NotImplementedError

    def walk(self):
        '''Generator that yields all children of this folder, each
        folder directly followed by its own children
        '''
        for child in self:
            yield child
            if isinstance(child, Folder):
                yield from child.walk()

    def file(self, path):
        raise NotImplementedError

    def folder(self, path):
        raise NotImplementedError

    def child(self, path):
        raise NotImplementedError

    def new_file(self, path, check=None):
        '''Get a L{File} object for a new file below this folder.
        Like L{file()} but guarantees the file does not yet exist by
        adding sequential numbers if needed. So the resulting file
        may have a modified name.

        @param path: the relative file path
        @param check: a function that can check and reject the choice before it
        is given back
        @returns: a L{File} object
        '''
        return self._new_child(path, self.file, check)

    def new_folder(self, path, check=None):
        '''Get a L{Folder} object for a new folder below this folder.
        Like L{folder()} but guarantees the file does not yet exist by
        adding sequential numbers if needed. So the resulting file
        may have a modified name.

        @param path: the relative file path
        @param check: a function that can check and reject the choice before it
        is given back
        @returns: a L{Folder} object
        '''
        return self._new_child(path, self.folder, check)

    def _new_child(self, path, factory, check=None):
        '''Find a child path that does not exist yet
        @param path: the relative path to try first
        @param factory: function called with the relative path to
        construct the object to return
        @param check: optional function called with the new object,
        a false return value rejects it
        @returns: the object returned by C{factory}
        @raises Exception: if no free name was found in 1000 tries
        '''
        # Escape "%" because the path is used as a format string below
        p = self.get_childpath(path.replace('%', '%%'))
        relpath = p.relpath(self)
        if '.' in p.basename:
            # Insert the number before the first "." of the basename.
            # Strip the extension from the end of the relative path, so
            # this also works when the path contains sub-folders.
            ext = p.basename.split('.', 1)[1]
            pattern = relpath[:-(len(ext) + 1)] + '%03i.' + ext
        else:
            pattern = relpath + '%03i'

        i = 0
        trypath = path
        while i < 1000:
            try:
                file = self.child(trypath)  # this way we catch both exiting files and folders
            except FileNotFoundError:
                child = factory(trypath)
                if check is None or check(child):
                    return child
                else:
                    logger.debug('File rejected by check "%s" trying increment', child.path)
            else:
                logger.debug('File exists "%s" trying increment', file.path)

            i += 1
            trypath = pattern % i
        else:
            raise Exception('Could not find new file for: %s' % path)

    def remove_children(self):
        '''Recursively remove everything below this folder .

        B{WARNING:} This is quite powerful and can do a lot of damage
        when executed for the wrong folder, so please make sure to double
        check the dir is actually what you think it is before calling this.
        '''
        for name in self.list_names(include_hidden=True):
            child = self.child(name)
            assert child.path.startswith(self.path)  # just to be real sure
            if isinstance(child, Folder):
                child.remove_children()
            child.remove()

    def _copyto(self, other):
        '''Copy this folder and all its children to C{other}
        @param other: the target folder object
        @raises FileExistsError: if C{other} already exists
        '''
        if other.exists():
            raise FileExistsError(other)
        other.touch()
        for child in self:
            if isinstance(child, File):
                child.copyto(other.file(child.basename))
            else:
                child.copyto(other.folder(child.basename))
        other._set_mtime(self.mtime())


xdgmime = None
mimetypes = None
if os.name == 'nt':
    # On windows even if xdg is installed, the database is not (always)
    # well initialized, so always fallback to mimetypes
    import mimetypes
else:
    try:
        import xdg.Mime as xdgmime
    except ImportError:
        logger.info("Can not import 'xdg.Mime' - falling back to 'mimetypes'")
        import mimetypes

#: Extensions to determine image mimetypes - used in L{File.isimage()}
IMAGE_EXTENSIONS = (
    # Gleaned from Gdk.get_formats()
    'bmp',  # image/bmp
    'gif',  # image/gif
    'icns',  # image/x-icns
    'ico',  # image/x-icon
    'cur',  # image/x-icon
    'jp2',  # image/jp2
    'jpc',  # image/jp2
    'jpx',  # image/jp2
    'j2k',  # image/jp2
    'jpf',  # image/jp2
    'jpeg',  # image/jpeg
    'jpe',  # image/jpeg
    'jpg',  # image/jpeg
    'pcx',  # image/x-pcx
    'png',  # image/png
    'pnm',  # image/x-portable-anymap
    'pbm',  # image/x-portable-anymap
    'pgm',  # image/x-portable-anymap
    'ppm',  # image/x-portable-anymap
    'ras',  # image/x-cmu-raster
    'tga',  # image/x-tga
    'targa',  # image/x-tga
    'tiff',  # image/tiff
    'tif',  # image/tiff
    'wbmp',  # image/vnd.wap.wbmp
    'xbm',  # image/x-xbitmap
    'xpm',  # image/x-xpixmap
    'wmf',  # image/x-wmf
    'apm',  # image/x-wmf
    'svg',  # image/svg+xml
    'svgz',  # image/svg+xml
    'svg.gz',  # image/svg+xml
    # Custom additions
    'webp',  # image/webp
)


# File name suffixes (including the ".") for the image shortcut in
# File.isimage(); a suffix match also covers multi-part extensions
# like "svg.gz"
_IMAGE_SUFFIXES = tuple('.' + ext for ext in IMAGE_EXTENSIONS)


def _md5(content):
    '''Calculate the md5 digest for file content
    @param content: a string or an iterable of strings (e.g. lines);
    C{bytes} are accepted as well and used without encoding
    @returns: the digest as C{bytes}
    '''
    # Provide encoded content to avoid double work
    if isinstance(content, (str, bytes)):
        content = (content,)

    m = hashlib.md5()
    for l in content:
        m.update(l if isinstance(l, bytes) else l.encode('UTF-8'))
    return m.digest()


class File(FSObjectBase):
    '''Base class for file implementations. Cannot be instantiated
    directly; use one of the subclasses instead. Main use outside of
    this module is to check C{isinstance(object, File)}.
    '''

    def __init__(self, path, endofline=_EOL):
        raise NotImplementedError('This class is not meant to be instantiated directly')

    def __iter__(self):
        return iter(self.readlines())

    def isimage(self):
        '''Check if this is an image file. Convenience method that
        works even when no real mime-type suport is available.
        If this method returns C{True} it is no guarantee
        this image type is actually supported by Gtk.
        @returns: C{True} when this is an image file
        '''
        # Quick shortcut to be able to load images in the gui even if
        # we have no proper mimetype support
        if self.basename.lower().endswith(_IMAGE_SUFFIXES):
            return True

        return self.mimetype().startswith('image/')

    def mimetype(self):
        '''Get the mime-type for this file.
        Will use the XDG mimetype system if available, otherwise
        fallsback to the standard library C{mimetypes}.
        @returns: the mimetype as a string, e.g. "text/plain"
        '''
        # NOTE(review): "_mimetype" is not initialized in this class,
        # presumably subclasses set it to None - confirm
        if self._mimetype is None:
            if xdgmime:
                mimetype = xdgmime.get_type(self.path, name_pri=80)
                self._mimetype = str(mimetype)
            else:
                mimetype, encoding = mimetypes.guess_type(self.path, strict=False)
                # For compressed files report the compression type
                if encoding == 'gzip':
                    mimetype = 'application/x-gzip'
                elif encoding == 'bzip2':
                    mimetype = 'application/x-bzip2'
                elif encoding == 'compress':
                    mimetype = 'application/x-compress'
                self._mimetype = mimetype or 'application/octet-stream'

        return self._mimetype

    def size(self):
        raise NotImplementedError

    def read(self, size=-1):
        raise NotImplementedError

    def readline(self, size=-1):
        raise NotImplementedError

    def readlines(self):
        raise NotImplementedError

    def read_binary(self):
        raise NotImplementedError

    def touch(self):
        '''Create the file with empty content if it does not exist'''
        if not self.exists():
            self.write('')

    def write(self, text):
        raise NotImplementedError

    def writelines(self, lines):
        raise NotImplementedError

    def write_binary(self, data):
        raise NotImplementedError

    @contextlib.contextmanager
    def _write_decoration(self):
        '''Context manager to wrap write actions. Touches the parent
        folder for a new file and emits "created" or "changed" on
        the watcher (if any) after the wrapped code finished.
        @raises FileNotWritableError: if the file exists but is not
        writable
        '''
        existed = self.exists()
        if not existed:
            self.parent().touch()
        elif not self.iswritable():
            raise FileNotWritableError(self)

        yield

        # Only reached when the wrapped code did not raise
        if self.watcher:
            if existed:
                self.watcher.emit('changed', self)
            else:
                self.watcher.emit('created', self)

    def read_with_etag(self):
        '''Like L{read()} but returns a 2-tuple of content and etag'''
        return self._read_with_etag(self.read)

    def readlines_with_etag(self):
        '''Like L{readlines()} but returns a 2-tuple of content and etag'''
        return self._read_with_etag(self.readlines)

    def _read_with_etag(self, func):
        mtime = self.mtime()  # Get before read!
        content = func()
        etag = (mtime, _md5(content))
        return content, etag

    def write_with_etag(self, text, etag):
        '''Like L{write()} but verifies C{etag} first if the file exists
        @returns: the new etag
        @raises FileChangedError: if the etag does not match
        '''
        return self._write_with_etag(self.write, text, etag)

    def writelines_with_etag(self, lines, etag):
        '''Like L{writelines()} but verifies C{etag} first if the file
        exists
        @returns: the new etag
        @raises FileChangedError: if the etag does not match
        '''
        return self._write_with_etag(self.writelines, lines, etag)

    def _write_with_etag(self, func, content, etag):
        # TODO, to make rock-solid would also need to lock the file
        # before etag check and release after write

        if not self.exists():
            # Goal is to prevent overwriting new content. If the file
            # does not yet exist or went missing, just write it anyway.
            pass
        else:
            if not self.verify_etag(etag):
                raise FileChangedError(self)

        func(content)
        return (self.mtime(), _md5(content))

    def verify_etag(self, etag):
        '''Check whether the file still matches C{etag}
        @param etag: a 2-tuple of mtime and md5 digest
        @returns: C{True} if the mtime matches, or else if the md5
        digest of the current content matches
        @raises AssertionError: if C{etag} is not a 2-tuple
        '''
        if isinstance(etag, tuple) and len(etag) == 2:
            mtime = self.mtime()
            if etag[0] != mtime:
                # mtime fails .. lets see about md5
                md5 = _md5(self.read())
                return etag[1] == md5
            else:
                return True
        else:
            raise AssertionError('Invalid etag: %r' % etag)

    def _copyto(self, other):
        '''Copy the binary content and the mtime to C{other}
        @param other: the target file object
        @raises FileExistsError: if C{other} already exists
        '''
        if other.exists():
            raise FileExistsError(other)
        other.write_binary(self.read_binary())
        other._set_mtime(self.mtime())