1 2# Copyright 2008-2014 Jaap Karssenberg <jaap.karssenberg@gmail.com> 3 4'''Module with basic filesystem objects. 5 6This module must be used by all other zim modules for filesystem 7interaction. It takes care of proper encoding file paths 8(system dependent) and file contents (UTF-8) and implements a number 9of sanity checks. 10 11The main classes are L{File} and L{Dir} which implement file and 12folder objects. There is also a singleton object to represent the whole 13filesystem, whichprovides signals when a file or folder is created, 14moved or deleted. This is stored in L{zim.fs.FS}. 15''' 16 17import os 18import re 19import sys 20import shutil 21import tempfile 22import errno 23import logging 24 25 26from zim.errors import Error 27from zim.parsing import url_encode, url_decode, URL_ENCODE_READABLE 28from zim.signals import SignalEmitter, SIGNAL_AFTER 29 30logger = logging.getLogger('zim.fs') 31 32 33from zim.newfs.base import _os_expanduser, SEP 34from zim.newfs.local import AtomicWriteContext 35from zim.newfs.local import get_tmpdir as _newfs_get_tmpdir 36 37def adapt_from_newfs(file): 38 from zim.newfs import LocalFile, LocalFolder 39 40 if isinstance(file, LocalFile): 41 return File(file.path) 42 elif isinstance(file, LocalFolder): 43 return Dir(file.path) 44 else: 45 return file 46 47 48def adapt_from_oldfs(file): 49 from zim.newfs import LocalFile, LocalFolder 50 51 if isinstance(file, File): 52 return LocalFile(file.path) 53 elif isinstance(file, Dir): 54 return LocalFolder(file.path) 55 else: 56 return file 57 58 59try: 60 from gi.repository import Gio 61except ImportError: 62 Gio = None 63 64if not Gio: 65 logger.info('No file monitor support - changes will go undetected') 66 67 68xdgmime = None 69mimetypes = None 70if os.name == 'nt': 71 # On windows even if xdg is installed, the database is not (always) 72 # well initialized, so always fallback to mimetypes 73 import mimetypes 74else: 75 try: 76 import xdg.Mime as xdgmime 77 except ImportError: 78 
logger.info("Can not import 'xdg.Mime' - falling back to 'mimetypes'") 79 import mimetypes 80 81 82#: Extensions to determine image mimetypes - used in L{File.isimage()} 83IMAGE_EXTENSIONS = ( 84 # Gleaned from Gdk.get_formats() 85 'bmp', # image/bmp 86 'gif', # image/gif 87 'icns', # image/x-icns 88 'ico', # image/x-icon 89 'cur', # image/x-icon 90 'jp2', # image/jp2 91 'jpc', # image/jp2 92 'jpx', # image/jp2 93 'j2k', # image/jp2 94 'jpf', # image/jp2 95 'jpeg', # image/jpeg 96 'jpe', # image/jpeg 97 'jpg', # image/jpeg 98 'pcx', # image/x-pcx 99 'png', # image/png 100 'pnm', # image/x-portable-anymap 101 'pbm', # image/x-portable-anymap 102 'pgm', # image/x-portable-anymap 103 'ppm', # image/x-portable-anymap 104 'ras', # image/x-cmu-raster 105 'tga', # image/x-tga 106 'targa', # image/x-tga 107 'tiff', # image/tiff 108 'tif', # image/tiff 109 'wbmp', # image/vnd.wap.wbmp 110 'xbm', # image/x-xbitmap 111 'xpm', # image/x-xpixmap 112 'wmf', # image/x-wmf 113 'apm', # image/x-wmf 114 'svg', # image/svg+xml 115 'svgz', # image/svg+xml 116 'svg.gz', # image/svg+xml 117 # Custom additions 118 'webp', # image/webp 119) 120 121 122def isabs(path): 123 '''Wrapper for C{os.path.isabs}. 124 @param path: a file system path as string 125 @returns: C{True} when the path is absolute instead of a relative path 126 ''' 127 return path.startswith('file:/') \ 128 or path.startswith('~') \ 129 or os.path.isabs(path) 130 131 132_tmpdir = None 133def get_tmpdir(): 134 '''Get a folder in the system temp dir for usage by zim. 135 This zim specific temp folder has permission set to be readable 136 only by the current users, and is touched if it didn't exist yet. 137 Used as base folder by L{TmpFile}. 
138 @returns: a L{Dir} object for the zim specific tmp folder 139 ''' 140 global _tmpdir 141 142 if _tmpdir is None: 143 localdir = _newfs_get_tmpdir() 144 _tmpdir = Dir(localdir.path) 145 146 return _tmpdir 147 148 149def normalize_file_uris(path): 150 '''Function to deal with invalid or non-local file URIs. 151 Translates C{file:/} to the proper C{file:///} form and replaces 152 URIs of the form C{file://host/share} to C{smb://host/share}. 153 @param path: a filesystem path or URL 154 @returns: the proper URI or the original input path 155 ''' 156 if path.startswith('file:///') \ 157 or path.startswith('file://localhost/'): 158 return path 159 elif path.startswith('file://'): 160 return 'smb://' + path[7:] 161 elif path.startswith('file:/'): 162 return 'file:///' + path[6:] 163 else: 164 return path 165 166 167def normalize_win32_share(path): 168 '''Translates paths for windows shares in the platform specific 169 form. So on windows it translates C{smb://} URLs to C{\\host\share} 170 form, and vice versa on all other platforms. 171 Just returns the original path if it was already in the right form, 172 or when it is not a path for a share drive. 173 @param path: a filesystem path or URL 174 @returns: the platform specific path or the original input path 175 ''' 176 if os.name == 'nt': 177 if path.startswith('smb://'): 178 # smb://host/share/.. -> \\host\share\.. 179 path = path[4:].replace('/', '\\') 180 path = url_decode(path) 181 else: 182 if path.startswith('\\\\'): 183 # \\host\share\.. -> smb://host/share/.. 184 path = 'smb:' + url_encode(path.replace('\\', '/')) 185 186 return path 187 188 189def lrmdir(path): 190 '''Wrapper for C{os.rmdir} that also knows how to unlink symlinks. 191 Fails when the folder is not a link and is not empty. 
192 @param path: a file system path as string 193 ''' 194 try: 195 os.rmdir(path) 196 except OSError: 197 if os.path.islink(path) and os.path.isdir(path): 198 os.unlink(path) 199 else: 200 raise 201 202 203def cleanup_filename(name): 204 '''Removes all characters in 'name' that are not allowed as part 205 of a file name. This function is intended for e.g. config files etc. 206 B{not} for page files in a store. 207 For file system filenames we can not use: 208 '\\', '/', ':', '*', '?', '"', '<', '>', '|' 209 And we also exclude "\\t" and "\\n". 210 @param name: the filename as string 211 @returns: the name with invalid characters removed 212 ''' 213 for char in ("/", "\\", ":", "*", "?", '"', "<", ">", "|", "\t", "\n"): 214 name = name.replace(char, '') 215 return name 216 217 218def format_file_size(bytes): 219 '''Returns a human readable label for a file size 220 E.g. C{1230} becomes C{"1.23kb"}, idem for "Mb" and "Gb" 221 @param bytes: file size in bytes as integer 222 @returns: size as string 223 ''' 224 for unit, label in ( 225 (1000000000, 'Gb'), 226 (1000000, 'Mb'), 227 (1000, 'kb'), 228 ): 229 if bytes >= unit: 230 size = float(bytes) / unit 231 if size < 10: 232 return "%.2f%s" % (size, label) 233 elif size < 100: 234 return "%.1f%s" % (size, label) 235 else: 236 return "%.0f%s" % (size, label) 237 else: 238 return str(bytes) + 'b' 239 240 241 242 243def _md5(content): 244 import hashlib 245 m = hashlib.md5() 246 if isinstance(content, str): 247 m.update(content.encode('UTF-8')) 248 else: 249 for l in content: 250 m.update(l.encode('UTF-8')) 251 return m.digest() 252 253 254class PathLookupError(Error): 255 '''Error raised when there is an error finding the specified path''' 256 pass # TODO description 257 258 259class FileWriteError(Error): 260 '''Error raised when we can not write a file. Either due to file 261 permissions or e.g. because it is detected the file changed on 262 disk. 
263 ''' 264 pass # TODO description 265 266 267class FileNotFoundError(PathLookupError): 268 '''Error raised when a file does not exist that is expected to 269 exist. 270 271 @todo: reconcile this class with the NoSuchFileError in zim.gui 272 ''' 273 274 def __init__(self, file): 275 self.file = file 276 self.msg = _('No such file: %s') % file.path 277 # T: message for FileNotFoundError 278 279 280class FileUnicodeError(Error): 281 '''Error raised when there is an issue decoding the file contents. 282 Typically due to different encoding where UTF-8 is expected. 283 ''' 284 285 def __init__(self, file, error): 286 self.file = file 287 self.error = error 288 self.msg = _('Could not read: %s') % file.path 289 # T: message for FileUnicodeError (%s is the file name) 290 self.description = _('This usually means the file contains invalid characters') 291 # T: message for FileUnicodeError 292 self.description += '\n\n' + _('Details') + ':\n' + str(error) 293 # T: label for detailed error 294 295 296# TODO actually hook the signal for deleting files and folders 297 298class FSSingletonClass(SignalEmitter): 299 '''Class used for the singleton 'zim.fs.FS' instance 300 301 @signal: C{path-created (L{FilePath})}: Emitted when a new file or 302 folder has been created 303 @signal: C{path-moved (L{FilePath}, L{FilePath})}: Emitted when 304 a file or folder has been moved 305 @signal: C{path-deleted (L{FilePath})}: Emitted when a file or 306 folder has been deleted 307 308 @todo: fix the FS signals for folders as well 309 ''' 310 311 # define signals we want to use - (closure type, return type and arg types) 312 __signals__ = { 313 'path-created': (SIGNAL_AFTER, None, (object,)), 314 'path-moved': (SIGNAL_AFTER, None, (object, object)), 315 'path-deleted': (SIGNAL_AFTER, None, (object,)), 316 } 317 318 319#: Singleton object for the system filesystem - see L{FSSingletonClass} 320FS = FSSingletonClass() 321 322 323class UnixPath(object): 324 '''Base class for Dir and File objects, 
represents a file path 325 326 @ivar path: the absolute file path as string 327 file system encoding (should only be used by low-level functions) 328 @ivar user_path: the absolute file path relative to the user's 329 C{HOME} folder or C{None} 330 @ivar uri: the C{file://} URI for this path 331 @ivar basename: the basename of the path 332 @ivar dirname: the dirname of the path 333 @ivar dir: L{Dir} object for the parent folder 334 335 @signal: C{changed (file, other_file, event_type)}: emitted when file 336 changed - availability based on C{gio} support for file monitors on 337 this platform 338 ''' 339 340 def __init__(self, path): 341 '''Constructor 342 343 @param path: an absolute file path, file URL, L{FilePath} object 344 or a list of path elements. When a list is given, the first 345 element is allowed to be an absolute path, URL or L{FilePath} 346 object as well. 347 ''' 348 self._serialized = None 349 350 if isinstance(path, FilePath): 351 self.path = path.path 352 return 353 354 try: 355 if isinstance(path, (list, tuple)): 356 path = list(map(str, path)) 357 # Flatten objects - strings should be unicode or ascii already 358 path = SEP.join(path) 359 # os.path.join is too intelligent for it's own good 360 # just join with the path separator. 
361 else: 362 path = str(path) # make sure we can decode 363 except UnicodeDecodeError: 364 raise Error('BUG: invalid input, file names should be in ascii, or given as unicode') 365 366 if path.startswith('file:/'): 367 path = self._parse_uri(path) 368 elif path.startswith('~'): 369 path = _os_expanduser(path) 370 371 self._set_path(path) # overloaded in WindowsPath 372 373 def serialize_zim_config(self): 374 '''Returns the file path as string for serializing the object''' 375 if self._serialized is None: 376 self._serialized = self.user_path or self.path 377 return self._serialized 378 379 @classmethod 380 def new_from_zim_config(klass, string): 381 '''Returns a new object based on the string representation for 382 that path 383 ''' 384 return klass(string) 385 386 @staticmethod 387 def _parse_uri(uri): 388 # Spec is file:/// or file://host/ 389 # But file:/ is sometimes used by non-compliant apps 390 # Windows uses file:///C:/ which is compliant 391 if uri.startswith('file:///'): 392 uri = uri[7:] 393 elif uri.startswith('file://localhost/'): 394 uri = uri[16:] 395 elif uri.startswith('file://'): 396 assert False, 'Can not handle non-local file uris' 397 elif uri.startswith('file:/'): 398 uri = uri[5:] 399 else: 400 assert False, 'Not a file uri: %s' % uri 401 return url_decode(uri) 402 403 def _set_path(self, path): 404 self.path = os.path.abspath(path) 405 406 def __iter__(self): 407 parts = self.split() 408 for i in range(1, len(parts)): 409 path = os.path.join(*parts[0:i]) 410 yield Dir(path) 411 412 #~ if self.isdir(): 413 yield Dir(self.path) 414 #~ else: 415 #~ yield self 416 417 def __str__(self): 418 return self.path 419 420 def __repr__(self): 421 return '<%s: %s>' % (self.__class__.__name__, self.path) 422 423 def __add__(self, other): 424 '''Concatenates paths, only creates objects of the same class. See 425 L{Dir.file()} and L{Dir.subdir()} instead to create other objects. 
426 ''' 427 return self.__class__((self, other)) 428 429 def __eq__(self, other): 430 return self.path == other.path 431 432 def __ne__(self, other): 433 return not self.__eq__(other) 434 435 @property 436 def basename(self): 437 '''Basename property''' 438 return os.path.basename(self.path) # encoding safe 439 440 @property 441 def dirname(self): 442 '''Dirname property''' 443 return os.path.dirname(self.path) # encoding safe 444 445 @property 446 def user_path(self): 447 '''User_path property''' 448 dir = Dir('~') # FIXME: Should we cache this folder somewhere ? 449 if self.ischild(dir): 450 return '~/' + self.relpath(dir) 451 else: 452 return None 453 454 @property 455 def uri(self): 456 '''File uri property''' 457 return 'file://' + url_encode(self.path) 458 459 @property 460 def dir(self): 461 '''Returns a L{Dir} object for the parent dir''' 462 path = os.path.dirname(self.path) # encoding safe 463 return Dir(path) 464 465 def monitor(self): 466 '''Creates a L{FSObjectMonitor} for this path''' 467 return FSObjectMonitor(self) 468 469 def exists(self): 470 '''Check if a file or folder exists. 471 @returns: C{True} if the file or folder exists 472 @implementation: must be implemented by sub classes in order 473 that they enforce the type of the resource as well 474 ''' 475 return os.path.exists(self.path) 476 477 def iswritable(self): 478 '''Check if a file or folder is writable. Uses permissions of 479 parent folder if the file or folder does not (yet) exist. 480 @returns: C{True} if the file or folder is writable 481 ''' 482 if self.exists(): 483 return os.access(self.path, os.W_OK) 484 else: 485 return self.dir.iswritable() # recurs 486 487 def mtime(self): 488 '''Get the modification time of the file path. 489 @returns: the mtime timestamp 490 ''' 491 return os.stat(self.path).st_mtime 492 493 def ctime(self): 494 '''Get the creation time of the file path. 
495 @returns: the mtime timestamp 496 ''' 497 return os.stat(self.path).st_ctime 498 499 def size(self): 500 '''Get file size in bytes 501 See L{format_file_size()} to get a human readable label 502 @returns: file size in bytes 503 ''' 504 return os.stat(self.path).st_size 505 506 def isequal(self, other): 507 '''Check file paths are equal based on stat results (inode 508 number etc.). Intended to detect when two files or dirs are the 509 same on case-insensitive filesystems. Does not explicitly check 510 the content is the same. 511 If you just want to know if two files have the same content, 512 see L{File.compare()} 513 @param other: an other L{FilePath} object 514 @returns: C{True} when the two paths are one and the same file 515 ''' 516 # Do NOT assume paths are the same - could be hard link 517 # or it could be a case-insensitive filesystem 518 try: 519 stat_result = os.stat(self.path) 520 other_stat_result = os.stat(other.path) 521 except OSError: 522 return False 523 else: 524 return stat_result == other_stat_result 525 526 def split(self): 527 '''Split the parts of the path on the path separator. 528 If the OS uses the concept of a drive the first part will 529 include the drive. (So using split() to count the number of 530 path elements will not be robust for the path "/".) 531 @returns: a list of path elements 532 ''' 533 drive, path = os.path.splitdrive(self.path) 534 parts = path.replace('\\', '/').strip('/').split('/') 535 parts[0] = drive + SEP + parts[0] 536 return parts 537 538 def relpath(self, reference, allowupward=False): 539 '''Get a relative path for this file path with respect to 540 another path. This method always returns paths using "/" as 541 separator, even on windows. 542 @param reference: a reference L{FilePath} 543 @param allowupward: if C{True} the relative path is allowed to 544 start with 'C{../}', if C{False} the reference should be a 545 parent folder of this path. 
546 @returns: a relative file path 547 @raises AssertionError: when C{allowupward} is C{False} and 548 C{reference} is not a parent folder 549 ''' 550 sep = SEP # '/' or '\' 551 refdir = reference.path + sep 552 if allowupward and not self.path.startswith(refdir): 553 parent = self.commonparent(reference) 554 if parent is None: 555 return None # maybe on different drive under win32 556 557 i = len(parent.path) 558 j = refdir[i:].strip(sep).count(sep) + 1 559 reference = parent 560 path = '../' * j 561 else: 562 if not self.path.startswith(refdir): 563 raise AssertionError('Not a parent folder') 564 path = '' 565 566 i = len(reference.path) 567 path += self.path[i:].lstrip(sep).replace(sep, '/') 568 return path 569 570 def commonparent(self, other): 571 '''Find a comon parent folder between two file paths. 572 @param other: another L{FilePath} 573 @returns: a L{Dir} object for the common parent folder, or 574 C{None} when there is no common parent 575 ''' 576 path = os.path.commonprefix((self.path, other.path)) # encoding safe 577 path = path.replace(os.path.sep, SEP) # msys can have '/' as seperator 578 i = path.rfind(SEP) # win32 save... 579 if i >= 0: 580 return Dir(path[:i + 1]) 581 else: 582 # different drive ? 583 return None 584 585 def ischild(self, parent): 586 '''Check if this path is a child path of a folder 587 @returns: C{True} if this path is a child path of C{parent} 588 ''' 589 return self.path.startswith(parent.path + SEP) 590 591 def isdir(self): 592 '''Check if this path is a folder or not. Used to detect if 593 e.g. a L{File} object should have really been a L{Dir} object. 594 @returns: C{True} when this path is a folder 595 ''' 596 return os.path.isdir(self.path) 597 598 def rename(self, newpath): 599 '''Rename (move) the content this file or folder to another 600 location. This will B{not} change the current file path, so the 601 object keeps pointing to the old location. 
602 @param newpath: the destination C{FilePath} which can either be a 603 file or a folder. 604 @emits: path-moved 605 ''' 606 # Using shutil.move instead of os.rename because move can cross 607 # file system boundaries, while rename can not 608 logger.info('Rename %s to %s', self, newpath) 609 newpath = adapt_from_newfs(newpath) 610 if self.path == newpath.path: 611 raise AssertionError('Renaming %s to itself !?' % self.path) 612 613 if newpath.isdir(): 614 if self.isequal(newpath): 615 # We checked name above, so must be case insensitive file system 616 # but we still want to be able to rename to other case, so need to 617 # do some moving around 618 tmpdir = self.dir.new_subdir(self.basename) 619 shutil.move(self.path, tmpdir.path) 620 shutil.move(tmpdir.path, newpath.path) 621 else: 622 # Needed because shutil.move() has different behavior for this case 623 raise AssertionError('Folder already exists: %s' % newpath.path) 624 else: 625 # normal case 626 newpath.dir.touch() 627 shutil.move(self.path, newpath.path) 628 629 FS.emit('path-moved', self, newpath) 630 self.dir.cleanup() 631 632 633class WindowsPath(UnixPath): 634 '''Base class for Dir and File objects, represents a file path 635 on windows. 636 ''' 637 638 def _set_path(self, path): 639 # Strip leading / for absolute paths 640 if re.match(r'^[/\\]+[A-Za-z]:[/\\]', path): 641 path = path.lstrip('/').lstrip('\\') 642 self.path = os.path.abspath(path).replace('/', SEP) # msys can use '/' instead of '\\' 643 644 @property 645 def uri(self): 646 '''File uri property with win32 logic''' 647 # win32 paths do not start with '/', so add another one 648 # and avoid url encoding the second ":" in "file:///C:/..." 
649 path = self.path.replace('\\', '/') 650 if re.match('[A-Za-z]:/', path): 651 return 'file:///' + path[:2] + url_encode(path[2:]) 652 else: 653 return 'file:///' + url_encode(path) 654 655 656# Determine which base class to use for classes below 657if os.name == 'posix': 658 FilePath = UnixPath 659elif os.name == 'nt': 660 FilePath = WindowsPath 661else: 662 logger.critical('os name "%s" unknown, falling back to posix', os.name) 663 FilePath = UnixPath 664 665 666class Dir(FilePath): 667 '''Class representing a single file system folder''' 668 669 def __eq__(self, other): 670 if isinstance(other, Dir): 671 return self.path == other.path 672 else: 673 return False 674 675 def exists(self): 676 return os.path.isdir(self.path) 677 678 def list(self, glob=None, includehidden=False, includetmp=False, raw=False): 679 '''List the file contents 680 681 @param glob: a file name glob to filter the listed files, e.g C{"*.png"} 682 @param includehidden: if C{True} include hidden files 683 (e.g. names starting with "."), ignore otherwise 684 @param includetmp: if C{True} include temporary files 685 (e.g. names ending in "~"), ignore otherwise 686 @param raw: for filtered folders (C{FilteredDir} instances) 687 setting C{raw} to C{True} will disable filtering 688 689 @returns: a sorted list of names for files and subdirectories. 690 Will not return names that could not be decoded properly and 691 will throw warnings if those are encountered. 692 Hidden files are silently ignored. 
693 ''' 694 files = self._list(includehidden, includetmp) 695 696 if glob: 697 expr = _glob_to_regex(glob) 698 files = list(filter(expr.match, files)) 699 700 files.sort() 701 return files 702 703 def _list(self, includehidden, includetmp): 704 if self.exists(): 705 files = [] 706 for file in os.listdir(self.path): 707 if file.startswith('.') and not includehidden: 708 continue # skip hidden files 709 elif (file.endswith('~') or file.startswith('~')) and not includetmp: 710 continue # skip temporary files 711 else: 712 files.append(file) 713 return files 714 else: 715 return [] 716 717 def walk(self, raw=True): 718 '''Generator that yields all files and folders below this dir 719 as objects. 720 @param raw: see L{list()} 721 @returns: yields L{File} and L{Dir} objects, depth first 722 ''' 723 for name in self.list(raw=raw): 724 path = self.path + SEP + name 725 if os.path.isdir(path): 726 dir = self.subdir(name) 727 yield dir 728 for child in dir.walk(raw=raw): 729 yield child 730 else: 731 yield self.file(name) 732 733 def get_file_tree_as_text(self, raw=True): 734 '''Returns an overview of files and folders below this dir 735 as text. Used in tests. 736 @param raw: see L{list()} 737 @returns: file listing as string 738 ''' 739 text = '' 740 for child in self.walk(raw=raw): 741 path = child.relpath(self) 742 if isinstance(child, Dir): 743 path += '/' 744 text += path + '\n' 745 return text 746 747 def touch(self, mode=None): 748 '''Create this folder and any parent folders that do not yet 749 exist. 750 @param mode: creation mode (e.g. 
0700) 751 ''' 752 if self.exists(): 753 # Additional check needed because makedirs can not handle 754 # a path like "E:\" on windows (while "E:\foo" works fine) 755 return 756 757 try: 758 if mode is not None: 759 os.makedirs(self.path, mode=mode) 760 else: 761 os.makedirs(self.path) 762 except OSError as e: 763 if e.errno != errno.EEXIST: 764 raise 765 766 def remove(self): 767 '''Remove this folder, fails if it is not empty.''' 768 logger.info('Remove dir: %s', self) 769 lrmdir(self.path) 770 FS.emit('path-deleted', self) 771 772 def cleanup(self): 773 '''Remove this foldder and any empty parent folders. If the 774 folder does not exist, still check for empty parent folders. 775 Fails silently if the folder is not empty. 776 @returns: C{True} when successfull (so C{False} means it still exists). 777 ''' 778 if not self.exists(): 779 return True 780 781 try: 782 os.removedirs(self.path) 783 except OSError: 784 return False # probably dir not empty 785 else: 786 return True 787 788 def remove_children(self): 789 '''Recursively remove everything below this folder . 790 791 B{WARNING:} This is quite powerful and can do a lot of damage 792 when executed for the wrong folder, so pleae make sure to double 793 check the dir is actually what you think it is before calling this. 794 ''' 795 assert self.path and self.path != '/' 796 logger.info('Remove file tree: %s', self) 797 for root, dirs, files in os.walk(self.path, topdown=False): 798 # walk should not decent into symlinked folders by default 799 # remove() and rmdir() both should remove a symlink rather 800 # than the target of the link 801 for name in files: 802 os.remove(os.path.join(root, name)) 803 for name in dirs: 804 lrmdir(os.path.join(root, name)) 805 806 def copyto(self, dest): 807 '''Recursively copy the contents of this folder. 808 When the destination folder already exists the contents will be 809 merged, so you need to check existence of the destination first 810 if you want a clean new copy. 
811 @param dest: a L{Dir} object 812 ''' 813 # We do not use shutil.copytree() because it requires that 814 # the target dir does not exist 815 assert isinstance(dest, Dir) 816 assert not dest == self, 'BUG: trying to copy a dir to itself' 817 logger.info('Copy dir %s to %s', self, dest) 818 819 def copy_dir(source, target): 820 target.touch() 821 for item in source.list(): 822 child = FilePath((source, item)) 823 if child.isdir(): 824 copy_dir(Dir(child), target.subdir(item)) # recur 825 else: 826 child = File(child) 827 child.copyto(target) 828 829 copy_dir(self, dest) 830 # TODO - not hooked with FS signals 831 832 def file(self, path): 833 '''Get a L{File} object for a path below this folder 834 835 @param path: a (relative) file path as string, tuple or 836 L{FilePath} object. When C{path} is a L{File} object already 837 this method still enforces it is below this folder. 838 So this method can be used as check as well. 839 840 @returns: a L{File} object 841 @raises PathLookupError: if the path is not below this folder 842 ''' 843 file = self.resolve_file(path) 844 if not file.path.startswith(self.path): 845 raise PathLookupError('%s is not below %s' % (file, self)) 846 return file 847 848 def resolve_file(self, path): 849 '''Get a L{File} object for a path relative to this folder 850 851 Like L{file()} but allows the path to start with "../" as 852 well, so can handle any relative path. 853 854 @param path: a (relative) file path as string, tuple or 855 L{FilePath} object. 856 @returns: a L{File} object 857 ''' 858 assert isinstance(path, (FilePath, str, list, tuple)) 859 if isinstance(path, str): 860 return File((self.path, path)) 861 elif isinstance(path, (list, tuple)): 862 return File((self.path,) + tuple(path)) 863 elif isinstance(path, File): 864 return path 865 elif isinstance(path, FilePath): 866 return File(path.path) 867 868 def new_file(self, path): 869 '''Get a L{File} object for a new file below this folder. 
870 Like L{file()} but guarantees the file does not yet exist by 871 adding sequential numbers if needed. So the resulting file 872 may have a modified name. 873 874 @param path: a (relative) file path as string, tuple or 875 L{FilePath} object. 876 877 @returns: a L{File} object 878 @raises PathLookupError: if the path is not below this folder 879 ''' 880 file = self.file(path) 881 basename = file.basename 882 if '.' in basename: 883 basename, ext = basename.split('.', 1) 884 else: 885 ext = '' 886 dir = file.dir 887 i = 0 888 while file.exists(): 889 logger.debug('File exists "%s" trying increment', file) 890 i += 1 891 newname = basename + '%03i' % i 892 if ext: 893 newname += '.' + ext 894 file = dir.file(newname) 895 return file 896 897 def subdir(self, path): 898 '''Get a L{Dir} object for a path below this folder 899 900 @param path: a (relative) file path as string, tuple or 901 L{FilePath} object. When C{path} is a L{Dir} object already 902 this method still enforces it is below this folder. 903 So this method can be used as check as well. 904 905 @returns: a L{Dir} object 906 @raises PathLookupError: if the path is not below this folder 907 908 ''' 909 910 dir = self.resolve_dir(path) 911 if not dir.path.startswith(self.path): 912 raise PathLookupError('%s is not below %s' % (dir, self)) 913 return dir 914 915 def resolve_dir(self, path): 916 '''Get a L{Dir} object for a path relative to this folder 917 918 Like L{subdir()} but allows the path to start with "../" as 919 well, so can handle any relative path. 920 921 @param path: a (relative) file path as string, tuple or 922 L{FilePath} object. 
923 @returns: a L{Dir} object 924 ''' 925 assert isinstance(path, (FilePath, str, list, tuple)) 926 if isinstance(path, str): 927 return Dir((self.path, path)) 928 elif isinstance(path, (list, tuple)): 929 return Dir((self.path,) + tuple(path)) 930 elif isinstance(path, Dir): 931 return path 932 elif isinstance(path, FilePath): 933 return Dir(path.path) 934 935 def new_subdir(self, path): 936 '''Get a L{Dir} object for a new sub-folder below this folder. 937 Like L{subdir()} but guarantees the folder does not yet exist by 938 adding sequential numbers if needed. So the resulting folder 939 may have a modified name. 940 941 @param path: a (relative) file path as string, tuple or 942 L{FilePath} object. 943 944 @returns: a L{Dir} object 945 @raises PathLookupError: if the path is not below this folder 946 ''' 947 subdir = self.subdir(path) 948 basename = subdir.basename 949 i = 0 950 while subdir.exists(): 951 logger.debug('Dir exists "%s" trying increment', subdir) 952 i += 1 953 newname = basename + '%03i' % i 954 subdir = self.subdir(newname) 955 return subdir 956 957 958def _glob_to_regex(glob): 959 glob = glob.replace('.', '\\.') 960 glob = glob.replace('*', '.*') 961 glob = glob.replace('?', '.?') 962 return re.compile(glob) 963 964 965class FilteredDir(Dir): 966 '''Class implementing a folder with a filtered listing. Can be 967 used to e.g. filter all objects that are also ignored by version 968 control. 969 ''' 970 971 def __init__(self, path): 972 '''Constructor 973 974 @param path: an absolute file path, file URL, L{FilePath} object 975 or a list of path elements. When a list is given, the first 976 element is allowed to be an absolute path, URL or L{FilePath} 977 object as well. 978 ''' 979 Dir.__init__(self, path) 980 self._ignore = [] 981 982 def ignore(self, glob): 983 '''Add a file pattern to ignore 984 @param glob: a file path pattern (e.g. 
"*.txt") 985 ''' 986 regex = _glob_to_regex(glob) 987 self._ignore.append(regex) 988 989 def filter(self, name): 990 for regex in self._ignore: 991 if regex.match(name): 992 return False 993 else: 994 return True 995 996 def list(self, includehidden=False, includetmp=False, raw=False): 997 files = Dir.list(self, includehidden, includetmp) 998 if not raw: 999 files = list(filter(self.filter, files)) 1000 return files 1001 1002 1003class File(FilePath): 1004 '''Class representing a single file. 1005 1006 This class implements much more complex logic than the default 1007 python file objects. E.g. on writing we first write to a temporary 1008 files, then flush and sync and finally replace the file we intended 1009 to write with the temporary file. This makes it much more difficult 1010 to loose file contents when something goes wrong during the writing. 1011 1012 Also it implements logic to check the modification time before 1013 writing to prevent overwriting a file that was changed on disk in 1014 between read and write operations. If this mtime check fails MD5 1015 sums are used to verify before raising an exception (because some 1016 share drives do not maintain mtime very precisely). 1017 This logic is not atomic, so your mileage may vary. 1018 ''' 1019 1020 # For atomic write we first write a tmp file which has the extension 1021 # .zim-new~ when is was written successfully we replace the actual file 1022 # with the tmp file. Because rename is atomic on POSIX platforms and 1023 # replaces the existing file this either succeeds or not, it can never 1024 # truncate the existing file but fail to write the new file. So if writing 1025 # fails we should always at least have the old file still present. 1026 # If we encounter a left over .zim-new~ we ignore it since it may be 1027 # corrupted. 1028 # 1029 # For Window the behavior is more complicated, see the WindowsFile class 1030 # below. 
class File(FilePath):
    '''Class representing a single file.

    This class implements much more complex logic than the default
    python file objects. E.g. on writing we first write to a temporary
    file, then flush and sync and finally replace the file we intended
    to write with the temporary file. This makes it much more difficult
    to lose file contents when something goes wrong during the writing.

    Also it implements logic to check the modification time before
    writing to prevent overwriting a file that was changed on disk in
    between read and write operations. If this mtime check fails MD5
    sums are used to verify before raising an exception (because some
    share drives do not maintain mtime very precisely).
    This logic is not atomic, so your mileage may vary.
    '''

    # For atomic write we first write a tmp file which has the extension
    # .zim-new~ when it was written successfully we replace the actual file
    # with the tmp file. Because rename is atomic on POSIX platforms and
    # replaces the existing file this either succeeds or not, it can never
    # truncate the existing file but fail to write the new file. So if writing
    # fails we should always at least have the old file still present.
    # If we encounter a left over .zim-new~ we ignore it since it may be
    # corrupted.
    #
    # For Windows the behavior is more complicated, see the WindowsFile class
    # below.
    #
    # Note that the mechanism to avoid overwriting files that changed on disk
    # does not prevent conflicts when two processes try to write to the same
    # file at the same time. This is a hard problem that is currently not
    # addressed in this implementation.

    def __init__(self, path, checkoverwrite=False, endofline=None):
        '''Constructor

        @param path: an absolute file path, file URL, L{FilePath} object
        or a list of path elements. When a list is given, the first
        element is allowed to be an absolute path, URL or L{FilePath}
        object as well.

        @param checkoverwrite: when C{True} this object checks the
        modification time before writing to prevent overwriting a file
        that was changed on disk in between read and write operations.

        @param endofline: the line end style used when writing, can be
        one of "unix" ('\\n') or "dos" ('\\r\\n'). When C{None} the local
        default is used.
        '''
        FilePath.__init__(self, path)
        self.checkoverwrite = checkoverwrite
        self.endofline = endofline
        # State recorded on read / write, used by _assertoverwrite()
        self._mtime = None
        self._md5 = None

    # NOTE(review): defining __eq__ without __hash__ makes instances of
    # this class unhashable in Python 3 - confirm that is intended.
    def __eq__(self, other):
        if isinstance(other, File):
            return self.path == other.path
        else:
            return False

    def exists(self):
        '''Check if this file exists
        @returns: C{True} when the path exists and is a regular file
        '''
        return os.path.isfile(self.path)

    def isimage(self):
        '''Check if this is an image file. Convenience method that
        works even when no real mime-type support is available.
        If this method returns C{True} it is no guarantee
        this image type is actually supported by Gtk.
        @returns: C{True} when this is an image file
        '''
        # Quick shortcut to be able to load images in the gui even if
        # we have no proper mimetype support. Matching on the suffix
        # (instead of splitting on the last dot) also covers compound
        # extensions like "svg.gz" and ignores case.
        name = self.basename.lower()
        if name.endswith(tuple('.' + ext for ext in IMAGE_EXTENSIONS)):
            return True

        return self.get_mimetype().startswith('image/')

    def get_mimetype(self):
        '''Get the mime-type for this file.
        Will use the XDG mimetype system if available, otherwise
        falls back to the standard library C{mimetypes}.
        @returns: the mimetype as a string, e.g. "text/plain"
        '''
        if xdgmime:
            mimetype = xdgmime.get_type(self.path, name_pri=80)
            return str(mimetype)

        mimetype, encoding = mimetypes.guess_type(self.path, strict=False)
        if encoding == 'gzip':
            return 'application/x-gzip'
        elif encoding in ('bzip', 'bzip2'):
            # The standard library reports ".bz2" as "bzip2"
            return 'application/x-bzip'
        elif encoding == 'compress':
            return 'application/x-compress'
        else:
            return mimetype or 'application/octet-stream'

    def get_endofline(self):
        '''Get the end-of-line character(s) used for writing this file.
        @returns: the end-of-line character(s)
        '''
        if self.endofline is None:
            # No explicit style: follow the platform of this path object
            return '\r\n' if isinstance(self, WindowsPath) else '\n'

        assert self.endofline in ('unix', 'dos')
        return '\r\n' if self.endofline == 'dos' else '\n'

    def raw(self):
        '''Get the raw content without UTF-8 decoding, newline logic,
        etc. Used to read binary data, e.g. when serving files over www.
        Note that this function also does not integrate with checking
        mtime, so it is intended for read only usage.
        @returns: file content as bytes
        @raises FileNotFoundError: when the file can not be read
        '''
        try:
            with open(self.path, mode='rb') as fh:
                return fh.read()
        except IOError:
            raise FileNotFoundError(self)

    def read(self):
        '''Get the file contents as a string. Takes care of decoding
        UTF-8 and fixes line endings.
        @returns: the content as (unicode) string.
        @raises FileNotFoundError: when the file does not exist.
        @raises FileUnicodeError: when the content is not valid UTF-8.
        '''
        try:
            content = self._read()
            self._checkoverwrite(content)
        except IOError:
            raise FileNotFoundError(self)
        except UnicodeDecodeError as error:
            raise FileUnicodeError(self, error)

        # Strip unicode byte order mark
        # And remove any NULL byte since they screw up parsing
        return content.lstrip('\ufeff').replace('\x00', '')

    def _read(self):
        # Plain UTF-8 read without any of the checks done by read()
        with open(self.path, encoding='UTF-8') as fh:
            return fh.read()

    def readlines(self):
        '''Get the file contents as a list of lines. Takes care of
        decoding UTF-8 and fixes line endings.

        @returns: the content as a list of lines.
        @raises FileNotFoundError: when the file does not exist.
        @raises FileUnicodeError: when the content is not valid UTF-8.
        '''
        try:
            with open(self.path, encoding='UTF-8') as fh:
                lines = fh.readlines()
            self._checkoverwrite(lines)
        except IOError:
            raise FileNotFoundError(self)
        except UnicodeDecodeError as error:
            raise FileUnicodeError(self, error)

        # Strip unicode byte order mark
        # And remove any NULL byte since they screw up parsing
        return [line.lstrip('\ufeff').replace('\x00', '') for line in lines]

    def _write_check(self):
        # Refuse to write read-only files, make sure the parent folder
        # exists for new files
        if not self.iswritable():
            raise FileWriteError(_('File is not writable: %s') % self.path) # T: Error message
        elif not self.exists():
            self.dir.touch()
        else:
            pass # exists and writable

    def _write_atomic(self, content, aslines):
        # Shared implementation of write() and writelines()
        self._assertoverwrite()
        isnew = not os.path.isfile(self.path)
        newline = self.get_endofline()
        self._write_check()
        with AtomicWriteContext(self, newline=newline) as fh:
            if aslines:
                fh.writelines(content)
            else:
                fh.write(content)

        self._checkoverwrite(content)
        if isnew:
            FS.emit('path-created', self)

    def write(self, text):
        '''Write file contents from string. This overwrites the current
        content. Will automatically create all parent folders.
        If writing fails the file will either have the new content or the
        old content, but it should not be possible to have the content
        truncated.
        @param text: new content as (unicode) string
        @raises FileWriteError: when the file is not writable or was
        changed on disk since it was last read
        @emits: path-created if the file did not yet exist
        '''
        self._write_atomic(text, aslines=False)

    def writelines(self, lines):
        '''Write file contents from a list of lines.
        Like L{write()} but input is a list instead of a string.
        @param lines: new content as list of lines
        @raises FileWriteError: when the file is not writable or was
        changed on disk since it was last read
        @emits: path-created if the file did not yet exist
        '''
        self._write_atomic(lines, aslines=True)

    def _checkoverwrite(self, content):
        # Set properties needed by assertoverwrite for the in-memory object
        if self.checkoverwrite:
            self._mtime = self.mtime()
            self._md5 = _md5(content)

    def _assertoverwrite(self):
        # When we read a file and then write it, this method asserts the file
        # did not change in between (e.g. by another process, or another async
        # function of our own process). We use properties of this object instance
        # We check the timestamp, if that does not match we check md5 to be sure.
        # (Sometimes e.g. network filesystems do not maintain timestamps as strict
        # as we would like.)
        #
        # This function should not prohibit writing without reading first.
        # Also we just write the file if it went missing in between
        if not (self._mtime and self._md5):
            return

        try:
            mtime = self.mtime()
        except OSError:
            if not os.path.isfile(self.path):
                logger.critical('File missing: %s', self.path)
                return
            else:
                raise

        if self._mtime != mtime:
            logger.warning('mtime check failed for %s, trying md5', self.path)
            if self._md5 != _md5(self._read()):
                raise FileWriteError(_('File changed on disk: %s') % self.path)
                    # T: error message
                # Why are we using MD5 here ?? could just compare content...

    def check_has_changed_on_disk(self):
        '''Returns C{True} when this file has changed on disk'''
        if not (self._mtime and self._md5):
            # Nothing recorded yet: an existing file may well have been
            # just created, a missing file is reported as unchanged
            return os.path.isfile(self.path)
        elif not os.path.isfile(self.path):
            return True
        else:
            try:
                self._assertoverwrite()
            except FileWriteError:
                return True
            else:
                return False

    def touch(self):
        '''Create this file and any parent folders if it does not yet
        exist. (Parent folders are also created when writing to a file,
        so you only need to call this method in special cases - e.g.
        when an external program requires the file to exist.)
        '''
        if not self.exists():
            self.write('')

    def remove(self):
        '''Remove (delete) this file and cleanup any related temporary
        files we created. This action can not be un-done.
        Ignores silently if the file did not exist in the first place.
        @emits: path-deleted
        '''
        logger.info('Remove file: %s', self)
        if os.path.isfile(self.path):
            os.remove(self.path)

        # Left-over from an interrupted atomic write
        tmp = self.path + '.zim-new~'
        if os.path.isfile(tmp):
            os.remove(tmp)

        FS.emit('path-deleted', self)

    def cleanup(self):
        '''Remove this file and cleanup any empty parent folder.
        Convenience method calling L{File.remove()} and L{Dir.cleanup()}.
        '''
        self.remove()
        self.dir.cleanup()

    def copyto(self, dest):
        '''Copy this file to another location. Preserves all file
        attributes (by using C{shutil.copy2()})
        @param dest: a L{File} or L{Dir} object for the destination. If the
        destination is a folder, we will copy to a file below that
        folder of the same name
        '''
        dest = adapt_from_newfs(dest)
        assert isinstance(dest, (File, Dir))
        if isinstance(dest, Dir):
            assert not dest == self.dir, 'BUG: trying to copy a file to itself'
        else:
            assert not dest == self, 'BUG: trying to copy a file to itself'
        logger.info('Copy %s to %s', self, dest)
        # Make sure the target folder exists before copying
        if isinstance(dest, Dir):
            dest.touch()
        else:
            dest.dir.touch()
        shutil.copy2(self.path, dest.path)
        # TODO - not hooked with FS signals

    def compare(self, other):
        '''Check if file contents are the same. This differs from
        L{isequal()} because files can be different physical locations.
        @param other: another L{File} object
        @returns: C{True} when the files have the same content
        '''
        # TODO: can be more efficient, e.g. by checking stat size first
        # also wonder if MD5 is needed here ... could just compare text
        return _md5(self.read()) == _md5(other.read())
class TmpFile(File):
    '''Class for temporary files. These are stored in the temp directory
    and by default they are deleted again when the object is destructed.
    '''

    def __init__(self, basename, unique=True, persistent=False):
        '''Constructor

        @param basename: gives the name for this tmp file.
        @param unique: if C{True} the L{Dir.new_file()} method is used
        to make sure we have a new file.
        @param persistent: if C{False} the file will be removed when the
        object is destructed, if C{True} we leave it alone
        '''
        tmpdir = get_tmpdir()
        if unique:
            newfile = tmpdir.new_file(basename)
            File.__init__(self, newfile.path)
        else:
            File.__init__(self, (tmpdir, basename))

        self.persistent = persistent

    def __del__(self):
        # When __init__ failed half way the "persistent" attribute is
        # missing; in that case there is nothing to clean up and we must
        # not raise from the destructor.
        if not getattr(self, 'persistent', True):
            self.remove()
# Replace logic based on discussion here:
# http://stupidpythonideas.blogspot.nl/2014/07/getting-atomic-writes-right.html
#
# The point is to get a function to replace an old file with a new
# file as "atomic" as possible
#
# _replace_file(src, dst) moves "src" over "dst", replacing "dst" when
# it already exists, and raises OSError on failure.

if hasattr(os, 'replace'):
    _replace_file = os.replace
elif sys.platform == 'win32':
    # The win32api.MoveFileEx method somehow does not like our unicode,
    # the ctypes version does ??!
    import ctypes
    import time  # needed for the retry delay below

    _MOVEFILE_REPLACE_EXISTING = 1
    _MoveFileEx = ctypes.windll.kernel32.MoveFileExW
    _MoveFileEx.argtypes = [ctypes.c_wchar_p, ctypes.c_wchar_p, ctypes.c_uint32]
    _MoveFileEx.restype = ctypes.c_bool

    def _replace_file(src, dst):
        try:
            replaced = _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING)
        except OSError:
            replaced = False

        if not replaced:
            # Sometimes it fails - we play stupid and try again...
            time.sleep(0.5)
            if not _MoveFileEx(src, dst, _MOVEFILE_REPLACE_EXISTING):
                raise OSError('Could not replace "%s" -> "%s"' % (src, dst))
else:
    _replace_file = os.rename
### TODO filter Dir.list directly for hidden files
if os.name == 'nt':
    import ctypes

    def is_hidden_file(file):
        '''Check the "hidden" file attribute of a file or folder
        @param file: an object with a C{path} attribute
        @returns: C{True} when the hidden attribute is set, C{False}
        otherwise or when the attributes can not be determined
        '''
        invalid_attributes = -1  # INVALID_FILE_ATTRIBUTES
        hidden_flag = 2  # FILE_ATTRIBUTE_HIDDEN

        try:
            # note: GetFileAttributesW is unicode version of GetFileAttributes
            found = ctypes.windll.kernel32.GetFileAttributesW(file.path)
        except AttributeError:
            return False

        return found != invalid_attributes and bool(found & hidden_flag)

else:
    def is_hidden_file(file):
        '''Check whether a file or folder name starts with a dot
        @param file: an object with a C{basename} attribute
        @returns: C{True} when the basename starts with "."
        '''
        return file.basename[:1] == '.'
###
class FSObjectMonitor(SignalEmitter):
    '''Emits a "changed" signal when the monitored path changes on
    disk. Uses a Gio file monitor; when C{Gio} is not available no
    monitor is set up and the signal is never emitted by this class.
    '''

    __signals__ = {
        'changed': (None, None, (None, None)),
    }

    def __init__(self, path):
        '''Constructor
        @param path: the object to monitor, needs to have a C{uri}
        attribute
        '''
        self.path = path
        self._gio_file_monitor = None

    def _setup_signal(self, signal):
        # Create the Gio monitor for the "changed" signal if we do not
        # have one yet.
        # NOTE(review): presumably called by SignalEmitter on first
        # connect - confirm against zim.signals
        if signal == 'changed' \
        and self._gio_file_monitor is None \
        and Gio:
            try:
                file = Gio.File.new_for_uri(self.path.uri)
                self._gio_file_monitor = file.monitor()
                self._gio_file_monitor.connect('changed', self._on_changed)
            except Exception:
                # Monitoring is best effort, do not break the caller
                logger.exception('Error while setting up file monitor')

    def _teardown_signal(self, signal):
        # Cancel and drop the Gio monitor for the "changed" signal
        if signal == 'changed' \
        and self._gio_file_monitor:
            try:
                self._gio_file_monitor.cancel()
            except Exception:
                logger.exception('Error while tearing down file monitor')
            finally:
                self._gio_file_monitor = None

    def _on_changed(self, filemonitor, file, other_file, event_type):
        # 'FILE_MONITOR_EVENT_CHANGED' is always followed by
        # a 'FILE_MONITOR_EVENT_CHANGES_DONE_HINT' when the filehandle
        # is closed (or after timeout). Idem for "created", assuming it
        # is not created empty.
        #
        # TODO: do not emit changed on CREATED - separate signal that
        #       can be used when monitoring a file list, but reserve
        #       changed for changes-done-hint so that we ensure the
        #       content is complete.
        #       + emit on write and block redundant signals here
        #
        # Also note that in many cases "moved" will not be used, but a
        # sequence of deleted, created will be signaled
        #
        # For Dir objects, the event will refer to files contained in
        # the dir.

        #~ print('MONITOR:', self, event_type)
        if event_type in (
            Gio.FileMonitorEvent.CREATED,
            Gio.FileMonitorEvent.CHANGES_DONE_HINT,
            Gio.FileMonitorEvent.DELETED,
            Gio.FileMonitorEvent.MOVED,
        ):
            self.emit('changed', None, None) # TODO translate otherfile and eventtype