1
2# Copyright 2008-2014 Jaap Karssenberg <jaap.karssenberg@gmail.com>
3
4'''Module with basic filesystem objects.
5
6This module must be used by all other zim modules for filesystem
7interaction. It takes care of proper encoding file paths
8(system dependent) and file contents (UTF-8) and implements a number
9of sanity checks.
10
11The main classes are L{File} and L{Dir} which implement file and
12folder objects. There is also a singleton object to represent the whole
filesystem, which provides signals when a file or folder is created,
14moved or deleted. This is stored in L{zim.fs.FS}.
15'''
16
17import os
18import re
19import sys
20import shutil
21import tempfile
22import errno
23import logging
24
25
26from zim.errors import Error
27from zim.parsing import url_encode, url_decode, URL_ENCODE_READABLE
28from zim.signals import SignalEmitter, SIGNAL_AFTER
29
30logger = logging.getLogger('zim.fs')
31
32
33from zim.newfs.base import _os_expanduser, SEP
34from zim.newfs.local import AtomicWriteContext
35from zim.newfs.local import get_tmpdir as _newfs_get_tmpdir
36
def adapt_from_newfs(file):
	'''Convert a local C{zim.newfs} object to the equivalent object
	of this module.
	@param file: a C{LocalFile}, a C{LocalFolder} or any other object
	@returns: a L{File} for a C{LocalFile}, a L{Dir} for a
	C{LocalFolder}, any other object is returned unchanged
	'''
	# Imported at call time, not at module level
	from zim.newfs import LocalFile, LocalFolder

	for newfs_class, oldfs_class in ((LocalFile, File), (LocalFolder, Dir)):
		if isinstance(file, newfs_class):
			return oldfs_class(file.path)

	return file
46
47
def adapt_from_oldfs(file):
	'''Convert a L{File} or L{Dir} object of this module to the
	equivalent local C{zim.newfs} object.
	@param file: a L{File}, a L{Dir} or any other object
	@returns: a C{LocalFile} for a L{File}, a C{LocalFolder} for a
	L{Dir}, any other object is returned unchanged
	'''
	# Imported at call time, not at module level
	from zim.newfs import LocalFile, LocalFolder

	for oldfs_class, newfs_class in ((File, LocalFile), (Dir, LocalFolder)):
		if isinstance(file, oldfs_class):
			return newfs_class(file.path)

	return file
57
58
# Gio is an optional dependency: when it can not be imported the name
# "Gio" is set to None so other code can test for it, and a message is
# logged that file monitoring is not available.
try:
	from gi.repository import Gio
except ImportError:
	Gio = None

if not Gio:
	logger.info('No file monitor support - changes will go undetected')
66
67
# Select the module used for mimetype lookups. After this block exactly
# one of "xdgmime" and "mimetypes" refers to a module, the other one
# is None.
xdgmime = None
mimetypes = None
if os.name == 'nt':
	# On windows even if xdg is installed, the database is not (always)
	# well initialized, so always fallback to mimetypes
	import mimetypes
else:
	try:
		import xdg.Mime as xdgmime
	except ImportError:
		# xdg is optional - use the standard library module instead
		logger.info("Can not import 'xdg.Mime' - falling back to 'mimetypes'")
		import mimetypes
80
81
#: Extensions to determine image mimetypes - used in L{File.isimage()}
#: Entries are lower case and written without a leading dot; each
#: comment gives the mimetype the extension stands for.
IMAGE_EXTENSIONS = (
	# Gleaned from Gdk.get_formats()
	'bmp', # image/bmp
	'gif', # image/gif
	'icns', # image/x-icns
	'ico', # image/x-icon
	'cur', # image/x-icon
	'jp2', # image/jp2
	'jpc', # image/jp2
	'jpx', # image/jp2
	'j2k', # image/jp2
	'jpf', # image/jp2
	'jpeg', # image/jpeg
	'jpe', # image/jpeg
	'jpg', # image/jpeg
	'pcx', # image/x-pcx
	'png', # image/png
	'pnm', # image/x-portable-anymap
	'pbm', # image/x-portable-anymap
	'pgm', # image/x-portable-anymap
	'ppm', # image/x-portable-anymap
	'ras', # image/x-cmu-raster
	'tga', # image/x-tga
	'targa', # image/x-tga
	'tiff', # image/tiff
	'tif', # image/tiff
	'wbmp', # image/vnd.wap.wbmp
	'xbm', # image/x-xbitmap
	'xpm', # image/x-xpixmap
	'wmf', # image/x-wmf
	'apm', # image/x-wmf
	'svg', # image/svg+xml
	'svgz', # image/svg+xml
	'svg.gz', # image/svg+xml
	# Custom additions
	'webp', # image/webp
)
120
121
def isabs(path):
	'''Check whether a path should be treated as absolute.
	Besides everything accepted by C{os.path.isabs}, paths starting
	with C{file:/} or with C{~} count as absolute as well.
	@param path: a file system path as string
	@returns: C{True} when the path is absolute instead of a relative path
	'''
	if path.startswith(('file:/', '~')):
		return True
	return os.path.isabs(path)
130
131
_tmpdir = None
def get_tmpdir():
	'''Get the zim specific folder in the system temp dir.
	The folder is looked up once through the C{get_tmpdir()} function
	of C{zim.newfs.local} and then cached at module level, so every
	call returns the same object.
	That function is expected to make sure the folder exists and is
	only readable by the current user (not verified here).
	Used as base folder by L{TmpFile}.
	@returns: a L{Dir} object for the zim specific tmp folder
	'''
	global _tmpdir

	if _tmpdir is not None:
		return _tmpdir

	_tmpdir = Dir(_newfs_get_tmpdir().path)
	return _tmpdir
147
148
def normalize_file_uris(path):
	'''Clean up invalid or non-local file URIs.
	A URI of the form C{file:/path} is rewritten to the proper
	C{file:///path} form and C{file://host/share} is rewritten to
	C{smb://host/share}. Proper local URIs (C{file:///} and
	C{file://localhost/}) and anything that is not a file URI are
	returned as is.
	@param path: a filesystem path or URL
	@returns: the proper URI or the original input path
	'''
	if path.startswith(('file:///', 'file://localhost/')):
		# Already a well formed local URI
		return path

	if path.startswith('file://'):
		# Two slashes followed by a host name: remote share
		return 'smb://' + path[len('file://'):]

	if path.startswith('file:/'):
		# Single slash as used by non-compliant applications
		return 'file:///' + path[len('file:/'):]

	return path
165
166
def normalize_win32_share(path):
	'''Translate the path of a windows share to the platform
	specific form. On windows an C{smb://host/share} URL becomes
	C{\\\\host\\share}, on all other platforms the translation is done
	the other way around.
	Paths that are already in the right form, or that do not refer
	to a share at all, are returned as is.
	@param path: a filesystem path or URL
	@returns: the platform specific path or the original input path
	'''
	on_windows = (os.name == 'nt')

	if on_windows and path.startswith('smb://'):
		# smb://host/share/.. -> \\host\share\..
		unc_path = path[4:].replace('/', '\\')
		return url_decode(unc_path)

	if not on_windows and path.startswith('\\\\'):
		# \\host\share\.. -> smb://host/share/..
		return 'smb:' + url_encode(path.replace('\\', '/'))

	return path
187
188
def lrmdir(path):
	'''Remove a folder like C{os.rmdir}, but unlink the path instead
	when it turns out to be a symlink to a folder.
	Fails when the folder is not a link and is not empty.
	@param path: a file system path as string
	@raises OSError: when C{os.rmdir} fails and the path is not a
	symlink to a folder
	'''
	try:
		os.rmdir(path)
	except OSError:
		is_folder_link = os.path.islink(path) and os.path.isdir(path)
		if not is_folder_link:
			raise
		os.unlink(path)
201
202
def cleanup_filename(name):
	'''Strip all characters from a name that are not allowed as part
	of a file name. Intended for e.g. config files, B{not} for page
	files in a store.
	The characters removed are the backslash, the forward slash,
	C{:}, C{*}, C{?}, the double quote, C{<}, C{>}, C{|}, the tab and
	the newline.
	@param name: the filename as string
	@returns: the name with invalid characters removed
	'''
	forbidden = '/\\:*?"<>|\t\n'
	return ''.join(char for char in name if char not in forbidden)
216
217
def format_file_size(bytes):
	'''Get a human readable label for a file size.
	Sizes are expressed in the largest decimal unit ("kb", "Mb" or
	"Gb") that is not bigger than the size itself, with fewer
	decimals as the number grows, e.g. C{1230} becomes C{"1.23kb"}.
	Sizes below 1000 are given in bytes, e.g. C{"999b"}.
	@param bytes: file size in bytes as integer
	@returns: size as string
	'''
	units = (
		(1000000000, 'Gb'),
		(1000000, 'Mb'),
		(1000, 'kb'),
	)
	for unit, label in units:
		if bytes >= unit:
			size = float(bytes) / unit
			# Keep roughly three significant digits
			if size < 10:
				template = "%.2f%s"
			elif size < 100:
				template = "%.1f%s"
			else:
				template = "%.0f%s"
			return template % (size, label)

	# Smaller than the smallest unit
	return str(bytes) + 'b'
239
240
241
242
def _md5(content):
	'''Calculate the MD5 digest for text content.
	@param content: either a single string or an iterable of strings
	(e.g. a list of lines), each string is encoded as UTF-8
	@returns: the binary digest as C{bytes}
	'''
	import hashlib

	# Treat a single string as a sequence with one chunk
	chunks = (content,) if isinstance(content, str) else content

	digest = hashlib.md5()
	for chunk in chunks:
		digest.update(chunk.encode('UTF-8'))
	return digest.digest()
252
253
class PathLookupError(Error):
	'''Error raised when a path can not be found or resolved as
	requested, e.g. when a path is not below the expected folder.
	'''
	# TODO description
257
258
class FileWriteError(Error):
	'''Error raised when a file can not be written, for example
	because of the file permissions or because it was detected that
	the file changed on disk.
	'''
	# TODO description
265
266
class FileNotFoundError(PathLookupError):
	'''Error raised when a file that is expected to exist does not
	exist.

	Note that within this module the name hides the Python builtin
	exception with the same name.

	@ivar file: the file object that was not found
	@ivar msg: the translated error message

	@todo: reconcile this class with the NoSuchFileError in zim.gui
	'''

	def __init__(self, file):
		'''Constructor
		@param file: the missing file, an object with a C{path} attribute
		'''
		message = _('No such file: %s') % file.path
			# T: message for FileNotFoundError
		self.file = file
		self.msg = message
278
279
class FileUnicodeError(Error):
	'''Error raised when the contents of a file can not be decoded,
	typically because the file uses another encoding than the
	expected UTF-8.

	@ivar file: the file object that could not be read
	@ivar error: the original decoding error
	@ivar msg: the translated error message
	@ivar description: translated explanation followed by the details
	of the original error
	'''

	def __init__(self, file, error):
		'''Constructor
		@param file: the file that could not be decoded, an object
		with a C{path} attribute
		@param error: the error that was raised while decoding
		'''
		self.file = file
		self.error = error
		self.msg = _('Could not read: %s') % file.path
			# T: message for FileUnicodeError (%s is the file name)
		explanation = _('This usually means the file contains invalid characters')
			# T: message for FileUnicodeError
		details = _('Details') + ':\n' + str(error)
			# T: label for detailed error
		self.description = explanation + '\n\n' + details
294
295
296# TODO actually hook the signal for deleting files and folders
297
class FSSingletonClass(SignalEmitter):
	'''Class used for the singleton 'zim.fs.FS' instance

	The class itself has no methods, it only declares the signals
	that other code in this module emits on the singleton
	(e.g. C{FS.emit('path-moved', ...)} in L{UnixPath.rename()}).

	@signal: C{path-created (L{FilePath})}: Emitted when a new file or
	folder has been created
	@signal: C{path-moved (L{FilePath}, L{FilePath})}: Emitted when
	a file or folder has been moved
	@signal: C{path-deleted (L{FilePath})}: Emitted when a file or
	folder has been deleted

	@todo: fix the FS signals for folders as well
	'''

	# define signals we want to use - (closure type, return type and arg types)
	# all three signals have no return value, "path-moved" takes two
	# arguments, the other signals take a single argument
	__signals__ = {
		'path-created': (SIGNAL_AFTER, None, (object,)),
		'path-moved': (SIGNAL_AFTER, None, (object, object)),
		'path-deleted': (SIGNAL_AFTER, None, (object,)),
	}


#: Singleton object for the system filesystem - see L{FSSingletonClass}
FS = FSSingletonClass()
321
322
class UnixPath(object):
	'''Base class for Dir and File objects, represents a file path

	@ivar path: the absolute file path as string
	(should only be used by low-level functions)
	@ivar user_path: the file path in the form C{~/...} when the path
	is below the user's C{HOME} folder, C{None} otherwise
	@ivar uri: the C{file://} URI for this path
	@ivar basename: the basename of the path
	@ivar dirname: the dirname of the path
	@ivar dir: L{Dir} object for the parent folder

	@signal: C{changed (file, other_file, event_type)}: emitted when file
	changed - availability based on C{gio} support for file monitors on
	this platform
	'''

	def __init__(self, path):
		'''Constructor

		@param path: an absolute file path, file URL, L{FilePath} object
		or a list of path elements. When a list is given, the first
		element is allowed to be an absolute path, URL or L{FilePath}
		object as well.
		@raises Error: when the input can not be converted to a string
		'''
		self._serialized = None # cache for serialize_zim_config()

		if isinstance(path, FilePath):
			# Path of an existing object is already normalized
			self.path = path.path
			return

		try:
			if isinstance(path, (list, tuple)):
				path = list(map(str, path))
					# Flatten objects - strings should be unicode or ascii already
				path = SEP.join(path)
					# os.path.join is too intelligent for it's own good
					# just join with the path separator.
			else:
				path = str(path) # make sure we can decode
		except UnicodeDecodeError:
			raise Error('BUG: invalid input, file names should be in ascii, or given as unicode')

		if path.startswith('file:/'):
			path = self._parse_uri(path)
		elif path.startswith('~'):
			path = _os_expanduser(path)

		self._set_path(path) # overloaded in WindowsPath

	def serialize_zim_config(self):
		'''Returns the file path as string for serializing the object.
		Uses the C{~/...} form when the path is below the home folder.
		The result is cached after the first call.
		'''
		if self._serialized is None:
			self._serialized = self.user_path or self.path
		return self._serialized

	@classmethod
	def new_from_zim_config(klass, string):
		'''Returns a new object based on the string representation for
		that path
		'''
		return klass(string)

	@staticmethod
	def _parse_uri(uri):
		'''Convert a local file URI to a file path
		@param uri: a C{file:} URI as string
		@returns: the url-decoded path part of the URI
		@raises AssertionError: when the URI is not a file URI or
		refers to another host than C{localhost}
		'''
		# Spec is file:/// or file://host/
		# But file:/ is sometimes used by non-compliant apps
		# Windows uses file:///C:/ which is compliant
		#
		# Errors are raised explicitly instead of using "assert False"
		# so the checks also work when python runs with "-O"
		if uri.startswith('file:///'):
			uri = uri[7:]
		elif uri.startswith('file://localhost/'):
			uri = uri[16:]
		elif uri.startswith('file://'):
			raise AssertionError('Can not handle non-local file uris')
		elif uri.startswith('file:/'):
			uri = uri[5:]
		else:
			raise AssertionError('Not a file uri: %s' % uri)
		return url_decode(uri)

	def _set_path(self, path):
		# Hook to normalize and store the path - overloaded in WindowsPath
		self.path = os.path.abspath(path)

	def __iter__(self):
		'''Yields a L{Dir} object for each parent folder, starting at
		the top level folder, and finally a L{Dir} object for the
		path itself.
		'''
		parts = self.split()
		for i in range(1, len(parts)):
			path = os.path.join(*parts[0:i])
			yield Dir(path)

		#~ if self.isdir():
		yield Dir(self.path)
		#~ else:
			#~ yield self

	def __str__(self):
		return self.path

	def __repr__(self):
		return '<%s: %s>' % (self.__class__.__name__, self.path)

	def __add__(self, other):
		'''Concatenates paths, only creates objects of the same class. See
		L{Dir.file()} and L{Dir.subdir()} instead to create other objects.
		'''
		return self.__class__((self, other))

	def __eq__(self, other):
		'''Two paths are equal when their C{path} attributes are equal.
		Objects without a C{path} attribute (e.g. C{None}) are never
		equal to a path object.
		'''
		try:
			return self.path == other.path
		except AttributeError:
			# Comparing with something that is not a path object
			return False

	def __ne__(self, other):
		return not self.__eq__(other)

	@property
	def basename(self):
		'''Basename property'''
		return os.path.basename(self.path) # encoding safe

	@property
	def dirname(self):
		'''Dirname property'''
		return os.path.dirname(self.path) # encoding safe

	@property
	def user_path(self):
		'''User_path property: the path in the form C{~/...} or C{None}
		when the path is not below the home folder
		'''
		dir = Dir('~') # FIXME: Should we cache this folder somewhere ?
		if self.ischild(dir):
			return '~/' + self.relpath(dir)
		else:
			return None

	@property
	def uri(self):
		'''File uri property'''
		return 'file://' + url_encode(self.path)

	@property
	def dir(self):
		'''Returns a L{Dir} object for the parent dir'''
		path = os.path.dirname(self.path) # encoding safe
		return Dir(path)

	def monitor(self):
		'''Creates a L{FSObjectMonitor} for this path'''
		return FSObjectMonitor(self)

	def exists(self):
		'''Check if a file or folder exists.
		@returns: C{True} if the file or folder exists
		@implementation: must be implemented by sub classes in order
		that they enforce the type of the resource as well
		'''
		return os.path.exists(self.path)

	def iswritable(self):
		'''Check if a file or folder is writable. Uses permissions of
		parent folder if the file or folder does not (yet) exist.
		@returns: C{True} if the file or folder is writable, C{False}
		when it is not or when no existing parent folder can be found
		'''
		if self.exists():
			return os.access(self.path, os.W_OK)

		parent = self.dir
		if parent.path == self.path:
			# Reached a top level folder that does not exist (e.g. a
			# missing drive) - stop here instead of recursing forever
			return False
		return parent.iswritable() # recurs

	def mtime(self):
		'''Get the modification time of the file path.
		@returns: the mtime timestamp
		'''
		return os.stat(self.path).st_mtime

	def ctime(self):
		'''Get the C{st_ctime} timestamp of the file path. See the
		documentation of C{os.stat} for the platform specific meaning
		of this value.
		@returns: the ctime timestamp
		'''
		return os.stat(self.path).st_ctime

	def size(self):
		'''Get file size in bytes
		See L{format_file_size()} to get a human readable label
		@returns: file size in bytes
		'''
		return os.stat(self.path).st_size

	def isequal(self, other):
		'''Check file paths are equal based on stat results (inode
		number etc.). Intended to detect when two files or dirs are the
		same on case-insensitive filesystems. Does not explicitly check
		the content is the same.
		If you just want to know if two files have the same content,
		see L{File.compare()}
		@param other: an other L{FilePath} object
		@returns: C{True} when the two paths are one and the same file,
		C{False} when they differ or when either path can not be
		accessed
		'''
		# Do NOT assume paths are the same - could be hard link
		# or it could be a case-insensitive filesystem
		try:
			stat_result = os.stat(self.path)
			other_stat_result = os.stat(other.path)
		except OSError:
			return False
		else:
			return stat_result == other_stat_result

	def split(self):
		'''Split the parts of the path on the path separator.
		If the OS uses the concept of a drive the first part will
		include the drive. (So using split() to count the number of
		path elements will not be robust for the path "/".)
		@returns: a list of path elements
		'''
		drive, path = os.path.splitdrive(self.path)
		parts = path.replace('\\', '/').strip('/').split('/')
		parts[0] = drive + SEP + parts[0]
		return parts

	def relpath(self, reference, allowupward=False):
		'''Get a relative path for this file path with respect to
		another path. This method always returns paths using "/" as
		separator, even on windows.
		@param reference: a reference L{FilePath}
		@param allowupward: if C{True} the relative path is allowed to
		start with 'C{../}', if C{False} the reference should be a
		parent folder of this path.
		@returns: a relative file path, or C{None} when C{allowupward}
		is C{True} and the paths have no common parent
		@raises AssertionError: when C{allowupward} is C{False} and
		C{reference} is not a parent folder
		'''
		sep = SEP # '/' or '\'
		refdir = reference.path
		if not refdir.endswith(sep):
			# A top level folder like "/" already ends with the
			# separator, adding another one would never match
			refdir += sep
		isbelow = self.path != reference.path and self.path.startswith(refdir)

		if isbelow:
			path = ''
		elif allowupward:
			parent = self.commonparent(reference)
			if parent is None:
				return None # maybe on different drive under win32

			# Count the number of folders to go up from the reference
			# to reach the common parent
			i = len(parent.path)
			j = refdir[i:].strip(sep).count(sep) + 1
			reference = parent
			path = '../' * j
		else:
			raise AssertionError('Not a parent folder')

		i = len(reference.path)
		path += self.path[i:].lstrip(sep).replace(sep, '/')
		return path

	def commonparent(self, other):
		'''Find a common parent folder between two file paths.
		@param other: another L{FilePath}
		@returns: a L{Dir} object for the common parent folder, or
		C{None} when there is no common parent
		'''
		path = os.path.commonprefix((self.path, other.path)) # encoding safe
		path = path.replace(os.path.sep, SEP) # msys can have '/' as separator
		# commonprefix() works per character, so cut off the partial
		# name after the last separator
		i = path.rfind(SEP) # win32 save...
		if i >= 0:
			return Dir(path[:i + 1])
		else:
			# different drive ?
			return None

	def ischild(self, parent):
		'''Check if this path is a child path of a folder
		@param parent: a L{FilePath} object for the folder
		@returns: C{True} if this path is a child path of C{parent}
		'''
		prefix = parent.path
		if not prefix.endswith(SEP):
			# A top level folder like "/" already ends with the
			# separator, adding another one would never match
			prefix += SEP
		return self.path != parent.path and self.path.startswith(prefix)

	def isdir(self):
		'''Check if this path is a folder or not. Used to detect if
		e.g. a L{File} object should have really been a L{Dir} object.
		@returns: C{True} when this path is a folder
		'''
		return os.path.isdir(self.path)

	def rename(self, newpath):
		'''Rename (move) the content this file or folder to another
		location. This will B{not} change the current file path, so the
		object keeps pointing to the old location.
		After the move the old parent folder is cleaned up in case it
		is left empty.
		@param newpath: the destination C{FilePath} which can either be a
		file or a folder.
		@raises AssertionError: when the destination is the same path
		or when it is an existing folder
		@emits: path-moved
		'''
		# Using shutil.move instead of os.rename because move can cross
		# file system boundaries, while rename can not
		logger.info('Rename %s to %s', self, newpath)
		newpath = adapt_from_newfs(newpath)
		if self.path == newpath.path:
			raise AssertionError('Renaming %s to itself !?' % self.path)

		if newpath.isdir():
			if self.isequal(newpath):
				# We checked name above, so must be case insensitive file system
				# but we still want to be able to rename to other case, so need to
				# do some moving around
				tmpdir = self.dir.new_subdir(self.basename)
				shutil.move(self.path, tmpdir.path)
				shutil.move(tmpdir.path, newpath.path)
			else:
				# Needed because shutil.move() has different behavior for this case
				raise AssertionError('Folder already exists: %s' % newpath.path)
		else:
			# normal case
			newpath.dir.touch()
			shutil.move(self.path, newpath.path)

		FS.emit('path-moved', self, newpath)
		self.dir.cleanup()
631
632
class WindowsPath(UnixPath):
	'''Base class for Dir and File objects, represents a file path
	on windows.
	'''

	def _set_path(self, path):
		'''Normalize and store the path
		@param path: the file path as string, may have leading slashes
		before the drive letter (e.g. "/C:/foo")
		'''
		# Strip leading / for absolute paths
		# The pattern accepts any mix of "/" and "\" before the drive
		# letter, so both characters need to be stripped in one go
		if re.match(r'^[/\\]+[A-Za-z]:[/\\]', path):
			path = path.lstrip('/\\')
		self.path = os.path.abspath(path).replace('/', SEP) # msys can use '/' instead of '\\'

	@property
	def uri(self):
		'''File uri property with win32 logic'''
		# win32 paths do not start with '/', so add another one
		# and avoid url encoding the second ":" in "file:///C:/..."
		path = self.path.replace('\\', '/')
		if re.match('[A-Za-z]:/', path):
			return 'file:///' + path[:2] + url_encode(path[2:])
		else:
			return 'file:///' + url_encode(path)
654
655
# Pick the platform specific base class for the classes below,
# any platform other than windows gets the posix implementation
if os.name == 'nt':
	FilePath = WindowsPath
else:
	if os.name != 'posix':
		logger.critical('os name "%s" unknown, falling back to posix', os.name)
	FilePath = UnixPath
664
665
class Dir(FilePath):
	'''Class representing a single file system folder'''

	def __eq__(self, other):
		'''A folder is only equal to another L{Dir} object with the
		same path
		'''
		if isinstance(other, Dir):
			return self.path == other.path
		else:
			return False

	def exists(self):
		'''Check if the folder exists.
		@returns: C{True} if the path exists and is a folder
		'''
		return os.path.isdir(self.path)

	def _contains(self, other):
		# Check whether the path object "other" is this folder or is
		# located below it. Compares with a trailing separator, so a
		# sibling like "/foo/bar2" is not seen as below "/foo/bar".
		prefix = self.path
		if not prefix.endswith(SEP):
			prefix += SEP # top level folders already end with it
		return other.path == self.path or other.path.startswith(prefix)

	def list(self, glob=None, includehidden=False, includetmp=False, raw=False):
		'''List the file contents

		@param glob: a file name glob to filter the listed files, e.g C{"*.png"}
		@param includehidden: if C{True} include hidden files
		(e.g. names starting with "."), ignore otherwise
		@param includetmp: if C{True} include temporary files
		(e.g. names ending in "~"), ignore otherwise
		@param raw: for filtered folders (C{FilteredDir} instances)
		setting C{raw} to C{True} will disable filtering, not used
		by this class itself

		@returns: a sorted list of names for files and subdirectories,
		the list is empty when the folder does not exist.
		'''
		files = self._list(includehidden, includetmp)

		if glob:
			expr = _glob_to_regex(glob)
			files = list(filter(expr.match, files))

		files.sort()
		return files

	def _list(self, includehidden, includetmp):
		# Get the unsorted listing, skips names that start with "."
		# unless "includehidden" is set and names that start or end
		# with "~" unless "includetmp" is set
		if self.exists():
			files = []
			for file in os.listdir(self.path):
				if file.startswith('.') and not includehidden:
					continue # skip hidden files
				elif (file.endswith('~') or file.startswith('~')) and not includetmp:
					continue # skip temporary files
				else:
					files.append(file)
			return files
		else:
			return []

	def walk(self, raw=True):
		'''Generator that yields all files and folders below this dir
		as objects.
		@param raw: see L{list()}
		@returns: yields L{File} and L{Dir} objects, depth first
		'''
		for name in self.list(raw=raw):
			path = self.path + SEP + name
			if os.path.isdir(path):
				dir = self.subdir(name)
				yield dir
				for child in dir.walk(raw=raw):
					yield child
			else:
				yield self.file(name)

	def get_file_tree_as_text(self, raw=True):
		'''Returns an overview of files and folders below this dir
		as text. Used in tests.
		@param raw: see L{list()}
		@returns: file listing as string, one relative path per line,
		paths of folders end with "/"
		'''
		lines = []
		for child in self.walk(raw=raw):
			path = child.relpath(self)
			if isinstance(child, Dir):
				path += '/'
			lines.append(path + '\n')
		return ''.join(lines)

	def touch(self, mode=None):
		'''Create this folder and any parent folders that do not yet
		exist.
		@param mode: creation mode (e.g. 0o700)
		'''
		if self.exists():
			# Additional check needed because makedirs can not handle
			# a path like "E:\" on windows (while "E:\foo" works fine)
			return

		try:
			if mode is not None:
				os.makedirs(self.path, mode=mode)
			else:
				os.makedirs(self.path)
		except OSError as e:
			# Ignore the error when the path was created in the mean time
			if e.errno != errno.EEXIST:
				raise

	def remove(self):
		'''Remove this folder, fails if it is not empty.
		@emits: path-deleted
		'''
		logger.info('Remove dir: %s', self)
		lrmdir(self.path)
		FS.emit('path-deleted', self)

	def cleanup(self):
		'''Remove this folder and any parent folders that are left
		empty by doing so. Fails silently if the folder is not empty.
		Does nothing when the folder does not exist.
		@returns: C{True} when successful or when the folder did not
		exist (so C{False} means it still exists).
		'''
		if not self.exists():
			return True

		try:
			os.removedirs(self.path)
		except OSError:
			return False # probably dir not empty
		else:
			return True

	def remove_children(self):
		'''Recursively remove everything below this folder .

		B{WARNING:} This is quite powerful and can do a lot of damage
		when executed for the wrong folder, so please make sure to double
		check the dir is actually what you think it is before calling this.

		@raises AssertionError: when the path is empty or is "/"
		'''
		# Explicit check instead of "assert" to make sure this safety
		# net is still in place when python runs with "-O"
		if not self.path or self.path == '/':
			raise AssertionError('BUG: refusing to remove children of "%s"' % self.path)

		logger.info('Remove file tree: %s', self)
		for root, dirs, files in os.walk(self.path, topdown=False):
			# walk should not decent into symlinked folders by default
			# remove() and rmdir() both should remove a symlink rather
			# than the target of the link
			for name in files:
				os.remove(os.path.join(root, name))
			for name in dirs:
				lrmdir(os.path.join(root, name))

	def copyto(self, dest):
		'''Recursively copy the contents of this folder.
		When the destination folder already exists the contents will be
		merged, so you need to check existence of the destination first
		if you want a clean new copy.
		Uses L{list()} with default arguments to find the contents,
		so hidden and temporary files are not copied.
		@param dest: a L{Dir} object
		@raises AssertionError: when C{dest} is not a L{Dir} object or
		is the same folder as this one
		'''
		# We do not use shutil.copytree() because it requires that
		# the target dir does not exist
		if not isinstance(dest, Dir):
			raise AssertionError('BUG: destination should be a Dir object')
		if dest == self:
			raise AssertionError('BUG: trying to copy a dir to itself')
		logger.info('Copy dir %s to %s', self, dest)

		def copy_dir(source, target):
			target.touch()
			for item in source.list():
				child = FilePath((source, item))
				if child.isdir():
					copy_dir(Dir(child), target.subdir(item)) # recur
				else:
					child = File(child)
					child.copyto(target)

		copy_dir(self, dest)
		# TODO - not hooked with FS signals

	def file(self, path):
		'''Get a L{File} object for a path below this folder

		@param path: a (relative) file path as string, tuple or
		L{FilePath} object. When C{path} is a L{File} object already
		this method still enforces it is below this folder.
		So this method can be used as check as well.

		@returns: a L{File} object
		@raises PathLookupError: if the path is not below this folder
		'''
		file = self.resolve_file(path)
		if not self._contains(file):
			raise PathLookupError('%s is not below %s' % (file, self))
		return file

	def resolve_file(self, path):
		'''Get a L{File} object for a path relative to this folder

		Like L{file()} but allows the path to start with "../" as
		well, so can handle any relative path.

		@param path: a (relative) file path as string, tuple or
		L{FilePath} object.
		@returns: a L{File} object
		@raises AssertionError: when C{path} has another type
		'''
		if isinstance(path, str):
			return File((self.path, path))
		elif isinstance(path, (list, tuple)):
			return File((self.path,) + tuple(path))
		elif isinstance(path, File):
			return path
		elif isinstance(path, FilePath):
			return File(path.path)
		else:
			raise AssertionError('BUG: invalid type for path: %r' % (path,))

	def new_file(self, path):
		'''Get a L{File} object for a new file below this folder.
		Like L{file()} but guarantees the file does not yet exist by
		adding sequential numbers if needed. So the resulting file
		may have a modified name.

		@param path: a (relative) file path as string, tuple or
		L{FilePath} object.

		@returns: a L{File} object
		@raises PathLookupError: if the path is not below this folder
		'''
		file = self.file(path)
		basename = file.basename
		if '.' in basename:
			# Insert the number before the first ".", so a name like
			# "foo.tar.gz" becomes "foo001.tar.gz"
			basename, ext = basename.split('.', 1)
		else:
			ext = ''
		dir = file.dir
		i = 0
		while file.exists():
			logger.debug('File exists "%s" trying increment', file)
			i += 1
			newname = basename + '%03i' % i
			if ext:
				newname += '.' + ext
			file = dir.file(newname)
		return file

	def subdir(self, path):
		'''Get a L{Dir} object for a path below this folder

		@param path: a (relative) file path as string, tuple or
		L{FilePath} object. When C{path} is a L{Dir} object already
		this method still enforces it is below this folder.
		So this method can be used as check as well.

		@returns: a L{Dir} object
		@raises PathLookupError: if the path is not below this folder

		'''

		dir = self.resolve_dir(path)
		if not self._contains(dir):
			raise PathLookupError('%s is not below %s' % (dir, self))
		return dir

	def resolve_dir(self, path):
		'''Get a L{Dir} object for a path relative to this folder

		Like L{subdir()} but allows the path to start with "../" as
		well, so can handle any relative path.

		@param path: a (relative) file path as string, tuple or
		L{FilePath} object.
		@returns: a L{Dir} object
		@raises AssertionError: when C{path} has another type
		'''
		if isinstance(path, str):
			return Dir((self.path, path))
		elif isinstance(path, (list, tuple)):
			return Dir((self.path,) + tuple(path))
		elif isinstance(path, Dir):
			return path
		elif isinstance(path, FilePath):
			return Dir(path.path)
		else:
			raise AssertionError('BUG: invalid type for path: %r' % (path,))

	def new_subdir(self, path):
		'''Get a L{Dir} object for a new sub-folder below this folder.
		Like L{subdir()} but guarantees the folder does not yet exist by
		adding sequential numbers if needed. So the resulting folder
		may have a modified name.

		@param path: a (relative) file path as string, tuple or
		L{FilePath} object.

		@returns: a L{Dir} object
		@raises PathLookupError: if the path is not below this folder
		'''
		subdir = self.subdir(path)
		basename = subdir.basename
		# Numbered names go in the same parent folder as the requested
		# path, which is not this folder for a path like "foo/bar"
		parent = subdir.dir
		i = 0
		while subdir.exists():
			logger.debug('Dir exists "%s" trying increment', subdir)
			i += 1
			newname = basename + '%03i' % i
			subdir = self.subdir(parent.subdir(newname))
		return subdir
956
957
def _glob_to_regex(glob):
	'''Compile a file name glob to a regular expression.
	Supports "*" for any number of characters and "?" for exactly
	one character, all other characters are matched literally.
	The expression needs to match the whole name when used with
	C{match()}.
	@param glob: the glob pattern as string, e.g. C{"*.png"}
	@returns: a compiled regular expression object
	'''
	parts = []
	for char in glob:
		if char == '*':
			parts.append('.*')
		elif char == '?':
			parts.append('.') # exactly one character, not zero or one
		else:
			parts.append(re.escape(char))
	# Anchor at the end, else "*.png" also matches "foo.png.bak"
	return re.compile(''.join(parts) + r'\Z')
963
964
class FilteredDir(Dir):
	'''Class implementing a folder with a filtered listing. Can be
	used to e.g. filter all objects that are also ignored by version
	control.
	'''

	def __init__(self, path):
		'''Constructor

		@param path: an absolute file path, file URL, L{FilePath} object
		or a list of path elements. When a list is given, the first
		element is allowed to be an absolute path, URL or L{FilePath}
		object as well.
		'''
		Dir.__init__(self, path)
		self._ignore = [] # compiled patterns for names to leave out

	def ignore(self, glob):
		'''Add a file pattern to ignore
		@param glob: a file path pattern (e.g. "*.txt")
		'''
		regex = _glob_to_regex(glob)
		self._ignore.append(regex)

	def filter(self, name):
		'''Check a name against the patterns added with L{ignore()}
		@param name: the file or folder name as string
		@returns: C{False} when the name matches any of the patterns
		and should be left out, C{True} otherwise
		'''
		return not any(regex.match(name) for regex in self._ignore)

	def list(self, includehidden=False, includetmp=False, raw=False, glob=None):
		'''List the file contents, leaving out names that match one
		of the patterns added with L{ignore()}

		@param includehidden: if C{True} include hidden files
		(e.g. names starting with "."), ignore otherwise
		@param includetmp: if C{True} include temporary files
		(e.g. names ending in "~"), ignore otherwise
		@param raw: if C{True} the ignore patterns are not applied
		@param glob: a file name glob to filter the listed files,
		e.g C{"*.png"}

		@returns: a sorted list of names for files and subdirectories
		'''
		# Pass arguments by keyword, the first positional argument of
		# Dir.list() is "glob" and not "includehidden"
		files = Dir.list(self,
			glob=glob, includehidden=includehidden, includetmp=includetmp)
		if not raw:
			files = list(filter(self.filter, files))
		return files
1001
1002
class File(FilePath):
	'''Class representing a single file.

	This class implements much more complex logic than the default
	python file objects. E.g. on writing we first write to a temporary
	file, then flush and sync and finally replace the file we intended
	to write with the temporary file. This makes it much more difficult
	to lose file contents when something goes wrong during the writing.

	Also it implements logic to check the modification time before
	writing to prevent overwriting a file that was changed on disk in
	between read and write operations. If this mtime check fails MD5
	sums are used to verify before raising an exception (because some
	share drives do not maintain mtime very precisely).
	This logic is not atomic, so your mileage may vary.
	'''

	# For atomic write we first write a tmp file which has the extension
	# .zim-new~ when it was written successfully we replace the actual file
	# with the tmp file. Because rename is atomic on POSIX platforms and
	# replaces the existing file this either succeeds or not, it can never
	# truncate the existing file but fail to write the new file. So if writing
	# fails we should always at least have the old file still present.
	# If we encounter a left over .zim-new~ we ignore it since it may be
	# corrupted.
	#
	# For Windows the behavior is more complicated, see the WindowsFile class
	# below.
	#
	# Note that the mechanism to avoid overwriting files that changed on disks
	# does not prevent conflicts when two processes try to write to the same
	# file at the same time. This is a hard problem that is currently not
	# addressed in this implementation.

	def __init__(self, path, checkoverwrite=False, endofline=None):
		'''Constructor

		@param path: an absolute file path, file URL, L{FilePath} object
		or a list of path elements. When a list is given, the first
		element is allowed to be an absolute path, URL or L{FilePath}
		object as well.

		@param checkoverwrite: when C{True} this object checks the
		modification time before writing to prevent overwriting a file
		that was changed on disk in between read and write operations.

		@param endofline: the line end style used when writing, can be
		one of "unix" ('\\n') or "dos" ('\\r\\n'). When C{None} the local
		default is used.
		'''
		FilePath.__init__(self, path)
		self.checkoverwrite = checkoverwrite
		self.endofline = endofline
		# State remembered from the last read or write, only set when
		# checkoverwrite is enabled, see _checkoverwrite()
		self._mtime = None
		self._md5 = None

	# NOTE(review): Python 3 sets __hash__ to None for a class that
	# defines __eq__ without __hash__ - confirm that File objects are
	# not expected to be hashable
	def __eq__(self, other):
		'''Files are equal when the other object is also a L{File}
		and both have the same path
		'''
		if isinstance(other, File):
			return self.path == other.path
		else:
			return False

	def exists(self):
		'''Check whether this file exists on disk
		@returns: C{True} when the path refers to an existing regular file
		'''
		return os.path.isfile(self.path)

	def isimage(self):
		'''Check if this is an image file. Convenience method that
		works even when no real mime-type support is available.
		If this method returns C{True} it is no guarantee
		this image type is actually supported by Gtk.
		@returns: C{True} when this is an image file
		'''

		# Quick shortcut to be able to load images in the gui even if
		# we have no proper mimetype support
		if '.' in self.basename:
			_, ext = self.basename.rsplit('.', 1)
			if ext in IMAGE_EXTENSIONS:
				return True

		return self.get_mimetype().startswith('image/')

	def get_mimetype(self):
		'''Get the mime-type for this file.
		Will use the XDG mimetype system if available, otherwise
		falls back to the standard library C{mimetypes}.
		@returns: the mimetype as a string, e.g. "text/plain"
		'''
		if xdgmime:
			mimetype = xdgmime.get_type(self.path, name_pri=80)
			return str(mimetype)
		else:
			mimetype, encoding = mimetypes.guess_type(self.path, strict=False)
			if encoding == 'gzip':
				return 'application/x-gzip'
			elif encoding in ('bzip', 'bzip2'):
				# The standard library reports "bzip2" for ".bz2" files,
				# "bzip" is kept for backward compatibility
				return 'application/x-bzip'
			elif encoding == 'compress':
				return 'application/x-compress'
			else:
				return mimetype or 'application/octet-stream'

	def get_endofline(self):
		'''Get the end-of-line character(s) used for writing this file.
		Uses the C{endofline} attribute when set, else the default
		depends on whether this object is a C{WindowsPath}.
		@returns: the end-of-line character(s)
		'''
		if self.endofline is None:
			if isinstance(self, WindowsPath):
				return '\r\n'
			else:
				return '\n'
		else:
			assert self.endofline in ('unix', 'dos')
			if self.endofline == 'dos':
				return '\r\n'
			else:
				return '\n'

	def raw(self):
		'''Get the raw content without UTF-8 decoding, newline logic,
		etc. Used to read binary data, e.g. when serving files over www.
		Note that this function also does not integrate with checking
		mtime, so it is intended for read only usage.
		@returns: file content as bytes
		@raises FileNotFoundError: when the file can not be read
		'''
		try:
			# Context manager makes sure the handle is also closed
			# when reading fails half way
			with open(self.path, mode='rb') as fh:
				return fh.read()
		except IOError:
			raise FileNotFoundError(self)

	def read(self):
		'''Get the file contents as a string. Takes care of decoding
		UTF-8 and fixes line endings.
		@returns: the content as (unicode) string.
		@raises FileNotFoundError: when the file does not exist.
		@raises FileUnicodeError: when the content is not valid UTF-8
		'''
		try:
			content = self._read()
			self._checkoverwrite(content)
			return content.lstrip('\ufeff').replace('\x00', '')
				# Strip unicode byte order mark
				# And remove any NULL byte since they screw up parsing
		except IOError:
			raise FileNotFoundError(self)
		except UnicodeDecodeError as error:
			raise FileUnicodeError(self, error)

	def _read(self):
		# Plain read of the UTF-8 decoded content, no error translation
		# and no update of the overwrite check state
		with open(self.path, encoding='UTF-8') as fh:
			return fh.read()

	def readlines(self):
		'''Get the file contents as a list of lines. Takes care of
		decoding UTF-8 and fixes line endings.

		@returns: the content as a list of lines.
		@raises FileNotFoundError: when the file does not exist.
		@raises FileUnicodeError: when the content is not valid UTF-8
		'''
		try:
			# Context manager makes sure the handle is always closed
			with open(self.path, encoding='UTF-8') as fh:
				lines = fh.readlines()
			self._checkoverwrite(lines)
			return [line.lstrip('\ufeff').replace('\x00', '') for line in lines]
				# Strip unicode byte order mark
				# And remove any NULL byte since they screw up parsing
		except IOError:
			raise FileNotFoundError(self)
		except UnicodeDecodeError as error:
			raise FileUnicodeError(self, error)

	def _write_check(self):
		# Refuse to write when not writable and make sure the parent
		# folder exists before a new file is written
		if not self.iswritable():
			raise FileWriteError(_('File is not writable: %s') % self.path) # T: Error message
		elif not self.exists():
			self.dir.touch()
		else:
			pass # exists and writable

	def write(self, text):
		'''Write file contents from string. This overwrites the current
		content. Will automatically create all parent folders.
		If writing fails the file will either have the new content or the
		old content, but it should not be possible to have the content
		truncated.
		@param text: new content as (unicode) string
		@raises FileWriteError: when the file is not writable or when
		the overwrite check found that the file changed on disk
		@emits: path-created if the file did not yet exist
		'''
		self._assertoverwrite()
		isnew = not os.path.isfile(self.path)
			# must be determined before writing creates the file
		newline = self.get_endofline()
		self._write_check()
		with AtomicWriteContext(self, newline=newline) as fh:
			fh.write(text)

		self._checkoverwrite(text)
		if isnew:
			FS.emit('path-created', self)

	def writelines(self, lines):
		'''Write file contents from a list of lines.
		Like L{write()} but input is a list instead of a string.
		@param lines: new content as list of lines
		@raises FileWriteError: when the file is not writable or when
		the overwrite check found that the file changed on disk
		@emits: path-created if the file did not yet exist
		'''
		self._assertoverwrite()
		isnew = not os.path.isfile(self.path)
			# must be determined before writing creates the file
		newline = self.get_endofline()
		self._write_check()
		with AtomicWriteContext(self, newline=newline) as fh:
			fh.writelines(lines)

		self._checkoverwrite(lines)
		if isnew:
			FS.emit('path-created', self)

	def _checkoverwrite(self, content):
		# Set properties needed by assertoverwrite for the in-memory object
		if self.checkoverwrite:
			self._mtime = self.mtime()
			self._md5 = _md5(content)

	def _assertoverwrite(self):
		# When we read a file and then write it, this method asserts the file
		# did not change in between (e.g. by another process, or another async
		# function of our own process). We use properties of this object instance
		# We check the timestamp, if that does not match we check md5 to be sure.
		# (Sometimes e.g. network filesystems do not maintain timestamps as strict
		# as we would like.)
		#
		# This function should not prohibit writing without reading first.
		# Also we just write the file if it went missing in between
		if self._mtime and self._md5:
			try:
				mtime = self.mtime()
			except OSError:
				if not os.path.isfile(self.path):
					logger.critical('File missing: %s', self.path)
					return
				else:
					raise

			if not self._mtime == mtime:
				logger.warning('mtime check failed for %s, trying md5', self.path)
				if self._md5 != _md5(self._read()):
					raise FileWriteError(_('File changed on disk: %s') % self.path)
						# T: error message
					# Why are we using MD5 here ?? could just compare content...

	def check_has_changed_on_disk(self):
		'''Returns C{True} when this file has changed on disk'''
		if not (self._mtime and self._md5):
			# Nothing remembered from an earlier read or write
			if os.path.isfile(self.path):
				return True # may well been just created
			else:
				return False # ??
		elif not os.path.isfile(self.path):
			return True
		else:
			try:
				self._assertoverwrite()
			except FileWriteError:
				return True
			else:
				return False

	def touch(self):
		'''Create this file and any parent folders if it does not yet
		exist. (Parent folders are also created when writing to a file,
		so you only need to call this method in special cases - e.g.
		when an external program requires the file to exist.)
		'''
		if self.exists():
			return
		else:
			self.write('')

	def remove(self):
		'''Remove (delete) this file and cleanup any related temporary
		files we created. This action can not be un-done.
		Ignores silently if the file did not exist in the first place.
		@emits: path-deleted, also when the file did not exist
		'''
		logger.info('Remove file: %s', self)
		if os.path.isfile(self.path):
			os.remove(self.path)

		# Left over from an interrupted atomic write
		tmp = self.path + '.zim-new~'
		if os.path.isfile(tmp):
			os.remove(tmp)

		FS.emit('path-deleted', self)

	def cleanup(self):
		'''Remove this file and cleanup any empty parent folder.
		Convenience method calling L{File.remove()} and L{Dir.cleanup()}.
		'''
		self.remove()
		self.dir.cleanup()

	def copyto(self, dest):
		'''Copy this file to another location. Preserves all file
		attributes (by using C{shutil.copy2()})
		@param dest: a L{File} or L{Dir} object for the destination. If the
		destination is a folder, we will copy to a file below that
		folder of the same name
		'''
		dest = adapt_from_newfs(dest)
		assert isinstance(dest, (File, Dir))
		if isinstance(dest, Dir):
			assert not dest == self.dir, 'BUG: trying to copy a file to itself'
		else:
			assert not dest == self, 'BUG: trying to copy a file to itself'
		logger.info('Copy %s to %s', self, dest)
		# Make sure the target folder exists before copying
		if isinstance(dest, Dir):
			dest.touch()
		else:
			dest.dir.touch()
		shutil.copy2(self.path, dest.path)
		# TODO - not hooked with FS signals

	def compare(self, other):
		'''Check if file contents are the same. This differs from
		L{isequal()} because files can be different physical locations.
		@param other: another L{File} object
		@returns: C{True} when the files have the same content
		'''
		# TODO: can be more efficient, e.g. by checking stat size first
		# also wonder if MD5 is needed here ... could just compare text
		return _md5(self.read()) == _md5(other.read())
1338
1339
class TmpFile(File):
	'''Class for temporary files. These are stored in the temp directory
	and by default they are deleted again when the object is destructed.
	'''

	def __init__(self, basename, unique=True, persistent=False):
		'''Constructor

		@param basename: gives the name for this tmp file.
		@param unique: if C{True} the L{Dir.new_file()} method is used
		to make sure we have a new file.
		@param persistent: if C{False} the file will be removed when the
		object is destructed, if C{True} we leave it alone
		'''
		tmpdir = get_tmpdir()
		if unique:
			newfile = tmpdir.new_file(basename)
			File.__init__(self, newfile.path)
		else:
			File.__init__(self, (tmpdir, basename))

		self.persistent = persistent

	def __del__(self):
		# The destructor is also called for objects whose constructor
		# raised before "persistent" was set, in that case there is
		# nothing we should remove
		if not getattr(self, 'persistent', True):
			self.remove()
1366
1367
1368
1369# Replace logic based on discussion here:
1370# http://stupidpythonideas.blogspot.nl/2014/07/getting-atomic-writes-right.html
1371#
1372# The point is to get a function to replace an old file with a new
1373# file as "atomic" as possible
1374
# Select the best available implementation for
# _replace_file(src, dst), which moves "src" over "dst" and replaces
# "dst" when it already exists.
if hasattr(os, 'replace'):
	_replace_file = os.replace
elif sys.platform == 'win32':
	# The win32api.MoveFileEx method somehow does not like our unicode,
	# the ctypes version does ??!
	import ctypes
	import time # used for the delay before the retry below
	_MoveFileEx = ctypes.windll.kernel32.MoveFileExW
	_MoveFileEx.argtypes = [ctypes.c_wchar_p, ctypes.c_wchar_p, ctypes.c_uint32]
	_MoveFileEx.restype = ctypes.c_bool
	def _replace_file(src, dst):
		'''Replace C{dst} by C{src}, retries once after a short delay
		@raises OSError: when the second attempt fails as well
		'''
		try:
			if not _MoveFileEx(src, dst, 1): # MOVEFILE_REPLACE_EXISTING
				raise OSError('Could not replace "%s" -> "%s"' % (src, dst))
		except Exception:
			# Sometimes it fails - we play stupid and try again...
			# (Do not catch KeyboardInterrupt and friends here.)
			time.sleep(0.5)
			if not _MoveFileEx(src, dst, 1): # MOVEFILE_REPLACE_EXISTING
				raise OSError('Could not replace "%s" -> "%s"' % (src, dst))
else:
	_replace_file = os.rename
1395
1396
### TODO filter Dir.list directly for hidden files
if os.name == 'nt':
	import ctypes

	def is_hidden_file(file):
		'''Check the "hidden" attribute of a file on windows
		@param file: object with a C{path} attribute
		@returns: C{True} when the hidden attribute is set, C{False}
		otherwise and also when the attributes could not be obtained
		'''
		invalid_attributes = -1 # INVALID_FILE_ATTRIBUTES
		hidden_flag = 2 # FILE_ATTRIBUTE_HIDDEN

		try:
			# note: GetFileAttributesW is unicode version of GetFileAttributes
			flags = ctypes.windll.kernel32.GetFileAttributesW(file.path)
		except AttributeError:
			return False

		return flags != invalid_attributes and bool(flags & hidden_flag)

else:
	def is_hidden_file(file):
		'''Check whether a file is hidden based on its name
		@param file: object with a C{basename} attribute
		@returns: C{True} when the basename starts with a "."
		'''
		return file.basename.startswith('.')
###
1420
1421
class FSObjectMonitor(SignalEmitter):
	'''Object that emits a "changed" signal when the monitored path
	is created, changed, deleted or moved on disk. Monitoring is done
	using C{Gio}, when C{Gio} is not available no monitor is set up.
	'''

	__signals__ = {
		'changed': (None, None, (None, None)),
	}

	def __init__(self, path):
		'''Constructor
		@param path: the object to monitor, needs to have an C{uri}
		attribute
		'''
		self.path = path
		self._gio_file_monitor = None

	def _setup_signal(self, signal):
		# Creates the Gio monitor for the "changed" signal if we do not
		# have one yet.
		# NOTE(review): presumably called by SignalEmitter when the first
		# handler for a signal is connected - confirm
		if signal == 'changed' \
		and self._gio_file_monitor is None \
		and Gio:
			try:
				file = Gio.File.new_for_uri(self.path.uri)
				self._gio_file_monitor = file.monitor()
				self._gio_file_monitor.connect('changed', self._on_changed)
			except Exception:
				# Monitoring is best effort, so log and continue. Do not
				# swallow KeyboardInterrupt and SystemExit here.
				logger.exception('Error while setting up file monitor')

	def _teardown_signal(self, signal):
		# Cancels the Gio monitor for the "changed" signal.
		# NOTE(review): presumably called by SignalEmitter when the last
		# handler for a signal is disconnected - confirm
		if signal == 'changed' \
		and self._gio_file_monitor:
			try:
				self._gio_file_monitor.cancel()
			except Exception:
				logger.exception('Error while tearing down file monitor')
			finally:
				# Always drop the reference, so a new monitor can be set up
				self._gio_file_monitor = None

	def _on_changed(self, filemonitor, file, other_file, event_type):
		# 'FILE_MONITOR_EVENT_CHANGED' is always followed by
		# a 'FILE_MONITOR_EVENT_CHANGES_DONE_HINT' when the filehandle
		# is closed (or after timeout). Idem for "created", assuming it
		# is not created empty.
		#
		# TODO: do not emit changed on CREATED - separate signal that
		#       can be used when monitoring a file list, but reserve
		#       changed for changes-done-hint so that we ensure the
		#       content is complete.
		#       + emit on write and block redundant signals here
		#
		# Also note that in many cases "moved" will not be used, but a
		# sequence of deleted, created will be signaled
		#
		# For Dir objects, the event will refer to files contained in
		# the dir.

		#~ print('MONITOR:', self, event_type)
		if event_type in (
			Gio.FileMonitorEvent.CREATED,
			Gio.FileMonitorEvent.CHANGES_DONE_HINT,
			Gio.FileMonitorEvent.DELETED,
			Gio.FileMonitorEvent.MOVED,
		):
			self.emit('changed', None, None) # TODO translate otherfile and eventtype
1479