1
2# Copyright 2015-2016 Jaap Karssenberg <jaap.karssenberg@gmail.com>
3
4'''Base classes for filesystem and storage implementation'''
5
6import os
7import re
8import hashlib
9import contextlib
10
11import logging
12
# Logger shared by all code in this module, messages are emitted on the
# 'zim.newfs' channel
logger = logging.getLogger('zim.newfs')
14
15
16from . import FS_SUPPORT_NON_LOCAL_FILE_SHARES
17
18from zim.errors import Error
19from zim.parsing import url_encode, url_decode
20
21
# Matches strings that start with an URL scheme, e.g. "file:/" or "smb:/".
# At least two characters are required in front of the ":", so a single
# letter followed by ":" (a windows drive like "C:/") does not match.
is_url_re = re.compile(r'^\w{2,}:/')
# Matches windows share (UNC) paths: exactly two backslashes directly
# followed by the first character of the host name
is_share_re = re.compile(r'^\\\\\w')
24
25
# Path separator and end-of-line convention for the platform we run on.
# On windows the separator is hard coded because os.path.sep can still
# be "/" under msys.
SEP = '\\' if os.name == 'nt' else os.path.sep
_EOL = 'dos' if os.name == 'nt' else 'unix'
32
33
34
35
class FileNotFoundError(Error):
	'''Error for a file or folder that does not exist.
	The object given to the constructor is kept in the C{file} attribute.
	'''

	# TODO - description and translation

	def __init__(self, path):
		'''Constructor
		@param path: the missing path, either a string or an object with
		a C{path} attribute
		'''
		self.file = path
		location = getattr(path, 'path', path)
		Error.__init__(self, 'No such file or folder: %s' % location)
44
45
class FileExistsError(Error):
	'''Error for a file or folder that already exists, e.g. as the
	target of a copy. The object given to the constructor is kept in the
	C{file} attribute.
	'''

	# TODO - description and translation

	def __init__(self, path):
		'''Constructor
		@param path: the existing path, either a string or an object with
		a C{path} attribute
		'''
		self.file = path
		location = getattr(path, 'path', path)
		Error.__init__(self, 'File or folder already exists: %s' % location)
54
55
class FileUnicodeError(Error):
	'''Error for file contents that can not be decoded, typically
	because the file uses a different encoding where UTF-8 is expected.
	'''

	def __init__(self, file, error):
		'''Constructor
		@param file: the file object that could not be read, its C{path}
		attribute is used in the message
		@param error: the underlying error, its string representation is
		appended to the description
		'''
		self.file = file
		self.error = error
		self.msg = _('Could not read: %s') % file.path
			# T: message for FileUnicodeError (%s is the file name)
		explanation = _('This usually means the file contains invalid characters')
			# T: message for FileUnicodeError
		details_label = _('Details')
			# T: label for detailed error
		self.description = explanation + '\n\n' + details_label + ':\n' + str(error)
70
71
class FileChangedError(Error):
	'''Error for a file that was changed on disk since it was last read.
	The object given to the constructor is kept in the C{file} attribute.
	'''

	# TODO - description and translation

	def __init__(self, path):
		'''Constructor
		@param path: the changed path, either a string or an object with
		a C{path} attribute
		'''
		self.file = path
		location = getattr(path, 'path', path)
		Error.__init__(self, 'File changed on disk: %s' % location)
80
81
class FileNotWritableError(Error):
	'''Error for a file that exists but can not be written.
	The object given to the constructor is kept in the C{file} attribute.
	'''

	# TODO - description and translation

	def __init__(self, path):
		'''Constructor
		@param path: the read-only path, either a string or an object with
		a C{path} attribute
		'''
		self.file = path
		location = getattr(path, 'path', path)
		Error.__init__(self, 'No permission to write file: %s' % location)
90
91
class FolderNotEmptyError(Error):
	'''Error for a folder that still has content where an empty folder
	is required. The object given to the constructor is kept in the
	C{file} attribute, like for the other path errors in this module.
	'''

	# TODO - description and translation

	def __init__(self, path):
		'''Constructor
		@param path: the folder path, either a string or an object with
		a C{path} attribute
		'''
		self.file = path # same attribute as set by the other path errors
		path = path.path if hasattr(path, 'path') else path
		Error.__init__(self, 'Folder not empty: %s' % path)
99
100
101
def _split_file_url(url):
	'''Split a "file:" or "smb:" URL into path names
	@param url: the URL as string, backslashes are treated as "/"
	@returns: 2-tuple of the list of URL decoded path names and a boolean
	which is C{True} when the URL points to a share on another host
	@raises ValueError: when C{url} does not contain ":/" or when the
	scheme is not "file" or "smb"
	'''
	scheme, path = url.replace('\\', '/').split(':/', 1)
	if scheme not in ('file', 'smb'):
		raise ValueError('Not a file URL: %s' % url)

	if path.startswith('/localhost/'): # exact 2 '/' before 'localhost'
		path = path[len('/localhost/'):]
		isshare = False
	elif scheme == 'smb' or re.match(r'^/\w', path): # exact 2 '/' before the host name
		isshare = True
	else:
		isshare = False # either 'file:/' or 'file:///'

	return url_decode(path).strip('/').split('/'), isshare
116
117
def _splitnormpath(path, force_rel=False):
	'''Split a path in names and normalize "." and ".." elements
	Keeps a leading "/" or double backslash on the first name to
	distinguish absolute paths. Splitting is robust for both "/" and
	backslash separators regardless of the os we are running on.
	@param path: either a string (file path or file URL) or a list or
	tuple of names
	@param force_rel: when C{True} a string is always handled as a
	relative path, so no URL, "~" or root detection is done
	@returns: tuple of normalized names
	@raises ValueError: when the path reduces to nothing
	'''
	makeroot = False
	makeshare = False
	if not isinstance(path, str):
		parts = path # sequence of names, taken as is
	elif force_rel:
		parts = re.split(r'[/\\]+', path.strip('/\\'))
	elif is_url_re.match(path):
		makeroot = True
		parts, makeshare = _split_file_url(path)
	else:
		if path.startswith('~'):
			makeroot = True
			path = _os_expanduser(path)
		else:
			makeroot = path.startswith('/')
		makeshare = re.match(r'^\\\\\w', path) is not None # exact 2 "\"
		parts = re.split(r'[/\\]+', path.strip('/\\'))

	normalized = []
	for part in parts:
		if part == '..':
			if normalized and normalized[-1] != '..':
				normalized.pop()
			else:
				# Going up beyond the start, so it can not be rooted anymore
				normalized.append(part)
				makeroot = False
		elif part == '.' and normalized:
			continue # "." is only kept as very first name
		else:
			normalized.append(part)

	if not normalized:
		raise ValueError('path reduces to empty string')

	first = normalized[0]
	if makeshare:
		normalized[0] = '\\\\' + first # UNC host needs leading "\\"
	elif makeroot and os.name != 'nt' and not first.startswith('/'):
		normalized[0] = '/' + first

	return tuple(normalized)
162
163
# Platform specific functions to turn a tuple of normalized path names,
# as produced by _splitnormpath(), back into a path string or a file URI
if os.name == 'nt':
	def _joinabspath(names):
		'''Join path names to an absolute path string
		@param names: sequence of path names
		@returns: the path as string using backslash as separator
		@raises ValueError: when the first name is neither a drive
		letter nor an UNC host
		'''
		# first element must be either drive letter or UNC host
		if not re.match(r'^(\w:|\\\\\w)', names[0]):
			raise ValueError('Not an absolute path: %s' % '\\'.join(names))
		else:
			return '\\'.join(names) # Don't rely on SEP here, msys sets it to '/'

	def _joinuri(names):
		'''Join path names to a "file:" URI
		@param names: sequence of path names
		@returns: the URI as string
		@raises ValueError: when the first name is neither a drive
		letter nor an UNC host
		'''
		# first element must be either drive letter or UNC host
		if re.match(r'^\w:$', names[0]): # Drive letter - e.g. file:///C:/foo
			return 'file:///' + names[0] + '/' + url_encode('/'.join(names[1:]))
		elif re.match(r'^\\\\\w', names[0]): # UNC path - e.g. file://host/share
			# Host names can contain "-" and ".", so only check the start
			return 'file://' + url_encode(names[0].strip('\\') + '/' + '/'.join(names[1:]))
		else:
			# Raise instead of silently returning None
			raise ValueError('Not an absolute path: %s' % '\\'.join(names))

else:
	def _joinabspath(names):
		'''Join path names to an absolute path string
		@param names: sequence of path names
		@returns: the path as string, backslash separated for a windows
		share and "/" separated otherwise
		@raises ValueError: when the first name does not start with
		"/" or a double backslash
		'''
		if names[0].startswith('\\\\'):
			return '\\'.join(names) # Windows share drive
		elif names[0].startswith('/'):
			return '/'.join(names)
		else:
			raise ValueError('Not an absolute path: %s' % '/'.join(names))

	def _joinuri(names):
		'''Join path names to a "file:" URI
		@param names: sequence of path names
		@returns: the URI as string
		'''
		if names[0].startswith('\\\\'):
			# Windows share drive - e.g. file://host/share, same form as
			# used on windows such that the URI parses back as a share
			return 'file://' + url_encode(names[0].strip('\\') + '/' + '/'.join(names[1:]))
		elif names[0].startswith('/'):
			return 'file://' + url_encode('/'.join(names))
		else:
			return 'file:///' + url_encode('/'.join(names))
195
196
def _os_expanduser(path):
	'''Expand a leading "~" or "~user" in a path
	Forces usage of $HOME (especially on windows) instead of the default
	logic in C{os.path.expanduser()}. This depends on us setting HOME
	correctly based on USERPROFILE or similar.
	@param path: path string starting with "~"
	@returns: the expanded path as string
	'''
	assert path.startswith('~')
	home = os.environ['HOME']
	parts = path.replace('\\', '/').strip('/').split('/')
	user, remainder = parts[0], parts[1:]
	if user == '~':
		return SEP.join([home] + remainder)

	# "~user" form, first let the os try to resolve it
	expanded = os.path.expanduser(path)
	if not expanded.startswith('~'):
		return expanded

	# fallback: assume the home of the other user is next to our own
	return SEP.join([os.path.dirname(home), user[1:]] + remainder)
214
215
def is_abs_filepath(string):
	'''Check whether a string can be used as an absolute file path
	@param string: file path or file URL as string
	@returns: C{True} when the string can be split and joined to an
	absolute path, C{False} when this results in a C{ValueError}
	'''
	try:
		_joinabspath(_splitnormpath(string))
	except ValueError:
		return False
	return True
223
224
class FilePath(object):
	'''Class to represent filesystem paths and the base class for all
	file and folder objects. Contains methods for file path manipulation.

	File paths should always be absolute paths and can e.g. not start
	with "../" or "./". On windows they should always start with either
	a drive letter or a share drive. On unix they should start at the
	root of the filesystem.

	Paths can be handled either as strings representing a local file
	path ("/" or backslash separated), strings representing a file uri
	("file:///" or "smb://") or list of path names.

	@ivar path: the absolute path as string
	@ivar pathnames: tuple with the normalized names that make up the path
	@ivar islocal: C{False} when the path is on a share drive
	'''

	__slots__ = ('path', 'pathnames', 'islocal')

	def __init__(self, path):
		'''Constructor
		@param path: string (file path or file uri), list or tuple of
		path names, or another L{FilePath} object
		@raises TypeError: when C{path} is of any other type
		@raises ValueError: when C{path} is not an absolute path
		'''
		if isinstance(path, (tuple, list, str)):
			self.pathnames = _splitnormpath(path)
			self.path = _joinabspath(self.pathnames)
		elif isinstance(path, FilePath):
			self.pathnames = path.pathnames
			self.path = path.path
		else:
			raise TypeError('Cannot convert %r to a FilePath' % path)

		# Share drives are recognized by the two leading backslashes
		self.islocal = not self.pathnames[0].startswith('\\\\')

	def __repr__(self):
		return "<%s: %s>" % (self.__class__.__name__, self.path)

	def __str__(self):
		return self.path

	def __eq__(self, other):
		return isinstance(other, self.__class__) and other.path == self.path

	def __hash__(self):
		# Defining __eq__ without __hash__ makes instances unhashable,
		# objects that compare equal have the same path and thus hash
		return hash(self.path)

	def serialize_zim_config(self):
		'''Returns the file path as string for serializing the object'''
		return self.userpath

	@classmethod
	def new_from_zim_config(klass, string):
		'''Returns a new object based on the string representation for
		that path
		'''
		return klass(string)

	@property
	def uri(self):
		'''The path as "file:" URI string'''
		return _joinuri(self.pathnames)

	@property
	def basename(self):
		'''The last name of the path'''
		return self.pathnames[-1]

	@property
	def dirname(self):
		'''The path of the parent as string, or C{None} when the path
		consists of a single name
		'''
		if len(self.pathnames) >= 2:
			return _joinabspath(self.pathnames[:-1])
		else:
			return None

	@property
	def userpath(self):
		'''The path as string, paths below the home folder are
		abbreviated using "~"
		'''
		if self.ischild(_HOME):
			return '~' + SEP + self.relpath(_HOME)
		else:
			return self.path

	def get_childpath(self, path):
		'''Returns a C{FilePath} for a path below this path
		@param path: relative path as string or sequence of names
		@raises ValueError: when C{path} is not below this path
		'''
		assert path
		names = _splitnormpath(path, force_rel=True)
		if not names or names[0] == '..':
			raise ValueError('Relative path not below parent: %s' % path)
		return FilePath(self.pathnames + names)

	def get_abspath(self, path):
		'''Returns a C{FilePath} for C{path} where C{path} can be
		either an absolute path or a path relative to this path
		(either upward or downward - use L{get_childpath()} to only
		get child paths).
		'''
		try:
			return FilePath(path)
		except ValueError:
			# Not an absolute path
			names = _splitnormpath(path)
			return FilePath(self.pathnames + names)

	def ischild(self, parent):
		'''Check whether this path is below C{parent}
		@param parent: a L{FilePath} object
		@returns: C{True} when C{parent} is a real parent of this path,
		C{False} otherwise, also when both paths are the same
		'''
		names = parent.pathnames
		return len(names) < len(self.pathnames) \
			and self.pathnames[:len(names)] == names

	def relpath(self, start, allowupward=False):
		'''Get the path relative to another path
		@param start: a L{FilePath} object
		@param allowupward: when C{True} the result can start with one
		or more ".." elements to go up from C{start}
		@returns: the relative path as string, empty when both paths
		are the same
		@raises ValueError: when C{start} is not a parent of this path
		and C{allowupward} is C{False}, or when there is no common parent
		'''
		if allowupward and not self.ischild(start):
			parent = self.commonparent(start)
			if parent is None:
				raise ValueError('No common parent between %s and %s' % (self.path, start.path))
			relpath = self.relpath(parent)
			level_up = len(start.pathnames) - len(parent.pathnames)
			return (('..' + SEP) * level_up) + relpath
		else:
			names = start.pathnames
			if not self.pathnames[:len(names)] == names:
				raise ValueError('Not a parent path: %s' % start.path)
			return SEP.join(self.pathnames[len(names):])

	def commonparent(self, other):
		'''Find the deepest path that contains both this path and C{other}
		@param other: a L{FilePath} object
		@returns: a L{FilePath} object, which is the path itself when
		both paths are the same, or C{None} when the first names of
		the paths differ
		'''
		if self.pathnames[0] != other.pathnames[0]:
			return None # also prevent other drives and other shares
		elif self.pathnames == other.pathnames:
			# Same path - without this check the loop below finds no
			# difference and None would be returned
			return self
		elif self.ischild(other):
			return other
		elif other.ischild(self):
			return self
		else:
			for i in range(1, len(self.pathnames)):
				if self.pathnames[:i + 1] != other.pathnames[:i + 1]:
					return FilePath(self.pathnames[:i])
345
346
347
# FilePath for the home folder ("~"), FilePath.userpath uses it to
# abbreviate paths that are below the home folder
_HOME = FilePath('~')
349
class FSObjectMeta(type):
	'''Meta class that makes C{isinstance()} look through wrappers.
	An object counts as instance of a class when it either is a regular
	instance of that class, or when it wraps such an instance in its
	C{_inner_fs_object} attribute. Main use case is a filtered version
	of a folder object where e.g. C{isinstance(folder, LocalFolder)} is
	used to check whether the underlying resources exist external to the
	application.
	'''

	def __instancecheck__(cls, instance):
		klass = instance.__class__
		if klass == cls or issubclass(klass, cls):
			return True

		# Not an instance itself, but it may be a wrapper around one
		return hasattr(instance, '_inner_fs_object') \
			and isinstance(instance._inner_fs_object, cls)
365
366
class FSObjectBase(FilePath, metaclass=FSObjectMeta):
	'''Base class for L{File} and L{Folder}

	Most methods only define the interface and raise
	C{NotImplementedError}, these are to be implemented by subclasses.
	'''

	def __init__(self, path, watcher=None):
		'''Constructor
		@param path: anything accepted by the L{FilePath} constructor
		@param watcher: optional object, stored in the C{watcher}
		attribute
		@raises ValueError: when the path is on a share drive while
		C{FS_SUPPORT_NON_LOCAL_FILE_SHARES} is not set
		'''
		FilePath.__init__(self, path)
		if not FS_SUPPORT_NON_LOCAL_FILE_SHARES and not self.islocal:
			raise ValueError('File system does not support non-local files')

		self.watcher = watcher

	def isequal(self, other):
		'''Check file paths are equal based on stat results (inode
		number etc.). Intended to detect when two files or dirs are the
		same on case-insensitive filesystems. Does not explicitly check
		the content is the same.
		@param other: an other L{FilePath} object
		@returns: C{True} when the two paths are one and the same file
		'''
		raise NotImplementedError

	# Interface methods - to be implemented by subclasses

	def parent(self):
		raise NotImplementedError

	def ctime(self):
		raise NotImplementedError

	def mtime(self):
		raise NotImplementedError

	def exists(self):
		raise NotImplementedError

	def iswritable(self):
		raise NotImplementedError

	def touch(self):
		raise NotImplementedError

	def moveto(self, other):
		raise NotImplementedError

	def copyto(self, other):
		raise NotImplementedError

	def _set_mtime(self, mtime):
		raise NotImplementedError

	def _moveto(self, other):
		'''Generic move: copy to C{other} and remove the original
		@param other: the target object
		'''
		# Pass both objects as separate arguments, a single tuple does
		# not match the two "%s" and results in a logging error
		logger.debug('Cross FS type move %s --> %s', self, other)
		self._copyto(other)
		self.remove()

	def remove(self, cleanup=True):
		raise NotImplementedError

	def _cleanup(self):
		'''Try to remove the parent folder, this is silently skipped
		when that results in a C{ValueError} or a L{FolderNotEmptyError}
		'''
		try:
			self.parent().remove()
		except (ValueError, FolderNotEmptyError):
			pass
427
428
class Folder(FSObjectBase):
	'''Base class for folder implementations. Cannot be instantiated
	directly; use one of the subclasses instead. Main use outside of
	this module is to check C{isinstance(object, Folder)}.
	'''

	def __init__(self, path):
		raise NotImplementedError('This class is not meant to be instantiated directly')

	def __iter__(self):
		'''Iterate objects for both the files and the folders in this folder'''
		names = self.list_names()
		return self._object_iter(names, True, True)

	def list_files(self):
		'''Iterate objects for the files in this folder'''
		names = self.list_names()
		return self._object_iter(names, True, False)

	def list_folders(self):
		'''Iterate objects for the folders in this folder'''
		names = self.list_names()
		return self._object_iter(names, False, True)

	# Interface methods - to be implemented by subclasses

	def _object_iter(self, names, showfile, showdir):
		raise NotImplementedError

	def list_names(self, include_hidden=False):
		raise NotImplementedError

	def walk(self):
		'''Generator that recursively yields all children of this
		folder, each folder is directly followed by its own content
		'''
		for child in self:
			yield child
			if isinstance(child, Folder):
				yield from child.walk()

	def file(self, path):
		raise NotImplementedError

	def folder(self, path):
		raise NotImplementedError

	def child(self, path):
		raise NotImplementedError

	def new_file(self, path, check=None):
		'''Get a L{File} object for a new file below this folder.
		Like L{file()} but guarantees the file does not yet exist by
		adding sequential numbers if needed. So the resulting file
		may have a modified name.

		@param path: the relative file path
		@param check: a function that can check and reject the choice before it
		is given back
		@returns: a L{File} object
		'''
		return self._new_child(path, self.file, check)

	def new_folder(self, path, check=None):
		'''Get a L{Folder} object for a new folder below this folder.
		Like L{folder()} but guarantees the file does not yet exist by
		adding sequential numbers if needed. So the resulting file
		may have a modified name.

		@param path: the relative file path
		@param check: a function that can check and reject the choice before it
		is given back
		@returns: a L{Folder} object
		'''
		return self._new_child(path, self.folder, check)

	def _new_child(self, path, factory, check=None):
		'''Find a child path that does not exist yet
		First tries C{path} itself, then the same path with a number
		from "001" up to "999" inserted before the extension.
		@param path: the relative path
		@param factory: function to create the object for a relative path
		@param check: optional function that gets the new object and
		returns C{False} to reject it
		@returns: the object created by C{factory}
		@raises Exception: when none of the candidates is available
		'''
		# Escape "%" because the path is used as a format string below
		p = self.get_childpath(path.replace('%', '%%'))
		relpath = p.relpath(self)
		if '.' in p.basename:
			basename, ext = p.basename.split('.', 1)
			# Cut "." + ext off the end, slicing by the length of the
			# name goes wrong when the path includes a subfolder
			pattern = relpath[:-(len(ext) + 1)] + '%03i.' + ext
		else:
			pattern = relpath + '%03i'

		trypath = path
		for i in range(1, 1001):
			try:
				existing = self.child(trypath) # this way we catch both exiting files and folders
			except FileNotFoundError:
				child = factory(trypath)
				if check is None or check(child):
					return child
				else:
					logger.debug('File rejected by check "%s" trying increment', child.path)
			else:
				logger.debug('File exists "%s" trying increment', existing.path)

			trypath = pattern % i

		raise Exception('Could not find new file for: %s' % path)

	def remove_children(self):
		'''Recursively remove everything below this folder .

		B{WARNING:} This is quite powerful and can do a lot of damage
		when executed for the wrong folder, so please make sure to double
		check the dir is actually what you think it is before calling this.
		'''
		for name in self.list_names(include_hidden=True):
			child = self.child(name)
			# just to be real sure - raised explicitly such that the
			# check is not skipped when python runs with "-O"
			if not child.path.startswith(self.path):
				raise AssertionError('Child path not below folder: %s' % child.path)
			if isinstance(child, Folder):
				child.remove_children()
			child.remove()

	def _copyto(self, other):
		'''Recursively copy this folder to a new folder
		@param other: the target folder object
		@raises FileExistsError: when C{other} already exists
		'''
		if other.exists():
			raise FileExistsError(other)
		other.touch()
		for child in self:
			if isinstance(child, File):
				child.copyto(other.file(child.basename))
			else:
				child.copyto(other.folder(child.basename))
		other._set_mtime(self.mtime())
549
550
# Select the backend to look up mimetypes: after this block either
# "xdgmime" or "mimetypes" is set to a module while the other one is
# still None. File.mimetype() uses "xdgmime" when it is set and
# "mimetypes" otherwise.
xdgmime = None
mimetypes = None
if os.name == 'nt':
	# On windows even if xdg is installed, the database is not (always)
	# well initialized, so always fallback to mimetypes
	import mimetypes
else:
	try:
		import xdg.Mime as xdgmime
	except ImportError:
		logger.info("Can not import 'xdg.Mime' - falling back to 'mimetypes'")
		import mimetypes
563
#: Extensions to determine image mimetypes - used in L{File.isimage()}
#: Entries are lower case and do not include the leading dot, they are
#: compared with the lower case version of the file name.
IMAGE_EXTENSIONS = (
	# Gleaned from Gdk.get_formats()
	'bmp', # image/bmp
	'gif', # image/gif
	'icns', # image/x-icns
	'ico', # image/x-icon
	'cur', # image/x-icon
	'jp2', # image/jp2
	'jpc', # image/jp2
	'jpx', # image/jp2
	'j2k', # image/jp2
	'jpf', # image/jp2
	'jpeg', # image/jpeg
	'jpe', # image/jpeg
	'jpg', # image/jpeg
	'pcx', # image/x-pcx
	'png', # image/png
	'pnm', # image/x-portable-anymap
	'pbm', # image/x-portable-anymap
	'pgm', # image/x-portable-anymap
	'ppm', # image/x-portable-anymap
	'ras', # image/x-cmu-raster
	'tga', # image/x-tga
	'targa', # image/x-tga
	'tiff', # image/tiff
	'tif', # image/tiff
	'wbmp', # image/vnd.wap.wbmp
	'xbm', # image/x-xbitmap
	'xpm', # image/x-xpixmap
	'wmf', # image/x-wmf
	'apm', # image/x-wmf
	'svg', # image/svg+xml
	'svgz', # image/svg+xml
	'svg.gz', # image/svg+xml
	# Custom additions
	'webp', # image/webp
)
602
603
def _md5(content):
	'''Calculate the MD5 digest for file content
	@param content: either a single string or an iterable of strings,
	e.g. a list of lines. Strings are encoded as UTF-8, content that is
	already encoded as C{bytes} can be provided as well and is used
	as-is to avoid double work.
	@returns: the digest as C{bytes}
	'''
	if isinstance(content, (str, bytes)):
		# Wrap to prevent iterating the single characters or bytes
		content = (content,)

	m = hashlib.md5()
	for chunk in content:
		if isinstance(chunk, str):
			chunk = chunk.encode('UTF-8')
		m.update(chunk)
	return m.digest()
613
614
class File(FSObjectBase):
	'''Base class for file implementations. Cannot be instantiated
	directly; use one of the subclasses instead. Main use outside of
	this module is to check C{isinstance(object, File)}.

	The "etag" used by the C{*_with_etag()} methods is a 2-tuple of the
	mtime of the file and the MD5 digest of the content.
	'''

	# Cache for the result of mimetype(). This class level default makes
	# sure the attribute exists, also when a subclass does not initialize
	# it in the constructor.
	_mimetype = None

	def __init__(self, path, endofline=_EOL):
		raise NotImplementedError('This class is not meant to be instantiated directly')

	def __iter__(self):
		return iter(self.readlines())

	def isimage(self):
		'''Check if this is an image file. Convenience method that
		works even when no real mime-type suport is available.
		If this method returns C{True} it is no guarantee
		this image type is actually supported by Gtk.
		@returns: C{True} when this is an image file
		'''
		# Quick shortcut to be able to load images in the gui even if
		# we have no proper mimetype support
		# Match on the end of the name instead of splitting on the last
		# ".", such that multi-part extensions like "svg.gz" work as well
		suffixes = tuple('.' + ext for ext in IMAGE_EXTENSIONS)
		if self.basename.lower().endswith(suffixes):
			return True

		return self.mimetype().startswith('image/')

	def mimetype(self):
		'''Get the mime-type for this file.
		Will use the XDG mimetype system if available, otherwise
		fallsback to the standard library C{mimetypes}.
		The result is cached in the C{_mimetype} attribute.
		@returns: the mimetype as a string, e.g. "text/plain"
		'''
		if self._mimetype is None:
			if xdgmime:
				mimetype = xdgmime.get_type(self.path, name_pri=80)
				self._mimetype = str(mimetype)
			else:
				mimetype, encoding = mimetypes.guess_type(self.path, strict=False)
				# For compressed files report the type of the compression
				# instead of the type of the compressed content
				if encoding == 'gzip':
					mimetype = 'application/x-gzip'
				elif encoding == 'bzip2':
					mimetype = 'application/x-bzip2'
				elif encoding == 'compress':
					mimetype = 'application/x-compress'
				self._mimetype = mimetype or 'application/octet-stream'

		return self._mimetype

	# Interface methods - to be implemented by subclasses

	def size(self):
		raise NotImplementedError

	def read(self, size=-1):
		raise NotImplementedError

	def readline(self, size=-1):
		raise NotImplementedError

	def readlines(self):
		raise NotImplementedError

	def read_binary(self):
		raise NotImplementedError

	def touch(self):
		'''Create the file by writing an empty string, does nothing
		when the file already exists
		'''
		if not self.exists():
			self.write('')

	def write(self, text):
		raise NotImplementedError

	def writelines(self, lines):
		raise NotImplementedError

	def write_binary(self, data):
		raise NotImplementedError

	@contextlib.contextmanager
	def _write_decoration(self):
		'''Context manager to wrap the actual writing of the file
		Before the write it makes sure the parent folder exists for a new
		file. After the write it emits either "changed" or "created"
		on the watcher, if there is one. The signal is not emitted when
		the wrapped code raises an exception.
		@raises FileNotWritableError: when the file exists but is not
		writable
		'''
		existed = self.exists()
		if not existed:
			self.parent().touch()
		elif not self.iswritable():
			raise FileNotWritableError(self)

		yield

		if self.watcher:
			if existed:
				self.watcher.emit('changed', self)
			else:
				self.watcher.emit('created', self)

	def read_with_etag(self):
		'''Like C{read()} but returns a 2-tuple of the content and the etag'''
		return self._read_with_etag(self.read)

	def readlines_with_etag(self):
		'''Like C{readlines()} but returns a 2-tuple of the lines and the etag'''
		return self._read_with_etag(self.readlines)

	def _read_with_etag(self, func):
		mtime = self.mtime() # Get before read!
		content = func()
		etag = (mtime, _md5(content))
		return content, etag

	def write_with_etag(self, text, etag):
		'''Like C{write()} but only writes when the file still matches
		C{etag}
		@returns: the new etag
		@raises FileChangedError: when the file does not match C{etag}
		'''
		return self._write_with_etag(self.write, text, etag)

	def writelines_with_etag(self, lines, etag):
		'''Like C{writelines()} but only writes when the file still
		matches C{etag}
		@returns: the new etag
		@raises FileChangedError: when the file does not match C{etag}
		'''
		return self._write_with_etag(self.writelines, lines, etag)

	def _write_with_etag(self, func, content, etag):
		# TODO, to make rock-solid would also need to lock the file
		# before etag check and release after write

		if not self.exists():
			# Goal is to prevent overwriting new content. If the file
			# does not yet exist or went missing, just write it anyway.
			pass
		else:
			if not self.verify_etag(etag):
				raise FileChangedError(self)

		func(content)
		return (self.mtime(), _md5(content))

	def verify_etag(self, etag):
		'''Check whether the file still matches an etag
		The content is only read and compared when the mtime differs.
		@param etag: 2-tuple of mtime and MD5 digest
		@returns: C{True} when either the mtime or the digest of the
		content is the same as in C{etag}
		@raises AssertionError: when C{etag} is not a 2-tuple
		'''
		if isinstance(etag, tuple) and len(etag) == 2:
			mtime = self.mtime()
			if etag[0] != mtime:
				# mtime fails .. lets see about md5
				md5 = _md5(self.read())
				return etag[1] == md5
			else:
				return True
		else:
			raise AssertionError('Invalid etag: %r' % etag)

	def _copyto(self, other):
		'''Copy the binary content and the mtime to a new file
		@param other: the target file object
		@raises FileExistsError: when C{other} already exists
		'''
		if other.exists():
			raise FileExistsError(other)
		other.write_binary(self.read_binary())
		other._set_mtime(self.mtime())
759