1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
2#
3# Copyright 2002 Ben Escoto <ben@emerose.org>
4# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
5#
6# This file is part of duplicity.
7#
8# Duplicity is free software; you can redistribute it and/or modify it
9# under the terms of the GNU General Public License as published by the
10# Free Software Foundation; either version 2 of the License, or (at your
11# option) any later version.
12#
13# Duplicity is distributed in the hope that it will be useful, but
14# WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with duplicity; if not, write to the Free Software Foundation,
20# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22u"""
Provides a common interface to all backends and certain services
24intended to be used by the backends themselves.
25"""
26
27from future import standard_library
28standard_library.install_aliases()
29from builtins import str
30from builtins import range
31from builtins import object
32
33import errno
34import os
35import sys
36import time
37import re
38import getpass
39import re
40import urllib.request  # pylint: disable=import-error
41import urllib.parse  # pylint: disable=import-error
42import urllib.error  # pylint: disable=import-error
43
44from duplicity import dup_temp
45from duplicity import file_naming
46from duplicity import config
47from duplicity import log
48from duplicity import path
49from duplicity import util
50
51from duplicity.util import exception_traceback
52
53from duplicity.errors import BackendException
54from duplicity.errors import FatalBackendException
55from duplicity.errors import TemporaryLoadException
56from duplicity.errors import ConflictingScheme
57from duplicity.errors import InvalidBackendURL
58from duplicity.errors import UnsupportedBackendScheme
59
60import duplicity.backends
61
# Backend factories keyed by exact URL scheme; filled by register_backend()
# and looked up by get_backend_object().
_backends = {}
# Backend factories keyed by scheme prefix (the part before the '+' in
# e.g. par2+ftp://...); filled by register_backend_prefix().
_backend_prefixes = {}
# Most recent exception caught by the retry() wrapper; None until one occurs.
_last_exception = None

# These URL schemes have a backend with a notion of an RFC "network location".
# The 'file' and 's3+http' schemes should not be in this list.
# 'http' and 'https' are not actually used for duplicity backend urls, but are
# needed in order to properly support urls returned from some webdav servers.
# Adding them here is a hack; we should instead not stomp on the url parsing
# module to begin with.
#
# This looks similar to urlparse's 'uses_netloc' list, but urlparse doesn't use
# that list for parsing, only creating urls.  And doesn't include our custom
# schemes anyway.  So we keep our own here for our own use.
#
# NOTE: this is filled by the respective backends during registering
uses_netloc = []
78
79
def import_backends():
    u"""
    Import every module of the duplicity.backends package whose file name
    ends in 'backend.py'; all other directory entries are skipped.

    Files are processed in sorted order.  The outcome of each import attempt
    (success or the failure message) is logged at INFO level, and a failing
    import does not stop the remaining ones.

    @rtype: void
    @return: void
    """
    backends_dir = duplicity.backends.__path__[0]
    assert backends_dir.endswith(u"duplicity/backends"), duplicity.backends.__path__

    for filename in sorted(os.listdir(backends_dir)):
        if not filename.endswith(u"backend.py"):
            continue
        # drop the trailing '.py' to obtain the module name
        module_name = u"duplicity.backends.%s" % (filename[:-3],)
        try:
            __import__(module_name)
        except Exception:
            outcome = u"Failed: " + str(sys.exc_info()[1])
        else:
            outcome = u"Succeeded"
        log.Log(_(u"Import of %s %s") % (module_name, outcome), log.INFO)
105
106
def register_backend(scheme, backend_factory):
    u"""
    Associate backend_factory with URLs of exactly the given scheme.

    backend_factory must be callable; when called with a parsed URL as
    its only argument it returns an object implementing the backend
    protocol (i.e., a subclass of Backend).  Usually it is the Backend
    subclass itself.

    Raises ConflictingScheme if the scheme is already registered.

    Not thread-safe; meant to be called during module importation or
    start-up.
    """
    global _backends

    assert callable(backend_factory), u"backend factory must be callable"

    if scheme not in _backends:
        _backends[scheme] = backend_factory
        return

    raise ConflictingScheme(u"the scheme %s already has a backend "
                            u"associated with it"
                            u"" % (scheme,))
131
132
def register_backend_prefix(scheme, backend_factory):
    u"""
    Associate backend_factory with URLs whose scheme starts with the
    given prefix followed by '+'.

    backend_factory must be callable; when called with a parsed URL as
    its only argument it returns an object implementing the backend
    protocol (i.e., a subclass of Backend).  Usually it is the Backend
    subclass itself.

    Raises ConflictingScheme if the prefix is already registered.

    Not thread-safe; meant to be called during module importation or
    start-up.
    """
    global _backend_prefixes

    assert callable(backend_factory), u"backend factory must be callable"

    already_taken = scheme in _backend_prefixes
    if already_taken:
        raise ConflictingScheme(u"the prefix %s already has a backend "
                                u"associated with it"
                                u"" % (scheme,))

    _backend_prefixes[scheme] = backend_factory
157
158
def strip_prefix(url_string, prefix_scheme):
    u"""
    Remove a leading "<prefix_scheme>+" from url_string, matching the
    prefix case-insensitively, e.g. par2+ftp://... -> ftp://...

    A string that does not start with that prefix is returned unchanged.
    """
    # re.escape keeps characters such as '.' in the prefix literal
    leading_prefix = re.compile(r'^' + re.escape(prefix_scheme) + r'\+', re.IGNORECASE)
    return leading_prefix.sub(r'', url_string)
164
165
def is_backend_url(url_string):
    u"""
    @return Whether the given string looks like a backend URL, i.e.
            whether parsing it yields a non-empty scheme.
    """
    # bool() makes sure a real True/False is returned, not the scheme string.
    return bool(ParsedUrl(url_string).scheme)
177
178
def get_backend_object(url_string):
    u"""
    Build the backend instance responsible for url_string, or return None
    if the string looks like a local path rather than a URL.

    A registered prefix ("<prefix>+...") takes precedence: its factory is
    used and receives the URL parsed with the prefix stripped.  Otherwise
    the factory registered for the URL's scheme is used.

    Raise InvalidBackendURL if the URL is not a valid URL,
    UnsupportedBackendScheme if no backend handles the scheme, and
    BackendException if the factory fails with an ImportError.
    """
    global _backends, _backend_prefixes

    if not is_backend_url(url_string):
        return None

    parsed = ParsedUrl(url_string)
    assert parsed.scheme, u"should be a backend url according to is_backend_url"

    factory = None

    # the first registered prefix that matches wins
    for candidate in _backend_prefixes:
        if not url_string.startswith(candidate + u'+'):
            continue
        factory = _backend_prefixes[candidate]
        parsed = ParsedUrl(strip_prefix(url_string, candidate))
        break

    if factory is None:
        if parsed.scheme not in _backends:
            raise UnsupportedBackendScheme(url_string)
        factory = _backends[parsed.scheme]

    try:
        backend_object = factory(parsed)
    except ImportError:
        raise BackendException(_(u"Could not initialize backend: %s") % str(sys.exc_info()[1]))
    return backend_object
212
213
def get_backend(url_string):
    u"""
    Return a BackendWrapper around the backend suitable for the given URL,
    or None if the given string looks like a local path rather than a URL.

    When config.use_gio is set, the URL is prefixed with 'gio+' before the
    backend is looked up.

    Raise InvalidBackendURL if the URL is not a valid URL.
    """
    target_url = (u'gio+' + url_string) if config.use_gio else url_string
    backend = get_backend_object(target_url)
    return BackendWrapper(backend) if backend else backend
227
228
class ParsedUrl(object):
    u"""
    A duplicity backend URL, parsed eagerly.

    Exposes the same attribute names as the result of the standard
    urlparse.urlparse() (scheme, netloc, path, username, password,
    hostname, query, port) plus query_args, but every value is resolved
    in the constructor instead of lazily, so that URL parsing errors are
    detected early.  There are no get_* members.

    Raise InvalidBackendURL on invalid URL's
    """
    def __init__(self, url_string):
        self.url_string = url_string

        # Python < 2.6.5 still examines urlparse.uses_netloc when parsing
        # urls, so stuff our custom list in there before we parse.
        urllib.parse.uses_netloc = uses_netloc

        # The urlparser computes most properties lazily, which means that
        # problems only surface when a property is first read.  Every
        # property is therefore read here, each in its own try block, so
        # the error can name the offending part.

        try:
            parts = urllib.parse.urlparse(url_string)
        except Exception:
            raise InvalidBackendURL(u"Syntax error in: %s" % url_string)

        try:
            self.scheme = parts.scheme
        except Exception:
            raise InvalidBackendURL(u"Syntax error (scheme) in: %s" % url_string)

        try:
            self.netloc = parts.netloc
        except Exception:
            raise InvalidBackendURL(u"Syntax error (netloc) in: %s" % url_string)

        try:
            raw_path = parts.path
            # percent-decoding is part of the guarded section on purpose
            self.path = urllib.parse.unquote(raw_path) if raw_path else raw_path
        except Exception:
            raise InvalidBackendURL(u"Syntax error (path) in: %s" % url_string)

        try:
            raw_username = parts.username
        except Exception:
            raise InvalidBackendURL(u"Syntax error (username) in: %s" % url_string)
        self.username = urllib.parse.unquote(raw_username) if raw_username else None

        try:
            raw_password = parts.password
        except Exception:
            raise InvalidBackendURL(u"Syntax error (password) in: %s" % url_string)
        self.password = urllib.parse.unquote(raw_password) if raw_password else None

        try:
            self.hostname = parts.hostname
        except Exception:
            raise InvalidBackendURL(u"Syntax error (hostname) in: %s" % url_string)

        try:
            raw_query = parts.query
        except Exception:
            raise InvalidBackendURL(u"Syntax error (query) in: %s" % url_string)
        if raw_query:
            self.query = raw_query
            self.query_args = urllib.parse.parse_qs(raw_query)
        else:
            # an empty query is normalised to None / no arguments
            self.query = None
            self.query_args = {}

        try:
            port = parts.port
        except Exception:  # not raised in python2.7, just returns None
            # TODO: remove after dropping python 2.7 support
            port = None
            # rclone urls, and old style rsync://host::[/]dest urls (still
            # valid, though they contain no port), are let through
            tolerated = (self.scheme in [u'rclone'] or
                         (u'rsync' in self.scheme and re.search(u'::[^:]*$', self.url_string)))
            if not tolerated:
                raise InvalidBackendURL(u"Syntax error (port) in: %s A%s B%s C%s" %
                                        (url_string, (u'rsync' in self.scheme),
                                         re.search(u'::[^:]+$', self.netloc), self.netloc))
        self.port = port

        # Our URL system uses two slashes more than urlparse's does when using
        # non-netloc URLs.  And if urlparse assumed a netloc where we don't
        # want one, fold it back into the path.
        scheme_has_netloc = self.scheme in uses_netloc
        if not scheme_has_netloc:
            if self.netloc:
                self.path = u'//' + self.netloc + self.path
                self.netloc = u''
                self.hostname = None
            elif self.path.startswith(u'/') and not self.path.startswith(u'//'):
                self.path = u'//' + self.path

        # This happens for implicit local paths.
        if not self.scheme:
            return

        if scheme_has_netloc:
            # Our backends do not handle implicit hosts.
            if not self.hostname:
                raise InvalidBackendURL(u"Missing hostname in a backend URL which "
                                        u"requires an explicit hostname: %s"
                                        u"" % (url_string))
        elif not self.path.startswith(u'//'):
            # Our backends do not handle implicit relative paths.
            raise InvalidBackendURL(u"missing // - relative paths not supported "
                                    u"for scheme %s: %s"
                                    u"" % (self.scheme, url_string))

    def geturl(self):
        u"""Return the URL string exactly as it was passed to the constructor."""
        return self.url_string
350
351
def strip_auth_from_url(parsed_url):
    u"""
    Return the URL of parsed_url without a username or password.

    parsed_url is any object with a geturl() method returning the URL
    string (a urlparse result or a ParsedUrl).  Only a "user[:password]@"
    part located in the authority component, i.e. between "scheme://" and
    the first '/', '?' or '#', is removed.  An '@' occurring later in the
    URL (for instance in the path) is left untouched, so the host and path
    are never cut away.  A string without "scheme://" is returned unchanged.
    """
    # Group 2 may not cross '/', '?' or '#': a plain '.*@' would greedily
    # swallow everything up to the last '@' anywhere in the URL.
    clean_url = re.sub(u'^([^:/]+://)([^/?#]*@)?(.*)', r'\1\3', parsed_url.geturl())
    return clean_url
357
358
def _get_code_from_exception(backend, operation, e):
    u"""
    Map exception e, raised by backend during operation, to a log.ErrorCode.

    Order of precedence:
      1. a BackendException carrying a code other than the generic
         backend_error keeps its own code;
      2. a backend providing an _error_code(operation, e) hook decides
         (a falsy answer means backend_error);
      3. an exception with an errno attribute is mapped for EACCES, ENOENT
         and ENOSPC;
      4. anything else is backend_error.
    """
    generic_code = log.ErrorCode.backend_error

    if isinstance(e, BackendException) and e.code != generic_code:
        return e.code

    if hasattr(backend, u'_error_code'):
        return backend._error_code(operation, e) or generic_code

    if hasattr(e, u'errno'):
        # A few backends return such errors (local, paramiko, etc)
        errno_codes = (
            (errno.EACCES, log.ErrorCode.backend_permission_denied),
            (errno.ENOENT, log.ErrorCode.backend_not_found),
            (errno.ENOSPC, log.ErrorCode.backend_no_space),
        )
        for number, code in errno_codes:
            if e.errno == number:
                return code

    return generic_code
373
374
def retry(operation, fatal=True):
    u"""
    Decorator factory adding retry handling to a method of an object that
    has a 'backend' attribute (see BackendWrapper).

    operation is the operation name: it is the key looked up in
    config.are_errors_fatal, is passed to _get_code_from_exception() and is
    included in the 'extra' information of the fatal log message.

    fatal selects what happens once no attempt is left: with True,
    log.FatalError() is called; with False only a warning is logged and the
    wrapped call returns None.

    The wrapped method is attempted up to config.num_retries times.  For
    every exception the module-level _last_exception is updated, then:

      - if config.are_errors_fatal marks the operation as non-fatal, the
        configured default value is returned immediately;
      - a FatalBackendException is re-raised without retrying;
      - any other exception triggers a retry after a delay of
        config.backend_retry_delay seconds (three times that for a
        TemporaryLoadException) and after calling the backend's
        _retry_cleanup() hook if it has one.  No further attempt is made
        when the error code is backend_not_found.
    """
    # local import: keeps this block independent of the file's import list
    import functools

    def make_filename(f):
        if isinstance(f, path.ROPath):
            return util.escape(f.uc_name)
        else:
            return util.escape(f)

    # Decorators with arguments introduce a new level of indirection.  So we
    # have to return a decorator function (which itself returns a function!)
    def outer_retry(fn):
        # wraps() keeps the name and docstring of the decorated method
        @functools.wraps(fn)
        def inner_retry(self, *args, **kwargs):
            global _last_exception
            errors_fatal, errors_default = config.are_errors_fatal.get(operation, (True, None))
            for n in range(1, config.num_retries + 1):
                try:
                    # keyword arguments are forwarded as well, so e.g.
                    # put(path, remote_filename=name) works
                    return fn(self, *args, **kwargs)
                except FatalBackendException as e:
                    _last_exception = e
                    if not errors_fatal:
                        # backend wants to report and ignore errors
                        return errors_default
                    # die on fatal errors
                    raise
                except Exception as e:
                    _last_exception = e
                    if not errors_fatal:
                        # backend wants to report and ignore errors
                        return errors_default

                    # retry on anything else
                    log.Debug(_(u"Backtrace of previous error: %s")
                              % exception_traceback())
                    code = _get_code_from_exception(self.backend, operation, e)
                    # If we tried to do something, but the file just isn't
                    # there, no need to retry.
                    at_end = (n == config.num_retries or
                              code == log.ErrorCode.backend_not_found)
                    if at_end and fatal:
                        call_values = list(args) + list(kwargs.values())
                        extra = u' '.join([operation] + [make_filename(x) for x in call_values
                                                         if (x and isinstance(x, str))])
                        log.FatalError(_(u"Giving up after %s attempts. %s: %s")
                                       % (n, e.__class__.__name__,
                                          util.uexc(e)), code=code, extra=extra)
                    else:
                        log.Warn(_(u"Attempt %s failed. %s: %s")
                                 % (n, e.__class__.__name__, util.uexc(e)))
                    if at_end:
                        # Leave the loop explicitly: without this, a
                        # non-fatal "not found" would be retried at once,
                        # without delay, until the attempts ran out.
                        break
                    if isinstance(e, TemporaryLoadException):
                        time.sleep(3 * config.backend_retry_delay)  # wait longer before trying again
                    else:
                        time.sleep(config.backend_retry_delay)  # wait a bit before trying again
                    if hasattr(self.backend, u'_retry_cleanup'):
                        self.backend._retry_cleanup()
            # every attempt failed and the failure was not fatal
            return None

        return inner_retry
    return outer_retry
432
433
class Backend(object):
    u"""
    Base class for backends: stores the parsed URL and offers password
    and subprocess helpers.

    See README in backends directory for information on how to write a backend.
    """
    def __init__(self, parsed_url):
        self.parsed_url = parsed_url

    # use getpass by default, inherited backends may overwrite this behaviour
    use_getpass = True

    def get_password(self):
        u"""
        Return a password for authentication purposes.

        Sources, in order: the password of the backend URL, the
        FTP_PASSWORD environment variable, and, only if use_getpass is
        true, an interactive prompt.  A password obtained from the prompt
        is stored in FTP_PASSWORD so later calls do not ask again.

        Returns None when no password is available and prompting is
        disabled.
        """
        if self.parsed_url.password:
            return self.parsed_url.password

        try:
            password = os.environ[u'FTP_PASSWORD']
        except KeyError:
            if self.use_getpass:
                password = getpass.getpass(u"Password for '%s@%s': " %
                                           (self.parsed_url.username, self.parsed_url.hostname))
                os.environ[u'FTP_PASSWORD'] = password
            else:
                password = None
        return password

    def munge_password(self, commandline):
        u"""
        Remove password from commandline by substituting anything that
        looks like ':password@' with a generic place-holder.  Nothing is
        substituted when the backend URL carries no password.

        This is intended for display purposes only, and it is not
        guaranteed that the results are correct (i.e., more than just
        the ':password@' may be substituted).
        """
        if self.parsed_url.password:
            return re.sub(r'(:([^\s:/@]+)@([^\s@]+))', r':*****@\3', commandline)
        else:
            return commandline

    def __subprocess_popen(self, args):
        u"""
        For internal use.
        Execute the given argument list (no shell involved).
        Returns int Exitcode, string StdOut, string StdErr

        NOTE: args must be a mutable list; its first element is replaced
        in place by the result of util.which().
        """
        from subprocess import Popen, PIPE

        args[0] = util.which(args[0])
        p = Popen(args, stdout=PIPE, stderr=PIPE, universal_newlines=True)
        stdout, stderr = p.communicate()

        return p.returncode, stdout, stderr

    # a dictionary for breaking exceptions, syntax is
    # { 'command' : [ code1, code2 ], ... } see ftpbackend for an example
    popen_breaks = {}

    def subprocess_popen(self, commandline):
        u"""
        Execute the given command line with error check.

        commandline is either a list/tuple of arguments or a single
        string, which is split with shlex.  A sequence passed in is never
        modified.

        Returns int Exitcode, string StdOut, string StdErr.  If the exit
        code is listed in popen_breaks for the command, (0, '', '') is
        returned instead.

        Raise a BackendException on any other non-zero exit code.
        """
        import shlex

        if isinstance(commandline, (list, tuple)):
            logstr = u' '.join(commandline)
            # Private copy: __subprocess_popen rewrites element 0 in place,
            # which must not leak into the caller's list and would fail
            # outright on a tuple.
            args = list(commandline)
        else:
            logstr = commandline
            args = shlex.split(commandline)

        logstr = self.munge_password(logstr)
        log.Info(_(u"Reading results of '%s'") % logstr)

        # remember the command as written, before it gets rewritten
        command = args[0]
        result, stdout, stderr = self.__subprocess_popen(args)
        if result != 0:
            # popen_breaks is documented as keyed by command; the rewritten
            # args[0] is also accepted, since that is the key earlier
            # versions of this method ended up looking up.
            for key in (command, args[0]):
                if result in self.popen_breaks.get(key, ()):
                    # ignore a predefined set of error codes
                    return 0, u'', u''
            raise BackendException(u"Error running '%s': returned %d, with output:\n%s" %
                                   (logstr, result, stdout + u'\n' + stderr + u'\n'))
        return result, stdout, stderr
527
528
class BackendWrapper(object):
    u"""
    Represents a generic duplicity backend, capable of storing and
    retrieving files.

    Wraps a backend object and forwards each operation to the optional
    hook the backend provides (_put, _move, _get, _list, _delete,
    _delete_list, _query, _query_list, _close), adding the retry handling
    of the retry() decorator.
    """

    def __init__(self, backend):
        self.backend = backend

    def __do_put(self, source_path, remote_filename):
        u"""
        Upload via the backend's _put hook; NotImplementedError if the
        backend has none.
        """
        if hasattr(self.backend, u'_put'):
            log.Info(_(u"Writing %s") % util.fsdecode(remote_filename))
            self.backend._put(source_path, remote_filename)
        else:
            raise NotImplementedError()

    @retry(u'put', fatal=True)
    def put(self, source_path, remote_filename=None):
        u"""
        Transfer source_path (Path object) to remote_filename (string)

        If remote_filename is None, get the filename from the last
        path component of pathname.
        """
        if not remote_filename:
            remote_filename = source_path.get_filename()
        self.__do_put(source_path, remote_filename)

    @retry(u'move', fatal=True)
    def move(self, source_path, remote_filename=None):
        u"""
        Move source_path (Path object) to remote_filename (string)

        Same as put(), but unlinks source_path in the process.  This allows the
        local backend to do this more efficiently using rename.
        """
        if not remote_filename:
            remote_filename = source_path.get_filename()
        if hasattr(self.backend, u'_move'):
            # a _move hook returning exactly False means "not handled":
            # fall back to put + delete below
            if self.backend._move(source_path, remote_filename) is not False:
                source_path.setdata()
                return
        self.__do_put(source_path, remote_filename)
        source_path.delete()

    @retry(u'get', fatal=True)
    def get(self, remote_filename, local_path):
        u"""
        Retrieve remote_filename and place in local_path

        Raises BackendException if local_path does not exist after the
        backend reported success, NotImplementedError if the backend has
        no _get hook.
        """
        if hasattr(self.backend, u'_get'):
            self.backend._get(remote_filename, local_path)
            local_path.setdata()
            if not local_path.exists():
                raise BackendException(_(u"File %s not found locally after get "
                                         u"from backend") % local_path.uc_name)
        else:
            raise NotImplementedError()

    @retry(u'list', fatal=True)
    def list(self):
        u"""
        Return list of filenames (byte strings) present in backend
        """
        def tobytes(filename):
            u"Convert a (maybe unicode) filename to bytes"
            if isinstance(filename, str):
                # There shouldn't be any encoding errors for files we care
                # about, since duplicity filenames are ascii.  But user files
                # may be in the same directory.
                return util.fsencode(filename)
            else:
                return filename

        if hasattr(self.backend, u'_list'):
            # Make sure that duplicity internals only ever see byte strings
            # for filenames, no matter what the backend thinks it is talking.
            return [tobytes(x) for x in self.backend._list()]
        else:
            raise NotImplementedError()

    def delete(self, filename_list):
        u"""
        Delete each filename in filename_list, in order if possible.

        A backend _delete_list hook is preferred over a per-file _delete
        hook; NotImplementedError is raised if the backend has neither.
        """
        # guard against a single filename being passed instead of a list
        assert not isinstance(filename_list, bytes)
        if hasattr(self.backend, u'_delete_list'):
            self._do_delete_list(filename_list)
        elif hasattr(self.backend, u'_delete'):
            for filename in filename_list:
                self._do_delete(filename)
        else:
            raise NotImplementedError()

    @retry(u'delete', fatal=False)
    def _do_delete_list(self, filename_list):
        u"""Hand filename_list to the backend in chunks of at most 100 names."""
        while filename_list:
            sublist = filename_list[:100]
            self.backend._delete_list(sublist)
            filename_list = filename_list[100:]

    @retry(u'delete', fatal=False)
    def _do_delete(self, filename):
        u"""Delete a single file through the backend's _delete hook."""
        self.backend._delete(filename)

    # Should never cause FatalError.
    # Returns a dictionary of dictionaries.  The outer dictionary maps
    # filenames to metadata dictionaries.  Supported metadata are:
    #
    # 'size': if >= 0, size of file
    #         if -1, file is not found
    #         if None, error querying file
    #
    # Returned dictionary is guaranteed to contain a metadata dictionary for
    # each filename, and all metadata are guaranteed to be present.
    def query_info(self, filename_list):
        u"""
        Return metadata about each filename in filename_list
        """
        info = {}
        if hasattr(self.backend, u'_query_list'):
            info = self._do_query_list(filename_list)
            if info is None:
                info = {}
        elif hasattr(self.backend, u'_query'):
            for filename in filename_list:
                info[filename] = self._do_query(filename)

        # Fill out any missing entries (may happen if backend has no support
        # or its query_list support is lazy)
        for filename in filename_list:
            if filename not in info or info[filename] is None:
                info[filename] = {}
            for metadata in [u'size']:
                info[filename].setdefault(metadata, None)

        return info

    @retry(u'query', fatal=False)
    def _do_query_list(self, filename_list):
        u"""Query all files at once via _query_list; None becomes {}."""
        info = self.backend._query_list(filename_list)
        if info is None:
            info = {}
        return info

    @retry(u'query', fatal=False)
    def _do_query(self, filename):
        u"""
        Query one file via _query.  An error classified as
        backend_not_found is reported as {'size': -1}; any other error
        is re-raised.
        """
        try:
            return self.backend._query(filename)
        except Exception as e:
            code = _get_code_from_exception(self.backend, u'query', e)
            if code == log.ErrorCode.backend_not_found:
                return {u'size': -1}
            else:
                # bare raise keeps the original traceback
                raise

    def close(self):
        u"""
        Close the backend, releasing any resources held and
        invalidating any file objects obtained from the backend.
        """
        if hasattr(self.backend, u'_close'):
            self.backend._close()

    def get_fileobj_read(self, filename, parseresults=None):
        u"""
        Return fileobject opened for reading of filename on backend

        The file will be downloaded first into a temp file.  When the
        returned fileobj is closed, the temp file will be deleted.

        If parseresults is not given, it is derived from filename with
        file_naming.parse().
        """
        if not parseresults:
            parseresults = file_naming.parse(filename)
            assert parseresults, u"Filename not correctly parsed"
        tdp = dup_temp.new_tempduppath(parseresults)
        self.get(filename, tdp)
        tdp.setdata()
        return tdp.filtered_open_with_delete(u"rb")

    def get_data(self, filename, parseresults=None):
        u"""
        Retrieve a file from backend, process it, return contents.
        """
        fin = self.get_fileobj_read(filename, parseresults)
        buf = fin.read()
        # close() is called outside the assert: a call placed inside it is
        # not executed at all when Python runs with -O, which would leave
        # the file object open.
        close_status = fin.close()
        assert not close_status
        return buf
714