# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

u"""
Provides a common interface to all backends and certain services
intended to be used by the backends themselves.
"""

from future import standard_library
standard_library.install_aliases()
from builtins import str
from builtins import range
from builtins import object

import errno
import functools
import os
import sys
import time
import re
import getpass
import urllib.request  # pylint: disable=import-error
import urllib.parse  # pylint: disable=import-error
import urllib.error  # pylint: disable=import-error

from duplicity import dup_temp
from duplicity import file_naming
from duplicity import config
from duplicity import log
from duplicity import path
from duplicity import util

from duplicity.util import exception_traceback

from duplicity.errors import BackendException
from duplicity.errors import FatalBackendException
from duplicity.errors import TemporaryLoadException
from duplicity.errors import ConflictingScheme
from duplicity.errors import InvalidBackendURL
from duplicity.errors import UnsupportedBackendScheme

import duplicity.backends

# scheme -> backend factory, filled by register_backend()
_backends = {}
# scheme prefix -> backend factory, filled by register_backend_prefix()
_backend_prefixes = {}
# most recent exception seen by a @retry-wrapped call
_last_exception = None

# These URL schemes have a backend with a notion of an RFC "network location".
# The 'file' and 's3+http' schemes should not be in this list.
# 'http' and 'https' are not actually used for duplicity backend urls, but are needed
# in order to properly support urls returned from some webdav servers. adding them here
# is a hack. we should instead not stomp on the url parsing module to begin with.
#
# This looks similar to urlparse's 'uses_netloc' list, but urlparse doesn't use
# that list for parsing, only creating urls.  And doesn't include our custom
# schemes anyway.  So we keep our own here for our own use.
#
# NOTE: this is filled by the respective backends during registering
uses_netloc = []


def import_backends():
    u"""
    Import files in the duplicity/backends directory where
    the filename ends in 'backend.py' and ignore the rest.

    Every import attempt (success or failure) is logged at INFO level;
    a failing backend module does not abort the remaining imports.

    @rtype: void
    @return: void
    """
    # Named backends_dir rather than "path" so the imported duplicity.path
    # module is not shadowed inside this function.
    backends_dir = duplicity.backends.__path__[0]
    assert backends_dir.endswith(u"duplicity/backends"), duplicity.backends.__path__

    for fn in sorted(os.listdir(backends_dir)):
        if not fn.endswith(u"backend.py"):
            continue
        # strip the ".py" suffix to obtain the module name
        imp = u"duplicity.backends.%s" % (fn[:-3],)
        try:
            __import__(imp)
            res = u"Succeeded"
        except Exception:
            res = u"Failed: " + str(sys.exc_info()[1])
        log.Log(_(u"Import of %s %s") % (imp, res), log.INFO)


def register_backend(scheme, backend_factory):
    u"""
    Register a given backend factory responsible for URL:s with the
    given scheme.

    The backend must be a callable which, when called with a URL as
    the single parameter, returns an object implementing the backend
    protocol (i.e., a subclass of Backend).

    Typically the callable will be the Backend subclass itself.

    This function is not thread-safe and is intended to be called
    during module importation or start-up.

    Raise ConflictingScheme if the scheme is already registered.
    """
    assert callable(backend_factory), u"backend factory must be callable"

    if scheme in _backends:
        raise ConflictingScheme(u"the scheme %s already has a backend "
                                u"associated with it"
                                u"" % (scheme,))

    _backends[scheme] = backend_factory


def register_backend_prefix(scheme, backend_factory):
    u"""
    Register a given backend factory responsible for URL:s with the
    given scheme prefix.

    The backend must be a callable which, when called with a URL as
    the single parameter, returns an object implementing the backend
    protocol (i.e., a subclass of Backend).

    Typically the callable will be the Backend subclass itself.

    This function is not thread-safe and is intended to be called
    during module importation or start-up.

    Raise ConflictingScheme if the prefix is already registered.
    """
    assert callable(backend_factory), u"backend factory must be callable"

    if scheme in _backend_prefixes:
        raise ConflictingScheme(u"the prefix %s already has a backend "
                                u"associated with it"
                                u"" % (scheme,))

    _backend_prefixes[scheme] = backend_factory


def strip_prefix(url_string, prefix_scheme):
    u"""
    strip the prefix from a string e.g. par2+ftp://... -> ftp://...

    The match is case-insensitive and anchored at the start of the string;
    a string that does not start with "<prefix_scheme>+" is returned as is.
    """
    return re.sub(r'(?i)^' + re.escape(prefix_scheme) + r'\+', r'', url_string)


def is_backend_url(url_string):
    u"""
    @return Whether the given string looks like a backend URL.

    Raise InvalidBackendURL if ParsedUrl rejects the string.
    """
    pu = ParsedUrl(url_string)

    # Return a real True/False rather than the scheme string itself.
    return bool(pu.scheme)


def get_backend_object(url_string):
    u"""
    Find the right backend class instance for the given URL, or return None
    if the given string looks like a local path rather than a URL.

    Prefix registrations (e.g. "par2+") are checked first; when one matches,
    the prefix is stripped and the remaining URL is handed to that factory.

    Raise InvalidBackendURL if the URL is not a valid URL.
    Raise UnsupportedBackendScheme if no backend is registered for the scheme.
    Raise BackendException if the factory fails with an ImportError.
    """
    pu = ParsedUrl(url_string)
    if not pu.scheme:
        # no scheme: this is a local path, not a backend URL
        return None

    factory = None

    for prefix in _backend_prefixes:
        if url_string.startswith(prefix + u'+'):
            factory = _backend_prefixes[prefix]
            pu = ParsedUrl(strip_prefix(url_string, prefix))
            break

    if factory is None:
        if pu.scheme not in _backends:
            raise UnsupportedBackendScheme(url_string)
        factory = _backends[pu.scheme]

    try:
        return factory(pu)
    except ImportError:
        raise BackendException(_(u"Could not initialize backend: %s") % str(sys.exc_info()[1]))


def get_backend(url_string):
    u"""
    Instantiate a backend suitable for the given URL, or return None
    if the given string looks like a local path rather than a URL.

    When config.use_gio is set, the URL is prefixed with "gio+" before
    lookup. A found backend is returned wrapped in a BackendWrapper.

    Raise InvalidBackendURL if the URL is not a valid URL.
    """
    if config.use_gio:
        url_string = u'gio+' + url_string
    obj = get_backend_object(url_string)
    if obj:
        obj = BackendWrapper(obj)
    return obj


class ParsedUrl(object):
    u"""
    Parse the given URL as a duplicity backend URL.

    Returns the data of a parsed URL with the same names as that of
    the standard urlparse.urlparse() except that all values have been
    resolved rather than deferred.  There are no get_* members.  This
    makes sure that the URL parsing errors are detected early.

    Attributes set: url_string, scheme, netloc, path, username, password,
    hostname, query, query_args, port.

    Raise InvalidBackendURL on invalid URL's
    """
    def __init__(self, url_string):
        self.url_string = url_string

        # Python < 2.6.5 still examines urlparse.uses_netloc when parsing urls,
        # so stuff our custom list in there before we parse.
        urllib.parse.uses_netloc = uses_netloc

        # While useful in some cases, the fact is that the urlparser makes
        # all the properties in the URL deferred or lazy.  This means that
        # problems don't get detected till called.  We'll try to trap those
        # problems here, so they will be caught early.

        try:
            pu = urllib.parse.urlparse(url_string)
        except Exception:
            raise InvalidBackendURL(u"Syntax error in: %s" % url_string)

        try:
            self.scheme = pu.scheme
        except Exception:
            raise InvalidBackendURL(u"Syntax error (scheme) in: %s" % url_string)

        try:
            self.netloc = pu.netloc
        except Exception:
            raise InvalidBackendURL(u"Syntax error (netloc) in: %s" % url_string)

        try:
            self.path = pu.path
            if self.path:
                self.path = urllib.parse.unquote(self.path)
        except Exception:
            raise InvalidBackendURL(u"Syntax error (path) in: %s" % url_string)

        try:
            self.username = pu.username
        except Exception:
            raise InvalidBackendURL(u"Syntax error (username) in: %s" % url_string)
        # empty usernames are normalised to None
        if self.username:
            self.username = urllib.parse.unquote(pu.username)
        else:
            self.username = None

        try:
            self.password = pu.password
        except Exception:
            raise InvalidBackendURL(u"Syntax error (password) in: %s" % url_string)
        # empty passwords are normalised to None
        if self.password:
            self.password = urllib.parse.unquote(self.password)
        else:
            self.password = None

        try:
            self.hostname = pu.hostname
        except Exception:
            raise InvalidBackendURL(u"Syntax error (hostname) in: %s" % url_string)

        try:
            self.query = pu.query
        except Exception:
            raise InvalidBackendURL(u"Syntax error (query) in: %s" % url_string)
        if self.query:
            self.query_args = urllib.parse.parse_qs(self.query)
        else:
            self.query = None
            self.query_args = {}

        # init to None, overwrite with actual value on success
        self.port = None
        try:
            self.port = pu.port
        except Exception:  # not raised in python2.7, just returns None
            # TODO: remove after dropping python 2.7 support
            if self.scheme in [u'rclone']:
                pass
            # old style rsync://host::[/]dest, are still valid, though they contain no port
            elif not (u'rsync' in self.scheme and re.search(u'::[^:]*$', self.url_string)):
                raise InvalidBackendURL(u"Syntax error (port) in: %s A%s B%s C%s" %
                                        (url_string, (u'rsync' in self.scheme),
                                         re.search(u'::[^:]+$', self.netloc), self.netloc))

        # Our URL system uses two slashes more than urlparse's does when using
        # non-netloc URLs.  And we want to make sure that if urlparse assuming
        # a netloc where we don't want one, that we correct it.
        if self.scheme not in uses_netloc:
            if self.netloc:
                self.path = u'//' + self.netloc + self.path
                self.netloc = u''
                self.hostname = None
            elif not self.path.startswith(u'//') and self.path.startswith(u'/'):
                self.path = u'//' + self.path

        # This happens for implicit local paths.
        if not self.scheme:
            return

        # Our backends do not handle implicit hosts.
        if self.scheme in uses_netloc and not self.hostname:
            raise InvalidBackendURL(u"Missing hostname in a backend URL which "
                                    u"requires an explicit hostname: %s"
                                    u"" % (url_string))

        # Our backends do not handle implicit relative paths.
        if self.scheme not in uses_netloc and not self.path.startswith(u'//'):
            raise InvalidBackendURL(u"missing // - relative paths not supported "
                                    u"for scheme %s: %s"
                                    u"" % (self.scheme, url_string))

    def geturl(self):
        u"""Return the original, unmodified URL string."""
        return self.url_string


def strip_auth_from_url(parsed_url):
    u"""Return a URL from a urlparse object without a username or password."""

    clean_url = re.sub(u'^([^:/]+://)(.*@)?(.*)', r'\1\3', parsed_url.geturl())
    return clean_url


def _get_code_from_exception(backend, operation, e):
    u"""
    Map exception e, raised while backend performed operation, to a
    log.ErrorCode value.

    Order of precedence: a specific code carried by a BackendException,
    the backend's own _error_code() hook, then a small errno mapping.
    Falls back to log.ErrorCode.backend_error.
    """
    if isinstance(e, BackendException) and e.code != log.ErrorCode.backend_error:
        return e.code
    elif hasattr(backend, u'_error_code'):
        return backend._error_code(operation, e) or log.ErrorCode.backend_error
    elif hasattr(e, u'errno'):
        # A few backends return such errors (local, paramiko, etc)
        if e.errno == errno.EACCES:
            return log.ErrorCode.backend_permission_denied
        elif e.errno == errno.ENOENT:
            return log.ErrorCode.backend_not_found
        elif e.errno == errno.ENOSPC:
            return log.ErrorCode.backend_no_space
    return log.ErrorCode.backend_error


def retry(operation, fatal=True):
    u"""
    Decorator factory adding retry handling to BackendWrapper methods.

    The wrapped method is attempted up to config.num_retries times.
    config.are_errors_fatal may declare, per operation, that errors are
    to be ignored; in that case the configured default is returned on the
    first failure. Otherwise FatalBackendException is re-raised
    immediately and any other exception triggers a retry after a delay.

    @param operation: operation name used for config lookup and reporting
    @param fatal: if True, log.FatalError is called once retries are
                  exhausted (or the file is reported as not found); if
                  False only a warning is logged and None is returned.
    """
    def make_filename(f):
        if isinstance(f, path.ROPath):
            return util.escape(f.uc_name)
        else:
            return util.escape(f)

    # Decorators with arguments introduce a new level of indirection.  So we
    # have to return a decorator function (which itself returns a function!)
    def outer_retry(fn):
        @functools.wraps(fn)
        def inner_retry(self, *args, **kwargs):
            global _last_exception
            errors_fatal, errors_default = config.are_errors_fatal.get(operation, (True, None))
            for n in range(1, config.num_retries + 1):
                try:
                    return fn(self, *args, **kwargs)
                except FatalBackendException as e:
                    _last_exception = e
                    if not errors_fatal:
                        # backend wants to report and ignore errors
                        return errors_default
                    # die on fatal errors; bare raise keeps the traceback
                    raise
                except Exception as e:
                    _last_exception = e
                    if not errors_fatal:
                        # backend wants to report and ignore errors
                        return errors_default

                    # retry on anything else
                    log.Debug(_(u"Backtrace of previous error: %s")
                              % exception_traceback())
                    at_end = n == config.num_retries
                    code = _get_code_from_exception(self.backend, operation, e)
                    if code == log.ErrorCode.backend_not_found:
                        # If we tried to do something, but the file just isn't there,
                        # no need to retry.
                        at_end = True
                    if at_end and fatal:
                        extra = u' '.join([operation] + [make_filename(x) for x in args
                                                         if (x and isinstance(x, str))])
                        log.FatalError(_(u"Giving up after %s attempts. %s: %s")
                                       % (n, e.__class__.__name__,
                                          util.uexc(e)), code=code, extra=extra)
                    else:
                        log.Warn(_(u"Attempt %s failed. %s: %s")
                                 % (n, e.__class__.__name__, util.uexc(e)))
                    if at_end:
                        # Stop here: either retries are exhausted or the file
                        # does not exist, so another attempt is pointless.
                        break
                    if isinstance(e, TemporaryLoadException):
                        time.sleep(3 * config.backend_retry_delay)  # wait longer before trying again
                    else:
                        time.sleep(config.backend_retry_delay)  # wait a bit before trying again
                    if hasattr(self.backend, u'_retry_cleanup'):
                        self.backend._retry_cleanup()

        return inner_retry
    return outer_retry


class Backend(object):
    u"""
    See README in backends directory for information on how to write a backend.
    """
    def __init__(self, parsed_url):
        self.parsed_url = parsed_url

    # use getpass by default, inherited backends may overwrite this behaviour
    use_getpass = True

    def get_password(self):
        u"""
        Return a password for authentication purposes. The password
        will be obtained from the backend URL, the environment, by
        asking the user, or by some other method. When applicable, the
        result will be cached for future invocations.

        Lookup order: password in the URL, the FTP_PASSWORD environment
        variable, then an interactive prompt (only if use_getpass is true).
        A prompted password is stored in FTP_PASSWORD. Returns None when
        no password is available and prompting is disabled.
        """
        if self.parsed_url.password:
            return self.parsed_url.password

        try:
            password = os.environ[u'FTP_PASSWORD']
        except KeyError:
            if self.use_getpass:
                password = getpass.getpass(u"Password for '%s@%s': " %
                                           (self.parsed_url.username, self.parsed_url.hostname))
                os.environ[u'FTP_PASSWORD'] = password
            else:
                password = None
        return password

    def munge_password(self, commandline):
        u"""
        Remove password from commandline by substituting the password
        found in the URL, if any, with a generic place-holder.

        This is intended for display purposes only, and it is not
        guaranteed that the results are correct (i.e., more than just
        the ':password@' may be substituted).
        """
        if self.parsed_url.password:
            return re.sub(r'(:([^\s:/@]+)@([^\s@]+))', r':*****@\3', commandline)
        else:
            return commandline

    def __subprocess_popen(self, args):
        u"""
        For internal use.
        Execute the given argument list as a subprocess (no shell).
        Returns int Exitcode, string StdOut, string StdErr

        NOTE: args must be a mutable list; args[0] is replaced in place
        with the result of util.which().
        """
        from subprocess import Popen, PIPE

        args[0] = util.which(args[0])
        p = Popen(args, stdout=PIPE, stderr=PIPE, universal_newlines=True)
        stdout, stderr = p.communicate()

        return p.returncode, stdout, stderr

    # a dictionary for breaking exceptions, syntax is
    # { 'command' : [ code1, code2 ], ... } see ftpbackend for an example
    popen_breaks = {}

    def subprocess_popen(self, commandline):
        u"""
        Execute the given command line with error check.
        Returns int Exitcode, string StdOut, string StdErr

        commandline may be a string (split with shlex) or a list/tuple of
        arguments. The caller's sequence is never modified.

        A non-zero exit code listed in popen_breaks for the command is
        treated as success and (0, u'', u'') is returned.

        Raise a BackendException on failure.
        """
        import shlex

        if isinstance(commandline, (list, tuple)):
            logstr = u' '.join(commandline)
            # Work on a copy: __subprocess_popen assigns to args[0], which
            # would fail on a tuple and would alter the caller's list.
            args = list(commandline)
        else:
            logstr = commandline
            args = shlex.split(commandline)

        logstr = self.munge_password(logstr)
        log.Info(_(u"Reading results of '%s'") % logstr)

        # Remember the command as given; args[0] is rewritten by the call below.
        command = args[0]
        result, stdout, stderr = self.__subprocess_popen(args)
        if result != 0:
            # ignore a predefined set of error codes; accept entries keyed
            # by either the rewritten args[0] or the command name as given
            ignores = (list(self.popen_breaks.get(args[0], ())) +
                       list(self.popen_breaks.get(command, ())))
            if result in ignores:
                return 0, u'', u''
            raise BackendException(u"Error running '%s': returned %d, with output:\n%s" %
                                   (logstr, result, stdout + u'\n' + stderr + u'\n'))
        return result, stdout, stderr


class BackendWrapper(object):
    u"""
    Represents a generic duplicity backend, capable of storing and
    retrieving files.

    Wraps a concrete backend object and dispatches to its optional
    underscore-prefixed hooks (_put, _get, _list, ...), adding retry
    handling via the retry decorator.
    """

    def __init__(self, backend):
        self.backend = backend

    def __do_put(self, source_path, remote_filename):
        u"""Upload via the backend's _put hook; NotImplementedError if absent."""
        if hasattr(self.backend, u'_put'):
            log.Info(_(u"Writing %s") % util.fsdecode(remote_filename))
            self.backend._put(source_path, remote_filename)
        else:
            raise NotImplementedError()

    @retry(u'put', fatal=True)
    def put(self, source_path, remote_filename=None):
        u"""
        Transfer source_path (Path object) to remote_filename (string)

        If remote_filename is None, get the filename from the last
        path component of pathname.
        """
        if not remote_filename:
            remote_filename = source_path.get_filename()
        self.__do_put(source_path, remote_filename)

    @retry(u'move', fatal=True)
    def move(self, source_path, remote_filename=None):
        u"""
        Move source_path (Path object) to remote_filename (string)

        Same as put(), but unlinks source_path in the process.  This allows the
        local backend to do this more efficiently using rename.
        """
        if not remote_filename:
            remote_filename = source_path.get_filename()
        if hasattr(self.backend, u'_move'):
            # a backend may return False from _move to request the
            # put-then-delete fallback below
            if self.backend._move(source_path, remote_filename) is not False:
                source_path.setdata()
                return
        self.__do_put(source_path, remote_filename)
        source_path.delete()

    @retry(u'get', fatal=True)
    def get(self, remote_filename, local_path):
        u"""Retrieve remote_filename and place in local_path"""
        if hasattr(self.backend, u'_get'):
            self.backend._get(remote_filename, local_path)
            local_path.setdata()
            if not local_path.exists():
                raise BackendException(_(u"File %s not found locally after get "
                                         u"from backend") % local_path.uc_name)
        else:
            raise NotImplementedError()

    @retry(u'list', fatal=True)
    def list(self):
        u"""
        Return list of filenames (byte strings) present in backend
        """
        def tobytes(filename):
            u"Convert a (maybe unicode) filename to bytes"
            if isinstance(filename, str):
                # There shouldn't be any encoding errors for files we care
                # about, since duplicity filenames are ascii.  But user files
                # may be in the same directory.  So just replace characters.
                return util.fsencode(filename)
            else:
                return filename

        if hasattr(self.backend, u'_list'):
            # Make sure that duplicity internals only ever see byte strings
            # for filenames, no matter what the backend thinks it is talking.
            return [tobytes(x) for x in self.backend._list()]
        else:
            raise NotImplementedError()

    def delete(self, filename_list):
        u"""
        Delete each filename in filename_list, in order if possible.

        Uses the backend's _delete_list hook when available, otherwise
        calls _delete once per filename.
        """
        # guard against a single filename being passed instead of a list
        assert not isinstance(filename_list, bytes)
        if hasattr(self.backend, u'_delete_list'):
            self._do_delete_list(filename_list)
        elif hasattr(self.backend, u'_delete'):
            for filename in filename_list:
                self._do_delete(filename)
        else:
            raise NotImplementedError()

    @retry(u'delete', fatal=False)
    def _do_delete_list(self, filename_list):
        u"""Delete filename_list through _delete_list, 100 names at a time."""
        while filename_list:
            sublist = filename_list[:100]
            self.backend._delete_list(sublist)
            filename_list = filename_list[100:]

    @retry(u'delete', fatal=False)
    def _do_delete(self, filename):
        u"""Delete a single file through the backend's _delete hook."""
        self.backend._delete(filename)

    # Should never cause FatalError.
    # Returns a dictionary of dictionaries.  The outer dictionary maps
    # filenames to metadata dictionaries.  Supported metadata are:
    #
    # 'size': if >= 0, size of file
    #         if -1, file is not found
    #         if None, error querying file
    #
    # Returned dictionary is guaranteed to contain a metadata dictionary for
    # each filename, and all metadata are guaranteed to be present.
    def query_info(self, filename_list):
        u"""
        Return metadata about each filename in filename_list
        """
        info = {}
        if hasattr(self.backend, u'_query_list'):
            info = self._do_query_list(filename_list)
            # the retry wrapper yields None when the query ultimately failed
            if info is None:
                info = {}
        elif hasattr(self.backend, u'_query'):
            for filename in filename_list:
                info[filename] = self._do_query(filename)

        # Fill out any missing entries (may happen if backend has no support
        # or its query_list support is lazy)
        for filename in filename_list:
            if filename not in info or info[filename] is None:
                info[filename] = {}
            for metadata in [u'size']:
                info[filename].setdefault(metadata, None)

        return info

    @retry(u'query', fatal=False)
    def _do_query_list(self, filename_list):
        u"""Query all filenames via _query_list; a None result becomes {}."""
        info = self.backend._query_list(filename_list)
        if info is None:
            info = {}
        return info

    @retry(u'query', fatal=False)
    def _do_query(self, filename):
        u"""
        Query one filename via _query. A "not found" error is reported as
        {'size': -1}; any other error propagates to the retry wrapper.
        """
        try:
            return self.backend._query(filename)
        except Exception as e:
            code = _get_code_from_exception(self.backend, u'query', e)
            if code == log.ErrorCode.backend_not_found:
                return {u'size': -1}
            else:
                # bare raise keeps the original traceback
                raise

    def close(self):
        u"""
        Close the backend, releasing any resources held and
        invalidating any file objects obtained from the backend.
        """
        if hasattr(self.backend, u'_close'):
            self.backend._close()

    def get_fileobj_read(self, filename, parseresults=None):
        u"""
        Return fileobject opened for reading of filename on backend

        The file will be downloaded first into a temp file.  When the
        returned fileobj is closed, the temp file will be deleted.
        """
        if not parseresults:
            parseresults = file_naming.parse(filename)
            assert parseresults, u"Filename not correctly parsed"
        tdp = dup_temp.new_tempduppath(parseresults)
        self.get(filename, tdp)
        tdp.setdata()
        return tdp.filtered_open_with_delete(u"rb")

    def get_data(self, filename, parseresults=None):
        u"""
        Retrieve a file from backend, process it, return contents.
        """
        fin = self.get_fileobj_read(filename, parseresults)
        try:
            buf = fin.read()
        finally:
            # Close outside of the assert so it still happens under
            # "python -O", and even when read() fails.
            close_status = fin.close()
        assert not close_status
        return buf