1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*- 2# 3# duplicity -- Encrypted bandwidth efficient backup 4# 5# Copyright 2002 Ben Escoto <ben@emerose.org> 6# Copyright 2007 Kenneth Loafman <kenneth@loafman.com> 7# 8# This file is part of duplicity. 9# 10# Duplicity is free software; you can redistribute it and/or modify it 11# under the terms of the GNU General Public License as published by the 12# Free Software Foundation; either version 2 of the License, or (at your 13# option) any later version. 14# 15# Duplicity is distributed in the hope that it will be useful, but 16# WITHOUT ANY WARRANTY; without even the implied warranty of 17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18# General Public License for more details. 19# 20# You should have received a copy of the GNU General Public License 21# along with duplicity; if not, write to the Free Software Foundation, 22# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23# 24# See http://www.nongnu.org/duplicity for more information. 25# Please send mail to me or the mailing list if you find bugs or have 26# any suggestions. 27 28from __future__ import print_function 29from future import standard_library 30standard_library.install_aliases() 31from builtins import map 32from builtins import next 33from builtins import object 34from builtins import range 35 36import copy 37import fasteners 38import os 39import platform 40import resource 41import sys 42import time 43 44from duplicity import __version__ 45from duplicity import asyncscheduler 46from duplicity import commandline 47from duplicity import diffdir 48from duplicity import dup_collections 49from duplicity import dup_temp 50from duplicity import dup_time 51from duplicity import file_naming 52from duplicity import config 53from duplicity import gpg 54from duplicity import log 55from duplicity import manifest 56from duplicity import patchdir 57from duplicity import path 58from duplicity import progress 59from duplicity import tempdir 60from duplicity import util 61 62from datetime import datetime 63 64# If exit_val is not None, exit with given value at end. 65exit_val = None 66 67 68def getpass_safe(message): 69 # getpass() in Python 2.x will call str() on our prompt. So we can't pass 70 # in non-ascii characters. 71 import getpass 72 import locale 73 if sys.version_info.major == 2: 74 message = message.encode(locale.getpreferredencoding(), u'replace') 75 return getpass.getpass(message) 76 77 78def get_passphrase(n, action, for_signing=False): 79 u""" 80 Check to make sure passphrase is indeed needed, then get 81 the passphrase from environment, from gpg-agent, or user 82 83 If n=3, a password is requested and verified. If n=2, the current 84 password is verified. If n=1, a password is requested without 85 verification for the time being. 86 87 @type n: int 88 @param n: verification level for a passphrase being requested 89 @type action: string 90 @param action: action to perform 91 @type for_signing: boolean 92 @param for_signing: true if the passphrase is for a signing key, false if not 93 @rtype: string 94 @return: passphrase 95 """ 96 97 # First try the environment 98 try: 99 if for_signing: 100 return os.environ[u'SIGN_PASSPHRASE'] 101 else: 102 return os.environ[u'PASSPHRASE'] 103 except KeyError: 104 pass 105 106 # check if we can reuse an already set (signing_)passphrase 107 # if signing key is also an encryption key assume that the passphrase is identical 108 if (for_signing and 109 (config.gpg_profile.sign_key in config.gpg_profile.recipients or 110 config.gpg_profile.sign_key in config.gpg_profile.hidden_recipients) and 111 u'PASSPHRASE' in os.environ): # noqa 112 log.Notice(_(u"Reuse configured PASSPHRASE as SIGN_PASSPHRASE")) 113 return os.environ[u'PASSPHRASE'] 114 # if one encryption key is also the signing key assume that the passphrase is identical 115 if (not for_signing and 116 (config.gpg_profile.sign_key in config.gpg_profile.recipients or 117 config.gpg_profile.sign_key in config.gpg_profile.hidden_recipients) and 118 u'SIGN_PASSPHRASE' in os.environ): # noqa 119 log.Notice(_(u"Reuse configured SIGN_PASSPHRASE as PASSPHRASE")) 120 return os.environ[u'SIGN_PASSPHRASE'] 121 122 # Next, verify we need to ask the user 123 124 # Assumptions: 125 # - encrypt-key has no passphrase 126 # - sign-key requires passphrase 127 # - gpg-agent supplies all, no user interaction 128 129 # no passphrase if --no-encryption or --use-agent 130 if not config.encryption or config.use_agent: 131 return u"" 132 133 # these commands don't need a password 134 elif action in [u"collection-status", 135 u"list-current", 136 u"remove-all-but-n-full", 137 u"remove-all-inc-of-but-n-full", 138 u"remove-old", 139 ]: 140 return u"" 141 142 # for a full backup, we don't need a password if 143 # there is no sign_key and there are recipients 144 elif (action == u"full" and 145 (config.gpg_profile.recipients or config.gpg_profile.hidden_recipients) and not 146 config.gpg_profile.sign_key): 147 return u"" 148 149 # for an inc backup, we don't need a password if 150 # there is no sign_key and there are recipients 151 elif (action == u"inc" and 152 (config.gpg_profile.recipients or config.gpg_profile.hidden_recipients) and not 153 config.gpg_profile.sign_key): 154 return u"" 155 156 # Finally, ask the user for the passphrase 157 else: 158 log.Info(_(u"PASSPHRASE variable not set, asking user.")) 159 use_cache = True 160 while 1: 161 # ask the user to enter a new passphrase to avoid an infinite loop 162 # if the user made a typo in the first passphrase 163 if use_cache and n == 2: 164 if for_signing: 165 pass1 = config.gpg_profile.signing_passphrase 166 else: 167 pass1 = config.gpg_profile.passphrase 168 else: 169 if for_signing: 170 if use_cache and config.gpg_profile.signing_passphrase: 171 pass1 = config.gpg_profile.signing_passphrase 172 else: 173 pass1 = getpass_safe(_(u"GnuPG passphrase for signing key:") + u" ") 174 else: 175 if use_cache and config.gpg_profile.passphrase: 176 pass1 = config.gpg_profile.passphrase 177 else: 178 pass1 = getpass_safe(_(u"GnuPG passphrase for decryption:") + u" ") 179 180 if n == 1: 181 pass2 = pass1 182 elif for_signing: 183 pass2 = getpass_safe(_(u"Retype passphrase for signing key to confirm: ")) 184 else: 185 pass2 = getpass_safe(_(u"Retype passphrase for decryption to confirm: ")) 186 187 if not pass1 == pass2: 188 log.Log(_(u"First and second passphrases do not match! Please try again."), 189 log.WARNING, force_print=True) 190 use_cache = False 191 continue 192 193 if not pass1 and not (config.gpg_profile.recipients or 194 config.gpg_profile.hidden_recipients) and not for_signing: 195 log.Log(_(u"Cannot use empty passphrase with symmetric encryption! Please try again."), 196 log.WARNING, force_print=True) 197 use_cache = False 198 continue 199 200 return pass1 201 202 203def dummy_backup(tarblock_iter): 204 u""" 205 Fake writing to backend, but do go through all the source paths. 206 207 @type tarblock_iter: tarblock_iter 208 @param tarblock_iter: iterator for current tar block 209 210 @rtype: int 211 @return: constant 0 (zero) 212 """ 213 try: 214 # Just spin our wheels 215 while next(tarblock_iter): 216 pass 217 except StopIteration: 218 pass 219 log.Progress(None, diffdir.stats.SourceFileSize) 220 return 0 221 222 223def restart_position_iterator(tarblock_iter): 224 u""" 225 Fake writing to backend, but do go through all the source paths. 226 Stop when we have processed the last file and block from the 227 last backup. Normal backup will proceed at the start of the 228 next volume in the set. 229 230 @type tarblock_iter: tarblock_iter 231 @param tarblock_iter: iterator for current tar block 232 233 @rtype: int 234 @return: constant 0 (zero) 235 """ 236 last_index = config.restart.last_index 237 last_block = config.restart.last_block 238 try: 239 # Just spin our wheels 240 iter_result = next(tarblock_iter) 241 while iter_result: 242 if (tarblock_iter.previous_index == last_index): 243 # If both the previous index and this index are done, exit now 244 # before we hit the next index, to prevent skipping its first 245 # block. 246 if not last_block and not tarblock_iter.previous_block: 247 break 248 # Only check block number if last_block is also a number 249 if last_block and tarblock_iter.previous_block > last_block: 250 break 251 if tarblock_iter.previous_index > last_index: 252 log.Warn(_(u"File %s complete in backup set.\n" 253 u"Continuing restart on file %s.") % 254 (util.uindex(last_index), util.uindex(tarblock_iter.previous_index)), 255 log.ErrorCode.restart_file_not_found) 256 # We went too far! Stuff the data back into place before restarting 257 tarblock_iter.queue_index_data(iter_result) 258 break 259 iter_result = next(tarblock_iter) 260 except StopIteration: 261 log.Warn(_(u"File %s missing in backup set.\n" 262 u"Continuing restart on file %s.") % 263 (util.uindex(last_index), util.uindex(tarblock_iter.previous_index)), 264 log.ErrorCode.restart_file_not_found) 265 266 267def write_multivol(backup_type, tarblock_iter, man_outfp, sig_outfp, backend): 268 u""" 269 Encrypt volumes of tarblock_iter and write to backend 270 271 backup_type should be "inc" or "full" and only matters here when 272 picking the filenames. The path_prefix will determine the names 273 of the files written to backend. Also writes manifest file. 274 Returns number of bytes written. 275 276 @type backup_type: string 277 @param backup_type: type of backup to perform, either 'inc' or 'full' 278 @type tarblock_iter: tarblock_iter 279 @param tarblock_iter: iterator for current tar block 280 @type backend: callable backend object 281 @param backend: I/O backend for selected protocol 282 283 @rtype: int 284 @return: bytes written 285 """ 286 287 def get_indicies(tarblock_iter): 288 u"""Return start_index and end_index of previous volume""" 289 start_index, start_block = tarblock_iter.recall_index() 290 if start_index is None: 291 start_index = () 292 start_block = None 293 if start_block: 294 start_block -= 1 295 end_index, end_block = tarblock_iter.get_previous_index() 296 if end_index is None: 297 end_index = start_index 298 end_block = start_block 299 if end_block: 300 end_block -= 1 301 return start_index, start_block, end_index, end_block 302 303 def validate_block(orig_size, dest_filename): 304 info = backend.query_info([dest_filename])[dest_filename] 305 size = info[u'size'] 306 if size is None: 307 return # error querying file 308 for attempt in range(1, config.num_retries + 1): 309 info = backend.query_info([dest_filename])[dest_filename] 310 size = info[u'size'] 311 if size == orig_size: 312 break 313 if size is None: 314 return 315 log.Notice(_(u"%s Remote filesize %d for %s does not match local size %d, retrying.") % (datetime.now(), 316 size, util.escape(dest_filename), orig_size)) 317 time.sleep(2**attempt) 318 if size != orig_size: 319 code_extra = u"%s %d %d" % (util.escape(dest_filename), orig_size, size) 320 log.FatalError(_(u"File %s was corrupted during upload.") % util.fsdecode(dest_filename), 321 log.ErrorCode.volume_wrong_size, code_extra) 322 323 def put(tdp, dest_filename, vol_num): 324 u""" 325 Retrieve file size *before* calling backend.put(), which may (at least 326 in case of the localbackend) rename the temporary file to the target 327 instead of copying. 328 """ 329 putsize = tdp.getsize() 330 if config.skip_volume != vol_num: # for testing purposes only 331 backend.put(tdp, dest_filename) 332 validate_block(putsize, dest_filename) 333 if tdp.stat: 334 tdp.delete() 335 return putsize 336 337 def validate_encryption_settings(backup_set, manifest): 338 u""" 339 When restarting a backup, we have no way to verify that the current 340 passphrase is the same as the one used for the beginning of the backup. 341 This is because the local copy of the manifest is unencrypted and we 342 don't need to decrypt the existing volumes on the backend. To ensure 343 that we are using the same passphrase, we manually download volume 1 344 and decrypt it with the current passphrase. We also want to confirm 345 that we're using the same encryption settings (i.e. we don't switch 346 from encrypted to non in the middle of a backup chain), so we check 347 that the vol1 filename on the server matches the settings of this run. 348 """ 349 if ((config.gpg_profile.recipients or config.gpg_profile.hidden_recipients) and 350 not config.gpg_profile.sign_key): 351 # When using gpg encryption without a signing key, we skip this validation 352 # step to ensure that we can still backup without needing the secret key 353 # on the machine. 354 return 355 356 vol1_filename = file_naming.get(backup_type, 1, 357 encrypted=config.encryption, 358 gzipped=config.compression) 359 if vol1_filename != backup_set.volume_name_dict[1]: 360 log.FatalError(_(u"Restarting backup, but current encryption " 361 u"settings do not match original settings"), 362 log.ErrorCode.enryption_mismatch) 363 364 # Settings are same, let's check passphrase itself if we are encrypted 365 if config.encryption: 366 fileobj = restore_get_enc_fileobj(config.backend, vol1_filename, 367 manifest.volume_info_dict[1]) 368 fileobj.close() 369 370 if not config.restart: 371 # normal backup start 372 vol_num = 0 373 mf = manifest.Manifest(fh=man_outfp) 374 mf.set_dirinfo() 375 else: 376 # restart from last known position 377 mf = config.restart.last_backup.get_local_manifest() 378 config.restart.checkManifest(mf) 379 config.restart.setLastSaved(mf) 380 if not (config.s3_use_deep_archive or config.s3_use_glacier): 381 validate_encryption_settings(config.restart.last_backup, mf) 382 else: 383 log.Warn(_(u"Skipping encryption validation due to glacier/deep storage")) 384 mf.fh = man_outfp 385 last_block = config.restart.last_block 386 log.Notice(_(u"Restarting after volume %s, file %s, block %s") % 387 (config.restart.start_vol, 388 util.uindex(config.restart.last_index), 389 config.restart.last_block)) 390 vol_num = config.restart.start_vol 391 restart_position_iterator(tarblock_iter) 392 393 at_end = 0 394 bytes_written = 0 395 396 # If --progress option is given, initiate a background thread that will 397 # periodically report progress to the Log. 398 if config.progress: 399 progress.tracker.set_start_volume(vol_num + 1) 400 progress.progress_thread.start() 401 402 # This assertion must be kept until we have solved the problem 403 # of concurrency at the backend level. Concurrency 1 is fine 404 # because the actual I/O concurrency on backends is limited to 405 # 1 as usual, but we are allowed to perform local CPU 406 # intensive tasks while that single upload is happening. This 407 # is an assert put in place to avoid someone accidentally 408 # enabling concurrency above 1, before adequate work has been 409 # done on the backends to make them support concurrency. 410 assert config.async_concurrency <= 1 411 412 io_scheduler = asyncscheduler.AsyncScheduler(config.async_concurrency) 413 async_waiters = [] 414 415 while not at_end: 416 # set up iterator 417 tarblock_iter.remember_next_index() # keep track of start index 418 419 # Create volume 420 vol_num += 1 421 dest_filename = file_naming.get(backup_type, vol_num, 422 encrypted=config.encryption, 423 gzipped=config.compression) 424 tdp = dup_temp.new_tempduppath(file_naming.parse(dest_filename)) 425 426 # write volume 427 if config.encryption: 428 at_end = gpg.GPGWriteFile(tarblock_iter, tdp.name, config.gpg_profile, 429 config.volsize) 430 elif config.compression: 431 at_end = gpg.GzipWriteFile(tarblock_iter, tdp.name, config.volsize) 432 else: 433 at_end = gpg.PlainWriteFile(tarblock_iter, tdp.name, config.volsize) 434 tdp.setdata() 435 436 # Add volume information to manifest 437 vi = manifest.VolumeInfo() 438 vi.set_info(vol_num, *get_indicies(tarblock_iter)) 439 vi.set_hash(u"SHA1", gpg.get_hash(u"SHA1", tdp)) 440 mf.add_volume_info(vi) 441 442 # Checkpoint after each volume so restart has a place to restart. 443 # Note that until after the first volume, all files are temporary. 444 if vol_num == 1: 445 sig_outfp.to_partial() 446 man_outfp.to_partial() 447 else: 448 sig_outfp.flush() 449 man_outfp.flush() 450 451 async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename, 452 vol_num: put(tdp, dest_filename, vol_num), 453 (tdp, dest_filename, vol_num))) 454 455 # Log human-readable version as well as raw numbers for machine consumers 456 log.Progress(_(u'Processed volume %d') % vol_num, diffdir.stats.SourceFileSize) 457 # Snapshot (serialize) progress now as a Volume has been completed. 458 # This is always the last restore point when it comes to restart a failed backup 459 if config.progress: 460 progress.tracker.snapshot_progress(vol_num) 461 462 # for testing purposes only - assert on inc or full 463 assert config.fail_on_volume != vol_num, u"Forced assertion for testing at volume %d" % vol_num 464 465 # Collect byte count from all asynchronous jobs; also implicitly waits 466 # for them all to complete. 467 for waiter in async_waiters: 468 bytes_written += waiter() 469 470 # Upload the collection summary. 471 # bytes_written += write_manifest(mf, backup_type, backend) 472 mf.set_files_changed_info(diffdir.stats.get_delta_entries_file()) 473 474 return bytes_written 475 476 477def get_man_fileobj(backup_type): 478 u""" 479 Return a fileobj opened for writing, save results as manifest 480 481 Save manifest in config.archive_dir_path gzipped. 482 Save them on the backend encrypted as needed. 483 484 @type man_type: string 485 @param man_type: either "full" or "new" 486 487 @rtype: fileobj 488 @return: fileobj opened for writing 489 """ 490 assert backup_type == u"full" or backup_type == u"inc" 491 492 part_man_filename = file_naming.get(backup_type, 493 manifest=True, 494 partial=True) 495 perm_man_filename = file_naming.get(backup_type, 496 manifest=True) 497 remote_man_filename = file_naming.get(backup_type, 498 manifest=True, 499 encrypted=config.encryption) 500 501 fh = dup_temp.get_fileobj_duppath(config.archive_dir_path, 502 part_man_filename, 503 perm_man_filename, 504 remote_man_filename) 505 return fh 506 507 508def get_sig_fileobj(sig_type): 509 u""" 510 Return a fileobj opened for writing, save results as signature 511 512 Save signatures in config.archive_dir gzipped. 513 Save them on the backend encrypted as needed. 514 515 @type sig_type: string 516 @param sig_type: either "full-sig" or "new-sig" 517 518 @rtype: fileobj 519 @return: fileobj opened for writing 520 """ 521 assert sig_type in [u"full-sig", u"new-sig"] 522 523 part_sig_filename = file_naming.get(sig_type, 524 gzipped=False, 525 partial=True) 526 perm_sig_filename = file_naming.get(sig_type, 527 gzipped=True) 528 remote_sig_filename = file_naming.get(sig_type, encrypted=config.encryption, 529 gzipped=config.compression) 530 531 fh = dup_temp.get_fileobj_duppath(config.archive_dir_path, 532 part_sig_filename, 533 perm_sig_filename, 534 remote_sig_filename, 535 overwrite=True) 536 return fh 537 538 539def full_backup(col_stats): 540 u""" 541 Do full backup of directory to backend, using archive_dir_path 542 543 @type col_stats: CollectionStatus object 544 @param col_stats: collection status 545 546 @rtype: void 547 @return: void 548 """ 549 if config.progress: 550 progress.tracker = progress.ProgressTracker() 551 # Fake a backup to compute total of moving bytes 552 tarblock_iter = diffdir.DirFull(config.select) 553 dummy_backup(tarblock_iter) 554 # Store computed stats to compute progress later 555 progress.tracker.set_evidence(diffdir.stats, True) 556 # Reinit the config.select iterator, so 557 # the core of duplicity can rescan the paths 558 commandline.set_selection() 559 progress.progress_thread = progress.LogProgressThread() 560 561 if config.dry_run: 562 tarblock_iter = diffdir.DirFull(config.select) 563 bytes_written = dummy_backup(tarblock_iter) 564 col_stats.set_values(sig_chain_warning=None) 565 else: 566 sig_outfp = get_sig_fileobj(u"full-sig") 567 man_outfp = get_man_fileobj(u"full") 568 tarblock_iter = diffdir.DirFull_WriteSig(config.select, 569 sig_outfp) 570 bytes_written = write_multivol(u"full", tarblock_iter, 571 man_outfp, sig_outfp, 572 config.backend) 573 574 # close sig file, send to remote, and rename to final 575 sig_outfp.close() 576 sig_outfp.to_remote() 577 sig_outfp.to_final() 578 579 # close manifest, send to remote, and rename to final 580 man_outfp.close() 581 man_outfp.to_remote() 582 man_outfp.to_final() 583 584 if config.progress: 585 # Terminate the background thread now, if any 586 progress.progress_thread.finished = True 587 progress.progress_thread.join() 588 log.TransferProgress(100.0, 0, progress.tracker.total_bytecount, 589 progress.tracker.total_elapsed_seconds(), 590 progress.tracker.speed, False) 591 592 col_stats.set_values(sig_chain_warning=None) 593 594 print_statistics(diffdir.stats, bytes_written) 595 596 597def check_sig_chain(col_stats): 598 u""" 599 Get last signature chain for inc backup, or None if none available 600 601 @type col_stats: CollectionStatus object 602 @param col_stats: collection status 603 """ 604 if not col_stats.matched_chain_pair: 605 if config.incremental: 606 log.FatalError(_(u"Fatal Error: Unable to start incremental backup. " 607 u"Old signatures not found and incremental specified"), 608 log.ErrorCode.inc_without_sigs) 609 else: 610 log.Warn(_(u"No signatures found, switching to full backup.")) 611 return None 612 return col_stats.matched_chain_pair[0] 613 614 615def print_statistics(stats, bytes_written): # pylint: disable=unused-argument 616 u""" 617 If config.print_statistics, print stats after adding bytes_written 618 619 @rtype: void 620 @return: void 621 """ 622 if config.print_statistics: 623 diffdir.stats.TotalDestinationSizeChange = bytes_written 624 logstring = diffdir.stats.get_stats_logstring(_(u"Backup Statistics")) 625 log.Log(logstring, log.NOTICE, force_print=True) 626 627 628def incremental_backup(sig_chain): 629 u""" 630 Do incremental backup of directory to backend, using archive_dir_path 631 632 @rtype: void 633 @return: void 634 """ 635 if not config.restart: 636 dup_time.setprevtime(sig_chain.end_time) 637 if dup_time.curtime == dup_time.prevtime: 638 time.sleep(2) 639 dup_time.setcurtime() 640 assert dup_time.curtime != dup_time.prevtime, \ 641 u"time not moving forward at appropriate pace - system clock issues?" 642 643 if config.progress: 644 progress.tracker = progress.ProgressTracker() 645 # Fake a backup to compute total of moving bytes 646 tarblock_iter = diffdir.DirDelta(config.select, 647 sig_chain.get_fileobjs()) 648 dummy_backup(tarblock_iter) 649 # Store computed stats to compute progress later 650 progress.tracker.set_evidence(diffdir.stats, False) 651 # Reinit the config.select iterator, so 652 # the core of duplicity can rescan the paths 653 commandline.set_selection() 654 progress.progress_thread = progress.LogProgressThread() 655 656 if config.dry_run: 657 tarblock_iter = diffdir.DirDelta(config.select, 658 sig_chain.get_fileobjs()) 659 bytes_written = dummy_backup(tarblock_iter) 660 else: 661 new_sig_outfp = get_sig_fileobj(u"new-sig") 662 new_man_outfp = get_man_fileobj(u"inc") 663 tarblock_iter = diffdir.DirDelta_WriteSig(config.select, 664 sig_chain.get_fileobjs(), 665 new_sig_outfp) 666 bytes_written = write_multivol(u"inc", tarblock_iter, 667 new_man_outfp, new_sig_outfp, 668 config.backend) 669 670 # close sig file and rename to final 671 new_sig_outfp.close() 672 new_sig_outfp.to_remote() 673 new_sig_outfp.to_final() 674 675 # close manifest and rename to final 676 new_man_outfp.close() 677 new_man_outfp.to_remote() 678 new_man_outfp.to_final() 679 680 if config.progress: 681 # Terminate the background thread now, if any 682 progress.progress_thread.finished = True 683 progress.progress_thread.join() 684 log.TransferProgress(100.0, 0, progress.tracker.total_bytecount, 685 progress.tracker.total_elapsed_seconds(), 686 progress.tracker.speed, False) 687 688 print_statistics(diffdir.stats, bytes_written) 689 690 691def list_current(col_stats): 692 u""" 693 List the files current in the archive (examining signature only) 694 695 @type col_stats: CollectionStatus object 696 @param col_stats: collection status 697 698 @rtype: void 699 @return: void 700 """ 701 time = config.restore_time or dup_time.curtime 702 sig_chain = col_stats.get_signature_chain_at_time(time) 703 path_iter = diffdir.get_combined_path_iter(sig_chain.get_fileobjs(time)) 704 for path in path_iter: 705 if path.difftype != u"deleted": 706 user_info = u"%s %s" % (dup_time.timetopretty(path.getmtime()), 707 util.fsdecode(path.get_relative_path())) 708 log_info = u"%s %s %s" % (dup_time.timetostring(path.getmtime()), 709 util.escape(path.get_relative_path()), 710 path.type) 711 log.Log(user_info, log.INFO, log.InfoCode.file_list, 712 log_info, True) 713 714 715def restore(col_stats): 716 u""" 717 Restore archive in config.backend to config.local_path 718 719 @type col_stats: CollectionStatus object 720 @param col_stats: collection status 721 722 @rtype: void 723 @return: void 724 """ 725 if config.dry_run: 726 # Only prints list of required volumes when running dry 727 restore_get_patched_rop_iter(col_stats) 728 return 729 if not patchdir.Write_ROPaths(config.local_path, 730 restore_get_patched_rop_iter(col_stats)): 731 if config.restore_dir: 732 log.FatalError(_(u"%s not found in archive - no files restored.") 733 % (util.fsdecode(config.restore_dir)), 734 log.ErrorCode.restore_dir_not_found) 735 else: 736 log.FatalError(_(u"No files found in archive - nothing restored."), 737 log.ErrorCode.no_restore_files) 738 739 740def restore_get_patched_rop_iter(col_stats): 741 u""" 742 Return iterator of patched ROPaths of desired restore data 743 744 @type col_stats: CollectionStatus object 745 @param col_stats: collection status 746 """ 747 if config.restore_dir: 748 index = tuple(config.restore_dir.split(b"/")) 749 else: 750 index = () 751 time = config.restore_time or dup_time.curtime 752 backup_chain = col_stats.get_backup_chain_at_time(time) 753 assert backup_chain, col_stats.all_backup_chains 754 backup_setlist = backup_chain.get_sets_at_time(time) 755 num_vols = 0 756 for s in backup_setlist: 757 num_vols += len(s) 758 cur_vol = [0] 759 760 def get_fileobj_iter(backup_set): 761 u"""Get file object iterator from backup_set contain given index""" 762 manifest = backup_set.get_manifest() 763 volumes = manifest.get_containing_volumes(index) 764 765 if hasattr(backup_set.backend.backend, u'pre_process_download_batch'): 766 backup_set.backend.backend.pre_process_download_batch(backup_set.volume_name_dict.values()) 767 768 for vol_num in volumes: 769 yield restore_get_enc_fileobj(backup_set.backend, 770 backup_set.volume_name_dict[vol_num], 771 manifest.volume_info_dict[vol_num]) 772 cur_vol[0] += 1 773 log.Progress(_(u'Processed volume %d of %d') % (cur_vol[0], num_vols), 774 cur_vol[0], num_vols) 775 776 if hasattr(config.backend, u'pre_process_download') or config.dry_run: 777 file_names = [] 778 for backup_set in backup_setlist: 779 manifest = backup_set.get_manifest() 780 volumes = manifest.get_containing_volumes(index) 781 for vol_num in volumes: 782 file_names.append(backup_set.volume_name_dict[vol_num]) 783 if config.dry_run: 784 log.Notice(u"Required volumes to restore:\n\t" + 785 u'\n\t'.join(file_name.decode() for file_name in file_names)) 786 return None 787 else: 788 config.backend.pre_process_download(file_names) 789 790 fileobj_iters = list(map(get_fileobj_iter, backup_setlist)) 791 tarfiles = list(map(patchdir.TarFile_FromFileobjs, fileobj_iters)) 792 return patchdir.tarfiles2rop_iter(tarfiles, index) 793 794 795def restore_get_enc_fileobj(backend, filename, volume_info): 796 u""" 797 Return plaintext fileobj from encrypted filename on backend 798 799 If volume_info is set, the hash of the file will be checked, 800 assuming some hash is available. Also, if config.sign_key is 801 set, a fatal error will be raised if file not signed by sign_key. 802 803 """ 804 parseresults = file_naming.parse(filename) 805 tdp = dup_temp.new_tempduppath(parseresults) 806 backend.get(filename, tdp) 807 808 u""" verify hash of the remote file """ 809 verified, hash_pair, calculated_hash = restore_check_hash(volume_info, tdp) 810 if not verified: 811 log.FatalError(u"%s\n %s\n %s\n %s\n" % 812 (_(u"Invalid data - %s hash mismatch for file:") % 813 hash_pair[0], 814 util.fsdecode(filename), 815 _(u"Calculated hash: %s") % calculated_hash, 816 _(u"Manifest hash: %s") % hash_pair[1]), 817 log.ErrorCode.mismatched_hash) 818 819 fileobj = tdp.filtered_open_with_delete(u"rb") 820 if parseresults.encrypted and config.gpg_profile.sign_key: 821 restore_add_sig_check(fileobj) 822 return fileobj 823 824 825def restore_check_hash(volume_info, vol_path): 826 u""" 827 Check the hash of vol_path path against data in volume_info 828 829 @rtype: boolean 830 @return: true (verified) / false (failed) 831 """ 832 hash_pair = volume_info.get_best_hash() 833 if hash_pair: 834 calculated_hash = gpg.get_hash(hash_pair[0], vol_path) 835 if calculated_hash != hash_pair[1]: 836 return False, hash_pair, calculated_hash 837 u""" reached here, verification passed """ 838 return True, hash_pair, calculated_hash 839 840 841def restore_add_sig_check(fileobj): 842 u""" 843 Require signature when closing fileobj matches sig in gpg_profile 844 845 @rtype: void 846 @return: void 847 """ 848 assert (isinstance(fileobj, dup_temp.FileobjHooked) and 849 isinstance(fileobj.fileobj, gpg.GPGFile)), fileobj 850 851 def check_signature(): 852 u"""Thunk run when closing volume file""" 853 actual_sig = fileobj.fileobj.get_signature() 854 actual_sig = u"None" if actual_sig is None else actual_sig 855 sign_key = config.gpg_profile.sign_key 856 sign_key = u"None" if sign_key is None else sign_key 857 ofs = -min(len(actual_sig), len(sign_key)) 858 if actual_sig[ofs:] != sign_key[ofs:]: 859 log.FatalError(_(u"Volume was signed by key %s, not %s") % 860 (actual_sig[ofs:], sign_key[ofs:]), 861 log.ErrorCode.unsigned_volume) 862 863 fileobj.addhook(check_signature) 864 865 866def verify(col_stats): 867 u""" 868 Verify files, logging differences 869 870 @type col_stats: CollectionStatus object 871 @param col_stats: collection status 872 873 @rtype: void 874 @return: void 875 """ 876 global exit_val 877 collated = diffdir.collate2iters(restore_get_patched_rop_iter(col_stats), 878 config.select) 879 diff_count = 0 880 total_count = 0 881 for backup_ropath, current_path in collated: 882 if not backup_ropath: 883 backup_ropath = path.ROPath(current_path.index) 884 if not current_path: 885 current_path = path.ROPath(backup_ropath.index) 886 if not backup_ropath.compare_verbose(current_path, config.compare_data): 887 diff_count += 1 888 total_count += 1 889 # Unfortunately, ngettext doesn't handle multiple number variables, so we 890 # split up the string. 891 log.Notice(_(u"Verify complete: %s, %s.") % 892 (ngettext(u"%d file compared", 893 u"%d files compared", total_count) % total_count, 894 ngettext(u"%d difference found", 895 u"%d differences found", diff_count) % diff_count)) 896 if diff_count >= 1: 897 exit_val = 1 898 899 900def cleanup(col_stats): 901 u""" 902 Delete the extraneous files in the current backend 903 904 @type col_stats: CollectionStatus object 905 @param col_stats: collection status 906 907 @rtype: void 908 @return: void 909 """ 910 ext_local, ext_remote = col_stats.get_extraneous() 911 extraneous = ext_local + ext_remote 912 if not extraneous: 913 log.Warn(_(u"No extraneous files found, nothing deleted in cleanup.")) 914 return 915 916 filestr = u"\n".join(map(util.fsdecode, extraneous)) 917 if config.force: 918 log.Notice(ngettext(u"Deleting this file from backend:", 919 u"Deleting these files from backend:", 920 len(extraneous)) + u"\n" + filestr) 921 if not config.dry_run: 922 col_stats.backend.delete(ext_remote) 923 for fn in ext_local: 924 try: 925 config.archive_dir_path.append(fn).delete() 926 except Exception: 927 pass 928 else: 929 log.Notice(ngettext(u"Found the following file to delete:", 930 u"Found the following files to delete:", 931 len(extraneous)) + u"\n" + filestr + u"\n" + 932 _(u"Run duplicity again with the --force option to actually delete.")) 933 934 935def remove_all_but_n_full(col_stats): 936 u""" 937 Remove backup files older than the last n full backups. 938 939 @type col_stats: CollectionStatus object 940 @param col_stats: collection status 941 942 @rtype: void 943 @return: void 944 """ 945 assert config.keep_chains is not None 946 947 config.remove_time = col_stats.get_nth_last_full_backup_time(config.keep_chains) 948 949 remove_old(col_stats) 950 951 952def remove_old(col_stats): 953 u""" 954 Remove backup files older than config.remove_time from backend 955 956 @type col_stats: CollectionStatus object 957 @param col_stats: collection status 958 959 @rtype: void 960 @return: void 961 """ 962 assert config.remove_time is not None 963 964 def set_times_str(setlist): 965 u"""Return string listing times of sets in setlist""" 966 return u"\n".join([dup_time.timetopretty(s.get_time()) for s in setlist]) 967 968 def chain_times_str(chainlist): 969 u"""Return string listing times of chains in chainlist""" 970 return u"\n".join([dup_time.timetopretty(s.end_time) for s in chainlist]) 971 972 req_list = col_stats.get_older_than_required(config.remove_time) 973 if req_list: 974 log.Warn(u"%s\n%s\n%s" % 975 (_(u"There are backup set(s) at time(s):"), 976 set_times_str(req_list), 977 _(u"Which can't be deleted because newer sets depend on them."))) 978 979 if (col_stats.matched_chain_pair and 980 col_stats.matched_chain_pair[1].end_time < config.remove_time): 981 log.Warn(_(u"Current active backup chain is older than specified time. " 982 u"However, it will not be deleted. To remove all your backups, " 983 u"manually purge the repository.")) 984 985 chainlist = col_stats.get_chains_older_than(config.remove_time) 986 987 if config.remove_all_inc_of_but_n_full_mode: 988 # ignore chains without incremental backups: 989 chainlist = list(x for x in chainlist if 990 (isinstance(x, dup_collections.SignatureChain) and x.inclist) or 991 (isinstance(x, dup_collections.BackupChain) and x.incset_list)) 992 993 if not chainlist: 994 log.Notice(_(u"No old backup sets found, nothing deleted.")) 995 return 996 if config.force: 997 log.Notice(ngettext(u"Deleting backup chain at time:", 998 u"Deleting backup chains at times:", 999 len(chainlist)) + 1000 u"\n" + chain_times_str(chainlist)) 1001 # Add signature files too, since they won't be needed anymore 1002 chainlist += col_stats.get_signature_chains_older_than(config.remove_time) 1003 chainlist.reverse() # save oldest for last 1004 for chain in chainlist: 1005 # if remove_all_inc_of_but_n_full_mode mode, remove only 1006 # incrementals one and not full 1007 if config.remove_all_inc_of_but_n_full_mode: 1008 if isinstance(chain, dup_collections.SignatureChain): 1009 chain_desc = _(u"Deleting any incremental signature chain rooted at %s") 1010 else: 1011 chain_desc = _(u"Deleting any incremental backup chain rooted at %s") 1012 else: 1013 if isinstance(chain, dup_collections.SignatureChain): 1014 chain_desc = _(u"Deleting complete signature chain %s") 1015 else: 1016 chain_desc = _(u"Deleting complete backup chain %s") 1017 log.Notice(chain_desc % dup_time.timetopretty(chain.end_time)) 1018 if not config.dry_run: 1019 chain.delete(keep_full=config.remove_all_inc_of_but_n_full_mode) 1020 col_stats.set_values(sig_chain_warning=None) 1021 else: 1022 log.Notice(ngettext(u"Found old backup chain at the following time:", 1023 u"Found old backup chains at the following times:", 1024 len(chainlist)) + 1025 u"\n" + chain_times_str(chainlist) + u"\n" + 1026 _(u"Rerun command with --force option to actually delete.")) 1027 1028 1029def replicate(): 1030 u""" 1031 Replicate backup files from one remote to another, possibly encrypting or adding parity. 1032 1033 @rtype: void 1034 @return: void 1035 """ 1036 action = u"replicate" 1037 time = config.restore_time or dup_time.curtime 1038 src_stats = dup_collections.CollectionsStatus(config.src_backend, None, action).set_values(sig_chain_warning=None) 1039 tgt_stats = dup_collections.CollectionsStatus(config.backend, None, action).set_values(sig_chain_warning=None) 1040 1041 src_list = config.src_backend.list() 1042 tgt_list = config.backend.list() 1043 1044 src_chainlist = src_stats.get_signature_chains(local=False, filelist=src_list)[0] 1045 tgt_chainlist = tgt_stats.get_signature_chains(local=False, filelist=tgt_list)[0] 1046 sorted(src_chainlist, key=lambda chain: chain.start_time) 1047 sorted(tgt_chainlist, key=lambda chain: chain.start_time) 1048 if not src_chainlist: 1049 log.Notice(_(u"No old backup sets found.")) 1050 return 1051 for src_chain in src_chainlist: 1052 try: 1053 tgt_chain = list([chain for chain in tgt_chainlist if chain.start_time == src_chain.start_time])[0] 1054 except IndexError: 1055 tgt_chain = None 1056 1057 tgt_sigs = list(map(file_naming.parse, tgt_chain.get_filenames())) if tgt_chain else [] 1058 for src_sig_filename in src_chain.get_filenames(): 1059 src_sig = file_naming.parse(src_sig_filename) 1060 if not (src_sig.time or src_sig.end_time) < time: 1061 continue 1062 try: 1063 tgt_sigs.remove(src_sig) 1064 log.Info(_(u"Signature %s already replicated") % (src_sig_filename,)) 1065 continue 1066 except ValueError: 1067 pass 1068 if src_sig.type == u'new-sig': 1069 dup_time.setprevtime(src_sig.start_time) 1070 dup_time.setcurtime(src_sig.time or src_sig.end_time) 1071 log.Notice(_(u"Replicating %s.") % (src_sig_filename,)) 1072 fileobj = config.src_backend.get_fileobj_read(src_sig_filename) 1073 filename = file_naming.get(src_sig.type, encrypted=config.encryption, gzipped=config.compression) 1074 tdp = dup_temp.new_tempduppath(file_naming.parse(filename)) 1075 tmpobj = tdp.filtered_open(mode=u'wb') 1076 util.copyfileobj(fileobj, tmpobj) # decrypt, compress, (re)-encrypt 1077 fileobj.close() 1078 tmpobj.close() 1079 config.backend.put(tdp, filename) 1080 tdp.delete() 1081 1082 src_chainlist = src_stats.get_backup_chains(filename_list=src_list)[0] 1083 tgt_chainlist = tgt_stats.get_backup_chains(filename_list=tgt_list)[0] 1084 sorted(src_chainlist, key=lambda chain: chain.start_time) 1085 sorted(tgt_chainlist, key=lambda chain: chain.start_time) 1086 for src_chain in src_chainlist: 1087 try: 1088 tgt_chain = list([chain for chain in tgt_chainlist if chain.start_time == src_chain.start_time])[0] 1089 except IndexError: 1090 tgt_chain = None 1091 1092 tgt_sets = tgt_chain.get_all_sets() if tgt_chain else [] 1093 for src_set in src_chain.get_all_sets(): 1094 if not src_set.get_time() < time: 1095 continue 1096 try: 1097 tgt_sets.remove(src_set) 1098 log.Info(_(u"Backupset %s already replicated") % (src_set.remote_manifest_name,)) 1099 continue 1100 except ValueError: 1101 pass 1102 if src_set.type == u'inc': 1103 dup_time.setprevtime(src_set.start_time) 1104 dup_time.setcurtime(src_set.get_time()) 1105 rmf = src_set.get_remote_manifest() 1106 mf_filename = file_naming.get(src_set.type, manifest=True) 1107 mf_tdp = dup_temp.new_tempduppath(file_naming.parse(mf_filename)) 1108 mf = manifest.Manifest(fh=mf_tdp.filtered_open(mode=u'wb')) 1109 for i, filename in list(src_set.volume_name_dict.items()): 1110 log.Notice(_(u"Replicating %s.") % (filename,)) 1111 fileobj = restore_get_enc_fileobj(config.src_backend, filename, rmf.volume_info_dict[i]) 1112 filename = file_naming.get(src_set.type, i, encrypted=config.encryption, gzipped=config.compression) 1113 tdp = dup_temp.new_tempduppath(file_naming.parse(filename)) 1114 tmpobj = tdp.filtered_open(mode=u'wb') 1115 util.copyfileobj(fileobj, tmpobj) # decrypt, compress, (re)-encrypt 1116 fileobj.close() 1117 tmpobj.close() 1118 config.backend.put(tdp, filename) 1119 1120 vi = copy.copy(rmf.volume_info_dict[i]) 1121 vi.set_hash(u"SHA1", gpg.get_hash(u"SHA1", tdp)) 1122 mf.add_volume_info(vi) 1123 1124 tdp.delete() 1125 1126 mf.fh.close() 1127 # incremental GPG writes hang on close, so do any encryption here at once 1128 mf_fileobj = mf_tdp.filtered_open_with_delete(mode=u'rb') 1129 mf_final_filename = file_naming.get(src_set.type, 1130 manifest=True, 1131 encrypted=config.encryption, 1132 gzipped=config.compression) 1133 mf_final_tdp = dup_temp.new_tempduppath(file_naming.parse(mf_final_filename)) 1134 mf_final_fileobj = mf_final_tdp.filtered_open(mode=u'wb') 1135 util.copyfileobj(mf_fileobj, mf_final_fileobj) # compress, encrypt 1136 mf_fileobj.close() 1137 mf_final_fileobj.close() 1138 config.backend.put(mf_final_tdp, mf_final_filename) 1139 mf_final_tdp.delete() 1140 1141 config.src_backend.close() 1142 config.backend.close() 1143 1144 1145def sync_archive(col_stats): 1146 u""" 1147 Synchronize local archive manifest file and sig chains to remote archives. 1148 Copy missing files from remote to local as needed to make sure the local 1149 archive is synchronized to remote storage. 1150 1151 @rtype: void 1152 @return: void 1153 """ 1154 suffixes = [b".g", b".gpg", b".z", b".gz", b".part"] 1155 1156 def is_needed(filename): 1157 u"""Indicates if the metadata file should be synced. 1158 1159 In full sync mode, or if there's a collection misbehavior, all files 1160 are needed. 1161 1162 Otherwise, only the metadata for the target chain needs sync. 1163 """ 1164 if config.metadata_sync_mode == u"full": 1165 return True 1166 assert config.metadata_sync_mode == u"partial" 1167 parsed = file_naming.parse(filename) 1168 try: 1169 target_chain = col_stats.get_backup_chain_at_time( 1170 config.restore_time or dup_time.curtime) 1171 except dup_collections.CollectionsError: 1172 # With zero or multiple chains at this time, do a full sync 1173 return True 1174 if parsed.start_time is None and parsed.end_time is None: 1175 start_time = end_time = parsed.time 1176 else: 1177 start_time = parsed.start_time 1178 end_time = parsed.end_time 1179 1180 return end_time >= target_chain.start_time and \ 1181 start_time <= target_chain.end_time 1182 1183 def get_metafiles(filelist): 1184 u""" 1185 Return metafiles of interest from the file list. 1186 Files of interest are: 1187 sigtar - signature files 1188 manifest - signature files 1189 duplicity partial versions of the above 1190 Files excluded are: 1191 non-duplicity files 1192 1193 @rtype: list 1194 @return: list of duplicity metadata files 1195 """ 1196 metafiles = {} 1197 partials = {} 1198 need_passphrase = False 1199 for fn in filelist: 1200 pr = file_naming.parse(fn) 1201 if not pr: 1202 continue 1203 if pr.encrypted: 1204 need_passphrase = True 1205 if pr.type in [u"full-sig", u"new-sig"] or pr.manifest: 1206 base, ext = os.path.splitext(fn) 1207 if ext not in suffixes: 1208 base = fn 1209 if pr.partial: 1210 partials[base] = fn 1211 else: 1212 metafiles[base] = fn 1213 return metafiles, partials, need_passphrase 1214 1215 def copy_raw(src_iter, filename): 1216 u""" 1217 Copy data from src_iter to file at fn 1218 """ 1219 file = open(filename, u"wb") 1220 while True: 1221 try: 1222 data = src_iter.__next__().data 1223 except StopIteration: 1224 break 1225 file.write(data) 1226 file.close() 1227 1228 def resolve_basename(fn): 1229 u""" 1230 @return: (parsedresult, local_name, remote_name) 1231 """ 1232 pr = file_naming.parse(fn) 1233 1234 base, ext = os.path.splitext(fn) 1235 if ext not in suffixes: 1236 base = fn 1237 1238 suffix = file_naming.get_suffix(False, not pr.manifest) 1239 loc_name = base + suffix 1240 1241 return (pr, loc_name, fn) 1242 1243 def remove_local(fn): 1244 del_name = config.archive_dir_path.append(fn).name 1245 1246 log.Notice(_(u"Deleting local %s (not authoritative at backend).") % 1247 util.fsdecode(del_name)) 1248 try: 1249 util.ignore_missing(os.unlink, del_name) 1250 except Exception as e: 1251 log.Warn(_(u"Unable to delete %s: %s") % (util.fsdecode(del_name), 1252 util.uexc(e))) 1253 1254 def copy_to_local(fn): 1255 u""" 1256 Copy remote file fn to local cache. 1257 """ 1258 class Block(object): 1259 u""" 1260 Data block to return from SrcIter 1261 """ 1262 1263 def __init__(self, data): 1264 self.data = data 1265 1266 class SrcIter(object): 1267 u""" 1268 Iterate over source and return Block of data. 1269 """ 1270 1271 def __init__(self, fileobj): 1272 self.fileobj = fileobj 1273 1274 def __next__(self): 1275 try: 1276 res = Block(self.fileobj.read(self.get_read_size())) 1277 except Exception: 1278 if hasattr(self.fileobj, u'name'): 1279 name = self.fileobj.name 1280 # name may be a path 1281 if hasattr(name, u'name'): 1282 name = name.name 1283 else: 1284 name = None 1285 log.FatalError(_(u"Failed to read %s: %s") % 1286 (util.fsdecode(name), sys.exc_info()), 1287 log.ErrorCode.generic) 1288 if not res.data: 1289 self.fileobj.close() 1290 raise StopIteration 1291 return res 1292 1293 def get_read_size(self): 1294 return 128 * 1024 1295 1296 def get_footer(self): 1297 return b"" 1298 1299 log.Notice(_(u"Copying %s to local cache.") % util.fsdecode(fn)) 1300 1301 pr, loc_name, rem_name = resolve_basename(fn) 1302 1303 fileobj = config.backend.get_fileobj_read(fn) 1304 src_iter = SrcIter(fileobj) 1305 tdp = dup_temp.new_tempduppath(file_naming.parse(loc_name)) 1306 if pr.manifest: 1307 copy_raw(src_iter, tdp.name) 1308 else: 1309 gpg.GzipWriteFile(src_iter, tdp.name, size=sys.maxsize) 1310 tdp.setdata() 1311 tdp.move(config.archive_dir_path.append(loc_name)) 1312 1313 # get remote metafile list 1314 remlist = config.backend.list() 1315 remote_metafiles, ignored, rem_needpass = get_metafiles(remlist) 1316 1317 # get local metafile list 1318 loclist = config.archive_dir_path.listdir() 1319 local_metafiles, local_partials, loc_needpass = get_metafiles(loclist) 1320 1321 # we have the list of metafiles on both sides. remote is always 1322 # authoritative. figure out which are local spurious (should not 1323 # be there) and missing (should be there but are not). 1324 local_keys = list(local_metafiles.keys()) 1325 remote_keys = list(remote_metafiles.keys()) 1326 1327 local_missing = [] 1328 local_spurious = [] 1329 1330 for key in remote_keys: 1331 # If we lost our cache, re-get the remote file. But don't do it if we 1332 # already have a local partial. The local partial will already be 1333 # complete in this case (seems we got interrupted before we could move 1334 # it to its final location). 1335 if key not in local_keys and key not in local_partials and is_needed(key): 1336 local_missing.append(remote_metafiles[key]) 1337 1338 for key in local_keys: 1339 # If we have a file locally that is unnecessary, delete it. Also 1340 # delete final versions of partial files because if we have both, it 1341 # means the write of the final version got interrupted. 1342 if key not in remote_keys or key in local_partials: 1343 local_spurious.append(local_metafiles[key]) 1344 1345 # finally finish the process 1346 if not local_missing and not local_spurious: 1347 log.Notice(_(u"Local and Remote metadata are synchronized, no sync needed.")) 1348 else: 1349 local_missing.sort() 1350 local_spurious.sort() 1351 if not config.dry_run: 1352 log.Notice(_(u"Synchronizing remote metadata to local cache...")) 1353 if local_missing and (rem_needpass or loc_needpass): 1354 # password for the --encrypt-key 1355 config.gpg_profile.passphrase = get_passphrase(1, u"sync") 1356 for fn in local_spurious: 1357 remove_local(fn) 1358 if hasattr(config.backend, u'pre_process_download'): 1359 config.backend.pre_process_download(local_missing) 1360 for fn in local_missing: 1361 copy_to_local(fn) 1362 col_stats.set_values() 1363 else: 1364 if local_missing: 1365 log.Notice(_(u"Sync would copy the following from remote to local:") + 1366 u"\n" + u"\n".join(map(util.fsdecode, local_missing))) 1367 if local_spurious: 1368 log.Notice(_(u"Sync would remove the following spurious local files:") + 1369 u"\n" + u"\n".join(map(util.fsdecode, local_spurious))) 1370 1371 1372def check_last_manifest(col_stats): 1373 u""" 1374 Check consistency and hostname/directory of last manifest 1375 1376 @type col_stats: CollectionStatus object 1377 @param col_stats: collection status 1378 1379 @rtype: void 1380 @return: void 1381 """ 1382 assert col_stats.all_backup_chains 1383 last_backup_set = col_stats.all_backup_chains[-1].get_last() 1384 # check remote manifest only if we can decrypt it (see #1729796) 1385 check_remote = not config.encryption or config.gpg_profile.passphrase 1386 last_backup_set.check_manifests(check_remote=check_remote) 1387 1388 1389def check_resources(action): 1390 u""" 1391 Check for sufficient resources: 1392 - temp space for volume build 1393 - enough max open files 1394 Put out fatal error if not sufficient to run 1395 1396 @type action: string 1397 @param action: action in progress 1398 1399 @rtype: void 1400 @return: void 1401 """ 1402 if action in [u"full", u"inc", u"restore"]: 1403 # Make sure we have enough resouces to run 1404 # First check disk space in temp area. 1405 tempfile, tempname = tempdir.default().mkstemp() 1406 os.close(tempfile) 1407 # strip off the temp dir and file 1408 tempfs = os.path.sep.join(tempname.split(os.path.sep)[:-2]) 1409 try: 1410 stats = os.statvfs(tempfs) 1411 except Exception: 1412 log.FatalError(_(u"Unable to get free space on temp."), 1413 log.ErrorCode.get_freespace_failed) 1414 # Calculate space we need for at least 2 volumes of full or inc 1415 # plus about 30% of one volume for the signature files. 1416 freespace = stats.f_frsize * stats.f_bavail 1417 needspace = (((config.async_concurrency + 1) * config.volsize) + 1418 int(0.30 * config.volsize)) 1419 if freespace < needspace: 1420 log.FatalError(_(u"Temp space has %d available, backup needs approx %d.") % 1421 (freespace, needspace), log.ErrorCode.not_enough_freespace) 1422 else: 1423 log.Info(_(u"Temp has %d available, backup will use approx %d.") % 1424 (freespace, needspace)) 1425 1426 # Some environments like Cygwin run with an artificially 1427 # low value for max open files. Check for safe number. 1428 try: 1429 soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) 1430 except resource.error: 1431 log.FatalError(_(u"Unable to get max open files."), 1432 log.ErrorCode.get_ulimit_failed) 1433 maxopen = min([l for l in (soft, hard) if l > -1]) 1434 if maxopen < 1024: 1435 log.FatalError(_(u"Max open files of %s is too low, should be >= 1024.\n" 1436 u"Use 'ulimit -n 1024' or higher to correct.\n") % (maxopen,), 1437 log.ErrorCode.maxopen_too_low) 1438 1439 1440def log_startup_parms(verbosity=log.INFO): 1441 u""" 1442 log Python, duplicity, and system versions 1443 """ 1444 log.Log(u'=' * 80, verbosity) 1445 log.Log(u"duplicity %s" % __version__, verbosity) 1446 u_args = (util.fsdecode(arg) for arg in sys.argv) 1447 log.Log(u"Args: %s" % u' '.join(u_args), verbosity) 1448 log.Log(u' '.join(platform.uname()), verbosity) 1449 log.Log(u"%s %s" % (sys.executable or sys.platform, sys.version), verbosity) 1450 log.Log(u'=' * 80, verbosity) 1451 1452 1453class Restart(object): 1454 u""" 1455 Class to aid in restart of inc or full backup. 1456 Instance in config.restart if restart in progress. 1457 """ 1458 1459 def __init__(self, last_backup): 1460 self.type = None 1461 self.start_time = None 1462 self.end_time = None 1463 self.start_vol = None 1464 self.last_index = None 1465 self.last_block = None 1466 self.last_backup = last_backup 1467 self.setParms(last_backup) 1468 1469 def setParms(self, last_backup): 1470 if last_backup.time: 1471 self.type = u"full" 1472 self.time = last_backup.time 1473 else: 1474 self.type = u"inc" 1475 self.end_time = last_backup.end_time 1476 self.start_time = last_backup.start_time 1477 # We start one volume back in case we weren't able to finish writing 1478 # the most recent block. Actually checking if we did (via hash) would 1479 # involve downloading the block. Easier to just redo one block. 1480 self.start_vol = max(len(last_backup) - 1, 0) 1481 1482 def checkManifest(self, mf): 1483 mf_len = len(mf.volume_info_dict) 1484 if (mf_len != self.start_vol) or not (mf_len and self.start_vol): 1485 if self.start_vol == 0: 1486 # upload of 1st vol failed, clean and restart 1487 log.Notice(_(u"RESTART: The first volume failed to upload before termination.\n" 1488 u" Restart is impossible...starting backup from beginning.")) 1489 self.last_backup.delete() 1490 os.execve(sys.argv[0], sys.argv, os.environ) 1491 elif mf_len - self.start_vol > 0: 1492 # upload of N vols failed, fix manifest and restart 1493 log.Notice(_(u"RESTART: Volumes %d to %d failed to upload before termination.\n" 1494 u" Restarting backup at volume %d.") % 1495 (self.start_vol + 1, mf_len, self.start_vol + 1)) 1496 for vol in range(self.start_vol + 1, mf_len + 1): 1497 mf.del_volume_info(vol) 1498 else: 1499 # this is an 'impossible' state, remove last partial and restart 1500 log.Notice(_(u"RESTART: Impossible backup state: manifest has %d vols, remote has %d vols.\n" 1501 u" Restart is impossible ... duplicity will clean off the last partial\n" 1502 u" backup then restart the backup from the beginning.") % 1503 (mf_len, self.start_vol)) 1504 self.last_backup.delete() 1505 os.execve(sys.argv[0], sys.argv, os.environ) 1506 1507 def setLastSaved(self, mf): 1508 vi = mf.volume_info_dict[self.start_vol] 1509 self.last_index = vi.end_index 1510 self.last_block = vi.end_block or 0 1511 1512 1513def main(): 1514 u""" 1515 Start/end here 1516 """ 1517 # per bug https://bugs.launchpad.net/duplicity/+bug/931175 1518 # duplicity crashes when PYTHONOPTIMIZE is set, so check 1519 # and refuse to run if it is set. 1520 if u'PYTHONOPTIMIZE' in os.environ: 1521 log.FatalError(_(u""" 1522PYTHONOPTIMIZE in the environment causes duplicity to fail to 1523recognize its own backups. Please remove PYTHONOPTIMIZE from 1524the environment and rerun the backup. 1525 1526See https://bugs.launchpad.net/duplicity/+bug/931175 1527"""), log.ErrorCode.pythonoptimize_set) 1528 1529 # if python is run setuid, it's only partway set, 1530 # so make sure to run with euid/egid of root 1531 if os.geteuid() == 0: 1532 # make sure uid/gid match euid/egid 1533 os.setuid(os.geteuid()) 1534 os.setgid(os.getegid()) 1535 1536 # set the current time strings (make it available for command line processing) 1537 dup_time.setcurtime() 1538 1539 # determine what action we're performing and process command line 1540 action = commandline.ProcessCommandLine(sys.argv[1:]) 1541 1542 config.lockpath = os.path.join(config.archive_dir_path.name, b"lockfile") 1543 config.lockfile = fasteners.process_lock.InterProcessLock(config.lockpath) 1544 log.Debug(_(u"Acquiring lockfile %s") % config.lockpath) 1545 if not config.lockfile.acquire(blocking=False): 1546 log.FatalError( 1547 u"Another duplicity instance is already running with this archive directory\n", 1548 log.ErrorCode.user_error) 1549 log.shutdown() 1550 sys.exit(2) 1551 1552 try: 1553 do_backup(action) 1554 1555 finally: 1556 util.release_lockfile() 1557 1558 1559def do_backup(action): 1560 # set the current time strings again now that we have time separator 1561 if config.current_time: 1562 dup_time.setcurtime(config.current_time) 1563 else: 1564 dup_time.setcurtime() 1565 1566 # log some debugging status info 1567 log_startup_parms(log.INFO) 1568 1569 # check for disk space and available file handles 1570 check_resources(action) 1571 1572 # get current collection status 1573 col_stats = dup_collections.CollectionsStatus(config.backend, 1574 config.archive_dir_path, 1575 action).set_values() 1576 1577 # check archive synch with remote, fix if needed 1578 if action not in [u"collection-status", 1579 u"remove-all-but-n-full", 1580 u"remove-all-inc-of-but-n-full", 1581 u"remove-old", 1582 u"replicate", 1583 ]: 1584 sync_archive(col_stats) 1585 1586 while True: 1587 # if we have to clean up the last partial, then col_stats are invalidated 1588 # and we have to start the process all over again until clean. 1589 if action in [u"full", u"inc", u"cleanup"]: 1590 last_full_chain = col_stats.get_last_backup_chain() 1591 if not last_full_chain: 1592 break 1593 last_backup = last_full_chain.get_last() 1594 if last_backup.partial: 1595 if action in [u"full", u"inc"]: 1596 # set restart parms from last_backup info 1597 config.restart = Restart(last_backup) 1598 # (possibly) reset action 1599 action = config.restart.type 1600 # reset the time strings 1601 if action == u"full": 1602 dup_time.setcurtime(config.restart.time) 1603 else: 1604 dup_time.setcurtime(config.restart.end_time) 1605 dup_time.setprevtime(config.restart.start_time) 1606 # log it -- main restart heavy lifting is done in write_multivol 1607 log.Notice(_(u"Last %s backup left a partial set, restarting." % action)) 1608 break 1609 else: 1610 # remove last partial backup and get new collection status 1611 log.Notice(_(u"Cleaning up previous partial %s backup set, restarting." % action)) 1612 last_backup.delete() 1613 col_stats = dup_collections.CollectionsStatus(config.backend, 1614 config.archive_dir_path, 1615 action).set_values() 1616 continue 1617 break 1618 break 1619 1620 # OK, now we have a stable collection 1621 last_full_time = col_stats.get_last_full_backup_time() 1622 if last_full_time > 0: 1623 log.Notice(_(u"Last full backup date:") + u" " + dup_time.timetopretty(last_full_time)) 1624 else: 1625 log.Notice(_(u"Last full backup date: none")) 1626 if not config.restart and action == u"inc" and config.full_force_time is not None and \ 1627 last_full_time < config.full_force_time: 1628 log.Notice(_(u"Last full backup is too old, forcing full backup")) 1629 action = u"full" 1630 log.PrintCollectionStatus(col_stats) 1631 1632 # get the passphrase if we need to based on action/options 1633 config.gpg_profile.passphrase = get_passphrase(1, action) 1634 1635 if action == u"restore": 1636 restore(col_stats) 1637 elif action == u"verify": 1638 verify(col_stats) 1639 elif action == u"list-current": 1640 list_current(col_stats) 1641 elif action == u"collection-status": 1642 if not config.file_changed: 1643 log.PrintCollectionStatus(col_stats, True) 1644 else: 1645 log.PrintCollectionFileChangedStatus(col_stats, config.file_changed, True) 1646 elif action == u"cleanup": 1647 cleanup(col_stats) 1648 elif action == u"remove-old": 1649 remove_old(col_stats) 1650 elif action == u"remove-all-but-n-full" or action == u"remove-all-inc-of-but-n-full": 1651 remove_all_but_n_full(col_stats) 1652 elif action == u"sync": 1653 sync_archive(col_stats) 1654 elif action == u"replicate": 1655 replicate() 1656 else: 1657 assert action == u"inc" or action == u"full", action 1658 # the passphrase for full and inc is used by --sign-key 1659 # the sign key can have a different passphrase than the encrypt 1660 # key, therefore request a passphrase 1661 if config.gpg_profile.sign_key: 1662 config.gpg_profile.signing_passphrase = get_passphrase(1, action, True) 1663 1664 # if there are no recipients (no --encrypt-key), it must be a 1665 # symmetric key. Therefore, confirm the passphrase 1666 if not (config.gpg_profile.recipients or config.gpg_profile.hidden_recipients): 1667 config.gpg_profile.passphrase = get_passphrase(2, action) 1668 # a limitation in the GPG implementation does not allow for 1669 # inputting different passphrases, this affects symmetric+sign. 1670 # Allow an empty passphrase for the key though to allow a non-empty 1671 # symmetric key 1672 if (config.gpg_profile.signing_passphrase and 1673 config.gpg_profile.passphrase != config.gpg_profile.signing_passphrase): 1674 log.FatalError(_( 1675 u"When using symmetric encryption, the signing passphrase " 1676 u"must equal the encryption passphrase."), 1677 log.ErrorCode.user_error) 1678 1679 if action == u"full": 1680 full_backup(col_stats) 1681 else: # attempt incremental 1682 sig_chain = check_sig_chain(col_stats) 1683 # action == "inc" was requested, but no full backup is available 1684 if not sig_chain: 1685 full_backup(col_stats) 1686 else: 1687 if not config.restart: 1688 # only ask for a passphrase if there was a previous backup 1689 if col_stats.all_backup_chains: 1690 config.gpg_profile.passphrase = get_passphrase(1, action) 1691 check_last_manifest(col_stats) # not needed for full backups 1692 incremental_backup(sig_chain) 1693 config.backend.close() 1694 log.shutdown() 1695 if exit_val is not None: 1696 sys.exit(exit_val) 1697