1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*- 2# 3# Copyright 2002 Ben Escoto <ben@emerose.org> 4# Copyright 2007 Kenneth Loafman <kenneth@loafman.com> 5# 6# This file is part of duplicity. 7# 8# Duplicity is free software; you can redistribute it and/or modify it 9# under the terms of the GNU General Public License as published by the 10# Free Software Foundation; either version 2 of the License, or (at your 11# option) any later version. 12# 13# Duplicity is distributed in the hope that it will be useful, but 14# WITHOUT ANY WARRANTY; without even the implied warranty of 15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16# General Public License for more details. 17# 18# You should have received a copy of the GNU General Public License 19# along with duplicity; if not, write to the Free Software Foundation, 20# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 21 22u"""Classes and functions on collections of backup volumes""" 23 24from builtins import str 25from builtins import zip 26from builtins import map 27from builtins import range 28from builtins import object 29 30import sys 31 32from duplicity import log 33from duplicity import file_naming 34from duplicity import path 35from duplicity import util 36from duplicity import dup_time 37from duplicity import config 38from duplicity import manifest 39from duplicity import util 40from duplicity.gpg import GPGError 41 42# For type testing against both int and long types that works in python 2/3 43if sys.version_info < (3,): 44 integer_types = (int, int) 45else: 46 integer_types = (int,) 47 48 49class CollectionsError(Exception): 50 pass 51 52 53class BackupSet(object): 54 u""" 55 Backup set - the backup information produced by one session 56 """ 57 def __init__(self, backend, action): 58 u""" 59 Initialize new backup set, only backend is required at first 60 """ 61 self.backend = backend 62 self.info_set = False # true if fields are set 63 self.volume_name_dict = {} # dict from volume number to filename 64 self.remote_manifest_name = None # full name of remote manifest 65 self.local_manifest_path = None # full path to local manifest 66 self.time = None # will be set if is full backup set 67 self.start_time = None # will be set if inc 68 self.end_time = None # will be set if inc 69 self.partial = False # true if a partial backup 70 self.encrypted = False # true if an encrypted backup 71 self.files_changed = [] 72 self.action = action 73 74 def is_complete(self): 75 u""" 76 Assume complete if found manifest file 77 """ 78 return self.remote_manifest_name 79 80 def add_filename(self, filename, pr=None): 81 u""" 82 Add a filename to given set. Return true if it fits. 83 84 The filename will match the given set if it has the right 85 times and is of the right type. The information will be set 86 from the first filename given. 87 88 @param filename: name of file to add 89 @type filename: string 90 91 @param pr: pre-computed result of file_naming.parse(filename) 92 @type pr: Optional[ParseResults] 93 """ 94 if not pr: 95 pr = file_naming.parse(filename) 96 if not pr or not (pr.type == u"full" or pr.type == u"inc"): 97 return False 98 99 if not self.info_set: 100 self.set_info(pr) 101 else: 102 if pr.type != self.type: 103 return False 104 if pr.time != self.time: 105 return False 106 if (pr.start_time != self.start_time or 107 pr.end_time != self.end_time): 108 return False 109 if bool(pr.encrypted) != bool(self.encrypted): 110 if self.partial and pr.encrypted: 111 self.encrypted = pr.encrypted 112 113 if pr.manifest: 114 self.set_manifest(filename) 115 else: 116 assert pr.volume_number is not None 117 assert pr.volume_number not in self.volume_name_dict, \ 118 (self.volume_name_dict, filename) 119 self.volume_name_dict[pr.volume_number] = filename 120 121 return True 122 123 def set_info(self, pr): 124 u""" 125 Set BackupSet information from ParseResults object 126 127 @param pr: parse results 128 @type pf: ParseResults 129 """ 130 assert not self.info_set 131 self.type = pr.type 132 self.time = pr.time 133 self.start_time = pr.start_time 134 self.end_time = pr.end_time 135 self.time = pr.time 136 self.partial = pr.partial 137 self.encrypted = bool(pr.encrypted) 138 self.info_set = True 139 140 def set_files_changed(self): 141 mf = self.get_manifest() 142 self.files_changed = mf.get_files_changed() 143 144 def set_manifest(self, remote_filename): 145 u""" 146 Add local and remote manifest filenames to backup set 147 """ 148 assert not self.remote_manifest_name, \ 149 u"Cannot set filename of remote manifest to %s; already set to %s." % ( 150 remote_filename, 151 self.remote_manifest_name, 152 ) 153 self.remote_manifest_name = remote_filename 154 155 if self.action != u"replicate": 156 local_filename_list = config.archive_dir_path.listdir() 157 else: 158 local_filename_list = [] 159 for local_filename in local_filename_list: 160 pr = file_naming.parse(local_filename) 161 if (pr and pr.manifest and pr.type == self.type and 162 pr.time == self.time and 163 pr.start_time == self.start_time and 164 pr.end_time == self.end_time): 165 self.local_manifest_path = \ 166 config.archive_dir_path.append(local_filename) 167 168 self.set_files_changed() 169 break 170 171 def delete(self): 172 u""" 173 Remove all files in set, both local and remote 174 """ 175 rfn = self.get_filenames() 176 rfn.reverse() 177 try: 178 self.backend.delete(rfn) 179 except Exception: 180 log.Debug(_(u"BackupSet.delete: missing %s") % [util.fsdecode(f) for f in rfn]) 181 pass 182 if self.action != u"replicate": 183 local_filename_list = config.archive_dir_path.listdir() 184 else: 185 local_filename_list = [] 186 for lfn in local_filename_list: 187 pr = file_naming.parse(lfn) 188 if (pr and pr.time == self.time and 189 pr.start_time == self.start_time and 190 pr.end_time == self.end_time): 191 try: 192 config.archive_dir_path.append(lfn).delete() 193 except Exception: 194 log.Debug(_(u"BackupSet.delete: missing %s") % [util.fsdecode(f) for f in lfn]) 195 pass 196 util.release_lockfile() 197 198 def __str__(self): 199 u""" 200 For now just list files in set 201 """ 202 filelist = [] 203 if self.remote_manifest_name: 204 filelist.append(self.remote_manifest_name) 205 filelist.extend(list(self.volume_name_dict.values())) 206 return u"[%s]" % u", ".join(map(util.fsdecode, filelist)) 207 208 def get_timestr(self): 209 u""" 210 Return time string suitable for log statements 211 """ 212 return dup_time.timetopretty(self.time or self.end_time) 213 214 def check_manifests(self, check_remote=True): 215 u""" 216 Make sure remote manifest is equal to local one 217 """ 218 if not self.remote_manifest_name and not self.local_manifest_path: 219 log.FatalError(_(u"Fatal Error: No manifests found for most recent backup"), 220 log.ErrorCode.no_manifests) 221 assert self.remote_manifest_name, u"if only one, should be remote" 222 223 remote_manifest = self.get_remote_manifest() if check_remote else None 224 if self.local_manifest_path: 225 local_manifest = self.get_local_manifest() 226 if remote_manifest and self.local_manifest_path and local_manifest: 227 if remote_manifest != local_manifest: 228 log.FatalError(_(u"Fatal Error: Remote manifest does not match " 229 u"local one. Either the remote backup set or " 230 u"the local archive directory has been corrupted."), 231 log.ErrorCode.mismatched_manifests) 232 if not remote_manifest: 233 if self.local_manifest_path: 234 remote_manifest = local_manifest 235 else: 236 log.FatalError(_(u"Fatal Error: Neither remote nor local " 237 u"manifest is readable."), 238 log.ErrorCode.unreadable_manifests) 239 remote_manifest.check_dirinfo() 240 241 def get_local_manifest(self): 242 u""" 243 Return manifest object by reading local manifest file 244 """ 245 assert self.local_manifest_path 246 manifest_buffer = self.local_manifest_path.get_data() 247 log.Info(_(u"Processing local manifest %s (%s)") % ( 248 self.local_manifest_path.name, len(manifest_buffer))) 249 return manifest.Manifest().from_string(manifest_buffer) 250 251 def get_remote_manifest(self): 252 u""" 253 Return manifest by reading remote manifest on backend 254 """ 255 assert self.remote_manifest_name 256 try: 257 manifest_buffer = self.backend.get_data(self.remote_manifest_name) 258 except GPGError as message: 259 log.Error(_(u"Error processing remote manifest (%s): %s") % 260 (util.fsdecode(self.remote_manifest_name), util.uexc(message))) 261 return None 262 log.Info(_(u"Processing remote manifest %s (%s)") % ( 263 util.fsdecode(self.remote_manifest_name), len(manifest_buffer))) 264 return manifest.Manifest().from_string(manifest_buffer) 265 266 def get_manifest(self): 267 u""" 268 Return manifest object, showing preference for local copy 269 """ 270 if self.local_manifest_path: 271 return self.get_local_manifest() 272 else: 273 return self.get_remote_manifest() 274 275 def get_filenames(self): 276 u""" 277 Return sorted list of (remote) filenames of files in set 278 """ 279 assert self.info_set 280 volume_num_list = list(self.volume_name_dict.keys()) 281 volume_num_list.sort() 282 volume_filenames = [self.volume_name_dict[x] for x in volume_num_list] 283 if self.remote_manifest_name: 284 # For convenience of implementation for restart support, we treat 285 # local partial manifests as this set's remote manifest. But 286 # when specifically asked for a list of remote filenames, we 287 # should not include it. 288 pr = file_naming.parse(self.remote_manifest_name) 289 if pr and not pr.partial: 290 volume_filenames.append(self.remote_manifest_name) 291 return volume_filenames 292 293 def get_time(self): 294 u""" 295 Return time if full backup, or end_time if incremental 296 """ 297 if self.time: 298 return self.time 299 if self.end_time: 300 return self.end_time 301 assert 0, u"Neither self.time nor self.end_time set" 302 303 def get_files_changed(self): 304 return self.files_changed 305 306 def __len__(self): 307 u""" 308 Return the number of volumes in the set 309 """ 310 return len(list(self.volume_name_dict.keys())) 311 312 def __eq__(self, other): 313 u""" 314 Return whether this backup set is equal to other 315 """ 316 return self.type == other.type and \ 317 self.time == other.time and \ 318 self.start_time == other.start_time and \ 319 self.end_time == other.end_time and \ 320 len(self) == len(other) 321 322 323class BackupChain(object): 324 u""" 325 BackupChain - a number of linked BackupSets 326 327 A BackupChain always starts with a full backup set and continues 328 with incremental ones. 329 """ 330 def __init__(self, backend): 331 u""" 332 Initialize new chain, only backend is required at first 333 """ 334 self.backend = backend 335 self.fullset = None 336 self.incset_list = [] # sorted list of BackupSets 337 self.start_time, self.end_time = None, None 338 339 def set_full(self, fullset): 340 u""" 341 Add full backup set 342 """ 343 assert not self.fullset and isinstance(fullset, BackupSet) 344 self.fullset = fullset 345 assert fullset.time 346 self.start_time, self.end_time = fullset.time, fullset.time 347 348 def add_inc(self, incset): 349 u""" 350 Add incset to self. Return False if incset does not match 351 """ 352 if self.end_time == incset.start_time: 353 self.incset_list.append(incset) 354 else: 355 if (self.incset_list and 356 incset.start_time == self.incset_list[-1].start_time and 357 incset.end_time > self.incset_list[-1].end_time): 358 log.Info(_(u"Preferring Backupset over previous one!")) 359 self.incset_list[-1] = incset 360 else: 361 log.Info(_(u"Ignoring incremental Backupset (start_time: %s; needed: %s)") % 362 (dup_time.timetopretty(incset.start_time), 363 dup_time.timetopretty(self.end_time))) 364 return False 365 self.end_time = incset.end_time 366 log.Info(_(u"Added incremental Backupset (start_time: %s / end_time: %s)") % 367 (dup_time.timetopretty(incset.start_time), 368 dup_time.timetopretty(incset.end_time))) 369 assert self.end_time 370 return True 371 372 def delete(self, keep_full=False): 373 u""" 374 Delete all sets in chain, in reverse order 375 """ 376 for i in range(len(self.incset_list) - 1, -1, -1): 377 self.incset_list[i].delete() 378 if self.fullset and not keep_full: 379 self.fullset.delete() 380 381 def get_sets_at_time(self, time): 382 u""" 383 Return a list of sets in chain earlier or equal to time 384 """ 385 older_incsets = [s for s in self.incset_list if s.end_time <= time] 386 return [self.fullset] + older_incsets 387 388 def get_last(self): 389 u""" 390 Return last BackupSet in chain 391 """ 392 if self.incset_list: 393 return self.incset_list[-1] 394 else: 395 return self.fullset 396 397 def get_first(self): 398 u""" 399 Return first BackupSet in chain (ie the full backup) 400 """ 401 return self.fullset 402 403 def short_desc(self): 404 u""" 405 Return a short one-line description of the chain, 406 suitable for log messages. 407 """ 408 return u"[%s]-[%s]" % (dup_time.timetopretty(self.start_time), 409 dup_time.timetopretty(self.end_time)) 410 411 def to_log_info(self, prefix=u''): 412 u""" 413 Return summary, suitable for printing to log 414 """ 415 l = [] 416 for s in self.get_all_sets(): 417 if s.time: 418 btype = u"full" 419 time = s.time 420 else: 421 btype = u"inc" 422 time = s.end_time 423 if s.encrypted: 424 enc = u"enc" 425 else: 426 enc = u"noenc" 427 l.append(u"%s%s %s %d %s" % (prefix, btype, dup_time.timetostring(time), (len(s)), enc)) 428 return l 429 430 def __str__(self): 431 u""" 432 Return string representation, for testing purposes 433 """ 434 set_schema = u"%20s %30s %15s" 435 l = [u"-------------------------", 436 _(u"Chain start time: ") + dup_time.timetopretty(self.start_time), 437 _(u"Chain end time: ") + dup_time.timetopretty(self.end_time), 438 _(u"Number of contained backup sets: %d") % 439 (len(self.incset_list) + 1,), 440 _(u"Total number of contained volumes: %d") % 441 (self.get_num_volumes(),), 442 set_schema % (_(u"Type of backup set:"), _(u"Time:"), _(u"Num volumes:"))] 443 444 for s in self.get_all_sets(): 445 if s.time: 446 btype = _(u"Full") 447 time = s.time 448 else: 449 btype = _(u"Incremental") 450 time = s.end_time 451 l.append(set_schema % (btype, dup_time.timetopretty(time), len(s))) 452 453 l.append(u"-------------------------") 454 return u"\n".join(l) 455 456 def get_num_volumes(self): 457 u""" 458 Return the total number of volumes in the chain 459 """ 460 n = 0 461 for s in self.get_all_sets(): 462 n += len(s) 463 return n 464 465 def get_all_sets(self): 466 u""" 467 Return list of all backup sets in chain 468 """ 469 if self.fullset: 470 return [self.fullset] + self.incset_list 471 else: 472 return self.incset_list 473 474 475class SignatureChain(object): 476 u""" 477 A number of linked SignatureSets 478 479 Analog to BackupChain - start with a full-sig, and continue with 480 new-sigs. 481 """ 482 def __init__(self, local, location): 483 u""" 484 Return new SignatureChain. 485 486 local should be true iff the signature chain resides in 487 config.archive_dir_path and false if the chain is in 488 config.backend. 489 490 @param local: True if sig chain in config.archive_dir_path 491 @type local: Boolean 492 493 @param location: Where the sig chain is located 494 @type location: config.archive_dir_path or config.backend 495 """ 496 if local: 497 self.archive_dir_path, self.backend = location, None 498 else: 499 self.archive_dir_path, self.backend = None, location 500 self.fullsig = None # filename of full signature 501 self.inclist = [] # list of filenames of incremental signatures 502 self.start_time, self.end_time = None, None 503 504 def __str__(self): 505 u""" 506 Local or Remote and List of files in the set 507 """ 508 if self.archive_dir_path: 509 place = _(u"local") 510 else: 511 place = _(u"remote") 512 filelist = [] 513 if self.fullsig: 514 filelist.append(self.fullsig) 515 filelist.extend(self.inclist) 516 return u"%s: [%s]" % (place, u", ".join(filelist)) 517 518 def check_times(self, time_list): 519 u""" 520 Check to make sure times are in whole seconds 521 """ 522 for time in time_list: 523 if type(time) not in integer_types: 524 assert 0, u"Time %s in %s wrong type" % (time, time_list) 525 526 def islocal(self): 527 u""" 528 Return true if represents a signature chain in archive_dir_path 529 """ 530 if self.archive_dir_path: 531 return True 532 else: 533 return False 534 535 def add_filename(self, filename, pr=None): 536 u""" 537 Add new sig filename to current chain. Return true if fits 538 """ 539 if not pr: 540 pr = file_naming.parse(filename) 541 if not pr: 542 return None 543 544 if self.fullsig: 545 if pr.type != u"new-sig": 546 return None 547 if pr.start_time != self.end_time: 548 return None 549 self.inclist.append(filename) 550 self.check_times([pr.end_time]) 551 self.end_time = pr.end_time 552 return 1 553 else: 554 if pr.type != u"full-sig": 555 return None 556 self.fullsig = filename 557 self.check_times([pr.time, pr.time]) 558 self.start_time, self.end_time = pr.time, pr.time 559 return 1 560 561 def get_fileobjs(self, time=None): 562 u""" 563 Return ordered list of signature fileobjs opened for reading, 564 optionally at a certain time 565 """ 566 assert self.fullsig 567 if self.archive_dir_path: # local 568 def filename_to_fileobj(filename): 569 u"""Open filename in archive_dir_path, return filtered fileobj""" 570 sig_dp = path.DupPath(self.archive_dir_path.name, (filename,)) 571 return sig_dp.filtered_open(u"rb") 572 else: 573 filename_to_fileobj = self.backend.get_fileobj_read 574 return [filename_to_fileobj(f) for f in self.get_filenames(time)] 575 576 def delete(self, keep_full=False): 577 u""" 578 Remove all files in signature set 579 """ 580 # Try to delete in opposite order, so something useful even if aborted 581 if self.archive_dir_path: 582 for i in range(len(self.inclist) - 1, -1, -1): 583 self.archive_dir_path.append(self.inclist[i]).delete() 584 if not keep_full: 585 self.archive_dir_path.append(self.fullsig).delete() 586 else: 587 assert self.backend 588 inclist_copy = self.inclist[:] 589 inclist_copy.reverse() 590 if not keep_full: 591 inclist_copy.append(self.fullsig) 592 self.backend.delete(inclist_copy) 593 594 def get_filenames(self, time=None): 595 u""" 596 Return ordered list of filenames in set, up to a provided time 597 """ 598 if self.fullsig: 599 l = [self.fullsig] 600 else: 601 l = [] 602 603 inclist = self.inclist 604 if time: 605 inclist = [n for n in inclist if file_naming.parse(n).end_time <= time] 606 607 l.extend(inclist) 608 return l 609 610 611class CollectionsStatus(object): 612 u""" 613 Hold information about available chains and sets 614 """ 615 def __init__(self, backend, archive_dir_path, action): 616 u""" 617 Make new object. Does not set values 618 """ 619 self.backend = backend 620 self.archive_dir_path = archive_dir_path 621 self.action = action 622 623 # Will hold (signature chain, backup chain) pair of active 624 # (most recent) chains 625 self.matched_chain_pair = None 626 627 # These should be sorted by end_time 628 self.all_backup_chains = None 629 self.other_backup_chains = None 630 self.all_sig_chains = None 631 632 # Other misc paths and sets which shouldn't be there 633 self.local_orphaned_sig_names = [] 634 self.remote_orphaned_sig_names = [] 635 self.orphaned_backup_sets = None 636 self.incomplete_backup_sets = None 637 638 # True if set_values() below has run 639 self.values_set = None 640 641 def to_log_info(self): 642 u""" 643 Return summary of the collection, suitable for printing to log 644 """ 645 l = [u"backend %s" % (self.backend.__class__.__name__,), 646 u"archive-dir %s" % (self.archive_dir_path,)] 647 648 for i in range(len(self.other_backup_chains)): 649 # A bit of a misnomer. Chain might have a sig. 650 l.append(u"chain-no-sig %d" % (i,)) 651 l += self.other_backup_chains[i].to_log_info(u' ') 652 653 if self.matched_chain_pair: 654 l.append(u"chain-complete") 655 l += self.matched_chain_pair[1].to_log_info(u' ') 656 657 l.append(u"orphaned-sets-num %d" % (len(self.orphaned_backup_sets),)) 658 l.append(u"incomplete-sets-num %d" % (len(self.incomplete_backup_sets),)) 659 660 return l 661 662 def __str__(self): 663 u""" 664 Return string summary of the collection 665 """ 666 l = [_(u"Collection Status"), 667 u"-----------------", 668 _(u"Connecting with backend: %s") % 669 (self.backend.__class__.__name__,), 670 _(u"Archive dir: %s") % (self.archive_dir_path.uc_name if self.archive_dir_path else u'None',)] 671 672 l.append(u"\n" + 673 ngettext(u"Found %d secondary backup chain.", 674 u"Found %d secondary backup chains.", 675 len(self.other_backup_chains)) 676 % len(self.other_backup_chains)) 677 for i in range(len(self.other_backup_chains)): 678 l.append(_(u"Secondary chain %d of %d:") % 679 (i + 1, len(self.other_backup_chains))) 680 l.append(str(self.other_backup_chains[i])) 681 l.append(u"") 682 683 if self.matched_chain_pair: 684 l.append(u"\n" + _(u"Found primary backup chain with matching " 685 u"signature chain:")) 686 l.append(str(self.matched_chain_pair[1])) 687 else: 688 l.append(_(u"No backup chains with active signatures found")) 689 690 if self.orphaned_backup_sets or self.incomplete_backup_sets: 691 l.append(ngettext(u"Also found %d backup set not part of any chain,", 692 u"Also found %d backup sets not part of any chain,", 693 len(self.orphaned_backup_sets)) 694 % (len(self.orphaned_backup_sets),)) 695 l.append(ngettext(u"and %d incomplete backup set.", 696 u"and %d incomplete backup sets.", 697 len(self.incomplete_backup_sets)) 698 % (len(self.incomplete_backup_sets),)) 699 # TRANSL: "cleanup" is a hard-coded command, so do not translate it 700 l.append(_(u'These may be deleted by running duplicity with the ' 701 u'"cleanup" command.')) 702 else: 703 l.append(_(u"No orphaned or incomplete backup sets found.")) 704 705 return u"\n".join(l) 706 707 def set_values(self, sig_chain_warning=1): 708 u""" 709 Set values from archive_dir_path and backend. 710 711 Returns self for convenience. If sig_chain_warning is set to None, 712 do not warn about unnecessary sig chains. This is because there may 713 naturally be some unecessary ones after a full backup. 714 """ 715 self.values_set = 1 716 717 # get remote filename list 718 backend_filename_list = self.backend.list() 719 log.Debug(ngettext(u"%d file exists on backend", 720 u"%d files exist on backend", 721 len(backend_filename_list)) % 722 len(backend_filename_list)) 723 724 # get local filename list 725 if self.action != u"replicate": 726 local_filename_list = self.archive_dir_path.listdir() 727 else: 728 local_filename_list = [] 729 log.Debug(ngettext(u"%d file exists in cache", 730 u"%d files exist in cache", 731 len(local_filename_list)) % 732 len(local_filename_list)) 733 734 # check for partial backups 735 partials = [] 736 for local_filename in local_filename_list: 737 pr = file_naming.parse(local_filename) 738 if pr and pr.partial: 739 partials.append(local_filename) 740 741 # get various backup sets and chains 742 (backup_chains, self.orphaned_backup_sets, 743 self.incomplete_backup_sets) = \ 744 self.get_backup_chains(partials + backend_filename_list) 745 backup_chains = self.get_sorted_chains(backup_chains) 746 self.all_backup_chains = backup_chains 747 748 assert len(backup_chains) == len(self.all_backup_chains), \ 749 u"get_sorted_chains() did something more than re-ordering" 750 751 local_sig_chains, self.local_orphaned_sig_names = \ 752 self.get_signature_chains(True) 753 remote_sig_chains, self.remote_orphaned_sig_names = \ 754 self.get_signature_chains(False, filelist=backend_filename_list) 755 self.set_matched_chain_pair(local_sig_chains + remote_sig_chains, 756 backup_chains) 757 self.warn(sig_chain_warning) 758 return self 759 760 def set_matched_chain_pair(self, sig_chains, backup_chains): 761 u""" 762 Set self.matched_chain_pair and self.other_sig/backup_chains 763 764 The latest matched_chain_pair will be set. If there are both 765 remote and local signature chains capable of matching the 766 latest backup chain, use the local sig chain (it does not need 767 to be downloaded). 768 """ 769 sig_chains = sig_chains and self.get_sorted_chains(sig_chains) 770 self.all_sig_chains = sig_chains 771 self.other_backup_chains = backup_chains[:] 772 self.matched_chain_pair = None 773 if sig_chains and backup_chains: 774 latest_backup_chain = backup_chains[-1] 775 for i in range(len(sig_chains) - 1, -1, -1): 776 if sig_chains[i].end_time == latest_backup_chain.end_time: 777 pass 778 # See if the set before last matches: 779 elif (len(latest_backup_chain.get_all_sets()) >= 2 and 780 sig_chains[i].end_time == latest_backup_chain.get_all_sets()[-2].end_time): 781 # It matches, remove the last backup set: 782 log.Warn(_(u"Warning, discarding last backup set, because " 783 u"of missing signature file.")) 784 self.incomplete_backup_sets.append(latest_backup_chain.incset_list[-1]) 785 latest_backup_chain.incset_list = latest_backup_chain.incset_list[:-1] 786 else: 787 continue 788 789 # Found a matching pair: 790 if self.matched_chain_pair is None: 791 self.matched_chain_pair = (sig_chains[i], latest_backup_chain) 792 793 break 794 795 if self.matched_chain_pair: 796 self.other_backup_chains.remove(self.matched_chain_pair[1]) 797 798 def warn(self, sig_chain_warning): 799 u""" 800 Log various error messages if find incomplete/orphaned files 801 """ 802 assert self.values_set 803 804 if self.local_orphaned_sig_names: 805 log.Warn(ngettext(u"Warning, found the following local orphaned " 806 u"signature file:", 807 u"Warning, found the following local orphaned " 808 u"signature files:", 809 len(self.local_orphaned_sig_names)) + u"\n" + 810 u"\n".join(map(util.fsdecode, self.local_orphaned_sig_names)), 811 log.WarningCode.orphaned_sig) 812 813 if self.remote_orphaned_sig_names: 814 log.Warn(ngettext(u"Warning, found the following remote orphaned " 815 u"signature file:", 816 u"Warning, found the following remote orphaned " 817 u"signature files:", 818 len(self.remote_orphaned_sig_names)) + u"\n" + 819 u"\n".join(map(util.fsdecode, self.remote_orphaned_sig_names)), 820 log.WarningCode.orphaned_sig) 821 822 if self.all_sig_chains and sig_chain_warning and not self.matched_chain_pair: 823 log.Warn(_(u"Warning, found signatures but no corresponding " 824 u"backup files"), log.WarningCode.unmatched_sig) 825 826 if self.incomplete_backup_sets: 827 log.Warn(_(u"Warning, found incomplete backup sets, probably left " 828 u"from aborted session"), log.WarningCode.incomplete_backup) 829 830 if self.orphaned_backup_sets: 831 log.Warn(ngettext(u"Warning, found the following orphaned " 832 u"backup file:", 833 u"Warning, found the following orphaned " 834 u"backup files:", 835 len(self.orphaned_backup_sets)) + u"\n" + 836 u"\n".join(map(str, self.orphaned_backup_sets)), 837 log.WarningCode.orphaned_backup) 838 839 def get_backup_chains(self, filename_list): 840 u""" 841 Split given filename_list into chains 842 843 Return value will be tuple (list of chains, list of sets, list 844 of incomplete sets), where the list of sets will comprise sets 845 not fitting into any chain, and the incomplete sets are sets 846 missing files. 847 """ 848 log.Debug(_(u"Extracting backup chains from list of files: %s") 849 % [util.fsdecode(f) for f in filename_list]) 850 # First put filenames in set form 851 sets = [] 852 853 def add_to_sets(filename): 854 u""" 855 Try adding filename to existing sets, or make new one 856 """ 857 pr = file_naming.parse(filename) 858 for set in sets: # pylint: disable=redefined-builtin 859 if set.add_filename(filename, pr): 860 log.Debug(_(u"File %s is part of known set") % (util.fsdecode(filename),)) 861 break 862 else: 863 log.Debug(_(u"File %s is not part of a known set; creating new set") % (util.fsdecode(filename),)) 864 new_set = BackupSet(self.backend, self.action) 865 if new_set.add_filename(filename, pr): 866 sets.append(new_set) 867 else: 868 log.Debug(_(u"Ignoring file (rejected by backup set) '%s'") % util.fsdecode(filename)) 869 870 for f in filename_list: 871 add_to_sets(f) 872 sets, incomplete_sets = self.get_sorted_sets(sets) 873 874 chains, orphaned_sets = [], [] 875 876 def add_to_chains(set): # pylint: disable=redefined-builtin 877 u""" 878 Try adding set to existing chains, or make new one 879 """ 880 if set.type == u"full": 881 new_chain = BackupChain(self.backend) 882 new_chain.set_full(set) 883 chains.append(new_chain) 884 log.Debug(_(u"Found backup chain %s") % (new_chain.short_desc())) 885 else: 886 assert set.type == u"inc" 887 for chain in chains: 888 if chain.add_inc(set): 889 log.Debug(_(u"Added set %s to pre-existing chain %s") % (set.get_timestr(), 890 chain.short_desc())) 891 break 892 else: 893 log.Debug(_(u"Found orphaned set %s") % (set.get_timestr(),)) 894 orphaned_sets.append(set) 895 for s in sets: 896 add_to_chains(s) 897 return (chains, orphaned_sets, incomplete_sets) 898 899 def get_sorted_sets(self, set_list): 900 u""" 901 Sort set list by end time, return (sorted list, incomplete) 902 """ 903 time_set_pairs, incomplete_sets = [], [] 904 for set in set_list: # pylint: disable=redefined-builtin 905 if not set.is_complete(): 906 incomplete_sets.append(set) 907 elif set.type == u"full": 908 time_set_pairs.append((set.time, set)) 909 else: 910 time_set_pairs.append((set.end_time, set)) 911 time_set_pairs.sort(key=lambda x: x[0]) 912 return ([p[1] for p in time_set_pairs], incomplete_sets) 913 914 def get_signature_chains(self, local, filelist=None): 915 u""" 916 Find chains in archive_dir_path (if local is true) or backend 917 918 Use filelist if given, otherwise regenerate. Return value is 919 pair (list of chains, list of signature paths not in any 920 chains). 921 """ 922 def get_filelist(): 923 if filelist is not None: 924 return filelist 925 elif local: 926 if self.action != u"replicate": 927 return self.archive_dir_path.listdir() 928 else: 929 return [] 930 else: 931 return self.backend.list() 932 933 def get_new_sigchain(): 934 u""" 935 Return new empty signature chain 936 """ 937 if local: 938 return SignatureChain(True, self.archive_dir_path) 939 else: 940 return SignatureChain(False, self.backend) 941 942 # Build initial chains from full sig filenames 943 chains, new_sig_filenames = [], [] 944 for filename in get_filelist(): 945 pr = file_naming.parse(filename) 946 if pr: 947 if pr.type == u"full-sig": 948 new_chain = get_new_sigchain() 949 assert new_chain.add_filename(filename, pr) 950 chains.append(new_chain) 951 elif pr.type == u"new-sig": 952 new_sig_filenames.append(filename) 953 954 # Try adding new signatures to existing chains 955 orphaned_filenames = [] 956 new_sig_filenames.sort(key=lambda x: int(file_naming.parse(x).start_time)) 957 for sig_filename in new_sig_filenames: 958 for chain in chains: 959 if chain.add_filename(sig_filename): 960 break 961 else: 962 orphaned_filenames.append(sig_filename) 963 return (chains, orphaned_filenames) 964 965 def get_sorted_chains(self, chain_list): 966 u""" 967 Return chains sorted by end_time. If tie, local goes last 968 """ 969 # Build dictionary from end_times to lists of corresponding chains 970 endtime_chain_dict = {} 971 for chain in chain_list: 972 if chain.end_time in endtime_chain_dict: 973 endtime_chain_dict[chain.end_time].append(chain) 974 else: 975 endtime_chain_dict[chain.end_time] = [chain] 976 977 # Use dictionary to build final sorted list 978 sorted_end_times = list(endtime_chain_dict.keys()) 979 sorted_end_times.sort() 980 sorted_chain_list = [] 981 for end_time in sorted_end_times: 982 chain_list = endtime_chain_dict[end_time] 983 if len(chain_list) == 1: 984 sorted_chain_list.append(chain_list[0]) 985 else: 986 assert len(chain_list) == 2 987 if chain_list[0].backend: # is remote, goes first 988 sorted_chain_list.append(chain_list[0]) 989 sorted_chain_list.append(chain_list[1]) 990 else: # is local, goes second 991 sorted_chain_list.append(chain_list[1]) 992 sorted_chain_list.append(chain_list[0]) 993 994 return sorted_chain_list 995 996 def get_backup_chain_at_time(self, time): 997 u""" 998 Return backup chain covering specified time 999 1000 Tries to find the backup chain covering the given time. If 1001 there is none, return the earliest chain before, and failing 1002 that, the earliest chain. 1003 """ 1004 if not self.all_backup_chains: 1005 raise CollectionsError(u"No backup chains found") 1006 1007 covering_chains = [c for c in self.all_backup_chains 1008 if c.start_time <= time <= c.end_time] 1009 if len(covering_chains) > 1: 1010 raise CollectionsError(u"Two chains cover the given time") 1011 elif len(covering_chains) == 1: 1012 return covering_chains[0] 1013 1014 old_chains = [c for c in self.all_backup_chains if c.end_time < time] 1015 if old_chains: 1016 return old_chains[-1] 1017 else: 1018 return self.all_backup_chains[0] # no chains are old enough 1019 1020 def get_signature_chain_at_time(self, time): 1021 u""" 1022 Return signature chain covering specified time 1023 1024 Tries to find the signature chain covering the given time. If 1025 there is none, return the earliest chain before, and failing 1026 that, the earliest chain. 1027 """ 1028 if not self.all_sig_chains: 1029 raise CollectionsError(u"No signature chains found") 1030 1031 covering_chains = [c for c in self.all_sig_chains 1032 if c.start_time <= time <= c.end_time] 1033 if covering_chains: 1034 return covering_chains[-1] # prefer local if multiple sig chains 1035 1036 old_chains = [c for c in self.all_sig_chains if c.end_time < time] 1037 if old_chains: 1038 return old_chains[-1] 1039 else: 1040 # no chains are old enough, give oldest and warn user 1041 oldest = self.all_sig_chains[0] 1042 if time < oldest.start_time: 1043 log.Warn(_(u"No signature chain for the requested time. " 1044 u"Using oldest available chain, starting at time %s.") % 1045 dup_time.timetopretty(oldest.start_time), 1046 log.WarningCode.no_sig_for_time, 1047 dup_time.timetostring(oldest.start_time)) 1048 return oldest 1049 1050 def get_extraneous(self): 1051 u""" 1052 Return list of the names of extraneous duplicity files 1053 1054 A duplicity file is considered extraneous if it is 1055 recognizable as a duplicity file, but isn't part of some 1056 complete backup set, or current signature chain. 1057 """ 1058 assert self.values_set 1059 local_filenames = [] 1060 remote_filenames = [] 1061 ext_containers = self.orphaned_backup_sets + self.incomplete_backup_sets 1062 for set_or_chain in ext_containers: 1063 if set_or_chain.backend: 1064 remote_filenames.extend(set_or_chain.get_filenames()) 1065 else: 1066 local_filenames.extend(set_or_chain.get_filenames()) 1067 local_filenames += self.local_orphaned_sig_names 1068 remote_filenames += self.remote_orphaned_sig_names 1069 return local_filenames, remote_filenames 1070 1071 def sort_sets(self, setlist): 1072 u"""Return new list containing same elems of setlist, sorted by time""" 1073 pairs = [(s.get_time(), s) for s in setlist] 1074 pairs.sort() 1075 return [p[1] for p in pairs] 1076 1077 def get_chains_older_than(self, t): 1078 u""" 1079 Returns a list of backup chains older than the given time t 1080 1081 All of the times will be associated with an intact chain. 1082 Furthermore, none of the times will be of a chain which a newer 1083 set may depend on. For instance, if set A is a full set older 1084 than t, and set B is an incremental based on A which is newer 1085 than t, then the time of set A will not be returned. 1086 """ 1087 assert self.values_set 1088 old_chains = [] 1089 for chain in self.all_backup_chains: 1090 if (chain.end_time < t and 1091 (not self.matched_chain_pair or 1092 chain is not self.matched_chain_pair[1])): 1093 # don't delete the active (matched) chain 1094 old_chains.append(chain) 1095 return old_chains 1096 1097 def get_signature_chains_older_than(self, t): 1098 u""" 1099 Returns a list of signature chains older than the given time t 1100 1101 All of the times will be associated with an intact chain. 1102 Furthermore, none of the times will be of a chain which a newer 1103 set may depend on. For instance, if set A is a full set older 1104 than t, and set B is an incremental based on A which is newer 1105 than t, then the time of set A will not be returned. 1106 """ 1107 assert self.values_set 1108 old_chains = [] 1109 for chain in self.all_sig_chains: 1110 if (chain.end_time < t and 1111 (not self.matched_chain_pair or 1112 chain is not self.matched_chain_pair[0])): 1113 # don't delete the active (matched) chain 1114 old_chains.append(chain) 1115 return old_chains 1116 1117 def get_last_full_backup_time(self): 1118 u""" 1119 Return the time of the last full backup, 1120 or 0 if there is none. 1121 """ 1122 return self.get_nth_last_full_backup_time(1) 1123 1124 def get_nth_last_full_backup_time(self, n): 1125 u""" 1126 Return the time of the nth to last full backup, 1127 or 0 if there is none. 1128 """ 1129 chain = self.get_nth_last_backup_chain(n) 1130 if chain is None: 1131 return 0 1132 else: 1133 return chain.get_first().time 1134 1135 def get_last_backup_chain(self): 1136 u""" 1137 Return the last full backup of the collection, 1138 or None if there is no full backup chain. 1139 """ 1140 return self.get_nth_last_backup_chain(1) 1141 1142 def get_nth_last_backup_chain(self, n): 1143 u""" 1144 Return the nth-to-last full backup of the collection, 1145 or None if there is less than n backup chains. 1146 1147 NOTE: n = 1 -> time of latest available chain (n = 0 is not 1148 a valid input). Thus the second-to-last is obtained with n=2 1149 rather than n=1. 1150 """ 1151 assert self.values_set 1152 assert n > 0 1153 1154 if len(self.all_backup_chains) < n: 1155 return None 1156 1157 sorted = self.all_backup_chains[:] # pylint: disable=redefined-builtin 1158 sorted.sort(key=lambda x: x.get_first().time) 1159 1160 sorted.reverse() 1161 return sorted[n - 1] 1162 1163 def get_older_than(self, t): 1164 u""" 1165 Returns a list of backup sets older than the given time t 1166 1167 All of the times will be associated with an intact chain. 1168 Furthermore, none of the times will be of a set which a newer 1169 set may depend on. For instance, if set A is a full set older 1170 than t, and set B is an incremental based on A which is newer 1171 than t, then the time of set A will not be returned. 1172 """ 1173 old_sets = [] 1174 for chain in self.get_chains_older_than(t): 1175 old_sets.extend(chain.get_all_sets()) 1176 return self.sort_sets(old_sets) 1177 1178 def get_older_than_required(self, t): 1179 u""" 1180 Returns list of old backup sets required by new sets 1181 1182 This function is similar to the previous one, but it only 1183 returns the times of sets which are old but part of the chains 1184 where the newer end of the chain is newer than t. 1185 """ 1186 assert self.values_set 1187 new_chains = [c for c in self.all_backup_chains if c.end_time >= t] 1188 result_sets = [] 1189 for chain in new_chains: 1190 old_sets = [s for s in chain.get_all_sets() if s.get_time() < t] 1191 result_sets.extend(old_sets) 1192 return self.sort_sets(result_sets) 1193 1194 def get_file_changed_record(self, filepath): 1195 u""" 1196 Returns time line of specified file changed 1197 """ 1198 # quick fix to spaces in filepath 1199 modified_filepath = filepath 1200 if u" " in filepath: 1201 modified_filepath = u'"' + filepath.replace(u" ", r"\x20") + u'"' 1202 1203 if not self.matched_chain_pair: 1204 return u"" 1205 1206 all_backup_set = self.matched_chain_pair[1].get_all_sets() 1207 specified_file_backup_set = [] 1208 specified_file_backup_type = [] 1209 1210 modified_filepath = util.fsencode(modified_filepath) 1211 for bs in all_backup_set: 1212 filelist = [fileinfo[1] for fileinfo in bs.get_files_changed()] 1213 if modified_filepath in filelist: 1214 specified_file_backup_set.append(bs) 1215 index = filelist.index(modified_filepath) 1216 specified_file_backup_type.append(bs.get_files_changed()[index][0]) 1217 1218 return FileChangedStatus(filepath, list(zip(specified_file_backup_type, specified_file_backup_set))) 1219 1220 1221class FileChangedStatus(object): 1222 def __init__(self, filepath, fileinfo_list): 1223 self.filepath = filepath 1224 self.fileinfo_list = fileinfo_list 1225 1226 def __str__(self): 1227 set_schema = u"%20s %30s %20s" 1228 l = [u"-------------------------", 1229 _(u"File: %s") % (self.filepath), 1230 _(u"Total number of backup: %d") % len(self.fileinfo_list), 1231 set_schema % (_(u"Type of backup set:"), _(u"Time:"), _(u"Type of file change:"))] 1232 1233 for s in self.fileinfo_list: 1234 backup_type = s[0] 1235 backup_set = s[1] 1236 if backup_set.time: 1237 type = _(u"Full") # pylint: disable=redefined-builtin 1238 else: 1239 type = _(u"Incremental") 1240 l.append(set_schema % (type, dup_time.timetopretty(backup_set.get_time()), backup_type.title())) 1241 1242 l.append(u"-------------------------") 1243 return u"\n".join(l) 1244