1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
2#
3# Copyright 2002 Ben Escoto <ben@emerose.org>
4# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
5#
6# This file is part of duplicity.
7#
8# Duplicity is free software; you can redistribute it and/or modify it
9# under the terms of the GNU General Public License as published by the
10# Free Software Foundation; either version 2 of the License, or (at your
11# option) any later version.
12#
13# Duplicity is distributed in the hope that it will be useful, but
14# WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with duplicity; if not, write to the Free Software Foundation,
20# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22u"""Classes and functions on collections of backup volumes"""
23
24from builtins import str
25from builtins import zip
26from builtins import map
27from builtins import range
28from builtins import object
29
30import sys
31
32from duplicity import log
33from duplicity import file_naming
34from duplicity import path
35from duplicity import util
36from duplicity import dup_time
37from duplicity import config
38from duplicity import manifest
39from duplicity import util
40from duplicity.gpg import GPGError
41
# For type testing against both int and long types that works in python 2/3.
# Under Python 2 "long" is a distinct type that must be included; under
# Python 3 the name no longer exists.  Evaluating the tuple and catching
# NameError handles both without referencing sys.version_info.
# (The previous "(int, int)" was a futurize artifact that silently dropped
# long support on Python 2.)
try:
    integer_types = (int, long)  # noqa: F821  # pylint: disable=undefined-variable
except NameError:
    integer_types = (int,)
47
48
class CollectionsError(Exception):
    u"""Raised when backup sets/chains on the backend are inconsistent."""
    pass
51
52
class BackupSet(object):
    u"""
    Backup set - the backup information produced by one session
    """
    def __init__(self, backend, action):
        u"""
        Initialize new backup set, only backend is required at first

        @param backend: backend object the set's files are stored on
        @param action: name of the current duplicity action (e.g. u"replicate")
        """
        self.backend = backend
        self.info_set = False  # true if fields are set
        self.volume_name_dict = {}  # dict from volume number to filename
        self.remote_manifest_name = None  # full name of remote manifest
        self.local_manifest_path = None  # full path to local manifest
        self.time = None  # will be set if is full backup set
        self.start_time = None  # will be set if inc
        self.end_time = None  # will be set if inc
        self.partial = False  # true if a partial backup
        self.encrypted = False  # true if an encrypted backup
        self.files_changed = []
        self.action = action

    def is_complete(self):
        u"""
        Assume complete if found manifest file

        Returns the manifest name itself (callers rely on truthiness only).
        """
        return self.remote_manifest_name

    def add_filename(self, filename, pr=None):
        u"""
        Add a filename to given set.  Return true if it fits.

        The filename will match the given set if it has the right
        times and is of the right type.  The information will be set
        from the first filename given.

        @param filename: name of file to add
        @type filename: string

        @param pr: pre-computed result of file_naming.parse(filename)
        @type pr: Optional[ParseResults]
        """
        if not pr:
            pr = file_naming.parse(filename)
        # only full/inc backup files belong to a backup set
        if not pr or not (pr.type == u"full" or pr.type == u"inc"):
            return False

        if not self.info_set:
            self.set_info(pr)
        else:
            # subsequent files must agree with the info taken from the first
            if pr.type != self.type:
                return False
            if pr.time != self.time:
                return False
            if (pr.start_time != self.start_time or
                    pr.end_time != self.end_time):
                return False
            if bool(pr.encrypted) != bool(self.encrypted):
                # a restarted (partial) backup may upgrade to encrypted
                if self.partial and pr.encrypted:
                    self.encrypted = pr.encrypted

        if pr.manifest:
            self.set_manifest(filename)
        else:
            assert pr.volume_number is not None
            assert pr.volume_number not in self.volume_name_dict, \
                (self.volume_name_dict, filename)
            self.volume_name_dict[pr.volume_number] = filename

        return True

    def set_info(self, pr):
        u"""
        Set BackupSet information from ParseResults object

        @param pr: parse results
        @type pr: ParseResults
        """
        assert not self.info_set
        self.type = pr.type
        self.time = pr.time
        self.start_time = pr.start_time
        self.end_time = pr.end_time
        self.partial = pr.partial
        self.encrypted = bool(pr.encrypted)
        self.info_set = True

    def set_files_changed(self):
        u"""
        Cache the file-change list recorded in this set's manifest
        """
        mf = self.get_manifest()
        self.files_changed = mf.get_files_changed()

    def set_manifest(self, remote_filename):
        u"""
        Add local and remote manifest filenames to backup set
        """
        assert not self.remote_manifest_name, \
            u"Cannot set filename of remote manifest to %s; already set to %s." % (
                remote_filename,
                self.remote_manifest_name,
            )
        self.remote_manifest_name = remote_filename

        if self.action != u"replicate":
            local_filename_list = config.archive_dir_path.listdir()
        else:
            # replicate runs without a local archive cache
            local_filename_list = []
        for local_filename in local_filename_list:
            pr = file_naming.parse(local_filename)
            if (pr and pr.manifest and pr.type == self.type and
                    pr.time == self.time and
                    pr.start_time == self.start_time and
                    pr.end_time == self.end_time):
                self.local_manifest_path = \
                    config.archive_dir_path.append(local_filename)

                self.set_files_changed()
                break

    def delete(self):
        u"""
        Remove all files in set, both local and remote
        """
        rfn = self.get_filenames()
        rfn.reverse()  # delete volumes before the manifest
        try:
            self.backend.delete(rfn)
        except Exception:
            # best effort: files may already be gone on the backend
            log.Debug(_(u"BackupSet.delete: missing %s") % [util.fsdecode(f) for f in rfn])
        if self.action != u"replicate":
            local_filename_list = config.archive_dir_path.listdir()
        else:
            local_filename_list = []
        for lfn in local_filename_list:
            pr = file_naming.parse(lfn)
            if (pr and pr.time == self.time and
                    pr.start_time == self.start_time and
                    pr.end_time == self.end_time):
                try:
                    config.archive_dir_path.append(lfn).delete()
                except Exception:
                    # lfn is a single filename; decode it directly
                    # (fixed: previous code iterated its characters)
                    log.Debug(_(u"BackupSet.delete: missing %s") % util.fsdecode(lfn))
        util.release_lockfile()

    def __str__(self):
        u"""
        For now just list files in set
        """
        filelist = []
        if self.remote_manifest_name:
            filelist.append(self.remote_manifest_name)
        filelist.extend(list(self.volume_name_dict.values()))
        return u"[%s]" % u", ".join(map(util.fsdecode, filelist))

    def get_timestr(self):
        u"""
        Return time string suitable for log statements
        """
        return dup_time.timetopretty(self.time or self.end_time)

    def check_manifests(self, check_remote=True):
        u"""
        Make sure remote manifest is equal to local one
        """
        if not self.remote_manifest_name and not self.local_manifest_path:
            log.FatalError(_(u"Fatal Error: No manifests found for most recent backup"),
                           log.ErrorCode.no_manifests)
        assert self.remote_manifest_name, u"if only one, should be remote"

        remote_manifest = self.get_remote_manifest() if check_remote else None
        local_manifest = None
        if self.local_manifest_path:
            local_manifest = self.get_local_manifest()
        if remote_manifest and self.local_manifest_path and local_manifest:
            if remote_manifest != local_manifest:
                log.FatalError(_(u"Fatal Error: Remote manifest does not match "
                                 u"local one.  Either the remote backup set or "
                                 u"the local archive directory has been corrupted."),
                               log.ErrorCode.mismatched_manifests)
        if not remote_manifest:
            if self.local_manifest_path:
                # fall back to the local copy when the remote is unreadable
                remote_manifest = local_manifest
            else:
                log.FatalError(_(u"Fatal Error: Neither remote nor local "
                                 u"manifest is readable."),
                               log.ErrorCode.unreadable_manifests)
        remote_manifest.check_dirinfo()

    def get_local_manifest(self):
        u"""
        Return manifest object by reading local manifest file
        """
        assert self.local_manifest_path
        manifest_buffer = self.local_manifest_path.get_data()
        log.Info(_(u"Processing local manifest %s (%s)") % (
            self.local_manifest_path.name, len(manifest_buffer)))
        return manifest.Manifest().from_string(manifest_buffer)

    def get_remote_manifest(self):
        u"""
        Return manifest by reading remote manifest on backend

        Returns None if the manifest could not be decrypted.
        """
        assert self.remote_manifest_name
        try:
            manifest_buffer = self.backend.get_data(self.remote_manifest_name)
        except GPGError as message:
            log.Error(_(u"Error processing remote manifest (%s): %s") %
                      (util.fsdecode(self.remote_manifest_name), util.uexc(message)))
            return None
        log.Info(_(u"Processing remote manifest %s (%s)") % (
            util.fsdecode(self.remote_manifest_name), len(manifest_buffer)))
        return manifest.Manifest().from_string(manifest_buffer)

    def get_manifest(self):
        u"""
        Return manifest object, showing preference for local copy
        """
        if self.local_manifest_path:
            return self.get_local_manifest()
        else:
            return self.get_remote_manifest()

    def get_filenames(self):
        u"""
        Return sorted list of (remote) filenames of files in set
        """
        assert self.info_set
        volume_num_list = list(self.volume_name_dict.keys())
        volume_num_list.sort()
        volume_filenames = [self.volume_name_dict[x] for x in volume_num_list]
        if self.remote_manifest_name:
            # For convenience of implementation for restart support, we treat
            # local partial manifests as this set's remote manifest.  But
            # when specifically asked for a list of remote filenames, we
            # should not include it.
            pr = file_naming.parse(self.remote_manifest_name)
            if pr and not pr.partial:
                volume_filenames.append(self.remote_manifest_name)
        return volume_filenames

    def get_time(self):
        u"""
        Return time if full backup, or end_time if incremental
        """
        if self.time:
            return self.time
        if self.end_time:
            return self.end_time
        assert 0, u"Neither self.time nor self.end_time set"

    def get_files_changed(self):
        u"""
        Return cached list of files changed in this set
        """
        return self.files_changed

    def __len__(self):
        u"""
        Return the number of volumes in the set
        """
        return len(list(self.volume_name_dict.keys()))

    def __eq__(self, other):
        u"""
        Return whether this backup set is equal to other
        """
        return self.type == other.type and \
            self.time == other.time and \
            self.start_time == other.start_time and \
            self.end_time == other.end_time and \
            len(self) == len(other)
321
322
class BackupChain(object):
    u"""
    BackupChain - a number of linked BackupSets

    A BackupChain always starts with a full backup set and continues
    with incremental ones.
    """
    def __init__(self, backend):
        u"""
        Initialize new chain, only backend is required at first
        """
        self.backend = backend
        self.fullset = None  # BackupSet of the initial full backup
        self.incset_list = []  # sorted list of BackupSets
        # both are epoch times; equal until an incremental set is added
        self.start_time, self.end_time = None, None

    def set_full(self, fullset):
        u"""
        Add full backup set

        @param fullset: complete full BackupSet; must have its time set
        """
        assert not self.fullset and isinstance(fullset, BackupSet)
        self.fullset = fullset
        assert fullset.time
        self.start_time, self.end_time = fullset.time, fullset.time

    def add_inc(self, incset):
        u"""
        Add incset to self.  Return False if incset does not match

        An incremental set fits if its start_time continues the chain's
        current end_time.  A set that shares the last set's start_time but
        reaches further replaces the last set.
        """
        if self.end_time == incset.start_time:
            self.incset_list.append(incset)
        else:
            # same starting point as the current last set but a later end:
            # prefer the longer (newer) one
            if (self.incset_list and
                    incset.start_time == self.incset_list[-1].start_time and
                    incset.end_time > self.incset_list[-1].end_time):
                log.Info(_(u"Preferring Backupset over previous one!"))
                self.incset_list[-1] = incset
            else:
                log.Info(_(u"Ignoring incremental Backupset (start_time: %s; needed: %s)") %
                         (dup_time.timetopretty(incset.start_time),
                          dup_time.timetopretty(self.end_time)))
                return False
        self.end_time = incset.end_time
        log.Info(_(u"Added incremental Backupset (start_time: %s / end_time: %s)") %
                 (dup_time.timetopretty(incset.start_time),
                  dup_time.timetopretty(incset.end_time)))
        assert self.end_time
        return True

    def delete(self, keep_full=False):
        u"""
        Delete all sets in chain, in reverse order

        @param keep_full: if true, keep the full set and delete only the
            incremental ones
        """
        for i in range(len(self.incset_list) - 1, -1, -1):
            self.incset_list[i].delete()
        if self.fullset and not keep_full:
            self.fullset.delete()

    def get_sets_at_time(self, time):
        u"""
        Return a list of sets in chain earlier or equal to time
        """
        older_incsets = [s for s in self.incset_list if s.end_time <= time]
        return [self.fullset] + older_incsets

    def get_last(self):
        u"""
        Return last BackupSet in chain
        """
        if self.incset_list:
            return self.incset_list[-1]
        else:
            return self.fullset

    def get_first(self):
        u"""
        Return first BackupSet in chain (ie the full backup)
        """
        return self.fullset

    def short_desc(self):
        u"""
        Return a short one-line description of the chain,
        suitable for log messages.
        """
        return u"[%s]-[%s]" % (dup_time.timetopretty(self.start_time),
                               dup_time.timetopretty(self.end_time))

    def to_log_info(self, prefix=u''):
        u"""
        Return summary, suitable for printing to log

        One line per set: "<prefix><type> <time> <num volumes> <enc|noenc>"
        """
        l = []
        for s in self.get_all_sets():
            # a set with .time is the full backup; inc sets use end_time
            if s.time:
                btype = u"full"
                time = s.time
            else:
                btype = u"inc"
                time = s.end_time
            if s.encrypted:
                enc = u"enc"
            else:
                enc = u"noenc"
            l.append(u"%s%s %s %d %s" % (prefix, btype, dup_time.timetostring(time), (len(s)), enc))
        return l

    def __str__(self):
        u"""
        Return string representation, for testing purposes
        """
        set_schema = u"%20s   %30s   %15s"
        l = [u"-------------------------",
             _(u"Chain start time: ") + dup_time.timetopretty(self.start_time),
             _(u"Chain end time: ") + dup_time.timetopretty(self.end_time),
             _(u"Number of contained backup sets: %d") %
             (len(self.incset_list) + 1,),
             _(u"Total number of contained volumes: %d") %
             (self.get_num_volumes(),),
             set_schema % (_(u"Type of backup set:"), _(u"Time:"), _(u"Num volumes:"))]

        for s in self.get_all_sets():
            if s.time:
                btype = _(u"Full")
                time = s.time
            else:
                btype = _(u"Incremental")
                time = s.end_time
            l.append(set_schema % (btype, dup_time.timetopretty(time), len(s)))

        l.append(u"-------------------------")
        return u"\n".join(l)

    def get_num_volumes(self):
        u"""
        Return the total number of volumes in the chain
        """
        n = 0
        for s in self.get_all_sets():
            n += len(s)
        return n

    def get_all_sets(self):
        u"""
        Return list of all backup sets in chain
        """
        if self.fullset:
            return [self.fullset] + self.incset_list
        else:
            return self.incset_list
473
474
class SignatureChain(object):
    u"""
    A number of linked SignatureSets

    Analog to BackupChain - start with a full-sig, and continue with
    new-sigs.
    """
    def __init__(self, local, location):
        u"""
        Return new SignatureChain.

        local should be true iff the signature chain resides in
        config.archive_dir_path and false if the chain is in
        config.backend.

        @param local: True if sig chain in config.archive_dir_path
        @type local: Boolean

        @param location: Where the sig chain is located
        @type location: config.archive_dir_path or config.backend
        """
        if local:
            self.archive_dir_path, self.backend = location, None
        else:
            self.archive_dir_path, self.backend = None, location
        self.fullsig = None  # filename of full signature
        self.inclist = []  # list of filenames of incremental signatures
        self.start_time, self.end_time = None, None

    def __str__(self):
        u"""
        Local or Remote and List of files in the set
        """
        if self.archive_dir_path:
            place = _(u"local")
        else:
            place = _(u"remote")
        filelist = []
        if self.fullsig:
            filelist.append(self.fullsig)
        filelist.extend(self.inclist)
        # Decode filenames before joining: they may be bytes, and this
        # matches BackupSet.__str__ (fixed: joining raw entries could
        # raise TypeError for bytes filenames).
        return u"%s: [%s]" % (place, u", ".join(map(util.fsdecode, filelist)))

    def check_times(self, time_list):
        u"""
        Check to make sure times are in whole seconds

        @param time_list: iterable of epoch times to verify
        """
        for time in time_list:
            if type(time) not in integer_types:
                assert 0, u"Time %s in %s wrong type" % (time, time_list)

    def islocal(self):
        u"""
        Return true if represents a signature chain in archive_dir_path
        """
        if self.archive_dir_path:
            return True
        else:
            return False

    def add_filename(self, filename, pr=None):
        u"""
        Add new sig filename to current chain.  Return true if fits

        @param filename: signature filename to add
        @param pr: pre-computed result of file_naming.parse(filename)
        """
        if not pr:
            pr = file_naming.parse(filename)
        if not pr:
            return None

        if self.fullsig:
            # chain already started: only a new-sig continuing our end fits
            if pr.type != u"new-sig":
                return None
            if pr.start_time != self.end_time:
                return None
            self.inclist.append(filename)
            self.check_times([pr.end_time])
            self.end_time = pr.end_time
            return 1
        else:
            # chain must begin with a full signature
            if pr.type != u"full-sig":
                return None
            self.fullsig = filename
            # (fixed: pr.time was previously checked twice)
            self.check_times([pr.time])
            self.start_time, self.end_time = pr.time, pr.time
            return 1

    def get_fileobjs(self, time=None):
        u"""
        Return ordered list of signature fileobjs opened for reading,
        optionally at a certain time
        """
        assert self.fullsig
        if self.archive_dir_path:  # local
            def filename_to_fileobj(filename):
                u"""Open filename in archive_dir_path, return filtered fileobj"""
                sig_dp = path.DupPath(self.archive_dir_path.name, (filename,))
                return sig_dp.filtered_open(u"rb")
        else:
            filename_to_fileobj = self.backend.get_fileobj_read
        return [filename_to_fileobj(f) for f in self.get_filenames(time)]

    def delete(self, keep_full=False):
        u"""
        Remove all files in signature set

        @param keep_full: if true, keep the full signature file
        """
        # Try to delete in opposite order, so something useful even if aborted
        if self.archive_dir_path:
            for i in range(len(self.inclist) - 1, -1, -1):
                self.archive_dir_path.append(self.inclist[i]).delete()
            if not keep_full:
                self.archive_dir_path.append(self.fullsig).delete()
        else:
            assert self.backend
            inclist_copy = self.inclist[:]
            inclist_copy.reverse()
            if not keep_full:
                inclist_copy.append(self.fullsig)
            self.backend.delete(inclist_copy)

    def get_filenames(self, time=None):
        u"""
        Return ordered list of filenames in set, up to a provided time
        """
        if self.fullsig:
            l = [self.fullsig]
        else:
            l = []

        inclist = self.inclist
        if time:
            inclist = [n for n in inclist if file_naming.parse(n).end_time <= time]

        l.extend(inclist)
        return l
609
610
611class CollectionsStatus(object):
612    u"""
613    Hold information about available chains and sets
614    """
615    def __init__(self, backend, archive_dir_path, action):
616        u"""
617        Make new object.  Does not set values
618        """
619        self.backend = backend
620        self.archive_dir_path = archive_dir_path
621        self.action = action
622
623        # Will hold (signature chain, backup chain) pair of active
624        # (most recent) chains
625        self.matched_chain_pair = None
626
627        # These should be sorted by end_time
628        self.all_backup_chains = None
629        self.other_backup_chains = None
630        self.all_sig_chains = None
631
632        # Other misc paths and sets which shouldn't be there
633        self.local_orphaned_sig_names = []
634        self.remote_orphaned_sig_names = []
635        self.orphaned_backup_sets = None
636        self.incomplete_backup_sets = None
637
638        # True if set_values() below has run
639        self.values_set = None
640
641    def to_log_info(self):
642        u"""
643        Return summary of the collection, suitable for printing to log
644        """
645        l = [u"backend %s" % (self.backend.__class__.__name__,),
646             u"archive-dir %s" % (self.archive_dir_path,)]
647
648        for i in range(len(self.other_backup_chains)):
649            # A bit of a misnomer.  Chain might have a sig.
650            l.append(u"chain-no-sig %d" % (i,))
651            l += self.other_backup_chains[i].to_log_info(u' ')
652
653        if self.matched_chain_pair:
654            l.append(u"chain-complete")
655            l += self.matched_chain_pair[1].to_log_info(u' ')
656
657        l.append(u"orphaned-sets-num %d" % (len(self.orphaned_backup_sets),))
658        l.append(u"incomplete-sets-num %d" % (len(self.incomplete_backup_sets),))
659
660        return l
661
662    def __str__(self):
663        u"""
664        Return string summary of the collection
665        """
666        l = [_(u"Collection Status"),
667             u"-----------------",
668             _(u"Connecting with backend: %s") %
669             (self.backend.__class__.__name__,),
670             _(u"Archive dir: %s") % (self.archive_dir_path.uc_name if self.archive_dir_path else u'None',)]
671
672        l.append(u"\n" +
673                 ngettext(u"Found %d secondary backup chain.",
674                          u"Found %d secondary backup chains.",
675                          len(self.other_backup_chains))
676                 % len(self.other_backup_chains))
677        for i in range(len(self.other_backup_chains)):
678            l.append(_(u"Secondary chain %d of %d:") %
679                     (i + 1, len(self.other_backup_chains)))
680            l.append(str(self.other_backup_chains[i]))
681            l.append(u"")
682
683        if self.matched_chain_pair:
684            l.append(u"\n" + _(u"Found primary backup chain with matching "
685                     u"signature chain:"))
686            l.append(str(self.matched_chain_pair[1]))
687        else:
688            l.append(_(u"No backup chains with active signatures found"))
689
690        if self.orphaned_backup_sets or self.incomplete_backup_sets:
691            l.append(ngettext(u"Also found %d backup set not part of any chain,",
692                              u"Also found %d backup sets not part of any chain,",
693                              len(self.orphaned_backup_sets))
694                     % (len(self.orphaned_backup_sets),))
695            l.append(ngettext(u"and %d incomplete backup set.",
696                              u"and %d incomplete backup sets.",
697                              len(self.incomplete_backup_sets))
698                     % (len(self.incomplete_backup_sets),))
699            # TRANSL: "cleanup" is a hard-coded command, so do not translate it
700            l.append(_(u'These may be deleted by running duplicity with the '
701                       u'"cleanup" command.'))
702        else:
703            l.append(_(u"No orphaned or incomplete backup sets found."))
704
705        return u"\n".join(l)
706
    def set_values(self, sig_chain_warning=1):
        u"""
        Set values from archive_dir_path and backend.

        Returns self for convenience.  If sig_chain_warning is set to None,
        do not warn about unnecessary sig chains.  This is because there may
        naturally be some unecessary ones after a full backup.
        """
        self.values_set = 1

        # get remote filename list
        backend_filename_list = self.backend.list()
        log.Debug(ngettext(u"%d file exists on backend",
                           u"%d files exist on backend",
                           len(backend_filename_list)) %
                  len(backend_filename_list))

        # get local filename list (replicate runs without a local cache)
        if self.action != u"replicate":
            local_filename_list = self.archive_dir_path.listdir()
        else:
            local_filename_list = []
        log.Debug(ngettext(u"%d file exists in cache",
                           u"%d files exist in cache",
                           len(local_filename_list)) %
                  len(local_filename_list))

        # check for partial backups (only ever present locally)
        partials = []
        for local_filename in local_filename_list:
            pr = file_naming.parse(local_filename)
            if pr and pr.partial:
                partials.append(local_filename)

        # get various backup sets and chains
        (backup_chains, self.orphaned_backup_sets,
         self.incomplete_backup_sets) = \
            self.get_backup_chains(partials + backend_filename_list)
        backup_chains = self.get_sorted_chains(backup_chains)
        self.all_backup_chains = backup_chains

        # NOTE(review): this assert compares the list with itself (it was
        # just assigned above), so it can never fire; presumably it was
        # meant to compare against the pre-sort list — confirm upstream.
        assert len(backup_chains) == len(self.all_backup_chains), \
            u"get_sorted_chains() did something more than re-ordering"

        # local sig chains come first so they are preferred when both match
        local_sig_chains, self.local_orphaned_sig_names = \
            self.get_signature_chains(True)
        remote_sig_chains, self.remote_orphaned_sig_names = \
            self.get_signature_chains(False, filelist=backend_filename_list)
        self.set_matched_chain_pair(local_sig_chains + remote_sig_chains,
                                    backup_chains)
        self.warn(sig_chain_warning)
        return self
759
    def set_matched_chain_pair(self, sig_chains, backup_chains):
        u"""
        Set self.matched_chain_pair and self.other_sig/backup_chains

        The latest matched_chain_pair will be set.  If there are both
        remote and local signature chains capable of matching the
        latest backup chain, use the local sig chain (it does not need
        to be downloaded).

        @param sig_chains: list of SignatureChain objects
        @param backup_chains: list of BackupChain objects, sorted by time
        """
        # guard: an empty sig_chains list is passed through unsorted
        sig_chains = sig_chains and self.get_sorted_chains(sig_chains)
        self.all_sig_chains = sig_chains
        self.other_backup_chains = backup_chains[:]
        self.matched_chain_pair = None
        if sig_chains and backup_chains:
            latest_backup_chain = backup_chains[-1]
            # walk sig chains newest-first; the first match wins
            for i in range(len(sig_chains) - 1, -1, -1):
                if sig_chains[i].end_time == latest_backup_chain.end_time:
                    # exact match: fall through to the pairing code below
                    pass
                # See if the set before last matches:
                elif (len(latest_backup_chain.get_all_sets()) >= 2 and
                      sig_chains[i].end_time == latest_backup_chain.get_all_sets()[-2].end_time):
                    # It matches, remove the last backup set:
                    log.Warn(_(u"Warning, discarding last backup set, because "
                               u"of missing signature file."))
                    self.incomplete_backup_sets.append(latest_backup_chain.incset_list[-1])
                    latest_backup_chain.incset_list = latest_backup_chain.incset_list[:-1]
                else:
                    # this sig chain does not fit; try the next older one
                    continue

                # Found a matching pair:
                if self.matched_chain_pair is None:
                    self.matched_chain_pair = (sig_chains[i], latest_backup_chain)

                break

        # the matched backup chain is no longer one of the "other" chains
        if self.matched_chain_pair:
            self.other_backup_chains.remove(self.matched_chain_pair[1])
797
    def warn(self, sig_chain_warning):
        u"""
        Log various error messages if find incomplete/orphaned files

        :param sig_chain_warning: if true (and signature chains exist with
            no matching backup chain), also emit the unmatched-signature
            warning
        """
        # set_values() must have run first to populate the fields read below
        assert self.values_set

        # Orphaned signature files found in the local archive dir
        if self.local_orphaned_sig_names:
            log.Warn(ngettext(u"Warning, found the following local orphaned "
                              u"signature file:",
                              u"Warning, found the following local orphaned "
                              u"signature files:",
                              len(self.local_orphaned_sig_names)) + u"\n" +
                     u"\n".join(map(util.fsdecode, self.local_orphaned_sig_names)),
                     log.WarningCode.orphaned_sig)

        # Orphaned signature files found on the remote backend
        if self.remote_orphaned_sig_names:
            log.Warn(ngettext(u"Warning, found the following remote orphaned "
                              u"signature file:",
                              u"Warning, found the following remote orphaned "
                              u"signature files:",
                              len(self.remote_orphaned_sig_names)) + u"\n" +
                     u"\n".join(map(util.fsdecode, self.remote_orphaned_sig_names)),
                     log.WarningCode.orphaned_sig)

        # Signature chains exist but none matched a backup chain
        if self.all_sig_chains and sig_chain_warning and not self.matched_chain_pair:
            log.Warn(_(u"Warning, found signatures but no corresponding "
                       u"backup files"), log.WarningCode.unmatched_sig)

        # Backup sets that are missing one or more of their files
        if self.incomplete_backup_sets:
            log.Warn(_(u"Warning, found incomplete backup sets, probably left "
                       u"from aborted session"), log.WarningCode.incomplete_backup)

        # Backup sets that could not be attached to any chain
        if self.orphaned_backup_sets:
            log.Warn(ngettext(u"Warning, found the following orphaned "
                              u"backup file:",
                              u"Warning, found the following orphaned "
                              u"backup files:",
                              len(self.orphaned_backup_sets)) + u"\n" +
                     u"\n".join(map(str, self.orphaned_backup_sets)),
                     log.WarningCode.orphaned_backup)
838
    def get_backup_chains(self, filename_list):
        u"""
        Split given filename_list into chains

        Return value will be tuple (list of chains, list of sets, list
        of incomplete sets), where the list of sets will comprise sets
        not fitting into any chain, and the incomplete sets are sets
        missing files.

        :param filename_list: iterable of duplicity filenames (local or
            remote) to group into backup sets and chains
        """
        log.Debug(_(u"Extracting backup chains from list of files: %s")
                  % [util.fsdecode(f) for f in filename_list])
        # First put filenames in set form
        sets = []

        def add_to_sets(filename):
            u"""
            Try adding filename to existing sets, or make new one
            """
            pr = file_naming.parse(filename)
            for set in sets:  # pylint: disable=redefined-builtin
                if set.add_filename(filename, pr):
                    log.Debug(_(u"File %s is part of known set") % (util.fsdecode(filename),))
                    break
            else:
                # for/else: loop completed without break, i.e. no existing
                # set accepted the file, so try to start a new set with it
                log.Debug(_(u"File %s is not part of a known set; creating new set") % (util.fsdecode(filename),))
                new_set = BackupSet(self.backend, self.action)
                if new_set.add_filename(filename, pr):
                    sets.append(new_set)
                else:
                    log.Debug(_(u"Ignoring file (rejected by backup set) '%s'") % util.fsdecode(filename))

        for f in filename_list:
            add_to_sets(f)
        # Separate incomplete sets out; only complete sets are chained
        sets, incomplete_sets = self.get_sorted_sets(sets)

        chains, orphaned_sets = [], []

        def add_to_chains(set):  # pylint: disable=redefined-builtin
            u"""
            Try adding set to existing chains, or make new one
            """
            if set.type == u"full":
                # Each full set always starts a fresh chain
                new_chain = BackupChain(self.backend)
                new_chain.set_full(set)
                chains.append(new_chain)
                log.Debug(_(u"Found backup chain %s") % (new_chain.short_desc()))
            else:
                assert set.type == u"inc"
                for chain in chains:
                    if chain.add_inc(set):
                        log.Debug(_(u"Added set %s to pre-existing chain %s") % (set.get_timestr(),
                                                                                 chain.short_desc()))
                        break
                else:
                    # for/else: incremental matched no chain -> orphaned
                    log.Debug(_(u"Found orphaned set %s") % (set.get_timestr(),))
                    orphaned_sets.append(set)
        for s in sets:
            add_to_chains(s)
        return (chains, orphaned_sets, incomplete_sets)
898
899    def get_sorted_sets(self, set_list):
900        u"""
901        Sort set list by end time, return (sorted list, incomplete)
902        """
903        time_set_pairs, incomplete_sets = [], []
904        for set in set_list:  # pylint: disable=redefined-builtin
905            if not set.is_complete():
906                incomplete_sets.append(set)
907            elif set.type == u"full":
908                time_set_pairs.append((set.time, set))
909            else:
910                time_set_pairs.append((set.end_time, set))
911        time_set_pairs.sort(key=lambda x: x[0])
912        return ([p[1] for p in time_set_pairs], incomplete_sets)
913
914    def get_signature_chains(self, local, filelist=None):
915        u"""
916        Find chains in archive_dir_path (if local is true) or backend
917
918        Use filelist if given, otherwise regenerate.  Return value is
919        pair (list of chains, list of signature paths not in any
920        chains).
921        """
922        def get_filelist():
923            if filelist is not None:
924                return filelist
925            elif local:
926                if self.action != u"replicate":
927                    return self.archive_dir_path.listdir()
928                else:
929                    return []
930            else:
931                return self.backend.list()
932
933        def get_new_sigchain():
934            u"""
935            Return new empty signature chain
936            """
937            if local:
938                return SignatureChain(True, self.archive_dir_path)
939            else:
940                return SignatureChain(False, self.backend)
941
942        # Build initial chains from full sig filenames
943        chains, new_sig_filenames = [], []
944        for filename in get_filelist():
945            pr = file_naming.parse(filename)
946            if pr:
947                if pr.type == u"full-sig":
948                    new_chain = get_new_sigchain()
949                    assert new_chain.add_filename(filename, pr)
950                    chains.append(new_chain)
951                elif pr.type == u"new-sig":
952                    new_sig_filenames.append(filename)
953
954        # Try adding new signatures to existing chains
955        orphaned_filenames = []
956        new_sig_filenames.sort(key=lambda x: int(file_naming.parse(x).start_time))
957        for sig_filename in new_sig_filenames:
958            for chain in chains:
959                if chain.add_filename(sig_filename):
960                    break
961            else:
962                orphaned_filenames.append(sig_filename)
963        return (chains, orphaned_filenames)
964
965    def get_sorted_chains(self, chain_list):
966        u"""
967        Return chains sorted by end_time.  If tie, local goes last
968        """
969        # Build dictionary from end_times to lists of corresponding chains
970        endtime_chain_dict = {}
971        for chain in chain_list:
972            if chain.end_time in endtime_chain_dict:
973                endtime_chain_dict[chain.end_time].append(chain)
974            else:
975                endtime_chain_dict[chain.end_time] = [chain]
976
977        # Use dictionary to build final sorted list
978        sorted_end_times = list(endtime_chain_dict.keys())
979        sorted_end_times.sort()
980        sorted_chain_list = []
981        for end_time in sorted_end_times:
982            chain_list = endtime_chain_dict[end_time]
983            if len(chain_list) == 1:
984                sorted_chain_list.append(chain_list[0])
985            else:
986                assert len(chain_list) == 2
987                if chain_list[0].backend:  # is remote, goes first
988                    sorted_chain_list.append(chain_list[0])
989                    sorted_chain_list.append(chain_list[1])
990                else:  # is local, goes second
991                    sorted_chain_list.append(chain_list[1])
992                    sorted_chain_list.append(chain_list[0])
993
994        return sorted_chain_list
995
996    def get_backup_chain_at_time(self, time):
997        u"""
998        Return backup chain covering specified time
999
1000        Tries to find the backup chain covering the given time.  If
1001        there is none, return the earliest chain before, and failing
1002        that, the earliest chain.
1003        """
1004        if not self.all_backup_chains:
1005            raise CollectionsError(u"No backup chains found")
1006
1007        covering_chains = [c for c in self.all_backup_chains
1008                           if c.start_time <= time <= c.end_time]
1009        if len(covering_chains) > 1:
1010            raise CollectionsError(u"Two chains cover the given time")
1011        elif len(covering_chains) == 1:
1012            return covering_chains[0]
1013
1014        old_chains = [c for c in self.all_backup_chains if c.end_time < time]
1015        if old_chains:
1016            return old_chains[-1]
1017        else:
1018            return self.all_backup_chains[0]  # no chains are old enough
1019
1020    def get_signature_chain_at_time(self, time):
1021        u"""
1022        Return signature chain covering specified time
1023
1024        Tries to find the signature chain covering the given time.  If
1025        there is none, return the earliest chain before, and failing
1026        that, the earliest chain.
1027        """
1028        if not self.all_sig_chains:
1029            raise CollectionsError(u"No signature chains found")
1030
1031        covering_chains = [c for c in self.all_sig_chains
1032                           if c.start_time <= time <= c.end_time]
1033        if covering_chains:
1034            return covering_chains[-1]  # prefer local if multiple sig chains
1035
1036        old_chains = [c for c in self.all_sig_chains if c.end_time < time]
1037        if old_chains:
1038            return old_chains[-1]
1039        else:
1040            # no chains are old enough, give oldest and warn user
1041            oldest = self.all_sig_chains[0]
1042            if time < oldest.start_time:
1043                log.Warn(_(u"No signature chain for the requested time. "
1044                           u"Using oldest available chain, starting at time %s.") %
1045                         dup_time.timetopretty(oldest.start_time),
1046                         log.WarningCode.no_sig_for_time,
1047                         dup_time.timetostring(oldest.start_time))
1048            return oldest
1049
1050    def get_extraneous(self):
1051        u"""
1052        Return list of the names of extraneous duplicity files
1053
1054        A duplicity file is considered extraneous if it is
1055        recognizable as a duplicity file, but isn't part of some
1056        complete backup set, or current signature chain.
1057        """
1058        assert self.values_set
1059        local_filenames = []
1060        remote_filenames = []
1061        ext_containers = self.orphaned_backup_sets + self.incomplete_backup_sets
1062        for set_or_chain in ext_containers:
1063            if set_or_chain.backend:
1064                remote_filenames.extend(set_or_chain.get_filenames())
1065            else:
1066                local_filenames.extend(set_or_chain.get_filenames())
1067        local_filenames += self.local_orphaned_sig_names
1068        remote_filenames += self.remote_orphaned_sig_names
1069        return local_filenames, remote_filenames
1070
1071    def sort_sets(self, setlist):
1072        u"""Return new list containing same elems of setlist, sorted by time"""
1073        pairs = [(s.get_time(), s) for s in setlist]
1074        pairs.sort()
1075        return [p[1] for p in pairs]
1076
1077    def get_chains_older_than(self, t):
1078        u"""
1079        Returns a list of backup chains older than the given time t
1080
1081        All of the times will be associated with an intact chain.
1082        Furthermore, none of the times will be of a chain which a newer
1083        set may depend on.  For instance, if set A is a full set older
1084        than t, and set B is an incremental based on A which is newer
1085        than t, then the time of set A will not be returned.
1086        """
1087        assert self.values_set
1088        old_chains = []
1089        for chain in self.all_backup_chains:
1090            if (chain.end_time < t and
1091                (not self.matched_chain_pair or
1092                 chain is not self.matched_chain_pair[1])):
1093                # don't delete the active (matched) chain
1094                old_chains.append(chain)
1095        return old_chains
1096
1097    def get_signature_chains_older_than(self, t):
1098        u"""
1099        Returns a list of signature chains older than the given time t
1100
1101        All of the times will be associated with an intact chain.
1102        Furthermore, none of the times will be of a chain which a newer
1103        set may depend on.  For instance, if set A is a full set older
1104        than t, and set B is an incremental based on A which is newer
1105        than t, then the time of set A will not be returned.
1106        """
1107        assert self.values_set
1108        old_chains = []
1109        for chain in self.all_sig_chains:
1110            if (chain.end_time < t and
1111                (not self.matched_chain_pair or
1112                 chain is not self.matched_chain_pair[0])):
1113                # don't delete the active (matched) chain
1114                old_chains.append(chain)
1115        return old_chains
1116
1117    def get_last_full_backup_time(self):
1118        u"""
1119        Return the time of the last full backup,
1120        or 0 if there is none.
1121        """
1122        return self.get_nth_last_full_backup_time(1)
1123
1124    def get_nth_last_full_backup_time(self, n):
1125        u"""
1126        Return the time of the nth to last full backup,
1127        or 0 if there is none.
1128        """
1129        chain = self.get_nth_last_backup_chain(n)
1130        if chain is None:
1131            return 0
1132        else:
1133            return chain.get_first().time
1134
1135    def get_last_backup_chain(self):
1136        u"""
1137        Return the last full backup of the collection,
1138        or None if there is no full backup chain.
1139        """
1140        return self.get_nth_last_backup_chain(1)
1141
1142    def get_nth_last_backup_chain(self, n):
1143        u"""
1144        Return the nth-to-last full backup of the collection,
1145        or None if there is less than n backup chains.
1146
1147        NOTE: n = 1 -> time of latest available chain (n = 0 is not
1148        a valid input). Thus the second-to-last is obtained with n=2
1149        rather than n=1.
1150        """
1151        assert self.values_set
1152        assert n > 0
1153
1154        if len(self.all_backup_chains) < n:
1155            return None
1156
1157        sorted = self.all_backup_chains[:]  # pylint: disable=redefined-builtin
1158        sorted.sort(key=lambda x: x.get_first().time)
1159
1160        sorted.reverse()
1161        return sorted[n - 1]
1162
1163    def get_older_than(self, t):
1164        u"""
1165        Returns a list of backup sets older than the given time t
1166
1167        All of the times will be associated with an intact chain.
1168        Furthermore, none of the times will be of a set which a newer
1169        set may depend on.  For instance, if set A is a full set older
1170        than t, and set B is an incremental based on A which is newer
1171        than t, then the time of set A will not be returned.
1172        """
1173        old_sets = []
1174        for chain in self.get_chains_older_than(t):
1175            old_sets.extend(chain.get_all_sets())
1176        return self.sort_sets(old_sets)
1177
1178    def get_older_than_required(self, t):
1179        u"""
1180        Returns list of old backup sets required by new sets
1181
1182        This function is similar to the previous one, but it only
1183        returns the times of sets which are old but part of the chains
1184        where the newer end of the chain is newer than t.
1185        """
1186        assert self.values_set
1187        new_chains = [c for c in self.all_backup_chains if c.end_time >= t]
1188        result_sets = []
1189        for chain in new_chains:
1190            old_sets = [s for s in chain.get_all_sets() if s.get_time() < t]
1191            result_sets.extend(old_sets)
1192        return self.sort_sets(result_sets)
1193
1194    def get_file_changed_record(self, filepath):
1195        u"""
1196        Returns time line of specified file changed
1197        """
1198        # quick fix to spaces in filepath
1199        modified_filepath = filepath
1200        if u" " in filepath:
1201            modified_filepath = u'"' + filepath.replace(u" ", r"\x20") + u'"'
1202
1203        if not self.matched_chain_pair:
1204            return u""
1205
1206        all_backup_set = self.matched_chain_pair[1].get_all_sets()
1207        specified_file_backup_set = []
1208        specified_file_backup_type = []
1209
1210        modified_filepath = util.fsencode(modified_filepath)
1211        for bs in all_backup_set:
1212            filelist = [fileinfo[1] for fileinfo in bs.get_files_changed()]
1213            if modified_filepath in filelist:
1214                specified_file_backup_set.append(bs)
1215                index = filelist.index(modified_filepath)
1216                specified_file_backup_type.append(bs.get_files_changed()[index][0])
1217
1218        return FileChangedStatus(filepath, list(zip(specified_file_backup_type, specified_file_backup_set)))
1219
1220
class FileChangedStatus(object):
    u"""
    Printable record of how one file changed across a chain's backup sets.
    """
    def __init__(self, filepath, fileinfo_list):
        # fileinfo_list holds (change type, backup set) pairs
        self.filepath = filepath
        self.fileinfo_list = fileinfo_list

    def __str__(self):
        set_schema = u"%20s   %30s  %20s"
        lines = [u"-------------------------",
                 _(u"File: %s") % (self.filepath),
                 _(u"Total number of backup: %d") % len(self.fileinfo_list),
                 set_schema % (_(u"Type of backup set:"), _(u"Time:"), _(u"Type of file change:"))]

        for change_type, backup_set in self.fileinfo_list:
            # A set with a truthy .time is a full backup
            if backup_set.time:
                set_kind = _(u"Full")
            else:
                set_kind = _(u"Incremental")
            lines.append(set_schema % (set_kind,
                                       dup_time.timetopretty(backup_set.get_time()),
                                       change_type.title()))

        lines.append(u"-------------------------")
        return u"\n".join(lines)
1244