1# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
2#
3# duplicity -- Encrypted bandwidth efficient backup
4#
5# Copyright 2002 Ben Escoto <ben@emerose.org>
6# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
7#
8# This file is part of duplicity.
9#
10# Duplicity is free software; you can redistribute it and/or modify it
11# under the terms of the GNU General Public License as published by the
12# Free Software Foundation; either version 2 of the License, or (at your
13# option) any later version.
14#
15# Duplicity is distributed in the hope that it will be useful, but
16# WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18# General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with duplicity; if not, write to the Free Software Foundation,
22# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23#
24# See http://www.nongnu.org/duplicity for more information.
25# Please send mail to me or the mailing list if you find bugs or have
26# any suggestions.
27
28from __future__ import print_function
29from future import standard_library
30standard_library.install_aliases()
31from builtins import map
32from builtins import next
33from builtins import object
34from builtins import range
35
36import copy
37import fasteners
38import os
39import platform
40import resource
41import sys
42import time
43
44from duplicity import __version__
45from duplicity import asyncscheduler
46from duplicity import commandline
47from duplicity import diffdir
48from duplicity import dup_collections
49from duplicity import dup_temp
50from duplicity import dup_time
51from duplicity import file_naming
52from duplicity import config
53from duplicity import gpg
54from duplicity import log
55from duplicity import manifest
56from duplicity import patchdir
57from duplicity import path
58from duplicity import progress
59from duplicity import tempdir
60from duplicity import util
61
62from datetime import datetime
63
64# If exit_val is not None, exit with given value at end.
65exit_val = None
66
67
def getpass_safe(message):
    u"""
    Prompt the user for a passphrase and return it.

    Works around Python 2's getpass(), which calls str() on the prompt
    and therefore cannot handle non-ascii characters; the prompt is
    pre-encoded with the preferred locale encoding there.
    """
    import getpass
    import locale
    py2 = sys.version_info.major == 2
    if py2:
        encoding = locale.getpreferredencoding()
        message = message.encode(encoding, u'replace')
    return getpass.getpass(message)
76
77
def get_passphrase(n, action, for_signing=False):
    u"""
    Check to make sure passphrase is indeed needed, then get
    the passphrase from environment, from gpg-agent, or user

    If n=3, a password is requested and verified. If n=2, the current
    password is verified. If n=1, a password is requested without
    verification for the time being.

    @type  n: int
    @param n: verification level for a passphrase being requested
    @type  action: string
    @param action: action to perform
    @type  for_signing: boolean
    @param for_signing: true if the passphrase is for a signing key, false if not
    @rtype: string
    @return: passphrase
    """

    # First try the environment
    try:
        if for_signing:
            return os.environ[u'SIGN_PASSPHRASE']
        else:
            return os.environ[u'PASSPHRASE']
    except KeyError:
        pass

    # check if we can reuse an already set (signing_)passphrase
    # if signing key is also an encryption key assume that the passphrase is identical
    if (for_signing and
            (config.gpg_profile.sign_key in config.gpg_profile.recipients or
             config.gpg_profile.sign_key in config.gpg_profile.hidden_recipients) and
             u'PASSPHRASE' in os.environ):  # noqa
        log.Notice(_(u"Reuse configured PASSPHRASE as SIGN_PASSPHRASE"))
        return os.environ[u'PASSPHRASE']
    # if one encryption key is also the signing key assume that the passphrase is identical
    if (not for_signing and
            (config.gpg_profile.sign_key in config.gpg_profile.recipients or
             config.gpg_profile.sign_key in config.gpg_profile.hidden_recipients) and
             u'SIGN_PASSPHRASE' in os.environ):  # noqa
        log.Notice(_(u"Reuse configured SIGN_PASSPHRASE as PASSPHRASE"))
        return os.environ[u'SIGN_PASSPHRASE']

    # Next, verify we need to ask the user

    # Assumptions:
    #   - encrypt-key has no passphrase
    #   - sign-key requires passphrase
    #   - gpg-agent supplies all, no user interaction

    # no passphrase if --no-encryption or --use-agent
    if not config.encryption or config.use_agent:
        return u""

    # these commands don't need a password
    elif action in [u"collection-status",
                    u"list-current",
                    u"remove-all-but-n-full",
                    u"remove-all-inc-of-but-n-full",
                    u"remove-old",
                    ]:
        return u""

    # for a full backup, we don't need a password if
    # there is no sign_key and there are recipients
    elif (action == u"full" and
          (config.gpg_profile.recipients or config.gpg_profile.hidden_recipients) and not
          config.gpg_profile.sign_key):
        return u""

    # for an inc backup, we don't need a password if
    # there is no sign_key and there are recipients
    elif (action == u"inc" and
          (config.gpg_profile.recipients or config.gpg_profile.hidden_recipients) and not
          config.gpg_profile.sign_key):
        return u""

    # Finally, ask the user for the passphrase
    else:
        log.Info(_(u"PASSPHRASE variable not set, asking user."))
        use_cache = True
        # Loop until a usable passphrase is entered; the only exit is the
        # final 'return pass1' after the validation checks pass.
        while 1:
            # ask the user to enter a new passphrase to avoid an infinite loop
            # if the user made a typo in the first passphrase
            if use_cache and n == 2:
                # n == 2: verify the already-cached passphrase, don't prompt.
                if for_signing:
                    pass1 = config.gpg_profile.signing_passphrase
                else:
                    pass1 = config.gpg_profile.passphrase
            else:
                # Prefer any cached passphrase; otherwise prompt the user.
                if for_signing:
                    if use_cache and config.gpg_profile.signing_passphrase:
                        pass1 = config.gpg_profile.signing_passphrase
                    else:
                        pass1 = getpass_safe(_(u"GnuPG passphrase for signing key:") + u" ")
                else:
                    if use_cache and config.gpg_profile.passphrase:
                        pass1 = config.gpg_profile.passphrase
                    else:
                        pass1 = getpass_safe(_(u"GnuPG passphrase for decryption:") + u" ")

            # n == 1: no verification pass required.
            if n == 1:
                pass2 = pass1
            elif for_signing:
                pass2 = getpass_safe(_(u"Retype passphrase for signing key to confirm: "))
            else:
                pass2 = getpass_safe(_(u"Retype passphrase for decryption to confirm: "))

            if not pass1 == pass2:
                log.Log(_(u"First and second passphrases do not match!  Please try again."),
                        log.WARNING, force_print=True)
                use_cache = False
                continue

            # An empty passphrase is only acceptable with public-key (recipient)
            # encryption or when signing; symmetric encryption needs a passphrase.
            if not pass1 and not (config.gpg_profile.recipients or
                                  config.gpg_profile.hidden_recipients) and not for_signing:
                log.Log(_(u"Cannot use empty passphrase with symmetric encryption!  Please try again."),
                        log.WARNING, force_print=True)
                use_cache = False
                continue

            return pass1
201
202
def dummy_backup(tarblock_iter):
    u"""
    Fake writing to backend, but do go through all the source paths.

    @type tarblock_iter: tarblock_iter
    @param tarblock_iter: iterator for current tar block

    @rtype: int
    @return: constant 0 (zero)
    """
    # Drain the iterator without writing anywhere; this still walks every
    # source path so the diffdir statistics get populated.
    try:
        while True:
            if not next(tarblock_iter):
                break
    except StopIteration:
        pass
    # Emit a final progress record covering the full scanned source size.
    log.Progress(None, diffdir.stats.SourceFileSize)
    return 0
221
222
def restart_position_iterator(tarblock_iter):
    u"""
    Fake writing to backend, but do go through all the source paths.
    Stop when we have processed the last file and block from the
    last backup.  Normal backup will proceed at the start of the
    next volume in the set.

    @type tarblock_iter: tarblock_iter
    @param tarblock_iter: iterator for current tar block

    @rtype: int
    @return: constant 0 (zero)
    """
    # Position recorded by the interrupted backup: the last file index and
    # block number that made it into the last complete volume.
    last_index = config.restart.last_index
    last_block = config.restart.last_block
    try:
        # Just spin our wheels
        iter_result = next(tarblock_iter)
        while iter_result:
            if (tarblock_iter.previous_index == last_index):
                # If both the previous index and this index are done, exit now
                # before we hit the next index, to prevent skipping its first
                # block.
                if not last_block and not tarblock_iter.previous_block:
                    break
                # Only check block number if last_block is also a number
                if last_block and tarblock_iter.previous_block > last_block:
                    break
            if tarblock_iter.previous_index > last_index:
                # The file recorded in the restart data no longer exists in
                # the source tree; warn and resume at the file we did reach.
                log.Warn(_(u"File %s complete in backup set.\n"
                           u"Continuing restart on file %s.") %
                         (util.uindex(last_index), util.uindex(tarblock_iter.previous_index)),
                         log.ErrorCode.restart_file_not_found)
                # We went too far! Stuff the data back into place before restarting
                tarblock_iter.queue_index_data(iter_result)
                break
            iter_result = next(tarblock_iter)
    except StopIteration:
        # Iterator exhausted before reaching last_index: the file vanished
        # from the source; restart continues from the last file seen.
        log.Warn(_(u"File %s missing in backup set.\n"
                   u"Continuing restart on file %s.") %
                 (util.uindex(last_index), util.uindex(tarblock_iter.previous_index)),
                 log.ErrorCode.restart_file_not_found)
265
266
def write_multivol(backup_type, tarblock_iter, man_outfp, sig_outfp, backend):
    u"""
    Encrypt volumes of tarblock_iter and write to backend

    backup_type should be "inc" or "full" and only matters here when
    picking the filenames.  The path_prefix will determine the names
    of the files written to backend.  Also writes manifest file.
    Returns number of bytes written.

    @type backup_type: string
    @param backup_type: type of backup to perform, either 'inc' or 'full'
    @type tarblock_iter: tarblock_iter
    @param tarblock_iter: iterator for current tar block
    @type man_outfp: fileobj
    @param man_outfp: open file object the manifest is written to
    @type sig_outfp: fileobj
    @param sig_outfp: open file object the signatures are written to
    @type backend: callable backend object
    @param backend: I/O backend for selected protocol

    @rtype: int
    @return: bytes written
    """

    def get_indicies(tarblock_iter):
        u"""Return start_index and end_index of previous volume"""
        start_index, start_block = tarblock_iter.recall_index()
        if start_index is None:
            start_index = ()
            start_block = None
        if start_block:
            start_block -= 1
        end_index, end_block = tarblock_iter.get_previous_index()
        if end_index is None:
            # Empty volume: collapse the range onto its start position.
            end_index = start_index
            end_block = start_block
        if end_block:
            end_block -= 1
        return start_index, start_block, end_index, end_block

    def validate_block(orig_size, dest_filename):
        u"""
        Verify the uploaded file's remote size matches orig_size.

        Retries with exponential backoff (2**attempt seconds) up to
        config.num_retries times; a persistent mismatch is fatal.  A size
        of None means the backend could not report a size, so validation
        is skipped.
        """
        info = backend.query_info([dest_filename])[dest_filename]
        size = info[u'size']
        if size is None:
            return  # error querying file
        for attempt in range(1, config.num_retries + 1):
            info = backend.query_info([dest_filename])[dest_filename]
            size = info[u'size']
            if size == orig_size:
                break
            if size is None:
                return
            log.Notice(_(u"%s Remote filesize %d for %s does not match local size %d, retrying.") % (datetime.now(),
                       size, util.escape(dest_filename), orig_size))
            time.sleep(2**attempt)
        if size != orig_size:
            code_extra = u"%s %d %d" % (util.escape(dest_filename), orig_size, size)
            log.FatalError(_(u"File %s was corrupted during upload.") % util.fsdecode(dest_filename),
                           log.ErrorCode.volume_wrong_size, code_extra)

    def put(tdp, dest_filename, vol_num):
        u"""
        Retrieve file size *before* calling backend.put(), which may (at least
        in case of the localbackend) rename the temporary file to the target
        instead of copying.

        Returns the number of bytes uploaded for this volume.
        """
        putsize = tdp.getsize()
        if config.skip_volume != vol_num:  # for testing purposes only
            backend.put(tdp, dest_filename)
        validate_block(putsize, dest_filename)
        if tdp.stat:
            tdp.delete()
        return putsize

    def validate_encryption_settings(backup_set, manifest):
        u"""
        When restarting a backup, we have no way to verify that the current
        passphrase is the same as the one used for the beginning of the backup.
        This is because the local copy of the manifest is unencrypted and we
        don't need to decrypt the existing volumes on the backend.  To ensure
        that we are using the same passphrase, we manually download volume 1
        and decrypt it with the current passphrase.  We also want to confirm
        that we're using the same encryption settings (i.e. we don't switch
        from encrypted to non in the middle of a backup chain), so we check
        that the vol1 filename on the server matches the settings of this run.
        """
        if ((config.gpg_profile.recipients or config.gpg_profile.hidden_recipients) and
                not config.gpg_profile.sign_key):
            # When using gpg encryption without a signing key, we skip this validation
            # step to ensure that we can still backup without needing the secret key
            # on the machine.
            return

        vol1_filename = file_naming.get(backup_type, 1,
                                        encrypted=config.encryption,
                                        gzipped=config.compression)
        if vol1_filename != backup_set.volume_name_dict[1]:
            # NOTE(review): 'enryption_mismatch' is presumably the (misspelled)
            # attribute name actually defined in log.ErrorCode -- verify there.
            log.FatalError(_(u"Restarting backup, but current encryption "
                             u"settings do not match original settings"),
                           log.ErrorCode.enryption_mismatch)

        # Settings are same, let's check passphrase itself if we are encrypted
        if config.encryption:
            # Downloading and opening vol1 forces a decryption attempt with the
            # current passphrase; failure aborts before any upload happens.
            fileobj = restore_get_enc_fileobj(config.backend, vol1_filename,
                                              manifest.volume_info_dict[1])
            fileobj.close()

    if not config.restart:
        # normal backup start
        vol_num = 0
        mf = manifest.Manifest(fh=man_outfp)
        mf.set_dirinfo()
    else:
        # restart from last known position
        mf = config.restart.last_backup.get_local_manifest()
        config.restart.checkManifest(mf)
        config.restart.setLastSaved(mf)
        # Cold-storage backends (glacier/deep archive) cannot serve vol1 for
        # passphrase validation, so it is skipped with a warning.
        if not (config.s3_use_deep_archive or config.s3_use_glacier):
            validate_encryption_settings(config.restart.last_backup, mf)
        else:
            log.Warn(_(u"Skipping encryption validation due to glacier/deep storage"))
        mf.fh = man_outfp
        last_block = config.restart.last_block
        log.Notice(_(u"Restarting after volume %s, file %s, block %s") %
                   (config.restart.start_vol,
                    util.uindex(config.restart.last_index),
                    config.restart.last_block))
        vol_num = config.restart.start_vol
        # Fast-forward the source iterator to where the last run stopped.
        restart_position_iterator(tarblock_iter)

    at_end = 0
    bytes_written = 0

    # If --progress option is given, initiate a background thread that will
    # periodically report progress to the Log.
    if config.progress:
        progress.tracker.set_start_volume(vol_num + 1)
        progress.progress_thread.start()

    # This assertion must be kept until we have solved the problem
    # of concurrency at the backend level. Concurrency 1 is fine
    # because the actual I/O concurrency on backends is limited to
    # 1 as usual, but we are allowed to perform local CPU
    # intensive tasks while that single upload is happening. This
    # is an assert put in place to avoid someone accidentally
    # enabling concurrency above 1, before adequate work has been
    # done on the backends to make them support concurrency.
    assert config.async_concurrency <= 1

    io_scheduler = asyncscheduler.AsyncScheduler(config.async_concurrency)
    async_waiters = []

    # Main volume loop: cut a volume from the tar stream, record it in the
    # manifest, checkpoint, and schedule its upload asynchronously.
    while not at_end:
        # set up iterator
        tarblock_iter.remember_next_index()  # keep track of start index

        # Create volume
        vol_num += 1
        dest_filename = file_naming.get(backup_type, vol_num,
                                        encrypted=config.encryption,
                                        gzipped=config.compression)
        tdp = dup_temp.new_tempduppath(file_naming.parse(dest_filename))

        # write volume
        if config.encryption:
            at_end = gpg.GPGWriteFile(tarblock_iter, tdp.name, config.gpg_profile,
                                      config.volsize)
        elif config.compression:
            at_end = gpg.GzipWriteFile(tarblock_iter, tdp.name, config.volsize)
        else:
            at_end = gpg.PlainWriteFile(tarblock_iter, tdp.name, config.volsize)
        tdp.setdata()

        # Add volume information to manifest
        vi = manifest.VolumeInfo()
        vi.set_info(vol_num, *get_indicies(tarblock_iter))
        vi.set_hash(u"SHA1", gpg.get_hash(u"SHA1", tdp))
        mf.add_volume_info(vi)

        # Checkpoint after each volume so restart has a place to restart.
        # Note that until after the first volume, all files are temporary.
        if vol_num == 1:
            sig_outfp.to_partial()
            man_outfp.to_partial()
        else:
            sig_outfp.flush()
            man_outfp.flush()

        # The lambda binds tdp/dest_filename/vol_num as parameters so each
        # scheduled task captures this iteration's values, not the loop vars.
        async_waiters.append(io_scheduler.schedule_task(lambda tdp, dest_filename,
                                                        vol_num: put(tdp, dest_filename, vol_num),
                                                        (tdp, dest_filename, vol_num)))

        # Log human-readable version as well as raw numbers for machine consumers
        log.Progress(_(u'Processed volume %d') % vol_num, diffdir.stats.SourceFileSize)
        # Snapshot (serialize) progress now as a Volume has been completed.
        # This is always the last restore point when it comes to restart a failed backup
        if config.progress:
            progress.tracker.snapshot_progress(vol_num)

        # for testing purposes only - assert on inc or full
        assert config.fail_on_volume != vol_num, u"Forced assertion for testing at volume %d" % vol_num

    # Collect byte count from all asynchronous jobs; also implicitly waits
    # for them all to complete.
    for waiter in async_waiters:
        bytes_written += waiter()

    # Upload the collection summary.
    # bytes_written += write_manifest(mf, backup_type, backend)
    mf.set_files_changed_info(diffdir.stats.get_delta_entries_file())

    return bytes_written
475
476
def get_man_fileobj(backup_type):
    u"""
    Return a fileobj opened for writing, save results as manifest

    Save manifest in config.archive_dir_path gzipped.
    Save them on the backend encrypted as needed.

    @type backup_type: string
    @param backup_type: either "full" or "inc"

    @rtype: fileobj
    @return: fileobj opened for writing
    """
    assert backup_type == u"full" or backup_type == u"inc"

    # Three names for the same manifest: the in-progress partial file, the
    # final local name in the archive dir, and the remote name (encrypted
    # when encryption is enabled).
    part_man_filename = file_naming.get(backup_type,
                                        manifest=True,
                                        partial=True)
    perm_man_filename = file_naming.get(backup_type,
                                        manifest=True)
    remote_man_filename = file_naming.get(backup_type,
                                          manifest=True,
                                          encrypted=config.encryption)

    fh = dup_temp.get_fileobj_duppath(config.archive_dir_path,
                                      part_man_filename,
                                      perm_man_filename,
                                      remote_man_filename)
    return fh
506
507
def get_sig_fileobj(sig_type):
    u"""
    Return a fileobj opened for writing, save results as signature

    Signatures are saved gzipped in config.archive_dir_path, and on the
    backend encrypted/compressed as configured.

    @type sig_type: string
    @param sig_type: either "full-sig" or "new-sig"

    @rtype: fileobj
    @return: fileobj opened for writing
    """
    assert sig_type in [u"full-sig", u"new-sig"]

    # Three names for the same signature file: the in-progress partial
    # file, the final local (gzipped) name, and the remote name as it
    # will appear on the backend.
    local_partial = file_naming.get(sig_type,
                                    gzipped=False,
                                    partial=True)
    local_final = file_naming.get(sig_type,
                                  gzipped=True)
    remote = file_naming.get(sig_type, encrypted=config.encryption,
                             gzipped=config.compression)

    return dup_temp.get_fileobj_duppath(config.archive_dir_path,
                                        local_partial,
                                        local_final,
                                        remote,
                                        overwrite=True)
537
538
def full_backup(col_stats):
    u"""
    Do full backup of directory to backend, using archive_dir_path

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    if config.progress:
        progress.tracker = progress.ProgressTracker()
        # Fake a backup to compute total of moving bytes
        tarblock_iter = diffdir.DirFull(config.select)
        dummy_backup(tarblock_iter)
        # Store computed stats to compute progress later
        progress.tracker.set_evidence(diffdir.stats, True)
        # Reinit the config.select iterator, so
        # the core of duplicity can rescan the paths
        commandline.set_selection()
        progress.progress_thread = progress.LogProgressThread()

    if config.dry_run:
        # Dry run: walk the source without writing anything.
        tarblock_iter = diffdir.DirFull(config.select)
        bytes_written = dummy_backup(tarblock_iter)
        col_stats.set_values(sig_chain_warning=None)
    else:
        sig_outfp = get_sig_fileobj(u"full-sig")
        man_outfp = get_man_fileobj(u"full")
        tarblock_iter = diffdir.DirFull_WriteSig(config.select,
                                                 sig_outfp)
        bytes_written = write_multivol(u"full", tarblock_iter,
                                       man_outfp, sig_outfp,
                                       config.backend)

        # close sig file, send to remote, and rename to final
        sig_outfp.close()
        sig_outfp.to_remote()
        sig_outfp.to_final()

        # close manifest, send to remote, and rename to final
        man_outfp.close()
        man_outfp.to_remote()
        man_outfp.to_final()

        if config.progress:
            # Terminate the background thread now, if any
            progress.progress_thread.finished = True
            progress.progress_thread.join()
            # Final 100% progress record with the totals.
            log.TransferProgress(100.0, 0, progress.tracker.total_bytecount,
                                 progress.tracker.total_elapsed_seconds(),
                                 progress.tracker.speed, False)

        col_stats.set_values(sig_chain_warning=None)

    print_statistics(diffdir.stats, bytes_written)
595
596
def check_sig_chain(col_stats):
    u"""
    Get last signature chain for inc backup, or None if none available

    @type col_stats: CollectionStatus object
    @param col_stats: collection status
    """
    # With a matched chain pair we can continue incrementally.
    if col_stats.matched_chain_pair:
        return col_stats.matched_chain_pair[0]

    # No old signatures: fatal if the user demanded incremental,
    # otherwise fall back to a full backup.
    if config.incremental:
        log.FatalError(_(u"Fatal Error: Unable to start incremental backup.  "
                         u"Old signatures not found and incremental specified"),
                       log.ErrorCode.inc_without_sigs)
    else:
        log.Warn(_(u"No signatures found, switching to full backup."))
    return None
613
614
def print_statistics(stats, bytes_written):  # pylint: disable=unused-argument
    u"""
    If config.print_statistics, print stats after adding bytes_written

    @rtype: void
    @return: void
    """
    if not config.print_statistics:
        return
    # Record the destination size change, then print the formatted report.
    diffdir.stats.TotalDestinationSizeChange = bytes_written
    log.Log(diffdir.stats.get_stats_logstring(_(u"Backup Statistics")),
            log.NOTICE, force_print=True)
626
627
def incremental_backup(sig_chain):
    u"""
    Do incremental backup of directory to backend, using archive_dir_path

    @type sig_chain: SignatureChain object
    @param sig_chain: signature chain of the previous backup

    @rtype: void
    @return: void
    """
    if not config.restart:
        dup_time.setprevtime(sig_chain.end_time)
        if dup_time.curtime == dup_time.prevtime:
            # Backup times must differ, or the new set would collide with
            # the previous one; wait and re-read the clock.
            time.sleep(2)
            dup_time.setcurtime()
            assert dup_time.curtime != dup_time.prevtime, \
                u"time not moving forward at appropriate pace - system clock issues?"

    if config.progress:
        progress.tracker = progress.ProgressTracker()
        # Fake a backup to compute total of moving bytes
        tarblock_iter = diffdir.DirDelta(config.select,
                                         sig_chain.get_fileobjs())
        dummy_backup(tarblock_iter)
        # Store computed stats to compute progress later
        progress.tracker.set_evidence(diffdir.stats, False)
        # Reinit the config.select iterator, so
        # the core of duplicity can rescan the paths
        commandline.set_selection()
        progress.progress_thread = progress.LogProgressThread()

    if config.dry_run:
        # Dry run: walk the delta without writing anything.
        tarblock_iter = diffdir.DirDelta(config.select,
                                         sig_chain.get_fileobjs())
        bytes_written = dummy_backup(tarblock_iter)
    else:
        new_sig_outfp = get_sig_fileobj(u"new-sig")
        new_man_outfp = get_man_fileobj(u"inc")
        tarblock_iter = diffdir.DirDelta_WriteSig(config.select,
                                                  sig_chain.get_fileobjs(),
                                                  new_sig_outfp)
        bytes_written = write_multivol(u"inc", tarblock_iter,
                                       new_man_outfp, new_sig_outfp,
                                       config.backend)

        # close sig file and rename to final
        new_sig_outfp.close()
        new_sig_outfp.to_remote()
        new_sig_outfp.to_final()

        # close manifest and rename to final
        new_man_outfp.close()
        new_man_outfp.to_remote()
        new_man_outfp.to_final()

        if config.progress:
            # Terminate the background thread now, if any
            progress.progress_thread.finished = True
            progress.progress_thread.join()
            # Final 100% progress record with the totals.
            log.TransferProgress(100.0, 0, progress.tracker.total_bytecount,
                                 progress.tracker.total_elapsed_seconds(),
                                 progress.tracker.speed, False)

    print_statistics(diffdir.stats, bytes_written)
689
690
def list_current(col_stats):
    u"""
    List the files current in the archive (examining signature only)

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    # Locals renamed from 'time'/'path' so they no longer shadow the
    # module-level 'time' and 'path' imports.
    sig_time = config.restore_time or dup_time.curtime
    sig_chain = col_stats.get_signature_chain_at_time(sig_time)
    path_iter = diffdir.get_combined_path_iter(sig_chain.get_fileobjs(sig_time))
    for ropath in path_iter:
        if ropath.difftype != u"deleted":
            # Human-readable line for the user, machine-parsable line for logs.
            user_info = u"%s %s" % (dup_time.timetopretty(ropath.getmtime()),
                                    util.fsdecode(ropath.get_relative_path()))
            log_info = u"%s %s %s" % (dup_time.timetostring(ropath.getmtime()),
                                      util.escape(ropath.get_relative_path()),
                                      ropath.type)
            log.Log(user_info, log.INFO, log.InfoCode.file_list,
                    log_info, True)
713
714
def restore(col_stats):
    u"""
    Restore archive in config.backend to config.local_path

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    if config.dry_run:
        # A dry run only logs the list of required volumes.
        restore_get_patched_rop_iter(col_stats)
        return

    rop_iter = restore_get_patched_rop_iter(col_stats)
    if patchdir.Write_ROPaths(config.local_path, rop_iter):
        return

    # Nothing was restored -- report the most likely reason.
    if config.restore_dir:
        log.FatalError(_(u"%s not found in archive - no files restored.")
                       % (util.fsdecode(config.restore_dir)),
                       log.ErrorCode.restore_dir_not_found)
    else:
        log.FatalError(_(u"No files found in archive - nothing restored."),
                       log.ErrorCode.no_restore_files)
738
739
def restore_get_patched_rop_iter(col_stats):
    u"""
    Return iterator of patched ROPaths of desired restore data

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: iterator or None
    @return: iterator of patched ROPaths; None when config.dry_run is set
             (the required volumes are only logged)
    """
    if config.restore_dir:
        index = tuple(config.restore_dir.split(b"/"))
    else:
        index = ()
    # Renamed from 'time' so it no longer shadows the module-level
    # 'time' import.
    restore_time = config.restore_time or dup_time.curtime
    backup_chain = col_stats.get_backup_chain_at_time(restore_time)
    assert backup_chain, col_stats.all_backup_chains
    backup_setlist = backup_chain.get_sets_at_time(restore_time)
    num_vols = sum(len(s) for s in backup_setlist)
    # Mutable cell so the nested generator can update shared progress state.
    cur_vol = [0]

    def get_fileobj_iter(backup_set):
        u"""Get file object iterator from backup_set contain given index"""
        # Local renamed from 'manifest' so it does not shadow the
        # module-level 'manifest' import.
        mf = backup_set.get_manifest()
        volumes = mf.get_containing_volumes(index)

        # Give backends a chance to prefetch the whole batch (e.g. thaw
        # cold storage) before individual downloads start.
        if hasattr(backup_set.backend.backend, u'pre_process_download_batch'):
            backup_set.backend.backend.pre_process_download_batch(backup_set.volume_name_dict.values())

        for vol_num in volumes:
            yield restore_get_enc_fileobj(backup_set.backend,
                                          backup_set.volume_name_dict[vol_num],
                                          mf.volume_info_dict[vol_num])
            cur_vol[0] += 1
            log.Progress(_(u'Processed volume %d of %d') % (cur_vol[0], num_vols),
                         cur_vol[0], num_vols)

    if hasattr(config.backend, u'pre_process_download') or config.dry_run:
        # Collect every volume filename needed for the restore.
        file_names = []
        for backup_set in backup_setlist:
            mf = backup_set.get_manifest()
            volumes = mf.get_containing_volumes(index)
            for vol_num in volumes:
                file_names.append(backup_set.volume_name_dict[vol_num])
        if config.dry_run:
            log.Notice(u"Required volumes to restore:\n\t" +
                       u'\n\t'.join(file_name.decode() for file_name in file_names))
            return None
        else:
            config.backend.pre_process_download(file_names)

    fileobj_iters = list(map(get_fileobj_iter, backup_setlist))
    tarfiles = list(map(patchdir.TarFile_FromFileobjs, fileobj_iters))
    return patchdir.tarfiles2rop_iter(tarfiles, index)
793
794
def restore_get_enc_fileobj(backend, filename, volume_info):
    u"""
    Return plaintext fileobj from encrypted filename on backend

    If volume_info is set, the hash of the file will be checked,
    assuming some hash is available.  Also, if config.sign_key is
    set, a fatal error will be raised if file not signed by sign_key.

    @type backend: backend object
    @param backend: backend to fetch the volume from
    @type filename: bytes
    @param filename: remote volume file name
    @type volume_info: VolumeInfo
    @param volume_info: manifest entry carrying the expected hash

    @rtype: fileobj
    @return: filtered (decrypted/decompressed) file object; the backing
             temp file is deleted when the object is closed
    """
    parseresults = file_naming.parse(filename)
    tdp = dup_temp.new_tempduppath(parseresults)
    backend.get(filename, tdp)

    # Verify hash of the downloaded file against the manifest.  (This was
    # previously a bare u"..." string, which is an executed statement, not
    # a comment.)
    verified, hash_pair, calculated_hash = restore_check_hash(volume_info, tdp)
    if not verified:
        log.FatalError(u"%s\n %s\n %s\n %s\n" %
                       (_(u"Invalid data - %s hash mismatch for file:") %
                        hash_pair[0],
                        util.fsdecode(filename),
                        _(u"Calculated hash: %s") % calculated_hash,
                        _(u"Manifest hash: %s") % hash_pair[1]),
                       log.ErrorCode.mismatched_hash)

    fileobj = tdp.filtered_open_with_delete(u"rb")
    if parseresults.encrypted and config.gpg_profile.sign_key:
        # Defer the signature check until the volume is fully read/closed.
        restore_add_sig_check(fileobj)
    return fileobj
823
824
def restore_check_hash(volume_info, vol_path):
    u"""
    Check the hash of vol_path path against data in volume_info

    @type volume_info: VolumeInfo
    @param volume_info: manifest info providing the expected hash
    @type vol_path: Path
    @param vol_path: local path of the downloaded volume

    @rtype: tuple
    @return: (verified, hash_pair, calculated_hash); verified is a boolean
    """
    # BUGFIX: initialize up front.  When the manifest carries no hash at
    # all (hash_pair falsy), the original code left calculated_hash unbound
    # and the final return raised UnboundLocalError.
    calculated_hash = None
    hash_pair = volume_info.get_best_hash()
    if hash_pair:
        calculated_hash = gpg.get_hash(hash_pair[0], vol_path)
        if calculated_hash != hash_pair[1]:
            return False, hash_pair, calculated_hash
    # Reached here: verification passed, or there was no hash to verify.
    return True, hash_pair, calculated_hash
839
840
def restore_add_sig_check(fileobj):
    u"""
    Require signature when closing fileobj matches sig in gpg_profile

    @rtype: void
    @return: void
    """
    assert (isinstance(fileobj, dup_temp.FileobjHooked) and
            isinstance(fileobj.fileobj, gpg.GPGFile)), fileobj

    def check_signature():
        u"""Thunk run when closing volume file"""
        actual_sig = fileobj.fileobj.get_signature()
        if actual_sig is None:
            actual_sig = u"None"
        sign_key = config.gpg_profile.sign_key
        if sign_key is None:
            sign_key = u"None"
        # Compare only the trailing characters both ids share, so a short
        # key id still matches against a full-length fingerprint.
        ofs = -min(len(actual_sig), len(sign_key))
        if actual_sig[ofs:] != sign_key[ofs:]:
            log.FatalError(_(u"Volume was signed by key %s, not %s") %
                           (actual_sig[ofs:], sign_key[ofs:]),
                           log.ErrorCode.unsigned_volume)

    fileobj.addhook(check_signature)
864
865
def verify(col_stats):
    u"""
    Verify files, logging differences

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    global exit_val
    rop_iter = restore_get_patched_rop_iter(col_stats)
    collated = diffdir.collate2iters(rop_iter, config.select)
    diff_count = 0
    total_count = 0
    for backup_ropath, current_path in collated:
        # Substitute an empty ROPath for whichever side is missing so the
        # comparison below always has two objects to work with.
        backup_ropath = backup_ropath or path.ROPath(current_path.index)
        current_path = current_path or path.ROPath(backup_ropath.index)
        if not backup_ropath.compare_verbose(current_path, config.compare_data):
            diff_count += 1
        total_count += 1
    # ngettext can't pluralize on two number variables in one string, so
    # each count is formatted separately and joined afterwards.
    compared = ngettext(u"%d file compared",
                        u"%d files compared", total_count) % total_count
    found = ngettext(u"%d difference found",
                     u"%d differences found", diff_count) % diff_count
    log.Notice(_(u"Verify complete: %s, %s.") % (compared, found))
    if diff_count >= 1:
        exit_val = 1
898
899
def cleanup(col_stats):
    u"""
    Delete the extraneous files in the current backend

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    ext_local, ext_remote = col_stats.get_extraneous()
    extraneous = ext_local + ext_remote
    if not extraneous:
        log.Warn(_(u"No extraneous files found, nothing deleted in cleanup."))
        return

    filestr = u"\n".join(map(util.fsdecode, extraneous))
    if config.force:
        log.Notice(ngettext(u"Deleting this file from backend:",
                            u"Deleting these files from backend:",
                            len(extraneous)) + u"\n" + filestr)
        if not config.dry_run:
            col_stats.backend.delete(ext_remote)
            for fn in ext_local:
                try:
                    config.archive_dir_path.append(fn).delete()
                except Exception as e:
                    # Best effort: a leftover local cache file is harmless,
                    # but record the failure instead of hiding it entirely
                    # (the original silently swallowed the exception).
                    log.Debug(_(u"Unable to delete %s: %s") %
                              (util.fsdecode(fn), util.uexc(e)))
    else:
        log.Notice(ngettext(u"Found the following file to delete:",
                            u"Found the following files to delete:",
                            len(extraneous)) + u"\n" + filestr + u"\n" +
                   _(u"Run duplicity again with the --force option to actually delete."))
933
934
def remove_all_but_n_full(col_stats):
    u"""
    Remove backup files older than the last n full backups.

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    assert config.keep_chains is not None

    # Translate "keep the last N full chains" into a cutoff time, then
    # delegate to the ordinary time-based removal logic.
    cutoff_time = col_stats.get_nth_last_full_backup_time(config.keep_chains)
    config.remove_time = cutoff_time
    remove_old(col_stats)
950
951
def remove_old(col_stats):
    u"""
    Remove backup files older than config.remove_time from backend

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    assert config.remove_time is not None

    def set_times_str(setlist):
        u"""Return string listing times of sets in setlist"""
        return u"\n".join([dup_time.timetopretty(s.get_time()) for s in setlist])

    def chain_times_str(chainlist):
        u"""Return string listing times of chains in chainlist"""
        return u"\n".join([dup_time.timetopretty(s.end_time) for s in chainlist])

    # Warn about old sets that cannot be removed because a newer set still
    # depends on them (incremental chains must stay intact).
    req_list = col_stats.get_older_than_required(config.remove_time)
    if req_list:
        log.Warn(u"%s\n%s\n%s" %
                 (_(u"There are backup set(s) at time(s):"),
                  set_times_str(req_list),
                  _(u"Which can't be deleted because newer sets depend on them.")))

    # The currently active chain is never deleted, even if it is older than
    # the requested cutoff.
    if (col_stats.matched_chain_pair and
            col_stats.matched_chain_pair[1].end_time < config.remove_time):
        log.Warn(_(u"Current active backup chain is older than specified time.  "
                   u"However, it will not be deleted.  To remove all your backups, "
                   u"manually purge the repository."))

    chainlist = col_stats.get_chains_older_than(config.remove_time)

    if config.remove_all_inc_of_but_n_full_mode:
        # ignore chains without incremental backups:
        chainlist = list(x for x in chainlist if
                         (isinstance(x, dup_collections.SignatureChain) and x.inclist) or
                         (isinstance(x, dup_collections.BackupChain) and x.incset_list))

    if not chainlist:
        log.Notice(_(u"No old backup sets found, nothing deleted."))
        return
    if config.force:
        log.Notice(ngettext(u"Deleting backup chain at time:",
                            u"Deleting backup chains at times:",
                            len(chainlist)) +
                   u"\n" + chain_times_str(chainlist))
        # Add signature files too, since they won't be needed anymore
        chainlist += col_stats.get_signature_chains_older_than(config.remove_time)
        chainlist.reverse()  # save oldest for last
        for chain in chainlist:
            # if remove_all_inc_of_but_n_full_mode mode, remove only
            # incrementals one and not full
            if config.remove_all_inc_of_but_n_full_mode:
                if isinstance(chain, dup_collections.SignatureChain):
                    chain_desc = _(u"Deleting any incremental signature chain rooted at %s")
                else:
                    chain_desc = _(u"Deleting any incremental backup chain rooted at %s")
            else:
                if isinstance(chain, dup_collections.SignatureChain):
                    chain_desc = _(u"Deleting complete signature chain %s")
                else:
                    chain_desc = _(u"Deleting complete backup chain %s")
            log.Notice(chain_desc % dup_time.timetopretty(chain.end_time))
            if not config.dry_run:
                # keep_full preserves the full backup when only the
                # incrementals of old chains are being pruned.
                chain.delete(keep_full=config.remove_all_inc_of_but_n_full_mode)
        col_stats.set_values(sig_chain_warning=None)
    else:
        # Without --force, only report what would be deleted.
        log.Notice(ngettext(u"Found old backup chain at the following time:",
                            u"Found old backup chains at the following times:",
                            len(chainlist)) +
                   u"\n" + chain_times_str(chainlist) + u"\n" +
                   _(u"Rerun command with --force option to actually delete."))
1027
1028
def replicate():
    u"""
    Replicate backup files from one remote to another, possibly encrypting or adding parity.

    @rtype: void
    @return: void
    """
    action = u"replicate"
    # Renamed from 'time': the old local name shadowed the stdlib 'time'
    # module imported at the top of this file.
    target_time = config.restore_time or dup_time.curtime
    src_stats = dup_collections.CollectionsStatus(config.src_backend, None, action).set_values(sig_chain_warning=None)
    tgt_stats = dup_collections.CollectionsStatus(config.backend, None, action).set_values(sig_chain_warning=None)

    src_list = config.src_backend.list()
    tgt_list = config.backend.list()

    # --- Phase 1: replicate signature chains ---
    src_chainlist = src_stats.get_signature_chains(local=False, filelist=src_list)[0]
    tgt_chainlist = tgt_stats.get_signature_chains(local=False, filelist=tgt_list)[0]
    # BUGFIX: sorted() returns a new list and its result was discarded, so
    # the chains were never actually ordered.  Sort in place instead.
    src_chainlist.sort(key=lambda chain: chain.start_time)
    tgt_chainlist.sort(key=lambda chain: chain.start_time)
    if not src_chainlist:
        log.Notice(_(u"No old backup sets found."))
        return
    for src_chain in src_chainlist:
        # Find the matching target chain (same start time), if any.
        try:
            tgt_chain = list([chain for chain in tgt_chainlist if chain.start_time == src_chain.start_time])[0]
        except IndexError:
            tgt_chain = None

        tgt_sigs = list(map(file_naming.parse, tgt_chain.get_filenames())) if tgt_chain else []
        for src_sig_filename in src_chain.get_filenames():
            src_sig = file_naming.parse(src_sig_filename)
            # Only replicate signatures strictly older than the target time.
            if not (src_sig.time or src_sig.end_time) < target_time:
                continue
            try:
                tgt_sigs.remove(src_sig)
                log.Info(_(u"Signature %s already replicated") % (src_sig_filename,))
                continue
            except ValueError:
                pass
            if src_sig.type == u'new-sig':
                dup_time.setprevtime(src_sig.start_time)
            dup_time.setcurtime(src_sig.time or src_sig.end_time)
            log.Notice(_(u"Replicating %s.") % (src_sig_filename,))
            fileobj = config.src_backend.get_fileobj_read(src_sig_filename)
            filename = file_naming.get(src_sig.type, encrypted=config.encryption, gzipped=config.compression)
            tdp = dup_temp.new_tempduppath(file_naming.parse(filename))
            tmpobj = tdp.filtered_open(mode=u'wb')
            util.copyfileobj(fileobj, tmpobj)  # decrypt, compress, (re)-encrypt
            fileobj.close()
            tmpobj.close()
            config.backend.put(tdp, filename)
            tdp.delete()

    # --- Phase 2: replicate backup chains (volumes + manifests) ---
    src_chainlist = src_stats.get_backup_chains(filename_list=src_list)[0]
    tgt_chainlist = tgt_stats.get_backup_chains(filename_list=tgt_list)[0]
    # BUGFIX: same discarded-sorted() issue as above.
    src_chainlist.sort(key=lambda chain: chain.start_time)
    tgt_chainlist.sort(key=lambda chain: chain.start_time)
    for src_chain in src_chainlist:
        try:
            tgt_chain = list([chain for chain in tgt_chainlist if chain.start_time == src_chain.start_time])[0]
        except IndexError:
            tgt_chain = None

        tgt_sets = tgt_chain.get_all_sets() if tgt_chain else []
        for src_set in src_chain.get_all_sets():
            if not src_set.get_time() < target_time:
                continue
            try:
                tgt_sets.remove(src_set)
                log.Info(_(u"Backupset %s already replicated") % (src_set.remote_manifest_name,))
                continue
            except ValueError:
                pass
            if src_set.type == u'inc':
                dup_time.setprevtime(src_set.start_time)
            dup_time.setcurtime(src_set.get_time())
            rmf = src_set.get_remote_manifest()
            mf_filename = file_naming.get(src_set.type, manifest=True)
            mf_tdp = dup_temp.new_tempduppath(file_naming.parse(mf_filename))
            # Build a fresh manifest while copying volumes, since each
            # re-encrypted volume gets a new hash on the target.
            mf = manifest.Manifest(fh=mf_tdp.filtered_open(mode=u'wb'))
            for i, filename in list(src_set.volume_name_dict.items()):
                log.Notice(_(u"Replicating %s.") % (filename,))
                fileobj = restore_get_enc_fileobj(config.src_backend, filename, rmf.volume_info_dict[i])
                filename = file_naming.get(src_set.type, i, encrypted=config.encryption, gzipped=config.compression)
                tdp = dup_temp.new_tempduppath(file_naming.parse(filename))
                tmpobj = tdp.filtered_open(mode=u'wb')
                util.copyfileobj(fileobj, tmpobj)  # decrypt, compress, (re)-encrypt
                fileobj.close()
                tmpobj.close()
                config.backend.put(tdp, filename)

                vi = copy.copy(rmf.volume_info_dict[i])
                vi.set_hash(u"SHA1", gpg.get_hash(u"SHA1", tdp))
                mf.add_volume_info(vi)

                tdp.delete()

            mf.fh.close()
            # incremental GPG writes hang on close, so do any encryption here at once
            mf_fileobj = mf_tdp.filtered_open_with_delete(mode=u'rb')
            mf_final_filename = file_naming.get(src_set.type,
                                                manifest=True,
                                                encrypted=config.encryption,
                                                gzipped=config.compression)
            mf_final_tdp = dup_temp.new_tempduppath(file_naming.parse(mf_final_filename))
            mf_final_fileobj = mf_final_tdp.filtered_open(mode=u'wb')
            util.copyfileobj(mf_fileobj, mf_final_fileobj)  # compress, encrypt
            mf_fileobj.close()
            mf_final_fileobj.close()
            config.backend.put(mf_final_tdp, mf_final_filename)
            mf_final_tdp.delete()

    config.src_backend.close()
    config.backend.close()
1143
1144
def sync_archive(col_stats):
    u"""
    Synchronize local archive manifest file and sig chains to remote archives.
    Copy missing files from remote to local as needed to make sure the local
    archive is synchronized to remote storage.

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    # Extensions that may wrap a metadata file's base name; a name ending
    # in anything else is treated as already being the base name.
    suffixes = [b".g", b".gpg", b".z", b".gz", b".part"]

    def is_needed(filename):
        u"""Indicates if the metadata file should be synced.

        In full sync mode, or if there's a collection misbehavior, all files
        are needed.

        Otherwise, only the metadata for the target chain needs sync.
        """
        if config.metadata_sync_mode == u"full":
            return True
        assert config.metadata_sync_mode == u"partial"
        parsed = file_naming.parse(filename)
        try:
            target_chain = col_stats.get_backup_chain_at_time(
                config.restore_time or dup_time.curtime)
        except dup_collections.CollectionsError:
            # With zero or multiple chains at this time, do a full sync
            return True
        # Full backups carry a single 'time'; incrementals carry a
        # start/end pair.
        if parsed.start_time is None and parsed.end_time is None:
            start_time = end_time = parsed.time
        else:
            start_time = parsed.start_time
            end_time = parsed.end_time

        # Needed iff the file's [start_time, end_time] span overlaps the
        # target chain's lifetime.
        return end_time >= target_chain.start_time and \
            start_time <= target_chain.end_time

    def get_metafiles(filelist):
        u"""
        Return metafiles of interest from the file list.
        Files of interest are:
          sigtar - signature files
          manifest - signature files
          duplicity partial versions of the above
        Files excluded are:
          non-duplicity files

        @rtype: list
        @return: list of duplicity metadata files
        """
        metafiles = {}
        partials = {}
        need_passphrase = False
        for fn in filelist:
            pr = file_naming.parse(fn)
            if not pr:
                # Not a duplicity-generated name; skip it.
                continue
            if pr.encrypted:
                need_passphrase = True
            if pr.type in [u"full-sig", u"new-sig"] or pr.manifest:
                # Key both dicts by the base name so a partial and its
                # final version collide on the same key.
                base, ext = os.path.splitext(fn)
                if ext not in suffixes:
                    base = fn
                if pr.partial:
                    partials[base] = fn
                else:
                    metafiles[base] = fn
        return metafiles, partials, need_passphrase

    def copy_raw(src_iter, filename):
        u"""
        Copy data from src_iter to file at fn
        """
        # src_iter yields objects exposing a .data bytes attribute
        # (see SrcIter below).
        file = open(filename, u"wb")
        while True:
            try:
                data = src_iter.__next__().data
            except StopIteration:
                break
            file.write(data)
        file.close()

    def resolve_basename(fn):
        u"""
        @return: (parsedresult, local_name, remote_name)
        """
        pr = file_naming.parse(fn)

        # Strip a known wrapper extension to get the base name.
        base, ext = os.path.splitext(fn)
        if ext not in suffixes:
            base = fn

        # Local cache copies are stored decrypted; non-manifest files are
        # kept gzipped locally.
        suffix = file_naming.get_suffix(False, not pr.manifest)
        loc_name = base + suffix

        return (pr, loc_name, fn)

    def remove_local(fn):
        # Delete the local cache copy of fn; remote is authoritative.
        del_name = config.archive_dir_path.append(fn).name

        log.Notice(_(u"Deleting local %s (not authoritative at backend).") %
                   util.fsdecode(del_name))
        try:
            util.ignore_missing(os.unlink, del_name)
        except Exception as e:
            log.Warn(_(u"Unable to delete %s: %s") % (util.fsdecode(del_name),
                                                      util.uexc(e)))

    def copy_to_local(fn):
        u"""
        Copy remote file fn to local cache.
        """
        class Block(object):
            u"""
            Data block to return from SrcIter
            """

            def __init__(self, data):
                self.data = data

        class SrcIter(object):
            u"""
            Iterate over source and return Block of data.
            """

            def __init__(self, fileobj):
                self.fileobj = fileobj

            def __next__(self):
                try:
                    res = Block(self.fileobj.read(self.get_read_size()))
                except Exception:
                    # Dig out a printable name for the error message; the
                    # fileobj's 'name' attribute may itself be a path object.
                    if hasattr(self.fileobj, u'name'):
                        name = self.fileobj.name
                        # name may be a path
                        if hasattr(name, u'name'):
                            name = name.name
                    else:
                        name = None
                    log.FatalError(_(u"Failed to read %s: %s") %
                                   (util.fsdecode(name), sys.exc_info()),
                                   log.ErrorCode.generic)
                if not res.data:
                    self.fileobj.close()
                    raise StopIteration
                return res

            def get_read_size(self):
                # 128 KiB chunks per read.
                return 128 * 1024

            def get_footer(self):
                # NOTE(review): presumably consumed by gpg.GzipWriteFile's
                # iterator protocol — verify against that implementation.
                return b""

        log.Notice(_(u"Copying %s to local cache.") % util.fsdecode(fn))

        pr, loc_name, rem_name = resolve_basename(fn)

        fileobj = config.backend.get_fileobj_read(fn)
        src_iter = SrcIter(fileobj)
        tdp = dup_temp.new_tempduppath(file_naming.parse(loc_name))
        # Manifests are stored raw locally; everything else is gzipped.
        if pr.manifest:
            copy_raw(src_iter, tdp.name)
        else:
            gpg.GzipWriteFile(src_iter, tdp.name, size=sys.maxsize)
        tdp.setdata()
        tdp.move(config.archive_dir_path.append(loc_name))

    # get remote metafile list
    remlist = config.backend.list()
    remote_metafiles, ignored, rem_needpass = get_metafiles(remlist)

    # get local metafile list
    loclist = config.archive_dir_path.listdir()
    local_metafiles, local_partials, loc_needpass = get_metafiles(loclist)

    # we have the list of metafiles on both sides. remote is always
    # authoritative. figure out which are local spurious (should not
    # be there) and missing (should be there but are not).
    local_keys = list(local_metafiles.keys())
    remote_keys = list(remote_metafiles.keys())

    local_missing = []
    local_spurious = []

    for key in remote_keys:
        # If we lost our cache, re-get the remote file.  But don't do it if we
        # already have a local partial.  The local partial will already be
        # complete in this case (seems we got interrupted before we could move
        # it to its final location).
        if key not in local_keys and key not in local_partials and is_needed(key):
            local_missing.append(remote_metafiles[key])

    for key in local_keys:
        # If we have a file locally that is unnecessary, delete it.  Also
        # delete final versions of partial files because if we have both, it
        # means the write of the final version got interrupted.
        if key not in remote_keys or key in local_partials:
            local_spurious.append(local_metafiles[key])

    # finally finish the process
    if not local_missing and not local_spurious:
        log.Notice(_(u"Local and Remote metadata are synchronized, no sync needed."))
    else:
        local_missing.sort()
        local_spurious.sort()
        if not config.dry_run:
            log.Notice(_(u"Synchronizing remote metadata to local cache..."))
            if local_missing and (rem_needpass or loc_needpass):
                # password for the --encrypt-key
                config.gpg_profile.passphrase = get_passphrase(1, u"sync")
            for fn in local_spurious:
                remove_local(fn)
            if hasattr(config.backend, u'pre_process_download'):
                config.backend.pre_process_download(local_missing)
            for fn in local_missing:
                copy_to_local(fn)
            col_stats.set_values()
        else:
            # Dry run: only report what a real sync would do.
            if local_missing:
                log.Notice(_(u"Sync would copy the following from remote to local:") +
                           u"\n" + u"\n".join(map(util.fsdecode, local_missing)))
            if local_spurious:
                log.Notice(_(u"Sync would remove the following spurious local files:") +
                           u"\n" + u"\n".join(map(util.fsdecode, local_spurious)))
1370
1371
def check_last_manifest(col_stats):
    u"""
    Check consistency and hostname/directory of last manifest

    @type col_stats: CollectionStatus object
    @param col_stats: collection status

    @rtype: void
    @return: void
    """
    assert col_stats.all_backup_chains
    newest_set = col_stats.all_backup_chains[-1].get_last()
    # Only consult the remote manifest when it is readable without user
    # interaction: the backup is unencrypted, or a passphrase is already
    # at hand (see #1729796).
    if config.encryption and not config.gpg_profile.passphrase:
        check_remote = False
    else:
        check_remote = True
    newest_set.check_manifests(check_remote=check_remote)
1387
1388
def check_resources(action):
    u"""
    Check for sufficient resources:
      - temp space for volume build
      - enough max open files
    Put out fatal error if not sufficient to run

    @type action: string
    @param action: action in progress

    @rtype: void
    @return: void
    """
    if action in [u"full", u"inc", u"restore"]:
        # Make sure we have enough resouces to run
        # First check disk space in temp area.
        tempfile, tempname = tempdir.default().mkstemp()
        os.close(tempfile)
        # strip off the temp dir and file
        tempfs = os.path.sep.join(tempname.split(os.path.sep)[:-2])
        try:
            stats = os.statvfs(tempfs)
        except Exception:
            log.FatalError(_(u"Unable to get free space on temp."),
                           log.ErrorCode.get_freespace_failed)
        # Calculate space we need for at least 2 volumes of full or inc
        # plus about 30% of one volume for the signature files.
        freespace = stats.f_frsize * stats.f_bavail
        needspace = (((config.async_concurrency + 1) * config.volsize) +
                     int(0.30 * config.volsize))
        if freespace < needspace:
            log.FatalError(_(u"Temp space has %d available, backup needs approx %d.") %
                           (freespace, needspace), log.ErrorCode.not_enough_freespace)
        else:
            log.Info(_(u"Temp has %d available, backup will use approx %d.") %
                     (freespace, needspace))

        # Some environments like Cygwin run with an artificially
        # low value for max open files.  Check for safe number.
        try:
            soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        except resource.error:
            log.FatalError(_(u"Unable to get max open files."),
                           log.ErrorCode.get_ulimit_failed)
        # BUGFIX: a limit of -1 (RLIM_INFINITY) means "unlimited".  The
        # original min() raised ValueError on an empty list when both soft
        # and hard limits were unlimited; treat that case as sufficient.
        finite_limits = [limit for limit in (soft, hard) if limit > -1]
        if finite_limits:
            maxopen = min(finite_limits)
            if maxopen < 1024:
                log.FatalError(_(u"Max open files of %s is too low, should be >= 1024.\n"
                                 u"Use 'ulimit -n 1024' or higher to correct.\n") % (maxopen,),
                               log.ErrorCode.maxopen_too_low)
1438
1439
def log_startup_parms(verbosity=log.INFO):
    u"""
    log Python, duplicity, and system versions
    """
    separator = u'=' * 80
    log.Log(separator, verbosity)
    log.Log(u"duplicity %s" % __version__, verbosity)
    # Decode argv so non-ASCII arguments log cleanly.
    decoded_args = u' '.join(util.fsdecode(arg) for arg in sys.argv)
    log.Log(u"Args: %s" % decoded_args, verbosity)
    log.Log(u' '.join(platform.uname()), verbosity)
    log.Log(u"%s %s" % (sys.executable or sys.platform, sys.version), verbosity)
    log.Log(separator, verbosity)
1451
1452
class Restart(object):
    u"""
    Class to aid in restart of inc or full backup.
    Instance in config.restart if restart in progress.
    """

    def __init__(self, last_backup):
        self.type = None  # u"full" or u"inc", set by setParms()
        self.start_time = None  # incremental chain start time
        self.end_time = None  # incremental chain end time
        self.start_vol = None  # volume number to resume from
        self.last_index = None  # last path index recorded in the manifest
        self.last_block = None  # last block recorded within that path
        self.last_backup = last_backup  # the interrupted (partial) backup
        self.setParms(last_backup)

    def setParms(self, last_backup):
        u"""Derive restart type, times, and resume volume from last_backup."""
        # A plain 'time' marks a full backup; incrementals carry a
        # start/end pair instead.
        if last_backup.time:
            self.type = u"full"
            self.time = last_backup.time
        else:
            self.type = u"inc"
            self.end_time = last_backup.end_time
            self.start_time = last_backup.start_time
        # We start one volume back in case we weren't able to finish writing
        # the most recent block.  Actually checking if we did (via hash) would
        # involve downloading the block.  Easier to just redo one block.
        self.start_vol = max(len(last_backup) - 1, 0)

    def checkManifest(self, mf):
        u"""Reconcile manifest mf with the resume point; may restart duplicity.

        Truncates the manifest when it records volumes whose upload failed,
        or wipes the partial backup and re-executes duplicity from scratch
        when no clean resume is possible.
        """
        mf_len = len(mf.volume_info_dict)
        # Anything but "manifest agrees with the resume volume and both are
        # non-zero" needs repair.
        if (mf_len != self.start_vol) or not (mf_len and self.start_vol):
            if self.start_vol == 0:
                # upload of 1st vol failed, clean and restart
                log.Notice(_(u"RESTART: The first volume failed to upload before termination.\n"
                             u"         Restart is impossible...starting backup from beginning."))
                self.last_backup.delete()
                # Re-exec the whole process rather than trying to unwind state.
                os.execve(sys.argv[0], sys.argv, os.environ)
            elif mf_len - self.start_vol > 0:
                # upload of N vols failed, fix manifest and restart
                log.Notice(_(u"RESTART: Volumes %d to %d failed to upload before termination.\n"
                             u"         Restarting backup at volume %d.") %
                           (self.start_vol + 1, mf_len, self.start_vol + 1))
                # Drop the manifest entries for volumes that never made it
                # to the backend; they will be rewritten on resume.
                for vol in range(self.start_vol + 1, mf_len + 1):
                    mf.del_volume_info(vol)
            else:
                # this is an 'impossible' state, remove last partial and restart
                log.Notice(_(u"RESTART: Impossible backup state: manifest has %d vols, remote has %d vols.\n"
                             u"         Restart is impossible ... duplicity will clean off the last partial\n"
                             u"         backup then restart the backup from the beginning.") %
                           (mf_len, self.start_vol))
                self.last_backup.delete()
                os.execve(sys.argv[0], sys.argv, os.environ)

    def setLastSaved(self, mf):
        u"""Record the last path index/block saved in volume start_vol of mf."""
        vi = mf.volume_info_dict[self.start_vol]
        self.last_index = vi.end_index
        self.last_block = vi.end_block or 0
1511
1512
def main():
    u"""
    Start/end here.

    Validates the runtime environment, processes the command line,
    acquires the archive-directory lock, then hands off to do_backup(),
    releasing the lock when done.
    """
    # per bug https://bugs.launchpad.net/duplicity/+bug/931175
    # duplicity crashes when PYTHONOPTIMIZE is set, so check
    # and refuse to run if it is set.
    if u'PYTHONOPTIMIZE' in os.environ:
        log.FatalError(_(u"""
PYTHONOPTIMIZE in the environment causes duplicity to fail to
recognize its own backups.  Please remove PYTHONOPTIMIZE from
the environment and rerun the backup.

See https://bugs.launchpad.net/duplicity/+bug/931175
"""), log.ErrorCode.pythonoptimize_set)

    # if python is run setuid, it's only partway set,
    # so make sure to run with euid/egid of root
    if os.geteuid() == 0:
        # make sure uid/gid match euid/egid
        os.setuid(os.geteuid())
        os.setgid(os.getegid())

    # set the current time strings (make it available for command line processing)
    dup_time.setcurtime()

    # determine what action we're performing and process command line
    action = commandline.ProcessCommandLine(sys.argv[1:])

    # serialize access to the archive dir: one non-blocking lock per
    # archive directory keeps concurrent duplicity runs from interleaving
    config.lockpath = os.path.join(config.archive_dir_path.name, b"lockfile")
    config.lockfile = fasteners.process_lock.InterProcessLock(config.lockpath)
    log.Debug(_(u"Acquiring lockfile %s") % config.lockpath)
    if not config.lockfile.acquire(blocking=False):
        log.FatalError(
            u"Another duplicity instance is already running with this archive directory\n",
            log.ErrorCode.user_error)
        # NOTE(review): log.FatalError appears to terminate the process, so
        # the shutdown/exit below looks unreachable -- confirm before relying
        # on exit code 2 here.
        log.shutdown()
        sys.exit(2)

    try:
        do_backup(action)

    finally:
        # always drop the archive lock, even if do_backup raised
        util.release_lockfile()
1557
1558
def do_backup(action):
    u"""
    Run the requested action against a stable backup collection.

    Synchronizes the local archive with the remote backend, resolves any
    partial (interrupted) backup set -- either resuming it via Restart or
    cleaning it up -- then dispatches to the handler for `action`.

    @param action: command name produced by ProcessCommandLine, e.g.
        u"full", u"inc", u"restore", u"cleanup", u"collection-status", ...
    @return: None; may terminate the process via sys.exit().
    """
    # set the current time strings again now that we have time separator
    if config.current_time:
        dup_time.setcurtime(config.current_time)
    else:
        dup_time.setcurtime()

    # log some debugging status info
    log_startup_parms(log.INFO)

    # check for disk space and available file handles
    check_resources(action)

    # get current collection status
    col_stats = dup_collections.CollectionsStatus(config.backend,
                                                  config.archive_dir_path,
                                                  action).set_values()

    # check archive synch with remote, fix if needed
    if action not in [u"collection-status",
                      u"remove-all-but-n-full",
                      u"remove-all-inc-of-but-n-full",
                      u"remove-old",
                      u"replicate",
                      ]:
        sync_archive(col_stats)

    while True:
        # if we have to clean up the last partial, then col_stats are invalidated
        # and we have to start the process all over again until clean.
        if action in [u"full", u"inc", u"cleanup"]:
            last_full_chain = col_stats.get_last_backup_chain()
            if not last_full_chain:
                break
            last_backup = last_full_chain.get_last()
            if last_backup.partial:
                if action in [u"full", u"inc"]:
                    # set restart parms from last_backup info
                    config.restart = Restart(last_backup)
                    # (possibly) reset action
                    action = config.restart.type
                    # reset the time strings
                    if action == u"full":
                        dup_time.setcurtime(config.restart.time)
                    else:
                        dup_time.setcurtime(config.restart.end_time)
                        dup_time.setprevtime(config.restart.start_time)
                    # log it -- main restart heavy lifting is done in write_multivol
                    # (interpolate outside _() so the msgid stays translatable)
                    log.Notice(_(u"Last %s backup left a partial set, restarting.") % action)
                    break
                else:
                    # remove last partial backup and get new collection status
                    # (interpolate outside _() so the msgid stays translatable)
                    log.Notice(_(u"Cleaning up previous partial %s backup set, restarting.") % action)
                    last_backup.delete()
                    col_stats = dup_collections.CollectionsStatus(config.backend,
                                                                  config.archive_dir_path,
                                                                  action).set_values()
                    continue
            break
        break

    # OK, now we have a stable collection
    last_full_time = col_stats.get_last_full_backup_time()
    if last_full_time > 0:
        log.Notice(_(u"Last full backup date:") + u" " + dup_time.timetopretty(last_full_time))
    else:
        log.Notice(_(u"Last full backup date: none"))
    if not config.restart and action == u"inc" and config.full_force_time is not None and \
       last_full_time < config.full_force_time:
        log.Notice(_(u"Last full backup is too old, forcing full backup"))
        action = u"full"
    log.PrintCollectionStatus(col_stats)

    # get the passphrase if we need to based on action/options
    config.gpg_profile.passphrase = get_passphrase(1, action)

    if action == u"restore":
        restore(col_stats)
    elif action == u"verify":
        verify(col_stats)
    elif action == u"list-current":
        list_current(col_stats)
    elif action == u"collection-status":
        if not config.file_changed:
            log.PrintCollectionStatus(col_stats, True)
        else:
            log.PrintCollectionFileChangedStatus(col_stats, config.file_changed, True)
    elif action == u"cleanup":
        cleanup(col_stats)
    elif action == u"remove-old":
        remove_old(col_stats)
    elif action == u"remove-all-but-n-full" or action == u"remove-all-inc-of-but-n-full":
        remove_all_but_n_full(col_stats)
    elif action == u"sync":
        sync_archive(col_stats)
    elif action == u"replicate":
        replicate()
    else:
        assert action == u"inc" or action == u"full", action
        # the passphrase for full and inc is used by --sign-key
        # the sign key can have a different passphrase than the encrypt
        # key, therefore request a passphrase
        if config.gpg_profile.sign_key:
            config.gpg_profile.signing_passphrase = get_passphrase(1, action, True)

        # if there are no recipients (no --encrypt-key), it must be a
        # symmetric key. Therefore, confirm the passphrase
        if not (config.gpg_profile.recipients or config.gpg_profile.hidden_recipients):
            config.gpg_profile.passphrase = get_passphrase(2, action)
            # a limitation in the GPG implementation does not allow for
            # inputting different passphrases, this affects symmetric+sign.
            # Allow an empty passphrase for the key though to allow a non-empty
            # symmetric key
            if (config.gpg_profile.signing_passphrase and
                    config.gpg_profile.passphrase != config.gpg_profile.signing_passphrase):
                log.FatalError(_(
                    u"When using symmetric encryption, the signing passphrase "
                    u"must equal the encryption passphrase."),
                    log.ErrorCode.user_error)

        if action == u"full":
            full_backup(col_stats)
        else:  # attempt incremental
            sig_chain = check_sig_chain(col_stats)
            # action == "inc" was requested, but no full backup is available
            if not sig_chain:
                full_backup(col_stats)
            else:
                if not config.restart:
                    # only ask for a passphrase if there was a previous backup
                    if col_stats.all_backup_chains:
                        config.gpg_profile.passphrase = get_passphrase(1, action)
                        check_last_manifest(col_stats)  # not needed for full backups
                incremental_backup(sig_chain)
    config.backend.close()
    log.shutdown()
    # NOTE(review): exit_val is a module-level status flag set elsewhere in
    # this file (not visible in this chunk) -- confirm it defaults to None.
    if exit_val is not None:
        sys.exit(exit_val)