1# borg cli interface / toplevel archiver code
2
3import sys
4import traceback
5
6try:
7    import argparse
8    import collections
9    import configparser
10    import faulthandler
11    import functools
12    import hashlib
13    import inspect
14    import itertools
15    import json
16    import logging
17    import os
18    import re
19    import shlex
20    import shutil
21    import signal
22    import stat
23    import subprocess
24    import tarfile
25    import textwrap
26    import time
27    from binascii import unhexlify, hexlify
28    from contextlib import contextmanager
29    from datetime import datetime, timedelta
30    from itertools import zip_longest
31
32    from .logger import create_logger, setup_logging
33
34    logger = create_logger()
35
36    import borg
37    from . import __version__
38    from . import helpers
39    from . import shellpattern
40    from .algorithms.checksums import crc32
41    from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
42    from .archive import BackupError, BackupOSError, backup_io, has_link
43    from .cache import Cache, assert_secure, SecurityManager
44    from .constants import *  # NOQA
45    from .compress import CompressionSpec
46    from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required, RepoKey, PassphraseKey
47    from .crypto.keymanager import KeyManager
48    from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, EXIT_SIGNAL_BASE
49    from .helpers import Error, NoManifestError, set_ec
50    from .helpers import positive_int_validator, location_validator, archivename_validator, ChunkerParams, Location
51    from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, HUMAN_SORT_KEYS, FilesCacheMode
52    from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
53    from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
54    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
55    from .helpers import interval, prune_within, prune_split
56    from .helpers import timestamp
57    from .helpers import get_cache_dir
58    from .helpers import Manifest
59    from .helpers import hardlinkable
60    from .helpers import StableDict
61    from .helpers import check_python, check_extension_modules
62    from .helpers import dir_is_tagged, is_slow_msgpack, is_supported_msgpack, yes, sysinfo
63    from .helpers import log_multi
64    from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
65    from .helpers import ErrorIgnoringTextIOWrapper
66    from .helpers import ProgressIndicatorPercent
67    from .helpers import basic_json_data, json_print
68    from .helpers import replace_placeholders
69    from .helpers import ChunkIteratorFileWrapper
70    from .helpers import popen_with_error_handling, prepare_subprocess_env
71    from .helpers import dash_open
72    from .helpers import umount
73    from .helpers import msgpack, msgpack_fallback
74    from .helpers import uid2user, gid2group
75    from .nanorst import rst_to_terminal
76    from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
77    from .patterns import PatternMatcher
78    from .item import Item
79    from .platform import get_flags, get_process_id, SyncFile
80    from .remote import RepositoryServer, RemoteRepository, cache_if_remote
81    from .repository import Repository, LIST_SCAN_LIMIT, TAG_PUT, TAG_DELETE, TAG_COMMIT
82    from .selftest import selftest
83    from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader
84except BaseException:
85    # an unhandled exception in the try-block would cause the borg cli command to exit with rc 1 due to python's
86    # default behavior, see issue #4424.
87    # as borg defines rc 1 as WARNING, this would be a mismatch, because a crash should be an ERROR (rc 2).
88    traceback.print_exc()
89    sys.exit(2)  # == EXIT_ERROR
90
91assert EXIT_ERROR == 2, "EXIT_ERROR is not 2, as expected - fix assert AND exception handler right above this line."
92
93STATS_HEADER = "                       Original size      Compressed size    Deduplicated size"
94
95PURE_PYTHON_MSGPACK_WARNING = "Using a pure-python msgpack! This will result in lower performance."
96
97
98def argument(args, str_or_bool):
99    """If bool is passed, return it. If str is passed, retrieve named attribute from args."""
100    if isinstance(str_or_bool, str):
101        return getattr(args, str_or_bool)
102    if isinstance(str_or_bool, (list, tuple)):
103        return any(getattr(args, item) for item in str_or_bool)
104    return str_or_bool
105
106
107def with_repository(fake=False, invert_fake=False, create=False, lock=True,
108                    exclusive=False, manifest=True, cache=False, secure=True,
109                    compatibility=None):
110    """
111    Method decorator for subcommand-handling methods: do_XYZ(self, args, repository, …)
112
113    If a parameter (where allowed) is a str the attribute named of args is used instead.
114    :param fake: (str or bool) use None instead of repository, don't do anything else
115    :param create: create repository
116    :param lock: lock repository
117    :param exclusive: (str or bool) lock repository exclusively (for writing)
118    :param manifest: load manifest and key, pass them as keyword arguments
119    :param cache: open cache, pass it as keyword argument (implies manifest)
120    :param secure: do assert_secure after loading manifest
121    :param compatibility: mandatory if not create and (manifest or cache), specifies mandatory feature categories to check
122    """
123
124    if not create and (manifest or cache):
125        if compatibility is None:
126            raise AssertionError("with_repository decorator used without compatibility argument")
127        if type(compatibility) is not tuple:
128            raise AssertionError("with_repository decorator compatibility argument must be of type tuple")
129    else:
130        if compatibility is not None:
131            raise AssertionError("with_repository called with compatibility argument but would not check" + repr(compatibility))
132        if create:
133            compatibility = Manifest.NO_OPERATION_CHECK
134
135    # To process the `--bypass-lock` option if specified, we need to
136    # modify `lock` inside `wrapper`. Therefore we cannot use the
137    # `nonlocal` statement to access `lock` as modifications would also
138    # affect the scope outside of `wrapper`. Subsequent calls would
139    # only see the overwritten value of `lock`, not the original one.
140    # The solution is to define a place holder variable `_lock` to
141    # propagate the value into `wrapper`.
142    _lock = lock
143
144    def decorator(method):
145        @functools.wraps(method)
146        def wrapper(self, args, **kwargs):
147            lock = getattr(args, 'lock', _lock)
148            location = args.location  # note: 'location' must be always present in args
149            append_only = getattr(args, 'append_only', False)
150            storage_quota = getattr(args, 'storage_quota', None)
151            make_parent_dirs = getattr(args, 'make_parent_dirs', False)
152            if argument(args, fake) ^ invert_fake:
153                return method(self, args, repository=None, **kwargs)
154            elif location.proto == 'ssh':
155                repository = RemoteRepository(location, create=create, exclusive=argument(args, exclusive),
156                                              lock_wait=self.lock_wait, lock=lock, append_only=append_only,
157                                              make_parent_dirs=make_parent_dirs, args=args)
158            else:
159                repository = Repository(location.path, create=create, exclusive=argument(args, exclusive),
160                                        lock_wait=self.lock_wait, lock=lock, append_only=append_only,
161                                        storage_quota=storage_quota, make_parent_dirs=make_parent_dirs)
162            with repository:
163                if manifest or cache:
164                    kwargs['manifest'], kwargs['key'] = Manifest.load(repository, compatibility)
165                    if 'compression' in args:
166                        kwargs['key'].compressor = args.compression.compressor
167                    if secure:
168                        assert_secure(repository, kwargs['manifest'], self.lock_wait)
169                if cache:
170                    with Cache(repository, kwargs['key'], kwargs['manifest'],
171                               do_files=getattr(args, 'cache_files', False),
172                               ignore_inode=getattr(args, 'ignore_inode', False),
173                               progress=getattr(args, 'progress', False), lock_wait=self.lock_wait,
174                               cache_mode=getattr(args, 'files_cache_mode', DEFAULT_FILES_CACHE_MODE)) as cache_:
175                        return method(self, args, repository=repository, cache=cache_, **kwargs)
176                else:
177                    return method(self, args, repository=repository, **kwargs)
178        return wrapper
179    return decorator
180
181
182def with_archive(method):
183    @functools.wraps(method)
184    def wrapper(self, args, repository, key, manifest, **kwargs):
185        archive = Archive(repository, key, manifest, args.location.archive,
186                          numeric_owner=getattr(args, 'numeric_owner', False),
187                          nobsdflags=getattr(args, 'nobsdflags', False),
188                          noacls=getattr(args, 'noacls', False),
189                          noxattrs=getattr(args, 'noxattrs', False),
190                          cache=kwargs.get('cache'),
191                          consider_part_files=args.consider_part_files, log_json=args.log_json)
192        return method(self, args, repository=repository, manifest=manifest, key=key, archive=archive, **kwargs)
193    return wrapper
194
195
196def parse_storage_quota(storage_quota):
197    parsed = parse_file_size(storage_quota)
198    if parsed < parse_file_size('10M'):
199        raise argparse.ArgumentTypeError('quota is too small (%s). At least 10M are required.' % storage_quota)
200    return parsed
201
202
203def get_func(args):
204    # This works around https://bugs.python.org/issue9351
205    # func is used at the leaf parsers of the argparse parser tree,
206    # fallback_func at next level towards the root,
207    # fallback2_func at the 2nd next level (which is root in our case).
208    for name in 'func', 'fallback_func', 'fallback2_func':
209        func = getattr(args, name, None)
210        if func is not None:
211            return func
212    raise Exception('expected func attributes not found')
213
214
215class Archiver:
216
217    def __init__(self, lock_wait=None, prog=None):
218        self.exit_code = EXIT_SUCCESS
219        self.lock_wait = lock_wait
220        self.prog = prog
221
222    def print_error(self, msg, *args):
223        msg = args and msg % args or msg
224        self.exit_code = EXIT_ERROR
225        logger.error(msg)
226
227    def print_warning(self, msg, *args):
228        msg = args and msg % args or msg
229        self.exit_code = EXIT_WARNING  # we do not terminate here, so it is a warning
230        logger.warning(msg)
231
232    def print_file_status(self, status, path):
233        if self.output_list and (self.output_filter is None or status in self.output_filter):
234            if self.log_json:
235                print(json.dumps({
236                    'type': 'file_status',
237                    'status': status,
238                    'path': remove_surrogates(path),
239                }), file=sys.stderr)
240            else:
241                logging.getLogger('borg.output.list').info("%1s %s", status, remove_surrogates(path))
242
243    @staticmethod
244    def compare_chunk_contents(chunks1, chunks2):
245        """Compare two chunk iterators (like returned by :meth:`.DownloadPipeline.fetch_many`)"""
246        end = object()
247        alen = ai = 0
248        blen = bi = 0
249        while True:
250            if not alen - ai:
251                a = next(chunks1, end)
252                if a is end:
253                    return not blen - bi and next(chunks2, end) is end
254                a = memoryview(a)
255                alen = len(a)
256                ai = 0
257            if not blen - bi:
258                b = next(chunks2, end)
259                if b is end:
260                    return not alen - ai and next(chunks1, end) is end
261                b = memoryview(b)
262                blen = len(b)
263                bi = 0
264            slicelen = min(alen - ai, blen - bi)
265            if a[ai:ai + slicelen] != b[bi:bi + slicelen]:
266                return False
267            ai += slicelen
268            bi += slicelen
269
270    @staticmethod
271    def build_matcher(inclexcl_patterns, include_paths):
272        matcher = PatternMatcher()
273        matcher.add_inclexcl(inclexcl_patterns)
274        matcher.add_includepaths(include_paths)
275        return matcher
276
277    def do_serve(self, args):
278        """Start in server mode. This command is usually not used manually."""
279        RepositoryServer(
280            restrict_to_paths=args.restrict_to_paths,
281            restrict_to_repositories=args.restrict_to_repositories,
282            append_only=args.append_only,
283            storage_quota=args.storage_quota,
284        ).serve()
285        return EXIT_SUCCESS
286
287    @with_repository(create=True, exclusive=True, manifest=False)
288    def do_init(self, args, repository):
289        """Initialize an empty repository"""
290        path = args.location.canonical_path()
291        logger.info('Initializing repository at "%s"' % path)
292        try:
293            key = key_creator(repository, args)
294        except (EOFError, KeyboardInterrupt):
295            repository.destroy()
296            return EXIT_WARNING
297        manifest = Manifest(key, repository)
298        manifest.key = key
299        manifest.write()
300        repository.commit()
301        with Cache(repository, key, manifest, warn_if_unencrypted=False):
302            pass
303        if key.tam_required:
304            tam_file = tam_required_file(repository)
305            open(tam_file, 'w').close()
306            logger.warning(
307                '\n'
308                'By default repositories initialized with this version will produce security\n'
309                'errors if written to with an older version (up to and including Borg 1.0.8).\n'
310                '\n'
311                'If you want to use these older versions, you can disable the check by running:\n'
312                'borg upgrade --disable-tam %s\n'
313                '\n'
314                'See https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability '
315                'for details about the security implications.', shlex.quote(path))
316
317        if key.NAME != 'plaintext':
318            logger.warning(
319                '\n'
320                'IMPORTANT: you will need both KEY AND PASSPHRASE to access this repo!\n'
321                'If you used a repokey mode, the key is stored in the repo, but you should back it up separately.\n'
322                'Use "borg key export" to export the key, optionally in printable format.\n'
323                'Write down the passphrase. Store both at safe place(s).\n')
324        return self.exit_code
325
326    @with_repository(exclusive=True, manifest=False)
327    def do_check(self, args, repository):
328        """Check repository consistency"""
329        if args.repair:
330            msg = ("This is a potentially dangerous function.\n"
331                   "check --repair might lead to data loss (for kinds of corruption it is not\n"
332                   "capable of dealing with). BE VERY CAREFUL!\n"
333                   "\n"
334                   "Type 'YES' if you understand this and want to continue: ")
335            if not yes(msg, false_msg="Aborting.", invalid_msg="Invalid answer, aborting.",
336                       truish=('YES', ), retry=False,
337                       env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
338                return EXIT_ERROR
339        if args.repo_only and any(
340           (args.verify_data, args.first, args.last, args.prefix is not None, args.glob_archives)):
341            self.print_error("--repository-only contradicts --first, --last, --prefix and --verify-data arguments.")
342            return EXIT_ERROR
343        if not args.archives_only:
344            if not repository.check(repair=args.repair, save_space=args.save_space):
345                return EXIT_WARNING
346        if args.prefix is not None:
347            args.glob_archives = args.prefix + '*'
348        if not args.repo_only and not ArchiveChecker().check(
349                repository, repair=args.repair, archive=args.location.archive,
350                first=args.first, last=args.last, sort_by=args.sort_by or 'ts', glob=args.glob_archives,
351                verify_data=args.verify_data, save_space=args.save_space):
352            return EXIT_WARNING
353        return EXIT_SUCCESS
354
355    @with_repository(compatibility=(Manifest.Operation.CHECK,))
356    def do_change_passphrase(self, args, repository, manifest, key):
357        """Change repository key file passphrase"""
358        if not hasattr(key, 'change_passphrase'):
359            print('This repository is not encrypted, cannot change the passphrase.')
360            return EXIT_ERROR
361        key.change_passphrase()
362        logger.info('Key updated')
363        if hasattr(key, 'find_key'):
364            # print key location to make backing it up easier
365            logger.info('Key location: %s', key.find_key())
366        return EXIT_SUCCESS
367
368    def do_change_passphrase_deprecated(self, args):
369        logger.warning('"borg change-passphrase" is deprecated and will be removed in Borg 1.2.\n'
370                       'Use "borg key change-passphrase" instead.')
371        return self.do_change_passphrase(args)
372
373    @with_repository(lock=False, exclusive=False, manifest=False, cache=False)
374    def do_key_export(self, args, repository):
375        """Export the repository key for backup"""
376        manager = KeyManager(repository)
377        manager.load_keyblob()
378        if args.paper:
379            manager.export_paperkey(args.path)
380        else:
381            if not args.path:
382                self.print_error("output file to export key to expected")
383                return EXIT_ERROR
384            try:
385                if args.qr:
386                    manager.export_qr(args.path)
387                else:
388                    manager.export(args.path)
389            except IsADirectoryError:
390                self.print_error("'{}' must be a file, not a directory".format(args.path))
391                return EXIT_ERROR
392        return EXIT_SUCCESS
393
394    @with_repository(lock=False, exclusive=False, manifest=False, cache=False)
395    def do_key_import(self, args, repository):
396        """Import the repository key from backup"""
397        manager = KeyManager(repository)
398        if args.paper:
399            if args.path:
400                self.print_error("with --paper import from file is not supported")
401                return EXIT_ERROR
402            manager.import_paperkey(args)
403        else:
404            if not args.path:
405                self.print_error("input file to import key from expected")
406                return EXIT_ERROR
407            if args.path != '-' and not os.path.exists(args.path):
408                self.print_error("input file does not exist: " + args.path)
409                return EXIT_ERROR
410            manager.import_keyfile(args)
411        return EXIT_SUCCESS
412
413    @with_repository(manifest=False)
414    def do_migrate_to_repokey(self, args, repository):
415        """Migrate passphrase -> repokey"""
416        manifest_data = repository.get(Manifest.MANIFEST_ID)
417        key_old = PassphraseKey.detect(repository, manifest_data)
418        key_new = RepoKey(repository)
419        key_new.target = repository
420        key_new.repository_id = repository.id
421        key_new.enc_key = key_old.enc_key
422        key_new.enc_hmac_key = key_old.enc_hmac_key
423        key_new.id_key = key_old.id_key
424        key_new.chunk_seed = key_old.chunk_seed
425        key_new.change_passphrase()  # option to change key protection passphrase, save
426        logger.info('Key updated')
427        return EXIT_SUCCESS
428
429    def do_benchmark_crud(self, args):
430        """Benchmark Create, Read, Update, Delete for archives."""
431        def measurement_run(repo, path):
432            archive = repo + '::borg-benchmark-crud'
433            compression = '--compression=none'
434            # measure create perf (without files cache to always have it chunking)
435            t_start = time.monotonic()
436            rc = self.do_create(self.parse_args(['create', compression, '--files-cache=disabled', archive + '1', path]))
437            t_end = time.monotonic()
438            dt_create = t_end - t_start
439            assert rc == 0
440            # now build files cache
441            rc1 = self.do_create(self.parse_args(['create', compression, archive + '2', path]))
442            rc2 = self.do_delete(self.parse_args(['delete', archive + '2']))
443            assert rc1 == rc2 == 0
444            # measure a no-change update (archive1 is still present)
445            t_start = time.monotonic()
446            rc1 = self.do_create(self.parse_args(['create', compression, archive + '3', path]))
447            t_end = time.monotonic()
448            dt_update = t_end - t_start
449            rc2 = self.do_delete(self.parse_args(['delete', archive + '3']))
450            assert rc1 == rc2 == 0
451            # measure extraction (dry-run: without writing result to disk)
452            t_start = time.monotonic()
453            rc = self.do_extract(self.parse_args(['extract', '--dry-run', archive + '1']))
454            t_end = time.monotonic()
455            dt_extract = t_end - t_start
456            assert rc == 0
457            # measure archive deletion (of LAST present archive with the data)
458            t_start = time.monotonic()
459            rc = self.do_delete(self.parse_args(['delete', archive + '1']))
460            t_end = time.monotonic()
461            dt_delete = t_end - t_start
462            assert rc == 0
463            return dt_create, dt_update, dt_extract, dt_delete
464
465        @contextmanager
466        def test_files(path, count, size, random):
467            try:
468                path = os.path.join(path, 'borg-test-data')
469                os.makedirs(path)
470                for i in range(count):
471                    fname = os.path.join(path, 'file_%d' % i)
472                    data = b'\0' * size if not random else os.urandom(size)
473                    with SyncFile(fname, binary=True) as fd:  # used for posix_fadvise's sake
474                        fd.write(data)
475                yield path
476            finally:
477                shutil.rmtree(path)
478
479        if '_BORG_BENCHMARK_CRUD_TEST' in os.environ:
480            tests = [
481                ('Z-TEST', 1, 1, False),
482                ('R-TEST', 1, 1, True),
483            ]
484        else:
485            tests = [
486                ('Z-BIG', 10, 100000000, False),
487                ('R-BIG', 10, 100000000, True),
488                ('Z-MEDIUM', 1000, 1000000, False),
489                ('R-MEDIUM', 1000, 1000000, True),
490                ('Z-SMALL', 10000, 10000, False),
491                ('R-SMALL', 10000, 10000, True),
492            ]
493
494        for msg, count, size, random in tests:
495            with test_files(args.path, count, size, random) as path:
496                dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path)
497            total_size_MB = count * size / 1e06
498            file_size_formatted = format_file_size(size)
499            content = 'random' if random else 'all-zero'
500            fmt = '%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)'
501            print(fmt % ('C', msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
502            print(fmt % ('R', msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
503            print(fmt % ('U', msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
504            print(fmt % ('D', msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
505
506        return 0
507
508    @with_repository(fake='dry_run', exclusive=True, compatibility=(Manifest.Operation.WRITE,))
509    def do_create(self, args, repository, manifest=None, key=None):
510        """Create new archive"""
511        matcher = PatternMatcher(fallback=True)
512        matcher.add_inclexcl(args.patterns)
513
514        def create_inner(archive, cache):
515            # Add cache dir to inode_skip list
516            skip_inodes = set()
517            try:
518                st = os.stat(get_cache_dir())
519                skip_inodes.add((st.st_ino, st.st_dev))
520            except OSError:
521                pass
522            # Add local repository dir to inode_skip list
523            if not args.location.host:
524                try:
525                    st = os.stat(args.location.path)
526                    skip_inodes.add((st.st_ino, st.st_dev))
527                except OSError:
528                    pass
529            logger.debug('Processing files ...')
530            for path in args.paths:
531                if path == '-':  # stdin
532                    path = args.stdin_name
533                    mode = args.stdin_mode
534                    user = args.stdin_user
535                    group = args.stdin_group
536                    if not dry_run:
537                        try:
538                            status = archive.process_stdin(path, cache, mode, user, group)
539                        except BackupOSError as e:
540                            status = 'E'
541                            self.print_warning('%s: %s', path, e)
542                    else:
543                        status = '-'
544                    self.print_file_status(status, path)
545                    continue
546                path = os.path.normpath(path)
547                try:
548                    st = os.stat(path, follow_symlinks=False)
549                except OSError as e:
550                    self.print_warning('%s: %s', path, e)
551                    continue
552                if args.one_file_system:
553                    restrict_dev = st.st_dev
554                else:
555                    restrict_dev = None
556                self._process(archive, cache, matcher, args.exclude_caches, args.exclude_if_present,
557                              args.keep_exclude_tags, skip_inodes, path, restrict_dev,
558                              read_special=args.read_special, dry_run=dry_run, st=st)
559                # if we get back here, we've finished recursing into <path>,
560                # we do not ever want to get back in there (even if path is given twice as recursion root)
561                skip_inodes.add((st.st_ino, st.st_dev))
562            if not dry_run:
563                archive.save(comment=args.comment, timestamp=args.timestamp)
564                if args.progress:
565                    archive.stats.show_progress(final=True)
566                args.stats |= args.json
567                if args.stats:
568                    if args.json:
569                        json_print(basic_json_data(manifest, cache=cache, extra={
570                            'archive': archive,
571                        }))
572                    else:
573                        log_multi(DASHES,
574                                  str(archive),
575                                  DASHES,
576                                  STATS_HEADER,
577                                  str(archive.stats),
578                                  str(cache),
579                                  DASHES, logger=logging.getLogger('borg.output.stats'))
580
581        self.output_filter = args.output_filter
582        self.output_list = args.output_list
583        self.exclude_nodump = args.exclude_nodump
584        dry_run = args.dry_run
585        t0 = datetime.utcnow()
586        t0_monotonic = time.monotonic()
587        logger.info('Creating archive at "%s"' % args.location.orig)
588        if not dry_run:
589            with Cache(repository, key, manifest, do_files=args.cache_files, progress=args.progress,
590                       lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync,
591                       cache_mode=args.files_cache_mode, ignore_inode=args.ignore_inode) as cache:
592                archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
593                                  create=True, checkpoint_interval=args.checkpoint_interval,
594                                  numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime, nobirthtime=args.nobirthtime,
595                                  nobsdflags=args.nobsdflags, noacls=args.noacls, noxattrs=args.noxattrs, progress=args.progress,
596                                  chunker_params=args.chunker_params, start=t0, start_monotonic=t0_monotonic,
597                                  log_json=args.log_json)
598                create_inner(archive, cache)
599        else:
600            create_inner(None, None)
601        return self.exit_code
602
603    def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
604                 keep_exclude_tags, skip_inodes, path, restrict_dev,
605                 read_special=False, dry_run=False, st=None):
606        """
607        Process *path* recursively according to the various parameters.
608
609        *st* (if given) is a *os.stat_result* object for *path*.
610
611        This should only raise on critical errors. Per-item errors must be handled within this method.
612        """
613        try:
614            recurse_excluded_dir = False
615            if matcher.match(path):
616                if st is None:
617                    with backup_io('stat'):
618                        st = os.stat(path, follow_symlinks=False)
619            else:
620                self.print_file_status('x', path)
621                # get out here as quickly as possible:
622                # we only need to continue if we shall recurse into an excluded directory.
623                # if we shall not recurse, then do not even touch (stat()) the item, it
624                # could trigger an error, e.g. if access is forbidden, see #3209.
625                if not matcher.recurse_dir:
626                    return
627                if st is None:
628                    with backup_io('stat'):
629                        st = os.stat(path, follow_symlinks=False)
630                recurse_excluded_dir = stat.S_ISDIR(st.st_mode)
631                if not recurse_excluded_dir:
632                    return
633
634            if (st.st_ino, st.st_dev) in skip_inodes:
635                return
636            # if restrict_dev is given, we do not want to recurse into a new filesystem,
637            # but we WILL save the mountpoint directory (or more precise: the root
638            # directory of the mounted filesystem that shadows the mountpoint dir).
639            recurse = restrict_dev is None or st.st_dev == restrict_dev
640            status = None
641            if self.exclude_nodump:
642                # Ignore if nodump flag is set
643                with backup_io('flags'):
644                    if get_flags(path, st) & stat.UF_NODUMP:
645                        self.print_file_status('x', path)
646                        return
647            if stat.S_ISREG(st.st_mode):
648                if not dry_run:
649                    status = archive.process_file(path, st, cache)
650            elif stat.S_ISDIR(st.st_mode):
651                if recurse:
652                    tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
653                    if tag_paths:
654                        # if we are already recursing in an excluded dir, we do not need to do anything else than
655                        # returning (we do not need to archive or recurse into tagged directories), see #3991:
656                        if not recurse_excluded_dir:
657                            if keep_exclude_tags:
658                                if not dry_run:
659                                    archive.process_dir(path, st)
660                                for tag_path in tag_paths:
661                                    self._process(archive, cache, matcher, exclude_caches, exclude_if_present,
662                                                  keep_exclude_tags, skip_inodes, tag_path, restrict_dev,
663                                                  read_special=read_special, dry_run=dry_run)
664                            self.print_file_status('x', path)
665                        return
666                if not dry_run:
667                    if not recurse_excluded_dir:
668                        status = archive.process_dir(path, st)
669                if recurse:
670                    with backup_io('scandir'):
671                        entries = helpers.scandir_inorder(path)
672                    for dirent in entries:
673                        normpath = os.path.normpath(dirent.path)
674                        self._process(archive, cache, matcher, exclude_caches, exclude_if_present,
675                                      keep_exclude_tags, skip_inodes, normpath, restrict_dev,
676                                      read_special=read_special, dry_run=dry_run)
677            elif stat.S_ISLNK(st.st_mode):
678                if not dry_run:
679                    if not read_special:
680                        status = archive.process_symlink(path, st)
681                    else:
682                        try:
683                            st_target = os.stat(path)
684                        except OSError:
685                            special = False
686                        else:
687                            special = is_special(st_target.st_mode)
688                        if special:
689                            status = archive.process_file(path, st_target, cache)
690                        else:
691                            status = archive.process_symlink(path, st)
692            elif stat.S_ISFIFO(st.st_mode):
693                if not dry_run:
694                    if not read_special:
695                        status = archive.process_fifo(path, st)
696                    else:
697                        status = archive.process_file(path, st, cache)
698            elif stat.S_ISCHR(st.st_mode):
699                if not dry_run:
700                    if not read_special:
701                        status = archive.process_dev(path, st, 'c')
702                    else:
703                        status = archive.process_file(path, st, cache)
704            elif stat.S_ISBLK(st.st_mode):
705                if not dry_run:
706                    if not read_special:
707                        status = archive.process_dev(path, st, 'b')
708                    else:
709                        status = archive.process_file(path, st, cache)
710            elif stat.S_ISSOCK(st.st_mode):
711                # Ignore unix sockets
712                return
713            elif stat.S_ISDOOR(st.st_mode):
714                # Ignore Solaris doors
715                return
716            elif stat.S_ISPORT(st.st_mode):
717                # Ignore Solaris event ports
718                return
719            else:
720                self.print_warning('Unknown file type: %s', path)
721                return
722        except BackupOSError as e:
723            self.print_warning('%s: %s', path, e)
724            status = 'E'
725        # Status output
726        if status is None:
727            if not dry_run:
728                status = '?'  # need to add a status code somewhere
729            else:
730                status = '-'  # dry run, item was not backed up
731
732        if not recurse_excluded_dir:
733            self.print_file_status(status, path)
734
735    @staticmethod
736    def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
737        if strip_components:
738            def item_filter(item):
739                matched = matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:])
740                peek_and_store_hardlink_masters(item, matched)
741                return matched
742        else:
743            def item_filter(item):
744                matched = matcher.match(item.path)
745                peek_and_store_hardlink_masters(item, matched)
746                return matched
747        return item_filter
748
749    @with_repository(compatibility=(Manifest.Operation.READ,))
750    @with_archive
751    def do_extract(self, args, repository, manifest, key, archive):
752        """Extract archive contents"""
753        # be restrictive when restoring files, restore permissions later
754        if sys.getfilesystemencoding() == 'ascii':
755            logger.warning('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.')
756            if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
757                logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
758
759        matcher = self.build_matcher(args.patterns, args.paths)
760
761        progress = args.progress
762        output_list = args.output_list
763        dry_run = args.dry_run
764        stdout = args.stdout
765        sparse = args.sparse
766        strip_components = args.strip_components
767        dirs = []
768        partial_extract = not matcher.empty() or strip_components
769        hardlink_masters = {} if partial_extract or not has_link else None
770
771        def peek_and_store_hardlink_masters(item, matched):
772            # not has_link:
773            # OS does not have hardlink capability thus we need to remember the chunks so that
774            # we can extract all hardlinks as separate normal (not-hardlinked) files instead.
775            #
776            # partial_extract and not matched and hardlinkable:
777            # we do not extract the very first hardlink, so we need to remember the chunks
778            # in hardlinks_master, so we can use them when we extract some 2nd+ hardlink item
779            # that has no chunks list.
780            if ((not has_link or (partial_extract and not matched and hardlinkable(item.mode))) and
781                    (item.get('hardlink_master', True) and 'source' not in item)):
782                hardlink_masters[item.get('path')] = (item.get('chunks'), None)
783
784        filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
785        if progress:
786            pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1, msgid='extract')
787            pi.output('Calculating total archive size for the progress indicator (might take long for large archives)')
788            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
789            pi.total = extracted_size
790        else:
791            pi = None
792
793        for item in archive.iter_items(filter, partial_extract=partial_extract,
794                                       preload=True, hardlink_masters=hardlink_masters):
795            orig_path = item.path
796            if strip_components:
797                item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
798            if not args.dry_run:
799                while dirs and not item.path.startswith(dirs[-1].path):
800                    dir_item = dirs.pop(-1)
801                    try:
802                        archive.extract_item(dir_item, stdout=stdout)
803                    except BackupOSError as e:
804                        self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
805            if output_list:
806                logging.getLogger('borg.output.list').info(remove_surrogates(item.path))
807            try:
808                if dry_run:
809                    archive.extract_item(item, dry_run=True, pi=pi)
810                else:
811                    if stat.S_ISDIR(item.mode):
812                        dirs.append(item)
813                        archive.extract_item(item, stdout=stdout, restore_attrs=False)
814                    else:
815                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
816                                             stripped_components=strip_components, original_path=orig_path, pi=pi)
817            except (BackupOSError, BackupError) as e:
818                self.print_warning('%s: %s', remove_surrogates(orig_path), e)
819
820        if pi:
821            pi.finish()
822
823        if not args.dry_run:
824            pi = ProgressIndicatorPercent(total=len(dirs), msg='Setting directory permissions %3.0f%%',
825                                          msgid='extract.permissions')
826            while dirs:
827                pi.show()
828                dir_item = dirs.pop(-1)
829                try:
830                    archive.extract_item(dir_item, stdout=stdout)
831                except BackupOSError as e:
832                    self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
833        for pattern in matcher.get_unmatched_include_patterns():
834            self.print_warning("Include pattern '%s' never matched.", pattern)
835        if pi:
836            # clear progress output
837            pi.finish()
838        return self.exit_code
839
840    @with_repository(compatibility=(Manifest.Operation.READ,))
841    @with_archive
842    def do_export_tar(self, args, repository, manifest, key, archive):
843        """Export archive contents as a tarball"""
844        self.output_list = args.output_list
845
846        # A quick note about the general design of tar_filter and tarfile;
847        # The tarfile module of Python can provide some compression mechanisms
848        # by itself, using the builtin gzip, bz2 and lzma modules (and "tarmodes"
849        # such as "w:xz").
850        #
851        # Doing so would have three major drawbacks:
852        # For one the compressor runs on the same thread as the program using the
853        # tarfile, stealing valuable CPU time from Borg and thus reducing throughput.
854        # Then this limits the available options - what about lz4? Brotli? zstd?
855        # The third issue is that systems can ship more optimized versions than those
856        # built into Python, e.g. pigz or pxz, which can use more than one thread for
857        # compression.
858        #
859        # Therefore we externalize compression by using a filter program, which has
860        # none of these drawbacks. The only issue of using an external filter is
861        # that it has to be installed -- hardly a problem, considering that
862        # the decompressor must be installed as well to make use of the exported tarball!
863
864        filter = None
865        if args.tar_filter == 'auto':
866            # Note that filter remains None if tarfile is '-'.
867            if args.tarfile.endswith('.tar.gz'):
868                filter = 'gzip'
869            elif args.tarfile.endswith('.tar.bz2'):
870                filter = 'bzip2'
871            elif args.tarfile.endswith('.tar.xz'):
872                filter = 'xz'
873            logger.debug('Automatically determined tar filter: %s', filter)
874        else:
875            filter = args.tar_filter
876
877        tarstream = dash_open(args.tarfile, 'wb')
878        tarstream_close = args.tarfile != '-'
879
880        if filter:
881            # When we put a filter between us and the final destination,
882            # the selected output (tarstream until now) becomes the output of the filter (=filterout).
883            # The decision whether to close that or not remains the same.
884            filterout = tarstream
885            filterout_close = tarstream_close
886            env = prepare_subprocess_env(system=True)
887            # There is no deadlock potential here (the subprocess docs warn about this), because
888            # communication with the process is a one-way road, i.e. the process can never block
889            # for us to do something while we block on the process for something different.
890            filterproc = popen_with_error_handling(filter, stdin=subprocess.PIPE, stdout=filterout,
891                                                   log_prefix='--tar-filter: ', env=env)
892            if not filterproc:
893                return EXIT_ERROR
894            # Always close the pipe, otherwise the filter process would not notice when we are done.
895            tarstream = filterproc.stdin
896            tarstream_close = True
897
898        # The | (pipe) symbol instructs tarfile to use a streaming mode of operation
899        # where it never seeks on the passed fileobj.
900        tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT)
901
902        self._export_tar(args, archive, tar)
903
904        # This does not close the fileobj (tarstream) we passed to it -- a side effect of the | mode.
905        tar.close()
906
907        if tarstream_close:
908            tarstream.close()
909
910        if filter:
911            logger.debug('Done creating tar, waiting for filter to die...')
912            rc = filterproc.wait()
913            if rc:
914                logger.error('--tar-filter exited with code %d, output file is likely unusable!', rc)
915                self.exit_code = EXIT_ERROR
916            else:
917                logger.debug('filter exited with code %d', rc)
918
919            if filterout_close:
920                filterout.close()
921
922        return self.exit_code
923
924    def _export_tar(self, args, archive, tar):
925        matcher = self.build_matcher(args.patterns, args.paths)
926
927        progress = args.progress
928        output_list = args.output_list
929        strip_components = args.strip_components
930        partial_extract = not matcher.empty() or strip_components
931        hardlink_masters = {} if partial_extract else None
932
933        def peek_and_store_hardlink_masters(item, matched):
934            if ((partial_extract and not matched and hardlinkable(item.mode)) and
935                    (item.get('hardlink_master', True) and 'source' not in item)):
936                hardlink_masters[item.get('path')] = (item.get('chunks'), None)
937
938        filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
939
940        if progress:
941            pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract')
942            pi.output('Calculating size')
943            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
944            pi.total = extracted_size
945        else:
946            pi = None
947
948        def item_content_stream(item):
949            """
950            Return a file-like object that reads from the chunks of *item*.
951            """
952            chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks],
953                                                         is_preloaded=True)
954            if pi:
955                info = [remove_surrogates(item.path)]
956                return ChunkIteratorFileWrapper(chunk_iterator,
957                                                lambda read_bytes: pi.show(increase=len(read_bytes), info=info))
958            else:
959                return ChunkIteratorFileWrapper(chunk_iterator)
960
961        def item_to_tarinfo(item, original_path):
962            """
963            Transform a Borg *item* into a tarfile.TarInfo object.
964
965            Return a tuple (tarinfo, stream), where stream may be a file-like object that represents
966            the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item*
967            cannot be represented as a TarInfo object and should be skipped.
968            """
969
970            # If we would use the PAX (POSIX) format (which we currently don't),
971            # we can support most things that aren't possible with classic tar
972            # formats, including GNU tar, such as:
973            # atime, ctime, possibly Linux capabilities (security.* xattrs)
974            # and various additions supported by GNU tar in POSIX mode.
975
976            stream = None
977            tarinfo = tarfile.TarInfo()
978            tarinfo.name = item.path
979            tarinfo.mtime = item.mtime / 1e9
980            tarinfo.mode = stat.S_IMODE(item.mode)
981            tarinfo.uid = item.uid
982            tarinfo.gid = item.gid
983            tarinfo.uname = item.user or ''
984            tarinfo.gname = item.group or ''
985            # The linkname in tar has the same dual use the 'source' attribute of Borg items,
986            # i.e. for symlinks it means the destination, while for hardlinks it refers to the
987            # file.
988            # Since hardlinks in tar have a different type code (LNKTYPE) the format might
989            # support hardlinking arbitrary objects (including symlinks and directories), but
990            # whether implementations actually support that is a whole different question...
991            tarinfo.linkname = ""
992
993            modebits = stat.S_IFMT(item.mode)
994            if modebits == stat.S_IFREG:
995                tarinfo.type = tarfile.REGTYPE
996                if 'source' in item:
997                    source = os.sep.join(item.source.split(os.sep)[strip_components:])
998                    if hardlink_masters is None:
999                        linkname = source
1000                    else:
1001                        chunks, linkname = hardlink_masters.get(item.source, (None, source))
1002                    if linkname:
1003                        # Master was already added to the archive, add a hardlink reference to it.
1004                        tarinfo.type = tarfile.LNKTYPE
1005                        tarinfo.linkname = linkname
1006                    elif chunks is not None:
1007                        # The item which has the chunks was not put into the tar, therefore
1008                        # we do that now and update hardlink_masters to reflect that.
1009                        item.chunks = chunks
1010                        tarinfo.size = item.get_size()
1011                        stream = item_content_stream(item)
1012                        hardlink_masters[item.get('source') or original_path] = (None, item.path)
1013                else:
1014                    tarinfo.size = item.get_size()
1015                    stream = item_content_stream(item)
1016            elif modebits == stat.S_IFDIR:
1017                tarinfo.type = tarfile.DIRTYPE
1018            elif modebits == stat.S_IFLNK:
1019                tarinfo.type = tarfile.SYMTYPE
1020                tarinfo.linkname = item.source
1021            elif modebits == stat.S_IFBLK:
1022                tarinfo.type = tarfile.BLKTYPE
1023                tarinfo.devmajor = os.major(item.rdev)
1024                tarinfo.devminor = os.minor(item.rdev)
1025            elif modebits == stat.S_IFCHR:
1026                tarinfo.type = tarfile.CHRTYPE
1027                tarinfo.devmajor = os.major(item.rdev)
1028                tarinfo.devminor = os.minor(item.rdev)
1029            elif modebits == stat.S_IFIFO:
1030                tarinfo.type = tarfile.FIFOTYPE
1031            else:
1032                self.print_warning('%s: unsupported file type %o for tar export', remove_surrogates(item.path), modebits)
1033                set_ec(EXIT_WARNING)
1034                return None, stream
1035            return tarinfo, stream
1036
1037        for item in archive.iter_items(filter, partial_extract=partial_extract,
1038                                       preload=True, hardlink_masters=hardlink_masters):
1039            orig_path = item.path
1040            if strip_components:
1041                item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
1042            tarinfo, stream = item_to_tarinfo(item, orig_path)
1043            if tarinfo:
1044                if output_list:
1045                    logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
1046                tar.addfile(tarinfo, stream)
1047
1048        if pi:
1049            pi.finish()
1050
1051        for pattern in matcher.get_unmatched_include_patterns():
1052            self.print_warning("Include pattern '%s' never matched.", pattern)
1053        return self.exit_code
1054
1055    @with_repository(compatibility=(Manifest.Operation.READ,))
1056    @with_archive
1057    def do_diff(self, args, repository, manifest, key, archive):
1058        """Diff contents of two archives"""
1059        def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2):
1060            chunks1 = archive1.pipeline.fetch_many(chunk_ids1)
1061            chunks2 = archive2.pipeline.fetch_many(chunk_ids2)
1062            return self.compare_chunk_contents(chunks1, chunks2)
1063
1064        def sum_chunk_size(item, consider_ids=None):
1065            if item.get('deleted'):
1066                size = None
1067            else:
1068                if consider_ids is not None:  # consider only specific chunks
1069                    size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
1070                else:  # consider all chunks
1071                    size = item.get_size()
1072            return size
1073
1074        def get_owner(item):
1075            if args.numeric_owner:
1076                return item.uid, item.gid
1077            else:
1078                return item.user, item.group
1079
1080        def get_mode(item):
1081            if 'mode' in item:
1082                return stat.filemode(item.mode)
1083            else:
1084                return [None]
1085
1086        def has_hardlink_master(item, hardlink_masters):
1087            return hardlinkable(item.mode) and item.get('source') in hardlink_masters
1088
1089        def compare_link(item1, item2):
1090            # These are the simple link cases. For special cases, e.g. if a
1091            # regular file is replaced with a link or vice versa, it is
1092            # indicated in compare_mode instead.
1093            if item1.get('deleted'):
1094                return ({"type": 'added link'}, 'added link')
1095            elif item2.get('deleted'):
1096                return ({"type": 'removed link'}, 'removed link')
1097            elif 'source' in item1 and 'source' in item2 and item1.source != item2.source:
1098                return ({"type": 'changed link'}, 'changed link')
1099
1100        def contents_changed(item1, item2):
1101            if item1.get('deleted') != item2.get('deleted'):
1102                # a deleleted/non-existing file is considered different to an existing file,
1103                # even if the latter is empty.
1104                return True
1105            if can_compare_chunk_ids:
1106                return item1.chunks != item2.chunks
1107            else:
1108                if sum_chunk_size(item1) != sum_chunk_size(item2):
1109                    return True
1110                else:
1111                    chunk_ids1 = [c.id for c in item1.chunks]
1112                    chunk_ids2 = [c.id for c in item2.chunks]
1113                    return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
1114
1115        def compare_content(path, item1, item2):
1116            if contents_changed(item1, item2):
1117                if item1.get('deleted'):
1118                    sz = sum_chunk_size(item2)
1119                    return ({"type": "added", "size": sz}, 'added {:>13}'.format(format_file_size(sz)))
1120                if item2.get('deleted'):
1121                    sz = sum_chunk_size(item1)
1122                    return ({"type": "removed", "size": sz}, 'removed {:>11}'.format(format_file_size(sz)))
1123                if not can_compare_chunk_ids:
1124                    return ({"type": "modified"}, "modified")
1125                chunk_ids1 = {c.id for c in item1.chunks}
1126                chunk_ids2 = {c.id for c in item2.chunks}
1127                added_ids = chunk_ids2 - chunk_ids1
1128                removed_ids = chunk_ids1 - chunk_ids2
1129                added = sum_chunk_size(item2, added_ids)
1130                removed = sum_chunk_size(item1, removed_ids)
1131                return ({"type": "modified", "added": added, "removed": removed},
1132                        '{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
1133                        format_file_size(-removed, precision=1, sign=True)))
1134
1135        def compare_directory(item1, item2):
1136            if item2.get('deleted') and not item1.get('deleted'):
1137                return ({"type": 'removed directory'}, 'removed directory')
1138            elif item1.get('deleted') and not item2.get('deleted'):
1139                return ({"type": 'added directory'}, 'added directory')
1140
1141        def compare_owner(item1, item2):
1142            user1, group1 = get_owner(item1)
1143            user2, group2 = get_owner(item2)
1144            if user1 != user2 or group1 != group2:
1145                return ({"type": "owner", "old_user": user1, "old_group": group1, "new_user": user2, "new_group": group2},
1146                        '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2))
1147
1148        def compare_mode(item1, item2):
1149            if item1.mode != item2.mode:
1150                mode1 = get_mode(item1)
1151                mode2 = get_mode(item2)
1152                return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2))
1153
1154        def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
1155            """
1156            Compare two items with identical paths.
1157            :param deleted: Whether one of the items has been deleted
1158            """
1159            changes = []
1160
1161            if has_hardlink_master(item1, hardlink_masters):
1162                item1 = hardlink_masters[item1.source][0]
1163
1164            if has_hardlink_master(item2, hardlink_masters):
1165                item2 = hardlink_masters[item2.source][1]
1166
1167            if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
1168                changes.append(compare_link(item1, item2))
1169
1170            if 'chunks' in item1 and 'chunks' in item2:
1171                changes.append(compare_content(path, item1, item2))
1172
1173            if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
1174                changes.append(compare_directory(item1, item2))
1175
1176            if not deleted:
1177                changes.append(compare_owner(item1, item2))
1178                changes.append(compare_mode(item1, item2))
1179
1180            # changes is a list of paths, changesets:  [(path1, [{changeset1}, ..]), (path2, [{changeset1}, ..]), ..]
1181            changes = [x for x in changes if x]
1182            if changes:
1183                output_line = (remove_surrogates(path), changes)
1184
1185                # if sorting, save changes for later, otherwise go ahead and output the results as they are generated.
1186                if args.sort:
1187                    output.append(output_line)
1188                elif args.json_lines:
1189                    print_json_output(output_line)
1190                else:
1191                    print_text_output(output_line)
1192
1193        def print_text_output(line):
1194            path, diff = line
1195            print("{:<19} {}".format(' '.join([txt for j, txt in diff]), path))
1196
1197        def print_json_output(line):
1198            path, diff = line
1199            print(json.dumps({"path": path, "changes": [j for j, txt in diff]}))
1200
1201        def compare_archives(archive1, archive2, matcher):
1202            def hardlink_master_seen(item):
1203                return 'source' not in item or not hardlinkable(item.mode) or item.source in hardlink_masters
1204
1205            def is_hardlink_master(item):
1206                return item.get('hardlink_master', True) and 'source' not in item
1207
1208            def update_hardlink_masters(item1, item2):
1209                if is_hardlink_master(item1) or is_hardlink_master(item2):
1210                    hardlink_masters[item1.path] = (item1, item2)
1211
1212            def compare_or_defer(item1, item2):
1213                update_hardlink_masters(item1, item2)
1214                if not hardlink_master_seen(item1) or not hardlink_master_seen(item2):
1215                    deferred.append((item1, item2))
1216                else:
1217                    compare_items(output, item1.path, item1, item2, hardlink_masters)
1218
1219            orphans_archive1 = collections.OrderedDict()
1220            orphans_archive2 = collections.OrderedDict()
1221            deferred = []
1222            hardlink_masters = {}
1223            output = []
1224
1225            for item1, item2 in zip_longest(
1226                    archive1.iter_items(lambda item: matcher.match(item.path)),
1227                    archive2.iter_items(lambda item: matcher.match(item.path)),
1228            ):
1229                if item1 and item2 and item1.path == item2.path:
1230                    compare_or_defer(item1, item2)
1231                    continue
1232                if item1:
1233                    matching_orphan = orphans_archive2.pop(item1.path, None)
1234                    if matching_orphan:
1235                        compare_or_defer(item1, matching_orphan)
1236                    else:
1237                        orphans_archive1[item1.path] = item1
1238                if item2:
1239                    matching_orphan = orphans_archive1.pop(item2.path, None)
1240                    if matching_orphan:
1241                        compare_or_defer(matching_orphan, item2)
1242                    else:
1243                        orphans_archive2[item2.path] = item2
1244            # At this point orphans_* contain items that had no matching partner in the other archive
1245            deleted_item = Item(
1246                deleted=True,
1247                chunks=[],
1248                mode=0,
1249            )
1250            for added in orphans_archive2.values():
1251                path = added.path
1252                deleted_item.path = path
1253                update_hardlink_masters(deleted_item, added)
1254                compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True)
1255            for deleted in orphans_archive1.values():
1256                path = deleted.path
1257                deleted_item.path = path
1258                update_hardlink_masters(deleted, deleted_item)
1259                compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True)
1260            for item1, item2 in deferred:
1261                assert hardlink_master_seen(item1)
1262                assert hardlink_master_seen(item2)
1263                compare_items(output, item1.path, item1, item2, hardlink_masters)
1264
1265            print_output = print_json_output if args.json_lines else print_text_output
1266
1267            # if we wanted sorted output (args.sort is true), then results are collected in 'output' and
1268            # need to be sort them before printing. Otherwise results were already printed and 'output' is empty.
1269            for line in sorted(output):
1270                print_output(line)
1271
1272        archive1 = archive
1273        archive2 = Archive(repository, key, manifest, args.archive2,
1274                           consider_part_files=args.consider_part_files)
1275
1276        can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get(
1277            'chunker_params', True) or args.same_chunker_params
1278        if not can_compare_chunk_ids:
1279            self.print_warning('--chunker-params might be different between archives, diff will be slow.\n'
1280                               'If you know for certain that they are the same, pass --same-chunker-params '
1281                               'to override this check.')
1282
1283        matcher = self.build_matcher(args.patterns, args.paths)
1284
1285        compare_archives(archive1, archive2, matcher)
1286
1287        for pattern in matcher.get_unmatched_include_patterns():
1288            self.print_warning("Include pattern '%s' never matched.", pattern)
1289
1290        return self.exit_code
1291
1292    @with_repository(exclusive=True, cache=True, compatibility=(Manifest.Operation.CHECK,))
1293    @with_archive
1294    def do_rename(self, args, repository, manifest, key, cache, archive):
1295        """Rename an existing archive"""
1296        archive.rename(args.name)
1297        manifest.write()
1298        repository.commit()
1299        cache.commit()
1300        return self.exit_code
1301
1302    @with_repository(exclusive=True, manifest=False)
1303    def do_delete(self, args, repository):
1304        """Delete an existing repository or archives"""
1305        archive_filter_specified = any((args.first, args.last, args.prefix is not None, args.glob_archives))
1306        explicit_archives_specified = args.location.archive or args.archives
1307        if archive_filter_specified and explicit_archives_specified:
1308            self.print_error('Mixing archive filters and explicitly named archives is not supported.')
1309            return self.exit_code
1310        if archive_filter_specified or explicit_archives_specified:
1311            return self._delete_archives(args, repository)
1312        else:
1313            return self._delete_repository(args, repository)
1314
1315    def _delete_archives(self, args, repository):
1316        """Delete archives"""
1317        dry_run = args.dry_run
1318
1319        manifest, key = Manifest.load(repository, (Manifest.Operation.DELETE,))
1320
1321        if args.location.archive or args.archives:
1322            archives = list(args.archives)
1323            if args.location.archive:
1324                archives.insert(0, args.location.archive)
1325            archive_names = tuple(archives)
1326        else:
1327            archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
1328            if not archive_names:
1329                return self.exit_code
1330
1331        if args.forced == 2:
1332            deleted = False
1333            for i, archive_name in enumerate(archive_names, 1):
1334                try:
1335                    current_archive = manifest.archives.pop(archive_name)
1336                except KeyError:
1337                    self.exit_code = EXIT_WARNING
1338                    logger.warning('Archive {} not found ({}/{}).'.format(archive_name, i, len(archive_names)))
1339                else:
1340                    deleted = True
1341                    msg = 'Would delete: {} ({}/{})' if dry_run else 'Deleted archive: {} ({}/{})'
1342                    logger.info(msg.format(format_archive(current_archive), i, len(archive_names)))
1343            if dry_run:
1344                logger.info('Finished dry-run.')
1345            elif deleted:
1346                manifest.write()
1347                # note: might crash in compact() after committing the repo
1348                repository.commit()
1349                logger.warning('Done. Run "borg check --repair" to clean up the mess.')
1350            else:
1351                logger.warning('Aborted.')
1352            return self.exit_code
1353
1354        stats = Statistics()
1355        with Cache(repository, key, manifest, progress=args.progress, lock_wait=self.lock_wait) as cache:
1356            msg_delete = 'Would delete archive: {} ({}/{})' if dry_run else 'Deleting archive: {} ({}/{})'
1357            msg_not_found = 'Archive {} not found ({}/{}).'
1358            for i, archive_name in enumerate(archive_names, 1):
1359                try:
1360                    archive_info = manifest.archives[archive_name]
1361                except KeyError:
1362                    logger.warning(msg_not_found.format(archive_name, i, len(archive_names)))
1363                else:
1364                    logger.info(msg_delete.format(format_archive(archive_info), i, len(archive_names)))
1365                    if not dry_run:
1366                        archive = Archive(repository, key, manifest, archive_name, cache=cache,
1367                                          consider_part_files=args.consider_part_files)
1368                        archive.delete(stats, progress=args.progress, forced=args.forced)
1369            if not dry_run:
1370                manifest.write()
1371                repository.commit(save_space=args.save_space)
1372                cache.commit()
1373            if args.stats:
1374                log_multi(DASHES,
1375                          STATS_HEADER,
1376                          stats.summary.format(label='Deleted data:', stats=stats),
1377                          str(cache),
1378                          DASHES, logger=logging.getLogger('borg.output.stats'))
1379
1380        return self.exit_code
1381
1382    def _delete_repository(self, args, repository):
1383        """Delete a repository"""
1384        dry_run = args.dry_run
1385
1386        if not args.cache_only:
1387            msg = []
1388            try:
1389                manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
1390            except NoManifestError:
1391                msg.append("You requested to completely DELETE the repository *including* all archives it may "
1392                           "contain.")
1393                msg.append("This repository seems to have no manifest, so we can't tell anything about its "
1394                           "contents.")
1395            else:
1396                msg.append("You requested to completely DELETE the repository *including* all archives it "
1397                           "contains:")
1398                for archive_info in manifest.archives.list(sort_by=['ts']):
1399                    msg.append(format_archive(archive_info))
1400            msg.append("Type 'YES' if you understand this and want to continue: ")
1401            msg = '\n'.join(msg)
1402            if not yes(msg, false_msg="Aborting.", invalid_msg='Invalid answer, aborting.', truish=('YES',),
1403                       retry=False, env_var_override='BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'):
1404                self.exit_code = EXIT_ERROR
1405                return self.exit_code
1406            if not dry_run:
1407                repository.destroy()
1408                logger.info("Repository deleted.")
1409                SecurityManager.destroy(repository)
1410            else:
1411                logger.info("Would delete repository.")
1412        if not dry_run:
1413            Cache.destroy(repository)
1414            logger.info("Cache deleted.")
1415        else:
1416            logger.info("Would delete cache.")
1417        return self.exit_code
1418
1419    def do_mount(self, args):
1420        """Mount archive or an entire repository as a FUSE filesystem"""
1421        # Perform these checks before opening the repository and asking for a passphrase.
1422
1423        try:
1424            import borg.fuse
1425        except ImportError as e:
1426            self.print_error('borg mount not available: loading FUSE support failed [ImportError: %s]' % str(e))
1427            return self.exit_code
1428
1429        if not os.path.isdir(args.mountpoint) or not os.access(args.mountpoint, os.R_OK | os.W_OK | os.X_OK):
1430            self.print_error('%s: Mountpoint must be a writable directory' % args.mountpoint)
1431            return self.exit_code
1432
1433        return self._do_mount(args)
1434
1435    @with_repository(compatibility=(Manifest.Operation.READ,))
1436    def _do_mount(self, args, repository, manifest, key):
1437        from .fuse import FuseOperations
1438
1439        with cache_if_remote(repository, decrypted_cache=key) as cached_repo:
1440            operations = FuseOperations(key, repository, manifest, args, cached_repo)
1441            logger.info("Mounting filesystem")
1442            try:
1443                operations.mount(args.mountpoint, args.options, args.foreground)
1444            except RuntimeError:
1445                # Relevant error message already printed to stderr by FUSE
1446                self.exit_code = EXIT_ERROR
1447        return self.exit_code
1448
1449    def do_umount(self, args):
1450        """un-mount the FUSE filesystem"""
1451        return umount(args.mountpoint)
1452
1453    @with_repository(compatibility=(Manifest.Operation.READ,))
1454    def do_list(self, args, repository, manifest, key):
1455        """List archive or repository contents"""
1456        if args.location.archive:
1457            if args.json:
1458                self.print_error('The --json option is only valid for listing archives, not archive contents.')
1459                return self.exit_code
1460            return self._list_archive(args, repository, manifest, key)
1461        else:
1462            if args.json_lines:
1463                self.print_error('The --json-lines option is only valid for listing archive contents, not archives.')
1464                return self.exit_code
1465            return self._list_repository(args, repository, manifest, key)
1466
1467    def _list_archive(self, args, repository, manifest, key):
1468        matcher = self.build_matcher(args.patterns, args.paths)
1469        if args.format is not None:
1470            format = args.format
1471        elif args.short:
1472            format = "{path}{NL}"
1473        else:
1474            format = "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}"
1475
1476        def _list_inner(cache):
1477            archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
1478                              consider_part_files=args.consider_part_files)
1479
1480            formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
1481            for item in archive.iter_items(lambda item: matcher.match(item.path)):
1482                sys.stdout.write(formatter.format_item(item))
1483
1484        # Only load the cache if it will be used
1485        if ItemFormatter.format_needs_cache(format):
1486            with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
1487                _list_inner(cache)
1488        else:
1489            _list_inner(cache=None)
1490
1491        return self.exit_code
1492
1493    def _list_repository(self, args, repository, manifest, key):
1494        if args.format is not None:
1495            format = args.format
1496        elif args.short:
1497            format = "{archive}{NL}"
1498        else:
1499            format = "{archive:<36} {time} [{id}]{NL}"
1500        formatter = ArchiveFormatter(format, repository, manifest, key, json=args.json)
1501
1502        output_data = []
1503
1504        for archive_info in manifest.archives.list_considering(args):
1505            if args.json:
1506                output_data.append(formatter.get_item_data(archive_info))
1507            else:
1508                sys.stdout.write(formatter.format_item(archive_info))
1509
1510        if args.json:
1511            json_print(basic_json_data(manifest, extra={
1512                'archives': output_data
1513            }))
1514
1515        return self.exit_code
1516
1517    @with_repository(cache=True, compatibility=(Manifest.Operation.READ,))
1518    def do_info(self, args, repository, manifest, key, cache):
1519        """Show archive details such as disk space used"""
1520        if any((args.location.archive, args.first, args.last, args.prefix is not None, args.glob_archives)):
1521            return self._info_archives(args, repository, manifest, key, cache)
1522        else:
1523            return self._info_repository(args, repository, manifest, key, cache)
1524
1525    def _info_archives(self, args, repository, manifest, key, cache):
1526        def format_cmdline(cmdline):
1527            return remove_surrogates(' '.join(shlex.quote(x) for x in cmdline))
1528
1529        if args.location.archive:
1530            archive_names = (args.location.archive,)
1531        else:
1532            archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
1533            if not archive_names:
1534                return self.exit_code
1535
1536        output_data = []
1537
1538        for i, archive_name in enumerate(archive_names, 1):
1539            archive = Archive(repository, key, manifest, archive_name, cache=cache,
1540                              consider_part_files=args.consider_part_files)
1541            info = archive.info()
1542            if args.json:
1543                output_data.append(info)
1544            else:
1545                info['duration'] = format_timedelta(timedelta(seconds=info['duration']))
1546                info['command_line'] = format_cmdline(info['command_line'])
1547                print(textwrap.dedent("""
1548                Archive name: {name}
1549                Archive fingerprint: {id}
1550                Comment: {comment}
1551                Hostname: {hostname}
1552                Username: {username}
1553                Time (start): {start}
1554                Time (end): {end}
1555                Duration: {duration}
1556                Number of files: {stats[nfiles]}
1557                Command line: {command_line}
1558                Utilization of maximum supported archive size: {limits[max_archive_size]:.0%}
1559                ------------------------------------------------------------------------------
1560                                       Original size      Compressed size    Deduplicated size
1561                This archive:   {stats[original_size]:>20s} {stats[compressed_size]:>20s} {stats[deduplicated_size]:>20s}
1562                {cache}
1563                """).strip().format(cache=cache, **info))
1564            if self.exit_code:
1565                break
1566            if not args.json and len(archive_names) - i:
1567                print()
1568
1569        if args.json:
1570            json_print(basic_json_data(manifest, cache=cache, extra={
1571                'archives': output_data,
1572            }))
1573        return self.exit_code
1574
1575    def _info_repository(self, args, repository, manifest, key, cache):
1576        info = basic_json_data(manifest, cache=cache, extra={
1577            'security_dir': cache.security_manager.dir,
1578        })
1579
1580        if args.json:
1581            json_print(info)
1582        else:
1583            encryption = 'Encrypted: '
1584            if key.NAME == 'plaintext':
1585                encryption += 'No'
1586            else:
1587                encryption += 'Yes (%s)' % key.NAME
1588            if key.NAME.startswith('key file'):
1589                encryption += '\nKey file: %s' % key.find_key()
1590            info['encryption'] = encryption
1591
1592            print(textwrap.dedent("""
1593            Repository ID: {id}
1594            Location: {location}
1595            {encryption}
1596            Cache: {cache.path}
1597            Security dir: {security_dir}
1598            """).strip().format(
1599                id=bin_to_hex(repository.id),
1600                location=repository._location.canonical_path(),
1601                **info))
1602            print(DASHES)
1603            print(STATS_HEADER)
1604            print(str(cache))
1605        return self.exit_code
1606
1607    @with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,))
1608    def do_prune(self, args, repository, manifest, key):
1609        """Prune repository archives according to specified rules"""
1610        if not any((args.secondly, args.minutely, args.hourly, args.daily,
1611                    args.weekly, args.monthly, args.yearly, args.within)):
1612            self.print_error('At least one of the "keep-within", "keep-last", '
1613                             '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
1614                             '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
1615            return self.exit_code
1616        if args.prefix is not None:
1617            args.glob_archives = args.prefix + '*'
1618        checkpoint_re = r'\.checkpoint(\.\d+)?'
1619        archives_checkpoints = manifest.archives.list(glob=args.glob_archives,
1620                                                      match_end=r'(%s)?\Z' % checkpoint_re,
1621                                                      sort_by=['ts'], reverse=True)
1622        is_checkpoint = re.compile(r'(%s)\Z' % checkpoint_re).search
1623        checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)]
1624        # keep the latest checkpoint, if there is no later non-checkpoint archive
1625        if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]:
1626            keep_checkpoints = checkpoints[:1]
1627        else:
1628            keep_checkpoints = []
1629        checkpoints = set(checkpoints)
1630        # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
1631        # that is newer than a successfully completed backup - and killing the successful backup.
1632        archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
1633        keep = []
1634        if args.within:
1635            keep += prune_within(archives, args.within)
1636        if args.secondly:
1637            keep += prune_split(archives, '%Y-%m-%d %H:%M:%S', args.secondly, keep)
1638        if args.minutely:
1639            keep += prune_split(archives, '%Y-%m-%d %H:%M', args.minutely, keep)
1640        if args.hourly:
1641            keep += prune_split(archives, '%Y-%m-%d %H', args.hourly, keep)
1642        if args.daily:
1643            keep += prune_split(archives, '%Y-%m-%d', args.daily, keep)
1644        if args.weekly:
1645            keep += prune_split(archives, '%G-%V', args.weekly, keep)
1646        if args.monthly:
1647            keep += prune_split(archives, '%Y-%m', args.monthly, keep)
1648        if args.yearly:
1649            keep += prune_split(archives, '%Y', args.yearly, keep)
1650        to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
1651        stats = Statistics()
1652        with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
1653            list_logger = logging.getLogger('borg.output.list')
1654            if args.output_list:
1655                # set up counters for the progress display
1656                to_delete_len = len(to_delete)
1657                archives_deleted = 0
1658            pi = ProgressIndicatorPercent(total=len(to_delete), msg='Pruning archives %3.0f%%', msgid='prune')
1659            for archive in archives_checkpoints:
1660                if archive in to_delete:
1661                    pi.show()
1662                    if args.dry_run:
1663                        if args.output_list:
1664                            list_logger.info('Would prune:     %s' % format_archive(archive))
1665                    else:
1666                        if args.output_list:
1667                            archives_deleted += 1
1668                            list_logger.info('Pruning archive: %s (%d/%d)' % (format_archive(archive),
1669                                                                              archives_deleted, to_delete_len))
1670                        Archive(repository, key, manifest, archive.name, cache).delete(stats, forced=args.forced)
1671                else:
1672                    if args.output_list:
1673                        list_logger.info('Keeping archive: %s' % format_archive(archive))
1674            pi.finish()
1675            if to_delete and not args.dry_run:
1676                manifest.write()
1677                repository.commit(save_space=args.save_space)
1678                cache.commit()
1679            if args.stats:
1680                log_multi(DASHES,
1681                          STATS_HEADER,
1682                          stats.summary.format(label='Deleted data:', stats=stats),
1683                          str(cache),
1684                          DASHES, logger=logging.getLogger('borg.output.stats'))
1685        return self.exit_code
1686
1687    @with_repository(fake=('tam', 'disable_tam'), invert_fake=True, manifest=False, exclusive=True)
1688    def do_upgrade(self, args, repository, manifest=None, key=None):
1689        """upgrade a repository from a previous version"""
1690        if args.tam:
1691            manifest, key = Manifest.load(repository, (Manifest.Operation.CHECK,), force_tam_not_required=args.force)
1692
1693            if not hasattr(key, 'change_passphrase'):
1694                print('This repository is not encrypted, cannot enable TAM.')
1695                return EXIT_ERROR
1696
1697            if not manifest.tam_verified or not manifest.config.get(b'tam_required', False):
1698                # The standard archive listing doesn't include the archive ID like in borg 1.1.x
1699                print('Manifest contents:')
1700                for archive_info in manifest.archives.list(sort_by=['ts']):
1701                    print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id))
1702                manifest.config[b'tam_required'] = True
1703                manifest.write()
1704                repository.commit()
1705            if not key.tam_required:
1706                key.tam_required = True
1707                key.change_passphrase(key._passphrase)
1708                print('Key updated')
1709                if hasattr(key, 'find_key'):
1710                    print('Key location:', key.find_key())
1711            if not tam_required(repository):
1712                tam_file = tam_required_file(repository)
1713                open(tam_file, 'w').close()
1714                print('Updated security database')
1715        elif args.disable_tam:
1716            manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK, force_tam_not_required=True)
1717            if tam_required(repository):
1718                os.unlink(tam_required_file(repository))
1719            if key.tam_required:
1720                key.tam_required = False
1721                key.change_passphrase(key._passphrase)
1722                print('Key updated')
1723                if hasattr(key, 'find_key'):
1724                    print('Key location:', key.find_key())
1725            manifest.config[b'tam_required'] = False
1726            manifest.write()
1727            repository.commit()
1728        else:
1729            # mainly for upgrades from Attic repositories,
1730            # but also supports borg 0.xx -> 1.0 upgrade.
1731
1732            repo = AtticRepositoryUpgrader(args.location.path, create=False)
1733            try:
1734                repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress)
1735            except NotImplementedError as e:
1736                print("warning: %s" % e)
1737            repo = BorgRepositoryUpgrader(args.location.path, create=False)
1738            try:
1739                repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress)
1740            except NotImplementedError as e:
1741                print("warning: %s" % e)
1742        return self.exit_code
1743
1744    @with_repository(cache=True, exclusive=True, compatibility=(Manifest.Operation.CHECK,))
1745    def do_recreate(self, args, repository, manifest, key, cache):
1746        """Re-create archives"""
1747        matcher = self.build_matcher(args.patterns, args.paths)
1748        self.output_list = args.output_list
1749        self.output_filter = args.output_filter
1750        recompress = args.recompress != 'never'
1751        always_recompress = args.recompress == 'always'
1752
1753        recreater = ArchiveRecreater(repository, manifest, key, cache, matcher,
1754                                     exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
1755                                     keep_exclude_tags=args.keep_exclude_tags, chunker_params=args.chunker_params,
1756                                     compression=args.compression, recompress=recompress, always_recompress=always_recompress,
1757                                     progress=args.progress, stats=args.stats,
1758                                     file_status_printer=self.print_file_status,
1759                                     checkpoint_interval=args.checkpoint_interval,
1760                                     dry_run=args.dry_run, timestamp=args.timestamp)
1761
1762        if args.location.archive:
1763            name = args.location.archive
1764            if recreater.is_temporary_archive(name):
1765                self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
1766                return self.exit_code
1767            if not recreater.recreate(name, args.comment, args.target):
1768                self.print_error('Nothing to do. Archive was not processed.\n'
1769                                 'Specify at least one pattern, PATH, --comment, re-compression or re-chunking option.')
1770        else:
1771            if args.target is not None:
1772                self.print_error('--target: Need to specify single archive')
1773                return self.exit_code
1774            for archive in manifest.archives.list(sort_by=['ts']):
1775                name = archive.name
1776                if recreater.is_temporary_archive(name):
1777                    continue
1778                print('Processing', name)
1779                if not recreater.recreate(name, args.comment):
1780                    logger.info('Skipped archive %s: Nothing to do. Archive was not processed.', name)
1781        if not args.dry_run:
1782            manifest.write()
1783            repository.commit()
1784            cache.commit()
1785        return self.exit_code
1786
1787    @with_repository(manifest=False, exclusive=True)
1788    def do_with_lock(self, args, repository):
1789        """run a user specified command with the repository lock held"""
1790        # for a new server, this will immediately take an exclusive lock.
1791        # to support old servers, that do not have "exclusive" arg in open()
1792        # RPC API, we also do it the old way:
1793        # re-write manifest to start a repository transaction - this causes a
1794        # lock upgrade to exclusive for remote (and also for local) repositories.
1795        # by using manifest=False in the decorator, we avoid having to require
1796        # the encryption key (and can operate just with encrypted data).
1797        data = repository.get(Manifest.MANIFEST_ID)
1798        repository.put(Manifest.MANIFEST_ID, data)
1799        # usually, a 0 byte (open for writing) segment file would be visible in the filesystem here.
1800        # we write and close this file, to rather have a valid segment file on disk, before invoking the subprocess.
1801        # we can only do this for local repositories (with .io), though:
1802        if hasattr(repository, 'io'):
1803            repository.io.close_segment()
1804        env = prepare_subprocess_env(system=True)
1805        try:
1806            # we exit with the return code we get from the subprocess
1807            return subprocess.call([args.command] + args.args, env=env)
1808        finally:
1809            # we need to commit the "no change" operation we did to the manifest
1810            # because it created a new segment file in the repository. if we would
1811            # roll back, the same file would be later used otherwise (for other content).
1812            # that would be bad if somebody uses rsync with ignore-existing (or
1813            # any other mechanism relying on existing segment data not changing).
1814            # see issue #1867.
1815            repository.commit()
1816
1817    @with_repository(exclusive=True, manifest=False)
1818    def do_config(self, args, repository):
1819        """get, set, and delete values in a repository or cache config file"""
1820
1821        def repo_validate(section, name, value=None, check_value=True):
1822            if section not in ['repository', ]:
1823                raise ValueError('Invalid section')
1824            if name in ['segments_per_dir', ]:
1825                if check_value:
1826                    try:
1827                        int(value)
1828                    except ValueError:
1829                        raise ValueError('Invalid value') from None
1830            elif name in ['max_segment_size', 'additional_free_space', 'storage_quota', ]:
1831                if check_value:
1832                    try:
1833                        parse_file_size(value)
1834                    except ValueError:
1835                        raise ValueError('Invalid value') from None
1836                    if name == 'storage_quota':
1837                        if parse_file_size(value) < parse_file_size('10M'):
1838                            raise ValueError('Invalid value: storage_quota < 10M')
1839                    elif name == 'max_segment_size':
1840                        if parse_file_size(value) >= MAX_SEGMENT_SIZE_LIMIT:
1841                            raise ValueError('Invalid value: max_segment_size >= %d' % MAX_SEGMENT_SIZE_LIMIT)
1842            elif name in ['append_only', ]:
1843                if check_value and value not in ['0', '1']:
1844                    raise ValueError('Invalid value')
1845            elif name in ['id', ]:
1846                if check_value:
1847                    try:
1848                        bin_id = unhexlify(value)
1849                    except:
1850                        raise ValueError('Invalid value, must be 64 hex digits') from None
1851                    if len(bin_id) != 32:
1852                        raise ValueError('Invalid value, must be 64 hex digits')
1853            else:
1854                raise ValueError('Invalid name')
1855
1856        def cache_validate(section, name, value=None, check_value=True):
1857            if section not in ['cache', ]:
1858                raise ValueError('Invalid section')
1859            if name in ['previous_location', ]:
1860                if check_value:
1861                    Location(value)
1862            else:
1863                raise ValueError('Invalid name')
1864
1865        def list_config(config):
1866            default_values = {
1867                'version': '1',
1868                'segments_per_dir': str(DEFAULT_SEGMENTS_PER_DIR),
1869                'max_segment_size': str(MAX_SEGMENT_SIZE_LIMIT),
1870                'additional_free_space': '0',
1871                'storage_quota': repository.storage_quota,
1872                'append_only': repository.append_only
1873            }
1874            print('[repository]')
1875            for key in ['version', 'segments_per_dir', 'max_segment_size',
1876                        'storage_quota', 'additional_free_space', 'append_only',
1877                        'id']:
1878                value = config.get('repository', key, fallback=False)
1879                if value is None:
1880                    value = default_values.get(key)
1881                    if value is None:
1882                        raise Error('The repository config is missing the %s key which has no default value' % key)
1883                print('%s = %s' % (key, value))
1884
1885        if not args.list:
1886            if args.name is None:
1887                self.print_error('No config key name was provided.')
1888                return self.exit_code
1889
1890            try:
1891                section, name = args.name.split('.')
1892            except ValueError:
1893                section = args.cache and "cache" or "repository"
1894                name = args.name
1895
1896        if args.cache:
1897            manifest, key = Manifest.load(repository, (Manifest.Operation.WRITE,))
1898            assert_secure(repository, manifest, self.lock_wait)
1899            cache = Cache(repository, key, manifest, lock_wait=self.lock_wait)
1900
1901        try:
1902            if args.cache:
1903                cache.cache_config.load()
1904                config = cache.cache_config._config
1905                save = cache.cache_config.save
1906                validate = cache_validate
1907            else:
1908                config = repository.config
1909                save = lambda: repository.save_config(repository.path, repository.config)
1910                validate = repo_validate
1911
1912            if args.delete:
1913                validate(section, name, check_value=False)
1914                config.remove_option(section, name)
1915                if len(config.options(section)) == 0:
1916                    config.remove_section(section)
1917                save()
1918            elif args.list:
1919                list_config(config)
1920            elif args.value:
1921                validate(section, name, args.value)
1922                if section not in config.sections():
1923                    config.add_section(section)
1924                config.set(section, name, args.value)
1925                save()
1926            else:
1927                try:
1928                    print(config.get(section, name))
1929                except (configparser.NoOptionError, configparser.NoSectionError) as e:
1930                    print(e, file=sys.stderr)
1931                    return EXIT_WARNING
1932            return EXIT_SUCCESS
1933        finally:
1934            if args.cache:
1935                cache.close()
1936
1937    def do_debug_info(self, args):
1938        """display system information for debugging / bug reports"""
1939        print(sysinfo())
1940
1941        # Additional debug information
1942        print('CRC implementation:', crc32.__name__)
1943        print('Process ID:', get_process_id())
1944        return EXIT_SUCCESS
1945
1946    @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
1947    def do_debug_dump_archive_items(self, args, repository, manifest, key):
1948        """dump (decrypted, decompressed) archive items metadata (not: data)"""
1949        archive = Archive(repository, key, manifest, args.location.archive,
1950                          consider_part_files=args.consider_part_files)
1951        for i, item_id in enumerate(archive.metadata.items):
1952            data = key.decrypt(item_id, repository.get(item_id))
1953            filename = '%06d_%s.items' % (i, bin_to_hex(item_id))
1954            print('Dumping', filename)
1955            with open(filename, 'wb') as fd:
1956                fd.write(data)
1957        print('Done.')
1958        return EXIT_SUCCESS
1959
1960    @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
1961    def do_debug_dump_archive(self, args, repository, manifest, key):
1962        """dump decoded archive metadata (not: data)"""
1963
1964        try:
1965            archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
1966        except KeyError:
1967            raise Archive.DoesNotExist(args.location.archive)
1968
1969        indent = 4
1970
1971        def do_indent(d):
1972            return textwrap.indent(json.dumps(d, indent=indent), prefix=' ' * indent)
1973
1974        def output(fd):
1975            # this outputs megabytes of data for a modest sized archive, so some manual streaming json output
1976            fd.write('{\n')
1977            fd.write('    "_name": ' + json.dumps(args.location.archive) + ",\n")
1978            fd.write('    "_manifest_entry":\n')
1979            fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
1980            fd.write(',\n')
1981
1982            data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id']))
1983            archive_org_dict = msgpack.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')
1984
1985            fd.write('    "_meta":\n')
1986            fd.write(do_indent(prepare_dump_dict(archive_org_dict)))
1987            fd.write(',\n')
1988            fd.write('    "_items": [\n')
1989
1990            unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
1991            first = True
1992            for item_id in archive_org_dict[b'items']:
1993                data = key.decrypt(item_id, repository.get(item_id))
1994                unpacker.feed(data)
1995                for item in unpacker:
1996                    item = prepare_dump_dict(item)
1997                    if first:
1998                        first = False
1999                    else:
2000                        fd.write(',\n')
2001                    fd.write(do_indent(item))
2002
2003            fd.write('\n')
2004            fd.write('    ]\n}\n')
2005
2006        with dash_open(args.path, 'w') as fd:
2007            output(fd)
2008        return EXIT_SUCCESS
2009
2010    @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
2011    def do_debug_dump_manifest(self, args, repository, manifest, key):
2012        """dump decoded repository manifest"""
2013
2014        data = key.decrypt(None, repository.get(manifest.MANIFEST_ID))
2015
2016        meta = prepare_dump_dict(msgpack_fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape'))
2017
2018        with dash_open(args.path, 'w') as fd:
2019            json.dump(meta, fd, indent=4)
2020        return EXIT_SUCCESS
2021
2022    @with_repository(manifest=False)
2023    def do_debug_dump_repo_objs(self, args, repository):
2024        """dump (decrypted, decompressed) repo objects, repo index MUST be current/correct"""
2025        from .crypto.key import key_factory
2026
2027        def decrypt_dump(i, id, cdata, tag=None, segment=None, offset=None):
2028            if cdata is not None:
2029                give_id = id if id != Manifest.MANIFEST_ID else None
2030                data = key.decrypt(give_id, cdata)
2031            else:
2032                data = b''
2033            tag_str = '' if tag is None else '_' + tag
2034            segment_str = '_' + str(segment) if segment is not None else ''
2035            offset_str = '_' + str(offset) if offset is not None else ''
2036            id_str = '_' + bin_to_hex(id) if id is not None else ''
2037            filename = '%08d%s%s%s%s.obj' % (i, segment_str, offset_str, tag_str, id_str)
2038            print('Dumping', filename)
2039            with open(filename, 'wb') as fd:
2040                fd.write(data)
2041
2042        if args.ghost:
2043            # dump ghosty stuff from segment files: not yet committed objects, deleted / superseded objects, commit tags
2044
2045            # set up the key without depending on a manifest obj
2046            for id, cdata, tag, segment, offset in repository.scan_low_level():
2047                if tag == TAG_PUT:
2048                    key = key_factory(repository, cdata)
2049                    break
2050            i = 0
2051            for id, cdata, tag, segment, offset in repository.scan_low_level():
2052                if tag == TAG_PUT:
2053                    decrypt_dump(i, id, cdata, tag='put', segment=segment, offset=offset)
2054                elif tag == TAG_DELETE:
2055                    decrypt_dump(i, id, None, tag='del', segment=segment, offset=offset)
2056                elif tag == TAG_COMMIT:
2057                    decrypt_dump(i, None, None, tag='commit', segment=segment, offset=offset)
2058                i += 1
2059        else:
2060            # set up the key without depending on a manifest obj
2061            ids = repository.list(limit=1, marker=None)
2062            cdata = repository.get(ids[0])
2063            key = key_factory(repository, cdata)
2064            marker = None
2065            i = 0
2066            while True:
2067                result = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker)  # must use on-disk order scanning here
2068                if not result:
2069                    break
2070                marker = result[-1]
2071                for id in result:
2072                    cdata = repository.get(id)
2073                    decrypt_dump(i, id, cdata)
2074                    i += 1
2075        print('Done.')
2076        return EXIT_SUCCESS
2077
2078    @with_repository(manifest=False)
2079    def do_debug_search_repo_objs(self, args, repository):
2080        """search for byte sequences in repo objects, repo index MUST be current/correct"""
2081        context = 32
2082
2083        def print_finding(info, wanted, data, offset):
2084            before = data[offset - context:offset]
2085            after = data[offset + len(wanted):offset + len(wanted) + context]
2086            print('%s: %s %s %s == %r %r %r' % (info, before.hex(), wanted.hex(), after.hex(),
2087                                                before, wanted, after))
2088
2089        wanted = args.wanted
2090        try:
2091            if wanted.startswith('hex:'):
2092                wanted = unhexlify(wanted[4:])
2093            elif wanted.startswith('str:'):
2094                wanted = wanted[4:].encode('utf-8')
2095            else:
2096                raise ValueError('unsupported search term')
2097        except (ValueError, UnicodeEncodeError):
2098            wanted = None
2099        if not wanted:
2100            self.print_error('search term needs to be hex:123abc or str:foobar style')
2101            return EXIT_ERROR
2102
2103        from .crypto.key import key_factory
2104        # set up the key without depending on a manifest obj
2105        ids = repository.list(limit=1, marker=None)
2106        cdata = repository.get(ids[0])
2107        key = key_factory(repository, cdata)
2108
2109        marker = None
2110        last_data = b''
2111        last_id = None
2112        i = 0
2113        while True:
2114            result = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker)  # must use on-disk order scanning here
2115            if not result:
2116                break
2117            marker = result[-1]
2118            for id in result:
2119                cdata = repository.get(id)
2120                give_id = id if id != Manifest.MANIFEST_ID else None
2121                data = key.decrypt(give_id, cdata)
2122
2123                # try to locate wanted sequence crossing the border of last_data and data
2124                boundary_data = last_data[-(len(wanted) - 1):] + data[:len(wanted) - 1]
2125                if wanted in boundary_data:
2126                    boundary_data = last_data[-(len(wanted) - 1 + context):] + data[:len(wanted) - 1 + context]
2127                    offset = boundary_data.find(wanted)
2128                    info = '%d %s | %s' % (i, last_id.hex(), id.hex())
2129                    print_finding(info, wanted, boundary_data, offset)
2130
2131                # try to locate wanted sequence in data
2132                count = data.count(wanted)
2133                if count:
2134                    offset = data.find(wanted)  # only determine first occurrence's offset
2135                    info = "%d %s #%d" % (i, id.hex(), count)
2136                    print_finding(info, wanted, data, offset)
2137
2138                last_id, last_data = id, data
2139                i += 1
2140                if i % 10000 == 0:
2141                    print('%d objects processed.' % i)
2142        print('Done.')
2143        return EXIT_SUCCESS
2144
2145    @with_repository(manifest=False)
2146    def do_debug_get_obj(self, args, repository):
2147        """get object contents from the repository and write it into file"""
2148        hex_id = args.id
2149        try:
2150            id = unhexlify(hex_id)
2151        except ValueError:
2152            print("object id %s is invalid." % hex_id)
2153        else:
2154            try:
2155                data = repository.get(id)
2156            except Repository.ObjectNotFound:
2157                print("object %s not found." % hex_id)
2158            else:
2159                with open(args.path, "wb") as f:
2160                    f.write(data)
2161                print("object %s fetched." % hex_id)
2162        return EXIT_SUCCESS
2163
2164    @with_repository(manifest=False, exclusive=True)
2165    def do_debug_put_obj(self, args, repository):
2166        """put file(s) contents into the repository"""
2167        for path in args.paths:
2168            with open(path, "rb") as f:
2169                data = f.read()
2170            h = hashlib.sha256(data)  # XXX hardcoded
2171            repository.put(h.digest(), data)
2172            print("object %s put." % h.hexdigest())
2173        repository.commit()
2174        return EXIT_SUCCESS
2175
2176    @with_repository(manifest=False, exclusive=True)
2177    def do_debug_delete_obj(self, args, repository):
2178        """delete the objects with the given IDs from the repo"""
2179        modified = False
2180        for hex_id in args.ids:
2181            try:
2182                id = unhexlify(hex_id)
2183            except ValueError:
2184                print("object id %s is invalid." % hex_id)
2185            else:
2186                try:
2187                    repository.delete(id)
2188                    modified = True
2189                    print("object %s deleted." % hex_id)
2190                except Repository.ObjectNotFound:
2191                    print("object %s not found." % hex_id)
2192        if modified:
2193            repository.commit()
2194        print('Done.')
2195        return EXIT_SUCCESS
2196
2197    @with_repository(manifest=False, exclusive=True, cache=True, compatibility=Manifest.NO_OPERATION_CHECK)
2198    def do_debug_refcount_obj(self, args, repository, manifest, key, cache):
2199        """display refcounts for the objects with the given IDs"""
2200        for hex_id in args.ids:
2201            try:
2202                id = unhexlify(hex_id)
2203            except ValueError:
2204                print("object id %s is invalid." % hex_id)
2205            else:
2206                try:
2207                    refcount = cache.chunks[id][0]
2208                    print("object %s has %d referrers [info from chunks cache]." % (hex_id, refcount))
2209                except KeyError:
2210                    print("object %s not found [info from chunks cache]." % hex_id)
2211        return EXIT_SUCCESS
2212
2213    @with_repository(manifest=False, exclusive=True)
2214    def do_debug_dump_hints(self, args, repository):
2215        """dump repository hints"""
2216        if not repository._active_txn:
2217            repository.prepare_txn(repository.get_transaction_id())
2218        try:
2219            hints = dict(
2220                segments=repository.segments,
2221                compact=repository.compact,
2222                storage_quota_use=repository.storage_quota_use,
2223            )
2224            with dash_open(args.path, 'w') as fd:
2225                json.dump(hints, fd, indent=4)
2226        finally:
2227            repository.rollback()
2228        return EXIT_SUCCESS
2229
2230    def do_debug_convert_profile(self, args):
2231        """convert Borg profile to Python profile"""
2232        import marshal
2233        with args.output, args.input:
2234            marshal.dump(msgpack.unpack(args.input, use_list=False, encoding='utf-8'), args.output)
2235        return EXIT_SUCCESS
2236
2237    @with_repository(lock=False, manifest=False)
2238    def do_break_lock(self, args, repository):
2239        """Break the repository lock (e.g. in case it was left by a dead borg."""
2240        repository.break_lock()
2241        Cache.break_lock(repository)
2242        return self.exit_code
2243
2244    helptext = collections.OrderedDict()
2245    helptext['patterns'] = textwrap.dedent('''
2246        The path/filenames used as input for the pattern matching start from the
2247        currently active recursion root. You usually give the recursion root(s)
2248        when invoking borg and these can be either relative or absolute paths.
2249
2250        So, when you give `relative/` as root, the paths going into the matcher
2251        will look like `relative/.../file.ext`. When you give `/absolute/` as
2252        root, they will look like `/absolute/.../file.ext`. This is meant when
2253        we talk about "full path" below.
2254
2255        File paths in Borg archives are always stored normalized and relative.
2256        This means that e.g. ``borg create /path/to/repo ../some/path`` will
2257        store all files as `some/path/.../file.ext` and ``borg create
2258        /path/to/repo /home/user`` will store all files as
2259        `home/user/.../file.ext`. Therefore, always use relative paths in your
2260        patterns when matching archive content in commands like ``extract`` or
2261        ``mount``. Starting with Borg 1.2 this behaviour will be changed to
2262        accept both absolute and relative paths.
2263
2264        File patterns support these styles: fnmatch, shell, regular expressions,
2265        path prefixes and path full-matches. By default, fnmatch is used for
2266        ``--exclude`` patterns and shell-style is used for the experimental
2267        ``--pattern`` option.
2268
2269        If followed by a colon (':') the first two characters of a pattern are
2270        used as a style selector. Explicit style selection is necessary when a
2271        non-default style is desired or when the desired pattern starts with
2272        two alphanumeric characters followed by a colon (i.e. `aa:something/*`).
2273
2274        `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
2275            This is the default style for ``--exclude`` and ``--exclude-from``.
2276            These patterns use a variant of shell pattern syntax, with '\\*' matching
2277            any number of characters, '?' matching any single character, '[...]'
2278            matching any single character specified, including ranges, and '[!...]'
2279            matching any character not specified. For the purpose of these patterns,
2280            the path separator (backslash for Windows and '/' on other systems) is not
2281            treated specially. Wrap meta-characters in brackets for a literal
2282            match (i.e. `[?]` to match the literal character `?`). For a path
2283            to match a pattern, the full path must match, or it must match
2284            from the start of the full path to just before a path separator. Except
2285            for the root path, paths will never end in the path separator when
2286            matching is attempted.  Thus, if a given pattern ends in a path
2287            separator, a '\\*' is appended before matching is attempted.
2288
2289        Shell-style patterns, selector `sh:`
2290            This is the default style for ``--pattern`` and ``--patterns-from``.
2291            Like fnmatch patterns these are similar to shell patterns. The difference
2292            is that the pattern may include `**/` for matching zero or more directory
2293            levels, `*` for matching zero or more arbitrary characters with the
2294            exception of any path separator.
2295
2296        Regular expressions, selector `re:`
2297            Regular expressions similar to those found in Perl are supported. Unlike
2298            shell patterns regular expressions are not required to match the full
2299            path and any substring match is sufficient. It is strongly recommended to
2300            anchor patterns to the start ('^'), to the end ('$') or both. Path
2301            separators (backslash for Windows and '/' on other systems) in paths are
2302            always normalized to a forward slash ('/') before applying a pattern. The
2303            regular expression syntax is described in the `Python documentation for
2304            the re module <https://docs.python.org/3/library/re.html>`_.
2305
2306        Path prefix, selector `pp:`
2307            This pattern style is useful to match whole sub-directories. The pattern
2308            `pp:root/somedir` matches `root/somedir` and everything therein.
2309
2310        Path full-match, selector `pf:`
2311            This pattern style is (only) useful to match full paths.
2312            This is kind of a pseudo pattern as it can not have any variable or
2313            unspecified parts - the full path must be given.
2314            `pf:root/file.ext` matches `root/file.ext` only.
2315
2316            Implementation note: this is implemented via very time-efficient O(1)
2317            hashtable lookups (this means you can have huge amounts of such patterns
2318            without impacting performance much).
2319            Due to that, this kind of pattern does not respect any context or order.
2320            If you use such a pattern to include a file, it will always be included
2321            (if the directory recursion encounters it).
2322            Other include/exclude patterns that would normally match will be ignored.
2323            Same logic applies for exclude.
2324
2325        .. note::
2326
2327            `re:`, `sh:` and `fm:` patterns are all implemented on top of the Python SRE
2328            engine. It is very easy to formulate patterns for each of these types which
2329            requires an inordinate amount of time to match paths. If untrusted users
2330            are able to supply patterns, ensure they cannot supply `re:` patterns.
2331            Further, ensure that `sh:` and `fm:` patterns only contain a handful of
2332            wildcards at most.
2333
2334        Exclusions can be passed via the command line option ``--exclude``. When used
2335        from within a shell, the patterns should be quoted to protect them from
2336        expansion.
2337
2338        The ``--exclude-from`` option permits loading exclusion patterns from a text
2339        file with one pattern per line. Lines empty or starting with the number sign
2340        ('#') after removing whitespace on both ends are ignored. The optional style
2341        selector prefix is also supported for patterns loaded from a file. Due to
2342        whitespace removal, paths with whitespace at the beginning or end can only be
2343        excluded using regular expressions.
2344
2345        To test your exclusion patterns without performing an actual backup you can
2346        run ``borg create --list --dry-run ...``.
2347
2348        Examples::
2349
2350            # Exclude '/home/user/file.o' but not '/home/user/file.odt':
2351            $ borg create -e '*.o' backup /
2352
2353            # Exclude '/home/user/junk' and '/home/user/subdir/junk' but
2354            # not '/home/user/importantjunk' or '/etc/junk':
2355            $ borg create -e '/home/*/junk' backup /
2356
2357            # Exclude the contents of '/home/user/cache' but not the directory itself:
2358            $ borg create -e /home/user/cache/ backup /
2359
2360            # The file '/home/user/cache/important' is *not* backed up:
2361            $ borg create -e /home/user/cache/ backup / /home/user/cache/important
2362
2363            # The contents of directories in '/home' are not backed up when their name
2364            # ends in '.tmp'
2365            $ borg create --exclude 're:^/home/[^/]+\\.tmp/' backup /
2366
2367            # Load exclusions from file
2368            $ cat >exclude.txt <<EOF
2369            # Comment line
2370            /home/*/junk
2371            *.tmp
2372            fm:aa:something/*
2373            re:^/home/[^/]+\\.tmp/
2374            sh:/home/*/.thumbnails
2375            # Example with spaces, no need to escape as it is processed by borg
2376            some file with spaces.txt
2377            EOF
2378            $ borg create --exclude-from exclude.txt backup /
2379
2380        .. container:: experimental
2381
2382            A more general and easier to use way to define filename matching patterns exists
2383            with the experimental ``--pattern`` and ``--patterns-from`` options. Using these, you
2384            may specify the backup roots (starting points) and patterns for inclusion/exclusion.
2385            A root path starts with the prefix `R`, followed by a path (a plain path, not a
2386            file pattern). An include rule starts with the prefix +, an exclude rule starts
2387            with the prefix -, an exclude-norecurse rule starts with !, all followed by a pattern.
2388
2389            .. note::
2390
2391                Via ``--pattern`` or ``--patterns-from`` you can define BOTH inclusion and exclusion
2392                of files using pattern prefixes ``+`` and ``-``. With ``--exclude`` and
2393                ``--exclude-from`` ONLY excludes are defined.
2394
2395            Inclusion patterns are useful to include paths that are contained in an excluded
2396            path. The first matching pattern is used so if an include pattern matches before
2397            an exclude pattern, the file is backed up. If an exclude-norecurse pattern matches
2398            a directory, it won't recurse into it and won't discover any potential matches for
2399            include rules below that directory.
2400
2401            Note that the default pattern style for ``--pattern`` and ``--patterns-from`` is
2402            shell style (`sh:`), so those patterns behave similar to rsync include/exclude
2403            patterns. The pattern style can be set via the `P` prefix.
2404
2405            Patterns (``--pattern``) and excludes (``--exclude``) from the command line are
2406            considered first (in the order of appearance). Then patterns from ``--patterns-from``
2407            are added. Exclusion patterns from ``--exclude-from`` files are appended last.
2408
2409            Examples::
2410
2411                # backup pics, but not the ones from 2018, except the good ones:
2412                # note: using = is essential to avoid cmdline argument parsing issues.
2413                borg create --pattern=+pics/2018/good --pattern=-pics/2018 repo::arch pics
2414
2415                # use a file with patterns:
2416                borg create --patterns-from patterns.lst repo::arch
2417
2418            The patterns.lst file could look like that::
2419
2420                # "sh:" pattern style is the default, so the following line is not needed:
2421                P sh
2422                R /
2423                # can be rebuild
2424                - /home/*/.cache
2425                # they're downloads for a reason
2426                - /home/*/Downloads
2427                # susan is a nice person
2428                # include susans home
2429                + /home/susan
2430                # don't backup the other home directories
2431                - /home/*
2432                # don't even look in /proc
2433                ! /proc\n\n''')
2434    helptext['placeholders'] = textwrap.dedent('''
2435        Repository (or Archive) URLs, ``--prefix``, ``--glob-archives``, ``--comment``
2436        and ``--remote-path`` values support these placeholders:
2437
2438        {hostname}
2439            The (short) hostname of the machine.
2440
2441        {fqdn}
2442            The full name of the machine.
2443
2444        {reverse-fqdn}
2445            The full name of the machine in reverse domain name notation.
2446
2447        {now}
2448            The current local date and time, by default in ISO-8601 format.
2449            You can also supply your own `format string <https://docs.python.org/3.7/library/datetime.html#strftime-and-strptime-behavior>`_, e.g. {now:%Y-%m-%d_%H:%M:%S}
2450
2451        {utcnow}
2452            The current UTC date and time, by default in ISO-8601 format.
2453            You can also supply your own `format string <https://docs.python.org/3.7/library/datetime.html#strftime-and-strptime-behavior>`_, e.g. {utcnow:%Y-%m-%d_%H:%M:%S}
2454
2455        {user}
2456            The user name (or UID, if no name is available) of the user running borg.
2457
2458        {pid}
2459            The current process ID.
2460
2461        {borgversion}
2462            The version of borg, e.g.: 1.0.8rc1
2463
2464        {borgmajor}
2465            The version of borg, only the major version, e.g.: 1
2466
2467        {borgminor}
2468            The version of borg, only major and minor version, e.g.: 1.0
2469
2470        {borgpatch}
2471            The version of borg, only major, minor and patch version, e.g.: 1.0.8
2472
2473        If literal curly braces need to be used, double them for escaping::
2474
2475            borg create /path/to/repo::{{literal_text}}
2476
2477        Examples::
2478
2479            borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
2480            borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
2481            borg prune --prefix '{hostname}-' ...
2482
2483        .. note::
2484            systemd uses a difficult, non-standard syntax for command lines in unit files (refer to
2485            the `systemd.unit(5)` manual page).
2486
2487            When invoking borg from unit files, pay particular attention to escaping,
2488            especially when using the now/utcnow placeholders, since systemd performs its own
2489            %-based variable replacement even in quoted text. To avoid interference from systemd,
2490            double all percent signs (``{hostname}-{now:%Y-%m-%d_%H:%M:%S}``
2491            becomes ``{hostname}-{now:%%Y-%%m-%%d_%%H:%%M:%%S}``).\n\n''')
2492    helptext['compression'] = textwrap.dedent('''
2493        It is no problem to mix different compression methods in one repo,
2494        deduplication is done on the source data chunks (not on the compressed
2495        or encrypted data).
2496
2497        If some specific chunk was once compressed and stored into the repo, creating
2498        another backup that also uses this chunk will not change the stored chunk.
2499        So if you use different compression specs for the backups, whichever stores a
2500        chunk first determines its compression. See also borg recreate.
2501
2502        Compression is lz4 by default. If you want something else, you have to specify what you want.
2503
2504        Valid compression specifiers are:
2505
2506        none
2507            Do not compress.
2508
2509        lz4
2510            Use lz4 compression. Very high speed, very low compression. (default)
2511
2512        zstd[,L]
2513            Use zstd ("zstandard") compression, a modern wide-range algorithm.
2514            If you do not explicitely give the compression level L (ranging from 1
2515            to 22), it will use level 3.
2516            Archives compressed with zstd are not compatible with borg < 1.1.4.
2517
2518        zlib[,L]
2519            Use zlib ("gz") compression. Medium speed, medium compression.
2520            If you do not explicitely give the compression level L (ranging from 0
2521            to 9), it will use level 6.
2522            Giving level 0 (means "no compression", but still has zlib protocol
2523            overhead) is usually pointless, you better use "none" compression.
2524
2525        lzma[,L]
2526            Use lzma ("xz") compression. Low speed, high compression.
2527            If you do not explicitely give the compression level L (ranging from 0
2528            to 9), it will use level 6.
2529            Giving levels above 6 is pointless and counterproductive because it does
2530            not compress better due to the buffer size used by borg - but it wastes
2531            lots of CPU cycles and RAM.
2532
2533        auto,C[,L]
2534            Use a built-in heuristic to decide per chunk whether to compress or not.
2535            The heuristic tries with lz4 whether the data is compressible.
2536            For incompressible data, it will not use compression (uses "none").
2537            For compressible data, it uses the given C[,L] compression - with C[,L]
2538            being any valid compression specifier.
2539
2540        Examples::
2541
2542            borg create --compression lz4 REPO::ARCHIVE data
2543            borg create --compression zstd REPO::ARCHIVE data
2544            borg create --compression zstd,10 REPO::ARCHIVE data
2545            borg create --compression zlib REPO::ARCHIVE data
2546            borg create --compression zlib,1 REPO::ARCHIVE data
2547            borg create --compression auto,lzma,6 REPO::ARCHIVE data
2548            borg create --compression auto,lzma ...\n\n''')
2549
2550    def do_help(self, parser, commands, args):
2551        if not args.topic:
2552            parser.print_help()
2553        elif args.topic in self.helptext:
2554            print(rst_to_terminal(self.helptext[args.topic]))
2555        elif args.topic in commands:
2556            if args.epilog_only:
2557                print(commands[args.topic].epilog)
2558            elif args.usage_only:
2559                commands[args.topic].epilog = None
2560                commands[args.topic].print_help()
2561            else:
2562                commands[args.topic].print_help()
2563        else:
2564            msg_lines = []
2565            msg_lines += ['No help available on %s.' % args.topic]
2566            msg_lines += ['Try one of the following:']
2567            msg_lines += ['    Commands: %s' % ', '.join(sorted(commands.keys()))]
2568            msg_lines += ['    Topics: %s' % ', '.join(sorted(self.helptext.keys()))]
2569            parser.error('\n'.join(msg_lines))
2570        return self.exit_code
2571
2572    def do_subcommand_help(self, parser, args):
2573        """display infos about subcommand"""
2574        parser.print_help()
2575        return EXIT_SUCCESS
2576
2577    do_maincommand_help = do_subcommand_help
2578
2579    def preprocess_args(self, args):
2580        deprecations = [
2581            # ('--old', '--new' or None, 'Warning: "--old" has been deprecated. Use "--new" instead.'),
2582            ('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'),
2583            ('--keep-tag-files', '--keep-exclude-tags', 'Warning: "--keep-tag-files" has been deprecated. Use "--keep-exclude-tags" instead.'),
2584            ('--ignore-inode', None, 'Warning: "--ignore-inode" has been deprecated. Use "--files-cache=ctime,size" or "...=mtime,size" instead.'),
2585            ('--no-files-cache', None, 'Warning: "--no-files-cache" has been deprecated. Use "--files-cache=disabled" instead.'),
2586        ]
2587        for i, arg in enumerate(args[:]):
2588            for old_name, new_name, warning in deprecations:
2589                if arg.startswith(old_name):
2590                    if new_name is not None:
2591                        args[i] = arg.replace(old_name, new_name)
2592                    print(warning, file=sys.stderr)
2593        return args
2594
2595    class CommonOptions:
2596        """
2597        Support class to allow specifying common options directly after the top-level command.
2598
2599        Normally options can only be specified on the parser defining them, which means
2600        that generally speaking *all* options go after all sub-commands. This is annoying
2601        for common options in scripts, e.g. --remote-path or logging options.
2602
2603        This class allows adding the same set of options to both the top-level parser
2604        and the final sub-command parsers (but not intermediary sub-commands, at least for now).
2605
2606        It does so by giving every option's target name ("dest") a suffix indicating its level
2607        -- no two options in the parser hierarchy can have the same target --
2608        then, after parsing the command line, multiple definitions are resolved.
2609
2610        Defaults are handled by only setting them on the top-level parser and setting
2611        a sentinel object in all sub-parsers, which then allows to discern which parser
2612        supplied the option.
2613        """
2614
2615        def __init__(self, define_common_options, suffix_precedence):
2616            """
2617            *define_common_options* should be a callable taking one argument, which
2618            will be a argparse.Parser.add_argument-like function.
2619
2620            *define_common_options* will be called multiple times, and should call
2621            the passed function to define common options exactly the same way each time.
2622
2623            *suffix_precedence* should be a tuple of the suffixes that will be used.
2624            It is ordered from lowest precedence to highest precedence:
2625            An option specified on the parser belonging to index 0 is overridden if the
2626            same option is specified on any parser with a higher index.
2627            """
2628            self.define_common_options = define_common_options
2629            self.suffix_precedence = suffix_precedence
2630
2631            # Maps suffixes to sets of target names.
2632            # E.g. common_options["_subcommand"] = {..., "log_level", ...}
2633            self.common_options = dict()
2634            # Set of options with the 'append' action.
2635            self.append_options = set()
2636            # This is the sentinel object that replaces all default values in parsers
2637            # below the top-level parser.
2638            self.default_sentinel = object()
2639
2640        def add_common_group(self, parser, suffix, provide_defaults=False):
2641            """
2642            Add common options to *parser*.
2643
2644            *provide_defaults* must only be True exactly once in a parser hierarchy,
2645            at the top level, and False on all lower levels. The default is chosen
2646            accordingly.
2647
2648            *suffix* indicates the suffix to use internally. It also indicates
2649            which precedence the *parser* has for common options. See *suffix_precedence*
2650            of __init__.
2651            """
2652            assert suffix in self.suffix_precedence
2653
2654            def add_argument(*args, **kwargs):
2655                if 'dest' in kwargs:
2656                    kwargs.setdefault('action', 'store')
2657                    assert kwargs['action'] in ('help', 'store_const', 'store_true', 'store_false', 'store', 'append')
2658                    is_append = kwargs['action'] == 'append'
2659                    if is_append:
2660                        self.append_options.add(kwargs['dest'])
2661                        assert kwargs['default'] == [], 'The default is explicitly constructed as an empty list in resolve()'
2662                    else:
2663                        self.common_options.setdefault(suffix, set()).add(kwargs['dest'])
2664                    kwargs['dest'] += suffix
2665                    if not provide_defaults:
2666                        # Interpolate help now, in case the %(default)d (or so) is mentioned,
2667                        # to avoid producing incorrect help output.
2668                        # Assumption: Interpolated output can safely be interpolated again,
2669                        # which should always be the case.
2670                        # Note: We control all inputs.
2671                        kwargs['help'] = kwargs['help'] % kwargs
2672                        if not is_append:
2673                            kwargs['default'] = self.default_sentinel
2674
2675                common_group.add_argument(*args, **kwargs)
2676
2677            common_group = parser.add_argument_group('Common options')
2678            self.define_common_options(add_argument)
2679
2680        def resolve(self, args: argparse.Namespace):  # Namespace has "in" but otherwise is not like a dict.
2681            """
2682            Resolve the multiple definitions of each common option to the final value.
2683            """
2684            for suffix in self.suffix_precedence:
2685                # From highest level to lowest level, so the "most-specific" option wins, e.g.
2686                # "borg --debug create --info" shall result in --info being effective.
2687                for dest in self.common_options.get(suffix, []):
2688                    # map_from is this suffix' option name, e.g. log_level_subcommand
2689                    # map_to is the target name, e.g. log_level
2690                    map_from = dest + suffix
2691                    map_to = dest
2692                    # Retrieve value; depending on the action it may not exist, but usually does
2693                    # (store_const/store_true/store_false), either because the action implied a default
2694                    # or a default is explicitly supplied.
2695                    # Note that defaults on lower levels are replaced with default_sentinel.
2696                    # Only the top level has defaults.
2697                    value = getattr(args, map_from, self.default_sentinel)
2698                    if value is not self.default_sentinel:
2699                        # value was indeed specified on this level. Transfer value to target,
2700                        # and un-clobber the args (for tidiness - you *cannot* use the suffixed
2701                        # names for other purposes, obviously).
2702                        setattr(args, map_to, value)
2703                    try:
2704                        delattr(args, map_from)
2705                    except AttributeError:
2706                        pass
2707
2708            # Options with an "append" action need some special treatment. Instead of
2709            # overriding values, all specified values are merged together.
2710            for dest in self.append_options:
2711                option_value = []
2712                for suffix in self.suffix_precedence:
2713                    # Find values of this suffix, if any, and add them to the final list
2714                    extend_from = dest + suffix
2715                    if extend_from in args:
2716                        values = getattr(args, extend_from)
2717                        delattr(args, extend_from)
2718                        option_value.extend(values)
2719                setattr(args, dest, option_value)
2720
2721    def build_parser(self):
2722        # You can use :ref:`xyz` in the following usage pages. However, for plain-text view,
2723        # e.g. through "borg ... --help", define a substitution for the reference here.
2724        # It will replace the entire :ref:`foo` verbatim.
2725        rst_plain_text_references = {
2726            'a_status_oddity': '"I am seeing ‘A’ (added) status for a unchanged file!?"',
2727            'list_item_flags': '"Item flags"',
2728        }
2729
2730        def process_epilog(epilog):
2731            epilog = textwrap.dedent(epilog).splitlines()
2732            try:
2733                mode = borg.doc_mode
2734            except AttributeError:
2735                mode = 'command-line'
2736            if mode in ('command-line', 'build_usage'):
2737                epilog = [line for line in epilog if not line.startswith('.. man')]
2738            epilog = '\n'.join(epilog)
2739            if mode == 'command-line':
2740                epilog = rst_to_terminal(epilog, rst_plain_text_references)
2741            return epilog
2742
2743        def define_common_options(add_common_option):
2744            add_common_option('-h', '--help', action='help', help='show this help message and exit')
2745            add_common_option('--critical', dest='log_level',
2746                              action='store_const', const='critical', default='warning',
2747                              help='work on log level CRITICAL')
2748            add_common_option('--error', dest='log_level',
2749                              action='store_const', const='error', default='warning',
2750                              help='work on log level ERROR')
2751            add_common_option('--warning', dest='log_level',
2752                              action='store_const', const='warning', default='warning',
2753                              help='work on log level WARNING (default)')
2754            add_common_option('--info', '-v', '--verbose', dest='log_level',
2755                              action='store_const', const='info', default='warning',
2756                              help='work on log level INFO')
2757            add_common_option('--debug', dest='log_level',
2758                              action='store_const', const='debug', default='warning',
2759                              help='enable debug output, work on log level DEBUG')
2760            add_common_option('--debug-topic', metavar='TOPIC', dest='debug_topics', action='append', default=[],
2761                              help='enable TOPIC debugging (can be specified multiple times). '
2762                                   'The logger path is borg.debug.<TOPIC> if TOPIC is not fully qualified.')
2763            add_common_option('-p', '--progress', dest='progress', action='store_true',
2764                              help='show progress information')
2765            add_common_option('--log-json', dest='log_json', action='store_true',
2766                              help='Output one JSON object per log line instead of formatted text.')
2767            add_common_option('--lock-wait', metavar='SECONDS', dest='lock_wait', type=int, default=1,
2768                              help='wait at most SECONDS for acquiring a repository/cache lock (default: %(default)d).')
2769            add_common_option('--bypass-lock', dest='lock', action='store_false',
2770                              default=argparse.SUPPRESS,  # only create args attribute if option is specified
2771                              help='Bypass locking mechanism')
2772            add_common_option('--show-version', dest='show_version', action='store_true',
2773                              help='show/log the borg version')
2774            add_common_option('--show-rc', dest='show_rc', action='store_true',
2775                              help='show/log the return code (rc)')
2776            add_common_option('--umask', metavar='M', dest='umask', type=lambda s: int(s, 8), default=UMASK_DEFAULT,
2777                              help='set umask to M (local and remote, default: %(default)04o)')
2778            add_common_option('--remote-path', metavar='PATH', dest='remote_path',
2779                              help='use PATH as borg executable on the remote (default: "borg")')
2780            add_common_option('--remote-ratelimit', metavar='RATE', dest='remote_ratelimit', type=int,
2781                              help='set remote network upload rate limit in kiByte/s (default: 0=unlimited)')
2782            add_common_option('--consider-part-files', dest='consider_part_files', action='store_true',
2783                              help='treat part files like normal files (e.g. to list/extract them)')
2784            add_common_option('--debug-profile', metavar='FILE', dest='debug_profile', default=None,
2785                              help='Write execution profile in Borg format into FILE. For local use a Python-'
2786                                   'compatible file can be generated by suffixing FILE with ".pyprof".')
2787            add_common_option('--rsh', metavar='RSH', dest='rsh',
2788                              help="Use this command to connect to the 'borg serve' process (default: 'ssh')")
2789
2790        def define_exclude_and_patterns(add_option, *, tag_files=False, strip_components=False):
2791            add_option('-e', '--exclude', metavar='PATTERN', dest='patterns',
2792                       type=parse_exclude_pattern, action='append',
2793                       help='exclude paths matching PATTERN')
2794            add_option('--exclude-from', metavar='EXCLUDEFILE', action=ArgparseExcludeFileAction,
2795                       help='read exclude patterns from EXCLUDEFILE, one per line')
2796            add_option('--pattern', metavar='PATTERN', action=ArgparsePatternAction,
2797                       help='experimental: include/exclude paths matching PATTERN')
2798            add_option('--patterns-from', metavar='PATTERNFILE', action=ArgparsePatternFileAction,
2799                       help='experimental: read include/exclude patterns from PATTERNFILE, one per line')
2800
2801            if tag_files:
2802                add_option('--exclude-caches', dest='exclude_caches', action='store_true',
2803                           help='exclude directories that contain a CACHEDIR.TAG file '
2804                                '(http://www.bford.info/cachedir/spec.html)')
2805                add_option('--exclude-if-present', metavar='NAME', dest='exclude_if_present',
2806                           action='append', type=str,
2807                           help='exclude directories that are tagged by containing a filesystem object with '
2808                                'the given NAME')
2809                add_option('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags',
2810                           action='store_true',
2811                           help='if tag objects are specified with ``--exclude-if-present``, '
2812                                'don\'t omit the tag objects themselves from the backup archive')
2813
2814            if strip_components:
2815                add_option('--strip-components', metavar='NUMBER', dest='strip_components', type=int, default=0,
2816                           help='Remove the specified number of leading path elements. '
2817                                'Paths with fewer elements will be silently skipped.')
2818
2819        def define_exclusion_group(subparser, **kwargs):
2820            exclude_group = subparser.add_argument_group('Exclusion options')
2821            define_exclude_and_patterns(exclude_group.add_argument, **kwargs)
2822            return exclude_group
2823
2824        def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
2825            filters_group = subparser.add_argument_group('Archive filters',
2826                                                         'Archive filters can be applied to repository targets.')
2827            group = filters_group.add_mutually_exclusive_group()
2828            group.add_argument('-P', '--prefix', metavar='PREFIX', dest='prefix', type=PrefixSpec, default=None,
2829                               help='only consider archive names starting with this prefix.')
2830            group.add_argument('-a', '--glob-archives', metavar='GLOB', dest='glob_archives',
2831                               type=GlobSpec, default=None,
2832                               help='only consider archive names matching the glob. '
2833                                    'sh: rules apply, see "borg help patterns". '
2834                                    '``--prefix`` and ``--glob-archives`` are mutually exclusive.')
2835
2836            if sort_by:
2837                sort_by_default = 'timestamp'
2838                filters_group.add_argument('--sort-by', metavar='KEYS', dest='sort_by',
2839                                           type=SortBySpec, default=sort_by_default,
2840                                           help='Comma-separated list of sorting keys; valid keys are: {}; default is: {}'
2841                                           .format(', '.join(HUMAN_SORT_KEYS), sort_by_default))
2842
2843            if first_last:
2844                group = filters_group.add_mutually_exclusive_group()
2845                group.add_argument('--first', metavar='N', dest='first', default=0, type=positive_int_validator,
2846                                   help='consider first N archives after other filters were applied')
2847                group.add_argument('--last', metavar='N', dest='last', default=0, type=positive_int_validator,
2848                                   help='consider last N archives after other filters were applied')
2849
2850        parser = argparse.ArgumentParser(prog=self.prog, description='Borg - Deduplicated Backups',
2851                                         add_help=False)
2852        # paths and patterns must have an empty list as default everywhere
2853        parser.set_defaults(fallback2_func=functools.partial(self.do_maincommand_help, parser),
2854                            paths=[], patterns=[])
2855        parser.common_options = self.CommonOptions(define_common_options,
2856                                                   suffix_precedence=('_maincommand', '_midcommand', '_subcommand'))
2857        parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__,
2858                            help='show version number and exit')
2859        parser.common_options.add_common_group(parser, '_maincommand', provide_defaults=True)
2860
2861        common_parser = argparse.ArgumentParser(add_help=False, prog=self.prog)
2862        common_parser.set_defaults(paths=[], patterns=[])
2863        parser.common_options.add_common_group(common_parser, '_subcommand')
2864
2865        mid_common_parser = argparse.ArgumentParser(add_help=False, prog=self.prog)
2866        mid_common_parser.set_defaults(paths=[], patterns=[])
2867        parser.common_options.add_common_group(mid_common_parser, '_midcommand')
2868
2869        mount_epilog = process_epilog("""
2870        This command mounts an archive as a FUSE filesystem. This can be useful for
2871        browsing an archive or restoring individual files. Unless the ``--foreground``
2872        option is given the command will run in the background until the filesystem
2873        is ``umounted``.
2874
2875        The command ``borgfs`` provides a wrapper for ``borg mount``. This can also be
2876        used in fstab entries:
2877        ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto 0 0``
2878
2879        To allow a regular user to use fstab entries, add the ``user`` option:
2880        ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``
2881
2882        For FUSE configuration and mount options, see the mount.fuse(8) manual page.
2883
2884        Additional mount options supported by borg:
2885
2886        - versions: when used with a repository mount, this gives a merged, versioned
2887          view of the files in the archives. EXPERIMENTAL, layout may change in future.
2888        - allow_damaged_files: by default damaged files (where missing chunks were
2889          replaced with runs of zeros by borg check ``--repair``) are not readable and
2890          return EIO (I/O error). Set this option to read such files.
2891        - ignore_permissions: for security reasons the "default_permissions" mount
2892          option is internally enforced by borg. "ignore_permissions" can be given to
2893          not enforce "default_permissions".
2894
2895        The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
2896        to tweak the performance. It sets the number of cached data chunks; additional
2897        memory usage can be up to ~8 MiB times this number. The default is the number
2898        of CPU cores.
2899
2900        When the daemonized process receives a signal or crashes, it does not unmount.
2901        Unmounting in these cases could cause an active rsync or similar process
2902        to unintentionally delete data.
2903
2904        When running in the foreground ^C/SIGINT unmounts cleanly, but other
2905        signals or crashes do not.
2906        """)
2907
2908        if parser.prog == 'borgfs':
2909            parser.description = self.do_mount.__doc__
2910            parser.epilog = mount_epilog
2911            parser.formatter_class = argparse.RawDescriptionHelpFormatter
2912            parser.help = 'mount repository'
2913            subparser = parser
2914        else:
2915            subparsers = parser.add_subparsers(title='required arguments', metavar='<command>')
2916            subparser = subparsers.add_parser('mount', parents=[common_parser], add_help=False,
2917                                            description=self.do_mount.__doc__,
2918                                            epilog=mount_epilog,
2919                                            formatter_class=argparse.RawDescriptionHelpFormatter,
2920                                            help='mount repository')
2921        subparser.set_defaults(func=self.do_mount)
2922        subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(),
2923                            help='repository or archive to mount')
2924        subparser.add_argument('mountpoint', metavar='MOUNTPOINT', type=str,
2925                            help='where to mount filesystem')
2926        subparser.add_argument('-f', '--foreground', dest='foreground',
2927                            action='store_true',
2928                            help='stay in foreground, do not daemonize')
2929        subparser.add_argument('-o', dest='options', type=str,
2930                            help='Extra mount options')
2931        define_archive_filters_group(subparser)
2932        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
2933                               help='paths to extract; patterns are supported')
2934        define_exclusion_group(subparser, strip_components=True)
2935        if parser.prog == 'borgfs':
2936            return parser
2937
2938        serve_epilog = process_epilog("""
2939        This command starts a repository server process. This command is usually not used manually.
2940        """)
2941        subparser = subparsers.add_parser('serve', parents=[common_parser], add_help=False,
2942                                          description=self.do_serve.__doc__, epilog=serve_epilog,
2943                                          formatter_class=argparse.RawDescriptionHelpFormatter,
2944                                          help='start repository server process')
2945        subparser.set_defaults(func=self.do_serve)
2946        subparser.add_argument('--restrict-to-path', metavar='PATH', dest='restrict_to_paths', action='append',
2947                               help='restrict repository access to PATH. '
2948                                    'Can be specified multiple times to allow the client access to several directories. '
2949                                    'Access to all sub-directories is granted implicitly; PATH doesn\'t need to directly point to a repository.')
2950        subparser.add_argument('--restrict-to-repository', metavar='PATH', dest='restrict_to_repositories', action='append',
2951                                help='restrict repository access. Only the repository located at PATH '
2952                                     '(no sub-directories are considered) is accessible. '
2953                                     'Can be specified multiple times to allow the client access to several repositories. '
2954                                     'Unlike ``--restrict-to-path`` sub-directories are not accessible; '
2955                                     'PATH needs to directly point at a repository location. '
2956                                     'PATH may be an empty directory or the last element of PATH may not exist, in which case '
2957                                     'the client may initialize a repository there.')
2958        subparser.add_argument('--append-only', dest='append_only', action='store_true',
2959                               help='only allow appending to repository segment files')
2960        subparser.add_argument('--storage-quota', metavar='QUOTA', dest='storage_quota',
2961                               type=parse_storage_quota, default=None,
2962                               help='Override storage quota of the repository (e.g. 5G, 1.5T). '
2963                                    'When a new repository is initialized, sets the storage quota on the new '
2964                                    'repository as well. Default: no quota.')
2965
2966        init_epilog = process_epilog("""
2967        This command initializes an empty repository. A repository is a filesystem
2968        directory containing the deduplicated data from zero or more archives.
2969
2970        Encryption can be enabled at repository init time. It cannot be changed later.
2971
2972        It is not recommended to work without encryption. Repository encryption protects
2973        you e.g. against the case that an attacker has access to your backup repository.
2974
2975        Borg relies on randomly generated key material and uses that for chunking, id
2976        generation, encryption and authentication. The key material is encrypted using
2977        the passphrase you give before it is stored on-disk.
2978
2979        You need to be careful with the key / the passphrase:
2980
2981        If you want "passphrase-only" security, use one of the repokey modes. The
2982        key will be stored inside the repository (in its "config" file). In above
2983        mentioned attack scenario, the attacker will have the key (but not the
2984        passphrase).
2985
2986        If you want "passphrase and having-the-key" security, use one of the keyfile
2987        modes. The key will be stored in your home directory (in .config/borg/keys).
2988        In the attack scenario, the attacker who has just access to your repo won't
2989        have the key (and also not the passphrase).
2990
2991        Make a backup copy of the key file (keyfile mode) or repo config file
2992        (repokey mode) and keep it at a safe place, so you still have the key in
2993        case it gets corrupted or lost. Also keep the passphrase at a safe place.
2994        The backup that is encrypted with that key won't help you with that, of course.
2995
2996        Make sure you use a good passphrase. Not too short, not too simple. The real
2997        encryption / decryption key is encrypted with / locked by your passphrase.
2998        If an attacker gets your key, he can't unlock and use it without knowing the
2999        passphrase.
3000
3001        Be careful with special or non-ascii characters in your passphrase:
3002
3003        - Borg processes the passphrase as unicode (and encodes it as utf-8),
3004          so it does not have problems dealing with even the strangest characters.
3005        - BUT: that does not necessarily apply to your OS / VM / keyboard configuration.
3006
3007        So better use a long passphrase made from simple ascii chars than one that
3008        includes non-ascii stuff or characters that are hard/impossible to enter on
3009        a different keyboard layout.
3010
3011        You can change your passphrase for existing repos at any time, it won't affect
3012        the encryption/decryption key or other secrets.
3013
3014        Encryption modes
3015        ++++++++++++++++
3016
3017        You can choose from the encryption modes seen in the table below on a per-repo
3018        basis. The mode determines encryption algorithm, hash/MAC algorithm and also the
3019        key storage location.
3020
3021        Example: `borg init --encryption repokey ...`
3022
3023        .. nanorst: inline-fill
3024
3025        +----------+---------------+------------------------+--------------------------+
3026        | Hash/MAC | Not encrypted | Not encrypted,         | Encrypted (AEAD w/ AES)  |
3027        |          | no auth       | but authenticated      | and authenticated        |
3028        +----------+---------------+------------------------+--------------------------+
3029        | SHA-256  | none          | `authenticated`        | repokey                  |
3030        |          |               |                        | keyfile                  |
3031        +----------+---------------+------------------------+--------------------------+
3032        | BLAKE2b  | n/a           | `authenticated-blake2` | `repokey-blake2`         |
3033        |          |               |                        | `keyfile-blake2`         |
3034        +----------+---------------+------------------------+--------------------------+
3035
3036        .. nanorst: inline-replace
3037
3038        Modes `marked like this` in the above table are new in Borg 1.1 and are not
3039        backwards-compatible with Borg 1.0.x.
3040
3041        On modern Intel/AMD CPUs (except very cheap ones), AES is usually
3042        hardware-accelerated.
3043        BLAKE2b is faster than SHA256 on Intel/AMD 64-bit CPUs
3044        (except AMD Ryzen and future CPUs with SHA extensions),
3045        which makes `authenticated-blake2` faster than `none` and `authenticated`.
3046
3047        On modern ARM CPUs, NEON provides hardware acceleration for SHA256 making it faster
3048        than BLAKE2b-256 there. NEON accelerates AES as well.
3049
3050        Hardware acceleration is always used automatically when available.
3051
3052        `repokey` and `keyfile` use AES-CTR-256 for encryption and HMAC-SHA256 for
3053        authentication in an encrypt-then-MAC (EtM) construction. The chunk ID hash
3054        is HMAC-SHA256 as well (with a separate key).
3055        These modes are compatible with Borg 1.0.x.
3056
3057        `repokey-blake2` and `keyfile-blake2` are also authenticated encryption modes,
3058        but use BLAKE2b-256 instead of HMAC-SHA256 for authentication. The chunk ID
3059        hash is a keyed BLAKE2b-256 hash.
3060        These modes are new and *not* compatible with Borg 1.0.x.
3061
3062        `authenticated` mode uses no encryption, but authenticates repository contents
3063        through the same HMAC-SHA256 hash as the `repokey` and `keyfile` modes (it uses it
3064        as the chunk ID hash). The key is stored like `repokey`.
3065        This mode is new and *not* compatible with Borg 1.0.x.
3066
3067        `authenticated-blake2` is like `authenticated`, but uses the keyed BLAKE2b-256 hash
3068        from the other blake2 modes.
3069        This mode is new and *not* compatible with Borg 1.0.x.
3070
3071        `none` mode uses no encryption and no authentication. It uses SHA256 as chunk
3072        ID hash. This mode is not recommended, you should rather consider using an authenticated
3073        or authenticated/encrypted mode. This mode has possible denial-of-service issues
3074        when running ``borg create`` on contents controlled by an attacker.
3075        Use it only for new repositories where no encryption is wanted **and** when compatibility
3076        with 1.0.x is important. If compatibility with 1.0.x is not important, use
3077        `authenticated-blake2` or `authenticated` instead.
3078        This mode is compatible with Borg 1.0.x.
3079        """)
3080        subparser = subparsers.add_parser('init', parents=[common_parser], add_help=False,
3081                                          description=self.do_init.__doc__, epilog=init_epilog,
3082                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3083                                          help='initialize empty repository')
3084        subparser.set_defaults(func=self.do_init)
3085        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
3086                               type=location_validator(archive=False),
3087                               help='repository to create')
3088        subparser.add_argument('-e', '--encryption', metavar='MODE', dest='encryption', required=True,
3089                               choices=key_argument_names(),
3090                               help='select encryption key mode **(required)**')
3091        subparser.add_argument('--append-only', dest='append_only', action='store_true',
3092                               help='create an append-only mode repository')
3093        subparser.add_argument('--storage-quota', metavar='QUOTA', dest='storage_quota', default=None,
3094                               type=parse_storage_quota,
3095                               help='Set storage quota of the new repository (e.g. 5G, 1.5T). Default: no quota.')
3096        subparser.add_argument('--make-parent-dirs', dest='make_parent_dirs', action='store_true',
3097                               help='create the parent directories of the repository directory, if they are missing.')
3098
3099        check_epilog = process_epilog("""
3100        The check command verifies the consistency of a repository and the corresponding archives.
3101
3102        check --repair is a potentially dangerous function and might lead to data loss
3103        (for kinds of corruption it is not capable of dealing with). BE VERY CAREFUL!
3104
3105        Pursuant to the previous warning it is also highly recommended to test the
3106        reliability of the hardware running this software with stress testing software
3107        such as memory testers. Unreliable hardware can also lead to data loss especially
3108        when this command is run in repair mode.
3109
3110        First, the underlying repository data files are checked:
3111
3112        - For all segments, the segment magic header is checked.
3113        - For all objects stored in the segments, all metadata (e.g. CRC and size) and
3114          all data is read. The read data is checked by size and CRC. Bit rot and other
3115          types of accidental damage can be detected this way.
3116        - In repair mode, if an integrity error is detected in a segment, try to recover
3117          as many objects from the segment as possible.
3118        - In repair mode, make sure that the index is consistent with the data stored in
3119          the segments.
3120        - If checking a remote repo via ``ssh:``, the repo check is executed on the server
3121          without causing significant network traffic.
3122        - The repository check can be skipped using the ``--archives-only`` option.
3123
3124        Second, the consistency and correctness of the archive metadata is verified:
3125
3126        - Is the repo manifest present? If not, it is rebuilt from archive metadata
3127          chunks (this requires reading and decrypting of all metadata and data).
3128        - Check if archive metadata chunk is present; if not, remove archive from manifest.
3129        - For all files (items) in the archive, for all chunks referenced by these
3130          files, check if chunk is present. In repair mode, if a chunk is not present,
3131          replace it with a same-size replacement chunk of zeroes. If a previously lost
3132          chunk reappears (e.g. via a later backup), in repair mode the all-zero replacement
3133          chunk will be replaced by the correct chunk. This requires reading of archive and
3134          file metadata, but not data.
3135        - In repair mode, when all the archives were checked, orphaned chunks are deleted
3136          from the repo. One cause of orphaned chunks are input file related errors (like
3137          read errors) in the archive creation process.
3138        - In verify-data mode, a complete cryptographic verification of the archive data
3139          integrity is performed. This conflicts with ``--repository-only`` as this mode
3140          only makes sense if the archive checks are enabled. The full details of this mode
3141          are documented below.
3142        - If checking a remote repo via ``ssh:``, the archive check is executed on the
3143          client machine because it requires decryption, and this is always done client-side
3144          as key access is needed.
3145        - The archive checks can be time consuming; they can be skipped using the
3146          ``--repository-only`` option.
3147
3148        The ``--verify-data`` option will perform a full integrity verification (as opposed to
3149        checking the CRC32 of the segment) of data, which means reading the data from the
3150        repository, decrypting and decompressing it. This is a cryptographic verification,
3151        which will detect (accidental) corruption. For encrypted repositories it is
3152        tamper-resistant as well, unless the attacker has access to the keys. It is also very
3153        slow.
3154        """)
3155        subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False,
3156                                          description=self.do_check.__doc__,
3157                                          epilog=check_epilog,
3158                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3159                                          help='verify repository')
3160        subparser.set_defaults(func=self.do_check)
3161        subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
3162                               type=location_validator(),
3163                               help='repository or archive to check consistency of')
3164        subparser.add_argument('--repository-only', dest='repo_only', action='store_true',
3165                               help='only perform repository checks')
3166        subparser.add_argument('--archives-only', dest='archives_only', action='store_true',
3167                               help='only perform archives checks')
3168        subparser.add_argument('--verify-data', dest='verify_data', action='store_true',
3169                               help='perform cryptographic archive data integrity verification '
3170                                    '(conflicts with ``--repository-only``)')
3171        subparser.add_argument('--repair', dest='repair', action='store_true',
3172                               help='attempt to repair any inconsistencies found')
3173        subparser.add_argument('--save-space', dest='save_space', action='store_true',
3174                               help='work slower, but using less space')
3175        define_archive_filters_group(subparser)
3176
3177        subparser = subparsers.add_parser('key', parents=[mid_common_parser], add_help=False,
3178                                          description="Manage a keyfile or repokey of a repository",
3179                                          epilog="",
3180                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3181                                          help='manage repository key')
3182
3183        key_parsers = subparser.add_subparsers(title='required arguments', metavar='<command>')
3184        subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser))
3185
3186        key_export_epilog = process_epilog("""
3187        If repository encryption is used, the repository is inaccessible
3188        without the key. This command allows to backup this essential key.
3189        Note that the backup produced does not include the passphrase itself
3190        (i.e. the exported key stays encrypted). In order to regain access to a
3191        repository, one needs both the exported key and the original passphrase.
3192
3193        There are two backup formats. The normal backup format is suitable for
3194        digital storage as a file. The ``--paper`` backup format is optimized
3195        for printing and typing in while importing, with per line checks to
3196        reduce problems with manual input.
3197
3198        For repositories using keyfile encryption the key is saved locally
3199        on the system that is capable of doing backups. To guard against loss
3200        of this key, the key needs to be backed up independently of the main
3201        data backup.
3202
3203        For repositories using the repokey encryption the key is saved in the
3204        repository in the config file. A backup is thus not strictly needed,
3205        but guards against the repository becoming inaccessible if the file
3206        is damaged for some reason.
3207        """)
3208        subparser = key_parsers.add_parser('export', parents=[common_parser], add_help=False,
3209                                          description=self.do_key_export.__doc__,
3210                                          epilog=key_export_epilog,
3211                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3212                                          help='export repository key for backup')
3213        subparser.set_defaults(func=self.do_key_export)
3214        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
3215                               type=location_validator(archive=False))
3216        subparser.add_argument('path', metavar='PATH', nargs='?', type=str,
3217                               help='where to store the backup')
3218        subparser.add_argument('--paper', dest='paper', action='store_true',
3219                               help='Create an export suitable for printing and later type-in')
3220        subparser.add_argument('--qr-html', dest='qr', action='store_true',
3221                               help='Create an html file suitable for printing and later type-in or qr scan')
3222
3223        key_import_epilog = process_epilog("""
3224        This command allows to restore a key previously backed up with the
3225        export command.
3226
3227        If the ``--paper`` option is given, the import will be an interactive
3228        process in which each line is checked for plausibility before
3229        proceeding to the next line. For this format PATH must not be given.
3230        """)
3231        subparser = key_parsers.add_parser('import', parents=[common_parser], add_help=False,
3232                                          description=self.do_key_import.__doc__,
3233                                          epilog=key_import_epilog,
3234                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3235                                          help='import repository key from backup')
3236        subparser.set_defaults(func=self.do_key_import)
3237        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
3238                               type=location_validator(archive=False))
3239        subparser.add_argument('path', metavar='PATH', nargs='?', type=str,
3240                               help='path to the backup (\'-\' to read from stdin)')
3241        subparser.add_argument('--paper', dest='paper', action='store_true',
3242                               help='interactively import from a backup done with ``--paper``')
3243
3244        change_passphrase_epilog = process_epilog("""
3245        The key files used for repository encryption are optionally passphrase
3246        protected. This command can be used to change this passphrase.
3247
3248        Please note that this command only changes the passphrase, but not any
3249        secret protected by it (like e.g. encryption/MAC keys or chunker seed).
3250        Thus, changing the passphrase after passphrase and borg key got compromised
3251        does not protect future (nor past) backups to the same repository.
3252        """)
3253        subparser = key_parsers.add_parser('change-passphrase', parents=[common_parser], add_help=False,
3254                                          description=self.do_change_passphrase.__doc__,
3255                                          epilog=change_passphrase_epilog,
3256                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3257                                          help='change repository passphrase')
3258        subparser.set_defaults(func=self.do_change_passphrase)
3259        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
3260                               type=location_validator(archive=False))
3261
3262        # Borg 1.0 alias for change passphrase (without the "key" subcommand)
3263        subparser = subparsers.add_parser('change-passphrase', parents=[common_parser], add_help=False,
3264                                          description=self.do_change_passphrase.__doc__,
3265                                          epilog=change_passphrase_epilog,
3266                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3267                                          help='change repository passphrase')
3268        subparser.set_defaults(func=self.do_change_passphrase_deprecated)
3269        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
3270                               type=location_validator(archive=False))
3271
3272        migrate_to_repokey_epilog = process_epilog("""
3273        This command migrates a repository from passphrase mode (removed in Borg 1.0)
3274        to repokey mode.
3275
3276        You will be first asked for the repository passphrase (to open it in passphrase
3277        mode). This is the same passphrase as you used to use for this repo before 1.0.
3278
3279        It will then derive the different secrets from this passphrase.
3280
3281        Then you will be asked for a new passphrase (twice, for safety). This
3282        passphrase will be used to protect the repokey (which contains these same
3283        secrets in encrypted form). You may use the same passphrase as you used to
3284        use, but you may also use a different one.
3285
3286        After migrating to repokey mode, you can change the passphrase at any time.
3287        But please note: the secrets will always stay the same and they could always
3288        be derived from your (old) passphrase-mode passphrase.
3289        """)
3290        subparser = key_parsers.add_parser('migrate-to-repokey', parents=[common_parser], add_help=False,
3291                                          description=self.do_migrate_to_repokey.__doc__,
3292                                          epilog=migrate_to_repokey_epilog,
3293                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3294                                          help='migrate passphrase-mode repository to repokey')
3295        subparser.set_defaults(func=self.do_migrate_to_repokey)
3296        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
3297                               type=location_validator(archive=False))
3298
3299        create_epilog = process_epilog("""
3300        This command creates a backup archive containing all files found while recursively
3301        traversing all paths specified. Paths are added to the archive as they are given,
3302        that means if relative paths are desired, the command has to be run from the correct
3303        directory.
3304
3305        When giving '-' as path, borg will read data from standard input and create a
3306        file 'stdin' in the created archive from that data. See section *Reading from
3307        stdin* below for details.
3308
3309        The archive will consume almost no disk space for files or parts of files that
3310        have already been stored in other archives.
3311
3312        The archive name needs to be unique. It must not end in '.checkpoint' or
3313        '.checkpoint.N' (with N being a number), because these names are used for
3314        checkpoints and treated in special ways.
3315
3316        In the archive name, you may use the following placeholders:
3317        {now}, {utcnow}, {fqdn}, {hostname}, {user} and some others.
3318
3319        Backup speed is increased by not reprocessing files that are already part of
3320        existing archives and weren't modified. The detection of unmodified files is
3321        done by comparing multiple file metadata values with previous values kept in
3322        the files cache.
3323
3324        This comparison can operate in different modes as given by ``--files-cache``:
3325
3326        - ctime,size,inode (default)
3327        - mtime,size,inode (default behaviour of borg versions older than 1.1.0rc4)
3328        - ctime,size (ignore the inode number)
3329        - mtime,size (ignore the inode number)
3330        - rechunk,ctime (all files are considered modified - rechunk, cache ctime)
3331        - rechunk,mtime (all files are considered modified - rechunk, cache mtime)
3332        - disabled (disable the files cache, all files considered modified - rechunk)
3333
3334        inode number: better safety, but often unstable on network filesystems
3335
3336        Normally, detecting file modifications will take inode information into
3337        consideration to improve the reliability of file change detection.
3338        This is problematic for files located on sshfs and similar network file
3339        systems which do not provide stable inode numbers, such files will always
3340        be considered modified. You can use modes without `inode` in this case to
3341        improve performance, but reliability of change detection might be reduced.
3342
3343        ctime vs. mtime: safety vs. speed
3344
3345        - ctime is a rather safe way to detect changes to a file (metadata and contents)
3346          as it can not be set from userspace. But, a metadata-only change will already
3347          update the ctime, so there might be some unnecessary chunking/hashing even
3348          without content changes. Some filesystems do not support ctime (change time).
3349          E.g. doing a chown or chmod to a file will change its ctime.
3350        - mtime usually works and only updates if file contents were changed. But mtime
3351          can be arbitrarily set from userspace, e.g. to set mtime back to the same value
3352          it had before a content change happened. This can be used maliciously as well as
3353          well-meant, but in both cases mtime based cache modes can be problematic.
3354
3355        The mount points of filesystems or filesystem snapshots should be the same for every
3356        creation of a new archive to ensure fast operation. This is because the file cache that
3357        is used to determine changed files quickly uses absolute filenames.
3358        If this is not possible, consider creating a bind mount to a stable location.
3359
3360        The ``--progress`` option shows (from left to right) Original, Compressed and Deduplicated
3361        (O, C and D, respectively), then the Number of files (N) processed so far, followed by
3362        the currently processed path.
3363
3364        When using ``--stats``, you will get some statistics about how much data was
3365        added - the "This Archive" deduplicated size there is most interesting as that is
3366        how much your repository will grow. Please note that the "All archives" stats refer to
3367        the state after creation. Also, the ``--stats`` and ``--dry-run`` options are mutually
3368        exclusive because the data is not actually compressed and deduplicated during a dry run.
3369
3370        See the output of the "borg help patterns" command for more help on exclude patterns.
3371
3372        See the output of the "borg help placeholders" command for more help on placeholders.
3373
3374        .. man NOTES
3375
3376        The ``--exclude`` patterns are not like tar. In tar ``--exclude`` .bundler/gems will
3377        exclude foo/.bundler/gems. In borg it will not, you need to use ``--exclude``
3378        '\\*/.bundler/gems' to get the same effect. See ``borg help patterns`` for
3379        more information.
3380
3381        In addition to using ``--exclude`` patterns, it is possible to use
3382        ``--exclude-if-present`` to specify the name of a filesystem object (e.g. a file
3383        or folder name) which, when contained within another folder, will prevent the
3384        containing folder from being backed up.  By default, the containing folder and
3385        all of its contents will be omitted from the backup.  If, however, you wish to
3386        only include the objects specified by ``--exclude-if-present`` in your backup,
3387        and not include any other contents of the containing folder, this can be enabled
3388        through using the ``--keep-exclude-tags`` option.
3389
3390        The ``-x`` or ``--one-file-system`` option excludes directories, that are mountpoints (and everything in them).
3391        It detects mountpoints by comparing the device number from the output of ``stat()`` of the directory and its
3392        parent directory. Specifically, it excludes directories for which ``stat()`` reports a device number different
3393        from the device number of their parent. Be aware that in Linux (and possibly elsewhere) there are directories
3394        with device number different from their parent, which the kernel does not consider a mountpoint and also the
3395        other way around. Examples are bind mounts (possibly same device number, but always a mountpoint) and ALL
3396        subvolumes of a btrfs (different device number from parent but not necessarily a mountpoint). Therefore when
3397        using ``--one-file-system``, one should make doubly sure that the backup works as intended especially when using
3398        btrfs. This is even more important, if the btrfs layout was created by someone else, e.g. a distribution
3399        installer.
3400
3401
3402        .. _list_item_flags:
3403
3404        Item flags
3405        ++++++++++
3406
3407        ``--list`` outputs a list of all files, directories and other
3408        file system items it considered (no matter whether they had content changes
3409        or not). For each item, it prefixes a single-letter flag that indicates type
3410        and/or status of the item.
3411
3412        If you are interested only in a subset of that output, you can give e.g.
3413        ``--filter=AME`` and it will only show regular files with A, M or E status (see
3414        below).
3415
3416        A uppercase character represents the status of a regular file relative to the
3417        "files" cache (not relative to the repo -- this is an issue if the files cache
3418        is not used). Metadata is stored in any case and for 'A' and 'M' also new data
3419        chunks are stored. For 'U' all data chunks refer to already existing chunks.
3420
3421        - 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ)
3422        - 'M' = regular file, modified
3423        - 'U' = regular file, unchanged
3424        - 'E' = regular file, an error happened while accessing/reading *this* file
3425
3426        A lowercase character means a file type other than a regular file,
3427        borg usually just stores their metadata:
3428
3429        - 'd' = directory
3430        - 'b' = block device
3431        - 'c' = char device
3432        - 'h' = regular file, hardlink (to already seen inodes)
3433        - 's' = symlink
3434        - 'f' = fifo
3435
3436        Other flags used include:
3437
3438        - 'i' = backup data was read from standard input (stdin)
3439        - '-' = dry run, item was *not* backed up
3440        - 'x' = excluded, item was *not* backed up
3441        - '?' = missing status code (if you see this, please file a bug report!)
3442
3443        Reading from stdin
3444        ++++++++++++++++++
3445
3446        To read from stdin, specify ``-`` as path and pipe directly to borg::
3447
3448            backup-vm --id myvm --stdout | borg create REPO::ARCHIVE -
3449
3450        Note that piping to borg creates an archive even if the command piping
3451        to borg exits with a failure. In this case, **one can end up with
3452        truncated output being backed up**.
3453
3454        Reading from stdin yields just a stream of data without file metadata
3455        associated with it, and the files cache is not needed at all. So it is
3456        safe to disable it via ``--no-files-cache`` and speed up backup
3457        creation a bit.
3458
3459        By default, the content read from stdin is stored in a file called 'stdin'.
3460        Use ``--stdin-name`` to change the name.
3461        """)
3462
3463        subparser = subparsers.add_parser('create', parents=[common_parser], add_help=False,
3464                                          description=self.do_create.__doc__,
3465                                          epilog=create_epilog,
3466                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3467                                          help='create backup')
3468        subparser.set_defaults(func=self.do_create)
3469
3470        # note: --dry-run and --stats are mutually exclusive, but we do not want to abort when
3471        #  parsing, but rather proceed with the dry-run, but without stats (see run() method).
3472        subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true',
3473                               help='do not create a backup archive')
3474        subparser.add_argument('-s', '--stats', dest='stats', action='store_true',
3475                               help='print statistics for the created archive')
3476
3477        subparser.add_argument('--list', dest='output_list', action='store_true',
3478                               help='output verbose list of items (files, dirs, ...)')
3479        subparser.add_argument('--filter', metavar='STATUSCHARS', dest='output_filter',
3480                               help='only display items with the given status characters (see description)')
3481        subparser.add_argument('--json', action='store_true',
3482                               help='output stats as JSON. Implies ``--stats``.')
3483        subparser.add_argument('--no-cache-sync', dest='no_cache_sync', action='store_true',
3484                               help='experimental: do not synchronize the cache. Implies not using the files cache.')
3485        subparser.add_argument('--no-files-cache', dest='cache_files', action='store_false',
3486                               help='do not load/update the file metadata cache used to detect unchanged files')
3487        subparser.add_argument('--stdin-name', metavar='NAME', dest='stdin_name', default='stdin',
3488                               help='use NAME in archive for stdin data (default: "stdin")')
3489        subparser.add_argument('--stdin-user', metavar='USER', dest='stdin_user', default=uid2user(0),
3490                                help='set user USER in archive for stdin data (default: %(default)r)')
3491        subparser.add_argument('--stdin-group', metavar='GROUP', dest='stdin_group', default=gid2group(0),
3492                                help='set group GROUP in archive for stdin data (default: %(default)r)')
3493        subparser.add_argument('--stdin-mode', metavar='M', dest='stdin_mode', type=lambda s: int(s, 8), default=STDIN_MODE_DEFAULT,
3494                                help='set mode to M in archive for stdin data (default: %(default)04o)')
3495
3496        exclude_group = define_exclusion_group(subparser, tag_files=True)
3497        exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true',
3498                                   help='exclude files flagged NODUMP')
3499
3500        fs_group = subparser.add_argument_group('Filesystem options')
3501        fs_group.add_argument('-x', '--one-file-system', dest='one_file_system', action='store_true',
3502                              help='stay in the same file system and do not store mount points of other file systems.  This might behave different from your expectations, see the docs.')
3503        fs_group.add_argument('--numeric-owner', dest='numeric_owner', action='store_true',
3504                              help='only store numeric user and group identifiers')
3505        fs_group.add_argument('--noatime', dest='noatime', action='store_true',
3506                              help='do not store atime into archive')
3507        fs_group.add_argument('--noctime', dest='noctime', action='store_true',
3508                              help='do not store ctime into archive')
3509        fs_group.add_argument('--nobirthtime', dest='nobirthtime', action='store_true',
3510                              help='do not store birthtime (creation date) into archive')
3511        fs_group.add_argument('--nobsdflags', dest='nobsdflags', action='store_true',
3512                              help='do not read and store bsdflags (e.g. NODUMP, IMMUTABLE) into archive')
3513        fs_group.add_argument('--noacls', dest='noacls', action='store_true',
3514                              help='do not read and store ACLs into archive')
3515        fs_group.add_argument('--noxattrs', dest='noxattrs', action='store_true',
3516                              help='do not read and store xattrs into archive')
3517        fs_group.add_argument('--ignore-inode', dest='ignore_inode', action='store_true',
3518                              help='ignore inode data in the file metadata cache used to detect unchanged files.')
3519        fs_group.add_argument('--files-cache', metavar='MODE', dest='files_cache_mode',
3520                              type=FilesCacheMode, default=DEFAULT_FILES_CACHE_MODE_UI,
3521                              help='operate files cache in MODE. default: %s' % DEFAULT_FILES_CACHE_MODE_UI)
3522        fs_group.add_argument('--read-special', dest='read_special', action='store_true',
3523                              help='open and read block and char device files as well as FIFOs as if they were '
3524                                   'regular files. Also follows symlinks pointing to these kinds of files.')
3525
3526        archive_group = subparser.add_argument_group('Archive options')
3527        archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', type=CommentSpec, default='',
3528                                   help='add a comment text to the archive')
3529        archive_group.add_argument('--timestamp', metavar='TIMESTAMP', dest='timestamp',
3530                                   type=timestamp, default=None,
3531                                   help='manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). '
3532                                        'Alternatively, give a reference file/directory.')
3533        archive_group.add_argument('-c', '--checkpoint-interval', metavar='SECONDS', dest='checkpoint_interval',
3534                                   type=int, default=1800,
3535                                   help='write checkpoint every SECONDS seconds (Default: 1800)')
3536        archive_group.add_argument('--chunker-params', metavar='PARAMS', dest='chunker_params',
3537                                   type=ChunkerParams, default=CHUNKER_PARAMS,
3538                                   help='specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, '
3539                                        'HASH_MASK_BITS, HASH_WINDOW_SIZE). default: %d,%d,%d,%d' % CHUNKER_PARAMS)
3540        archive_group.add_argument('-C', '--compression', metavar='COMPRESSION', dest='compression',
3541                                   type=CompressionSpec, default=CompressionSpec('lz4'),
3542                                   help='select compression algorithm, see the output of the '
3543                                        '"borg help compression" command for details.')
3544
3545        subparser.add_argument('location', metavar='ARCHIVE',
3546                               type=location_validator(archive=True),
3547                               help='name of archive to create (must be also a valid directory name)')
3548        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
3549                               help='paths to archive')
3550
3551        extract_epilog = process_epilog("""
3552        This command extracts the contents of an archive. By default the entire
3553        archive is extracted but a subset of files and directories can be selected
3554        by passing a list of ``PATHs`` as arguments. The file selection can further
3555        be restricted by using the ``--exclude`` option.
3556
3557        See the output of the "borg help patterns" command for more help on exclude patterns.
3558
3559        By using ``--dry-run``, you can do all extraction steps except actually writing the
3560        output data: reading metadata and data chunks from the repo, checking the hash/hmac,
3561        decrypting, decompressing.
3562
3563        ``--progress`` can be slower than no progress display, since it makes one additional
3564        pass over the archive metadata.
3565
3566        .. note::
3567
3568            Currently, extract always writes into the current working directory ("."),
3569            so make sure you ``cd`` to the right place before calling ``borg extract``.
3570        """)
3571        subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False,
3572                                          description=self.do_extract.__doc__,
3573                                          epilog=extract_epilog,
3574                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3575                                          help='extract archive contents')
3576        subparser.set_defaults(func=self.do_extract)
3577        subparser.add_argument('--list', dest='output_list', action='store_true',
3578                               help='output verbose list of items (files, dirs, ...)')
3579        subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true',
3580                               help='do not actually change any files')
3581        subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true',
3582                               help='only obey numeric user and group identifiers')
3583        subparser.add_argument('--nobsdflags', dest='nobsdflags', action='store_true',
3584                               help='do not extract/set bsdflags (e.g. NODUMP, IMMUTABLE)')
3585        subparser.add_argument('--noacls', dest='noacls', action='store_true',
3586                               help='do not extract/set ACLs')
3587        subparser.add_argument('--noxattrs', dest='noxattrs', action='store_true',
3588                               help='do not extract/set xattrs')
3589        subparser.add_argument('--stdout', dest='stdout', action='store_true',
3590                               help='write all extracted data to stdout')
3591        subparser.add_argument('--sparse', dest='sparse', action='store_true',
3592                               help='create holes in output sparse file from all-zero chunks')
3593        subparser.add_argument('location', metavar='ARCHIVE',
3594                               type=location_validator(archive=True),
3595                               help='archive to extract')
3596        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
3597                               help='paths to extract; patterns are supported')
3598        define_exclusion_group(subparser, strip_components=True)
3599
3600        export_tar_epilog = process_epilog("""
3601        This command creates a tarball from an archive.
3602
3603        When giving '-' as the output FILE, Borg will write a tar stream to standard output.
3604
3605        By default (``--tar-filter=auto``) Borg will detect whether the FILE should be compressed
3606        based on its file extension and pipe the tarball through an appropriate filter
3607        before writing it to FILE:
3608
3609        - .tar.gz: gzip
3610        - .tar.bz2: bzip2
3611        - .tar.xz: xz
3612
3613        Alternatively a ``--tar-filter`` program may be explicitly specified. It should
3614        read the uncompressed tar stream from stdin and write a compressed/filtered
3615        tar stream to stdout.
3616
3617        The generated tarball uses the GNU tar format.
3618
3619        export-tar is a lossy conversion:
3620        BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
3621        Timestamp resolution is limited to whole seconds, not the nanosecond resolution
3622        otherwise supported by Borg.
3623
3624        A ``--sparse`` option (as found in borg extract) is not supported.
3625
3626        By default the entire archive is extracted but a subset of files and directories
3627        can be selected by passing a list of ``PATHs`` as arguments.
3628        The file selection can further be restricted by using the ``--exclude`` option.
3629
3630        See the output of the "borg help patterns" command for more help on exclude patterns.
3631
3632        ``--progress`` can be slower than no progress display, since it makes one additional
3633        pass over the archive metadata.
3634        """)
3635        subparser = subparsers.add_parser('export-tar', parents=[common_parser], add_help=False,
3636                                          description=self.do_export_tar.__doc__,
3637                                          epilog=export_tar_epilog,
3638                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3639                                          help='create tarball from archive')
3640        subparser.set_defaults(func=self.do_export_tar)
3641        subparser.add_argument('--tar-filter', dest='tar_filter', default='auto',
3642                               help='filter program to pipe data through')
3643        subparser.add_argument('--list', dest='output_list', action='store_true',
3644                               help='output verbose list of items (files, dirs, ...)')
3645        subparser.add_argument('location', metavar='ARCHIVE',
3646                               type=location_validator(archive=True),
3647                               help='archive to export')
3648        subparser.add_argument('tarfile', metavar='FILE',
3649                               help='output tar file. "-" to write to stdout instead.')
3650        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
3651                               help='paths to extract; patterns are supported')
3652        define_exclusion_group(subparser, strip_components=True)
3653
3654        diff_epilog = process_epilog("""
3655            This command finds differences (file contents, user/group/mode) between archives.
3656
3657            A repository location and an archive name must be specified for REPO::ARCHIVE1.
3658            ARCHIVE2 is just another archive name in same repository (no repository location
3659            allowed).
3660
3661            For archives created with Borg 1.1 or newer diff automatically detects whether
3662            the archives are created with the same chunker params. If so, only chunk IDs
3663            are compared, which is very fast.
3664
3665            For archives prior to Borg 1.1 chunk contents are compared by default.
3666            If you did not create the archives with different chunker params,
3667            pass ``--same-chunker-params``.
3668            Note that the chunker params changed from Borg 0.xx to 1.0.
3669
3670            See the output of the "borg help patterns" command for more help on exclude patterns.
3671            """)
3672        subparser = subparsers.add_parser('diff', parents=[common_parser], add_help=False,
3673                                          description=self.do_diff.__doc__,
3674                                          epilog=diff_epilog,
3675                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3676                                          help='find differences in archive contents')
3677        subparser.set_defaults(func=self.do_diff)
3678        subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true',
3679                               help='only consider numeric user and group identifiers')
3680        subparser.add_argument('--same-chunker-params', dest='same_chunker_params', action='store_true',
3681                               help='Override check of chunker parameters.')
3682        subparser.add_argument('--sort', dest='sort', action='store_true',
3683                               help='Sort the output lines by file path.')
3684        subparser.add_argument('--json-lines', action='store_true',
3685                               help='Format output as JSON Lines. ')
3686        subparser.add_argument('location', metavar='REPO::ARCHIVE1',
3687                               type=location_validator(archive=True),
3688                               help='repository location and ARCHIVE1 name')
3689        subparser.add_argument('archive2', metavar='ARCHIVE2',
3690                               type=archivename_validator(),
3691                               help='ARCHIVE2 name (no repository location allowed)')
3692        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
3693                               help='paths of items inside the archives to compare; patterns are supported')
3694        define_exclusion_group(subparser)
3695
3696        rename_epilog = process_epilog("""
3697        This command renames an archive in the repository.
3698
3699        This results in a different archive ID.
3700        """)
3701        subparser = subparsers.add_parser('rename', parents=[common_parser], add_help=False,
3702                                          description=self.do_rename.__doc__,
3703                                          epilog=rename_epilog,
3704                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3705                                          help='rename archive')
3706        subparser.set_defaults(func=self.do_rename)
3707        subparser.add_argument('location', metavar='ARCHIVE',
3708                               type=location_validator(archive=True),
3709                               help='archive to rename')
3710        subparser.add_argument('name', metavar='NEWNAME',
3711                               type=archivename_validator(),
3712                               help='the new archive name to use')
3713
3714        delete_epilog = process_epilog("""
3715        This command deletes an archive from the repository or the complete repository.
3716        Disk space is reclaimed accordingly. If you delete the complete repository, the
3717        local cache for it (if any) is also deleted.
3718
3719        When using ``--stats``, you will get some statistics about how much data was
3720        deleted - the "Deleted data" deduplicated size there is most interesting as
3721        that is how much your repository will shrink.
3722        Please note that the "All archives" stats refer to the state after deletion.
3723        """)
3724        subparser = subparsers.add_parser('delete', parents=[common_parser], add_help=False,
3725                                          description=self.do_delete.__doc__,
3726                                          epilog=delete_epilog,
3727                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3728                                          help='delete archive')
3729        subparser.set_defaults(func=self.do_delete)
3730        subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true',
3731                               help='do not change repository')
3732        subparser.add_argument('-s', '--stats', dest='stats', action='store_true',
3733                               help='print statistics for the deleted archive')
3734        subparser.add_argument('--cache-only', dest='cache_only', action='store_true',
3735                               help='delete only the local cache for the given repository')
3736        subparser.add_argument('--force', dest='forced',
3737                               action='count', default=0,
3738                               help='force deletion of corrupted archives, '
3739                                    'use ``--force --force`` in case ``--force`` does not work.')
3740        subparser.add_argument('--save-space', dest='save_space', action='store_true',
3741                               help='work slower, but using less space')
3742        subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
3743                               type=location_validator(),
3744                               help='repository or archive to delete')
3745        subparser.add_argument('archives', metavar='ARCHIVE', nargs='*',
3746                               help='archives to delete')
3747        define_archive_filters_group(subparser)
3748
3749        list_epilog = process_epilog("""
3750        This command lists the contents of a repository or an archive.
3751
3752        See the "borg help patterns" command for more help on exclude patterns.
3753
3754        .. man NOTES
3755
3756        The following keys are available for ``--format``:
3757
3758
3759        """) + BaseFormatter.keys_help() + textwrap.dedent("""
3760
3761        Keys for listing repository archives:
3762
3763        """) + ArchiveFormatter.keys_help() + textwrap.dedent("""
3764
3765        Keys for listing archive files:
3766
3767        """) + ItemFormatter.keys_help()
3768        subparser = subparsers.add_parser('list', parents=[common_parser], add_help=False,
3769                                          description=self.do_list.__doc__,
3770                                          epilog=list_epilog,
3771                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3772                                          help='list archive or repository contents')
3773        subparser.set_defaults(func=self.do_list)
3774        subparser.add_argument('--short', dest='short', action='store_true',
3775                               help='only print file/directory names, nothing else')
3776        subparser.add_argument('--format', '--list-format', metavar='FORMAT', dest='format',
3777                               help='specify format for file listing '
3778                                    '(default: "{mode} {user:6} {group:6} {size:8d} {mtime} {path}{extra}{NL}")')
3779        subparser.add_argument('--json', action='store_true',
3780                               help='Only valid for listing repository contents. Format output as JSON. '
3781                                    'The form of ``--format`` is ignored, '
3782                                    'but keys used in it are added to the JSON output. '
3783                                    'Some keys are always present. Note: JSON can only represent text. '
3784                                    'A "barchive" key is therefore not available.')
3785        subparser.add_argument('--json-lines', action='store_true',
3786                               help='Only valid for listing archive contents. Format output as JSON Lines. '
3787                                    'The form of ``--format`` is ignored, '
3788                                    'but keys used in it are added to the JSON output. '
3789                                    'Some keys are always present. Note: JSON can only represent text. '
3790                                    'A "bpath" key is therefore not available.')
3791        subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
3792                               type=location_validator(),
3793                               help='repository or archive to list contents of')
3794        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
3795                               help='paths to list; patterns are supported')
3796        define_archive_filters_group(subparser)
3797        define_exclusion_group(subparser)
3798
3799        umount_epilog = process_epilog("""
3800        This command un-mounts a FUSE filesystem that was mounted with ``borg mount``.
3801
3802        This is a convenience wrapper that just calls the platform-specific shell
3803        command - usually this is either umount or fusermount -u.
3804        """)
3805        subparser = subparsers.add_parser('umount', parents=[common_parser], add_help=False,
3806                                          description=self.do_umount.__doc__,
3807                                          epilog=umount_epilog,
3808                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3809                                          help='umount repository')
3810        subparser.set_defaults(func=self.do_umount)
3811        subparser.add_argument('mountpoint', metavar='MOUNTPOINT', type=str,
3812                               help='mountpoint of the filesystem to umount')
3813
3814        info_epilog = process_epilog("""
3815        This command displays detailed information about the specified archive or repository.
3816
3817        Please note that the deduplicated sizes of the individual archives do not add
3818        up to the deduplicated size of the repository ("all archives"), because the two
3819        are meaning different things:
3820
3821        This archive / deduplicated size = amount of data stored ONLY for this archive
3822        = unique chunks of this archive.
3823        All archives / deduplicated size = amount of data stored in the repo
3824        = all chunks in the repository.
3825
3826        Borg archives can only contain a limited amount of file metadata.
3827        The size of an archive relative to this limit depends on a number of factors,
3828        mainly the number of files, the lengths of paths and other metadata stored for files.
3829        This is shown as *utilization of maximum supported archive size*.
3830        """)
3831        subparser = subparsers.add_parser('info', parents=[common_parser], add_help=False,
3832                                          description=self.do_info.__doc__,
3833                                          epilog=info_epilog,
3834                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3835                                          help='show repository or archive information')
3836        subparser.set_defaults(func=self.do_info)
3837        subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
3838                               type=location_validator(),
3839                               help='repository or archive to display information about')
3840        subparser.add_argument('--json', action='store_true',
3841                               help='format output as JSON')
3842        define_archive_filters_group(subparser)
3843
3844        break_lock_epilog = process_epilog("""
3845        This command breaks the repository and cache locks.
3846        Please use carefully and only while no borg process (on any machine) is
3847        trying to access the Cache or the Repository.
3848        """)
3849        subparser = subparsers.add_parser('break-lock', parents=[common_parser], add_help=False,
3850                                          description=self.do_break_lock.__doc__,
3851                                          epilog=break_lock_epilog,
3852                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3853                                          help='break repository and cache locks')
3854        subparser.set_defaults(func=self.do_break_lock)
3855        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
3856                               type=location_validator(archive=False),
3857                               help='repository for which to break the locks')
3858
3859        prune_epilog = process_epilog("""
3860        The prune command prunes a repository by deleting all archives not matching
3861        any of the specified retention options. This command is normally used by
3862        automated backup scripts wanting to keep a certain number of historic backups.
3863
3864        Also, prune automatically removes checkpoint archives (incomplete archives left
3865        behind by interrupted backup runs) except if the checkpoint is the latest
3866        archive (and thus still needed). Checkpoint archives are not considered when
3867        comparing archive counts against the retention limits (``--keep-X``).
3868
3869        If a prefix is set with -P, then only archives that start with the prefix are
3870        considered for deletion and only those archives count towards the totals
3871        specified by the rules.
3872        Otherwise, *all* archives in the repository are candidates for deletion!
3873        There is no automatic distinction between archives representing different
3874        contents. These need to be distinguished by specifying matching prefixes.
3875
3876        If you have multiple sequences of archives with different data sets (e.g.
3877        from different machines) in one shared repository, use one prune call per
3878        data set that matches only the respective archives using the -P option.
3879
3880        The ``--keep-within`` option takes an argument of the form "<int><char>",
3881        where char is "H", "d", "w", "m", "y". For example, ``--keep-within 2d`` means
3882        to keep all archives that were created within the past 48 hours.
3883        "1m" is taken to mean "31d". The archives kept with this option do not
3884        count towards the totals specified by any other options.
3885
3886        A good procedure is to thin out more and more the older your backups get.
3887        As an example, ``--keep-daily 7`` means to keep the latest backup on each day,
3888        up to 7 most recent days with backups (days without backups do not count).
3889        The rules are applied from secondly to yearly, and backups selected by previous
3890        rules do not count towards those of later rules. The time that each backup
3891        starts is used for pruning purposes. Dates and times are interpreted in
3892        the local timezone, and weeks go from Monday to Sunday. Specifying a
3893        negative number of archives to keep means that there is no limit.
3894
3895        The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will
3896        keep the last N archives under the assumption that you do not create more than one
3897        backup archive in the same second).
3898
3899        When using ``--stats``, you will get some statistics about how much data was
3900        deleted - the "Deleted data" deduplicated size there is most interesting as
3901        that is how much your repository will shrink.
3902        Please note that the "All archives" stats refer to the state after pruning.
3903        """)
3904        subparser = subparsers.add_parser('prune', parents=[common_parser], add_help=False,
3905                                          description=self.do_prune.__doc__,
3906                                          epilog=prune_epilog,
3907                                          formatter_class=argparse.RawDescriptionHelpFormatter,
3908                                          help='prune archives')
3909        subparser.set_defaults(func=self.do_prune)
3910        subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true',
3911                               help='do not change repository')
3912        subparser.add_argument('--force', dest='forced', action='store_true',
3913                               help='force pruning of corrupted archives, '
3914                                    'use ``--force --force`` in case ``--force`` does not work.')
3915        subparser.add_argument('-s', '--stats', dest='stats', action='store_true',
3916                               help='print statistics for the deleted archive')
3917        subparser.add_argument('--list', dest='output_list', action='store_true',
3918                               help='output verbose list of archives it keeps/prunes')
3919        subparser.add_argument('--keep-within', metavar='INTERVAL', dest='within', type=interval,
3920                               help='keep all archives within this time interval')
3921        subparser.add_argument('--keep-last', '--keep-secondly', dest='secondly', type=int, default=0,
3922                               help='number of secondly archives to keep')
3923        subparser.add_argument('--keep-minutely', dest='minutely', type=int, default=0,
3924                               help='number of minutely archives to keep')
3925        subparser.add_argument('-H', '--keep-hourly', dest='hourly', type=int, default=0,
3926                               help='number of hourly archives to keep')
3927        subparser.add_argument('-d', '--keep-daily', dest='daily', type=int, default=0,
3928                               help='number of daily archives to keep')
3929        subparser.add_argument('-w', '--keep-weekly', dest='weekly', type=int, default=0,
3930                               help='number of weekly archives to keep')
3931        subparser.add_argument('-m', '--keep-monthly', dest='monthly', type=int, default=0,
3932                               help='number of monthly archives to keep')
3933        subparser.add_argument('-y', '--keep-yearly', dest='yearly', type=int, default=0,
3934                               help='number of yearly archives to keep')
3935        define_archive_filters_group(subparser, sort_by=False, first_last=False)
3936        subparser.add_argument('--save-space', dest='save_space', action='store_true',
3937                               help='work slower, but using less space')
3938        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
3939                               type=location_validator(archive=False),
3940                               help='repository to prune')
3941
3942        upgrade_epilog = process_epilog("""
3943        Upgrade an existing, local Borg repository.
3944
3945        When you do not need borg upgrade
3946        +++++++++++++++++++++++++++++++++
3947
3948        Not every change requires that you run ``borg upgrade``.
3949
3950        You do **not** need to run it when:
3951
3952        - moving your repository to a different place
3953        - upgrading to another point release (like 1.0.x to 1.0.y),
3954          except when noted otherwise in the changelog
3955        - upgrading from 1.0.x to 1.1.x,
3956          except when noted otherwise in the changelog
3957
3958        Borg 1.x.y upgrades
3959        +++++++++++++++++++
3960
3961        Use ``borg upgrade --tam REPO`` to require manifest authentication
3962        introduced with Borg 1.0.9 to address security issues. This means
3963        that modifying the repository after doing this with a version prior
3964        to 1.0.9 will raise a validation error, so only perform this upgrade
3965        after updating all clients using the repository to 1.0.9 or newer.
3966
3967        This upgrade should be done on each client for safety reasons.
3968
3969        If a repository is accidentally modified with a pre-1.0.9 client after
3970        this upgrade, use ``borg upgrade --tam --force REPO`` to remedy it.
3971
3972        If you routinely do this you might not want to enable this upgrade
3973        (which will leave you exposed to the security issue). You can
3974        reverse the upgrade by issuing ``borg upgrade --disable-tam REPO``.
3975
3976        See
3977        https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability
3978        for details.
3979
3980        Attic and Borg 0.xx to Borg 1.x
3981        +++++++++++++++++++++++++++++++
3982
3983        This currently supports converting an Attic repository to Borg and also
3984        helps with converting Borg 0.xx to 1.0.
3985
3986        Currently, only LOCAL repositories can be upgraded (issue #465).
3987
3988        Please note that ``borg create`` (since 1.0.0) uses bigger chunks by
3989        default than old borg or attic did, so the new chunks won't deduplicate
3990        with the old chunks in the upgraded repository.
3991        See ``--chunker-params`` option of ``borg create`` and ``borg recreate``.
3992
3993        ``borg upgrade`` will change the magic strings in the repository's
3994        segments to match the new Borg magic strings. The keyfiles found in
3995        $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and
3996        copied to $BORG_KEYS_DIR or ~/.config/borg/keys.
3997
3998        The cache files are converted, from $ATTIC_CACHE_DIR or
3999        ~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the
4000        cache layout between Borg and Attic changed, so it is possible
4001        the first backup after the conversion takes longer than expected
4002        due to the cache resync.
4003
4004        Upgrade should be able to resume if interrupted, although it
4005        will still iterate over all segments. If you want to start
4006        from scratch, use `borg delete` over the copied repository to
4007        make sure the cache files are also removed::
4008
4009            borg delete borg
4010
4011        Unless ``--inplace`` is specified, the upgrade process first creates a backup
4012        copy of the repository, in REPOSITORY.before-upgrade-DATETIME, using hardlinks.
4013        This requires that the repository and its parent directory reside on same
4014        filesystem so the hardlink copy can work.
4015        This takes longer than in place upgrades, but is much safer and gives
4016        progress information (as opposed to ``cp -al``). Once you are satisfied
4017        with the conversion, you can safely destroy the backup copy.
4018
4019        WARNING: Running the upgrade in place will make the current
4020        copy unusable with older version, with no way of going back
4021        to previous versions. This can PERMANENTLY DAMAGE YOUR
4022        REPOSITORY!  Attic CAN NOT READ BORG REPOSITORIES, as the
4023        magic strings have changed. You have been warned.""")
4024        subparser = subparsers.add_parser('upgrade', parents=[common_parser], add_help=False,
4025                                          description=self.do_upgrade.__doc__,
4026                                          epilog=upgrade_epilog,
4027                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4028                                          help='upgrade repository format')
4029        subparser.set_defaults(func=self.do_upgrade)
4030        subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true',
4031                               help='do not change repository')
4032        subparser.add_argument('--inplace', dest='inplace', action='store_true',
4033                               help='rewrite repository in place, with no chance of going back '
4034                                    'to older versions of the repository.')
4035        subparser.add_argument('--force', dest='force', action='store_true',
4036                               help='Force upgrade')
4037        subparser.add_argument('--tam', dest='tam', action='store_true',
4038                               help='Enable manifest authentication (in key and cache) (Borg 1.0.9 and later).')
4039        subparser.add_argument('--disable-tam', dest='disable_tam', action='store_true',
4040                               help='Disable manifest authentication (in key and cache).')
4041        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
4042                               type=location_validator(archive=False),
4043                               help='path to the repository to be upgraded')
4044
4045        recreate_epilog = process_epilog("""
4046        Recreate the contents of existing archives.
4047
4048        recreate is a potentially dangerous function and might lead to data loss
4049        (if used wrongly). BE VERY CAREFUL!
4050
4051        ``--exclude``, ``--exclude-from``, ``--exclude-if-present``, ``--keep-exclude-tags``
4052        and PATH have the exact same semantics as in "borg create", but they only check
4053        for files in the archives and not in the local file system. If PATHs are specified,
4054        the resulting archives will only contain files from these PATHs.
4055
4056        Note that all paths in an archive are relative, therefore absolute patterns/paths
4057        will *not* match (``--exclude``, ``--exclude-from``, PATHs).
4058
4059        ``--recompress`` allows to change the compression of existing data in archives.
4060        Due to how Borg stores compressed size information this might display
4061        incorrect information for archives that were not recreated at the same time.
4062        There is no risk of data loss by this.
4063
4064        ``--chunker-params`` will re-chunk all files in the archive, this can be
4065        used to have upgraded Borg 0.xx or Attic archives deduplicate with
4066        Borg 1.x archives.
4067
4068        **USE WITH CAUTION.**
4069        Depending on the PATHs and patterns given, recreate can be used to permanently
4070        delete files from archives.
4071        When in doubt, use ``--dry-run --verbose --list`` to see how patterns/PATHS are
4072        interpreted. See :ref:`list_item_flags` in ``borg create`` for details.
4073
4074        The archive being recreated is only removed after the operation completes. The
4075        archive that is built during the operation exists at the same time at
4076        "<ARCHIVE>.recreate". The new archive will have a different archive ID.
4077
4078        With ``--target`` the original archive is not replaced, instead a new archive is created.
4079
4080        When rechunking space usage can be substantial, expect at least the entire
4081        deduplicated size of the archives using the previous chunker params.
4082        When recompressing expect approx. (throughput / checkpoint-interval) in space usage,
4083        assuming all chunks are recompressed.
4084
4085        If you recently ran borg check --repair and it had to fix lost chunks with all-zero
4086        replacement chunks, please first run another backup for the same data and re-run
4087        borg check --repair afterwards to heal any archives that had lost chunks which are
4088        still generated from the input data.
4089
4090        Important: running borg recreate to re-chunk will remove the chunks_healthy
4091        metadata of all items with replacement chunks, so healing will not be possible
4092        any more after re-chunking (it is also unlikely it would ever work: due to the
4093        change of chunking parameters, the missing chunk likely will never be seen again
4094        even if you still have the data that produced it).
4095        """)
4096        subparser = subparsers.add_parser('recreate', parents=[common_parser], add_help=False,
4097                                          description=self.do_recreate.__doc__,
4098                                          epilog=recreate_epilog,
4099                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4100                                          help=self.do_recreate.__doc__)
4101        subparser.set_defaults(func=self.do_recreate)
4102        subparser.add_argument('--list', dest='output_list', action='store_true',
4103                               help='output verbose list of items (files, dirs, ...)')
4104        subparser.add_argument('--filter', metavar='STATUSCHARS', dest='output_filter',
4105                               help='only display items with the given status characters (listed in borg create --help)')
4106        subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true',
4107                               help='do not change anything')
4108        subparser.add_argument('-s', '--stats', dest='stats', action='store_true',
4109                               help='print statistics at end')
4110
4111        define_exclusion_group(subparser, tag_files=True)
4112
4113        archive_group = subparser.add_argument_group('Archive options')
4114        archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None,
4115                                   type=archivename_validator(),
4116                                   help='create a new archive with the name ARCHIVE, do not replace existing archive '
4117                                        '(only applies for a single archive)')
4118        archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
4119                                   type=int, default=1800, metavar='SECONDS',
4120                                   help='write checkpoint every SECONDS seconds (Default: 1800)')
4121        archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', type=CommentSpec, default=None,
4122                                   help='add a comment text to the archive')
4123        archive_group.add_argument('--timestamp', metavar='TIMESTAMP', dest='timestamp',
4124                                   type=timestamp, default=None,
4125                                   help='manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). '
4126                                        'alternatively, give a reference file/directory.')
4127        archive_group.add_argument('-C', '--compression', metavar='COMPRESSION', dest='compression',
4128                                   type=CompressionSpec, default=CompressionSpec('lz4'),
4129                                   help='select compression algorithm, see the output of the '
4130                                        '"borg help compression" command for details.')
4131        archive_group.add_argument('--recompress', metavar='MODE', dest='recompress', nargs='?',
4132                                   default='never', const='if-different', choices=('never', 'if-different', 'always'),
4133                                   help='recompress data chunks according to ``--compression``. '
4134                                        'MODE `if-different`: '
4135                                        'recompress if current compression is with a different compression algorithm '
4136                                        '(the level is not considered). '
4137                                        'MODE `always`: '
4138                                        'recompress even if current compression is with the same compression algorithm '
4139                                        '(use this to change the compression level). '
4140                                        'MODE `never` (default): '
4141                                        'do not recompress.')
4142        archive_group.add_argument('--chunker-params', metavar='PARAMS', dest='chunker_params',
4143                                   type=ChunkerParams, default=CHUNKER_PARAMS,
4144                                   help='specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, '
4145                                        'HASH_MASK_BITS, HASH_WINDOW_SIZE) or `default` to use the current defaults. '
4146                                        'default: %d,%d,%d,%d' % CHUNKER_PARAMS)
4147
4148        subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
4149                               type=location_validator(),
4150                               help='repository or archive to recreate')
4151        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
4152                               help='paths to recreate; patterns are supported')
4153
4154        with_lock_epilog = process_epilog("""
4155        This command runs a user-specified command while the repository lock is held.
4156
4157        It will first try to acquire the lock (make sure that no other operation is
4158        running in the repo), then execute the given command as a subprocess and wait
4159        for its termination, release the lock and return the user command's return
4160        code as borg's return code.
4161
4162        .. note::
4163
4164            If you copy a repository with the lock held, the lock will be present in
4165            the copy. Thus, before using borg on the copy from a different host,
4166            you need to use "borg break-lock" on the copied repository, because
4167            Borg is cautious and does not automatically remove stale locks made by a different host.
4168        """)
4169        subparser = subparsers.add_parser('with-lock', parents=[common_parser], add_help=False,
4170                                          description=self.do_with_lock.__doc__,
4171                                          epilog=with_lock_epilog,
4172                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4173                                          help='run user command with lock held')
4174        subparser.set_defaults(func=self.do_with_lock)
4175        subparser.add_argument('location', metavar='REPOSITORY',
4176                               type=location_validator(archive=False),
4177                               help='repository to lock')
4178        subparser.add_argument('command', metavar='COMMAND',
4179                               help='command to run')
4180        subparser.add_argument('args', metavar='ARGS', nargs=argparse.REMAINDER,
4181                               help='command arguments')
4182
4183        config_epilog = process_epilog("""
4184        This command gets and sets options in a local repository or cache config file.
4185        For security reasons, this command only works on local repositories.
4186
4187        To delete a config value entirely, use ``--delete``. To list the values
4188        of the configuration file or the default values, use ``--list``.  To get and existing
4189        key, pass only the key name. To set a key, pass both the key name and
4190        the new value. Keys can be specified in the format "section.name" or
4191        simply "name"; the section will default to "repository" and "cache" for
4192        the repo and cache configs, respectively.
4193
4194
4195        By default, borg config manipulates the repository config file. Using ``--cache``
4196        edits the repository cache's config file instead.
4197        """)
4198        subparser = subparsers.add_parser('config', parents=[common_parser], add_help=False,
4199                                          description=self.do_config.__doc__,
4200                                          epilog=config_epilog,
4201                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4202                                          help='get and set configuration values')
4203        subparser.set_defaults(func=self.do_config)
4204        subparser.add_argument('-c', '--cache', dest='cache', action='store_true',
4205                               help='get and set values from the repo cache')
4206
4207        group = subparser.add_mutually_exclusive_group()
4208        group.add_argument('-d', '--delete', dest='delete', action='store_true',
4209                               help='delete the key from the config file')
4210        group.add_argument('-l', '--list', dest='list', action='store_true',
4211                               help='list the configuration of the repo')
4212
4213        subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='',
4214                               type=location_validator(archive=False, proto='file'),
4215                               help='repository to configure')
4216        subparser.add_argument('name', metavar='NAME', nargs='?',
4217                               help='name of config key')
4218        subparser.add_argument('value', metavar='VALUE', nargs='?',
4219                               help='new value for key')
4220
4221        subparser = subparsers.add_parser('help', parents=[common_parser], add_help=False,
4222                                          description='Extra help')
4223        subparser.add_argument('--epilog-only', dest='epilog_only', action='store_true')
4224        subparser.add_argument('--usage-only', dest='usage_only', action='store_true')
4225        subparser.set_defaults(func=functools.partial(self.do_help, parser, subparsers.choices))
4226        subparser.add_argument('topic', metavar='TOPIC', type=str, nargs='?',
4227                               help='additional help on TOPIC')
4228
4229        debug_epilog = process_epilog("""
4230        These commands are not intended for normal use and potentially very
4231        dangerous if used incorrectly.
4232
4233        They exist to improve debugging capabilities without direct system access, e.g.
4234        in case you ever run into some severe malfunction. Use them only if you know
4235        what you are doing or if a trusted developer tells you what to do.""")
4236
4237        subparser = subparsers.add_parser('debug', parents=[mid_common_parser], add_help=False,
4238                                          description='debugging command (not intended for normal use)',
4239                                          epilog=debug_epilog,
4240                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4241                                          help='debugging command (not intended for normal use)')
4242
4243        debug_parsers = subparser.add_subparsers(title='required arguments', metavar='<command>')
4244        subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser))
4245
4246        debug_info_epilog = process_epilog("""
4247        This command displays some system information that might be useful for bug
4248        reports and debugging problems. If a traceback happens, this information is
4249        already appended at the end of the traceback.
4250        """)
4251        subparser = debug_parsers.add_parser('info', parents=[common_parser], add_help=False,
4252                                          description=self.do_debug_info.__doc__,
4253                                          epilog=debug_info_epilog,
4254                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4255                                          help='show system infos for debugging / bug reports (debug)')
4256        subparser.set_defaults(func=self.do_debug_info)
4257
4258        debug_dump_archive_items_epilog = process_epilog("""
4259        This command dumps raw (but decrypted and decompressed) archive items (only metadata) to files.
4260        """)
4261        subparser = debug_parsers.add_parser('dump-archive-items', parents=[common_parser], add_help=False,
4262                                          description=self.do_debug_dump_archive_items.__doc__,
4263                                          epilog=debug_dump_archive_items_epilog,
4264                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4265                                          help='dump archive items (metadata) (debug)')
4266        subparser.set_defaults(func=self.do_debug_dump_archive_items)
4267        subparser.add_argument('location', metavar='ARCHIVE',
4268                               type=location_validator(archive=True),
4269                               help='archive to dump')
4270
4271        debug_dump_archive_epilog = process_epilog("""
4272        This command dumps all metadata of an archive in a decoded form to a file.
4273        """)
4274        subparser = debug_parsers.add_parser('dump-archive', parents=[common_parser], add_help=False,
4275                                          description=self.do_debug_dump_archive.__doc__,
4276                                          epilog=debug_dump_archive_epilog,
4277                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4278                                          help='dump decoded archive metadata (debug)')
4279        subparser.set_defaults(func=self.do_debug_dump_archive)
4280        subparser.add_argument('location', metavar='ARCHIVE',
4281                               type=location_validator(archive=True),
4282                               help='archive to dump')
4283        subparser.add_argument('path', metavar='PATH', type=str,
4284                               help='file to dump data into')
4285
4286        debug_dump_manifest_epilog = process_epilog("""
4287        This command dumps manifest metadata of a repository in a decoded form to a file.
4288        """)
4289        subparser = debug_parsers.add_parser('dump-manifest', parents=[common_parser], add_help=False,
4290                                          description=self.do_debug_dump_manifest.__doc__,
4291                                          epilog=debug_dump_manifest_epilog,
4292                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4293                                          help='dump decoded repository metadata (debug)')
4294        subparser.set_defaults(func=self.do_debug_dump_manifest)
4295        subparser.add_argument('location', metavar='REPOSITORY',
4296                               type=location_validator(archive=False),
4297                               help='repository to dump')
4298        subparser.add_argument('path', metavar='PATH', type=str,
4299                               help='file to dump data into')
4300
4301        debug_dump_repo_objs_epilog = process_epilog("""
4302        This command dumps raw (but decrypted and decompressed) repo objects to files.
4303        """)
4304        subparser = debug_parsers.add_parser('dump-repo-objs', parents=[common_parser], add_help=False,
4305                                          description=self.do_debug_dump_repo_objs.__doc__,
4306                                          epilog=debug_dump_repo_objs_epilog,
4307                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4308                                          help='dump repo objects (debug)')
4309        subparser.set_defaults(func=self.do_debug_dump_repo_objs)
4310        subparser.add_argument('location', metavar='REPOSITORY',
4311                               type=location_validator(archive=False),
4312                               help='repository to dump')
4313        subparser.add_argument('--ghost', dest='ghost', action='store_true',
4314                               help='dump all segment file contents, including deleted/uncommitted objects and commits.')
4315
4316        debug_search_repo_objs_epilog = process_epilog("""
4317        This command searches raw (but decrypted and decompressed) repo objects for a specific bytes sequence.
4318        """)
4319        subparser = debug_parsers.add_parser('search-repo-objs', parents=[common_parser], add_help=False,
4320                                          description=self.do_debug_search_repo_objs.__doc__,
4321                                          epilog=debug_search_repo_objs_epilog,
4322                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4323                                          help='search repo objects (debug)')
4324        subparser.set_defaults(func=self.do_debug_search_repo_objs)
4325        subparser.add_argument('location', metavar='REPOSITORY',
4326                               type=location_validator(archive=False),
4327                               help='repository to search')
4328        subparser.add_argument('wanted', metavar='WANTED', type=str,
4329                               help='term to search the repo for, either 0x1234abcd hex term or a string')
4330
4331        debug_get_obj_epilog = process_epilog("""
4332        This command gets an object from the repository.
4333        """)
4334        subparser = debug_parsers.add_parser('get-obj', parents=[common_parser], add_help=False,
4335                                          description=self.do_debug_get_obj.__doc__,
4336                                          epilog=debug_get_obj_epilog,
4337                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4338                                          help='get object from repository (debug)')
4339        subparser.set_defaults(func=self.do_debug_get_obj)
4340        subparser.add_argument('location', metavar='REPOSITORY',
4341                               type=location_validator(archive=False),
4342                               help='repository to use')
4343        subparser.add_argument('id', metavar='ID', type=str,
4344                               help='hex object ID to get from the repo')
4345        subparser.add_argument('path', metavar='PATH', type=str,
4346                               help='file to write object data into')
4347
4348        debug_put_obj_epilog = process_epilog("""
4349        This command puts objects into the repository.
4350        """)
4351        subparser = debug_parsers.add_parser('put-obj', parents=[common_parser], add_help=False,
4352                                          description=self.do_debug_put_obj.__doc__,
4353                                          epilog=debug_put_obj_epilog,
4354                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4355                                          help='put object to repository (debug)')
4356        subparser.set_defaults(func=self.do_debug_put_obj)
4357        subparser.add_argument('location', metavar='REPOSITORY',
4358                               type=location_validator(archive=False),
4359                               help='repository to use')
4360        subparser.add_argument('paths', metavar='PATH', nargs='+', type=str,
4361                               help='file(s) to read and create object(s) from')
4362
4363        debug_delete_obj_epilog = process_epilog("""
4364        This command deletes objects from the repository.
4365        """)
4366        subparser = debug_parsers.add_parser('delete-obj', parents=[common_parser], add_help=False,
4367                                          description=self.do_debug_delete_obj.__doc__,
4368                                          epilog=debug_delete_obj_epilog,
4369                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4370                                          help='delete object from repository (debug)')
4371        subparser.set_defaults(func=self.do_debug_delete_obj)
4372        subparser.add_argument('location', metavar='REPOSITORY',
4373                               type=location_validator(archive=False),
4374                               help='repository to use')
4375        subparser.add_argument('ids', metavar='IDs', nargs='+', type=str,
4376                               help='hex object ID(s) to delete from the repo')
4377
4378        debug_refcount_obj_epilog = process_epilog("""
4379        This command displays the reference count for objects from the repository.
4380        """)
4381        subparser = debug_parsers.add_parser('refcount-obj', parents=[common_parser], add_help=False,
4382                                          description=self.do_debug_refcount_obj.__doc__,
4383                                          epilog=debug_refcount_obj_epilog,
4384                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4385                                          help='show refcount for object from repository (debug)')
4386        subparser.set_defaults(func=self.do_debug_refcount_obj)
4387        subparser.add_argument('location', metavar='REPOSITORY',
4388                               type=location_validator(archive=False),
4389                               help='repository to use')
4390        subparser.add_argument('ids', metavar='IDs', nargs='+', type=str,
4391                               help='hex object ID(s) to show refcounts for')
4392
4393        debug_dump_hints_epilog = process_epilog("""
4394        This command dumps the repository hints data.
4395        """)
4396        subparser = debug_parsers.add_parser('dump-hints', parents=[common_parser], add_help=False,
4397                                          description=self.do_debug_dump_hints.__doc__,
4398                                          epilog=debug_dump_hints_epilog,
4399                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4400                                          help='dump repo hints (debug)')
4401        subparser.set_defaults(func=self.do_debug_dump_hints)
4402        subparser.add_argument('location', metavar='REPOSITORY',
4403                               type=location_validator(archive=False),
4404                               help='repository to dump')
4405        subparser.add_argument('path', metavar='PATH', type=str,
4406                               help='file to dump data into')
4407
4408        debug_convert_profile_epilog = process_epilog("""
4409        Convert a Borg profile to a Python cProfile compatible profile.
4410        """)
4411        subparser = debug_parsers.add_parser('convert-profile', parents=[common_parser], add_help=False,
4412                                          description=self.do_debug_convert_profile.__doc__,
4413                                          epilog=debug_convert_profile_epilog,
4414                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4415                                          help='convert Borg profile to Python profile (debug)')
4416        subparser.set_defaults(func=self.do_debug_convert_profile)
4417        subparser.add_argument('input', metavar='INPUT', type=argparse.FileType('rb'),
4418                               help='Borg profile')
4419        subparser.add_argument('output', metavar='OUTPUT', type=argparse.FileType('wb'),
4420                               help='Output file')
4421
4422        benchmark_epilog = process_epilog("These commands do various benchmarks.")
4423
4424        subparser = subparsers.add_parser('benchmark', parents=[mid_common_parser], add_help=False,
4425                                          description='benchmark command',
4426                                          epilog=benchmark_epilog,
4427                                          formatter_class=argparse.RawDescriptionHelpFormatter,
4428                                          help='benchmark command')
4429
4430        benchmark_parsers = subparser.add_subparsers(title='required arguments', metavar='<command>')
4431        subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser))
4432
4433        bench_crud_epilog = process_epilog("""
4434        This command benchmarks borg CRUD (create, read, update, delete) operations.
4435
4436        It creates input data below the given PATH and backups this data into the given REPO.
4437        The REPO must already exist (it could be a fresh empty repo or an existing repo, the
4438        command will create / read / update / delete some archives named borg-benchmark-crud\\* there.
4439
4440        Make sure you have free space there, you'll need about 1GB each (+ overhead).
4441
4442        If your repository is encrypted and borg needs a passphrase to unlock the key, use::
4443
4444            BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH
4445
4446        Measurements are done with different input file sizes and counts.
4447        The file contents are very artificial (either all zero or all random),
4448        thus the measurement results do not necessarily reflect performance with real data.
4449        Also, due to the kind of content used, no compression is used in these benchmarks.
4450
4451        C- == borg create (1st archive creation, no compression, do not use files cache)
4452              C-Z- == all-zero files. full dedup, this is primarily measuring reader/chunker/hasher.
4453              C-R- == random files. no dedup, measuring throughput through all processing stages.
4454
4455        R- == borg extract (extract archive, dry-run, do everything, but do not write files to disk)
4456              R-Z- == all zero files. Measuring heavily duplicated files.
4457              R-R- == random files. No duplication here, measuring throughput through all processing
4458              stages, except writing to disk.
4459
4460        U- == borg create (2nd archive creation of unchanged input files, measure files cache speed)
4461              The throughput value is kind of virtual here, it does not actually read the file.
4462              U-Z- == needs to check the 2 all-zero chunks' existence in the repo.
4463              U-R- == needs to check existence of a lot of different chunks in the repo.
4464
4465        D- == borg delete archive (delete last remaining archive, measure deletion + compaction)
4466              D-Z- == few chunks to delete / few segments to compact/remove.
4467              D-R- == many chunks to delete / many segments to compact/remove.
4468
4469        Please note that there might be quite some variance in these measurements.
4470        Try multiple measurements and having a otherwise idle machine (and network, if you use it).
4471        """)
4472        subparser = benchmark_parsers.add_parser('crud', parents=[common_parser], add_help=False,
4473                                                 description=self.do_benchmark_crud.__doc__,
4474                                                 epilog=bench_crud_epilog,
4475                                                 formatter_class=argparse.RawDescriptionHelpFormatter,
4476                                                 help='benchmarks borg CRUD (create, extract, update, delete).')
4477        subparser.set_defaults(func=self.do_benchmark_crud)
4478
4479        subparser.add_argument('location', metavar='REPOSITORY',
4480                               type=location_validator(archive=False),
4481                               help='repository to use for benchmark (must exist)')
4482
4483        subparser.add_argument('path', metavar='PATH', help='path were to create benchmark input data')
4484
4485        return parser
4486
4487    def get_args(self, argv, cmd):
4488        """usually, just returns argv, except if we deal with a ssh forced command for borg serve."""
4489        result = self.parse_args(argv[1:])
4490        if cmd is not None and result.func == self.do_serve:
4491            # borg serve case:
4492            # - "result" is how borg got invoked (e.g. via forced command from authorized_keys),
4493            # - "client_result" (from "cmd") refers to the command the client wanted to execute,
4494            #   which might be different in the case of a forced command or same otherwise.
4495            client_argv = shlex.split(cmd)
4496            # Drop environment variables (do *not* interpret them) before trying to parse
4497            # the borg command line.
4498            client_argv = list(itertools.dropwhile(lambda arg: '=' in arg, client_argv))
4499            client_result = self.parse_args(client_argv[1:])
4500            if client_result.func == result.func:
4501                # make sure we only process like normal if the client is executing
4502                # the same command as specified in the forced command, otherwise
4503                # just skip this block and return the forced command (== result).
4504                # client is allowed to specify the allowlisted options,
4505                # everything else comes from the forced "borg serve" command (or the defaults).
4506                # stuff from denylist must never be used from the client.
4507                denylist = {
4508                    'restrict_to_paths',
4509                    'restrict_to_repositories',
4510                    'append_only',
4511                    'storage_quota',
4512                }
4513                allowlist = {
4514                    'debug_topics',
4515                    'lock_wait',
4516                    'log_level',
4517                    'umask',
4518                }
4519                not_present = object()
4520                for attr_name in allowlist:
4521                    assert attr_name not in denylist, 'allowlist has denylisted attribute name %s' % attr_name
4522                    value = getattr(client_result, attr_name, not_present)
4523                    if value is not not_present:
4524                        # note: it is not possible to specify a allowlisted option via a forced command,
4525                        # it always gets overridden by the value specified (or defaulted to) by the client command.
4526                        setattr(result, attr_name, value)
4527
4528        return result
4529
4530    def parse_args(self, args=None):
4531        # We can't use argparse for "serve" since we don't want it to show up in "Available commands"
4532        if args:
4533            args = self.preprocess_args(args)
4534        parser = self.build_parser()
4535        args = parser.parse_args(args or ['-h'])
4536        parser.common_options.resolve(args)
4537        func = get_func(args)
4538        if func == self.do_create and not args.paths:
4539            # need at least 1 path but args.paths may also be populated from patterns
4540            parser.error('Need at least one PATH argument.')
4541        if not getattr(args, 'lock', True):  # Option --bypass-lock sets args.lock = False
4542            bypass_allowed = {self.do_check, self.do_config, self.do_diff,
4543                              self.do_export_tar, self.do_extract, self.do_info,
4544                              self.do_list, self.do_mount, self.do_umount}
4545            if func not in bypass_allowed:
4546                raise Error('Not allowed to bypass locking mechanism for chosen command')
4547        if getattr(args, 'timestamp', None):
4548            args.location = args.location.with_timestamp(args.timestamp)
4549        return args
4550
4551    def prerun_checks(self, logger, is_serve):
4552        if not is_serve:
4553            # this is the borg *client*, we need to check the python:
4554            check_python()
4555        check_extension_modules()
4556        selftest(logger)
4557
4558    def _setup_implied_logging(self, args):
4559        """ turn on INFO level logging for args that imply that they will produce output """
4560        # map of option name to name of logger for that option
4561        option_logger = {
4562            'output_list': 'borg.output.list',
4563            'show_version': 'borg.output.show-version',
4564            'show_rc': 'borg.output.show-rc',
4565            'stats': 'borg.output.stats',
4566            'progress': 'borg.output.progress',
4567        }
4568        for option, logger_name in option_logger.items():
4569            option_set = args.get(option, False)
4570            logging.getLogger(logger_name).setLevel('INFO' if option_set else 'WARN')
4571
4572    def _setup_topic_debugging(self, args):
4573        """Turn on DEBUG level logging for specified --debug-topics."""
4574        for topic in args.debug_topics:
4575            if '.' not in topic:
4576                topic = 'borg.debug.' + topic
4577            logger.debug('Enabling debug topic %s', topic)
4578            logging.getLogger(topic).setLevel('DEBUG')
4579
4580    def run(self, args):
4581        os.umask(args.umask)  # early, before opening files
4582        self.lock_wait = args.lock_wait
4583        func = get_func(args)
4584        # do not use loggers before this!
4585        is_serve = func == self.do_serve
4586        setup_logging(level=args.log_level, is_serve=is_serve, json=args.log_json)
4587        self.log_json = args.log_json
4588        args.progress |= is_serve
4589        self._setup_implied_logging(vars(args))
4590        self._setup_topic_debugging(args)
4591        if getattr(args, 'stats', False) and getattr(args, 'dry_run', False):
4592            # the data needed for --stats is not computed when using --dry-run, so we can't do it.
4593            # for ease of scripting, we just ignore --stats when given with --dry-run.
4594            logger.warning("Ignoring --stats. It is not supported when using --dry-run.")
4595            args.stats = False
4596        if args.show_version:
4597            logging.getLogger('borg.output.show-version').info('borgbackup version %s' % __version__)
4598        self.prerun_checks(logger, is_serve)
4599        if not is_supported_msgpack():
4600            logger.error("You do not have a supported msgpack[-python] version installed. Terminating.")
4601            logger.error("This should never happen as specific, supported versions are required by our setup.py.")
4602            logger.error("Do not contact borgbackup support about this.")
4603            return set_ec(EXIT_ERROR)
4604        if is_slow_msgpack():
4605            logger.warning(PURE_PYTHON_MSGPACK_WARNING)
4606        if args.debug_profile:
4607            # Import only when needed - avoids a further increase in startup time
4608            import cProfile
4609            import marshal
4610            logger.debug('Writing execution profile to %s', args.debug_profile)
4611            # Open the file early, before running the main program, to avoid
4612            # a very late crash in case the specified path is invalid.
4613            with open(args.debug_profile, 'wb') as fd:
4614                profiler = cProfile.Profile()
4615                variables = dict(locals())
4616                profiler.enable()
4617                try:
4618                    return set_ec(func(args))
4619                finally:
4620                    profiler.disable()
4621                    profiler.snapshot_stats()
4622                    if args.debug_profile.endswith('.pyprof'):
4623                        marshal.dump(profiler.stats, fd)
4624                    else:
4625                        # We use msgpack here instead of the marshal module used by cProfile itself,
4626                        # because the latter is insecure. Since these files may be shared over the
4627                        # internet we don't want a format that is impossible to interpret outside
4628                        # an insecure implementation.
4629                        # See scripts/msgpack2marshal.py for a small script that turns a msgpack file
4630                        # into a marshal file that can be read by e.g. pyprof2calltree.
4631                        # For local use it's unnecessary hassle, though, that's why .pyprof makes
4632                        # it compatible (see above).
4633                        msgpack.pack(profiler.stats, fd, use_bin_type=True)
4634        else:
4635            return set_ec(func(args))
4636
4637
4638def sig_info_handler(sig_no, stack):  # pragma: no cover
4639    """search the stack for infos about the currently processed file and print them"""
4640    with signal_handler(sig_no, signal.SIG_IGN):
4641        for frame in inspect.getouterframes(stack):
4642            func, loc = frame[3], frame[0].f_locals
4643            if func in ('process_file', '_process', ):  # create op
4644                path = loc['path']
4645                try:
4646                    pos = loc['fd'].tell()
4647                    total = loc['st'].st_size
4648                except Exception:
4649                    pos, total = 0, 0
4650                logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total)))
4651                break
4652            if func in ('extract_item', ):  # extract op
4653                path = loc['item'].path
4654                try:
4655                    pos = loc['fd'].tell()
4656                except Exception:
4657                    pos = 0
4658                logger.info("{0} {1}/???".format(path, format_file_size(pos)))
4659                break
4660
4661
4662def sig_trace_handler(sig_no, stack):  # pragma: no cover
4663    print('\nReceived SIGUSR2 at %s, dumping trace...' % datetime.now().replace(microsecond=0), file=sys.stderr)
4664    faulthandler.dump_traceback()
4665
4666
4667def main():  # pragma: no cover
4668    # Make sure stdout and stderr have errors='replace' to avoid unicode
4669    # issues when print()-ing unicode file names
4670    sys.stdout = ErrorIgnoringTextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True)
4671    sys.stderr = ErrorIgnoringTextIOWrapper(sys.stderr.buffer, sys.stderr.encoding, 'replace', line_buffering=True)
4672
4673    # If we receive SIGINT (ctrl-c), SIGTERM (kill) or SIGHUP (kill -HUP),
4674    # catch them and raise a proper exception that can be handled for an
4675    # orderly exit.
4676    # SIGHUP is important especially for systemd systems, where logind
4677    # sends it when a session exits, in addition to any traditional use.
4678    # Output some info if we receive SIGUSR1 or SIGINFO (ctrl-t).
4679
4680    # Register fault handler for SIGSEGV, SIGFPE, SIGABRT, SIGBUS and SIGILL.
4681    faulthandler.enable()
4682    with signal_handler('SIGINT', raising_signal_handler(KeyboardInterrupt)), \
4683         signal_handler('SIGHUP', raising_signal_handler(SigHup)), \
4684         signal_handler('SIGTERM', raising_signal_handler(SigTerm)), \
4685         signal_handler('SIGUSR1', sig_info_handler), \
4686         signal_handler('SIGUSR2', sig_trace_handler), \
4687         signal_handler('SIGINFO', sig_info_handler):
4688        archiver = Archiver()
4689        msg = msgid = tb = None
4690        tb_log_level = logging.ERROR
4691        try:
4692            args = archiver.get_args(sys.argv, os.environ.get('SSH_ORIGINAL_COMMAND'))
4693        except Error as e:
4694            msg = e.get_message()
4695            tb_log_level = logging.ERROR if e.traceback else logging.DEBUG
4696            tb = '%s\n%s' % (traceback.format_exc(), sysinfo())
4697            # we might not have logging setup yet, so get out quickly
4698            print(msg, file=sys.stderr)
4699            if tb_log_level == logging.ERROR:
4700                print(tb, file=sys.stderr)
4701            sys.exit(e.exit_code)
4702        try:
4703            exit_code = archiver.run(args)
4704        except Error as e:
4705            msg = e.get_message()
4706            msgid = type(e).__qualname__
4707            tb_log_level = logging.ERROR if e.traceback else logging.DEBUG
4708            tb = "%s\n%s" % (traceback.format_exc(), sysinfo())
4709            exit_code = e.exit_code
4710        except RemoteRepository.RPCError as e:
4711            important = e.exception_class not in ('LockTimeout', ) and e.traceback
4712            msgid = e.exception_class
4713            tb_log_level = logging.ERROR if important else logging.DEBUG
4714            if important:
4715                msg = e.exception_full
4716            else:
4717                msg = e.get_message()
4718            tb = '\n'.join('Borg server: ' + l for l in e.sysinfo.splitlines())
4719            tb += "\n" + sysinfo()
4720            exit_code = EXIT_ERROR
4721        except Exception:
4722            msg = 'Local Exception'
4723            msgid = 'Exception'
4724            tb_log_level = logging.ERROR
4725            tb = '%s\n%s' % (traceback.format_exc(), sysinfo())
4726            exit_code = EXIT_ERROR
4727        except KeyboardInterrupt:
4728            msg = 'Keyboard interrupt'
4729            tb_log_level = logging.DEBUG
4730            tb = '%s\n%s' % (traceback.format_exc(), sysinfo())
4731            exit_code = EXIT_SIGNAL_BASE + 2
4732        except SigTerm:
4733            msg = 'Received SIGTERM'
4734            msgid = 'Signal.SIGTERM'
4735            tb_log_level = logging.DEBUG
4736            tb = '%s\n%s' % (traceback.format_exc(), sysinfo())
4737            exit_code = EXIT_SIGNAL_BASE + 15
4738        except SigHup:
4739            msg = 'Received SIGHUP.'
4740            msgid = 'Signal.SIGHUP'
4741            exit_code = EXIT_SIGNAL_BASE + 1
4742        if msg:
4743            logger.error(msg, msgid=msgid)
4744        if tb:
4745            logger.log(tb_log_level, tb)
4746        if args.show_rc:
4747            rc_logger = logging.getLogger('borg.output.show-rc')
4748            exit_msg = 'terminating with %s status, rc %d'
4749            if exit_code == EXIT_SUCCESS:
4750                rc_logger.info(exit_msg % ('success', exit_code))
4751            elif exit_code == EXIT_WARNING:
4752                rc_logger.warning(exit_msg % ('warning', exit_code))
4753            elif exit_code == EXIT_ERROR:
4754                rc_logger.error(exit_msg % ('error', exit_code))
4755            elif exit_code >= EXIT_SIGNAL_BASE:
4756                rc_logger.error(exit_msg % ('signal', exit_code))
4757            else:
4758                rc_logger.error(exit_msg % ('abnormal', exit_code or 666))
4759        sys.exit(exit_code)
4760
4761
4762if __name__ == '__main__':
4763    main()
4764