1# borg cli interface / toplevel archiver code 2 3import sys 4import traceback 5 6try: 7 import argparse 8 import collections 9 import configparser 10 import faulthandler 11 import functools 12 import hashlib 13 import inspect 14 import itertools 15 import json 16 import logging 17 import os 18 import re 19 import shlex 20 import shutil 21 import signal 22 import stat 23 import subprocess 24 import tarfile 25 import textwrap 26 import time 27 from binascii import unhexlify, hexlify 28 from contextlib import contextmanager 29 from datetime import datetime, timedelta 30 from itertools import zip_longest 31 32 from .logger import create_logger, setup_logging 33 34 logger = create_logger() 35 36 import borg 37 from . import __version__ 38 from . import helpers 39 from . import shellpattern 40 from .algorithms.checksums import crc32 41 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special 42 from .archive import BackupError, BackupOSError, backup_io, has_link 43 from .cache import Cache, assert_secure, SecurityManager 44 from .constants import * # NOQA 45 from .compress import CompressionSpec 46 from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required, RepoKey, PassphraseKey 47 from .crypto.keymanager import KeyManager 48 from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, EXIT_SIGNAL_BASE 49 from .helpers import Error, NoManifestError, set_ec 50 from .helpers import positive_int_validator, location_validator, archivename_validator, ChunkerParams, Location 51 from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, HUMAN_SORT_KEYS, FilesCacheMode 52 from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter 53 from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive 54 from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict 55 from .helpers import interval, prune_within, prune_split 56 from .helpers import timestamp 57 
from .helpers import get_cache_dir 58 from .helpers import Manifest 59 from .helpers import hardlinkable 60 from .helpers import StableDict 61 from .helpers import check_python, check_extension_modules 62 from .helpers import dir_is_tagged, is_slow_msgpack, is_supported_msgpack, yes, sysinfo 63 from .helpers import log_multi 64 from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm 65 from .helpers import ErrorIgnoringTextIOWrapper 66 from .helpers import ProgressIndicatorPercent 67 from .helpers import basic_json_data, json_print 68 from .helpers import replace_placeholders 69 from .helpers import ChunkIteratorFileWrapper 70 from .helpers import popen_with_error_handling, prepare_subprocess_env 71 from .helpers import dash_open 72 from .helpers import umount 73 from .helpers import msgpack, msgpack_fallback 74 from .helpers import uid2user, gid2group 75 from .nanorst import rst_to_terminal 76 from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern 77 from .patterns import PatternMatcher 78 from .item import Item 79 from .platform import get_flags, get_process_id, SyncFile 80 from .remote import RepositoryServer, RemoteRepository, cache_if_remote 81 from .repository import Repository, LIST_SCAN_LIMIT, TAG_PUT, TAG_DELETE, TAG_COMMIT 82 from .selftest import selftest 83 from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader 84except BaseException: 85 # an unhandled exception in the try-block would cause the borg cli command to exit with rc 1 due to python's 86 # default behavior, see issue #4424. 87 # as borg defines rc 1 as WARNING, this would be a mismatch, because a crash should be an ERROR (rc 2). 88 traceback.print_exc() 89 sys.exit(2) # == EXIT_ERROR 90 91assert EXIT_ERROR == 2, "EXIT_ERROR is not 2, as expected - fix assert AND exception handler right above this line." 
# Header line for the statistics table printed by `borg create --stats` (see do_create).
# NOTE(review): column-alignment spacing may have been collapsed by text extraction — verify against upstream.
STATS_HEADER = " Original size Compressed size Deduplicated size"

PURE_PYTHON_MSGPACK_WARNING = "Using a pure-python msgpack! This will result in lower performance."


def argument(args, str_or_bool):
    """If bool is passed, return it. If str is passed, retrieve named attribute from args."""
    if isinstance(str_or_bool, str):
        return getattr(args, str_or_bool)
    if isinstance(str_or_bool, (list, tuple)):
        # any() semantics: True if ANY of the named args attributes is truthy
        return any(getattr(args, item) for item in str_or_bool)
    return str_or_bool


def with_repository(fake=False, invert_fake=False, create=False, lock=True,
                    exclusive=False, manifest=True, cache=False, secure=True,
                    compatibility=None):
    """
    Method decorator for subcommand-handling methods: do_XYZ(self, args, repository, …)

    If a parameter (where allowed) is a str the attribute named of args is used instead.
    :param fake: (str or bool) use None instead of repository, don't do anything else
    :param create: create repository
    :param lock: lock repository
    :param exclusive: (str or bool) lock repository exclusively (for writing)
    :param manifest: load manifest and key, pass them as keyword arguments
    :param cache: open cache, pass it as keyword argument (implies manifest)
    :param secure: do assert_secure after loading manifest
    :param compatibility: mandatory if not create and (manifest or cache), specifies mandatory feature categories to check
    """

    # Sanity-check the compatibility argument at decoration time so misuse fails
    # immediately on import, not at some later subcommand invocation.
    if not create and (manifest or cache):
        if compatibility is None:
            raise AssertionError("with_repository decorator used without compatibility argument")
        if type(compatibility) is not tuple:
            raise AssertionError("with_repository decorator compatibility argument must be of type tuple")
    else:
        if compatibility is not None:
            raise AssertionError("with_repository called with compatibility argument but would not check" + repr(compatibility))
        if create:
            compatibility = Manifest.NO_OPERATION_CHECK

    # To process the `--bypass-lock` option if specified, we need to
    # modify `lock` inside `wrapper`. Therefore we cannot use the
    # `nonlocal` statement to access `lock` as modifications would also
    # affect the scope outside of `wrapper`. Subsequent calls would
    # only see the overwritten value of `lock`, not the original one.
    # The solution is to define a place holder variable `_lock` to
    # propagate the value into `wrapper`.
    _lock = lock

    def decorator(method):
        @functools.wraps(method)
        def wrapper(self, args, **kwargs):
            # args.lock (from --bypass-lock) overrides the decorator default, see note above.
            lock = getattr(args, 'lock', _lock)
            location = args.location  # note: 'location' must be always present in args
            append_only = getattr(args, 'append_only', False)
            storage_quota = getattr(args, 'storage_quota', None)
            make_parent_dirs = getattr(args, 'make_parent_dirs', False)
            if argument(args, fake) ^ invert_fake:
                # fake mode (e.g. dry-run): call the method with repository=None, do nothing else
                return method(self, args, repository=None, **kwargs)
            elif location.proto == 'ssh':
                repository = RemoteRepository(location, create=create, exclusive=argument(args, exclusive),
                                              lock_wait=self.lock_wait, lock=lock, append_only=append_only,
                                              make_parent_dirs=make_parent_dirs, args=args)
            else:
                repository = Repository(location.path, create=create, exclusive=argument(args, exclusive),
                                        lock_wait=self.lock_wait, lock=lock, append_only=append_only,
                                        storage_quota=storage_quota, make_parent_dirs=make_parent_dirs)
            with repository:
                if manifest or cache:
                    kwargs['manifest'], kwargs['key'] = Manifest.load(repository, compatibility)
                    if 'compression' in args:
                        kwargs['key'].compressor = args.compression.compressor
                    if secure:
                        assert_secure(repository, kwargs['manifest'], self.lock_wait)
                if cache:
                    with Cache(repository, kwargs['key'], kwargs['manifest'],
                               do_files=getattr(args, 'cache_files', False),
                               ignore_inode=getattr(args, 'ignore_inode', False),
                               progress=getattr(args, 'progress', False), lock_wait=self.lock_wait,
                               cache_mode=getattr(args, 'files_cache_mode', DEFAULT_FILES_CACHE_MODE)) as cache_:
                        return method(self, args, repository=repository, cache=cache_, **kwargs)
                else:
                    return method(self, args, repository=repository, **kwargs)
        return wrapper
    return decorator


def with_archive(method):
    """Method decorator: open args.location.archive and pass it as keyword argument ``archive``.

    Must be applied below (after) a @with_repository that supplies repository, key and manifest.
    """
    @functools.wraps(method)
    def wrapper(self, args, repository, key, manifest, **kwargs):
        archive = Archive(repository, key, manifest, args.location.archive,
                          numeric_owner=getattr(args, 'numeric_owner', False),
                          nobsdflags=getattr(args, 'nobsdflags', False),
                          noacls=getattr(args, 'noacls', False),
                          noxattrs=getattr(args, 'noxattrs', False),
                          cache=kwargs.get('cache'),
                          consider_part_files=args.consider_part_files, log_json=args.log_json)
        return method(self, args, repository=repository, manifest=manifest, key=key, archive=archive, **kwargs)
    return wrapper


def parse_storage_quota(storage_quota):
    """argparse type validator: parse a human-readable size, enforcing a 10M minimum quota."""
    parsed = parse_file_size(storage_quota)
    if parsed < parse_file_size('10M'):
        raise argparse.ArgumentTypeError('quota is too small (%s). At least 10M are required.' % storage_quota)
    return parsed


def get_func(args):
    # This works around https://bugs.python.org/issue9351
    # func is used at the leaf parsers of the argparse parser tree,
    # fallback_func at next level towards the root,
    # fallback2_func at the 2nd next level (which is root in our case).
    for name in 'func', 'fallback_func', 'fallback2_func':
        func = getattr(args, name, None)
        if func is not None:
            return func
    raise Exception('expected func attributes not found')


class Archiver:
    """Top-level borg command dispatcher: one do_XYZ method per subcommand.

    exit_code accumulates the worst severity seen so far (print_warning /
    print_error raise it to EXIT_WARNING / EXIT_ERROR without terminating).
    """

    def __init__(self, lock_wait=None, prog=None):
        self.exit_code = EXIT_SUCCESS
        self.lock_wait = lock_wait  # passed through to repository/cache locking
        self.prog = prog

    def print_error(self, msg, *args):
        """Log *msg* (``%``-formatted with *args* if given) as error and record EXIT_ERROR."""
        msg = args and msg % args or msg
        self.exit_code = EXIT_ERROR
        logger.error(msg)

    def print_warning(self, msg, *args):
        """Log *msg* (``%``-formatted with *args* if given) as warning and record EXIT_WARNING."""
        msg = args and msg % args or msg
        self.exit_code = EXIT_WARNING  # we do not terminate here, so it is a warning
        logger.warning(msg)

    def print_file_status(self, status, path):
        # Emit a one-char status + path line, honoring --list / --filter.
        # NOTE(review): relies on self.output_list / self.output_filter / self.log_json
        # having been set by the command handler (e.g. do_create) beforehand.
        if self.output_list and (self.output_filter is None or status in self.output_filter):
            if self.log_json:
                print(json.dumps({
                    'type': 'file_status',
                    'status': status,
                    'path': remove_surrogates(path),
                }), file=sys.stderr)
            else:
                logging.getLogger('borg.output.list').info("%1s %s", status, remove_surrogates(path))

    @staticmethod
    def compare_chunk_contents(chunks1, chunks2):
        """Compare two chunk iterators (like returned by :meth:`.DownloadPipeline.fetch_many`)"""
        # Chunk boundaries of the two iterators may differ, so compare the
        # underlying byte streams window-by-window using memoryviews (zero-copy).
        end = object()  # sentinel distinguishing exhaustion from any real chunk
        alen = ai = 0
        blen = bi = 0
        while True:
            if not alen - ai:  # current 'a' chunk fully consumed -> fetch next
                a = next(chunks1, end)
                if a is end:
                    # equal iff 'b' also has no remaining bytes and no further chunks
                    return not blen - bi and next(chunks2, end) is end
                a = memoryview(a)
                alen = len(a)
                ai = 0
            if not blen - bi:  # current 'b' chunk fully consumed -> fetch next
                b = next(chunks2, end)
                if b is end:
                    return not alen - ai and next(chunks1, end) is end
                b = memoryview(b)
                blen = len(b)
                bi = 0
            slicelen = min(alen - ai, blen - bi)
            if a[ai:ai + slicelen] != b[bi:bi + slicelen]:
                return False
            ai += slicelen
            bi += slicelen

    @staticmethod
    def build_matcher(inclexcl_patterns, include_paths):
        """Build a PatternMatcher from include/exclude patterns plus plain include paths."""
        matcher = PatternMatcher()
        matcher.add_inclexcl(inclexcl_patterns)
        matcher.add_includepaths(include_paths)
        return matcher

    def do_serve(self, args):
        """Start in server mode. This command is usually not used manually."""
        RepositoryServer(
            restrict_to_paths=args.restrict_to_paths,
            restrict_to_repositories=args.restrict_to_repositories,
            append_only=args.append_only,
            storage_quota=args.storage_quota,
        ).serve()
        return EXIT_SUCCESS

    @with_repository(create=True, exclusive=True, manifest=False)
    def do_init(self, args, repository):
        """Initialize an empty repository"""
        path = args.location.canonical_path()
        logger.info('Initializing repository at "%s"' % path)
        try:
            key = key_creator(repository, args)
        except (EOFError, KeyboardInterrupt):
            # user aborted key/passphrase entry -> remove the just-created repo again
            repository.destroy()
            return EXIT_WARNING
        manifest = Manifest(key, repository)
        manifest.key = key
        manifest.write()
        repository.commit()
        # create the cache once now, so later commands do not warn about a missing cache
        with Cache(repository, key, manifest, warn_if_unencrypted=False):
            pass
        if key.tam_required:
            # marker file: manifest authentication (TAM) is required for this repo
            tam_file = tam_required_file(repository)
            open(tam_file, 'w').close()
            logger.warning(
                '\n'
                'By default repositories initialized with this version will produce security\n'
                'errors if written to with an older version (up to and including Borg 1.0.8).\n'
                '\n'
                'If you want to use these older versions, you can disable the check by running:\n'
                'borg upgrade --disable-tam %s\n'
                '\n'
                'See https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability '
                'for details about the security implications.', shlex.quote(path))

        if key.NAME != 'plaintext':
            logger.warning(
                '\n'
                'IMPORTANT: you will need both KEY AND PASSPHRASE to access this repo!\n'
                'If you used a repokey mode, the key is stored in the repo, but you should back it up separately.\n'
                'Use "borg key export" to export the key, optionally in printable format.\n'
                'Write down the passphrase. Store both at safe place(s).\n')
        return self.exit_code

    @with_repository(exclusive=True, manifest=False)
    def do_check(self, args, repository):
        """Check repository consistency"""
        if args.repair:
            msg = ("This is a potentially dangerous function.\n"
                   "check --repair might lead to data loss (for kinds of corruption it is not\n"
                   "capable of dealing with). BE VERY CAREFUL!\n"
                   "\n"
                   "Type 'YES' if you understand this and want to continue: ")
            if not yes(msg, false_msg="Aborting.", invalid_msg="Invalid answer, aborting.",
                       truish=('YES', ), retry=False,
                       env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
                return EXIT_ERROR
        # archive-selection options make no sense when only the repository is checked
        if args.repo_only and any(
                (args.verify_data, args.first, args.last, args.prefix is not None, args.glob_archives)):
            self.print_error("--repository-only contradicts --first, --last, --prefix and --verify-data arguments.")
            return EXIT_ERROR
        if not args.archives_only:
            if not repository.check(repair=args.repair, save_space=args.save_space):
                return EXIT_WARNING
        if args.prefix is not None:
            # --prefix is just a special case of a glob
            args.glob_archives = args.prefix + '*'
        if not args.repo_only and not ArchiveChecker().check(
                repository, repair=args.repair, archive=args.location.archive,
                first=args.first, last=args.last, sort_by=args.sort_by or 'ts', glob=args.glob_archives,
                verify_data=args.verify_data, save_space=args.save_space):
            return EXIT_WARNING
        return EXIT_SUCCESS

    @with_repository(compatibility=(Manifest.Operation.CHECK,))
    def do_change_passphrase(self, args, repository, manifest, key):
        """Change repository key file passphrase"""
        # plaintext/unencrypted key classes have no change_passphrase method
        if not hasattr(key, 'change_passphrase'):
            print('This repository is not encrypted, cannot change the passphrase.')
            return EXIT_ERROR
        key.change_passphrase()
        logger.info('Key updated')
        if hasattr(key, 'find_key'):
            # print key location to make backing it up easier
            logger.info('Key location: %s', key.find_key())
        return EXIT_SUCCESS

    def do_change_passphrase_deprecated(self, args):
        # compatibility shim for the pre-1.1 command name; delegates to do_change_passphrase
        logger.warning('"borg change-passphrase" is deprecated and will be removed in Borg 1.2.\n'
                       'Use "borg key change-passphrase" instead.')
        return self.do_change_passphrase(args)

    @with_repository(lock=False, exclusive=False, manifest=False, cache=False)
    def do_key_export(self, args, repository):
        """Export the repository key for backup"""
        manager = KeyManager(repository)
        manager.load_keyblob()
        if args.paper:
            manager.export_paperkey(args.path)
        else:
            if not args.path:
                self.print_error("output file to export key to expected")
                return EXIT_ERROR
            try:
                if args.qr:
                    manager.export_qr(args.path)
                else:
                    manager.export(args.path)
            except IsADirectoryError:
                self.print_error("'{}' must be a file, not a directory".format(args.path))
                return EXIT_ERROR
        return EXIT_SUCCESS

    @with_repository(lock=False, exclusive=False, manifest=False, cache=False)
    def do_key_import(self, args, repository):
        """Import the repository key from backup"""
        manager = KeyManager(repository)
        if args.paper:
            if args.path:
                self.print_error("with --paper import from file is not supported")
                return EXIT_ERROR
            manager.import_paperkey(args)
        else:
            if not args.path:
                self.print_error("input file to import key from expected")
                return EXIT_ERROR
            # '-' means stdin, which obviously cannot be stat'ed
            if args.path != '-' and not os.path.exists(args.path):
                self.print_error("input file does not exist: " + args.path)
                return EXIT_ERROR
            manager.import_keyfile(args)
        return EXIT_SUCCESS

    @with_repository(manifest=False)
    def do_migrate_to_repokey(self, args, repository):
        """Migrate passphrase -> repokey"""
        # derive the new repokey from the old passphrase key's secrets, then store it in the repo
        manifest_data = repository.get(Manifest.MANIFEST_ID)
        key_old = PassphraseKey.detect(repository, manifest_data)
        key_new = RepoKey(repository)
        key_new.target = repository
        key_new.repository_id = repository.id
        key_new.enc_key = key_old.enc_key
        key_new.enc_hmac_key = key_old.enc_hmac_key
        key_new.id_key = key_old.id_key
        key_new.chunk_seed = key_old.chunk_seed
        key_new.change_passphrase()  # option to change key protection passphrase, save
        logger.info('Key updated')
        return EXIT_SUCCESS

    def do_benchmark_crud(self, args):
        """Benchmark Create, Read, Update, Delete for archives."""
        def measurement_run(repo, path):
            # run one full C/R/U/D cycle against *repo* using the test data in *path*,
            # returning the four wall-clock durations (seconds)
            archive = repo + '::borg-benchmark-crud'
            compression = '--compression=none'
            # measure create perf (without files cache to always have it chunking)
            t_start = time.monotonic()
            rc = self.do_create(self.parse_args(['create', compression, '--files-cache=disabled', archive + '1', path]))
            t_end = time.monotonic()
            dt_create = t_end - t_start
            assert rc == 0
            # now build files cache
            rc1 = self.do_create(self.parse_args(['create', compression, archive + '2', path]))
            rc2 = self.do_delete(self.parse_args(['delete', archive + '2']))
            assert rc1 == rc2 == 0
            # measure a no-change update (archive1 is still present)
            t_start = time.monotonic()
            rc1 = self.do_create(self.parse_args(['create', compression, archive + '3', path]))
            t_end = time.monotonic()
            dt_update = t_end - t_start
            rc2 = self.do_delete(self.parse_args(['delete', archive + '3']))
            assert rc1 == rc2 == 0
            # measure extraction (dry-run: without writing result to disk)
            t_start = time.monotonic()
            rc = self.do_extract(self.parse_args(['extract', '--dry-run', archive + '1']))
            t_end = time.monotonic()
            dt_extract = t_end - t_start
            assert rc == 0
            # measure archive deletion (of LAST present archive with the data)
            t_start = time.monotonic()
            rc = self.do_delete(self.parse_args(['delete', archive + '1']))
            t_end = time.monotonic()
            dt_delete = t_end - t_start
            assert rc == 0
            return dt_create, dt_update, dt_extract, dt_delete

        @contextmanager
        def test_files(path, count, size, random):
            # create *count* files of *size* bytes (all-zero or random) below *path*,
            # yield their directory, and always clean it up afterwards
            try:
                path = os.path.join(path, 'borg-test-data')
                os.makedirs(path)
                for i in range(count):
                    fname = os.path.join(path, 'file_%d' % i)
                    data = b'\0' * size if not random else os.urandom(size)
                    with SyncFile(fname, binary=True) as fd:  # used for posix_fadvise's sake
                        fd.write(data)
                yield path
            finally:
                shutil.rmtree(path)

        if '_BORG_BENCHMARK_CRUD_TEST' in os.environ:
            # tiny data set, used by the test suite to exercise this code path quickly
            tests = [
                ('Z-TEST', 1, 1, False),
                ('R-TEST', 1, 1, True),
            ]
        else:
            # (label, file count, file size, random content?)
            tests = [
                ('Z-BIG', 10, 100000000, False),
                ('R-BIG', 10, 100000000, True),
                ('Z-MEDIUM', 1000, 1000000, False),
                ('R-MEDIUM', 1000, 1000000, True),
                ('Z-SMALL', 10000, 10000, False),
                ('R-SMALL', 10000, 10000, True),
            ]

        for msg, count, size, random in tests:
            with test_files(args.path, count, size, random) as path:
                dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path)
            total_size_MB = count * size / 1e06
            file_size_formatted = format_file_size(size)
            content = 'random' if random else 'all-zero'
            fmt = '%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)'
            print(fmt % ('C', msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
            print(fmt % ('R', msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
            print(fmt % ('U', msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
            print(fmt % ('D', msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))

        return 0

    @with_repository(fake='dry_run', exclusive=True, compatibility=(Manifest.Operation.WRITE,))
    def do_create(self, args, repository, manifest=None, key=None):
        """Create new archive"""
        matcher = PatternMatcher(fallback=True)
        matcher.add_inclexcl(args.patterns)

        def create_inner(archive, cache):
            # archive/cache are None in dry-run mode (repository is faked away, see decorator)
            # Add cache dir to inode_skip list
            skip_inodes = set()
            try:
                st = os.stat(get_cache_dir())
                skip_inodes.add((st.st_ino, st.st_dev))
            except OSError:
                pass
            # Add local repository dir to inode_skip list
            if not args.location.host:
                try:
                    st = os.stat(args.location.path)
                    skip_inodes.add((st.st_ino, st.st_dev))
                except OSError:
                    pass
            logger.debug('Processing files ...')
            for path in args.paths:
                if path == '-':  # stdin
                    path = args.stdin_name
                    mode = args.stdin_mode
                    user = args.stdin_user
                    group = args.stdin_group
                    if not dry_run:
                        try:
                            status = archive.process_stdin(path, cache, mode, user, group)
                        except BackupOSError as e:
                            status = 'E'
                            self.print_warning('%s: %s', path, e)
                    else:
                        status = '-'
                    self.print_file_status(status, path)
                    continue
                path = os.path.normpath(path)
                try:
                    st = os.stat(path, follow_symlinks=False)
                except OSError as e:
                    self.print_warning('%s: %s', path, e)
                    continue
                if args.one_file_system:
                    restrict_dev = st.st_dev
                else:
                    restrict_dev = None
                self._process(archive, cache, matcher, args.exclude_caches, args.exclude_if_present,
                              args.keep_exclude_tags, skip_inodes, path, restrict_dev,
                              read_special=args.read_special, dry_run=dry_run, st=st)
                # if we get back here, we've finished recursing into <path>,
                # we do not ever want to get back in there (even if path is given twice as recursion root)
                skip_inodes.add((st.st_ino, st.st_dev))
            if not dry_run:
                archive.save(comment=args.comment, timestamp=args.timestamp)
                if args.progress:
                    archive.stats.show_progress(final=True)
                args.stats |= args.json  # --json implies --stats
                if args.stats:
                    if args.json:
                        json_print(basic_json_data(manifest, cache=cache, extra={
                            'archive': archive,
                        }))
                    else:
                        log_multi(DASHES,
                                  str(archive),
                                  DASHES,
                                  STATS_HEADER,
                                  str(archive.stats),
                                  str(cache),
                                  DASHES, logger=logging.getLogger('borg.output.stats'))

        # state read by print_file_status() / _process() during the traversal below
        self.output_filter = args.output_filter
        self.output_list = args.output_list
        self.exclude_nodump = args.exclude_nodump
        dry_run = args.dry_run
        t0 = datetime.utcnow()
        t0_monotonic = time.monotonic()
        logger.info('Creating archive at "%s"' % args.location.orig)
        if not dry_run:
            with Cache(repository, key, manifest, do_files=args.cache_files, progress=args.progress,
                       lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync,
                       cache_mode=args.files_cache_mode, ignore_inode=args.ignore_inode) as cache:
                archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                                  create=True, checkpoint_interval=args.checkpoint_interval,
                                  numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime, nobirthtime=args.nobirthtime,
                                  nobsdflags=args.nobsdflags, noacls=args.noacls, noxattrs=args.noxattrs, progress=args.progress,
                                  chunker_params=args.chunker_params, start=t0, start_monotonic=t0_monotonic,
                                  log_json=args.log_json)
                create_inner(archive, cache)
        else:
            create_inner(None, None)
        return self.exit_code

    def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
                 keep_exclude_tags, skip_inodes, path, restrict_dev,
                 read_special=False, dry_run=False, st=None):
        """
        Process *path* recursively according to the various parameters.

        *st* (if given) is a *os.stat_result* object for *path*.

        This should only raise on critical errors. Per-item errors must be handled within this method.
        """
        try:
            recurse_excluded_dir = False
            if matcher.match(path):
                if st is None:
                    with backup_io('stat'):
                        st = os.stat(path, follow_symlinks=False)
            else:
                self.print_file_status('x', path)
                # get out here as quickly as possible:
                # we only need to continue if we shall recurse into an excluded directory.
                # if we shall not recurse, then do not even touch (stat()) the item, it
                # could trigger an error, e.g. if access is forbidden, see #3209.
                if not matcher.recurse_dir:
                    return
                if st is None:
                    with backup_io('stat'):
                        st = os.stat(path, follow_symlinks=False)
                recurse_excluded_dir = stat.S_ISDIR(st.st_mode)
                if not recurse_excluded_dir:
                    return

            if (st.st_ino, st.st_dev) in skip_inodes:
                return
            # if restrict_dev is given, we do not want to recurse into a new filesystem,
            # but we WILL save the mountpoint directory (or more precise: the root
            # directory of the mounted filesystem that shadows the mountpoint dir).
            recurse = restrict_dev is None or st.st_dev == restrict_dev
            status = None
            if self.exclude_nodump:
                # Ignore if nodump flag is set
                with backup_io('flags'):
                    if get_flags(path, st) & stat.UF_NODUMP:
                        self.print_file_status('x', path)
                        return
            if stat.S_ISREG(st.st_mode):
                if not dry_run:
                    status = archive.process_file(path, st, cache)
            elif stat.S_ISDIR(st.st_mode):
                if recurse:
                    tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
                    if tag_paths:
                        # if we are already recursing in an excluded dir, we do not need to do anything else than
                        # returning (we do not need to archive or recurse into tagged directories), see #3991:
                        if not recurse_excluded_dir:
                            if keep_exclude_tags:
                                if not dry_run:
                                    archive.process_dir(path, st)
                                for tag_path in tag_paths:
                                    self._process(archive, cache, matcher, exclude_caches, exclude_if_present,
                                                  keep_exclude_tags, skip_inodes, tag_path, restrict_dev,
                                                  read_special=read_special, dry_run=dry_run)
                            self.print_file_status('x', path)
                        return
                if not dry_run:
                    if not recurse_excluded_dir:
                        status = archive.process_dir(path, st)
                if recurse:
                    with backup_io('scandir'):
                        entries = helpers.scandir_inorder(path)
                    for dirent in entries:
                        normpath = os.path.normpath(dirent.path)
                        self._process(archive, cache, matcher, exclude_caches, exclude_if_present,
                                      keep_exclude_tags, skip_inodes, normpath, restrict_dev,
                                      read_special=read_special, dry_run=dry_run)
            elif stat.S_ISLNK(st.st_mode):
                if not dry_run:
                    if not read_special:
                        status = archive.process_symlink(path, st)
                    else:
                        # --read-special: if the symlink points at a special file,
                        # archive the target's CONTENT instead of the symlink itself
                        try:
                            st_target = os.stat(path)
                        except OSError:
                            special = False
                        else:
                            special = is_special(st_target.st_mode)
                        if special:
                            status = archive.process_file(path, st_target, cache)
                        else:
                            status = archive.process_symlink(path, st)
            elif stat.S_ISFIFO(st.st_mode):
                if not dry_run:
                    if not read_special:
                        status = archive.process_fifo(path, st)
                    else:
                        status = archive.process_file(path, st, cache)
            elif stat.S_ISCHR(st.st_mode):
                if not dry_run:
                    if not read_special:
                        status = archive.process_dev(path, st, 'c')
                    else:
                        status = archive.process_file(path, st, cache)
            elif stat.S_ISBLK(st.st_mode):
                if not dry_run:
                    if not read_special:
                        status = archive.process_dev(path, st, 'b')
                    else:
                        status = archive.process_file(path, st, cache)
            elif stat.S_ISSOCK(st.st_mode):
                # Ignore unix sockets
                return
            elif stat.S_ISDOOR(st.st_mode):
                # Ignore Solaris doors
                return
            elif stat.S_ISPORT(st.st_mode):
                # Ignore Solaris event ports
                return
            else:
                self.print_warning('Unknown file type: %s', path)
                return
        except BackupOSError as e:
            self.print_warning('%s: %s', path, e)
            status = 'E'
        # Status output
        if status is None:
            if not dry_run:
                status = '?'  # need to add a status code somewhere
            else:
                status = '-'  # dry run, item was not backed up

        if not recurse_excluded_dir:
            self.print_file_status(status, path)

    @staticmethod
    def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
        """Build an item filter for archive iteration, also feeding the hardlink-master peek callback."""
        if strip_components:
            def item_filter(item):
                # an item whose path vanishes completely after stripping does NOT match
                matched = matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:])
                peek_and_store_hardlink_masters(item, matched)
                return matched
        else:
            def item_filter(item):
                matched = matcher.match(item.path)
                peek_and_store_hardlink_masters(item, matched)
                return matched
        return item_filter

    @with_repository(compatibility=(Manifest.Operation.READ,))
    @with_archive
    def do_extract(self, args, repository, manifest, key, archive):
        """Extract archive contents"""
        # be restrictive when restoring files, restore permissions later
        if sys.getfilesystemencoding() == 'ascii':
            logger.warning('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.')
            if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
                logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')

        matcher = self.build_matcher(args.patterns, args.paths)

        progress = args.progress
        output_list = args.output_list
        dry_run = args.dry_run
        stdout = args.stdout
        sparse = args.sparse
        strip_components = args.strip_components
        dirs = []  # directory items deferred so permissions are restored depth-first at the end
        partial_extract = not matcher.empty() or strip_components
        hardlink_masters = {} if partial_extract or not has_link else None

        def peek_and_store_hardlink_masters(item, matched):
            # not has_link:
            # OS does not have hardlink capability thus we need to remember the chunks so that
            # we can extract all hardlinks as separate normal (not-hardlinked) files instead.
            #
            # partial_extract and not matched and hardlinkable:
            # we do not extract the very first hardlink, so we need to remember the chunks
            # in hardlinks_master, so we can use them when we extract some 2nd+ hardlink item
            # that has no chunks list.
            if ((not has_link or (partial_extract and not matched and hardlinkable(item.mode))) and
                    (item.get('hardlink_master', True) and 'source' not in item)):
                hardlink_masters[item.get('path')] = (item.get('chunks'), None)

        filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
        if progress:
            pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1, msgid='extract')
            pi.output('Calculating total archive size for the progress indicator (might take long for large archives)')
            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
            pi.total = extracted_size
        else:
            pi = None

        for item in archive.iter_items(filter, partial_extract=partial_extract,
                                       preload=True, hardlink_masters=hardlink_masters):
            orig_path = item.path
            if strip_components:
                item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
            if not args.dry_run:
                # leaving a directory subtree: restore attrs of the deferred dirs now
                while dirs and not item.path.startswith(dirs[-1].path):
                    dir_item = dirs.pop(-1)
                    try:
                        archive.extract_item(dir_item, stdout=stdout)
                    except BackupOSError as e:
                        self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
            if output_list:
                logging.getLogger('borg.output.list').info(remove_surrogates(item.path))
            try:
                if dry_run:
                    archive.extract_item(item, dry_run=True, pi=pi)
                else:
                    if stat.S_ISDIR(item.mode):
                        dirs.append(item)
                        archive.extract_item(item, stdout=stdout, restore_attrs=False)
                    else:
                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
                                             stripped_components=strip_components, original_path=orig_path, pi=pi)
            except (BackupOSError, BackupError) as e:
                self.print_warning('%s: %s', remove_surrogates(orig_path), e)

        if pi:
            pi.finish()

        if not args.dry_run:
            pi = ProgressIndicatorPercent(total=len(dirs), msg='Setting directory permissions %3.0f%%',
                                          msgid='extract.permissions')
            while dirs:
                pi.show()
                dir_item = dirs.pop(-1)
                try:
                    archive.extract_item(dir_item, stdout=stdout)
                except BackupOSError as e:
                    self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
        for pattern in matcher.get_unmatched_include_patterns():
            self.print_warning("Include pattern '%s' never matched.", pattern)
        if pi:
            # clear progress output
            pi.finish()
        return self.exit_code

    @with_repository(compatibility=(Manifest.Operation.READ,))
    @with_archive
    def do_export_tar(self, args, repository, manifest, key, archive):
        """Export archive contents as a tarball"""
        self.output_list = args.output_list

        # A quick note about the general design of tar_filter and tarfile;
        # The tarfile module of Python can provide some compression mechanisms
        # by itself, using the builtin gzip, bz2 and lzma modules (and "tarmodes"
        # such as "w:xz").
        #
        # Doing so would have three major drawbacks:
        # For one the compressor runs on the same thread as the program using the
        # tarfile, stealing valuable CPU time from Borg and thus reducing throughput.
        # Then this limits the available options - what about lz4? Brotli? zstd?
        # The third issue is that systems can ship more optimized versions than those
        # built into Python, e.g. pigz or pxz, which can use more than one thread for
        # compression.
        #
        # Therefore we externalize compression by using a filter program, which has
        # none of these drawbacks. The only issue of using an external filter is
        # that it has to be installed -- hardly a problem, considering that
        # the decompressor must be installed as well to make use of the exported tarball!

        filter = None
        if args.tar_filter == 'auto':
            # Note that filter remains None if tarfile is '-'.
            if args.tarfile.endswith('.tar.gz'):
                filter = 'gzip'
            elif args.tarfile.endswith('.tar.bz2'):
                filter = 'bzip2'
            elif args.tarfile.endswith('.tar.xz'):
                filter = 'xz'
            logger.debug('Automatically determined tar filter: %s', filter)
        else:
            filter = args.tar_filter

        tarstream = dash_open(args.tarfile, 'wb')
        tarstream_close = args.tarfile != '-'  # never close stdout

        if filter:
            # When we put a filter between us and the final destination,
            # the selected output (tarstream until now) becomes the output of the filter (=filterout).
            # The decision whether to close that or not remains the same.
            filterout = tarstream
            filterout_close = tarstream_close
            env = prepare_subprocess_env(system=True)
            # There is no deadlock potential here (the subprocess docs warn about this), because
            # communication with the process is a one-way road, i.e. the process can never block
            # for us to do something while we block on the process for something different.
            filterproc = popen_with_error_handling(filter, stdin=subprocess.PIPE, stdout=filterout,
                                                   log_prefix='--tar-filter: ', env=env)
            if not filterproc:
                return EXIT_ERROR
            # Always close the pipe, otherwise the filter process would not notice when we are done.
            tarstream = filterproc.stdin
            tarstream_close = True

        # The | (pipe) symbol instructs tarfile to use a streaming mode of operation
        # where it never seeks on the passed fileobj.
        tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT)

        self._export_tar(args, archive, tar)

        # This does not close the fileobj (tarstream) we passed to it -- a side effect of the | mode.
905 tar.close() 906 907 if tarstream_close: 908 tarstream.close() 909 910 if filter: 911 logger.debug('Done creating tar, waiting for filter to die...') 912 rc = filterproc.wait() 913 if rc: 914 logger.error('--tar-filter exited with code %d, output file is likely unusable!', rc) 915 self.exit_code = EXIT_ERROR 916 else: 917 logger.debug('filter exited with code %d', rc) 918 919 if filterout_close: 920 filterout.close() 921 922 return self.exit_code 923 924 def _export_tar(self, args, archive, tar): 925 matcher = self.build_matcher(args.patterns, args.paths) 926 927 progress = args.progress 928 output_list = args.output_list 929 strip_components = args.strip_components 930 partial_extract = not matcher.empty() or strip_components 931 hardlink_masters = {} if partial_extract else None 932 933 def peek_and_store_hardlink_masters(item, matched): 934 if ((partial_extract and not matched and hardlinkable(item.mode)) and 935 (item.get('hardlink_master', True) and 'source' not in item)): 936 hardlink_masters[item.get('path')] = (item.get('chunks'), None) 937 938 filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components) 939 940 if progress: 941 pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract') 942 pi.output('Calculating size') 943 extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter)) 944 pi.total = extracted_size 945 else: 946 pi = None 947 948 def item_content_stream(item): 949 """ 950 Return a file-like object that reads from the chunks of *item*. 
951 """ 952 chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks], 953 is_preloaded=True) 954 if pi: 955 info = [remove_surrogates(item.path)] 956 return ChunkIteratorFileWrapper(chunk_iterator, 957 lambda read_bytes: pi.show(increase=len(read_bytes), info=info)) 958 else: 959 return ChunkIteratorFileWrapper(chunk_iterator) 960 961 def item_to_tarinfo(item, original_path): 962 """ 963 Transform a Borg *item* into a tarfile.TarInfo object. 964 965 Return a tuple (tarinfo, stream), where stream may be a file-like object that represents 966 the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item* 967 cannot be represented as a TarInfo object and should be skipped. 968 """ 969 970 # If we would use the PAX (POSIX) format (which we currently don't), 971 # we can support most things that aren't possible with classic tar 972 # formats, including GNU tar, such as: 973 # atime, ctime, possibly Linux capabilities (security.* xattrs) 974 # and various additions supported by GNU tar in POSIX mode. 975 976 stream = None 977 tarinfo = tarfile.TarInfo() 978 tarinfo.name = item.path 979 tarinfo.mtime = item.mtime / 1e9 980 tarinfo.mode = stat.S_IMODE(item.mode) 981 tarinfo.uid = item.uid 982 tarinfo.gid = item.gid 983 tarinfo.uname = item.user or '' 984 tarinfo.gname = item.group or '' 985 # The linkname in tar has the same dual use the 'source' attribute of Borg items, 986 # i.e. for symlinks it means the destination, while for hardlinks it refers to the 987 # file. 988 # Since hardlinks in tar have a different type code (LNKTYPE) the format might 989 # support hardlinking arbitrary objects (including symlinks and directories), but 990 # whether implementations actually support that is a whole different question... 
991 tarinfo.linkname = "" 992 993 modebits = stat.S_IFMT(item.mode) 994 if modebits == stat.S_IFREG: 995 tarinfo.type = tarfile.REGTYPE 996 if 'source' in item: 997 source = os.sep.join(item.source.split(os.sep)[strip_components:]) 998 if hardlink_masters is None: 999 linkname = source 1000 else: 1001 chunks, linkname = hardlink_masters.get(item.source, (None, source)) 1002 if linkname: 1003 # Master was already added to the archive, add a hardlink reference to it. 1004 tarinfo.type = tarfile.LNKTYPE 1005 tarinfo.linkname = linkname 1006 elif chunks is not None: 1007 # The item which has the chunks was not put into the tar, therefore 1008 # we do that now and update hardlink_masters to reflect that. 1009 item.chunks = chunks 1010 tarinfo.size = item.get_size() 1011 stream = item_content_stream(item) 1012 hardlink_masters[item.get('source') or original_path] = (None, item.path) 1013 else: 1014 tarinfo.size = item.get_size() 1015 stream = item_content_stream(item) 1016 elif modebits == stat.S_IFDIR: 1017 tarinfo.type = tarfile.DIRTYPE 1018 elif modebits == stat.S_IFLNK: 1019 tarinfo.type = tarfile.SYMTYPE 1020 tarinfo.linkname = item.source 1021 elif modebits == stat.S_IFBLK: 1022 tarinfo.type = tarfile.BLKTYPE 1023 tarinfo.devmajor = os.major(item.rdev) 1024 tarinfo.devminor = os.minor(item.rdev) 1025 elif modebits == stat.S_IFCHR: 1026 tarinfo.type = tarfile.CHRTYPE 1027 tarinfo.devmajor = os.major(item.rdev) 1028 tarinfo.devminor = os.minor(item.rdev) 1029 elif modebits == stat.S_IFIFO: 1030 tarinfo.type = tarfile.FIFOTYPE 1031 else: 1032 self.print_warning('%s: unsupported file type %o for tar export', remove_surrogates(item.path), modebits) 1033 set_ec(EXIT_WARNING) 1034 return None, stream 1035 return tarinfo, stream 1036 1037 for item in archive.iter_items(filter, partial_extract=partial_extract, 1038 preload=True, hardlink_masters=hardlink_masters): 1039 orig_path = item.path 1040 if strip_components: 1041 item.path = 
os.sep.join(orig_path.split(os.sep)[strip_components:]) 1042 tarinfo, stream = item_to_tarinfo(item, orig_path) 1043 if tarinfo: 1044 if output_list: 1045 logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) 1046 tar.addfile(tarinfo, stream) 1047 1048 if pi: 1049 pi.finish() 1050 1051 for pattern in matcher.get_unmatched_include_patterns(): 1052 self.print_warning("Include pattern '%s' never matched.", pattern) 1053 return self.exit_code 1054 1055 @with_repository(compatibility=(Manifest.Operation.READ,)) 1056 @with_archive 1057 def do_diff(self, args, repository, manifest, key, archive): 1058 """Diff contents of two archives""" 1059 def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2): 1060 chunks1 = archive1.pipeline.fetch_many(chunk_ids1) 1061 chunks2 = archive2.pipeline.fetch_many(chunk_ids2) 1062 return self.compare_chunk_contents(chunks1, chunks2) 1063 1064 def sum_chunk_size(item, consider_ids=None): 1065 if item.get('deleted'): 1066 size = None 1067 else: 1068 if consider_ids is not None: # consider only specific chunks 1069 size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids) 1070 else: # consider all chunks 1071 size = item.get_size() 1072 return size 1073 1074 def get_owner(item): 1075 if args.numeric_owner: 1076 return item.uid, item.gid 1077 else: 1078 return item.user, item.group 1079 1080 def get_mode(item): 1081 if 'mode' in item: 1082 return stat.filemode(item.mode) 1083 else: 1084 return [None] 1085 1086 def has_hardlink_master(item, hardlink_masters): 1087 return hardlinkable(item.mode) and item.get('source') in hardlink_masters 1088 1089 def compare_link(item1, item2): 1090 # These are the simple link cases. For special cases, e.g. if a 1091 # regular file is replaced with a link or vice versa, it is 1092 # indicated in compare_mode instead. 
1093 if item1.get('deleted'): 1094 return ({"type": 'added link'}, 'added link') 1095 elif item2.get('deleted'): 1096 return ({"type": 'removed link'}, 'removed link') 1097 elif 'source' in item1 and 'source' in item2 and item1.source != item2.source: 1098 return ({"type": 'changed link'}, 'changed link') 1099 1100 def contents_changed(item1, item2): 1101 if item1.get('deleted') != item2.get('deleted'): 1102 # a deleleted/non-existing file is considered different to an existing file, 1103 # even if the latter is empty. 1104 return True 1105 if can_compare_chunk_ids: 1106 return item1.chunks != item2.chunks 1107 else: 1108 if sum_chunk_size(item1) != sum_chunk_size(item2): 1109 return True 1110 else: 1111 chunk_ids1 = [c.id for c in item1.chunks] 1112 chunk_ids2 = [c.id for c in item2.chunks] 1113 return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2) 1114 1115 def compare_content(path, item1, item2): 1116 if contents_changed(item1, item2): 1117 if item1.get('deleted'): 1118 sz = sum_chunk_size(item2) 1119 return ({"type": "added", "size": sz}, 'added {:>13}'.format(format_file_size(sz))) 1120 if item2.get('deleted'): 1121 sz = sum_chunk_size(item1) 1122 return ({"type": "removed", "size": sz}, 'removed {:>11}'.format(format_file_size(sz))) 1123 if not can_compare_chunk_ids: 1124 return ({"type": "modified"}, "modified") 1125 chunk_ids1 = {c.id for c in item1.chunks} 1126 chunk_ids2 = {c.id for c in item2.chunks} 1127 added_ids = chunk_ids2 - chunk_ids1 1128 removed_ids = chunk_ids1 - chunk_ids2 1129 added = sum_chunk_size(item2, added_ids) 1130 removed = sum_chunk_size(item1, removed_ids) 1131 return ({"type": "modified", "added": added, "removed": removed}, 1132 '{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True), 1133 format_file_size(-removed, precision=1, sign=True))) 1134 1135 def compare_directory(item1, item2): 1136 if item2.get('deleted') and not item1.get('deleted'): 1137 return ({"type": 'removed directory'}, 
'removed directory') 1138 elif item1.get('deleted') and not item2.get('deleted'): 1139 return ({"type": 'added directory'}, 'added directory') 1140 1141 def compare_owner(item1, item2): 1142 user1, group1 = get_owner(item1) 1143 user2, group2 = get_owner(item2) 1144 if user1 != user2 or group1 != group2: 1145 return ({"type": "owner", "old_user": user1, "old_group": group1, "new_user": user2, "new_group": group2}, 1146 '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)) 1147 1148 def compare_mode(item1, item2): 1149 if item1.mode != item2.mode: 1150 mode1 = get_mode(item1) 1151 mode2 = get_mode(item2) 1152 return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2)) 1153 1154 def compare_items(output, path, item1, item2, hardlink_masters, deleted=False): 1155 """ 1156 Compare two items with identical paths. 1157 :param deleted: Whether one of the items has been deleted 1158 """ 1159 changes = [] 1160 1161 if has_hardlink_master(item1, hardlink_masters): 1162 item1 = hardlink_masters[item1.source][0] 1163 1164 if has_hardlink_master(item2, hardlink_masters): 1165 item2 = hardlink_masters[item2.source][1] 1166 1167 if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l': 1168 changes.append(compare_link(item1, item2)) 1169 1170 if 'chunks' in item1 and 'chunks' in item2: 1171 changes.append(compare_content(path, item1, item2)) 1172 1173 if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd': 1174 changes.append(compare_directory(item1, item2)) 1175 1176 if not deleted: 1177 changes.append(compare_owner(item1, item2)) 1178 changes.append(compare_mode(item1, item2)) 1179 1180 # changes is a list of paths, changesets: [(path1, [{changeset1}, ..]), (path2, [{changeset1}, ..]), ..] 1181 changes = [x for x in changes if x] 1182 if changes: 1183 output_line = (remove_surrogates(path), changes) 1184 1185 # if sorting, save changes for later, otherwise go ahead and output the results as they are generated. 
1186 if args.sort: 1187 output.append(output_line) 1188 elif args.json_lines: 1189 print_json_output(output_line) 1190 else: 1191 print_text_output(output_line) 1192 1193 def print_text_output(line): 1194 path, diff = line 1195 print("{:<19} {}".format(' '.join([txt for j, txt in diff]), path)) 1196 1197 def print_json_output(line): 1198 path, diff = line 1199 print(json.dumps({"path": path, "changes": [j for j, txt in diff]})) 1200 1201 def compare_archives(archive1, archive2, matcher): 1202 def hardlink_master_seen(item): 1203 return 'source' not in item or not hardlinkable(item.mode) or item.source in hardlink_masters 1204 1205 def is_hardlink_master(item): 1206 return item.get('hardlink_master', True) and 'source' not in item 1207 1208 def update_hardlink_masters(item1, item2): 1209 if is_hardlink_master(item1) or is_hardlink_master(item2): 1210 hardlink_masters[item1.path] = (item1, item2) 1211 1212 def compare_or_defer(item1, item2): 1213 update_hardlink_masters(item1, item2) 1214 if not hardlink_master_seen(item1) or not hardlink_master_seen(item2): 1215 deferred.append((item1, item2)) 1216 else: 1217 compare_items(output, item1.path, item1, item2, hardlink_masters) 1218 1219 orphans_archive1 = collections.OrderedDict() 1220 orphans_archive2 = collections.OrderedDict() 1221 deferred = [] 1222 hardlink_masters = {} 1223 output = [] 1224 1225 for item1, item2 in zip_longest( 1226 archive1.iter_items(lambda item: matcher.match(item.path)), 1227 archive2.iter_items(lambda item: matcher.match(item.path)), 1228 ): 1229 if item1 and item2 and item1.path == item2.path: 1230 compare_or_defer(item1, item2) 1231 continue 1232 if item1: 1233 matching_orphan = orphans_archive2.pop(item1.path, None) 1234 if matching_orphan: 1235 compare_or_defer(item1, matching_orphan) 1236 else: 1237 orphans_archive1[item1.path] = item1 1238 if item2: 1239 matching_orphan = orphans_archive1.pop(item2.path, None) 1240 if matching_orphan: 1241 compare_or_defer(matching_orphan, item2) 1242 
else: 1243 orphans_archive2[item2.path] = item2 1244 # At this point orphans_* contain items that had no matching partner in the other archive 1245 deleted_item = Item( 1246 deleted=True, 1247 chunks=[], 1248 mode=0, 1249 ) 1250 for added in orphans_archive2.values(): 1251 path = added.path 1252 deleted_item.path = path 1253 update_hardlink_masters(deleted_item, added) 1254 compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True) 1255 for deleted in orphans_archive1.values(): 1256 path = deleted.path 1257 deleted_item.path = path 1258 update_hardlink_masters(deleted, deleted_item) 1259 compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True) 1260 for item1, item2 in deferred: 1261 assert hardlink_master_seen(item1) 1262 assert hardlink_master_seen(item2) 1263 compare_items(output, item1.path, item1, item2, hardlink_masters) 1264 1265 print_output = print_json_output if args.json_lines else print_text_output 1266 1267 # if we wanted sorted output (args.sort is true), then results are collected in 'output' and 1268 # need to be sort them before printing. Otherwise results were already printed and 'output' is empty. 
1269 for line in sorted(output): 1270 print_output(line) 1271 1272 archive1 = archive 1273 archive2 = Archive(repository, key, manifest, args.archive2, 1274 consider_part_files=args.consider_part_files) 1275 1276 can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get( 1277 'chunker_params', True) or args.same_chunker_params 1278 if not can_compare_chunk_ids: 1279 self.print_warning('--chunker-params might be different between archives, diff will be slow.\n' 1280 'If you know for certain that they are the same, pass --same-chunker-params ' 1281 'to override this check.') 1282 1283 matcher = self.build_matcher(args.patterns, args.paths) 1284 1285 compare_archives(archive1, archive2, matcher) 1286 1287 for pattern in matcher.get_unmatched_include_patterns(): 1288 self.print_warning("Include pattern '%s' never matched.", pattern) 1289 1290 return self.exit_code 1291 1292 @with_repository(exclusive=True, cache=True, compatibility=(Manifest.Operation.CHECK,)) 1293 @with_archive 1294 def do_rename(self, args, repository, manifest, key, cache, archive): 1295 """Rename an existing archive""" 1296 archive.rename(args.name) 1297 manifest.write() 1298 repository.commit() 1299 cache.commit() 1300 return self.exit_code 1301 1302 @with_repository(exclusive=True, manifest=False) 1303 def do_delete(self, args, repository): 1304 """Delete an existing repository or archives""" 1305 archive_filter_specified = any((args.first, args.last, args.prefix is not None, args.glob_archives)) 1306 explicit_archives_specified = args.location.archive or args.archives 1307 if archive_filter_specified and explicit_archives_specified: 1308 self.print_error('Mixing archive filters and explicitly named archives is not supported.') 1309 return self.exit_code 1310 if archive_filter_specified or explicit_archives_specified: 1311 return self._delete_archives(args, repository) 1312 else: 1313 return self._delete_repository(args, repository) 1314 1315 def 
_delete_archives(self, args, repository): 1316 """Delete archives""" 1317 dry_run = args.dry_run 1318 1319 manifest, key = Manifest.load(repository, (Manifest.Operation.DELETE,)) 1320 1321 if args.location.archive or args.archives: 1322 archives = list(args.archives) 1323 if args.location.archive: 1324 archives.insert(0, args.location.archive) 1325 archive_names = tuple(archives) 1326 else: 1327 archive_names = tuple(x.name for x in manifest.archives.list_considering(args)) 1328 if not archive_names: 1329 return self.exit_code 1330 1331 if args.forced == 2: 1332 deleted = False 1333 for i, archive_name in enumerate(archive_names, 1): 1334 try: 1335 current_archive = manifest.archives.pop(archive_name) 1336 except KeyError: 1337 self.exit_code = EXIT_WARNING 1338 logger.warning('Archive {} not found ({}/{}).'.format(archive_name, i, len(archive_names))) 1339 else: 1340 deleted = True 1341 msg = 'Would delete: {} ({}/{})' if dry_run else 'Deleted archive: {} ({}/{})' 1342 logger.info(msg.format(format_archive(current_archive), i, len(archive_names))) 1343 if dry_run: 1344 logger.info('Finished dry-run.') 1345 elif deleted: 1346 manifest.write() 1347 # note: might crash in compact() after committing the repo 1348 repository.commit() 1349 logger.warning('Done. Run "borg check --repair" to clean up the mess.') 1350 else: 1351 logger.warning('Aborted.') 1352 return self.exit_code 1353 1354 stats = Statistics() 1355 with Cache(repository, key, manifest, progress=args.progress, lock_wait=self.lock_wait) as cache: 1356 msg_delete = 'Would delete archive: {} ({}/{})' if dry_run else 'Deleting archive: {} ({}/{})' 1357 msg_not_found = 'Archive {} not found ({}/{}).' 
1358 for i, archive_name in enumerate(archive_names, 1): 1359 try: 1360 archive_info = manifest.archives[archive_name] 1361 except KeyError: 1362 logger.warning(msg_not_found.format(archive_name, i, len(archive_names))) 1363 else: 1364 logger.info(msg_delete.format(format_archive(archive_info), i, len(archive_names))) 1365 if not dry_run: 1366 archive = Archive(repository, key, manifest, archive_name, cache=cache, 1367 consider_part_files=args.consider_part_files) 1368 archive.delete(stats, progress=args.progress, forced=args.forced) 1369 if not dry_run: 1370 manifest.write() 1371 repository.commit(save_space=args.save_space) 1372 cache.commit() 1373 if args.stats: 1374 log_multi(DASHES, 1375 STATS_HEADER, 1376 stats.summary.format(label='Deleted data:', stats=stats), 1377 str(cache), 1378 DASHES, logger=logging.getLogger('borg.output.stats')) 1379 1380 return self.exit_code 1381 1382 def _delete_repository(self, args, repository): 1383 """Delete a repository""" 1384 dry_run = args.dry_run 1385 1386 if not args.cache_only: 1387 msg = [] 1388 try: 1389 manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) 1390 except NoManifestError: 1391 msg.append("You requested to completely DELETE the repository *including* all archives it may " 1392 "contain.") 1393 msg.append("This repository seems to have no manifest, so we can't tell anything about its " 1394 "contents.") 1395 else: 1396 msg.append("You requested to completely DELETE the repository *including* all archives it " 1397 "contains:") 1398 for archive_info in manifest.archives.list(sort_by=['ts']): 1399 msg.append(format_archive(archive_info)) 1400 msg.append("Type 'YES' if you understand this and want to continue: ") 1401 msg = '\n'.join(msg) 1402 if not yes(msg, false_msg="Aborting.", invalid_msg='Invalid answer, aborting.', truish=('YES',), 1403 retry=False, env_var_override='BORG_DELETE_I_KNOW_WHAT_I_AM_DOING'): 1404 self.exit_code = EXIT_ERROR 1405 return self.exit_code 1406 if not dry_run: 
1407 repository.destroy() 1408 logger.info("Repository deleted.") 1409 SecurityManager.destroy(repository) 1410 else: 1411 logger.info("Would delete repository.") 1412 if not dry_run: 1413 Cache.destroy(repository) 1414 logger.info("Cache deleted.") 1415 else: 1416 logger.info("Would delete cache.") 1417 return self.exit_code 1418 1419 def do_mount(self, args): 1420 """Mount archive or an entire repository as a FUSE filesystem""" 1421 # Perform these checks before opening the repository and asking for a passphrase. 1422 1423 try: 1424 import borg.fuse 1425 except ImportError as e: 1426 self.print_error('borg mount not available: loading FUSE support failed [ImportError: %s]' % str(e)) 1427 return self.exit_code 1428 1429 if not os.path.isdir(args.mountpoint) or not os.access(args.mountpoint, os.R_OK | os.W_OK | os.X_OK): 1430 self.print_error('%s: Mountpoint must be a writable directory' % args.mountpoint) 1431 return self.exit_code 1432 1433 return self._do_mount(args) 1434 1435 @with_repository(compatibility=(Manifest.Operation.READ,)) 1436 def _do_mount(self, args, repository, manifest, key): 1437 from .fuse import FuseOperations 1438 1439 with cache_if_remote(repository, decrypted_cache=key) as cached_repo: 1440 operations = FuseOperations(key, repository, manifest, args, cached_repo) 1441 logger.info("Mounting filesystem") 1442 try: 1443 operations.mount(args.mountpoint, args.options, args.foreground) 1444 except RuntimeError: 1445 # Relevant error message already printed to stderr by FUSE 1446 self.exit_code = EXIT_ERROR 1447 return self.exit_code 1448 1449 def do_umount(self, args): 1450 """un-mount the FUSE filesystem""" 1451 return umount(args.mountpoint) 1452 1453 @with_repository(compatibility=(Manifest.Operation.READ,)) 1454 def do_list(self, args, repository, manifest, key): 1455 """List archive or repository contents""" 1456 if args.location.archive: 1457 if args.json: 1458 self.print_error('The --json option is only valid for listing archives, not 
archive contents.') 1459 return self.exit_code 1460 return self._list_archive(args, repository, manifest, key) 1461 else: 1462 if args.json_lines: 1463 self.print_error('The --json-lines option is only valid for listing archive contents, not archives.') 1464 return self.exit_code 1465 return self._list_repository(args, repository, manifest, key) 1466 1467 def _list_archive(self, args, repository, manifest, key): 1468 matcher = self.build_matcher(args.patterns, args.paths) 1469 if args.format is not None: 1470 format = args.format 1471 elif args.short: 1472 format = "{path}{NL}" 1473 else: 1474 format = "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}" 1475 1476 def _list_inner(cache): 1477 archive = Archive(repository, key, manifest, args.location.archive, cache=cache, 1478 consider_part_files=args.consider_part_files) 1479 1480 formatter = ItemFormatter(archive, format, json_lines=args.json_lines) 1481 for item in archive.iter_items(lambda item: matcher.match(item.path)): 1482 sys.stdout.write(formatter.format_item(item)) 1483 1484 # Only load the cache if it will be used 1485 if ItemFormatter.format_needs_cache(format): 1486 with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: 1487 _list_inner(cache) 1488 else: 1489 _list_inner(cache=None) 1490 1491 return self.exit_code 1492 1493 def _list_repository(self, args, repository, manifest, key): 1494 if args.format is not None: 1495 format = args.format 1496 elif args.short: 1497 format = "{archive}{NL}" 1498 else: 1499 format = "{archive:<36} {time} [{id}]{NL}" 1500 formatter = ArchiveFormatter(format, repository, manifest, key, json=args.json) 1501 1502 output_data = [] 1503 1504 for archive_info in manifest.archives.list_considering(args): 1505 if args.json: 1506 output_data.append(formatter.get_item_data(archive_info)) 1507 else: 1508 sys.stdout.write(formatter.format_item(archive_info)) 1509 1510 if args.json: 1511 json_print(basic_json_data(manifest, extra={ 1512 'archives': 
output_data 1513 })) 1514 1515 return self.exit_code 1516 1517 @with_repository(cache=True, compatibility=(Manifest.Operation.READ,)) 1518 def do_info(self, args, repository, manifest, key, cache): 1519 """Show archive details such as disk space used""" 1520 if any((args.location.archive, args.first, args.last, args.prefix is not None, args.glob_archives)): 1521 return self._info_archives(args, repository, manifest, key, cache) 1522 else: 1523 return self._info_repository(args, repository, manifest, key, cache) 1524 1525 def _info_archives(self, args, repository, manifest, key, cache): 1526 def format_cmdline(cmdline): 1527 return remove_surrogates(' '.join(shlex.quote(x) for x in cmdline)) 1528 1529 if args.location.archive: 1530 archive_names = (args.location.archive,) 1531 else: 1532 archive_names = tuple(x.name for x in manifest.archives.list_considering(args)) 1533 if not archive_names: 1534 return self.exit_code 1535 1536 output_data = [] 1537 1538 for i, archive_name in enumerate(archive_names, 1): 1539 archive = Archive(repository, key, manifest, archive_name, cache=cache, 1540 consider_part_files=args.consider_part_files) 1541 info = archive.info() 1542 if args.json: 1543 output_data.append(info) 1544 else: 1545 info['duration'] = format_timedelta(timedelta(seconds=info['duration'])) 1546 info['command_line'] = format_cmdline(info['command_line']) 1547 print(textwrap.dedent(""" 1548 Archive name: {name} 1549 Archive fingerprint: {id} 1550 Comment: {comment} 1551 Hostname: {hostname} 1552 Username: {username} 1553 Time (start): {start} 1554 Time (end): {end} 1555 Duration: {duration} 1556 Number of files: {stats[nfiles]} 1557 Command line: {command_line} 1558 Utilization of maximum supported archive size: {limits[max_archive_size]:.0%} 1559 ------------------------------------------------------------------------------ 1560 Original size Compressed size Deduplicated size 1561 This archive: {stats[original_size]:>20s} {stats[compressed_size]:>20s} 
{stats[deduplicated_size]:>20s} 1562 {cache} 1563 """).strip().format(cache=cache, **info)) 1564 if self.exit_code: 1565 break 1566 if not args.json and len(archive_names) - i: 1567 print() 1568 1569 if args.json: 1570 json_print(basic_json_data(manifest, cache=cache, extra={ 1571 'archives': output_data, 1572 })) 1573 return self.exit_code 1574 1575 def _info_repository(self, args, repository, manifest, key, cache): 1576 info = basic_json_data(manifest, cache=cache, extra={ 1577 'security_dir': cache.security_manager.dir, 1578 }) 1579 1580 if args.json: 1581 json_print(info) 1582 else: 1583 encryption = 'Encrypted: ' 1584 if key.NAME == 'plaintext': 1585 encryption += 'No' 1586 else: 1587 encryption += 'Yes (%s)' % key.NAME 1588 if key.NAME.startswith('key file'): 1589 encryption += '\nKey file: %s' % key.find_key() 1590 info['encryption'] = encryption 1591 1592 print(textwrap.dedent(""" 1593 Repository ID: {id} 1594 Location: {location} 1595 {encryption} 1596 Cache: {cache.path} 1597 Security dir: {security_dir} 1598 """).strip().format( 1599 id=bin_to_hex(repository.id), 1600 location=repository._location.canonical_path(), 1601 **info)) 1602 print(DASHES) 1603 print(STATS_HEADER) 1604 print(str(cache)) 1605 return self.exit_code 1606 1607 @with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,)) 1608 def do_prune(self, args, repository, manifest, key): 1609 """Prune repository archives according to specified rules""" 1610 if not any((args.secondly, args.minutely, args.hourly, args.daily, 1611 args.weekly, args.monthly, args.yearly, args.within)): 1612 self.print_error('At least one of the "keep-within", "keep-last", ' 1613 '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' 1614 '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.') 1615 return self.exit_code 1616 if args.prefix is not None: 1617 args.glob_archives = args.prefix + '*' 1618 checkpoint_re = r'\.checkpoint(\.\d+)?' 
1619 archives_checkpoints = manifest.archives.list(glob=args.glob_archives, 1620 match_end=r'(%s)?\Z' % checkpoint_re, 1621 sort_by=['ts'], reverse=True) 1622 is_checkpoint = re.compile(r'(%s)\Z' % checkpoint_re).search 1623 checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)] 1624 # keep the latest checkpoint, if there is no later non-checkpoint archive 1625 if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]: 1626 keep_checkpoints = checkpoints[:1] 1627 else: 1628 keep_checkpoints = [] 1629 checkpoints = set(checkpoints) 1630 # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup) 1631 # that is newer than a successfully completed backup - and killing the successful backup. 1632 archives = [arch for arch in archives_checkpoints if arch not in checkpoints] 1633 keep = [] 1634 if args.within: 1635 keep += prune_within(archives, args.within) 1636 if args.secondly: 1637 keep += prune_split(archives, '%Y-%m-%d %H:%M:%S', args.secondly, keep) 1638 if args.minutely: 1639 keep += prune_split(archives, '%Y-%m-%d %H:%M', args.minutely, keep) 1640 if args.hourly: 1641 keep += prune_split(archives, '%Y-%m-%d %H', args.hourly, keep) 1642 if args.daily: 1643 keep += prune_split(archives, '%Y-%m-%d', args.daily, keep) 1644 if args.weekly: 1645 keep += prune_split(archives, '%G-%V', args.weekly, keep) 1646 if args.monthly: 1647 keep += prune_split(archives, '%Y-%m', args.monthly, keep) 1648 if args.yearly: 1649 keep += prune_split(archives, '%Y', args.yearly, keep) 1650 to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) 1651 stats = Statistics() 1652 with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: 1653 list_logger = logging.getLogger('borg.output.list') 1654 if args.output_list: 1655 # set up counters for the progress display 1656 to_delete_len = len(to_delete) 1657 archives_deleted = 0 1658 pi = 
ProgressIndicatorPercent(total=len(to_delete), msg='Pruning archives %3.0f%%', msgid='prune')
            # iterate over *all* archives so --list can also report the kept ones
            for archive in archives_checkpoints:
                if archive in to_delete:
                    pi.show()
                    if args.dry_run:
                        if args.output_list:
                            list_logger.info('Would prune: %s' % format_archive(archive))
                    else:
                        if args.output_list:
                            archives_deleted += 1
                            list_logger.info('Pruning archive: %s (%d/%d)' % (format_archive(archive),
                                                                              archives_deleted, to_delete_len))
                        Archive(repository, key, manifest, archive.name, cache).delete(stats, forced=args.forced)
                else:
                    if args.output_list:
                        list_logger.info('Keeping archive: %s' % format_archive(archive))
            pi.finish()
            # only write/commit if we actually deleted something
            if to_delete and not args.dry_run:
                manifest.write()
                repository.commit(save_space=args.save_space)
                cache.commit()
            if args.stats:
                log_multi(DASHES,
                          STATS_HEADER,
                          stats.summary.format(label='Deleted data:', stats=stats),
                          str(cache),
                          DASHES, logger=logging.getLogger('borg.output.stats'))
        return self.exit_code

    @with_repository(fake=('tam', 'disable_tam'), invert_fake=True, manifest=False, exclusive=True)
    def do_upgrade(self, args, repository, manifest=None, key=None):
        """upgrade a repository from a previous version"""
        if args.tam:
            # --tam: turn on manifest authentication (tam_required) for this repository
            manifest, key = Manifest.load(repository, (Manifest.Operation.CHECK,), force_tam_not_required=args.force)

            if not hasattr(key, 'change_passphrase'):
                print('This repository is not encrypted, cannot enable TAM.')
                return EXIT_ERROR

            if not manifest.tam_verified or not manifest.config.get(b'tam_required', False):
                # The standard archive listing doesn't include the archive ID like in borg 1.1.x
                print('Manifest contents:')
                for archive_info in manifest.archives.list(sort_by=['ts']):
                    print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id))
                manifest.config[b'tam_required'] = True
                manifest.write()
                repository.commit()
            # also mark the key itself as requiring TAM (re-saving it via change_passphrase)
            if not key.tam_required:
                key.tam_required = True
                key.change_passphrase(key._passphrase)
                print('Key updated')
                if hasattr(key, 'find_key'):
                    print('Key location:', key.find_key())
            if not tam_required(repository):
                # create the marker file in the security database directory
                tam_file = tam_required_file(repository)
                open(tam_file, 'w').close()
                print('Updated security database')
        elif args.disable_tam:
            # --disable-tam: undo everything the --tam branch above sets up
            manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK, force_tam_not_required=True)
            if tam_required(repository):
                os.unlink(tam_required_file(repository))
            if key.tam_required:
                key.tam_required = False
                key.change_passphrase(key._passphrase)
                print('Key updated')
                if hasattr(key, 'find_key'):
                    print('Key location:', key.find_key())
            manifest.config[b'tam_required'] = False
            manifest.write()
            repository.commit()
        else:
            # mainly for upgrades from Attic repositories,
            # but also supports borg 0.xx -> 1.0 upgrade.

            repo = AtticRepositoryUpgrader(args.location.path, create=False)
            try:
                repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress)
            except NotImplementedError as e:
                print("warning: %s" % e)
            # try the borg 0.xx -> 1.0 upgrader as well; either upgrader signals
            # "not applicable" via NotImplementedError, which is only a warning here
            repo = BorgRepositoryUpgrader(args.location.path, create=False)
            try:
                repo.upgrade(args.dry_run, inplace=args.inplace, progress=args.progress)
            except NotImplementedError as e:
                print("warning: %s" % e)
        return self.exit_code

    @with_repository(cache=True, exclusive=True, compatibility=(Manifest.Operation.CHECK,))
    def do_recreate(self, args, repository, manifest, key, cache):
        """Re-create archives"""
        matcher = self.build_matcher(args.patterns, args.paths)
        self.output_list = args.output_list
        self.output_filter = args.output_filter
        # --recompress takes 'never' / 'if-different' / 'always' style values
        recompress = args.recompress != 'never'
        always_recompress = args.recompress == 'always'

        recreater = ArchiveRecreater(repository, manifest, key, cache, matcher,
                                     exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                     keep_exclude_tags=args.keep_exclude_tags, chunker_params=args.chunker_params,
                                     compression=args.compression, recompress=recompress, always_recompress=always_recompress,
                                     progress=args.progress, stats=args.stats,
                                     file_status_printer=self.print_file_status,
                                     checkpoint_interval=args.checkpoint_interval,
                                     dry_run=args.dry_run, timestamp=args.timestamp)

        if args.location.archive:
            # a single archive was given: recreate just that one
            name = args.location.archive
            if recreater.is_temporary_archive(name):
                self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
                return self.exit_code
            if not recreater.recreate(name, args.comment, args.target):
                self.print_error('Nothing to do. 
Archive was not processed.\n'
                                 'Specify at least one pattern, PATH, --comment, re-compression or re-chunking option.')
        else:
            # no archive given: recreate all archives in the repository
            if args.target is not None:
                self.print_error('--target: Need to specify single archive')
                return self.exit_code
            for archive in manifest.archives.list(sort_by=['ts']):
                name = archive.name
                if recreater.is_temporary_archive(name):
                    continue
                print('Processing', name)
                if not recreater.recreate(name, args.comment):
                    logger.info('Skipped archive %s: Nothing to do. Archive was not processed.', name)
        if not args.dry_run:
            manifest.write()
            repository.commit()
            cache.commit()
        return self.exit_code

    @with_repository(manifest=False, exclusive=True)
    def do_with_lock(self, args, repository):
        """run a user specified command with the repository lock held"""
        # for a new server, this will immediately take an exclusive lock.
        # to support old servers, that do not have "exclusive" arg in open()
        # RPC API, we also do it the old way:
        # re-write manifest to start a repository transaction - this causes a
        # lock upgrade to exclusive for remote (and also for local) repositories.
        # by using manifest=False in the decorator, we avoid having to require
        # the encryption key (and can operate just with encrypted data).
        data = repository.get(Manifest.MANIFEST_ID)
        repository.put(Manifest.MANIFEST_ID, data)
        # usually, a 0 byte (open for writing) segment file would be visible in the filesystem here.
        # we write and close this file, to rather have a valid segment file on disk, before invoking the subprocess.
1801 # we can only do this for local repositories (with .io), though: 1802 if hasattr(repository, 'io'): 1803 repository.io.close_segment() 1804 env = prepare_subprocess_env(system=True) 1805 try: 1806 # we exit with the return code we get from the subprocess 1807 return subprocess.call([args.command] + args.args, env=env) 1808 finally: 1809 # we need to commit the "no change" operation we did to the manifest 1810 # because it created a new segment file in the repository. if we would 1811 # roll back, the same file would be later used otherwise (for other content). 1812 # that would be bad if somebody uses rsync with ignore-existing (or 1813 # any other mechanism relying on existing segment data not changing). 1814 # see issue #1867. 1815 repository.commit() 1816 1817 @with_repository(exclusive=True, manifest=False) 1818 def do_config(self, args, repository): 1819 """get, set, and delete values in a repository or cache config file""" 1820 1821 def repo_validate(section, name, value=None, check_value=True): 1822 if section not in ['repository', ]: 1823 raise ValueError('Invalid section') 1824 if name in ['segments_per_dir', ]: 1825 if check_value: 1826 try: 1827 int(value) 1828 except ValueError: 1829 raise ValueError('Invalid value') from None 1830 elif name in ['max_segment_size', 'additional_free_space', 'storage_quota', ]: 1831 if check_value: 1832 try: 1833 parse_file_size(value) 1834 except ValueError: 1835 raise ValueError('Invalid value') from None 1836 if name == 'storage_quota': 1837 if parse_file_size(value) < parse_file_size('10M'): 1838 raise ValueError('Invalid value: storage_quota < 10M') 1839 elif name == 'max_segment_size': 1840 if parse_file_size(value) >= MAX_SEGMENT_SIZE_LIMIT: 1841 raise ValueError('Invalid value: max_segment_size >= %d' % MAX_SEGMENT_SIZE_LIMIT) 1842 elif name in ['append_only', ]: 1843 if check_value and value not in ['0', '1']: 1844 raise ValueError('Invalid value') 1845 elif name in ['id', ]: 1846 if check_value: 1847 try: 
1848 bin_id = unhexlify(value) 1849 except: 1850 raise ValueError('Invalid value, must be 64 hex digits') from None 1851 if len(bin_id) != 32: 1852 raise ValueError('Invalid value, must be 64 hex digits') 1853 else: 1854 raise ValueError('Invalid name') 1855 1856 def cache_validate(section, name, value=None, check_value=True): 1857 if section not in ['cache', ]: 1858 raise ValueError('Invalid section') 1859 if name in ['previous_location', ]: 1860 if check_value: 1861 Location(value) 1862 else: 1863 raise ValueError('Invalid name') 1864 1865 def list_config(config): 1866 default_values = { 1867 'version': '1', 1868 'segments_per_dir': str(DEFAULT_SEGMENTS_PER_DIR), 1869 'max_segment_size': str(MAX_SEGMENT_SIZE_LIMIT), 1870 'additional_free_space': '0', 1871 'storage_quota': repository.storage_quota, 1872 'append_only': repository.append_only 1873 } 1874 print('[repository]') 1875 for key in ['version', 'segments_per_dir', 'max_segment_size', 1876 'storage_quota', 'additional_free_space', 'append_only', 1877 'id']: 1878 value = config.get('repository', key, fallback=False) 1879 if value is None: 1880 value = default_values.get(key) 1881 if value is None: 1882 raise Error('The repository config is missing the %s key which has no default value' % key) 1883 print('%s = %s' % (key, value)) 1884 1885 if not args.list: 1886 if args.name is None: 1887 self.print_error('No config key name was provided.') 1888 return self.exit_code 1889 1890 try: 1891 section, name = args.name.split('.') 1892 except ValueError: 1893 section = args.cache and "cache" or "repository" 1894 name = args.name 1895 1896 if args.cache: 1897 manifest, key = Manifest.load(repository, (Manifest.Operation.WRITE,)) 1898 assert_secure(repository, manifest, self.lock_wait) 1899 cache = Cache(repository, key, manifest, lock_wait=self.lock_wait) 1900 1901 try: 1902 if args.cache: 1903 cache.cache_config.load() 1904 config = cache.cache_config._config 1905 save = cache.cache_config.save 1906 validate = 
cache_validate
            else:
                config = repository.config
                save = lambda: repository.save_config(repository.path, repository.config)
                validate = repo_validate

            if args.delete:
                validate(section, name, check_value=False)
                config.remove_option(section, name)
                # drop the section entirely once its last option is removed
                if len(config.options(section)) == 0:
                    config.remove_section(section)
                save()
            elif args.list:
                list_config(config)
            elif args.value:
                validate(section, name, args.value)
                if section not in config.sections():
                    config.add_section(section)
                config.set(section, name, args.value)
                save()
            else:
                # no value given: just print the current setting
                try:
                    print(config.get(section, name))
                except (configparser.NoOptionError, configparser.NoSectionError) as e:
                    print(e, file=sys.stderr)
                    return EXIT_WARNING
            return EXIT_SUCCESS
        finally:
            if args.cache:
                cache.close()

    def do_debug_info(self, args):
        """display system information for debugging / bug reports"""
        print(sysinfo())

        # Additional debug information
        print('CRC implementation:', crc32.__name__)
        print('Process ID:', get_process_id())
        return EXIT_SUCCESS

    @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
    def do_debug_dump_archive_items(self, args, repository, manifest, key):
        """dump (decrypted, decompressed) archive items metadata (not: data)"""
        archive = Archive(repository, key, manifest, args.location.archive,
                          consider_part_files=args.consider_part_files)
        # one file per item-metadata chunk, named by index and chunk id
        for i, item_id in enumerate(archive.metadata.items):
            data = key.decrypt(item_id, repository.get(item_id))
            filename = '%06d_%s.items' % (i, bin_to_hex(item_id))
            print('Dumping', filename)
            with open(filename, 'wb') as fd:
                fd.write(data)
        print('Done.')
        return EXIT_SUCCESS

    @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
    def do_debug_dump_archive(self, args, repository, manifest, key):
        """dump decoded archive 
metadata (not: data)"""

        try:
            archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
        except KeyError:
            raise Archive.DoesNotExist(args.location.archive)

        indent = 4

        def do_indent(d):
            # pretty-print a dict as indented JSON, shifted right by one level
            return textwrap.indent(json.dumps(d, indent=indent), prefix=' ' * indent)

        def output(fd):
            # this outputs megabytes of data for a modest sized archive, so some manual streaming json output
            fd.write('{\n')
            fd.write(' "_name": ' + json.dumps(args.location.archive) + ",\n")
            fd.write(' "_manifest_entry":\n')
            fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
            fd.write(',\n')

            data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id']))
            archive_org_dict = msgpack.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')

            fd.write(' "_meta":\n')
            fd.write(do_indent(prepare_dump_dict(archive_org_dict)))
            fd.write(',\n')
            fd.write(' "_items": [\n')

            # stream the item metadata chunks through the unpacker, emitting one
            # JSON object per item, comma-separated
            unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
            first = True
            for item_id in archive_org_dict[b'items']:
                data = key.decrypt(item_id, repository.get(item_id))
                unpacker.feed(data)
                for item in unpacker:
                    item = prepare_dump_dict(item)
                    if first:
                        first = False
                    else:
                        fd.write(',\n')
                    fd.write(do_indent(item))

            fd.write('\n')
            fd.write(' ]\n}\n')

        with dash_open(args.path, 'w') as fd:
            output(fd)
        return EXIT_SUCCESS

    @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
    def do_debug_dump_manifest(self, args, repository, manifest, key):
        """dump decoded repository manifest"""

        data = key.decrypt(None, repository.get(manifest.MANIFEST_ID))

        meta = prepare_dump_dict(msgpack_fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape'))

        with dash_open(args.path, 'w') as fd:
            json.dump(meta, fd, indent=4)
        return EXIT_SUCCESS

    @with_repository(manifest=False)
    def do_debug_dump_repo_objs(self, args, repository):
        """dump (decrypted, decompressed) repo objects, repo index MUST be current/correct"""
        from .crypto.key import key_factory

        def decrypt_dump(i, id, cdata, tag=None, segment=None, offset=None):
            # decrypt cdata (if any) and write it to a descriptively named dump file
            if cdata is not None:
                # the manifest is keyed specially, so pass no id for it
                give_id = id if id != Manifest.MANIFEST_ID else None
                data = key.decrypt(give_id, cdata)
            else:
                data = b''
            tag_str = '' if tag is None else '_' + tag
            segment_str = '_' + str(segment) if segment is not None else ''
            offset_str = '_' + str(offset) if offset is not None else ''
            id_str = '_' + bin_to_hex(id) if id is not None else ''
            filename = '%08d%s%s%s%s.obj' % (i, segment_str, offset_str, tag_str, id_str)
            print('Dumping', filename)
            with open(filename, 'wb') as fd:
                fd.write(data)

        if args.ghost:
            # dump ghosty stuff from segment files: not yet committed objects, deleted / superseded objects, commit tags

            # set up the key without depending on a manifest obj
            for id, cdata, tag, segment, offset in repository.scan_low_level():
                if tag == TAG_PUT:
                    key = key_factory(repository, cdata)
                    break
            i = 0
            for id, cdata, tag, segment, offset in repository.scan_low_level():
                if tag == TAG_PUT:
                    decrypt_dump(i, id, cdata, tag='put', segment=segment, offset=offset)
                elif tag == TAG_DELETE:
                    decrypt_dump(i, id, None, tag='del', segment=segment, offset=offset)
                elif tag == TAG_COMMIT:
                    decrypt_dump(i, None, None, tag='commit', segment=segment, offset=offset)
                i += 1
        else:
            # set up the key without depending on a manifest obj
            ids = repository.list(limit=1, marker=None)
            cdata = repository.get(ids[0])
            key = key_factory(repository, cdata)
            marker = None
            i = 0
            while True:
                result = repository.scan(limit=LIST_SCAN_LIMIT, 
marker=marker)  # must use on-disk order scanning here
                if not result:
                    break
                marker = result[-1]
                for id in result:
                    cdata = repository.get(id)
                    decrypt_dump(i, id, cdata)
                    i += 1
        print('Done.')
        return EXIT_SUCCESS

    @with_repository(manifest=False)
    def do_debug_search_repo_objs(self, args, repository):
        """search for byte sequences in repo objects, repo index MUST be current/correct"""
        # number of bytes shown before/after each finding
        context = 32

        def print_finding(info, wanted, data, offset):
            # show the finding with surrounding context, both as hex and as repr
            before = data[offset - context:offset]
            after = data[offset + len(wanted):offset + len(wanted) + context]
            print('%s: %s %s %s == %r %r %r' % (info, before.hex(), wanted.hex(), after.hex(),
                                                before, wanted, after))

        # parse the search term: "hex:<digits>" or "str:<utf-8 text>"
        wanted = args.wanted
        try:
            if wanted.startswith('hex:'):
                wanted = unhexlify(wanted[4:])
            elif wanted.startswith('str:'):
                wanted = wanted[4:].encode('utf-8')
            else:
                raise ValueError('unsupported search term')
        except (ValueError, UnicodeEncodeError):
            wanted = None
        if not wanted:
            self.print_error('search term needs to be hex:123abc or str:foobar style')
            return EXIT_ERROR

        from .crypto.key import key_factory
        # set up the key without depending on a manifest obj
        ids = repository.list(limit=1, marker=None)
        cdata = repository.get(ids[0])
        key = key_factory(repository, cdata)

        marker = None
        last_data = b''
        last_id = None
        i = 0
        while True:
            result = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker)  # must use on-disk order scanning here
            if not result:
                break
            marker = result[-1]
            for id in result:
                cdata = repository.get(id)
                give_id = id if id != Manifest.MANIFEST_ID else None
                data = key.decrypt(give_id, cdata)

                # try to locate wanted sequence crossing the border of last_data and data
                boundary_data = last_data[-(len(wanted) - 1):] + data[:len(wanted) - 1]
                if 
wanted in boundary_data:
                    # re-slice with context so print_finding can show surroundings
                    boundary_data = last_data[-(len(wanted) - 1 + context):] + data[:len(wanted) - 1 + context]
                    offset = boundary_data.find(wanted)
                    info = '%d %s | %s' % (i, last_id.hex(), id.hex())
                    print_finding(info, wanted, boundary_data, offset)

                # try to locate wanted sequence in data
                count = data.count(wanted)
                if count:
                    offset = data.find(wanted)  # only determine first occurrence's offset
                    info = "%d %s #%d" % (i, id.hex(), count)
                    print_finding(info, wanted, data, offset)

                last_id, last_data = id, data
                i += 1
                if i % 10000 == 0:
                    # periodic progress output for big repositories
                    print('%d objects processed.' % i)
        print('Done.')
        return EXIT_SUCCESS

    @with_repository(manifest=False)
    def do_debug_get_obj(self, args, repository):
        """get object contents from the repository and write it into file"""
        hex_id = args.id
        try:
            id = unhexlify(hex_id)
        except ValueError:
            print("object id %s is invalid." % hex_id)
        else:
            try:
                data = repository.get(id)
            except Repository.ObjectNotFound:
                print("object %s not found." % hex_id)
            else:
                with open(args.path, "wb") as f:
                    f.write(data)
                print("object %s fetched." % hex_id)
        return EXIT_SUCCESS

    @with_repository(manifest=False, exclusive=True)
    def do_debug_put_obj(self, args, repository):
        """put file(s) contents into the repository"""
        for path in args.paths:
            with open(path, "rb") as f:
                data = f.read()
            # the object id is the plain sha256 of the data
            h = hashlib.sha256(data)  # XXX hardcoded
            repository.put(h.digest(), data)
            print("object %s put." 
% h.hexdigest())
        repository.commit()
        return EXIT_SUCCESS

    @with_repository(manifest=False, exclusive=True)
    def do_debug_delete_obj(self, args, repository):
        """delete the objects with the given IDs from the repo"""
        modified = False
        for hex_id in args.ids:
            try:
                id = unhexlify(hex_id)
            except ValueError:
                print("object id %s is invalid." % hex_id)
            else:
                try:
                    repository.delete(id)
                    modified = True
                    print("object %s deleted." % hex_id)
                except Repository.ObjectNotFound:
                    print("object %s not found." % hex_id)
        if modified:
            # only commit if at least one delete succeeded
            repository.commit()
        print('Done.')
        return EXIT_SUCCESS

    @with_repository(manifest=False, exclusive=True, cache=True, compatibility=Manifest.NO_OPERATION_CHECK)
    def do_debug_refcount_obj(self, args, repository, manifest, key, cache):
        """display refcounts for the objects with the given IDs"""
        for hex_id in args.ids:
            try:
                id = unhexlify(hex_id)
            except ValueError:
                print("object id %s is invalid." % hex_id)
            else:
                try:
                    # chunks cache entry: (refcount, size, csize)
                    refcount = cache.chunks[id][0]
                    print("object %s has %d referrers [info from chunks cache]." % (hex_id, refcount))
                except KeyError:
                    print("object %s not found [info from chunks cache]." 
% hex_id)
        return EXIT_SUCCESS

    @with_repository(manifest=False, exclusive=True)
    def do_debug_dump_hints(self, args, repository):
        """dump repository hints"""
        if not repository._active_txn:
            # the hints are only loaded inside a repository transaction
            repository.prepare_txn(repository.get_transaction_id())
        try:
            hints = dict(
                segments=repository.segments,
                compact=repository.compact,
                storage_quota_use=repository.storage_quota_use,
            )
            with dash_open(args.path, 'w') as fd:
                json.dump(hints, fd, indent=4)
        finally:
            # read-only operation: never commit the transaction we opened above
            repository.rollback()
        return EXIT_SUCCESS

    def do_debug_convert_profile(self, args):
        """convert Borg profile to Python profile"""
        import marshal
        with args.output, args.input:
            marshal.dump(msgpack.unpack(args.input, use_list=False, encoding='utf-8'), args.output)
        return EXIT_SUCCESS

    @with_repository(lock=False, manifest=False)
    def do_break_lock(self, args, repository):
        """Break the repository lock (e.g. in case it was left by a dead borg)."""
        repository.break_lock()
        Cache.break_lock(repository)
        return self.exit_code

    helptext = collections.OrderedDict()
    helptext['patterns'] = textwrap.dedent('''
        The path/filenames used as input for the pattern matching start from the
        currently active recursion root. You usually give the recursion root(s)
        when invoking borg and these can be either relative or absolute paths.

        So, when you give `relative/` as root, the paths going into the matcher
        will look like `relative/.../file.ext`. When you give `/absolute/` as
        root, they will look like `/absolute/.../file.ext`. This is meant when
        we talk about "full path" below.

        File paths in Borg archives are always stored normalized and relative.
        This means that e.g. 
``borg create /path/to/repo ../some/path`` will 2257 store all files as `some/path/.../file.ext` and ``borg create 2258 /path/to/repo /home/user`` will store all files as 2259 `home/user/.../file.ext`. Therefore, always use relative paths in your 2260 patterns when matching archive content in commands like ``extract`` or 2261 ``mount``. Starting with Borg 1.2 this behaviour will be changed to 2262 accept both absolute and relative paths. 2263 2264 File patterns support these styles: fnmatch, shell, regular expressions, 2265 path prefixes and path full-matches. By default, fnmatch is used for 2266 ``--exclude`` patterns and shell-style is used for the experimental 2267 ``--pattern`` option. 2268 2269 If followed by a colon (':') the first two characters of a pattern are 2270 used as a style selector. Explicit style selection is necessary when a 2271 non-default style is desired or when the desired pattern starts with 2272 two alphanumeric characters followed by a colon (i.e. `aa:something/*`). 2273 2274 `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:` 2275 This is the default style for ``--exclude`` and ``--exclude-from``. 2276 These patterns use a variant of shell pattern syntax, with '\\*' matching 2277 any number of characters, '?' matching any single character, '[...]' 2278 matching any single character specified, including ranges, and '[!...]' 2279 matching any character not specified. For the purpose of these patterns, 2280 the path separator (backslash for Windows and '/' on other systems) is not 2281 treated specially. Wrap meta-characters in brackets for a literal 2282 match (i.e. `[?]` to match the literal character `?`). For a path 2283 to match a pattern, the full path must match, or it must match 2284 from the start of the full path to just before a path separator. Except 2285 for the root path, paths will never end in the path separator when 2286 matching is attempted. 
Thus, if a given pattern ends in a path 2287 separator, a '\\*' is appended before matching is attempted. 2288 2289 Shell-style patterns, selector `sh:` 2290 This is the default style for ``--pattern`` and ``--patterns-from``. 2291 Like fnmatch patterns these are similar to shell patterns. The difference 2292 is that the pattern may include `**/` for matching zero or more directory 2293 levels, `*` for matching zero or more arbitrary characters with the 2294 exception of any path separator. 2295 2296 Regular expressions, selector `re:` 2297 Regular expressions similar to those found in Perl are supported. Unlike 2298 shell patterns regular expressions are not required to match the full 2299 path and any substring match is sufficient. It is strongly recommended to 2300 anchor patterns to the start ('^'), to the end ('$') or both. Path 2301 separators (backslash for Windows and '/' on other systems) in paths are 2302 always normalized to a forward slash ('/') before applying a pattern. The 2303 regular expression syntax is described in the `Python documentation for 2304 the re module <https://docs.python.org/3/library/re.html>`_. 2305 2306 Path prefix, selector `pp:` 2307 This pattern style is useful to match whole sub-directories. The pattern 2308 `pp:root/somedir` matches `root/somedir` and everything therein. 2309 2310 Path full-match, selector `pf:` 2311 This pattern style is (only) useful to match full paths. 2312 This is kind of a pseudo pattern as it can not have any variable or 2313 unspecified parts - the full path must be given. 2314 `pf:root/file.ext` matches `root/file.ext` only. 2315 2316 Implementation note: this is implemented via very time-efficient O(1) 2317 hashtable lookups (this means you can have huge amounts of such patterns 2318 without impacting performance much). 2319 Due to that, this kind of pattern does not respect any context or order. 
2320 If you use such a pattern to include a file, it will always be included 2321 (if the directory recursion encounters it). 2322 Other include/exclude patterns that would normally match will be ignored. 2323 Same logic applies for exclude. 2324 2325 .. note:: 2326 2327 `re:`, `sh:` and `fm:` patterns are all implemented on top of the Python SRE 2328 engine. It is very easy to formulate patterns for each of these types which 2329 requires an inordinate amount of time to match paths. If untrusted users 2330 are able to supply patterns, ensure they cannot supply `re:` patterns. 2331 Further, ensure that `sh:` and `fm:` patterns only contain a handful of 2332 wildcards at most. 2333 2334 Exclusions can be passed via the command line option ``--exclude``. When used 2335 from within a shell, the patterns should be quoted to protect them from 2336 expansion. 2337 2338 The ``--exclude-from`` option permits loading exclusion patterns from a text 2339 file with one pattern per line. Lines empty or starting with the number sign 2340 ('#') after removing whitespace on both ends are ignored. The optional style 2341 selector prefix is also supported for patterns loaded from a file. Due to 2342 whitespace removal, paths with whitespace at the beginning or end can only be 2343 excluded using regular expressions. 2344 2345 To test your exclusion patterns without performing an actual backup you can 2346 run ``borg create --list --dry-run ...``. 
2347 2348 Examples:: 2349 2350 # Exclude '/home/user/file.o' but not '/home/user/file.odt': 2351 $ borg create -e '*.o' backup / 2352 2353 # Exclude '/home/user/junk' and '/home/user/subdir/junk' but 2354 # not '/home/user/importantjunk' or '/etc/junk': 2355 $ borg create -e '/home/*/junk' backup / 2356 2357 # Exclude the contents of '/home/user/cache' but not the directory itself: 2358 $ borg create -e /home/user/cache/ backup / 2359 2360 # The file '/home/user/cache/important' is *not* backed up: 2361 $ borg create -e /home/user/cache/ backup / /home/user/cache/important 2362 2363 # The contents of directories in '/home' are not backed up when their name 2364 # ends in '.tmp' 2365 $ borg create --exclude 're:^/home/[^/]+\\.tmp/' backup / 2366 2367 # Load exclusions from file 2368 $ cat >exclude.txt <<EOF 2369 # Comment line 2370 /home/*/junk 2371 *.tmp 2372 fm:aa:something/* 2373 re:^/home/[^/]+\\.tmp/ 2374 sh:/home/*/.thumbnails 2375 # Example with spaces, no need to escape as it is processed by borg 2376 some file with spaces.txt 2377 EOF 2378 $ borg create --exclude-from exclude.txt backup / 2379 2380 .. container:: experimental 2381 2382 A more general and easier to use way to define filename matching patterns exists 2383 with the experimental ``--pattern`` and ``--patterns-from`` options. Using these, you 2384 may specify the backup roots (starting points) and patterns for inclusion/exclusion. 2385 A root path starts with the prefix `R`, followed by a path (a plain path, not a 2386 file pattern). An include rule starts with the prefix +, an exclude rule starts 2387 with the prefix -, an exclude-norecurse rule starts with !, all followed by a pattern. 2388 2389 .. note:: 2390 2391 Via ``--pattern`` or ``--patterns-from`` you can define BOTH inclusion and exclusion 2392 of files using pattern prefixes ``+`` and ``-``. With ``--exclude`` and 2393 ``--exclude-from`` ONLY excludes are defined. 
        Inclusion patterns are useful to include paths that are contained in an excluded
        path. The first matching pattern is used so if an include pattern matches before
        an exclude pattern, the file is backed up. If an exclude-norecurse pattern matches
        a directory, it won't recurse into it and won't discover any potential matches for
        include rules below that directory.

        Note that the default pattern style for ``--pattern`` and ``--patterns-from`` is
        shell style (`sh:`), so those patterns behave similar to rsync include/exclude
        patterns. The pattern style can be set via the `P` prefix.

        Patterns (``--pattern``) and excludes (``--exclude``) from the command line are
        considered first (in the order of appearance). Then patterns from ``--patterns-from``
        are added. Exclusion patterns from ``--exclude-from`` files are appended last.

        Examples::

            # backup pics, but not the ones from 2018, except the good ones:
            # note: using = is essential to avoid cmdline argument parsing issues.
            borg create --pattern=+pics/2018/good --pattern=-pics/2018 repo::arch pics

            # use a file with patterns:
            borg create --patterns-from patterns.lst repo::arch

        The patterns.lst file could look like that::

            # "sh:" pattern style is the default, so the following line is not needed:
            P sh
            R /
            # can be rebuilt
            - /home/*/.cache
            # they're downloads for a reason
            - /home/*/Downloads
            # susan is a nice person
            # include susans home
            + /home/susan
            # don't backup the other home directories
            - /home/*
            # don't even look in /proc
            ! /proc\n\n''')
    helptext['placeholders'] = textwrap.dedent('''
        Repository (or Archive) URLs, ``--prefix``, ``--glob-archives``, ``--comment``
        and ``--remote-path`` values support these placeholders:

        {hostname}
            The (short) hostname of the machine.
2440 2441 {fqdn} 2442 The full name of the machine. 2443 2444 {reverse-fqdn} 2445 The full name of the machine in reverse domain name notation. 2446 2447 {now} 2448 The current local date and time, by default in ISO-8601 format. 2449 You can also supply your own `format string <https://docs.python.org/3.7/library/datetime.html#strftime-and-strptime-behavior>`_, e.g. {now:%Y-%m-%d_%H:%M:%S} 2450 2451 {utcnow} 2452 The current UTC date and time, by default in ISO-8601 format. 2453 You can also supply your own `format string <https://docs.python.org/3.7/library/datetime.html#strftime-and-strptime-behavior>`_, e.g. {utcnow:%Y-%m-%d_%H:%M:%S} 2454 2455 {user} 2456 The user name (or UID, if no name is available) of the user running borg. 2457 2458 {pid} 2459 The current process ID. 2460 2461 {borgversion} 2462 The version of borg, e.g.: 1.0.8rc1 2463 2464 {borgmajor} 2465 The version of borg, only the major version, e.g.: 1 2466 2467 {borgminor} 2468 The version of borg, only major and minor version, e.g.: 1.0 2469 2470 {borgpatch} 2471 The version of borg, only major, minor and patch version, e.g.: 1.0.8 2472 2473 If literal curly braces need to be used, double them for escaping:: 2474 2475 borg create /path/to/repo::{{literal_text}} 2476 2477 Examples:: 2478 2479 borg create /path/to/repo::{hostname}-{user}-{utcnow} ... 2480 borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ... 2481 borg prune --prefix '{hostname}-' ... 2482 2483 .. note:: 2484 systemd uses a difficult, non-standard syntax for command lines in unit files (refer to 2485 the `systemd.unit(5)` manual page). 2486 2487 When invoking borg from unit files, pay particular attention to escaping, 2488 especially when using the now/utcnow placeholders, since systemd performs its own 2489 %-based variable replacement even in quoted text. 
To avoid interference from systemd, 2490 double all percent signs (``{hostname}-{now:%Y-%m-%d_%H:%M:%S}`` 2491 becomes ``{hostname}-{now:%%Y-%%m-%%d_%%H:%%M:%%S}``).\n\n''') 2492 helptext['compression'] = textwrap.dedent(''' 2493 It is no problem to mix different compression methods in one repo, 2494 deduplication is done on the source data chunks (not on the compressed 2495 or encrypted data). 2496 2497 If some specific chunk was once compressed and stored into the repo, creating 2498 another backup that also uses this chunk will not change the stored chunk. 2499 So if you use different compression specs for the backups, whichever stores a 2500 chunk first determines its compression. See also borg recreate. 2501 2502 Compression is lz4 by default. If you want something else, you have to specify what you want. 2503 2504 Valid compression specifiers are: 2505 2506 none 2507 Do not compress. 2508 2509 lz4 2510 Use lz4 compression. Very high speed, very low compression. (default) 2511 2512 zstd[,L] 2513 Use zstd ("zstandard") compression, a modern wide-range algorithm. 2514 If you do not explicitely give the compression level L (ranging from 1 2515 to 22), it will use level 3. 2516 Archives compressed with zstd are not compatible with borg < 1.1.4. 2517 2518 zlib[,L] 2519 Use zlib ("gz") compression. Medium speed, medium compression. 2520 If you do not explicitely give the compression level L (ranging from 0 2521 to 9), it will use level 6. 2522 Giving level 0 (means "no compression", but still has zlib protocol 2523 overhead) is usually pointless, you better use "none" compression. 2524 2525 lzma[,L] 2526 Use lzma ("xz") compression. Low speed, high compression. 2527 If you do not explicitely give the compression level L (ranging from 0 2528 to 9), it will use level 6. 2529 Giving levels above 6 is pointless and counterproductive because it does 2530 not compress better due to the buffer size used by borg - but it wastes 2531 lots of CPU cycles and RAM. 

        auto,C[,L]
            Use a built-in heuristic to decide per chunk whether to compress or not.
            The heuristic tries with lz4 whether the data is compressible.
            For incompressible data, it will not use compression (uses "none").
            For compressible data, it uses the given C[,L] compression - with C[,L]
            being any valid compression specifier.

        Examples::

            borg create --compression lz4 REPO::ARCHIVE data
            borg create --compression zstd REPO::ARCHIVE data
            borg create --compression zstd,10 REPO::ARCHIVE data
            borg create --compression zlib REPO::ARCHIVE data
            borg create --compression zlib,1 REPO::ARCHIVE data
            borg create --compression auto,lzma,6 REPO::ARCHIVE data
            borg create --compression auto,lzma ...\n\n''')

    def do_help(self, parser, commands, args):
        # Dispatch "borg help [TOPIC]":
        # - no topic: print the top-level help,
        # - a known help topic: render its rst helptext for the terminal,
        # - a known subcommand: print its help (full, epilog-only, or
        #   usage-only, depending on the flags parsed into args),
        # - otherwise: error out, listing available commands and topics.
        if not args.topic:
            parser.print_help()
        elif args.topic in self.helptext:
            print(rst_to_terminal(self.helptext[args.topic]))
        elif args.topic in commands:
            if args.epilog_only:
                print(commands[args.topic].epilog)
            elif args.usage_only:
                # drop the epilog so print_help() emits only usage/options
                commands[args.topic].epilog = None
                commands[args.topic].print_help()
            else:
                commands[args.topic].print_help()
        else:
            msg_lines = []
            msg_lines += ['No help available on %s.' % args.topic]
            msg_lines += ['Try one of the following:']
            msg_lines += ['    Commands: %s' % ', '.join(sorted(commands.keys()))]
            msg_lines += ['    Topics: %s' % ', '.join(sorted(self.helptext.keys()))]
            parser.error('\n'.join(msg_lines))
        return self.exit_code

    def do_subcommand_help(self, parser, args):
        """display infos about subcommand"""
        parser.print_help()
        return EXIT_SUCCESS

    do_maincommand_help = do_subcommand_help

    def preprocess_args(self, args):
        # Rewrite deprecated command-line options in-place before parsing:
        # options with a replacement are renamed, options without one are left
        # as-is; either way a deprecation warning is printed to stderr.
        deprecations = [
            # ('--old', '--new' or None, 'Warning: "--old" has been deprecated. Use "--new" instead.'),
            ('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'),
            ('--keep-tag-files', '--keep-exclude-tags', 'Warning: "--keep-tag-files" has been deprecated. Use "--keep-exclude-tags" instead.'),
            ('--ignore-inode', None, 'Warning: "--ignore-inode" has been deprecated. Use "--files-cache=ctime,size" or "...=mtime,size" instead.'),
            ('--no-files-cache', None, 'Warning: "--no-files-cache" has been deprecated. Use "--files-cache=disabled" instead.'),
        ]
        # iterate over a copy (args[:]) because args is mutated while looping
        for i, arg in enumerate(args[:]):
            for old_name, new_name, warning in deprecations:
                if arg.startswith(old_name):
                    if new_name is not None:
                        args[i] = arg.replace(old_name, new_name)
                    print(warning, file=sys.stderr)
        return args

    class CommonOptions:
        """
        Support class to allow specifying common options directly after the top-level command.

        Normally options can only be specified on the parser defining them, which means
        that generally speaking *all* options go after all sub-commands. This is annoying
        for common options in scripts, e.g. --remote-path or logging options.

        This class allows adding the same set of options to both the top-level parser
        and the final sub-command parsers (but not intermediary sub-commands, at least for now).

        It does so by giving every option's target name ("dest") a suffix indicating its level
        -- no two options in the parser hierarchy can have the same target --
        then, after parsing the command line, multiple definitions are resolved.

        Defaults are handled by only setting them on the top-level parser and setting
        a sentinel object in all sub-parsers, which then allows to discern which parser
        supplied the option.
        """

        def __init__(self, define_common_options, suffix_precedence):
            """
            Remember the option-definition callback and the precedence order of parser levels.

            *define_common_options* should be a callable taking one argument, which
            will be an argparse.Parser.add_argument-like function.

            *define_common_options* will be called multiple times, and should call
            the passed function to define common options exactly the same way each time.

            *suffix_precedence* should be a tuple of the suffixes that will be used.
            It is ordered from lowest precedence to highest precedence:
            An option specified on the parser belonging to index 0 is overridden if the
            same option is specified on any parser with a higher index.
            """
            self.define_common_options = define_common_options
            self.suffix_precedence = suffix_precedence

            # Maps suffixes to sets of target names.
            # E.g. common_options["_subcommand"] = {..., "log_level", ...}
            self.common_options = dict()
            # Set of options with the 'append' action.
            self.append_options = set()
            # This is the sentinel object that replaces all default values in parsers
            # below the top-level parser.
            self.default_sentinel = object()

        def add_common_group(self, parser, suffix, provide_defaults=False):
            """
            Add common options to *parser*.

            *provide_defaults* must only be True exactly once in a parser hierarchy,
            at the top level, and False on all lower levels. The default is chosen
            accordingly.

            *suffix* indicates the suffix to use internally. It also indicates
            which precedence the *parser* has for common options. See *suffix_precedence*
            of __init__.
2651 """ 2652 assert suffix in self.suffix_precedence 2653 2654 def add_argument(*args, **kwargs): 2655 if 'dest' in kwargs: 2656 kwargs.setdefault('action', 'store') 2657 assert kwargs['action'] in ('help', 'store_const', 'store_true', 'store_false', 'store', 'append') 2658 is_append = kwargs['action'] == 'append' 2659 if is_append: 2660 self.append_options.add(kwargs['dest']) 2661 assert kwargs['default'] == [], 'The default is explicitly constructed as an empty list in resolve()' 2662 else: 2663 self.common_options.setdefault(suffix, set()).add(kwargs['dest']) 2664 kwargs['dest'] += suffix 2665 if not provide_defaults: 2666 # Interpolate help now, in case the %(default)d (or so) is mentioned, 2667 # to avoid producing incorrect help output. 2668 # Assumption: Interpolated output can safely be interpolated again, 2669 # which should always be the case. 2670 # Note: We control all inputs. 2671 kwargs['help'] = kwargs['help'] % kwargs 2672 if not is_append: 2673 kwargs['default'] = self.default_sentinel 2674 2675 common_group.add_argument(*args, **kwargs) 2676 2677 common_group = parser.add_argument_group('Common options') 2678 self.define_common_options(add_argument) 2679 2680 def resolve(self, args: argparse.Namespace): # Namespace has "in" but otherwise is not like a dict. 2681 """ 2682 Resolve the multiple definitions of each common option to the final value. 2683 """ 2684 for suffix in self.suffix_precedence: 2685 # From highest level to lowest level, so the "most-specific" option wins, e.g. 2686 # "borg --debug create --info" shall result in --info being effective. 2687 for dest in self.common_options.get(suffix, []): 2688 # map_from is this suffix' option name, e.g. log_level_subcommand 2689 # map_to is the target name, e.g. 
log_level 2690 map_from = dest + suffix 2691 map_to = dest 2692 # Retrieve value; depending on the action it may not exist, but usually does 2693 # (store_const/store_true/store_false), either because the action implied a default 2694 # or a default is explicitly supplied. 2695 # Note that defaults on lower levels are replaced with default_sentinel. 2696 # Only the top level has defaults. 2697 value = getattr(args, map_from, self.default_sentinel) 2698 if value is not self.default_sentinel: 2699 # value was indeed specified on this level. Transfer value to target, 2700 # and un-clobber the args (for tidiness - you *cannot* use the suffixed 2701 # names for other purposes, obviously). 2702 setattr(args, map_to, value) 2703 try: 2704 delattr(args, map_from) 2705 except AttributeError: 2706 pass 2707 2708 # Options with an "append" action need some special treatment. Instead of 2709 # overriding values, all specified values are merged together. 2710 for dest in self.append_options: 2711 option_value = [] 2712 for suffix in self.suffix_precedence: 2713 # Find values of this suffix, if any, and add them to the final list 2714 extend_from = dest + suffix 2715 if extend_from in args: 2716 values = getattr(args, extend_from) 2717 delattr(args, extend_from) 2718 option_value.extend(values) 2719 setattr(args, dest, option_value) 2720 2721 def build_parser(self): 2722 # You can use :ref:`xyz` in the following usage pages. However, for plain-text view, 2723 # e.g. through "borg ... --help", define a substitution for the reference here. 2724 # It will replace the entire :ref:`foo` verbatim. 
2725 rst_plain_text_references = { 2726 'a_status_oddity': '"I am seeing ‘A’ (added) status for a unchanged file!?"', 2727 'list_item_flags': '"Item flags"', 2728 } 2729 2730 def process_epilog(epilog): 2731 epilog = textwrap.dedent(epilog).splitlines() 2732 try: 2733 mode = borg.doc_mode 2734 except AttributeError: 2735 mode = 'command-line' 2736 if mode in ('command-line', 'build_usage'): 2737 epilog = [line for line in epilog if not line.startswith('.. man')] 2738 epilog = '\n'.join(epilog) 2739 if mode == 'command-line': 2740 epilog = rst_to_terminal(epilog, rst_plain_text_references) 2741 return epilog 2742 2743 def define_common_options(add_common_option): 2744 add_common_option('-h', '--help', action='help', help='show this help message and exit') 2745 add_common_option('--critical', dest='log_level', 2746 action='store_const', const='critical', default='warning', 2747 help='work on log level CRITICAL') 2748 add_common_option('--error', dest='log_level', 2749 action='store_const', const='error', default='warning', 2750 help='work on log level ERROR') 2751 add_common_option('--warning', dest='log_level', 2752 action='store_const', const='warning', default='warning', 2753 help='work on log level WARNING (default)') 2754 add_common_option('--info', '-v', '--verbose', dest='log_level', 2755 action='store_const', const='info', default='warning', 2756 help='work on log level INFO') 2757 add_common_option('--debug', dest='log_level', 2758 action='store_const', const='debug', default='warning', 2759 help='enable debug output, work on log level DEBUG') 2760 add_common_option('--debug-topic', metavar='TOPIC', dest='debug_topics', action='append', default=[], 2761 help='enable TOPIC debugging (can be specified multiple times). 
' 2762 'The logger path is borg.debug.<TOPIC> if TOPIC is not fully qualified.') 2763 add_common_option('-p', '--progress', dest='progress', action='store_true', 2764 help='show progress information') 2765 add_common_option('--log-json', dest='log_json', action='store_true', 2766 help='Output one JSON object per log line instead of formatted text.') 2767 add_common_option('--lock-wait', metavar='SECONDS', dest='lock_wait', type=int, default=1, 2768 help='wait at most SECONDS for acquiring a repository/cache lock (default: %(default)d).') 2769 add_common_option('--bypass-lock', dest='lock', action='store_false', 2770 default=argparse.SUPPRESS, # only create args attribute if option is specified 2771 help='Bypass locking mechanism') 2772 add_common_option('--show-version', dest='show_version', action='store_true', 2773 help='show/log the borg version') 2774 add_common_option('--show-rc', dest='show_rc', action='store_true', 2775 help='show/log the return code (rc)') 2776 add_common_option('--umask', metavar='M', dest='umask', type=lambda s: int(s, 8), default=UMASK_DEFAULT, 2777 help='set umask to M (local and remote, default: %(default)04o)') 2778 add_common_option('--remote-path', metavar='PATH', dest='remote_path', 2779 help='use PATH as borg executable on the remote (default: "borg")') 2780 add_common_option('--remote-ratelimit', metavar='RATE', dest='remote_ratelimit', type=int, 2781 help='set remote network upload rate limit in kiByte/s (default: 0=unlimited)') 2782 add_common_option('--consider-part-files', dest='consider_part_files', action='store_true', 2783 help='treat part files like normal files (e.g. to list/extract them)') 2784 add_common_option('--debug-profile', metavar='FILE', dest='debug_profile', default=None, 2785 help='Write execution profile in Borg format into FILE. 
For local use a Python-' 2786 'compatible file can be generated by suffixing FILE with ".pyprof".') 2787 add_common_option('--rsh', metavar='RSH', dest='rsh', 2788 help="Use this command to connect to the 'borg serve' process (default: 'ssh')") 2789 2790 def define_exclude_and_patterns(add_option, *, tag_files=False, strip_components=False): 2791 add_option('-e', '--exclude', metavar='PATTERN', dest='patterns', 2792 type=parse_exclude_pattern, action='append', 2793 help='exclude paths matching PATTERN') 2794 add_option('--exclude-from', metavar='EXCLUDEFILE', action=ArgparseExcludeFileAction, 2795 help='read exclude patterns from EXCLUDEFILE, one per line') 2796 add_option('--pattern', metavar='PATTERN', action=ArgparsePatternAction, 2797 help='experimental: include/exclude paths matching PATTERN') 2798 add_option('--patterns-from', metavar='PATTERNFILE', action=ArgparsePatternFileAction, 2799 help='experimental: read include/exclude patterns from PATTERNFILE, one per line') 2800 2801 if tag_files: 2802 add_option('--exclude-caches', dest='exclude_caches', action='store_true', 2803 help='exclude directories that contain a CACHEDIR.TAG file ' 2804 '(http://www.bford.info/cachedir/spec.html)') 2805 add_option('--exclude-if-present', metavar='NAME', dest='exclude_if_present', 2806 action='append', type=str, 2807 help='exclude directories that are tagged by containing a filesystem object with ' 2808 'the given NAME') 2809 add_option('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags', 2810 action='store_true', 2811 help='if tag objects are specified with ``--exclude-if-present``, ' 2812 'don\'t omit the tag objects themselves from the backup archive') 2813 2814 if strip_components: 2815 add_option('--strip-components', metavar='NUMBER', dest='strip_components', type=int, default=0, 2816 help='Remove the specified number of leading path elements. 
' 2817 'Paths with fewer elements will be silently skipped.') 2818 2819 def define_exclusion_group(subparser, **kwargs): 2820 exclude_group = subparser.add_argument_group('Exclusion options') 2821 define_exclude_and_patterns(exclude_group.add_argument, **kwargs) 2822 return exclude_group 2823 2824 def define_archive_filters_group(subparser, *, sort_by=True, first_last=True): 2825 filters_group = subparser.add_argument_group('Archive filters', 2826 'Archive filters can be applied to repository targets.') 2827 group = filters_group.add_mutually_exclusive_group() 2828 group.add_argument('-P', '--prefix', metavar='PREFIX', dest='prefix', type=PrefixSpec, default=None, 2829 help='only consider archive names starting with this prefix.') 2830 group.add_argument('-a', '--glob-archives', metavar='GLOB', dest='glob_archives', 2831 type=GlobSpec, default=None, 2832 help='only consider archive names matching the glob. ' 2833 'sh: rules apply, see "borg help patterns". ' 2834 '``--prefix`` and ``--glob-archives`` are mutually exclusive.') 2835 2836 if sort_by: 2837 sort_by_default = 'timestamp' 2838 filters_group.add_argument('--sort-by', metavar='KEYS', dest='sort_by', 2839 type=SortBySpec, default=sort_by_default, 2840 help='Comma-separated list of sorting keys; valid keys are: {}; default is: {}' 2841 .format(', '.join(HUMAN_SORT_KEYS), sort_by_default)) 2842 2843 if first_last: 2844 group = filters_group.add_mutually_exclusive_group() 2845 group.add_argument('--first', metavar='N', dest='first', default=0, type=positive_int_validator, 2846 help='consider first N archives after other filters were applied') 2847 group.add_argument('--last', metavar='N', dest='last', default=0, type=positive_int_validator, 2848 help='consider last N archives after other filters were applied') 2849 2850 parser = argparse.ArgumentParser(prog=self.prog, description='Borg - Deduplicated Backups', 2851 add_help=False) 2852 # paths and patterns must have an empty list as default everywhere 2853 
parser.set_defaults(fallback2_func=functools.partial(self.do_maincommand_help, parser), 2854 paths=[], patterns=[]) 2855 parser.common_options = self.CommonOptions(define_common_options, 2856 suffix_precedence=('_maincommand', '_midcommand', '_subcommand')) 2857 parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__, 2858 help='show version number and exit') 2859 parser.common_options.add_common_group(parser, '_maincommand', provide_defaults=True) 2860 2861 common_parser = argparse.ArgumentParser(add_help=False, prog=self.prog) 2862 common_parser.set_defaults(paths=[], patterns=[]) 2863 parser.common_options.add_common_group(common_parser, '_subcommand') 2864 2865 mid_common_parser = argparse.ArgumentParser(add_help=False, prog=self.prog) 2866 mid_common_parser.set_defaults(paths=[], patterns=[]) 2867 parser.common_options.add_common_group(mid_common_parser, '_midcommand') 2868 2869 mount_epilog = process_epilog(""" 2870 This command mounts an archive as a FUSE filesystem. This can be useful for 2871 browsing an archive or restoring individual files. Unless the ``--foreground`` 2872 option is given the command will run in the background until the filesystem 2873 is ``umounted``. 2874 2875 The command ``borgfs`` provides a wrapper for ``borg mount``. This can also be 2876 used in fstab entries: 2877 ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto 0 0`` 2878 2879 To allow a regular user to use fstab entries, add the ``user`` option: 2880 ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0`` 2881 2882 For FUSE configuration and mount options, see the mount.fuse(8) manual page. 2883 2884 Additional mount options supported by borg: 2885 2886 - versions: when used with a repository mount, this gives a merged, versioned 2887 view of the files in the archives. EXPERIMENTAL, layout may change in future. 
2888 - allow_damaged_files: by default damaged files (where missing chunks were 2889 replaced with runs of zeros by borg check ``--repair``) are not readable and 2890 return EIO (I/O error). Set this option to read such files. 2891 - ignore_permissions: for security reasons the "default_permissions" mount 2892 option is internally enforced by borg. "ignore_permissions" can be given to 2893 not enforce "default_permissions". 2894 2895 The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users 2896 to tweak the performance. It sets the number of cached data chunks; additional 2897 memory usage can be up to ~8 MiB times this number. The default is the number 2898 of CPU cores. 2899 2900 When the daemonized process receives a signal or crashes, it does not unmount. 2901 Unmounting in these cases could cause an active rsync or similar process 2902 to unintentionally delete data. 2903 2904 When running in the foreground ^C/SIGINT unmounts cleanly, but other 2905 signals or crashes do not. 
2906 """) 2907 2908 if parser.prog == 'borgfs': 2909 parser.description = self.do_mount.__doc__ 2910 parser.epilog = mount_epilog 2911 parser.formatter_class = argparse.RawDescriptionHelpFormatter 2912 parser.help = 'mount repository' 2913 subparser = parser 2914 else: 2915 subparsers = parser.add_subparsers(title='required arguments', metavar='<command>') 2916 subparser = subparsers.add_parser('mount', parents=[common_parser], add_help=False, 2917 description=self.do_mount.__doc__, 2918 epilog=mount_epilog, 2919 formatter_class=argparse.RawDescriptionHelpFormatter, 2920 help='mount repository') 2921 subparser.set_defaults(func=self.do_mount) 2922 subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(), 2923 help='repository or archive to mount') 2924 subparser.add_argument('mountpoint', metavar='MOUNTPOINT', type=str, 2925 help='where to mount filesystem') 2926 subparser.add_argument('-f', '--foreground', dest='foreground', 2927 action='store_true', 2928 help='stay in foreground, do not daemonize') 2929 subparser.add_argument('-o', dest='options', type=str, 2930 help='Extra mount options') 2931 define_archive_filters_group(subparser) 2932 subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, 2933 help='paths to extract; patterns are supported') 2934 define_exclusion_group(subparser, strip_components=True) 2935 if parser.prog == 'borgfs': 2936 return parser 2937 2938 serve_epilog = process_epilog(""" 2939 This command starts a repository server process. This command is usually not used manually. 
2940 """) 2941 subparser = subparsers.add_parser('serve', parents=[common_parser], add_help=False, 2942 description=self.do_serve.__doc__, epilog=serve_epilog, 2943 formatter_class=argparse.RawDescriptionHelpFormatter, 2944 help='start repository server process') 2945 subparser.set_defaults(func=self.do_serve) 2946 subparser.add_argument('--restrict-to-path', metavar='PATH', dest='restrict_to_paths', action='append', 2947 help='restrict repository access to PATH. ' 2948 'Can be specified multiple times to allow the client access to several directories. ' 2949 'Access to all sub-directories is granted implicitly; PATH doesn\'t need to directly point to a repository.') 2950 subparser.add_argument('--restrict-to-repository', metavar='PATH', dest='restrict_to_repositories', action='append', 2951 help='restrict repository access. Only the repository located at PATH ' 2952 '(no sub-directories are considered) is accessible. ' 2953 'Can be specified multiple times to allow the client access to several repositories. ' 2954 'Unlike ``--restrict-to-path`` sub-directories are not accessible; ' 2955 'PATH needs to directly point at a repository location. ' 2956 'PATH may be an empty directory or the last element of PATH may not exist, in which case ' 2957 'the client may initialize a repository there.') 2958 subparser.add_argument('--append-only', dest='append_only', action='store_true', 2959 help='only allow appending to repository segment files') 2960 subparser.add_argument('--storage-quota', metavar='QUOTA', dest='storage_quota', 2961 type=parse_storage_quota, default=None, 2962 help='Override storage quota of the repository (e.g. 5G, 1.5T). ' 2963 'When a new repository is initialized, sets the storage quota on the new ' 2964 'repository as well. Default: no quota.') 2965 2966 init_epilog = process_epilog(""" 2967 This command initializes an empty repository. A repository is a filesystem 2968 directory containing the deduplicated data from zero or more archives. 
2969 2970 Encryption can be enabled at repository init time. It cannot be changed later. 2971 2972 It is not recommended to work without encryption. Repository encryption protects 2973 you e.g. against the case that an attacker has access to your backup repository. 2974 2975 Borg relies on randomly generated key material and uses that for chunking, id 2976 generation, encryption and authentication. The key material is encrypted using 2977 the passphrase you give before it is stored on-disk. 2978 2979 You need to be careful with the key / the passphrase: 2980 2981 If you want "passphrase-only" security, use one of the repokey modes. The 2982 key will be stored inside the repository (in its "config" file). In above 2983 mentioned attack scenario, the attacker will have the key (but not the 2984 passphrase). 2985 2986 If you want "passphrase and having-the-key" security, use one of the keyfile 2987 modes. The key will be stored in your home directory (in .config/borg/keys). 2988 In the attack scenario, the attacker who has just access to your repo won't 2989 have the key (and also not the passphrase). 2990 2991 Make a backup copy of the key file (keyfile mode) or repo config file 2992 (repokey mode) and keep it at a safe place, so you still have the key in 2993 case it gets corrupted or lost. Also keep the passphrase at a safe place. 2994 The backup that is encrypted with that key won't help you with that, of course. 2995 2996 Make sure you use a good passphrase. Not too short, not too simple. The real 2997 encryption / decryption key is encrypted with / locked by your passphrase. 2998 If an attacker gets your key, he can't unlock and use it without knowing the 2999 passphrase. 3000 3001 Be careful with special or non-ascii characters in your passphrase: 3002 3003 - Borg processes the passphrase as unicode (and encodes it as utf-8), 3004 so it does not have problems dealing with even the strangest characters. 
3005 - BUT: that does not necessarily apply to your OS / VM / keyboard configuration. 3006 3007 So better use a long passphrase made from simple ascii chars than one that 3008 includes non-ascii stuff or characters that are hard/impossible to enter on 3009 a different keyboard layout. 3010 3011 You can change your passphrase for existing repos at any time, it won't affect 3012 the encryption/decryption key or other secrets. 3013 3014 Encryption modes 3015 ++++++++++++++++ 3016 3017 You can choose from the encryption modes seen in the table below on a per-repo 3018 basis. The mode determines encryption algorithm, hash/MAC algorithm and also the 3019 key storage location. 3020 3021 Example: `borg init --encryption repokey ...` 3022 3023 .. nanorst: inline-fill 3024 3025 +----------+---------------+------------------------+--------------------------+ 3026 | Hash/MAC | Not encrypted | Not encrypted, | Encrypted (AEAD w/ AES) | 3027 | | no auth | but authenticated | and authenticated | 3028 +----------+---------------+------------------------+--------------------------+ 3029 | SHA-256 | none | `authenticated` | repokey | 3030 | | | | keyfile | 3031 +----------+---------------+------------------------+--------------------------+ 3032 | BLAKE2b | n/a | `authenticated-blake2` | `repokey-blake2` | 3033 | | | | `keyfile-blake2` | 3034 +----------+---------------+------------------------+--------------------------+ 3035 3036 .. nanorst: inline-replace 3037 3038 Modes `marked like this` in the above table are new in Borg 1.1 and are not 3039 backwards-compatible with Borg 1.0.x. 3040 3041 On modern Intel/AMD CPUs (except very cheap ones), AES is usually 3042 hardware-accelerated. 3043 BLAKE2b is faster than SHA256 on Intel/AMD 64-bit CPUs 3044 (except AMD Ryzen and future CPUs with SHA extensions), 3045 which makes `authenticated-blake2` faster than `none` and `authenticated`. 
3046 3047 On modern ARM CPUs, NEON provides hardware acceleration for SHA256 making it faster 3048 than BLAKE2b-256 there. NEON accelerates AES as well. 3049 3050 Hardware acceleration is always used automatically when available. 3051 3052 `repokey` and `keyfile` use AES-CTR-256 for encryption and HMAC-SHA256 for 3053 authentication in an encrypt-then-MAC (EtM) construction. The chunk ID hash 3054 is HMAC-SHA256 as well (with a separate key). 3055 These modes are compatible with Borg 1.0.x. 3056 3057 `repokey-blake2` and `keyfile-blake2` are also authenticated encryption modes, 3058 but use BLAKE2b-256 instead of HMAC-SHA256 for authentication. The chunk ID 3059 hash is a keyed BLAKE2b-256 hash. 3060 These modes are new and *not* compatible with Borg 1.0.x. 3061 3062 `authenticated` mode uses no encryption, but authenticates repository contents 3063 through the same HMAC-SHA256 hash as the `repokey` and `keyfile` modes (it uses it 3064 as the chunk ID hash). The key is stored like `repokey`. 3065 This mode is new and *not* compatible with Borg 1.0.x. 3066 3067 `authenticated-blake2` is like `authenticated`, but uses the keyed BLAKE2b-256 hash 3068 from the other blake2 modes. 3069 This mode is new and *not* compatible with Borg 1.0.x. 3070 3071 `none` mode uses no encryption and no authentication. It uses SHA256 as chunk 3072 ID hash. This mode is not recommended, you should rather consider using an authenticated 3073 or authenticated/encrypted mode. This mode has possible denial-of-service issues 3074 when running ``borg create`` on contents controlled by an attacker. 3075 Use it only for new repositories where no encryption is wanted **and** when compatibility 3076 with 1.0.x is important. If compatibility with 1.0.x is not important, use 3077 `authenticated-blake2` or `authenticated` instead. 3078 This mode is compatible with Borg 1.0.x. 
3079 """) 3080 subparser = subparsers.add_parser('init', parents=[common_parser], add_help=False, 3081 description=self.do_init.__doc__, epilog=init_epilog, 3082 formatter_class=argparse.RawDescriptionHelpFormatter, 3083 help='initialize empty repository') 3084 subparser.set_defaults(func=self.do_init) 3085 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 3086 type=location_validator(archive=False), 3087 help='repository to create') 3088 subparser.add_argument('-e', '--encryption', metavar='MODE', dest='encryption', required=True, 3089 choices=key_argument_names(), 3090 help='select encryption key mode **(required)**') 3091 subparser.add_argument('--append-only', dest='append_only', action='store_true', 3092 help='create an append-only mode repository') 3093 subparser.add_argument('--storage-quota', metavar='QUOTA', dest='storage_quota', default=None, 3094 type=parse_storage_quota, 3095 help='Set storage quota of the new repository (e.g. 5G, 1.5T). Default: no quota.') 3096 subparser.add_argument('--make-parent-dirs', dest='make_parent_dirs', action='store_true', 3097 help='create the parent directories of the repository directory, if they are missing.') 3098 3099 check_epilog = process_epilog(""" 3100 The check command verifies the consistency of a repository and the corresponding archives. 3101 3102 check --repair is a potentially dangerous function and might lead to data loss 3103 (for kinds of corruption it is not capable of dealing with). BE VERY CAREFUL! 3104 3105 Pursuant to the previous warning it is also highly recommended to test the 3106 reliability of the hardware running this software with stress testing software 3107 such as memory testers. Unreliable hardware can also lead to data loss especially 3108 when this command is run in repair mode. 3109 3110 First, the underlying repository data files are checked: 3111 3112 - For all segments, the segment magic header is checked. 
3113 - For all objects stored in the segments, all metadata (e.g. CRC and size) and 3114 all data is read. The read data is checked by size and CRC. Bit rot and other 3115 types of accidental damage can be detected this way. 3116 - In repair mode, if an integrity error is detected in a segment, try to recover 3117 as many objects from the segment as possible. 3118 - In repair mode, make sure that the index is consistent with the data stored in 3119 the segments. 3120 - If checking a remote repo via ``ssh:``, the repo check is executed on the server 3121 without causing significant network traffic. 3122 - The repository check can be skipped using the ``--archives-only`` option. 3123 3124 Second, the consistency and correctness of the archive metadata is verified: 3125 3126 - Is the repo manifest present? If not, it is rebuilt from archive metadata 3127 chunks (this requires reading and decrypting of all metadata and data). 3128 - Check if archive metadata chunk is present; if not, remove archive from manifest. 3129 - For all files (items) in the archive, for all chunks referenced by these 3130 files, check if chunk is present. In repair mode, if a chunk is not present, 3131 replace it with a same-size replacement chunk of zeroes. If a previously lost 3132 chunk reappears (e.g. via a later backup), in repair mode the all-zero replacement 3133 chunk will be replaced by the correct chunk. This requires reading of archive and 3134 file metadata, but not data. 3135 - In repair mode, when all the archives were checked, orphaned chunks are deleted 3136 from the repo. One cause of orphaned chunks are input file related errors (like 3137 read errors) in the archive creation process. 3138 - In verify-data mode, a complete cryptographic verification of the archive data 3139 integrity is performed. This conflicts with ``--repository-only`` as this mode 3140 only makes sense if the archive checks are enabled. The full details of this mode 3141 are documented below. 
3142 - If checking a remote repo via ``ssh:``, the archive check is executed on the 3143 client machine because it requires decryption, and this is always done client-side 3144 as key access is needed. 3145 - The archive checks can be time consuming; they can be skipped using the 3146 ``--repository-only`` option. 3147 3148 The ``--verify-data`` option will perform a full integrity verification (as opposed to 3149 checking the CRC32 of the segment) of data, which means reading the data from the 3150 repository, decrypting and decompressing it. This is a cryptographic verification, 3151 which will detect (accidental) corruption. For encrypted repositories it is 3152 tamper-resistant as well, unless the attacker has access to the keys. It is also very 3153 slow. 3154 """) 3155 subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False, 3156 description=self.do_check.__doc__, 3157 epilog=check_epilog, 3158 formatter_class=argparse.RawDescriptionHelpFormatter, 3159 help='verify repository') 3160 subparser.set_defaults(func=self.do_check) 3161 subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', 3162 type=location_validator(), 3163 help='repository or archive to check consistency of') 3164 subparser.add_argument('--repository-only', dest='repo_only', action='store_true', 3165 help='only perform repository checks') 3166 subparser.add_argument('--archives-only', dest='archives_only', action='store_true', 3167 help='only perform archives checks') 3168 subparser.add_argument('--verify-data', dest='verify_data', action='store_true', 3169 help='perform cryptographic archive data integrity verification ' 3170 '(conflicts with ``--repository-only``)') 3171 subparser.add_argument('--repair', dest='repair', action='store_true', 3172 help='attempt to repair any inconsistencies found') 3173 subparser.add_argument('--save-space', dest='save_space', action='store_true', 3174 help='work slower, but using less space') 3175 
define_archive_filters_group(subparser) 3176 3177 subparser = subparsers.add_parser('key', parents=[mid_common_parser], add_help=False, 3178 description="Manage a keyfile or repokey of a repository", 3179 epilog="", 3180 formatter_class=argparse.RawDescriptionHelpFormatter, 3181 help='manage repository key') 3182 3183 key_parsers = subparser.add_subparsers(title='required arguments', metavar='<command>') 3184 subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser)) 3185 3186 key_export_epilog = process_epilog(""" 3187 If repository encryption is used, the repository is inaccessible 3188 without the key. This command allows to backup this essential key. 3189 Note that the backup produced does not include the passphrase itself 3190 (i.e. the exported key stays encrypted). In order to regain access to a 3191 repository, one needs both the exported key and the original passphrase. 3192 3193 There are two backup formats. The normal backup format is suitable for 3194 digital storage as a file. The ``--paper`` backup format is optimized 3195 for printing and typing in while importing, with per line checks to 3196 reduce problems with manual input. 3197 3198 For repositories using keyfile encryption the key is saved locally 3199 on the system that is capable of doing backups. To guard against loss 3200 of this key, the key needs to be backed up independently of the main 3201 data backup. 3202 3203 For repositories using the repokey encryption the key is saved in the 3204 repository in the config file. A backup is thus not strictly needed, 3205 but guards against the repository becoming inaccessible if the file 3206 is damaged for some reason. 
3207 """) 3208 subparser = key_parsers.add_parser('export', parents=[common_parser], add_help=False, 3209 description=self.do_key_export.__doc__, 3210 epilog=key_export_epilog, 3211 formatter_class=argparse.RawDescriptionHelpFormatter, 3212 help='export repository key for backup') 3213 subparser.set_defaults(func=self.do_key_export) 3214 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 3215 type=location_validator(archive=False)) 3216 subparser.add_argument('path', metavar='PATH', nargs='?', type=str, 3217 help='where to store the backup') 3218 subparser.add_argument('--paper', dest='paper', action='store_true', 3219 help='Create an export suitable for printing and later type-in') 3220 subparser.add_argument('--qr-html', dest='qr', action='store_true', 3221 help='Create an html file suitable for printing and later type-in or qr scan') 3222 3223 key_import_epilog = process_epilog(""" 3224 This command allows to restore a key previously backed up with the 3225 export command. 3226 3227 If the ``--paper`` option is given, the import will be an interactive 3228 process in which each line is checked for plausibility before 3229 proceeding to the next line. For this format PATH must not be given. 
3230 """) 3231 subparser = key_parsers.add_parser('import', parents=[common_parser], add_help=False, 3232 description=self.do_key_import.__doc__, 3233 epilog=key_import_epilog, 3234 formatter_class=argparse.RawDescriptionHelpFormatter, 3235 help='import repository key from backup') 3236 subparser.set_defaults(func=self.do_key_import) 3237 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 3238 type=location_validator(archive=False)) 3239 subparser.add_argument('path', metavar='PATH', nargs='?', type=str, 3240 help='path to the backup (\'-\' to read from stdin)') 3241 subparser.add_argument('--paper', dest='paper', action='store_true', 3242 help='interactively import from a backup done with ``--paper``') 3243 3244 change_passphrase_epilog = process_epilog(""" 3245 The key files used for repository encryption are optionally passphrase 3246 protected. This command can be used to change this passphrase. 3247 3248 Please note that this command only changes the passphrase, but not any 3249 secret protected by it (like e.g. encryption/MAC keys or chunker seed). 3250 Thus, changing the passphrase after passphrase and borg key got compromised 3251 does not protect future (nor past) backups to the same repository. 
3252 """) 3253 subparser = key_parsers.add_parser('change-passphrase', parents=[common_parser], add_help=False, 3254 description=self.do_change_passphrase.__doc__, 3255 epilog=change_passphrase_epilog, 3256 formatter_class=argparse.RawDescriptionHelpFormatter, 3257 help='change repository passphrase') 3258 subparser.set_defaults(func=self.do_change_passphrase) 3259 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 3260 type=location_validator(archive=False)) 3261 3262 # Borg 1.0 alias for change passphrase (without the "key" subcommand) 3263 subparser = subparsers.add_parser('change-passphrase', parents=[common_parser], add_help=False, 3264 description=self.do_change_passphrase.__doc__, 3265 epilog=change_passphrase_epilog, 3266 formatter_class=argparse.RawDescriptionHelpFormatter, 3267 help='change repository passphrase') 3268 subparser.set_defaults(func=self.do_change_passphrase_deprecated) 3269 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 3270 type=location_validator(archive=False)) 3271 3272 migrate_to_repokey_epilog = process_epilog(""" 3273 This command migrates a repository from passphrase mode (removed in Borg 1.0) 3274 to repokey mode. 3275 3276 You will be first asked for the repository passphrase (to open it in passphrase 3277 mode). This is the same passphrase as you used to use for this repo before 1.0. 3278 3279 It will then derive the different secrets from this passphrase. 3280 3281 Then you will be asked for a new passphrase (twice, for safety). This 3282 passphrase will be used to protect the repokey (which contains these same 3283 secrets in encrypted form). You may use the same passphrase as you used to 3284 use, but you may also use a different one. 3285 3286 After migrating to repokey mode, you can change the passphrase at any time. 3287 But please note: the secrets will always stay the same and they could always 3288 be derived from your (old) passphrase-mode passphrase. 
3289 """) 3290 subparser = key_parsers.add_parser('migrate-to-repokey', parents=[common_parser], add_help=False, 3291 description=self.do_migrate_to_repokey.__doc__, 3292 epilog=migrate_to_repokey_epilog, 3293 formatter_class=argparse.RawDescriptionHelpFormatter, 3294 help='migrate passphrase-mode repository to repokey') 3295 subparser.set_defaults(func=self.do_migrate_to_repokey) 3296 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 3297 type=location_validator(archive=False)) 3298 3299 create_epilog = process_epilog(""" 3300 This command creates a backup archive containing all files found while recursively 3301 traversing all paths specified. Paths are added to the archive as they are given, 3302 that means if relative paths are desired, the command has to be run from the correct 3303 directory. 3304 3305 When giving '-' as path, borg will read data from standard input and create a 3306 file 'stdin' in the created archive from that data. See section *Reading from 3307 stdin* below for details. 3308 3309 The archive will consume almost no disk space for files or parts of files that 3310 have already been stored in other archives. 3311 3312 The archive name needs to be unique. It must not end in '.checkpoint' or 3313 '.checkpoint.N' (with N being a number), because these names are used for 3314 checkpoints and treated in special ways. 3315 3316 In the archive name, you may use the following placeholders: 3317 {now}, {utcnow}, {fqdn}, {hostname}, {user} and some others. 3318 3319 Backup speed is increased by not reprocessing files that are already part of 3320 existing archives and weren't modified. The detection of unmodified files is 3321 done by comparing multiple file metadata values with previous values kept in 3322 the files cache. 
3323 3324 This comparison can operate in different modes as given by ``--files-cache``: 3325 3326 - ctime,size,inode (default) 3327 - mtime,size,inode (default behaviour of borg versions older than 1.1.0rc4) 3328 - ctime,size (ignore the inode number) 3329 - mtime,size (ignore the inode number) 3330 - rechunk,ctime (all files are considered modified - rechunk, cache ctime) 3331 - rechunk,mtime (all files are considered modified - rechunk, cache mtime) 3332 - disabled (disable the files cache, all files considered modified - rechunk) 3333 3334 inode number: better safety, but often unstable on network filesystems 3335 3336 Normally, detecting file modifications will take inode information into 3337 consideration to improve the reliability of file change detection. 3338 This is problematic for files located on sshfs and similar network file 3339 systems which do not provide stable inode numbers, such files will always 3340 be considered modified. You can use modes without `inode` in this case to 3341 improve performance, but reliability of change detection might be reduced. 3342 3343 ctime vs. mtime: safety vs. speed 3344 3345 - ctime is a rather safe way to detect changes to a file (metadata and contents) 3346 as it can not be set from userspace. But, a metadata-only change will already 3347 update the ctime, so there might be some unnecessary chunking/hashing even 3348 without content changes. Some filesystems do not support ctime (change time). 3349 E.g. doing a chown or chmod to a file will change its ctime. 3350 - mtime usually works and only updates if file contents were changed. But mtime 3351 can be arbitrarily set from userspace, e.g. to set mtime back to the same value 3352 it had before a content change happened. This can be used maliciously as well as 3353 well-meant, but in both cases mtime based cache modes can be problematic. 
3354 3355 The mount points of filesystems or filesystem snapshots should be the same for every 3356 creation of a new archive to ensure fast operation. This is because the file cache that 3357 is used to determine changed files quickly uses absolute filenames. 3358 If this is not possible, consider creating a bind mount to a stable location. 3359 3360 The ``--progress`` option shows (from left to right) Original, Compressed and Deduplicated 3361 (O, C and D, respectively), then the Number of files (N) processed so far, followed by 3362 the currently processed path. 3363 3364 When using ``--stats``, you will get some statistics about how much data was 3365 added - the "This Archive" deduplicated size there is most interesting as that is 3366 how much your repository will grow. Please note that the "All archives" stats refer to 3367 the state after creation. Also, the ``--stats`` and ``--dry-run`` options are mutually 3368 exclusive because the data is not actually compressed and deduplicated during a dry run. 3369 3370 See the output of the "borg help patterns" command for more help on exclude patterns. 3371 3372 See the output of the "borg help placeholders" command for more help on placeholders. 3373 3374 .. man NOTES 3375 3376 The ``--exclude`` patterns are not like tar. In tar ``--exclude`` .bundler/gems will 3377 exclude foo/.bundler/gems. In borg it will not, you need to use ``--exclude`` 3378 '\\*/.bundler/gems' to get the same effect. See ``borg help patterns`` for 3379 more information. 3380 3381 In addition to using ``--exclude`` patterns, it is possible to use 3382 ``--exclude-if-present`` to specify the name of a filesystem object (e.g. a file 3383 or folder name) which, when contained within another folder, will prevent the 3384 containing folder from being backed up. By default, the containing folder and 3385 all of its contents will be omitted from the backup. 
If, however, you wish to 3386 only include the objects specified by ``--exclude-if-present`` in your backup, 3387 and not include any other contents of the containing folder, this can be enabled 3388 through using the ``--keep-exclude-tags`` option. 3389 3390 The ``-x`` or ``--one-file-system`` option excludes directories, that are mountpoints (and everything in them). 3391 It detects mountpoints by comparing the device number from the output of ``stat()`` of the directory and its 3392 parent directory. Specifically, it excludes directories for which ``stat()`` reports a device number different 3393 from the device number of their parent. Be aware that in Linux (and possibly elsewhere) there are directories 3394 with device number different from their parent, which the kernel does not consider a mountpoint and also the 3395 other way around. Examples are bind mounts (possibly same device number, but always a mountpoint) and ALL 3396 subvolumes of a btrfs (different device number from parent but not necessarily a mountpoint). Therefore when 3397 using ``--one-file-system``, one should make doubly sure that the backup works as intended especially when using 3398 btrfs. This is even more important, if the btrfs layout was created by someone else, e.g. a distribution 3399 installer. 3400 3401 3402 .. _list_item_flags: 3403 3404 Item flags 3405 ++++++++++ 3406 3407 ``--list`` outputs a list of all files, directories and other 3408 file system items it considered (no matter whether they had content changes 3409 or not). For each item, it prefixes a single-letter flag that indicates type 3410 and/or status of the item. 3411 3412 If you are interested only in a subset of that output, you can give e.g. 3413 ``--filter=AME`` and it will only show regular files with A, M or E status (see 3414 below). 
3415 3416 A uppercase character represents the status of a regular file relative to the 3417 "files" cache (not relative to the repo -- this is an issue if the files cache 3418 is not used). Metadata is stored in any case and for 'A' and 'M' also new data 3419 chunks are stored. For 'U' all data chunks refer to already existing chunks. 3420 3421 - 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ) 3422 - 'M' = regular file, modified 3423 - 'U' = regular file, unchanged 3424 - 'E' = regular file, an error happened while accessing/reading *this* file 3425 3426 A lowercase character means a file type other than a regular file, 3427 borg usually just stores their metadata: 3428 3429 - 'd' = directory 3430 - 'b' = block device 3431 - 'c' = char device 3432 - 'h' = regular file, hardlink (to already seen inodes) 3433 - 's' = symlink 3434 - 'f' = fifo 3435 3436 Other flags used include: 3437 3438 - 'i' = backup data was read from standard input (stdin) 3439 - '-' = dry run, item was *not* backed up 3440 - 'x' = excluded, item was *not* backed up 3441 - '?' = missing status code (if you see this, please file a bug report!) 3442 3443 Reading from stdin 3444 ++++++++++++++++++ 3445 3446 To read from stdin, specify ``-`` as path and pipe directly to borg:: 3447 3448 backup-vm --id myvm --stdout | borg create REPO::ARCHIVE - 3449 3450 Note that piping to borg creates an archive even if the command piping 3451 to borg exits with a failure. In this case, **one can end up with 3452 truncated output being backed up**. 3453 3454 Reading from stdin yields just a stream of data without file metadata 3455 associated with it, and the files cache is not needed at all. So it is 3456 safe to disable it via ``--no-files-cache`` and speed up backup 3457 creation a bit. 3458 3459 By default, the content read from stdin is stored in a file called 'stdin'. 3460 Use ``--stdin-name`` to change the name. 
3461 """) 3462 3463 subparser = subparsers.add_parser('create', parents=[common_parser], add_help=False, 3464 description=self.do_create.__doc__, 3465 epilog=create_epilog, 3466 formatter_class=argparse.RawDescriptionHelpFormatter, 3467 help='create backup') 3468 subparser.set_defaults(func=self.do_create) 3469 3470 # note: --dry-run and --stats are mutually exclusive, but we do not want to abort when 3471 # parsing, but rather proceed with the dry-run, but without stats (see run() method). 3472 subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', 3473 help='do not create a backup archive') 3474 subparser.add_argument('-s', '--stats', dest='stats', action='store_true', 3475 help='print statistics for the created archive') 3476 3477 subparser.add_argument('--list', dest='output_list', action='store_true', 3478 help='output verbose list of items (files, dirs, ...)') 3479 subparser.add_argument('--filter', metavar='STATUSCHARS', dest='output_filter', 3480 help='only display items with the given status characters (see description)') 3481 subparser.add_argument('--json', action='store_true', 3482 help='output stats as JSON. Implies ``--stats``.') 3483 subparser.add_argument('--no-cache-sync', dest='no_cache_sync', action='store_true', 3484 help='experimental: do not synchronize the cache. 
Implies not using the files cache.') 3485 subparser.add_argument('--no-files-cache', dest='cache_files', action='store_false', 3486 help='do not load/update the file metadata cache used to detect unchanged files') 3487 subparser.add_argument('--stdin-name', metavar='NAME', dest='stdin_name', default='stdin', 3488 help='use NAME in archive for stdin data (default: "stdin")') 3489 subparser.add_argument('--stdin-user', metavar='USER', dest='stdin_user', default=uid2user(0), 3490 help='set user USER in archive for stdin data (default: %(default)r)') 3491 subparser.add_argument('--stdin-group', metavar='GROUP', dest='stdin_group', default=gid2group(0), 3492 help='set group GROUP in archive for stdin data (default: %(default)r)') 3493 subparser.add_argument('--stdin-mode', metavar='M', dest='stdin_mode', type=lambda s: int(s, 8), default=STDIN_MODE_DEFAULT, 3494 help='set mode to M in archive for stdin data (default: %(default)04o)') 3495 3496 exclude_group = define_exclusion_group(subparser, tag_files=True) 3497 exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true', 3498 help='exclude files flagged NODUMP') 3499 3500 fs_group = subparser.add_argument_group('Filesystem options') 3501 fs_group.add_argument('-x', '--one-file-system', dest='one_file_system', action='store_true', 3502 help='stay in the same file system and do not store mount points of other file systems. 
This might behave different from your expectations, see the docs.') 3503 fs_group.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', 3504 help='only store numeric user and group identifiers') 3505 fs_group.add_argument('--noatime', dest='noatime', action='store_true', 3506 help='do not store atime into archive') 3507 fs_group.add_argument('--noctime', dest='noctime', action='store_true', 3508 help='do not store ctime into archive') 3509 fs_group.add_argument('--nobirthtime', dest='nobirthtime', action='store_true', 3510 help='do not store birthtime (creation date) into archive') 3511 fs_group.add_argument('--nobsdflags', dest='nobsdflags', action='store_true', 3512 help='do not read and store bsdflags (e.g. NODUMP, IMMUTABLE) into archive') 3513 fs_group.add_argument('--noacls', dest='noacls', action='store_true', 3514 help='do not read and store ACLs into archive') 3515 fs_group.add_argument('--noxattrs', dest='noxattrs', action='store_true', 3516 help='do not read and store xattrs into archive') 3517 fs_group.add_argument('--ignore-inode', dest='ignore_inode', action='store_true', 3518 help='ignore inode data in the file metadata cache used to detect unchanged files.') 3519 fs_group.add_argument('--files-cache', metavar='MODE', dest='files_cache_mode', 3520 type=FilesCacheMode, default=DEFAULT_FILES_CACHE_MODE_UI, 3521 help='operate files cache in MODE. default: %s' % DEFAULT_FILES_CACHE_MODE_UI) 3522 fs_group.add_argument('--read-special', dest='read_special', action='store_true', 3523 help='open and read block and char device files as well as FIFOs as if they were ' 3524 'regular files. 
Also follows symlinks pointing to these kinds of files.') 3525 3526 archive_group = subparser.add_argument_group('Archive options') 3527 archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', type=CommentSpec, default='', 3528 help='add a comment text to the archive') 3529 archive_group.add_argument('--timestamp', metavar='TIMESTAMP', dest='timestamp', 3530 type=timestamp, default=None, 3531 help='manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). ' 3532 'Alternatively, give a reference file/directory.') 3533 archive_group.add_argument('-c', '--checkpoint-interval', metavar='SECONDS', dest='checkpoint_interval', 3534 type=int, default=1800, 3535 help='write checkpoint every SECONDS seconds (Default: 1800)') 3536 archive_group.add_argument('--chunker-params', metavar='PARAMS', dest='chunker_params', 3537 type=ChunkerParams, default=CHUNKER_PARAMS, 3538 help='specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, ' 3539 'HASH_MASK_BITS, HASH_WINDOW_SIZE). default: %d,%d,%d,%d' % CHUNKER_PARAMS) 3540 archive_group.add_argument('-C', '--compression', metavar='COMPRESSION', dest='compression', 3541 type=CompressionSpec, default=CompressionSpec('lz4'), 3542 help='select compression algorithm, see the output of the ' 3543 '"borg help compression" command for details.') 3544 3545 subparser.add_argument('location', metavar='ARCHIVE', 3546 type=location_validator(archive=True), 3547 help='name of archive to create (must be also a valid directory name)') 3548 subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, 3549 help='paths to archive') 3550 3551 extract_epilog = process_epilog(""" 3552 This command extracts the contents of an archive. By default the entire 3553 archive is extracted but a subset of files and directories can be selected 3554 by passing a list of ``PATHs`` as arguments. The file selection can further 3555 be restricted by using the ``--exclude`` option. 
3556 3557 See the output of the "borg help patterns" command for more help on exclude patterns. 3558 3559 By using ``--dry-run``, you can do all extraction steps except actually writing the 3560 output data: reading metadata and data chunks from the repo, checking the hash/hmac, 3561 decrypting, decompressing. 3562 3563 ``--progress`` can be slower than no progress display, since it makes one additional 3564 pass over the archive metadata. 3565 3566 .. note:: 3567 3568 Currently, extract always writes into the current working directory ("."), 3569 so make sure you ``cd`` to the right place before calling ``borg extract``. 3570 """) 3571 subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False, 3572 description=self.do_extract.__doc__, 3573 epilog=extract_epilog, 3574 formatter_class=argparse.RawDescriptionHelpFormatter, 3575 help='extract archive contents') 3576 subparser.set_defaults(func=self.do_extract) 3577 subparser.add_argument('--list', dest='output_list', action='store_true', 3578 help='output verbose list of items (files, dirs, ...)') 3579 subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', 3580 help='do not actually change any files') 3581 subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', 3582 help='only obey numeric user and group identifiers') 3583 subparser.add_argument('--nobsdflags', dest='nobsdflags', action='store_true', 3584 help='do not extract/set bsdflags (e.g. 
NODUMP, IMMUTABLE)') 3585 subparser.add_argument('--noacls', dest='noacls', action='store_true', 3586 help='do not extract/set ACLs') 3587 subparser.add_argument('--noxattrs', dest='noxattrs', action='store_true', 3588 help='do not extract/set xattrs') 3589 subparser.add_argument('--stdout', dest='stdout', action='store_true', 3590 help='write all extracted data to stdout') 3591 subparser.add_argument('--sparse', dest='sparse', action='store_true', 3592 help='create holes in output sparse file from all-zero chunks') 3593 subparser.add_argument('location', metavar='ARCHIVE', 3594 type=location_validator(archive=True), 3595 help='archive to extract') 3596 subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, 3597 help='paths to extract; patterns are supported') 3598 define_exclusion_group(subparser, strip_components=True) 3599 3600 export_tar_epilog = process_epilog(""" 3601 This command creates a tarball from an archive. 3602 3603 When giving '-' as the output FILE, Borg will write a tar stream to standard output. 3604 3605 By default (``--tar-filter=auto``) Borg will detect whether the FILE should be compressed 3606 based on its file extension and pipe the tarball through an appropriate filter 3607 before writing it to FILE: 3608 3609 - .tar.gz: gzip 3610 - .tar.bz2: bzip2 3611 - .tar.xz: xz 3612 3613 Alternatively a ``--tar-filter`` program may be explicitly specified. It should 3614 read the uncompressed tar stream from stdin and write a compressed/filtered 3615 tar stream to stdout. 3616 3617 The generated tarball uses the GNU tar format. 3618 3619 export-tar is a lossy conversion: 3620 BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported. 3621 Timestamp resolution is limited to whole seconds, not the nanosecond resolution 3622 otherwise supported by Borg. 3623 3624 A ``--sparse`` option (as found in borg extract) is not supported. 
3625 3626 By default the entire archive is extracted but a subset of files and directories 3627 can be selected by passing a list of ``PATHs`` as arguments. 3628 The file selection can further be restricted by using the ``--exclude`` option. 3629 3630 See the output of the "borg help patterns" command for more help on exclude patterns. 3631 3632 ``--progress`` can be slower than no progress display, since it makes one additional 3633 pass over the archive metadata. 3634 """) 3635 subparser = subparsers.add_parser('export-tar', parents=[common_parser], add_help=False, 3636 description=self.do_export_tar.__doc__, 3637 epilog=export_tar_epilog, 3638 formatter_class=argparse.RawDescriptionHelpFormatter, 3639 help='create tarball from archive') 3640 subparser.set_defaults(func=self.do_export_tar) 3641 subparser.add_argument('--tar-filter', dest='tar_filter', default='auto', 3642 help='filter program to pipe data through') 3643 subparser.add_argument('--list', dest='output_list', action='store_true', 3644 help='output verbose list of items (files, dirs, ...)') 3645 subparser.add_argument('location', metavar='ARCHIVE', 3646 type=location_validator(archive=True), 3647 help='archive to export') 3648 subparser.add_argument('tarfile', metavar='FILE', 3649 help='output tar file. "-" to write to stdout instead.') 3650 subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, 3651 help='paths to extract; patterns are supported') 3652 define_exclusion_group(subparser, strip_components=True) 3653 3654 diff_epilog = process_epilog(""" 3655 This command finds differences (file contents, user/group/mode) between archives. 3656 3657 A repository location and an archive name must be specified for REPO::ARCHIVE1. 3658 ARCHIVE2 is just another archive name in same repository (no repository location 3659 allowed). 3660 3661 For archives created with Borg 1.1 or newer diff automatically detects whether 3662 the archives are created with the same chunker params. 
If so, only chunk IDs 3663 are compared, which is very fast. 3664 3665 For archives prior to Borg 1.1 chunk contents are compared by default. 3666 If you did not create the archives with different chunker params, 3667 pass ``--same-chunker-params``. 3668 Note that the chunker params changed from Borg 0.xx to 1.0. 3669 3670 See the output of the "borg help patterns" command for more help on exclude patterns. 3671 """) 3672 subparser = subparsers.add_parser('diff', parents=[common_parser], add_help=False, 3673 description=self.do_diff.__doc__, 3674 epilog=diff_epilog, 3675 formatter_class=argparse.RawDescriptionHelpFormatter, 3676 help='find differences in archive contents') 3677 subparser.set_defaults(func=self.do_diff) 3678 subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', 3679 help='only consider numeric user and group identifiers') 3680 subparser.add_argument('--same-chunker-params', dest='same_chunker_params', action='store_true', 3681 help='Override check of chunker parameters.') 3682 subparser.add_argument('--sort', dest='sort', action='store_true', 3683 help='Sort the output lines by file path.') 3684 subparser.add_argument('--json-lines', action='store_true', 3685 help='Format output as JSON Lines. ') 3686 subparser.add_argument('location', metavar='REPO::ARCHIVE1', 3687 type=location_validator(archive=True), 3688 help='repository location and ARCHIVE1 name') 3689 subparser.add_argument('archive2', metavar='ARCHIVE2', 3690 type=archivename_validator(), 3691 help='ARCHIVE2 name (no repository location allowed)') 3692 subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, 3693 help='paths of items inside the archives to compare; patterns are supported') 3694 define_exclusion_group(subparser) 3695 3696 rename_epilog = process_epilog(""" 3697 This command renames an archive in the repository. 3698 3699 This results in a different archive ID. 
3700 """) 3701 subparser = subparsers.add_parser('rename', parents=[common_parser], add_help=False, 3702 description=self.do_rename.__doc__, 3703 epilog=rename_epilog, 3704 formatter_class=argparse.RawDescriptionHelpFormatter, 3705 help='rename archive') 3706 subparser.set_defaults(func=self.do_rename) 3707 subparser.add_argument('location', metavar='ARCHIVE', 3708 type=location_validator(archive=True), 3709 help='archive to rename') 3710 subparser.add_argument('name', metavar='NEWNAME', 3711 type=archivename_validator(), 3712 help='the new archive name to use') 3713 3714 delete_epilog = process_epilog(""" 3715 This command deletes an archive from the repository or the complete repository. 3716 Disk space is reclaimed accordingly. If you delete the complete repository, the 3717 local cache for it (if any) is also deleted. 3718 3719 When using ``--stats``, you will get some statistics about how much data was 3720 deleted - the "Deleted data" deduplicated size there is most interesting as 3721 that is how much your repository will shrink. 3722 Please note that the "All archives" stats refer to the state after deletion. 
3723 """) 3724 subparser = subparsers.add_parser('delete', parents=[common_parser], add_help=False, 3725 description=self.do_delete.__doc__, 3726 epilog=delete_epilog, 3727 formatter_class=argparse.RawDescriptionHelpFormatter, 3728 help='delete archive') 3729 subparser.set_defaults(func=self.do_delete) 3730 subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', 3731 help='do not change repository') 3732 subparser.add_argument('-s', '--stats', dest='stats', action='store_true', 3733 help='print statistics for the deleted archive') 3734 subparser.add_argument('--cache-only', dest='cache_only', action='store_true', 3735 help='delete only the local cache for the given repository') 3736 subparser.add_argument('--force', dest='forced', 3737 action='count', default=0, 3738 help='force deletion of corrupted archives, ' 3739 'use ``--force --force`` in case ``--force`` does not work.') 3740 subparser.add_argument('--save-space', dest='save_space', action='store_true', 3741 help='work slower, but using less space') 3742 subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', 3743 type=location_validator(), 3744 help='repository or archive to delete') 3745 subparser.add_argument('archives', metavar='ARCHIVE', nargs='*', 3746 help='archives to delete') 3747 define_archive_filters_group(subparser) 3748 3749 list_epilog = process_epilog(""" 3750 This command lists the contents of a repository or an archive. 3751 3752 See the "borg help patterns" command for more help on exclude patterns. 3753 3754 .. 
man NOTES 3755 3756 The following keys are available for ``--format``: 3757 3758 3759 """) + BaseFormatter.keys_help() + textwrap.dedent(""" 3760 3761 Keys for listing repository archives: 3762 3763 """) + ArchiveFormatter.keys_help() + textwrap.dedent(""" 3764 3765 Keys for listing archive files: 3766 3767 """) + ItemFormatter.keys_help() 3768 subparser = subparsers.add_parser('list', parents=[common_parser], add_help=False, 3769 description=self.do_list.__doc__, 3770 epilog=list_epilog, 3771 formatter_class=argparse.RawDescriptionHelpFormatter, 3772 help='list archive or repository contents') 3773 subparser.set_defaults(func=self.do_list) 3774 subparser.add_argument('--short', dest='short', action='store_true', 3775 help='only print file/directory names, nothing else') 3776 subparser.add_argument('--format', '--list-format', metavar='FORMAT', dest='format', 3777 help='specify format for file listing ' 3778 '(default: "{mode} {user:6} {group:6} {size:8d} {mtime} {path}{extra}{NL}")') 3779 subparser.add_argument('--json', action='store_true', 3780 help='Only valid for listing repository contents. Format output as JSON. ' 3781 'The form of ``--format`` is ignored, ' 3782 'but keys used in it are added to the JSON output. ' 3783 'Some keys are always present. Note: JSON can only represent text. ' 3784 'A "barchive" key is therefore not available.') 3785 subparser.add_argument('--json-lines', action='store_true', 3786 help='Only valid for listing archive contents. Format output as JSON Lines. ' 3787 'The form of ``--format`` is ignored, ' 3788 'but keys used in it are added to the JSON output. ' 3789 'Some keys are always present. Note: JSON can only represent text. 
' 3790 'A "bpath" key is therefore not available.') 3791 subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', 3792 type=location_validator(), 3793 help='repository or archive to list contents of') 3794 subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, 3795 help='paths to list; patterns are supported') 3796 define_archive_filters_group(subparser) 3797 define_exclusion_group(subparser) 3798 3799 umount_epilog = process_epilog(""" 3800 This command un-mounts a FUSE filesystem that was mounted with ``borg mount``. 3801 3802 This is a convenience wrapper that just calls the platform-specific shell 3803 command - usually this is either umount or fusermount -u. 3804 """) 3805 subparser = subparsers.add_parser('umount', parents=[common_parser], add_help=False, 3806 description=self.do_umount.__doc__, 3807 epilog=umount_epilog, 3808 formatter_class=argparse.RawDescriptionHelpFormatter, 3809 help='umount repository') 3810 subparser.set_defaults(func=self.do_umount) 3811 subparser.add_argument('mountpoint', metavar='MOUNTPOINT', type=str, 3812 help='mountpoint of the filesystem to umount') 3813 3814 info_epilog = process_epilog(""" 3815 This command displays detailed information about the specified archive or repository. 3816 3817 Please note that the deduplicated sizes of the individual archives do not add 3818 up to the deduplicated size of the repository ("all archives"), because the two 3819 are meaning different things: 3820 3821 This archive / deduplicated size = amount of data stored ONLY for this archive 3822 = unique chunks of this archive. 3823 All archives / deduplicated size = amount of data stored in the repo 3824 = all chunks in the repository. 3825 3826 Borg archives can only contain a limited amount of file metadata. 3827 The size of an archive relative to this limit depends on a number of factors, 3828 mainly the number of files, the lengths of paths and other metadata stored for files. 
3829 This is shown as *utilization of maximum supported archive size*. 3830 """) 3831 subparser = subparsers.add_parser('info', parents=[common_parser], add_help=False, 3832 description=self.do_info.__doc__, 3833 epilog=info_epilog, 3834 formatter_class=argparse.RawDescriptionHelpFormatter, 3835 help='show repository or archive information') 3836 subparser.set_defaults(func=self.do_info) 3837 subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', 3838 type=location_validator(), 3839 help='repository or archive to display information about') 3840 subparser.add_argument('--json', action='store_true', 3841 help='format output as JSON') 3842 define_archive_filters_group(subparser) 3843 3844 break_lock_epilog = process_epilog(""" 3845 This command breaks the repository and cache locks. 3846 Please use carefully and only while no borg process (on any machine) is 3847 trying to access the Cache or the Repository. 3848 """) 3849 subparser = subparsers.add_parser('break-lock', parents=[common_parser], add_help=False, 3850 description=self.do_break_lock.__doc__, 3851 epilog=break_lock_epilog, 3852 formatter_class=argparse.RawDescriptionHelpFormatter, 3853 help='break repository and cache locks') 3854 subparser.set_defaults(func=self.do_break_lock) 3855 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 3856 type=location_validator(archive=False), 3857 help='repository for which to break the locks') 3858 3859 prune_epilog = process_epilog(""" 3860 The prune command prunes a repository by deleting all archives not matching 3861 any of the specified retention options. This command is normally used by 3862 automated backup scripts wanting to keep a certain number of historic backups. 3863 3864 Also, prune automatically removes checkpoint archives (incomplete archives left 3865 behind by interrupted backup runs) except if the checkpoint is the latest 3866 archive (and thus still needed). 
Checkpoint archives are not considered when 3867 comparing archive counts against the retention limits (``--keep-X``). 3868 3869 If a prefix is set with -P, then only archives that start with the prefix are 3870 considered for deletion and only those archives count towards the totals 3871 specified by the rules. 3872 Otherwise, *all* archives in the repository are candidates for deletion! 3873 There is no automatic distinction between archives representing different 3874 contents. These need to be distinguished by specifying matching prefixes. 3875 3876 If you have multiple sequences of archives with different data sets (e.g. 3877 from different machines) in one shared repository, use one prune call per 3878 data set that matches only the respective archives using the -P option. 3879 3880 The ``--keep-within`` option takes an argument of the form "<int><char>", 3881 where char is "H", "d", "w", "m", "y". For example, ``--keep-within 2d`` means 3882 to keep all archives that were created within the past 48 hours. 3883 "1m" is taken to mean "31d". The archives kept with this option do not 3884 count towards the totals specified by any other options. 3885 3886 A good procedure is to thin out more and more the older your backups get. 3887 As an example, ``--keep-daily 7`` means to keep the latest backup on each day, 3888 up to 7 most recent days with backups (days without backups do not count). 3889 The rules are applied from secondly to yearly, and backups selected by previous 3890 rules do not count towards those of later rules. The time that each backup 3891 starts is used for pruning purposes. Dates and times are interpreted in 3892 the local timezone, and weeks go from Monday to Sunday. Specifying a 3893 negative number of archives to keep means that there is no limit. 
3894 3895 The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will 3896 keep the last N archives under the assumption that you do not create more than one 3897 backup archive in the same second). 3898 3899 When using ``--stats``, you will get some statistics about how much data was 3900 deleted - the "Deleted data" deduplicated size there is most interesting as 3901 that is how much your repository will shrink. 3902 Please note that the "All archives" stats refer to the state after pruning. 3903 """) 3904 subparser = subparsers.add_parser('prune', parents=[common_parser], add_help=False, 3905 description=self.do_prune.__doc__, 3906 epilog=prune_epilog, 3907 formatter_class=argparse.RawDescriptionHelpFormatter, 3908 help='prune archives') 3909 subparser.set_defaults(func=self.do_prune) 3910 subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', 3911 help='do not change repository') 3912 subparser.add_argument('--force', dest='forced', action='store_true', 3913 help='force pruning of corrupted archives, ' 3914 'use ``--force --force`` in case ``--force`` does not work.') 3915 subparser.add_argument('-s', '--stats', dest='stats', action='store_true', 3916 help='print statistics for the deleted archive') 3917 subparser.add_argument('--list', dest='output_list', action='store_true', 3918 help='output verbose list of archives it keeps/prunes') 3919 subparser.add_argument('--keep-within', metavar='INTERVAL', dest='within', type=interval, 3920 help='keep all archives within this time interval') 3921 subparser.add_argument('--keep-last', '--keep-secondly', dest='secondly', type=int, default=0, 3922 help='number of secondly archives to keep') 3923 subparser.add_argument('--keep-minutely', dest='minutely', type=int, default=0, 3924 help='number of minutely archives to keep') 3925 subparser.add_argument('-H', '--keep-hourly', dest='hourly', type=int, default=0, 3926 help='number of hourly archives to keep') 3927 
subparser.add_argument('-d', '--keep-daily', dest='daily', type=int, default=0, 3928 help='number of daily archives to keep') 3929 subparser.add_argument('-w', '--keep-weekly', dest='weekly', type=int, default=0, 3930 help='number of weekly archives to keep') 3931 subparser.add_argument('-m', '--keep-monthly', dest='monthly', type=int, default=0, 3932 help='number of monthly archives to keep') 3933 subparser.add_argument('-y', '--keep-yearly', dest='yearly', type=int, default=0, 3934 help='number of yearly archives to keep') 3935 define_archive_filters_group(subparser, sort_by=False, first_last=False) 3936 subparser.add_argument('--save-space', dest='save_space', action='store_true', 3937 help='work slower, but using less space') 3938 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 3939 type=location_validator(archive=False), 3940 help='repository to prune') 3941 3942 upgrade_epilog = process_epilog(""" 3943 Upgrade an existing, local Borg repository. 3944 3945 When you do not need borg upgrade 3946 +++++++++++++++++++++++++++++++++ 3947 3948 Not every change requires that you run ``borg upgrade``. 3949 3950 You do **not** need to run it when: 3951 3952 - moving your repository to a different place 3953 - upgrading to another point release (like 1.0.x to 1.0.y), 3954 except when noted otherwise in the changelog 3955 - upgrading from 1.0.x to 1.1.x, 3956 except when noted otherwise in the changelog 3957 3958 Borg 1.x.y upgrades 3959 +++++++++++++++++++ 3960 3961 Use ``borg upgrade --tam REPO`` to require manifest authentication 3962 introduced with Borg 1.0.9 to address security issues. This means 3963 that modifying the repository after doing this with a version prior 3964 to 1.0.9 will raise a validation error, so only perform this upgrade 3965 after updating all clients using the repository to 1.0.9 or newer. 3966 3967 This upgrade should be done on each client for safety reasons. 
3968 3969 If a repository is accidentally modified with a pre-1.0.9 client after 3970 this upgrade, use ``borg upgrade --tam --force REPO`` to remedy it. 3971 3972 If you routinely do this you might not want to enable this upgrade 3973 (which will leave you exposed to the security issue). You can 3974 reverse the upgrade by issuing ``borg upgrade --disable-tam REPO``. 3975 3976 See 3977 https://borgbackup.readthedocs.io/en/stable/changes.html#pre-1-0-9-manifest-spoofing-vulnerability 3978 for details. 3979 3980 Attic and Borg 0.xx to Borg 1.x 3981 +++++++++++++++++++++++++++++++ 3982 3983 This currently supports converting an Attic repository to Borg and also 3984 helps with converting Borg 0.xx to 1.0. 3985 3986 Currently, only LOCAL repositories can be upgraded (issue #465). 3987 3988 Please note that ``borg create`` (since 1.0.0) uses bigger chunks by 3989 default than old borg or attic did, so the new chunks won't deduplicate 3990 with the old chunks in the upgraded repository. 3991 See ``--chunker-params`` option of ``borg create`` and ``borg recreate``. 3992 3993 ``borg upgrade`` will change the magic strings in the repository's 3994 segments to match the new Borg magic strings. The keyfiles found in 3995 $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and 3996 copied to $BORG_KEYS_DIR or ~/.config/borg/keys. 3997 3998 The cache files are converted, from $ATTIC_CACHE_DIR or 3999 ~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the 4000 cache layout between Borg and Attic changed, so it is possible 4001 the first backup after the conversion takes longer than expected 4002 due to the cache resync. 4003 4004 Upgrade should be able to resume if interrupted, although it 4005 will still iterate over all segments. 
If you want to start 4006 from scratch, use `borg delete` over the copied repository to 4007 make sure the cache files are also removed:: 4008 4009 borg delete borg 4010 4011 Unless ``--inplace`` is specified, the upgrade process first creates a backup 4012 copy of the repository, in REPOSITORY.before-upgrade-DATETIME, using hardlinks. 4013 This requires that the repository and its parent directory reside on same 4014 filesystem so the hardlink copy can work. 4015 This takes longer than in place upgrades, but is much safer and gives 4016 progress information (as opposed to ``cp -al``). Once you are satisfied 4017 with the conversion, you can safely destroy the backup copy. 4018 4019 WARNING: Running the upgrade in place will make the current 4020 copy unusable with older version, with no way of going back 4021 to previous versions. This can PERMANENTLY DAMAGE YOUR 4022 REPOSITORY! Attic CAN NOT READ BORG REPOSITORIES, as the 4023 magic strings have changed. You have been warned.""") 4024 subparser = subparsers.add_parser('upgrade', parents=[common_parser], add_help=False, 4025 description=self.do_upgrade.__doc__, 4026 epilog=upgrade_epilog, 4027 formatter_class=argparse.RawDescriptionHelpFormatter, 4028 help='upgrade repository format') 4029 subparser.set_defaults(func=self.do_upgrade) 4030 subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', 4031 help='do not change repository') 4032 subparser.add_argument('--inplace', dest='inplace', action='store_true', 4033 help='rewrite repository in place, with no chance of going back ' 4034 'to older versions of the repository.') 4035 subparser.add_argument('--force', dest='force', action='store_true', 4036 help='Force upgrade') 4037 subparser.add_argument('--tam', dest='tam', action='store_true', 4038 help='Enable manifest authentication (in key and cache) (Borg 1.0.9 and later).') 4039 subparser.add_argument('--disable-tam', dest='disable_tam', action='store_true', 4040 help='Disable manifest 
authentication (in key and cache).') 4041 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 4042 type=location_validator(archive=False), 4043 help='path to the repository to be upgraded') 4044 4045 recreate_epilog = process_epilog(""" 4046 Recreate the contents of existing archives. 4047 4048 recreate is a potentially dangerous function and might lead to data loss 4049 (if used wrongly). BE VERY CAREFUL! 4050 4051 ``--exclude``, ``--exclude-from``, ``--exclude-if-present``, ``--keep-exclude-tags`` 4052 and PATH have the exact same semantics as in "borg create", but they only check 4053 for files in the archives and not in the local file system. If PATHs are specified, 4054 the resulting archives will only contain files from these PATHs. 4055 4056 Note that all paths in an archive are relative, therefore absolute patterns/paths 4057 will *not* match (``--exclude``, ``--exclude-from``, PATHs). 4058 4059 ``--recompress`` allows to change the compression of existing data in archives. 4060 Due to how Borg stores compressed size information this might display 4061 incorrect information for archives that were not recreated at the same time. 4062 There is no risk of data loss by this. 4063 4064 ``--chunker-params`` will re-chunk all files in the archive, this can be 4065 used to have upgraded Borg 0.xx or Attic archives deduplicate with 4066 Borg 1.x archives. 4067 4068 **USE WITH CAUTION.** 4069 Depending on the PATHs and patterns given, recreate can be used to permanently 4070 delete files from archives. 4071 When in doubt, use ``--dry-run --verbose --list`` to see how patterns/PATHS are 4072 interpreted. See :ref:`list_item_flags` in ``borg create`` for details. 4073 4074 The archive being recreated is only removed after the operation completes. The 4075 archive that is built during the operation exists at the same time at 4076 "<ARCHIVE>.recreate". The new archive will have a different archive ID. 
4077 4078 With ``--target`` the original archive is not replaced, instead a new archive is created. 4079 4080 When rechunking space usage can be substantial, expect at least the entire 4081 deduplicated size of the archives using the previous chunker params. 4082 When recompressing expect approx. (throughput / checkpoint-interval) in space usage, 4083 assuming all chunks are recompressed. 4084 4085 If you recently ran borg check --repair and it had to fix lost chunks with all-zero 4086 replacement chunks, please first run another backup for the same data and re-run 4087 borg check --repair afterwards to heal any archives that had lost chunks which are 4088 still generated from the input data. 4089 4090 Important: running borg recreate to re-chunk will remove the chunks_healthy 4091 metadata of all items with replacement chunks, so healing will not be possible 4092 any more after re-chunking (it is also unlikely it would ever work: due to the 4093 change of chunking parameters, the missing chunk likely will never be seen again 4094 even if you still have the data that produced it). 
4095 """) 4096 subparser = subparsers.add_parser('recreate', parents=[common_parser], add_help=False, 4097 description=self.do_recreate.__doc__, 4098 epilog=recreate_epilog, 4099 formatter_class=argparse.RawDescriptionHelpFormatter, 4100 help=self.do_recreate.__doc__) 4101 subparser.set_defaults(func=self.do_recreate) 4102 subparser.add_argument('--list', dest='output_list', action='store_true', 4103 help='output verbose list of items (files, dirs, ...)') 4104 subparser.add_argument('--filter', metavar='STATUSCHARS', dest='output_filter', 4105 help='only display items with the given status characters (listed in borg create --help)') 4106 subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', 4107 help='do not change anything') 4108 subparser.add_argument('-s', '--stats', dest='stats', action='store_true', 4109 help='print statistics at end') 4110 4111 define_exclusion_group(subparser, tag_files=True) 4112 4113 archive_group = subparser.add_argument_group('Archive options') 4114 archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None, 4115 type=archivename_validator(), 4116 help='create a new archive with the name ARCHIVE, do not replace existing archive ' 4117 '(only applies for a single archive)') 4118 archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval', 4119 type=int, default=1800, metavar='SECONDS', 4120 help='write checkpoint every SECONDS seconds (Default: 1800)') 4121 archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', type=CommentSpec, default=None, 4122 help='add a comment text to the archive') 4123 archive_group.add_argument('--timestamp', metavar='TIMESTAMP', dest='timestamp', 4124 type=timestamp, default=None, 4125 help='manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). 
' 4126 'alternatively, give a reference file/directory.') 4127 archive_group.add_argument('-C', '--compression', metavar='COMPRESSION', dest='compression', 4128 type=CompressionSpec, default=CompressionSpec('lz4'), 4129 help='select compression algorithm, see the output of the ' 4130 '"borg help compression" command for details.') 4131 archive_group.add_argument('--recompress', metavar='MODE', dest='recompress', nargs='?', 4132 default='never', const='if-different', choices=('never', 'if-different', 'always'), 4133 help='recompress data chunks according to ``--compression``. ' 4134 'MODE `if-different`: ' 4135 'recompress if current compression is with a different compression algorithm ' 4136 '(the level is not considered). ' 4137 'MODE `always`: ' 4138 'recompress even if current compression is with the same compression algorithm ' 4139 '(use this to change the compression level). ' 4140 'MODE `never` (default): ' 4141 'do not recompress.') 4142 archive_group.add_argument('--chunker-params', metavar='PARAMS', dest='chunker_params', 4143 type=ChunkerParams, default=CHUNKER_PARAMS, 4144 help='specify the chunker parameters (CHUNK_MIN_EXP, CHUNK_MAX_EXP, ' 4145 'HASH_MASK_BITS, HASH_WINDOW_SIZE) or `default` to use the current defaults. ' 4146 'default: %d,%d,%d,%d' % CHUNKER_PARAMS) 4147 4148 subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', 4149 type=location_validator(), 4150 help='repository or archive to recreate') 4151 subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, 4152 help='paths to recreate; patterns are supported') 4153 4154 with_lock_epilog = process_epilog(""" 4155 This command runs a user-specified command while the repository lock is held. 
4156 4157 It will first try to acquire the lock (make sure that no other operation is 4158 running in the repo), then execute the given command as a subprocess and wait 4159 for its termination, release the lock and return the user command's return 4160 code as borg's return code. 4161 4162 .. note:: 4163 4164 If you copy a repository with the lock held, the lock will be present in 4165 the copy. Thus, before using borg on the copy from a different host, 4166 you need to use "borg break-lock" on the copied repository, because 4167 Borg is cautious and does not automatically remove stale locks made by a different host. 4168 """) 4169 subparser = subparsers.add_parser('with-lock', parents=[common_parser], add_help=False, 4170 description=self.do_with_lock.__doc__, 4171 epilog=with_lock_epilog, 4172 formatter_class=argparse.RawDescriptionHelpFormatter, 4173 help='run user command with lock held') 4174 subparser.set_defaults(func=self.do_with_lock) 4175 subparser.add_argument('location', metavar='REPOSITORY', 4176 type=location_validator(archive=False), 4177 help='repository to lock') 4178 subparser.add_argument('command', metavar='COMMAND', 4179 help='command to run') 4180 subparser.add_argument('args', metavar='ARGS', nargs=argparse.REMAINDER, 4181 help='command arguments') 4182 4183 config_epilog = process_epilog(""" 4184 This command gets and sets options in a local repository or cache config file. 4185 For security reasons, this command only works on local repositories. 4186 4187 To delete a config value entirely, use ``--delete``. To list the values 4188 of the configuration file or the default values, use ``--list``. To get and existing 4189 key, pass only the key name. To set a key, pass both the key name and 4190 the new value. Keys can be specified in the format "section.name" or 4191 simply "name"; the section will default to "repository" and "cache" for 4192 the repo and cache configs, respectively. 
4193 4194 4195 By default, borg config manipulates the repository config file. Using ``--cache`` 4196 edits the repository cache's config file instead. 4197 """) 4198 subparser = subparsers.add_parser('config', parents=[common_parser], add_help=False, 4199 description=self.do_config.__doc__, 4200 epilog=config_epilog, 4201 formatter_class=argparse.RawDescriptionHelpFormatter, 4202 help='get and set configuration values') 4203 subparser.set_defaults(func=self.do_config) 4204 subparser.add_argument('-c', '--cache', dest='cache', action='store_true', 4205 help='get and set values from the repo cache') 4206 4207 group = subparser.add_mutually_exclusive_group() 4208 group.add_argument('-d', '--delete', dest='delete', action='store_true', 4209 help='delete the key from the config file') 4210 group.add_argument('-l', '--list', dest='list', action='store_true', 4211 help='list the configuration of the repo') 4212 4213 subparser.add_argument('location', metavar='REPOSITORY', nargs='?', default='', 4214 type=location_validator(archive=False, proto='file'), 4215 help='repository to configure') 4216 subparser.add_argument('name', metavar='NAME', nargs='?', 4217 help='name of config key') 4218 subparser.add_argument('value', metavar='VALUE', nargs='?', 4219 help='new value for key') 4220 4221 subparser = subparsers.add_parser('help', parents=[common_parser], add_help=False, 4222 description='Extra help') 4223 subparser.add_argument('--epilog-only', dest='epilog_only', action='store_true') 4224 subparser.add_argument('--usage-only', dest='usage_only', action='store_true') 4225 subparser.set_defaults(func=functools.partial(self.do_help, parser, subparsers.choices)) 4226 subparser.add_argument('topic', metavar='TOPIC', type=str, nargs='?', 4227 help='additional help on TOPIC') 4228 4229 debug_epilog = process_epilog(""" 4230 These commands are not intended for normal use and potentially very 4231 dangerous if used incorrectly. 
4232 4233 They exist to improve debugging capabilities without direct system access, e.g. 4234 in case you ever run into some severe malfunction. Use them only if you know 4235 what you are doing or if a trusted developer tells you what to do.""") 4236 4237 subparser = subparsers.add_parser('debug', parents=[mid_common_parser], add_help=False, 4238 description='debugging command (not intended for normal use)', 4239 epilog=debug_epilog, 4240 formatter_class=argparse.RawDescriptionHelpFormatter, 4241 help='debugging command (not intended for normal use)') 4242 4243 debug_parsers = subparser.add_subparsers(title='required arguments', metavar='<command>') 4244 subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser)) 4245 4246 debug_info_epilog = process_epilog(""" 4247 This command displays some system information that might be useful for bug 4248 reports and debugging problems. If a traceback happens, this information is 4249 already appended at the end of the traceback. 4250 """) 4251 subparser = debug_parsers.add_parser('info', parents=[common_parser], add_help=False, 4252 description=self.do_debug_info.__doc__, 4253 epilog=debug_info_epilog, 4254 formatter_class=argparse.RawDescriptionHelpFormatter, 4255 help='show system infos for debugging / bug reports (debug)') 4256 subparser.set_defaults(func=self.do_debug_info) 4257 4258 debug_dump_archive_items_epilog = process_epilog(""" 4259 This command dumps raw (but decrypted and decompressed) archive items (only metadata) to files. 
4260 """) 4261 subparser = debug_parsers.add_parser('dump-archive-items', parents=[common_parser], add_help=False, 4262 description=self.do_debug_dump_archive_items.__doc__, 4263 epilog=debug_dump_archive_items_epilog, 4264 formatter_class=argparse.RawDescriptionHelpFormatter, 4265 help='dump archive items (metadata) (debug)') 4266 subparser.set_defaults(func=self.do_debug_dump_archive_items) 4267 subparser.add_argument('location', metavar='ARCHIVE', 4268 type=location_validator(archive=True), 4269 help='archive to dump') 4270 4271 debug_dump_archive_epilog = process_epilog(""" 4272 This command dumps all metadata of an archive in a decoded form to a file. 4273 """) 4274 subparser = debug_parsers.add_parser('dump-archive', parents=[common_parser], add_help=False, 4275 description=self.do_debug_dump_archive.__doc__, 4276 epilog=debug_dump_archive_epilog, 4277 formatter_class=argparse.RawDescriptionHelpFormatter, 4278 help='dump decoded archive metadata (debug)') 4279 subparser.set_defaults(func=self.do_debug_dump_archive) 4280 subparser.add_argument('location', metavar='ARCHIVE', 4281 type=location_validator(archive=True), 4282 help='archive to dump') 4283 subparser.add_argument('path', metavar='PATH', type=str, 4284 help='file to dump data into') 4285 4286 debug_dump_manifest_epilog = process_epilog(""" 4287 This command dumps manifest metadata of a repository in a decoded form to a file. 
4288 """) 4289 subparser = debug_parsers.add_parser('dump-manifest', parents=[common_parser], add_help=False, 4290 description=self.do_debug_dump_manifest.__doc__, 4291 epilog=debug_dump_manifest_epilog, 4292 formatter_class=argparse.RawDescriptionHelpFormatter, 4293 help='dump decoded repository metadata (debug)') 4294 subparser.set_defaults(func=self.do_debug_dump_manifest) 4295 subparser.add_argument('location', metavar='REPOSITORY', 4296 type=location_validator(archive=False), 4297 help='repository to dump') 4298 subparser.add_argument('path', metavar='PATH', type=str, 4299 help='file to dump data into') 4300 4301 debug_dump_repo_objs_epilog = process_epilog(""" 4302 This command dumps raw (but decrypted and decompressed) repo objects to files. 4303 """) 4304 subparser = debug_parsers.add_parser('dump-repo-objs', parents=[common_parser], add_help=False, 4305 description=self.do_debug_dump_repo_objs.__doc__, 4306 epilog=debug_dump_repo_objs_epilog, 4307 formatter_class=argparse.RawDescriptionHelpFormatter, 4308 help='dump repo objects (debug)') 4309 subparser.set_defaults(func=self.do_debug_dump_repo_objs) 4310 subparser.add_argument('location', metavar='REPOSITORY', 4311 type=location_validator(archive=False), 4312 help='repository to dump') 4313 subparser.add_argument('--ghost', dest='ghost', action='store_true', 4314 help='dump all segment file contents, including deleted/uncommitted objects and commits.') 4315 4316 debug_search_repo_objs_epilog = process_epilog(""" 4317 This command searches raw (but decrypted and decompressed) repo objects for a specific bytes sequence. 
4318 """) 4319 subparser = debug_parsers.add_parser('search-repo-objs', parents=[common_parser], add_help=False, 4320 description=self.do_debug_search_repo_objs.__doc__, 4321 epilog=debug_search_repo_objs_epilog, 4322 formatter_class=argparse.RawDescriptionHelpFormatter, 4323 help='search repo objects (debug)') 4324 subparser.set_defaults(func=self.do_debug_search_repo_objs) 4325 subparser.add_argument('location', metavar='REPOSITORY', 4326 type=location_validator(archive=False), 4327 help='repository to search') 4328 subparser.add_argument('wanted', metavar='WANTED', type=str, 4329 help='term to search the repo for, either 0x1234abcd hex term or a string') 4330 4331 debug_get_obj_epilog = process_epilog(""" 4332 This command gets an object from the repository. 4333 """) 4334 subparser = debug_parsers.add_parser('get-obj', parents=[common_parser], add_help=False, 4335 description=self.do_debug_get_obj.__doc__, 4336 epilog=debug_get_obj_epilog, 4337 formatter_class=argparse.RawDescriptionHelpFormatter, 4338 help='get object from repository (debug)') 4339 subparser.set_defaults(func=self.do_debug_get_obj) 4340 subparser.add_argument('location', metavar='REPOSITORY', 4341 type=location_validator(archive=False), 4342 help='repository to use') 4343 subparser.add_argument('id', metavar='ID', type=str, 4344 help='hex object ID to get from the repo') 4345 subparser.add_argument('path', metavar='PATH', type=str, 4346 help='file to write object data into') 4347 4348 debug_put_obj_epilog = process_epilog(""" 4349 This command puts objects into the repository. 
4350 """) 4351 subparser = debug_parsers.add_parser('put-obj', parents=[common_parser], add_help=False, 4352 description=self.do_debug_put_obj.__doc__, 4353 epilog=debug_put_obj_epilog, 4354 formatter_class=argparse.RawDescriptionHelpFormatter, 4355 help='put object to repository (debug)') 4356 subparser.set_defaults(func=self.do_debug_put_obj) 4357 subparser.add_argument('location', metavar='REPOSITORY', 4358 type=location_validator(archive=False), 4359 help='repository to use') 4360 subparser.add_argument('paths', metavar='PATH', nargs='+', type=str, 4361 help='file(s) to read and create object(s) from') 4362 4363 debug_delete_obj_epilog = process_epilog(""" 4364 This command deletes objects from the repository. 4365 """) 4366 subparser = debug_parsers.add_parser('delete-obj', parents=[common_parser], add_help=False, 4367 description=self.do_debug_delete_obj.__doc__, 4368 epilog=debug_delete_obj_epilog, 4369 formatter_class=argparse.RawDescriptionHelpFormatter, 4370 help='delete object from repository (debug)') 4371 subparser.set_defaults(func=self.do_debug_delete_obj) 4372 subparser.add_argument('location', metavar='REPOSITORY', 4373 type=location_validator(archive=False), 4374 help='repository to use') 4375 subparser.add_argument('ids', metavar='IDs', nargs='+', type=str, 4376 help='hex object ID(s) to delete from the repo') 4377 4378 debug_refcount_obj_epilog = process_epilog(""" 4379 This command displays the reference count for objects from the repository. 
4380 """) 4381 subparser = debug_parsers.add_parser('refcount-obj', parents=[common_parser], add_help=False, 4382 description=self.do_debug_refcount_obj.__doc__, 4383 epilog=debug_refcount_obj_epilog, 4384 formatter_class=argparse.RawDescriptionHelpFormatter, 4385 help='show refcount for object from repository (debug)') 4386 subparser.set_defaults(func=self.do_debug_refcount_obj) 4387 subparser.add_argument('location', metavar='REPOSITORY', 4388 type=location_validator(archive=False), 4389 help='repository to use') 4390 subparser.add_argument('ids', metavar='IDs', nargs='+', type=str, 4391 help='hex object ID(s) to show refcounts for') 4392 4393 debug_dump_hints_epilog = process_epilog(""" 4394 This command dumps the repository hints data. 4395 """) 4396 subparser = debug_parsers.add_parser('dump-hints', parents=[common_parser], add_help=False, 4397 description=self.do_debug_dump_hints.__doc__, 4398 epilog=debug_dump_hints_epilog, 4399 formatter_class=argparse.RawDescriptionHelpFormatter, 4400 help='dump repo hints (debug)') 4401 subparser.set_defaults(func=self.do_debug_dump_hints) 4402 subparser.add_argument('location', metavar='REPOSITORY', 4403 type=location_validator(archive=False), 4404 help='repository to dump') 4405 subparser.add_argument('path', metavar='PATH', type=str, 4406 help='file to dump data into') 4407 4408 debug_convert_profile_epilog = process_epilog(""" 4409 Convert a Borg profile to a Python cProfile compatible profile. 
4410 """) 4411 subparser = debug_parsers.add_parser('convert-profile', parents=[common_parser], add_help=False, 4412 description=self.do_debug_convert_profile.__doc__, 4413 epilog=debug_convert_profile_epilog, 4414 formatter_class=argparse.RawDescriptionHelpFormatter, 4415 help='convert Borg profile to Python profile (debug)') 4416 subparser.set_defaults(func=self.do_debug_convert_profile) 4417 subparser.add_argument('input', metavar='INPUT', type=argparse.FileType('rb'), 4418 help='Borg profile') 4419 subparser.add_argument('output', metavar='OUTPUT', type=argparse.FileType('wb'), 4420 help='Output file') 4421 4422 benchmark_epilog = process_epilog("These commands do various benchmarks.") 4423 4424 subparser = subparsers.add_parser('benchmark', parents=[mid_common_parser], add_help=False, 4425 description='benchmark command', 4426 epilog=benchmark_epilog, 4427 formatter_class=argparse.RawDescriptionHelpFormatter, 4428 help='benchmark command') 4429 4430 benchmark_parsers = subparser.add_subparsers(title='required arguments', metavar='<command>') 4431 subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser)) 4432 4433 bench_crud_epilog = process_epilog(""" 4434 This command benchmarks borg CRUD (create, read, update, delete) operations. 4435 4436 It creates input data below the given PATH and backups this data into the given REPO. 4437 The REPO must already exist (it could be a fresh empty repo or an existing repo, the 4438 command will create / read / update / delete some archives named borg-benchmark-crud\\* there. 4439 4440 Make sure you have free space there, you'll need about 1GB each (+ overhead). 4441 4442 If your repository is encrypted and borg needs a passphrase to unlock the key, use:: 4443 4444 BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH 4445 4446 Measurements are done with different input file sizes and counts. 
4447 The file contents are very artificial (either all zero or all random), 4448 thus the measurement results do not necessarily reflect performance with real data. 4449 Also, due to the kind of content used, no compression is used in these benchmarks. 4450 4451 C- == borg create (1st archive creation, no compression, do not use files cache) 4452 C-Z- == all-zero files. full dedup, this is primarily measuring reader/chunker/hasher. 4453 C-R- == random files. no dedup, measuring throughput through all processing stages. 4454 4455 R- == borg extract (extract archive, dry-run, do everything, but do not write files to disk) 4456 R-Z- == all zero files. Measuring heavily duplicated files. 4457 R-R- == random files. No duplication here, measuring throughput through all processing 4458 stages, except writing to disk. 4459 4460 U- == borg create (2nd archive creation of unchanged input files, measure files cache speed) 4461 The throughput value is kind of virtual here, it does not actually read the file. 4462 U-Z- == needs to check the 2 all-zero chunks' existence in the repo. 4463 U-R- == needs to check existence of a lot of different chunks in the repo. 4464 4465 D- == borg delete archive (delete last remaining archive, measure deletion + compaction) 4466 D-Z- == few chunks to delete / few segments to compact/remove. 4467 D-R- == many chunks to delete / many segments to compact/remove. 4468 4469 Please note that there might be quite some variance in these measurements. 4470 Try multiple measurements and having a otherwise idle machine (and network, if you use it). 
4471 """) 4472 subparser = benchmark_parsers.add_parser('crud', parents=[common_parser], add_help=False, 4473 description=self.do_benchmark_crud.__doc__, 4474 epilog=bench_crud_epilog, 4475 formatter_class=argparse.RawDescriptionHelpFormatter, 4476 help='benchmarks borg CRUD (create, extract, update, delete).') 4477 subparser.set_defaults(func=self.do_benchmark_crud) 4478 4479 subparser.add_argument('location', metavar='REPOSITORY', 4480 type=location_validator(archive=False), 4481 help='repository to use for benchmark (must exist)') 4482 4483 subparser.add_argument('path', metavar='PATH', help='path were to create benchmark input data') 4484 4485 return parser 4486 4487 def get_args(self, argv, cmd): 4488 """usually, just returns argv, except if we deal with a ssh forced command for borg serve.""" 4489 result = self.parse_args(argv[1:]) 4490 if cmd is not None and result.func == self.do_serve: 4491 # borg serve case: 4492 # - "result" is how borg got invoked (e.g. via forced command from authorized_keys), 4493 # - "client_result" (from "cmd") refers to the command the client wanted to execute, 4494 # which might be different in the case of a forced command or same otherwise. 4495 client_argv = shlex.split(cmd) 4496 # Drop environment variables (do *not* interpret them) before trying to parse 4497 # the borg command line. 4498 client_argv = list(itertools.dropwhile(lambda arg: '=' in arg, client_argv)) 4499 client_result = self.parse_args(client_argv[1:]) 4500 if client_result.func == result.func: 4501 # make sure we only process like normal if the client is executing 4502 # the same command as specified in the forced command, otherwise 4503 # just skip this block and return the forced command (== result). 4504 # client is allowed to specify the allowlisted options, 4505 # everything else comes from the forced "borg serve" command (or the defaults). 4506 # stuff from denylist must never be used from the client. 
4507 denylist = { 4508 'restrict_to_paths', 4509 'restrict_to_repositories', 4510 'append_only', 4511 'storage_quota', 4512 } 4513 allowlist = { 4514 'debug_topics', 4515 'lock_wait', 4516 'log_level', 4517 'umask', 4518 } 4519 not_present = object() 4520 for attr_name in allowlist: 4521 assert attr_name not in denylist, 'allowlist has denylisted attribute name %s' % attr_name 4522 value = getattr(client_result, attr_name, not_present) 4523 if value is not not_present: 4524 # note: it is not possible to specify a allowlisted option via a forced command, 4525 # it always gets overridden by the value specified (or defaulted to) by the client command. 4526 setattr(result, attr_name, value) 4527 4528 return result 4529 4530 def parse_args(self, args=None): 4531 # We can't use argparse for "serve" since we don't want it to show up in "Available commands" 4532 if args: 4533 args = self.preprocess_args(args) 4534 parser = self.build_parser() 4535 args = parser.parse_args(args or ['-h']) 4536 parser.common_options.resolve(args) 4537 func = get_func(args) 4538 if func == self.do_create and not args.paths: 4539 # need at least 1 path but args.paths may also be populated from patterns 4540 parser.error('Need at least one PATH argument.') 4541 if not getattr(args, 'lock', True): # Option --bypass-lock sets args.lock = False 4542 bypass_allowed = {self.do_check, self.do_config, self.do_diff, 4543 self.do_export_tar, self.do_extract, self.do_info, 4544 self.do_list, self.do_mount, self.do_umount} 4545 if func not in bypass_allowed: 4546 raise Error('Not allowed to bypass locking mechanism for chosen command') 4547 if getattr(args, 'timestamp', None): 4548 args.location = args.location.with_timestamp(args.timestamp) 4549 return args 4550 4551 def prerun_checks(self, logger, is_serve): 4552 if not is_serve: 4553 # this is the borg *client*, we need to check the python: 4554 check_python() 4555 check_extension_modules() 4556 selftest(logger) 4557 4558 def 
_setup_implied_logging(self, args): 4559 """ turn on INFO level logging for args that imply that they will produce output """ 4560 # map of option name to name of logger for that option 4561 option_logger = { 4562 'output_list': 'borg.output.list', 4563 'show_version': 'borg.output.show-version', 4564 'show_rc': 'borg.output.show-rc', 4565 'stats': 'borg.output.stats', 4566 'progress': 'borg.output.progress', 4567 } 4568 for option, logger_name in option_logger.items(): 4569 option_set = args.get(option, False) 4570 logging.getLogger(logger_name).setLevel('INFO' if option_set else 'WARN') 4571 4572 def _setup_topic_debugging(self, args): 4573 """Turn on DEBUG level logging for specified --debug-topics.""" 4574 for topic in args.debug_topics: 4575 if '.' not in topic: 4576 topic = 'borg.debug.' + topic 4577 logger.debug('Enabling debug topic %s', topic) 4578 logging.getLogger(topic).setLevel('DEBUG') 4579 4580 def run(self, args): 4581 os.umask(args.umask) # early, before opening files 4582 self.lock_wait = args.lock_wait 4583 func = get_func(args) 4584 # do not use loggers before this! 4585 is_serve = func == self.do_serve 4586 setup_logging(level=args.log_level, is_serve=is_serve, json=args.log_json) 4587 self.log_json = args.log_json 4588 args.progress |= is_serve 4589 self._setup_implied_logging(vars(args)) 4590 self._setup_topic_debugging(args) 4591 if getattr(args, 'stats', False) and getattr(args, 'dry_run', False): 4592 # the data needed for --stats is not computed when using --dry-run, so we can't do it. 4593 # for ease of scripting, we just ignore --stats when given with --dry-run. 4594 logger.warning("Ignoring --stats. 
It is not supported when using --dry-run.") 4595 args.stats = False 4596 if args.show_version: 4597 logging.getLogger('borg.output.show-version').info('borgbackup version %s' % __version__) 4598 self.prerun_checks(logger, is_serve) 4599 if not is_supported_msgpack(): 4600 logger.error("You do not have a supported msgpack[-python] version installed. Terminating.") 4601 logger.error("This should never happen as specific, supported versions are required by our setup.py.") 4602 logger.error("Do not contact borgbackup support about this.") 4603 return set_ec(EXIT_ERROR) 4604 if is_slow_msgpack(): 4605 logger.warning(PURE_PYTHON_MSGPACK_WARNING) 4606 if args.debug_profile: 4607 # Import only when needed - avoids a further increase in startup time 4608 import cProfile 4609 import marshal 4610 logger.debug('Writing execution profile to %s', args.debug_profile) 4611 # Open the file early, before running the main program, to avoid 4612 # a very late crash in case the specified path is invalid. 4613 with open(args.debug_profile, 'wb') as fd: 4614 profiler = cProfile.Profile() 4615 variables = dict(locals()) 4616 profiler.enable() 4617 try: 4618 return set_ec(func(args)) 4619 finally: 4620 profiler.disable() 4621 profiler.snapshot_stats() 4622 if args.debug_profile.endswith('.pyprof'): 4623 marshal.dump(profiler.stats, fd) 4624 else: 4625 # We use msgpack here instead of the marshal module used by cProfile itself, 4626 # because the latter is insecure. Since these files may be shared over the 4627 # internet we don't want a format that is impossible to interpret outside 4628 # an insecure implementation. 4629 # See scripts/msgpack2marshal.py for a small script that turns a msgpack file 4630 # into a marshal file that can be read by e.g. pyprof2calltree. 4631 # For local use it's unnecessary hassle, though, that's why .pyprof makes 4632 # it compatible (see above). 
4633 msgpack.pack(profiler.stats, fd, use_bin_type=True) 4634 else: 4635 return set_ec(func(args)) 4636 4637 4638def sig_info_handler(sig_no, stack): # pragma: no cover 4639 """search the stack for infos about the currently processed file and print them""" 4640 with signal_handler(sig_no, signal.SIG_IGN): 4641 for frame in inspect.getouterframes(stack): 4642 func, loc = frame[3], frame[0].f_locals 4643 if func in ('process_file', '_process', ): # create op 4644 path = loc['path'] 4645 try: 4646 pos = loc['fd'].tell() 4647 total = loc['st'].st_size 4648 except Exception: 4649 pos, total = 0, 0 4650 logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total))) 4651 break 4652 if func in ('extract_item', ): # extract op 4653 path = loc['item'].path 4654 try: 4655 pos = loc['fd'].tell() 4656 except Exception: 4657 pos = 0 4658 logger.info("{0} {1}/???".format(path, format_file_size(pos))) 4659 break 4660 4661 4662def sig_trace_handler(sig_no, stack): # pragma: no cover 4663 print('\nReceived SIGUSR2 at %s, dumping trace...' % datetime.now().replace(microsecond=0), file=sys.stderr) 4664 faulthandler.dump_traceback() 4665 4666 4667def main(): # pragma: no cover 4668 # Make sure stdout and stderr have errors='replace' to avoid unicode 4669 # issues when print()-ing unicode file names 4670 sys.stdout = ErrorIgnoringTextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True) 4671 sys.stderr = ErrorIgnoringTextIOWrapper(sys.stderr.buffer, sys.stderr.encoding, 'replace', line_buffering=True) 4672 4673 # If we receive SIGINT (ctrl-c), SIGTERM (kill) or SIGHUP (kill -HUP), 4674 # catch them and raise a proper exception that can be handled for an 4675 # orderly exit. 4676 # SIGHUP is important especially for systemd systems, where logind 4677 # sends it when a session exits, in addition to any traditional use. 4678 # Output some info if we receive SIGUSR1 or SIGINFO (ctrl-t). 
4679 4680 # Register fault handler for SIGSEGV, SIGFPE, SIGABRT, SIGBUS and SIGILL. 4681 faulthandler.enable() 4682 with signal_handler('SIGINT', raising_signal_handler(KeyboardInterrupt)), \ 4683 signal_handler('SIGHUP', raising_signal_handler(SigHup)), \ 4684 signal_handler('SIGTERM', raising_signal_handler(SigTerm)), \ 4685 signal_handler('SIGUSR1', sig_info_handler), \ 4686 signal_handler('SIGUSR2', sig_trace_handler), \ 4687 signal_handler('SIGINFO', sig_info_handler): 4688 archiver = Archiver() 4689 msg = msgid = tb = None 4690 tb_log_level = logging.ERROR 4691 try: 4692 args = archiver.get_args(sys.argv, os.environ.get('SSH_ORIGINAL_COMMAND')) 4693 except Error as e: 4694 msg = e.get_message() 4695 tb_log_level = logging.ERROR if e.traceback else logging.DEBUG 4696 tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) 4697 # we might not have logging setup yet, so get out quickly 4698 print(msg, file=sys.stderr) 4699 if tb_log_level == logging.ERROR: 4700 print(tb, file=sys.stderr) 4701 sys.exit(e.exit_code) 4702 try: 4703 exit_code = archiver.run(args) 4704 except Error as e: 4705 msg = e.get_message() 4706 msgid = type(e).__qualname__ 4707 tb_log_level = logging.ERROR if e.traceback else logging.DEBUG 4708 tb = "%s\n%s" % (traceback.format_exc(), sysinfo()) 4709 exit_code = e.exit_code 4710 except RemoteRepository.RPCError as e: 4711 important = e.exception_class not in ('LockTimeout', ) and e.traceback 4712 msgid = e.exception_class 4713 tb_log_level = logging.ERROR if important else logging.DEBUG 4714 if important: 4715 msg = e.exception_full 4716 else: 4717 msg = e.get_message() 4718 tb = '\n'.join('Borg server: ' + l for l in e.sysinfo.splitlines()) 4719 tb += "\n" + sysinfo() 4720 exit_code = EXIT_ERROR 4721 except Exception: 4722 msg = 'Local Exception' 4723 msgid = 'Exception' 4724 tb_log_level = logging.ERROR 4725 tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) 4726 exit_code = EXIT_ERROR 4727 except KeyboardInterrupt: 4728 msg = 'Keyboard 
interrupt' 4729 tb_log_level = logging.DEBUG 4730 tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) 4731 exit_code = EXIT_SIGNAL_BASE + 2 4732 except SigTerm: 4733 msg = 'Received SIGTERM' 4734 msgid = 'Signal.SIGTERM' 4735 tb_log_level = logging.DEBUG 4736 tb = '%s\n%s' % (traceback.format_exc(), sysinfo()) 4737 exit_code = EXIT_SIGNAL_BASE + 15 4738 except SigHup: 4739 msg = 'Received SIGHUP.' 4740 msgid = 'Signal.SIGHUP' 4741 exit_code = EXIT_SIGNAL_BASE + 1 4742 if msg: 4743 logger.error(msg, msgid=msgid) 4744 if tb: 4745 logger.log(tb_log_level, tb) 4746 if args.show_rc: 4747 rc_logger = logging.getLogger('borg.output.show-rc') 4748 exit_msg = 'terminating with %s status, rc %d' 4749 if exit_code == EXIT_SUCCESS: 4750 rc_logger.info(exit_msg % ('success', exit_code)) 4751 elif exit_code == EXIT_WARNING: 4752 rc_logger.warning(exit_msg % ('warning', exit_code)) 4753 elif exit_code == EXIT_ERROR: 4754 rc_logger.error(exit_msg % ('error', exit_code)) 4755 elif exit_code >= EXIT_SIGNAL_BASE: 4756 rc_logger.error(exit_msg % ('signal', exit_code)) 4757 else: 4758 rc_logger.error(exit_msg % ('abnormal', exit_code or 666)) 4759 sys.exit(exit_code) 4760 4761 4762if __name__ == '__main__': 4763 main() 4764