1import argparse 2import contextlib 3import collections 4import enum 5import errno 6import grp 7import hashlib 8import logging 9import io 10import json 11import os 12import os.path 13import platform 14import pwd 15import re 16import shlex 17import signal 18import socket 19import stat 20import subprocess 21import sys 22import textwrap 23import time 24import uuid 25from binascii import hexlify 26from collections import namedtuple, deque, abc, Counter 27from datetime import datetime, timezone, timedelta 28from functools import partial, lru_cache 29from itertools import islice 30from operator import attrgetter 31from os import scandir 32from string import Formatter 33from shutil import get_terminal_size 34 35# MSGPACK ===================================================================== 36# we are rather picky about msgpack versions, because a good working msgpack is 37# very important for borg, see https://github.com/borgbackup/borg/issues/3753 38# 39# because some linux distributions didn't get their dependency management right 40# and broke borgbackup by upgrading msgpack to incompatible versions, we now 41# bundle msgpack-python 0.5.6, which is the latest and best msgpack that is 42# still compatible with borg 1.1.x and we use the bundled version by default. 43# 44# if you are a package maintainer and don't like bundled library code, feel 45# free to not use the bundled code: 46# - set prefer_system_msgpack = True 47# - make sure that an external msgpack-python gets installed 48# - make sure the external msgpack-python always stays at supported versions. 49# - best versions seem to be 0.4.6, 0.4.7, 0.4.8 and 0.5.6. 50# - if you can't satisfy the above requirement, these are versions that might 51# also work ok, IF you make sure to use the COMPILED version of 52# msgpack-python NOT the PURE PYTHON fallback implementation: 0.5.1 and 0.5.4 53# 54# Please note: 55# - using any other version is not supported by borg development and 56# any feedback related to issues caused by this will be ignored. 57# - especially, it is known that msgpack 0.6.x does NOT work for borg 1.1.x. 58 59prefer_system_msgpack = False 60 61try: 62 if prefer_system_msgpack: 63 raise ImportError 64 # use the bundled msgpack 0.5.6 known-good version - other code only imports it from here: 65 import borg.algorithms.msgpack as msgpack 66 from borg.algorithms.msgpack import fallback as msgpack_fallback 67except ImportError: 68 # use an external msgpack version 69 import msgpack 70 from msgpack import fallback as msgpack_fallback 71 72 73from .logger import create_logger 74logger = create_logger() 75 76import borg.crypto.low_level 77from . import __version__ as borg_version 78from . import __version_tuple__ as borg_version_tuple 79from . import chunker 80from . import hashindex 81from . import shellpattern 82from .constants import * # NOQA 83 84 85# generic mechanism to enable users to invoke workarounds by setting the 86# BORG_WORKAROUNDS environment variable to a list of comma-separated strings. 87# see the docs for a list of known workaround strings. 88workarounds = tuple(os.environ.get('BORG_WORKAROUNDS', '').split(',')) 89 90 91''' 92The global exit_code variable is used so that modules other than archiver can increase the program exit code if a 93warning or error occurred during their operation. This is different from archiver.exit_code, which is only accessible 94from the archiver object. 95''' 96exit_code = EXIT_SUCCESS 97 98 99def set_ec(ec): 100 ''' 101 Sets the exit code of the program, if an exit code higher or equal than this is set, this does nothing. This 102 makes EXIT_ERROR override EXIT_WARNING, etc.. 103 104 ec: exit code to set 105 ''' 106 global exit_code 107 exit_code = max(exit_code, ec) 108 return exit_code 109 110 111class Error(Exception): 112 """Error: {}""" 113 # Error base class 114 115 # if we raise such an Error and it is only caught by the uppermost 116 # exception handler (that exits short after with the given exit_code), 117 # it is always a (fatal and abrupt) EXIT_ERROR, never just a warning. 118 exit_code = EXIT_ERROR 119 # show a traceback? 120 traceback = False 121 122 def __init__(self, *args): 123 super().__init__(*args) 124 self.args = args 125 126 def get_message(self): 127 return type(self).__doc__.format(*self.args) 128 129 __str__ = get_message 130 131 132class ErrorWithTraceback(Error): 133 """Error: {}""" 134 # like Error, but show a traceback also 135 traceback = True 136 137 138class IntegrityError(ErrorWithTraceback): 139 """Data integrity error: {}""" 140 141 142class DecompressionError(IntegrityError): 143 """Decompression error: {}""" 144 145 146class ExtensionModuleError(Error): 147 """The Borg binary extension modules do not seem to be properly installed""" 148 149 150class NoManifestError(Error): 151 """Repository has no manifest.""" 152 153 154class PlaceholderError(Error): 155 """Formatting Error: "{}".format({}): {}({})""" 156 157 158class InvalidPlaceholder(PlaceholderError): 159 """Invalid placeholder "{}" in string: {}""" 160 161 162class PythonLibcTooOld(Error): 163 """FATAL: this Python was compiled for a too old (g)libc and misses required functionality.""" 164 165 166def check_python(): 167 required_funcs = {os.stat, os.utime, os.chown} 168 if not os.supports_follow_symlinks.issuperset(required_funcs): 169 raise PythonLibcTooOld 170 171 172class MandatoryFeatureUnsupported(Error): 173 """Unsupported repository feature(s) {}. A newer version of borg is required to access this repository.""" 174 175 176def check_extension_modules(): 177 from . import platform, compress, item 178 if hashindex.API_VERSION != '1.1_07': 179 raise ExtensionModuleError 180 if chunker.API_VERSION != '1.1_01': 181 raise ExtensionModuleError 182 if compress.API_VERSION != '1.1_06': 183 raise ExtensionModuleError 184 if borg.crypto.low_level.API_VERSION != '1.1_02': 185 raise ExtensionModuleError 186 if platform.API_VERSION != platform.OS_API_VERSION or platform.API_VERSION != '1.1_04': 187 raise ExtensionModuleError 188 if item.API_VERSION != '1.1_03': 189 raise ExtensionModuleError 190 191 192def get_limited_unpacker(kind): 193 """return a limited Unpacker because we should not trust msgpack data received from remote""" 194 args = dict(use_list=False, # return tuples, not lists 195 max_bin_len=0, # not used 196 max_ext_len=0, # not used 197 max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE), 198 max_str_len=MAX_OBJECT_SIZE, # a chunk or other repo object 199 ) 200 if kind == 'server': 201 args.update(dict(max_array_len=100, # misc. cmd tuples 202 max_map_len=100, # misc. cmd dicts 203 )) 204 elif kind == 'client': 205 args.update(dict(max_array_len=LIST_SCAN_LIMIT, # result list from repo.list() / .scan() 206 max_map_len=100, # misc. result dicts 207 )) 208 elif kind == 'manifest': 209 args.update(dict(use_list=True, # default value 210 max_array_len=100, # ITEM_KEYS ~= 22 211 max_map_len=MAX_ARCHIVES, # list of archives 212 max_str_len=255, # archive name 213 object_hook=StableDict, 214 unicode_errors='surrogateescape', 215 )) 216 elif kind == 'key': 217 args.update(dict(use_list=True, # default value 218 max_array_len=0, # not used 219 max_map_len=10, # EncryptedKey dict 220 max_str_len=4000, # inner key data 221 object_hook=StableDict, 222 unicode_errors='surrogateescape', 223 )) 224 else: 225 raise ValueError('kind must be "server", "client", "manifest" or "key"') 226 return msgpack.Unpacker(**args) 227 228 229ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') 230 231 232class Archives(abc.MutableMapping): 233 """ 234 Nice wrapper around the archives dict, making sure only valid types/values get in 235 and we can deal with str keys (and it internally encodes to byte keys) and either 236 str timestamps or datetime timestamps. 237 """ 238 def __init__(self): 239 # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts) 240 self._archives = {} 241 242 def __len__(self): 243 return len(self._archives) 244 245 def __iter__(self): 246 return iter(safe_decode(name) for name in self._archives) 247 248 def __getitem__(self, name): 249 assert isinstance(name, str) 250 _name = safe_encode(name) 251 values = self._archives.get(_name) 252 if values is None: 253 raise KeyError 254 ts = parse_timestamp(values[b'time'].decode('utf-8')) 255 return ArchiveInfo(name=name, id=values[b'id'], ts=ts) 256 257 def __setitem__(self, name, info): 258 assert isinstance(name, str) 259 name = safe_encode(name) 260 assert isinstance(info, tuple) 261 id, ts = info 262 assert isinstance(id, bytes) 263 if isinstance(ts, datetime): 264 ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT) 265 assert isinstance(ts, str) 266 ts = ts.encode() 267 self._archives[name] = {b'id': id, b'time': ts} 268 269 def __delitem__(self, name): 270 assert isinstance(name, str) 271 name = safe_encode(name) 272 del self._archives[name] 273 274 def list(self, *, glob=None, match_end=r'\Z', sort_by=(), first=None, last=None, reverse=False): 275 """ 276 Return list of ArchiveInfo instances according to the parameters. 277 278 First match *glob* (considering *match_end*), then *sort_by*. 279 Apply *first* and *last* filters, and then possibly *reverse* the list. 280 281 *sort_by* is a list of sort keys applied in reverse order. 282 283 Note: for better robustness, all filtering / limiting parameters must default to 284 "not limit / not filter", so a FULL archive list is produced by a simple .list(). 285 some callers EXPECT to iterate over all archives in a repo for correct operation. 286 """ 287 if isinstance(sort_by, (str, bytes)): 288 raise TypeError('sort_by must be a sequence of str') 289 regex = re.compile(shellpattern.translate(glob or '*', match_end=match_end)) 290 archives = [x for x in self.values() if regex.match(x.name) is not None] 291 for sortkey in reversed(sort_by): 292 archives.sort(key=attrgetter(sortkey)) 293 if first: 294 archives = archives[:first] 295 elif last: 296 archives = archives[max(len(archives) - last, 0):] 297 if reverse: 298 archives.reverse() 299 return archives 300 301 def list_considering(self, args): 302 """ 303 get a list of archives, considering --first/last/prefix/glob-archives/sort cmdline args 304 """ 305 if args.location.archive: 306 raise Error('The options --first, --last, --prefix and --glob-archives can only be used on repository targets.') 307 if args.prefix is not None: 308 args.glob_archives = args.prefix + '*' 309 return self.list(sort_by=args.sort_by.split(','), glob=args.glob_archives, first=args.first, last=args.last) 310 311 def set_raw_dict(self, d): 312 """set the dict we get from the msgpack unpacker""" 313 for k, v in d.items(): 314 assert isinstance(k, bytes) 315 assert isinstance(v, dict) and b'id' in v and b'time' in v 316 self._archives[k] = v 317 318 def get_raw_dict(self): 319 """get the dict we can give to the msgpack packer""" 320 return self._archives 321 322 323class Manifest: 324 325 @enum.unique 326 class Operation(enum.Enum): 327 # The comments here only roughly describe the scope of each feature. In the end, additions need to be 328 # based on potential problems older clients could produce when accessing newer repositories and the 329 # tradeofs of locking version out or still allowing access. As all older versions and their exact 330 # behaviours are known when introducing new features sometimes this might not match the general descriptions 331 # below. 332 333 # The READ operation describes which features are needed to safely list and extract the archives in the 334 # repository. 335 READ = 'read' 336 # The CHECK operation is for all operations that need either to understand every detail 337 # of the repository (for consistency checks and repairs) or are seldom used functions that just 338 # should use the most restrictive feature set because more fine grained compatibility tracking is 339 # not needed. 340 CHECK = 'check' 341 # The WRITE operation is for adding archives. Features here ensure that older clients don't add archives 342 # in an old format, or is used to lock out clients that for other reasons can no longer safely add new 343 # archives. 344 WRITE = 'write' 345 # The DELETE operation is for all operations (like archive deletion) that need a 100% correct reference 346 # count and the need to be able to find all (directly and indirectly) referenced chunks of a given archive. 347 DELETE = 'delete' 348 349 NO_OPERATION_CHECK = tuple() 350 351 SUPPORTED_REPO_FEATURES = frozenset([]) 352 353 MANIFEST_ID = b'\0' * 32 354 355 def __init__(self, key, repository, item_keys=None): 356 self.archives = Archives() 357 self.config = {} 358 self.key = key 359 self.repository = repository 360 self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS 361 self.tam_verified = False 362 self.timestamp = None 363 364 @property 365 def id_str(self): 366 return bin_to_hex(self.id) 367 368 @property 369 def last_timestamp(self): 370 return parse_timestamp(self.timestamp, tzinfo=None) 371 372 @classmethod 373 def load(cls, repository, operations, key=None, force_tam_not_required=False): 374 from .item import ManifestItem 375 from .crypto.key import key_factory, tam_required_file, tam_required 376 from .repository import Repository 377 try: 378 cdata = repository.get(cls.MANIFEST_ID) 379 except Repository.ObjectNotFound: 380 raise NoManifestError 381 if not key: 382 key = key_factory(repository, cdata) 383 manifest = cls(key, repository) 384 data = key.decrypt(None, cdata) 385 manifest_dict, manifest.tam_verified = key.unpack_and_verify_manifest(data, force_tam_not_required=force_tam_not_required) 386 m = ManifestItem(internal_dict=manifest_dict) 387 manifest.id = key.id_hash(data) 388 if m.get('version') not in (1, 2): 389 raise ValueError('Invalid manifest version') 390 manifest.archives.set_raw_dict(m.archives) 391 manifest.timestamp = m.get('timestamp') 392 manifest.config = m.config 393 # valid item keys are whatever is known in the repo or every key we know 394 manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', [])) 395 396 if manifest.tam_verified: 397 manifest_required = manifest.config.get(b'tam_required', False) 398 security_required = tam_required(repository) 399 if manifest_required and not security_required: 400 logger.debug('Manifest is TAM verified and says TAM is required, updating security database...') 401 file = tam_required_file(repository) 402 open(file, 'w').close() 403 if not manifest_required and security_required: 404 logger.debug('Manifest is TAM verified and says TAM is *not* required, updating security database...') 405 os.unlink(tam_required_file(repository)) 406 manifest.check_repository_compatibility(operations) 407 return manifest, key 408 409 def check_repository_compatibility(self, operations): 410 for operation in operations: 411 assert isinstance(operation, self.Operation) 412 feature_flags = self.config.get(b'feature_flags', None) 413 if feature_flags is None: 414 return 415 if operation.value.encode() not in feature_flags: 416 continue 417 requirements = feature_flags[operation.value.encode()] 418 if b'mandatory' in requirements: 419 unsupported = set(requirements[b'mandatory']) - self.SUPPORTED_REPO_FEATURES 420 if unsupported: 421 raise MandatoryFeatureUnsupported([f.decode() for f in unsupported]) 422 423 def get_all_mandatory_features(self): 424 result = {} 425 feature_flags = self.config.get(b'feature_flags', None) 426 if feature_flags is None: 427 return result 428 429 for operation, requirements in feature_flags.items(): 430 if b'mandatory' in requirements: 431 result[operation.decode()] = set([feature.decode() for feature in requirements[b'mandatory']]) 432 return result 433 434 def write(self): 435 from .item import ManifestItem 436 if self.key.tam_required: 437 self.config[b'tam_required'] = True 438 # self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly 439 if self.timestamp is None: 440 self.timestamp = datetime.utcnow().strftime(ISO_FORMAT) 441 else: 442 prev_ts = self.last_timestamp 443 incremented = (prev_ts + timedelta(microseconds=1)).strftime(ISO_FORMAT) 444 self.timestamp = max(incremented, datetime.utcnow().strftime(ISO_FORMAT)) 445 # include checks for limits as enforced by limited unpacker (used by load()) 446 assert len(self.archives) <= MAX_ARCHIVES 447 assert all(len(name) <= 255 for name in self.archives) 448 assert len(self.item_keys) <= 100 449 manifest = ManifestItem( 450 version=1, 451 archives=StableDict(self.archives.get_raw_dict()), 452 timestamp=self.timestamp, 453 config=StableDict(self.config), 454 item_keys=tuple(sorted(self.item_keys)), 455 ) 456 self.tam_verified = True 457 data = self.key.pack_and_authenticate_metadata(manifest.as_dict()) 458 self.id = self.key.id_hash(data) 459 self.repository.put(self.MANIFEST_ID, self.key.encrypt(data)) 460 461 462def positive_int_validator(value): 463 """argparse type for positive integers""" 464 int_value = int(value) 465 if int_value <= 0: 466 raise argparse.ArgumentTypeError('A positive integer is required: %s' % value) 467 return int_value 468 469 470def interval(s): 471 """Convert a string representing a valid interval to a number of hours.""" 472 multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365} 473 474 if s.endswith(tuple(multiplier.keys())): 475 number = s[:-1] 476 suffix = s[-1] 477 else: 478 # range suffixes in ascending multiplier order 479 ranges = [k for k, v in sorted(multiplier.items(), key=lambda t: t[1])] 480 raise argparse.ArgumentTypeError( 481 'Unexpected interval time unit "%s": expected one of %r' % (s[-1], ranges)) 482 483 try: 484 hours = int(number) * multiplier[suffix] 485 except ValueError: 486 hours = -1 487 488 if hours <= 0: 489 raise argparse.ArgumentTypeError( 490 'Unexpected interval number "%s": expected an integer greater than 0' % number) 491 492 return hours 493 494 495def prune_within(archives, hours): 496 target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600) 497 return [a for a in archives if a.ts > target] 498 499 500def prune_split(archives, pattern, n, skip=[]): 501 last = None 502 keep = [] 503 if n == 0: 504 return keep 505 for a in sorted(archives, key=attrgetter('ts'), reverse=True): 506 period = to_localtime(a.ts).strftime(pattern) 507 if period != last: 508 last = period 509 if a not in skip: 510 keep.append(a) 511 if len(keep) == n: 512 break 513 return keep 514 515 516def ensure_dir(path, mode=stat.S_IRWXU, pretty_deadly=True): 517 """ 518 Ensures that the dir exists with the right permissions. 519 1) Make sure the directory exists in a race-free operation 520 2) If mode is not None and the directory has been created, give the right 521 permissions to the leaf directory 522 3) If pretty_deadly is True, catch exceptions, reraise them with a pretty 523 message. 524 Returns if the directory has been created and has the right permissions, 525 An exception otherwise. If a deadly exception happened it is reraised. 526 """ 527 try: 528 os.makedirs(path, mode=mode, exist_ok=True) 529 except OSError as e: 530 if pretty_deadly: 531 raise Error(e.args[1]) 532 else: 533 raise 534 535 536def get_base_dir(): 537 """Get home directory / base directory for borg: 538 539 - BORG_BASE_DIR, if set 540 - HOME, if set 541 - ~$USER, if USER is set 542 - ~ 543 """ 544 base_dir = os.environ.get('BORG_BASE_DIR') or os.environ.get('HOME') 545 # os.path.expanduser() behaves differently for '~' and '~someuser' as 546 # parameters: when called with an explicit username, the possibly set 547 # environment variable HOME is no longer respected. So we have to check if 548 # it is set and only expand the user's home directory if HOME is unset. 549 if not base_dir: 550 base_dir = os.path.expanduser('~%s' % os.environ.get('USER', '')) 551 return base_dir 552 553 554def get_keys_dir(): 555 """Determine where to repository keys and cache""" 556 557 keys_dir = os.environ.get('BORG_KEYS_DIR', os.path.join(get_config_dir(), 'keys')) 558 ensure_dir(keys_dir) 559 return keys_dir 560 561 562def get_security_dir(repository_id=None): 563 """Determine where to store local security information.""" 564 security_dir = os.environ.get('BORG_SECURITY_DIR', os.path.join(get_config_dir(), 'security')) 565 if repository_id: 566 security_dir = os.path.join(security_dir, repository_id) 567 ensure_dir(security_dir) 568 return security_dir 569 570 571def get_cache_dir(): 572 """Determine where to repository keys and cache""" 573 # Get cache home path 574 cache_home = os.path.join(get_base_dir(), '.cache') 575 # Try to use XDG_CACHE_HOME instead if BORG_BASE_DIR isn't explicitly set 576 if not os.environ.get('BORG_BASE_DIR'): 577 cache_home = os.environ.get('XDG_CACHE_HOME', cache_home) 578 # Use BORG_CACHE_DIR if set, otherwise assemble final path from cache home path 579 cache_dir = os.environ.get('BORG_CACHE_DIR', os.path.join(cache_home, 'borg')) 580 # Create path if it doesn't exist yet 581 ensure_dir(cache_dir) 582 cache_fn = os.path.join(cache_dir, CACHE_TAG_NAME) 583 if not os.path.exists(cache_fn): 584 with open(cache_fn, 'wb') as fd: 585 fd.write(CACHE_TAG_CONTENTS) 586 fd.write(textwrap.dedent(""" 587 # This file is a cache directory tag created by Borg. 588 # For information about cache directory tags, see: 589 # http://www.bford.info/cachedir/spec.html 590 """).encode('ascii')) 591 return cache_dir 592 593 594def get_config_dir(): 595 """Determine where to store whole config""" 596 # Get config home path 597 config_home = os.path.join(get_base_dir(), '.config') 598 # Try to use XDG_CONFIG_HOME instead if BORG_BASE_DIR isn't explicitly set 599 if not os.environ.get('BORG_BASE_DIR'): 600 config_home = os.environ.get('XDG_CONFIG_HOME', config_home) 601 # Use BORG_CONFIG_DIR if set, otherwise assemble final path from config home path 602 config_dir = os.environ.get('BORG_CONFIG_DIR', os.path.join(config_home, 'borg')) 603 # Create path if it doesn't exist yet 604 ensure_dir(config_dir) 605 return config_dir 606 607 608def to_localtime(ts): 609 """Convert datetime object from UTC to local time zone""" 610 return datetime(*time.localtime((ts - datetime(1970, 1, 1, tzinfo=timezone.utc)).total_seconds())[:6]) 611 612 613def parse_timestamp(timestamp, tzinfo=timezone.utc): 614 """Parse a ISO 8601 timestamp string""" 615 fmt = ISO_FORMAT if '.' in timestamp else ISO_FORMAT_NO_USECS 616 dt = datetime.strptime(timestamp, fmt) 617 if tzinfo is not None: 618 dt = dt.replace(tzinfo=tzinfo) 619 return dt 620 621 622def timestamp(s): 623 """Convert a --timestamp=s argument to a datetime object""" 624 try: 625 # is it pointing to a file / directory? 626 ts = safe_s(os.stat(s).st_mtime) 627 return datetime.fromtimestamp(ts, tz=timezone.utc) 628 except OSError: 629 # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support. 630 for format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', 631 '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', 632 '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M', 633 '%Y-%m-%d', '%Y-%j', 634 ): 635 try: 636 return datetime.strptime(s, format).replace(tzinfo=timezone.utc) 637 except ValueError: 638 continue 639 raise ValueError 640 641 642def ChunkerParams(s): 643 if s.strip().lower() == "default": 644 return CHUNKER_PARAMS 645 chunk_min, chunk_max, chunk_mask, window_size = s.split(',') 646 if int(chunk_max) > 23: 647 raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)') 648 return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size) 649 650 651def FilesCacheMode(s): 652 ENTRIES_MAP = dict(ctime='c', mtime='m', size='s', inode='i', rechunk='r', disabled='d') 653 VALID_MODES = ('cis', 'ims', 'cs', 'ms', 'cr', 'mr', 'd', 's') # letters in alpha order 654 entries = set(s.strip().split(',')) 655 if not entries <= set(ENTRIES_MAP): 656 raise ValueError('cache mode must be a comma-separated list of: %s' % ','.join(sorted(ENTRIES_MAP))) 657 short_entries = {ENTRIES_MAP[entry] for entry in entries} 658 mode = ''.join(sorted(short_entries)) 659 if mode not in VALID_MODES: 660 raise ValueError('cache mode short must be one of: %s' % ','.join(VALID_MODES)) 661 return mode 662 663 664assert FilesCacheMode(DEFAULT_FILES_CACHE_MODE_UI) == DEFAULT_FILES_CACHE_MODE # keep these 2 values in sync! 665 666 667def dir_is_cachedir(path): 668 """Determines whether the specified path is a cache directory (and 669 therefore should potentially be excluded from the backup) according to 670 the CACHEDIR.TAG protocol 671 (http://www.bford.info/cachedir/spec.html). 672 """ 673 674 tag_path = os.path.join(path, CACHE_TAG_NAME) 675 try: 676 if os.path.exists(tag_path): 677 with open(tag_path, 'rb') as tag_file: 678 tag_data = tag_file.read(len(CACHE_TAG_CONTENTS)) 679 if tag_data == CACHE_TAG_CONTENTS: 680 return True 681 except OSError: 682 pass 683 return False 684 685 686def dir_is_tagged(path, exclude_caches, exclude_if_present): 687 """Determines whether the specified path is excluded by being a cache 688 directory or containing user-specified tag files/directories. Returns a 689 list of the paths of the tag files/directories (either CACHEDIR.TAG or the 690 matching user-specified files/directories). 691 """ 692 tag_paths = [] 693 if exclude_caches and dir_is_cachedir(path): 694 tag_paths.append(os.path.join(path, CACHE_TAG_NAME)) 695 if exclude_if_present is not None: 696 for tag in exclude_if_present: 697 tag_path = os.path.join(path, tag) 698 if os.path.exists(tag_path): 699 tag_paths.append(tag_path) 700 return tag_paths 701 702 703def partial_format(format, mapping): 704 """ 705 Apply format.format_map(mapping) while preserving unknown keys 706 707 Does not support attribute access, indexing and ![rsa] conversions 708 """ 709 for key, value in mapping.items(): 710 key = re.escape(key) 711 format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key), 712 lambda match: match.group(1).format_map(mapping), 713 format) 714 return format 715 716 717class DatetimeWrapper: 718 def __init__(self, dt): 719 self.dt = dt 720 721 def __format__(self, format_spec): 722 if format_spec == '': 723 format_spec = ISO_FORMAT_NO_USECS 724 return self.dt.__format__(format_spec) 725 726 727def format_line(format, data): 728 for _, key, _, conversion in Formatter().parse(format): 729 if not key: 730 continue 731 if conversion or key not in data: 732 raise InvalidPlaceholder(key, format) 733 try: 734 return format.format_map(data) 735 except Exception as e: 736 raise PlaceholderError(format, data, e.__class__.__name__, str(e)) 737 738 739def replace_placeholders(text, overrides={}): 740 """Replace placeholders in text with their values.""" 741 from .platform import fqdn, hostname 742 current_time = datetime.now(timezone.utc) 743 data = { 744 'pid': os.getpid(), 745 'fqdn': fqdn, 746 'reverse-fqdn': '.'.join(reversed(fqdn.split('.'))), 747 'hostname': hostname, 748 'now': DatetimeWrapper(current_time.astimezone(None)), 749 'utcnow': DatetimeWrapper(current_time), 750 'user': uid2user(os.getuid(), os.getuid()), 751 'uuid4': str(uuid.uuid4()), 752 'borgversion': borg_version, 753 'borgmajor': '%d' % borg_version_tuple[:1], 754 'borgminor': '%d.%d' % borg_version_tuple[:2], 755 'borgpatch': '%d.%d.%d' % borg_version_tuple[:3], 756 **overrides, 757 } 758 return format_line(text, data) 759 760 761PrefixSpec = replace_placeholders 762 763GlobSpec = replace_placeholders 764 765CommentSpec = replace_placeholders 766 767HUMAN_SORT_KEYS = ['timestamp'] + list(ArchiveInfo._fields) 768HUMAN_SORT_KEYS.remove('ts') 769 770 771def SortBySpec(text): 772 for token in text.split(','): 773 if token not in HUMAN_SORT_KEYS: 774 raise ValueError('Invalid sort key: %s' % token) 775 return text.replace('timestamp', 'ts') 776 777 778# Not too rarely, we get crappy timestamps from the fs, that overflow some computations. 779# As they are crap anyway (valid filesystem timestamps always refer to the past up to 780# the present, but never to the future), nothing is lost if we just clamp them to the 781# maximum value we can support. 782# As long as people are using borg on 32bit platforms to access borg archives, we must 783# keep this value True. But we can expect that we can stop supporting 32bit platforms 784# well before coming close to the year 2038, so this will never be a practical problem. 785SUPPORT_32BIT_PLATFORMS = True # set this to False before y2038. 786 787if SUPPORT_32BIT_PLATFORMS: 788 # second timestamps will fit into a signed int32 (platform time_t limit). 789 # nanosecond timestamps thus will naturally fit into a signed int64. 790 # subtract last 48h to avoid any issues that could be caused by tz calculations. 791 # this is in the year 2038, so it is also less than y9999 (which is a datetime internal limit). 792 # msgpack can pack up to uint64. 793 MAX_S = 2**31-1 - 48*3600 794 MAX_NS = MAX_S * 1000000000 795else: 796 # nanosecond timestamps will fit into a signed int64. 797 # subtract last 48h to avoid any issues that could be caused by tz calculations. 798 # this is in the year 2262, so it is also less than y9999 (which is a datetime internal limit). 799 # round down to 1e9 multiple, so MAX_NS corresponds precisely to a integer MAX_S. 800 # msgpack can pack up to uint64. 801 MAX_NS = (2**63-1 - 48*3600*1000000000) // 1000000000 * 1000000000 802 MAX_S = MAX_NS // 1000000000 803 804 805def safe_s(ts): 806 if 0 <= ts <= MAX_S: 807 return ts 808 elif ts < 0: 809 return 0 810 else: 811 return MAX_S 812 813 814def safe_ns(ts): 815 if 0 <= ts <= MAX_NS: 816 return ts 817 elif ts < 0: 818 return 0 819 else: 820 return MAX_NS 821 822 823def safe_timestamp(item_timestamp_ns): 824 t_ns = safe_ns(item_timestamp_ns) 825 return datetime.fromtimestamp(t_ns / 1e9) 826 827 828def format_time(ts: datetime, format_spec=''): 829 """ 830 Convert *ts* to a human-friendly format with textual weekday. 831 """ 832 return ts.strftime('%a, %Y-%m-%d %H:%M:%S' if format_spec == '' else format_spec) 833 834 835def isoformat_time(ts: datetime): 836 """ 837 Format *ts* according to ISO 8601. 838 """ 839 # note: first make all datetime objects tz aware before adding %z here. 840 return ts.strftime(ISO_FORMAT) 841 842 843def format_timedelta(td): 844 """Format timedelta in a human friendly format 845 """ 846 ts = td.total_seconds() 847 s = ts % 60 848 m = int(ts / 60) % 60 849 h = int(ts / 3600) % 24 850 txt = '%.2f seconds' % s 851 if m: 852 txt = '%d minutes %s' % (m, txt) 853 if h: 854 txt = '%d hours %s' % (h, txt) 855 if td.days: 856 txt = '%d days %s' % (td.days, txt) 857 return txt 858 859 860class OutputTimestamp: 861 def __init__(self, ts: datetime): 862 if ts.tzinfo == timezone.utc: 863 ts = to_localtime(ts) 864 self.ts = ts 865 866 def __format__(self, format_spec): 867 return format_time(self.ts, format_spec=format_spec) 868 869 def __str__(self): 870 return '{}'.format(self) 871 872 def isoformat(self): 873 return isoformat_time(self.ts) 874 875 to_json = isoformat 876 877 878def format_file_size(v, precision=2, sign=False): 879 """Format file size into a human friendly format 880 """ 881 return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign) 882 883 884class FileSize(int): 885 def __format__(self, format_spec): 886 return format_file_size(int(self)).__format__(format_spec) 887 888 889def parse_file_size(s): 890 """Return int from file size (1234, 55G, 1.7T).""" 891 if not s: 892 return int(s) # will raise 893 suffix = s[-1] 894 power = 1000 895 try: 896 factor = { 897 'K': power, 898 'M': power**2, 899 'G': power**3, 900 'T': power**4, 901 'P': power**5, 902 }[suffix] 903 s = s[:-1] 904 except KeyError: 905 factor = 1 906 return int(float(s) * factor) 907 908 909def sizeof_fmt(num, suffix='B', units=None, power=None, sep='', precision=2, sign=False): 910 prefix = '+' if sign and num > 0 else '' 911 912 for unit in units[:-1]: 913 if abs(round(num, precision)) < power: 914 if isinstance(num, int): 915 return "{}{}{}{}{}".format(prefix, num, sep, unit, suffix) 916 else: 917 return "{}{:3.{}f}{}{}{}".format(prefix, num, precision, sep, unit, suffix) 918 num /= float(power) 919 return "{}{:.{}f}{}{}{}".format(prefix, num, precision, sep, units[-1], suffix) 920 921 922def sizeof_fmt_iec(num, suffix='B', sep='', precision=2, sign=False): 923 return sizeof_fmt(num, suffix=suffix, sep=sep, precision=precision, sign=sign, 924 units=['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'], power=1024) 925 926 927def sizeof_fmt_decimal(num, suffix='B', sep='', precision=2, sign=False): 928 return sizeof_fmt(num, suffix=suffix, sep=sep, precision=precision, sign=sign, 929 units=['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'], power=1000) 930 931 932def format_archive(archive): 933 return '%-36s %s [%s]' % ( 934 archive.name, 935 format_time(to_localtime(archive.ts)), 936 bin_to_hex(archive.id), 937 ) 938 939 940class Buffer: 941 """ 942 managed buffer (like a resizable bytearray) 943 """ 944 945 class MemoryLimitExceeded(Error, OSError): 946 """Requested buffer size {} is above the limit of {}.""" 947 948 def __init__(self, allocator, size=4096, limit=None): 949 """ 950 Initialize the buffer: use allocator(size) call to allocate a buffer. 951 Optionally, set the upper <limit> for the buffer size. 952 """ 953 assert callable(allocator), 'must give alloc(size) function as first param' 954 assert limit is None or size <= limit, 'initial size must be <= limit' 955 self.allocator = allocator 956 self.limit = limit 957 self.resize(size, init=True) 958 959 def __len__(self): 960 return len(self.buffer) 961 962 def resize(self, size, init=False): 963 """ 964 resize the buffer - to avoid frequent reallocation, we usually always grow (if needed). 965 giving init=True it is possible to first-time initialize or shrink the buffer. 966 if a buffer size beyond the limit is requested, raise Buffer.MemoryLimitExceeded (OSError). 967 """ 968 size = int(size) 969 if self.limit is not None and size > self.limit: 970 raise Buffer.MemoryLimitExceeded(size, self.limit) 971 if init or len(self) < size: 972 self.buffer = self.allocator(size) 973 974 def get(self, size=None, init=False): 975 """ 976 return a buffer of at least the requested size (None: any current size). 977 init=True can be given to trigger shrinking of the buffer to the given size. 978 """ 979 if size is not None: 980 self.resize(size, init) 981 return self.buffer 982 983 984@lru_cache(maxsize=None) 985def uid2user(uid, default=None): 986 try: 987 return pwd.getpwuid(uid).pw_name 988 except KeyError: 989 return default 990 991 992@lru_cache(maxsize=None) 993def user2uid(user, default=None): 994 try: 995 return user and pwd.getpwnam(user).pw_uid 996 except KeyError: 997 return default 998 999 1000@lru_cache(maxsize=None) 1001def gid2group(gid, default=None): 1002 try: 1003 return grp.getgrgid(gid).gr_name 1004 except KeyError: 1005 return default 1006 1007 1008@lru_cache(maxsize=None) 1009def group2gid(group, default=None): 1010 try: 1011 return group and grp.getgrnam(group).gr_gid 1012 except KeyError: 1013 return default 1014 1015 1016def posix_acl_use_stored_uid_gid(acl): 1017 """Replace the user/group field with the stored uid/gid 1018 """ 1019 entries = [] 1020 for entry in safe_decode(acl).split('\n'): 1021 if entry: 1022 fields = entry.split(':') 1023 if len(fields) == 4: 1024 entries.append(':'.join([fields[0], fields[3], fields[2]])) 1025 else: 1026 entries.append(entry) 1027 return safe_encode('\n'.join(entries)) 1028 1029 1030def safe_decode(s, coding='utf-8', errors='surrogateescape'): 1031 """decode bytes to str, with round-tripping "invalid" bytes""" 1032 if s is None: 1033 return None 1034 return s.decode(coding, errors) 1035 1036 1037def safe_encode(s, coding='utf-8', errors='surrogateescape'): 1038 """encode str to bytes, with round-tripping "invalid" bytes""" 1039 if s is None: 1040 return None 1041 return s.encode(coding, errors) 1042 1043 1044def bin_to_hex(binary): 1045 return hexlify(binary).decode('ascii') 1046 1047 1048def parse_stringified_list(s): 1049 l = re.split(" *, *", s) 1050 return [item for item in l if item != ''] 1051 1052 1053class Location: 1054 """Object representing a repository / archive location 1055 """ 1056 proto = user = _host = port = path = archive = None 1057 1058 # user must not contain "@", ":" or "/". 1059 # Quoting adduser error message: 1060 # "To avoid problems, the username should consist only of letters, digits, 1061 # underscores, periods, at signs and dashes, and not start with a dash 1062 # (as defined by IEEE Std 1003.1-2001)." 1063 # We use "@" as separator between username and hostname, so we must 1064 # disallow it within the pure username part. 1065 optional_user_re = r""" 1066 (?:(?P<user>[^@:/]+)@)? 1067 """ 1068 1069 # path must not contain :: (it ends at :: or string end), but may contain single colons. 1070 # to avoid ambiguities with other regexes, it must also not start with ":" nor with "//" nor with "ssh://". 1071 scp_path_re = r""" 1072 (?!(:|//|ssh://)) # not starting with ":" or // or ssh:// 1073 (?P<path>([^:]|(:(?!:)))+) # any chars, but no "::" 1074 """ 1075 1076 # file_path must not contain :: (it ends at :: or string end), but may contain single colons. 1077 # it must start with a / and that slash is part of the path. 1078 file_path_re = r""" 1079 (?P<path>(([^/]*)/([^:]|(:(?!:)))+)) # start opt. servername, then /, then any chars, but no "::" 1080 """ 1081 1082 # abs_path must not contain :: (it ends at :: or string end), but may contain single colons. 1083 # it must start with a / and that slash is part of the path. 1084 abs_path_re = r""" 1085 (?P<path>(/([^:]|(:(?!:)))+)) # start with /, then any chars, but no "::" 1086 """ 1087 1088 # optional ::archive_name at the end, archive name must not contain "/". 1089 # borg mount's FUSE filesystem creates one level of directories from 1090 # the archive names and of course "/" is not valid in a directory name. 1091 optional_archive_re = r""" 1092 (?: 1093 :: # "::" as separator 1094 (?P<archive>[^/]+) # archive name must not contain "/" 1095 )?$""" # must match until the end 1096 1097 # regexes for misc. kinds of supported location specifiers: 1098 ssh_re = re.compile(r""" 1099 (?P<proto>ssh):// # ssh:// 1100 """ + optional_user_re + r""" # user@ (optional) 1101 (?P<host>([^:/]+|\[[0-9a-fA-F:.]+\]))(?::(?P<port>\d+))? # host or host:port or [ipv6] or [ipv6]:port 1102 """ + abs_path_re + optional_archive_re, re.VERBOSE) # path or path::archive 1103 1104 file_re = re.compile(r""" 1105 (?P<proto>file):// # file:// 1106 """ + file_path_re + optional_archive_re, re.VERBOSE) # servername/path, path or path::archive 1107 1108 # note: scp_re is also use for local paths 1109 scp_re = re.compile(r""" 1110 ( 1111 """ + optional_user_re + r""" # user@ (optional) 1112 (?P<host>([^:/]+|\[[0-9a-fA-F:.]+\])): # host: (don't match / or [ipv6] in host to disambiguate from file:) 1113 )? # user@host: part is optional 1114 """ + scp_path_re + optional_archive_re, re.VERBOSE) # path with optional archive 1115 1116 # get the repo from BORG_REPO env and the optional archive from param. 1117 # if the syntax requires giving REPOSITORY (see "borg mount"), 1118 # use "::" to let it use the env var. 1119 # if REPOSITORY argument is optional, it'll automatically use the env. 1120 env_re = re.compile(r""" # the repo part is fetched from BORG_REPO 1121 (?:::$) # just "::" is ok (when a pos. arg is required, no archive) 1122 | # or 1123 """ + optional_archive_re, re.VERBOSE) # archive name (optional, may be empty) 1124 1125 def __init__(self, text='', overrides={}): 1126 if not self.parse(text, overrides): 1127 raise ValueError('Invalid location format: "%s"' % self.orig) 1128 1129 def parse(self, text, overrides={}): 1130 self.orig = text 1131 text = replace_placeholders(text, overrides) 1132 valid = self._parse(text) 1133 if valid: 1134 return True 1135 m = self.env_re.match(text) 1136 if not m: 1137 return False 1138 repo = os.environ.get('BORG_REPO') 1139 if repo is None: 1140 return False 1141 valid = self._parse(repo) 1142 self.archive = m.group('archive') 1143 self.orig = repo if not self.archive else '%s::%s' % (repo, self.archive) 1144 return valid 1145 1146 def _parse(self, text): 1147 def normpath_special(p): 1148 # avoid that normpath strips away our relative path hack and even makes p absolute 1149 relative = p.startswith('/./') 1150 p = os.path.normpath(p) 1151 return ('/.' + p) if relative else p 1152 1153 m = self.ssh_re.match(text) 1154 if m: 1155 self.proto = m.group('proto') 1156 self.user = m.group('user') 1157 self._host = m.group('host') 1158 self.port = m.group('port') and int(m.group('port')) or None 1159 self.path = normpath_special(m.group('path')) 1160 self.archive = m.group('archive') 1161 return True 1162 m = self.file_re.match(text) 1163 if m: 1164 self.proto = m.group('proto') 1165 self.path = normpath_special(m.group('path')) 1166 self.archive = m.group('archive') 1167 return True 1168 m = self.scp_re.match(text) 1169 if m: 1170 self.user = m.group('user') 1171 self._host = m.group('host') 1172 self.path = normpath_special(m.group('path')) 1173 self.archive = m.group('archive') 1174 self.proto = self._host and 'ssh' or 'file' 1175 return True 1176 return False 1177 1178 def __str__(self): 1179 items = [ 1180 'proto=%r' % self.proto, 1181 'user=%r' % self.user, 1182 'host=%r' % self.host, 1183 'port=%r' % self.port, 1184 'path=%r' % self.path, 1185 'archive=%r' % self.archive, 1186 ] 1187 return ', '.join(items) 1188 1189 def to_key_filename(self): 1190 name = re.sub(r'[^\w]', '_', self.path).strip('_') 1191 if self.proto != 'file': 1192 name = re.sub(r'[^\w]', '_', self.host) + '__' + name 1193 if len(name) > 100: 1194 # Limit file names to some reasonable length. Most file systems 1195 # limit them to 255 [unit of choice]; due to variations in unicode 1196 # handling we truncate to 100 *characters*. 1197 name = name[:100] 1198 return os.path.join(get_keys_dir(), name) 1199 1200 def __repr__(self): 1201 return "Location(%s)" % self 1202 1203 @property 1204 def host(self): 1205 # strip square brackets used for IPv6 addrs 1206 if self._host is not None: 1207 return self._host.lstrip('[').rstrip(']') 1208 1209 def canonical_path(self): 1210 if self.proto == 'file': 1211 return self.path 1212 else: 1213 if self.path and self.path.startswith('~'): 1214 path = '/' + self.path # /~/x = path x relative to home dir 1215 elif self.path and not self.path.startswith('/'): 1216 path = '/./' + self.path # /./x = path x relative to cwd 1217 else: 1218 path = self.path 1219 return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '', 1220 self._host, # needed for ipv6 addrs 1221 ':{}'.format(self.port) if self.port else '', 1222 path) 1223 1224 def with_timestamp(self, timestamp): 1225 return Location(self.orig, overrides={ 1226 'now': DatetimeWrapper(timestamp.astimezone(None)), 1227 'utcnow': DatetimeWrapper(timestamp), 1228 }) 1229 1230 1231def location_validator(archive=None, proto=None): 1232 def validator(text): 1233 try: 1234 loc = Location(text) 1235 except ValueError as err: 1236 raise argparse.ArgumentTypeError(str(err)) from None 1237 if archive is True and not loc.archive: 1238 raise argparse.ArgumentTypeError('"%s": No archive specified' % text) 1239 elif archive is False and loc.archive: 1240 raise argparse.ArgumentTypeError('"%s": No archive can be specified' % text) 1241 if proto is not None and loc.proto != proto: 1242 if proto == 'file': 1243 raise argparse.ArgumentTypeError('"%s": Repository must be local' % text) 1244 else: 1245 raise argparse.ArgumentTypeError('"%s": Repository must be remote' % text) 1246 return loc 1247 return validator 1248 1249 1250def archivename_validator(): 1251 def validator(text): 1252 text = replace_placeholders(text) 1253 if '/' in text or '::' in text or not text: 1254 raise argparse.ArgumentTypeError('Invalid archive name: "%s"' % text) 1255 return text 1256 return validator 1257 1258 1259def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'): 1260 for key in keys: 1261 if isinstance(d.get(key), bytes): 1262 d[key] = d[key].decode(encoding, errors) 1263 return d 1264 1265 1266def prepare_dump_dict(d): 1267 def decode_bytes(value): 1268 # this should somehow be reversible later, but usual strings should 1269 # look nice and chunk ids should mostly show in hex. Use a special 1270 # inband signaling character (ASCII DEL) to distinguish between 1271 # decoded and hex mode. 1272 if not value.startswith(b'\x7f'): 1273 try: 1274 value = value.decode() 1275 return value 1276 except UnicodeDecodeError: 1277 pass 1278 return '\u007f' + bin_to_hex(value) 1279 1280 def decode_tuple(t): 1281 res = [] 1282 for value in t: 1283 if isinstance(value, dict): 1284 value = decode(value) 1285 elif isinstance(value, tuple) or isinstance(value, list): 1286 value = decode_tuple(value) 1287 elif isinstance(value, bytes): 1288 value = decode_bytes(value) 1289 res.append(value) 1290 return res 1291 1292 def decode(d): 1293 res = collections.OrderedDict() 1294 for key, value in d.items(): 1295 if isinstance(value, dict): 1296 value = decode(value) 1297 elif isinstance(value, (tuple, list)): 1298 value = decode_tuple(value) 1299 elif isinstance(value, bytes): 1300 value = decode_bytes(value) 1301 if isinstance(key, bytes): 1302 key = key.decode() 1303 res[key] = value 1304 return res 1305 1306 return decode(d) 1307 1308 1309def remove_surrogates(s, errors='replace'): 1310 """Replace surrogates generated by fsdecode with '?' 1311 """ 1312 return s.encode('utf-8', errors).decode('utf-8') 1313 1314 1315_safe_re = re.compile(r'^((\.\.)?/+)+') 1316 1317 1318def make_path_safe(path): 1319 """Make path safe by making it relative and local 1320 """ 1321 return _safe_re.sub('', path) or '.' 1322 1323 1324def daemonize(): 1325 """Detach process from controlling terminal and run in background 1326 1327 Returns: old and new get_process_id tuples 1328 """ 1329 from .platform import get_process_id 1330 old_id = get_process_id() 1331 pid = os.fork() 1332 if pid: 1333 os._exit(0) 1334 os.setsid() 1335 pid = os.fork() 1336 if pid: 1337 os._exit(0) 1338 os.chdir('/') 1339 os.close(0) 1340 os.close(1) 1341 os.close(2) 1342 fd = os.open(os.devnull, os.O_RDWR) 1343 os.dup2(fd, 0) 1344 os.dup2(fd, 1) 1345 os.dup2(fd, 2) 1346 new_id = get_process_id() 1347 return old_id, new_id 1348 1349 1350class StableDict(dict): 1351 """A dict subclass with stable items() ordering""" 1352 def items(self): 1353 return sorted(super().items()) 1354 1355 1356def bigint_to_int(mtime): 1357 """Convert bytearray to int 1358 """ 1359 if isinstance(mtime, bytes): 1360 return int.from_bytes(mtime, 'little', signed=True) 1361 return mtime 1362 1363 1364def int_to_bigint(value): 1365 """Convert integers larger than 64 bits to bytearray 1366 1367 Smaller integers are left alone 1368 """ 1369 if value.bit_length() > 63: 1370 return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) 1371 return value 1372 1373 1374def is_slow_msgpack(): 1375 return msgpack.Packer is msgpack_fallback.Packer 1376 1377 1378def is_supported_msgpack(): 1379 # DO NOT CHANGE OR REMOVE! See also requirements and comments in setup.py. 1380 v = msgpack.version[:3] 1381 return (0, 4, 6) <= v <= (0, 5, 6) and \ 1382 v not in [(0, 5, 0), (0, 5, 2), (0, 5, 3), (0, 5, 5)] 1383 1384 1385FALSISH = ('No', 'NO', 'no', 'N', 'n', '0', ) 1386TRUISH = ('Yes', 'YES', 'yes', 'Y', 'y', '1', ) 1387DEFAULTISH = ('Default', 'DEFAULT', 'default', 'D', 'd', '', ) 1388 1389 1390def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, 1391 retry_msg=None, invalid_msg=None, env_msg='{} (from {})', 1392 falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH, 1393 default=False, retry=True, env_var_override=None, ofile=None, input=input, prompt=True, 1394 msgid=None): 1395 """Output <msg> (usually a question) and let user input an answer. 1396 Qualifies the answer according to falsish, truish and defaultish as True, False or <default>. 1397 If it didn't qualify and retry is False (no retries wanted), return the default [which 1398 defaults to False]. If retry is True let user retry answering until answer is qualified. 1399 1400 If env_var_override is given and this var is present in the environment, do not ask 1401 the user, but just use the env var contents as answer as if it was typed in. 1402 Otherwise read input from stdin and proceed as normal. 1403 If EOF is received instead an input or an invalid input without retry possibility, 1404 return default. 1405 1406 :param msg: introducing message to output on ofile, no \n is added [None] 1407 :param retry_msg: retry message to output on ofile, no \n is added [None] 1408 :param false_msg: message to output before returning False [None] 1409 :param true_msg: message to output before returning True [None] 1410 :param default_msg: message to output before returning a <default> [None] 1411 :param invalid_msg: message to output after a invalid answer was given [None] 1412 :param env_msg: message to output when using input from env_var_override ['{} (from {})'], 1413 needs to have 2 placeholders for answer and env var name 1414 :param falsish: sequence of answers qualifying as False 1415 :param truish: sequence of answers qualifying as True 1416 :param defaultish: sequence of answers qualifying as <default> 1417 :param default: default return value (defaultish answer was given or no-answer condition) [False] 1418 :param retry: if True and input is incorrect, retry. Otherwise return default. [True] 1419 :param env_var_override: environment variable name [None] 1420 :param ofile: output stream [sys.stderr] 1421 :param input: input function [input from builtins] 1422 :return: boolean answer value, True or False 1423 """ 1424 def output(msg, msg_type, is_prompt=False, **kwargs): 1425 json_output = getattr(logging.getLogger('borg'), 'json', False) 1426 if json_output: 1427 kwargs.update(dict( 1428 type='question_%s' % msg_type, 1429 msgid=msgid, 1430 message=msg, 1431 )) 1432 print(json.dumps(kwargs), file=sys.stderr) 1433 else: 1434 if is_prompt: 1435 print(msg, file=ofile, end='', flush=True) 1436 else: 1437 print(msg, file=ofile) 1438 1439 msgid = msgid or env_var_override 1440 # note: we do not assign sys.stderr as default above, so it is 1441 # really evaluated NOW, not at function definition time. 1442 if ofile is None: 1443 ofile = sys.stderr 1444 if default not in (True, False): 1445 raise ValueError("invalid default value, must be True or False") 1446 if msg: 1447 output(msg, 'prompt', is_prompt=True) 1448 while True: 1449 answer = None 1450 if env_var_override: 1451 answer = os.environ.get(env_var_override) 1452 if answer is not None and env_msg: 1453 output(env_msg.format(answer, env_var_override), 'env_answer', env_var=env_var_override) 1454 if answer is None: 1455 if not prompt: 1456 return default 1457 try: 1458 answer = input() 1459 except EOFError: 1460 # avoid defaultish[0], defaultish could be empty 1461 answer = truish[0] if default else falsish[0] 1462 if answer in defaultish: 1463 if default_msg: 1464 output(default_msg, 'accepted_default') 1465 return default 1466 if answer in truish: 1467 if true_msg: 1468 output(true_msg, 'accepted_true') 1469 return True 1470 if answer in falsish: 1471 if false_msg: 1472 output(false_msg, 'accepted_false') 1473 return False 1474 # if we get here, the answer was invalid 1475 if invalid_msg: 1476 output(invalid_msg, 'invalid_answer') 1477 if not retry: 1478 return default 1479 if retry_msg: 1480 output(retry_msg, 'prompt_retry', is_prompt=True) 1481 # in case we used an environment variable and it gave an invalid answer, do not use it again: 1482 env_var_override = None 1483 1484 1485def hostname_is_unique(): 1486 return yes(env_var_override='BORG_HOSTNAME_IS_UNIQUE', prompt=False, env_msg=None, default=True) 1487 1488 1489def ellipsis_truncate(msg, space): 1490 """ 1491 shorten a long string by adding ellipsis between it and return it, example: 1492 this_is_a_very_long_string -------> this_is..._string 1493 """ 1494 from .platform import swidth 1495 ellipsis_width = swidth('...') 1496 msg_width = swidth(msg) 1497 if space < 8: 1498 # if there is very little space, just show ... 1499 return '...' + ' ' * (space - ellipsis_width) 1500 if space < ellipsis_width + msg_width: 1501 return '%s...%s' % (swidth_slice(msg, space // 2 - ellipsis_width), 1502 swidth_slice(msg, -space // 2)) 1503 return msg + ' ' * (space - msg_width) 1504 1505 1506class ProgressIndicatorBase: 1507 LOGGER = 'borg.output.progress' 1508 JSON_TYPE = None 1509 json = False 1510 1511 operation_id_counter = 0 1512 1513 @classmethod 1514 def operation_id(cls): 1515 """Unique number, can be used by receiving applications to distinguish different operations.""" 1516 cls.operation_id_counter += 1 1517 return cls.operation_id_counter 1518 1519 def __init__(self, msgid=None): 1520 self.handler = None 1521 self.logger = logging.getLogger(self.LOGGER) 1522 self.id = self.operation_id() 1523 self.msgid = msgid 1524 1525 # If there are no handlers, set one up explicitly because the 1526 # terminator and propagation needs to be set. If there are, 1527 # they must have been set up by BORG_LOGGING_CONF: skip setup. 1528 if not self.logger.handlers: 1529 self.handler = logging.StreamHandler(stream=sys.stderr) 1530 self.handler.setLevel(logging.INFO) 1531 logger = logging.getLogger('borg') 1532 # Some special attributes on the borg logger, created by setup_logging 1533 # But also be able to work without that 1534 try: 1535 formatter = logger.formatter 1536 terminator = '\n' if logger.json else '\r' 1537 self.json = logger.json 1538 except AttributeError: 1539 terminator = '\r' 1540 else: 1541 self.handler.setFormatter(formatter) 1542 self.handler.terminator = terminator 1543 1544 self.logger.addHandler(self.handler) 1545 if self.logger.level == logging.NOTSET: 1546 self.logger.setLevel(logging.WARN) 1547 self.logger.propagate = False 1548 1549 # If --progress is not set then the progress logger level will be WARN 1550 # due to setup_implied_logging (it may be NOTSET with a logging config file, 1551 # but the interactions there are generally unclear), so self.emit becomes 1552 # False, which is correct. 1553 # If --progress is set then the level will be INFO as per setup_implied_logging; 1554 # note that this is always the case for serve processes due to a "args.progress |= is_serve". 1555 # In this case self.emit is True. 1556 self.emit = self.logger.getEffectiveLevel() == logging.INFO 1557 1558 def __del__(self): 1559 if self.handler is not None: 1560 self.logger.removeHandler(self.handler) 1561 self.handler.close() 1562 1563 def output_json(self, *, finished=False, **kwargs): 1564 assert self.json 1565 if not self.emit: 1566 return 1567 kwargs.update(dict( 1568 operation=self.id, 1569 msgid=self.msgid, 1570 type=self.JSON_TYPE, 1571 finished=finished, 1572 time=time.time(), 1573 )) 1574 print(json.dumps(kwargs), file=sys.stderr, flush=True) 1575 1576 def finish(self): 1577 if self.json: 1578 self.output_json(finished=True) 1579 else: 1580 self.output('') 1581 1582 1583def justify_to_terminal_size(message): 1584 terminal_space = get_terminal_size(fallback=(-1, -1))[0] 1585 # justify only if we are outputting to a terminal 1586 if terminal_space != -1: 1587 return message.ljust(terminal_space) 1588 return message 1589 1590 1591class ProgressIndicatorMessage(ProgressIndicatorBase): 1592 JSON_TYPE = 'progress_message' 1593 1594 def output(self, msg): 1595 if self.json: 1596 self.output_json(message=msg) 1597 else: 1598 self.logger.info(justify_to_terminal_size(msg)) 1599 1600 1601class ProgressIndicatorPercent(ProgressIndicatorBase): 1602 JSON_TYPE = 'progress_percent' 1603 1604 def __init__(self, total=0, step=5, start=0, msg="%3.0f%%", msgid=None): 1605 """ 1606 Percentage-based progress indicator 1607 1608 :param total: total amount of items 1609 :param step: step size in percent 1610 :param start: at which percent value to start 1611 :param msg: output message, must contain one %f placeholder for the percentage 1612 """ 1613 self.counter = 0 # 0 .. (total-1) 1614 self.total = total 1615 self.trigger_at = start # output next percentage value when reaching (at least) this 1616 self.step = step 1617 self.msg = msg 1618 1619 super().__init__(msgid=msgid) 1620 1621 def progress(self, current=None, increase=1): 1622 if current is not None: 1623 self.counter = current 1624 pct = self.counter * 100 / self.total 1625 self.counter += increase 1626 if pct >= self.trigger_at: 1627 self.trigger_at += self.step 1628 return pct 1629 1630 def show(self, current=None, increase=1, info=None): 1631 """ 1632 Show and output the progress message 1633 1634 :param current: set the current percentage [None] 1635 :param increase: increase the current percentage [None] 1636 :param info: array of strings to be formatted with msg [None] 1637 """ 1638 pct = self.progress(current, increase) 1639 if pct is not None: 1640 # truncate the last argument, if no space is available 1641 if info is not None: 1642 if not self.json: 1643 # no need to truncate if we're not outputting to a terminal 1644 terminal_space = get_terminal_size(fallback=(-1, -1))[0] 1645 if terminal_space != -1: 1646 space = terminal_space - len(self.msg % tuple([pct] + info[:-1] + [''])) 1647 info[-1] = ellipsis_truncate(info[-1], space) 1648 return self.output(self.msg % tuple([pct] + info), justify=False, info=info) 1649 1650 return self.output(self.msg % pct) 1651 1652 def output(self, message, justify=True, info=None): 1653 if self.json: 1654 self.output_json(message=message, current=self.counter, total=self.total, info=info) 1655 else: 1656 if justify: 1657 message = justify_to_terminal_size(message) 1658 self.logger.info(message) 1659 1660 1661class ProgressIndicatorEndless: 1662 def __init__(self, step=10, file=None): 1663 """ 1664 Progress indicator (long row of dots) 1665 1666 :param step: every Nth call, call the func 1667 :param file: output file, default: sys.stderr 1668 """ 1669 self.counter = 0 # call counter 1670 self.triggered = 0 # increases 1 per trigger event 1671 self.step = step # trigger every <step> calls 1672 if file is None: 1673 file = sys.stderr 1674 self.file = file 1675 1676 def progress(self): 1677 self.counter += 1 1678 trigger = self.counter % self.step == 0 1679 if trigger: 1680 self.triggered += 1 1681 return trigger 1682 1683 def show(self): 1684 trigger = self.progress() 1685 if trigger: 1686 return self.output(self.triggered) 1687 1688 def output(self, triggered): 1689 print('.', end='', file=self.file, flush=True) 1690 1691 def finish(self): 1692 print(file=self.file) 1693 1694 1695def sysinfo(): 1696 show_sysinfo = os.environ.get('BORG_SHOW_SYSINFO', 'yes').lower() 1697 if show_sysinfo == 'no': 1698 return '' 1699 1700 python_implementation = platform.python_implementation() 1701 python_version = platform.python_version() 1702 # platform.uname() does a shell call internally to get processor info, 1703 # creating #3732 issue, so rather use os.uname(). 1704 try: 1705 uname = os.uname() 1706 except AttributeError: 1707 uname = None 1708 if sys.platform.startswith('linux'): 1709 try: 1710 linux_distribution = platform.linux_distribution() 1711 except: 1712 # platform.linux_distribution() is deprecated since py 3.5 and removed in 3.7. 1713 linux_distribution = ('Unknown Linux', '', '') 1714 else: 1715 linux_distribution = None 1716 try: 1717 msgpack_version = '.'.join(str(v) for v in msgpack.version) 1718 except: 1719 msgpack_version = 'unknown' 1720 info = [] 1721 if uname is not None: 1722 info.append('Platform: %s' % (' '.join(uname), )) 1723 if linux_distribution is not None: 1724 info.append('Linux: %s %s %s' % linux_distribution) 1725 info.append('Borg: %s Python: %s %s msgpack: %s' % ( 1726 borg_version, python_implementation, python_version, msgpack_version)) 1727 info.append('PID: %d CWD: %s' % (os.getpid(), os.getcwd())) 1728 info.append('sys.argv: %r' % sys.argv) 1729 info.append('SSH_ORIGINAL_COMMAND: %r' % os.environ.get('SSH_ORIGINAL_COMMAND')) 1730 info.append('') 1731 return '\n'.join(info) 1732 1733 1734def log_multi(*msgs, level=logging.INFO, logger=logger): 1735 """ 1736 log multiple lines of text, each line by a separate logging call for cosmetic reasons 1737 1738 each positional argument may be a single or multiple lines (separated by newlines) of text. 1739 """ 1740 lines = [] 1741 for msg in msgs: 1742 lines.extend(msg.splitlines()) 1743 for line in lines: 1744 logger.log(level, line) 1745 1746 1747class BaseFormatter: 1748 FIXED_KEYS = { 1749 # Formatting aids 1750 'LF': '\n', 1751 'SPACE': ' ', 1752 'TAB': '\t', 1753 'CR': '\r', 1754 'NUL': '\0', 1755 'NEWLINE': os.linesep, 1756 'NL': os.linesep, 1757 } 1758 1759 def get_item_data(self, item): 1760 raise NotImplementedError 1761 1762 def format_item(self, item): 1763 return self.format.format_map(self.get_item_data(item)) 1764 1765 @staticmethod 1766 def keys_help(): 1767 return "- NEWLINE: OS dependent line separator\n" \ 1768 "- NL: alias of NEWLINE\n" \ 1769 "- NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath\n" \ 1770 "- SPACE\n" \ 1771 "- TAB\n" \ 1772 "- CR\n" \ 1773 "- LF" 1774 1775 1776class ArchiveFormatter(BaseFormatter): 1777 KEY_DESCRIPTIONS = { 1778 'archive': 'archive name interpreted as text (might be missing non-text characters, see barchive)', 1779 'name': 'alias of "archive"', 1780 'barchive': 'verbatim archive name, can contain any character except NUL', 1781 'comment': 'archive comment interpreted as text (might be missing non-text characters, see bcomment)', 1782 'bcomment': 'verbatim archive comment, can contain any character except NUL', 1783 # *start* is the key used by borg-info for this timestamp, this makes the formats more compatible 1784 'start': 'time (start) of creation of the archive', 1785 'time': 'alias of "start"', 1786 'end': 'time (end) of creation of the archive', 1787 'id': 'internal ID of the archive', 1788 'hostname': 'hostname of host on which this archive was created', 1789 'username': 'username of user who created this archive', 1790 } 1791 KEY_GROUPS = ( 1792 ('archive', 'name', 'barchive', 'comment', 'bcomment', 'id'), 1793 ('start', 'time', 'end'), 1794 ('hostname', 'username'), 1795 ) 1796 1797 @classmethod 1798 def available_keys(cls): 1799 fake_archive_info = ArchiveInfo('archivename', b'\1'*32, datetime(1970, 1, 1, tzinfo=timezone.utc)) 1800 formatter = cls('', None, None, None) 1801 keys = [] 1802 keys.extend(formatter.call_keys.keys()) 1803 keys.extend(formatter.get_item_data(fake_archive_info).keys()) 1804 return keys 1805 1806 @classmethod 1807 def keys_help(cls): 1808 help = [] 1809 keys = cls.available_keys() 1810 for key in cls.FIXED_KEYS: 1811 keys.remove(key) 1812 1813 for group in cls.KEY_GROUPS: 1814 for key in group: 1815 keys.remove(key) 1816 text = "- " + key 1817 if key in cls.KEY_DESCRIPTIONS: 1818 text += ": " + cls.KEY_DESCRIPTIONS[key] 1819 help.append(text) 1820 help.append("") 1821 assert not keys, str(keys) 1822 return "\n".join(help) 1823 1824 def __init__(self, format, repository, manifest, key, *, json=False): 1825 self.repository = repository 1826 self.manifest = manifest 1827 self.key = key 1828 self.name = None 1829 self.id = None 1830 self._archive = None 1831 self.json = json 1832 static_keys = {} # here could be stuff on repo level, above archive level 1833 static_keys.update(self.FIXED_KEYS) 1834 self.format = partial_format(format, static_keys) 1835 self.format_keys = {f[1] for f in Formatter().parse(format)} 1836 self.call_keys = { 1837 'hostname': partial(self.get_meta, 'hostname', rs=True), 1838 'username': partial(self.get_meta, 'username', rs=True), 1839 'comment': partial(self.get_meta, 'comment', rs=True), 1840 'bcomment': partial(self.get_meta, 'comment', rs=False), 1841 'end': self.get_ts_end, 1842 } 1843 self.used_call_keys = set(self.call_keys) & self.format_keys 1844 if self.json: 1845 self.item_data = {} 1846 self.format_item = self.format_item_json 1847 else: 1848 self.item_data = static_keys 1849 1850 def format_item_json(self, item): 1851 return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + '\n' 1852 1853 def get_item_data(self, archive_info): 1854 self.name = archive_info.name 1855 self.id = archive_info.id 1856 item_data = {} 1857 item_data.update(self.item_data) 1858 item_data.update({ 1859 'name': remove_surrogates(archive_info.name), 1860 'archive': remove_surrogates(archive_info.name), 1861 'barchive': archive_info.name, 1862 'id': bin_to_hex(archive_info.id), 1863 'time': self.format_time(archive_info.ts), 1864 'start': self.format_time(archive_info.ts), 1865 }) 1866 for key in self.used_call_keys: 1867 item_data[key] = self.call_keys[key]() 1868 return item_data 1869 1870 @property 1871 def archive(self): 1872 """lazy load / update loaded archive""" 1873 if self._archive is None or self._archive.id != self.id: 1874 from .archive import Archive 1875 self._archive = Archive(self.repository, self.key, self.manifest, self.name) 1876 return self._archive 1877 1878 def get_meta(self, key, rs): 1879 value = self.archive.metadata.get(key, '') 1880 return remove_surrogates(value) if rs else value 1881 1882 def get_ts_end(self): 1883 return self.format_time(self.archive.ts_end) 1884 1885 def format_time(self, ts): 1886 return OutputTimestamp(ts) 1887 1888 1889class ItemFormatter(BaseFormatter): 1890 KEY_DESCRIPTIONS = { 1891 'bpath': 'verbatim POSIX path, can contain any character except NUL', 1892 'path': 'path interpreted as text (might be missing non-text characters, see bpath)', 1893 'source': 'link target for links (identical to linktarget)', 1894 'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links', 1895 'csize': 'compressed size', 1896 'dsize': 'deduplicated size', 1897 'dcsize': 'deduplicated compressed size', 1898 'num_chunks': 'number of chunks in this file', 1899 'unique_chunks': 'number of unique chunks in this file', 1900 'health': 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)', 1901 } 1902 KEY_GROUPS = ( 1903 ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'), 1904 ('size', 'csize', 'dsize', 'dcsize', 'num_chunks', 'unique_chunks'), 1905 ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'), 1906 tuple(sorted(hashlib.algorithms_guaranteed)), 1907 ('archiveid', 'archivename', 'extra'), 1908 ('health', ) 1909 ) 1910 1911 KEYS_REQUIRING_CACHE = ( 1912 'dsize', 'dcsize', 'unique_chunks', 1913 ) 1914 1915 @classmethod 1916 def available_keys(cls): 1917 class FakeArchive: 1918 fpr = name = "" 1919 1920 from .item import Item 1921 fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0) 1922 formatter = cls(FakeArchive, "") 1923 keys = [] 1924 keys.extend(formatter.call_keys.keys()) 1925 keys.extend(formatter.get_item_data(fake_item).keys()) 1926 return keys 1927 1928 @classmethod 1929 def keys_help(cls): 1930 help = [] 1931 keys = cls.available_keys() 1932 for key in cls.FIXED_KEYS: 1933 keys.remove(key) 1934 1935 for group in cls.KEY_GROUPS: 1936 for key in group: 1937 keys.remove(key) 1938 text = "- " + key 1939 if key in cls.KEY_DESCRIPTIONS: 1940 text += ": " + cls.KEY_DESCRIPTIONS[key] 1941 help.append(text) 1942 help.append("") 1943 assert not keys, str(keys) 1944 return "\n".join(help) 1945 1946 @classmethod 1947 def format_needs_cache(cls, format): 1948 format_keys = {f[1] for f in Formatter().parse(format)} 1949 return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys) 1950 1951 def __init__(self, archive, format, *, json_lines=False): 1952 self.archive = archive 1953 self.json_lines = json_lines 1954 static_keys = { 1955 'archivename': archive.name, 1956 'archiveid': archive.fpr, 1957 } 1958 static_keys.update(self.FIXED_KEYS) 1959 if self.json_lines: 1960 self.item_data = {} 1961 self.format_item = self.format_item_json 1962 else: 1963 self.item_data = static_keys 1964 self.format = partial_format(format, static_keys) 1965 self.format_keys = {f[1] for f in Formatter().parse(format)} 1966 self.call_keys = { 1967 'size': self.calculate_size, 1968 'csize': self.calculate_csize, 1969 'dsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.size), 1970 'dcsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.csize), 1971 'num_chunks': self.calculate_num_chunks, 1972 'unique_chunks': partial(self.sum_unique_chunks_metadata, lambda chunk: 1), 1973 'isomtime': partial(self.format_iso_time, 'mtime'), 1974 'isoctime': partial(self.format_iso_time, 'ctime'), 1975 'isoatime': partial(self.format_iso_time, 'atime'), 1976 'mtime': partial(self.format_time, 'mtime'), 1977 'ctime': partial(self.format_time, 'ctime'), 1978 'atime': partial(self.format_time, 'atime'), 1979 } 1980 for hash_function in hashlib.algorithms_guaranteed: 1981 self.add_key(hash_function, partial(self.hash_item, hash_function)) 1982 self.used_call_keys = set(self.call_keys) & self.format_keys 1983 1984 def format_item_json(self, item): 1985 return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + '\n' 1986 1987 def add_key(self, key, callable_with_item): 1988 self.call_keys[key] = callable_with_item 1989 self.used_call_keys = set(self.call_keys) & self.format_keys 1990 1991 def get_item_data(self, item): 1992 item_data = {} 1993 item_data.update(self.item_data) 1994 mode = stat.filemode(item.mode) 1995 item_type = mode[0] 1996 1997 source = item.get('source', '') 1998 extra = '' 1999 if source: 2000 source = remove_surrogates(source) 2001 if item_type == 'l': 2002 extra = ' -> %s' % source 2003 else: 2004 mode = 'h' + mode[1:] 2005 extra = ' link to %s' % source 2006 item_data['type'] = item_type 2007 item_data['mode'] = mode 2008 item_data['user'] = item.user or item.uid 2009 item_data['group'] = item.group or item.gid 2010 item_data['uid'] = item.uid 2011 item_data['gid'] = item.gid 2012 item_data['path'] = remove_surrogates(item.path) 2013 if self.json_lines: 2014 item_data['healthy'] = 'chunks_healthy' not in item 2015 else: 2016 item_data['bpath'] = item.path 2017 item_data['extra'] = extra 2018 item_data['health'] = 'broken' if 'chunks_healthy' in item else 'healthy' 2019 item_data['source'] = source 2020 item_data['linktarget'] = source 2021 item_data['flags'] = item.get('bsdflags') 2022 for key in self.used_call_keys: 2023 item_data[key] = self.call_keys[key](item) 2024 return item_data 2025 2026 def sum_unique_chunks_metadata(self, metadata_func, item): 2027 """ 2028 sum unique chunks metadata, a unique chunk is a chunk which is referenced globally as often as it is in the 2029 item 2030 2031 item: The item to sum its unique chunks' metadata 2032 metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return 2033 the metadata needed from the chunk 2034 """ 2035 chunk_index = self.archive.cache.chunks 2036 chunks = item.get('chunks', []) 2037 chunks_counter = Counter(c.id for c in chunks) 2038 return sum(metadata_func(c) for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id]) 2039 2040 def calculate_num_chunks(self, item): 2041 return len(item.get('chunks', [])) 2042 2043 def calculate_size(self, item): 2044 # note: does not support hardlink slaves, they will be size 0 2045 return item.get_size(compressed=False) 2046 2047 def calculate_csize(self, item): 2048 # note: does not support hardlink slaves, they will be csize 0 2049 return item.get_size(compressed=True) 2050 2051 def hash_item(self, hash_function, item): 2052 if 'chunks' not in item: 2053 return "" 2054 hash = hashlib.new(hash_function) 2055 for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]): 2056 hash.update(data) 2057 return hash.hexdigest() 2058 2059 def format_time(self, key, item): 2060 return OutputTimestamp(safe_timestamp(item.get(key) or item.mtime)) 2061 2062 def format_iso_time(self, key, item): 2063 return self.format_time(key, item).isoformat() 2064 2065 2066class ChunkIteratorFileWrapper: 2067 """File-like wrapper for chunk iterators""" 2068 2069 def __init__(self, chunk_iterator, read_callback=None): 2070 """ 2071 *chunk_iterator* should be an iterator yielding bytes. These will be buffered 2072 internally as necessary to satisfy .read() calls. 2073 2074 *read_callback* will be called with one argument, some byte string that has 2075 just been read and will be subsequently returned to a caller of .read(). 2076 It can be used to update a progress display. 2077 """ 2078 self.chunk_iterator = chunk_iterator 2079 self.chunk_offset = 0 2080 self.chunk = b'' 2081 self.exhausted = False 2082 self.read_callback = read_callback 2083 2084 def _refill(self): 2085 remaining = len(self.chunk) - self.chunk_offset 2086 if not remaining: 2087 try: 2088 chunk = next(self.chunk_iterator) 2089 self.chunk = memoryview(chunk) 2090 except StopIteration: 2091 self.exhausted = True 2092 return 0 # EOF 2093 self.chunk_offset = 0 2094 remaining = len(self.chunk) 2095 return remaining 2096 2097 def _read(self, nbytes): 2098 if not nbytes: 2099 return b'' 2100 remaining = self._refill() 2101 will_read = min(remaining, nbytes) 2102 self.chunk_offset += will_read 2103 return self.chunk[self.chunk_offset - will_read:self.chunk_offset] 2104 2105 def read(self, nbytes): 2106 parts = [] 2107 while nbytes and not self.exhausted: 2108 read_data = self._read(nbytes) 2109 nbytes -= len(read_data) 2110 parts.append(read_data) 2111 if self.read_callback: 2112 self.read_callback(read_data) 2113 return b''.join(parts) 2114 2115 2116def open_item(archive, item): 2117 """Return file-like object for archived item (with chunks).""" 2118 chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks]) 2119 return ChunkIteratorFileWrapper(chunk_iterator) 2120 2121 2122def file_status(mode): 2123 if stat.S_ISREG(mode): 2124 return 'A' 2125 elif stat.S_ISDIR(mode): 2126 return 'd' 2127 elif stat.S_ISBLK(mode): 2128 return 'b' 2129 elif stat.S_ISCHR(mode): 2130 return 'c' 2131 elif stat.S_ISLNK(mode): 2132 return 's' 2133 elif stat.S_ISFIFO(mode): 2134 return 'f' 2135 return '?' 2136 2137 2138def hardlinkable(mode): 2139 """return True if we support hardlinked items of this type""" 2140 return stat.S_ISREG(mode) or stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode) 2141 2142 2143def chunkit(it, size): 2144 """ 2145 Chunk an iterator <it> into pieces of <size>. 2146 2147 >>> list(chunker('ABCDEFG', 3)) 2148 [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']] 2149 """ 2150 iterable = iter(it) 2151 return iter(lambda: list(islice(iterable, size)), []) 2152 2153 2154def consume(iterator, n=None): 2155 """Advance the iterator n-steps ahead. If n is none, consume entirely.""" 2156 # Use functions that consume iterators at C speed. 2157 if n is None: 2158 # feed the entire iterator into a zero-length deque 2159 deque(iterator, maxlen=0) 2160 else: 2161 # advance to the empty slice starting at position n 2162 next(islice(iterator, n, n), None) 2163 2164 2165def scandir_keyfunc(dirent): 2166 try: 2167 return (0, dirent.inode()) 2168 except OSError as e: 2169 # maybe a permission denied error while doing a stat() on the dirent 2170 logger.debug('scandir_inorder: Unable to stat %s: %s', dirent.path, e) 2171 # order this dirent after all the others lexically by file name 2172 # we may not break the whole scandir just because of an exception in one dirent 2173 # ignore the exception for now, since another stat will be done later anyways 2174 # (or the entry will be skipped by an exclude pattern) 2175 return (1, dirent.name) 2176 2177 2178def scandir_inorder(path='.'): 2179 return sorted(scandir(path), key=scandir_keyfunc) 2180 2181 2182def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True): 2183 """ 2184 clean lines (usually read from a config file): 2185 2186 1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments. 2187 2188 note: only "pure comment lines" are supported, no support for "trailing comments". 2189 2190 :param lines: input line iterator (e.g. list or open text file) that gives unclean input lines 2191 :param lstrip: lstrip call arguments or False, if lstripping is not desired 2192 :param rstrip: rstrip call arguments or False, if rstripping is not desired 2193 :param remove_comments: remove comment lines (lines starting with "#") 2194 :param remove_empty: remove empty lines 2195 :return: yields processed lines 2196 """ 2197 for line in lines: 2198 if lstrip is not False: 2199 line = line.lstrip(lstrip) 2200 if rstrip is not False: 2201 line = line.rstrip(rstrip) 2202 if remove_empty and not line: 2203 continue 2204 if remove_comments and line.startswith('#'): 2205 continue 2206 yield line 2207 2208 2209class ErrorIgnoringTextIOWrapper(io.TextIOWrapper): 2210 def read(self, n): 2211 if not self.closed: 2212 try: 2213 return super().read(n) 2214 except BrokenPipeError: 2215 try: 2216 super().close() 2217 except OSError: 2218 pass 2219 return '' 2220 2221 def write(self, s): 2222 if not self.closed: 2223 try: 2224 return super().write(s) 2225 except BrokenPipeError: 2226 try: 2227 super().close() 2228 except OSError: 2229 pass 2230 return len(s) 2231 2232 2233class SignalException(BaseException): 2234 """base class for all signal-based exceptions""" 2235 2236 2237class SigHup(SignalException): 2238 """raised on SIGHUP signal""" 2239 2240 2241class SigTerm(SignalException): 2242 """raised on SIGTERM signal""" 2243 2244 2245@contextlib.contextmanager 2246def signal_handler(sig, handler): 2247 """ 2248 when entering context, set up signal handler <handler> for signal <sig>. 2249 when leaving context, restore original signal handler. 2250 2251 <sig> can bei either a str when giving a signal.SIGXXX attribute name (it 2252 won't crash if the attribute name does not exist as some names are platform 2253 specific) or a int, when giving a signal number. 2254 2255 <handler> is any handler value as accepted by the signal.signal(sig, handler). 2256 """ 2257 if isinstance(sig, str): 2258 sig = getattr(signal, sig, None) 2259 if sig is not None: 2260 orig_handler = signal.signal(sig, handler) 2261 try: 2262 yield 2263 finally: 2264 if sig is not None: 2265 signal.signal(sig, orig_handler) 2266 2267 2268def raising_signal_handler(exc_cls): 2269 def handler(sig_no, frame): 2270 # setting SIG_IGN avoids that an incoming second signal of this 2271 # kind would raise a 2nd exception while we still process the 2272 # exception handler for exc_cls for the 1st signal. 2273 signal.signal(sig_no, signal.SIG_IGN) 2274 raise exc_cls 2275 2276 return handler 2277 2278 2279def swidth_slice(string, max_width): 2280 """ 2281 Return a slice of *max_width* cells from *string*. 2282 2283 Negative *max_width* means from the end of string. 2284 2285 *max_width* is in units of character cells (or "columns"). 2286 Latin characters are usually one cell wide, many CJK characters are two cells wide. 2287 """ 2288 from .platform import swidth 2289 reverse = max_width < 0 2290 max_width = abs(max_width) 2291 if reverse: 2292 string = reversed(string) 2293 current_swidth = 0 2294 result = [] 2295 for character in string: 2296 current_swidth += swidth(character) 2297 if current_swidth > max_width: 2298 break 2299 result.append(character) 2300 if reverse: 2301 result.reverse() 2302 return ''.join(result) 2303 2304 2305class BorgJsonEncoder(json.JSONEncoder): 2306 def default(self, o): 2307 from .repository import Repository 2308 from .remote import RemoteRepository 2309 from .archive import Archive 2310 from .cache import LocalCache, AdHocCache 2311 if isinstance(o, Repository) or isinstance(o, RemoteRepository): 2312 return { 2313 'id': bin_to_hex(o.id), 2314 'location': o._location.canonical_path(), 2315 } 2316 if isinstance(o, Archive): 2317 return o.info() 2318 if isinstance(o, LocalCache): 2319 return { 2320 'path': o.path, 2321 'stats': o.stats(), 2322 } 2323 if isinstance(o, AdHocCache): 2324 return { 2325 'stats': o.stats(), 2326 } 2327 if callable(getattr(o, 'to_json', None)): 2328 return o.to_json() 2329 return super().default(o) 2330 2331 2332def basic_json_data(manifest, *, cache=None, extra=None): 2333 key = manifest.key 2334 data = extra or {} 2335 data.update({ 2336 'repository': BorgJsonEncoder().default(manifest.repository), 2337 'encryption': { 2338 'mode': key.ARG_NAME, 2339 }, 2340 }) 2341 data['repository']['last_modified'] = OutputTimestamp(manifest.last_timestamp.replace(tzinfo=timezone.utc)) 2342 if key.NAME.startswith('key file'): 2343 data['encryption']['keyfile'] = key.find_key() 2344 if cache: 2345 data['cache'] = cache 2346 return data 2347 2348 2349def json_dump(obj): 2350 """Dump using BorgJSONEncoder.""" 2351 return json.dumps(obj, sort_keys=True, indent=4, cls=BorgJsonEncoder) 2352 2353 2354def json_print(obj): 2355 print(json_dump(obj)) 2356 2357 2358def secure_erase(path): 2359 """Attempt to securely erase a file by writing random data over it before deleting it.""" 2360 with open(path, 'r+b') as fd: 2361 length = os.stat(fd.fileno()).st_size 2362 fd.write(os.urandom(length)) 2363 fd.flush() 2364 os.fsync(fd.fileno()) 2365 os.unlink(path) 2366 2367 2368def truncate_and_unlink(path): 2369 """ 2370 Truncate and then unlink *path*. 2371 2372 Do not create *path* if it does not exist. 2373 Open *path* for truncation in r+b mode (=O_RDWR|O_BINARY). 2374 2375 Use this when deleting potentially large files when recovering 2376 from a VFS error such as ENOSPC. It can help a full file system 2377 recover. Refer to the "File system interaction" section 2378 in repository.py for further explanations. 2379 """ 2380 try: 2381 with open(path, 'r+b') as fd: 2382 fd.truncate() 2383 except OSError as err: 2384 if err.errno != errno.ENOTSUP: 2385 raise 2386 # don't crash if the above ops are not supported. 2387 os.unlink(path) 2388 2389 2390def popen_with_error_handling(cmd_line: str, log_prefix='', **kwargs): 2391 """ 2392 Handle typical errors raised by subprocess.Popen. Return None if an error occurred, 2393 otherwise return the Popen object. 2394 2395 *cmd_line* is split using shlex (e.g. 'gzip -9' => ['gzip', '-9']). 2396 2397 Log messages will be prefixed with *log_prefix*; if set, it should end with a space 2398 (e.g. log_prefix='--some-option: '). 2399 2400 Does not change the exit code. 2401 """ 2402 assert not kwargs.get('shell'), 'Sorry pal, shell mode is a no-no' 2403 try: 2404 command = shlex.split(cmd_line) 2405 if not command: 2406 raise ValueError('an empty command line is not permitted') 2407 except ValueError as ve: 2408 logger.error('%s%s', log_prefix, ve) 2409 return 2410 logger.debug('%scommand line: %s', log_prefix, command) 2411 try: 2412 return subprocess.Popen(command, **kwargs) 2413 except FileNotFoundError: 2414 logger.error('%sexecutable not found: %s', log_prefix, command[0]) 2415 return 2416 except PermissionError: 2417 logger.error('%spermission denied: %s', log_prefix, command[0]) 2418 return 2419 2420 2421def prepare_subprocess_env(system, env=None): 2422 """ 2423 Prepare the environment for a subprocess we are going to create. 2424 2425 :param system: True for preparing to invoke system-installed binaries, 2426 False for stuff inside the pyinstaller environment (like borg, python). 2427 :param env: optionally give a environment dict here. if not given, default to os.environ. 2428 :return: a modified copy of the environment 2429 """ 2430 env = dict(env if env is not None else os.environ) 2431 if system: 2432 # a pyinstaller binary's bootloader modifies LD_LIBRARY_PATH=/tmp/_MEIXXXXXX, 2433 # but we do not want that system binaries (like ssh or other) pick up 2434 # (non-matching) libraries from there. 2435 # thus we install the original LDLP, before pyinstaller has modified it: 2436 lp_key = 'LD_LIBRARY_PATH' 2437 lp_orig = env.get(lp_key + '_ORIG') 2438 if lp_orig is not None: 2439 env[lp_key] = lp_orig 2440 else: 2441 # We get here in 2 cases: 2442 # 1. when not running a pyinstaller-made binary. 2443 # in this case, we must not kill LDLP. 2444 # 2. when running a pyinstaller-made binary and there was no LDLP 2445 # in the original env (in this case, the pyinstaller bootloader 2446 # does *not* put ..._ORIG into the env either). 2447 # in this case, we must kill LDLP. 2448 # We can recognize this via sys.frozen and sys._MEIPASS being set. 2449 lp = env.get(lp_key) 2450 if lp is not None and getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): 2451 env.pop(lp_key) 2452 # security: do not give secrets to subprocess 2453 env.pop('BORG_PASSPHRASE', None) 2454 # for information, give borg version to the subprocess 2455 env['BORG_VERSION'] = borg_version 2456 return env 2457 2458 2459def dash_open(path, mode): 2460 assert '+' not in mode # the streams are either r or w, but never both 2461 if path == '-': 2462 stream = sys.stdin if 'r' in mode else sys.stdout 2463 return stream.buffer if 'b' in mode else stream 2464 else: 2465 return open(path, mode) 2466 2467 2468def is_terminal(fd=sys.stdout): 2469 return hasattr(fd, 'isatty') and fd.isatty() and (sys.platform != 'win32' or 'ANSICON' in os.environ) 2470 2471 2472def umount(mountpoint): 2473 env = prepare_subprocess_env(system=True) 2474 try: 2475 return subprocess.call(['fusermount', '-u', mountpoint], env=env) 2476 except FileNotFoundError: 2477 return subprocess.call(['umount', mountpoint], env=env) 2478