import argparse
import contextlib
import collections
import enum
import errno
import grp
import hashlib
import logging
import io
import json
import os
import os.path
import platform
import pwd
import re
import shlex
import signal
import socket
import stat
import subprocess
import sys
import textwrap
import time
import uuid
from binascii import hexlify
from collections import namedtuple, deque, abc, Counter
from datetime import datetime, timezone, timedelta
from functools import partial, lru_cache
from itertools import islice
from operator import attrgetter
from os import scandir
from string import Formatter
from shutil import get_terminal_size

# MSGPACK =====================================================================
# we are rather picky about msgpack versions, because a good working msgpack is
# very important for borg, see https://github.com/borgbackup/borg/issues/3753
#
# because some linux distributions didn't get their dependency management right
# and broke borgbackup by upgrading msgpack to incompatible versions, we now
# bundle msgpack-python 0.5.6, which is the latest and best msgpack that is
# still compatible with borg 1.1.x and we use the bundled version by default.
#
# if you are a package maintainer and don't like bundled library code, feel
# free to not use the bundled code:
# - set prefer_system_msgpack = True
# - make sure that an external msgpack-python gets installed
# - make sure the external msgpack-python always stays at supported versions.
# - best versions seem to be 0.4.6, 0.4.7, 0.4.8 and 0.5.6.
# - if you can't satisfy the above requirement, these are versions that might
#   also work ok, IF you make sure to use the COMPILED version of
#   msgpack-python NOT the PURE PYTHON fallback implementation: 0.5.1 and 0.5.4
#
# Please note:
# - using any other version is not supported by borg development and
#   any feedback related to issues caused by this will be ignored.
# - especially, it is known that msgpack 0.6.x does NOT work for borg 1.1.x.

prefer_system_msgpack = False

try:
    if prefer_system_msgpack:
        raise ImportError
    # use the bundled msgpack 0.5.6 known-good version - other code only imports it from here:
    import borg.algorithms.msgpack as msgpack
    from borg.algorithms.msgpack import fallback as msgpack_fallback
except ImportError:
    # use an external msgpack version
    import msgpack
    from msgpack import fallback as msgpack_fallback


from .logger import create_logger
logger = create_logger()

import borg.crypto.low_level
from . import __version__ as borg_version
from . import __version_tuple__ as borg_version_tuple
from . import chunker
from . import hashindex
from . import shellpattern
from .constants import *  # NOQA


# generic mechanism to enable users to invoke workarounds by setting the
# BORG_WORKAROUNDS environment variable to a list of comma-separated strings.
# see the docs for a list of known workaround strings.
workarounds = tuple(os.environ.get('BORG_WORKAROUNDS', '').split(','))
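
# Example (illustrative, with hypothetical workaround names):
#   BORG_WORKAROUNDS="somefix,otherfix"  -> workarounds == ('somefix', 'otherfix')
#   variable unset or empty              -> workarounds == ('',)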


'''
The global exit_code variable is used so that modules other than archiver can increase the program exit code if a
warning or error occurred during their operation. This is different from archiver.exit_code, which is only accessible
from the archiver object.
'''
exit_code = EXIT_SUCCESS


def set_ec(ec):
    '''
    Set the exit code of the program to ec, unless an exit code of equal or higher severity is already set.
    This makes EXIT_ERROR override EXIT_WARNING, etc.

    ec: exit code to set
    '''
    global exit_code
    exit_code = max(exit_code, ec)
    return exit_code
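
# Example (illustrative): exit codes only escalate, they never go back down:
#   >>> set_ec(EXIT_WARNING)  # exit_code is now EXIT_WARNING
#   >>> set_ec(EXIT_SUCCESS)  # exit_code stays EXIT_WARNING (max() wins)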


class Error(Exception):
    """Error: {}"""
    # Error base class

    # if we raise such an Error and it is only caught by the uppermost
    # exception handler (which exits shortly afterwards with the given exit_code),
    # it is always a (fatal and abrupt) EXIT_ERROR, never just a warning.
    exit_code = EXIT_ERROR
    # show a traceback?
    traceback = False

    def __init__(self, *args):
        super().__init__(*args)
        self.args = args

    def get_message(self):
        return type(self).__doc__.format(*self.args)

    __str__ = get_message
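
    # Example (illustrative): the class docstring is the message template, e.g.
    #   >>> str(IntegrityError('bad chunk'))
    #   'Data integrity error: bad chunk'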


class ErrorWithTraceback(Error):
    """Error: {}"""
    # like Error, but show a traceback also
    traceback = True


class IntegrityError(ErrorWithTraceback):
    """Data integrity error: {}"""


class DecompressionError(IntegrityError):
    """Decompression error: {}"""


class ExtensionModuleError(Error):
    """The Borg binary extension modules do not seem to be properly installed"""


class NoManifestError(Error):
    """Repository has no manifest."""


class PlaceholderError(Error):
    """Formatting Error: "{}".format({}): {}({})"""


class InvalidPlaceholder(PlaceholderError):
    """Invalid placeholder "{}" in string: {}"""


class PythonLibcTooOld(Error):
    """FATAL: this Python was compiled for a too old (g)libc and misses required functionality."""


def check_python():
    required_funcs = {os.stat, os.utime, os.chown}
    if not os.supports_follow_symlinks.issuperset(required_funcs):
        raise PythonLibcTooOld


class MandatoryFeatureUnsupported(Error):
    """Unsupported repository feature(s) {}. A newer version of borg is required to access this repository."""


def check_extension_modules():
    from . import platform, compress, item
    if hashindex.API_VERSION != '1.1_07':
        raise ExtensionModuleError
    if chunker.API_VERSION != '1.1_01':
        raise ExtensionModuleError
    if compress.API_VERSION != '1.1_06':
        raise ExtensionModuleError
    if borg.crypto.low_level.API_VERSION != '1.1_02':
        raise ExtensionModuleError
    if platform.API_VERSION != platform.OS_API_VERSION or platform.API_VERSION != '1.1_04':
        raise ExtensionModuleError
    if item.API_VERSION != '1.1_03':
        raise ExtensionModuleError


def get_limited_unpacker(kind):
    """return a limited Unpacker because we should not trust msgpack data received from a remote source"""
    args = dict(use_list=False,  # return tuples, not lists
                max_bin_len=0,  # not used
                max_ext_len=0,  # not used
                max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE),
                max_str_len=MAX_OBJECT_SIZE,  # a chunk or other repo object
                )
    if kind == 'server':
        args.update(dict(max_array_len=100,  # misc. cmd tuples
                         max_map_len=100,  # misc. cmd dicts
                         ))
    elif kind == 'client':
        args.update(dict(max_array_len=LIST_SCAN_LIMIT,  # result list from repo.list() / .scan()
                         max_map_len=100,  # misc. result dicts
                         ))
    elif kind == 'manifest':
        args.update(dict(use_list=True,  # default value
                         max_array_len=100,  # ITEM_KEYS ~= 22
                         max_map_len=MAX_ARCHIVES,  # list of archives
                         max_str_len=255,  # archive name
                         object_hook=StableDict,
                         unicode_errors='surrogateescape',
                         ))
    elif kind == 'key':
        args.update(dict(use_list=True,  # default value
                         max_array_len=0,  # not used
                         max_map_len=10,  # EncryptedKey dict
                         max_str_len=4000,  # inner key data
                         object_hook=StableDict,
                         unicode_errors='surrogateescape',
                         ))
    else:
        raise ValueError('kind must be "server", "client", "manifest" or "key"')
    return msgpack.Unpacker(**args)
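
# Usage sketch (illustrative): feed untrusted bytes and iterate the unpacked
# objects; the limits above make oversized input fail instead of exhausting memory.
#   >>> unpacker = get_limited_unpacker('client')
#   >>> unpacker.feed(data)  # 'data' assumed to be msgpack bytes from the repo
#   >>> for obj in unpacker:
#   ...     process(obj)     # 'process' is a hypothetical consumer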


ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')


class Archives(abc.MutableMapping):
    """
    Nice wrapper around the archives dict, making sure only valid types/values get in.
    It accepts str keys (encoding them to bytes keys internally) and either
    str or datetime timestamps.
    """
    def __init__(self):
        # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
        self._archives = {}

    def __len__(self):
        return len(self._archives)

    def __iter__(self):
        return iter(safe_decode(name) for name in self._archives)

    def __getitem__(self, name):
        assert isinstance(name, str)
        _name = safe_encode(name)
        values = self._archives.get(_name)
        if values is None:
            raise KeyError
        ts = parse_timestamp(values[b'time'].decode('utf-8'))
        return ArchiveInfo(name=name, id=values[b'id'], ts=ts)

    def __setitem__(self, name, info):
        assert isinstance(name, str)
        name = safe_encode(name)
        assert isinstance(info, tuple)
        id, ts = info
        assert isinstance(id, bytes)
        if isinstance(ts, datetime):
            ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT)
        assert isinstance(ts, str)
        ts = ts.encode()
        self._archives[name] = {b'id': id, b'time': ts}

    def __delitem__(self, name):
        assert isinstance(name, str)
        name = safe_encode(name)
        del self._archives[name]

    def list(self, *, glob=None, match_end=r'\Z', sort_by=(), first=None, last=None, reverse=False):
        """
        Return list of ArchiveInfo instances according to the parameters.

        First match *glob* (considering *match_end*), then *sort_by*.
        Apply *first* and *last* filters, and then possibly *reverse* the list.

        *sort_by* is a list of sort keys applied in reverse order.

        Note: for better robustness, all filtering / limiting parameters must default to
              "not limit / not filter", so a FULL archive list is produced by a simple .list().
              Some callers EXPECT to iterate over all archives in a repo for correct operation.
        """
        if isinstance(sort_by, (str, bytes)):
            raise TypeError('sort_by must be a sequence of str')
        regex = re.compile(shellpattern.translate(glob or '*', match_end=match_end))
        archives = [x for x in self.values() if regex.match(x.name) is not None]
        for sortkey in reversed(sort_by):
            archives.sort(key=attrgetter(sortkey))
        if first:
            archives = archives[:first]
        elif last:
            archives = archives[max(len(archives) - last, 0):]
        if reverse:
            archives.reverse()
        return archives
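
    # Example (illustrative): the three most recent archives, oldest of them first:
    #   >>> manifest.archives.list(sort_by=['ts'], last=3)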

    def list_considering(self, args):
        """
        get a list of archives, considering --first/last/prefix/glob-archives/sort cmdline args
        """
        if args.location.archive:
            raise Error('The options --first, --last, --prefix and --glob-archives can only be used on repository targets.')
        if args.prefix is not None:
            args.glob_archives = args.prefix + '*'
        return self.list(sort_by=args.sort_by.split(','), glob=args.glob_archives, first=args.first, last=args.last)

    def set_raw_dict(self, d):
        """set the dict we get from the msgpack unpacker"""
        for k, v in d.items():
            assert isinstance(k, bytes)
            assert isinstance(v, dict) and b'id' in v and b'time' in v
            self._archives[k] = v

    def get_raw_dict(self):
        """get the dict we can give to the msgpack packer"""
        return self._archives


class Manifest:

    @enum.unique
    class Operation(enum.Enum):
        # The comments here only roughly describe the scope of each feature. In the end, additions need to be
        # based on potential problems older clients could produce when accessing newer repositories and the
        # tradeoffs of locking versions out versus still allowing access. As all older versions and their exact
        # behaviours are known when introducing new features, sometimes this might not match the general
        # descriptions below.

        # The READ operation describes which features are needed to safely list and extract the archives in the
        # repository.
        READ = 'read'
        # The CHECK operation is for all operations that need either to understand every detail
        # of the repository (for consistency checks and repairs) or are seldom used functions that just
        # should use the most restrictive feature set because more fine grained compatibility tracking is
        # not needed.
        CHECK = 'check'
        # The WRITE operation is for adding archives. Features here ensure that older clients don't add archives
        # in an old format, or they are used to lock out clients that for other reasons can no longer safely add new
        # archives.
        WRITE = 'write'
        # The DELETE operation is for all operations (like archive deletion) that need a 100% correct reference
        # count and need to be able to find all (directly and indirectly) referenced chunks of a given archive.
        DELETE = 'delete'

    NO_OPERATION_CHECK = tuple()

    SUPPORTED_REPO_FEATURES = frozenset([])

    MANIFEST_ID = b'\0' * 32

    def __init__(self, key, repository, item_keys=None):
        self.archives = Archives()
        self.config = {}
        self.key = key
        self.repository = repository
        self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
        self.tam_verified = False
        self.timestamp = None

    @property
    def id_str(self):
        return bin_to_hex(self.id)

    @property
    def last_timestamp(self):
        return parse_timestamp(self.timestamp, tzinfo=None)

    @classmethod
    def load(cls, repository, operations, key=None, force_tam_not_required=False):
        from .item import ManifestItem
        from .crypto.key import key_factory, tam_required_file, tam_required
        from .repository import Repository
        try:
            cdata = repository.get(cls.MANIFEST_ID)
        except Repository.ObjectNotFound:
            raise NoManifestError
        if not key:
            key = key_factory(repository, cdata)
        manifest = cls(key, repository)
        data = key.decrypt(None, cdata)
        manifest_dict, manifest.tam_verified = key.unpack_and_verify_manifest(data, force_tam_not_required=force_tam_not_required)
        m = ManifestItem(internal_dict=manifest_dict)
        manifest.id = key.id_hash(data)
        if m.get('version') not in (1, 2):
            raise ValueError('Invalid manifest version')
        manifest.archives.set_raw_dict(m.archives)
        manifest.timestamp = m.get('timestamp')
        manifest.config = m.config
        # valid item keys are whatever is known in the repo or every key we know
        manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', []))

        if manifest.tam_verified:
            manifest_required = manifest.config.get(b'tam_required', False)
            security_required = tam_required(repository)
            if manifest_required and not security_required:
                logger.debug('Manifest is TAM verified and says TAM is required, updating security database...')
                file = tam_required_file(repository)
                open(file, 'w').close()
            if not manifest_required and security_required:
                logger.debug('Manifest is TAM verified and says TAM is *not* required, updating security database...')
                os.unlink(tam_required_file(repository))
        manifest.check_repository_compatibility(operations)
        return manifest, key

    def check_repository_compatibility(self, operations):
        for operation in operations:
            assert isinstance(operation, self.Operation)
            feature_flags = self.config.get(b'feature_flags', None)
            if feature_flags is None:
                return
            if operation.value.encode() not in feature_flags:
                continue
            requirements = feature_flags[operation.value.encode()]
            if b'mandatory' in requirements:
                unsupported = set(requirements[b'mandatory']) - self.SUPPORTED_REPO_FEATURES
                if unsupported:
                    raise MandatoryFeatureUnsupported([f.decode() for f in unsupported])

    def get_all_mandatory_features(self):
        result = {}
        feature_flags = self.config.get(b'feature_flags', None)
        if feature_flags is None:
            return result

        for operation, requirements in feature_flags.items():
            if b'mandatory' in requirements:
                result[operation.decode()] = set([feature.decode() for feature in requirements[b'mandatory']])
        return result

    def write(self):
        from .item import ManifestItem
        if self.key.tam_required:
            self.config[b'tam_required'] = True
        # self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly
        if self.timestamp is None:
            self.timestamp = datetime.utcnow().strftime(ISO_FORMAT)
        else:
            prev_ts = self.last_timestamp
            incremented = (prev_ts + timedelta(microseconds=1)).strftime(ISO_FORMAT)
            self.timestamp = max(incremented, datetime.utcnow().strftime(ISO_FORMAT))
        # include checks for limits as enforced by limited unpacker (used by load())
        assert len(self.archives) <= MAX_ARCHIVES
        assert all(len(name) <= 255 for name in self.archives)
        assert len(self.item_keys) <= 100
        manifest = ManifestItem(
            version=1,
            archives=StableDict(self.archives.get_raw_dict()),
            timestamp=self.timestamp,
            config=StableDict(self.config),
            item_keys=tuple(sorted(self.item_keys)),
        )
        self.tam_verified = True
        data = self.key.pack_and_authenticate_metadata(manifest.as_dict())
        self.id = self.key.id_hash(data)
        self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))


def positive_int_validator(value):
    """argparse type for positive integers"""
    int_value = int(value)
    if int_value <= 0:
        raise argparse.ArgumentTypeError('A positive integer is required: %s' % value)
    return int_value


def interval(s):
    """Convert a string representing a valid interval to a number of hours."""
    multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}

    if s.endswith(tuple(multiplier.keys())):
        number = s[:-1]
        suffix = s[-1]
    else:
        # range suffixes in ascending multiplier order
        ranges = [k for k, v in sorted(multiplier.items(), key=lambda t: t[1])]
        raise argparse.ArgumentTypeError(
            'Unexpected interval time unit "%s": expected one of %r' % (s[-1], ranges))

    try:
        hours = int(number) * multiplier[suffix]
    except ValueError:
        hours = -1

    if hours <= 0:
        raise argparse.ArgumentTypeError(
            'Unexpected interval number "%s": expected an integer greater than 0' % number)

    return hours
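
# Example (illustrative):
#   >>> interval('2d')
#   48
#   >>> interval('1w')
#   168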


def prune_within(archives, hours):
    target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600)
    return [a for a in archives if a.ts > target]


def prune_split(archives, pattern, n, skip=[]):
    last = None
    keep = []
    if n == 0:
        return keep
    for a in sorted(archives, key=attrgetter('ts'), reverse=True):
        period = to_localtime(a.ts).strftime(pattern)
        if period != last:
            last = period
            if a not in skip:
                keep.append(a)
                if len(keep) == n:
                    break
    return keep
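
# Example (illustrative): "keep 7 daily archives" keeps the newest archive of
# each of the 7 most recent distinct days that actually have archives:
#   >>> prune_split(archives, '%Y-%m-%d', 7)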


def ensure_dir(path, mode=stat.S_IRWXU, pretty_deadly=True):
    """
    Ensures that the dir exists with the right permissions.
    1) Make sure the directory exists in a race-free operation
    2) If mode is not None and the directory has been created, give the right
    permissions to the leaf directory
    3) If pretty_deadly is True, catch exceptions and reraise them with a pretty
    message.
    Returns without a value if the directory exists with the right permissions;
    otherwise an exception is raised (prettified if pretty_deadly is True).
    """
    try:
        os.makedirs(path, mode=mode, exist_ok=True)
    except OSError as e:
        if pretty_deadly:
            raise Error(e.args[1])
        else:
            raise


def get_base_dir():
    """Get home directory / base directory for borg:

    - BORG_BASE_DIR, if set
    - HOME, if set
    - ~$USER, if USER is set
    - ~
    """
    base_dir = os.environ.get('BORG_BASE_DIR') or os.environ.get('HOME')
    # os.path.expanduser() behaves differently for '~' and '~someuser' as
    # parameters: when called with an explicit username, the possibly set
    # environment variable HOME is no longer respected. So we have to check if
    # it is set and only expand the user's home directory if HOME is unset.
    if not base_dir:
        base_dir = os.path.expanduser('~%s' % os.environ.get('USER', ''))
    return base_dir


def get_keys_dir():
    """Determine where to store repository keys."""

    keys_dir = os.environ.get('BORG_KEYS_DIR', os.path.join(get_config_dir(), 'keys'))
    ensure_dir(keys_dir)
    return keys_dir


def get_security_dir(repository_id=None):
    """Determine where to store local security information."""
    security_dir = os.environ.get('BORG_SECURITY_DIR', os.path.join(get_config_dir(), 'security'))
    if repository_id:
        security_dir = os.path.join(security_dir, repository_id)
    ensure_dir(security_dir)
    return security_dir


def get_cache_dir():
    """Determine where to store the repository cache."""
    # Get cache home path
    cache_home = os.path.join(get_base_dir(), '.cache')
    # Try to use XDG_CACHE_HOME instead if BORG_BASE_DIR isn't explicitly set
    if not os.environ.get('BORG_BASE_DIR'):
        cache_home = os.environ.get('XDG_CACHE_HOME', cache_home)
    # Use BORG_CACHE_DIR if set, otherwise assemble final path from cache home path
    cache_dir = os.environ.get('BORG_CACHE_DIR', os.path.join(cache_home, 'borg'))
    # Create path if it doesn't exist yet
    ensure_dir(cache_dir)
    cache_fn = os.path.join(cache_dir, CACHE_TAG_NAME)
    if not os.path.exists(cache_fn):
        with open(cache_fn, 'wb') as fd:
            fd.write(CACHE_TAG_CONTENTS)
            fd.write(textwrap.dedent("""
            # This file is a cache directory tag created by Borg.
            # For information about cache directory tags, see:
            #       http://www.bford.info/cachedir/spec.html
            """).encode('ascii'))
    return cache_dir


def get_config_dir():
    """Determine where to store whole config"""
    # Get config home path
    config_home = os.path.join(get_base_dir(), '.config')
    # Try to use XDG_CONFIG_HOME instead if BORG_BASE_DIR isn't explicitly set
    if not os.environ.get('BORG_BASE_DIR'):
        config_home = os.environ.get('XDG_CONFIG_HOME', config_home)
    # Use BORG_CONFIG_DIR if set, otherwise assemble final path from config home path
    config_dir = os.environ.get('BORG_CONFIG_DIR', os.path.join(config_home, 'borg'))
    # Create path if it doesn't exist yet
    ensure_dir(config_dir)
    return config_dir


def to_localtime(ts):
    """Convert datetime object from UTC to local time zone"""
    return datetime(*time.localtime((ts - datetime(1970, 1, 1, tzinfo=timezone.utc)).total_seconds())[:6])


def parse_timestamp(timestamp, tzinfo=timezone.utc):
    """Parse an ISO 8601 timestamp string"""
    fmt = ISO_FORMAT if '.' in timestamp else ISO_FORMAT_NO_USECS
    dt = datetime.strptime(timestamp, fmt)
    if tzinfo is not None:
        dt = dt.replace(tzinfo=tzinfo)
    return dt


def timestamp(s):
    """Convert a --timestamp=s argument to a datetime object"""
    try:
        # is it pointing to a file / directory?
        ts = safe_s(os.stat(s).st_mtime)
        return datetime.fromtimestamp(ts, tz=timezone.utc)
    except OSError:
        # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support.
        for format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00',
                       '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S',
                       '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M',
                       '%Y-%m-%d', '%Y-%j',
                       ):
            try:
                return datetime.strptime(s, format).replace(tzinfo=timezone.utc)
            except ValueError:
                continue
        raise ValueError


def ChunkerParams(s):
    if s.strip().lower() == "default":
        return CHUNKER_PARAMS
    chunk_min, chunk_max, chunk_mask, window_size = s.split(',')
    if int(chunk_max) > 23:
        raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)')
    return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size)
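
# Example (illustrative): the four comma-separated values are the min/max chunk
# size exponents, the hash mask bits and the hash window size:
#   >>> ChunkerParams('19,23,21,4095')
#   (19, 23, 21, 4095)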


def FilesCacheMode(s):
    ENTRIES_MAP = dict(ctime='c', mtime='m', size='s', inode='i', rechunk='r', disabled='d')
    VALID_MODES = ('cis', 'ims', 'cs', 'ms', 'cr', 'mr', 'd', 's')  # letters in alpha order
    entries = set(s.strip().split(','))
    if not entries <= set(ENTRIES_MAP):
        raise ValueError('cache mode must be a comma-separated list of: %s' % ','.join(sorted(ENTRIES_MAP)))
    short_entries = {ENTRIES_MAP[entry] for entry in entries}
    mode = ''.join(sorted(short_entries))
    if mode not in VALID_MODES:
        raise ValueError('cache mode short must be one of: %s' % ','.join(VALID_MODES))
    return mode
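
# Example (illustrative):
#   >>> FilesCacheMode('ctime,size,inode')
#   'cis'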


assert FilesCacheMode(DEFAULT_FILES_CACHE_MODE_UI) == DEFAULT_FILES_CACHE_MODE  # keep these 2 values in sync!


def dir_is_cachedir(path):
    """Determines whether the specified path is a cache directory (and
    therefore should potentially be excluded from the backup) according to
    the CACHEDIR.TAG protocol
    (http://www.bford.info/cachedir/spec.html).
    """

    tag_path = os.path.join(path, CACHE_TAG_NAME)
    try:
        if os.path.exists(tag_path):
            with open(tag_path, 'rb') as tag_file:
                tag_data = tag_file.read(len(CACHE_TAG_CONTENTS))
                if tag_data == CACHE_TAG_CONTENTS:
                    return True
    except OSError:
        pass
    return False


def dir_is_tagged(path, exclude_caches, exclude_if_present):
    """Determines whether the specified path is excluded by being a cache
    directory or containing user-specified tag files/directories. Returns a
    list of the paths of the tag files/directories (either CACHEDIR.TAG or the
    matching user-specified files/directories).
    """
    tag_paths = []
    if exclude_caches and dir_is_cachedir(path):
        tag_paths.append(os.path.join(path, CACHE_TAG_NAME))
    if exclude_if_present is not None:
        for tag in exclude_if_present:
            tag_path = os.path.join(path, tag)
            if os.path.exists(tag_path):
                tag_paths.append(tag_path)
    return tag_paths


def partial_format(format, mapping):
    """
    Apply format.format_map(mapping) while preserving unknown keys

    Does not support attribute access, indexing and ![rsa] conversions
    """
    for key, value in mapping.items():
        key = re.escape(key)
        format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key),
                        lambda match: match.group(1).format_map(mapping),
                        format)
    return format
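
# Example (illustrative): known keys are substituted, unknown ones survive:
#   >>> partial_format('{hostname} {unknown}', {'hostname': 'pc1'})
#   'pc1 {unknown}'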


class DatetimeWrapper:
    def __init__(self, dt):
        self.dt = dt

    def __format__(self, format_spec):
        if format_spec == '':
            format_spec = ISO_FORMAT_NO_USECS
        return self.dt.__format__(format_spec)


def format_line(format, data):
    for _, key, _, conversion in Formatter().parse(format):
        if not key:
            continue
        if conversion or key not in data:
            raise InvalidPlaceholder(key, format)
    try:
        return format.format_map(data)
    except Exception as e:
        raise PlaceholderError(format, data, e.__class__.__name__, str(e))


def replace_placeholders(text, overrides={}):
    """Replace placeholders in text with their values."""
    from .platform import fqdn, hostname
    current_time = datetime.now(timezone.utc)
    data = {
        'pid': os.getpid(),
        'fqdn': fqdn,
        'reverse-fqdn': '.'.join(reversed(fqdn.split('.'))),
        'hostname': hostname,
        'now': DatetimeWrapper(current_time.astimezone(None)),
        'utcnow': DatetimeWrapper(current_time),
        'user': uid2user(os.getuid(), os.getuid()),
        'uuid4': str(uuid.uuid4()),
        'borgversion': borg_version,
        'borgmajor': '%d' % borg_version_tuple[:1],
        'borgminor': '%d.%d' % borg_version_tuple[:2],
        'borgpatch': '%d.%d.%d' % borg_version_tuple[:3],
        **overrides,
    }
    return format_line(text, data)


PrefixSpec = replace_placeholders

GlobSpec = replace_placeholders

CommentSpec = replace_placeholders

HUMAN_SORT_KEYS = ['timestamp'] + list(ArchiveInfo._fields)
HUMAN_SORT_KEYS.remove('ts')


def SortBySpec(text):
    for token in text.split(','):
        if token not in HUMAN_SORT_KEYS:
            raise ValueError('Invalid sort key: %s' % token)
    return text.replace('timestamp', 'ts')


# Not too rarely, we get crappy timestamps from the fs that overflow some computations.
# As they are crap anyway (valid filesystem timestamps always refer to the past up to
# the present, but never to the future), nothing is lost if we just clamp them to the
# maximum value we can support.
# As long as people are using borg on 32bit platforms to access borg archives, we must
# keep this value True. But we can expect that we can stop supporting 32bit platforms
# well before coming close to the year 2038, so this will never be a practical problem.
SUPPORT_32BIT_PLATFORMS = True  # set this to False before y2038.

if SUPPORT_32BIT_PLATFORMS:
    # second timestamps will fit into a signed int32 (platform time_t limit).
    # nanosecond timestamps thus will naturally fit into a signed int64.
    # subtract last 48h to avoid any issues that could be caused by tz calculations.
    # this is in the year 2038, so it is also less than y9999 (which is a datetime internal limit).
    # msgpack can pack up to uint64.
    MAX_S = 2**31-1 - 48*3600
    MAX_NS = MAX_S * 1000000000
else:
    # nanosecond timestamps will fit into a signed int64.
    # subtract last 48h to avoid any issues that could be caused by tz calculations.
    # this is in the year 2262, so it is also less than y9999 (which is a datetime internal limit).
    # round down to 1e9 multiple, so MAX_NS corresponds precisely to an integer MAX_S.
    # msgpack can pack up to uint64.
    MAX_NS = (2**63-1 - 48*3600*1000000000) // 1000000000 * 1000000000
    MAX_S = MAX_NS // 1000000000


def safe_s(ts):
    if 0 <= ts <= MAX_S:
        return ts
    elif ts < 0:
        return 0
    else:
        return MAX_S
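
# Example (illustrative): clamping keeps timestamps in the supported range:
#   >>> safe_s(-1)
#   0
#   >>> safe_s(MAX_S + 1) == MAX_S
#   True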


def safe_ns(ts):
    if 0 <= ts <= MAX_NS:
        return ts
    elif ts < 0:
        return 0
    else:
        return MAX_NS


def safe_timestamp(item_timestamp_ns):
    t_ns = safe_ns(item_timestamp_ns)
    return datetime.fromtimestamp(t_ns / 1e9)


def format_time(ts: datetime, format_spec=''):
    """
    Convert *ts* to a human-friendly format with textual weekday.
    """
    return ts.strftime('%a, %Y-%m-%d %H:%M:%S' if format_spec == '' else format_spec)


def isoformat_time(ts: datetime):
    """
    Format *ts* according to ISO 8601.
    """
    # note: first make all datetime objects tz aware before adding %z here.
    return ts.strftime(ISO_FORMAT)


def format_timedelta(td):
    """Format timedelta in a human friendly format
    """
    ts = td.total_seconds()
    s = ts % 60
    m = int(ts / 60) % 60
    h = int(ts / 3600) % 24
    txt = '%.2f seconds' % s
    if m:
        txt = '%d minutes %s' % (m, txt)
    if h:
        txt = '%d hours %s' % (h, txt)
    if td.days:
        txt = '%d days %s' % (td.days, txt)
    return txt


class OutputTimestamp:
    def __init__(self, ts: datetime):
        if ts.tzinfo == timezone.utc:
            ts = to_localtime(ts)
        self.ts = ts

    def __format__(self, format_spec):
        return format_time(self.ts, format_spec=format_spec)

    def __str__(self):
        return '{}'.format(self)

    def isoformat(self):
        return isoformat_time(self.ts)

    to_json = isoformat


def format_file_size(v, precision=2, sign=False):
    """Format file size into a human friendly format
    """
    return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign)


class FileSize(int):
    def __format__(self, format_spec):
        return format_file_size(int(self)).__format__(format_spec)


def parse_file_size(s):
    """Return int from file size (1234, 55G, 1.7T)."""
    if not s:
        return int(s)  # will raise
    suffix = s[-1]
    power = 1000
    try:
        factor = {
            'K': power,
            'M': power**2,
            'G': power**3,
            'T': power**4,
            'P': power**5,
        }[suffix]
        s = s[:-1]
    except KeyError:
        factor = 1
    return int(float(s) * factor)
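
# Example (illustrative): suffixes use decimal (powers of 1000) multipliers:
#   >>> parse_file_size('55G')
#   55000000000
#   >>> parse_file_size('1234')
#   1234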


def sizeof_fmt(num, suffix='B', units=None, power=None, sep='', precision=2, sign=False):
    prefix = '+' if sign and num > 0 else ''

    for unit in units[:-1]:
        if abs(round(num, precision)) < power:
            if isinstance(num, int):
                return "{}{}{}{}{}".format(prefix, num, sep, unit, suffix)
            else:
                return "{}{:3.{}f}{}{}{}".format(prefix, num, precision, sep, unit, suffix)
        num /= float(power)
    return "{}{:.{}f}{}{}{}".format(prefix, num, precision, sep, units[-1], suffix)


def sizeof_fmt_iec(num, suffix='B', sep='', precision=2, sign=False):
    return sizeof_fmt(num, suffix=suffix, sep=sep, precision=precision, sign=sign,
                      units=['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'], power=1024)


def sizeof_fmt_decimal(num, suffix='B', sep='', precision=2, sign=False):
    return sizeof_fmt(num, suffix=suffix, sep=sep, precision=precision, sign=sign,
                      units=['', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'], power=1000)
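
# Example (illustrative):
#   >>> format_file_size(1234567)
#   '1.23 MB'
#   >>> sizeof_fmt_iec(1234567, sep=' ')
#   '1.18 MiB'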


def format_archive(archive):
    return '%-36s %s [%s]' % (
        archive.name,
        format_time(to_localtime(archive.ts)),
        bin_to_hex(archive.id),
    )


class Buffer:
    """
    managed buffer (like a resizable bytearray)
    """

    class MemoryLimitExceeded(Error, OSError):
        """Requested buffer size {} is above the limit of {}."""

    def __init__(self, allocator, size=4096, limit=None):
        """
        Initialize the buffer: use allocator(size) call to allocate a buffer.
        Optionally, set the upper <limit> for the buffer size.
        """
        assert callable(allocator), 'must give alloc(size) function as first param'
        assert limit is None or size <= limit, 'initial size must be <= limit'
        self.allocator = allocator
        self.limit = limit
        self.resize(size, init=True)

    def __len__(self):
        return len(self.buffer)

    def resize(self, size, init=False):
        """
        resize the buffer - to avoid frequent reallocation, we usually only grow it (if needed).
        giving init=True, it is possible to first-time initialize or to shrink the buffer.
        if a buffer size beyond the limit is requested, raise Buffer.MemoryLimitExceeded (OSError).
        """
        size = int(size)
        if self.limit is not None and size > self.limit:
            raise Buffer.MemoryLimitExceeded(size, self.limit)
        if init or len(self) < size:
            self.buffer = self.allocator(size)

    def get(self, size=None, init=False):
        """
        return a buffer of at least the requested size (None: any current size).
        init=True can be given to trigger shrinking of the buffer to the given size.
        """
        if size is not None:
            self.resize(size, init)
        return self.buffer


@lru_cache(maxsize=None)
def uid2user(uid, default=None):
    try:
        return pwd.getpwuid(uid).pw_name
    except KeyError:
        return default


@lru_cache(maxsize=None)
def user2uid(user, default=None):
    try:
        return user and pwd.getpwnam(user).pw_uid
    except KeyError:
        return default


@lru_cache(maxsize=None)
def gid2group(gid, default=None):
    try:
        return grp.getgrgid(gid).gr_name
    except KeyError:
        return default


@lru_cache(maxsize=None)
def group2gid(group, default=None):
    try:
        return group and grp.getgrnam(group).gr_gid
    except KeyError:
        return default


def posix_acl_use_stored_uid_gid(acl):
    """Replace the user/group field with the stored uid/gid
    """
    entries = []
    for entry in safe_decode(acl).split('\n'):
        if entry:
            fields = entry.split(':')
            if len(fields) == 4:
                entries.append(':'.join([fields[0], fields[3], fields[2]]))
            else:
                entries.append(entry)
    return safe_encode('\n'.join(entries))


def safe_decode(s, coding='utf-8', errors='surrogateescape'):
    """decode bytes to str, with round-tripping "invalid" bytes"""
    if s is None:
        return None
    return s.decode(coding, errors)


def safe_encode(s, coding='utf-8', errors='surrogateescape'):
    """encode str to bytes, with round-tripping "invalid" bytes"""
    if s is None:
        return None
    return s.encode(coding, errors)


def bin_to_hex(binary):
    return hexlify(binary).decode('ascii')


def parse_stringified_list(s):
    l = re.split(" *, *", s)
    return [item for item in l if item != '']
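
# Example (illustrative): whitespace around commas is tolerated, empty items dropped:
#   >>> parse_stringified_list('foo, bar ,baz,')
#   ['foo', 'bar', 'baz']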


class Location:
    """Object representing a repository / archive location
    """
    proto = user = _host = port = path = archive = None

    # user must not contain "@", ":" or "/".
    # Quoting adduser error message:
    # "To avoid problems, the username should consist only of letters, digits,
    # underscores, periods, at signs and dashes, and not start with a dash
    # (as defined by IEEE Std 1003.1-2001)."
    # We use "@" as separator between username and hostname, so we must
    # disallow it within the pure username part.
    optional_user_re = r"""
        (?:(?P<user>[^@:/]+)@)?
    """

    # path must not contain :: (it ends at :: or string end), but may contain single colons.
    # to avoid ambiguities with other regexes, it must also not start with ":" nor with "//" nor with "ssh://".
    scp_path_re = r"""
        (?!(:|//|ssh://))                                   # not starting with ":" or // or ssh://
        (?P<path>([^:]|(:(?!:)))+)                          # any chars, but no "::"
        """

    # file_path must not contain :: (it ends at :: or string end), but may contain single colons.
    # it must start with a / and that slash is part of the path.
    file_path_re = r"""
        (?P<path>(([^/]*)/([^:]|(:(?!:)))+))                # start opt. servername, then /, then any chars, but no "::"
        """

    # abs_path must not contain :: (it ends at :: or string end), but may contain single colons.
    # it must start with a / and that slash is part of the path.
    abs_path_re = r"""
        (?P<path>(/([^:]|(:(?!:)))+))                       # start with /, then any chars, but no "::"
        """

    # optional ::archive_name at the end, archive name must not contain "/".
    # borg mount's FUSE filesystem creates one level of directories from
    # the archive names and of course "/" is not valid in a directory name.
    optional_archive_re = r"""
        (?:
            ::                                              # "::" as separator
            (?P<archive>[^/]+)                              # archive name must not contain "/"
        )?$"""                                              # must match until the end

    # regexes for misc. kinds of supported location specifiers:
    ssh_re = re.compile(r"""
        (?P<proto>ssh)://                                   # ssh://
        """ + optional_user_re + r"""                       # user@  (optional)
        (?P<host>([^:/]+|\[[0-9a-fA-F:.]+\]))(?::(?P<port>\d+))?  # host or host:port or [ipv6] or [ipv6]:port
        """ + abs_path_re + optional_archive_re, re.VERBOSE)  # path or path::archive

    file_re = re.compile(r"""
        (?P<proto>file)://                                  # file://
        """ + file_path_re + optional_archive_re, re.VERBOSE)  # servername/path, path or path::archive
    # note: scp_re is also used for local paths
    scp_re = re.compile(r"""
        (
            """ + optional_user_re + r"""                   # user@  (optional)
            (?P<host>([^:/]+|\[[0-9a-fA-F:.]+\])):          # host: (don't match / or [ipv6] in host to disambiguate from file:)
        )?                                                  # user@host: part is optional
        """ + scp_path_re + optional_archive_re, re.VERBOSE)  # path with optional archive

    # get the repo from BORG_REPO env and the optional archive from param.
    # if the syntax requires giving REPOSITORY (see "borg mount"),
    # use "::" to let it use the env var.
    # if REPOSITORY argument is optional, it'll automatically use the env.
    env_re = re.compile(r"""                                # the repo part is fetched from BORG_REPO
        (?:::$)                                             # just "::" is ok (when a pos. arg is required, no archive)
        |                                                   # or
        """ + optional_archive_re, re.VERBOSE)              # archive name (optional, may be empty)

    def __init__(self, text='', overrides={}):
        if not self.parse(text, overrides):
            raise ValueError('Invalid location format: "%s"' % self.orig)

    def parse(self, text, overrides={}):
        self.orig = text
        text = replace_placeholders(text, overrides)
        valid = self._parse(text)
        if valid:
            return True
        m = self.env_re.match(text)
        if not m:
            return False
        repo = os.environ.get('BORG_REPO')
        if repo is None:
            return False
        valid = self._parse(repo)
        self.archive = m.group('archive')
        self.orig = repo if not self.archive else '%s::%s' % (repo, self.archive)
        return valid

    def _parse(self, text):
        def normpath_special(p):
            # avoid that normpath strips away our relative path hack and even makes p absolute
            relative = p.startswith('/./')
            p = os.path.normpath(p)
            return ('/.' + p) if relative else p

        m = self.ssh_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.user = m.group('user')
            self._host = m.group('host')
            self.port = m.group('port') and int(m.group('port')) or None
            self.path = normpath_special(m.group('path'))
            self.archive = m.group('archive')
            return True
        m = self.file_re.match(text)
        if m:
            self.proto = m.group('proto')
            self.path = normpath_special(m.group('path'))
            self.archive = m.group('archive')
            return True
        m = self.scp_re.match(text)
        if m:
            self.user = m.group('user')
            self._host = m.group('host')
            self.path = normpath_special(m.group('path'))
            self.archive = m.group('archive')
            self.proto = self._host and 'ssh' or 'file'
            return True
        return False

    def __str__(self):
        items = [
            'proto=%r' % self.proto,
            'user=%r' % self.user,
            'host=%r' % self.host,
            'port=%r' % self.port,
            'path=%r' % self.path,
            'archive=%r' % self.archive,
        ]
        return ', '.join(items)

    def to_key_filename(self):
        name = re.sub(r'[^\w]', '_', self.path).strip('_')
        if self.proto != 'file':
            name = re.sub(r'[^\w]', '_', self.host) + '__' + name
        if len(name) > 100:
            # Limit file names to some reasonable length. Most file systems
            # limit them to 255 [unit of choice]; due to variations in unicode
            # handling we truncate to 100 *characters*.
            name = name[:100]
        return os.path.join(get_keys_dir(), name)

    def __repr__(self):
        return "Location(%s)" % self

    @property
    def host(self):
        # strip square brackets used for IPv6 addrs
        if self._host is not None:
            return self._host.lstrip('[').rstrip(']')

    def canonical_path(self):
        if self.proto == 'file':
            return self.path
        else:
            if self.path and self.path.startswith('~'):
                path = '/' + self.path  # /~/x = path x relative to home dir
            elif self.path and not self.path.startswith('/'):
                path = '/./' + self.path  # /./x = path x relative to cwd
            else:
                path = self.path
            return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '',
                                           self._host,  # needed for ipv6 addrs
                                           ':{}'.format(self.port) if self.port else '',
                                           path)

    def with_timestamp(self, timestamp):
        return Location(self.orig, overrides={
            'now': DatetimeWrapper(timestamp.astimezone(None)),
            'utcnow': DatetimeWrapper(timestamp),
        })
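
# Examples (illustrative) of accepted location formats:
#   >>> Location('ssh://user@host:2222/path/to/repo::archive').proto
#   'ssh'
#   >>> Location('/path/to/repo').proto   # plain local path, matched by scp_re
#   'file'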


def location_validator(archive=None, proto=None):
    def validator(text):
        try:
            loc = Location(text)
        except ValueError as err:
            raise argparse.ArgumentTypeError(str(err)) from None
        if archive is True and not loc.archive:
            raise argparse.ArgumentTypeError('"%s": No archive specified' % text)
        elif archive is False and loc.archive:
            raise argparse.ArgumentTypeError('"%s": No archive can be specified' % text)
        if proto is not None and loc.proto != proto:
            if proto == 'file':
                raise argparse.ArgumentTypeError('"%s": Repository must be local' % text)
            else:
                raise argparse.ArgumentTypeError('"%s": Repository must be remote' % text)
        return loc
    return validator


def archivename_validator():
    def validator(text):
        text = replace_placeholders(text)
        if '/' in text or '::' in text or not text:
            raise argparse.ArgumentTypeError('Invalid archive name: "%s"' % text)
        return text
    return validator


def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
    for key in keys:
        if isinstance(d.get(key), bytes):
            d[key] = d[key].decode(encoding, errors)
    return d


def prepare_dump_dict(d):
    def decode_bytes(value):
        # this should somehow be reversible later, but usual strings should
        # look nice and chunk ids should mostly show in hex. Use a special
        # inband signaling character (ASCII DEL) to distinguish between
        # decoded and hex mode.
        if not value.startswith(b'\x7f'):
            try:
                value = value.decode()
                return value
            except UnicodeDecodeError:
                pass
        return '\u007f' + bin_to_hex(value)

    def decode_tuple(t):
        res = []
        for value in t:
            if isinstance(value, dict):
                value = decode(value)
            elif isinstance(value, tuple) or isinstance(value, list):
                value = decode_tuple(value)
            elif isinstance(value, bytes):
                value = decode_bytes(value)
            res.append(value)
        return res

    def decode(d):
        res = collections.OrderedDict()
        for key, value in d.items():
            if isinstance(value, dict):
                value = decode(value)
            elif isinstance(value, (tuple, list)):
                value = decode_tuple(value)
            elif isinstance(value, bytes):
                value = decode_bytes(value)
            if isinstance(key, bytes):
                key = key.decode()
            res[key] = value
        return res

    return decode(d)


def remove_surrogates(s, errors='replace'):
    """Replace surrogates generated by fsdecode with '?'
    """
    return s.encode('utf-8', errors).decode('utf-8')


_safe_re = re.compile(r'^((\.\.)?/+)+')


def make_path_safe(path):
    """Make path safe by making it relative and local
    """
    return _safe_re.sub('', path) or '.'
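
# Example (illustrative): leading slashes and "../" prefixes are stripped:
#   >>> make_path_safe('/etc/passwd')
#   'etc/passwd'
#   >>> make_path_safe('../../../etc/passwd')
#   'etc/passwd'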


def daemonize():
    """Detach process from controlling terminal and run in background

    Returns: old and new get_process_id tuples
    """
    from .platform import get_process_id
    old_id = get_process_id()
    pid = os.fork()
    if pid:
        os._exit(0)
    os.setsid()
    pid = os.fork()
    if pid:
        os._exit(0)
    os.chdir('/')
    os.close(0)
    os.close(1)
    os.close(2)
    fd = os.open(os.devnull, os.O_RDWR)
    os.dup2(fd, 0)
    os.dup2(fd, 1)
    os.dup2(fd, 2)
    new_id = get_process_id()
    return old_id, new_id


class StableDict(dict):
    """A dict subclass with stable items() ordering"""
    def items(self):
        return sorted(super().items())


def bigint_to_int(mtime):
    """Convert bytes (as produced by int_to_bigint) back to int; plain ints pass through unchanged
    """
    if isinstance(mtime, bytes):
        return int.from_bytes(mtime, 'little', signed=True)
    return mtime


def int_to_bigint(value):
    """Convert integers larger than 64 bits to little-endian signed bytes

    Smaller integers are left alone
    """
    if value.bit_length() > 63:
        return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
    return value
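
# Example (illustrative): values needing more than 63 bits round-trip via bytes:
#   >>> bigint_to_int(int_to_bigint(2**64))
#   18446744073709551616
#   >>> int_to_bigint(42)   # small values are passed through
#   42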


def is_slow_msgpack():
    return msgpack.Packer is msgpack_fallback.Packer


def is_supported_msgpack():
    # DO NOT CHANGE OR REMOVE! See also requirements and comments in setup.py.
    v = msgpack.version[:3]
    return (0, 4, 6) <= v <= (0, 5, 6) and \
           v not in [(0, 5, 0), (0, 5, 2), (0, 5, 3), (0, 5, 5)]


FALSISH = ('No', 'NO', 'no', 'N', 'n', '0', )
TRUISH = ('Yes', 'YES', 'yes', 'Y', 'y', '1', )
DEFAULTISH = ('Default', 'DEFAULT', 'default', 'D', 'd', '', )


def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
        retry_msg=None, invalid_msg=None, env_msg='{} (from {})',
        falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH,
        default=False, retry=True, env_var_override=None, ofile=None, input=input, prompt=True,
        msgid=None):
    """Output <msg> (usually a question) and let user input an answer.
    Qualifies the answer according to falsish, truish and defaultish as True, False or <default>.
    If it didn't qualify and retry is False (no retries wanted), return the default [which
    defaults to False]. If retry is True let user retry answering until answer is qualified.

    If env_var_override is given and this var is present in the environment, do not ask
    the user, but just use the env var contents as answer as if it was typed in.
    Otherwise read input from stdin and proceed as normal.
    If EOF is received instead of an input, or an invalid input is given without retry
    possibility, return default.

    :param msg: introducing message to output on ofile, no \n is added [None]
    :param retry_msg: retry message to output on ofile, no \n is added [None]
    :param false_msg: message to output before returning False [None]
    :param true_msg: message to output before returning True [None]
    :param default_msg: message to output before returning a <default> [None]
    :param invalid_msg: message to output after an invalid answer was given [None]
1412    :param env_msg: message to output when using input from env_var_override ['{} (from {})'],
1413           needs to have 2 placeholders for answer and env var name
1414    :param falsish: sequence of answers qualifying as False
1415    :param truish: sequence of answers qualifying as True
1416    :param defaultish: sequence of answers qualifying as <default>
1417    :param default: default return value (defaultish answer was given or no-answer condition) [False]
1418    :param retry: if True and input is incorrect, retry. Otherwise return default. [True]
1419    :param env_var_override: environment variable name [None]
1420    :param ofile: output stream [sys.stderr]
1421    :param input: input function [input from builtins]
1422    :return: boolean answer value, True or False
1423    """
1424    def output(msg, msg_type, is_prompt=False, **kwargs):
1425        json_output = getattr(logging.getLogger('borg'), 'json', False)
1426        if json_output:
1427            kwargs.update(dict(
1428                type='question_%s' % msg_type,
1429                msgid=msgid,
1430                message=msg,
1431            ))
1432            print(json.dumps(kwargs), file=sys.stderr)
1433        else:
1434            if is_prompt:
1435                print(msg, file=ofile, end='', flush=True)
1436            else:
1437                print(msg, file=ofile)
1438
1439    msgid = msgid or env_var_override
    # note: we do not assign sys.stderr as the default above, so it is
    # really evaluated NOW, not at function definition time.
1442    if ofile is None:
1443        ofile = sys.stderr
1444    if default not in (True, False):
1445        raise ValueError("invalid default value, must be True or False")
1446    if msg:
1447        output(msg, 'prompt', is_prompt=True)
1448    while True:
1449        answer = None
1450        if env_var_override:
1451            answer = os.environ.get(env_var_override)
1452            if answer is not None and env_msg:
1453                output(env_msg.format(answer, env_var_override), 'env_answer', env_var=env_var_override)
1454        if answer is None:
1455            if not prompt:
1456                return default
1457            try:
1458                answer = input()
1459            except EOFError:
1460                # avoid defaultish[0], defaultish could be empty
1461                answer = truish[0] if default else falsish[0]
1462        if answer in defaultish:
1463            if default_msg:
1464                output(default_msg, 'accepted_default')
1465            return default
1466        if answer in truish:
1467            if true_msg:
1468                output(true_msg, 'accepted_true')
1469            return True
1470        if answer in falsish:
1471            if false_msg:
1472                output(false_msg, 'accepted_false')
1473            return False
1474        # if we get here, the answer was invalid
1475        if invalid_msg:
1476            output(invalid_msg, 'invalid_answer')
1477        if not retry:
1478            return default
1479        if retry_msg:
1480            output(retry_msg, 'prompt_retry', is_prompt=True)
1481        # in case we used an environment variable and it gave an invalid answer, do not use it again:
1482        env_var_override = None
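
# Example (illustrative; *input* and *ofile* are overridden here so the calls
# are side-effect free):
#   >>> yes('Continue? ', input=lambda: 'y', ofile=io.StringIO())
#   True
#   >>> yes('Continue? ', input=lambda: '', default=True, ofile=io.StringIO())
#   True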
1483
1484
1485def hostname_is_unique():
1486    return yes(env_var_override='BORG_HOSTNAME_IS_UNIQUE', prompt=False, env_msg=None, default=True)
1487
1488
1489def ellipsis_truncate(msg, space):
1490    """
    shorten a long string by inserting an ellipsis into its middle so it fits into *space* cells, e.g.:
    this_is_a_very_long_string -------> this_is..._string
1493    """
1494    from .platform import swidth
1495    ellipsis_width = swidth('...')
1496    msg_width = swidth(msg)
1497    if space < 8:
1498        # if there is very little space, just show ...
1499        return '...' + ' ' * (space - ellipsis_width)
1500    if space < ellipsis_width + msg_width:
1501        return '%s...%s' % (swidth_slice(msg, space // 2 - ellipsis_width),
1502                            swidth_slice(msg, -space // 2))
1503    return msg + ' ' * (space - msg_width)
1504
1505
1506class ProgressIndicatorBase:
1507    LOGGER = 'borg.output.progress'
1508    JSON_TYPE = None
1509    json = False
1510
1511    operation_id_counter = 0
1512
1513    @classmethod
1514    def operation_id(cls):
1515        """Unique number, can be used by receiving applications to distinguish different operations."""
1516        cls.operation_id_counter += 1
1517        return cls.operation_id_counter
1518
1519    def __init__(self, msgid=None):
1520        self.handler = None
1521        self.logger = logging.getLogger(self.LOGGER)
1522        self.id = self.operation_id()
1523        self.msgid = msgid
1524
        # If there are no handlers, set one up explicitly, because the
        # terminator and propagation need to be set.  If there are,
        # they must have been set up by BORG_LOGGING_CONF: skip setup.
1528        if not self.logger.handlers:
1529            self.handler = logging.StreamHandler(stream=sys.stderr)
1530            self.handler.setLevel(logging.INFO)
1531            logger = logging.getLogger('borg')
            # Some special attributes on the borg logger are created by setup_logging,
            # but we must also be able to work without them:
1534            try:
1535                formatter = logger.formatter
1536                terminator = '\n' if logger.json else '\r'
1537                self.json = logger.json
1538            except AttributeError:
1539                terminator = '\r'
1540            else:
1541                self.handler.setFormatter(formatter)
1542            self.handler.terminator = terminator
1543
1544            self.logger.addHandler(self.handler)
1545            if self.logger.level == logging.NOTSET:
1546                self.logger.setLevel(logging.WARN)
1547            self.logger.propagate = False
1548
1549        # If --progress is not set then the progress logger level will be WARN
1550        # due to setup_implied_logging (it may be NOTSET with a logging config file,
1551        # but the interactions there are generally unclear), so self.emit becomes
1552        # False, which is correct.
        # If --progress is set then the level will be INFO as per setup_implied_logging;
        # note that this is always the case for serve processes due to "args.progress |= is_serve".
1555        # In this case self.emit is True.
1556        self.emit = self.logger.getEffectiveLevel() == logging.INFO
1557
1558    def __del__(self):
1559        if self.handler is not None:
1560            self.logger.removeHandler(self.handler)
1561            self.handler.close()
1562
1563    def output_json(self, *, finished=False, **kwargs):
1564        assert self.json
1565        if not self.emit:
1566            return
1567        kwargs.update(dict(
1568            operation=self.id,
1569            msgid=self.msgid,
1570            type=self.JSON_TYPE,
1571            finished=finished,
1572            time=time.time(),
1573        ))
1574        print(json.dumps(kwargs), file=sys.stderr, flush=True)
1575
1576    def finish(self):
1577        if self.json:
1578            self.output_json(finished=True)
1579        else:
1580            self.output('')
1581
1582
1583def justify_to_terminal_size(message):
1584    terminal_space = get_terminal_size(fallback=(-1, -1))[0]
1585    # justify only if we are outputting to a terminal
1586    if terminal_space != -1:
1587        return message.ljust(terminal_space)
1588    return message
1589
1590
1591class ProgressIndicatorMessage(ProgressIndicatorBase):
1592    JSON_TYPE = 'progress_message'
1593
1594    def output(self, msg):
1595        if self.json:
1596            self.output_json(message=msg)
1597        else:
1598            self.logger.info(justify_to_terminal_size(msg))
1599
1600
1601class ProgressIndicatorPercent(ProgressIndicatorBase):
1602    JSON_TYPE = 'progress_percent'
1603
1604    def __init__(self, total=0, step=5, start=0, msg="%3.0f%%", msgid=None):
1605        """
1606        Percentage-based progress indicator
1607
        :param total: total number of items
1609        :param step: step size in percent
1610        :param start: at which percent value to start
1611        :param msg: output message, must contain one %f placeholder for the percentage
1612        """
1613        self.counter = 0  # 0 .. (total-1)
1614        self.total = total
1615        self.trigger_at = start  # output next percentage value when reaching (at least) this
1616        self.step = step
1617        self.msg = msg
1618
1619        super().__init__(msgid=msgid)
1620
1621    def progress(self, current=None, increase=1):
1622        if current is not None:
1623            self.counter = current
1624        pct = self.counter * 100 / self.total
1625        self.counter += increase
1626        if pct >= self.trigger_at:
1627            self.trigger_at += self.step
1628            return pct
1629
1630    def show(self, current=None, increase=1, info=None):
1631        """
1632        Show and output the progress message
1633
        :param current: set the current counter value [None]
        :param increase: amount to increase the counter by [1]
1636        :param info: array of strings to be formatted with msg [None]
1637        """
1638        pct = self.progress(current, increase)
1639        if pct is not None:
1640            # truncate the last argument, if no space is available
1641            if info is not None:
1642                if not self.json:
1643                    # no need to truncate if we're not outputting to a terminal
1644                    terminal_space = get_terminal_size(fallback=(-1, -1))[0]
1645                    if terminal_space != -1:
1646                        space = terminal_space - len(self.msg % tuple([pct] + info[:-1] + ['']))
1647                        info[-1] = ellipsis_truncate(info[-1], space)
1648                return self.output(self.msg % tuple([pct] + info), justify=False, info=info)
1649
1650            return self.output(self.msg % pct)
1651
1652    def output(self, message, justify=True, info=None):
1653        if self.json:
1654            self.output_json(message=message, current=self.counter, total=self.total, info=info)
1655        else:
1656            if justify:
1657                message = justify_to_terminal_size(message)
1658            self.logger.info(message)
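
# Usage sketch (illustrative): emit a progress line roughly every *step* percent
# while iterating over a known number of items:
#   pi = ProgressIndicatorPercent(total=100, step=10, msg='%3.0f%% processed')
#   for _ in range(100):
#       pi.show()
#   pi.finish()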
1659
1660
1661class ProgressIndicatorEndless:
1662    def __init__(self, step=10, file=None):
1663        """
1664        Progress indicator (long row of dots)
1665
1666        :param step: every Nth call, call the func
1667        :param file: output file, default: sys.stderr
1668        """
1669        self.counter = 0  # call counter
1670        self.triggered = 0  # increases 1 per trigger event
1671        self.step = step  # trigger every <step> calls
1672        if file is None:
1673            file = sys.stderr
1674        self.file = file
1675
1676    def progress(self):
1677        self.counter += 1
1678        trigger = self.counter % self.step == 0
1679        if trigger:
1680            self.triggered += 1
1681        return trigger
1682
1683    def show(self):
1684        trigger = self.progress()
1685        if trigger:
1686            return self.output(self.triggered)
1687
1688    def output(self, triggered):
1689        print('.', end='', file=self.file, flush=True)
1690
1691    def finish(self):
1692        print(file=self.file)
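
# Usage sketch (illustrative; *items* is a placeholder iterable): print one dot
# per *step* calls while processing an unknown number of items:
#   pi = ProgressIndicatorEndless(step=10)
#   for item in items:
#       pi.show()
#   pi.finish()  # terminates the dot row with a newline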
1693
1694
1695def sysinfo():
1696    show_sysinfo = os.environ.get('BORG_SHOW_SYSINFO', 'yes').lower()
1697    if show_sysinfo == 'no':
1698        return ''
1699
1700    python_implementation = platform.python_implementation()
1701    python_version = platform.python_version()
    # platform.uname() does a shell call internally to get processor info,
    # causing issue #3732, so we use os.uname() instead.
1704    try:
1705        uname = os.uname()
1706    except AttributeError:
1707        uname = None
1708    if sys.platform.startswith('linux'):
        try:
            linux_distribution = platform.linux_distribution()
        except Exception:
            # platform.linux_distribution() was deprecated in py 3.5 and removed in 3.8.
            linux_distribution = ('Unknown Linux', '', '')
1714    else:
1715        linux_distribution = None
1716    try:
1717        msgpack_version = '.'.join(str(v) for v in msgpack.version)
    except Exception:
1719        msgpack_version = 'unknown'
1720    info = []
1721    if uname is not None:
1722        info.append('Platform: %s' % (' '.join(uname), ))
1723    if linux_distribution is not None:
1724        info.append('Linux: %s %s %s' % linux_distribution)
1725    info.append('Borg: %s  Python: %s %s msgpack: %s' % (
1726                borg_version, python_implementation, python_version, msgpack_version))
1727    info.append('PID: %d  CWD: %s' % (os.getpid(), os.getcwd()))
1728    info.append('sys.argv: %r' % sys.argv)
1729    info.append('SSH_ORIGINAL_COMMAND: %r' % os.environ.get('SSH_ORIGINAL_COMMAND'))
1730    info.append('')
1731    return '\n'.join(info)
1732
1733
1734def log_multi(*msgs, level=logging.INFO, logger=logger):
1735    """
1736    log multiple lines of text, each line by a separate logging call for cosmetic reasons
1737
1738    each positional argument may be a single or multiple lines (separated by newlines) of text.
1739    """
1740    lines = []
1741    for msg in msgs:
1742        lines.extend(msg.splitlines())
1743    for line in lines:
1744        logger.log(level, line)
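
# Example (illustrative): both calls below emit three separate log records:
#   log_multi('first\nsecond', 'third')
#   log_multi('first', 'second', 'third', level=logging.WARNING)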
1745
1746
1747class BaseFormatter:
1748    FIXED_KEYS = {
1749        # Formatting aids
1750        'LF': '\n',
1751        'SPACE': ' ',
1752        'TAB': '\t',
1753        'CR': '\r',
1754        'NUL': '\0',
1755        'NEWLINE': os.linesep,
1756        'NL': os.linesep,
1757    }
1758
1759    def get_item_data(self, item):
1760        raise NotImplementedError
1761
1762    def format_item(self, item):
1763        return self.format.format_map(self.get_item_data(item))
1764
1765    @staticmethod
1766    def keys_help():
1767        return "- NEWLINE: OS dependent line separator\n" \
1768               "- NL: alias of NEWLINE\n" \
1769               "- NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath\n" \
1770               "- SPACE\n" \
1771               "- TAB\n" \
1772               "- CR\n" \
1773               "- LF"
1774
1775
1776class ArchiveFormatter(BaseFormatter):
1777    KEY_DESCRIPTIONS = {
1778        'archive': 'archive name interpreted as text (might be missing non-text characters, see barchive)',
1779        'name': 'alias of "archive"',
1780        'barchive': 'verbatim archive name, can contain any character except NUL',
1781        'comment': 'archive comment interpreted as text (might be missing non-text characters, see bcomment)',
1782        'bcomment': 'verbatim archive comment, can contain any character except NUL',
1783        # *start* is the key used by borg-info for this timestamp, this makes the formats more compatible
1784        'start': 'time (start) of creation of the archive',
1785        'time': 'alias of "start"',
1786        'end': 'time (end) of creation of the archive',
1787        'id': 'internal ID of the archive',
1788        'hostname': 'hostname of host on which this archive was created',
1789        'username': 'username of user who created this archive',
1790    }
1791    KEY_GROUPS = (
1792        ('archive', 'name', 'barchive', 'comment', 'bcomment', 'id'),
1793        ('start', 'time', 'end'),
1794        ('hostname', 'username'),
1795    )
1796
1797    @classmethod
1798    def available_keys(cls):
1799        fake_archive_info = ArchiveInfo('archivename', b'\1'*32, datetime(1970, 1, 1, tzinfo=timezone.utc))
1800        formatter = cls('', None, None, None)
1801        keys = []
1802        keys.extend(formatter.call_keys.keys())
1803        keys.extend(formatter.get_item_data(fake_archive_info).keys())
1804        return keys
1805
1806    @classmethod
1807    def keys_help(cls):
1808        help = []
1809        keys = cls.available_keys()
1810        for key in cls.FIXED_KEYS:
1811            keys.remove(key)
1812
1813        for group in cls.KEY_GROUPS:
1814            for key in group:
1815                keys.remove(key)
1816                text = "- " + key
1817                if key in cls.KEY_DESCRIPTIONS:
1818                    text += ": " + cls.KEY_DESCRIPTIONS[key]
1819                help.append(text)
1820            help.append("")
1821        assert not keys, str(keys)
1822        return "\n".join(help)
1823
1824    def __init__(self, format, repository, manifest, key, *, json=False):
1825        self.repository = repository
1826        self.manifest = manifest
1827        self.key = key
1828        self.name = None
1829        self.id = None
1830        self._archive = None
1831        self.json = json
1832        static_keys = {}  # here could be stuff on repo level, above archive level
1833        static_keys.update(self.FIXED_KEYS)
1834        self.format = partial_format(format, static_keys)
1835        self.format_keys = {f[1] for f in Formatter().parse(format)}
1836        self.call_keys = {
1837            'hostname': partial(self.get_meta, 'hostname', rs=True),
1838            'username': partial(self.get_meta, 'username', rs=True),
1839            'comment': partial(self.get_meta, 'comment', rs=True),
1840            'bcomment': partial(self.get_meta, 'comment', rs=False),
1841            'end': self.get_ts_end,
1842        }
1843        self.used_call_keys = set(self.call_keys) & self.format_keys
1844        if self.json:
1845            self.item_data = {}
1846            self.format_item = self.format_item_json
1847        else:
1848            self.item_data = static_keys
1849
1850    def format_item_json(self, item):
1851        return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + '\n'
1852
1853    def get_item_data(self, archive_info):
1854        self.name = archive_info.name
1855        self.id = archive_info.id
1856        item_data = {}
1857        item_data.update(self.item_data)
1858        item_data.update({
1859            'name': remove_surrogates(archive_info.name),
1860            'archive': remove_surrogates(archive_info.name),
1861            'barchive': archive_info.name,
1862            'id': bin_to_hex(archive_info.id),
1863            'time': self.format_time(archive_info.ts),
1864            'start': self.format_time(archive_info.ts),
1865        })
1866        for key in self.used_call_keys:
1867            item_data[key] = self.call_keys[key]()
1868        return item_data
1869
1870    @property
1871    def archive(self):
1872        """lazy load / update loaded archive"""
1873        if self._archive is None or self._archive.id != self.id:
1874            from .archive import Archive
1875            self._archive = Archive(self.repository, self.key, self.manifest, self.name)
1876        return self._archive
1877
1878    def get_meta(self, key, rs):
1879        value = self.archive.metadata.get(key, '')
1880        return remove_surrogates(value) if rs else value
1881
1882    def get_ts_end(self):
1883        return self.format_time(self.archive.ts_end)
1884
1885    def format_time(self, ts):
1886        return OutputTimestamp(ts)
1887
1888
1889class ItemFormatter(BaseFormatter):
1890    KEY_DESCRIPTIONS = {
1891        'bpath': 'verbatim POSIX path, can contain any character except NUL',
1892        'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
1893        'source': 'link target for links (identical to linktarget)',
1894        'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
1895        'csize': 'compressed size',
1896        'dsize': 'deduplicated size',
1897        'dcsize': 'deduplicated compressed size',
1898        'num_chunks': 'number of chunks in this file',
1899        'unique_chunks': 'number of unique chunks in this file',
1900        'health': 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
1901    }
1902    KEY_GROUPS = (
1903        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'),
1904        ('size', 'csize', 'dsize', 'dcsize', 'num_chunks', 'unique_chunks'),
1905        ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
1906        tuple(sorted(hashlib.algorithms_guaranteed)),
1907        ('archiveid', 'archivename', 'extra'),
1908        ('health', )
1909    )
1910
1911    KEYS_REQUIRING_CACHE = (
1912        'dsize', 'dcsize', 'unique_chunks',
1913    )
1914
1915    @classmethod
1916    def available_keys(cls):
1917        class FakeArchive:
1918            fpr = name = ""
1919
1920        from .item import Item
1921        fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0)
1922        formatter = cls(FakeArchive, "")
1923        keys = []
1924        keys.extend(formatter.call_keys.keys())
1925        keys.extend(formatter.get_item_data(fake_item).keys())
1926        return keys
1927
1928    @classmethod
1929    def keys_help(cls):
1930        help = []
1931        keys = cls.available_keys()
1932        for key in cls.FIXED_KEYS:
1933            keys.remove(key)
1934
1935        for group in cls.KEY_GROUPS:
1936            for key in group:
1937                keys.remove(key)
1938                text = "- " + key
1939                if key in cls.KEY_DESCRIPTIONS:
1940                    text += ": " + cls.KEY_DESCRIPTIONS[key]
1941                help.append(text)
1942            help.append("")
1943        assert not keys, str(keys)
1944        return "\n".join(help)
1945
1946    @classmethod
1947    def format_needs_cache(cls, format):
1948        format_keys = {f[1] for f in Formatter().parse(format)}
1949        return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
1950
1951    def __init__(self, archive, format, *, json_lines=False):
1952        self.archive = archive
1953        self.json_lines = json_lines
1954        static_keys = {
1955            'archivename': archive.name,
1956            'archiveid': archive.fpr,
1957        }
1958        static_keys.update(self.FIXED_KEYS)
1959        if self.json_lines:
1960            self.item_data = {}
1961            self.format_item = self.format_item_json
1962        else:
1963            self.item_data = static_keys
1964        self.format = partial_format(format, static_keys)
1965        self.format_keys = {f[1] for f in Formatter().parse(format)}
1966        self.call_keys = {
1967            'size': self.calculate_size,
1968            'csize': self.calculate_csize,
1969            'dsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.size),
1970            'dcsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.csize),
1971            'num_chunks': self.calculate_num_chunks,
1972            'unique_chunks': partial(self.sum_unique_chunks_metadata, lambda chunk: 1),
1973            'isomtime': partial(self.format_iso_time, 'mtime'),
1974            'isoctime': partial(self.format_iso_time, 'ctime'),
1975            'isoatime': partial(self.format_iso_time, 'atime'),
1976            'mtime': partial(self.format_time, 'mtime'),
1977            'ctime': partial(self.format_time, 'ctime'),
1978            'atime': partial(self.format_time, 'atime'),
1979        }
1980        for hash_function in hashlib.algorithms_guaranteed:
1981            self.add_key(hash_function, partial(self.hash_item, hash_function))
1982        self.used_call_keys = set(self.call_keys) & self.format_keys
1983
1984    def format_item_json(self, item):
1985        return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + '\n'
1986
1987    def add_key(self, key, callable_with_item):
1988        self.call_keys[key] = callable_with_item
1989        self.used_call_keys = set(self.call_keys) & self.format_keys
1990
1991    def get_item_data(self, item):
1992        item_data = {}
1993        item_data.update(self.item_data)
1994        mode = stat.filemode(item.mode)
1995        item_type = mode[0]
1996
1997        source = item.get('source', '')
1998        extra = ''
1999        if source:
2000            source = remove_surrogates(source)
2001            if item_type == 'l':
2002                extra = ' -> %s' % source
2003            else:
2004                mode = 'h' + mode[1:]
2005                extra = ' link to %s' % source
2006        item_data['type'] = item_type
2007        item_data['mode'] = mode
2008        item_data['user'] = item.user or item.uid
2009        item_data['group'] = item.group or item.gid
2010        item_data['uid'] = item.uid
2011        item_data['gid'] = item.gid
2012        item_data['path'] = remove_surrogates(item.path)
2013        if self.json_lines:
2014            item_data['healthy'] = 'chunks_healthy' not in item
2015        else:
2016            item_data['bpath'] = item.path
2017            item_data['extra'] = extra
2018            item_data['health'] = 'broken' if 'chunks_healthy' in item else 'healthy'
2019        item_data['source'] = source
2020        item_data['linktarget'] = source
2021        item_data['flags'] = item.get('bsdflags')
2022        for key in self.used_call_keys:
2023            item_data[key] = self.call_keys[key](item)
2024        return item_data
2025
2026    def sum_unique_chunks_metadata(self, metadata_func, item):
2027        """
        Sum the metadata of an item's unique chunks. A chunk counts as unique if it is
        referenced globally exactly as often as it is referenced within the item.

        item: the item whose unique chunks' metadata is summed
        metadata_func: a function that takes a ChunkIndexEntry and returns a number,
                       selecting the metadata value needed from the chunk
2034        """
2035        chunk_index = self.archive.cache.chunks
2036        chunks = item.get('chunks', [])
2037        chunks_counter = Counter(c.id for c in chunks)
2038        return sum(metadata_func(c) for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id])
2039
2040    def calculate_num_chunks(self, item):
2041        return len(item.get('chunks', []))
2042
2043    def calculate_size(self, item):
2044        # note: does not support hardlink slaves, they will be size 0
2045        return item.get_size(compressed=False)
2046
2047    def calculate_csize(self, item):
2048        # note: does not support hardlink slaves, they will be csize 0
2049        return item.get_size(compressed=True)
2050
2051    def hash_item(self, hash_function, item):
2052        if 'chunks' not in item:
2053            return ""
        h = hashlib.new(hash_function)  # local name avoids shadowing the hash() builtin
        for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]):
            h.update(data)
        return h.hexdigest()
2058
2059    def format_time(self, key, item):
2060        return OutputTimestamp(safe_timestamp(item.get(key) or item.mtime))
2061
2062    def format_iso_time(self, key, item):
2063        return self.format_time(key, item).isoformat()
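
# Usage sketch (illustrative; assumes *archive* is a loaded Archive instance
# that provides an item iterator such as Archive.iter_items):
#   fmt = ItemFormatter(archive, '{mode} {user:6} {size:8d} {path}{extra}{NL}')
#   for item in archive.iter_items():
#       sys.stdout.write(fmt.format_item(item))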
2064
2065
2066class ChunkIteratorFileWrapper:
2067    """File-like wrapper for chunk iterators"""
2068
2069    def __init__(self, chunk_iterator, read_callback=None):
2070        """
2071        *chunk_iterator* should be an iterator yielding bytes. These will be buffered
2072        internally as necessary to satisfy .read() calls.
2073
2074        *read_callback* will be called with one argument, some byte string that has
2075        just been read and will be subsequently returned to a caller of .read().
2076        It can be used to update a progress display.
2077        """
2078        self.chunk_iterator = chunk_iterator
2079        self.chunk_offset = 0
2080        self.chunk = b''
2081        self.exhausted = False
2082        self.read_callback = read_callback
2083
2084    def _refill(self):
2085        remaining = len(self.chunk) - self.chunk_offset
2086        if not remaining:
2087            try:
2088                chunk = next(self.chunk_iterator)
2089                self.chunk = memoryview(chunk)
2090            except StopIteration:
2091                self.exhausted = True
2092                return 0  # EOF
2093            self.chunk_offset = 0
2094            remaining = len(self.chunk)
2095        return remaining
2096
2097    def _read(self, nbytes):
2098        if not nbytes:
2099            return b''
2100        remaining = self._refill()
2101        will_read = min(remaining, nbytes)
2102        self.chunk_offset += will_read
2103        return self.chunk[self.chunk_offset - will_read:self.chunk_offset]
2104
2105    def read(self, nbytes):
2106        parts = []
2107        while nbytes and not self.exhausted:
2108            read_data = self._read(nbytes)
2109            nbytes -= len(read_data)
2110            parts.append(read_data)
2111            if self.read_callback:
2112                self.read_callback(read_data)
2113        return b''.join(parts)
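
# Example (illustrative): reads transparently span chunk boundaries:
#   >>> f = ChunkIteratorFileWrapper(iter([b'foo', b'bar']))
#   >>> f.read(4)
#   b'foob'
#   >>> f.read(10)
#   b'ar'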
2114
2115
2116def open_item(archive, item):
2117    """Return file-like object for archived item (with chunks)."""
2118    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks])
2119    return ChunkIteratorFileWrapper(chunk_iterator)
2120
2121
2122def file_status(mode):
2123    if stat.S_ISREG(mode):
2124        return 'A'
2125    elif stat.S_ISDIR(mode):
2126        return 'd'
2127    elif stat.S_ISBLK(mode):
2128        return 'b'
2129    elif stat.S_ISCHR(mode):
2130        return 'c'
2131    elif stat.S_ISLNK(mode):
2132        return 's'
2133    elif stat.S_ISFIFO(mode):
2134        return 'f'
2135    return '?'
2136
2137
2138def hardlinkable(mode):
2139    """return True if we support hardlinked items of this type"""
2140    return stat.S_ISREG(mode) or stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)
2141
2142
2143def chunkit(it, size):
2144    """
    Chunk an iterator <it> into pieces of length <size>.

    >>> list(chunkit('ABCDEFG', 3))
    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]
2149    """
2150    iterable = iter(it)
2151    return iter(lambda: list(islice(iterable, size)), [])
2152
2153
2154def consume(iterator, n=None):
2155    """Advance the iterator n-steps ahead. If n is none, consume entirely."""
2156    # Use functions that consume iterators at C speed.
2157    if n is None:
2158        # feed the entire iterator into a zero-length deque
2159        deque(iterator, maxlen=0)
2160    else:
2161        # advance to the empty slice starting at position n
2162        next(islice(iterator, n, n), None)
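
# Example (illustrative):
#   >>> it = iter(range(10))
#   >>> consume(it, 3)  # skips 0, 1, 2
#   >>> next(it)
#   3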
2163
2164
2165def scandir_keyfunc(dirent):
2166    try:
2167        return (0, dirent.inode())
2168    except OSError as e:
        # maybe a permission denied error while doing a stat() on the dirent
        logger.debug('scandir_inorder: Unable to stat %s: %s', dirent.path, e)
        # order this dirent after all the others, lexically by file name.
        # we must not break the whole scandir just because of an exception in one dirent;
        # ignore the exception for now, since another stat will be done later anyways
        # (or the entry will be skipped by an exclude pattern).
2175        return (1, dirent.name)
2176
2177
2178def scandir_inorder(path='.'):
2179    return sorted(scandir(path), key=scandir_keyfunc)
2180
2181
2182def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True):
2183    """
2184    clean lines (usually read from a config file):
2185
2186    1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments.
2187
2188    note: only "pure comment lines" are supported, no support for "trailing comments".
2189
2190    :param lines: input line iterator (e.g. list or open text file) that gives unclean input lines
2191    :param lstrip: lstrip call arguments or False, if lstripping is not desired
2192    :param rstrip: rstrip call arguments or False, if rstripping is not desired
2193    :param remove_comments: remove comment lines (lines starting with "#")
2194    :param remove_empty: remove empty lines
2195    :return: yields processed lines
2196    """
2197    for line in lines:
2198        if lstrip is not False:
2199            line = line.lstrip(lstrip)
2200        if rstrip is not False:
2201            line = line.rstrip(rstrip)
2202        if remove_empty and not line:
2203            continue
2204        if remove_comments and line.startswith('#'):
2205            continue
2206        yield line
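
# Example (illustrative): comment lines and empty lines are dropped, whitespace
# is stripped:
#   >>> list(clean_lines(['# a comment', '', '  some value  ']))
#   ['some value']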
2207
2208
2209class ErrorIgnoringTextIOWrapper(io.TextIOWrapper):
2210    def read(self, n):
2211        if not self.closed:
2212            try:
2213                return super().read(n)
2214            except BrokenPipeError:
2215                try:
2216                    super().close()
2217                except OSError:
2218                    pass
2219        return ''
2220
2221    def write(self, s):
2222        if not self.closed:
2223            try:
2224                return super().write(s)
2225            except BrokenPipeError:
2226                try:
2227                    super().close()
2228                except OSError:
2229                    pass
2230        return len(s)
2231
2232
2233class SignalException(BaseException):
2234    """base class for all signal-based exceptions"""
2235
2236
2237class SigHup(SignalException):
2238    """raised on SIGHUP signal"""
2239
2240
2241class SigTerm(SignalException):
2242    """raised on SIGTERM signal"""
2243
2244
2245@contextlib.contextmanager
2246def signal_handler(sig, handler):
2247    """
2248    when entering context, set up signal handler <handler> for signal <sig>.
2249    when leaving context, restore original signal handler.
2250
    <sig> can be either a str naming a signal.SIGXXX attribute (this won't crash
    if the attribute name does not exist, as some names are platform specific) or
    an int giving a signal number.

    <handler> is any handler value accepted by signal.signal(sig, handler).
2256    """
2257    if isinstance(sig, str):
2258        sig = getattr(signal, sig, None)
2259    if sig is not None:
2260        orig_handler = signal.signal(sig, handler)
2261    try:
2262        yield
2263    finally:
2264        if sig is not None:
2265            signal.signal(sig, orig_handler)
2266
2267
2268def raising_signal_handler(exc_cls):
2269    def handler(sig_no, frame):
        # setting SIG_IGN avoids a second incoming signal of this kind
        # raising another exception while we are still processing the
        # exception handler for exc_cls triggered by the first signal.
2273        signal.signal(sig_no, signal.SIG_IGN)
2274        raise exc_cls
2275
2276    return handler
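
# Usage sketch (illustrative; *long_running_operation* is a placeholder):
# translate SIGTERM into a SigTerm exception while the block is active:
#   with signal_handler('SIGTERM', raising_signal_handler(SigTerm)):
#       long_running_operation()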
2277
2278
2279def swidth_slice(string, max_width):
2280    """
2281    Return a slice of *max_width* cells from *string*.
2282
2283    Negative *max_width* means from the end of string.
2284
2285    *max_width* is in units of character cells (or "columns").
2286    Latin characters are usually one cell wide, many CJK characters are two cells wide.
2287    """
2288    from .platform import swidth
2289    reverse = max_width < 0
2290    max_width = abs(max_width)
2291    if reverse:
2292        string = reversed(string)
2293    current_swidth = 0
2294    result = []
2295    for character in string:
2296        current_swidth += swidth(character)
2297        if current_swidth > max_width:
2298            break
2299        result.append(character)
2300    if reverse:
2301        result.reverse()
2302    return ''.join(result)
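
# Example (illustrative; assumes all characters involved are one cell wide):
#   >>> swidth_slice('abcdef', 3)
#   'abc'
#   >>> swidth_slice('abcdef', -2)
#   'ef'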
2303
2304
2305class BorgJsonEncoder(json.JSONEncoder):
2306    def default(self, o):
2307        from .repository import Repository
2308        from .remote import RemoteRepository
2309        from .archive import Archive
2310        from .cache import LocalCache, AdHocCache
        if isinstance(o, (Repository, RemoteRepository)):
2312            return {
2313                'id': bin_to_hex(o.id),
2314                'location': o._location.canonical_path(),
2315            }
2316        if isinstance(o, Archive):
2317            return o.info()
2318        if isinstance(o, LocalCache):
2319            return {
2320                'path': o.path,
2321                'stats': o.stats(),
2322            }
2323        if isinstance(o, AdHocCache):
2324            return {
2325                'stats': o.stats(),
2326            }
2327        if callable(getattr(o, 'to_json', None)):
2328            return o.to_json()
2329        return super().default(o)
2330
2331
2332def basic_json_data(manifest, *, cache=None, extra=None):
2333    key = manifest.key
2334    data = extra or {}
2335    data.update({
2336        'repository': BorgJsonEncoder().default(manifest.repository),
2337        'encryption': {
2338            'mode': key.ARG_NAME,
2339        },
2340    })
2341    data['repository']['last_modified'] = OutputTimestamp(manifest.last_timestamp.replace(tzinfo=timezone.utc))
2342    if key.NAME.startswith('key file'):
2343        data['encryption']['keyfile'] = key.find_key()
2344    if cache:
2345        data['cache'] = cache
2346    return data
2347
2348
2349def json_dump(obj):
2350    """Dump using BorgJSONEncoder."""
2351    return json.dumps(obj, sort_keys=True, indent=4, cls=BorgJsonEncoder)
2352
2353
2354def json_print(obj):
2355    print(json_dump(obj))
2356
2357
2358def secure_erase(path):
2359    """Attempt to securely erase a file by writing random data over it before deleting it."""
2360    with open(path, 'r+b') as fd:
2361        length = os.stat(fd.fileno()).st_size
2362        fd.write(os.urandom(length))
2363        fd.flush()
2364        os.fsync(fd.fileno())
2365    os.unlink(path)
2366
2367
2368def truncate_and_unlink(path):
2369    """
2370    Truncate and then unlink *path*.
2371
2372    Do not create *path* if it does not exist.
    Open *path* for truncation in r+b mode (i.e. O_RDWR in binary mode).
2374
2375    Use this when deleting potentially large files when recovering
2376    from a VFS error such as ENOSPC. It can help a full file system
2377    recover. Refer to the "File system interaction" section
2378    in repository.py for further explanations.
2379    """
2380    try:
2381        with open(path, 'r+b') as fd:
2382            fd.truncate()
2383    except OSError as err:
2384        if err.errno != errno.ENOTSUP:
2385            raise
2386        # don't crash if the above ops are not supported.
2387    os.unlink(path)
2388
2389
2390def popen_with_error_handling(cmd_line: str, log_prefix='', **kwargs):
2391    """
2392    Handle typical errors raised by subprocess.Popen. Return None if an error occurred,
2393    otherwise return the Popen object.
2394
2395    *cmd_line* is split using shlex (e.g. 'gzip -9' => ['gzip', '-9']).
2396
2397    Log messages will be prefixed with *log_prefix*; if set, it should end with a space
2398    (e.g. log_prefix='--some-option: ').
2399
2400    Does not change the exit code.
2401    """
2402    assert not kwargs.get('shell'), 'Sorry pal, shell mode is a no-no'
2403    try:
2404        command = shlex.split(cmd_line)
2405        if not command:
2406            raise ValueError('an empty command line is not permitted')
2407    except ValueError as ve:
2408        logger.error('%s%s', log_prefix, ve)
2409        return
2410    logger.debug('%scommand line: %s', log_prefix, command)
2411    try:
2412        return subprocess.Popen(command, **kwargs)
2413    except FileNotFoundError:
2414        logger.error('%sexecutable not found: %s', log_prefix, command[0])
2415        return
2416    except PermissionError:
2417        logger.error('%spermission denied: %s', log_prefix, command[0])
2418        return
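
# Usage sketch (illustrative; the log_prefix value is made up):
#   proc = popen_with_error_handling('gzip -9', log_prefix='--filter: ',
#                                    stdin=subprocess.PIPE, stdout=subprocess.PIPE)
#   if proc is None:
#       pass  # the error was already logged, handle the failure here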
2419
2420
2421def prepare_subprocess_env(system, env=None):
2422    """
2423    Prepare the environment for a subprocess we are going to create.
2424
2425    :param system: True for preparing to invoke system-installed binaries,
2426                   False for stuff inside the pyinstaller environment (like borg, python).
    :param env: optionally give an environment dict here. If not given, default to os.environ.
2428    :return: a modified copy of the environment
2429    """
2430    env = dict(env if env is not None else os.environ)
2431    if system:
        # a pyinstaller binary's bootloader modifies LD_LIBRARY_PATH=/tmp/_MEIXXXXXX,
        # but we do not want system binaries (like ssh or others) to pick up
        # (non-matching) libraries from there.
        # thus we restore the original LDLP from before pyinstaller modified it:
2436        lp_key = 'LD_LIBRARY_PATH'
2437        lp_orig = env.get(lp_key + '_ORIG')
2438        if lp_orig is not None:
2439            env[lp_key] = lp_orig
2440        else:
2441            # We get here in 2 cases:
2442            # 1. when not running a pyinstaller-made binary.
2443            #    in this case, we must not kill LDLP.
2444            # 2. when running a pyinstaller-made binary and there was no LDLP
2445            #    in the original env (in this case, the pyinstaller bootloader
2446            #    does *not* put ..._ORIG into the env either).
2447            #    in this case, we must kill LDLP.
2448            #    We can recognize this via sys.frozen and sys._MEIPASS being set.
2449            lp = env.get(lp_key)
2450            if lp is not None and getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
2451                env.pop(lp_key)
2452    # security: do not give secrets to subprocess
2453    env.pop('BORG_PASSPHRASE', None)
2454    # for information, give borg version to the subprocess
2455    env['BORG_VERSION'] = borg_version
2456    return env
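
# Usage sketch (illustrative): run a system-installed binary with a cleaned-up
# environment:
#   env = prepare_subprocess_env(system=True)
#   subprocess.check_call(['ssh', '-V'], env=env)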
2457
2458
2459def dash_open(path, mode):
2460    assert '+' not in mode  # the streams are either r or w, but never both
2461    if path == '-':
2462        stream = sys.stdin if 'r' in mode else sys.stdout
2463        return stream.buffer if 'b' in mode else stream
2464    else:
2465        return open(path, mode)
2466
2467
2468def is_terminal(fd=sys.stdout):
2469    return hasattr(fd, 'isatty') and fd.isatty() and (sys.platform != 'win32' or 'ANSICON' in os.environ)
2470
2471
2472def umount(mountpoint):
2473    env = prepare_subprocess_env(system=True)
2474    try:
2475        return subprocess.call(['fusermount', '-u', mountpoint], env=env)
2476    except FileNotFoundError:
2477        return subprocess.call(['umount', mountpoint], env=env)
2478