1"""Helper functions and classes for bup."""
2
3from __future__ import absolute_import, division
4from collections import namedtuple
5from contextlib import contextmanager
6from ctypes import sizeof, c_void_p
7from math import floor
8from os import environ
9from subprocess import PIPE, Popen
10import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
11import hashlib, heapq, math, operator, time, grp, tempfile
12
13from bup import _helpers
14from bup import compat
15from bup.compat import argv_bytes, byte_int
16from bup.io import byte_stream, path_msg
17# This function should really be in helpers, not in bup.options.  But we
18# want options.py to be standalone so people can include it in other projects.
19from bup.options import _tty_width as tty_width
20
21
class Nonlocal:
    """Helper to deal with Python scoping issues.

    A bare attribute holder: nested functions can share mutable state
    through an instance's attributes (poor-man's ``nonlocal``)."""
    pass
25
26
# System page size in bytes; used by the mmap/mincore helpers below.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Kernel limit on the combined size of argv + environ for exec();
# batchpipe() uses this to size subprocess argument batches.
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
33
def last(iterable):
    """Return the final item of iterable, or None when it is empty."""
    tail = None
    it = iter(iterable)
    while True:
        try:
            tail = next(it)
        except StopIteration:
            return tail
39
40
def atoi(s):
    """Convert s (ascii bytes) to an integer. Return 0 if s is not a number."""
    if not s:
        return 0
    try:
        return int(s)
    except ValueError:
        return 0
47
48
def atof(s):
    """Convert s (ascii bytes) to a float. Return 0 if s is not a number."""
    if not s:
        return 0.0
    try:
        return float(s)
    except ValueError:
        # Note: deliberately the int 0 here, matching historical behavior.
        return 0
55
56
57buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
58
59
# Prefer os.fdatasync (data only, skips metadata flush) when the
# platform provides it; otherwise fall back to a full fsync.
try:
    _fdatasync = os.fdatasync
except AttributeError:
    _fdatasync = os.fsync

if sys.platform.startswith('darwin'):
    # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
    import fcntl
    def fdatasync(fd):
        """Flush fd's data to stable storage (F_FULLFSYNC on Darwin)."""
        try:
            return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
        except IOError as e:
            # Fallback for file systems (SMB) that do not support F_FULLFSYNC
            if e.errno == errno.ENOTSUP:
                return _fdatasync(fd)
            else:
                raise
else:
    fdatasync = _fdatasync
79
80
def partition(predicate, stream):
    """Returns (leading_matches_it, rest_it), where leading_matches_it
    must be completely exhausted before traversing rest_it.

    """
    stream = iter(stream)
    first_nonmatch = []  # holds the first rejected item, if any
    def leading_matches():
        for item in stream:
            if not predicate(item):
                first_nonmatch.append(item)
                return
            yield item
    def rest():
        if first_nonmatch:
            yield first_nonmatch[0]
            for item in stream:
                yield item
    return (leading_matches(), rest())
102
103
def merge_dict(*xs):
    """Return a new dict combining every mapping in xs; on duplicate
    keys, the value from the later argument wins."""
    acc = {}
    for mapping in xs:
        acc.update(mapping)
    return acc
109
110
def lines_until_sentinel(f, sentinel, ex_type):
    """Yield complete lines from f until sentinel is read.

    Raise ex_type when EOF (or a truncated line) arrives before the
    sentinel.  The sentinel must end with \\n and contain only one \\n.
    """
    while True:
        line = f.readline()
        if line == sentinel:
            return
        if line and line.endswith(b'\n'):
            yield line
        else:
            raise ex_type('Hit EOF while reading line')
120
121
def stat_if_exists(path):
    """Return os.stat(path), or None when path does not exist.

    Any OSError other than ENOENT propagates to the caller.
    """
    try:
        return os.stat(path)
    except OSError as e:
        if e.errno == errno.ENOENT:
            return None
        raise
129
130
131# Write (blockingly) to sockets that may or may not be in blocking mode.
132# We need this because our stderr is sometimes eaten by subprocesses
133# (probably ssh) that sometimes make it nonblocking, if only temporarily,
134# leading to race conditions.  Ick.  We'll do it the hard way.
135def _hard_write(fd, buf):
136    while buf:
137        (r,w,x) = select.select([], [fd], [], None)
138        if not w:
139            raise IOError('select(fd) returned without being writable')
140        try:
141            sz = os.write(fd, buf)
142        except OSError as e:
143            if e.errno != errno.EAGAIN:
144                raise
145        assert(sz >= 0)
146        buf = buf[sz:]
147
148
_last_prog = 0  # time of the last qprogress() output; reset by log()
def log(s):
    """Print a log message to stderr."""
    global _last_prog
    sys.stdout.flush()
    payload = s if isinstance(s, bytes) else s.encode()
    _hard_write(sys.stderr.fileno(), payload)
    _last_prog = 0
156
157
def debug1(s):
    """Log s when BUP_DEBUG verbosity is at least 1."""
    if buglvl < 1:
        return
    log(s)
161
162
def debug2(s):
    """Log s when BUP_DEBUG verbosity is at least 2."""
    if buglvl < 2:
        return
    log(s)
166
167
# Whether stdout/stderr are ttys.  BUP_FORCE_TTY is a bitmask
# (1 = stdout, 2 = stderr) that lets callers force tty-style output.
istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
# Most recent string passed to progress(); used by reprogress().
_last_progress = ''
def progress(s):
    """Calls log() if stderr is a TTY.  Does nothing otherwise."""
    global _last_progress
    if not istty2:
        return
    log(s)
    _last_progress = s
177
178
def qprogress(s):
    """Call progress(), but only if enough time (0.1s) has passed since
    the previous progress output, to avoid flooding stderr."""
    global _last_prog
    now = time.time()
    if now - _last_prog <= 0.1:
        return
    progress(s)
    _last_prog = now
189
190
def reprogress():
    """Redisplay the most recent progress message.

    Useful after some other output has wiped out the progress line.
    Only messages ending in '\\r' (i.e. transient ones) are repeated.
    """
    recent = _last_progress
    if recent and recent.endswith('\r'):
        progress(recent)
199
200
def mkdirp(d, mode=None):
    """Recursively create directories on path 'd'.

    Unlike os.makedirs(), it doesn't raise an exception if the last element of
    the path already exists.
    """
    makedirs_args = (d, mode) if mode else (d,)
    try:
        os.makedirs(*makedirs_args)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
217
218
class MergeIterItem:
    """Pair a current entry with the iterator it came from, ordered by
    entry so instances can live in a heap (see merge_iter)."""
    def __init__(self, entry, read_it):
        self.entry = entry
        self.read_it = read_it

    def __lt__(self, other):
        return self.entry < other.entry
225
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge the sorted sequences in iters into one sorted stream,
    suppressing consecutive duplicate entries.

    iters must support len() (for the progress total) as well as
    iteration.  Every pfreq entries, pfunc(count, total) is invoked for
    progress reporting; pfinal(count, total) is invoked once at the
    end.  When key is given, two entries are considered duplicates when
    their getattr(entry, key) values match; otherwise plain equality is
    used.  NOTE(review): an iterator whose first entry is falsy is
    dropped entirely during heap setup ('if e' below) — presumably
    entries are always truthy objects; confirm before relying on this.
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    # Seed the heap with each iterator's first entry (skipping empties).
    heap = ((next(it, None),it) for it in iters)
    heap = [MergeIterItem(e, it) for e, it in heap if e]

    heapq.heapify(heap)
    pe = None  # previously emitted entry, for duplicate suppression
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0].entry, heap[0].read_it
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = next(it)
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            # shift current to new location
            heapq.heapreplace(heap, MergeIterItem(e, it))
    pfinal(count, total)
255
256
def unlink(f):
    """Delete a file at path 'f' if it currently exists.

    Unlike os.unlink(), does not throw an exception if the file didn't already
    exist.
    """
    try:
        os.unlink(f)
    except OSError as e:
        if e.errno == errno.ENOENT:
            return  # already gone; nothing to do
        raise
268
269
# Identifiers/paths made only of [-_./a-zA-Z0-9] need no shell quoting.
_bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
_sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$')
272
def bquote(x):
    """Shell-quote the bytes string x, quoting only when necessary."""
    if x == b'':
        return b"''"
    if _bq_simple_id_rx.match(x):
        return x
    escaped = x.replace(b"'", b"'\"'\"'")
    return b"'" + escaped + b"'"
279
def squote(x):
    """Shell-quote the (unicode) string x, quoting only when necessary."""
    if x == '':
        return "''"
    if _sq_simple_id_rx.match(x):
        return x
    escaped = x.replace("'", "'\"'\"'")
    return "'" + escaped + "'"
286
def quote(x):
    """Shell-quote x, which must be bytes or a (unicode) string."""
    if isinstance(x, bytes):
        return bquote(x)
    if isinstance(x, compat.str_type):
        return squote(x)
    assert False
293
def shstr(cmd):
    """Return a shell quoted string for cmd if it's a sequence, else cmd.

    cmd must be a string, bytes, or a sequence of one or the other,
    and the assumption is that if cmd is a string or bytes, then it's
    already quoted (because it's what's actually being passed to
    call() and friends.  e.g. log(shstr(cmd)); call(cmd)

    """
    if isinstance(cmd, (bytes, compat.str_type)):
        return cmd
    if all(isinstance(x, bytes) for x in cmd):
        return b' '.join(bquote(x) for x in cmd)
    if all(isinstance(x, compat.str_type) for x in cmd):
        return ' '.join(squote(x) for x in cmd)
    raise TypeError('unsupported shstr argument: ' + repr(cmd))
310
311
312exc = subprocess.check_call
313
def exo(cmd,
        input=None,
        stdin=None,
        stderr=None,
        shell=False,
        check=True,
        preexec_fn=None,
        close_fds=True):
    """Run cmd and return (stdout_data, stderr_data, Popen).

    When input is given it is fed to the child's stdin (stdin must then
    be unset or PIPE).  When check is true, raise an Exception if the
    child exits with a nonzero status.
    """
    if input:
        assert stdin in (None, PIPE)
        stdin = PIPE
    proc = Popen(cmd,
                 stdin=stdin, stdout=PIPE, stderr=stderr,
                 shell=shell,
                 preexec_fn=preexec_fn,
                 close_fds=close_fds)
    out, err = proc.communicate(input)
    rc = proc.returncode
    if check and rc != 0:
        extra = ', stderr: %r' % err if err else ''
        raise Exception('subprocess %r failed with status %d%s'
                        % (b' '.join(map(quote, cmd)), rc, extra))
    return out, err, proc
336
def readpipe(argv, preexec_fn=None, shell=False):
    """Run a subprocess and return its output."""
    out, _, _ = exo(argv, preexec_fn=preexec_fn, shell=shell)
    return out
340
341
def _argmax_base(command):
    """Estimate the exec() byte overhead of running command: a fixed
    allowance plus the command's own argv strings and the current
    environment (each entry with its NUL terminator(s) and pointer)."""
    base_size = 2048
    for c in command:
        # Bug fix: this previously added len(command) (the number of
        # arguments) for every element instead of each string's length.
        base_size += len(c) + 1
    for k, v in compat.items(environ):
        base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
    return base_size
349
350
351def _argmax_args_size(args):
352    return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
353
354
def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
    """If args is not empty, yield the output produced by calling the
command list with args as a sequence of strings (It may be necessary
to return multiple strings in order to respect ARG_MAX)."""
    # The optional arg_max arg is a workaround for an issue with the
    # current wvtest behavior.
    base_size = _argmax_base(command)
    while args:
        room = arg_max - base_size
        n = 0
        # Greedily take as many args as fit within the remaining room.
        while n < len(args):
            arg_size = _argmax_args_size(args[n:n+1])
            if arg_size > room:
                break
            room -= arg_size
            n += 1
        batch, args = args[:n], args[n:]
        assert(len(batch))
        yield readpipe(command + batch, preexec_fn=preexec_fn)
375
376
def resolve_parent(p):
    """Return the absolute path of a file without following any final symlink.

    Behaves like os.path.realpath, but doesn't follow a symlink for the last
    element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
    will follow symlinks in p's directory)
    """
    try:
        st = os.lstat(p)
    except OSError:
        st = None
    if st is None or not stat.S_ISLNK(st.st_mode):
        return os.path.realpath(p)
    # p is itself a symlink: resolve only its directory.
    head, tail = os.path.split(p)
    return os.path.join(os.path.realpath(head), tail)
396
397
def detect_fakeroot():
    """Return True if we appear to be running under fakeroot.

    fakeroot exports FAKEROOTKEY into its children's environment.
    """
    # Idiom fix: compare against None with 'is not', not '!='.
    return os.getenv("FAKEROOTKEY") is not None
401
402
if sys.platform.startswith('cygwin'):
    def is_superuser():
        """Return True when running with Cygwin admin rights (member of
        the Administrators group, gid 544, or gid 0)."""
        # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
        gids = os.getgroups()
        return 544 in gids or 0 in gids
else:
    def is_superuser():
        """Return True when the effective uid is root."""
        return os.geteuid() == 0
411
412
def cache_key_value(get_value, key, cache):
    """Return (value, was_cached).  If there is a value in the cache
    for key, use that, otherwise, call get_value(key) which should
    throw a KeyError if there is no value -- in which case the cached
    and returned value will be None.
    """
    try:
        # Already computed (the cached value may legitimately be None).
        return cache[key], True
    except KeyError:
        pass
    try:
        value = get_value(key)
    except KeyError:
        value = None
    cache[key] = value
    return value, False
430
431
_hostname = None  # cached result of the first gethostname() call
def hostname():
    """Get the FQDN of this machine."""
    global _hostname
    if not _hostname:
        _hostname = _helpers.gethostname()
    return _hostname
439
440
def format_filesize(size):
    """Return a human-readable string for size in bytes: plain digits
    below 1K, otherwise one decimal place plus a K/M/G/T/P/E suffix
    (powers of 1024)."""
    size = float(size)
    step = 1024.0
    if size < step:
        return "%d" % (size)
    power = int(math.log(size) // math.log(step))
    suffix = "KMGTPE"[power - 1]
    return "%.1f%s" % (size / math.pow(step, power), suffix)
449
450
class NotOk(Exception):
    """The server answered 'error ...' instead of 'ok' (see BaseConn)."""
    pass
453
454
class BaseConn:
    """Base class for bup's client/server protocol connections.

    Subclasses provide _read/_readline/has_input; this class layers the
    '\\nok\\n' / '\\nerror ...\\n' status framing on top of them.
    """
    def __init__(self, outp):
        self.outp = outp  # output stream; must support write() and flush()

    def close(self):
        """Drain any remaining input."""
        while self._read(65536): pass

    def _read(self, size):
        raise NotImplementedError("Subclasses must implement _read")

    def read(self, size):
        """Read 'size' bytes from input stream."""
        self.outp.flush()  # push pending requests before waiting for data
        return self._read(size)

    def _readline(self):
        # Bug fix: this stub previously took a spurious 'size' parameter,
        # so readline() on a subclass missing the override raised
        # TypeError instead of NotImplementedError.
        raise NotImplementedError("Subclasses must implement _readline")

    def readline(self):
        """Read from input stream until a newline is found."""
        self.outp.flush()
        return self._readline()

    def write(self, data):
        """Write 'data' to output stream."""
        #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
        self.outp.write(data)

    def has_input(self):
        """Return true if input stream is readable."""
        raise NotImplementedError("Subclasses must implement has_input")

    def ok(self):
        """Indicate end of output from last sent command."""
        self.write(b'\nok\n')

    def error(self, s):
        """Indicate server error to the client."""
        # Collapse all whitespace so the message stays on one line.
        s = re.sub(br'\s+', b' ', s)
        self.write(b'\nerror %s\n' % s)

    def _check_ok(self, onempty):
        # Scan input for the trailing ok/error status line; any other
        # nonempty line is handed to onempty().
        self.outp.flush()
        rl = b''
        for rl in linereader(self):
            #log('%d got line: %r\n' % (os.getpid(), rl))
            if not rl:  # empty line
                continue
            elif rl == b'ok':
                return None
            elif rl.startswith(b'error '):
                #log('client: error: %s\n' % rl[6:])
                return NotOk(rl[6:])
            else:
                onempty(rl)
        raise Exception('server exited unexpectedly; see errors above')

    def drain_and_check_ok(self):
        """Remove all data for the current command from input stream."""
        def onempty(rl):
            pass
        return self._check_ok(onempty)

    def check_ok(self):
        """Verify that server action completed successfully."""
        def onempty(rl):
            raise Exception('expected "ok", got %r' % rl)
        return self._check_ok(onempty)
523
524
class Conn(BaseConn):
    """A BaseConn backed by a pair of ordinary file objects."""
    def __init__(self, inp, outp):
        BaseConn.__init__(self, outp)
        self.inp = inp

    def _read(self, size):
        return self.inp.read(size)

    def _readline(self):
        return self.inp.readline()

    def has_input(self):
        """Return True when input is immediately readable, else None."""
        fd = self.inp.fileno()
        ready, _, _ = select.select([fd], [], [], 0)
        if not ready:
            return None
        assert(ready[0] == fd)
        return True
543
544
def checked_reader(fd, n):
    """Yield chunks read from fd until exactly n bytes have been
    produced; raise if EOF arrives before then."""
    remaining = n
    while remaining > 0:
        ready, _, _ = select.select([fd], [], [])
        assert(ready[0] == fd)
        chunk = os.read(fd, remaining)
        if not chunk:
            raise Exception("Unexpected EOF reading %d more bytes"
                            % remaining)
        yield chunk
        remaining -= len(chunk)
553
554
MAX_PACKET = 128 * 1024  # maximum payload bytes per multiplexed packet
def mux(p, outfd, outr, errr):
    """Multiplex subprocess p's stdout (outr) and stderr (errr) onto
    outfd until p exits.

    Each packet is a '!IB' header (payload length, channel) followed by
    the payload; channel 1 = stdout, 2 = stderr, 3 = end-of-stream.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        # Always emit the close marker so DemuxConn can finish cleanly.
        os.write(outfd, struct.pack('!IB', 0, 3))
572
573
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the packet stream produced by mux() from infd, splitting it
    back into a stdout-like data stream (served via _read/_readline)
    and stderr passthrough.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = b''
        while tail != b'BUPMUX':
            # Read up to 6 sync bytes at once, then one byte at a time.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            byte_stream(sys.stderr).write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None   # generator over the current stdout packet
        self.buf = None      # unconsumed bytes from that packet
        self.closed = False  # set once the channel-3 close marker arrives

    def write(self, data):
        # Opportunistically drain any pending input first (non-blocking).
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        # Consume one mux packet; return True if one was handled.
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = b''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)  # payload length, channel
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # stdout payload: leave a reader for _load_buf to consume.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr payload: copy straight through.
            for buf in checked_reader(self.infd, n):
                byte_stream(sys.stderr).write(buf)
        elif fdw == 3:
            # close marker: no more packets will follow.
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        # Ensure self.buf holds data; return True on success.
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        # Yield buffered chunks until ix_fn finds a stopping index.
        # ix_fn(buf) returns the split index, or None to keep reading;
        # the final chunk ends at that index.
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        def find_eol(buf):
            try:
                return buf.index(b'\n')+1
            except ValueError:
                return None
        return b''.join(self._read_parts(find_eol))

    def _read(self, size):
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return b''.join(self._read_parts(until_size))

    def has_input(self):
        return self._load_buf(0)
663
664
def linereader(f):
    """Yield each line read from f with its trailing newline removed,
    stopping at EOF."""
    while True:
        line = f.readline()
        if not line:
            return
        yield line[:-1]
672
673
def chunkyreader(f, count = None):
    """Generate a list of chunks of data read from 'f'.

    If count is None, read until EOF is reached.

    If count is a positive integer, read 'count' bytes from 'f'. If EOF is
    reached while reading, raise IOError.
    """
    # Idiom fix: compare against None with 'is not', not '!='.
    if count is not None:
        while count > 0:
            b = f.read(min(count, 65536))
            if not b:
                raise IOError('EOF with %d bytes remaining' % count)
            yield b
            count -= len(b)
    else:
        while True:
            b = f.read(65536)
            if not b:
                break
            yield b
694
695
@contextmanager
def atomically_replaced_file(name, mode='w', buffering=-1):
    """Yield a file that will be atomically renamed name when leaving the block.

    This contextmanager yields an open file object that is backed by a
    temporary file which will be renamed (atomically) to the target
    name if everything succeeds.

    The mode and buffering arguments are handled exactly as with open,
    and the yielded file will have very restrictive permissions, as
    per mkstemp.

    E.g.::

        with atomically_replaced_file('foo.txt', 'w') as f:
            f.write('hello jack.')

    """

    # Create the temp file in the target's directory so the final
    # os.rename() stays on one filesystem and is therefore atomic.
    (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
                                       text=('b' not in mode))
    try:
        try:
            f = os.fdopen(ffd, mode, buffering)
        except:
            # fdopen failed, so we still own (and must close) the raw fd.
            os.close(ffd)
            raise
        try:
            yield f
        finally:
            f.close()
        # Only reached when the caller's block completed without raising.
        os.rename(tempname, name)
    finally:
        unlink(tempname)  # nonexistent file is ignored
730
731
def slashappend(s):
    """Return s (bytes) with a b'/' appended, unless s is empty or
    already ends with one."""
    assert isinstance(s, bytes)
    if not s or s.endswith(b'/'):
        return s
    return s + b'/'
739
740
741def _mmap_do(f, sz, flags, prot, close):
742    if not sz:
743        st = os.fstat(f.fileno())
744        sz = st.st_size
745    if not sz:
746        # trying to open a zero-length map gives an error, but an empty
747        # string has all the same behaviour of a zero-length map, ie. it has
748        # no elements :)
749        return ''
750    map = mmap.mmap(f.fileno(), sz, flags, prot)
751    if close:
752        f.close()  # map will persist beyond file close
753    return map
754
755
def mmap_read(f, sz = 0, close=True):
    """Return a read-only private memory map of file 'f'.

    Map the whole file when sz is 0; when close is true, close f
    afterward (the mapping survives the close).
    """
    return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
761
762
def mmap_readwrite(f, sz = 0, close=True):
    """Return a shared read-write memory map of file 'f' (writes are
    flushed back to the file).

    Map the whole file when sz is 0; when close is true, close f
    afterward (the mapping survives the close).
    """
    prot = mmap.PROT_READ | mmap.PROT_WRITE
    return _mmap_do(f, sz, mmap.MAP_SHARED, prot, close)
769
770
def mmap_readwrite_private(f, sz = 0, close=True):
    """Return a private read-write memory map of file 'f'.

    Writes modify only the mapping, never the underlying file.  Map
    the whole file when sz is 0; when close is true, close f afterward
    (the mapping survives the close).
    """
    prot = mmap.PROT_READ | mmap.PROT_WRITE
    return _mmap_do(f, sz, mmap.MAP_PRIVATE, prot, close)
779
780
# mincore() support is optional in _helpers; when it's missing,
# fmincore is simply not defined.
_mincore = getattr(_helpers, 'mincore', None)
if _mincore:
    # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
    MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)

    _fmincore_chunk_size = None  # mmap window size; lazily initialized
    def _set_fmincore_chunk_size():
        """Pick the scan window: ~64MB rounded down to a page multiple."""
        global _fmincore_chunk_size
        pref_chunk_size = 64 * 1024 * 1024
        chunk_size = sc_page_size
        if (sc_page_size < pref_chunk_size):
            chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
        _fmincore_chunk_size = chunk_size

    def fmincore(fd):
        """Return the mincore() data for fd as a bytearray whose values can be
        tested via MINCORE_INCORE, or None if fd does not fully
        support the operation."""
        st = os.fstat(fd)
        if (st.st_size == 0):
            return bytearray(0)
        if not _fmincore_chunk_size:
            _set_fmincore_chunk_size()
        # One result byte per page; scan the file one chunk at a time.
        pages_per_chunk = _fmincore_chunk_size // sc_page_size;
        page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
        chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size
        result = bytearray(page_count)
        for ci in compat.range(chunk_count):
            pos = _fmincore_chunk_size * ci;
            msize = min(_fmincore_chunk_size, st.st_size - pos)
            try:
                m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
            except mmap.error as ex:
                if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
                    # Perhaps the file was a pipe, i.e. "... | bup split ..."
                    return None
                raise ex
            try:
                # Fill the slice of result covering this chunk's pages.
                _mincore(m, msize, 0, result, ci * pages_per_chunk)
            except OSError as ex:
                if ex.errno == errno.ENOSYS:
                    return None
                raise
        return result
825
826
def parse_timestamp(epoch_str):
    """Return the number of nanoseconds since the epoch that are described
by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
throw a ValueError that may contain additional information."""
    ns_per = {'s': 1000000000,
              'ms': 1000000,
              'us': 1000,
              'ns': 1}
    match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
    if match:
        count, units = match.group(1, 2)
        # A bare unit (e.g. 'ms') means a count of 1.
        return int(count or 1) * ns_per[units]
    if re.match(r'^([-+]?[0-9]+)$', epoch_str):
        raise ValueError('must include units, i.e. 100ns, 100ms, ...')
    raise ValueError()
845
846
def parse_num(s):
    """Parse string or bytes as a possibly unit suffixed number.

    Units are powers of 1024, case-insensitive, with an optional 'b'
    (k/kb, m/mb, g/gb, t/tb).  For example:
        1.5k means 1536 bytes
        199.2k means 203980 bytes
        1GB means 1073741824 bytes
    """
    if isinstance(s, bytes):
        # FIXME: should this raise a ValueError for UnicodeDecodeError
        # (perhaps with the latter as the context).
        s = s.decode('ascii')
    g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
    if not g:
        raise ValueError("can't parse %r as a number" % s)
    val, unit = g.groups()
    num = float(val)
    unit = unit.lower()
    multipliers = {'': 1, 'b': 1,
                   'k': 1024, 'kb': 1024,
                   'm': 1024 ** 2, 'mb': 1024 ** 2,
                   'g': 1024 ** 3, 'gb': 1024 ** 3,
                   't': 1024 ** 4, 'tb': 1024 ** 4}
    try:
        mult = multipliers[unit]
    except KeyError:
        raise ValueError("invalid unit %r in number %r" % (unit, s))
    return int(num * mult)
878
879
saved_errors = []  # error messages accumulated via add_error()
def add_error(e):
    """Append an error message to the list of saved errors.

    Once processing is able to stop and output the errors, the saved errors are
    accessible in the module variable helpers.saved_errors.
    """
    saved_errors.append(e)
    # %-70s left-pads to 70 columns — presumably so shorter messages
    # fully overwrite a pending progress line; confirm before changing.
    log('%-70s\n' % e)
889
890
def clear_errors():
    """Discard all errors recorded by add_error().

    Rebinds (rather than mutates) saved_errors, so callers holding a
    reference to the old list keep their snapshot.
    """
    global saved_errors
    saved_errors = []
894
895
def die_if_errors(msg=None, status=1):
    """When any errors were saved via add_error(), log msg (or a
    default summary) and exit the process with the given status."""
    global saved_errors
    if not saved_errors:
        return
    log(msg or ('warning: %d errors encountered\n' % len(saved_errors)))
    sys.exit(status)
903
904
def handle_ctrl_c():
    """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).

    The new exception handler will make sure that bup will exit without an ugly
    stacktrace when Ctrl-C is hit.
    """
    oldhook = sys.excepthook
    def newhook(exctype, value, traceback):
        if exctype != KeyboardInterrupt:
            return oldhook(exctype, value, traceback)
        log('\nInterrupted.\n')
    sys.excepthook = newhook
918
919
def columnate(l, prefix):
    """Format elements of 'l' in columns with 'prefix' leading each line.

    The number of columns is determined automatically based on the string
    lengths.
    """
    binary = isinstance(prefix, bytes)
    empty = b'' if binary else ''
    newline = b'\n' if binary else '\n'
    if not l:
        return empty
    items = l[:]
    clen = max(len(s) for s in items)
    ncols = (tty_width() - len(prefix)) // (clen + 2)
    if ncols <= 1:
        ncols = 1
        clen = 0
    # Pad so the items divide evenly into ncols columns.
    while len(items) % ncols:
        items.append(empty)
    rows = len(items) // ncols
    cols = [items[i:i + rows] for i in range(0, len(items), rows)]
    fmt = b'%-*s' if binary else '%-*s'
    out = empty
    for row in zip(*cols):
        cells = empty.join(fmt % (clen + 2, cell) for cell in row)
        out += prefix + cells + newline
    return out
948
949
def parse_date_or_fatal(str, fatal):
    """Parses the given date or calls Option.fatal().
    For now we expect a string that contains a float."""
    try:
        return float(str)
    except ValueError as e:
        raise fatal('invalid date format (should be a float): %r' % e)
959
960
def parse_excludes(options, fatal):
    """Traverse the options and extract all excludes, or call Option.fatal().

    options is a sequence of (option, parameter) pairs.  --exclude
    names one path; --exclude-from names a file containing one path
    per line.  Returns the resolved paths, deduplicated and sorted.
    """
    excluded_paths = []

    for flag in options:
        (option, parameter) = flag
        if option == '--exclude':
            excluded_paths.append(resolve_parent(argv_bytes(parameter)))
        elif option == '--exclude-from':
            try:
                f = open(resolve_parent(argv_bytes(parameter)), 'rb')
            except IOError as e:
                raise fatal("couldn't read %r" % parameter)
            # Bug fix: the file was previously never closed.
            with f:
                for exclude_path in f.readlines():
                    # FIXME: perhaps this should be rstrip('\n')
                    exclude_path = resolve_parent(exclude_path.strip())
                    if exclude_path:
                        excluded_paths.append(exclude_path)
    return sorted(frozenset(excluded_paths))
980
981
def parse_rx_excludes(options, fatal):
    """Traverse the options and extract all rx excludes, or call
    Option.fatal().

    --exclude-rx gives one pattern; --exclude-rx-from names a file of
    patterns, one per line.  Returns the compiled regexes.
    """
    excluded_patterns = []

    for flag in options:
        (option, parameter) = flag
        if option == '--exclude-rx':
            try:
                excluded_patterns.append(re.compile(argv_bytes(parameter)))
            except re.error as ex:
                fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex))
        elif option == '--exclude-rx-from':
            try:
                # NOTE(review): unlike parse_excludes, parameter is not
                # passed through argv_bytes here — confirm intent.
                f = open(resolve_parent(parameter), 'rb')
            except IOError as e:
                raise fatal("couldn't read %r" % parameter)
            # Bug fix: the file was previously never closed.
            with f:
                for pattern in f.readlines():
                    spattern = pattern.rstrip(b'\n')
                    if not spattern:
                        continue
                    try:
                        excluded_patterns.append(re.compile(spattern))
                    except re.error as ex:
                        fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex))
    return excluded_patterns
1008
1009
def should_rx_exclude_path(path, exclude_rxs):
    """Return True if path matches a regular expression in exclude_rxs."""
    for pattern in exclude_rxs:
        if not pattern.search(path):
            continue
        debug1('Skipping %r: excluded by rx pattern %r.\n'
               % (path, pattern.pattern))
        return True
    return False
1018
1019
1020# FIXME: Carefully consider the use of functions (os.path.*, etc.)
1021# that resolve against the current filesystem in the strip/graft
1022# functions for example, but elsewhere as well.  I suspect bup's not
1023# always being careful about that.  For some cases, the contents of
1024# the current filesystem should be irrelevant, and consulting it might
1025# produce the wrong result, perhaps via unintended symlink resolution,
1026# for example.
1027
def path_components(path):
    """Break path into a list of pairs of the form (name,
    full_path_to_name).  Path must start with '/'.
    Example:
      '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
    if not path.startswith(b'/'):
        raise Exception('path must start with "/": %s' % path_msg(path))
    # The leading '/' is represented by the first pair; normalize the
    # rest and accumulate one pair per remaining component.
    result = [(b'', b'/')]
    normalized = os.path.abspath(path)
    if normalized != b'/':
        so_far = b''
        for name in normalized.split(b'/')[1:]:
            so_far = so_far + b'/' + name
            result.append((name, so_far))
    return result
1045
1046
def stripped_path_components(path, strip_prefixes):
    """Strip any prefix in strip_prefixes from path and return a list
    of path components where each component is (name,
    none_or_full_fs_path_to_name).  Assume path startswith('/').
    See thelpers.py for examples."""
    normalized_path = os.path.abspath(path)
    # Try the longest prefixes first so the most specific one wins.
    for candidate in sorted(strip_prefixes, key=len, reverse=True):
        normalized_candidate = os.path.abspath(candidate)
        if normalized_candidate == b'/':
            continue
        if not normalized_path.startswith(normalized_candidate):
            continue
        accumulated = normalized_path[:len(normalized_candidate)]
        components = []
        for name in normalized_path[len(normalized_candidate):].split(b'/'):
            if name: # not root
                accumulated += b'/'
            accumulated += name
            components.append((name, accumulated))
        return components
    # Nothing to strip.
    return path_components(path)
1069
1070
def grafted_path_components(graft_points, path):
    """Return path components for path with any matching graft applied.

    graft_points is a sequence of (old_prefix, new_prefix) pairs (from
    --graft orig=new).  The result consists of some number of faked
    graft directories before the graft point -- each (name, None),
    since they have no corresponding filesystem path -- followed by all
    of the real directories from path that are after the graft point.
    The directory at the graft point in the result corresponds to the
    "orig" directory.  See t/thelpers.py for some examples.

    Note that given --graft orig=new, orig and new have *nothing* to do
    with each other, even if some of their component names match.
    i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically equivalent
    to --graft /foo/bar/baz=/x/y/z, or even /foo/bar/baz=/x.
    """
    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Expand prefixes iff not absolute paths.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if clean_path.startswith(old_prefix):
            # This previously used re.sub() with new_prefix as the
            # replacement, which misinterprets backslashes in
            # new_prefix as replacement escapes (and can raise
            # re.error).  Since we know clean_path starts with
            # old_prefix, plain slicing is equivalent and safe.
            grafted_path = new_prefix + clean_path[len(old_prefix):]
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = b'/' + grafted_path.lstrip(b'/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/')
            new_prefix_parts = new_prefix.split(b'/')
            result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')]
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_point = len(result_prefix)
            result[graft_point] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == b'/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
1112
1113
# Convenience alias for the SHA-1 constructor.
Sha1 = hashlib.sha1


# bup's C helper module may provide a localtime() of its own; it's
# optional, so fall back to None when it isn't available.
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # A struct_time look-alike with two extra trailing fields
    # (tm_gmtoff and tm_zone); see the note below about to_py_time().
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])

# Define a localtime() that returns bup_time when possible.  Note:
# this means that any helpers.localtime() results may need to be
# passed through to_py_time() before being passed to python's time
# module, which doesn't appear willing to ignore the extra items.
if _localtime:
    def localtime(time):
        """Return the local time for floor(time) (seconds since the
        epoch) as a bup_time."""
        return bup_time(*_helpers.localtime(floor(time)))
    def utc_offset_str(t):
        """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
        If the current UTC offset does not represent an integer number
        of minutes, the fractional component will be truncated."""
        off = localtime(t).tm_gmtoff
        # Note: // doesn't truncate like C for negative values, it rounds down.
        offmin = abs(off) // 60
        m = offmin % 60
        h = (offmin - m) // 60
        return b'%+03d%02d' % (-h if off < 0 else h, m)
    def to_py_time(x):
        """Return x trimmed to the 9 fields time.struct_time accepts."""
        if isinstance(x, time.struct_time):
            return x
        return time.struct_time(x[:9])
else:
    # No helper available; use the stdlib directly.
    localtime = time.localtime
    def utc_offset_str(t):
        """Return the local offset from UTC as b"+hhmm"/b"-hhmm" for time t."""
        # NOTE(review): time.strftime() with a bytes format only works
        # on Python 2; Python 3 requires str here -- confirm this
        # branch is never reached under Python 3.
        return time.strftime(b'%z', localtime(t))
    def to_py_time(x):
        # Already a stdlib time value; nothing to convert.
        return x
1152
1153
# Characters and sequences that are never allowed anywhere in a save name.
_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{')

def valid_save_name(name):
    """Return True if name is acceptable as a bup save name.

    Enforces a superset of the restrictions in git-check-ref-format(1).
    """
    if name == b'@':
        return False
    if name.startswith(b'/') or name.endswith(b'/') or name.endswith(b'.'):
        return False
    if _some_invalid_save_parts_rx.search(name):
        return False
    if any(byte_int(c) < 0x20 or byte_int(c) == 0x7f for c in name):
        return False
    return all(not part.startswith(b'.') and not part.endswith(b'.lock')
               for part in name.split(b'/'))
1171
1172
# A period is a count followed by a unit, e.g. b'2w' or b'30min'.
_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$')

def period_as_secs(s):
    """Return the number of seconds in period s (e.g. b'2w'),
    float('inf') for b'forever', or None if s is not a valid period."""
    if s == b'forever':
        return float('inf')
    match = _period_rx.match(s)
    if match is None:
        return None
    count, unit = match.groups()
    seconds_per_unit = {b's': 1,
                        b'min': 60,
                        b'h': 60 * 60,
                        b'd': 24 * 60 * 60,
                        b'w': 7 * 24 * 60 * 60,
                        b'm': 31 * 24 * 60 * 60,
                        b'y': 366 * 24 * 60 * 60}
    return int(count) * seconds_per_unit[unit]
1190