1"""Helper functions and classes for bup.""" 2 3from __future__ import absolute_import, division 4from collections import namedtuple 5from contextlib import contextmanager 6from ctypes import sizeof, c_void_p 7from math import floor 8from os import environ 9from subprocess import PIPE, Popen 10import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct 11import hashlib, heapq, math, operator, time, grp, tempfile 12 13from bup import _helpers 14from bup import compat 15from bup.compat import argv_bytes, byte_int 16from bup.io import byte_stream, path_msg 17# This function should really be in helpers, not in bup.options. But we 18# want options.py to be standalone so people can include it in other projects. 19from bup.options import _tty_width as tty_width 20 21 22class Nonlocal: 23 """Helper to deal with Python scoping issues""" 24 pass 25 26 27sc_page_size = os.sysconf('SC_PAGE_SIZE') 28assert(sc_page_size > 0) 29 30sc_arg_max = os.sysconf('SC_ARG_MAX') 31if sc_arg_max == -1: # "no definite limit" - let's choose 2M 32 sc_arg_max = 2 * 1024 * 1024 33 34def last(iterable): 35 result = None 36 for result in iterable: 37 pass 38 return result 39 40 41def atoi(s): 42 """Convert s (ascii bytes) to an integer. Return 0 if s is not a number.""" 43 try: 44 return int(s or b'0') 45 except ValueError: 46 return 0 47 48 49def atof(s): 50 """Convert s (ascii bytes) to a float. Return 0 if s is not a number.""" 51 try: 52 return float(s or b'0') 53 except ValueError: 54 return 0 55 56 57buglvl = atoi(os.environ.get('BUP_DEBUG', 0)) 58 59 60try: 61 _fdatasync = os.fdatasync 62except AttributeError: 63 _fdatasync = os.fsync 64 65if sys.platform.startswith('darwin'): 66 # Apparently os.fsync on OS X doesn't guarantee to sync all the way down 67 import fcntl 68 def fdatasync(fd): 69 try: 70 return fcntl.fcntl(fd, fcntl.F_FULLFSYNC) 71 except IOError as e: 72 # Fallback for file systems (SMB) that do not support F_FULLFSYNC 73 if e.errno == errno.ENOTSUP: 74 return _fdatasync(fd) 75 else: 76 raise 77else: 78 fdatasync = _fdatasync 79 80 81def partition(predicate, stream): 82 """Returns (leading_matches_it, rest_it), where leading_matches_it 83 must be completely exhausted before traversing rest_it. 84 85 """ 86 stream = iter(stream) 87 ns = Nonlocal() 88 ns.first_nonmatch = None 89 def leading_matches(): 90 for x in stream: 91 if predicate(x): 92 yield x 93 else: 94 ns.first_nonmatch = (x,) 95 break 96 def rest(): 97 if ns.first_nonmatch: 98 yield ns.first_nonmatch[0] 99 for x in stream: 100 yield x 101 return (leading_matches(), rest()) 102 103 104def merge_dict(*xs): 105 result = {} 106 for x in xs: 107 result.update(x) 108 return result 109 110 111def lines_until_sentinel(f, sentinel, ex_type): 112 # sentinel must end with \n and must contain only one \n 113 while True: 114 line = f.readline() 115 if not (line and line.endswith(b'\n')): 116 raise ex_type('Hit EOF while reading line') 117 if line == sentinel: 118 return 119 yield line 120 121 122def stat_if_exists(path): 123 try: 124 return os.stat(path) 125 except OSError as e: 126 if e.errno != errno.ENOENT: 127 raise 128 return None 129 130 131# Write (blockingly) to sockets that may or may not be in blocking mode. 132# We need this because our stderr is sometimes eaten by subprocesses 133# (probably ssh) that sometimes make it nonblocking, if only temporarily, 134# leading to race conditions. Ick. We'll do it the hard way. 135def _hard_write(fd, buf): 136 while buf: 137 (r,w,x) = select.select([], [fd], [], None) 138 if not w: 139 raise IOError('select(fd) returned without being writable') 140 try: 141 sz = os.write(fd, buf) 142 except OSError as e: 143 if e.errno != errno.EAGAIN: 144 raise 145 assert(sz >= 0) 146 buf = buf[sz:] 147 148 149_last_prog = 0 150def log(s): 151 """Print a log message to stderr.""" 152 global _last_prog 153 sys.stdout.flush() 154 _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode()) 155 _last_prog = 0 156 157 158def debug1(s): 159 if buglvl >= 1: 160 log(s) 161 162 163def debug2(s): 164 if buglvl >= 2: 165 log(s) 166 167 168istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1) 169istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2) 170_last_progress = '' 171def progress(s): 172 """Calls log() if stderr is a TTY. Does nothing otherwise.""" 173 global _last_progress 174 if istty2: 175 log(s) 176 _last_progress = s 177 178 179def qprogress(s): 180 """Calls progress() only if we haven't printed progress in a while. 181 182 This avoids overloading the stderr buffer with excess junk. 183 """ 184 global _last_prog 185 now = time.time() 186 if now - _last_prog > 0.1: 187 progress(s) 188 _last_prog = now 189 190 191def reprogress(): 192 """Calls progress() to redisplay the most recent progress message. 193 194 Useful after you've printed some other message that wipes out the 195 progress line. 196 """ 197 if _last_progress and _last_progress.endswith('\r'): 198 progress(_last_progress) 199 200 201def mkdirp(d, mode=None): 202 """Recursively create directories on path 'd'. 203 204 Unlike os.makedirs(), it doesn't raise an exception if the last element of 205 the path already exists. 206 """ 207 try: 208 if mode: 209 os.makedirs(d, mode) 210 else: 211 os.makedirs(d) 212 except OSError as e: 213 if e.errno == errno.EEXIST: 214 pass 215 else: 216 raise 217 218 219class MergeIterItem: 220 def __init__(self, entry, read_it): 221 self.entry = entry 222 self.read_it = read_it 223 def __lt__(self, x): 224 return self.entry < x.entry 225 226def merge_iter(iters, pfreq, pfunc, pfinal, key=None): 227 if key: 228 samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None) 229 else: 230 samekey = operator.eq 231 count = 0 232 total = sum(len(it) for it in iters) 233 iters = (iter(it) for it in iters) 234 heap = ((next(it, None),it) for it in iters) 235 heap = [MergeIterItem(e, it) for e, it in heap if e] 236 237 heapq.heapify(heap) 238 pe = None 239 while heap: 240 if not count % pfreq: 241 pfunc(count, total) 242 e, it = heap[0].entry, heap[0].read_it 243 if not samekey(e, pe): 244 pe = e 245 yield e 246 count += 1 247 try: 248 e = next(it) 249 except StopIteration: 250 heapq.heappop(heap) # remove current 251 else: 252 # shift current to new location 253 heapq.heapreplace(heap, MergeIterItem(e, it)) 254 pfinal(count, total) 255 256 257def unlink(f): 258 """Delete a file at path 'f' if it currently exists. 259 260 Unlike os.unlink(), does not throw an exception if the file didn't already 261 exist. 262 """ 263 try: 264 os.unlink(f) 265 except OSError as e: 266 if e.errno != errno.ENOENT: 267 raise 268 269 270_bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$') 271_sq_simple_id_rx = re.compile(r'^[-_./a-zA-Z0-9]+$') 272 273def bquote(x): 274 if x == b'': 275 return b"''" 276 if _bq_simple_id_rx.match(x): 277 return x 278 return b"'%s'" % x.replace(b"'", b"'\"'\"'") 279 280def squote(x): 281 if x == '': 282 return "''" 283 if _sq_simple_id_rx.match(x): 284 return x 285 return "'%s'" % x.replace("'", "'\"'\"'") 286 287def quote(x): 288 if isinstance(x, bytes): 289 return bquote(x) 290 if isinstance(x, compat.str_type): 291 return squote(x) 292 assert False 293 294def shstr(cmd): 295 """Return a shell quoted string for cmd if it's a sequence, else cmd. 296 297 cmd must be a string, bytes, or a sequence of one or the other, 298 and the assumption is that if cmd is a string or bytes, then it's 299 already quoted (because it's what's actually being passed to 300 call() and friends. e.g. log(shstr(cmd)); call(cmd) 301 302 """ 303 if isinstance(cmd, (bytes, compat.str_type)): 304 return cmd 305 elif all(isinstance(x, bytes) for x in cmd): 306 return b' '.join(map(bquote, cmd)) 307 elif all(isinstance(x, compat.str_type) for x in cmd): 308 return ' '.join(map(squote, cmd)) 309 raise TypeError('unsupported shstr argument: ' + repr(cmd)) 310 311 312exc = subprocess.check_call 313 314def exo(cmd, 315 input=None, 316 stdin=None, 317 stderr=None, 318 shell=False, 319 check=True, 320 preexec_fn=None, 321 close_fds=True): 322 if input: 323 assert stdin in (None, PIPE) 324 stdin = PIPE 325 p = Popen(cmd, 326 stdin=stdin, stdout=PIPE, stderr=stderr, 327 shell=shell, 328 preexec_fn=preexec_fn, 329 close_fds=close_fds) 330 out, err = p.communicate(input) 331 if check and p.returncode != 0: 332 raise Exception('subprocess %r failed with status %d%s' 333 % (b' '.join(map(quote, cmd)), p.returncode, 334 ', stderr: %r' % err if err else '')) 335 return out, err, p 336 337def readpipe(argv, preexec_fn=None, shell=False): 338 """Run a subprocess and return its output.""" 339 return exo(argv, preexec_fn=preexec_fn, shell=shell)[0] 340 341 342def _argmax_base(command): 343 base_size = 2048 344 for c in command: 345 base_size += len(command) + 1 346 for k, v in compat.items(environ): 347 base_size += len(k) + len(v) + 2 + sizeof(c_void_p) 348 return base_size 349 350 351def _argmax_args_size(args): 352 return sum(len(x) + 1 + sizeof(c_void_p) for x in args) 353 354 355def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max): 356 """If args is not empty, yield the output produced by calling the 357command list with args as a sequence of strings (It may be necessary 358to return multiple strings in order to respect ARG_MAX).""" 359 # The optional arg_max arg is a workaround for an issue with the 360 # current wvtest behavior. 361 base_size = _argmax_base(command) 362 while args: 363 room = arg_max - base_size 364 i = 0 365 while i < len(args): 366 next_size = _argmax_args_size(args[i:i+1]) 367 if room - next_size < 0: 368 break 369 room -= next_size 370 i += 1 371 sub_args = args[:i] 372 args = args[i:] 373 assert(len(sub_args)) 374 yield readpipe(command + sub_args, preexec_fn=preexec_fn) 375 376 377def resolve_parent(p): 378 """Return the absolute path of a file without following any final symlink. 379 380 Behaves like os.path.realpath, but doesn't follow a symlink for the last 381 element. (ie. if 'p' itself is a symlink, this one won't follow it, but it 382 will follow symlinks in p's directory) 383 """ 384 try: 385 st = os.lstat(p) 386 except OSError: 387 st = None 388 if st and stat.S_ISLNK(st.st_mode): 389 (dir, name) = os.path.split(p) 390 dir = os.path.realpath(dir) 391 out = os.path.join(dir, name) 392 else: 393 out = os.path.realpath(p) 394 #log('realpathing:%r,%r\n' % (p, out)) 395 return out 396 397 398def detect_fakeroot(): 399 "Return True if we appear to be running under fakeroot." 400 return os.getenv("FAKEROOTKEY") != None 401 402 403if sys.platform.startswith('cygwin'): 404 def is_superuser(): 405 # https://cygwin.com/ml/cygwin/2015-02/msg00057.html 406 groups = os.getgroups() 407 return 544 in groups or 0 in groups 408else: 409 def is_superuser(): 410 return os.geteuid() == 0 411 412 413def cache_key_value(get_value, key, cache): 414 """Return (value, was_cached). If there is a value in the cache 415 for key, use that, otherwise, call get_value(key) which should 416 throw a KeyError if there is no value -- in which case the cached 417 and returned value will be None. 418 """ 419 try: # Do we already have it (or know there wasn't one)? 420 value = cache[key] 421 return value, True 422 except KeyError: 423 pass 424 value = None 425 try: 426 cache[key] = value = get_value(key) 427 except KeyError: 428 cache[key] = None 429 return value, False 430 431 432_hostname = None 433def hostname(): 434 """Get the FQDN of this machine.""" 435 global _hostname 436 if not _hostname: 437 _hostname = _helpers.gethostname() 438 return _hostname 439 440 441def format_filesize(size): 442 unit = 1024.0 443 size = float(size) 444 if size < unit: 445 return "%d" % (size) 446 exponent = int(math.log(size) // math.log(unit)) 447 size_prefix = "KMGTPE"[exponent - 1] 448 return "%.1f%s" % (size / math.pow(unit, exponent), size_prefix) 449 450 451class NotOk(Exception): 452 pass 453 454 455class BaseConn: 456 def __init__(self, outp): 457 self.outp = outp 458 459 def close(self): 460 while self._read(65536): pass 461 462 def _read(self, size): 463 raise NotImplementedError("Subclasses must implement _read") 464 465 def read(self, size): 466 """Read 'size' bytes from input stream.""" 467 self.outp.flush() 468 return self._read(size) 469 470 def _readline(self, size): 471 raise NotImplementedError("Subclasses must implement _readline") 472 473 def readline(self): 474 """Read from input stream until a newline is found.""" 475 self.outp.flush() 476 return self._readline() 477 478 def write(self, data): 479 """Write 'data' to output stream.""" 480 #log('%d writing: %d bytes\n' % (os.getpid(), len(data))) 481 self.outp.write(data) 482 483 def has_input(self): 484 """Return true if input stream is readable.""" 485 raise NotImplementedError("Subclasses must implement has_input") 486 487 def ok(self): 488 """Indicate end of output from last sent command.""" 489 self.write(b'\nok\n') 490 491 def error(self, s): 492 """Indicate server error to the client.""" 493 s = re.sub(br'\s+', b' ', s) 494 self.write(b'\nerror %s\n' % s) 495 496 def _check_ok(self, onempty): 497 self.outp.flush() 498 rl = b'' 499 for rl in linereader(self): 500 #log('%d got line: %r\n' % (os.getpid(), rl)) 501 if not rl: # empty line 502 continue 503 elif rl == b'ok': 504 return None 505 elif rl.startswith(b'error '): 506 #log('client: error: %s\n' % rl[6:]) 507 return NotOk(rl[6:]) 508 else: 509 onempty(rl) 510 raise Exception('server exited unexpectedly; see errors above') 511 512 def drain_and_check_ok(self): 513 """Remove all data for the current command from input stream.""" 514 def onempty(rl): 515 pass 516 return self._check_ok(onempty) 517 518 def check_ok(self): 519 """Verify that server action completed successfully.""" 520 def onempty(rl): 521 raise Exception('expected "ok", got %r' % rl) 522 return self._check_ok(onempty) 523 524 525class Conn(BaseConn): 526 def __init__(self, inp, outp): 527 BaseConn.__init__(self, outp) 528 self.inp = inp 529 530 def _read(self, size): 531 return self.inp.read(size) 532 533 def _readline(self): 534 return self.inp.readline() 535 536 def has_input(self): 537 [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0) 538 if rl: 539 assert(rl[0] == self.inp.fileno()) 540 return True 541 else: 542 return None 543 544 545def checked_reader(fd, n): 546 while n > 0: 547 rl, _, _ = select.select([fd], [], []) 548 assert(rl[0] == fd) 549 buf = os.read(fd, n) 550 if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n) 551 yield buf 552 n -= len(buf) 553 554 555MAX_PACKET = 128 * 1024 556def mux(p, outfd, outr, errr): 557 try: 558 fds = [outr, errr] 559 while p.poll() is None: 560 rl, _, _ = select.select(fds, [], []) 561 for fd in rl: 562 if fd == outr: 563 buf = os.read(outr, MAX_PACKET) 564 if not buf: break 565 os.write(outfd, struct.pack('!IB', len(buf), 1) + buf) 566 elif fd == errr: 567 buf = os.read(errr, 1024) 568 if not buf: break 569 os.write(outfd, struct.pack('!IB', len(buf), 2) + buf) 570 finally: 571 os.write(outfd, struct.pack('!IB', 0, 3)) 572 573 574class DemuxConn(BaseConn): 575 """A helper class for bup's client-server protocol.""" 576 def __init__(self, infd, outp): 577 BaseConn.__init__(self, outp) 578 # Anything that comes through before the sync string was not 579 # multiplexed and can be assumed to be debug/log before mux init. 580 tail = b'' 581 while tail != b'BUPMUX': 582 b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1) 583 if not b: 584 raise IOError('demux: unexpected EOF during initialization') 585 tail += b 586 byte_stream(sys.stderr).write(tail[:-6]) # pre-mux log messages 587 tail = tail[-6:] 588 self.infd = infd 589 self.reader = None 590 self.buf = None 591 self.closed = False 592 593 def write(self, data): 594 self._load_buf(0) 595 BaseConn.write(self, data) 596 597 def _next_packet(self, timeout): 598 if self.closed: return False 599 rl, wl, xl = select.select([self.infd], [], [], timeout) 600 if not rl: return False 601 assert(rl[0] == self.infd) 602 ns = b''.join(checked_reader(self.infd, 5)) 603 n, fdw = struct.unpack('!IB', ns) 604 assert(n <= MAX_PACKET) 605 if fdw == 1: 606 self.reader = checked_reader(self.infd, n) 607 elif fdw == 2: 608 for buf in checked_reader(self.infd, n): 609 byte_stream(sys.stderr).write(buf) 610 elif fdw == 3: 611 self.closed = True 612 debug2("DemuxConn: marked closed\n") 613 return True 614 615 def _load_buf(self, timeout): 616 if self.buf is not None: 617 return True 618 while not self.closed: 619 while not self.reader: 620 if not self._next_packet(timeout): 621 return False 622 try: 623 self.buf = next(self.reader) 624 return True 625 except StopIteration: 626 self.reader = None 627 return False 628 629 def _read_parts(self, ix_fn): 630 while self._load_buf(None): 631 assert(self.buf is not None) 632 i = ix_fn(self.buf) 633 if i is None or i == len(self.buf): 634 yv = self.buf 635 self.buf = None 636 else: 637 yv = self.buf[:i] 638 self.buf = self.buf[i:] 639 yield yv 640 if i is not None: 641 break 642 643 def _readline(self): 644 def find_eol(buf): 645 try: 646 return buf.index(b'\n')+1 647 except ValueError: 648 return None 649 return b''.join(self._read_parts(find_eol)) 650 651 def _read(self, size): 652 csize = [size] 653 def until_size(buf): # Closes on csize 654 if len(buf) < csize[0]: 655 csize[0] -= len(buf) 656 return None 657 else: 658 return csize[0] 659 return b''.join(self._read_parts(until_size)) 660 661 def has_input(self): 662 return self._load_buf(0) 663 664 665def linereader(f): 666 """Generate a list of input lines from 'f' without terminating newlines.""" 667 while 1: 668 line = f.readline() 669 if not line: 670 break 671 yield line[:-1] 672 673 674def chunkyreader(f, count = None): 675 """Generate a list of chunks of data read from 'f'. 676 677 If count is None, read until EOF is reached. 678 679 If count is a positive integer, read 'count' bytes from 'f'. If EOF is 680 reached while reading, raise IOError. 681 """ 682 if count != None: 683 while count > 0: 684 b = f.read(min(count, 65536)) 685 if not b: 686 raise IOError('EOF with %d bytes remaining' % count) 687 yield b 688 count -= len(b) 689 else: 690 while 1: 691 b = f.read(65536) 692 if not b: break 693 yield b 694 695 696@contextmanager 697def atomically_replaced_file(name, mode='w', buffering=-1): 698 """Yield a file that will be atomically renamed name when leaving the block. 699 700 This contextmanager yields an open file object that is backed by a 701 temporary file which will be renamed (atomically) to the target 702 name if everything succeeds. 703 704 The mode and buffering arguments are handled exactly as with open, 705 and the yielded file will have very restrictive permissions, as 706 per mkstemp. 707 708 E.g.:: 709 710 with atomically_replaced_file('foo.txt', 'w') as f: 711 f.write('hello jack.') 712 713 """ 714 715 (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name), 716 text=('b' not in mode)) 717 try: 718 try: 719 f = os.fdopen(ffd, mode, buffering) 720 except: 721 os.close(ffd) 722 raise 723 try: 724 yield f 725 finally: 726 f.close() 727 os.rename(tempname, name) 728 finally: 729 unlink(tempname) # nonexistant file is ignored 730 731 732def slashappend(s): 733 """Append "/" to 's' if it doesn't aleady end in "/".""" 734 assert isinstance(s, bytes) 735 if s and not s.endswith(b'/'): 736 return s + b'/' 737 else: 738 return s 739 740 741def _mmap_do(f, sz, flags, prot, close): 742 if not sz: 743 st = os.fstat(f.fileno()) 744 sz = st.st_size 745 if not sz: 746 # trying to open a zero-length map gives an error, but an empty 747 # string has all the same behaviour of a zero-length map, ie. it has 748 # no elements :) 749 return '' 750 map = mmap.mmap(f.fileno(), sz, flags, prot) 751 if close: 752 f.close() # map will persist beyond file close 753 return map 754 755 756def mmap_read(f, sz = 0, close=True): 757 """Create a read-only memory mapped region on file 'f'. 758 If sz is 0, the region will cover the entire file. 759 """ 760 return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close) 761 762 763def mmap_readwrite(f, sz = 0, close=True): 764 """Create a read-write memory mapped region on file 'f'. 765 If sz is 0, the region will cover the entire file. 766 """ 767 return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE, 768 close) 769 770 771def mmap_readwrite_private(f, sz = 0, close=True): 772 """Create a read-write memory mapped region on file 'f'. 773 If sz is 0, the region will cover the entire file. 774 The map is private, which means the changes are never flushed back to the 775 file. 776 """ 777 return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE, 778 close) 779 780 781_mincore = getattr(_helpers, 'mincore', None) 782if _mincore: 783 # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined. 784 MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1) 785 786 _fmincore_chunk_size = None 787 def _set_fmincore_chunk_size(): 788 global _fmincore_chunk_size 789 pref_chunk_size = 64 * 1024 * 1024 790 chunk_size = sc_page_size 791 if (sc_page_size < pref_chunk_size): 792 chunk_size = sc_page_size * (pref_chunk_size // sc_page_size) 793 _fmincore_chunk_size = chunk_size 794 795 def fmincore(fd): 796 """Return the mincore() data for fd as a bytearray whose values can be 797 tested via MINCORE_INCORE, or None if fd does not fully 798 support the operation.""" 799 st = os.fstat(fd) 800 if (st.st_size == 0): 801 return bytearray(0) 802 if not _fmincore_chunk_size: 803 _set_fmincore_chunk_size() 804 pages_per_chunk = _fmincore_chunk_size // sc_page_size; 805 page_count = (st.st_size + sc_page_size - 1) // sc_page_size; 806 chunk_count = (st.st_size + _fmincore_chunk_size - 1) // _fmincore_chunk_size 807 result = bytearray(page_count) 808 for ci in compat.range(chunk_count): 809 pos = _fmincore_chunk_size * ci; 810 msize = min(_fmincore_chunk_size, st.st_size - pos) 811 try: 812 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos) 813 except mmap.error as ex: 814 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV: 815 # Perhaps the file was a pipe, i.e. "... | bup split ..." 816 return None 817 raise ex 818 try: 819 _mincore(m, msize, 0, result, ci * pages_per_chunk) 820 except OSError as ex: 821 if ex.errno == errno.ENOSYS: 822 return None 823 raise 824 return result 825 826 827def parse_timestamp(epoch_str): 828 """Return the number of nanoseconds since the epoch that are described 829by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed, 830throw a ValueError that may contain additional information.""" 831 ns_per = {'s' : 1000000000, 832 'ms' : 1000000, 833 'us' : 1000, 834 'ns' : 1} 835 match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str) 836 if not match: 837 if re.match(r'^([-+]?[0-9]+)$', epoch_str): 838 raise ValueError('must include units, i.e. 100ns, 100ms, ...') 839 raise ValueError() 840 (n, units) = match.group(1, 2) 841 if not n: 842 n = 1 843 n = int(n) 844 return n * ns_per[units] 845 846 847def parse_num(s): 848 """Parse string or bytes as a possibly unit suffixed number. 849 850 For example: 851 199.2k means 203981 bytes 852 1GB means 1073741824 bytes 853 2.1 tb means 2199023255552 bytes 854 """ 855 if isinstance(s, bytes): 856 # FIXME: should this raise a ValueError for UnicodeDecodeError 857 # (perhaps with the latter as the context). 858 s = s.decode('ascii') 859 g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s)) 860 if not g: 861 raise ValueError("can't parse %r as a number" % s) 862 (val, unit) = g.groups() 863 num = float(val) 864 unit = unit.lower() 865 if unit in ['t', 'tb']: 866 mult = 1024*1024*1024*1024 867 elif unit in ['g', 'gb']: 868 mult = 1024*1024*1024 869 elif unit in ['m', 'mb']: 870 mult = 1024*1024 871 elif unit in ['k', 'kb']: 872 mult = 1024 873 elif unit in ['', 'b']: 874 mult = 1 875 else: 876 raise ValueError("invalid unit %r in number %r" % (unit, s)) 877 return int(num*mult) 878 879 880saved_errors = [] 881def add_error(e): 882 """Append an error message to the list of saved errors. 883 884 Once processing is able to stop and output the errors, the saved errors are 885 accessible in the module variable helpers.saved_errors. 886 """ 887 saved_errors.append(e) 888 log('%-70s\n' % e) 889 890 891def clear_errors(): 892 global saved_errors 893 saved_errors = [] 894 895 896def die_if_errors(msg=None, status=1): 897 global saved_errors 898 if saved_errors: 899 if not msg: 900 msg = 'warning: %d errors encountered\n' % len(saved_errors) 901 log(msg) 902 sys.exit(status) 903 904 905def handle_ctrl_c(): 906 """Replace the default exception handler for KeyboardInterrupt (Ctrl-C). 907 908 The new exception handler will make sure that bup will exit without an ugly 909 stacktrace when Ctrl-C is hit. 910 """ 911 oldhook = sys.excepthook 912 def newhook(exctype, value, traceback): 913 if exctype == KeyboardInterrupt: 914 log('\nInterrupted.\n') 915 else: 916 return oldhook(exctype, value, traceback) 917 sys.excepthook = newhook 918 919 920def columnate(l, prefix): 921 """Format elements of 'l' in columns with 'prefix' leading each line. 922 923 The number of columns is determined automatically based on the string 924 lengths. 925 """ 926 binary = isinstance(prefix, bytes) 927 nothing = b'' if binary else '' 928 nl = b'\n' if binary else '\n' 929 if not l: 930 return nothing 931 l = l[:] 932 clen = max(len(s) for s in l) 933 ncols = (tty_width() - len(prefix)) // (clen + 2) 934 if ncols <= 1: 935 ncols = 1 936 clen = 0 937 cols = [] 938 while len(l) % ncols: 939 l.append(nothing) 940 rows = len(l) // ncols 941 for s in compat.range(0, len(l), rows): 942 cols.append(l[s:s+rows]) 943 out = nothing 944 fmt = b'%-*s' if binary else '%-*s' 945 for row in zip(*cols): 946 out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl 947 return out 948 949 950def parse_date_or_fatal(str, fatal): 951 """Parses the given date or calls Option.fatal(). 952 For now we expect a string that contains a float.""" 953 try: 954 date = float(str) 955 except ValueError as e: 956 raise fatal('invalid date format (should be a float): %r' % e) 957 else: 958 return date 959 960 961def parse_excludes(options, fatal): 962 """Traverse the options and extract all excludes, or call Option.fatal().""" 963 excluded_paths = [] 964 965 for flag in options: 966 (option, parameter) = flag 967 if option == '--exclude': 968 excluded_paths.append(resolve_parent(argv_bytes(parameter))) 969 elif option == '--exclude-from': 970 try: 971 f = open(resolve_parent(argv_bytes(parameter)), 'rb') 972 except IOError as e: 973 raise fatal("couldn't read %r" % parameter) 974 for exclude_path in f.readlines(): 975 # FIXME: perhaps this should be rstrip('\n') 976 exclude_path = resolve_parent(exclude_path.strip()) 977 if exclude_path: 978 excluded_paths.append(exclude_path) 979 return sorted(frozenset(excluded_paths)) 980 981 982def parse_rx_excludes(options, fatal): 983 """Traverse the options and extract all rx excludes, or call 984 Option.fatal().""" 985 excluded_patterns = [] 986 987 for flag in options: 988 (option, parameter) = flag 989 if option == '--exclude-rx': 990 try: 991 excluded_patterns.append(re.compile(argv_bytes(parameter))) 992 except re.error as ex: 993 fatal('invalid --exclude-rx pattern (%r): %s' % (parameter, ex)) 994 elif option == '--exclude-rx-from': 995 try: 996 f = open(resolve_parent(parameter), 'rb') 997 except IOError as e: 998 raise fatal("couldn't read %r" % parameter) 999 for pattern in f.readlines(): 1000 spattern = pattern.rstrip(b'\n') 1001 if not spattern: 1002 continue 1003 try: 1004 excluded_patterns.append(re.compile(spattern)) 1005 except re.error as ex: 1006 fatal('invalid --exclude-rx pattern (%r): %s' % (spattern, ex)) 1007 return excluded_patterns 1008 1009 1010def should_rx_exclude_path(path, exclude_rxs): 1011 """Return True if path matches a regular expression in exclude_rxs.""" 1012 for rx in exclude_rxs: 1013 if rx.search(path): 1014 debug1('Skipping %r: excluded by rx pattern %r.\n' 1015 % (path, rx.pattern)) 1016 return True 1017 return False 1018 1019 1020# FIXME: Carefully consider the use of functions (os.path.*, etc.) 1021# that resolve against the current filesystem in the strip/graft 1022# functions for example, but elsewhere as well. I suspect bup's not 1023# always being careful about that. For some cases, the contents of 1024# the current filesystem should be irrelevant, and consulting it might 1025# produce the wrong result, perhaps via unintended symlink resolution, 1026# for example. 1027 1028def path_components(path): 1029 """Break path into a list of pairs of the form (name, 1030 full_path_to_name). Path must start with '/'. 1031 Example: 1032 '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]""" 1033 if not path.startswith(b'/'): 1034 raise Exception('path must start with "/": %s' % path_msg(path)) 1035 # Since we assume path startswith('/'), we can skip the first element. 1036 result = [(b'', b'/')] 1037 norm_path = os.path.abspath(path) 1038 if norm_path == b'/': 1039 return result 1040 full_path = b'' 1041 for p in norm_path.split(b'/')[1:]: 1042 full_path += b'/' + p 1043 result.append((p, full_path)) 1044 return result 1045 1046 1047def stripped_path_components(path, strip_prefixes): 1048 """Strip any prefix in strip_prefixes from path and return a list 1049 of path components where each component is (name, 1050 none_or_full_fs_path_to_name). Assume path startswith('/'). 1051 See thelpers.py for examples.""" 1052 normalized_path = os.path.abspath(path) 1053 sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True) 1054 for bp in sorted_strip_prefixes: 1055 normalized_bp = os.path.abspath(bp) 1056 if normalized_bp == b'/': 1057 continue 1058 if normalized_path.startswith(normalized_bp): 1059 prefix = normalized_path[:len(normalized_bp)] 1060 result = [] 1061 for p in normalized_path[len(normalized_bp):].split(b'/'): 1062 if p: # not root 1063 prefix += b'/' 1064 prefix += p 1065 result.append((p, prefix)) 1066 return result 1067 # Nothing to strip. 1068 return path_components(path) 1069 1070 1071def grafted_path_components(graft_points, path): 1072 # Create a result that consists of some number of faked graft 1073 # directories before the graft point, followed by all of the real 1074 # directories from path that are after the graft point. Arrange 1075 # for the directory at the graft point in the result to correspond 1076 # to the "orig" directory in --graft orig=new. See t/thelpers.py 1077 # for some examples. 1078 1079 # Note that given --graft orig=new, orig and new have *nothing* to 1080 # do with each other, even if some of their component names 1081 # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically 1082 # equivalent to --graft /foo/bar/baz=/x/y/z, or even 1083 # /foo/bar/baz=/x. 1084 1085 # FIXME: This can't be the best solution... 1086 clean_path = os.path.abspath(path) 1087 for graft_point in graft_points: 1088 old_prefix, new_prefix = graft_point 1089 # Expand prefixes iff not absolute paths. 1090 old_prefix = os.path.normpath(old_prefix) 1091 new_prefix = os.path.normpath(new_prefix) 1092 if clean_path.startswith(old_prefix): 1093 escaped_prefix = re.escape(old_prefix) 1094 grafted_path = re.sub(br'^' + escaped_prefix, new_prefix, clean_path) 1095 # Handle /foo=/ (at least) -- which produces //whatever. 1096 grafted_path = b'/' + grafted_path.lstrip(b'/') 1097 clean_path_components = path_components(clean_path) 1098 # Count the components that were stripped. 1099 strip_count = 0 if old_prefix == b'/' else old_prefix.count(b'/') 1100 new_prefix_parts = new_prefix.split(b'/') 1101 result_prefix = grafted_path.split(b'/')[:new_prefix.count(b'/')] 1102 result = [(p, None) for p in result_prefix] \ 1103 + clean_path_components[strip_count:] 1104 # Now set the graft point name to match the end of new_prefix. 1105 graft_point = len(result_prefix) 1106 result[graft_point] = \ 1107 (new_prefix_parts[-1], clean_path_components[strip_count][1]) 1108 if new_prefix == b'/': # --graft ...=/ is a special case. 1109 return result[1:] 1110 return result 1111 return path_components(clean_path) 1112 1113 1114Sha1 = hashlib.sha1 1115 1116 1117_localtime = getattr(_helpers, 'localtime', None) 1118 1119if _localtime: 1120 bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday', 1121 'tm_hour', 'tm_min', 'tm_sec', 1122 'tm_wday', 'tm_yday', 1123 'tm_isdst', 'tm_gmtoff', 'tm_zone']) 1124 1125# Define a localtime() that returns bup_time when possible. Note: 1126# this means that any helpers.localtime() results may need to be 1127# passed through to_py_time() before being passed to python's time 1128# module, which doesn't appear willing to ignore the extra items. 1129if _localtime: 1130 def localtime(time): 1131 return bup_time(*_helpers.localtime(floor(time))) 1132 def utc_offset_str(t): 1133 """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t. 1134 If the current UTC offset does not represent an integer number 1135 of minutes, the fractional component will be truncated.""" 1136 off = localtime(t).tm_gmtoff 1137 # Note: // doesn't truncate like C for negative values, it rounds down. 1138 offmin = abs(off) // 60 1139 m = offmin % 60 1140 h = (offmin - m) // 60 1141 return b'%+03d%02d' % (-h if off < 0 else h, m) 1142 def to_py_time(x): 1143 if isinstance(x, time.struct_time): 1144 return x 1145 return time.struct_time(x[:9]) 1146else: 1147 localtime = time.localtime 1148 def utc_offset_str(t): 1149 return time.strftime(b'%z', localtime(t)) 1150 def to_py_time(x): 1151 return x 1152 1153 1154_some_invalid_save_parts_rx = re.compile(br'[\[ ~^:?*\\]|\.\.|//|@{') 1155 1156def valid_save_name(name): 1157 # Enforce a superset of the restrictions in git-check-ref-format(1) 1158 if name == b'@' \ 1159 or name.startswith(b'/') or name.endswith(b'/') \ 1160 or name.endswith(b'.'): 1161 return False 1162 if _some_invalid_save_parts_rx.search(name): 1163 return False 1164 for c in name: 1165 if byte_int(c) < 0x20 or byte_int(c) == 0x7f: 1166 return False 1167 for part in name.split(b'/'): 1168 if part.startswith(b'.') or part.endswith(b'.lock'): 1169 return False 1170 return True 1171 1172 1173_period_rx = re.compile(br'^([0-9]+)(s|min|h|d|w|m|y)$') 1174 1175def period_as_secs(s): 1176 if s == b'forever': 1177 return float('inf') 1178 match = _period_rx.match(s) 1179 if not match: 1180 return None 1181 mag = int(match.group(1)) 1182 scale = match.group(2) 1183 return mag * {b's': 1, 1184 b'min': 60, 1185 b'h': 60 * 60, 1186 b'd': 60 * 60 * 24, 1187 b'w': 60 * 60 * 24 * 7, 1188 b'm': 60 * 60 * 24 * 31, 1189 b'y': 60 * 60 * 24 * 366}[scale] 1190