1from __future__ import division, print_function, unicode_literals 2 3from contextlib import closing, contextmanager 4from copy import copy 5from os import chdir, getcwd, stat, walk 6from os.path import abspath, dirname, join 7from stat import S_ISREG 8import tarfile 9try: 10 from stat import filemode 11except ImportError: # Python 2 12 filemode = tarfile.filemode 13 14from libarchive import file_reader 15 16from . import surrogateescape 17 18 19data_dir = join(dirname(__file__), 'data') 20surrogateescape.register() 21 22 23def check_archive(archive, tree): 24 tree2 = copy(tree) 25 for e in archive: 26 epath = str(e).rstrip('/') 27 assert epath in tree2 28 estat = tree2.pop(epath) 29 assert e.mtime == int(estat['mtime']) 30 if not e.isdir: 31 size = e.size 32 if size is not None: 33 assert size == estat['size'] 34 with open(epath, 'rb') as f: 35 for block in e.get_blocks(): 36 assert f.read(len(block)) == block 37 leftover = f.read() 38 assert not leftover 39 40 # Check that there are no missing directories or files 41 assert len(tree2) == 0 42 43 44def get_entries(location): 45 """ 46 Using the archive file at `location`, return an iterable of name->value 47 mappings for each libarchive.ArchiveEntry objects essential attributes. 48 Paths are base64-encoded because JSON is UTF-8 and cannot handle 49 arbitrary binary pathdata. 50 """ 51 with file_reader(location) as arch: 52 for entry in arch: 53 # libarchive introduces prefixes such as h prefix for 54 # hardlinks: tarfile does not, so we ignore the first char 55 mode = entry.strmode[1:].decode('ascii') 56 yield { 57 'path': surrogate_decode(entry.pathname), 58 'mtime': entry.mtime, 59 'size': entry.size, 60 'mode': mode, 61 'isreg': entry.isreg, 62 'isdir': entry.isdir, 63 'islnk': entry.islnk, 64 'issym': entry.issym, 65 'linkpath': surrogate_decode(entry.linkpath), 66 'isblk': entry.isblk, 67 'ischr': entry.ischr, 68 'isfifo': entry.isfifo, 69 'isdev': entry.isdev, 70 'uid': entry.uid, 71 'gid': entry.gid 72 } 73 74 75def get_tarinfos(location): 76 """ 77 Using the tar archive file at `location`, return an iterable of 78 name->value mappings for each tarfile.TarInfo objects essential 79 attributes. 80 Paths are base64-encoded because JSON is UTF-8 and cannot handle 81 arbitrary binary pathdata. 82 """ 83 with closing(tarfile.open(location)) as tar: 84 for entry in tar: 85 path = surrogate_decode(entry.path or '') 86 if entry.isdir() and not path.endswith('/'): 87 path += '/' 88 # libarchive introduces prefixes such as h prefix for 89 # hardlinks: tarfile does not, so we ignore the first char 90 mode = filemode(entry.mode)[1:] 91 yield { 92 'path': path, 93 'mtime': entry.mtime, 94 'size': entry.size, 95 'mode': mode, 96 'isreg': entry.isreg(), 97 'isdir': entry.isdir(), 98 'islnk': entry.islnk(), 99 'issym': entry.issym(), 100 'linkpath': surrogate_decode(entry.linkpath or None), 101 'isblk': entry.isblk(), 102 'ischr': entry.ischr(), 103 'isfifo': entry.isfifo(), 104 'isdev': entry.isdev(), 105 'uid': entry.uid, 106 'gid': entry.gid 107 } 108 109 110@contextmanager 111def in_dir(dirpath): 112 prev = abspath(getcwd()) 113 chdir(dirpath) 114 try: 115 yield 116 finally: 117 chdir(prev) 118 119 120def stat_dict(path): 121 keys = set(('uid', 'gid', 'mtime')) 122 mode, _, _, _, uid, gid, size, _, mtime, _ = stat(path) 123 if S_ISREG(mode): 124 keys.add('size') 125 return {k: v for k, v in locals().items() if k in keys} 126 127 128def treestat(d, stat_dict=stat_dict): 129 r = {} 130 for dirpath, dirnames, filenames in walk(d): 131 r[dirpath] = stat_dict(dirpath) 132 for fname in filenames: 133 fpath = join(dirpath, fname) 134 r[fpath] = stat_dict(fpath) 135 return r 136 137 138def surrogate_decode(o): 139 if isinstance(o, bytes): 140 return o.decode('utf8', errors='surrogateescape') 141 return o 142