1from __future__ import division, print_function, unicode_literals
2
3from contextlib import closing, contextmanager
4from copy import copy
5from os import chdir, getcwd, stat, walk
6from os.path import abspath, dirname, join
7from stat import S_ISREG
8import tarfile
9try:
10    from stat import filemode
11except ImportError:  # Python 2
12    filemode = tarfile.filemode
13
14from libarchive import file_reader
15
16from . import surrogateescape
17
18
# NOTE(review): presumably installs the 'surrogateescape' codec error handler
# that surrogate_decode() relies on (needed on Python 2) -- confirm in the
# package's surrogateescape module.
surrogateescape.register()

# Path of the 'data' directory located next to this module.
data_dir = join(dirname(__file__), 'data')
21
22
def check_archive(archive, tree):
    """
    Assert that the entries of `archive` match what `tree` describes.

    `tree` maps a path to a stat mapping holding 'mtime' (and 'size' for
    entries whose size is known). Each entry must be a key of `tree` with an
    equal mtime. For entries that are not directories, the archived blocks
    are also compared against the file opened at the entry path, and the
    file must hold no extra data. Once all entries are consumed, no path of
    `tree` may be left over.

    `tree` is not modified: a shallow copy is consumed instead.
    """
    remaining = copy(tree)
    for entry in archive:
        # Strip any trailing slash so the entry path can be looked up
        entry_path = str(entry).rstrip('/')
        assert entry_path in remaining
        expected = remaining.pop(entry_path)
        assert entry.mtime == int(expected['mtime'])
        if entry.isdir:
            # Directories carry no content to compare
            continue
        entry_size = entry.size
        if entry_size is not None:
            assert entry_size == expected['size']
        with open(entry_path, 'rb') as on_disk:
            for chunk in entry.get_blocks():
                assert on_disk.read(len(chunk)) == chunk
            # The file must not be longer than the archived data
            assert not on_disk.read()

    # Check that there are no missing directories or files
    assert len(remaining) == 0
42
43
def get_entries(location):
    """
    Yield one name->value mapping of essential attributes for each entry
    that libarchive's `file_reader` reads from the archive at `location`.

    The 'path' and 'linkpath' values are passed through `surrogate_decode`;
    'mode' is the entry's strmode, ASCII-decoded, without its first
    character.
    """
    with file_reader(location) as archive:
        for item in archive:
            # The first strmode character is dropped: libarchive puts a
            # prefix there (such as "h" for hardlinks) that tarfile does
            # not produce.
            permissions = item.strmode[1:].decode('ascii')
            attributes = {
                'path': surrogate_decode(item.pathname),
                'mtime': item.mtime,
                'size': item.size,
                'mode': permissions,
                'isreg': item.isreg,
                'isdir': item.isdir,
                'islnk': item.islnk,
                'issym': item.issym,
                'linkpath': surrogate_decode(item.linkpath),
                'isblk': item.isblk,
                'ischr': item.ischr,
                'isfifo': item.isfifo,
                'isdev': item.isdev,
                'uid': item.uid,
                'gid': item.gid
            }
            yield attributes
73
74
def get_tarinfos(location):
    """
    Yield one name->value mapping of essential attributes for each
    tarfile.TarInfo member of the tar archive at `location`.

    The mappings use the same keys as the ones built by `get_entries`.
    'path' and 'linkpath' are passed through `surrogate_decode`; directory
    paths always end with a slash and an empty link path becomes None.
    """
    with closing(tarfile.open(location)) as archive:
        for member in archive:
            member_path = surrogate_decode(member.path or '')
            lacks_slash = member.isdir() and not member_path.endswith('/')
            if lacks_slash:
                member_path = member_path + '/'
            # The first character of the mode string is skipped, matching
            # what get_entries does with libarchive's strmode (whose first
            # character is a prefix such as "h" for hardlinks).
            permissions = filemode(member.mode)[1:]
            attributes = {
                'path': member_path,
                'mtime': member.mtime,
                'size': member.size,
                'mode': permissions,
                'isreg': member.isreg(),
                'isdir': member.isdir(),
                'islnk': member.islnk(),
                'issym': member.issym(),
                # A falsy link path is normalized to None
                'linkpath': surrogate_decode(member.linkpath or None),
                'isblk': member.isblk(),
                'ischr': member.ischr(),
                'isfifo': member.isfifo(),
                'isdev': member.isdev(),
                'uid': member.uid,
                'gid': member.gid
            }
            yield attributes
108
109
@contextmanager
def in_dir(dirpath):
    """
    Context manager running its body with `dirpath` as the current working
    directory.

    The working directory in effect on entry is restored on exit, whether
    the body finishes normally or raises. Nothing is yielded to the body.
    """
    original_cwd = abspath(getcwd())
    chdir(dirpath)
    try:
        yield
    finally:
        # Always switch back, even when the body raised
        chdir(original_cwd)
118
119
def stat_dict(path):
    """
    Return the essential stat attributes of `path` as a mapping.

    The mapping always holds 'uid', 'gid' and 'mtime'; 'size' is added only
    when `path` is a regular file. Values come from the tuple form of the
    `os.stat` result, so 'mtime' is the integer found in that tuple rather
    than the float `st_mtime` attribute.
    """
    mode, _, _, _, uid, gid, size, _, mtime, _ = stat(path)
    # Build the result explicitly instead of filtering locals(): the
    # returned keys no longer depend on which local names happen to exist.
    result = {'uid': uid, 'gid': gid, 'mtime': mtime}
    if S_ISREG(mode):
        result['size'] = size
    return result
126
127
def treestat(d, stat_dict=stat_dict):
    """
    Walk the directory tree rooted at `d` and return a mapping of every
    visited directory path and every file path to `stat_dict(path)`.

    `stat_dict` is a callable taking a path; it defaults to the
    module-level `stat_dict`.
    """
    stats = {}
    for root, _subdirs, files in walk(d):
        # Only the directory yielded by walk() and its files are recorded;
        # the subdirectory names are not used here.
        stats[root] = stat_dict(root)
        file_paths = [join(root, name) for name in files]
        stats.update(
            (file_path, stat_dict(file_path)) for file_path in file_paths
        )
    return stats
136
137
def surrogate_decode(o):
    """
    Return `o` decoded as UTF-8 with the 'surrogateescape' error handler
    when it is a bytes object; return any other object unchanged.
    """
    if not isinstance(o, bytes):
        return o
    return o.decode('utf8', errors='surrogateescape')
142