1"""
2The goals of this class are:
1. To be able to use ISO-8859-1 encoding instead of CP437, and
2. Also support zip files with UTF-8 encoding.
5
6Python 3 assumes strings must be converted to CP437 for non-ASCII characters.
7This is why there'll be decoding/encoding from/to CP437 even for ISO-8859-1
8and UTF-8 name lookup (with Python 3). Complications arise because Python
9seems to detect UTF-8 in some zip files (based on a flag) and then
10automatically convert to/from unicode.
11"""
12import os
13import sys
14import zipfile
15import zlib
16
17try:
18    from typing import List
19except ImportError:
20    from fstd.typing import List
21
22
class ZipFile(zipfile.ZipFile):
    """``zipfile.ZipFile`` variant that handles legacy-encoded member names.

    The base class decodes member names that are not flagged as UTF-8 using
    CP437. This subclass re-interprets such names as UTF-8 or, failing that,
    ISO-8859-1 when listing members, and maps names passed to
    :meth:`getinfo` back to the CP437-decoded form the base class stores.
    """

    # General purpose bit 11: the member name is stored as UTF-8 and has
    # already been decoded correctly by the base class.
    _UTF8_FLAG = 0x800

    def __init__(self, path: str, mode: str = "r") -> None:
        """Open the archive at *path* using *mode* (as for the base class)."""
        zipfile.ZipFile.__init__(self, path, mode)

    def getinfo(self, name: str) -> zipfile.ZipInfo:
        """Return the ``ZipInfo`` for the member called *name*.

        *name* is first looked up as if it was stored in ISO-8859-1, then as
        if it was stored in UTF-8 (without the UTF-8 flag), and finally
        verbatim.

        :raises KeyError: if no member matches any of the three forms.
        """
        for encoding in ("ISO-8859-1", "UTF-8"):
            try:
                # The base class keys members by their CP437-decoded name.
                stored = name.encode(encoding).decode("CP437")
                return zipfile.ZipFile.getinfo(self, stored)
            except (UnicodeError, KeyError):
                # Not representable in this encoding, or no such member.
                continue
        return zipfile.ZipFile.getinfo(self, name)

    def namelist(self) -> List[str]:
        """Return the member names with legacy encodings undone.

        Names flagged as UTF-8 are returned unchanged; re-interpreting them
        would corrupt characters that also exist in CP437. All other names
        are re-read as UTF-8 or, failing that, ISO-8859-1.
        """
        names = []
        for info in self.infolist():
            name = info.filename
            if not info.flag_bits & self._UTF8_FLAG:
                name = self._decode_legacy_name(name)
            names.append(name)
        return names

    @staticmethod
    def _decode_legacy_name(name: str) -> str:
        """Re-interpret a CP437-decoded *name* as UTF-8 or ISO-8859-1."""
        try:
            raw = name.encode("CP437")
        except UnicodeEncodeError:
            # Not a CP437-decoded name; leave it untouched.
            return name
        for encoding in ("UTF-8", "ISO-8859-1"):
            try:
                return raw.decode(encoding)
            except UnicodeDecodeError:
                continue
        return name
53
54
55def _get_compressor(compress_type):
56    if compress_type == zipfile.ZIP_DEFLATED:
57        return zlib.compressobj(9, zlib.DEFLATED, -15)
58    # noinspection PyProtectedMember,PyUnresolvedReferences
59    return zipfile._get_compressor_fspy(compress_type)
60
61
def _deterministic_zipinfo(name, torrentzip):
    """Build the fixed-metadata ``ZipInfo`` used for member *name*."""
    zinfo = zipfile.ZipInfo()
    zinfo.filename = name
    zinfo.date_time = (1996, 12, 24, 23, 32, 00)
    zinfo.compress_type = zipfile.ZIP_DEFLATED
    zinfo.create_system = 0
    zinfo.create_version = 0
    zinfo.extract_version = 20
    zinfo.flag_bits = 2
    if torrentzip:
        zinfo.external_attr = 0
    else:
        zinfo.external_attr = (0o644 & 0xFFFF) << 16
        if name.endswith("/"):
            # FIXME: Check if this is correct?
            zinfo.external_attr |= 0x10
    return zinfo


def _write_torrentzip_checksum(path):
    """Replace the comment placeholder in *path* with the directory CRC-32.

    Expects the archive to end with a 22-byte comment whose last eight
    bytes are a placeholder for the checksum.
    """
    with open(path, "r+b") as f:
        # Tail of the end-of-central-directory record, counted from EOF:
        # comment (22), comment length (2), directory offset (4), size (4).
        f.seek(-22 - 2 - 4 - 4, 2)
        size = int.from_bytes(f.read(4), "little")
        socd = int.from_bytes(f.read(4), "little")
        f.seek(socd)
        checksum = zlib.crc32(f.read(size)) & 0xFFFFFFFF
        f.seek(-8, 2)
        # Always eight hex digits, so no placeholder characters survive.
        f.write("{:08X}".format(checksum).encode("ASCII"))


def create_deterministic_archive(
    src, dst, fix_pyc_timestamps=False, torrentzip=False
):
    """Rewrite the zip archive *src* as a reproducible archive at *dst*.

    Members are de-duplicated, sorted case-insensitively and re-compressed
    with deflate using fixed metadata (timestamp, creator fields,
    attributes), so identical content yields an identical archive.

    :param src: path of the archive to read.
    :param dst: path of the archive to create.
    :param fix_pyc_timestamps: if true, zero bytes 4-7 of every member
        whose name ends in ``.pyc``.
    :param torrentzip: if true, clear the external attributes, compress
        through the module's ``_get_compressor`` hook and store a
        ``TORRENTZIPPED-<CRC32>`` archive comment, where the CRC-32 covers
        the central directory.
    """
    with ZipFile(src, "r") as sz, ZipFile(dst, "w") as dz:
        # Sort and remove duplicates. The name itself breaks ties between
        # names differing only in case; otherwise their relative order
        # would depend on the (randomized) iteration order of the set.
        names = sorted(set(sz.namelist()), key=lambda n: (n.lower(), n))
        # FIXME: Remove non-needed empty directories
        if torrentzip:
            # Temporarily route zipfile's compressor factory through ours;
            # the original is kept so the hook can delegate to it.
            # noinspection PyProtectedMember
            zipfile._get_compressor_fspy = zipfile._get_compressor
            zipfile._get_compressor = _get_compressor
        try:
            for name in names:
                data = sz.read(name)
                if name.endswith(".pyc") and fix_pyc_timestamps:
                    # NOTE(review): presumably bytes 4-7 hold the source
                    # mtime in the legacy .pyc header; confirm for newer
                    # .pyc layouts.
                    data = data[0:4] + b"\x00\x00\x00\x00" + data[8:]
                dz.writestr(_deterministic_zipinfo(name, torrentzip), data)
        finally:
            if torrentzip:
                # noinspection PyProtectedMember,PyUnresolvedReferences
                zipfile._get_compressor = zipfile._get_compressor_fspy
                # noinspection PyProtectedMember,PyUnresolvedReferences
                del zipfile._get_compressor_fspy
        if torrentzip:
            # Placeholder for the CRC-32 of the central directory records.
            dz.comment = b"TORRENTZIPPED-XXXXXXXX"
    if torrentzip:
        _write_torrentzip_checksum(dst)
126
127
def convert_deterministic_archive(
    src, fix_pyc_timestamps=False, torrentzip=False
):
    """Convert the archive at *src* to a deterministic archive in place.

    The new archive is first written to a hidden temporary file next to
    *src* and then moved over *src*. The temporary file is removed if
    anything fails, and *src* is left untouched in that case.

    :param src: path of the archive to convert.
    :param fix_pyc_timestamps: passed to ``create_deterministic_archive``.
    :param torrentzip: passed to ``create_deterministic_archive``.
    """
    tmp = os.path.join(
        os.path.dirname(src), ".~" + os.path.basename(src) + ".tmp"
    )
    try:
        create_deterministic_archive(
            src,
            tmp,
            fix_pyc_timestamps=fix_pyc_timestamps,
            torrentzip=torrentzip,
        )
        # Overwrite in a single step. Removing src first would lose the
        # archive entirely if the subsequent rename failed, because the
        # cleanup below deletes the temporary file as well.
        os.replace(tmp, src)
    finally:
        if os.path.exists(tmp):
            os.remove(tmp)
146
147
def main():
    """Command-line entry point.

    Usage::

        PROG deterministic [--fix-pyc-timestamps] [--torrentzip] SRC [DST]

    With only SRC the archive is converted in place; with SRC and DST a new
    archive is written to DST. Nothing is done when no command, or a
    command other than ``deterministic``, is given.

    :raises SystemExit: if ``deterministic`` is given without SRC.
    """
    # Work on a copy so that sys.argv is not modified.
    args = sys.argv[1:]
    if not args or args[0] != "deterministic":
        return
    flags = ("--fix-pyc-timestamps", "--torrentzip")
    fix_pyc_timestamps = "--fix-pyc-timestamps" in args
    torrentzip = "--torrentzip" in args
    paths = [arg for arg in args[1:] if arg not in flags]
    if not paths:
        sys.exit(
            "usage: {} deterministic [--fix-pyc-timestamps] "
            "[--torrentzip] SRC [DST]".format(sys.argv[0])
        )
    if len(paths) == 1:
        convert_deterministic_archive(
            paths[0],
            torrentzip=torrentzip,
            fix_pyc_timestamps=fix_pyc_timestamps,
        )
    else:
        create_deterministic_archive(
            paths[0],
            paths[1],
            torrentzip=torrentzip,
            fix_pyc_timestamps=fix_pyc_timestamps,
        )
172
173
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
176