"""
Zip helpers with tolerant file name handling and deterministic output.

The goals of the ``ZipFile`` class are:
1. To be able to use ISO-8859-1 encoding instead of CP437, and
2. Also support zip files with UTF-8 encoding.

Python 3 assumes strings must be converted to CP437 for non-ASCII characters.
This is why there'll be decoding/encoding from/to CP437 even for ISO-8859-1
and UTF-8 name lookup (with Python 3). Complications arise because Python
seems to detect UTF-8 in some zip files (based on a flag) and then
automatically convert to/from unicode.
"""
import contextlib
import os
import sys
import zipfile
import zlib

try:
    from typing import List
except ImportError:
    from fstd.typing import List


# General purpose flag bit 11: the stored member name is UTF-8 encoded.
_UTF8_FILENAME_FLAG = 0x800

# Archive comment written for torrentzip output. The eight trailing "X"
# characters are a placeholder for the CRC-32 of the central directory.
_TORRENTZIP_COMMENT = b"TORRENTZIPPED-XXXXXXXX"


def _decode_legacy_name(name):
    """Re-interpret a CP437-decoded member name as UTF-8 or ISO-8859-1.

    The name is turned back into its stored bytes via CP437 and then decoded
    as UTF-8 when possible, falling back to ISO-8859-1. Names that cannot be
    encoded as CP437 are returned unchanged.
    """
    try:
        raw = name.encode("CP437")
    except UnicodeError:
        return name
    try:
        return raw.decode("UTF-8")
    except UnicodeError:
        # ISO-8859-1 maps every byte value, so this cannot fail.
        return raw.decode("ISO-8859-1")


class ZipFile(zipfile.ZipFile):
    """``zipfile.ZipFile`` with ISO-8859-1 / UTF-8 aware member names."""

    def __init__(self, path: str, mode: str = "r") -> None:
        zipfile.ZipFile.__init__(self, path, mode)

    def getinfo(self, name: str) -> zipfile.ZipInfo:
        """Return the ``ZipInfo`` for *name*.

        The lookup is attempted with the name as it appears when its
        ISO-8859-1 bytes, then its UTF-8 bytes, are decoded as CP437, and
        finally with the name unchanged.

        Raises:
            KeyError: if no variant of *name* exists in the archive.
        """
        for encoding in ("ISO-8859-1", "UTF-8"):
            try:
                candidate = name.encode(encoding).decode("CP437")
                return zipfile.ZipFile.getinfo(self, candidate)
            except (KeyError, UnicodeError):
                # Not encodable, or no such member; try the next variant.
                continue
        return zipfile.ZipFile.getinfo(self, name)

    def namelist(self) -> List[str]:
        """Return the member names, decoded as UTF-8 or ISO-8859-1.

        Entries carrying the UTF-8 file name flag are returned unchanged;
        pushing those through CP437 again would corrupt them.
        """
        names = []
        for info in zipfile.ZipFile.infolist(self):
            if info.flag_bits & _UTF8_FILENAME_FLAG:
                names.append(info.filename)
            else:
                names.append(_decode_legacy_name(info.filename))
        return names


def _get_compressor(compress_type, *args, **kwargs):
    """Replacement for ``zipfile._get_compressor`` used for torrentzip.

    Deflated entries always get a level 9 raw-deflate compressor. Any other
    compression type is delegated to the original zipfile implementation.
    Extra arguments are accepted and forwarded because zipfile may call the
    hook with more than the compression type (e.g. a compression level).
    """
    if compress_type == zipfile.ZIP_DEFLATED:
        return zlib.compressobj(9, zlib.DEFLATED, -15)
    # While the patch is installed the original lives in
    # zipfile._get_compressor_fspy; otherwise zipfile's own function is used.
    # noinspection PyProtectedMember,PyUnresolvedReferences
    original = getattr(zipfile, "_get_compressor_fspy", None)
    if original is None:
        # noinspection PyProtectedMember
        original = zipfile._get_compressor
    return original(compress_type, *args, **kwargs)


@contextlib.contextmanager
def _torrentzip_compressor(enabled):
    """Temporarily install ``_get_compressor`` into the zipfile module."""
    if not enabled:
        yield
        return
    # noinspection PyProtectedMember
    zipfile._get_compressor_fspy = zipfile._get_compressor
    zipfile._get_compressor = _get_compressor
    try:
        yield
    finally:
        # noinspection PyProtectedMember,PyUnresolvedReferences
        zipfile._get_compressor = zipfile._get_compressor_fspy
        # noinspection PyProtectedMember,PyUnresolvedReferences
        del zipfile._get_compressor_fspy


def _deterministic_zipinfo(name, torrentzip):
    """Build a ``ZipInfo`` for *name* with fixed, reproducible metadata."""
    zinfo = zipfile.ZipInfo()
    zinfo.filename = name
    zinfo.date_time = (1996, 12, 24, 23, 32, 00)
    zinfo.compress_type = zipfile.ZIP_DEFLATED
    zinfo.create_system = 0
    zinfo.create_version = 0
    zinfo.extract_version = 20
    # NOTE(review): newer zipfile versions may reset flag_bits when the
    # entry is written - verify that this value reaches the archive.
    zinfo.flag_bits = 2
    if torrentzip:
        zinfo.external_attr = 0
    else:
        zinfo.external_attr = (0o644 & 0xFFFF) << 16
        if name.endswith("/"):
            # FIXME: Check if this is correct?
            zinfo.external_attr |= 0x10
    return zinfo


def _write_torrentzip_checksum(path):
    """Replace the comment placeholder with the central directory CRC-32.

    Assumes the archive at *path* ends with an end-of-central-directory
    record whose comment is ``_TORRENTZIP_COMMENT``.
    """
    with open(path, "r+b") as f:
        # From the end of the file: comment, 2-byte comment length, 4-byte
        # central directory offset, 4-byte central directory size.
        f.seek(-(len(_TORRENTZIP_COMMENT) + 2 + 4 + 4), os.SEEK_END)
        size = int.from_bytes(f.read(4), "little")
        socd = int.from_bytes(f.read(4), "little")
        f.seek(socd)
        checksum = zlib.crc32(f.read(size)) & 0xFFFFFFFF
        f.seek(-8, os.SEEK_END)
        # Always eight hex digits so no placeholder characters remain.
        f.write("{:08X}".format(checksum).encode("ASCII"))


def create_deterministic_archive(
    src, dst, fix_pyc_timestamps=False, torrentzip=False
):
    """Rewrite the zip archive *src* into *dst* with reproducible content.

    Members are written once each, sorted case-insensitively, with fixed
    timestamps and attributes.

    Args:
        src: Path of the archive to read.
        dst: Path of the archive to create.
        fix_pyc_timestamps: Zero bytes 4-7 of every ``.pyc`` member.
        torrentzip: Use level 9 deflate, clear the external attributes and
            write a ``TORRENTZIPPED-<CRC32>`` archive comment.
    """
    with ZipFile(src, "r") as sz, ZipFile(dst, "w") as dz:
        # Sort and remove duplicates. The name itself is the tie-breaker so
        # names differing only by case do not depend on set iteration order.
        names = sorted(set(sz.namelist()), key=lambda n: (n.lower(), n))
        # FIXME: Remove non-needed empty directories
        with _torrentzip_compressor(torrentzip):
            for name in names:
                data = sz.read(name)
                if fix_pyc_timestamps and name.endswith(".pyc"):
                    data = data[0:4] + b"\x00\x00\x00\x00" + data[8:]
                dz.writestr(_deterministic_zipinfo(name, torrentzip), data)
        if torrentzip:
            # Placeholder for the CRC-32 of the central directory records.
            dz.comment = _TORRENTZIP_COMMENT
    if torrentzip:
        _write_torrentzip_checksum(dst)


def convert_deterministic_archive(
    src, fix_pyc_timestamps=False, torrentzip=False
):
    """Replace the archive *src* with its deterministic version.

    The new archive is built in a temporary file next to *src* and then
    moved over it. The temporary file is removed if anything fails.
    """
    tmp = os.path.join(
        os.path.dirname(src), ".~" + os.path.basename(src) + ".tmp"
    )
    try:
        create_deterministic_archive(
            src,
            tmp,
            fix_pyc_timestamps=fix_pyc_timestamps,
            torrentzip=torrentzip,
        )
        # Single step, so there is no window without an archive at src.
        os.replace(tmp, src)
    finally:
        if os.path.exists(tmp):
            os.remove(tmp)


def _pop_flag(args, flag):
    """Remove the first occurrence of *flag* from *args*; report presence."""
    if flag in args:
        args.remove(flag)
        return True
    return False


def main():
    """Command line entry point.

    ``deterministic [--fix-pyc-timestamps] [--torrentzip] SRC [DST]``
    converts SRC in place, or writes the result to DST when given. Any other
    invocation does nothing.

    Raises:
        SystemExit: if ``deterministic`` is given without a source path.
    """
    args = list(sys.argv)
    if len(args) < 2 or args[1] != "deterministic":
        return
    fix_pyc_timestamps = _pop_flag(args, "--fix-pyc-timestamps")
    torrentzip = _pop_flag(args, "--torrentzip")
    if len(args) == 3:
        convert_deterministic_archive(
            args[2],
            torrentzip=torrentzip,
            fix_pyc_timestamps=fix_pyc_timestamps,
        )
    elif len(args) > 3:
        create_deterministic_archive(
            args[2],
            args[3],
            torrentzip=torrentzip,
            fix_pyc_timestamps=fix_pyc_timestamps,
        )
    else:
        raise SystemExit(
            "usage: deterministic [--fix-pyc-timestamps] [--torrentzip]"
            " SRC [DST]"
        )


if __name__ == "__main__":
    main()