1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import io 7import os 8import importlib.util 9import sys 10import time 11import stat 12import shutil 13import struct 14import binascii 15import threading 16 17try: 18 import zlib # We may need its compression method 19 crc32 = zlib.crc32 20except ImportError: 21 zlib = None 22 crc32 = binascii.crc32 23 24try: 25 import bz2 # We may need its compression method 26except ImportError: 27 bz2 = None 28 29try: 30 import lzma # We may need its compression method 31except ImportError: 32 lzma = None 33 34__all__ = ["BadZipFile", "BadZipfile", "error", 35 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 36 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"] 37 38class BadZipFile(Exception): 39 pass 40 41 42class LargeZipFile(Exception): 43 """ 44 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 45 and those extensions are disabled. 46 """ 47 48error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 49 50 51ZIP64_LIMIT = (1 << 31) - 1 52ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 53ZIP_MAX_COMMENT = (1 << 16) - 1 54 55# constants for Zip file compression methods 56ZIP_STORED = 0 57ZIP_DEFLATED = 8 58ZIP_BZIP2 = 12 59ZIP_LZMA = 14 60# Other ZIP compression methods not supported 61 62DEFAULT_VERSION = 20 63ZIP64_VERSION = 45 64BZIP2_VERSION = 46 65LZMA_VERSION = 63 66# we recognize (but not necessarily support) all features up to that version 67MAX_EXTRACT_VERSION = 63 68 69# Below are some formats and associated data for reading/writing headers using 70# the struct module. 
The names and structures of headers/records are those used 71# in the PKWARE description of the ZIP file format: 72# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 73# (URL valid as of January 2008) 74 75# The "end of central directory" structure, magic number, size, and indices 76# (section V.I in the format document) 77structEndArchive = b"<4s4H2LH" 78stringEndArchive = b"PK\005\006" 79sizeEndCentDir = struct.calcsize(structEndArchive) 80 81_ECD_SIGNATURE = 0 82_ECD_DISK_NUMBER = 1 83_ECD_DISK_START = 2 84_ECD_ENTRIES_THIS_DISK = 3 85_ECD_ENTRIES_TOTAL = 4 86_ECD_SIZE = 5 87_ECD_OFFSET = 6 88_ECD_COMMENT_SIZE = 7 89# These last two indices are not part of the structure as defined in the 90# spec, but they are used internally by this module as a convenience 91_ECD_COMMENT = 8 92_ECD_LOCATION = 9 93 94# The "central directory" structure, magic number, size, and indices 95# of entries in the structure (section V.F in the format document) 96structCentralDir = "<4s4B4HL2L5H2L" 97stringCentralDir = b"PK\001\002" 98sizeCentralDir = struct.calcsize(structCentralDir) 99 100# indexes of entries in the central directory structure 101_CD_SIGNATURE = 0 102_CD_CREATE_VERSION = 1 103_CD_CREATE_SYSTEM = 2 104_CD_EXTRACT_VERSION = 3 105_CD_EXTRACT_SYSTEM = 4 106_CD_FLAG_BITS = 5 107_CD_COMPRESS_TYPE = 6 108_CD_TIME = 7 109_CD_DATE = 8 110_CD_CRC = 9 111_CD_COMPRESSED_SIZE = 10 112_CD_UNCOMPRESSED_SIZE = 11 113_CD_FILENAME_LENGTH = 12 114_CD_EXTRA_FIELD_LENGTH = 13 115_CD_COMMENT_LENGTH = 14 116_CD_DISK_NUMBER_START = 15 117_CD_INTERNAL_FILE_ATTRIBUTES = 16 118_CD_EXTERNAL_FILE_ATTRIBUTES = 17 119_CD_LOCAL_HEADER_OFFSET = 18 120 121# The "local file header" structure, magic number, size, and indices 122# (section V.A in the format document) 123structFileHeader = "<4s2B4HL2L2H" 124stringFileHeader = b"PK\003\004" 125sizeFileHeader = struct.calcsize(structFileHeader) 126 127_FH_SIGNATURE = 0 128_FH_EXTRACT_VERSION = 1 129_FH_EXTRACT_SYSTEM = 2 
130_FH_GENERAL_PURPOSE_FLAG_BITS = 3 131_FH_COMPRESSION_METHOD = 4 132_FH_LAST_MOD_TIME = 5 133_FH_LAST_MOD_DATE = 6 134_FH_CRC = 7 135_FH_COMPRESSED_SIZE = 8 136_FH_UNCOMPRESSED_SIZE = 9 137_FH_FILENAME_LENGTH = 10 138_FH_EXTRA_FIELD_LENGTH = 11 139 140# The "Zip64 end of central directory locator" structure, magic number, and size 141structEndArchive64Locator = "<4sLQL" 142stringEndArchive64Locator = b"PK\x06\x07" 143sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 144 145# The "Zip64 end of central directory" record, magic number, size, and indices 146# (section V.G in the format document) 147structEndArchive64 = "<4sQ2H2L4Q" 148stringEndArchive64 = b"PK\x06\x06" 149sizeEndCentDir64 = struct.calcsize(structEndArchive64) 150 151_CD64_SIGNATURE = 0 152_CD64_DIRECTORY_RECSIZE = 1 153_CD64_CREATE_VERSION = 2 154_CD64_EXTRACT_VERSION = 3 155_CD64_DISK_NUMBER = 4 156_CD64_DISK_NUMBER_START = 5 157_CD64_NUMBER_ENTRIES_THIS_DISK = 6 158_CD64_NUMBER_ENTRIES_TOTAL = 7 159_CD64_DIRECTORY_SIZE = 8 160_CD64_OFFSET_START_CENTDIR = 9 161 162_DD_SIGNATURE = 0x08074b50 163 164_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 165 166def _strip_extra(extra, xids): 167 # Remove Extra Fields with specified IDs. 168 unpack = _EXTRA_FIELD_STRUCT.unpack 169 modified = False 170 buffer = [] 171 start = i = 0 172 while i + 4 <= len(extra): 173 xid, xlen = unpack(extra[i : i + 4]) 174 j = i + 4 + xlen 175 if xid in xids: 176 if i != start: 177 buffer.append(extra[start : i]) 178 start = j 179 modified = True 180 i = j 181 if not modified: 182 return extra 183 return b''.join(buffer) 184 185def _check_zipfile(fp): 186 try: 187 if _EndRecData(fp): 188 return True # file has correct magic number 189 except OSError: 190 pass 191 return False 192 193def is_zipfile(filename): 194 """Quickly see if a file is a ZIP file by checking the magic number. 195 196 The filename argument may be a file or file-like object too. 
197 """ 198 result = False 199 try: 200 if hasattr(filename, "read"): 201 result = _check_zipfile(fp=filename) 202 else: 203 with open(filename, "rb") as fp: 204 result = _check_zipfile(fp) 205 except OSError: 206 pass 207 return result 208 209def _EndRecData64(fpin, offset, endrec): 210 """ 211 Read the ZIP64 end-of-archive records and use that to update endrec 212 """ 213 try: 214 fpin.seek(offset - sizeEndCentDir64Locator, 2) 215 except OSError: 216 # If the seek fails, the file is not large enough to contain a ZIP64 217 # end-of-archive record, so just return the end record we were given. 218 return endrec 219 220 data = fpin.read(sizeEndCentDir64Locator) 221 if len(data) != sizeEndCentDir64Locator: 222 return endrec 223 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 224 if sig != stringEndArchive64Locator: 225 return endrec 226 227 if diskno != 0 or disks > 1: 228 raise BadZipFile("zipfiles that span multiple disks are not supported") 229 230 # Assume no 'zip64 extensible data' 231 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 232 data = fpin.read(sizeEndCentDir64) 233 if len(data) != sizeEndCentDir64: 234 return endrec 235 sig, sz, create_version, read_version, disk_num, disk_dir, \ 236 dircount, dircount2, dirsize, diroffset = \ 237 struct.unpack(structEndArchive64, data) 238 if sig != stringEndArchive64: 239 return endrec 240 241 # Update the original endrec using data from the ZIP64 record 242 endrec[_ECD_SIGNATURE] = sig 243 endrec[_ECD_DISK_NUMBER] = disk_num 244 endrec[_ECD_DISK_START] = disk_dir 245 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 246 endrec[_ECD_ENTRIES_TOTAL] = dircount2 247 endrec[_ECD_SIZE] = dirsize 248 endrec[_ECD_OFFSET] = diroffset 249 return endrec 250 251 252def _EndRecData(fpin): 253 """Return data from the "End of Central Directory" record, or None. 
254 255 The data is a list of the nine items in the ZIP "End of central dir" 256 record followed by a tenth item, the file seek offset of this record.""" 257 258 # Determine file size 259 fpin.seek(0, 2) 260 filesize = fpin.tell() 261 262 # Check to see if this is ZIP file with no archive comment (the 263 # "end of central directory" structure should be the last item in the 264 # file if this is the case). 265 try: 266 fpin.seek(-sizeEndCentDir, 2) 267 except OSError: 268 return None 269 data = fpin.read() 270 if (len(data) == sizeEndCentDir and 271 data[0:4] == stringEndArchive and 272 data[-2:] == b"\000\000"): 273 # the signature is correct and there's no comment, unpack structure 274 endrec = struct.unpack(structEndArchive, data) 275 endrec=list(endrec) 276 277 # Append a blank comment and record start offset 278 endrec.append(b"") 279 endrec.append(filesize - sizeEndCentDir) 280 281 # Try to read the "Zip64 end of central directory" structure 282 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 283 284 # Either this is not a ZIP file, or it is a ZIP file with an archive 285 # comment. Search the end of the file for the "end of central directory" 286 # record signature. The comment is the last item in the ZIP file and may be 287 # up to 64K long. It is assumed that the "end of central directory" magic 288 # number does not appear in the comment. 289 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 290 fpin.seek(maxCommentStart, 0) 291 data = fpin.read() 292 start = data.rfind(stringEndArchive) 293 if start >= 0: 294 # found the magic number; attempt to unpack and interpret 295 recData = data[start:start+sizeEndCentDir] 296 if len(recData) != sizeEndCentDir: 297 # Zip file is corrupted. 
298 return None 299 endrec = list(struct.unpack(structEndArchive, recData)) 300 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 301 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 302 endrec.append(comment) 303 endrec.append(maxCommentStart + start) 304 305 # Try to read the "Zip64 end of central directory" structure 306 return _EndRecData64(fpin, maxCommentStart + start - filesize, 307 endrec) 308 309 # Unable to find a valid end of central directory structure 310 return None 311 312 313class ZipInfo (object): 314 """Class with attributes describing each file in the ZIP archive.""" 315 316 __slots__ = ( 317 'orig_filename', 318 'filename', 319 'date_time', 320 'compress_type', 321 '_compresslevel', 322 'comment', 323 'extra', 324 'create_system', 325 'create_version', 326 'extract_version', 327 'reserved', 328 'flag_bits', 329 'volume', 330 'internal_attr', 331 'external_attr', 332 'header_offset', 333 'CRC', 334 'compress_size', 335 'file_size', 336 '_raw_time', 337 ) 338 339 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 340 self.orig_filename = filename # Original file name in archive 341 342 # Terminate the file name at the first null byte. Null bytes in file 343 # names are used as tricks by viruses in archives. 344 null_byte = filename.find(chr(0)) 345 if null_byte >= 0: 346 filename = filename[0:null_byte] 347 # This is used to ensure paths in generated ZIP files always use 348 # forward slashes as the directory separator, as required by the 349 # ZIP format specification. 
350 if os.sep != "/" and os.sep in filename: 351 filename = filename.replace(os.sep, "/") 352 353 self.filename = filename # Normalized file name 354 self.date_time = date_time # year, month, day, hour, min, sec 355 356 if date_time[0] < 1980: 357 raise ValueError('ZIP does not support timestamps before 1980') 358 359 # Standard values: 360 self.compress_type = ZIP_STORED # Type of compression for the file 361 self._compresslevel = None # Level for the compressor 362 self.comment = b"" # Comment for each file 363 self.extra = b"" # ZIP extra data 364 if sys.platform == 'win32': 365 self.create_system = 0 # System which created ZIP archive 366 else: 367 # Assume everything else is unix-y 368 self.create_system = 3 # System which created ZIP archive 369 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 370 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 371 self.reserved = 0 # Must be zero 372 self.flag_bits = 0 # ZIP flag bits 373 self.volume = 0 # Volume number of file header 374 self.internal_attr = 0 # Internal attributes 375 self.external_attr = 0 # External file attributes 376 # Other attributes are set by class ZipFile: 377 # header_offset Byte offset to the file header 378 # CRC CRC-32 of the uncompressed file 379 # compress_size Size of the compressed file 380 # file_size Size of the uncompressed file 381 382 def __repr__(self): 383 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 384 if self.compress_type != ZIP_STORED: 385 result.append(' compress_type=%s' % 386 compressor_names.get(self.compress_type, 387 self.compress_type)) 388 hi = self.external_attr >> 16 389 lo = self.external_attr & 0xFFFF 390 if hi: 391 result.append(' filemode=%r' % stat.filemode(hi)) 392 if lo: 393 result.append(' external_attr=%#x' % lo) 394 isdir = self.is_dir() 395 if not isdir or self.file_size: 396 result.append(' file_size=%r' % self.file_size) 397 if ((not isdir or self.compress_size) and 398 
(self.compress_type != ZIP_STORED or 399 self.file_size != self.compress_size)): 400 result.append(' compress_size=%r' % self.compress_size) 401 result.append('>') 402 return ''.join(result) 403 404 def FileHeader(self, zip64=None): 405 """Return the per-file header as a bytes object.""" 406 dt = self.date_time 407 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 408 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 409 if self.flag_bits & 0x08: 410 # Set these to zero because we write them after the file data 411 CRC = compress_size = file_size = 0 412 else: 413 CRC = self.CRC 414 compress_size = self.compress_size 415 file_size = self.file_size 416 417 extra = self.extra 418 419 min_version = 0 420 if zip64 is None: 421 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 422 if zip64: 423 fmt = '<HHQQ' 424 extra = extra + struct.pack(fmt, 425 1, struct.calcsize(fmt)-4, file_size, compress_size) 426 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 427 if not zip64: 428 raise LargeZipFile("Filesize would require ZIP64 extensions") 429 # File is larger than what fits into a 4 byte integer, 430 # fall back to the ZIP64 extension 431 file_size = 0xffffffff 432 compress_size = 0xffffffff 433 min_version = ZIP64_VERSION 434 435 if self.compress_type == ZIP_BZIP2: 436 min_version = max(BZIP2_VERSION, min_version) 437 elif self.compress_type == ZIP_LZMA: 438 min_version = max(LZMA_VERSION, min_version) 439 440 self.extract_version = max(min_version, self.extract_version) 441 self.create_version = max(min_version, self.create_version) 442 filename, flag_bits = self._encodeFilenameFlags() 443 header = struct.pack(structFileHeader, stringFileHeader, 444 self.extract_version, self.reserved, flag_bits, 445 self.compress_type, dostime, dosdate, CRC, 446 compress_size, file_size, 447 len(filename), len(extra)) 448 return header + filename + extra 449 450 def _encodeFilenameFlags(self): 451 try: 452 return self.filename.encode('ascii'), self.flag_bits 453 
except UnicodeEncodeError: 454 return self.filename.encode('utf-8'), self.flag_bits | 0x800 455 456 def _decodeExtra(self): 457 # Try to decode the extra field. 458 extra = self.extra 459 unpack = struct.unpack 460 while len(extra) >= 4: 461 tp, ln = unpack('<HH', extra[:4]) 462 if ln+4 > len(extra): 463 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 464 if tp == 0x0001: 465 if ln >= 24: 466 counts = unpack('<QQQ', extra[4:28]) 467 elif ln == 16: 468 counts = unpack('<QQ', extra[4:20]) 469 elif ln == 8: 470 counts = unpack('<Q', extra[4:12]) 471 elif ln == 0: 472 counts = () 473 else: 474 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 475 476 idx = 0 477 478 # ZIP64 extension (large files and/or large archives) 479 if self.file_size in (0xffffffffffffffff, 0xffffffff): 480 if len(counts) <= idx: 481 raise BadZipFile( 482 "Corrupt zip64 extra field. File size not found." 483 ) 484 self.file_size = counts[idx] 485 idx += 1 486 487 if self.compress_size == 0xFFFFFFFF: 488 if len(counts) <= idx: 489 raise BadZipFile( 490 "Corrupt zip64 extra field. Compress size not found." 491 ) 492 self.compress_size = counts[idx] 493 idx += 1 494 495 if self.header_offset == 0xffffffff: 496 if len(counts) <= idx: 497 raise BadZipFile( 498 "Corrupt zip64 extra field. Header offset not found." 499 ) 500 old = self.header_offset 501 self.header_offset = counts[idx] 502 idx+=1 503 504 extra = extra[ln+4:] 505 506 @classmethod 507 def from_file(cls, filename, arcname=None): 508 """Construct an appropriate ZipInfo for a file on the filesystem. 509 510 filename should be the path to a file or directory on the filesystem. 511 512 arcname is the name which it will have within the archive (by default, 513 this will be the same as filename, but without a drive letter and with 514 leading path separators removed). 
515 """ 516 if isinstance(filename, os.PathLike): 517 filename = os.fspath(filename) 518 st = os.stat(filename) 519 isdir = stat.S_ISDIR(st.st_mode) 520 mtime = time.localtime(st.st_mtime) 521 date_time = mtime[0:6] 522 # Create ZipInfo instance to store file information 523 if arcname is None: 524 arcname = filename 525 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 526 while arcname[0] in (os.sep, os.altsep): 527 arcname = arcname[1:] 528 if isdir: 529 arcname += '/' 530 zinfo = cls(arcname, date_time) 531 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 532 if isdir: 533 zinfo.file_size = 0 534 zinfo.external_attr |= 0x10 # MS-DOS directory flag 535 else: 536 zinfo.file_size = st.st_size 537 538 return zinfo 539 540 def is_dir(self): 541 """Return True if this archive member is a directory.""" 542 return self.filename[-1] == '/' 543 544 545# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 546# internal keys. We noticed that a direct implementation is faster than 547# relying on binascii.crc32(). 548 549_crctable = None 550def _gen_crc(crc): 551 for j in range(8): 552 if crc & 1: 553 crc = (crc >> 1) ^ 0xEDB88320 554 else: 555 crc >>= 1 556 return crc 557 558# ZIP supports a password-based form of encryption. Even though known 559# plaintext attacks have been found against it, it is still useful 560# to be able to get data out of such a file. 
561# 562# Usage: 563# zd = _ZipDecrypter(mypwd) 564# plain_bytes = zd(cypher_bytes) 565 566def _ZipDecrypter(pwd): 567 key0 = 305419896 568 key1 = 591751049 569 key2 = 878082192 570 571 global _crctable 572 if _crctable is None: 573 _crctable = list(map(_gen_crc, range(256))) 574 crctable = _crctable 575 576 def crc32(ch, crc): 577 """Compute the CRC32 primitive on one byte.""" 578 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 579 580 def update_keys(c): 581 nonlocal key0, key1, key2 582 key0 = crc32(c, key0) 583 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 584 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 585 key2 = crc32(key1 >> 24, key2) 586 587 for p in pwd: 588 update_keys(p) 589 590 def decrypter(data): 591 """Decrypt a bytes object.""" 592 result = bytearray() 593 append = result.append 594 for c in data: 595 k = key2 | 2 596 c ^= ((k * (k^1)) >> 8) & 0xFF 597 update_keys(c) 598 append(c) 599 return bytes(result) 600 601 return decrypter 602 603 604class LZMACompressor: 605 606 def __init__(self): 607 self._comp = None 608 609 def _init(self): 610 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 611 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 612 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 613 ]) 614 return struct.pack('<BBH', 9, 4, len(props)) + props 615 616 def compress(self, data): 617 if self._comp is None: 618 return self._init() + self._comp.compress(data) 619 return self._comp.compress(data) 620 621 def flush(self): 622 if self._comp is None: 623 return self._init() + self._comp.flush() 624 return self._comp.flush() 625 626 627class LZMADecompressor: 628 629 def __init__(self): 630 self._decomp = None 631 self._unconsumed = b'' 632 self.eof = False 633 634 def decompress(self, data): 635 if self._decomp is None: 636 self._unconsumed += data 637 if len(self._unconsumed) <= 4: 638 return b'' 639 psize, = struct.unpack('<H', self._unconsumed[2:4]) 640 if len(self._unconsumed) <= 4 + psize: 641 return b'' 642 
643 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 644 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 645 self._unconsumed[4:4 + psize]) 646 ]) 647 data = self._unconsumed[4 + psize:] 648 del self._unconsumed 649 650 result = self._decomp.decompress(data) 651 self.eof = self._decomp.eof 652 return result 653 654 655compressor_names = { 656 0: 'store', 657 1: 'shrink', 658 2: 'reduce', 659 3: 'reduce', 660 4: 'reduce', 661 5: 'reduce', 662 6: 'implode', 663 7: 'tokenize', 664 8: 'deflate', 665 9: 'deflate64', 666 10: 'implode', 667 12: 'bzip2', 668 14: 'lzma', 669 18: 'terse', 670 19: 'lz77', 671 97: 'wavpack', 672 98: 'ppmd', 673} 674 675def _check_compression(compression): 676 if compression == ZIP_STORED: 677 pass 678 elif compression == ZIP_DEFLATED: 679 if not zlib: 680 raise RuntimeError( 681 "Compression requires the (missing) zlib module") 682 elif compression == ZIP_BZIP2: 683 if not bz2: 684 raise RuntimeError( 685 "Compression requires the (missing) bz2 module") 686 elif compression == ZIP_LZMA: 687 if not lzma: 688 raise RuntimeError( 689 "Compression requires the (missing) lzma module") 690 else: 691 raise NotImplementedError("That compression method is not supported") 692 693 694def _get_compressor(compress_type, compresslevel=None): 695 if compress_type == ZIP_DEFLATED: 696 if compresslevel is not None: 697 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 698 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 699 elif compress_type == ZIP_BZIP2: 700 if compresslevel is not None: 701 return bz2.BZ2Compressor(compresslevel) 702 return bz2.BZ2Compressor() 703 # compresslevel is ignored for ZIP_LZMA 704 elif compress_type == ZIP_LZMA: 705 return LZMACompressor() 706 else: 707 return None 708 709 710def _get_decompressor(compress_type): 711 if compress_type == ZIP_STORED: 712 return None 713 elif compress_type == ZIP_DEFLATED: 714 return zlib.decompressobj(-15) 715 elif compress_type == ZIP_BZIP2: 716 
return bz2.BZ2Decompressor() 717 elif compress_type == ZIP_LZMA: 718 return LZMADecompressor() 719 else: 720 descr = compressor_names.get(compress_type) 721 if descr: 722 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 723 else: 724 raise NotImplementedError("compression type %d" % (compress_type,)) 725 726 727class _SharedFile: 728 def __init__(self, file, pos, close, lock, writing): 729 self._file = file 730 self._pos = pos 731 self._close = close 732 self._lock = lock 733 self._writing = writing 734 self.seekable = file.seekable 735 self.tell = file.tell 736 737 def seek(self, offset, whence=0): 738 with self._lock: 739 if self._writing(): 740 raise ValueError("Can't reposition in the ZIP file while " 741 "there is an open writing handle on it. " 742 "Close the writing handle before trying to read.") 743 self._file.seek(offset, whence) 744 self._pos = self._file.tell() 745 return self._pos 746 747 def read(self, n=-1): 748 with self._lock: 749 if self._writing(): 750 raise ValueError("Can't read from the ZIP file while there " 751 "is an open writing handle on it. " 752 "Close the writing handle before trying to read.") 753 self._file.seek(self._pos) 754 data = self._file.read(n) 755 self._pos = self._file.tell() 756 return data 757 758 def close(self): 759 if self._file is not None: 760 fileobj = self._file 761 self._file = None 762 self._close(fileobj) 763 764# Provide the tell method for unseekable stream 765class _Tellable: 766 def __init__(self, fp): 767 self.fp = fp 768 self.offset = 0 769 770 def write(self, data): 771 n = self.fp.write(data) 772 self.offset += n 773 return n 774 775 def tell(self): 776 return self.offset 777 778 def flush(self): 779 self.fp.flush() 780 781 def close(self): 782 self.fp.close() 783 784 785class ZipExtFile(io.BufferedIOBase): 786 """File-like object for reading an archive member. 787 Is returned by ZipFile.open(). 788 """ 789 790 # Max size supported by decompressor. 
791 MAX_N = 1 << 31 - 1 792 793 # Read from compressed files in 4k blocks. 794 MIN_READ_SIZE = 4096 795 796 # Chunk size to read during seek 797 MAX_SEEK_READ = 1 << 24 798 799 def __init__(self, fileobj, mode, zipinfo, pwd=None, 800 close_fileobj=False): 801 self._fileobj = fileobj 802 self._pwd = pwd 803 self._close_fileobj = close_fileobj 804 805 self._compress_type = zipinfo.compress_type 806 self._compress_left = zipinfo.compress_size 807 self._left = zipinfo.file_size 808 809 self._decompressor = _get_decompressor(self._compress_type) 810 811 self._eof = False 812 self._readbuffer = b'' 813 self._offset = 0 814 815 self.newlines = None 816 817 self.mode = mode 818 self.name = zipinfo.filename 819 820 if hasattr(zipinfo, 'CRC'): 821 self._expected_crc = zipinfo.CRC 822 self._running_crc = crc32(b'') 823 else: 824 self._expected_crc = None 825 826 self._seekable = False 827 try: 828 if fileobj.seekable(): 829 self._orig_compress_start = fileobj.tell() 830 self._orig_compress_size = zipinfo.compress_size 831 self._orig_file_size = zipinfo.file_size 832 self._orig_start_crc = self._running_crc 833 self._seekable = True 834 except AttributeError: 835 pass 836 837 self._decrypter = None 838 if pwd: 839 if zipinfo.flag_bits & 0x8: 840 # compare against the file type from extended local headers 841 check_byte = (zipinfo._raw_time >> 8) & 0xff 842 else: 843 # compare against the CRC otherwise 844 check_byte = (zipinfo.CRC >> 24) & 0xff 845 h = self._init_decrypter() 846 if h != check_byte: 847 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 848 849 850 def _init_decrypter(self): 851 self._decrypter = _ZipDecrypter(self._pwd) 852 # The first 12 bytes in the cypher stream is an encryption header 853 # used to strengthen the algorithm. The first 11 bytes are 854 # completely random, while the 12th contains the MSB of the CRC, 855 # or the MSB of the file time depending on the header type 856 # and is used to check the correctness of the password. 
857 header = self._fileobj.read(12) 858 self._compress_left -= 12 859 return self._decrypter(header)[11] 860 861 def __repr__(self): 862 result = ['<%s.%s' % (self.__class__.__module__, 863 self.__class__.__qualname__)] 864 if not self.closed: 865 result.append(' name=%r mode=%r' % (self.name, self.mode)) 866 if self._compress_type != ZIP_STORED: 867 result.append(' compress_type=%s' % 868 compressor_names.get(self._compress_type, 869 self._compress_type)) 870 else: 871 result.append(' [closed]') 872 result.append('>') 873 return ''.join(result) 874 875 def readline(self, limit=-1): 876 """Read and return a line from the stream. 877 878 If limit is specified, at most limit bytes will be read. 879 """ 880 881 if limit < 0: 882 # Shortcut common case - newline found in buffer. 883 i = self._readbuffer.find(b'\n', self._offset) + 1 884 if i > 0: 885 line = self._readbuffer[self._offset: i] 886 self._offset = i 887 return line 888 889 return io.BufferedIOBase.readline(self, limit) 890 891 def peek(self, n=1): 892 """Returns buffered bytes without advancing the position.""" 893 if n > len(self._readbuffer) - self._offset: 894 chunk = self.read(n) 895 if len(chunk) > self._offset: 896 self._readbuffer = chunk + self._readbuffer[self._offset:] 897 self._offset = 0 898 else: 899 self._offset -= len(chunk) 900 901 # Return up to 512 bytes to reduce allocation overhead for tight loops. 902 return self._readbuffer[self._offset: self._offset + 512] 903 904 def readable(self): 905 return True 906 907 def read(self, n=-1): 908 """Read and return up to n bytes. 909 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 
910 """ 911 if n is None or n < 0: 912 buf = self._readbuffer[self._offset:] 913 self._readbuffer = b'' 914 self._offset = 0 915 while not self._eof: 916 buf += self._read1(self.MAX_N) 917 return buf 918 919 end = n + self._offset 920 if end < len(self._readbuffer): 921 buf = self._readbuffer[self._offset:end] 922 self._offset = end 923 return buf 924 925 n = end - len(self._readbuffer) 926 buf = self._readbuffer[self._offset:] 927 self._readbuffer = b'' 928 self._offset = 0 929 while n > 0 and not self._eof: 930 data = self._read1(n) 931 if n < len(data): 932 self._readbuffer = data 933 self._offset = n 934 buf += data[:n] 935 break 936 buf += data 937 n -= len(data) 938 return buf 939 940 def _update_crc(self, newdata): 941 # Update the CRC using the given data. 942 if self._expected_crc is None: 943 # No need to compute the CRC if we don't have a reference value 944 return 945 self._running_crc = crc32(newdata, self._running_crc) 946 # Check the CRC if we're at the end of the file 947 if self._eof and self._running_crc != self._expected_crc: 948 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 949 950 def read1(self, n): 951 """Read up to n bytes with at most one read() system call.""" 952 953 if n is None or n < 0: 954 buf = self._readbuffer[self._offset:] 955 self._readbuffer = b'' 956 self._offset = 0 957 while not self._eof: 958 data = self._read1(self.MAX_N) 959 if data: 960 buf += data 961 break 962 return buf 963 964 end = n + self._offset 965 if end < len(self._readbuffer): 966 buf = self._readbuffer[self._offset:end] 967 self._offset = end 968 return buf 969 970 n = end - len(self._readbuffer) 971 buf = self._readbuffer[self._offset:] 972 self._readbuffer = b'' 973 self._offset = 0 974 if n > 0: 975 while not self._eof: 976 data = self._read1(n) 977 if n < len(data): 978 self._readbuffer = data 979 self._offset = n 980 buf += data[:n] 981 break 982 if data: 983 buf += data 984 break 985 return buf 986 987 def _read1(self, n): 988 # Read up to n 
compressed bytes with at most one read() system call, 989 # decrypt and decompress them. 990 if self._eof or n <= 0: 991 return b'' 992 993 # Read from file. 994 if self._compress_type == ZIP_DEFLATED: 995 ## Handle unconsumed data. 996 data = self._decompressor.unconsumed_tail 997 if n > len(data): 998 data += self._read2(n - len(data)) 999 else: 1000 data = self._read2(n) 1001 1002 if self._compress_type == ZIP_STORED: 1003 self._eof = self._compress_left <= 0 1004 elif self._compress_type == ZIP_DEFLATED: 1005 n = max(n, self.MIN_READ_SIZE) 1006 data = self._decompressor.decompress(data, n) 1007 self._eof = (self._decompressor.eof or 1008 self._compress_left <= 0 and 1009 not self._decompressor.unconsumed_tail) 1010 if self._eof: 1011 data += self._decompressor.flush() 1012 else: 1013 data = self._decompressor.decompress(data) 1014 self._eof = self._decompressor.eof or self._compress_left <= 0 1015 1016 data = data[:self._left] 1017 self._left -= len(data) 1018 if self._left <= 0: 1019 self._eof = True 1020 self._update_crc(data) 1021 return data 1022 1023 def _read2(self, n): 1024 if self._compress_left <= 0: 1025 return b'' 1026 1027 n = max(n, self.MIN_READ_SIZE) 1028 n = min(n, self._compress_left) 1029 1030 data = self._fileobj.read(n) 1031 self._compress_left -= len(data) 1032 if not data: 1033 raise EOFError 1034 1035 if self._decrypter is not None: 1036 data = self._decrypter(data) 1037 return data 1038 1039 def close(self): 1040 try: 1041 if self._close_fileobj: 1042 self._fileobj.close() 1043 finally: 1044 super().close() 1045 1046 def seekable(self): 1047 return self._seekable 1048 1049 def seek(self, offset, whence=0): 1050 if not self._seekable: 1051 raise io.UnsupportedOperation("underlying stream is not seekable") 1052 curr_pos = self.tell() 1053 if whence == 0: # Seek from start of file 1054 new_pos = offset 1055 elif whence == 1: # Seek from current position 1056 new_pos = curr_pos + offset 1057 elif whence == 2: # Seek from EOF 1058 new_pos = 
self._orig_file_size + offset 1059 else: 1060 raise ValueError("whence must be os.SEEK_SET (0), " 1061 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1062 1063 if new_pos > self._orig_file_size: 1064 new_pos = self._orig_file_size 1065 1066 if new_pos < 0: 1067 new_pos = 0 1068 1069 read_offset = new_pos - curr_pos 1070 buff_offset = read_offset + self._offset 1071 1072 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1073 # Just move the _offset index if the new position is in the _readbuffer 1074 self._offset = buff_offset 1075 read_offset = 0 1076 elif read_offset < 0: 1077 # Position is before the current position. Reset the ZipExtFile 1078 self._fileobj.seek(self._orig_compress_start) 1079 self._running_crc = self._orig_start_crc 1080 self._compress_left = self._orig_compress_size 1081 self._left = self._orig_file_size 1082 self._readbuffer = b'' 1083 self._offset = 0 1084 self._decompressor = _get_decompressor(self._compress_type) 1085 self._eof = False 1086 read_offset = new_pos 1087 if self._decrypter is not None: 1088 self._init_decrypter() 1089 1090 while read_offset > 0: 1091 read_len = min(self.MAX_SEEK_READ, read_offset) 1092 self.read(read_len) 1093 read_offset -= read_len 1094 1095 return self.tell() 1096 1097 def tell(self): 1098 if not self._seekable: 1099 raise io.UnsupportedOperation("underlying stream is not seekable") 1100 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1101 return filepos 1102 1103 1104class _ZipWriteFile(io.BufferedIOBase): 1105 def __init__(self, zf, zinfo, zip64): 1106 self._zinfo = zinfo 1107 self._zip64 = zip64 1108 self._zipfile = zf 1109 self._compressor = _get_compressor(zinfo.compress_type, 1110 zinfo._compresslevel) 1111 self._file_size = 0 1112 self._compress_size = 0 1113 self._crc = 0 1114 1115 @property 1116 def _fileobj(self): 1117 return self._zipfile.fp 1118 1119 def writable(self): 1120 return True 1121 1122 def write(self, data): 1123 if self.closed: 1124 raise 
ValueError('I/O operation on closed file.') 1125 nbytes = len(data) 1126 self._file_size += nbytes 1127 self._crc = crc32(data, self._crc) 1128 if self._compressor: 1129 data = self._compressor.compress(data) 1130 self._compress_size += len(data) 1131 self._fileobj.write(data) 1132 return nbytes 1133 1134 def close(self): 1135 if self.closed: 1136 return 1137 try: 1138 super().close() 1139 # Flush any data from the compressor, and update header info 1140 if self._compressor: 1141 buf = self._compressor.flush() 1142 self._compress_size += len(buf) 1143 self._fileobj.write(buf) 1144 self._zinfo.compress_size = self._compress_size 1145 else: 1146 self._zinfo.compress_size = self._file_size 1147 self._zinfo.CRC = self._crc 1148 self._zinfo.file_size = self._file_size 1149 1150 # Write updated header info 1151 if self._zinfo.flag_bits & 0x08: 1152 # Write CRC and file sizes after the file data 1153 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1154 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1155 self._zinfo.compress_size, self._zinfo.file_size)) 1156 self._zipfile.start_dir = self._fileobj.tell() 1157 else: 1158 if not self._zip64: 1159 if self._file_size > ZIP64_LIMIT: 1160 raise RuntimeError( 1161 'File size unexpectedly exceeded ZIP64 limit') 1162 if self._compress_size > ZIP64_LIMIT: 1163 raise RuntimeError( 1164 'Compressed size unexpectedly exceeded ZIP64 limit') 1165 # Seek backwards and write file header (which will now include 1166 # correct CRC and file sizes) 1167 1168 # Preserve current position in file 1169 self._zipfile.start_dir = self._fileobj.tell() 1170 self._fileobj.seek(self._zinfo.header_offset) 1171 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1172 self._fileobj.seek(self._zipfile.start_dir) 1173 1174 # Successfully written: Add file to our caches 1175 self._zipfile.filelist.append(self._zinfo) 1176 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1177 finally: 1178 self._zipfile._writing = False 1179 
1180 1181 1182class ZipFile: 1183 """ Class with methods to open, read, write, close, list zip files. 1184 1185 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1186 compresslevel=None) 1187 1188 file: Either the path to the file, or a file-like object. 1189 If it is a path, the file will be opened and closed by ZipFile. 1190 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1191 or append 'a'. 1192 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1193 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1194 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1195 needed, otherwise it will raise an exception when this would 1196 be necessary. 1197 compresslevel: None (default for the given compression type) or an integer 1198 specifying the level to pass to the compressor. 1199 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1200 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1201 When using ZIP_BZIP2 integers 1 through 9 are accepted. 
1202 1203 """ 1204 1205 fp = None # Set here since __del__ checks it 1206 _windows_illegal_name_trans_table = None 1207 1208 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1209 compresslevel=None): 1210 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1211 or append 'a'.""" 1212 if mode not in ('r', 'w', 'x', 'a'): 1213 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1214 1215 _check_compression(compression) 1216 1217 self._allowZip64 = allowZip64 1218 self._didModify = False 1219 self.debug = 0 # Level of printing: 0 through 3 1220 self.NameToInfo = {} # Find file info given name 1221 self.filelist = [] # List of ZipInfo instances for archive 1222 self.compression = compression # Method of compression 1223 self.compresslevel = compresslevel 1224 self.mode = mode 1225 self.pwd = None 1226 self._comment = b'' 1227 1228 # Check if we were passed a file-like object 1229 if isinstance(file, os.PathLike): 1230 file = os.fspath(file) 1231 if isinstance(file, str): 1232 # No, it's a filename 1233 self._filePassed = 0 1234 self.filename = file 1235 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1236 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1237 filemode = modeDict[mode] 1238 while True: 1239 try: 1240 self.fp = io.open(file, filemode) 1241 except OSError: 1242 if filemode in modeDict: 1243 filemode = modeDict[filemode] 1244 continue 1245 raise 1246 break 1247 else: 1248 self._filePassed = 1 1249 self.fp = file 1250 self.filename = getattr(file, 'name', None) 1251 self._fileRefCnt = 1 1252 self._lock = threading.RLock() 1253 self._seekable = True 1254 self._writing = False 1255 1256 try: 1257 if mode == 'r': 1258 self._RealGetContents() 1259 elif mode in ('w', 'x'): 1260 # set the modified flag so central directory gets written 1261 # even if no files are added to the archive 1262 self._didModify = True 1263 try: 1264 self.start_dir = self.fp.tell() 1265 except (AttributeError, OSError): 1266 
self.fp = _Tellable(self.fp) 1267 self.start_dir = 0 1268 self._seekable = False 1269 else: 1270 # Some file-like objects can provide tell() but not seek() 1271 try: 1272 self.fp.seek(self.start_dir) 1273 except (AttributeError, OSError): 1274 self._seekable = False 1275 elif mode == 'a': 1276 try: 1277 # See if file is a zip file 1278 self._RealGetContents() 1279 # seek to start of directory and overwrite 1280 self.fp.seek(self.start_dir) 1281 except BadZipFile: 1282 # file is not a zip file, just append 1283 self.fp.seek(0, 2) 1284 1285 # set the modified flag so central directory gets written 1286 # even if no files are added to the archive 1287 self._didModify = True 1288 self.start_dir = self.fp.tell() 1289 else: 1290 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1291 except: 1292 fp = self.fp 1293 self.fp = None 1294 self._fpclose(fp) 1295 raise 1296 1297 def __enter__(self): 1298 return self 1299 1300 def __exit__(self, type, value, traceback): 1301 self.close() 1302 1303 def __repr__(self): 1304 result = ['<%s.%s' % (self.__class__.__module__, 1305 self.__class__.__qualname__)] 1306 if self.fp is not None: 1307 if self._filePassed: 1308 result.append(' file=%r' % self.fp) 1309 elif self.filename is not None: 1310 result.append(' filename=%r' % self.filename) 1311 result.append(' mode=%r' % self.mode) 1312 else: 1313 result.append(' [closed]') 1314 result.append('>') 1315 return ''.join(result) 1316 1317 def _RealGetContents(self): 1318 """Read in the table of contents for the ZIP file.""" 1319 fp = self.fp 1320 try: 1321 endrec = _EndRecData(fp) 1322 except OSError: 1323 raise BadZipFile("File is not a zip file") 1324 if not endrec: 1325 raise BadZipFile("File is not a zip file") 1326 if self.debug > 1: 1327 print(endrec) 1328 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1329 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1330 self._comment = endrec[_ECD_COMMENT] # archive comment 1331 1332 # "concat" is zero, unless zip 
was concatenated to another file 1333 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1334 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1335 # If Zip64 extension structures are present, account for them 1336 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1337 1338 if self.debug > 2: 1339 inferred = concat + offset_cd 1340 print("given, inferred, offset", offset_cd, inferred, concat) 1341 # self.start_dir: Position of start of central directory 1342 self.start_dir = offset_cd + concat 1343 fp.seek(self.start_dir, 0) 1344 data = fp.read(size_cd) 1345 fp = io.BytesIO(data) 1346 total = 0 1347 while total < size_cd: 1348 centdir = fp.read(sizeCentralDir) 1349 if len(centdir) != sizeCentralDir: 1350 raise BadZipFile("Truncated central directory") 1351 centdir = struct.unpack(structCentralDir, centdir) 1352 if centdir[_CD_SIGNATURE] != stringCentralDir: 1353 raise BadZipFile("Bad magic number for central directory") 1354 if self.debug > 2: 1355 print(centdir) 1356 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1357 flags = centdir[5] 1358 if flags & 0x800: 1359 # UTF-8 file names extension 1360 filename = filename.decode('utf-8') 1361 else: 1362 # Historical ZIP filename encoding 1363 filename = filename.decode('cp437') 1364 # Create ZipInfo instance to store file information 1365 x = ZipInfo(filename) 1366 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1367 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1368 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1369 (x.create_version, x.create_system, x.extract_version, x.reserved, 1370 x.flag_bits, x.compress_type, t, d, 1371 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1372 if x.extract_version > MAX_EXTRACT_VERSION: 1373 raise NotImplementedError("zip file version %.1f" % 1374 (x.extract_version / 10)) 1375 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1376 # Convert date/time code to (year, month, day, hour, min, sec) 1377 x._raw_time = t 1378 x.date_time = ( (d>>9)+1980, 
(d>>5)&0xF, d&0x1F, 1379 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1380 1381 x._decodeExtra() 1382 x.header_offset = x.header_offset + concat 1383 self.filelist.append(x) 1384 self.NameToInfo[x.filename] = x 1385 1386 # update total bytes read from central directory 1387 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1388 + centdir[_CD_EXTRA_FIELD_LENGTH] 1389 + centdir[_CD_COMMENT_LENGTH]) 1390 1391 if self.debug > 2: 1392 print("total", total) 1393 1394 1395 def namelist(self): 1396 """Return a list of file names in the archive.""" 1397 return [data.filename for data in self.filelist] 1398 1399 def infolist(self): 1400 """Return a list of class ZipInfo instances for files in the 1401 archive.""" 1402 return self.filelist 1403 1404 def printdir(self, file=None): 1405 """Print a table of contents for the zip file.""" 1406 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1407 file=file) 1408 for zinfo in self.filelist: 1409 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1410 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1411 file=file) 1412 1413 def testzip(self): 1414 """Read all the files and check the CRC.""" 1415 chunk_size = 2 ** 20 1416 for zinfo in self.filelist: 1417 try: 1418 # Read by chunks, to avoid an OverflowError or a 1419 # MemoryError with very large embedded files. 
1420 with self.open(zinfo.filename, "r") as f: 1421 while f.read(chunk_size): # Check CRC-32 1422 pass 1423 except BadZipFile: 1424 return zinfo.filename 1425 1426 def getinfo(self, name): 1427 """Return the instance of ZipInfo given 'name'.""" 1428 info = self.NameToInfo.get(name) 1429 if info is None: 1430 raise KeyError( 1431 'There is no item named %r in the archive' % name) 1432 1433 return info 1434 1435 def setpassword(self, pwd): 1436 """Set default password for encrypted files.""" 1437 if pwd and not isinstance(pwd, bytes): 1438 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1439 if pwd: 1440 self.pwd = pwd 1441 else: 1442 self.pwd = None 1443 1444 @property 1445 def comment(self): 1446 """The comment text associated with the ZIP file.""" 1447 return self._comment 1448 1449 @comment.setter 1450 def comment(self, comment): 1451 if not isinstance(comment, bytes): 1452 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1453 # check for valid comment length 1454 if len(comment) > ZIP_MAX_COMMENT: 1455 import warnings 1456 warnings.warn('Archive comment is too long; truncating to %d bytes' 1457 % ZIP_MAX_COMMENT, stacklevel=2) 1458 comment = comment[:ZIP_MAX_COMMENT] 1459 self._comment = comment 1460 self._didModify = True 1461 1462 def read(self, name, pwd=None): 1463 """Return file bytes for name.""" 1464 with self.open(name, "r", pwd) as fp: 1465 return fp.read() 1466 1467 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1468 """Return file-like object for 'name'. 1469 1470 name is a string for the file name within the ZIP file, or a ZipInfo 1471 object. 1472 1473 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1474 write to a file newly added to the archive. 1475 1476 pwd is the password to decrypt files (only used for reading). 
1477 1478 When writing, if the file size is not known in advance but may exceed 1479 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1480 files. If the size is known in advance, it is best to pass a ZipInfo 1481 instance for name, with zinfo.file_size set. 1482 """ 1483 if mode not in {"r", "w"}: 1484 raise ValueError('open() requires mode "r" or "w"') 1485 if pwd and not isinstance(pwd, bytes): 1486 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1487 if pwd and (mode == "w"): 1488 raise ValueError("pwd is only supported for reading files") 1489 if not self.fp: 1490 raise ValueError( 1491 "Attempt to use ZIP archive that was already closed") 1492 1493 # Make sure we have an info object 1494 if isinstance(name, ZipInfo): 1495 # 'name' is already an info object 1496 zinfo = name 1497 elif mode == 'w': 1498 zinfo = ZipInfo(name) 1499 zinfo.compress_type = self.compression 1500 zinfo._compresslevel = self.compresslevel 1501 else: 1502 # Get info object for name 1503 zinfo = self.getinfo(name) 1504 1505 if mode == 'w': 1506 return self._open_to_write(zinfo, force_zip64=force_zip64) 1507 1508 if self._writing: 1509 raise ValueError("Can't read from the ZIP file while there " 1510 "is an open writing handle on it. 
" 1511 "Close the writing handle before trying to read.") 1512 1513 # Open for reading: 1514 self._fileRefCnt += 1 1515 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1516 self._fpclose, self._lock, lambda: self._writing) 1517 try: 1518 # Skip the file header: 1519 fheader = zef_file.read(sizeFileHeader) 1520 if len(fheader) != sizeFileHeader: 1521 raise BadZipFile("Truncated file header") 1522 fheader = struct.unpack(structFileHeader, fheader) 1523 if fheader[_FH_SIGNATURE] != stringFileHeader: 1524 raise BadZipFile("Bad magic number for file header") 1525 1526 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1527 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1528 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1529 1530 if zinfo.flag_bits & 0x20: 1531 # Zip 2.7: compressed patched data 1532 raise NotImplementedError("compressed patched data (flag bit 5)") 1533 1534 if zinfo.flag_bits & 0x40: 1535 # strong encryption 1536 raise NotImplementedError("strong encryption (flag bit 6)") 1537 1538 if zinfo.flag_bits & 0x800: 1539 # UTF-8 filename 1540 fname_str = fname.decode("utf-8") 1541 else: 1542 fname_str = fname.decode("cp437") 1543 1544 if fname_str != zinfo.orig_filename: 1545 raise BadZipFile( 1546 'File name in directory %r and header %r differ.' 1547 % (zinfo.orig_filename, fname)) 1548 1549 # check for encrypted flag & handle password 1550 is_encrypted = zinfo.flag_bits & 0x1 1551 if is_encrypted: 1552 if not pwd: 1553 pwd = self.pwd 1554 if not pwd: 1555 raise RuntimeError("File %r is encrypted, password " 1556 "required for extraction" % name) 1557 else: 1558 pwd = None 1559 1560 return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1561 except: 1562 zef_file.close() 1563 raise 1564 1565 def _open_to_write(self, zinfo, force_zip64=False): 1566 if force_zip64 and not self._allowZip64: 1567 raise ValueError( 1568 "force_zip64 is True, but allowZip64 was False when opening " 1569 "the ZIP file." 
1570 ) 1571 if self._writing: 1572 raise ValueError("Can't write to the ZIP file while there is " 1573 "another write handle open on it. " 1574 "Close the first handle before opening another.") 1575 1576 # Sizes and CRC are overwritten with correct data after processing the file 1577 if not hasattr(zinfo, 'file_size'): 1578 zinfo.file_size = 0 1579 zinfo.compress_size = 0 1580 zinfo.CRC = 0 1581 1582 zinfo.flag_bits = 0x00 1583 if zinfo.compress_type == ZIP_LZMA: 1584 # Compressed data includes an end-of-stream (EOS) marker 1585 zinfo.flag_bits |= 0x02 1586 if not self._seekable: 1587 zinfo.flag_bits |= 0x08 1588 1589 if not zinfo.external_attr: 1590 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1591 1592 # Compressed size can be larger than uncompressed size 1593 zip64 = self._allowZip64 and \ 1594 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) 1595 1596 if self._seekable: 1597 self.fp.seek(self.start_dir) 1598 zinfo.header_offset = self.fp.tell() 1599 1600 self._writecheck(zinfo) 1601 self._didModify = True 1602 1603 self.fp.write(zinfo.FileHeader(zip64)) 1604 1605 self._writing = True 1606 return _ZipWriteFile(self, zinfo, zip64) 1607 1608 def extract(self, member, path=None, pwd=None): 1609 """Extract a member from the archive to the current working directory, 1610 using its full name. Its file information is extracted as accurately 1611 as possible. `member' may be a filename or a ZipInfo object. You can 1612 specify a different directory using `path'. 1613 """ 1614 if path is None: 1615 path = os.getcwd() 1616 else: 1617 path = os.fspath(path) 1618 1619 return self._extract_member(member, path, pwd) 1620 1621 def extractall(self, path=None, members=None, pwd=None): 1622 """Extract all members from the archive to the current working 1623 directory. `path' specifies a different directory to extract to. 1624 `members' is optional and must be a subset of the list returned 1625 by namelist(). 
1626 """ 1627 if members is None: 1628 members = self.namelist() 1629 1630 if path is None: 1631 path = os.getcwd() 1632 else: 1633 path = os.fspath(path) 1634 1635 for zipinfo in members: 1636 self._extract_member(zipinfo, path, pwd) 1637 1638 @classmethod 1639 def _sanitize_windows_name(cls, arcname, pathsep): 1640 """Replace bad characters and remove trailing dots from parts.""" 1641 table = cls._windows_illegal_name_trans_table 1642 if not table: 1643 illegal = ':<>|"?*' 1644 table = str.maketrans(illegal, '_' * len(illegal)) 1645 cls._windows_illegal_name_trans_table = table 1646 arcname = arcname.translate(table) 1647 # remove trailing dots 1648 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1649 # rejoin, removing empty parts. 1650 arcname = pathsep.join(x for x in arcname if x) 1651 return arcname 1652 1653 def _extract_member(self, member, targetpath, pwd): 1654 """Extract the ZipInfo object 'member' to a physical 1655 file on the path targetpath. 1656 """ 1657 if not isinstance(member, ZipInfo): 1658 member = self.getinfo(member) 1659 1660 # build the destination pathname, replacing 1661 # forward slashes to platform specific separators. 1662 arcname = member.filename.replace('/', os.path.sep) 1663 1664 if os.path.altsep: 1665 arcname = arcname.replace(os.path.altsep, os.path.sep) 1666 # interpret absolute pathname as relative, remove drive letter or 1667 # UNC path, redundant separators, "." and ".." components. 1668 arcname = os.path.splitdrive(arcname)[1] 1669 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1670 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1671 if x not in invalid_path_parts) 1672 if os.path.sep == '\\': 1673 # filter illegal characters on Windows 1674 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1675 1676 targetpath = os.path.join(targetpath, arcname) 1677 targetpath = os.path.normpath(targetpath) 1678 1679 # Create all upper directories if necessary. 
1680 upperdirs = os.path.dirname(targetpath) 1681 if upperdirs and not os.path.exists(upperdirs): 1682 os.makedirs(upperdirs) 1683 1684 if member.is_dir(): 1685 if not os.path.isdir(targetpath): 1686 os.mkdir(targetpath) 1687 return targetpath 1688 1689 with self.open(member, pwd=pwd) as source, \ 1690 open(targetpath, "wb") as target: 1691 shutil.copyfileobj(source, target) 1692 1693 return targetpath 1694 1695 def _writecheck(self, zinfo): 1696 """Check for errors before writing a file to the archive.""" 1697 if zinfo.filename in self.NameToInfo: 1698 import warnings 1699 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1700 if self.mode not in ('w', 'x', 'a'): 1701 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1702 if not self.fp: 1703 raise ValueError( 1704 "Attempt to write ZIP archive that was already closed") 1705 _check_compression(zinfo.compress_type) 1706 if not self._allowZip64: 1707 requires_zip64 = None 1708 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1709 requires_zip64 = "Files count" 1710 elif zinfo.file_size > ZIP64_LIMIT: 1711 requires_zip64 = "Filesize" 1712 elif zinfo.header_offset > ZIP64_LIMIT: 1713 requires_zip64 = "Zipfile size" 1714 if requires_zip64: 1715 raise LargeZipFile(requires_zip64 + 1716 " would require ZIP64 extensions") 1717 1718 def write(self, filename, arcname=None, 1719 compress_type=None, compresslevel=None): 1720 """Put the bytes from filename into the archive under the name 1721 arcname.""" 1722 if not self.fp: 1723 raise ValueError( 1724 "Attempt to write to ZIP archive that was already closed") 1725 if self._writing: 1726 raise ValueError( 1727 "Can't write to ZIP archive while an open writing handle exists" 1728 ) 1729 1730 zinfo = ZipInfo.from_file(filename, arcname) 1731 1732 if zinfo.is_dir(): 1733 zinfo.compress_size = 0 1734 zinfo.CRC = 0 1735 else: 1736 if compress_type is not None: 1737 zinfo.compress_type = compress_type 1738 else: 1739 zinfo.compress_type = self.compression 1740 
1741 if compresslevel is not None: 1742 zinfo._compresslevel = compresslevel 1743 else: 1744 zinfo._compresslevel = self.compresslevel 1745 1746 if zinfo.is_dir(): 1747 with self._lock: 1748 if self._seekable: 1749 self.fp.seek(self.start_dir) 1750 zinfo.header_offset = self.fp.tell() # Start of header bytes 1751 if zinfo.compress_type == ZIP_LZMA: 1752 # Compressed data includes an end-of-stream (EOS) marker 1753 zinfo.flag_bits |= 0x02 1754 1755 self._writecheck(zinfo) 1756 self._didModify = True 1757 1758 self.filelist.append(zinfo) 1759 self.NameToInfo[zinfo.filename] = zinfo 1760 self.fp.write(zinfo.FileHeader(False)) 1761 self.start_dir = self.fp.tell() 1762 else: 1763 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1764 shutil.copyfileobj(src, dest, 1024*8) 1765 1766 def writestr(self, zinfo_or_arcname, data, 1767 compress_type=None, compresslevel=None): 1768 """Write a file into the archive. The contents is 'data', which 1769 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1770 it is encoded as UTF-8 first. 1771 'zinfo_or_arcname' is either a ZipInfo instance or 1772 the name of the file in the archive.""" 1773 if isinstance(data, str): 1774 data = data.encode("utf-8") 1775 if not isinstance(zinfo_or_arcname, ZipInfo): 1776 zinfo = ZipInfo(filename=zinfo_or_arcname, 1777 date_time=time.localtime(time.time())[:6]) 1778 zinfo.compress_type = self.compression 1779 zinfo._compresslevel = self.compresslevel 1780 if zinfo.filename[-1] == '/': 1781 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1782 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1783 else: 1784 zinfo.external_attr = 0o600 << 16 # ?rw------- 1785 else: 1786 zinfo = zinfo_or_arcname 1787 1788 if not self.fp: 1789 raise ValueError( 1790 "Attempt to write to ZIP archive that was already closed") 1791 if self._writing: 1792 raise ValueError( 1793 "Can't write to ZIP archive while an open writing handle exists." 
1794 ) 1795 1796 if compress_type is not None: 1797 zinfo.compress_type = compress_type 1798 1799 if compresslevel is not None: 1800 zinfo._compresslevel = compresslevel 1801 1802 zinfo.file_size = len(data) # Uncompressed size 1803 with self._lock: 1804 with self.open(zinfo, mode='w') as dest: 1805 dest.write(data) 1806 1807 def __del__(self): 1808 """Call the "close()" method in case the user forgot.""" 1809 self.close() 1810 1811 def close(self): 1812 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1813 records.""" 1814 if self.fp is None: 1815 return 1816 1817 if self._writing: 1818 raise ValueError("Can't close the ZIP file while there is " 1819 "an open writing handle on it. " 1820 "Close the writing handle before closing the zip.") 1821 1822 try: 1823 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1824 with self._lock: 1825 if self._seekable: 1826 self.fp.seek(self.start_dir) 1827 self._write_end_record() 1828 finally: 1829 fp = self.fp 1830 self.fp = None 1831 self._fpclose(fp) 1832 1833 def _write_end_record(self): 1834 for zinfo in self.filelist: # write central directory 1835 dt = zinfo.date_time 1836 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1837 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1838 extra = [] 1839 if zinfo.file_size > ZIP64_LIMIT \ 1840 or zinfo.compress_size > ZIP64_LIMIT: 1841 extra.append(zinfo.file_size) 1842 extra.append(zinfo.compress_size) 1843 file_size = 0xffffffff 1844 compress_size = 0xffffffff 1845 else: 1846 file_size = zinfo.file_size 1847 compress_size = zinfo.compress_size 1848 1849 if zinfo.header_offset > ZIP64_LIMIT: 1850 extra.append(zinfo.header_offset) 1851 header_offset = 0xffffffff 1852 else: 1853 header_offset = zinfo.header_offset 1854 1855 extra_data = zinfo.extra 1856 min_version = 0 1857 if extra: 1858 # Append a ZIP64 field to the extra's 1859 extra_data = _strip_extra(extra_data, (1,)) 1860 extra_data = struct.pack( 1861 '<HH' + 'Q'*len(extra), 1862 
1, 8*len(extra), *extra) + extra_data 1863 1864 min_version = ZIP64_VERSION 1865 1866 if zinfo.compress_type == ZIP_BZIP2: 1867 min_version = max(BZIP2_VERSION, min_version) 1868 elif zinfo.compress_type == ZIP_LZMA: 1869 min_version = max(LZMA_VERSION, min_version) 1870 1871 extract_version = max(min_version, zinfo.extract_version) 1872 create_version = max(min_version, zinfo.create_version) 1873 try: 1874 filename, flag_bits = zinfo._encodeFilenameFlags() 1875 centdir = struct.pack(structCentralDir, 1876 stringCentralDir, create_version, 1877 zinfo.create_system, extract_version, zinfo.reserved, 1878 flag_bits, zinfo.compress_type, dostime, dosdate, 1879 zinfo.CRC, compress_size, file_size, 1880 len(filename), len(extra_data), len(zinfo.comment), 1881 0, zinfo.internal_attr, zinfo.external_attr, 1882 header_offset) 1883 except DeprecationWarning: 1884 print((structCentralDir, stringCentralDir, create_version, 1885 zinfo.create_system, extract_version, zinfo.reserved, 1886 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, 1887 zinfo.CRC, compress_size, file_size, 1888 len(zinfo.filename), len(extra_data), len(zinfo.comment), 1889 0, zinfo.internal_attr, zinfo.external_attr, 1890 header_offset), file=sys.stderr) 1891 raise 1892 self.fp.write(centdir) 1893 self.fp.write(filename) 1894 self.fp.write(extra_data) 1895 self.fp.write(zinfo.comment) 1896 1897 pos2 = self.fp.tell() 1898 # Write end-of-zip-archive record 1899 centDirCount = len(self.filelist) 1900 centDirSize = pos2 - self.start_dir 1901 centDirOffset = self.start_dir 1902 requires_zip64 = None 1903 if centDirCount > ZIP_FILECOUNT_LIMIT: 1904 requires_zip64 = "Files count" 1905 elif centDirOffset > ZIP64_LIMIT: 1906 requires_zip64 = "Central directory offset" 1907 elif centDirSize > ZIP64_LIMIT: 1908 requires_zip64 = "Central directory size" 1909 if requires_zip64: 1910 # Need to write the ZIP64 end-of-archive records 1911 if not self._allowZip64: 1912 raise LargeZipFile(requires_zip64 + 1913 " would 
require ZIP64 extensions") 1914 zip64endrec = struct.pack( 1915 structEndArchive64, stringEndArchive64, 1916 44, 45, 45, 0, 0, centDirCount, centDirCount, 1917 centDirSize, centDirOffset) 1918 self.fp.write(zip64endrec) 1919 1920 zip64locrec = struct.pack( 1921 structEndArchive64Locator, 1922 stringEndArchive64Locator, 0, pos2, 1) 1923 self.fp.write(zip64locrec) 1924 centDirCount = min(centDirCount, 0xFFFF) 1925 centDirSize = min(centDirSize, 0xFFFFFFFF) 1926 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1927 1928 endrec = struct.pack(structEndArchive, stringEndArchive, 1929 0, 0, centDirCount, centDirCount, 1930 centDirSize, centDirOffset, len(self._comment)) 1931 self.fp.write(endrec) 1932 self.fp.write(self._comment) 1933 self.fp.flush() 1934 1935 def _fpclose(self, fp): 1936 assert self._fileRefCnt > 0 1937 self._fileRefCnt -= 1 1938 if not self._fileRefCnt and not self._filePassed: 1939 fp.close() 1940 1941 1942class PyZipFile(ZipFile): 1943 """Class to create ZIP archives with Python library files and packages.""" 1944 1945 def __init__(self, file, mode="r", compression=ZIP_STORED, 1946 allowZip64=True, optimize=-1): 1947 ZipFile.__init__(self, file, mode=mode, compression=compression, 1948 allowZip64=allowZip64) 1949 self._optimize = optimize 1950 1951 def writepy(self, pathname, basename="", filterfunc=None): 1952 """Add all files from "pathname" to the ZIP archive. 1953 1954 If pathname is a package directory, search the directory and 1955 all package subdirectories recursively for all *.py and enter 1956 the modules into the archive. If pathname is a plain 1957 directory, listdir *.py and enter all modules. Else, pathname 1958 must be a Python *.py file and the module will be put into the 1959 archive. Added modules are always module.pyc. 1960 This method will compile the module.py into module.pyc if 1961 necessary. 1962 If filterfunc(pathname) is given, it is called with every argument. 1963 When it is False, the file or directory is skipped. 
1964 """ 1965 pathname = os.fspath(pathname) 1966 if filterfunc and not filterfunc(pathname): 1967 if self.debug: 1968 label = 'path' if os.path.isdir(pathname) else 'file' 1969 print('%s %r skipped by filterfunc' % (label, pathname)) 1970 return 1971 dir, name = os.path.split(pathname) 1972 if os.path.isdir(pathname): 1973 initname = os.path.join(pathname, "__init__.py") 1974 if os.path.isfile(initname): 1975 # This is a package directory, add it 1976 if basename: 1977 basename = "%s/%s" % (basename, name) 1978 else: 1979 basename = name 1980 if self.debug: 1981 print("Adding package in", pathname, "as", basename) 1982 fname, arcname = self._get_codename(initname[0:-3], basename) 1983 if self.debug: 1984 print("Adding", arcname) 1985 self.write(fname, arcname) 1986 dirlist = sorted(os.listdir(pathname)) 1987 dirlist.remove("__init__.py") 1988 # Add all *.py files and package subdirectories 1989 for filename in dirlist: 1990 path = os.path.join(pathname, filename) 1991 root, ext = os.path.splitext(filename) 1992 if os.path.isdir(path): 1993 if os.path.isfile(os.path.join(path, "__init__.py")): 1994 # This is a package directory, add it 1995 self.writepy(path, basename, 1996 filterfunc=filterfunc) # Recursive call 1997 elif ext == ".py": 1998 if filterfunc and not filterfunc(path): 1999 if self.debug: 2000 print('file %r skipped by filterfunc' % path) 2001 continue 2002 fname, arcname = self._get_codename(path[0:-3], 2003 basename) 2004 if self.debug: 2005 print("Adding", arcname) 2006 self.write(fname, arcname) 2007 else: 2008 # This is NOT a package directory, add its files at top level 2009 if self.debug: 2010 print("Adding files from directory", pathname) 2011 for filename in sorted(os.listdir(pathname)): 2012 path = os.path.join(pathname, filename) 2013 root, ext = os.path.splitext(filename) 2014 if ext == ".py": 2015 if filterfunc and not filterfunc(path): 2016 if self.debug: 2017 print('file %r skipped by filterfunc' % path) 2018 continue 2019 fname, arcname 
= self._get_codename(path[0:-3], 2020 basename) 2021 if self.debug: 2022 print("Adding", arcname) 2023 self.write(fname, arcname) 2024 else: 2025 if pathname[-3:] != ".py": 2026 raise RuntimeError( 2027 'Files added with writepy() must end with ".py"') 2028 fname, arcname = self._get_codename(pathname[0:-3], basename) 2029 if self.debug: 2030 print("Adding file", arcname) 2031 self.write(fname, arcname) 2032 2033 def _get_codename(self, pathname, basename): 2034 """Return (filename, archivename) for the path. 2035 2036 Given a module name path, return the correct file path and 2037 archive name, compiling if necessary. For example, given 2038 /python/lib/string, return (/python/lib/string.pyc, string). 2039 """ 2040 def _compile(file, optimize=-1): 2041 import py_compile 2042 if self.debug: 2043 print("Compiling", file) 2044 try: 2045 py_compile.compile(file, doraise=True, optimize=optimize) 2046 except py_compile.PyCompileError as err: 2047 print(err.msg) 2048 return False 2049 return True 2050 2051 file_py = pathname + ".py" 2052 file_pyc = pathname + ".pyc" 2053 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2054 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2055 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2056 if self._optimize == -1: 2057 # legacy mode: use whatever file is present 2058 if (os.path.isfile(file_pyc) and 2059 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2060 # Use .pyc file. 2061 arcname = fname = file_pyc 2062 elif (os.path.isfile(pycache_opt0) and 2063 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2064 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2065 # file name in the archive. 
2066 fname = pycache_opt0 2067 arcname = file_pyc 2068 elif (os.path.isfile(pycache_opt1) and 2069 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2070 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2071 # file name in the archive. 2072 fname = pycache_opt1 2073 arcname = file_pyc 2074 elif (os.path.isfile(pycache_opt2) and 2075 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2076 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2077 # file name in the archive. 2078 fname = pycache_opt2 2079 arcname = file_pyc 2080 else: 2081 # Compile py into PEP 3147 pyc file. 2082 if _compile(file_py): 2083 if sys.flags.optimize == 0: 2084 fname = pycache_opt0 2085 elif sys.flags.optimize == 1: 2086 fname = pycache_opt1 2087 else: 2088 fname = pycache_opt2 2089 arcname = file_pyc 2090 else: 2091 fname = arcname = file_py 2092 else: 2093 # new mode: use given optimization level 2094 if self._optimize == 0: 2095 fname = pycache_opt0 2096 arcname = file_pyc 2097 else: 2098 arcname = file_pyc 2099 if self._optimize == 1: 2100 fname = pycache_opt1 2101 elif self._optimize == 2: 2102 fname = pycache_opt2 2103 else: 2104 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2105 raise ValueError(msg) 2106 if not (os.path.isfile(fname) and 2107 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2108 if not _compile(file_py, optimize=self._optimize): 2109 fname = arcname = file_py 2110 archivename = os.path.split(arcname)[1] 2111 if basename: 2112 archivename = "%s/%s" % (basename, archivename) 2113 return (fname, archivename) 2114 2115 2116def main(args=None): 2117 import argparse 2118 2119 description = 'A simple command-line interface for zipfile module.' 
def main(args=None):
    """Run the module's command-line interface.

    *args* is the argument list handed to argparse (None lets argparse
    fall back to the process arguments).  Exactly one mode must be given:

      -l/--list <zipfile>                 print a listing of the archive
      -e/--extract <zipfile> <output_dir> extract everything into a directory
      -c/--create <name> [<file> ...]     create an archive from files/dirs
      -t/--test <zipfile>                 check the archive's members
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        source, target_dir = options.extract
        with ZipFile(source, 'r') as archive:
            archive.extractall(target_dir)
        return

    if options.create is None:
        return

    # First positional value is the archive to create, the rest are inputs.
    zip_name, *sources = options.create

    def add_to_zip(archive, path, zippath):
        # Regular files are stored deflated; directories are walked in
        # sorted order; anything else is silently ignored.
        if os.path.isfile(path):
            archive.write(path, zippath, ZIP_DEFLATED)
            return
        if not os.path.isdir(path):
            return
        if zippath:
            # Record the directory itself, except for the archive root.
            archive.write(path, zippath)
        for entry in sorted(os.listdir(path)):
            add_to_zip(archive,
                       os.path.join(path, entry),
                       os.path.join(zippath, entry))

    with ZipFile(zip_name, 'w') as archive:
        for path in sources:
            # A trailing separator leaves an empty basename; use the
            # directory's own name in that case.
            zippath = (os.path.basename(path) or
                       os.path.basename(os.path.dirname(path)))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_to_zip(archive, path, zippath)


if __name__ == "__main__":
    main()