1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import binascii 7import importlib.util 8import io 9import itertools 10import os 11import posixpath 12import shutil 13import stat 14import struct 15import sys 16import threading 17import time 18import contextlib 19import pathlib 20 21try: 22 import zlib # We may need its compression method 23 crc32 = zlib.crc32 24except ImportError: 25 zlib = None 26 crc32 = binascii.crc32 27 28try: 29 import bz2 # We may need its compression method 30except ImportError: 31 bz2 = None 32 33try: 34 import lzma # We may need its compression method 35except ImportError: 36 lzma = None 37 38__all__ = ["BadZipFile", "BadZipfile", "error", 39 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 40 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 41 "Path"] 42 43class BadZipFile(Exception): 44 pass 45 46 47class LargeZipFile(Exception): 48 """ 49 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 50 and those extensions are disabled. 51 """ 52 53error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 54 55 56ZIP64_LIMIT = (1 << 31) - 1 57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 58ZIP_MAX_COMMENT = (1 << 16) - 1 59 60# constants for Zip file compression methods 61ZIP_STORED = 0 62ZIP_DEFLATED = 8 63ZIP_BZIP2 = 12 64ZIP_LZMA = 14 65# Other ZIP compression methods not supported 66 67DEFAULT_VERSION = 20 68ZIP64_VERSION = 45 69BZIP2_VERSION = 46 70LZMA_VERSION = 63 71# we recognize (but not necessarily support) all features up to that version 72MAX_EXTRACT_VERSION = 63 73 74# Below are some formats and associated data for reading/writing headers using 75# the struct module. The names and structures of headers/records are those used 76# in the PKWARE description of the ZIP file format: 77# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 78# (URL valid as of January 2008) 79 80# The "end of central directory" structure, magic number, size, and indices 81# (section V.I in the format document) 82structEndArchive = b"<4s4H2LH" 83stringEndArchive = b"PK\005\006" 84sizeEndCentDir = struct.calcsize(structEndArchive) 85 86_ECD_SIGNATURE = 0 87_ECD_DISK_NUMBER = 1 88_ECD_DISK_START = 2 89_ECD_ENTRIES_THIS_DISK = 3 90_ECD_ENTRIES_TOTAL = 4 91_ECD_SIZE = 5 92_ECD_OFFSET = 6 93_ECD_COMMENT_SIZE = 7 94# These last two indices are not part of the structure as defined in the 95# spec, but they are used internally by this module as a convenience 96_ECD_COMMENT = 8 97_ECD_LOCATION = 9 98 99# The "central directory" structure, magic number, size, and indices 100# of entries in the structure (section V.F in the format document) 101structCentralDir = "<4s4B4HL2L5H2L" 102stringCentralDir = b"PK\001\002" 103sizeCentralDir = struct.calcsize(structCentralDir) 104 105# indexes of entries in the central directory structure 106_CD_SIGNATURE = 0 107_CD_CREATE_VERSION = 1 108_CD_CREATE_SYSTEM = 2 109_CD_EXTRACT_VERSION = 3 110_CD_EXTRACT_SYSTEM = 4 111_CD_FLAG_BITS = 5 112_CD_COMPRESS_TYPE = 6 113_CD_TIME = 7 114_CD_DATE = 8 115_CD_CRC = 9 116_CD_COMPRESSED_SIZE = 10 117_CD_UNCOMPRESSED_SIZE = 11 118_CD_FILENAME_LENGTH = 12 119_CD_EXTRA_FIELD_LENGTH = 13 120_CD_COMMENT_LENGTH = 14 121_CD_DISK_NUMBER_START = 15 122_CD_INTERNAL_FILE_ATTRIBUTES = 16 123_CD_EXTERNAL_FILE_ATTRIBUTES = 17 124_CD_LOCAL_HEADER_OFFSET = 18 125 126# General purpose bit flags 127# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes) 128_MASK_ENCRYPTED = 1 << 0 129# Bits 1 and 2 have different meanings depending on the compression used. 130_MASK_COMPRESS_OPTION_1 = 1 << 1 131# _MASK_COMPRESS_OPTION_2 = 1 << 2 132# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed 133# size are zero in the local header and the real values are written in the data 134# descriptor immediately following the compressed data. 135_MASK_USE_DATA_DESCRIPTOR = 1 << 3 136# Bit 4: Reserved for use with compression method 8, for enhanced deflating. 137# _MASK_RESERVED_BIT_4 = 1 << 4 138_MASK_COMPRESSED_PATCH = 1 << 5 139_MASK_STRONG_ENCRYPTION = 1 << 6 140# _MASK_UNUSED_BIT_7 = 1 << 7 141# _MASK_UNUSED_BIT_8 = 1 << 8 142# _MASK_UNUSED_BIT_9 = 1 << 9 143# _MASK_UNUSED_BIT_10 = 1 << 10 144_MASK_UTF_FILENAME = 1 << 11 145# Bit 12: Reserved by PKWARE for enhanced compression. 146# _MASK_RESERVED_BIT_12 = 1 << 12 147# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13 148# Bit 14, 15: Reserved by PKWARE 149# _MASK_RESERVED_BIT_14 = 1 << 14 150# _MASK_RESERVED_BIT_15 = 1 << 15 151 152# The "local file header" structure, magic number, size, and indices 153# (section V.A in the format document) 154structFileHeader = "<4s2B4HL2L2H" 155stringFileHeader = b"PK\003\004" 156sizeFileHeader = struct.calcsize(structFileHeader) 157 158_FH_SIGNATURE = 0 159_FH_EXTRACT_VERSION = 1 160_FH_EXTRACT_SYSTEM = 2 161_FH_GENERAL_PURPOSE_FLAG_BITS = 3 162_FH_COMPRESSION_METHOD = 4 163_FH_LAST_MOD_TIME = 5 164_FH_LAST_MOD_DATE = 6 165_FH_CRC = 7 166_FH_COMPRESSED_SIZE = 8 167_FH_UNCOMPRESSED_SIZE = 9 168_FH_FILENAME_LENGTH = 10 169_FH_EXTRA_FIELD_LENGTH = 11 170 171# The "Zip64 end of central directory locator" structure, magic number, and size 172structEndArchive64Locator = "<4sLQL" 173stringEndArchive64Locator = b"PK\x06\x07" 174sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 175 176# The "Zip64 end of central directory" record, magic number, size, and indices 177# (section V.G in the format document) 178structEndArchive64 = "<4sQ2H2L4Q" 179stringEndArchive64 = b"PK\x06\x06" 180sizeEndCentDir64 = struct.calcsize(structEndArchive64) 181 182_CD64_SIGNATURE = 0 183_CD64_DIRECTORY_RECSIZE = 1 184_CD64_CREATE_VERSION = 2 185_CD64_EXTRACT_VERSION = 3 186_CD64_DISK_NUMBER = 4 187_CD64_DISK_NUMBER_START = 5 188_CD64_NUMBER_ENTRIES_THIS_DISK = 6 189_CD64_NUMBER_ENTRIES_TOTAL = 7 190_CD64_DIRECTORY_SIZE = 8 191_CD64_OFFSET_START_CENTDIR = 9 192 193_DD_SIGNATURE = 0x08074b50 194 195_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 196 197def _strip_extra(extra, xids): 198 # Remove Extra Fields with specified IDs. 199 unpack = _EXTRA_FIELD_STRUCT.unpack 200 modified = False 201 buffer = [] 202 start = i = 0 203 while i + 4 <= len(extra): 204 xid, xlen = unpack(extra[i : i + 4]) 205 j = i + 4 + xlen 206 if xid in xids: 207 if i != start: 208 buffer.append(extra[start : i]) 209 start = j 210 modified = True 211 i = j 212 if not modified: 213 return extra 214 return b''.join(buffer) 215 216def _check_zipfile(fp): 217 try: 218 if _EndRecData(fp): 219 return True # file has correct magic number 220 except OSError: 221 pass 222 return False 223 224def is_zipfile(filename): 225 """Quickly see if a file is a ZIP file by checking the magic number. 226 227 The filename argument may be a file or file-like object too. 228 """ 229 result = False 230 try: 231 if hasattr(filename, "read"): 232 result = _check_zipfile(fp=filename) 233 else: 234 with open(filename, "rb") as fp: 235 result = _check_zipfile(fp) 236 except OSError: 237 pass 238 return result 239 240def _EndRecData64(fpin, offset, endrec): 241 """ 242 Read the ZIP64 end-of-archive records and use that to update endrec 243 """ 244 try: 245 fpin.seek(offset - sizeEndCentDir64Locator, 2) 246 except OSError: 247 # If the seek fails, the file is not large enough to contain a ZIP64 248 # end-of-archive record, so just return the end record we were given. 249 return endrec 250 251 data = fpin.read(sizeEndCentDir64Locator) 252 if len(data) != sizeEndCentDir64Locator: 253 return endrec 254 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 255 if sig != stringEndArchive64Locator: 256 return endrec 257 258 if diskno != 0 or disks > 1: 259 raise BadZipFile("zipfiles that span multiple disks are not supported") 260 261 # Assume no 'zip64 extensible data' 262 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 263 data = fpin.read(sizeEndCentDir64) 264 if len(data) != sizeEndCentDir64: 265 return endrec 266 sig, sz, create_version, read_version, disk_num, disk_dir, \ 267 dircount, dircount2, dirsize, diroffset = \ 268 struct.unpack(structEndArchive64, data) 269 if sig != stringEndArchive64: 270 return endrec 271 272 # Update the original endrec using data from the ZIP64 record 273 endrec[_ECD_SIGNATURE] = sig 274 endrec[_ECD_DISK_NUMBER] = disk_num 275 endrec[_ECD_DISK_START] = disk_dir 276 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 277 endrec[_ECD_ENTRIES_TOTAL] = dircount2 278 endrec[_ECD_SIZE] = dirsize 279 endrec[_ECD_OFFSET] = diroffset 280 return endrec 281 282 283def _EndRecData(fpin): 284 """Return data from the "End of Central Directory" record, or None. 285 286 The data is a list of the nine items in the ZIP "End of central dir" 287 record followed by a tenth item, the file seek offset of this record.""" 288 289 # Determine file size 290 fpin.seek(0, 2) 291 filesize = fpin.tell() 292 293 # Check to see if this is ZIP file with no archive comment (the 294 # "end of central directory" structure should be the last item in the 295 # file if this is the case). 296 try: 297 fpin.seek(-sizeEndCentDir, 2) 298 except OSError: 299 return None 300 data = fpin.read() 301 if (len(data) == sizeEndCentDir and 302 data[0:4] == stringEndArchive and 303 data[-2:] == b"\000\000"): 304 # the signature is correct and there's no comment, unpack structure 305 endrec = struct.unpack(structEndArchive, data) 306 endrec=list(endrec) 307 308 # Append a blank comment and record start offset 309 endrec.append(b"") 310 endrec.append(filesize - sizeEndCentDir) 311 312 # Try to read the "Zip64 end of central directory" structure 313 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 314 315 # Either this is not a ZIP file, or it is a ZIP file with an archive 316 # comment. Search the end of the file for the "end of central directory" 317 # record signature. The comment is the last item in the ZIP file and may be 318 # up to 64K long. It is assumed that the "end of central directory" magic 319 # number does not appear in the comment. 320 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 321 fpin.seek(maxCommentStart, 0) 322 data = fpin.read() 323 start = data.rfind(stringEndArchive) 324 if start >= 0: 325 # found the magic number; attempt to unpack and interpret 326 recData = data[start:start+sizeEndCentDir] 327 if len(recData) != sizeEndCentDir: 328 # Zip file is corrupted. 329 return None 330 endrec = list(struct.unpack(structEndArchive, recData)) 331 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 332 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 333 endrec.append(comment) 334 endrec.append(maxCommentStart + start) 335 336 # Try to read the "Zip64 end of central directory" structure 337 return _EndRecData64(fpin, maxCommentStart + start - filesize, 338 endrec) 339 340 # Unable to find a valid end of central directory structure 341 return None 342 343 344class ZipInfo (object): 345 """Class with attributes describing each file in the ZIP archive.""" 346 347 __slots__ = ( 348 'orig_filename', 349 'filename', 350 'date_time', 351 'compress_type', 352 '_compresslevel', 353 'comment', 354 'extra', 355 'create_system', 356 'create_version', 357 'extract_version', 358 'reserved', 359 'flag_bits', 360 'volume', 361 'internal_attr', 362 'external_attr', 363 'header_offset', 364 'CRC', 365 'compress_size', 366 'file_size', 367 '_raw_time', 368 ) 369 370 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 371 self.orig_filename = filename # Original file name in archive 372 373 # Terminate the file name at the first null byte. Null bytes in file 374 # names are used as tricks by viruses in archives. 375 null_byte = filename.find(chr(0)) 376 if null_byte >= 0: 377 filename = filename[0:null_byte] 378 # This is used to ensure paths in generated ZIP files always use 379 # forward slashes as the directory separator, as required by the 380 # ZIP format specification. 381 if os.sep != "/" and os.sep in filename: 382 filename = filename.replace(os.sep, "/") 383 384 self.filename = filename # Normalized file name 385 self.date_time = date_time # year, month, day, hour, min, sec 386 387 if date_time[0] < 1980: 388 raise ValueError('ZIP does not support timestamps before 1980') 389 390 # Standard values: 391 self.compress_type = ZIP_STORED # Type of compression for the file 392 self._compresslevel = None # Level for the compressor 393 self.comment = b"" # Comment for each file 394 self.extra = b"" # ZIP extra data 395 if sys.platform == 'win32': 396 self.create_system = 0 # System which created ZIP archive 397 else: 398 # Assume everything else is unix-y 399 self.create_system = 3 # System which created ZIP archive 400 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 401 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 402 self.reserved = 0 # Must be zero 403 self.flag_bits = 0 # ZIP flag bits 404 self.volume = 0 # Volume number of file header 405 self.internal_attr = 0 # Internal attributes 406 self.external_attr = 0 # External file attributes 407 self.compress_size = 0 # Size of the compressed file 408 self.file_size = 0 # Size of the uncompressed file 409 # Other attributes are set by class ZipFile: 410 # header_offset Byte offset to the file header 411 # CRC CRC-32 of the uncompressed file 412 413 def __repr__(self): 414 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 415 if self.compress_type != ZIP_STORED: 416 result.append(' compress_type=%s' % 417 compressor_names.get(self.compress_type, 418 self.compress_type)) 419 hi = self.external_attr >> 16 420 lo = self.external_attr & 0xFFFF 421 if hi: 422 result.append(' filemode=%r' % stat.filemode(hi)) 423 if lo: 424 result.append(' external_attr=%#x' % lo) 425 isdir = self.is_dir() 426 if not isdir or self.file_size: 427 result.append(' file_size=%r' % self.file_size) 428 if ((not isdir or self.compress_size) and 429 (self.compress_type != ZIP_STORED or 430 self.file_size != self.compress_size)): 431 result.append(' compress_size=%r' % self.compress_size) 432 result.append('>') 433 return ''.join(result) 434 435 def FileHeader(self, zip64=None): 436 """Return the per-file header as a bytes object.""" 437 dt = self.date_time 438 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 439 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 440 if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 441 # Set these to zero because we write them after the file data 442 CRC = compress_size = file_size = 0 443 else: 444 CRC = self.CRC 445 compress_size = self.compress_size 446 file_size = self.file_size 447 448 extra = self.extra 449 450 min_version = 0 451 if zip64 is None: 452 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 453 if zip64: 454 fmt = '<HHQQ' 455 extra = extra + struct.pack(fmt, 456 1, struct.calcsize(fmt)-4, file_size, compress_size) 457 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 458 if not zip64: 459 raise LargeZipFile("Filesize would require ZIP64 extensions") 460 # File is larger than what fits into a 4 byte integer, 461 # fall back to the ZIP64 extension 462 file_size = 0xffffffff 463 compress_size = 0xffffffff 464 min_version = ZIP64_VERSION 465 466 if self.compress_type == ZIP_BZIP2: 467 min_version = max(BZIP2_VERSION, min_version) 468 elif self.compress_type == ZIP_LZMA: 469 min_version = max(LZMA_VERSION, min_version) 470 471 self.extract_version = max(min_version, self.extract_version) 472 self.create_version = max(min_version, self.create_version) 473 filename, flag_bits = self._encodeFilenameFlags() 474 header = struct.pack(structFileHeader, stringFileHeader, 475 self.extract_version, self.reserved, flag_bits, 476 self.compress_type, dostime, dosdate, CRC, 477 compress_size, file_size, 478 len(filename), len(extra)) 479 return header + filename + extra 480 481 def _encodeFilenameFlags(self): 482 try: 483 return self.filename.encode('ascii'), self.flag_bits 484 except UnicodeEncodeError: 485 return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME 486 487 def _decodeExtra(self): 488 # Try to decode the extra field. 489 extra = self.extra 490 unpack = struct.unpack 491 while len(extra) >= 4: 492 tp, ln = unpack('<HH', extra[:4]) 493 if ln+4 > len(extra): 494 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 495 if tp == 0x0001: 496 data = extra[4:ln+4] 497 # ZIP64 extension (large files and/or large archives) 498 try: 499 if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF): 500 field = "File size" 501 self.file_size, = unpack('<Q', data[:8]) 502 data = data[8:] 503 if self.compress_size == 0xFFFF_FFFF: 504 field = "Compress size" 505 self.compress_size, = unpack('<Q', data[:8]) 506 data = data[8:] 507 if self.header_offset == 0xFFFF_FFFF: 508 field = "Header offset" 509 self.header_offset, = unpack('<Q', data[:8]) 510 except struct.error: 511 raise BadZipFile(f"Corrupt zip64 extra field. " 512 f"{field} not found.") from None 513 514 extra = extra[ln+4:] 515 516 @classmethod 517 def from_file(cls, filename, arcname=None, *, strict_timestamps=True): 518 """Construct an appropriate ZipInfo for a file on the filesystem. 519 520 filename should be the path to a file or directory on the filesystem. 521 522 arcname is the name which it will have within the archive (by default, 523 this will be the same as filename, but without a drive letter and with 524 leading path separators removed). 525 """ 526 if isinstance(filename, os.PathLike): 527 filename = os.fspath(filename) 528 st = os.stat(filename) 529 isdir = stat.S_ISDIR(st.st_mode) 530 mtime = time.localtime(st.st_mtime) 531 date_time = mtime[0:6] 532 if not strict_timestamps and date_time[0] < 1980: 533 date_time = (1980, 1, 1, 0, 0, 0) 534 elif not strict_timestamps and date_time[0] > 2107: 535 date_time = (2107, 12, 31, 23, 59, 59) 536 # Create ZipInfo instance to store file information 537 if arcname is None: 538 arcname = filename 539 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 540 while arcname[0] in (os.sep, os.altsep): 541 arcname = arcname[1:] 542 if isdir: 543 arcname += '/' 544 zinfo = cls(arcname, date_time) 545 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 546 if isdir: 547 zinfo.file_size = 0 548 zinfo.external_attr |= 0x10 # MS-DOS directory flag 549 else: 550 zinfo.file_size = st.st_size 551 552 return zinfo 553 554 def is_dir(self): 555 """Return True if this archive member is a directory.""" 556 return self.filename[-1] == '/' 557 558 559# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 560# internal keys. We noticed that a direct implementation is faster than 561# relying on binascii.crc32(). 562 563_crctable = None 564def _gen_crc(crc): 565 for j in range(8): 566 if crc & 1: 567 crc = (crc >> 1) ^ 0xEDB88320 568 else: 569 crc >>= 1 570 return crc 571 572# ZIP supports a password-based form of encryption. Even though known 573# plaintext attacks have been found against it, it is still useful 574# to be able to get data out of such a file. 575# 576# Usage: 577# zd = _ZipDecrypter(mypwd) 578# plain_bytes = zd(cypher_bytes) 579 580def _ZipDecrypter(pwd): 581 key0 = 305419896 582 key1 = 591751049 583 key2 = 878082192 584 585 global _crctable 586 if _crctable is None: 587 _crctable = list(map(_gen_crc, range(256))) 588 crctable = _crctable 589 590 def crc32(ch, crc): 591 """Compute the CRC32 primitive on one byte.""" 592 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 593 594 def update_keys(c): 595 nonlocal key0, key1, key2 596 key0 = crc32(c, key0) 597 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 598 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 599 key2 = crc32(key1 >> 24, key2) 600 601 for p in pwd: 602 update_keys(p) 603 604 def decrypter(data): 605 """Decrypt a bytes object.""" 606 result = bytearray() 607 append = result.append 608 for c in data: 609 k = key2 | 2 610 c ^= ((k * (k^1)) >> 8) & 0xFF 611 update_keys(c) 612 append(c) 613 return bytes(result) 614 615 return decrypter 616 617 618class LZMACompressor: 619 620 def __init__(self): 621 self._comp = None 622 623 def _init(self): 624 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 625 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 626 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 627 ]) 628 return struct.pack('<BBH', 9, 4, len(props)) + props 629 630 def compress(self, data): 631 if self._comp is None: 632 return self._init() + self._comp.compress(data) 633 return self._comp.compress(data) 634 635 def flush(self): 636 if self._comp is None: 637 return self._init() + self._comp.flush() 638 return self._comp.flush() 639 640 641class LZMADecompressor: 642 643 def __init__(self): 644 self._decomp = None 645 self._unconsumed = b'' 646 self.eof = False 647 648 def decompress(self, data): 649 if self._decomp is None: 650 self._unconsumed += data 651 if len(self._unconsumed) <= 4: 652 return b'' 653 psize, = struct.unpack('<H', self._unconsumed[2:4]) 654 if len(self._unconsumed) <= 4 + psize: 655 return b'' 656 657 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 658 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 659 self._unconsumed[4:4 + psize]) 660 ]) 661 data = self._unconsumed[4 + psize:] 662 del self._unconsumed 663 664 result = self._decomp.decompress(data) 665 self.eof = self._decomp.eof 666 return result 667 668 669compressor_names = { 670 0: 'store', 671 1: 'shrink', 672 2: 'reduce', 673 3: 'reduce', 674 4: 'reduce', 675 5: 'reduce', 676 6: 'implode', 677 7: 'tokenize', 678 8: 'deflate', 679 9: 'deflate64', 680 10: 'implode', 681 12: 'bzip2', 682 14: 'lzma', 683 18: 'terse', 684 19: 'lz77', 685 97: 'wavpack', 686 98: 'ppmd', 687} 688 689def _check_compression(compression): 690 if compression == ZIP_STORED: 691 pass 692 elif compression == ZIP_DEFLATED: 693 if not zlib: 694 raise RuntimeError( 695 "Compression requires the (missing) zlib module") 696 elif compression == ZIP_BZIP2: 697 if not bz2: 698 raise RuntimeError( 699 "Compression requires the (missing) bz2 module") 700 elif compression == ZIP_LZMA: 701 if not lzma: 702 raise RuntimeError( 703 "Compression requires the (missing) lzma module") 704 else: 705 raise NotImplementedError("That compression method is not supported") 706 707 708def _get_compressor(compress_type, compresslevel=None): 709 if compress_type == ZIP_DEFLATED: 710 if compresslevel is not None: 711 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 712 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 713 elif compress_type == ZIP_BZIP2: 714 if compresslevel is not None: 715 return bz2.BZ2Compressor(compresslevel) 716 return bz2.BZ2Compressor() 717 # compresslevel is ignored for ZIP_LZMA 718 elif compress_type == ZIP_LZMA: 719 return LZMACompressor() 720 else: 721 return None 722 723 724def _get_decompressor(compress_type): 725 _check_compression(compress_type) 726 if compress_type == ZIP_STORED: 727 return None 728 elif compress_type == ZIP_DEFLATED: 729 return zlib.decompressobj(-15) 730 elif compress_type == ZIP_BZIP2: 731 return bz2.BZ2Decompressor() 732 elif compress_type == ZIP_LZMA: 733 return LZMADecompressor() 734 else: 735 descr = compressor_names.get(compress_type) 736 if descr: 737 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 738 else: 739 raise NotImplementedError("compression type %d" % (compress_type,)) 740 741 742class _SharedFile: 743 def __init__(self, file, pos, close, lock, writing): 744 self._file = file 745 self._pos = pos 746 self._close = close 747 self._lock = lock 748 self._writing = writing 749 self.seekable = file.seekable 750 self.tell = file.tell 751 752 def seek(self, offset, whence=0): 753 with self._lock: 754 if self._writing(): 755 raise ValueError("Can't reposition in the ZIP file while " 756 "there is an open writing handle on it. " 757 "Close the writing handle before trying to read.") 758 self._file.seek(offset, whence) 759 self._pos = self._file.tell() 760 return self._pos 761 762 def read(self, n=-1): 763 with self._lock: 764 if self._writing(): 765 raise ValueError("Can't read from the ZIP file while there " 766 "is an open writing handle on it. " 767 "Close the writing handle before trying to read.") 768 self._file.seek(self._pos) 769 data = self._file.read(n) 770 self._pos = self._file.tell() 771 return data 772 773 def close(self): 774 if self._file is not None: 775 fileobj = self._file 776 self._file = None 777 self._close(fileobj) 778 779# Provide the tell method for unseekable stream 780class _Tellable: 781 def __init__(self, fp): 782 self.fp = fp 783 self.offset = 0 784 785 def write(self, data): 786 n = self.fp.write(data) 787 self.offset += n 788 return n 789 790 def tell(self): 791 return self.offset 792 793 def flush(self): 794 self.fp.flush() 795 796 def close(self): 797 self.fp.close() 798 799 800class ZipExtFile(io.BufferedIOBase): 801 """File-like object for reading an archive member. 802 Is returned by ZipFile.open(). 803 """ 804 805 # Max size supported by decompressor. 806 MAX_N = 1 << 31 - 1 807 808 # Read from compressed files in 4k blocks. 809 MIN_READ_SIZE = 4096 810 811 # Chunk size to read during seek 812 MAX_SEEK_READ = 1 << 24 813 814 def __init__(self, fileobj, mode, zipinfo, pwd=None, 815 close_fileobj=False): 816 self._fileobj = fileobj 817 self._pwd = pwd 818 self._close_fileobj = close_fileobj 819 820 self._compress_type = zipinfo.compress_type 821 self._compress_left = zipinfo.compress_size 822 self._left = zipinfo.file_size 823 824 self._decompressor = _get_decompressor(self._compress_type) 825 826 self._eof = False 827 self._readbuffer = b'' 828 self._offset = 0 829 830 self.newlines = None 831 832 self.mode = mode 833 self.name = zipinfo.filename 834 835 if hasattr(zipinfo, 'CRC'): 836 self._expected_crc = zipinfo.CRC 837 self._running_crc = crc32(b'') 838 else: 839 self._expected_crc = None 840 841 self._seekable = False 842 try: 843 if fileobj.seekable(): 844 self._orig_compress_start = fileobj.tell() 845 self._orig_compress_size = zipinfo.compress_size 846 self._orig_file_size = zipinfo.file_size 847 self._orig_start_crc = self._running_crc 848 self._seekable = True 849 except AttributeError: 850 pass 851 852 self._decrypter = None 853 if pwd: 854 if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 855 # compare against the file type from extended local headers 856 check_byte = (zipinfo._raw_time >> 8) & 0xff 857 else: 858 # compare against the CRC otherwise 859 check_byte = (zipinfo.CRC >> 24) & 0xff 860 h = self._init_decrypter() 861 if h != check_byte: 862 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 863 864 865 def _init_decrypter(self): 866 self._decrypter = _ZipDecrypter(self._pwd) 867 # The first 12 bytes in the cypher stream is an encryption header 868 # used to strengthen the algorithm. The first 11 bytes are 869 # completely random, while the 12th contains the MSB of the CRC, 870 # or the MSB of the file time depending on the header type 871 # and is used to check the correctness of the password. 872 header = self._fileobj.read(12) 873 self._compress_left -= 12 874 return self._decrypter(header)[11] 875 876 def __repr__(self): 877 result = ['<%s.%s' % (self.__class__.__module__, 878 self.__class__.__qualname__)] 879 if not self.closed: 880 result.append(' name=%r mode=%r' % (self.name, self.mode)) 881 if self._compress_type != ZIP_STORED: 882 result.append(' compress_type=%s' % 883 compressor_names.get(self._compress_type, 884 self._compress_type)) 885 else: 886 result.append(' [closed]') 887 result.append('>') 888 return ''.join(result) 889 890 def readline(self, limit=-1): 891 """Read and return a line from the stream. 892 893 If limit is specified, at most limit bytes will be read. 894 """ 895 896 if limit < 0: 897 # Shortcut common case - newline found in buffer. 898 i = self._readbuffer.find(b'\n', self._offset) + 1 899 if i > 0: 900 line = self._readbuffer[self._offset: i] 901 self._offset = i 902 return line 903 904 return io.BufferedIOBase.readline(self, limit) 905 906 def peek(self, n=1): 907 """Returns buffered bytes without advancing the position.""" 908 if n > len(self._readbuffer) - self._offset: 909 chunk = self.read(n) 910 if len(chunk) > self._offset: 911 self._readbuffer = chunk + self._readbuffer[self._offset:] 912 self._offset = 0 913 else: 914 self._offset -= len(chunk) 915 916 # Return up to 512 bytes to reduce allocation overhead for tight loops. 917 return self._readbuffer[self._offset: self._offset + 512] 918 919 def readable(self): 920 if self.closed: 921 raise ValueError("I/O operation on closed file.") 922 return True 923 924 def read(self, n=-1): 925 """Read and return up to n bytes. 926 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 927 """ 928 if self.closed: 929 raise ValueError("read from closed file.") 930 if n is None or n < 0: 931 buf = self._readbuffer[self._offset:] 932 self._readbuffer = b'' 933 self._offset = 0 934 while not self._eof: 935 buf += self._read1(self.MAX_N) 936 return buf 937 938 end = n + self._offset 939 if end < len(self._readbuffer): 940 buf = self._readbuffer[self._offset:end] 941 self._offset = end 942 return buf 943 944 n = end - len(self._readbuffer) 945 buf = self._readbuffer[self._offset:] 946 self._readbuffer = b'' 947 self._offset = 0 948 while n > 0 and not self._eof: 949 data = self._read1(n) 950 if n < len(data): 951 self._readbuffer = data 952 self._offset = n 953 buf += data[:n] 954 break 955 buf += data 956 n -= len(data) 957 return buf 958 959 def _update_crc(self, newdata): 960 # Update the CRC using the given data. 961 if self._expected_crc is None: 962 # No need to compute the CRC if we don't have a reference value 963 return 964 self._running_crc = crc32(newdata, self._running_crc) 965 # Check the CRC if we're at the end of the file 966 if self._eof and self._running_crc != self._expected_crc: 967 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 968 969 def read1(self, n): 970 """Read up to n bytes with at most one read() system call.""" 971 972 if n is None or n < 0: 973 buf = self._readbuffer[self._offset:] 974 self._readbuffer = b'' 975 self._offset = 0 976 while not self._eof: 977 data = self._read1(self.MAX_N) 978 if data: 979 buf += data 980 break 981 return buf 982 983 end = n + self._offset 984 if end < len(self._readbuffer): 985 buf = self._readbuffer[self._offset:end] 986 self._offset = end 987 return buf 988 989 n = end - len(self._readbuffer) 990 buf = self._readbuffer[self._offset:] 991 self._readbuffer = b'' 992 self._offset = 0 993 if n > 0: 994 while not self._eof: 995 data = self._read1(n) 996 if n < len(data): 997 self._readbuffer = data 998 self._offset = n 999 buf += data[:n] 1000 break 1001 if data: 1002 buf += data 1003 break 1004 return buf 1005 1006 def _read1(self, n): 1007 # Read up to n compressed bytes with at most one read() system call, 1008 # decrypt and decompress them. 1009 if self._eof or n <= 0: 1010 return b'' 1011 1012 # Read from file. 1013 if self._compress_type == ZIP_DEFLATED: 1014 ## Handle unconsumed data. 1015 data = self._decompressor.unconsumed_tail 1016 if n > len(data): 1017 data += self._read2(n - len(data)) 1018 else: 1019 data = self._read2(n) 1020 1021 if self._compress_type == ZIP_STORED: 1022 self._eof = self._compress_left <= 0 1023 elif self._compress_type == ZIP_DEFLATED: 1024 n = max(n, self.MIN_READ_SIZE) 1025 data = self._decompressor.decompress(data, n) 1026 self._eof = (self._decompressor.eof or 1027 self._compress_left <= 0 and 1028 not self._decompressor.unconsumed_tail) 1029 if self._eof: 1030 data += self._decompressor.flush() 1031 else: 1032 data = self._decompressor.decompress(data) 1033 self._eof = self._decompressor.eof or self._compress_left <= 0 1034 1035 data = data[:self._left] 1036 self._left -= len(data) 1037 if self._left <= 0: 1038 self._eof = True 1039 self._update_crc(data) 1040 return data 1041 1042 def _read2(self, n): 1043 if self._compress_left <= 0: 1044 return b'' 1045 1046 n = max(n, self.MIN_READ_SIZE) 1047 n = min(n, self._compress_left) 1048 1049 data = self._fileobj.read(n) 1050 self._compress_left -= len(data) 1051 if not data: 1052 raise EOFError 1053 1054 if self._decrypter is not None: 1055 data = self._decrypter(data) 1056 return data 1057 1058 def close(self): 1059 try: 1060 if self._close_fileobj: 1061 self._fileobj.close() 1062 finally: 1063 super().close() 1064 1065 def seekable(self): 1066 if self.closed: 1067 raise ValueError("I/O operation on closed file.") 1068 return self._seekable 1069 1070 def seek(self, offset, whence=0): 1071 if self.closed: 1072 raise ValueError("seek on closed file.") 1073 if not self._seekable: 1074 raise io.UnsupportedOperation("underlying stream is not seekable") 1075 curr_pos = self.tell() 1076 if whence == 0: # Seek from start of file 1077 new_pos = offset 1078 elif whence == 1: # Seek from current position 1079 new_pos = curr_pos + offset 1080 elif whence == 2: # Seek from EOF 1081 new_pos = self._orig_file_size + offset 1082 else: 1083 raise ValueError("whence must be os.SEEK_SET (0), " 1084 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1085 1086 if new_pos > self._orig_file_size: 1087 new_pos = self._orig_file_size 1088 1089 if new_pos < 0: 1090 new_pos = 0 1091 1092 read_offset = new_pos - curr_pos 1093 buff_offset = read_offset + self._offset 1094 1095 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1096 # Just move the _offset index if the new position is in the _readbuffer 1097 self._offset = buff_offset 1098 read_offset = 0 1099 elif read_offset < 0: 1100 # Position is before the current position. Reset the ZipExtFile 1101 self._fileobj.seek(self._orig_compress_start) 1102 self._running_crc = self._orig_start_crc 1103 self._compress_left = self._orig_compress_size 1104 self._left = self._orig_file_size 1105 self._readbuffer = b'' 1106 self._offset = 0 1107 self._decompressor = _get_decompressor(self._compress_type) 1108 self._eof = False 1109 read_offset = new_pos 1110 if self._decrypter is not None: 1111 self._init_decrypter() 1112 1113 while read_offset > 0: 1114 read_len = min(self.MAX_SEEK_READ, read_offset) 1115 self.read(read_len) 1116 read_offset -= read_len 1117 1118 return self.tell() 1119 1120 def tell(self): 1121 if self.closed: 1122 raise ValueError("tell on closed file.") 1123 if not self._seekable: 1124 raise io.UnsupportedOperation("underlying stream is not seekable") 1125 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1126 return filepos 1127 1128 1129class _ZipWriteFile(io.BufferedIOBase): 1130 def __init__(self, zf, zinfo, zip64): 1131 self._zinfo = zinfo 1132 self._zip64 = zip64 1133 self._zipfile = zf 1134 self._compressor = _get_compressor(zinfo.compress_type, 1135 zinfo._compresslevel) 1136 self._file_size = 0 1137 self._compress_size = 0 1138 self._crc = 0 1139 1140 @property 1141 def _fileobj(self): 1142 return self._zipfile.fp 1143 1144 def writable(self): 1145 return True 1146 1147 def write(self, data): 1148 if self.closed: 1149 raise ValueError('I/O operation on closed file.') 1150 nbytes = len(data) 1151 self._file_size += nbytes 1152 self._crc = crc32(data, self._crc) 1153 if self._compressor: 1154 data = self._compressor.compress(data) 1155 self._compress_size += len(data) 1156 self._fileobj.write(data) 1157 return nbytes 1158 1159 def close(self): 1160 if self.closed: 1161 return 1162 try: 1163 super().close() 1164 # Flush any data from the compressor, and update header info 1165 if self._compressor: 1166 buf = self._compressor.flush() 1167 self._compress_size += len(buf) 1168 self._fileobj.write(buf) 1169 self._zinfo.compress_size = self._compress_size 1170 else: 1171 self._zinfo.compress_size = self._file_size 1172 self._zinfo.CRC = self._crc 1173 self._zinfo.file_size = self._file_size 1174 1175 # Write updated header info 1176 if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 1177 # Write CRC and file sizes after the file data 1178 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1179 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1180 self._zinfo.compress_size, self._zinfo.file_size)) 1181 self._zipfile.start_dir = self._fileobj.tell() 1182 else: 1183 if not self._zip64: 1184 if self._file_size > ZIP64_LIMIT: 1185 raise RuntimeError( 1186 'File size unexpectedly exceeded ZIP64 limit') 1187 if self._compress_size > ZIP64_LIMIT: 1188 raise RuntimeError( 1189 'Compressed size unexpectedly exceeded ZIP64 limit') 1190 # Seek backwards and write file header (which will now include 1191 # correct CRC and file sizes) 1192 1193 # Preserve current position in file 1194 self._zipfile.start_dir = self._fileobj.tell() 1195 self._fileobj.seek(self._zinfo.header_offset) 1196 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1197 self._fileobj.seek(self._zipfile.start_dir) 1198 1199 # Successfully written: Add file to our caches 1200 self._zipfile.filelist.append(self._zinfo) 1201 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1202 finally: 1203 self._zipfile._writing = False 1204 1205 1206 1207class ZipFile: 1208 """ Class with methods to open, read, write, close, list zip files. 1209 1210 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1211 compresslevel=None) 1212 1213 file: Either the path to the file, or a file-like object. 1214 If it is a path, the file will be opened and closed by ZipFile. 1215 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1216 or append 'a'. 1217 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1218 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1219 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1220 needed, otherwise it will raise an exception when this would 1221 be necessary. 1222 compresslevel: None (default for the given compression type) or an integer 1223 specifying the level to pass to the compressor. 1224 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1225 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1226 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1227 1228 """ 1229 1230 fp = None # Set here since __del__ checks it 1231 _windows_illegal_name_trans_table = None 1232 1233 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1234 compresslevel=None, *, strict_timestamps=True): 1235 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1236 or append 'a'.""" 1237 if mode not in ('r', 'w', 'x', 'a'): 1238 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1239 1240 _check_compression(compression) 1241 1242 self._allowZip64 = allowZip64 1243 self._didModify = False 1244 self.debug = 0 # Level of printing: 0 through 3 1245 self.NameToInfo = {} # Find file info given name 1246 self.filelist = [] # List of ZipInfo instances for archive 1247 self.compression = compression # Method of compression 1248 self.compresslevel = compresslevel 1249 self.mode = mode 1250 self.pwd = None 1251 self._comment = b'' 1252 self._strict_timestamps = strict_timestamps 1253 1254 # Check if we were passed a file-like object 1255 if isinstance(file, os.PathLike): 1256 file = os.fspath(file) 1257 if isinstance(file, str): 1258 # No, it's a filename 1259 self._filePassed = 0 1260 self.filename = file 1261 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1262 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1263 filemode = modeDict[mode] 1264 while True: 1265 try: 1266 self.fp = io.open(file, filemode) 1267 except OSError: 1268 if filemode in modeDict: 1269 filemode = modeDict[filemode] 1270 continue 1271 raise 1272 break 1273 else: 1274 self._filePassed = 1 1275 self.fp = file 1276 self.filename = getattr(file, 'name', None) 1277 self._fileRefCnt = 1 1278 self._lock = threading.RLock() 1279 self._seekable = True 1280 self._writing = False 1281 1282 try: 1283 if mode == 'r': 1284 self._RealGetContents() 1285 elif mode in ('w', 'x'): 1286 # set the modified flag so central directory gets written 1287 # even if no files are added to the archive 1288 self._didModify = True 1289 try: 1290 self.start_dir = self.fp.tell() 1291 except (AttributeError, OSError): 1292 self.fp = _Tellable(self.fp) 1293 self.start_dir = 0 1294 self._seekable = False 1295 else: 1296 # Some file-like objects can provide tell() but not seek() 1297 try: 1298 self.fp.seek(self.start_dir) 1299 except (AttributeError, OSError): 1300 self._seekable = False 1301 elif mode == 'a': 1302 try: 1303 # See if file is a zip file 1304 self._RealGetContents() 1305 # seek to start of directory and overwrite 1306 self.fp.seek(self.start_dir) 1307 except BadZipFile: 1308 # file is not a zip file, just append 1309 self.fp.seek(0, 2) 1310 1311 # set the modified flag so central directory gets written 1312 # even if no files are added to the archive 1313 self._didModify = True 1314 self.start_dir = self.fp.tell() 1315 else: 1316 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1317 except: 1318 fp = self.fp 1319 self.fp = None 1320 self._fpclose(fp) 1321 raise 1322 1323 def __enter__(self): 1324 return self 1325 1326 def __exit__(self, type, value, traceback): 1327 self.close() 1328 1329 def __repr__(self): 1330 result = ['<%s.%s' % (self.__class__.__module__, 1331 self.__class__.__qualname__)] 1332 if self.fp is not None: 1333 if self._filePassed: 1334 result.append(' file=%r' % self.fp) 1335 elif self.filename is not None: 1336 result.append(' filename=%r' % self.filename) 1337 result.append(' mode=%r' % self.mode) 1338 else: 1339 result.append(' [closed]') 1340 result.append('>') 1341 return ''.join(result) 1342 1343 def _RealGetContents(self): 1344 """Read in the table of contents for the ZIP file.""" 1345 fp = self.fp 1346 try: 1347 endrec = _EndRecData(fp) 1348 except OSError: 1349 raise BadZipFile("File is not a zip file") 1350 if not endrec: 1351 raise BadZipFile("File is not a zip file") 1352 if self.debug > 1: 1353 print(endrec) 1354 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1355 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1356 self._comment = endrec[_ECD_COMMENT] # archive comment 1357 1358 # "concat" is zero, unless zip was concatenated to another file 1359 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1360 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1361 # If Zip64 extension structures are present, account for them 1362 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1363 1364 if self.debug > 2: 1365 inferred = concat + offset_cd 1366 print("given, inferred, offset", offset_cd, inferred, concat) 1367 # self.start_dir: Position of start of central directory 1368 self.start_dir = offset_cd + concat 1369 fp.seek(self.start_dir, 0) 1370 data = fp.read(size_cd) 1371 fp = io.BytesIO(data) 1372 total = 0 1373 while total < size_cd: 1374 centdir = fp.read(sizeCentralDir) 1375 if len(centdir) != sizeCentralDir: 1376 raise BadZipFile("Truncated central directory") 1377 centdir = struct.unpack(structCentralDir, centdir) 1378 if centdir[_CD_SIGNATURE] != stringCentralDir: 1379 raise BadZipFile("Bad magic number for central directory") 1380 if self.debug > 2: 1381 print(centdir) 1382 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1383 flags = centdir[5] 1384 if flags & _MASK_UTF_FILENAME: 1385 # UTF-8 file names extension 1386 filename = filename.decode('utf-8') 1387 else: 1388 # Historical ZIP filename encoding 1389 filename = filename.decode('cp437') 1390 # Create ZipInfo instance to store file information 1391 x = ZipInfo(filename) 1392 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1393 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1394 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1395 (x.create_version, x.create_system, x.extract_version, x.reserved, 1396 x.flag_bits, x.compress_type, t, d, 1397 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1398 if x.extract_version > MAX_EXTRACT_VERSION: 1399 raise NotImplementedError("zip file version %.1f" % 1400 (x.extract_version / 10)) 1401 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1402 # Convert date/time code to (year, month, day, hour, min, sec) 1403 x._raw_time = t 1404 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1405 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1406 1407 x._decodeExtra() 1408 x.header_offset = x.header_offset + concat 1409 self.filelist.append(x) 1410 self.NameToInfo[x.filename] = x 1411 1412 # update total bytes read from central directory 1413 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1414 + centdir[_CD_EXTRA_FIELD_LENGTH] 1415 + centdir[_CD_COMMENT_LENGTH]) 1416 1417 if self.debug > 2: 1418 print("total", total) 1419 1420 1421 def namelist(self): 1422 """Return a list of file names in the archive.""" 1423 return [data.filename for data in self.filelist] 1424 1425 def infolist(self): 1426 """Return a list of class ZipInfo instances for files in the 1427 archive.""" 1428 return self.filelist 1429 1430 def printdir(self, file=None): 1431 """Print a table of contents for the zip file.""" 1432 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1433 file=file) 1434 for zinfo in self.filelist: 1435 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1436 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1437 file=file) 1438 1439 def testzip(self): 1440 """Read all the files and check the CRC.""" 1441 chunk_size = 2 ** 20 1442 for zinfo in self.filelist: 1443 try: 1444 # Read by chunks, to avoid an OverflowError or a 1445 # MemoryError with very large embedded files. 1446 with self.open(zinfo.filename, "r") as f: 1447 while f.read(chunk_size): # Check CRC-32 1448 pass 1449 except BadZipFile: 1450 return zinfo.filename 1451 1452 def getinfo(self, name): 1453 """Return the instance of ZipInfo given 'name'.""" 1454 info = self.NameToInfo.get(name) 1455 if info is None: 1456 raise KeyError( 1457 'There is no item named %r in the archive' % name) 1458 1459 return info 1460 1461 def setpassword(self, pwd): 1462 """Set default password for encrypted files.""" 1463 if pwd and not isinstance(pwd, bytes): 1464 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1465 if pwd: 1466 self.pwd = pwd 1467 else: 1468 self.pwd = None 1469 1470 @property 1471 def comment(self): 1472 """The comment text associated with the ZIP file.""" 1473 return self._comment 1474 1475 @comment.setter 1476 def comment(self, comment): 1477 if not isinstance(comment, bytes): 1478 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1479 # check for valid comment length 1480 if len(comment) > ZIP_MAX_COMMENT: 1481 import warnings 1482 warnings.warn('Archive comment is too long; truncating to %d bytes' 1483 % ZIP_MAX_COMMENT, stacklevel=2) 1484 comment = comment[:ZIP_MAX_COMMENT] 1485 self._comment = comment 1486 self._didModify = True 1487 1488 def read(self, name, pwd=None): 1489 """Return file bytes for name.""" 1490 with self.open(name, "r", pwd) as fp: 1491 return fp.read() 1492 1493 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1494 """Return file-like object for 'name'. 1495 1496 name is a string for the file name within the ZIP file, or a ZipInfo 1497 object. 1498 1499 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1500 write to a file newly added to the archive. 1501 1502 pwd is the password to decrypt files (only used for reading). 1503 1504 When writing, if the file size is not known in advance but may exceed 1505 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1506 files. If the size is known in advance, it is best to pass a ZipInfo 1507 instance for name, with zinfo.file_size set. 1508 """ 1509 if mode not in {"r", "w"}: 1510 raise ValueError('open() requires mode "r" or "w"') 1511 if pwd and (mode == "w"): 1512 raise ValueError("pwd is only supported for reading files") 1513 if not self.fp: 1514 raise ValueError( 1515 "Attempt to use ZIP archive that was already closed") 1516 1517 # Make sure we have an info object 1518 if isinstance(name, ZipInfo): 1519 # 'name' is already an info object 1520 zinfo = name 1521 elif mode == 'w': 1522 zinfo = ZipInfo(name) 1523 zinfo.compress_type = self.compression 1524 zinfo._compresslevel = self.compresslevel 1525 else: 1526 # Get info object for name 1527 zinfo = self.getinfo(name) 1528 1529 if mode == 'w': 1530 return self._open_to_write(zinfo, force_zip64=force_zip64) 1531 1532 if self._writing: 1533 raise ValueError("Can't read from the ZIP file while there " 1534 "is an open writing handle on it. " 1535 "Close the writing handle before trying to read.") 1536 1537 # Open for reading: 1538 self._fileRefCnt += 1 1539 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1540 self._fpclose, self._lock, lambda: self._writing) 1541 try: 1542 # Skip the file header: 1543 fheader = zef_file.read(sizeFileHeader) 1544 if len(fheader) != sizeFileHeader: 1545 raise BadZipFile("Truncated file header") 1546 fheader = struct.unpack(structFileHeader, fheader) 1547 if fheader[_FH_SIGNATURE] != stringFileHeader: 1548 raise BadZipFile("Bad magic number for file header") 1549 1550 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1551 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1552 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1553 1554 if zinfo.flag_bits & _MASK_COMPRESSED_PATCH: 1555 # Zip 2.7: compressed patched data 1556 raise NotImplementedError("compressed patched data (flag bit 5)") 1557 1558 if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION: 1559 # strong encryption 1560 raise NotImplementedError("strong encryption (flag bit 6)") 1561 1562 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME: 1563 # UTF-8 filename 1564 fname_str = fname.decode("utf-8") 1565 else: 1566 fname_str = fname.decode("cp437") 1567 1568 if fname_str != zinfo.orig_filename: 1569 raise BadZipFile( 1570 'File name in directory %r and header %r differ.' 1571 % (zinfo.orig_filename, fname)) 1572 1573 # check for encrypted flag & handle password 1574 is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED 1575 if is_encrypted: 1576 if not pwd: 1577 pwd = self.pwd 1578 if pwd and not isinstance(pwd, bytes): 1579 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1580 if not pwd: 1581 raise RuntimeError("File %r is encrypted, password " 1582 "required for extraction" % name) 1583 else: 1584 pwd = None 1585 1586 return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1587 except: 1588 zef_file.close() 1589 raise 1590 1591 def _open_to_write(self, zinfo, force_zip64=False): 1592 if force_zip64 and not self._allowZip64: 1593 raise ValueError( 1594 "force_zip64 is True, but allowZip64 was False when opening " 1595 "the ZIP file." 1596 ) 1597 if self._writing: 1598 raise ValueError("Can't write to the ZIP file while there is " 1599 "another write handle open on it. " 1600 "Close the first handle before opening another.") 1601 1602 # Size and CRC are overwritten with correct data after processing the file 1603 zinfo.compress_size = 0 1604 zinfo.CRC = 0 1605 1606 zinfo.flag_bits = 0x00 1607 if zinfo.compress_type == ZIP_LZMA: 1608 # Compressed data includes an end-of-stream (EOS) marker 1609 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1610 if not self._seekable: 1611 zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR 1612 1613 if not zinfo.external_attr: 1614 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1615 1616 # Compressed size can be larger than uncompressed size 1617 zip64 = self._allowZip64 and \ 1618 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) 1619 1620 if self._seekable: 1621 self.fp.seek(self.start_dir) 1622 zinfo.header_offset = self.fp.tell() 1623 1624 self._writecheck(zinfo) 1625 self._didModify = True 1626 1627 self.fp.write(zinfo.FileHeader(zip64)) 1628 1629 self._writing = True 1630 return _ZipWriteFile(self, zinfo, zip64) 1631 1632 def extract(self, member, path=None, pwd=None): 1633 """Extract a member from the archive to the current working directory, 1634 using its full name. Its file information is extracted as accurately 1635 as possible. `member' may be a filename or a ZipInfo object. You can 1636 specify a different directory using `path'. 1637 """ 1638 if path is None: 1639 path = os.getcwd() 1640 else: 1641 path = os.fspath(path) 1642 1643 return self._extract_member(member, path, pwd) 1644 1645 def extractall(self, path=None, members=None, pwd=None): 1646 """Extract all members from the archive to the current working 1647 directory. `path' specifies a different directory to extract to. 1648 `members' is optional and must be a subset of the list returned 1649 by namelist(). 1650 """ 1651 if members is None: 1652 members = self.namelist() 1653 1654 if path is None: 1655 path = os.getcwd() 1656 else: 1657 path = os.fspath(path) 1658 1659 for zipinfo in members: 1660 self._extract_member(zipinfo, path, pwd) 1661 1662 @classmethod 1663 def _sanitize_windows_name(cls, arcname, pathsep): 1664 """Replace bad characters and remove trailing dots from parts.""" 1665 table = cls._windows_illegal_name_trans_table 1666 if not table: 1667 illegal = ':<>|"?*' 1668 table = str.maketrans(illegal, '_' * len(illegal)) 1669 cls._windows_illegal_name_trans_table = table 1670 arcname = arcname.translate(table) 1671 # remove trailing dots 1672 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1673 # rejoin, removing empty parts. 1674 arcname = pathsep.join(x for x in arcname if x) 1675 return arcname 1676 1677 def _extract_member(self, member, targetpath, pwd): 1678 """Extract the ZipInfo object 'member' to a physical 1679 file on the path targetpath. 1680 """ 1681 if not isinstance(member, ZipInfo): 1682 member = self.getinfo(member) 1683 1684 # build the destination pathname, replacing 1685 # forward slashes to platform specific separators. 1686 arcname = member.filename.replace('/', os.path.sep) 1687 1688 if os.path.altsep: 1689 arcname = arcname.replace(os.path.altsep, os.path.sep) 1690 # interpret absolute pathname as relative, remove drive letter or 1691 # UNC path, redundant separators, "." and ".." components. 1692 arcname = os.path.splitdrive(arcname)[1] 1693 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1694 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1695 if x not in invalid_path_parts) 1696 if os.path.sep == '\\': 1697 # filter illegal characters on Windows 1698 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1699 1700 targetpath = os.path.join(targetpath, arcname) 1701 targetpath = os.path.normpath(targetpath) 1702 1703 # Create all upper directories if necessary. 1704 upperdirs = os.path.dirname(targetpath) 1705 if upperdirs and not os.path.exists(upperdirs): 1706 os.makedirs(upperdirs) 1707 1708 if member.is_dir(): 1709 if not os.path.isdir(targetpath): 1710 os.mkdir(targetpath) 1711 return targetpath 1712 1713 with self.open(member, pwd=pwd) as source, \ 1714 open(targetpath, "wb") as target: 1715 shutil.copyfileobj(source, target) 1716 1717 return targetpath 1718 1719 def _writecheck(self, zinfo): 1720 """Check for errors before writing a file to the archive.""" 1721 if zinfo.filename in self.NameToInfo: 1722 import warnings 1723 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1724 if self.mode not in ('w', 'x', 'a'): 1725 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1726 if not self.fp: 1727 raise ValueError( 1728 "Attempt to write ZIP archive that was already closed") 1729 _check_compression(zinfo.compress_type) 1730 if not self._allowZip64: 1731 requires_zip64 = None 1732 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1733 requires_zip64 = "Files count" 1734 elif zinfo.file_size > ZIP64_LIMIT: 1735 requires_zip64 = "Filesize" 1736 elif zinfo.header_offset > ZIP64_LIMIT: 1737 requires_zip64 = "Zipfile size" 1738 if requires_zip64: 1739 raise LargeZipFile(requires_zip64 + 1740 " would require ZIP64 extensions") 1741 1742 def write(self, filename, arcname=None, 1743 compress_type=None, compresslevel=None): 1744 """Put the bytes from filename into the archive under the name 1745 arcname.""" 1746 if not self.fp: 1747 raise ValueError( 1748 "Attempt to write to ZIP archive that was already closed") 1749 if self._writing: 1750 raise ValueError( 1751 "Can't write to ZIP archive while an open writing handle exists" 1752 ) 1753 1754 zinfo = ZipInfo.from_file(filename, arcname, 1755 strict_timestamps=self._strict_timestamps) 1756 1757 if zinfo.is_dir(): 1758 zinfo.compress_size = 0 1759 zinfo.CRC = 0 1760 else: 1761 if compress_type is not None: 1762 zinfo.compress_type = compress_type 1763 else: 1764 zinfo.compress_type = self.compression 1765 1766 if compresslevel is not None: 1767 zinfo._compresslevel = compresslevel 1768 else: 1769 zinfo._compresslevel = self.compresslevel 1770 1771 if zinfo.is_dir(): 1772 with self._lock: 1773 if self._seekable: 1774 self.fp.seek(self.start_dir) 1775 zinfo.header_offset = self.fp.tell() # Start of header bytes 1776 if zinfo.compress_type == ZIP_LZMA: 1777 # Compressed data includes an end-of-stream (EOS) marker 1778 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1779 1780 self._writecheck(zinfo) 1781 self._didModify = True 1782 1783 self.filelist.append(zinfo) 1784 self.NameToInfo[zinfo.filename] = zinfo 1785 self.fp.write(zinfo.FileHeader(False)) 1786 self.start_dir = self.fp.tell() 1787 else: 1788 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1789 shutil.copyfileobj(src, dest, 1024*8) 1790 1791 def writestr(self, zinfo_or_arcname, data, 1792 compress_type=None, compresslevel=None): 1793 """Write a file into the archive. The contents is 'data', which 1794 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1795 it is encoded as UTF-8 first. 1796 'zinfo_or_arcname' is either a ZipInfo instance or 1797 the name of the file in the archive.""" 1798 if isinstance(data, str): 1799 data = data.encode("utf-8") 1800 if not isinstance(zinfo_or_arcname, ZipInfo): 1801 zinfo = ZipInfo(filename=zinfo_or_arcname, 1802 date_time=time.localtime(time.time())[:6]) 1803 zinfo.compress_type = self.compression 1804 zinfo._compresslevel = self.compresslevel 1805 if zinfo.filename[-1] == '/': 1806 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1807 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1808 else: 1809 zinfo.external_attr = 0o600 << 16 # ?rw------- 1810 else: 1811 zinfo = zinfo_or_arcname 1812 1813 if not self.fp: 1814 raise ValueError( 1815 "Attempt to write to ZIP archive that was already closed") 1816 if self._writing: 1817 raise ValueError( 1818 "Can't write to ZIP archive while an open writing handle exists." 1819 ) 1820 1821 if compress_type is not None: 1822 zinfo.compress_type = compress_type 1823 1824 if compresslevel is not None: 1825 zinfo._compresslevel = compresslevel 1826 1827 zinfo.file_size = len(data) # Uncompressed size 1828 with self._lock: 1829 with self.open(zinfo, mode='w') as dest: 1830 dest.write(data) 1831 1832 def __del__(self): 1833 """Call the "close()" method in case the user forgot.""" 1834 self.close() 1835 1836 def close(self): 1837 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1838 records.""" 1839 if self.fp is None: 1840 return 1841 1842 if self._writing: 1843 raise ValueError("Can't close the ZIP file while there is " 1844 "an open writing handle on it. " 1845 "Close the writing handle before closing the zip.") 1846 1847 try: 1848 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1849 with self._lock: 1850 if self._seekable: 1851 self.fp.seek(self.start_dir) 1852 self._write_end_record() 1853 finally: 1854 fp = self.fp 1855 self.fp = None 1856 self._fpclose(fp) 1857 1858 def _write_end_record(self): 1859 for zinfo in self.filelist: # write central directory 1860 dt = zinfo.date_time 1861 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1862 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1863 extra = [] 1864 if zinfo.file_size > ZIP64_LIMIT \ 1865 or zinfo.compress_size > ZIP64_LIMIT: 1866 extra.append(zinfo.file_size) 1867 extra.append(zinfo.compress_size) 1868 file_size = 0xffffffff 1869 compress_size = 0xffffffff 1870 else: 1871 file_size = zinfo.file_size 1872 compress_size = zinfo.compress_size 1873 1874 if zinfo.header_offset > ZIP64_LIMIT: 1875 extra.append(zinfo.header_offset) 1876 header_offset = 0xffffffff 1877 else: 1878 header_offset = zinfo.header_offset 1879 1880 extra_data = zinfo.extra 1881 min_version = 0 1882 if extra: 1883 # Append a ZIP64 field to the extra's 1884 extra_data = _strip_extra(extra_data, (1,)) 1885 extra_data = struct.pack( 1886 '<HH' + 'Q'*len(extra), 1887 1, 8*len(extra), *extra) + extra_data 1888 1889 min_version = ZIP64_VERSION 1890 1891 if zinfo.compress_type == ZIP_BZIP2: 1892 min_version = max(BZIP2_VERSION, min_version) 1893 elif zinfo.compress_type == ZIP_LZMA: 1894 min_version = max(LZMA_VERSION, min_version) 1895 1896 extract_version = max(min_version, zinfo.extract_version) 1897 create_version = max(min_version, zinfo.create_version) 1898 filename, flag_bits = zinfo._encodeFilenameFlags() 1899 centdir = struct.pack(structCentralDir, 1900 stringCentralDir, create_version, 1901 zinfo.create_system, extract_version, zinfo.reserved, 1902 flag_bits, zinfo.compress_type, dostime, dosdate, 1903 zinfo.CRC, compress_size, file_size, 1904 len(filename), len(extra_data), len(zinfo.comment), 1905 0, zinfo.internal_attr, zinfo.external_attr, 1906 header_offset) 1907 self.fp.write(centdir) 1908 self.fp.write(filename) 1909 self.fp.write(extra_data) 1910 self.fp.write(zinfo.comment) 1911 1912 pos2 = self.fp.tell() 1913 # Write end-of-zip-archive record 1914 centDirCount = len(self.filelist) 1915 centDirSize = pos2 - self.start_dir 1916 centDirOffset = self.start_dir 1917 requires_zip64 = None 1918 if centDirCount > ZIP_FILECOUNT_LIMIT: 1919 requires_zip64 = "Files count" 1920 elif centDirOffset > ZIP64_LIMIT: 1921 requires_zip64 = "Central directory offset" 1922 elif centDirSize > ZIP64_LIMIT: 1923 requires_zip64 = "Central directory size" 1924 if requires_zip64: 1925 # Need to write the ZIP64 end-of-archive records 1926 if not self._allowZip64: 1927 raise LargeZipFile(requires_zip64 + 1928 " would require ZIP64 extensions") 1929 zip64endrec = struct.pack( 1930 structEndArchive64, stringEndArchive64, 1931 44, 45, 45, 0, 0, centDirCount, centDirCount, 1932 centDirSize, centDirOffset) 1933 self.fp.write(zip64endrec) 1934 1935 zip64locrec = struct.pack( 1936 structEndArchive64Locator, 1937 stringEndArchive64Locator, 0, pos2, 1) 1938 self.fp.write(zip64locrec) 1939 centDirCount = min(centDirCount, 0xFFFF) 1940 centDirSize = min(centDirSize, 0xFFFFFFFF) 1941 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1942 1943 endrec = struct.pack(structEndArchive, stringEndArchive, 1944 0, 0, centDirCount, centDirCount, 1945 centDirSize, centDirOffset, len(self._comment)) 1946 self.fp.write(endrec) 1947 self.fp.write(self._comment) 1948 if self.mode == "a": 1949 self.fp.truncate() 1950 self.fp.flush() 1951 1952 def _fpclose(self, fp): 1953 assert self._fileRefCnt > 0 1954 self._fileRefCnt -= 1 1955 if not self._fileRefCnt and not self._filePassed: 1956 fp.close() 1957 1958 1959class PyZipFile(ZipFile): 1960 """Class to create ZIP archives with Python library files and packages.""" 1961 1962 def __init__(self, file, mode="r", compression=ZIP_STORED, 1963 allowZip64=True, optimize=-1): 1964 ZipFile.__init__(self, file, mode=mode, compression=compression, 1965 allowZip64=allowZip64) 1966 self._optimize = optimize 1967 1968 def writepy(self, pathname, basename="", filterfunc=None): 1969 """Add all files from "pathname" to the ZIP archive. 1970 1971 If pathname is a package directory, search the directory and 1972 all package subdirectories recursively for all *.py and enter 1973 the modules into the archive. If pathname is a plain 1974 directory, listdir *.py and enter all modules. Else, pathname 1975 must be a Python *.py file and the module will be put into the 1976 archive. Added modules are always module.pyc. 1977 This method will compile the module.py into module.pyc if 1978 necessary. 1979 If filterfunc(pathname) is given, it is called with every argument. 1980 When it is False, the file or directory is skipped. 1981 """ 1982 pathname = os.fspath(pathname) 1983 if filterfunc and not filterfunc(pathname): 1984 if self.debug: 1985 label = 'path' if os.path.isdir(pathname) else 'file' 1986 print('%s %r skipped by filterfunc' % (label, pathname)) 1987 return 1988 dir, name = os.path.split(pathname) 1989 if os.path.isdir(pathname): 1990 initname = os.path.join(pathname, "__init__.py") 1991 if os.path.isfile(initname): 1992 # This is a package directory, add it 1993 if basename: 1994 basename = "%s/%s" % (basename, name) 1995 else: 1996 basename = name 1997 if self.debug: 1998 print("Adding package in", pathname, "as", basename) 1999 fname, arcname = self._get_codename(initname[0:-3], basename) 2000 if self.debug: 2001 print("Adding", arcname) 2002 self.write(fname, arcname) 2003 dirlist = sorted(os.listdir(pathname)) 2004 dirlist.remove("__init__.py") 2005 # Add all *.py files and package subdirectories 2006 for filename in dirlist: 2007 path = os.path.join(pathname, filename) 2008 root, ext = os.path.splitext(filename) 2009 if os.path.isdir(path): 2010 if os.path.isfile(os.path.join(path, "__init__.py")): 2011 # This is a package directory, add it 2012 self.writepy(path, basename, 2013 filterfunc=filterfunc) # Recursive call 2014 elif ext == ".py": 2015 if filterfunc and not filterfunc(path): 2016 if self.debug: 2017 print('file %r skipped by filterfunc' % path) 2018 continue 2019 fname, arcname = self._get_codename(path[0:-3], 2020 basename) 2021 if self.debug: 2022 print("Adding", arcname) 2023 self.write(fname, arcname) 2024 else: 2025 # This is NOT a package directory, add its files at top level 2026 if self.debug: 2027 print("Adding files from directory", pathname) 2028 for filename in sorted(os.listdir(pathname)): 2029 path = os.path.join(pathname, filename) 2030 root, ext = os.path.splitext(filename) 2031 if ext == ".py": 2032 if filterfunc and not filterfunc(path): 2033 if self.debug: 2034 print('file %r skipped by filterfunc' % path) 2035 continue 2036 fname, arcname = self._get_codename(path[0:-3], 2037 basename) 2038 if self.debug: 2039 print("Adding", arcname) 2040 self.write(fname, arcname) 2041 else: 2042 if pathname[-3:] != ".py": 2043 raise RuntimeError( 2044 'Files added with writepy() must end with ".py"') 2045 fname, arcname = self._get_codename(pathname[0:-3], basename) 2046 if self.debug: 2047 print("Adding file", arcname) 2048 self.write(fname, arcname) 2049 2050 def _get_codename(self, pathname, basename): 2051 """Return (filename, archivename) for the path. 2052 2053 Given a module name path, return the correct file path and 2054 archive name, compiling if necessary. For example, given 2055 /python/lib/string, return (/python/lib/string.pyc, string). 2056 """ 2057 def _compile(file, optimize=-1): 2058 import py_compile 2059 if self.debug: 2060 print("Compiling", file) 2061 try: 2062 py_compile.compile(file, doraise=True, optimize=optimize) 2063 except py_compile.PyCompileError as err: 2064 print(err.msg) 2065 return False 2066 return True 2067 2068 file_py = pathname + ".py" 2069 file_pyc = pathname + ".pyc" 2070 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2071 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2072 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2073 if self._optimize == -1: 2074 # legacy mode: use whatever file is present 2075 if (os.path.isfile(file_pyc) and 2076 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2077 # Use .pyc file. 2078 arcname = fname = file_pyc 2079 elif (os.path.isfile(pycache_opt0) and 2080 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2081 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2082 # file name in the archive. 2083 fname = pycache_opt0 2084 arcname = file_pyc 2085 elif (os.path.isfile(pycache_opt1) and 2086 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2087 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2088 # file name in the archive. 2089 fname = pycache_opt1 2090 arcname = file_pyc 2091 elif (os.path.isfile(pycache_opt2) and 2092 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2093 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2094 # file name in the archive. 2095 fname = pycache_opt2 2096 arcname = file_pyc 2097 else: 2098 # Compile py into PEP 3147 pyc file. 2099 if _compile(file_py): 2100 if sys.flags.optimize == 0: 2101 fname = pycache_opt0 2102 elif sys.flags.optimize == 1: 2103 fname = pycache_opt1 2104 else: 2105 fname = pycache_opt2 2106 arcname = file_pyc 2107 else: 2108 fname = arcname = file_py 2109 else: 2110 # new mode: use given optimization level 2111 if self._optimize == 0: 2112 fname = pycache_opt0 2113 arcname = file_pyc 2114 else: 2115 arcname = file_pyc 2116 if self._optimize == 1: 2117 fname = pycache_opt1 2118 elif self._optimize == 2: 2119 fname = pycache_opt2 2120 else: 2121 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2122 raise ValueError(msg) 2123 if not (os.path.isfile(fname) and 2124 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2125 if not _compile(file_py, optimize=self._optimize): 2126 fname = arcname = file_py 2127 archivename = os.path.split(arcname)[1] 2128 if basename: 2129 archivename = "%s/%s" % (basename, archivename) 2130 return (fname, archivename) 2131 2132 2133def _parents(path): 2134 """ 2135 Given a path with elements separated by 2136 posixpath.sep, generate all parents of that path. 2137 2138 >>> list(_parents('b/d')) 2139 ['b'] 2140 >>> list(_parents('/b/d/')) 2141 ['/b'] 2142 >>> list(_parents('b/d/f/')) 2143 ['b/d', 'b'] 2144 >>> list(_parents('b')) 2145 [] 2146 >>> list(_parents('')) 2147 [] 2148 """ 2149 return itertools.islice(_ancestry(path), 1, None) 2150 2151 2152def _ancestry(path): 2153 """ 2154 Given a path with elements separated by 2155 posixpath.sep, generate all elements of that path 2156 2157 >>> list(_ancestry('b/d')) 2158 ['b/d', 'b'] 2159 >>> list(_ancestry('/b/d/')) 2160 ['/b/d', '/b'] 2161 >>> list(_ancestry('b/d/f/')) 2162 ['b/d/f', 'b/d', 'b'] 2163 >>> list(_ancestry('b')) 2164 ['b'] 2165 >>> list(_ancestry('')) 2166 [] 2167 """ 2168 path = path.rstrip(posixpath.sep) 2169 while path and path != posixpath.sep: 2170 yield path 2171 path, tail = posixpath.split(path) 2172 2173 2174_dedupe = dict.fromkeys 2175"""Deduplicate an iterable in original order""" 2176 2177 2178def _difference(minuend, subtrahend): 2179 """ 2180 Return items in minuend not in subtrahend, retaining order 2181 with O(1) lookup. 2182 """ 2183 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2184 2185 2186class CompleteDirs(ZipFile): 2187 """ 2188 A ZipFile subclass that ensures that implied directories 2189 are always included in the namelist. 2190 """ 2191 2192 @staticmethod 2193 def _implied_dirs(names): 2194 parents = itertools.chain.from_iterable(map(_parents, names)) 2195 as_dirs = (p + posixpath.sep for p in parents) 2196 return _dedupe(_difference(as_dirs, names)) 2197 2198 def namelist(self): 2199 names = super(CompleteDirs, self).namelist() 2200 return names + list(self._implied_dirs(names)) 2201 2202 def _name_set(self): 2203 return set(self.namelist()) 2204 2205 def resolve_dir(self, name): 2206 """ 2207 If the name represents a directory, return that name 2208 as a directory (with the trailing slash). 2209 """ 2210 names = self._name_set() 2211 dirname = name + '/' 2212 dir_match = name not in names and dirname in names 2213 return dirname if dir_match else name 2214 2215 @classmethod 2216 def make(cls, source): 2217 """ 2218 Given a source (filename or zipfile), return an 2219 appropriate CompleteDirs subclass. 2220 """ 2221 if isinstance(source, CompleteDirs): 2222 return source 2223 2224 if not isinstance(source, ZipFile): 2225 return cls(source) 2226 2227 # Only allow for FastLookup when supplied zipfile is read-only 2228 if 'r' not in source.mode: 2229 cls = CompleteDirs 2230 2231 source.__class__ = cls 2232 return source 2233 2234 2235class FastLookup(CompleteDirs): 2236 """ 2237 ZipFile subclass to ensure implicit 2238 dirs exist and are resolved rapidly. 2239 """ 2240 2241 def namelist(self): 2242 with contextlib.suppress(AttributeError): 2243 return self.__names 2244 self.__names = super(FastLookup, self).namelist() 2245 return self.__names 2246 2247 def _name_set(self): 2248 with contextlib.suppress(AttributeError): 2249 return self.__lookup 2250 self.__lookup = super(FastLookup, self)._name_set() 2251 return self.__lookup 2252 2253 2254class Path: 2255 """ 2256 A pathlib-compatible interface for zip files. 2257 2258 Consider a zip file with this structure:: 2259 2260 . 2261 ├── a.txt 2262 └── b 2263 ├── c.txt 2264 └── d 2265 └── e.txt 2266 2267 >>> data = io.BytesIO() 2268 >>> zf = ZipFile(data, 'w') 2269 >>> zf.writestr('a.txt', 'content of a') 2270 >>> zf.writestr('b/c.txt', 'content of c') 2271 >>> zf.writestr('b/d/e.txt', 'content of e') 2272 >>> zf.filename = 'mem/abcde.zip' 2273 2274 Path accepts the zipfile object itself or a filename 2275 2276 >>> root = Path(zf) 2277 2278 From there, several path operations are available. 2279 2280 Directory iteration (including the zip file itself): 2281 2282 >>> a, b = root.iterdir() 2283 >>> a 2284 Path('mem/abcde.zip', 'a.txt') 2285 >>> b 2286 Path('mem/abcde.zip', 'b/') 2287 2288 name property: 2289 2290 >>> b.name 2291 'b' 2292 2293 join with divide operator: 2294 2295 >>> c = b / 'c.txt' 2296 >>> c 2297 Path('mem/abcde.zip', 'b/c.txt') 2298 >>> c.name 2299 'c.txt' 2300 2301 Read text: 2302 2303 >>> c.read_text() 2304 'content of c' 2305 2306 existence: 2307 2308 >>> c.exists() 2309 True 2310 >>> (b / 'missing.txt').exists() 2311 False 2312 2313 Coercion to string: 2314 2315 >>> import os 2316 >>> str(c).replace(os.sep, posixpath.sep) 2317 'mem/abcde.zip/b/c.txt' 2318 2319 At the root, ``name``, ``filename``, and ``parent`` 2320 resolve to the zipfile. Note these attributes are not 2321 valid and will raise a ``ValueError`` if the zipfile 2322 has no filename. 2323 2324 >>> root.name 2325 'abcde.zip' 2326 >>> str(root.filename).replace(os.sep, posixpath.sep) 2327 'mem/abcde.zip' 2328 >>> str(root.parent) 2329 'mem' 2330 """ 2331 2332 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 2333 2334 def __init__(self, root, at=""): 2335 """ 2336 Construct a Path from a ZipFile or filename. 2337 2338 Note: When the source is an existing ZipFile object, 2339 its type (__class__) will be mutated to a 2340 specialized type. If the caller wishes to retain the 2341 original type, the caller should either create a 2342 separate ZipFile object or pass a filename. 2343 """ 2344 self.root = FastLookup.make(root) 2345 self.at = at 2346 2347 def open(self, mode='r', *args, pwd=None, **kwargs): 2348 """ 2349 Open this entry as text or binary following the semantics 2350 of ``pathlib.Path.open()`` by passing arguments through 2351 to io.TextIOWrapper(). 2352 """ 2353 if self.is_dir(): 2354 raise IsADirectoryError(self) 2355 zip_mode = mode[0] 2356 if not self.exists() and zip_mode == 'r': 2357 raise FileNotFoundError(self) 2358 stream = self.root.open(self.at, zip_mode, pwd=pwd) 2359 if 'b' in mode: 2360 if args or kwargs: 2361 raise ValueError("encoding args invalid for binary operation") 2362 return stream 2363 else: 2364 kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) 2365 return io.TextIOWrapper(stream, *args, **kwargs) 2366 2367 @property 2368 def name(self): 2369 return pathlib.Path(self.at).name or self.filename.name 2370 2371 @property 2372 def suffix(self): 2373 return pathlib.Path(self.at).suffix or self.filename.suffix 2374 2375 @property 2376 def suffixes(self): 2377 return pathlib.Path(self.at).suffixes or self.filename.suffixes 2378 2379 @property 2380 def stem(self): 2381 return pathlib.Path(self.at).stem or self.filename.stem 2382 2383 @property 2384 def filename(self): 2385 return pathlib.Path(self.root.filename).joinpath(self.at) 2386 2387 def read_text(self, *args, **kwargs): 2388 kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) 2389 with self.open('r', *args, **kwargs) as strm: 2390 return strm.read() 2391 2392 def read_bytes(self): 2393 with self.open('rb') as strm: 2394 return strm.read() 2395 2396 def _is_child(self, path): 2397 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 2398 2399 def _next(self, at): 2400 return self.__class__(self.root, at) 2401 2402 def is_dir(self): 2403 return not self.at or self.at.endswith("/") 2404 2405 def is_file(self): 2406 return self.exists() and not self.is_dir() 2407 2408 def exists(self): 2409 return self.at in self.root._name_set() 2410 2411 def iterdir(self): 2412 if not self.is_dir(): 2413 raise ValueError("Can't listdir a file") 2414 subs = map(self._next, self.root.namelist()) 2415 return filter(self._is_child, subs) 2416 2417 def __str__(self): 2418 return posixpath.join(self.root.filename, self.at) 2419 2420 def __repr__(self): 2421 return self.__repr.format(self=self) 2422 2423 def joinpath(self, *other): 2424 next = posixpath.join(self.at, *other) 2425 return self._next(self.root.resolve_dir(next)) 2426 2427 __truediv__ = joinpath 2428 2429 @property 2430 def parent(self): 2431 if not self.at: 2432 return self.filename.parent 2433 parent_at = posixpath.dirname(self.at.rstrip('/')) 2434 if parent_at: 2435 parent_at += '/' 2436 return self._next(parent_at) 2437 2438 2439def main(args=None): 2440 import argparse 2441 2442 description = 'A simple command-line interface for zipfile module.' 2443 parser = argparse.ArgumentParser(description=description) 2444 group = parser.add_mutually_exclusive_group(required=True) 2445 group.add_argument('-l', '--list', metavar='<zipfile>', 2446 help='Show listing of a zipfile') 2447 group.add_argument('-e', '--extract', nargs=2, 2448 metavar=('<zipfile>', '<output_dir>'), 2449 help='Extract zipfile into target dir') 2450 group.add_argument('-c', '--create', nargs='+', 2451 metavar=('<name>', '<file>'), 2452 help='Create zipfile from sources') 2453 group.add_argument('-t', '--test', metavar='<zipfile>', 2454 help='Test if a zipfile is valid') 2455 args = parser.parse_args(args) 2456 2457 if args.test is not None: 2458 src = args.test 2459 with ZipFile(src, 'r') as zf: 2460 badfile = zf.testzip() 2461 if badfile: 2462 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2463 print("Done testing") 2464 2465 elif args.list is not None: 2466 src = args.list 2467 with ZipFile(src, 'r') as zf: 2468 zf.printdir() 2469 2470 elif args.extract is not None: 2471 src, curdir = args.extract 2472 with ZipFile(src, 'r') as zf: 2473 zf.extractall(curdir) 2474 2475 elif args.create is not None: 2476 zip_name = args.create.pop(0) 2477 files = args.create 2478 2479 def addToZip(zf, path, zippath): 2480 if os.path.isfile(path): 2481 zf.write(path, zippath, ZIP_DEFLATED) 2482 elif os.path.isdir(path): 2483 if zippath: 2484 zf.write(path, zippath) 2485 for nm in sorted(os.listdir(path)): 2486 addToZip(zf, 2487 os.path.join(path, nm), os.path.join(zippath, nm)) 2488 # else: ignore 2489 2490 with ZipFile(zip_name, 'w') as zf: 2491 for path in files: 2492 zippath = os.path.basename(path) 2493 if not zippath: 2494 zippath = os.path.basename(os.path.dirname(path)) 2495 if zippath in ('', os.curdir, os.pardir): 2496 zippath = '' 2497 addToZip(zf, path, zippath) 2498 2499 2500if __name__ == "__main__": 2501 main() 2502