1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import binascii 7import functools 8import importlib.util 9import io 10import itertools 11import os 12import posixpath 13import shutil 14import stat 15import struct 16import sys 17import threading 18import time 19import contextlib 20 21try: 22 import zlib # We may need its compression method 23 crc32 = zlib.crc32 24except ImportError: 25 zlib = None 26 crc32 = binascii.crc32 27 28try: 29 import bz2 # We may need its compression method 30except ImportError: 31 bz2 = None 32 33try: 34 import lzma # We may need its compression method 35except ImportError: 36 lzma = None 37 38__all__ = ["BadZipFile", "BadZipfile", "error", 39 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 40 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 41 "Path"] 42 43class BadZipFile(Exception): 44 pass 45 46 47class LargeZipFile(Exception): 48 """ 49 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 50 and those extensions are disabled. 51 """ 52 53error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 54 55 56ZIP64_LIMIT = (1 << 31) - 1 57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 58ZIP_MAX_COMMENT = (1 << 16) - 1 59 60# constants for Zip file compression methods 61ZIP_STORED = 0 62ZIP_DEFLATED = 8 63ZIP_BZIP2 = 12 64ZIP_LZMA = 14 65# Other ZIP compression methods not supported 66 67DEFAULT_VERSION = 20 68ZIP64_VERSION = 45 69BZIP2_VERSION = 46 70LZMA_VERSION = 63 71# we recognize (but not necessarily support) all features up to that version 72MAX_EXTRACT_VERSION = 63 73 74# Below are some formats and associated data for reading/writing headers using 75# the struct module. The names and structures of headers/records are those used 76# in the PKWARE description of the ZIP file format: 77# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 78# (URL valid as of January 2008) 79 80# The "end of central directory" structure, magic number, size, and indices 81# (section V.I in the format document) 82structEndArchive = b"<4s4H2LH" 83stringEndArchive = b"PK\005\006" 84sizeEndCentDir = struct.calcsize(structEndArchive) 85 86_ECD_SIGNATURE = 0 87_ECD_DISK_NUMBER = 1 88_ECD_DISK_START = 2 89_ECD_ENTRIES_THIS_DISK = 3 90_ECD_ENTRIES_TOTAL = 4 91_ECD_SIZE = 5 92_ECD_OFFSET = 6 93_ECD_COMMENT_SIZE = 7 94# These last two indices are not part of the structure as defined in the 95# spec, but they are used internally by this module as a convenience 96_ECD_COMMENT = 8 97_ECD_LOCATION = 9 98 99# The "central directory" structure, magic number, size, and indices 100# of entries in the structure (section V.F in the format document) 101structCentralDir = "<4s4B4HL2L5H2L" 102stringCentralDir = b"PK\001\002" 103sizeCentralDir = struct.calcsize(structCentralDir) 104 105# indexes of entries in the central directory structure 106_CD_SIGNATURE = 0 107_CD_CREATE_VERSION = 1 108_CD_CREATE_SYSTEM = 2 109_CD_EXTRACT_VERSION = 3 110_CD_EXTRACT_SYSTEM = 4 111_CD_FLAG_BITS = 5 112_CD_COMPRESS_TYPE = 6 113_CD_TIME = 7 114_CD_DATE = 8 115_CD_CRC = 9 116_CD_COMPRESSED_SIZE = 10 117_CD_UNCOMPRESSED_SIZE = 11 118_CD_FILENAME_LENGTH = 12 119_CD_EXTRA_FIELD_LENGTH = 13 120_CD_COMMENT_LENGTH = 14 121_CD_DISK_NUMBER_START = 15 122_CD_INTERNAL_FILE_ATTRIBUTES = 16 123_CD_EXTERNAL_FILE_ATTRIBUTES = 17 124_CD_LOCAL_HEADER_OFFSET = 18 125 126# The "local file header" structure, magic number, size, and indices 127# (section V.A in the format document) 128structFileHeader = "<4s2B4HL2L2H" 129stringFileHeader = b"PK\003\004" 130sizeFileHeader = struct.calcsize(structFileHeader) 131 132_FH_SIGNATURE = 0 133_FH_EXTRACT_VERSION = 1 134_FH_EXTRACT_SYSTEM = 2 135_FH_GENERAL_PURPOSE_FLAG_BITS = 3 136_FH_COMPRESSION_METHOD = 4 137_FH_LAST_MOD_TIME = 5 138_FH_LAST_MOD_DATE = 6 139_FH_CRC = 7 140_FH_COMPRESSED_SIZE = 8 141_FH_UNCOMPRESSED_SIZE = 9 142_FH_FILENAME_LENGTH = 10 143_FH_EXTRA_FIELD_LENGTH = 11 144 145# The "Zip64 end of central directory locator" structure, magic number, and size 146structEndArchive64Locator = "<4sLQL" 147stringEndArchive64Locator = b"PK\x06\x07" 148sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 149 150# The "Zip64 end of central directory" record, magic number, size, and indices 151# (section V.G in the format document) 152structEndArchive64 = "<4sQ2H2L4Q" 153stringEndArchive64 = b"PK\x06\x06" 154sizeEndCentDir64 = struct.calcsize(structEndArchive64) 155 156_CD64_SIGNATURE = 0 157_CD64_DIRECTORY_RECSIZE = 1 158_CD64_CREATE_VERSION = 2 159_CD64_EXTRACT_VERSION = 3 160_CD64_DISK_NUMBER = 4 161_CD64_DISK_NUMBER_START = 5 162_CD64_NUMBER_ENTRIES_THIS_DISK = 6 163_CD64_NUMBER_ENTRIES_TOTAL = 7 164_CD64_DIRECTORY_SIZE = 8 165_CD64_OFFSET_START_CENTDIR = 9 166 167_DD_SIGNATURE = 0x08074b50 168 169_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 170 171def _strip_extra(extra, xids): 172 # Remove Extra Fields with specified IDs. 173 unpack = _EXTRA_FIELD_STRUCT.unpack 174 modified = False 175 buffer = [] 176 start = i = 0 177 while i + 4 <= len(extra): 178 xid, xlen = unpack(extra[i : i + 4]) 179 j = i + 4 + xlen 180 if xid in xids: 181 if i != start: 182 buffer.append(extra[start : i]) 183 start = j 184 modified = True 185 i = j 186 if not modified: 187 return extra 188 return b''.join(buffer) 189 190def _check_zipfile(fp): 191 try: 192 if _EndRecData(fp): 193 return True # file has correct magic number 194 except OSError: 195 pass 196 return False 197 198def is_zipfile(filename): 199 """Quickly see if a file is a ZIP file by checking the magic number. 200 201 The filename argument may be a file or file-like object too. 202 """ 203 result = False 204 try: 205 if hasattr(filename, "read"): 206 result = _check_zipfile(fp=filename) 207 else: 208 with open(filename, "rb") as fp: 209 result = _check_zipfile(fp) 210 except OSError: 211 pass 212 return result 213 214def _EndRecData64(fpin, offset, endrec): 215 """ 216 Read the ZIP64 end-of-archive records and use that to update endrec 217 """ 218 try: 219 fpin.seek(offset - sizeEndCentDir64Locator, 2) 220 except OSError: 221 # If the seek fails, the file is not large enough to contain a ZIP64 222 # end-of-archive record, so just return the end record we were given. 223 return endrec 224 225 data = fpin.read(sizeEndCentDir64Locator) 226 if len(data) != sizeEndCentDir64Locator: 227 return endrec 228 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 229 if sig != stringEndArchive64Locator: 230 return endrec 231 232 if diskno != 0 or disks > 1: 233 raise BadZipFile("zipfiles that span multiple disks are not supported") 234 235 # Assume no 'zip64 extensible data' 236 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 237 data = fpin.read(sizeEndCentDir64) 238 if len(data) != sizeEndCentDir64: 239 return endrec 240 sig, sz, create_version, read_version, disk_num, disk_dir, \ 241 dircount, dircount2, dirsize, diroffset = \ 242 struct.unpack(structEndArchive64, data) 243 if sig != stringEndArchive64: 244 return endrec 245 246 # Update the original endrec using data from the ZIP64 record 247 endrec[_ECD_SIGNATURE] = sig 248 endrec[_ECD_DISK_NUMBER] = disk_num 249 endrec[_ECD_DISK_START] = disk_dir 250 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 251 endrec[_ECD_ENTRIES_TOTAL] = dircount2 252 endrec[_ECD_SIZE] = dirsize 253 endrec[_ECD_OFFSET] = diroffset 254 return endrec 255 256 257def _EndRecData(fpin): 258 """Return data from the "End of Central Directory" record, or None. 259 260 The data is a list of the nine items in the ZIP "End of central dir" 261 record followed by a tenth item, the file seek offset of this record.""" 262 263 # Determine file size 264 fpin.seek(0, 2) 265 filesize = fpin.tell() 266 267 # Check to see if this is ZIP file with no archive comment (the 268 # "end of central directory" structure should be the last item in the 269 # file if this is the case). 270 try: 271 fpin.seek(-sizeEndCentDir, 2) 272 except OSError: 273 return None 274 data = fpin.read() 275 if (len(data) == sizeEndCentDir and 276 data[0:4] == stringEndArchive and 277 data[-2:] == b"\000\000"): 278 # the signature is correct and there's no comment, unpack structure 279 endrec = struct.unpack(structEndArchive, data) 280 endrec=list(endrec) 281 282 # Append a blank comment and record start offset 283 endrec.append(b"") 284 endrec.append(filesize - sizeEndCentDir) 285 286 # Try to read the "Zip64 end of central directory" structure 287 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 288 289 # Either this is not a ZIP file, or it is a ZIP file with an archive 290 # comment. Search the end of the file for the "end of central directory" 291 # record signature. The comment is the last item in the ZIP file and may be 292 # up to 64K long. It is assumed that the "end of central directory" magic 293 # number does not appear in the comment. 294 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 295 fpin.seek(maxCommentStart, 0) 296 data = fpin.read() 297 start = data.rfind(stringEndArchive) 298 if start >= 0: 299 # found the magic number; attempt to unpack and interpret 300 recData = data[start:start+sizeEndCentDir] 301 if len(recData) != sizeEndCentDir: 302 # Zip file is corrupted. 303 return None 304 endrec = list(struct.unpack(structEndArchive, recData)) 305 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 306 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 307 endrec.append(comment) 308 endrec.append(maxCommentStart + start) 309 310 # Try to read the "Zip64 end of central directory" structure 311 return _EndRecData64(fpin, maxCommentStart + start - filesize, 312 endrec) 313 314 # Unable to find a valid end of central directory structure 315 return None 316 317 318class ZipInfo (object): 319 """Class with attributes describing each file in the ZIP archive.""" 320 321 __slots__ = ( 322 'orig_filename', 323 'filename', 324 'date_time', 325 'compress_type', 326 '_compresslevel', 327 'comment', 328 'extra', 329 'create_system', 330 'create_version', 331 'extract_version', 332 'reserved', 333 'flag_bits', 334 'volume', 335 'internal_attr', 336 'external_attr', 337 'header_offset', 338 'CRC', 339 'compress_size', 340 'file_size', 341 '_raw_time', 342 ) 343 344 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 345 self.orig_filename = filename # Original file name in archive 346 347 # Terminate the file name at the first null byte. Null bytes in file 348 # names are used as tricks by viruses in archives. 349 null_byte = filename.find(chr(0)) 350 if null_byte >= 0: 351 filename = filename[0:null_byte] 352 # This is used to ensure paths in generated ZIP files always use 353 # forward slashes as the directory separator, as required by the 354 # ZIP format specification. 355 if os.sep != "/" and os.sep in filename: 356 filename = filename.replace(os.sep, "/") 357 358 self.filename = filename # Normalized file name 359 self.date_time = date_time # year, month, day, hour, min, sec 360 361 if date_time[0] < 1980: 362 raise ValueError('ZIP does not support timestamps before 1980') 363 364 # Standard values: 365 self.compress_type = ZIP_STORED # Type of compression for the file 366 self._compresslevel = None # Level for the compressor 367 self.comment = b"" # Comment for each file 368 self.extra = b"" # ZIP extra data 369 if sys.platform == 'win32': 370 self.create_system = 0 # System which created ZIP archive 371 else: 372 # Assume everything else is unix-y 373 self.create_system = 3 # System which created ZIP archive 374 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 375 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 376 self.reserved = 0 # Must be zero 377 self.flag_bits = 0 # ZIP flag bits 378 self.volume = 0 # Volume number of file header 379 self.internal_attr = 0 # Internal attributes 380 self.external_attr = 0 # External file attributes 381 # Other attributes are set by class ZipFile: 382 # header_offset Byte offset to the file header 383 # CRC CRC-32 of the uncompressed file 384 # compress_size Size of the compressed file 385 # file_size Size of the uncompressed file 386 387 def __repr__(self): 388 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 389 if self.compress_type != ZIP_STORED: 390 result.append(' compress_type=%s' % 391 compressor_names.get(self.compress_type, 392 self.compress_type)) 393 hi = self.external_attr >> 16 394 lo = self.external_attr & 0xFFFF 395 if hi: 396 result.append(' filemode=%r' % stat.filemode(hi)) 397 if lo: 398 result.append(' external_attr=%#x' % lo) 399 isdir = self.is_dir() 400 if not isdir or self.file_size: 401 result.append(' file_size=%r' % self.file_size) 402 if ((not isdir or self.compress_size) and 403 (self.compress_type != ZIP_STORED or 404 self.file_size != self.compress_size)): 405 result.append(' compress_size=%r' % self.compress_size) 406 result.append('>') 407 return ''.join(result) 408 409 def FileHeader(self, zip64=None): 410 """Return the per-file header as a bytes object.""" 411 dt = self.date_time 412 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 413 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 414 if self.flag_bits & 0x08: 415 # Set these to zero because we write them after the file data 416 CRC = compress_size = file_size = 0 417 else: 418 CRC = self.CRC 419 compress_size = self.compress_size 420 file_size = self.file_size 421 422 extra = self.extra 423 424 min_version = 0 425 if zip64 is None: 426 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 427 if zip64: 428 fmt = '<HHQQ' 429 extra = extra + struct.pack(fmt, 430 1, struct.calcsize(fmt)-4, file_size, compress_size) 431 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 432 if not zip64: 433 raise LargeZipFile("Filesize would require ZIP64 extensions") 434 # File is larger than what fits into a 4 byte integer, 435 # fall back to the ZIP64 extension 436 file_size = 0xffffffff 437 compress_size = 0xffffffff 438 min_version = ZIP64_VERSION 439 440 if self.compress_type == ZIP_BZIP2: 441 min_version = max(BZIP2_VERSION, min_version) 442 elif self.compress_type == ZIP_LZMA: 443 min_version = max(LZMA_VERSION, min_version) 444 445 self.extract_version = max(min_version, self.extract_version) 446 self.create_version = max(min_version, self.create_version) 447 filename, flag_bits = self._encodeFilenameFlags() 448 header = struct.pack(structFileHeader, stringFileHeader, 449 self.extract_version, self.reserved, flag_bits, 450 self.compress_type, dostime, dosdate, CRC, 451 compress_size, file_size, 452 len(filename), len(extra)) 453 return header + filename + extra 454 455 def _encodeFilenameFlags(self): 456 try: 457 return self.filename.encode('ascii'), self.flag_bits 458 except UnicodeEncodeError: 459 return self.filename.encode('utf-8'), self.flag_bits | 0x800 460 461 def _decodeExtra(self): 462 # Try to decode the extra field. 463 extra = self.extra 464 unpack = struct.unpack 465 while len(extra) >= 4: 466 tp, ln = unpack('<HH', extra[:4]) 467 if ln+4 > len(extra): 468 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 469 if tp == 0x0001: 470 if ln >= 24: 471 counts = unpack('<QQQ', extra[4:28]) 472 elif ln == 16: 473 counts = unpack('<QQ', extra[4:20]) 474 elif ln == 8: 475 counts = unpack('<Q', extra[4:12]) 476 elif ln == 0: 477 counts = () 478 else: 479 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 480 481 idx = 0 482 483 # ZIP64 extension (large files and/or large archives) 484 if self.file_size in (0xffffffffffffffff, 0xffffffff): 485 if len(counts) <= idx: 486 raise BadZipFile( 487 "Corrupt zip64 extra field. File size not found." 488 ) 489 self.file_size = counts[idx] 490 idx += 1 491 492 if self.compress_size == 0xFFFFFFFF: 493 if len(counts) <= idx: 494 raise BadZipFile( 495 "Corrupt zip64 extra field. Compress size not found." 496 ) 497 self.compress_size = counts[idx] 498 idx += 1 499 500 if self.header_offset == 0xffffffff: 501 if len(counts) <= idx: 502 raise BadZipFile( 503 "Corrupt zip64 extra field. Header offset not found." 504 ) 505 old = self.header_offset 506 self.header_offset = counts[idx] 507 idx+=1 508 509 extra = extra[ln+4:] 510 511 @classmethod 512 def from_file(cls, filename, arcname=None, *, strict_timestamps=True): 513 """Construct an appropriate ZipInfo for a file on the filesystem. 514 515 filename should be the path to a file or directory on the filesystem. 516 517 arcname is the name which it will have within the archive (by default, 518 this will be the same as filename, but without a drive letter and with 519 leading path separators removed). 520 """ 521 if isinstance(filename, os.PathLike): 522 filename = os.fspath(filename) 523 st = os.stat(filename) 524 isdir = stat.S_ISDIR(st.st_mode) 525 mtime = time.localtime(st.st_mtime) 526 date_time = mtime[0:6] 527 if not strict_timestamps and date_time[0] < 1980: 528 date_time = (1980, 1, 1, 0, 0, 0) 529 elif not strict_timestamps and date_time[0] > 2107: 530 date_time = (2107, 12, 31, 23, 59, 59) 531 # Create ZipInfo instance to store file information 532 if arcname is None: 533 arcname = filename 534 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 535 while arcname[0] in (os.sep, os.altsep): 536 arcname = arcname[1:] 537 if isdir: 538 arcname += '/' 539 zinfo = cls(arcname, date_time) 540 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 541 if isdir: 542 zinfo.file_size = 0 543 zinfo.external_attr |= 0x10 # MS-DOS directory flag 544 else: 545 zinfo.file_size = st.st_size 546 547 return zinfo 548 549 def is_dir(self): 550 """Return True if this archive member is a directory.""" 551 return self.filename[-1] == '/' 552 553 554# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 555# internal keys. We noticed that a direct implementation is faster than 556# relying on binascii.crc32(). 557 558_crctable = None 559def _gen_crc(crc): 560 for j in range(8): 561 if crc & 1: 562 crc = (crc >> 1) ^ 0xEDB88320 563 else: 564 crc >>= 1 565 return crc 566 567# ZIP supports a password-based form of encryption. Even though known 568# plaintext attacks have been found against it, it is still useful 569# to be able to get data out of such a file. 570# 571# Usage: 572# zd = _ZipDecrypter(mypwd) 573# plain_bytes = zd(cypher_bytes) 574 575def _ZipDecrypter(pwd): 576 key0 = 305419896 577 key1 = 591751049 578 key2 = 878082192 579 580 global _crctable 581 if _crctable is None: 582 _crctable = list(map(_gen_crc, range(256))) 583 crctable = _crctable 584 585 def crc32(ch, crc): 586 """Compute the CRC32 primitive on one byte.""" 587 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 588 589 def update_keys(c): 590 nonlocal key0, key1, key2 591 key0 = crc32(c, key0) 592 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 593 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 594 key2 = crc32(key1 >> 24, key2) 595 596 for p in pwd: 597 update_keys(p) 598 599 def decrypter(data): 600 """Decrypt a bytes object.""" 601 result = bytearray() 602 append = result.append 603 for c in data: 604 k = key2 | 2 605 c ^= ((k * (k^1)) >> 8) & 0xFF 606 update_keys(c) 607 append(c) 608 return bytes(result) 609 610 return decrypter 611 612 613class LZMACompressor: 614 615 def __init__(self): 616 self._comp = None 617 618 def _init(self): 619 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 620 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 621 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 622 ]) 623 return struct.pack('<BBH', 9, 4, len(props)) + props 624 625 def compress(self, data): 626 if self._comp is None: 627 return self._init() + self._comp.compress(data) 628 return self._comp.compress(data) 629 630 def flush(self): 631 if self._comp is None: 632 return self._init() + self._comp.flush() 633 return self._comp.flush() 634 635 636class LZMADecompressor: 637 638 def __init__(self): 639 self._decomp = None 640 self._unconsumed = b'' 641 self.eof = False 642 643 def decompress(self, data): 644 if self._decomp is None: 645 self._unconsumed += data 646 if len(self._unconsumed) <= 4: 647 return b'' 648 psize, = struct.unpack('<H', self._unconsumed[2:4]) 649 if len(self._unconsumed) <= 4 + psize: 650 return b'' 651 652 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 653 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 654 self._unconsumed[4:4 + psize]) 655 ]) 656 data = self._unconsumed[4 + psize:] 657 del self._unconsumed 658 659 result = self._decomp.decompress(data) 660 self.eof = self._decomp.eof 661 return result 662 663 664compressor_names = { 665 0: 'store', 666 1: 'shrink', 667 2: 'reduce', 668 3: 'reduce', 669 4: 'reduce', 670 5: 'reduce', 671 6: 'implode', 672 7: 'tokenize', 673 8: 'deflate', 674 9: 'deflate64', 675 10: 'implode', 676 12: 'bzip2', 677 14: 'lzma', 678 18: 'terse', 679 19: 'lz77', 680 97: 'wavpack', 681 98: 'ppmd', 682} 683 684def _check_compression(compression): 685 if compression == ZIP_STORED: 686 pass 687 elif compression == ZIP_DEFLATED: 688 if not zlib: 689 raise RuntimeError( 690 "Compression requires the (missing) zlib module") 691 elif compression == ZIP_BZIP2: 692 if not bz2: 693 raise RuntimeError( 694 "Compression requires the (missing) bz2 module") 695 elif compression == ZIP_LZMA: 696 if not lzma: 697 raise RuntimeError( 698 "Compression requires the (missing) lzma module") 699 else: 700 raise NotImplementedError("That compression method is not supported") 701 702 703def _get_compressor(compress_type, compresslevel=None): 704 if compress_type == ZIP_DEFLATED: 705 if compresslevel is not None: 706 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 707 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 708 elif compress_type == ZIP_BZIP2: 709 if compresslevel is not None: 710 return bz2.BZ2Compressor(compresslevel) 711 return bz2.BZ2Compressor() 712 # compresslevel is ignored for ZIP_LZMA 713 elif compress_type == ZIP_LZMA: 714 return LZMACompressor() 715 else: 716 return None 717 718 719def _get_decompressor(compress_type): 720 _check_compression(compress_type) 721 if compress_type == ZIP_STORED: 722 return None 723 elif compress_type == ZIP_DEFLATED: 724 return zlib.decompressobj(-15) 725 elif compress_type == ZIP_BZIP2: 726 return bz2.BZ2Decompressor() 727 elif compress_type == ZIP_LZMA: 728 return LZMADecompressor() 729 else: 730 descr = compressor_names.get(compress_type) 731 if descr: 732 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 733 else: 734 raise NotImplementedError("compression type %d" % (compress_type,)) 735 736 737class _SharedFile: 738 def __init__(self, file, pos, close, lock, writing): 739 self._file = file 740 self._pos = pos 741 self._close = close 742 self._lock = lock 743 self._writing = writing 744 self.seekable = file.seekable 745 self.tell = file.tell 746 747 def seek(self, offset, whence=0): 748 with self._lock: 749 if self._writing(): 750 raise ValueError("Can't reposition in the ZIP file while " 751 "there is an open writing handle on it. " 752 "Close the writing handle before trying to read.") 753 self._file.seek(offset, whence) 754 self._pos = self._file.tell() 755 return self._pos 756 757 def read(self, n=-1): 758 with self._lock: 759 if self._writing(): 760 raise ValueError("Can't read from the ZIP file while there " 761 "is an open writing handle on it. " 762 "Close the writing handle before trying to read.") 763 self._file.seek(self._pos) 764 data = self._file.read(n) 765 self._pos = self._file.tell() 766 return data 767 768 def close(self): 769 if self._file is not None: 770 fileobj = self._file 771 self._file = None 772 self._close(fileobj) 773 774# Provide the tell method for unseekable stream 775class _Tellable: 776 def __init__(self, fp): 777 self.fp = fp 778 self.offset = 0 779 780 def write(self, data): 781 n = self.fp.write(data) 782 self.offset += n 783 return n 784 785 def tell(self): 786 return self.offset 787 788 def flush(self): 789 self.fp.flush() 790 791 def close(self): 792 self.fp.close() 793 794 795class ZipExtFile(io.BufferedIOBase): 796 """File-like object for reading an archive member. 797 Is returned by ZipFile.open(). 798 """ 799 800 # Max size supported by decompressor. 801 MAX_N = 1 << 31 - 1 802 803 # Read from compressed files in 4k blocks. 804 MIN_READ_SIZE = 4096 805 806 # Chunk size to read during seek 807 MAX_SEEK_READ = 1 << 24 808 809 def __init__(self, fileobj, mode, zipinfo, pwd=None, 810 close_fileobj=False): 811 self._fileobj = fileobj 812 self._pwd = pwd 813 self._close_fileobj = close_fileobj 814 815 self._compress_type = zipinfo.compress_type 816 self._compress_left = zipinfo.compress_size 817 self._left = zipinfo.file_size 818 819 self._decompressor = _get_decompressor(self._compress_type) 820 821 self._eof = False 822 self._readbuffer = b'' 823 self._offset = 0 824 825 self.newlines = None 826 827 self.mode = mode 828 self.name = zipinfo.filename 829 830 if hasattr(zipinfo, 'CRC'): 831 self._expected_crc = zipinfo.CRC 832 self._running_crc = crc32(b'') 833 else: 834 self._expected_crc = None 835 836 self._seekable = False 837 try: 838 if fileobj.seekable(): 839 self._orig_compress_start = fileobj.tell() 840 self._orig_compress_size = zipinfo.compress_size 841 self._orig_file_size = zipinfo.file_size 842 self._orig_start_crc = self._running_crc 843 self._seekable = True 844 except AttributeError: 845 pass 846 847 self._decrypter = None 848 if pwd: 849 if zipinfo.flag_bits & 0x8: 850 # compare against the file type from extended local headers 851 check_byte = (zipinfo._raw_time >> 8) & 0xff 852 else: 853 # compare against the CRC otherwise 854 check_byte = (zipinfo.CRC >> 24) & 0xff 855 h = self._init_decrypter() 856 if h != check_byte: 857 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 858 859 860 def _init_decrypter(self): 861 self._decrypter = _ZipDecrypter(self._pwd) 862 # The first 12 bytes in the cypher stream is an encryption header 863 # used to strengthen the algorithm. The first 11 bytes are 864 # completely random, while the 12th contains the MSB of the CRC, 865 # or the MSB of the file time depending on the header type 866 # and is used to check the correctness of the password. 867 header = self._fileobj.read(12) 868 self._compress_left -= 12 869 return self._decrypter(header)[11] 870 871 def __repr__(self): 872 result = ['<%s.%s' % (self.__class__.__module__, 873 self.__class__.__qualname__)] 874 if not self.closed: 875 result.append(' name=%r mode=%r' % (self.name, self.mode)) 876 if self._compress_type != ZIP_STORED: 877 result.append(' compress_type=%s' % 878 compressor_names.get(self._compress_type, 879 self._compress_type)) 880 else: 881 result.append(' [closed]') 882 result.append('>') 883 return ''.join(result) 884 885 def readline(self, limit=-1): 886 """Read and return a line from the stream. 887 888 If limit is specified, at most limit bytes will be read. 889 """ 890 891 if limit < 0: 892 # Shortcut common case - newline found in buffer. 893 i = self._readbuffer.find(b'\n', self._offset) + 1 894 if i > 0: 895 line = self._readbuffer[self._offset: i] 896 self._offset = i 897 return line 898 899 return io.BufferedIOBase.readline(self, limit) 900 901 def peek(self, n=1): 902 """Returns buffered bytes without advancing the position.""" 903 if n > len(self._readbuffer) - self._offset: 904 chunk = self.read(n) 905 if len(chunk) > self._offset: 906 self._readbuffer = chunk + self._readbuffer[self._offset:] 907 self._offset = 0 908 else: 909 self._offset -= len(chunk) 910 911 # Return up to 512 bytes to reduce allocation overhead for tight loops. 912 return self._readbuffer[self._offset: self._offset + 512] 913 914 def readable(self): 915 return True 916 917 def read(self, n=-1): 918 """Read and return up to n bytes. 919 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 920 """ 921 if n is None or n < 0: 922 buf = self._readbuffer[self._offset:] 923 self._readbuffer = b'' 924 self._offset = 0 925 while not self._eof: 926 buf += self._read1(self.MAX_N) 927 return buf 928 929 end = n + self._offset 930 if end < len(self._readbuffer): 931 buf = self._readbuffer[self._offset:end] 932 self._offset = end 933 return buf 934 935 n = end - len(self._readbuffer) 936 buf = self._readbuffer[self._offset:] 937 self._readbuffer = b'' 938 self._offset = 0 939 while n > 0 and not self._eof: 940 data = self._read1(n) 941 if n < len(data): 942 self._readbuffer = data 943 self._offset = n 944 buf += data[:n] 945 break 946 buf += data 947 n -= len(data) 948 return buf 949 950 def _update_crc(self, newdata): 951 # Update the CRC using the given data. 952 if self._expected_crc is None: 953 # No need to compute the CRC if we don't have a reference value 954 return 955 self._running_crc = crc32(newdata, self._running_crc) 956 # Check the CRC if we're at the end of the file 957 if self._eof and self._running_crc != self._expected_crc: 958 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 959 960 def read1(self, n): 961 """Read up to n bytes with at most one read() system call.""" 962 963 if n is None or n < 0: 964 buf = self._readbuffer[self._offset:] 965 self._readbuffer = b'' 966 self._offset = 0 967 while not self._eof: 968 data = self._read1(self.MAX_N) 969 if data: 970 buf += data 971 break 972 return buf 973 974 end = n + self._offset 975 if end < len(self._readbuffer): 976 buf = self._readbuffer[self._offset:end] 977 self._offset = end 978 return buf 979 980 n = end - len(self._readbuffer) 981 buf = self._readbuffer[self._offset:] 982 self._readbuffer = b'' 983 self._offset = 0 984 if n > 0: 985 while not self._eof: 986 data = self._read1(n) 987 if n < len(data): 988 self._readbuffer = data 989 self._offset = n 990 buf += data[:n] 991 break 992 if data: 993 buf += data 994 break 995 return buf 996 997 def _read1(self, n): 998 # Read up to n compressed bytes with at most one read() system call, 999 # decrypt and decompress them. 1000 if self._eof or n <= 0: 1001 return b'' 1002 1003 # Read from file. 1004 if self._compress_type == ZIP_DEFLATED: 1005 ## Handle unconsumed data. 1006 data = self._decompressor.unconsumed_tail 1007 if n > len(data): 1008 data += self._read2(n - len(data)) 1009 else: 1010 data = self._read2(n) 1011 1012 if self._compress_type == ZIP_STORED: 1013 self._eof = self._compress_left <= 0 1014 elif self._compress_type == ZIP_DEFLATED: 1015 n = max(n, self.MIN_READ_SIZE) 1016 data = self._decompressor.decompress(data, n) 1017 self._eof = (self._decompressor.eof or 1018 self._compress_left <= 0 and 1019 not self._decompressor.unconsumed_tail) 1020 if self._eof: 1021 data += self._decompressor.flush() 1022 else: 1023 data = self._decompressor.decompress(data) 1024 self._eof = self._decompressor.eof or self._compress_left <= 0 1025 1026 data = data[:self._left] 1027 self._left -= len(data) 1028 if self._left <= 0: 1029 self._eof = True 1030 self._update_crc(data) 1031 return data 1032 1033 def _read2(self, n): 1034 if self._compress_left <= 0: 1035 return b'' 1036 1037 n = max(n, self.MIN_READ_SIZE) 1038 n = min(n, self._compress_left) 1039 1040 data = self._fileobj.read(n) 1041 self._compress_left -= len(data) 1042 if not data: 1043 raise EOFError 1044 1045 if self._decrypter is not None: 1046 data = self._decrypter(data) 1047 return data 1048 1049 def close(self): 1050 try: 1051 if self._close_fileobj: 1052 self._fileobj.close() 1053 finally: 1054 super().close() 1055 1056 def seekable(self): 1057 return self._seekable 1058 1059 def seek(self, offset, whence=0): 1060 if not self._seekable: 1061 raise io.UnsupportedOperation("underlying stream is not seekable") 1062 curr_pos = self.tell() 1063 if whence == 0: # Seek from start of file 1064 new_pos = offset 1065 elif whence == 1: # Seek from current position 1066 new_pos = curr_pos + offset 1067 elif whence == 2: # Seek from EOF 1068 new_pos = self._orig_file_size + offset 1069 else: 1070 raise ValueError("whence must be os.SEEK_SET (0), " 1071 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1072 1073 if new_pos > self._orig_file_size: 1074 new_pos = self._orig_file_size 1075 1076 if new_pos < 0: 1077 new_pos = 0 1078 1079 read_offset = new_pos - curr_pos 1080 buff_offset = read_offset + self._offset 1081 1082 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1083 # Just move the _offset index if the new position is in the _readbuffer 1084 self._offset = buff_offset 1085 read_offset = 0 1086 elif read_offset < 0: 1087 # Position is before the current position. Reset the ZipExtFile 1088 self._fileobj.seek(self._orig_compress_start) 1089 self._running_crc = self._orig_start_crc 1090 self._compress_left = self._orig_compress_size 1091 self._left = self._orig_file_size 1092 self._readbuffer = b'' 1093 self._offset = 0 1094 self._decompressor = _get_decompressor(self._compress_type) 1095 self._eof = False 1096 read_offset = new_pos 1097 if self._decrypter is not None: 1098 self._init_decrypter() 1099 1100 while read_offset > 0: 1101 read_len = min(self.MAX_SEEK_READ, read_offset) 1102 self.read(read_len) 1103 read_offset -= read_len 1104 1105 return self.tell() 1106 1107 def tell(self): 1108 if not self._seekable: 1109 raise io.UnsupportedOperation("underlying stream is not seekable") 1110 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1111 return filepos 1112 1113 1114class _ZipWriteFile(io.BufferedIOBase): 1115 def __init__(self, zf, zinfo, zip64): 1116 self._zinfo = zinfo 1117 self._zip64 = zip64 1118 self._zipfile = zf 1119 self._compressor = _get_compressor(zinfo.compress_type, 1120 zinfo._compresslevel) 1121 self._file_size = 0 1122 self._compress_size = 0 1123 self._crc = 0 1124 1125 @property 1126 def _fileobj(self): 1127 return self._zipfile.fp 1128 1129 def writable(self): 1130 return True 1131 1132 def write(self, data): 1133 if self.closed: 1134 raise ValueError('I/O operation on closed file.') 1135 nbytes = len(data) 1136 self._file_size += nbytes 1137 self._crc = crc32(data, self._crc) 1138 if self._compressor: 1139 data = self._compressor.compress(data) 1140 self._compress_size += len(data) 1141 self._fileobj.write(data) 1142 return nbytes 1143 1144 def close(self): 1145 if self.closed: 1146 return 1147 try: 1148 super().close() 1149 # Flush any data from the compressor, and update header info 1150 if self._compressor: 1151 buf = self._compressor.flush() 1152 self._compress_size += len(buf) 1153 self._fileobj.write(buf) 1154 self._zinfo.compress_size = self._compress_size 1155 else: 1156 self._zinfo.compress_size = self._file_size 1157 self._zinfo.CRC = self._crc 1158 self._zinfo.file_size = self._file_size 1159 1160 # Write updated header info 1161 if self._zinfo.flag_bits & 0x08: 1162 # Write CRC and file sizes after the file data 1163 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1164 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1165 self._zinfo.compress_size, self._zinfo.file_size)) 1166 self._zipfile.start_dir = self._fileobj.tell() 1167 else: 1168 if not self._zip64: 1169 if self._file_size > ZIP64_LIMIT: 1170 raise RuntimeError( 1171 'File size unexpectedly exceeded ZIP64 limit') 1172 if self._compress_size > ZIP64_LIMIT: 1173 raise RuntimeError( 1174 'Compressed size unexpectedly exceeded ZIP64 limit') 1175 # Seek backwards and write file header (which will now include 1176 # correct CRC and file sizes) 1177 1178 # Preserve current position in file 1179 self._zipfile.start_dir = self._fileobj.tell() 1180 self._fileobj.seek(self._zinfo.header_offset) 1181 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1182 self._fileobj.seek(self._zipfile.start_dir) 1183 1184 # Successfully written: Add file to our caches 1185 self._zipfile.filelist.append(self._zinfo) 1186 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1187 finally: 1188 self._zipfile._writing = False 1189 1190 1191 1192class ZipFile: 1193 """ Class with methods to open, read, write, close, list zip files. 1194 1195 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1196 compresslevel=None) 1197 1198 file: Either the path to the file, or a file-like object. 1199 If it is a path, the file will be opened and closed by ZipFile. 1200 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1201 or append 'a'. 1202 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1203 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1204 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1205 needed, otherwise it will raise an exception when this would 1206 be necessary. 1207 compresslevel: None (default for the given compression type) or an integer 1208 specifying the level to pass to the compressor. 1209 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1210 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1211 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1212 1213 """ 1214 1215 fp = None # Set here since __del__ checks it 1216 _windows_illegal_name_trans_table = None 1217 1218 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1219 compresslevel=None, *, strict_timestamps=True): 1220 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1221 or append 'a'.""" 1222 if mode not in ('r', 'w', 'x', 'a'): 1223 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1224 1225 _check_compression(compression) 1226 1227 self._allowZip64 = allowZip64 1228 self._didModify = False 1229 self.debug = 0 # Level of printing: 0 through 3 1230 self.NameToInfo = {} # Find file info given name 1231 self.filelist = [] # List of ZipInfo instances for archive 1232 self.compression = compression # Method of compression 1233 self.compresslevel = compresslevel 1234 self.mode = mode 1235 self.pwd = None 1236 self._comment = b'' 1237 self._strict_timestamps = strict_timestamps 1238 1239 # Check if we were passed a file-like object 1240 if isinstance(file, os.PathLike): 1241 file = os.fspath(file) 1242 if isinstance(file, str): 1243 # No, it's a filename 1244 self._filePassed = 0 1245 self.filename = file 1246 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1247 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1248 filemode = modeDict[mode] 1249 while True: 1250 try: 1251 self.fp = io.open(file, filemode) 1252 except OSError: 1253 if filemode in modeDict: 1254 filemode = modeDict[filemode] 1255 continue 1256 raise 1257 break 1258 else: 1259 self._filePassed = 1 1260 self.fp = file 1261 self.filename = getattr(file, 'name', None) 1262 self._fileRefCnt = 1 1263 self._lock = threading.RLock() 1264 self._seekable = True 1265 self._writing = False 1266 1267 try: 1268 if mode == 'r': 1269 self._RealGetContents() 1270 elif mode in ('w', 'x'): 1271 # set the modified flag so central directory gets written 1272 # even if no files are added to the archive 1273 self._didModify = True 1274 try: 1275 self.start_dir = self.fp.tell() 1276 except (AttributeError, OSError): 1277 self.fp = _Tellable(self.fp) 1278 self.start_dir = 0 1279 self._seekable = False 1280 else: 1281 # Some file-like objects can provide tell() but not seek() 1282 try: 1283 self.fp.seek(self.start_dir) 1284 except (AttributeError, OSError): 1285 self._seekable = False 1286 elif mode == 'a': 1287 try: 1288 # See if file is a zip file 1289 self._RealGetContents() 1290 # seek to start of directory and overwrite 1291 self.fp.seek(self.start_dir) 1292 except BadZipFile: 1293 # file is not a zip file, just append 1294 self.fp.seek(0, 2) 1295 1296 # set the modified flag so central directory gets written 1297 # even if no files are added to the archive 1298 self._didModify = True 1299 self.start_dir = self.fp.tell() 1300 else: 1301 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1302 except: 1303 fp = self.fp 1304 self.fp = None 1305 self._fpclose(fp) 1306 raise 1307 1308 def __enter__(self): 1309 return self 1310 1311 def __exit__(self, type, value, traceback): 1312 self.close() 1313 1314 def __repr__(self): 1315 result = ['<%s.%s' % (self.__class__.__module__, 1316 self.__class__.__qualname__)] 1317 if self.fp is not None: 1318 if self._filePassed: 1319 result.append(' file=%r' % self.fp) 1320 elif self.filename is not None: 1321 result.append(' filename=%r' % self.filename) 1322 result.append(' mode=%r' % self.mode) 1323 else: 1324 result.append(' [closed]') 1325 result.append('>') 1326 return ''.join(result) 1327 1328 def _RealGetContents(self): 1329 """Read in the table of contents for the ZIP file.""" 1330 fp = self.fp 1331 try: 1332 endrec = _EndRecData(fp) 1333 except OSError: 1334 raise BadZipFile("File is not a zip file") 1335 if not endrec: 1336 raise BadZipFile("File is not a zip file") 1337 if self.debug > 1: 1338 print(endrec) 1339 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1340 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1341 self._comment = endrec[_ECD_COMMENT] # archive comment 1342 1343 # "concat" is zero, unless zip was concatenated to another file 1344 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1345 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1346 # If Zip64 extension structures are present, account for them 1347 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1348 1349 if self.debug > 2: 1350 inferred = concat + offset_cd 1351 print("given, inferred, offset", offset_cd, inferred, concat) 1352 # self.start_dir: Position of start of central directory 1353 self.start_dir = offset_cd + concat 1354 fp.seek(self.start_dir, 0) 1355 data = fp.read(size_cd) 1356 fp = io.BytesIO(data) 1357 total = 0 1358 while total < size_cd: 1359 centdir = fp.read(sizeCentralDir) 1360 if len(centdir) != sizeCentralDir: 1361 raise BadZipFile("Truncated central directory") 1362 centdir = struct.unpack(structCentralDir, centdir) 1363 if centdir[_CD_SIGNATURE] != stringCentralDir: 1364 raise BadZipFile("Bad magic number for central directory") 1365 if self.debug > 2: 1366 print(centdir) 1367 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1368 flags = centdir[5] 1369 if flags & 0x800: 1370 # UTF-8 file names extension 1371 filename = filename.decode('utf-8') 1372 else: 1373 # Historical ZIP filename encoding 1374 filename = filename.decode('cp437') 1375 # Create ZipInfo instance to store file information 1376 x = ZipInfo(filename) 1377 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1378 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1379 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1380 (x.create_version, x.create_system, x.extract_version, x.reserved, 1381 x.flag_bits, x.compress_type, t, d, 1382 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1383 if x.extract_version > MAX_EXTRACT_VERSION: 1384 raise NotImplementedError("zip file version %.1f" % 1385 (x.extract_version / 10)) 1386 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1387 # Convert date/time code to (year, month, day, hour, min, sec) 1388 x._raw_time = t 1389 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1390 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1391 1392 x._decodeExtra() 1393 x.header_offset = x.header_offset + concat 1394 self.filelist.append(x) 1395 self.NameToInfo[x.filename] = x 1396 1397 # update total bytes read from central directory 1398 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1399 + centdir[_CD_EXTRA_FIELD_LENGTH] 1400 + centdir[_CD_COMMENT_LENGTH]) 1401 1402 if self.debug > 2: 1403 print("total", total) 1404 1405 1406 def namelist(self): 1407 """Return a list of file names in the archive.""" 1408 return [data.filename for data in self.filelist] 1409 1410 def infolist(self): 1411 """Return a list of class ZipInfo instances for files in the 1412 archive.""" 1413 return self.filelist 1414 1415 def printdir(self, file=None): 1416 """Print a table of contents for the zip file.""" 1417 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1418 file=file) 1419 for zinfo in self.filelist: 1420 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1421 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1422 file=file) 1423 1424 def testzip(self): 1425 """Read all the files and check the CRC.""" 1426 chunk_size = 2 ** 20 1427 for zinfo in self.filelist: 1428 try: 1429 # Read by chunks, to avoid an OverflowError or a 1430 # MemoryError with very large embedded files. 1431 with self.open(zinfo.filename, "r") as f: 1432 while f.read(chunk_size): # Check CRC-32 1433 pass 1434 except BadZipFile: 1435 return zinfo.filename 1436 1437 def getinfo(self, name): 1438 """Return the instance of ZipInfo given 'name'.""" 1439 info = self.NameToInfo.get(name) 1440 if info is None: 1441 raise KeyError( 1442 'There is no item named %r in the archive' % name) 1443 1444 return info 1445 1446 def setpassword(self, pwd): 1447 """Set default password for encrypted files.""" 1448 if pwd and not isinstance(pwd, bytes): 1449 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1450 if pwd: 1451 self.pwd = pwd 1452 else: 1453 self.pwd = None 1454 1455 @property 1456 def comment(self): 1457 """The comment text associated with the ZIP file.""" 1458 return self._comment 1459 1460 @comment.setter 1461 def comment(self, comment): 1462 if not isinstance(comment, bytes): 1463 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1464 # check for valid comment length 1465 if len(comment) > ZIP_MAX_COMMENT: 1466 import warnings 1467 warnings.warn('Archive comment is too long; truncating to %d bytes' 1468 % ZIP_MAX_COMMENT, stacklevel=2) 1469 comment = comment[:ZIP_MAX_COMMENT] 1470 self._comment = comment 1471 self._didModify = True 1472 1473 def read(self, name, pwd=None): 1474 """Return file bytes for name.""" 1475 with self.open(name, "r", pwd) as fp: 1476 return fp.read() 1477 1478 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1479 """Return file-like object for 'name'. 1480 1481 name is a string for the file name within the ZIP file, or a ZipInfo 1482 object. 1483 1484 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1485 write to a file newly added to the archive. 1486 1487 pwd is the password to decrypt files (only used for reading). 1488 1489 When writing, if the file size is not known in advance but may exceed 1490 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1491 files. If the size is known in advance, it is best to pass a ZipInfo 1492 instance for name, with zinfo.file_size set. 1493 """ 1494 if mode not in {"r", "w"}: 1495 raise ValueError('open() requires mode "r" or "w"') 1496 if pwd and not isinstance(pwd, bytes): 1497 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1498 if pwd and (mode == "w"): 1499 raise ValueError("pwd is only supported for reading files") 1500 if not self.fp: 1501 raise ValueError( 1502 "Attempt to use ZIP archive that was already closed") 1503 1504 # Make sure we have an info object 1505 if isinstance(name, ZipInfo): 1506 # 'name' is already an info object 1507 zinfo = name 1508 elif mode == 'w': 1509 zinfo = ZipInfo(name) 1510 zinfo.compress_type = self.compression 1511 zinfo._compresslevel = self.compresslevel 1512 else: 1513 # Get info object for name 1514 zinfo = self.getinfo(name) 1515 1516 if mode == 'w': 1517 return self._open_to_write(zinfo, force_zip64=force_zip64) 1518 1519 if self._writing: 1520 raise ValueError("Can't read from the ZIP file while there " 1521 "is an open writing handle on it. " 1522 "Close the writing handle before trying to read.") 1523 1524 # Open for reading: 1525 self._fileRefCnt += 1 1526 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1527 self._fpclose, self._lock, lambda: self._writing) 1528 try: 1529 # Skip the file header: 1530 fheader = zef_file.read(sizeFileHeader) 1531 if len(fheader) != sizeFileHeader: 1532 raise BadZipFile("Truncated file header") 1533 fheader = struct.unpack(structFileHeader, fheader) 1534 if fheader[_FH_SIGNATURE] != stringFileHeader: 1535 raise BadZipFile("Bad magic number for file header") 1536 1537 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1538 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1539 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1540 1541 if zinfo.flag_bits & 0x20: 1542 # Zip 2.7: compressed patched data 1543 raise NotImplementedError("compressed patched data (flag bit 5)") 1544 1545 if zinfo.flag_bits & 0x40: 1546 # strong encryption 1547 raise NotImplementedError("strong encryption (flag bit 6)") 1548 1549 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800: 1550 # UTF-8 filename 1551 fname_str = fname.decode("utf-8") 1552 else: 1553 fname_str = fname.decode("cp437") 1554 1555 if fname_str != zinfo.orig_filename: 1556 raise BadZipFile( 1557 'File name in directory %r and header %r differ.' 1558 % (zinfo.orig_filename, fname)) 1559 1560 # check for encrypted flag & handle password 1561 is_encrypted = zinfo.flag_bits & 0x1 1562 if is_encrypted: 1563 if not pwd: 1564 pwd = self.pwd 1565 if not pwd: 1566 raise RuntimeError("File %r is encrypted, password " 1567 "required for extraction" % name) 1568 else: 1569 pwd = None 1570 1571 return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1572 except: 1573 zef_file.close() 1574 raise 1575 1576 def _open_to_write(self, zinfo, force_zip64=False): 1577 if force_zip64 and not self._allowZip64: 1578 raise ValueError( 1579 "force_zip64 is True, but allowZip64 was False when opening " 1580 "the ZIP file." 1581 ) 1582 if self._writing: 1583 raise ValueError("Can't write to the ZIP file while there is " 1584 "another write handle open on it. " 1585 "Close the first handle before opening another.") 1586 1587 # Sizes and CRC are overwritten with correct data after processing the file 1588 if not hasattr(zinfo, 'file_size'): 1589 zinfo.file_size = 0 1590 zinfo.compress_size = 0 1591 zinfo.CRC = 0 1592 1593 zinfo.flag_bits = 0x00 1594 if zinfo.compress_type == ZIP_LZMA: 1595 # Compressed data includes an end-of-stream (EOS) marker 1596 zinfo.flag_bits |= 0x02 1597 if not self._seekable: 1598 zinfo.flag_bits |= 0x08 1599 1600 if not zinfo.external_attr: 1601 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1602 1603 # Compressed size can be larger than uncompressed size 1604 zip64 = self._allowZip64 and \ 1605 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) 1606 1607 if self._seekable: 1608 self.fp.seek(self.start_dir) 1609 zinfo.header_offset = self.fp.tell() 1610 1611 self._writecheck(zinfo) 1612 self._didModify = True 1613 1614 self.fp.write(zinfo.FileHeader(zip64)) 1615 1616 self._writing = True 1617 return _ZipWriteFile(self, zinfo, zip64) 1618 1619 def extract(self, member, path=None, pwd=None): 1620 """Extract a member from the archive to the current working directory, 1621 using its full name. Its file information is extracted as accurately 1622 as possible. `member' may be a filename or a ZipInfo object. You can 1623 specify a different directory using `path'. 1624 """ 1625 if path is None: 1626 path = os.getcwd() 1627 else: 1628 path = os.fspath(path) 1629 1630 return self._extract_member(member, path, pwd) 1631 1632 def extractall(self, path=None, members=None, pwd=None): 1633 """Extract all members from the archive to the current working 1634 directory. `path' specifies a different directory to extract to. 1635 `members' is optional and must be a subset of the list returned 1636 by namelist(). 1637 """ 1638 if members is None: 1639 members = self.namelist() 1640 1641 if path is None: 1642 path = os.getcwd() 1643 else: 1644 path = os.fspath(path) 1645 1646 for zipinfo in members: 1647 self._extract_member(zipinfo, path, pwd) 1648 1649 @classmethod 1650 def _sanitize_windows_name(cls, arcname, pathsep): 1651 """Replace bad characters and remove trailing dots from parts.""" 1652 table = cls._windows_illegal_name_trans_table 1653 if not table: 1654 illegal = ':<>|"?*' 1655 table = str.maketrans(illegal, '_' * len(illegal)) 1656 cls._windows_illegal_name_trans_table = table 1657 arcname = arcname.translate(table) 1658 # remove trailing dots 1659 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1660 # rejoin, removing empty parts. 1661 arcname = pathsep.join(x for x in arcname if x) 1662 return arcname 1663 1664 def _extract_member(self, member, targetpath, pwd): 1665 """Extract the ZipInfo object 'member' to a physical 1666 file on the path targetpath. 1667 """ 1668 if not isinstance(member, ZipInfo): 1669 member = self.getinfo(member) 1670 1671 # build the destination pathname, replacing 1672 # forward slashes to platform specific separators. 1673 arcname = member.filename.replace('/', os.path.sep) 1674 1675 if os.path.altsep: 1676 arcname = arcname.replace(os.path.altsep, os.path.sep) 1677 # interpret absolute pathname as relative, remove drive letter or 1678 # UNC path, redundant separators, "." and ".." components. 1679 arcname = os.path.splitdrive(arcname)[1] 1680 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1681 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1682 if x not in invalid_path_parts) 1683 if os.path.sep == '\\': 1684 # filter illegal characters on Windows 1685 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1686 1687 targetpath = os.path.join(targetpath, arcname) 1688 targetpath = os.path.normpath(targetpath) 1689 1690 # Create all upper directories if necessary. 1691 upperdirs = os.path.dirname(targetpath) 1692 if upperdirs and not os.path.exists(upperdirs): 1693 os.makedirs(upperdirs) 1694 1695 if member.is_dir(): 1696 if not os.path.isdir(targetpath): 1697 os.mkdir(targetpath) 1698 return targetpath 1699 1700 with self.open(member, pwd=pwd) as source, \ 1701 open(targetpath, "wb") as target: 1702 shutil.copyfileobj(source, target) 1703 1704 return targetpath 1705 1706 def _writecheck(self, zinfo): 1707 """Check for errors before writing a file to the archive.""" 1708 if zinfo.filename in self.NameToInfo: 1709 import warnings 1710 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1711 if self.mode not in ('w', 'x', 'a'): 1712 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1713 if not self.fp: 1714 raise ValueError( 1715 "Attempt to write ZIP archive that was already closed") 1716 _check_compression(zinfo.compress_type) 1717 if not self._allowZip64: 1718 requires_zip64 = None 1719 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1720 requires_zip64 = "Files count" 1721 elif zinfo.file_size > ZIP64_LIMIT: 1722 requires_zip64 = "Filesize" 1723 elif zinfo.header_offset > ZIP64_LIMIT: 1724 requires_zip64 = "Zipfile size" 1725 if requires_zip64: 1726 raise LargeZipFile(requires_zip64 + 1727 " would require ZIP64 extensions") 1728 1729 def write(self, filename, arcname=None, 1730 compress_type=None, compresslevel=None): 1731 """Put the bytes from filename into the archive under the name 1732 arcname.""" 1733 if not self.fp: 1734 raise ValueError( 1735 "Attempt to write to ZIP archive that was already closed") 1736 if self._writing: 1737 raise ValueError( 1738 "Can't write to ZIP archive while an open writing handle exists" 1739 ) 1740 1741 zinfo = ZipInfo.from_file(filename, arcname, 1742 strict_timestamps=self._strict_timestamps) 1743 1744 if zinfo.is_dir(): 1745 zinfo.compress_size = 0 1746 zinfo.CRC = 0 1747 else: 1748 if compress_type is not None: 1749 zinfo.compress_type = compress_type 1750 else: 1751 zinfo.compress_type = self.compression 1752 1753 if compresslevel is not None: 1754 zinfo._compresslevel = compresslevel 1755 else: 1756 zinfo._compresslevel = self.compresslevel 1757 1758 if zinfo.is_dir(): 1759 with self._lock: 1760 if self._seekable: 1761 self.fp.seek(self.start_dir) 1762 zinfo.header_offset = self.fp.tell() # Start of header bytes 1763 if zinfo.compress_type == ZIP_LZMA: 1764 # Compressed data includes an end-of-stream (EOS) marker 1765 zinfo.flag_bits |= 0x02 1766 1767 self._writecheck(zinfo) 1768 self._didModify = True 1769 1770 self.filelist.append(zinfo) 1771 self.NameToInfo[zinfo.filename] = zinfo 1772 self.fp.write(zinfo.FileHeader(False)) 1773 self.start_dir = self.fp.tell() 1774 else: 1775 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1776 shutil.copyfileobj(src, dest, 1024*8) 1777 1778 def writestr(self, zinfo_or_arcname, data, 1779 compress_type=None, compresslevel=None): 1780 """Write a file into the archive. The contents is 'data', which 1781 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1782 it is encoded as UTF-8 first. 1783 'zinfo_or_arcname' is either a ZipInfo instance or 1784 the name of the file in the archive.""" 1785 if isinstance(data, str): 1786 data = data.encode("utf-8") 1787 if not isinstance(zinfo_or_arcname, ZipInfo): 1788 zinfo = ZipInfo(filename=zinfo_or_arcname, 1789 date_time=time.localtime(time.time())[:6]) 1790 zinfo.compress_type = self.compression 1791 zinfo._compresslevel = self.compresslevel 1792 if zinfo.filename[-1] == '/': 1793 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1794 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1795 else: 1796 zinfo.external_attr = 0o600 << 16 # ?rw------- 1797 else: 1798 zinfo = zinfo_or_arcname 1799 1800 if not self.fp: 1801 raise ValueError( 1802 "Attempt to write to ZIP archive that was already closed") 1803 if self._writing: 1804 raise ValueError( 1805 "Can't write to ZIP archive while an open writing handle exists." 1806 ) 1807 1808 if compress_type is not None: 1809 zinfo.compress_type = compress_type 1810 1811 if compresslevel is not None: 1812 zinfo._compresslevel = compresslevel 1813 1814 zinfo.file_size = len(data) # Uncompressed size 1815 with self._lock: 1816 with self.open(zinfo, mode='w') as dest: 1817 dest.write(data) 1818 1819 def __del__(self): 1820 """Call the "close()" method in case the user forgot.""" 1821 self.close() 1822 1823 def close(self): 1824 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1825 records.""" 1826 if self.fp is None: 1827 return 1828 1829 if self._writing: 1830 raise ValueError("Can't close the ZIP file while there is " 1831 "an open writing handle on it. " 1832 "Close the writing handle before closing the zip.") 1833 1834 try: 1835 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1836 with self._lock: 1837 if self._seekable: 1838 self.fp.seek(self.start_dir) 1839 self._write_end_record() 1840 finally: 1841 fp = self.fp 1842 self.fp = None 1843 self._fpclose(fp) 1844 1845 def _write_end_record(self): 1846 for zinfo in self.filelist: # write central directory 1847 dt = zinfo.date_time 1848 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1849 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1850 extra = [] 1851 if zinfo.file_size > ZIP64_LIMIT \ 1852 or zinfo.compress_size > ZIP64_LIMIT: 1853 extra.append(zinfo.file_size) 1854 extra.append(zinfo.compress_size) 1855 file_size = 0xffffffff 1856 compress_size = 0xffffffff 1857 else: 1858 file_size = zinfo.file_size 1859 compress_size = zinfo.compress_size 1860 1861 if zinfo.header_offset > ZIP64_LIMIT: 1862 extra.append(zinfo.header_offset) 1863 header_offset = 0xffffffff 1864 else: 1865 header_offset = zinfo.header_offset 1866 1867 extra_data = zinfo.extra 1868 min_version = 0 1869 if extra: 1870 # Append a ZIP64 field to the extra's 1871 extra_data = _strip_extra(extra_data, (1,)) 1872 extra_data = struct.pack( 1873 '<HH' + 'Q'*len(extra), 1874 1, 8*len(extra), *extra) + extra_data 1875 1876 min_version = ZIP64_VERSION 1877 1878 if zinfo.compress_type == ZIP_BZIP2: 1879 min_version = max(BZIP2_VERSION, min_version) 1880 elif zinfo.compress_type == ZIP_LZMA: 1881 min_version = max(LZMA_VERSION, min_version) 1882 1883 extract_version = max(min_version, zinfo.extract_version) 1884 create_version = max(min_version, zinfo.create_version) 1885 try: 1886 filename, flag_bits = zinfo._encodeFilenameFlags() 1887 centdir = struct.pack(structCentralDir, 1888 stringCentralDir, create_version, 1889 zinfo.create_system, extract_version, zinfo.reserved, 1890 flag_bits, zinfo.compress_type, dostime, dosdate, 1891 zinfo.CRC, compress_size, file_size, 1892 len(filename), len(extra_data), len(zinfo.comment), 1893 0, zinfo.internal_attr, zinfo.external_attr, 1894 header_offset) 1895 except DeprecationWarning: 1896 print((structCentralDir, stringCentralDir, create_version, 1897 zinfo.create_system, extract_version, zinfo.reserved, 1898 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, 1899 zinfo.CRC, compress_size, file_size, 1900 len(zinfo.filename), len(extra_data), len(zinfo.comment), 1901 0, zinfo.internal_attr, zinfo.external_attr, 1902 header_offset), file=sys.stderr) 1903 raise 1904 self.fp.write(centdir) 1905 self.fp.write(filename) 1906 self.fp.write(extra_data) 1907 self.fp.write(zinfo.comment) 1908 1909 pos2 = self.fp.tell() 1910 # Write end-of-zip-archive record 1911 centDirCount = len(self.filelist) 1912 centDirSize = pos2 - self.start_dir 1913 centDirOffset = self.start_dir 1914 requires_zip64 = None 1915 if centDirCount > ZIP_FILECOUNT_LIMIT: 1916 requires_zip64 = "Files count" 1917 elif centDirOffset > ZIP64_LIMIT: 1918 requires_zip64 = "Central directory offset" 1919 elif centDirSize > ZIP64_LIMIT: 1920 requires_zip64 = "Central directory size" 1921 if requires_zip64: 1922 # Need to write the ZIP64 end-of-archive records 1923 if not self._allowZip64: 1924 raise LargeZipFile(requires_zip64 + 1925 " would require ZIP64 extensions") 1926 zip64endrec = struct.pack( 1927 structEndArchive64, stringEndArchive64, 1928 44, 45, 45, 0, 0, centDirCount, centDirCount, 1929 centDirSize, centDirOffset) 1930 self.fp.write(zip64endrec) 1931 1932 zip64locrec = struct.pack( 1933 structEndArchive64Locator, 1934 stringEndArchive64Locator, 0, pos2, 1) 1935 self.fp.write(zip64locrec) 1936 centDirCount = min(centDirCount, 0xFFFF) 1937 centDirSize = min(centDirSize, 0xFFFFFFFF) 1938 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1939 1940 endrec = struct.pack(structEndArchive, stringEndArchive, 1941 0, 0, centDirCount, centDirCount, 1942 centDirSize, centDirOffset, len(self._comment)) 1943 self.fp.write(endrec) 1944 self.fp.write(self._comment) 1945 if self.mode == "a": 1946 self.fp.truncate() 1947 self.fp.flush() 1948 1949 def _fpclose(self, fp): 1950 assert self._fileRefCnt > 0 1951 self._fileRefCnt -= 1 1952 if not self._fileRefCnt and not self._filePassed: 1953 fp.close() 1954 1955 1956class PyZipFile(ZipFile): 1957 """Class to create ZIP archives with Python library files and packages.""" 1958 1959 def __init__(self, file, mode="r", compression=ZIP_STORED, 1960 allowZip64=True, optimize=-1): 1961 ZipFile.__init__(self, file, mode=mode, compression=compression, 1962 allowZip64=allowZip64) 1963 self._optimize = optimize 1964 1965 def writepy(self, pathname, basename="", filterfunc=None): 1966 """Add all files from "pathname" to the ZIP archive. 1967 1968 If pathname is a package directory, search the directory and 1969 all package subdirectories recursively for all *.py and enter 1970 the modules into the archive. If pathname is a plain 1971 directory, listdir *.py and enter all modules. Else, pathname 1972 must be a Python *.py file and the module will be put into the 1973 archive. Added modules are always module.pyc. 1974 This method will compile the module.py into module.pyc if 1975 necessary. 1976 If filterfunc(pathname) is given, it is called with every argument. 1977 When it is False, the file or directory is skipped. 1978 """ 1979 pathname = os.fspath(pathname) 1980 if filterfunc and not filterfunc(pathname): 1981 if self.debug: 1982 label = 'path' if os.path.isdir(pathname) else 'file' 1983 print('%s %r skipped by filterfunc' % (label, pathname)) 1984 return 1985 dir, name = os.path.split(pathname) 1986 if os.path.isdir(pathname): 1987 initname = os.path.join(pathname, "__init__.py") 1988 if os.path.isfile(initname): 1989 # This is a package directory, add it 1990 if basename: 1991 basename = "%s/%s" % (basename, name) 1992 else: 1993 basename = name 1994 if self.debug: 1995 print("Adding package in", pathname, "as", basename) 1996 fname, arcname = self._get_codename(initname[0:-3], basename) 1997 if self.debug: 1998 print("Adding", arcname) 1999 self.write(fname, arcname) 2000 dirlist = sorted(os.listdir(pathname)) 2001 dirlist.remove("__init__.py") 2002 # Add all *.py files and package subdirectories 2003 for filename in dirlist: 2004 path = os.path.join(pathname, filename) 2005 root, ext = os.path.splitext(filename) 2006 if os.path.isdir(path): 2007 if os.path.isfile(os.path.join(path, "__init__.py")): 2008 # This is a package directory, add it 2009 self.writepy(path, basename, 2010 filterfunc=filterfunc) # Recursive call 2011 elif ext == ".py": 2012 if filterfunc and not filterfunc(path): 2013 if self.debug: 2014 print('file %r skipped by filterfunc' % path) 2015 continue 2016 fname, arcname = self._get_codename(path[0:-3], 2017 basename) 2018 if self.debug: 2019 print("Adding", arcname) 2020 self.write(fname, arcname) 2021 else: 2022 # This is NOT a package directory, add its files at top level 2023 if self.debug: 2024 print("Adding files from directory", pathname) 2025 for filename in sorted(os.listdir(pathname)): 2026 path = os.path.join(pathname, filename) 2027 root, ext = os.path.splitext(filename) 2028 if ext == ".py": 2029 if filterfunc and not filterfunc(path): 2030 if self.debug: 2031 print('file %r skipped by filterfunc' % path) 2032 continue 2033 fname, arcname = self._get_codename(path[0:-3], 2034 basename) 2035 if self.debug: 2036 print("Adding", arcname) 2037 self.write(fname, arcname) 2038 else: 2039 if pathname[-3:] != ".py": 2040 raise RuntimeError( 2041 'Files added with writepy() must end with ".py"') 2042 fname, arcname = self._get_codename(pathname[0:-3], basename) 2043 if self.debug: 2044 print("Adding file", arcname) 2045 self.write(fname, arcname) 2046 2047 def _get_codename(self, pathname, basename): 2048 """Return (filename, archivename) for the path. 2049 2050 Given a module name path, return the correct file path and 2051 archive name, compiling if necessary. For example, given 2052 /python/lib/string, return (/python/lib/string.pyc, string). 2053 """ 2054 def _compile(file, optimize=-1): 2055 import py_compile 2056 if self.debug: 2057 print("Compiling", file) 2058 try: 2059 py_compile.compile(file, doraise=True, optimize=optimize) 2060 except py_compile.PyCompileError as err: 2061 print(err.msg) 2062 return False 2063 return True 2064 2065 file_py = pathname + ".py" 2066 file_pyc = pathname + ".pyc" 2067 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2068 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2069 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2070 if self._optimize == -1: 2071 # legacy mode: use whatever file is present 2072 if (os.path.isfile(file_pyc) and 2073 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2074 # Use .pyc file. 2075 arcname = fname = file_pyc 2076 elif (os.path.isfile(pycache_opt0) and 2077 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2078 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2079 # file name in the archive. 2080 fname = pycache_opt0 2081 arcname = file_pyc 2082 elif (os.path.isfile(pycache_opt1) and 2083 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2084 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2085 # file name in the archive. 2086 fname = pycache_opt1 2087 arcname = file_pyc 2088 elif (os.path.isfile(pycache_opt2) and 2089 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2090 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2091 # file name in the archive. 2092 fname = pycache_opt2 2093 arcname = file_pyc 2094 else: 2095 # Compile py into PEP 3147 pyc file. 2096 if _compile(file_py): 2097 if sys.flags.optimize == 0: 2098 fname = pycache_opt0 2099 elif sys.flags.optimize == 1: 2100 fname = pycache_opt1 2101 else: 2102 fname = pycache_opt2 2103 arcname = file_pyc 2104 else: 2105 fname = arcname = file_py 2106 else: 2107 # new mode: use given optimization level 2108 if self._optimize == 0: 2109 fname = pycache_opt0 2110 arcname = file_pyc 2111 else: 2112 arcname = file_pyc 2113 if self._optimize == 1: 2114 fname = pycache_opt1 2115 elif self._optimize == 2: 2116 fname = pycache_opt2 2117 else: 2118 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2119 raise ValueError(msg) 2120 if not (os.path.isfile(fname) and 2121 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2122 if not _compile(file_py, optimize=self._optimize): 2123 fname = arcname = file_py 2124 archivename = os.path.split(arcname)[1] 2125 if basename: 2126 archivename = "%s/%s" % (basename, archivename) 2127 return (fname, archivename) 2128 2129 2130def _parents(path): 2131 """ 2132 Given a path with elements separated by 2133 posixpath.sep, generate all parents of that path. 2134 2135 >>> list(_parents('b/d')) 2136 ['b'] 2137 >>> list(_parents('/b/d/')) 2138 ['/b'] 2139 >>> list(_parents('b/d/f/')) 2140 ['b/d', 'b'] 2141 >>> list(_parents('b')) 2142 [] 2143 >>> list(_parents('')) 2144 [] 2145 """ 2146 return itertools.islice(_ancestry(path), 1, None) 2147 2148 2149def _ancestry(path): 2150 """ 2151 Given a path with elements separated by 2152 posixpath.sep, generate all elements of that path 2153 2154 >>> list(_ancestry('b/d')) 2155 ['b/d', 'b'] 2156 >>> list(_ancestry('/b/d/')) 2157 ['/b/d', '/b'] 2158 >>> list(_ancestry('b/d/f/')) 2159 ['b/d/f', 'b/d', 'b'] 2160 >>> list(_ancestry('b')) 2161 ['b'] 2162 >>> list(_ancestry('')) 2163 [] 2164 """ 2165 path = path.rstrip(posixpath.sep) 2166 while path and path != posixpath.sep: 2167 yield path 2168 path, tail = posixpath.split(path) 2169 2170 2171_dedupe = dict.fromkeys 2172"""Deduplicate an iterable in original order""" 2173 2174 2175def _difference(minuend, subtrahend): 2176 """ 2177 Return items in minuend not in subtrahend, retaining order 2178 with O(1) lookup. 2179 """ 2180 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2181 2182 2183class CompleteDirs(ZipFile): 2184 """ 2185 A ZipFile subclass that ensures that implied directories 2186 are always included in the namelist. 2187 """ 2188 2189 @staticmethod 2190 def _implied_dirs(names): 2191 parents = itertools.chain.from_iterable(map(_parents, names)) 2192 as_dirs = (p + posixpath.sep for p in parents) 2193 return _dedupe(_difference(as_dirs, names)) 2194 2195 def namelist(self): 2196 names = super(CompleteDirs, self).namelist() 2197 return names + list(self._implied_dirs(names)) 2198 2199 def _name_set(self): 2200 return set(self.namelist()) 2201 2202 def resolve_dir(self, name): 2203 """ 2204 If the name represents a directory, return that name 2205 as a directory (with the trailing slash). 2206 """ 2207 names = self._name_set() 2208 dirname = name + '/' 2209 dir_match = name not in names and dirname in names 2210 return dirname if dir_match else name 2211 2212 @classmethod 2213 def make(cls, source): 2214 """ 2215 Given a source (filename or zipfile), return an 2216 appropriate CompleteDirs subclass. 2217 """ 2218 if isinstance(source, CompleteDirs): 2219 return source 2220 2221 if not isinstance(source, ZipFile): 2222 return cls(source) 2223 2224 # Only allow for FastPath when supplied zipfile is read-only 2225 if 'r' not in source.mode: 2226 cls = CompleteDirs 2227 2228 res = cls.__new__(cls) 2229 vars(res).update(vars(source)) 2230 return res 2231 2232 2233class FastLookup(CompleteDirs): 2234 """ 2235 ZipFile subclass to ensure implicit 2236 dirs exist and are resolved rapidly. 2237 """ 2238 def namelist(self): 2239 with contextlib.suppress(AttributeError): 2240 return self.__names 2241 self.__names = super(FastLookup, self).namelist() 2242 return self.__names 2243 2244 def _name_set(self): 2245 with contextlib.suppress(AttributeError): 2246 return self.__lookup 2247 self.__lookup = super(FastLookup, self)._name_set() 2248 return self.__lookup 2249 2250 2251class Path: 2252 """ 2253 A pathlib-compatible interface for zip files. 2254 2255 Consider a zip file with this structure:: 2256 2257 . 2258 ├── a.txt 2259 └── b 2260 ├── c.txt 2261 └── d 2262 └── e.txt 2263 2264 >>> data = io.BytesIO() 2265 >>> zf = ZipFile(data, 'w') 2266 >>> zf.writestr('a.txt', 'content of a') 2267 >>> zf.writestr('b/c.txt', 'content of c') 2268 >>> zf.writestr('b/d/e.txt', 'content of e') 2269 >>> zf.filename = 'abcde.zip' 2270 2271 Path accepts the zipfile object itself or a filename 2272 2273 >>> root = Path(zf) 2274 2275 From there, several path operations are available. 2276 2277 Directory iteration (including the zip file itself): 2278 2279 >>> a, b = root.iterdir() 2280 >>> a 2281 Path('abcde.zip', 'a.txt') 2282 >>> b 2283 Path('abcde.zip', 'b/') 2284 2285 name property: 2286 2287 >>> b.name 2288 'b' 2289 2290 join with divide operator: 2291 2292 >>> c = b / 'c.txt' 2293 >>> c 2294 Path('abcde.zip', 'b/c.txt') 2295 >>> c.name 2296 'c.txt' 2297 2298 Read text: 2299 2300 >>> c.read_text() 2301 'content of c' 2302 2303 existence: 2304 2305 >>> c.exists() 2306 True 2307 >>> (b / 'missing.txt').exists() 2308 False 2309 2310 Coercion to string: 2311 2312 >>> str(c) 2313 'abcde.zip/b/c.txt' 2314 """ 2315 2316 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 2317 2318 def __init__(self, root, at=""): 2319 self.root = FastLookup.make(root) 2320 self.at = at 2321 2322 @property 2323 def open(self): 2324 return functools.partial(self.root.open, self.at) 2325 2326 @property 2327 def name(self): 2328 return posixpath.basename(self.at.rstrip("/")) 2329 2330 def read_text(self, *args, **kwargs): 2331 with self.open() as strm: 2332 return io.TextIOWrapper(strm, *args, **kwargs).read() 2333 2334 def read_bytes(self): 2335 with self.open() as strm: 2336 return strm.read() 2337 2338 def _is_child(self, path): 2339 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 2340 2341 def _next(self, at): 2342 return Path(self.root, at) 2343 2344 def is_dir(self): 2345 return not self.at or self.at.endswith("/") 2346 2347 def is_file(self): 2348 return not self.is_dir() 2349 2350 def exists(self): 2351 return self.at in self.root._name_set() 2352 2353 def iterdir(self): 2354 if not self.is_dir(): 2355 raise ValueError("Can't listdir a file") 2356 subs = map(self._next, self.root.namelist()) 2357 return filter(self._is_child, subs) 2358 2359 def __str__(self): 2360 return posixpath.join(self.root.filename, self.at) 2361 2362 def __repr__(self): 2363 return self.__repr.format(self=self) 2364 2365 def joinpath(self, add): 2366 next = posixpath.join(self.at, add) 2367 return self._next(self.root.resolve_dir(next)) 2368 2369 __truediv__ = joinpath 2370 2371 @property 2372 def parent(self): 2373 parent_at = posixpath.dirname(self.at.rstrip('/')) 2374 if parent_at: 2375 parent_at += '/' 2376 return self._next(parent_at) 2377 2378 2379def main(args=None): 2380 import argparse 2381 2382 description = 'A simple command-line interface for zipfile module.' 2383 parser = argparse.ArgumentParser(description=description) 2384 group = parser.add_mutually_exclusive_group(required=True) 2385 group.add_argument('-l', '--list', metavar='<zipfile>', 2386 help='Show listing of a zipfile') 2387 group.add_argument('-e', '--extract', nargs=2, 2388 metavar=('<zipfile>', '<output_dir>'), 2389 help='Extract zipfile into target dir') 2390 group.add_argument('-c', '--create', nargs='+', 2391 metavar=('<name>', '<file>'), 2392 help='Create zipfile from sources') 2393 group.add_argument('-t', '--test', metavar='<zipfile>', 2394 help='Test if a zipfile is valid') 2395 args = parser.parse_args(args) 2396 2397 if args.test is not None: 2398 src = args.test 2399 with ZipFile(src, 'r') as zf: 2400 badfile = zf.testzip() 2401 if badfile: 2402 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2403 print("Done testing") 2404 2405 elif args.list is not None: 2406 src = args.list 2407 with ZipFile(src, 'r') as zf: 2408 zf.printdir() 2409 2410 elif args.extract is not None: 2411 src, curdir = args.extract 2412 with ZipFile(src, 'r') as zf: 2413 zf.extractall(curdir) 2414 2415 elif args.create is not None: 2416 zip_name = args.create.pop(0) 2417 files = args.create 2418 2419 def addToZip(zf, path, zippath): 2420 if os.path.isfile(path): 2421 zf.write(path, zippath, ZIP_DEFLATED) 2422 elif os.path.isdir(path): 2423 if zippath: 2424 zf.write(path, zippath) 2425 for nm in sorted(os.listdir(path)): 2426 addToZip(zf, 2427 os.path.join(path, nm), os.path.join(zippath, nm)) 2428 # else: ignore 2429 2430 with ZipFile(zip_name, 'w') as zf: 2431 for path in files: 2432 zippath = os.path.basename(path) 2433 if not zippath: 2434 zippath = os.path.basename(os.path.dirname(path)) 2435 if zippath in ('', os.curdir, os.pardir): 2436 zippath = '' 2437 addToZip(zf, path, zippath) 2438 2439 2440if __name__ == "__main__": 2441 main() 2442