1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import importlib.util
8import io
9import itertools
10import os
11import posixpath
12import shutil
13import stat
14import struct
15import sys
16import threading
17import time
18import contextlib
19import pathlib
20
21try:
22    import zlib # We may need its compression method
23    crc32 = zlib.crc32
24except ImportError:
25    zlib = None
26    crc32 = binascii.crc32
27
28try:
29    import bz2 # We may need its compression method
30except ImportError:
31    bz2 = None
32
33try:
34    import lzma # We may need its compression method
35except ImportError:
36    lzma = None
37
38__all__ = ["BadZipFile", "BadZipfile", "error",
39           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
40           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41           "Path"]
42
43class BadZipFile(Exception):
44    pass
45
46
47class LargeZipFile(Exception):
48    """
49    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
50    and those extensions are disabled.
51    """
52
53error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
54
55
56ZIP64_LIMIT = (1 << 31) - 1
57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
58ZIP_MAX_COMMENT = (1 << 16) - 1
59
60# constants for Zip file compression methods
61ZIP_STORED = 0
62ZIP_DEFLATED = 8
63ZIP_BZIP2 = 12
64ZIP_LZMA = 14
65# Other ZIP compression methods not supported
66
67DEFAULT_VERSION = 20
68ZIP64_VERSION = 45
69BZIP2_VERSION = 46
70LZMA_VERSION = 63
71# we recognize (but not necessarily support) all features up to that version
72MAX_EXTRACT_VERSION = 63
73
74# Below are some formats and associated data for reading/writing headers using
75# the struct module.  The names and structures of headers/records are those used
76# in the PKWARE description of the ZIP file format:
77#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
78# (URL valid as of January 2008)
79
80# The "end of central directory" structure, magic number, size, and indices
81# (section V.I in the format document)
82structEndArchive = b"<4s4H2LH"
83stringEndArchive = b"PK\005\006"
84sizeEndCentDir = struct.calcsize(structEndArchive)
85
86_ECD_SIGNATURE = 0
87_ECD_DISK_NUMBER = 1
88_ECD_DISK_START = 2
89_ECD_ENTRIES_THIS_DISK = 3
90_ECD_ENTRIES_TOTAL = 4
91_ECD_SIZE = 5
92_ECD_OFFSET = 6
93_ECD_COMMENT_SIZE = 7
94# These last two indices are not part of the structure as defined in the
95# spec, but they are used internally by this module as a convenience
96_ECD_COMMENT = 8
97_ECD_LOCATION = 9
98
99# The "central directory" structure, magic number, size, and indices
100# of entries in the structure (section V.F in the format document)
101structCentralDir = "<4s4B4HL2L5H2L"
102stringCentralDir = b"PK\001\002"
103sizeCentralDir = struct.calcsize(structCentralDir)
104
105# indexes of entries in the central directory structure
106_CD_SIGNATURE = 0
107_CD_CREATE_VERSION = 1
108_CD_CREATE_SYSTEM = 2
109_CD_EXTRACT_VERSION = 3
110_CD_EXTRACT_SYSTEM = 4
111_CD_FLAG_BITS = 5
112_CD_COMPRESS_TYPE = 6
113_CD_TIME = 7
114_CD_DATE = 8
115_CD_CRC = 9
116_CD_COMPRESSED_SIZE = 10
117_CD_UNCOMPRESSED_SIZE = 11
118_CD_FILENAME_LENGTH = 12
119_CD_EXTRA_FIELD_LENGTH = 13
120_CD_COMMENT_LENGTH = 14
121_CD_DISK_NUMBER_START = 15
122_CD_INTERNAL_FILE_ATTRIBUTES = 16
123_CD_EXTERNAL_FILE_ATTRIBUTES = 17
124_CD_LOCAL_HEADER_OFFSET = 18
125
126# General purpose bit flags
127# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes)
128_MASK_ENCRYPTED = 1 << 0
129# Bits 1 and 2 have different meanings depending on the compression used.
130_MASK_COMPRESS_OPTION_1 = 1 << 1
131# _MASK_COMPRESS_OPTION_2 = 1 << 2
132# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed
133# size are zero in the local header and the real values are written in the data
134# descriptor immediately following the compressed data.
135_MASK_USE_DATA_DESCRIPTOR = 1 << 3
136# Bit 4: Reserved for use with compression method 8, for enhanced deflating.
137# _MASK_RESERVED_BIT_4 = 1 << 4
138_MASK_COMPRESSED_PATCH = 1 << 5
139_MASK_STRONG_ENCRYPTION = 1 << 6
140# _MASK_UNUSED_BIT_7 = 1 << 7
141# _MASK_UNUSED_BIT_8 = 1 << 8
142# _MASK_UNUSED_BIT_9 = 1 << 9
143# _MASK_UNUSED_BIT_10 = 1 << 10
144_MASK_UTF_FILENAME = 1 << 11
145# Bit 12: Reserved by PKWARE for enhanced compression.
146# _MASK_RESERVED_BIT_12 = 1 << 12
147# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13
148# Bit 14, 15: Reserved by PKWARE
149# _MASK_RESERVED_BIT_14 = 1 << 14
150# _MASK_RESERVED_BIT_15 = 1 << 15
151
152# The "local file header" structure, magic number, size, and indices
153# (section V.A in the format document)
154structFileHeader = "<4s2B4HL2L2H"
155stringFileHeader = b"PK\003\004"
156sizeFileHeader = struct.calcsize(structFileHeader)
157
158_FH_SIGNATURE = 0
159_FH_EXTRACT_VERSION = 1
160_FH_EXTRACT_SYSTEM = 2
161_FH_GENERAL_PURPOSE_FLAG_BITS = 3
162_FH_COMPRESSION_METHOD = 4
163_FH_LAST_MOD_TIME = 5
164_FH_LAST_MOD_DATE = 6
165_FH_CRC = 7
166_FH_COMPRESSED_SIZE = 8
167_FH_UNCOMPRESSED_SIZE = 9
168_FH_FILENAME_LENGTH = 10
169_FH_EXTRA_FIELD_LENGTH = 11
170
171# The "Zip64 end of central directory locator" structure, magic number, and size
172structEndArchive64Locator = "<4sLQL"
173stringEndArchive64Locator = b"PK\x06\x07"
174sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
175
176# The "Zip64 end of central directory" record, magic number, size, and indices
177# (section V.G in the format document)
178structEndArchive64 = "<4sQ2H2L4Q"
179stringEndArchive64 = b"PK\x06\x06"
180sizeEndCentDir64 = struct.calcsize(structEndArchive64)
181
182_CD64_SIGNATURE = 0
183_CD64_DIRECTORY_RECSIZE = 1
184_CD64_CREATE_VERSION = 2
185_CD64_EXTRACT_VERSION = 3
186_CD64_DISK_NUMBER = 4
187_CD64_DISK_NUMBER_START = 5
188_CD64_NUMBER_ENTRIES_THIS_DISK = 6
189_CD64_NUMBER_ENTRIES_TOTAL = 7
190_CD64_DIRECTORY_SIZE = 8
191_CD64_OFFSET_START_CENTDIR = 9
192
193_DD_SIGNATURE = 0x08074b50
194
195_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
196
197def _strip_extra(extra, xids):
198    # Remove Extra Fields with specified IDs.
199    unpack = _EXTRA_FIELD_STRUCT.unpack
200    modified = False
201    buffer = []
202    start = i = 0
203    while i + 4 <= len(extra):
204        xid, xlen = unpack(extra[i : i + 4])
205        j = i + 4 + xlen
206        if xid in xids:
207            if i != start:
208                buffer.append(extra[start : i])
209            start = j
210            modified = True
211        i = j
212    if not modified:
213        return extra
214    return b''.join(buffer)
215
216def _check_zipfile(fp):
217    try:
218        if _EndRecData(fp):
219            return True         # file has correct magic number
220    except OSError:
221        pass
222    return False
223
224def is_zipfile(filename):
225    """Quickly see if a file is a ZIP file by checking the magic number.
226
227    The filename argument may be a file or file-like object too.
228    """
229    result = False
230    try:
231        if hasattr(filename, "read"):
232            result = _check_zipfile(fp=filename)
233        else:
234            with open(filename, "rb") as fp:
235                result = _check_zipfile(fp)
236    except OSError:
237        pass
238    return result
239
240def _EndRecData64(fpin, offset, endrec):
241    """
242    Read the ZIP64 end-of-archive records and use that to update endrec
243    """
244    try:
245        fpin.seek(offset - sizeEndCentDir64Locator, 2)
246    except OSError:
247        # If the seek fails, the file is not large enough to contain a ZIP64
248        # end-of-archive record, so just return the end record we were given.
249        return endrec
250
251    data = fpin.read(sizeEndCentDir64Locator)
252    if len(data) != sizeEndCentDir64Locator:
253        return endrec
254    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
255    if sig != stringEndArchive64Locator:
256        return endrec
257
258    if diskno != 0 or disks > 1:
259        raise BadZipFile("zipfiles that span multiple disks are not supported")
260
261    # Assume no 'zip64 extensible data'
262    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
263    data = fpin.read(sizeEndCentDir64)
264    if len(data) != sizeEndCentDir64:
265        return endrec
266    sig, sz, create_version, read_version, disk_num, disk_dir, \
267        dircount, dircount2, dirsize, diroffset = \
268        struct.unpack(structEndArchive64, data)
269    if sig != stringEndArchive64:
270        return endrec
271
272    # Update the original endrec using data from the ZIP64 record
273    endrec[_ECD_SIGNATURE] = sig
274    endrec[_ECD_DISK_NUMBER] = disk_num
275    endrec[_ECD_DISK_START] = disk_dir
276    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
277    endrec[_ECD_ENTRIES_TOTAL] = dircount2
278    endrec[_ECD_SIZE] = dirsize
279    endrec[_ECD_OFFSET] = diroffset
280    return endrec
281
282
283def _EndRecData(fpin):
284    """Return data from the "End of Central Directory" record, or None.
285
286    The data is a list of the nine items in the ZIP "End of central dir"
287    record followed by a tenth item, the file seek offset of this record."""
288
289    # Determine file size
290    fpin.seek(0, 2)
291    filesize = fpin.tell()
292
293    # Check to see if this is ZIP file with no archive comment (the
294    # "end of central directory" structure should be the last item in the
295    # file if this is the case).
296    try:
297        fpin.seek(-sizeEndCentDir, 2)
298    except OSError:
299        return None
300    data = fpin.read()
301    if (len(data) == sizeEndCentDir and
302        data[0:4] == stringEndArchive and
303        data[-2:] == b"\000\000"):
304        # the signature is correct and there's no comment, unpack structure
305        endrec = struct.unpack(structEndArchive, data)
306        endrec=list(endrec)
307
308        # Append a blank comment and record start offset
309        endrec.append(b"")
310        endrec.append(filesize - sizeEndCentDir)
311
312        # Try to read the "Zip64 end of central directory" structure
313        return _EndRecData64(fpin, -sizeEndCentDir, endrec)
314
315    # Either this is not a ZIP file, or it is a ZIP file with an archive
316    # comment.  Search the end of the file for the "end of central directory"
317    # record signature. The comment is the last item in the ZIP file and may be
318    # up to 64K long.  It is assumed that the "end of central directory" magic
319    # number does not appear in the comment.
320    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
321    fpin.seek(maxCommentStart, 0)
322    data = fpin.read()
323    start = data.rfind(stringEndArchive)
324    if start >= 0:
325        # found the magic number; attempt to unpack and interpret
326        recData = data[start:start+sizeEndCentDir]
327        if len(recData) != sizeEndCentDir:
328            # Zip file is corrupted.
329            return None
330        endrec = list(struct.unpack(structEndArchive, recData))
331        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
332        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
333        endrec.append(comment)
334        endrec.append(maxCommentStart + start)
335
336        # Try to read the "Zip64 end of central directory" structure
337        return _EndRecData64(fpin, maxCommentStart + start - filesize,
338                             endrec)
339
340    # Unable to find a valid end of central directory structure
341    return None
342
343
344class ZipInfo (object):
345    """Class with attributes describing each file in the ZIP archive."""
346
347    __slots__ = (
348        'orig_filename',
349        'filename',
350        'date_time',
351        'compress_type',
352        '_compresslevel',
353        'comment',
354        'extra',
355        'create_system',
356        'create_version',
357        'extract_version',
358        'reserved',
359        'flag_bits',
360        'volume',
361        'internal_attr',
362        'external_attr',
363        'header_offset',
364        'CRC',
365        'compress_size',
366        'file_size',
367        '_raw_time',
368    )
369
370    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
371        self.orig_filename = filename   # Original file name in archive
372
373        # Terminate the file name at the first null byte.  Null bytes in file
374        # names are used as tricks by viruses in archives.
375        null_byte = filename.find(chr(0))
376        if null_byte >= 0:
377            filename = filename[0:null_byte]
378        # This is used to ensure paths in generated ZIP files always use
379        # forward slashes as the directory separator, as required by the
380        # ZIP format specification.
381        if os.sep != "/" and os.sep in filename:
382            filename = filename.replace(os.sep, "/")
383
384        self.filename = filename        # Normalized file name
385        self.date_time = date_time      # year, month, day, hour, min, sec
386
387        if date_time[0] < 1980:
388            raise ValueError('ZIP does not support timestamps before 1980')
389
390        # Standard values:
391        self.compress_type = ZIP_STORED # Type of compression for the file
392        self._compresslevel = None      # Level for the compressor
393        self.comment = b""              # Comment for each file
394        self.extra = b""                # ZIP extra data
395        if sys.platform == 'win32':
396            self.create_system = 0          # System which created ZIP archive
397        else:
398            # Assume everything else is unix-y
399            self.create_system = 3          # System which created ZIP archive
400        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
401        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
402        self.reserved = 0               # Must be zero
403        self.flag_bits = 0              # ZIP flag bits
404        self.volume = 0                 # Volume number of file header
405        self.internal_attr = 0          # Internal attributes
406        self.external_attr = 0          # External file attributes
407        self.compress_size = 0          # Size of the compressed file
408        self.file_size = 0              # Size of the uncompressed file
409        # Other attributes are set by class ZipFile:
410        # header_offset         Byte offset to the file header
411        # CRC                   CRC-32 of the uncompressed file
412
413    def __repr__(self):
414        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
415        if self.compress_type != ZIP_STORED:
416            result.append(' compress_type=%s' %
417                          compressor_names.get(self.compress_type,
418                                               self.compress_type))
419        hi = self.external_attr >> 16
420        lo = self.external_attr & 0xFFFF
421        if hi:
422            result.append(' filemode=%r' % stat.filemode(hi))
423        if lo:
424            result.append(' external_attr=%#x' % lo)
425        isdir = self.is_dir()
426        if not isdir or self.file_size:
427            result.append(' file_size=%r' % self.file_size)
428        if ((not isdir or self.compress_size) and
429            (self.compress_type != ZIP_STORED or
430             self.file_size != self.compress_size)):
431            result.append(' compress_size=%r' % self.compress_size)
432        result.append('>')
433        return ''.join(result)
434
435    def FileHeader(self, zip64=None):
436        """Return the per-file header as a bytes object."""
437        dt = self.date_time
438        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
439        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
440        if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
441            # Set these to zero because we write them after the file data
442            CRC = compress_size = file_size = 0
443        else:
444            CRC = self.CRC
445            compress_size = self.compress_size
446            file_size = self.file_size
447
448        extra = self.extra
449
450        min_version = 0
451        if zip64 is None:
452            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
453        if zip64:
454            fmt = '<HHQQ'
455            extra = extra + struct.pack(fmt,
456                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
457        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
458            if not zip64:
459                raise LargeZipFile("Filesize would require ZIP64 extensions")
460            # File is larger than what fits into a 4 byte integer,
461            # fall back to the ZIP64 extension
462            file_size = 0xffffffff
463            compress_size = 0xffffffff
464            min_version = ZIP64_VERSION
465
466        if self.compress_type == ZIP_BZIP2:
467            min_version = max(BZIP2_VERSION, min_version)
468        elif self.compress_type == ZIP_LZMA:
469            min_version = max(LZMA_VERSION, min_version)
470
471        self.extract_version = max(min_version, self.extract_version)
472        self.create_version = max(min_version, self.create_version)
473        filename, flag_bits = self._encodeFilenameFlags()
474        header = struct.pack(structFileHeader, stringFileHeader,
475                             self.extract_version, self.reserved, flag_bits,
476                             self.compress_type, dostime, dosdate, CRC,
477                             compress_size, file_size,
478                             len(filename), len(extra))
479        return header + filename + extra
480
481    def _encodeFilenameFlags(self):
482        try:
483            return self.filename.encode('ascii'), self.flag_bits
484        except UnicodeEncodeError:
485            return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
486
487    def _decodeExtra(self):
488        # Try to decode the extra field.
489        extra = self.extra
490        unpack = struct.unpack
491        while len(extra) >= 4:
492            tp, ln = unpack('<HH', extra[:4])
493            if ln+4 > len(extra):
494                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
495            if tp == 0x0001:
496                data = extra[4:ln+4]
497                # ZIP64 extension (large files and/or large archives)
498                try:
499                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
500                        field = "File size"
501                        self.file_size, = unpack('<Q', data[:8])
502                        data = data[8:]
503                    if self.compress_size == 0xFFFF_FFFF:
504                        field = "Compress size"
505                        self.compress_size, = unpack('<Q', data[:8])
506                        data = data[8:]
507                    if self.header_offset == 0xFFFF_FFFF:
508                        field = "Header offset"
509                        self.header_offset, = unpack('<Q', data[:8])
510                except struct.error:
511                    raise BadZipFile(f"Corrupt zip64 extra field. "
512                                     f"{field} not found.") from None
513
514            extra = extra[ln+4:]
515
516    @classmethod
517    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
518        """Construct an appropriate ZipInfo for a file on the filesystem.
519
520        filename should be the path to a file or directory on the filesystem.
521
522        arcname is the name which it will have within the archive (by default,
523        this will be the same as filename, but without a drive letter and with
524        leading path separators removed).
525        """
526        if isinstance(filename, os.PathLike):
527            filename = os.fspath(filename)
528        st = os.stat(filename)
529        isdir = stat.S_ISDIR(st.st_mode)
530        mtime = time.localtime(st.st_mtime)
531        date_time = mtime[0:6]
532        if not strict_timestamps and date_time[0] < 1980:
533            date_time = (1980, 1, 1, 0, 0, 0)
534        elif not strict_timestamps and date_time[0] > 2107:
535            date_time = (2107, 12, 31, 23, 59, 59)
536        # Create ZipInfo instance to store file information
537        if arcname is None:
538            arcname = filename
539        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
540        while arcname[0] in (os.sep, os.altsep):
541            arcname = arcname[1:]
542        if isdir:
543            arcname += '/'
544        zinfo = cls(arcname, date_time)
545        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
546        if isdir:
547            zinfo.file_size = 0
548            zinfo.external_attr |= 0x10  # MS-DOS directory flag
549        else:
550            zinfo.file_size = st.st_size
551
552        return zinfo
553
554    def is_dir(self):
555        """Return True if this archive member is a directory."""
556        return self.filename[-1] == '/'
557
558
559# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
560# internal keys. We noticed that a direct implementation is faster than
561# relying on binascii.crc32().
562
563_crctable = None
564def _gen_crc(crc):
565    for j in range(8):
566        if crc & 1:
567            crc = (crc >> 1) ^ 0xEDB88320
568        else:
569            crc >>= 1
570    return crc
571
572# ZIP supports a password-based form of encryption. Even though known
573# plaintext attacks have been found against it, it is still useful
574# to be able to get data out of such a file.
575#
576# Usage:
577#     zd = _ZipDecrypter(mypwd)
578#     plain_bytes = zd(cypher_bytes)
579
580def _ZipDecrypter(pwd):
581    key0 = 305419896
582    key1 = 591751049
583    key2 = 878082192
584
585    global _crctable
586    if _crctable is None:
587        _crctable = list(map(_gen_crc, range(256)))
588    crctable = _crctable
589
590    def crc32(ch, crc):
591        """Compute the CRC32 primitive on one byte."""
592        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
593
594    def update_keys(c):
595        nonlocal key0, key1, key2
596        key0 = crc32(c, key0)
597        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
598        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
599        key2 = crc32(key1 >> 24, key2)
600
601    for p in pwd:
602        update_keys(p)
603
604    def decrypter(data):
605        """Decrypt a bytes object."""
606        result = bytearray()
607        append = result.append
608        for c in data:
609            k = key2 | 2
610            c ^= ((k * (k^1)) >> 8) & 0xFF
611            update_keys(c)
612            append(c)
613        return bytes(result)
614
615    return decrypter
616
617
618class LZMACompressor:
619
620    def __init__(self):
621        self._comp = None
622
623    def _init(self):
624        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
625        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
626            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
627        ])
628        return struct.pack('<BBH', 9, 4, len(props)) + props
629
630    def compress(self, data):
631        if self._comp is None:
632            return self._init() + self._comp.compress(data)
633        return self._comp.compress(data)
634
635    def flush(self):
636        if self._comp is None:
637            return self._init() + self._comp.flush()
638        return self._comp.flush()
639
640
641class LZMADecompressor:
642
643    def __init__(self):
644        self._decomp = None
645        self._unconsumed = b''
646        self.eof = False
647
648    def decompress(self, data):
649        if self._decomp is None:
650            self._unconsumed += data
651            if len(self._unconsumed) <= 4:
652                return b''
653            psize, = struct.unpack('<H', self._unconsumed[2:4])
654            if len(self._unconsumed) <= 4 + psize:
655                return b''
656
657            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
658                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
659                                               self._unconsumed[4:4 + psize])
660            ])
661            data = self._unconsumed[4 + psize:]
662            del self._unconsumed
663
664        result = self._decomp.decompress(data)
665        self.eof = self._decomp.eof
666        return result
667
668
669compressor_names = {
670    0: 'store',
671    1: 'shrink',
672    2: 'reduce',
673    3: 'reduce',
674    4: 'reduce',
675    5: 'reduce',
676    6: 'implode',
677    7: 'tokenize',
678    8: 'deflate',
679    9: 'deflate64',
680    10: 'implode',
681    12: 'bzip2',
682    14: 'lzma',
683    18: 'terse',
684    19: 'lz77',
685    97: 'wavpack',
686    98: 'ppmd',
687}
688
689def _check_compression(compression):
690    if compression == ZIP_STORED:
691        pass
692    elif compression == ZIP_DEFLATED:
693        if not zlib:
694            raise RuntimeError(
695                "Compression requires the (missing) zlib module")
696    elif compression == ZIP_BZIP2:
697        if not bz2:
698            raise RuntimeError(
699                "Compression requires the (missing) bz2 module")
700    elif compression == ZIP_LZMA:
701        if not lzma:
702            raise RuntimeError(
703                "Compression requires the (missing) lzma module")
704    else:
705        raise NotImplementedError("That compression method is not supported")
706
707
708def _get_compressor(compress_type, compresslevel=None):
709    if compress_type == ZIP_DEFLATED:
710        if compresslevel is not None:
711            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
712        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
713    elif compress_type == ZIP_BZIP2:
714        if compresslevel is not None:
715            return bz2.BZ2Compressor(compresslevel)
716        return bz2.BZ2Compressor()
717    # compresslevel is ignored for ZIP_LZMA
718    elif compress_type == ZIP_LZMA:
719        return LZMACompressor()
720    else:
721        return None
722
723
724def _get_decompressor(compress_type):
725    _check_compression(compress_type)
726    if compress_type == ZIP_STORED:
727        return None
728    elif compress_type == ZIP_DEFLATED:
729        return zlib.decompressobj(-15)
730    elif compress_type == ZIP_BZIP2:
731        return bz2.BZ2Decompressor()
732    elif compress_type == ZIP_LZMA:
733        return LZMADecompressor()
734    else:
735        descr = compressor_names.get(compress_type)
736        if descr:
737            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
738        else:
739            raise NotImplementedError("compression type %d" % (compress_type,))
740
741
742class _SharedFile:
743    def __init__(self, file, pos, close, lock, writing):
744        self._file = file
745        self._pos = pos
746        self._close = close
747        self._lock = lock
748        self._writing = writing
749        self.seekable = file.seekable
750        self.tell = file.tell
751
752    def seek(self, offset, whence=0):
753        with self._lock:
754            if self._writing():
755                raise ValueError("Can't reposition in the ZIP file while "
756                        "there is an open writing handle on it. "
757                        "Close the writing handle before trying to read.")
758            self._file.seek(offset, whence)
759            self._pos = self._file.tell()
760            return self._pos
761
762    def read(self, n=-1):
763        with self._lock:
764            if self._writing():
765                raise ValueError("Can't read from the ZIP file while there "
766                        "is an open writing handle on it. "
767                        "Close the writing handle before trying to read.")
768            self._file.seek(self._pos)
769            data = self._file.read(n)
770            self._pos = self._file.tell()
771            return data
772
773    def close(self):
774        if self._file is not None:
775            fileobj = self._file
776            self._file = None
777            self._close(fileobj)
778
779# Provide the tell method for unseekable stream
780class _Tellable:
781    def __init__(self, fp):
782        self.fp = fp
783        self.offset = 0
784
785    def write(self, data):
786        n = self.fp.write(data)
787        self.offset += n
788        return n
789
790    def tell(self):
791        return self.offset
792
793    def flush(self):
794        self.fp.flush()
795
796    def close(self):
797        self.fp.close()
798
799
800class ZipExtFile(io.BufferedIOBase):
801    """File-like object for reading an archive member.
802       Is returned by ZipFile.open().
803    """
804
805    # Max size supported by decompressor.
806    MAX_N = 1 << 31 - 1
807
808    # Read from compressed files in 4k blocks.
809    MIN_READ_SIZE = 4096
810
811    # Chunk size to read during seek
812    MAX_SEEK_READ = 1 << 24
813
814    def __init__(self, fileobj, mode, zipinfo, pwd=None,
815                 close_fileobj=False):
816        self._fileobj = fileobj
817        self._pwd = pwd
818        self._close_fileobj = close_fileobj
819
820        self._compress_type = zipinfo.compress_type
821        self._compress_left = zipinfo.compress_size
822        self._left = zipinfo.file_size
823
824        self._decompressor = _get_decompressor(self._compress_type)
825
826        self._eof = False
827        self._readbuffer = b''
828        self._offset = 0
829
830        self.newlines = None
831
832        self.mode = mode
833        self.name = zipinfo.filename
834
835        if hasattr(zipinfo, 'CRC'):
836            self._expected_crc = zipinfo.CRC
837            self._running_crc = crc32(b'')
838        else:
839            self._expected_crc = None
840
841        self._seekable = False
842        try:
843            if fileobj.seekable():
844                self._orig_compress_start = fileobj.tell()
845                self._orig_compress_size = zipinfo.compress_size
846                self._orig_file_size = zipinfo.file_size
847                self._orig_start_crc = self._running_crc
848                self._seekable = True
849        except AttributeError:
850            pass
851
852        self._decrypter = None
853        if pwd:
854            if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
855                # compare against the file type from extended local headers
856                check_byte = (zipinfo._raw_time >> 8) & 0xff
857            else:
858                # compare against the CRC otherwise
859                check_byte = (zipinfo.CRC >> 24) & 0xff
860            h = self._init_decrypter()
861            if h != check_byte:
862                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
863
864
865    def _init_decrypter(self):
866        self._decrypter = _ZipDecrypter(self._pwd)
867        # The first 12 bytes in the cypher stream is an encryption header
868        #  used to strengthen the algorithm. The first 11 bytes are
869        #  completely random, while the 12th contains the MSB of the CRC,
870        #  or the MSB of the file time depending on the header type
871        #  and is used to check the correctness of the password.
872        header = self._fileobj.read(12)
873        self._compress_left -= 12
874        return self._decrypter(header)[11]
875
876    def __repr__(self):
877        result = ['<%s.%s' % (self.__class__.__module__,
878                              self.__class__.__qualname__)]
879        if not self.closed:
880            result.append(' name=%r mode=%r' % (self.name, self.mode))
881            if self._compress_type != ZIP_STORED:
882                result.append(' compress_type=%s' %
883                              compressor_names.get(self._compress_type,
884                                                   self._compress_type))
885        else:
886            result.append(' [closed]')
887        result.append('>')
888        return ''.join(result)
889
890    def readline(self, limit=-1):
891        """Read and return a line from the stream.
892
893        If limit is specified, at most limit bytes will be read.
894        """
895
896        if limit < 0:
897            # Shortcut common case - newline found in buffer.
898            i = self._readbuffer.find(b'\n', self._offset) + 1
899            if i > 0:
900                line = self._readbuffer[self._offset: i]
901                self._offset = i
902                return line
903
904        return io.BufferedIOBase.readline(self, limit)
905
906    def peek(self, n=1):
907        """Returns buffered bytes without advancing the position."""
908        if n > len(self._readbuffer) - self._offset:
909            chunk = self.read(n)
910            if len(chunk) > self._offset:
911                self._readbuffer = chunk + self._readbuffer[self._offset:]
912                self._offset = 0
913            else:
914                self._offset -= len(chunk)
915
916        # Return up to 512 bytes to reduce allocation overhead for tight loops.
917        return self._readbuffer[self._offset: self._offset + 512]
918
919    def readable(self):
920        if self.closed:
921            raise ValueError("I/O operation on closed file.")
922        return True
923
924    def read(self, n=-1):
925        """Read and return up to n bytes.
926        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
927        """
928        if self.closed:
929            raise ValueError("read from closed file.")
930        if n is None or n < 0:
931            buf = self._readbuffer[self._offset:]
932            self._readbuffer = b''
933            self._offset = 0
934            while not self._eof:
935                buf += self._read1(self.MAX_N)
936            return buf
937
938        end = n + self._offset
939        if end < len(self._readbuffer):
940            buf = self._readbuffer[self._offset:end]
941            self._offset = end
942            return buf
943
944        n = end - len(self._readbuffer)
945        buf = self._readbuffer[self._offset:]
946        self._readbuffer = b''
947        self._offset = 0
948        while n > 0 and not self._eof:
949            data = self._read1(n)
950            if n < len(data):
951                self._readbuffer = data
952                self._offset = n
953                buf += data[:n]
954                break
955            buf += data
956            n -= len(data)
957        return buf
958
959    def _update_crc(self, newdata):
960        # Update the CRC using the given data.
961        if self._expected_crc is None:
962            # No need to compute the CRC if we don't have a reference value
963            return
964        self._running_crc = crc32(newdata, self._running_crc)
965        # Check the CRC if we're at the end of the file
966        if self._eof and self._running_crc != self._expected_crc:
967            raise BadZipFile("Bad CRC-32 for file %r" % self.name)
968
969    def read1(self, n):
970        """Read up to n bytes with at most one read() system call."""
971
972        if n is None or n < 0:
973            buf = self._readbuffer[self._offset:]
974            self._readbuffer = b''
975            self._offset = 0
976            while not self._eof:
977                data = self._read1(self.MAX_N)
978                if data:
979                    buf += data
980                    break
981            return buf
982
983        end = n + self._offset
984        if end < len(self._readbuffer):
985            buf = self._readbuffer[self._offset:end]
986            self._offset = end
987            return buf
988
989        n = end - len(self._readbuffer)
990        buf = self._readbuffer[self._offset:]
991        self._readbuffer = b''
992        self._offset = 0
993        if n > 0:
994            while not self._eof:
995                data = self._read1(n)
996                if n < len(data):
997                    self._readbuffer = data
998                    self._offset = n
999                    buf += data[:n]
1000                    break
1001                if data:
1002                    buf += data
1003                    break
1004        return buf
1005
1006    def _read1(self, n):
1007        # Read up to n compressed bytes with at most one read() system call,
1008        # decrypt and decompress them.
1009        if self._eof or n <= 0:
1010            return b''
1011
1012        # Read from file.
1013        if self._compress_type == ZIP_DEFLATED:
1014            ## Handle unconsumed data.
1015            data = self._decompressor.unconsumed_tail
1016            if n > len(data):
1017                data += self._read2(n - len(data))
1018        else:
1019            data = self._read2(n)
1020
1021        if self._compress_type == ZIP_STORED:
1022            self._eof = self._compress_left <= 0
1023        elif self._compress_type == ZIP_DEFLATED:
1024            n = max(n, self.MIN_READ_SIZE)
1025            data = self._decompressor.decompress(data, n)
1026            self._eof = (self._decompressor.eof or
1027                         self._compress_left <= 0 and
1028                         not self._decompressor.unconsumed_tail)
1029            if self._eof:
1030                data += self._decompressor.flush()
1031        else:
1032            data = self._decompressor.decompress(data)
1033            self._eof = self._decompressor.eof or self._compress_left <= 0
1034
1035        data = data[:self._left]
1036        self._left -= len(data)
1037        if self._left <= 0:
1038            self._eof = True
1039        self._update_crc(data)
1040        return data
1041
1042    def _read2(self, n):
1043        if self._compress_left <= 0:
1044            return b''
1045
1046        n = max(n, self.MIN_READ_SIZE)
1047        n = min(n, self._compress_left)
1048
1049        data = self._fileobj.read(n)
1050        self._compress_left -= len(data)
1051        if not data:
1052            raise EOFError
1053
1054        if self._decrypter is not None:
1055            data = self._decrypter(data)
1056        return data
1057
1058    def close(self):
1059        try:
1060            if self._close_fileobj:
1061                self._fileobj.close()
1062        finally:
1063            super().close()
1064
1065    def seekable(self):
1066        if self.closed:
1067            raise ValueError("I/O operation on closed file.")
1068        return self._seekable
1069
1070    def seek(self, offset, whence=0):
1071        if self.closed:
1072            raise ValueError("seek on closed file.")
1073        if not self._seekable:
1074            raise io.UnsupportedOperation("underlying stream is not seekable")
1075        curr_pos = self.tell()
1076        if whence == 0: # Seek from start of file
1077            new_pos = offset
1078        elif whence == 1: # Seek from current position
1079            new_pos = curr_pos + offset
1080        elif whence == 2: # Seek from EOF
1081            new_pos = self._orig_file_size + offset
1082        else:
1083            raise ValueError("whence must be os.SEEK_SET (0), "
1084                             "os.SEEK_CUR (1), or os.SEEK_END (2)")
1085
1086        if new_pos > self._orig_file_size:
1087            new_pos = self._orig_file_size
1088
1089        if new_pos < 0:
1090            new_pos = 0
1091
1092        read_offset = new_pos - curr_pos
1093        buff_offset = read_offset + self._offset
1094
1095        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1096            # Just move the _offset index if the new position is in the _readbuffer
1097            self._offset = buff_offset
1098            read_offset = 0
1099        elif read_offset < 0:
1100            # Position is before the current position. Reset the ZipExtFile
1101            self._fileobj.seek(self._orig_compress_start)
1102            self._running_crc = self._orig_start_crc
1103            self._compress_left = self._orig_compress_size
1104            self._left = self._orig_file_size
1105            self._readbuffer = b''
1106            self._offset = 0
1107            self._decompressor = _get_decompressor(self._compress_type)
1108            self._eof = False
1109            read_offset = new_pos
1110            if self._decrypter is not None:
1111                self._init_decrypter()
1112
1113        while read_offset > 0:
1114            read_len = min(self.MAX_SEEK_READ, read_offset)
1115            self.read(read_len)
1116            read_offset -= read_len
1117
1118        return self.tell()
1119
1120    def tell(self):
1121        if self.closed:
1122            raise ValueError("tell on closed file.")
1123        if not self._seekable:
1124            raise io.UnsupportedOperation("underlying stream is not seekable")
1125        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1126        return filepos
1127
1128
1129class _ZipWriteFile(io.BufferedIOBase):
1130    def __init__(self, zf, zinfo, zip64):
1131        self._zinfo = zinfo
1132        self._zip64 = zip64
1133        self._zipfile = zf
1134        self._compressor = _get_compressor(zinfo.compress_type,
1135                                           zinfo._compresslevel)
1136        self._file_size = 0
1137        self._compress_size = 0
1138        self._crc = 0
1139
1140    @property
1141    def _fileobj(self):
1142        return self._zipfile.fp
1143
1144    def writable(self):
1145        return True
1146
1147    def write(self, data):
1148        if self.closed:
1149            raise ValueError('I/O operation on closed file.')
1150        nbytes = len(data)
1151        self._file_size += nbytes
1152        self._crc = crc32(data, self._crc)
1153        if self._compressor:
1154            data = self._compressor.compress(data)
1155            self._compress_size += len(data)
1156        self._fileobj.write(data)
1157        return nbytes
1158
1159    def close(self):
1160        if self.closed:
1161            return
1162        try:
1163            super().close()
1164            # Flush any data from the compressor, and update header info
1165            if self._compressor:
1166                buf = self._compressor.flush()
1167                self._compress_size += len(buf)
1168                self._fileobj.write(buf)
1169                self._zinfo.compress_size = self._compress_size
1170            else:
1171                self._zinfo.compress_size = self._file_size
1172            self._zinfo.CRC = self._crc
1173            self._zinfo.file_size = self._file_size
1174
1175            # Write updated header info
1176            if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
1177                # Write CRC and file sizes after the file data
1178                fmt = '<LLQQ' if self._zip64 else '<LLLL'
1179                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1180                    self._zinfo.compress_size, self._zinfo.file_size))
1181                self._zipfile.start_dir = self._fileobj.tell()
1182            else:
1183                if not self._zip64:
1184                    if self._file_size > ZIP64_LIMIT:
1185                        raise RuntimeError(
1186                            'File size unexpectedly exceeded ZIP64 limit')
1187                    if self._compress_size > ZIP64_LIMIT:
1188                        raise RuntimeError(
1189                            'Compressed size unexpectedly exceeded ZIP64 limit')
1190                # Seek backwards and write file header (which will now include
1191                # correct CRC and file sizes)
1192
1193                # Preserve current position in file
1194                self._zipfile.start_dir = self._fileobj.tell()
1195                self._fileobj.seek(self._zinfo.header_offset)
1196                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1197                self._fileobj.seek(self._zipfile.start_dir)
1198
1199            # Successfully written: Add file to our caches
1200            self._zipfile.filelist.append(self._zinfo)
1201            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1202        finally:
1203            self._zipfile._writing = False
1204
1205
1206
1207class ZipFile:
1208    """ Class with methods to open, read, write, close, list zip files.
1209
1210    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1211                compresslevel=None)
1212
1213    file: Either the path to the file, or a file-like object.
1214          If it is a path, the file will be opened and closed by ZipFile.
1215    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1216          or append 'a'.
1217    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1218                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1219    allowZip64: if True ZipFile will create files with ZIP64 extensions when
1220                needed, otherwise it will raise an exception when this would
1221                be necessary.
1222    compresslevel: None (default for the given compression type) or an integer
1223                   specifying the level to pass to the compressor.
1224                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1225                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
1226                   When using ZIP_BZIP2 integers 1 through 9 are accepted.
1227
1228    """
1229
1230    fp = None                   # Set here since __del__ checks it
1231    _windows_illegal_name_trans_table = None
1232
1233    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
1234                 compresslevel=None, *, strict_timestamps=True):
1235        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1236        or append 'a'."""
1237        if mode not in ('r', 'w', 'x', 'a'):
1238            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
1239
1240        _check_compression(compression)
1241
1242        self._allowZip64 = allowZip64
1243        self._didModify = False
1244        self.debug = 0  # Level of printing: 0 through 3
1245        self.NameToInfo = {}    # Find file info given name
1246        self.filelist = []      # List of ZipInfo instances for archive
1247        self.compression = compression  # Method of compression
1248        self.compresslevel = compresslevel
1249        self.mode = mode
1250        self.pwd = None
1251        self._comment = b''
1252        self._strict_timestamps = strict_timestamps
1253
1254        # Check if we were passed a file-like object
1255        if isinstance(file, os.PathLike):
1256            file = os.fspath(file)
1257        if isinstance(file, str):
1258            # No, it's a filename
1259            self._filePassed = 0
1260            self.filename = file
1261            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1262                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
1263            filemode = modeDict[mode]
1264            while True:
1265                try:
1266                    self.fp = io.open(file, filemode)
1267                except OSError:
1268                    if filemode in modeDict:
1269                        filemode = modeDict[filemode]
1270                        continue
1271                    raise
1272                break
1273        else:
1274            self._filePassed = 1
1275            self.fp = file
1276            self.filename = getattr(file, 'name', None)
1277        self._fileRefCnt = 1
1278        self._lock = threading.RLock()
1279        self._seekable = True
1280        self._writing = False
1281
1282        try:
1283            if mode == 'r':
1284                self._RealGetContents()
1285            elif mode in ('w', 'x'):
1286                # set the modified flag so central directory gets written
1287                # even if no files are added to the archive
1288                self._didModify = True
1289                try:
1290                    self.start_dir = self.fp.tell()
1291                except (AttributeError, OSError):
1292                    self.fp = _Tellable(self.fp)
1293                    self.start_dir = 0
1294                    self._seekable = False
1295                else:
1296                    # Some file-like objects can provide tell() but not seek()
1297                    try:
1298                        self.fp.seek(self.start_dir)
1299                    except (AttributeError, OSError):
1300                        self._seekable = False
1301            elif mode == 'a':
1302                try:
1303                    # See if file is a zip file
1304                    self._RealGetContents()
1305                    # seek to start of directory and overwrite
1306                    self.fp.seek(self.start_dir)
1307                except BadZipFile:
1308                    # file is not a zip file, just append
1309                    self.fp.seek(0, 2)
1310
1311                    # set the modified flag so central directory gets written
1312                    # even if no files are added to the archive
1313                    self._didModify = True
1314                    self.start_dir = self.fp.tell()
1315            else:
1316                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
1317        except:
1318            fp = self.fp
1319            self.fp = None
1320            self._fpclose(fp)
1321            raise
1322
1323    def __enter__(self):
1324        return self
1325
1326    def __exit__(self, type, value, traceback):
1327        self.close()
1328
1329    def __repr__(self):
1330        result = ['<%s.%s' % (self.__class__.__module__,
1331                              self.__class__.__qualname__)]
1332        if self.fp is not None:
1333            if self._filePassed:
1334                result.append(' file=%r' % self.fp)
1335            elif self.filename is not None:
1336                result.append(' filename=%r' % self.filename)
1337            result.append(' mode=%r' % self.mode)
1338        else:
1339            result.append(' [closed]')
1340        result.append('>')
1341        return ''.join(result)
1342
1343    def _RealGetContents(self):
1344        """Read in the table of contents for the ZIP file."""
1345        fp = self.fp
1346        try:
1347            endrec = _EndRecData(fp)
1348        except OSError:
1349            raise BadZipFile("File is not a zip file")
1350        if not endrec:
1351            raise BadZipFile("File is not a zip file")
1352        if self.debug > 1:
1353            print(endrec)
1354        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
1355        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
1356        self._comment = endrec[_ECD_COMMENT]    # archive comment
1357
1358        # "concat" is zero, unless zip was concatenated to another file
1359        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1360        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1361            # If Zip64 extension structures are present, account for them
1362            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1363
1364        if self.debug > 2:
1365            inferred = concat + offset_cd
1366            print("given, inferred, offset", offset_cd, inferred, concat)
1367        # self.start_dir:  Position of start of central directory
1368        self.start_dir = offset_cd + concat
1369        fp.seek(self.start_dir, 0)
1370        data = fp.read(size_cd)
1371        fp = io.BytesIO(data)
1372        total = 0
1373        while total < size_cd:
1374            centdir = fp.read(sizeCentralDir)
1375            if len(centdir) != sizeCentralDir:
1376                raise BadZipFile("Truncated central directory")
1377            centdir = struct.unpack(structCentralDir, centdir)
1378            if centdir[_CD_SIGNATURE] != stringCentralDir:
1379                raise BadZipFile("Bad magic number for central directory")
1380            if self.debug > 2:
1381                print(centdir)
1382            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1383            flags = centdir[5]
1384            if flags & _MASK_UTF_FILENAME:
1385                # UTF-8 file names extension
1386                filename = filename.decode('utf-8')
1387            else:
1388                # Historical ZIP filename encoding
1389                filename = filename.decode('cp437')
1390            # Create ZipInfo instance to store file information
1391            x = ZipInfo(filename)
1392            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1393            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1394            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1395            (x.create_version, x.create_system, x.extract_version, x.reserved,
1396             x.flag_bits, x.compress_type, t, d,
1397             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1398            if x.extract_version > MAX_EXTRACT_VERSION:
1399                raise NotImplementedError("zip file version %.1f" %
1400                                          (x.extract_version / 10))
1401            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1402            # Convert date/time code to (year, month, day, hour, min, sec)
1403            x._raw_time = t
1404            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1405                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1406
1407            x._decodeExtra()
1408            x.header_offset = x.header_offset + concat
1409            self.filelist.append(x)
1410            self.NameToInfo[x.filename] = x
1411
1412            # update total bytes read from central directory
1413            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1414                     + centdir[_CD_EXTRA_FIELD_LENGTH]
1415                     + centdir[_CD_COMMENT_LENGTH])
1416
1417            if self.debug > 2:
1418                print("total", total)
1419
1420
1421    def namelist(self):
1422        """Return a list of file names in the archive."""
1423        return [data.filename for data in self.filelist]
1424
1425    def infolist(self):
1426        """Return a list of class ZipInfo instances for files in the
1427        archive."""
1428        return self.filelist
1429
1430    def printdir(self, file=None):
1431        """Print a table of contents for the zip file."""
1432        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1433              file=file)
1434        for zinfo in self.filelist:
1435            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1436            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1437                  file=file)
1438
1439    def testzip(self):
1440        """Read all the files and check the CRC."""
1441        chunk_size = 2 ** 20
1442        for zinfo in self.filelist:
1443            try:
1444                # Read by chunks, to avoid an OverflowError or a
1445                # MemoryError with very large embedded files.
1446                with self.open(zinfo.filename, "r") as f:
1447                    while f.read(chunk_size):     # Check CRC-32
1448                        pass
1449            except BadZipFile:
1450                return zinfo.filename
1451
1452    def getinfo(self, name):
1453        """Return the instance of ZipInfo given 'name'."""
1454        info = self.NameToInfo.get(name)
1455        if info is None:
1456            raise KeyError(
1457                'There is no item named %r in the archive' % name)
1458
1459        return info
1460
1461    def setpassword(self, pwd):
1462        """Set default password for encrypted files."""
1463        if pwd and not isinstance(pwd, bytes):
1464            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1465        if pwd:
1466            self.pwd = pwd
1467        else:
1468            self.pwd = None
1469
1470    @property
1471    def comment(self):
1472        """The comment text associated with the ZIP file."""
1473        return self._comment
1474
1475    @comment.setter
1476    def comment(self, comment):
1477        if not isinstance(comment, bytes):
1478            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1479        # check for valid comment length
1480        if len(comment) > ZIP_MAX_COMMENT:
1481            import warnings
1482            warnings.warn('Archive comment is too long; truncating to %d bytes'
1483                          % ZIP_MAX_COMMENT, stacklevel=2)
1484            comment = comment[:ZIP_MAX_COMMENT]
1485        self._comment = comment
1486        self._didModify = True
1487
1488    def read(self, name, pwd=None):
1489        """Return file bytes for name."""
1490        with self.open(name, "r", pwd) as fp:
1491            return fp.read()
1492
1493    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
1494        """Return file-like object for 'name'.
1495
1496        name is a string for the file name within the ZIP file, or a ZipInfo
1497        object.
1498
1499        mode should be 'r' to read a file already in the ZIP file, or 'w' to
1500        write to a file newly added to the archive.
1501
1502        pwd is the password to decrypt files (only used for reading).
1503
1504        When writing, if the file size is not known in advance but may exceed
1505        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1506        files.  If the size is known in advance, it is best to pass a ZipInfo
1507        instance for name, with zinfo.file_size set.
1508        """
1509        if mode not in {"r", "w"}:
1510            raise ValueError('open() requires mode "r" or "w"')
1511        if pwd and (mode == "w"):
1512            raise ValueError("pwd is only supported for reading files")
1513        if not self.fp:
1514            raise ValueError(
1515                "Attempt to use ZIP archive that was already closed")
1516
1517        # Make sure we have an info object
1518        if isinstance(name, ZipInfo):
1519            # 'name' is already an info object
1520            zinfo = name
1521        elif mode == 'w':
1522            zinfo = ZipInfo(name)
1523            zinfo.compress_type = self.compression
1524            zinfo._compresslevel = self.compresslevel
1525        else:
1526            # Get info object for name
1527            zinfo = self.getinfo(name)
1528
1529        if mode == 'w':
1530            return self._open_to_write(zinfo, force_zip64=force_zip64)
1531
1532        if self._writing:
1533            raise ValueError("Can't read from the ZIP file while there "
1534                    "is an open writing handle on it. "
1535                    "Close the writing handle before trying to read.")
1536
1537        # Open for reading:
1538        self._fileRefCnt += 1
1539        zef_file = _SharedFile(self.fp, zinfo.header_offset,
1540                               self._fpclose, self._lock, lambda: self._writing)
1541        try:
1542            # Skip the file header:
1543            fheader = zef_file.read(sizeFileHeader)
1544            if len(fheader) != sizeFileHeader:
1545                raise BadZipFile("Truncated file header")
1546            fheader = struct.unpack(structFileHeader, fheader)
1547            if fheader[_FH_SIGNATURE] != stringFileHeader:
1548                raise BadZipFile("Bad magic number for file header")
1549
1550            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1551            if fheader[_FH_EXTRA_FIELD_LENGTH]:
1552                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1553
1554            if zinfo.flag_bits & _MASK_COMPRESSED_PATCH:
1555                # Zip 2.7: compressed patched data
1556                raise NotImplementedError("compressed patched data (flag bit 5)")
1557
1558            if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION:
1559                # strong encryption
1560                raise NotImplementedError("strong encryption (flag bit 6)")
1561
1562            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME:
1563                # UTF-8 filename
1564                fname_str = fname.decode("utf-8")
1565            else:
1566                fname_str = fname.decode("cp437")
1567
1568            if fname_str != zinfo.orig_filename:
1569                raise BadZipFile(
1570                    'File name in directory %r and header %r differ.'
1571                    % (zinfo.orig_filename, fname))
1572
1573            # check for encrypted flag & handle password
1574            is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
1575            if is_encrypted:
1576                if not pwd:
1577                    pwd = self.pwd
1578                if pwd and not isinstance(pwd, bytes):
1579                    raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1580                if not pwd:
1581                    raise RuntimeError("File %r is encrypted, password "
1582                                       "required for extraction" % name)
1583            else:
1584                pwd = None
1585
1586            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1587        except:
1588            zef_file.close()
1589            raise
1590
1591    def _open_to_write(self, zinfo, force_zip64=False):
1592        if force_zip64 and not self._allowZip64:
1593            raise ValueError(
1594                "force_zip64 is True, but allowZip64 was False when opening "
1595                "the ZIP file."
1596            )
1597        if self._writing:
1598            raise ValueError("Can't write to the ZIP file while there is "
1599                             "another write handle open on it. "
1600                             "Close the first handle before opening another.")
1601
1602        # Size and CRC are overwritten with correct data after processing the file
1603        zinfo.compress_size = 0
1604        zinfo.CRC = 0
1605
1606        zinfo.flag_bits = 0x00
1607        if zinfo.compress_type == ZIP_LZMA:
1608            # Compressed data includes an end-of-stream (EOS) marker
1609            zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1610        if not self._seekable:
1611            zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR
1612
1613        if not zinfo.external_attr:
1614            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1615
1616        # Compressed size can be larger than uncompressed size
1617        zip64 = self._allowZip64 and \
1618                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1619
1620        if self._seekable:
1621            self.fp.seek(self.start_dir)
1622        zinfo.header_offset = self.fp.tell()
1623
1624        self._writecheck(zinfo)
1625        self._didModify = True
1626
1627        self.fp.write(zinfo.FileHeader(zip64))
1628
1629        self._writing = True
1630        return _ZipWriteFile(self, zinfo, zip64)
1631
1632    def extract(self, member, path=None, pwd=None):
1633        """Extract a member from the archive to the current working directory,
1634           using its full name. Its file information is extracted as accurately
1635           as possible. `member' may be a filename or a ZipInfo object. You can
1636           specify a different directory using `path'.
1637        """
1638        if path is None:
1639            path = os.getcwd()
1640        else:
1641            path = os.fspath(path)
1642
1643        return self._extract_member(member, path, pwd)
1644
1645    def extractall(self, path=None, members=None, pwd=None):
1646        """Extract all members from the archive to the current working
1647           directory. `path' specifies a different directory to extract to.
1648           `members' is optional and must be a subset of the list returned
1649           by namelist().
1650        """
1651        if members is None:
1652            members = self.namelist()
1653
1654        if path is None:
1655            path = os.getcwd()
1656        else:
1657            path = os.fspath(path)
1658
1659        for zipinfo in members:
1660            self._extract_member(zipinfo, path, pwd)
1661
1662    @classmethod
1663    def _sanitize_windows_name(cls, arcname, pathsep):
1664        """Replace bad characters and remove trailing dots from parts."""
1665        table = cls._windows_illegal_name_trans_table
1666        if not table:
1667            illegal = ':<>|"?*'
1668            table = str.maketrans(illegal, '_' * len(illegal))
1669            cls._windows_illegal_name_trans_table = table
1670        arcname = arcname.translate(table)
1671        # remove trailing dots
1672        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1673        # rejoin, removing empty parts.
1674        arcname = pathsep.join(x for x in arcname if x)
1675        return arcname
1676
1677    def _extract_member(self, member, targetpath, pwd):
1678        """Extract the ZipInfo object 'member' to a physical
1679           file on the path targetpath.
1680        """
1681        if not isinstance(member, ZipInfo):
1682            member = self.getinfo(member)
1683
1684        # build the destination pathname, replacing
1685        # forward slashes to platform specific separators.
1686        arcname = member.filename.replace('/', os.path.sep)
1687
1688        if os.path.altsep:
1689            arcname = arcname.replace(os.path.altsep, os.path.sep)
1690        # interpret absolute pathname as relative, remove drive letter or
1691        # UNC path, redundant separators, "." and ".." components.
1692        arcname = os.path.splitdrive(arcname)[1]
1693        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1694        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1695                                   if x not in invalid_path_parts)
1696        if os.path.sep == '\\':
1697            # filter illegal characters on Windows
1698            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1699
1700        targetpath = os.path.join(targetpath, arcname)
1701        targetpath = os.path.normpath(targetpath)
1702
1703        # Create all upper directories if necessary.
1704        upperdirs = os.path.dirname(targetpath)
1705        if upperdirs and not os.path.exists(upperdirs):
1706            os.makedirs(upperdirs)
1707
1708        if member.is_dir():
1709            if not os.path.isdir(targetpath):
1710                os.mkdir(targetpath)
1711            return targetpath
1712
1713        with self.open(member, pwd=pwd) as source, \
1714             open(targetpath, "wb") as target:
1715            shutil.copyfileobj(source, target)
1716
1717        return targetpath
1718
1719    def _writecheck(self, zinfo):
1720        """Check for errors before writing a file to the archive."""
1721        if zinfo.filename in self.NameToInfo:
1722            import warnings
1723            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1724        if self.mode not in ('w', 'x', 'a'):
1725            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1726        if not self.fp:
1727            raise ValueError(
1728                "Attempt to write ZIP archive that was already closed")
1729        _check_compression(zinfo.compress_type)
1730        if not self._allowZip64:
1731            requires_zip64 = None
1732            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1733                requires_zip64 = "Files count"
1734            elif zinfo.file_size > ZIP64_LIMIT:
1735                requires_zip64 = "Filesize"
1736            elif zinfo.header_offset > ZIP64_LIMIT:
1737                requires_zip64 = "Zipfile size"
1738            if requires_zip64:
1739                raise LargeZipFile(requires_zip64 +
1740                                   " would require ZIP64 extensions")
1741
1742    def write(self, filename, arcname=None,
1743              compress_type=None, compresslevel=None):
1744        """Put the bytes from filename into the archive under the name
1745        arcname."""
1746        if not self.fp:
1747            raise ValueError(
1748                "Attempt to write to ZIP archive that was already closed")
1749        if self._writing:
1750            raise ValueError(
1751                "Can't write to ZIP archive while an open writing handle exists"
1752            )
1753
1754        zinfo = ZipInfo.from_file(filename, arcname,
1755                                  strict_timestamps=self._strict_timestamps)
1756
1757        if zinfo.is_dir():
1758            zinfo.compress_size = 0
1759            zinfo.CRC = 0
1760        else:
1761            if compress_type is not None:
1762                zinfo.compress_type = compress_type
1763            else:
1764                zinfo.compress_type = self.compression
1765
1766            if compresslevel is not None:
1767                zinfo._compresslevel = compresslevel
1768            else:
1769                zinfo._compresslevel = self.compresslevel
1770
1771        if zinfo.is_dir():
1772            with self._lock:
1773                if self._seekable:
1774                    self.fp.seek(self.start_dir)
1775                zinfo.header_offset = self.fp.tell()  # Start of header bytes
1776                if zinfo.compress_type == ZIP_LZMA:
1777                # Compressed data includes an end-of-stream (EOS) marker
1778                    zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1779
1780                self._writecheck(zinfo)
1781                self._didModify = True
1782
1783                self.filelist.append(zinfo)
1784                self.NameToInfo[zinfo.filename] = zinfo
1785                self.fp.write(zinfo.FileHeader(False))
1786                self.start_dir = self.fp.tell()
1787        else:
1788            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1789                shutil.copyfileobj(src, dest, 1024*8)
1790
1791    def writestr(self, zinfo_or_arcname, data,
1792                 compress_type=None, compresslevel=None):
1793        """Write a file into the archive.  The contents is 'data', which
1794        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1795        it is encoded as UTF-8 first.
1796        'zinfo_or_arcname' is either a ZipInfo instance or
1797        the name of the file in the archive."""
1798        if isinstance(data, str):
1799            data = data.encode("utf-8")
1800        if not isinstance(zinfo_or_arcname, ZipInfo):
1801            zinfo = ZipInfo(filename=zinfo_or_arcname,
1802                            date_time=time.localtime(time.time())[:6])
1803            zinfo.compress_type = self.compression
1804            zinfo._compresslevel = self.compresslevel
1805            if zinfo.filename[-1] == '/':
1806                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1807                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1808            else:
1809                zinfo.external_attr = 0o600 << 16     # ?rw-------
1810        else:
1811            zinfo = zinfo_or_arcname
1812
1813        if not self.fp:
1814            raise ValueError(
1815                "Attempt to write to ZIP archive that was already closed")
1816        if self._writing:
1817            raise ValueError(
1818                "Can't write to ZIP archive while an open writing handle exists."
1819            )
1820
1821        if compress_type is not None:
1822            zinfo.compress_type = compress_type
1823
1824        if compresslevel is not None:
1825            zinfo._compresslevel = compresslevel
1826
1827        zinfo.file_size = len(data)            # Uncompressed size
1828        with self._lock:
1829            with self.open(zinfo, mode='w') as dest:
1830                dest.write(data)
1831
1832    def __del__(self):
1833        """Call the "close()" method in case the user forgot."""
1834        self.close()
1835
1836    def close(self):
1837        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1838        records."""
1839        if self.fp is None:
1840            return
1841
1842        if self._writing:
1843            raise ValueError("Can't close the ZIP file while there is "
1844                             "an open writing handle on it. "
1845                             "Close the writing handle before closing the zip.")
1846
1847        try:
1848            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1849                with self._lock:
1850                    if self._seekable:
1851                        self.fp.seek(self.start_dir)
1852                    self._write_end_record()
1853        finally:
1854            fp = self.fp
1855            self.fp = None
1856            self._fpclose(fp)
1857
1858    def _write_end_record(self):
1859        for zinfo in self.filelist:         # write central directory
1860            dt = zinfo.date_time
1861            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1862            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1863            extra = []
1864            if zinfo.file_size > ZIP64_LIMIT \
1865               or zinfo.compress_size > ZIP64_LIMIT:
1866                extra.append(zinfo.file_size)
1867                extra.append(zinfo.compress_size)
1868                file_size = 0xffffffff
1869                compress_size = 0xffffffff
1870            else:
1871                file_size = zinfo.file_size
1872                compress_size = zinfo.compress_size
1873
1874            if zinfo.header_offset > ZIP64_LIMIT:
1875                extra.append(zinfo.header_offset)
1876                header_offset = 0xffffffff
1877            else:
1878                header_offset = zinfo.header_offset
1879
1880            extra_data = zinfo.extra
1881            min_version = 0
1882            if extra:
1883                # Append a ZIP64 field to the extra's
1884                extra_data = _strip_extra(extra_data, (1,))
1885                extra_data = struct.pack(
1886                    '<HH' + 'Q'*len(extra),
1887                    1, 8*len(extra), *extra) + extra_data
1888
1889                min_version = ZIP64_VERSION
1890
1891            if zinfo.compress_type == ZIP_BZIP2:
1892                min_version = max(BZIP2_VERSION, min_version)
1893            elif zinfo.compress_type == ZIP_LZMA:
1894                min_version = max(LZMA_VERSION, min_version)
1895
1896            extract_version = max(min_version, zinfo.extract_version)
1897            create_version = max(min_version, zinfo.create_version)
1898            filename, flag_bits = zinfo._encodeFilenameFlags()
1899            centdir = struct.pack(structCentralDir,
1900                                  stringCentralDir, create_version,
1901                                  zinfo.create_system, extract_version, zinfo.reserved,
1902                                  flag_bits, zinfo.compress_type, dostime, dosdate,
1903                                  zinfo.CRC, compress_size, file_size,
1904                                  len(filename), len(extra_data), len(zinfo.comment),
1905                                  0, zinfo.internal_attr, zinfo.external_attr,
1906                                  header_offset)
1907            self.fp.write(centdir)
1908            self.fp.write(filename)
1909            self.fp.write(extra_data)
1910            self.fp.write(zinfo.comment)
1911
1912        pos2 = self.fp.tell()
1913        # Write end-of-zip-archive record
1914        centDirCount = len(self.filelist)
1915        centDirSize = pos2 - self.start_dir
1916        centDirOffset = self.start_dir
1917        requires_zip64 = None
1918        if centDirCount > ZIP_FILECOUNT_LIMIT:
1919            requires_zip64 = "Files count"
1920        elif centDirOffset > ZIP64_LIMIT:
1921            requires_zip64 = "Central directory offset"
1922        elif centDirSize > ZIP64_LIMIT:
1923            requires_zip64 = "Central directory size"
1924        if requires_zip64:
1925            # Need to write the ZIP64 end-of-archive records
1926            if not self._allowZip64:
1927                raise LargeZipFile(requires_zip64 +
1928                                   " would require ZIP64 extensions")
1929            zip64endrec = struct.pack(
1930                structEndArchive64, stringEndArchive64,
1931                44, 45, 45, 0, 0, centDirCount, centDirCount,
1932                centDirSize, centDirOffset)
1933            self.fp.write(zip64endrec)
1934
1935            zip64locrec = struct.pack(
1936                structEndArchive64Locator,
1937                stringEndArchive64Locator, 0, pos2, 1)
1938            self.fp.write(zip64locrec)
1939            centDirCount = min(centDirCount, 0xFFFF)
1940            centDirSize = min(centDirSize, 0xFFFFFFFF)
1941            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1942
1943        endrec = struct.pack(structEndArchive, stringEndArchive,
1944                             0, 0, centDirCount, centDirCount,
1945                             centDirSize, centDirOffset, len(self._comment))
1946        self.fp.write(endrec)
1947        self.fp.write(self._comment)
1948        if self.mode == "a":
1949            self.fp.truncate()
1950        self.fp.flush()
1951
1952    def _fpclose(self, fp):
1953        assert self._fileRefCnt > 0
1954        self._fileRefCnt -= 1
1955        if not self._fileRefCnt and not self._filePassed:
1956            fp.close()
1957
1958
1959class PyZipFile(ZipFile):
1960    """Class to create ZIP archives with Python library files and packages."""
1961
1962    def __init__(self, file, mode="r", compression=ZIP_STORED,
1963                 allowZip64=True, optimize=-1):
1964        ZipFile.__init__(self, file, mode=mode, compression=compression,
1965                         allowZip64=allowZip64)
1966        self._optimize = optimize
1967
1968    def writepy(self, pathname, basename="", filterfunc=None):
1969        """Add all files from "pathname" to the ZIP archive.
1970
1971        If pathname is a package directory, search the directory and
1972        all package subdirectories recursively for all *.py and enter
1973        the modules into the archive.  If pathname is a plain
1974        directory, listdir *.py and enter all modules.  Else, pathname
1975        must be a Python *.py file and the module will be put into the
1976        archive.  Added modules are always module.pyc.
1977        This method will compile the module.py into module.pyc if
1978        necessary.
1979        If filterfunc(pathname) is given, it is called with every argument.
1980        When it is False, the file or directory is skipped.
1981        """
1982        pathname = os.fspath(pathname)
1983        if filterfunc and not filterfunc(pathname):
1984            if self.debug:
1985                label = 'path' if os.path.isdir(pathname) else 'file'
1986                print('%s %r skipped by filterfunc' % (label, pathname))
1987            return
1988        dir, name = os.path.split(pathname)
1989        if os.path.isdir(pathname):
1990            initname = os.path.join(pathname, "__init__.py")
1991            if os.path.isfile(initname):
1992                # This is a package directory, add it
1993                if basename:
1994                    basename = "%s/%s" % (basename, name)
1995                else:
1996                    basename = name
1997                if self.debug:
1998                    print("Adding package in", pathname, "as", basename)
1999                fname, arcname = self._get_codename(initname[0:-3], basename)
2000                if self.debug:
2001                    print("Adding", arcname)
2002                self.write(fname, arcname)
2003                dirlist = sorted(os.listdir(pathname))
2004                dirlist.remove("__init__.py")
2005                # Add all *.py files and package subdirectories
2006                for filename in dirlist:
2007                    path = os.path.join(pathname, filename)
2008                    root, ext = os.path.splitext(filename)
2009                    if os.path.isdir(path):
2010                        if os.path.isfile(os.path.join(path, "__init__.py")):
2011                            # This is a package directory, add it
2012                            self.writepy(path, basename,
2013                                         filterfunc=filterfunc)  # Recursive call
2014                    elif ext == ".py":
2015                        if filterfunc and not filterfunc(path):
2016                            if self.debug:
2017                                print('file %r skipped by filterfunc' % path)
2018                            continue
2019                        fname, arcname = self._get_codename(path[0:-3],
2020                                                            basename)
2021                        if self.debug:
2022                            print("Adding", arcname)
2023                        self.write(fname, arcname)
2024            else:
2025                # This is NOT a package directory, add its files at top level
2026                if self.debug:
2027                    print("Adding files from directory", pathname)
2028                for filename in sorted(os.listdir(pathname)):
2029                    path = os.path.join(pathname, filename)
2030                    root, ext = os.path.splitext(filename)
2031                    if ext == ".py":
2032                        if filterfunc and not filterfunc(path):
2033                            if self.debug:
2034                                print('file %r skipped by filterfunc' % path)
2035                            continue
2036                        fname, arcname = self._get_codename(path[0:-3],
2037                                                            basename)
2038                        if self.debug:
2039                            print("Adding", arcname)
2040                        self.write(fname, arcname)
2041        else:
2042            if pathname[-3:] != ".py":
2043                raise RuntimeError(
2044                    'Files added with writepy() must end with ".py"')
2045            fname, arcname = self._get_codename(pathname[0:-3], basename)
2046            if self.debug:
2047                print("Adding file", arcname)
2048            self.write(fname, arcname)
2049
2050    def _get_codename(self, pathname, basename):
2051        """Return (filename, archivename) for the path.
2052
2053        Given a module name path, return the correct file path and
2054        archive name, compiling if necessary.  For example, given
2055        /python/lib/string, return (/python/lib/string.pyc, string).
2056        """
2057        def _compile(file, optimize=-1):
2058            import py_compile
2059            if self.debug:
2060                print("Compiling", file)
2061            try:
2062                py_compile.compile(file, doraise=True, optimize=optimize)
2063            except py_compile.PyCompileError as err:
2064                print(err.msg)
2065                return False
2066            return True
2067
2068        file_py  = pathname + ".py"
2069        file_pyc = pathname + ".pyc"
2070        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2071        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2072        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2073        if self._optimize == -1:
2074            # legacy mode: use whatever file is present
2075            if (os.path.isfile(file_pyc) and
2076                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2077                # Use .pyc file.
2078                arcname = fname = file_pyc
2079            elif (os.path.isfile(pycache_opt0) and
2080                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2081                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2082                # file name in the archive.
2083                fname = pycache_opt0
2084                arcname = file_pyc
2085            elif (os.path.isfile(pycache_opt1) and
2086                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2087                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2088                # file name in the archive.
2089                fname = pycache_opt1
2090                arcname = file_pyc
2091            elif (os.path.isfile(pycache_opt2) and
2092                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2093                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2094                # file name in the archive.
2095                fname = pycache_opt2
2096                arcname = file_pyc
2097            else:
2098                # Compile py into PEP 3147 pyc file.
2099                if _compile(file_py):
2100                    if sys.flags.optimize == 0:
2101                        fname = pycache_opt0
2102                    elif sys.flags.optimize == 1:
2103                        fname = pycache_opt1
2104                    else:
2105                        fname = pycache_opt2
2106                    arcname = file_pyc
2107                else:
2108                    fname = arcname = file_py
2109        else:
2110            # new mode: use given optimization level
2111            if self._optimize == 0:
2112                fname = pycache_opt0
2113                arcname = file_pyc
2114            else:
2115                arcname = file_pyc
2116                if self._optimize == 1:
2117                    fname = pycache_opt1
2118                elif self._optimize == 2:
2119                    fname = pycache_opt2
2120                else:
2121                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2122                    raise ValueError(msg)
2123            if not (os.path.isfile(fname) and
2124                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2125                if not _compile(file_py, optimize=self._optimize):
2126                    fname = arcname = file_py
2127        archivename = os.path.split(arcname)[1]
2128        if basename:
2129            archivename = "%s/%s" % (basename, archivename)
2130        return (fname, archivename)
2131
2132
2133def _parents(path):
2134    """
2135    Given a path with elements separated by
2136    posixpath.sep, generate all parents of that path.
2137
2138    >>> list(_parents('b/d'))
2139    ['b']
2140    >>> list(_parents('/b/d/'))
2141    ['/b']
2142    >>> list(_parents('b/d/f/'))
2143    ['b/d', 'b']
2144    >>> list(_parents('b'))
2145    []
2146    >>> list(_parents(''))
2147    []
2148    """
2149    return itertools.islice(_ancestry(path), 1, None)
2150
2151
2152def _ancestry(path):
2153    """
2154    Given a path with elements separated by
2155    posixpath.sep, generate all elements of that path
2156
2157    >>> list(_ancestry('b/d'))
2158    ['b/d', 'b']
2159    >>> list(_ancestry('/b/d/'))
2160    ['/b/d', '/b']
2161    >>> list(_ancestry('b/d/f/'))
2162    ['b/d/f', 'b/d', 'b']
2163    >>> list(_ancestry('b'))
2164    ['b']
2165    >>> list(_ancestry(''))
2166    []
2167    """
2168    path = path.rstrip(posixpath.sep)
2169    while path and path != posixpath.sep:
2170        yield path
2171        path, tail = posixpath.split(path)
2172
2173
2174_dedupe = dict.fromkeys
2175"""Deduplicate an iterable in original order"""
2176
2177
2178def _difference(minuend, subtrahend):
2179    """
2180    Return items in minuend not in subtrahend, retaining order
2181    with O(1) lookup.
2182    """
2183    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2184
2185
2186class CompleteDirs(ZipFile):
2187    """
2188    A ZipFile subclass that ensures that implied directories
2189    are always included in the namelist.
2190    """
2191
2192    @staticmethod
2193    def _implied_dirs(names):
2194        parents = itertools.chain.from_iterable(map(_parents, names))
2195        as_dirs = (p + posixpath.sep for p in parents)
2196        return _dedupe(_difference(as_dirs, names))
2197
2198    def namelist(self):
2199        names = super(CompleteDirs, self).namelist()
2200        return names + list(self._implied_dirs(names))
2201
2202    def _name_set(self):
2203        return set(self.namelist())
2204
2205    def resolve_dir(self, name):
2206        """
2207        If the name represents a directory, return that name
2208        as a directory (with the trailing slash).
2209        """
2210        names = self._name_set()
2211        dirname = name + '/'
2212        dir_match = name not in names and dirname in names
2213        return dirname if dir_match else name
2214
2215    @classmethod
2216    def make(cls, source):
2217        """
2218        Given a source (filename or zipfile), return an
2219        appropriate CompleteDirs subclass.
2220        """
2221        if isinstance(source, CompleteDirs):
2222            return source
2223
2224        if not isinstance(source, ZipFile):
2225            return cls(source)
2226
2227        # Only allow for FastLookup when supplied zipfile is read-only
2228        if 'r' not in source.mode:
2229            cls = CompleteDirs
2230
2231        source.__class__ = cls
2232        return source
2233
2234
2235class FastLookup(CompleteDirs):
2236    """
2237    ZipFile subclass to ensure implicit
2238    dirs exist and are resolved rapidly.
2239    """
2240
2241    def namelist(self):
2242        with contextlib.suppress(AttributeError):
2243            return self.__names
2244        self.__names = super(FastLookup, self).namelist()
2245        return self.__names
2246
2247    def _name_set(self):
2248        with contextlib.suppress(AttributeError):
2249            return self.__lookup
2250        self.__lookup = super(FastLookup, self)._name_set()
2251        return self.__lookup
2252
2253
2254class Path:
2255    """
2256    A pathlib-compatible interface for zip files.
2257
2258    Consider a zip file with this structure::
2259
2260        .
2261        ├── a.txt
2262        └── b
2263            ├── c.txt
2264            └── d
2265                └── e.txt
2266
2267    >>> data = io.BytesIO()
2268    >>> zf = ZipFile(data, 'w')
2269    >>> zf.writestr('a.txt', 'content of a')
2270    >>> zf.writestr('b/c.txt', 'content of c')
2271    >>> zf.writestr('b/d/e.txt', 'content of e')
2272    >>> zf.filename = 'mem/abcde.zip'
2273
2274    Path accepts the zipfile object itself or a filename
2275
2276    >>> root = Path(zf)
2277
2278    From there, several path operations are available.
2279
2280    Directory iteration (including the zip file itself):
2281
2282    >>> a, b = root.iterdir()
2283    >>> a
2284    Path('mem/abcde.zip', 'a.txt')
2285    >>> b
2286    Path('mem/abcde.zip', 'b/')
2287
2288    name property:
2289
2290    >>> b.name
2291    'b'
2292
2293    join with divide operator:
2294
2295    >>> c = b / 'c.txt'
2296    >>> c
2297    Path('mem/abcde.zip', 'b/c.txt')
2298    >>> c.name
2299    'c.txt'
2300
2301    Read text:
2302
2303    >>> c.read_text()
2304    'content of c'
2305
2306    existence:
2307
2308    >>> c.exists()
2309    True
2310    >>> (b / 'missing.txt').exists()
2311    False
2312
2313    Coercion to string:
2314
2315    >>> import os
2316    >>> str(c).replace(os.sep, posixpath.sep)
2317    'mem/abcde.zip/b/c.txt'
2318
2319    At the root, ``name``, ``filename``, and ``parent``
2320    resolve to the zipfile. Note these attributes are not
2321    valid and will raise a ``ValueError`` if the zipfile
2322    has no filename.
2323
2324    >>> root.name
2325    'abcde.zip'
2326    >>> str(root.filename).replace(os.sep, posixpath.sep)
2327    'mem/abcde.zip'
2328    >>> str(root.parent)
2329    'mem'
2330    """
2331
2332    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2333
2334    def __init__(self, root, at=""):
2335        """
2336        Construct a Path from a ZipFile or filename.
2337
2338        Note: When the source is an existing ZipFile object,
2339        its type (__class__) will be mutated to a
2340        specialized type. If the caller wishes to retain the
2341        original type, the caller should either create a
2342        separate ZipFile object or pass a filename.
2343        """
2344        self.root = FastLookup.make(root)
2345        self.at = at
2346
2347    def open(self, mode='r', *args, pwd=None, **kwargs):
2348        """
2349        Open this entry as text or binary following the semantics
2350        of ``pathlib.Path.open()`` by passing arguments through
2351        to io.TextIOWrapper().
2352        """
2353        if self.is_dir():
2354            raise IsADirectoryError(self)
2355        zip_mode = mode[0]
2356        if not self.exists() and zip_mode == 'r':
2357            raise FileNotFoundError(self)
2358        stream = self.root.open(self.at, zip_mode, pwd=pwd)
2359        if 'b' in mode:
2360            if args or kwargs:
2361                raise ValueError("encoding args invalid for binary operation")
2362            return stream
2363        else:
2364            kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2365        return io.TextIOWrapper(stream, *args, **kwargs)
2366
2367    @property
2368    def name(self):
2369        return pathlib.Path(self.at).name or self.filename.name
2370
2371    @property
2372    def suffix(self):
2373        return pathlib.Path(self.at).suffix or self.filename.suffix
2374
2375    @property
2376    def suffixes(self):
2377        return pathlib.Path(self.at).suffixes or self.filename.suffixes
2378
2379    @property
2380    def stem(self):
2381        return pathlib.Path(self.at).stem or self.filename.stem
2382
2383    @property
2384    def filename(self):
2385        return pathlib.Path(self.root.filename).joinpath(self.at)
2386
2387    def read_text(self, *args, **kwargs):
2388        kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2389        with self.open('r', *args, **kwargs) as strm:
2390            return strm.read()
2391
2392    def read_bytes(self):
2393        with self.open('rb') as strm:
2394            return strm.read()
2395
2396    def _is_child(self, path):
2397        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2398
2399    def _next(self, at):
2400        return self.__class__(self.root, at)
2401
2402    def is_dir(self):
2403        return not self.at or self.at.endswith("/")
2404
2405    def is_file(self):
2406        return self.exists() and not self.is_dir()
2407
2408    def exists(self):
2409        return self.at in self.root._name_set()
2410
2411    def iterdir(self):
2412        if not self.is_dir():
2413            raise ValueError("Can't listdir a file")
2414        subs = map(self._next, self.root.namelist())
2415        return filter(self._is_child, subs)
2416
2417    def __str__(self):
2418        return posixpath.join(self.root.filename, self.at)
2419
2420    def __repr__(self):
2421        return self.__repr.format(self=self)
2422
2423    def joinpath(self, *other):
2424        next = posixpath.join(self.at, *other)
2425        return self._next(self.root.resolve_dir(next))
2426
2427    __truediv__ = joinpath
2428
2429    @property
2430    def parent(self):
2431        if not self.at:
2432            return self.filename.parent
2433        parent_at = posixpath.dirname(self.at.rstrip('/'))
2434        if parent_at:
2435            parent_at += '/'
2436        return self._next(parent_at)
2437
2438
2439def main(args=None):
2440    import argparse
2441
2442    description = 'A simple command-line interface for zipfile module.'
2443    parser = argparse.ArgumentParser(description=description)
2444    group = parser.add_mutually_exclusive_group(required=True)
2445    group.add_argument('-l', '--list', metavar='<zipfile>',
2446                       help='Show listing of a zipfile')
2447    group.add_argument('-e', '--extract', nargs=2,
2448                       metavar=('<zipfile>', '<output_dir>'),
2449                       help='Extract zipfile into target dir')
2450    group.add_argument('-c', '--create', nargs='+',
2451                       metavar=('<name>', '<file>'),
2452                       help='Create zipfile from sources')
2453    group.add_argument('-t', '--test', metavar='<zipfile>',
2454                       help='Test if a zipfile is valid')
2455    args = parser.parse_args(args)
2456
2457    if args.test is not None:
2458        src = args.test
2459        with ZipFile(src, 'r') as zf:
2460            badfile = zf.testzip()
2461        if badfile:
2462            print("The following enclosed file is corrupted: {!r}".format(badfile))
2463        print("Done testing")
2464
2465    elif args.list is not None:
2466        src = args.list
2467        with ZipFile(src, 'r') as zf:
2468            zf.printdir()
2469
2470    elif args.extract is not None:
2471        src, curdir = args.extract
2472        with ZipFile(src, 'r') as zf:
2473            zf.extractall(curdir)
2474
2475    elif args.create is not None:
2476        zip_name = args.create.pop(0)
2477        files = args.create
2478
2479        def addToZip(zf, path, zippath):
2480            if os.path.isfile(path):
2481                zf.write(path, zippath, ZIP_DEFLATED)
2482            elif os.path.isdir(path):
2483                if zippath:
2484                    zf.write(path, zippath)
2485                for nm in sorted(os.listdir(path)):
2486                    addToZip(zf,
2487                             os.path.join(path, nm), os.path.join(zippath, nm))
2488            # else: ignore
2489
2490        with ZipFile(zip_name, 'w') as zf:
2491            for path in files:
2492                zippath = os.path.basename(path)
2493                if not zippath:
2494                    zippath = os.path.basename(os.path.dirname(path))
2495                if zippath in ('', os.curdir, os.pardir):
2496                    zippath = ''
2497                addToZip(zf, path, zippath)
2498
2499
2500if __name__ == "__main__":
2501    main()
2502