1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import io
7import os
8import importlib.util
9import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
15import threading
16
17try:
18    import zlib # We may need its compression method
19    crc32 = zlib.crc32
20except ImportError:
21    zlib = None
22    crc32 = binascii.crc32
23
24try:
25    import bz2 # We may need its compression method
26except ImportError:
27    bz2 = None
28
29try:
30    import lzma # We may need its compression method
31except ImportError:
32    lzma = None
33
# Names exported by a star-import of this module.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
37
class BadZipFile(Exception):
    """Error raised for ZIP data that is malformed or not supported."""
40
41
class LargeZipFile(Exception):
    """Signal that an archive needs ZIP64 extensions that are disabled.

    Raised while writing a zipfile whose contents require the ZIP64
    extensions when those extensions have not been enabled.
    """
47
# Legacy aliases; both names refer to the very same class as BadZipFile.
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Numeric limits.  ZipInfo.FileHeader() compares member sizes against
# ZIP64_LIMIT to decide whether the ZIP64 extra field is required.
ZIP64_LIMIT = (1 << 31) - 1          # 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1  # 0xFFFF
ZIP_MAX_COMMENT = (1 << 16) - 1      # 0xFFFF

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Version numbers written into headers.  ZipInfo.FileHeader() raises a
# member's create/extract version to at least the value that matches the
# features it uses (ZIP64, bzip2, LZMA).
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
68
69# Below are some formats and associated data for reading/writing headers using
70# the struct module.  The names and structures of headers/records are those used
71# in the PKWARE description of the ZIP file format:
72#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
73# (URL valid as of January 2008)
74
75# The "end of central directory" structure, magic number, size, and indices
76# (section V.I in the format document)
# Layout (little-endian, standard sizes, no padding): a 4-byte signature,
# four unsigned 16-bit fields, two unsigned 32-bit fields and one unsigned
# 16-bit field -- 22 bytes in total.  This format is spelled as bytes while
# the other header formats nearby are str; the struct module accepts both.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the list built from an unpacked record.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
93
94# The "central directory" structure, magic number, size, and indices
95# of entries in the structure (section V.F in the format document)
# Layout (little-endian, standard sizes, no padding): 4-byte signature,
# four unsigned bytes, four unsigned 16-bit fields, one unsigned 32-bit
# field, two unsigned 32-bit fields, five unsigned 16-bit fields and two
# unsigned 32-bit fields -- 46 bytes in total.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
# (one index per value produced by unpacking structCentralDir, in order)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
120
121# The "local file header" structure, magic number, size, and indices
122# (section V.A in the format document)
# Layout (little-endian, standard sizes, no padding): 4-byte signature, two
# unsigned bytes, four unsigned 16-bit fields, one unsigned 32-bit field,
# two unsigned 32-bit fields and two unsigned 16-bit fields -- 30 bytes in
# total.  ZipInfo.FileHeader() packs its header with this format.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the values produced by unpacking structFileHeader, in order.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
139
140# The "Zip64 end of central directory locator" structure, magic number, and size
# Locator layout (little-endian): 4-byte signature, unsigned 32-bit,
# unsigned 64-bit, unsigned 32-bit -- 20 bytes.  _EndRecData64 unpacks it as
# (signature, disk number, relative offset, number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout (little-endian): 4-byte signature, one unsigned 64-bit field, two
# unsigned 16-bit fields, two unsigned 32-bit fields and four unsigned
# 64-bit fields -- 56 bytes.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the values produced by unpacking structEndArchive64, in order.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
161
# Packed little-endian this value is the byte string b"PK\x07\x08".
# NOTE(review): "DD" presumably stands for "data descriptor" -- confirm
# against the code that uses this constant.
_DD_SIGNATURE = 0x08074b50

# Header of one extra-field record: two little-endian unsigned 16-bit values,
# unpacked by _strip_extra() as (field ID, payload length).
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
165
166def _strip_extra(extra, xids):
167    # Remove Extra Fields with specified IDs.
168    unpack = _EXTRA_FIELD_STRUCT.unpack
169    modified = False
170    buffer = []
171    start = i = 0
172    while i + 4 <= len(extra):
173        xid, xlen = unpack(extra[i : i + 4])
174        j = i + 4 + xlen
175        if xid in xids:
176            if i != start:
177                buffer.append(extra[start : i])
178            start = j
179            modified = True
180        i = j
181    if not modified:
182        return extra
183    return b''.join(buffer)
184
def _check_zipfile(fp):
    """Tell whether the open file object *fp* looks like a ZIP archive.

    Returns True when _EndRecData() finds an "end of central directory"
    record in *fp*; returns False when it finds none or when reading *fp*
    raises OSError.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy record => correct magic number
    except OSError:
        return False
    return found
192
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already open file or file-like object
    (anything that has a ``read`` attribute).  Paths are opened in binary
    mode and closed again before returning.  Any OSError raised while
    opening or inspecting the file is swallowed; the result computed so far
    (False unless the check already succeeded) is returned.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
208
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    *fpin* is a seekable binary file, *offset* is the position of the
    classic "end of central directory" record relative to the end of the
    file (a negative number) and *endrec* is the list built from that
    record.  The ZIP64 locator is expected immediately before that record
    and the ZIP64 record immediately before the locator.

    Returns *endrec*: unchanged when no valid ZIP64 data is found, otherwise
    updated in place with the values from the ZIP64 record.  Raises
    BadZipFile for archives that span several disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    loc_sig, diskno, _reloff, disks = struct.unpack(
        structEndArchive64Locator, locator)
    if loc_sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (rec_sig, _recsize, _create_version, _read_version,
     disk_num, disk_dir, entries_here, entries_total,
     dir_size, dir_offset) = struct.unpack(structEndArchive64, record)
    if rec_sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, rec_sig),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, dir_size),
        (_ECD_OFFSET, dir_offset),
    )
    for index, value in replacements:
        endrec[index] = value
    return endrec
250
251
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the eight unpacked fields of the ZIP "End
    of central dir" record, then the archive comment (bytes), then the file
    offset at which the record starts.  The list is passed through
    _EndRecData64(), which may overwrite fields with ZIP64 values.

    None is returned when *fpin* cannot be positioned near its end or when
    no usable record is found.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir
            and tail[:4] == stringEndArchive
            and tail[-2:] == b"\000\000"):
        # Signature matches and the comment length is zero: unpack, then
        # append a blank comment and the record's start offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  The comment is the last item in the file and may be up to 64K
    # long, so scan that much of the tail for the record signature.  It is
    # assumed that the signature does not appear inside the comment.
    search_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_start, 0)
    window = fpin.read()
    sig_at = window.rfind(stringEndArchive)
    if sig_at < 0:
        # Unable to find a valid end of central directory structure
        return None

    rec_end = sig_at + sizeEndCentDir
    raw_record = window[sig_at:rec_end]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw_record))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[rec_end:rec_end + comment_len])
    endrec.append(search_start + sig_at)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_start + sig_at - filesize,
                         endrec)
311
312
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive.

    Instances use __slots__, so only the attributes listed below exist.
    header_offset, CRC, compress_size, file_size and _raw_time are not set
    by __init__; reading them before they are assigned raises
    AttributeError.
    """

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name (str); it is cut at the first null
        character and the platform path separator is replaced by "/".
        date_time is a (year, month, day, hour, minute, second) sequence.

        Raises ValueError when the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description: name, compression, mode and sizes.

        Reads file_size and compress_size, so both must have been assigned.
        """
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are shown as a file mode, the lower 16 bits as a
        # plain hexadecimal attribute value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra field holding the sizes is
        appended: True forces it, False forbids it, None (the default)
        enables it exactly when a size exceeds ZIP64_LIMIT.

        As a side effect extract_version and create_version are raised to
        the minimum required by ZIP64 and by the compression method.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        # Pack the date as year-since-1980/month/day bit fields and the time
        # as hour/minute/two-second-unit bit fields.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra record with ID 1: header followed by both 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise as UTF-8, in
        which case bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (ID 0x0001) found in self.extra, if any.

        Each of file_size, compress_size and header_offset that currently
        holds its "all ones" placeholder is replaced, in that order, by the
        next 64-bit value of the record.  Records with other IDs are skipped.

        Raises BadZipFile when a record claims more bytes than are present,
        when the ZIP64 record has an unexpected length, or when it holds
        fewer values than there are placeholders to resolve.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    self.header_offset = counts[idx]
                    idx += 1

            # Move on to the next record.
            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        The modification time (as local time) and the mode bits come from
        os.stat(); directories get a trailing "/" in their name, a file_size
        of 0 and the MS-DOS directory flag.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member is a directory when its name ends with "/"; an empty name
        is not a directory.
        """
        # endswith() copes with an empty filename, where indexing [-1]
        # would raise IndexError.
        return self.filename.endswith('/')
543
544
545# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
546# internal keys. We noticed that a direct implementation is faster than
547# relying on binascii.crc32().
548
549_crctable = None
550def _gen_crc(crc):
551    for j in range(8):
552        if crc & 1:
553            crc = (crc >> 1) ^ 0xEDB88320
554        else:
555            crc >>= 1
556    return crc
557
558# ZIP supports a password-based form of encryption. Even though known
559# plaintext attacks have been found against it, it is still useful
560# to be able to get data out of such a file.
561#
562# Usage:
563#     zd = _ZipDecrypter(mypwd)
564#     plain_bytes = zd(cypher_bytes)
565
def _ZipDecrypter(pwd):
    """Return a function that decrypts data encrypted with password *pwd*.

    *pwd* is iterated as a sequence of integers (e.g. a bytes object) to
    initialise three internal keys.  The returned callable takes a bytes-like
    object and returns the decrypted bytes; the keys advance with every byte,
    so successive calls must be made in stream order.
    """
    global _crctable
    if _crctable is None:
        # Build the shared lookup table once.
        _crctable = [_gen_crc(value) for value in range(256)]
    table = _crctable

    key0, key1, key2 = 305419896, 591751049, 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        """Mix the plain byte *c* into the three keys."""
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = ((key1 + (key0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Prime the keys with the password.
    for pwd_byte in pwd:
        update_keys(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
602
603
class LZMACompressor:
    """Compressor that emits a raw LZMA1 stream preceded by a small header.

    The header is written in front of the first output produced by
    compress() or flush().  It consists of the bytes 9 and 4, the length of
    the encoded filter properties as a little-endian unsigned 16-bit value,
    and the properties themselves.
    NOTE(review): 9 and 4 are presumably the LZMA SDK version numbers the
    ZIP format asks for -- confirm against the specification.
    """

    def __init__(self):
        # The real compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*; the first output is prefixed with the header."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
625
626
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream that starts with a small header.

    Input is buffered until more than the whole header (4 bytes plus the
    filter properties whose length is stored in bytes 2-3) has arrived; the
    real decompressor is then created from those properties.  `eof` mirrors
    the underlying decompressor's `eof` after each decompression call.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return the decompressed bytes available so far.

        Returns b'' while the header is still incomplete.
        """
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            (psize,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The header buffer is no longer needed once decoding started.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
653
654
# Short names for ZIP compression method numbers.  Used only for display:
# in the reprs of ZipInfo and ZipExtFile and in the NotImplementedError
# message of _get_decompressor().  Being listed here does not mean that the
# method is supported.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
674
def _check_compression(compression):
    """Verify that the compression method *compression* can be used.

    Returns None when the method is ZIP_STORED or when the module backing
    the method was imported successfully.

    Raises RuntimeError when the required module (zlib, bz2 or lzma) is
    missing, and NotImplementedError for any other method number.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")
692
693
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    compresslevel, when not None, is passed to the zlib or bz2 compressor;
    it is ignored for ZIP_LZMA.  None is returned for every other method
    (including ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
708
709
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    None is returned for ZIP_STORED, which needs no decompression.

    Raises NotImplementedError for unsupported methods; the message includes
    the method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
725
726
727class _SharedFile:
728    def __init__(self, file, pos, close, lock, writing):
729        self._file = file
730        self._pos = pos
731        self._close = close
732        self._lock = lock
733        self._writing = writing
734        self.seekable = file.seekable
735        self.tell = file.tell
736
737    def seek(self, offset, whence=0):
738        with self._lock:
739            if self._writing():
740                raise ValueError("Can't reposition in the ZIP file while "
741                        "there is an open writing handle on it. "
742                        "Close the writing handle before trying to read.")
743            self._file.seek(offset, whence)
744            self._pos = self._file.tell()
745            return self._pos
746
747    def read(self, n=-1):
748        with self._lock:
749            if self._writing():
750                raise ValueError("Can't read from the ZIP file while there "
751                        "is an open writing handle on it. "
752                        "Close the writing handle before trying to read.")
753            self._file.seek(self._pos)
754            data = self._file.read(n)
755            self._pos = self._file.tell()
756            return data
757
758    def close(self):
759        if self._file is not None:
760            fileobj = self._file
761            self._file = None
762            self._close(fileobj)
763
764# Provide the tell method for unseekable stream
765class _Tellable:
766    def __init__(self, fp):
767        self.fp = fp
768        self.offset = 0
769
770    def write(self, data):
771        n = self.fp.write(data)
772        self.offset += n
773        return n
774
775    def tell(self):
776        return self.offset
777
778    def flush(self):
779        self.fp.flush()
780
781    def close(self):
782        self.fp.close()
783
784
785class ZipExtFile(io.BufferedIOBase):
786    """File-like object for reading an archive member.
787       Is returned by ZipFile.open().
788    """
789
    # Max size supported by decompressor.
    # NOTE(review): "-" binds tighter than "<<", so this expression evaluates
    # to 1 << 30, not (1 << 31) - 1 -- confirm which value was intended.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24
798
799    def __init__(self, fileobj, mode, zipinfo, pwd=None,
800                 close_fileobj=False):
801        self._fileobj = fileobj
802        self._pwd = pwd
803        self._close_fileobj = close_fileobj
804
805        self._compress_type = zipinfo.compress_type
806        self._compress_left = zipinfo.compress_size
807        self._left = zipinfo.file_size
808
809        self._decompressor = _get_decompressor(self._compress_type)
810
811        self._eof = False
812        self._readbuffer = b''
813        self._offset = 0
814
815        self.newlines = None
816
817        self.mode = mode
818        self.name = zipinfo.filename
819
820        if hasattr(zipinfo, 'CRC'):
821            self._expected_crc = zipinfo.CRC
822            self._running_crc = crc32(b'')
823        else:
824            self._expected_crc = None
825
826        self._seekable = False
827        try:
828            if fileobj.seekable():
829                self._orig_compress_start = fileobj.tell()
830                self._orig_compress_size = zipinfo.compress_size
831                self._orig_file_size = zipinfo.file_size
832                self._orig_start_crc = self._running_crc
833                self._seekable = True
834        except AttributeError:
835            pass
836
837        self._decrypter = None
838        if pwd:
839            if zipinfo.flag_bits & 0x8:
840                # compare against the file type from extended local headers
841                check_byte = (zipinfo._raw_time >> 8) & 0xff
842            else:
843                # compare against the CRC otherwise
844                check_byte = (zipinfo.CRC >> 24) & 0xff
845            h = self._init_decrypter()
846            if h != check_byte:
847                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
848
849
850    def _init_decrypter(self):
851        self._decrypter = _ZipDecrypter(self._pwd)
852        # The first 12 bytes in the cypher stream is an encryption header
853        #  used to strengthen the algorithm. The first 11 bytes are
854        #  completely random, while the 12th contains the MSB of the CRC,
855        #  or the MSB of the file time depending on the header type
856        #  and is used to check the correctness of the password.
857        header = self._fileobj.read(12)
858        self._compress_left -= 12
859        return self._decrypter(header)[11]
860
861    def __repr__(self):
862        result = ['<%s.%s' % (self.__class__.__module__,
863                              self.__class__.__qualname__)]
864        if not self.closed:
865            result.append(' name=%r mode=%r' % (self.name, self.mode))
866            if self._compress_type != ZIP_STORED:
867                result.append(' compress_type=%s' %
868                              compressor_names.get(self._compress_type,
869                                                   self._compress_type))
870        else:
871            result.append(' [closed]')
872        result.append('>')
873        return ''.join(result)
874
875    def readline(self, limit=-1):
876        """Read and return a line from the stream.
877
878        If limit is specified, at most limit bytes will be read.
879        """
880
881        if limit < 0:
882            # Shortcut common case - newline found in buffer.
883            i = self._readbuffer.find(b'\n', self._offset) + 1
884            if i > 0:
885                line = self._readbuffer[self._offset: i]
886                self._offset = i
887                return line
888
889        return io.BufferedIOBase.readline(self, limit)
890
891    def peek(self, n=1):
892        """Returns buffered bytes without advancing the position."""
893        if n > len(self._readbuffer) - self._offset:
894            chunk = self.read(n)
895            if len(chunk) > self._offset:
896                self._readbuffer = chunk + self._readbuffer[self._offset:]
897                self._offset = 0
898            else:
899                self._offset -= len(chunk)
900
901        # Return up to 512 bytes to reduce allocation overhead for tight loops.
902        return self._readbuffer[self._offset: self._offset + 512]
903
904    def readable(self):
905        return True
906
907    def read(self, n=-1):
908        """Read and return up to n bytes.
909        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
910        """
911        if n is None or n < 0:
912            buf = self._readbuffer[self._offset:]
913            self._readbuffer = b''
914            self._offset = 0
915            while not self._eof:
916                buf += self._read1(self.MAX_N)
917            return buf
918
919        end = n + self._offset
920        if end < len(self._readbuffer):
921            buf = self._readbuffer[self._offset:end]
922            self._offset = end
923            return buf
924
925        n = end - len(self._readbuffer)
926        buf = self._readbuffer[self._offset:]
927        self._readbuffer = b''
928        self._offset = 0
929        while n > 0 and not self._eof:
930            data = self._read1(n)
931            if n < len(data):
932                self._readbuffer = data
933                self._offset = n
934                buf += data[:n]
935                break
936            buf += data
937            n -= len(data)
938        return buf
939
940    def _update_crc(self, newdata):
941        # Update the CRC using the given data.
942        if self._expected_crc is None:
943            # No need to compute the CRC if we don't have a reference value
944            return
945        self._running_crc = crc32(newdata, self._running_crc)
946        # Check the CRC if we're at the end of the file
947        if self._eof and self._running_crc != self._expected_crc:
948            raise BadZipFile("Bad CRC-32 for file %r" % self.name)
949
950    def read1(self, n):
951        """Read up to n bytes with at most one read() system call."""
952
953        if n is None or n < 0:
954            buf = self._readbuffer[self._offset:]
955            self._readbuffer = b''
956            self._offset = 0
957            while not self._eof:
958                data = self._read1(self.MAX_N)
959                if data:
960                    buf += data
961                    break
962            return buf
963
964        end = n + self._offset
965        if end < len(self._readbuffer):
966            buf = self._readbuffer[self._offset:end]
967            self._offset = end
968            return buf
969
970        n = end - len(self._readbuffer)
971        buf = self._readbuffer[self._offset:]
972        self._readbuffer = b''
973        self._offset = 0
974        if n > 0:
975            while not self._eof:
976                data = self._read1(n)
977                if n < len(data):
978                    self._readbuffer = data
979                    self._offset = n
980                    buf += data[:n]
981                    break
982                if data:
983                    buf += data
984                    break
985        return buf
986
    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        #
        # Returns the resulting uncompressed bytes (possibly empty) and
        # updates self._eof, self._left and the running CRC as a side effect.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            # Input the decompressor has not consumed yet is used first;
            # the file is only read for the part of n that is still missing.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            # Stored data needs no decoding; EOF is simply "no compressed
            # bytes left".
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            # Cap the decompressed output at n, but never below MIN_READ_SIZE.
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            # EOF when the decompressor reports end of stream, or when the
            # input is exhausted and nothing is pending in unconsumed_tail.
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            # Any other compression type: decompress everything that was read.
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the uncompressed bytes still expected.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        # May raise BadZipFile once EOF is reached and the CRC does not match.
        self._update_crc(data)
        return data
1022
1023    def _read2(self, n):
1024        if self._compress_left <= 0:
1025            return b''
1026
1027        n = max(n, self.MIN_READ_SIZE)
1028        n = min(n, self._compress_left)
1029
1030        data = self._fileobj.read(n)
1031        self._compress_left -= len(data)
1032        if not data:
1033            raise EOFError
1034
1035        if self._decrypter is not None:
1036            data = self._decrypter(data)
1037        return data
1038
1039    def close(self):
1040        try:
1041            if self._close_fileobj:
1042                self._fileobj.close()
1043        finally:
1044            super().close()
1045
1046    def seekable(self):
1047        return self._seekable
1048
    def seek(self, offset, whence=0):
        """Move the read position within the uncompressed member data.

        offset is interpreted relative to the start of the data (whence 0),
        the current position (whence 1) or the end of the data (whence 2).
        The target is clamped to the range 0..uncompressed size.  Returns
        the new position as reported by tell().

        Raises io.UnsupportedOperation if the underlying stream is not
        seekable, and ValueError for any other whence value.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target to the bounds of the uncompressed data.
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        # read_offset: distance to travel; buff_offset: where that lands
        # relative to the start of the current read buffer.
        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # to the state it had at the start of the member data, so the
            # target can be reached by reading forward from the beginning.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                # Re-create the decrypter and skip the encryption header again.
                self._init_decrypter()

        # Advance by reading and discarding data, at most MAX_SEEK_READ
        # bytes per read() call.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()
1096
1097    def tell(self):
1098        if not self._seekable:
1099            raise io.UnsupportedOperation("underlying stream is not seekable")
1100        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1101        return filepos
1102
1103
1104class _ZipWriteFile(io.BufferedIOBase):
1105    def __init__(self, zf, zinfo, zip64):
1106        self._zinfo = zinfo
1107        self._zip64 = zip64
1108        self._zipfile = zf
1109        self._compressor = _get_compressor(zinfo.compress_type,
1110                                           zinfo._compresslevel)
1111        self._file_size = 0
1112        self._compress_size = 0
1113        self._crc = 0
1114
1115    @property
1116    def _fileobj(self):
1117        return self._zipfile.fp
1118
1119    def writable(self):
1120        return True
1121
1122    def write(self, data):
1123        if self.closed:
1124            raise ValueError('I/O operation on closed file.')
1125        nbytes = len(data)
1126        self._file_size += nbytes
1127        self._crc = crc32(data, self._crc)
1128        if self._compressor:
1129            data = self._compressor.compress(data)
1130            self._compress_size += len(data)
1131        self._fileobj.write(data)
1132        return nbytes
1133
1134    def close(self):
1135        if self.closed:
1136            return
1137        try:
1138            super().close()
1139            # Flush any data from the compressor, and update header info
1140            if self._compressor:
1141                buf = self._compressor.flush()
1142                self._compress_size += len(buf)
1143                self._fileobj.write(buf)
1144                self._zinfo.compress_size = self._compress_size
1145            else:
1146                self._zinfo.compress_size = self._file_size
1147            self._zinfo.CRC = self._crc
1148            self._zinfo.file_size = self._file_size
1149
1150            # Write updated header info
1151            if self._zinfo.flag_bits & 0x08:
1152                # Write CRC and file sizes after the file data
1153                fmt = '<LLQQ' if self._zip64 else '<LLLL'
1154                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1155                    self._zinfo.compress_size, self._zinfo.file_size))
1156                self._zipfile.start_dir = self._fileobj.tell()
1157            else:
1158                if not self._zip64:
1159                    if self._file_size > ZIP64_LIMIT:
1160                        raise RuntimeError(
1161                            'File size unexpectedly exceeded ZIP64 limit')
1162                    if self._compress_size > ZIP64_LIMIT:
1163                        raise RuntimeError(
1164                            'Compressed size unexpectedly exceeded ZIP64 limit')
1165                # Seek backwards and write file header (which will now include
1166                # correct CRC and file sizes)
1167
1168                # Preserve current position in file
1169                self._zipfile.start_dir = self._fileobj.tell()
1170                self._fileobj.seek(self._zinfo.header_offset)
1171                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1172                self._fileobj.seek(self._zipfile.start_dir)
1173
1174            # Successfully written: Add file to our caches
1175            self._zipfile.filelist.append(self._zinfo)
1176            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1177        finally:
1178            self._zipfile._writing = False
1179
1180
1181
1182class ZipFile:
1183    """ Class with methods to open, read, write, close, list zip files.
1184
1185    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1186                compresslevel=None)
1187
1188    file: Either the path to the file, or a file-like object.
1189          If it is a path, the file will be opened and closed by ZipFile.
1190    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1191          or append 'a'.
1192    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1193                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1194    allowZip64: if True ZipFile will create files with ZIP64 extensions when
1195                needed, otherwise it will raise an exception when this would
1196                be necessary.
1197    compresslevel: None (default for the given compression type) or an integer
1198                   specifying the level to pass to the compressor.
1199                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1200                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
1201                   When using ZIP_BZIP2 integers 1 through 9 are accepted.
1202
1203    """
1204
1205    fp = None                   # Set here since __del__ checks it
1206    _windows_illegal_name_trans_table = None
1207
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        file is either a path (str or os.PathLike), which is opened here, or
        an already open file-like object, which is used as given.
        compression, allowZip64 and compresslevel are stored as the defaults
        for members written later.

        Raises ValueError for any other mode.  If reading or preparing the
        archive fails, the file reference is released before the exception
        propagates.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the archive mode to the io.open() mode to try first, and
            # each of those to a fallback mode to try if opening raises
            # OSError ('rb' has no fallback).
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    # No fallback left: report the failure to the caller.
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the file object in _Tellable
                    # and treat the archive as non-seekable.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Release the file reference on any failure (the bare except is
            # deliberate: the exception is always re-raised).
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
1296
1297    def __enter__(self):
1298        return self
1299
1300    def __exit__(self, type, value, traceback):
1301        self.close()
1302
1303    def __repr__(self):
1304        result = ['<%s.%s' % (self.__class__.__module__,
1305                              self.__class__.__qualname__)]
1306        if self.fp is not None:
1307            if self._filePassed:
1308                result.append(' file=%r' % self.fp)
1309            elif self.filename is not None:
1310                result.append(' filename=%r' % self.filename)
1311            result.append(' mode=%r' % self.mode)
1312        else:
1313            result.append(' [closed]')
1314        result.append('>')
1315        return ''.join(result)
1316
1317    def _RealGetContents(self):
1318        """Read in the table of contents for the ZIP file."""
1319        fp = self.fp
1320        try:
1321            endrec = _EndRecData(fp)
1322        except OSError:
1323            raise BadZipFile("File is not a zip file")
1324        if not endrec:
1325            raise BadZipFile("File is not a zip file")
1326        if self.debug > 1:
1327            print(endrec)
1328        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
1329        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
1330        self._comment = endrec[_ECD_COMMENT]    # archive comment
1331
1332        # "concat" is zero, unless zip was concatenated to another file
1333        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1334        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1335            # If Zip64 extension structures are present, account for them
1336            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1337
1338        if self.debug > 2:
1339            inferred = concat + offset_cd
1340            print("given, inferred, offset", offset_cd, inferred, concat)
1341        # self.start_dir:  Position of start of central directory
1342        self.start_dir = offset_cd + concat
1343        fp.seek(self.start_dir, 0)
1344        data = fp.read(size_cd)
1345        fp = io.BytesIO(data)
1346        total = 0
1347        while total < size_cd:
1348            centdir = fp.read(sizeCentralDir)
1349            if len(centdir) != sizeCentralDir:
1350                raise BadZipFile("Truncated central directory")
1351            centdir = struct.unpack(structCentralDir, centdir)
1352            if centdir[_CD_SIGNATURE] != stringCentralDir:
1353                raise BadZipFile("Bad magic number for central directory")
1354            if self.debug > 2:
1355                print(centdir)
1356            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1357            flags = centdir[5]
1358            if flags & 0x800:
1359                # UTF-8 file names extension
1360                filename = filename.decode('utf-8')
1361            else:
1362                # Historical ZIP filename encoding
1363                filename = filename.decode('cp437')
1364            # Create ZipInfo instance to store file information
1365            x = ZipInfo(filename)
1366            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1367            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1368            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1369            (x.create_version, x.create_system, x.extract_version, x.reserved,
1370             x.flag_bits, x.compress_type, t, d,
1371             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1372            if x.extract_version > MAX_EXTRACT_VERSION:
1373                raise NotImplementedError("zip file version %.1f" %
1374                                          (x.extract_version / 10))
1375            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1376            # Convert date/time code to (year, month, day, hour, min, sec)
1377            x._raw_time = t
1378            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1379                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1380
1381            x._decodeExtra()
1382            x.header_offset = x.header_offset + concat
1383            self.filelist.append(x)
1384            self.NameToInfo[x.filename] = x
1385
1386            # update total bytes read from central directory
1387            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1388                     + centdir[_CD_EXTRA_FIELD_LENGTH]
1389                     + centdir[_CD_COMMENT_LENGTH])
1390
1391            if self.debug > 2:
1392                print("total", total)
1393
1394
1395    def namelist(self):
1396        """Return a list of file names in the archive."""
1397        return [data.filename for data in self.filelist]
1398
1399    def infolist(self):
1400        """Return a list of class ZipInfo instances for files in the
1401        archive."""
1402        return self.filelist
1403
1404    def printdir(self, file=None):
1405        """Print a table of contents for the zip file."""
1406        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1407              file=file)
1408        for zinfo in self.filelist:
1409            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1410            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1411                  file=file)
1412
1413    def testzip(self):
1414        """Read all the files and check the CRC."""
1415        chunk_size = 2 ** 20
1416        for zinfo in self.filelist:
1417            try:
1418                # Read by chunks, to avoid an OverflowError or a
1419                # MemoryError with very large embedded files.
1420                with self.open(zinfo.filename, "r") as f:
1421                    while f.read(chunk_size):     # Check CRC-32
1422                        pass
1423            except BadZipFile:
1424                return zinfo.filename
1425
1426    def getinfo(self, name):
1427        """Return the instance of ZipInfo given 'name'."""
1428        info = self.NameToInfo.get(name)
1429        if info is None:
1430            raise KeyError(
1431                'There is no item named %r in the archive' % name)
1432
1433        return info
1434
1435    def setpassword(self, pwd):
1436        """Set default password for encrypted files."""
1437        if pwd and not isinstance(pwd, bytes):
1438            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1439        if pwd:
1440            self.pwd = pwd
1441        else:
1442            self.pwd = None
1443
1444    @property
1445    def comment(self):
1446        """The comment text associated with the ZIP file."""
1447        return self._comment
1448
1449    @comment.setter
1450    def comment(self, comment):
1451        if not isinstance(comment, bytes):
1452            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1453        # check for valid comment length
1454        if len(comment) > ZIP_MAX_COMMENT:
1455            import warnings
1456            warnings.warn('Archive comment is too long; truncating to %d bytes'
1457                          % ZIP_MAX_COMMENT, stacklevel=2)
1458            comment = comment[:ZIP_MAX_COMMENT]
1459        self._comment = comment
1460        self._didModify = True
1461
1462    def read(self, name, pwd=None):
1463        """Return file bytes for name."""
1464        with self.open(name, "r", pwd) as fp:
1465            return fp.read()
1466
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for an unsupported mode, for a password given
        together with mode 'w', for a closed archive, or for reading while a
        writing handle is open; TypeError if pwd is not bytes; KeyError if a
        name to read is not in the archive; BadZipFile if the local file
        header is truncated, has a bad signature or names a different file
        than the central directory; NotImplementedError for compressed
        patched data or strong encryption; RuntimeError if the member is
        encrypted and no password is available.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            # New member: start from the archive-wide compression defaults.
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # Take an extra reference on the shared file object; it is handed
        # to _SharedFile together with self._fpclose.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                # The extra field is read only to move past it.
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header must name the same file as the directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                # An explicit pwd wins over the archive's default password.
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                # A password is ignored for members that are not encrypted.
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            # Do not leak the shared file handle on any failure (the bare
            # except is deliberate: the exception is always re-raised).
            zef_file.close()
            raise
1564
1565    def _open_to_write(self, zinfo, force_zip64=False):
1566        if force_zip64 and not self._allowZip64:
1567            raise ValueError(
1568                "force_zip64 is True, but allowZip64 was False when opening "
1569                "the ZIP file."
1570            )
1571        if self._writing:
1572            raise ValueError("Can't write to the ZIP file while there is "
1573                             "another write handle open on it. "
1574                             "Close the first handle before opening another.")
1575
1576        # Sizes and CRC are overwritten with correct data after processing the file
1577        if not hasattr(zinfo, 'file_size'):
1578            zinfo.file_size = 0
1579        zinfo.compress_size = 0
1580        zinfo.CRC = 0
1581
1582        zinfo.flag_bits = 0x00
1583        if zinfo.compress_type == ZIP_LZMA:
1584            # Compressed data includes an end-of-stream (EOS) marker
1585            zinfo.flag_bits |= 0x02
1586        if not self._seekable:
1587            zinfo.flag_bits |= 0x08
1588
1589        if not zinfo.external_attr:
1590            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1591
1592        # Compressed size can be larger than uncompressed size
1593        zip64 = self._allowZip64 and \
1594                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1595
1596        if self._seekable:
1597            self.fp.seek(self.start_dir)
1598        zinfo.header_offset = self.fp.tell()
1599
1600        self._writecheck(zinfo)
1601        self._didModify = True
1602
1603        self.fp.write(zinfo.FileHeader(zip64))
1604
1605        self._writing = True
1606        return _ZipWriteFile(self, zinfo, zip64)
1607
1608    def extract(self, member, path=None, pwd=None):
1609        """Extract a member from the archive to the current working directory,
1610           using its full name. Its file information is extracted as accurately
1611           as possible. `member' may be a filename or a ZipInfo object. You can
1612           specify a different directory using `path'.
1613        """
1614        if path is None:
1615            path = os.getcwd()
1616        else:
1617            path = os.fspath(path)
1618
1619        return self._extract_member(member, path, pwd)
1620
1621    def extractall(self, path=None, members=None, pwd=None):
1622        """Extract all members from the archive to the current working
1623           directory. `path' specifies a different directory to extract to.
1624           `members' is optional and must be a subset of the list returned
1625           by namelist().
1626        """
1627        if members is None:
1628            members = self.namelist()
1629
1630        if path is None:
1631            path = os.getcwd()
1632        else:
1633            path = os.fspath(path)
1634
1635        for zipinfo in members:
1636            self._extract_member(zipinfo, path, pwd)
1637
1638    @classmethod
1639    def _sanitize_windows_name(cls, arcname, pathsep):
1640        """Replace bad characters and remove trailing dots from parts."""
1641        table = cls._windows_illegal_name_trans_table
1642        if not table:
1643            illegal = ':<>|"?*'
1644            table = str.maketrans(illegal, '_' * len(illegal))
1645            cls._windows_illegal_name_trans_table = table
1646        arcname = arcname.translate(table)
1647        # remove trailing dots
1648        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1649        # rejoin, removing empty parts.
1650        arcname = pathsep.join(x for x in arcname if x)
1651        return arcname
1652
1653    def _extract_member(self, member, targetpath, pwd):
1654        """Extract the ZipInfo object 'member' to a physical
1655           file on the path targetpath.
1656        """
1657        if not isinstance(member, ZipInfo):
1658            member = self.getinfo(member)
1659
1660        # build the destination pathname, replacing
1661        # forward slashes to platform specific separators.
1662        arcname = member.filename.replace('/', os.path.sep)
1663
1664        if os.path.altsep:
1665            arcname = arcname.replace(os.path.altsep, os.path.sep)
1666        # interpret absolute pathname as relative, remove drive letter or
1667        # UNC path, redundant separators, "." and ".." components.
1668        arcname = os.path.splitdrive(arcname)[1]
1669        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1670        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1671                                   if x not in invalid_path_parts)
1672        if os.path.sep == '\\':
1673            # filter illegal characters on Windows
1674            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1675
1676        targetpath = os.path.join(targetpath, arcname)
1677        targetpath = os.path.normpath(targetpath)
1678
1679        # Create all upper directories if necessary.
1680        upperdirs = os.path.dirname(targetpath)
1681        if upperdirs and not os.path.exists(upperdirs):
1682            os.makedirs(upperdirs)
1683
1684        if member.is_dir():
1685            if not os.path.isdir(targetpath):
1686                os.mkdir(targetpath)
1687            return targetpath
1688
1689        with self.open(member, pwd=pwd) as source, \
1690             open(targetpath, "wb") as target:
1691            shutil.copyfileobj(source, target)
1692
1693        return targetpath
1694
1695    def _writecheck(self, zinfo):
1696        """Check for errors before writing a file to the archive."""
1697        if zinfo.filename in self.NameToInfo:
1698            import warnings
1699            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1700        if self.mode not in ('w', 'x', 'a'):
1701            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1702        if not self.fp:
1703            raise ValueError(
1704                "Attempt to write ZIP archive that was already closed")
1705        _check_compression(zinfo.compress_type)
1706        if not self._allowZip64:
1707            requires_zip64 = None
1708            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1709                requires_zip64 = "Files count"
1710            elif zinfo.file_size > ZIP64_LIMIT:
1711                requires_zip64 = "Filesize"
1712            elif zinfo.header_offset > ZIP64_LIMIT:
1713                requires_zip64 = "Zipfile size"
1714            if requires_zip64:
1715                raise LargeZipFile(requires_zip64 +
1716                                   " would require ZIP64 extensions")
1717
1718    def write(self, filename, arcname=None,
1719              compress_type=None, compresslevel=None):
1720        """Put the bytes from filename into the archive under the name
1721        arcname."""
1722        if not self.fp:
1723            raise ValueError(
1724                "Attempt to write to ZIP archive that was already closed")
1725        if self._writing:
1726            raise ValueError(
1727                "Can't write to ZIP archive while an open writing handle exists"
1728            )
1729
1730        zinfo = ZipInfo.from_file(filename, arcname)
1731
1732        if zinfo.is_dir():
1733            zinfo.compress_size = 0
1734            zinfo.CRC = 0
1735        else:
1736            if compress_type is not None:
1737                zinfo.compress_type = compress_type
1738            else:
1739                zinfo.compress_type = self.compression
1740
1741            if compresslevel is not None:
1742                zinfo._compresslevel = compresslevel
1743            else:
1744                zinfo._compresslevel = self.compresslevel
1745
1746        if zinfo.is_dir():
1747            with self._lock:
1748                if self._seekable:
1749                    self.fp.seek(self.start_dir)
1750                zinfo.header_offset = self.fp.tell()  # Start of header bytes
1751                if zinfo.compress_type == ZIP_LZMA:
1752                # Compressed data includes an end-of-stream (EOS) marker
1753                    zinfo.flag_bits |= 0x02
1754
1755                self._writecheck(zinfo)
1756                self._didModify = True
1757
1758                self.filelist.append(zinfo)
1759                self.NameToInfo[zinfo.filename] = zinfo
1760                self.fp.write(zinfo.FileHeader(False))
1761                self.start_dir = self.fp.tell()
1762        else:
1763            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1764                shutil.copyfileobj(src, dest, 1024*8)
1765
1766    def writestr(self, zinfo_or_arcname, data,
1767                 compress_type=None, compresslevel=None):
1768        """Write a file into the archive.  The contents is 'data', which
1769        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1770        it is encoded as UTF-8 first.
1771        'zinfo_or_arcname' is either a ZipInfo instance or
1772        the name of the file in the archive."""
1773        if isinstance(data, str):
1774            data = data.encode("utf-8")
1775        if not isinstance(zinfo_or_arcname, ZipInfo):
1776            zinfo = ZipInfo(filename=zinfo_or_arcname,
1777                            date_time=time.localtime(time.time())[:6])
1778            zinfo.compress_type = self.compression
1779            zinfo._compresslevel = self.compresslevel
1780            if zinfo.filename[-1] == '/':
1781                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1782                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1783            else:
1784                zinfo.external_attr = 0o600 << 16     # ?rw-------
1785        else:
1786            zinfo = zinfo_or_arcname
1787
1788        if not self.fp:
1789            raise ValueError(
1790                "Attempt to write to ZIP archive that was already closed")
1791        if self._writing:
1792            raise ValueError(
1793                "Can't write to ZIP archive while an open writing handle exists."
1794            )
1795
1796        if compress_type is not None:
1797            zinfo.compress_type = compress_type
1798
1799        if compresslevel is not None:
1800            zinfo._compresslevel = compresslevel
1801
1802        zinfo.file_size = len(data)            # Uncompressed size
1803        with self._lock:
1804            with self.open(zinfo, mode='w') as dest:
1805                dest.write(data)
1806
1807    def __del__(self):
1808        """Call the "close()" method in case the user forgot."""
1809        self.close()
1810
1811    def close(self):
1812        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1813        records."""
1814        if self.fp is None:
1815            return
1816
1817        if self._writing:
1818            raise ValueError("Can't close the ZIP file while there is "
1819                             "an open writing handle on it. "
1820                             "Close the writing handle before closing the zip.")
1821
1822        try:
1823            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1824                with self._lock:
1825                    if self._seekable:
1826                        self.fp.seek(self.start_dir)
1827                    self._write_end_record()
1828        finally:
1829            fp = self.fp
1830            self.fp = None
1831            self._fpclose(fp)
1832
1833    def _write_end_record(self):
1834        for zinfo in self.filelist:         # write central directory
1835            dt = zinfo.date_time
1836            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1837            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1838            extra = []
1839            if zinfo.file_size > ZIP64_LIMIT \
1840               or zinfo.compress_size > ZIP64_LIMIT:
1841                extra.append(zinfo.file_size)
1842                extra.append(zinfo.compress_size)
1843                file_size = 0xffffffff
1844                compress_size = 0xffffffff
1845            else:
1846                file_size = zinfo.file_size
1847                compress_size = zinfo.compress_size
1848
1849            if zinfo.header_offset > ZIP64_LIMIT:
1850                extra.append(zinfo.header_offset)
1851                header_offset = 0xffffffff
1852            else:
1853                header_offset = zinfo.header_offset
1854
1855            extra_data = zinfo.extra
1856            min_version = 0
1857            if extra:
1858                # Append a ZIP64 field to the extra's
1859                extra_data = _strip_extra(extra_data, (1,))
1860                extra_data = struct.pack(
1861                    '<HH' + 'Q'*len(extra),
1862                    1, 8*len(extra), *extra) + extra_data
1863
1864                min_version = ZIP64_VERSION
1865
1866            if zinfo.compress_type == ZIP_BZIP2:
1867                min_version = max(BZIP2_VERSION, min_version)
1868            elif zinfo.compress_type == ZIP_LZMA:
1869                min_version = max(LZMA_VERSION, min_version)
1870
1871            extract_version = max(min_version, zinfo.extract_version)
1872            create_version = max(min_version, zinfo.create_version)
1873            try:
1874                filename, flag_bits = zinfo._encodeFilenameFlags()
1875                centdir = struct.pack(structCentralDir,
1876                                      stringCentralDir, create_version,
1877                                      zinfo.create_system, extract_version, zinfo.reserved,
1878                                      flag_bits, zinfo.compress_type, dostime, dosdate,
1879                                      zinfo.CRC, compress_size, file_size,
1880                                      len(filename), len(extra_data), len(zinfo.comment),
1881                                      0, zinfo.internal_attr, zinfo.external_attr,
1882                                      header_offset)
1883            except DeprecationWarning:
1884                print((structCentralDir, stringCentralDir, create_version,
1885                       zinfo.create_system, extract_version, zinfo.reserved,
1886                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1887                       zinfo.CRC, compress_size, file_size,
1888                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
1889                       0, zinfo.internal_attr, zinfo.external_attr,
1890                       header_offset), file=sys.stderr)
1891                raise
1892            self.fp.write(centdir)
1893            self.fp.write(filename)
1894            self.fp.write(extra_data)
1895            self.fp.write(zinfo.comment)
1896
1897        pos2 = self.fp.tell()
1898        # Write end-of-zip-archive record
1899        centDirCount = len(self.filelist)
1900        centDirSize = pos2 - self.start_dir
1901        centDirOffset = self.start_dir
1902        requires_zip64 = None
1903        if centDirCount > ZIP_FILECOUNT_LIMIT:
1904            requires_zip64 = "Files count"
1905        elif centDirOffset > ZIP64_LIMIT:
1906            requires_zip64 = "Central directory offset"
1907        elif centDirSize > ZIP64_LIMIT:
1908            requires_zip64 = "Central directory size"
1909        if requires_zip64:
1910            # Need to write the ZIP64 end-of-archive records
1911            if not self._allowZip64:
1912                raise LargeZipFile(requires_zip64 +
1913                                   " would require ZIP64 extensions")
1914            zip64endrec = struct.pack(
1915                structEndArchive64, stringEndArchive64,
1916                44, 45, 45, 0, 0, centDirCount, centDirCount,
1917                centDirSize, centDirOffset)
1918            self.fp.write(zip64endrec)
1919
1920            zip64locrec = struct.pack(
1921                structEndArchive64Locator,
1922                stringEndArchive64Locator, 0, pos2, 1)
1923            self.fp.write(zip64locrec)
1924            centDirCount = min(centDirCount, 0xFFFF)
1925            centDirSize = min(centDirSize, 0xFFFFFFFF)
1926            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1927
1928        endrec = struct.pack(structEndArchive, stringEndArchive,
1929                             0, 0, centDirCount, centDirCount,
1930                             centDirSize, centDirOffset, len(self._comment))
1931        self.fp.write(endrec)
1932        self.fp.write(self._comment)
1933        self.fp.flush()
1934
1935    def _fpclose(self, fp):
1936        assert self._fileRefCnt > 0
1937        self._fileRefCnt -= 1
1938        if not self._fileRefCnt and not self._filePassed:
1939            fp.close()
1940
1941
class PyZipFile(ZipFile):
    """ZipFile variant that can add Python modules and packages, compiling
    them to byte code where needed."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember `optimize'.

        `optimize' is -1 for the legacy behaviour (use whatever byte code
        file is present) or 0, 1 or 2 to select an optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py), the
        package and all of its sub-packages are added recursively.  If it
        is a plain directory, the *.py files directly inside it are added.
        Otherwise pathname must name a *.py file, which is added on its
        own.  Modules are compiled when necessary; see _get_codename().
        "basename" is the archive prefix the entries are stored under.
        If filterfunc is given, it is called with each path; when it
        returns a false value the file or directory is skipped.

        Raises RuntimeError for a non-directory that does not end in ".py".
        """
        def add_module(source_path, prefix):
            # Archive a single *.py file under `prefix', unless filtered out.
            if filterfunc and not filterfunc(source_path):
                if self.debug:
                    print('file %r skipped by filterfunc' % source_path)
                return
            fname, arcname = self._get_codename(source_path[0:-3], prefix)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename), basename)
            return

        # This is a package directory, add it
        name = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        # The package's __init__ is added without consulting filterfunc.
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = sorted(os.listdir(pathname))
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path `pathname'.

        `pathname' is the module path without its ".py" suffix.  The
        returned filename is the file to read from disk: an up-to-date
        byte code file, one compiled on the spot, or the ".py" source
        itself when compilation fails.  The archivename is the final path
        component of the matching "<module>.pyc" (or ".py") name, prefixed
        with "basename/" when basename is not empty.

        Raises ValueError when self._optimize is not -1, 0, 1 or 2.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def _up_to_date(candidate):
            # True if `candidate' exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        # Byte code is always stored under the legacy pyc name in the archive.
        arcname = file_pyc
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _up_to_date(candidate):
                    fname = candidate
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
2114
2115
def main(args=None):
    """Command-line entry point of the module.

    `args' is forwarded to the argument parser.  Exactly one action must
    be selected: -l/--list, -e/--extract, -c/--create or -t/--test.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<zipfile>',
                         help='Show listing of a zipfile')
    actions.add_argument('-e', '--extract', nargs=2,
                         metavar=('<zipfile>', '<output_dir>'),
                         help='Extract zipfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create zipfile from sources')
    actions.add_argument('-t', '--test', metavar='<zipfile>',
                         help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as zf:
            zf.printdir()
        return

    if options.extract is not None:
        archive, out_dir = options.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(out_dir)
        return

    if options.create is None:
        return

    # First value is the archive to create, the rest are the sources.
    zip_name, *sources = options.create

    def add_to_zip(zf, path, zippath):
        # Add `path' as `zippath'; directories are walked recursively.
        if os.path.isfile(path):
            zf.write(path, zippath, ZIP_DEFLATED)
            return
        if not os.path.isdir(path):
            return  # neither a file nor a directory: ignore
        if zippath:
            zf.write(path, zippath)
        for nm in sorted(os.listdir(path)):
            add_to_zip(zf,
                       os.path.join(path, nm), os.path.join(zippath, nm))

    with ZipFile(zip_name, 'w') as zf:
        for path in sources:
            zippath = os.path.basename(path)
            if not zippath:
                # `path' ends with a separator: use the directory's own name.
                zippath = os.path.basename(os.path.dirname(path))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_to_zip(zf, path, zippath)
2175
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main()
2178