1#-----------------------------------------------------------------------------
2# Copyright (c) 2005-2019, PyInstaller Development Team.
3#
4# Distributed under the terms of the GNU General Public License with exception
5# for distributing bootloader.
6#
7# The full license is in the file COPYING.txt, distributed with this software.
8#-----------------------------------------------------------------------------
9
10# TODO clean up this module
11
12# Subclasses may not need marshal or struct, but since they're
13# builtin, importing is safe.
14#
# While an Archive is really an abstraction for any "filesystem
# within a file", it is tuned for use with imputil.FuncImporter.
# This assumes it contains python code objects, indexed by
# the internal name (i.e., no '.py').
19
20# See pyi_carchive.py for a more general archive (contains anything)
21# that can be understood by a C program.
22
23
24### **NOTE** This module is used during bootstrap.
25### Import *ONLY* builtin modules.
26
27import marshal
28import struct
29import sys
30import zlib
31if sys.version_info[0] == 2:
32    import thread
33else:
34    import _thread as thread
35
36
# Block size used when decrypting Python modules: the key is normalized to
# this length, and each encrypted blob starts with an initialization vector
# of this length.
CRYPT_BLOCK_SIZE = 16


# Content types recorded for PYZ entries: plain module, package, raw data.
PYZ_TYPE_MODULE, PYZ_TYPE_PKG, PYZ_TYPE_DATA = 0, 1, 2
45
class FilePos(object):
    """
    Plain state holder pairing an open file object with a saved offset.

    A fresh instance starts with no file object and a saved position of 0.
    """
    def __init__(self):
        # `file`: the currently open file object, or None when nothing is open.
        # `pos`: the offset the file was at when it was last closed.
        self.file, self.pos = None, 0
56
57
class ArchiveFile(object):
    """
    File-like wrapper that keeps its underlying file open only while it is
    used as a context manager.

    The constructor arguments are stored and handed to ``open()`` on every
    ``with`` entry. On exit the file is closed and its position remembered, so
    the next ``with`` block resumes where the previous one stopped. The open
    file object and the saved position are tracked separately per thread.
    This class is used to avoid file locking on Windows.
    """

    def __init__(self, *args, **kwargs):
        # Arguments are replayed verbatim to open() each time the file is
        # (re)opened in __enter__.
        self.args = args
        self.kwargs = kwargs
        # Maps thread identifier -> FilePos for that thread.
        self._filePos = {}

    def local(self):
        """
        Return the FilePos instance belonging to the current thread, creating
        it on first use.

        This is a crude re-implementation of threading.local, which isn't a
        built-in module and therefore isn't available.
        """
        ti = thread.get_ident()
        try:
            return self._filePos[ti]
        except KeyError:
            fp = self._filePos[ti] = FilePos()
            return fp

    def __getattr__(self, name):
        """
        Make this class act like a file, by forwarding any attribute that is
        not found on the wrapper to the file opened by the current thread.

        Raises AttributeError when the current thread has no open file, so
        that ``hasattr()`` and ``getattr(obj, name, default)`` behave normally.
        """
        # __getattr__ runs only after normal lookup failed. If '_filePos'
        # itself is missing (instance created without __init__), calling
        # local() would re-enter __getattr__ forever.
        if name == '_filePos':
            raise AttributeError(name)
        fobj = self.local().file
        if fobj is None:
            raise AttributeError(
                "%r object has no attribute %r (no file is open; use it "
                "inside a 'with' block)" % (self.__class__.__name__, name))
        return getattr(fobj, name)

    def __enter__(self):
        """
        Open the file, seek to the position recorded at the last close, and
        return this wrapper so ``with ArchiveFile(...) as f`` binds a usable
        object.
        """
        # The file shouldn't be open yet.
        fp = self.local()
        assert not fp.file
        fobj = open(*self.args, **self.kwargs)
        try:
            fobj.seek(fp.pos)
        except Exception:
            # Do not leak the handle, and do not leave the wrapper marked as
            # open, when restoring the position fails.
            fobj.close()
            raise
        fp.file = fobj
        return self

    def __exit__(self, type, value, traceback):
        """
        Record the current position and close the file. Returns None, so an
        exception raised inside the ``with`` block is never suppressed.
        """
        # The file should still be open.
        fp = self.local()
        assert fp.file

        # Detach the handle first so the wrapper is reset even if tell() or
        # close() raises; the handle is closed in all cases.
        fobj = fp.file
        fp.file = None
        try:
            fp.pos = fobj.tell()
        finally:
            fobj.close()
112
113
class ArchiveReadError(RuntimeError):
    """Error raised when an archive file fails validation while being read."""
116
117
class ArchiveReader(object):
    """
    Base reader for an archive of marshalled python code objects.

    Entries are looked up by fully qualified name via extract(); with an
    imputil-style ArchiveImporter an enduser "import a.b" would become
      extract('a.__init__')
      extract('a.b')
    """
    MAGIC = b'PYL\0'
    HDRLEN = 12  # default is MAGIC followed by python's magic, int pos of toc
    TOCPOS = 8
    os = None
    _bincache = None

    def __init__(self, path=None, start=0):
        """
        Set up the reader and, when `path` is given, validate the archive
        header and load its table of contents.

        `start` is the offset of the archive inside the file at `path`.
        Without a path no file is opened and `toc` stays None.
        """
        self.toc = None
        self.path = path
        self.start = start

        # The interpreter's bytecode magic is compared against the archive
        # header in checkmagic(). Only built-in/frozen modules can be used to
        # obtain it at this bootstrap stage.
        if sys.version_info[0] == 2:
            import imp
            self.pymagic = imp.get_magic()
        else:
            # In Python 3 'imp' is no longer built-in; use the frozen variant
            # of importlib instead of importlib itself.
            import _frozen_importlib
            if sys.version_info[1] == 4:
                # Python 3.4 exposes the magic number on the module itself.
                magic_holder = _frozen_importlib
            else:
                # Python 3.5+
                magic_holder = _frozen_importlib._bootstrap_external
            self.pymagic = magic_holder.MAGIC_NUMBER

        if path is None:
            return

        self.lib = ArchiveFile(self.path, 'rb')
        with self.lib:
            self.checkmagic()
            self.loadtoc()

    def loadtoc(self):
        """
        Overridable.
        Default: a 4-byte signed integer in network byte order, stored TOCPOS
        bytes after the archive start, gives the position of the TOC
        (relative to the archive start) within self.lib.
        Default: the TOC is marshalled data running to the end of the file;
        it is converted to a dict and stored in self.toc.
        """
        lib = self.lib
        lib.seek(self.start + self.TOCPOS)
        toc_offset = struct.unpack('!i', lib.read(4))[0]
        lib.seek(self.start + toc_offset)
        # marshal.loads() is used because marshal.load() needs a real file
        # object. The loaded data is turned into a dict for faster access.
        raw_toc = lib.read()
        self.toc = dict(marshal.loads(raw_toc))

    ######## This is what is called by FuncImporter #######
    ## Since an Archive is flat, we ignore parent and modname.
    #XXX obsolete - imputil only code
    ##  def get_code(self, parent, modname, fqname):
    ##      pass

    def is_package(self, name):
        """
        Return True/False depending on the package flag stored for `name`,
        or None when `name` has no position in the TOC.
        """
        ispkg, pos = self.toc.get(name, (0, None))
        return None if pos is None else bool(ispkg)

    ####### Core method - Override as needed  #########
    def extract(self, name):
        """
        Return (ispkg, object) for the entry called `name`, or None when the
        TOC has no position for it.

        For use with imputil ArchiveImporter, object is a python code object.
        'name' is the name as specified in an 'import name'.
        'import a.b' will become:
        extract('a') (return None because 'a' is not a code object)
        extract('a.__init__') (return a code object)
        extract('a.b') (return a code object)
        Default implementation:
          self.toc is a dict
          self.toc[name] is (ispkg, pos)
          self.lib has the object marshal-ed at pos
        """
        ispkg, pos = self.toc.get(name, (0, None))
        if pos is not None:
            with self.lib:
                self.lib.seek(self.start + pos)
                # marshal.loads() is used since marshal.load() needs a real
                # file object.
                payload = self.lib.read()
                obj = marshal.loads(payload)
            return ispkg, obj
        return None

    ########################################################################
    # Informational methods

    def contents(self):
        """
        Return a list of the entry names.
        Default implementation assumes self.toc is a dict like object.
        Not required by ArchiveImporter.
        """
        return [entry_name for entry_name in self.toc.keys()]

    def checkmagic(self):
        """
        Overridable.
        Check that the file behind self.lib starts with a header we
        understand: the archive MAGIC followed by this interpreter's bytecode
        magic. Raises ArchiveReadError when either one does not match.
        """
        lib = self.lib
        lib.seek(self.start)  # default - magic is at start of file

        found_magic = lib.read(len(self.MAGIC))
        if found_magic != self.MAGIC:
            raise ArchiveReadError("%s is not a valid %s archive file"
                                   % (self.path, self.__class__.__name__))

        found_pymagic = lib.read(len(self.pymagic))
        if found_pymagic != self.pymagic:
            raise ArchiveReadError("%s has version mismatch to dll" %
                (self.path))

        # Consume the next 4 header bytes; their value is not used here.
        lib.read(4)
242
243
class Cipher(object):
    """
    Decryption-only helper for encrypted Python modules.
    """
    def __init__(self):
        # At build-time the key is given to us from inside the spec file; at
        # bootstrap-time we must look for it ourselves by importing the
        # generated 'pyimod00_crypto_key' module.
        import pyimod00_crypto_key
        raw_key = pyimod00_crypto_key.key
        assert type(raw_key) is str

        # Normalize to exactly one block: over-long keys are truncated,
        # shorter ones are left-padded by str.zfill().
        self.key = raw_key[:CRYPT_BLOCK_SIZE].zfill(CRYPT_BLOCK_SIZE)
        assert len(self.key) == CRYPT_BLOCK_SIZE

        # Import the right AES module.
        self._aes = self._import_aesmod()

    def _import_aesmod(self):
        """
        Import and return the AES extension module from PyCrypto.

        'Crypto.Cipher._AES' (PyCrypto 2.6 and later) is tried first, with
        'Crypto.Cipher.AES' (PyCrypto 2.4 and earlier) as the fallback.
        Raises ImportError when neither can be imported.
        """
        modname = 'Crypto.Cipher._AES'
        fallback_name = 'Crypto.Cipher.AES'

        if sys.version_info[0] != 2:
            import_kwargs = {'fromlist': ['Crypto', 'Cipher']}
            try:
                mod = __import__(modname, **import_kwargs)
            except ImportError:
                modname = fallback_name
                mod = __import__(modname, **import_kwargs)
        else:
            # Not-so-easy way: at bootstrap time the module has to be loaded
            # from the temporary directory, in a manner similar to
            # pyi_importers.CExtensionImporter.
            from pyimod03_importers import CExtensionImporter
            importer = CExtensionImporter()
            # NOTE: find_module _must_ be called before load_module.
            loader = importer.find_module(modname)
            if not loader:
                modname = fallback_name
                loader = importer.find_module(modname)
            if not loader:
                # Neither of the AES modules was found.
                raise ImportError(modname)
            mod = loader.load_module(modname)

        # Issue #1663: Remove the AES module from sys.modules list. Otherwise
        # it interferes with using 'Crypto.Cipher' module in users' code.
        sys.modules.pop(modname, None)
        return mod

    def __create_cipher(self, iv):
        # The 'BlockAlgo' class is stateful, so a fresh CFB-mode cipher object
        # is built for every call instead of reusing one.
        aes = self._aes
        return aes.new(self.key, aes.MODE_CFB, iv)

    def decrypt(self, data):
        """
        Decrypt `data`, whose first CRYPT_BLOCK_SIZE items are used as the
        initialization vector and whose remainder is the encrypted payload.
        """
        iv = data[:CRYPT_BLOCK_SIZE]
        payload = data[CRYPT_BLOCK_SIZE:]
        return self.__create_cipher(iv).decrypt(payload)
312
313
class ZlibArchiveReader(ArchiveReader):
    """
    ZlibArchive (PYZ) reader - an archive whose entries are zlib-compressed
    and, when a crypto key module is available, encrypted. The archive is
    read from the executable created by PyInstaller.

    This archive is used for bundling python modules inside the executable.
    TOC entries are (type, position, length) tuples, where type is one of the
    PYZ_TYPE_* constants.
    """
    MAGIC = b'PYZ\0'
    TOCPOS = 8
    HDRLEN = ArchiveReader.HDRLEN + 5

    def __init__(self, path=None, offset=None):
        """
        Open the archive at `path`, starting `offset` bytes into the file.

        When `offset` is not given, it may be encoded in `path` as a trailing
        '?<integer>' suffix, which is then stripped from the path. Without
        such a suffix the offset defaults to 0.
        """
        if path is None:
            offset = 0
        elif offset is None:
            # Try every '?' from right to left until the text after it parses
            # as an integer.
            marks = [i for i, ch in enumerate(path) if ch == '?']
            for i in reversed(marks):
                try:
                    offset = int(path[i + 1:])
                except ValueError:
                    # Just ignore any spurious "?" in the path
                    # (like in Windows UNC \\?\<path>).
                    continue
                path = path[:i]
                break
            else:
                offset = 0

        super(ZlibArchiveReader, self).__init__(path, offset)

        # Try to import the key module. If the key module is not available
        # then it means that encryption is disabled.
        cipher = None
        try:
            import pyimod00_crypto_key
            cipher = Cipher()
        except ImportError:
            # An ImportError leaves encryption disabled.
            pass
        self.cipher = cipher

    def is_package(self, name):
        """
        Return whether the entry `name` is of package type, or None when
        `name` has no position in the TOC.
        """
        typ, pos, _length = self.toc.get(name, (0, None, 0))
        return None if pos is None else typ == PYZ_TYPE_PKG

    def extract(self, name):
        """
        Return (type, object) for the entry `name`, or None when `name` has
        no position in the TOC.

        The stored bytes are decrypted (if a cipher is set) and decompressed;
        module and package entries are additionally unmarshalled, other
        entries are returned as the decompressed bytes. Raises ImportError
        when an EOFError occurs while decoding the entry.
        """
        typ, pos, length = self.toc.get(name, (0, None, 0))
        if pos is None:
            return None

        with self.lib:
            self.lib.seek(self.start + pos)
            raw = self.lib.read(length)

        try:
            if self.cipher:
                raw = self.cipher.decrypt(raw)
            obj = zlib.decompress(raw)
            if typ == PYZ_TYPE_MODULE or typ == PYZ_TYPE_PKG:
                obj = marshal.loads(obj)
        except EOFError:
            raise ImportError("PYZ entry '%s' failed to unmarshal" % name)
        return typ, obj
377