1#     Copyright 2021, Kay Hayen, mailto:kay.hayen@gmail.com
2#
3#     Part of "Nuitka", an optimizing Python compiler that is compatible and
4#     integrates with CPython, but also works on its own.
5#
6#     Licensed under the Apache License, Version 2.0 (the "License");
7#     you may not use this file except in compliance with the License.
8#     You may obtain a copy of the License at
9#
10#        http://www.apache.org/licenses/LICENSE-2.0
11#
12#     Unless required by applicable law or agreed to in writing, software
13#     distributed under the License is distributed on an "AS IS" BASIS,
14#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15#     See the License for the specific language governing permissions and
16#     limitations under the License.
17#
18""" Nodes related to importing modules or names.
19
20Normally imports are mostly relatively static, but Nuitka also attempts to
21cover the uses of "__import__" built-in and other import techniques, that
22allow dynamic values.
23
24If other optimizations make it possible to predict these, the compiler can go
deeper than what it normally could. The import expression node can recurse.
26"""
27
28import os
29import pkgutil
30import sys
31
32from nuitka.__past__ import (  # pylint: disable=I0021,redefined-builtin
33    long,
34    unicode,
35)
36from nuitka.codegen.Reports import onMissingTrust
37from nuitka.importing.IgnoreListing import getModuleIgnoreList
38from nuitka.importing.Importing import (
39    findModule,
40    getModuleNameAndKindFromFilename,
41)
42from nuitka.importing.Recursion import decideRecursion, recurseTo
43from nuitka.importing.StandardLibrary import isStandardLibraryPath
44from nuitka.ModuleRegistry import getUncompiledModule
45from nuitka.Options import isStandaloneMode, shallWarnUnusualCode
46from nuitka.PythonOperators import python_version
47from nuitka.Tracing import inclusion_logger, unusual_logger
48from nuitka.utils.FileOperations import relpath
49from nuitka.utils.ModuleNames import ModuleName
50
51from .ConstantRefNodes import makeConstantRefNode
52from .ExpressionBases import (
53    ExpressionBase,
54    ExpressionChildHavingBase,
55    ExpressionChildrenHavingBase,
56)
57from .LocalsScopes import GlobalsDictHandle
58from .NodeBases import StatementChildHavingBase
59from .NodeMakingHelpers import makeRaiseExceptionReplacementExpression
60from .shapes.BuiltinTypeShapes import tshape_module, tshape_module_builtin
61
# These modules are supported in code generation to be imported the hard way.
hard_modules = frozenset(
    [
        "os",
        "sys",
        "types",
        "__future__",
        "site",
        "importlib",
        "_frozen_importlib",
        "_frozen_importlib_external",
        "pkgutil",
        "functools",
    ]
)

# Trust levels for attributes of hard modules. Lookups of attributes without
# an entry default to "trust_undefined".
trust_undefined = 0
trust_constant = 1
trust_exist = 2
# Names of "__future__" get the same level as "trust_exist".
trust_future = trust_exist
trust_importable = 3

# Per hard module, the trust level of selected attribute names.
hard_modules_trust = {
    "os": {},
    "sys": {"version": trust_constant, "stdout": trust_exist, "stderr": trust_exist},
    "types": {},
    "__future__": {
        "unicode_literals": trust_future,
        "absolute_import": trust_future,
        "division": trust_future,
        "print_function": trust_future,
        "generator_stop": trust_future,
        "nested_scopes": trust_future,
        "generators": trust_future,
        "with_statement": trust_future,
    },
    "site": {},
    "importlib": {},
    "_frozen_importlib": {},
    "_frozen_importlib_external": {},
    "pkgutil": {"get_data": trust_exist},
    "functools": {"partial": trust_exist},
}

# Everything found below the "importlib" package path is marked as importable.
if python_version >= 0x270:
    import importlib

    for module_info in pkgutil.walk_packages(importlib.__path__):
        hard_modules_trust["importlib"][module_info[1]] = trust_importable

import __future__

# These "__future__" names are only added when the running Python has them.
if hasattr(__future__, "barry_as_FLUFL"):
    hard_modules_trust["__future__"]["barry_as_FLUFL"] = trust_future
if hasattr(__future__, "annotations"):
    hard_modules_trust["__future__"]["annotations"] = trust_future
120
121
def isHardModuleWithoutSideEffect(module_name):
    """Tell if the name is a hard module other than "site"."""
    if module_name not in hard_modules:
        return False

    return module_name != "site"
124
125
def makeExpressionAbsoluteImportNode(module_name, source_ref):
    """Create a built-in import node for the module name with level 0.

    No globals, locals or fromlist are given to the created node.
    """
    name_node = makeConstantRefNode(module_name, source_ref, True)
    level_node = makeConstantRefNode(0, source_ref, True)

    return ExpressionBuiltinImport(
        name=name_node,
        globals_arg=None,
        locals_arg=None,
        fromlist=None,
        level=level_node,
        source_ref=source_ref,
    )
135
136
class ExpressionImportModuleHard(ExpressionBase):
    """Hard coded import names, e.g. of "__future__"

    These are directly created for some Python mechanics, but also due to
    compile time optimization for imports of statically known modules.

    For modules where "isHardModuleWithoutSideEffect" holds, the module is
    imported at compile time and kept in "module", so attribute lookups on it
    can be checked. Otherwise "module" is None and nothing is predicted.
    """

    kind = "EXPRESSION_IMPORT_MODULE_HARD"

    __slots__ = ("module_name", "module")

    def __init__(self, module_name, source_ref):
        ExpressionBase.__init__(self, source_ref=source_ref)

        self.module_name = module_name

        # Only import at compile time what has no side effect from doing so.
        if isHardModuleWithoutSideEffect(self.module_name):
            self.module = __import__(module_name)
        else:
            self.module = None

    def finalize(self):
        del self.parent

    def getDetails(self):
        return {"module_name": self.module_name}

    def getModuleName(self):
        return self.module_name

    def mayHaveSideEffects(self):
        # Without the compile time module object, the import is not trusted.
        return self.module is None

    def mayRaiseException(self, exception_type):
        return self.mayHaveSideEffects()

    def getTypeShape(self):
        if self.module_name in sys.builtin_module_names:
            return tshape_module_builtin
        else:
            return tshape_module

    def computeExpressionRaw(self, trace_collection):
        if self.mayRaiseException(BaseException):
            trace_collection.onExceptionRaiseExit(BaseException)

        return self, None, None

    def computeExpressionImportName(self, import_node, import_name, trace_collection):
        # Importing a name from the module is treated like an attribute lookup.
        return self.computeExpressionAttribute(
            import_node, import_name, trace_collection
        )

    @staticmethod
    def _getImportNameErrorString(module, module_name, name):
        """Build the "ImportError" message for a name missing in a module.

        The wording depends on "python_version", and for newer versions on
        standalone mode, where the module file location is left out.
        """
        if python_version < 0x340:
            return "cannot import name %s" % name
        if python_version < 0x370:
            return "cannot import name %r" % name
        elif isStandaloneMode():
            return "cannot import name %r from %r" % (name, module_name)
        else:
            return "cannot import name %r from %r (%s)" % (
                name,
                module_name,
                module.__file__ if hasattr(module, "__file__") else "unknown location",
            )

    def computeExpressionAttribute(self, lookup_node, attribute_name, trace_collection):
        """Compute an attribute lookup (or name import) on the hard module.

        Args:
            lookup_node: The node doing the lookup, which is replaced or kept.
            attribute_name: The name looked up on the module.
            trace_collection: Receives the exception exits annotated here.

        Returns:
            Triple of resulting node, change tag and change message, the
            latter two are None if the lookup node is kept.
        """
        if self.module is not None:
            trust = hard_modules_trust[self.module_name].get(
                attribute_name, trust_undefined
            )

            # Trust levels are plain integers, compare by value.
            if trust == trust_importable:
                # TODO: Change this is a hard module import itself, currently these are not all trusted
                # themselves yet. We do not have to indicate exception, but it makes no sense to annotate
                # that here at this point.
                trace_collection.onExceptionRaiseExit(BaseException)
            # NOTE(review): the second condition defaults to the attribute name
            # itself, so it is true for every non-empty name without a trust
            # entry - confirm this is intended.
            elif not hasattr(self.module, attribute_name) and hard_modules_trust[
                self.module_name
            ].get(attribute_name, attribute_name):
                new_node = makeRaiseExceptionReplacementExpression(
                    expression=lookup_node,
                    exception_type="ImportError",
                    exception_value=self._getImportNameErrorString(
                        self.module, self.module_name, attribute_name
                    ),
                )

                trace_collection.onExceptionRaiseExit(ImportError)

                return (
                    new_node,
                    "new_raise",
                    "Hard module %r attribute missing %r pre-computed."
                    % (self.module_name, attribute_name),
                )
            else:
                if trust == trust_undefined:
                    onMissingTrust(
                        "Hard module %r attribute %r missing trust config for existing value.",
                        lookup_node.getSourceReference(),
                        self.module_name,
                        attribute_name,
                    )

                    trace_collection.onExceptionRaiseExit(ImportError)
                elif trust == trust_constant:
                    # Make sure it's actually there, and not becoming the getattr default by accident.
                    # This node has no "import_name" of its own, the looked up
                    # name is the one to use.
                    assert hasattr(self.module, attribute_name), self

                    return (
                        makeConstantRefNode(
                            constant=getattr(self.module, attribute_name),
                            source_ref=lookup_node.getSourceReference(),
                            user_provided=True,
                        ),
                        "new_constant",
                        "Hard module %r imported %r pre-computed to constant value."
                        % (self.module_name, attribute_name),
                    )
                else:
                    result = ExpressionImportModuleNameHard(
                        module_name=self.module_name,
                        import_name=attribute_name,
                        source_ref=lookup_node.getSourceReference(),
                    )

                    return (
                        result,
                        "new_expression",
                        "Attribute lookup %r of hard module %r becomes hard module name import."
                        % (attribute_name, self.module_name),
                    )

        else:
            # Nothing can be known, but lets not do control flow escape, that is just
            # too unlikely.
            trace_collection.onExceptionRaiseExit(BaseException)

        return lookup_node, None, None
282
283
class ExpressionImportModuleNameHard(ExpressionBase):
    """Hard coded import of a name from a hard module, e.g. of "os.path.dirname"

    These are directly created for some Python mechanics.
    """

    kind = "EXPRESSION_IMPORT_MODULE_NAME_HARD"

    __slots__ = ("module_name", "import_name", "trust")

    def __init__(self, module_name, import_name, source_ref):
        ExpressionBase.__init__(self, source_ref=source_ref)

        self.module_name = module_name
        self.import_name = import_name

        # Configured trust of the name, None if the module has no entry for it.
        module_trust = hard_modules_trust[self.module_name]
        self.trust = module_trust.get(self.import_name)

    def finalize(self):
        del self.parent

    def getDetails(self):
        details = {"module_name": self.module_name}
        details["import_name"] = self.import_name

        return details

    def getModuleName(self):
        return self.module_name

    def getImportName(self):
        return self.import_name

    def mayHaveSideEffects(self):
        # Names without trust configuration are not relied upon.
        return self.trust is None

    def mayRaiseException(self, exception_type):
        return self.trust is None

    def computeExpressionRaw(self, trace_collection):
        # As good as it gets, will exist, otherwise we do not get created.
        untrusted = self.mayHaveSideEffects()

        if untrusted:
            trace_collection.onExceptionRaiseExit(AttributeError)

        return self, None, None
327
328
329class ExpressionBuiltinImport(ExpressionChildrenHavingBase):
330    # Very detail rich node, pylint: disable=too-many-instance-attributes
331
332    __slots__ = (
333        "recurse_attempted",
334        "imported_module_desc",
335        "import_list_modules_desc",
336        "package_modules_desc",
337        "finding",
338        "type_shape",
339        "builtin_module",
340        "module_filename",
341    )
342
343    kind = "EXPRESSION_BUILTIN_IMPORT"
344
345    named_children = ("name", "globals_arg", "locals_arg", "fromlist", "level")
346
347    _warned_about = set()
348
349    def __init__(self, name, globals_arg, locals_arg, fromlist, level, source_ref):
350        ExpressionChildrenHavingBase.__init__(
351            self,
352            values={
353                "name": name,
354                "globals_arg": globals_arg,
355                "locals_arg": locals_arg,
356                "fromlist": fromlist,
357                "level": level,
358            },
359            source_ref=source_ref,
360        )
361
362        self.recurse_attempted = False
363
364        # The module actually referenced in that import.
365        self.imported_module_desc = None
366
367        # The fromlist imported modules if any.
368        self.import_list_modules_desc = []
369
370        # For "package.sub_package.module" we also need to import the package,
371        # because the imported_module not be found, as it's not a module, e.g.
372        # in the case of "os.path" or "six.moves".
373        self.package_modules_desc = None
374
375        self.finding = None
376
377        self.type_shape = tshape_module
378
379        self.builtin_module = None
380
381        # If found, filename of the imported module.
382        self.module_filename = None
383
384    def _consider(self, trace_collection, module_filename, module_package):
385        assert module_package is None or (
386            type(module_package) is ModuleName and module_package != ""
387        ), repr(module_package)
388
389        module_filename = os.path.normpath(module_filename)
390
391        module_name, module_kind = getModuleNameAndKindFromFilename(module_filename)
392
393        if module_kind is not None:
394            module_fullpath = ModuleName.makeModuleNameInPackage(
395                module_name, module_package
396            )
397
398            decision, reason = decideRecursion(
399                module_filename=module_filename,
400                module_name=module_fullpath,
401                module_kind=module_kind,
402            )
403
404            if decision:
405                module_relpath = relpath(module_filename)
406
407                imported_module = recurseTo(
408                    signal_change=trace_collection.signalChange,
409                    module_package=module_package,
410                    module_filename=module_filename,
411                    module_relpath=module_relpath,
412                    module_kind=module_kind,
413                    reason=reason,
414                )
415
416                return imported_module
417            elif decision is False and module_kind == "py":
418                uncompiled_module = getUncompiledModule(
419                    module_fullpath, module_filename
420                )
421
422                if uncompiled_module is not None:
423                    return uncompiled_module
424            elif decision is None and module_kind == "py":
425                if (
426                    module_filename not in self._warned_about
427                    and module_fullpath not in getModuleIgnoreList()
428                ):
429                    self._warned_about.add(module_filename)
430
431                    inclusion_logger.warning(
432                        """\
433Not following import to '%(full_path)s' (%(filename)s), please specify \
434--nofollow-imports (do not follow any imports), \
435--follow-imports (follow all generally), \
436--nofollow-import-to=%(full_path)s (do not follow it), \
437--follow-import-to=%(full_path)s (follow import it) to change."""
438                        % {"full_path": module_fullpath, "filename": module_filename}
439                    )
440
    def _attemptRecursion(self, trace_collection, module_name):
        """Search the module of the given name and consider it for inclusion.

        The search result is stored in "module_filename" and "finding". If a
        file was found, it is considered through "_consider", as are the
        modules named by a constant fromlist when the imported module is a
        compiled package. If no file was found, the parent packages of the
        name are searched and considered instead. Modules accepted that way
        are recorded as name and filename pairs in "imported_module_desc",
        "import_list_modules_desc" and "package_modules_desc".

        Nothing is searched if the level is not a compile time constant
        integer.
        """
        # Complex stuff, pylint: disable=too-many-branches

        parent_module = self.getParentModule()

        # The package to search relative to, for a non-package module this
        # is the package containing it.
        parent_package = parent_module.getFullName()
        if not parent_module.isCompiledPythonPackage():
            parent_package = parent_package.getPackageName()

        level = self.subnode_level

        # Without a level given, the future spec of the module decides.
        if level is None:
            level = 0 if parent_module.getFutureSpec().isAbsoluteImport() else -1
        elif not level.isCompileTimeConstant():
            return
        else:
            level = level.getCompileTimeConstant()

        # TODO: Catch this as a static error maybe.
        if type(level) not in (int, long):
            return

        module_package, self.module_filename, self.finding = findModule(
            importing=self,
            module_name=ModuleName(module_name),
            parent_package=parent_package,
            level=level,
            warn=True,
        )

        if self.module_filename is not None:
            imported_module = self._consider(
                trace_collection=trace_collection,
                module_filename=self.module_filename,
                module_package=module_package,
            )

            if imported_module is not None:
                self.imported_module_desc = (
                    imported_module.getFullName(),
                    imported_module.getFilename(),
                )

                import_list = self.subnode_fromlist

                # Only constant tuple or list values of the fromlist are used,
                # anything else is treated like no fromlist.
                if import_list is not None:
                    if import_list.isCompileTimeConstant():
                        import_list = import_list.getCompileTimeConstant()

                    if type(import_list) not in (tuple, list):
                        import_list = None

                if import_list and imported_module.isCompiledPythonPackage():
                    for import_item in import_list:
                        # Star imports name no specific sub-module.
                        if import_item == "*":
                            continue

                        module_package, module_filename, _finding = findModule(
                            importing=self,
                            module_name=ModuleName(import_item),
                            parent_package=imported_module.getFullName(),
                            level=-1,  # Relative import, so child is used.
                            warn=False,
                        )

                        if module_filename is not None:
                            sub_imported_module = self._consider(
                                trace_collection=trace_collection,
                                module_filename=module_filename,
                                module_package=module_package,
                            )

                            if sub_imported_module is not None:
                                self.import_list_modules_desc.append(
                                    (
                                        sub_imported_module.getFullName(),
                                        sub_imported_module.getFilename(),
                                    )
                                )
        else:
            # No file for the module name itself, try each of its parent
            # packages, from the innermost one outwards.
            module_name = ModuleName(module_name)

            while True:
                module_name = module_name.getPackageName()

                # Stop once there is no further parent package.
                if module_name is None:
                    break

                module_package, module_filename, _finding = findModule(
                    importing=self,
                    module_name=module_name,
                    parent_package=parent_package,
                    level=level,
                    warn=True,
                )

                if module_filename is not None:
                    package_module = self._consider(
                        trace_collection=trace_collection,
                        module_filename=module_filename,
                        module_package=module_package,
                    )

                    if package_module is not None:
                        # The list is only created once there is an entry.
                        if self.package_modules_desc is None:
                            self.package_modules_desc = []

                        self.package_modules_desc.append(
                            (package_module.getFullName(), package_module.getFilename())
                        )
551
552    def _addUsedModules(self, trace_collection):
553        if self.finding != "not-found":
554            if self.imported_module_desc is not None:
555                trace_collection.onUsedModule(
556                    module_name=self.imported_module_desc[0],
557                    module_relpath=self.imported_module_desc[1],
558                )
559
560            for import_list_module_desc in self.import_list_modules_desc:
561                trace_collection.onUsedModule(
562                    import_list_module_desc[0], import_list_module_desc[1]
563                )
564
565        # These are added in any case.
566        if self.package_modules_desc is not None:
567            for package_module_desc in self.package_modules_desc:
568                trace_collection.onUsedModule(
569                    package_module_desc[0], package_module_desc[1]
570                )
571
    def computeExpression(self, trace_collection):
        """Compute the import expression, searching the module on first use.

        For a constant string module name, the module search is attempted
        once, and imports of hard modules are replaced with hard import
        nodes. A constant module name that is not a string is replaced by
        the compile time computation result of the "__import__" call, which
        is expected to raise. Otherwise the node is kept.

        Returns:
            Triple of resulting node, change tag and change message, the
            latter two are None if this node is kept.
        """
        # Many cases to deal with, pylint: disable=too-many-branches

        # TODO: In fact, if the module is not a package, we don't have to insist
        # on the "fromlist" that much, but normally it's not used for anything
        # but packages, so it will be rare.
        self._addUsedModules(trace_collection)

        # Attempt to recurse if not already done.
        if self.recurse_attempted:
            if self.finding == "not-found":
                # Importing and not finding, may raise an exception obviously.
                trace_collection.onExceptionRaiseExit(BaseException)
            else:
                # If we know it exists, only RuntimeError shall occur.
                trace_collection.onExceptionRaiseExit(RuntimeError)

            # We stay here.
            return self, None, None

        module_name = self.subnode_name

        if module_name.isCompileTimeConstant():
            imported_module_name = module_name.getCompileTimeConstant()

            if type(imported_module_name) in (str, unicode):
                # TODO: This is not handling decoding errors all that well.
                if str is not unicode and type(imported_module_name) is unicode:
                    imported_module_name = str(imported_module_name)

                self._attemptRecursion(
                    trace_collection=trace_collection, module_name=imported_module_name
                )

                # The search is not repeated in later passes.
                self.recurse_attempted = True

                # Hard modules found as files are only trusted when the file
                # is from the standard library path.
                if self.finding == "absolute" and imported_module_name in hard_modules:
                    if isStandardLibraryPath(self.module_filename):
                        result = ExpressionImportModuleHard(
                            module_name=imported_module_name, source_ref=self.source_ref
                        )

                        return (
                            result,
                            "new_expression",
                            "Lowered import of standard library module %r to hard import."
                            % imported_module_name,
                        )
                    elif shallWarnUnusualCode():
                        unusual_logger.warning(
                            "%s Standard library module %r used from outside path %r."
                            % (
                                self.source_ref.getAsString(),
                                imported_module_name,
                                self.module_filename,
                            )
                        )

                if self.finding == "built-in":
                    if imported_module_name in hard_modules:
                        result = ExpressionImportModuleHard(
                            module_name=imported_module_name, source_ref=self.source_ref
                        )

                        return (
                            result,
                            "new_expression",
                            "Lowered import of built-in module %r to hard import."
                            % imported_module_name,
                        )

                    # Other built-in modules are imported at compile time, so
                    # their attributes can be checked for existence later.
                    self.type_shape = tshape_module_builtin
                    self.builtin_module = __import__(imported_module_name)

                # Report again, now with the results of the search.
                self._addUsedModules(trace_collection)
            else:
                # TODO: This doesn't preserve side effects.

                # Non-strings is going to raise an error.
                (
                    new_node,
                    change_tags,
                    message,
                ) = trace_collection.getCompileTimeComputationResult(
                    node=self,
                    computation=lambda: __import__(
                        module_name.getCompileTimeConstant()
                    ),
                    description="Replaced '__import__' call with non-string module name argument.",
                )

                # Must fail, must not go on when it doesn't.
                assert change_tags == "new_raise", module_name

                return new_node, change_tags, message

        # Importing may raise an exception obviously, unless we know it will
        # not.
        if self.finding != "built-in":
            trace_collection.onExceptionRaiseExit(BaseException)

        # TODO: May return a module or module variable reference of some sort in
        # the future with embedded modules.
        return self, None, None
676
677    # TODO: Add computeExpressionImportName
678
679    def mayRaiseException(self, exception_type):
680        return self.finding != "built-in"
681
682    def mayRaiseExceptionImportName(self, exception_type, import_name):
683        if self.finding == "built-in":
684            return not hasattr(self.builtin_module, import_name)
685        else:
686            return True
687
688    def getTypeShape(self):
689        return self.type_shape
690
691
class StatementImportStar(StatementChildHavingBase):
    """Star import statement of the "module" child into a target scope."""

    kind = "STATEMENT_IMPORT_STAR"

    named_child = "module"

    __slots__ = ("target_scope",)

    def __init__(self, target_scope, module_import, source_ref):
        StatementChildHavingBase.__init__(
            self, value=module_import, source_ref=source_ref
        )

        self.target_scope = target_scope

        # TODO: Abstract these things.
        # Exactly globals dictionary handles are marked as escaped here.
        if type(target_scope) is GlobalsDictHandle:
            target_scope.markAsEscaped()

    @staticmethod
    def getStatementNiceName():
        return "star import statement"

    @staticmethod
    def mayRaiseException(exception_type):
        # Not done. TODO: Later we can try and check for "__all__" if it
        # really can be that way.
        return True

    def getTargetDictScope(self):
        return self.target_scope

    def computeStatement(self, trace_collection):
        module_node = self.subnode_module
        trace_collection.onExpression(module_node)

        target_scope = self.target_scope
        trace_collection.onLocalsDictEscaped(target_scope)

        # Any name could be assigned by the star import, so everything
        # known so far needs to be invalidated.
        trace_collection.removeAllKnowledge()

        # We could always encounter that __all__ is a strange beast and causes
        # the exception.
        trace_collection.onExceptionRaiseExit(BaseException)

        return self, None, None
737
738
class ExpressionImportName(ExpressionChildHavingBase):
    """Import of a name from the "module" child expression.

    Computation and exception checks are delegated to the module child.
    """

    kind = "EXPRESSION_IMPORT_NAME"

    named_child = "module"

    __slots__ = ("import_name", "level")

    def __init__(self, module, import_name, level, source_ref):
        ExpressionChildHavingBase.__init__(self, value=module, source_ref=source_ref)

        self.import_name = import_name
        self.level = level

        # Neither the level nor the module may be missing.
        assert level is not None

        assert module is not None

    def getDetails(self):
        return dict(import_name=self.import_name, level=self.level)

    def getImportName(self):
        return self.import_name

    def getImportLevel(self):
        return self.level

    def computeExpression(self, trace_collection):
        # The module expression knows best what importing a name from it gives.
        module_node = self.subnode_module

        return module_node.computeExpressionImportName(
            import_node=self,
            import_name=self.import_name,
            trace_collection=trace_collection,
        )

    def mayRaiseException(self, exception_type):
        module_node = self.subnode_module

        return module_node.mayRaiseExceptionImportName(
            exception_type=exception_type, import_name=self.import_name
        )
777