1#     Copyright 2021, Kay Hayen, mailto:kay.hayen@gmail.com
2#
3#     Part of "Nuitka", an optimizing Python compiler that is compatible and
4#     integrates with CPython, but also works on its own.
5#
6#     Licensed under the Apache License, Version 2.0 (the "License");
7#     you may not use this file except in compliance with the License.
8#     You may obtain a copy of the License at
9#
10#        http://www.apache.org/licenses/LICENSE-2.0
11#
12#     Unless required by applicable law or agreed to in writing, software
13#     distributed under the License is distributed on an "AS IS" BASIS,
14#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15#     See the License for the specific language governing permissions and
16#     limitations under the License.
17#
18""" Read source code from files.
19
This is tremendously more complex than one might think, due to encoding issues
and differences between Python versions.
22"""
23
24import os
25import re
26import sys
27
28from nuitka import Options, SourceCodeReferences
29from nuitka.__past__ import unicode  # pylint: disable=I0021,redefined-builtin
30from nuitka.plugins.Plugins import Plugins
31from nuitka.PythonVersions import python_version, python_version_str
32from nuitka.Tracing import general
33from nuitka.utils.FileOperations import putTextFileContents
34from nuitka.utils.Shebang import getShebangFromSource, parseShebang
35from nuitka.utils.Utils import getOS
36
37from .SyntaxErrors import raiseSyntaxError
38
# Becomes True after the first call of "_installFutureFStrings", later calls
# then return immediately.
_fstrings_installed = False


def _installFutureFStrings():
    """Make the "future-fstrings" source encoding names usable, only once.

    For Python 3.6 or higher, the names "future-fstrings" and
    "future_fstrings" get registered as aliases of the UTF-8 codec, unless
    a codec named "future-fstrings" can be looked up already. For older
    versions, the "future_fstrings" module is asked to register itself if
    it can be imported, otherwise nothing is installed.
    """

    # Singleton, pylint: disable=global-statement
    global _fstrings_installed

    if _fstrings_installed:
        return

    if python_version < 0x360:
        # The package is optional, without it there is nothing to register.
        try:
            import future_fstrings
        except ImportError:
            pass
        else:
            future_fstrings.register()
    else:
        import codecs

        # Trick for "future_fstrings" PyPI package support. It is not needed
        # there, but some people use the encoding even on newer Python.
        try:
            codecs.lookup("future-fstrings")
        except LookupError:
            import encodings

            utf8_codec = encodings.search_function("utf8")
            aliases = dict.fromkeys(("future-fstrings", "future_fstrings"), utf8_codec)

            # The search function gives None for all other names.
            codecs.register(aliases.get)

    _fstrings_installed = True
77
78
def _readSourceCodeFromFilename3(source_filename):
    """Read the decoded source code of a file, the Python3 way.

    The encoding detection is left to "tokenize.open". This is only used
    with Python3, for Python2 it is too buggy.
    """
    from tokenize import open as open_source_file

    # Must be done before opening, so the encoding name can be resolved.
    _installFutureFStrings()

    with open_source_file(source_filename) as source_file:
        source_code = source_file.read()

    return source_code
87
88
89def _detectEncoding2(source_file):
90    # Detect the encoding.
91    encoding = "ascii"
92
93    line1 = source_file.readline()
94
95    if line1.startswith(b"\xef\xbb\xbf"):
96        # BOM marker makes it clear.
97        encoding = "utf-8"
98    else:
99        line1_match = re.search(b"coding[:=]\\s*([-\\w.]+)", line1)
100
101        if line1_match:
102            encoding = line1_match.group(1)
103        else:
104            line2 = source_file.readline()
105
106            line2_match = re.search(b"coding[:=]\\s*([-\\w.]+)", line2)
107
108            if line2_match:
109                encoding = line2_match.group(1)
110
111    source_file.seek(0)
112
113    return encoding
114
115
def _readSourceCodeFromFilename2(source_filename):
    """Read the source code of a file, the Python2 way.

    The content is returned as read from the file. If no encoding is
    declared, and the content is not "unicode" already, it is checked to
    be decodable as ASCII, and a syntax error pointing at the line with
    the first offending byte is raised through "raiseSyntaxError" if not.
    """
    _installFutureFStrings()

    with open(source_filename, "rU") as source_file:
        encoding = _detectEncoding2(source_file)
        source_code = source_file.read()

    # Only missing encoding declarations are checked here.
    if type(source_code) is unicode or encoding != "ascii":
        return source_code

    try:
        source_code.decode(encoding)
    except UnicodeDecodeError as e:
        # The number of newlines before the offending byte is the zero
        # based index of the line it is on.
        line_number = source_code.count("\n", 0, e.start) + 1

        # The exception text contains the byte value in hexadecimal form.
        bad_byte_match = re.search("byte 0x([a-f0-9]{2}) in position", str(e))
        wrong_byte = bad_byte_match.group(1)

        source_ref = SourceCodeReferences.fromFilename(source_filename).atLineNumber(
            line_number
        )

        raiseSyntaxError(
            """\
Non-ASCII character '\\x%s' in file %s on line %d, but no encoding declared; \
see http://python.org/dev/peps/pep-0263/ for details"""
            % (wrong_byte, source_filename, line_number),
            source_ref,
            display_line=False,
        )

    return source_code
158
159
def readSourceCodeFromFilename(module_name, source_filename):
    """Read the source code of a module, with plugin changes applied.

    With a "module_name" of None, as test code does it, the plugins are
    not asked and the code is returned as read. If persisting is enabled
    in the options and plugins did change the code, the modified code is
    written over the source file, after saving the original content to a
    ".orig" file next to it, unless that one exists already.
    """
    if python_version >= 0x300:
        read_source = _readSourceCodeFromFilename3
    else:
        read_source = _readSourceCodeFromFilename2

    source_code = read_source(source_filename)

    # Allow plug-ins to mess with source code, but only for real modules.
    if module_name is None:
        source_code_modified = source_code
    else:
        source_code_modified = Plugins.onModuleSourceCode(module_name, source_code)

    if Options.shallPersistModifications() and source_code_modified != source_code:
        backup_filename = source_filename + ".orig"

        # Never overwrite a backup, it holds the pristine original.
        if not os.path.exists(backup_filename):
            putTextFileContents(filename=backup_filename, contents=source_code)

        putTextFileContents(filename=source_filename, contents=source_code_modified)

    return source_code_modified
182
183
def checkPythonVersionFromCode(source_code):
    """Check that the running Python matches the shebang of the source code.

    Without a shebang, nothing is checked. If the shebang binary is the same
    file as the running interpreter, which is only compared on non-Windows,
    True is returned. Otherwise the base name of the binary is looked up
    among the known "pythonX" and "pythonX.Y" names, and if the running
    version is outside of what that name accepts, this is reported through
    "general.sysexit". Unknown names are accepted silently.
    """
    shebang = getShebangFromSource(source_code)

    if shebang is None:
        return

    binary, _args = parseShebang(shebang)

    if getOS() != "Windows":
        try:
            if os.path.samefile(sys.executable, binary):
                return True
        except OSError:  # Might not exist
            pass

    # Accepted "python_version" values per binary name, as inclusive lower
    # and exclusive upper bound, with None for no bound on that side. Not
    # sure if we should do that for plain "python".
    accepted_versions = {
        "python": (None, 0x300),
        "python2": (None, 0x300),
        "python2.6": (None, 0x270),
        "python2.7": (None, 0x300),
        "python3": (0x300, None),
        "python3.2": (0x300, 0x330),
        "python3.3": (0x330, 0x340),
        "python3.4": (0x340, 0x350),
        "python3.5": (0x350, 0x360),
        "python3.6": (0x360, 0x370),
        "python3.7": (0x370, 0x380),
        "python3.8": (0x380, 0x390),
        "python3.9": (0x390, 0x3A0),
        "python3.10": (0x3A0, 0x3B0),
    }

    bounds = accepted_versions.get(os.path.basename(binary))

    # Binary names not known cannot be judged.
    if bounds is None:
        return

    minimum, maximum = bounds

    if (minimum is None or python_version >= minimum) and (
        maximum is None or python_version < maximum
    ):
        return

    general.sysexit(
        """\
The program you compiled wants to be run with: %s.

Nuitka is currently running with Python version '%s', which seems to not
match that. Nuitka cannot guess the Python version of your source code. You
therefore might want to specify: '%s -m nuitka'.

That will make use the correct Python version for Nuitka.
"""
        % (shebang, python_version_str, binary)
    )
246
247
def readSourceLine(source_ref):
    """Give the source line a source reference points to.

    The file name and line number are taken from "source_ref" and the line
    is looked up through "linecache.getline".
    """
    from linecache import getline

    filename = source_ref.getFilename()
    line_number = source_ref.getLineNumber()

    return getline(filename, line_number)
254