#     Copyright 2021, Kay Hayen, mailto:kay.hayen@gmail.com
#
#     Part of "Nuitka", an optimizing Python compiler that is compatible and
#     integrates with CPython, but also works on its own.
#
#     Licensed under the Apache License, Version 2.0 (the "License");
#     you may not use this file except in compliance with the License.
#     You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#     Unless required by applicable law or agreed to in writing, software
#     distributed under the License is distributed on an "AS IS" BASIS,
#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#     See the License for the specific language governing permissions and
#     limitations under the License.
#
""" Read source code from files.

This is tremendously more complex than one might think, due to encoding issues
and version differences of Python versions.
"""

import os
import re
import sys

from nuitka import Options, SourceCodeReferences
from nuitka.__past__ import unicode  # pylint: disable=I0021,redefined-builtin
from nuitka.plugins.Plugins import Plugins
from nuitka.PythonVersions import python_version, python_version_str
from nuitka.Tracing import general
from nuitka.utils.FileOperations import putTextFileContents
from nuitka.utils.Shebang import getShebangFromSource, parseShebang
from nuitka.utils.Utils import getOS

from .SyntaxErrors import raiseSyntaxError

_fstrings_installed = False

# Encoding declaration ("coding cookie") as searched for in the first two
# lines of a source file, compiled once because both lines use the same one.
_coding_cookie_re = re.compile(b"coding[:=]\\s*([-\\w.]+)")

# Interpreter names recognized in a shebang line, each mapped to the range
# "(lower, upper)" of "python_version" values it accepts. The lower bound is
# inclusive, the upper bound exclusive, and "None" means unbounded.
_shebang_version_ranges = {
    "python": (None, 0x300),
    "python2": (None, 0x300),
    "python2.6": (None, 0x270),
    "python2.7": (None, 0x300),
    "python3": (0x300, None),
    "python3.2": (0x300, 0x330),
    "python3.3": (0x330, 0x340),
    "python3.4": (0x340, 0x350),
    "python3.5": (0x350, 0x360),
    "python3.6": (0x360, 0x370),
    "python3.7": (0x370, 0x380),
    "python3.8": (0x380, 0x390),
    "python3.9": (0x390, 0x3A0),
    "python3.10": (0x3A0, 0x3B0),
}


def _installFutureFStrings():
    """Make the "future-fstrings" source encoding name usable.

    On Python 3.6 or higher, unless a codec of that name can already be looked
    up, the names "future-fstrings" and "future_fstrings" get registered as
    aliases of the "utf8" codec. On older versions, the "future_fstrings"
    package is asked to register itself, if it can be imported at all.

    The work is done at most once per process, later calls return immediately.
    """

    # Singleton, pylint: disable=global-statement
    global _fstrings_installed

    if _fstrings_installed:
        return

    # TODO: Not supporting anything before that.
    if python_version >= 0x360:
        import codecs

        # Trick to support the "future_fstrings" PyPI package. It is not
        # needed on these versions, but some people use it even on newer
        # Python, so the encoding name has to resolve to something.
        try:
            codecs.lookup("future-fstrings")
        except LookupError:
            import encodings

            utf8 = encodings.search_function("utf8")
            codec_map = {"future-fstrings": utf8, "future_fstrings": utf8}

            # The "get" method gives "None" for all other names, which is
            # what a codec search function has to do for unknown encodings.
            codecs.register(codec_map.get)
    else:
        try:
            import future_fstrings
        except ImportError:
            pass
        else:
            future_fstrings.register()

    _fstrings_installed = True


def _readSourceCodeFromFilename3(source_filename):
    """Read the source code of a file, the Python3 way.

    Args:
        source_filename: path of the file to read

    Returns:
        The file contents as read from "tokenize.open".
    """

    # Only using this for Python3, for Python2 it's too buggy.
    import tokenize

    _installFutureFStrings()

    with tokenize.open(source_filename) as source_file:
        return source_file.read()


def _detectEncoding2(source_file):
    """Detect the declared encoding of an opened source file.

    Args:
        source_file: file object, it is read from and then rewound to offset 0

    Returns:
        "utf-8" if the first line starts with a BOM, otherwise the name given
        in a coding cookie in the first or else the second line, and "ascii"
        if there is none of that.
    """

    first_line = source_file.readline()

    if first_line.startswith(b"\xef\xbb\xbf"):
        # BOM marker makes it clear.
        encoding = "utf-8"
    else:
        encoding = "ascii"

        cookie_match = _coding_cookie_re.search(first_line)

        # The second line is only consulted when the first one had nothing.
        if cookie_match is None:
            cookie_match = _coding_cookie_re.search(source_file.readline())

        if cookie_match is not None:
            encoding = cookie_match.group(1)

    # Callers want to read the whole file after detection.
    source_file.seek(0)

    return encoding


def _readSourceCodeFromFilename2(source_filename):
    """Read the source code of a file, the Python2 way.

    Args:
        source_filename: path of the file to read

    Returns:
        The file contents as read in "rU" mode.

    Notes:
        When no encoding was detected, i.e. "ascii" applies, and the contents
        do not decode as such, "raiseSyntaxError" is called with the message
        about the non-ASCII character and the line it is on.
    """

    _installFutureFStrings()

    with open(source_filename, "rU") as source_file:
        encoding = _detectEncoding2(source_file)

        source_code = source_file.read()

    # Try and detect SyntaxError from missing or wrong encodings.
    if type(source_code) is not unicode and encoding == "ascii":
        try:
            # Only decoding to see if it fails, the result is not needed.
            source_code.decode(encoding)
        except UnicodeDecodeError as e:
            # The number of new lines before the bad byte gives its line, and
            # the byte itself is taken from the data rather than parsed out
            # of the exception text.
            line_number = source_code.count("\n", 0, e.start) + 1
            wrong_byte = "%02x" % ord(source_code[e.start])

            raiseSyntaxError(
                """\
Non-ASCII character '\\x%s' in file %s on line %d, but no encoding declared; \
see http://python.org/dev/peps/pep-0263/ for details"""
                % (wrong_byte, source_filename, line_number),
                SourceCodeReferences.fromFilename(source_filename).atLineNumber(
                    line_number
                ),
                display_line=False,
            )

    return source_code


def readSourceCodeFromFilename(module_name, source_filename):
    """Read the source code of a module, giving plugins a chance to change it.

    Args:
        module_name: name handed to the plugins, or "None" to not involve
            plugins at all, which is what test code does
        source_filename: path of the file to read

    Returns:
        The source code, as modified by plugins if any did so.

    Notes:
        If "Options.shallPersistModifications()" says so and the source code
        was changed, the changed code is written to "source_filename", and
        the original code is kept in a ".orig" file next to it, unless that
        file already exists.
    """

    if python_version < 0x300:
        source_code = _readSourceCodeFromFilename2(source_filename)
    else:
        source_code = _readSourceCodeFromFilename3(source_filename)

    # Allow plug-ins to mess with source code. Test code calls this
    # without a module and doesn't want changes from plugins.
    if module_name is not None:
        source_code_modified = Plugins.onModuleSourceCode(module_name, source_code)
    else:
        source_code_modified = source_code

    if Options.shallPersistModifications() and source_code_modified != source_code:
        orig_source_filename = source_filename + ".orig"

        # Never overwrite an existing backup with what may be modified code.
        if not os.path.exists(orig_source_filename):
            putTextFileContents(filename=orig_source_filename, contents=source_code)

        putTextFileContents(filename=source_filename, contents=source_code_modified)

    return source_code_modified


def checkPythonVersionFromCode(source_code):
    """Check the shebang of the source code against the running Python.

    Args:
        source_code: source code to get the shebang from

    Returns:
        "True" if, on non-Windows, the shebang binary is the same file as
        "sys.executable", and "None" in all other cases that return.

    Notes:
        If the name of the shebang binary is a known Python interpreter name
        and the running Python version does not match it, "general.sysexit"
        is called with an explanation. Unknown names are not an error.
    """

    shebang = getShebangFromSource(source_code)

    if shebang is not None:
        binary, _args = parseShebang(shebang)

        if getOS() != "Windows":
            try:
                if os.path.samefile(sys.executable, binary):
                    return True
            except OSError:  # Might not exist
                pass

        # Not sure if we should do that.
        version_range = _shebang_version_ranges.get(os.path.basename(binary))

        if version_range is not None:
            lower, upper = version_range

            matches = (lower is None or python_version >= lower) and (
                upper is None or python_version < upper
            )

            if not matches:
                general.sysexit(
                    """\
The program you compiled wants to be run with: %s.

Nuitka is currently running with Python version '%s', which seems to not
match that. Nuitka cannot guess the Python version of your source code. You
therefore might want to specify: '%s -m nuitka'.

That will make use the correct Python version for Nuitka.
"""
                    % (shebang, python_version_str, binary)
                )


def readSourceLine(source_ref):
    """Read the source line a source reference points to.

    Args:
        source_ref: object providing "getFilename()" and "getLineNumber()"

    Returns:
        What "linecache.getline" gives for that file and line number.
    """

    import linecache

    return linecache.getline(
        filename=source_ref.getFilename(), lineno=source_ref.getLineNumber()
    )