1# cython: infer_types=True, language_level=3, py2_import=True, auto_pickle=False 2# 3# Cython Scanner 4# 5 6from __future__ import absolute_import 7 8import cython 9cython.declare(make_lexicon=object, lexicon=object, 10 print_function=object, error=object, warning=object, 11 os=object, platform=object) 12 13import os 14import platform 15 16from .. import Utils 17from ..Plex.Scanners import Scanner 18from ..Plex.Errors import UnrecognizedInput 19from .Errors import error, warning 20from .Lexicon import any_string_prefix, make_lexicon, IDENT 21from .Future import print_function 22 23debug_scanner = 0 24trace_scanner = 0 25scanner_debug_flags = 0 26scanner_dump_file = None 27 28lexicon = None 29 30 31def get_lexicon(): 32 global lexicon 33 if not lexicon: 34 lexicon = make_lexicon() 35 return lexicon 36 37 38#------------------------------------------------------------------ 39 40py_reserved_words = [ 41 "global", "nonlocal", "def", "class", "print", "del", "pass", "break", 42 "continue", "return", "raise", "import", "exec", "try", 43 "except", "finally", "while", "if", "elif", "else", "for", 44 "in", "assert", "and", "or", "not", "is", "lambda", 45 "from", "yield", "with", 46] 47 48pyx_reserved_words = py_reserved_words + [ 49 "include", "ctypedef", "cdef", "cpdef", 50 "cimport", "DEF", "IF", "ELIF", "ELSE" 51] 52 53 54class Method(object): 55 56 def __init__(self, name, **kwargs): 57 self.name = name 58 self.kwargs = kwargs or None 59 self.__name__ = name # for Plex tracing 60 61 def __call__(self, stream, text): 62 method = getattr(stream, self.name) 63 # self.kwargs is almost always unused => avoid call overhead 64 return method(text, **self.kwargs) if self.kwargs is not None else method(text) 65 66 def __copy__(self): 67 return self # immutable, no need to copy 68 69 def __deepcopy__(self, memo): 70 return self # immutable, no need to copy 71 72 73#------------------------------------------------------------------ 74 75class CompileTimeScope(object): 76 77 def __init__(self, outer=None): 78 self.entries = {} 79 self.outer = outer 80 81 def declare(self, name, value): 82 self.entries[name] = value 83 84 def update(self, other): 85 self.entries.update(other) 86 87 def lookup_here(self, name): 88 return self.entries[name] 89 90 def __contains__(self, name): 91 return name in self.entries 92 93 def lookup(self, name): 94 try: 95 return self.lookup_here(name) 96 except KeyError: 97 outer = self.outer 98 if outer: 99 return outer.lookup(name) 100 else: 101 raise 102 103 104def initial_compile_time_env(): 105 benv = CompileTimeScope() 106 names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE') 107 for name, value in zip(names, platform.uname()): 108 benv.declare(name, value) 109 try: 110 import __builtin__ as builtins 111 except ImportError: 112 import builtins 113 114 names = ( 115 'False', 'True', 116 'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes', 117 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter', 118 'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len', 119 'list', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range', 120 'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str', 121 'sum', 'tuple', 'zip', 122 ### defined below in a platform independent way 123 # 'long', 'unicode', 'reduce', 'xrange' 124 ) 125 126 for name in names: 127 try: 128 benv.declare(name, getattr(builtins, name)) 129 except AttributeError: 130 # ignore, likely Py3 131 pass 132 133 # Py2/3 adaptations 134 from functools import reduce 135 benv.declare('reduce', reduce) 136 benv.declare('unicode', getattr(builtins, 'unicode', getattr(builtins, 'str'))) 137 benv.declare('long', getattr(builtins, 'long', getattr(builtins, 'int'))) 138 benv.declare('xrange', getattr(builtins, 'xrange', getattr(builtins, 'range'))) 139 140 denv = CompileTimeScope(benv) 141 return denv 142 143 144#------------------------------------------------------------------ 145 146class SourceDescriptor(object): 147 """ 148 A SourceDescriptor should be considered immutable. 149 """ 150 filename = None 151 152 _file_type = 'pyx' 153 154 _escaped_description = None 155 _cmp_name = '' 156 def __str__(self): 157 assert False # To catch all places where a descriptor is used directly as a filename 158 159 def set_file_type_from_name(self, filename): 160 name, ext = os.path.splitext(filename) 161 self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx' 162 163 def is_cython_file(self): 164 return self._file_type in ('pyx', 'pxd') 165 166 def is_python_file(self): 167 return self._file_type == 'py' 168 169 def get_escaped_description(self): 170 if self._escaped_description is None: 171 esc_desc = \ 172 self.get_description().encode('ASCII', 'replace').decode("ASCII") 173 # Use forward slashes on Windows since these paths 174 # will be used in the #line directives in the C/C++ files. 175 self._escaped_description = esc_desc.replace('\\', '/') 176 return self._escaped_description 177 178 def __gt__(self, other): 179 # this is only used to provide some sort of order 180 try: 181 return self._cmp_name > other._cmp_name 182 except AttributeError: 183 return False 184 185 def __lt__(self, other): 186 # this is only used to provide some sort of order 187 try: 188 return self._cmp_name < other._cmp_name 189 except AttributeError: 190 return False 191 192 def __le__(self, other): 193 # this is only used to provide some sort of order 194 try: 195 return self._cmp_name <= other._cmp_name 196 except AttributeError: 197 return False 198 199 def __copy__(self): 200 return self # immutable, no need to copy 201 202 def __deepcopy__(self, memo): 203 return self # immutable, no need to copy 204 205 206class FileSourceDescriptor(SourceDescriptor): 207 """ 208 Represents a code source. A code source is a more generic abstraction 209 for a "filename" (as sometimes the code doesn't come from a file). 210 Instances of code sources are passed to Scanner.__init__ as the 211 optional name argument and will be passed back when asking for 212 the position()-tuple. 213 """ 214 def __init__(self, filename, path_description=None): 215 filename = Utils.decode_filename(filename) 216 self.path_description = path_description or filename 217 self.filename = filename 218 # Prefer relative paths to current directory (which is most likely the project root) over absolute paths. 219 workdir = os.path.abspath('.') + os.sep 220 self.file_path = filename[len(workdir):] if filename.startswith(workdir) else filename 221 self.set_file_type_from_name(filename) 222 self._cmp_name = filename 223 self._lines = {} 224 225 def get_lines(self, encoding=None, error_handling=None): 226 # we cache the lines only the second time this is called, in 227 # order to save memory when they are only used once 228 key = (encoding, error_handling) 229 try: 230 lines = self._lines[key] 231 if lines is not None: 232 return lines 233 except KeyError: 234 pass 235 236 with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f: 237 lines = list(f) 238 239 if key in self._lines: 240 self._lines[key] = lines 241 else: 242 # do not cache the first access, but remember that we 243 # already read it once 244 self._lines[key] = None 245 return lines 246 247 def get_description(self): 248 try: 249 return os.path.relpath(self.path_description) 250 except ValueError: 251 # path not under current directory => use complete file path 252 return self.path_description 253 254 def get_error_description(self): 255 path = self.filename 256 cwd = Utils.decode_filename(os.getcwd() + os.path.sep) 257 if path.startswith(cwd): 258 return path[len(cwd):] 259 return path 260 261 def get_filenametable_entry(self): 262 return self.file_path 263 264 def __eq__(self, other): 265 return isinstance(other, FileSourceDescriptor) and self.filename == other.filename 266 267 def __hash__(self): 268 return hash(self.filename) 269 270 def __repr__(self): 271 return "<FileSourceDescriptor:%s>" % self.filename 272 273 274class StringSourceDescriptor(SourceDescriptor): 275 """ 276 Instances of this class can be used instead of a filenames if the 277 code originates from a string object. 278 """ 279 def __init__(self, name, code): 280 self.name = name 281 #self.set_file_type_from_name(name) 282 self.codelines = [x + "\n" for x in code.split("\n")] 283 self._cmp_name = name 284 285 def get_lines(self, encoding=None, error_handling=None): 286 if not encoding: 287 return self.codelines 288 else: 289 return [line.encode(encoding, error_handling).decode(encoding) 290 for line in self.codelines] 291 292 def get_description(self): 293 return self.name 294 295 get_error_description = get_description 296 297 def get_filenametable_entry(self): 298 return "stringsource" 299 300 def __hash__(self): 301 return id(self) 302 # Do not hash on the name, an identical string source should be the 303 # same object (name is often defaulted in other places) 304 # return hash(self.name) 305 306 def __eq__(self, other): 307 return isinstance(other, StringSourceDescriptor) and self.name == other.name 308 309 def __repr__(self): 310 return "<StringSourceDescriptor:%s>" % self.name 311 312 313#------------------------------------------------------------------ 314 315class PyrexScanner(Scanner): 316 # context Context Compilation context 317 # included_files [string] Files included with 'include' statement 318 # compile_time_env dict Environment for conditional compilation 319 # compile_time_eval boolean In a true conditional compilation context 320 # compile_time_expr boolean In a compile-time expression context 321 322 def __init__(self, file, filename, parent_scanner=None, 323 scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None): 324 Scanner.__init__(self, get_lexicon(), file, filename, initial_pos) 325 326 if filename.is_python_file(): 327 self.in_python_file = True 328 self.keywords = set(py_reserved_words) 329 else: 330 self.in_python_file = False 331 self.keywords = set(pyx_reserved_words) 332 333 self.async_enabled = 0 334 335 if parent_scanner: 336 self.context = parent_scanner.context 337 self.included_files = parent_scanner.included_files 338 self.compile_time_env = parent_scanner.compile_time_env 339 self.compile_time_eval = parent_scanner.compile_time_eval 340 self.compile_time_expr = parent_scanner.compile_time_expr 341 342 if parent_scanner.async_enabled: 343 self.enter_async() 344 else: 345 self.context = context 346 self.included_files = scope.included_files 347 self.compile_time_env = initial_compile_time_env() 348 self.compile_time_eval = 1 349 self.compile_time_expr = 0 350 if getattr(context.options, 'compile_time_env', None): 351 self.compile_time_env.update(context.options.compile_time_env) 352 self.parse_comments = parse_comments 353 self.source_encoding = source_encoding 354 self.trace = trace_scanner 355 self.indentation_stack = [0] 356 self.indentation_char = None 357 self.bracket_nesting_level = 0 358 359 self.begin('INDENT') 360 self.sy = '' 361 self.next() 362 363 def commentline(self, text): 364 if self.parse_comments: 365 self.produce('commentline', text) 366 367 def strip_underscores(self, text, symbol): 368 self.produce(symbol, text.replace('_', '')) 369 370 def current_level(self): 371 return self.indentation_stack[-1] 372 373 def open_bracket_action(self, text): 374 self.bracket_nesting_level += 1 375 return text 376 377 def close_bracket_action(self, text): 378 self.bracket_nesting_level -= 1 379 return text 380 381 def newline_action(self, text): 382 if self.bracket_nesting_level == 0: 383 self.begin('INDENT') 384 self.produce('NEWLINE', '') 385 386 string_states = { 387 "'": 'SQ_STRING', 388 '"': 'DQ_STRING', 389 "'''": 'TSQ_STRING', 390 '"""': 'TDQ_STRING' 391 } 392 393 def begin_string_action(self, text): 394 while text[:1] in any_string_prefix: 395 text = text[1:] 396 self.begin(self.string_states[text]) 397 self.produce('BEGIN_STRING') 398 399 def end_string_action(self, text): 400 self.begin('') 401 self.produce('END_STRING') 402 403 def unclosed_string_action(self, text): 404 self.end_string_action(text) 405 self.error("Unclosed string literal") 406 407 def indentation_action(self, text): 408 self.begin('') 409 # Indentation within brackets should be ignored. 410 #if self.bracket_nesting_level > 0: 411 # return 412 # Check that tabs and spaces are being used consistently. 413 if text: 414 c = text[0] 415 #print "Scanner.indentation_action: indent with", repr(c) ### 416 if self.indentation_char is None: 417 self.indentation_char = c 418 #print "Scanner.indentation_action: setting indent_char to", repr(c) 419 else: 420 if self.indentation_char != c: 421 self.error("Mixed use of tabs and spaces") 422 if text.replace(c, "") != "": 423 self.error("Mixed use of tabs and spaces") 424 # Figure out how many indents/dedents to do 425 current_level = self.current_level() 426 new_level = len(text) 427 #print "Changing indent level from", current_level, "to", new_level ### 428 if new_level == current_level: 429 return 430 elif new_level > current_level: 431 #print "...pushing level", new_level ### 432 self.indentation_stack.append(new_level) 433 self.produce('INDENT', '') 434 else: 435 while new_level < self.current_level(): 436 #print "...popping level", self.indentation_stack[-1] ### 437 self.indentation_stack.pop() 438 self.produce('DEDENT', '') 439 #print "...current level now", self.current_level() ### 440 if new_level != self.current_level(): 441 self.error("Inconsistent indentation") 442 443 def eof_action(self, text): 444 while len(self.indentation_stack) > 1: 445 self.produce('DEDENT', '') 446 self.indentation_stack.pop() 447 self.produce('EOF', '') 448 449 def next(self): 450 try: 451 sy, systring = self.read() 452 except UnrecognizedInput: 453 self.error("Unrecognized character") 454 return # just a marker, error() always raises 455 if sy == IDENT: 456 if systring in self.keywords: 457 if systring == u'print' and print_function in self.context.future_directives: 458 self.keywords.discard('print') 459 elif systring == u'exec' and self.context.language_level >= 3: 460 self.keywords.discard('exec') 461 else: 462 sy = systring 463 systring = self.context.intern_ustring(systring) 464 self.sy = sy 465 self.systring = systring 466 if False: # debug_scanner: 467 _, line, col = self.position() 468 if not self.systring or self.sy == self.systring: 469 t = self.sy 470 else: 471 t = "%s %s" % (self.sy, self.systring) 472 print("--- %3d %2d %s" % (line, col, t)) 473 474 def peek(self): 475 saved = self.sy, self.systring 476 self.next() 477 next = self.sy, self.systring 478 self.unread(*next) 479 self.sy, self.systring = saved 480 return next 481 482 def put_back(self, sy, systring): 483 self.unread(self.sy, self.systring) 484 self.sy = sy 485 self.systring = systring 486 487 def unread(self, token, value): 488 # This method should be added to Plex 489 self.queue.insert(0, (token, value)) 490 491 def error(self, message, pos=None, fatal=True): 492 if pos is None: 493 pos = self.position() 494 if self.sy == 'INDENT': 495 error(pos, "Possible inconsistent indentation") 496 err = error(pos, message) 497 if fatal: raise err 498 499 def expect(self, what, message=None): 500 if self.sy == what: 501 self.next() 502 else: 503 self.expected(what, message) 504 505 def expect_keyword(self, what, message=None): 506 if self.sy == IDENT and self.systring == what: 507 self.next() 508 else: 509 self.expected(what, message) 510 511 def expected(self, what, message=None): 512 if message: 513 self.error(message) 514 else: 515 if self.sy == IDENT: 516 found = self.systring 517 else: 518 found = self.sy 519 self.error("Expected '%s', found '%s'" % (what, found)) 520 521 def expect_indent(self): 522 self.expect('INDENT', "Expected an increase in indentation level") 523 524 def expect_dedent(self): 525 self.expect('DEDENT', "Expected a decrease in indentation level") 526 527 def expect_newline(self, message="Expected a newline", ignore_semicolon=False): 528 # Expect either a newline or end of file 529 useless_trailing_semicolon = None 530 if ignore_semicolon and self.sy == ';': 531 useless_trailing_semicolon = self.position() 532 self.next() 533 if self.sy != 'EOF': 534 self.expect('NEWLINE', message) 535 if useless_trailing_semicolon is not None: 536 warning(useless_trailing_semicolon, "useless trailing semicolon") 537 538 def enter_async(self): 539 self.async_enabled += 1 540 if self.async_enabled == 1: 541 self.keywords.add('async') 542 self.keywords.add('await') 543 544 def exit_async(self): 545 assert self.async_enabled > 0 546 self.async_enabled -= 1 547 if not self.async_enabled: 548 self.keywords.discard('await') 549 self.keywords.discard('async') 550 if self.sy in ('async', 'await'): 551 self.sy, self.systring = IDENT, self.context.intern_ustring(self.sy) 552