1# cython: infer_types=True, language_level=3, py2_import=True, auto_pickle=False
2#
3#   Cython Scanner
4#
5
6from __future__ import absolute_import
7
8import cython
9cython.declare(make_lexicon=object, lexicon=object,
10               print_function=object, error=object, warning=object,
11               os=object, platform=object)
12
13import os
14import platform
15
16from .. import Utils
17from ..Plex.Scanners import Scanner
18from ..Plex.Errors import UnrecognizedInput
19from .Errors import error, warning
20from .Lexicon import any_string_prefix, make_lexicon, IDENT
21from .Future import print_function
22
# Module-level scanner debugging switches and the shared lexicon cache.
debug_scanner = 0  # NOTE(review): only mentioned next to a disabled `if False:` branch in PyrexScanner.next()
trace_scanner = 0  # copied into PyrexScanner.trace by PyrexScanner.__init__
scanner_debug_flags = 0  # not read in the visible source; presumably consumed elsewhere -- TODO confirm
scanner_dump_file = None  # not read in the visible source; presumably consumed elsewhere -- TODO confirm

lexicon = None  # created lazily by get_lexicon() and reused afterwards
29
30
def get_lexicon():
    """
    Return the module-wide scanner lexicon.

    The lexicon is built with make_lexicon() the first time it is needed
    and stored in the global 'lexicon', so later calls reuse the same object.
    """
    global lexicon
    if lexicon:
        return lexicon
    lexicon = make_lexicon()
    return lexicon
36
37
38#------------------------------------------------------------------
39
# Keywords used when scanning plain Python (.py) sources.
py_reserved_words = [
    "global", "nonlocal", "def", "class",
    "print", "del", "pass", "break",
    "continue", "return", "raise", "import",
    "exec", "try", "except", "finally",
    "while", "if", "elif", "else",
    "for", "in", "assert", "and",
    "or", "not", "is", "lambda",
    "from", "yield", "with",
]

# Keywords used for all other sources: the Python ones plus the
# Cython-specific additions.
pyx_reserved_words = list(py_reserved_words)
pyx_reserved_words.extend([
    "include", "ctypedef", "cdef", "cpdef", "cimport",
    "DEF", "IF", "ELIF", "ELSE",
])
52
53
class Method(object):
    """
    Callable that forwards to a named method of the stream it is called with.

    An instance remembers a method name and optional keyword arguments.
    Calling it as instance(stream, text) looks the method up on 'stream'
    and invokes it with 'text' (plus the stored keyword arguments, if any).
    """

    def __init__(self, name, **kwargs):
        self.name = name
        self.__name__ = name  # for Plex tracing
        # Store None instead of an empty dict so that __call__ can pick
        # the plain call with a cheap identity test.
        self.kwargs = kwargs if kwargs else None

    def __call__(self, stream, text):
        target = getattr(stream, self.name)
        extra = self.kwargs
        if extra is None:
            # self.kwargs is almost always unused => avoid call overhead
            return target(text)
        return target(text, **extra)

    def __copy__(self):
        # immutable, no need to copy
        return self

    def __deepcopy__(self, memo):
        # immutable, no need to copy
        return self
71
72
73#------------------------------------------------------------------
74
class CompileTimeScope(object):
    """
    A mapping of names to values that can be chained to an outer scope.

    Names are stored in 'entries'.  lookup() falls back to the 'outer'
    scope when a name is not found locally; lookup_here() and the 'in'
    operator only consider this scope.
    """

    def __init__(self, outer=None):
        self.outer = outer
        self.entries = {}

    def declare(self, name, value):
        # Bind (or rebind) a single name in this scope.
        self.entries[name] = value

    def update(self, other):
        # Bulk variant of declare(), delegating to dict.update().
        self.entries.update(other)

    def __contains__(self, name):
        return name in self.entries

    def lookup_here(self, name):
        # Raises KeyError if the name is not declared in this scope.
        return self.entries[name]

    def lookup(self, name):
        try:
            return self.lookup_here(name)
        except KeyError:
            enclosing = self.outer
            if not enclosing:
                # no further scope to ask => propagate the KeyError
                raise
            return enclosing.lookup(name)
102
103
def initial_compile_time_env():
    """
    Build the default compile-time environment.

    Returns an empty CompileTimeScope whose outer scope holds the UNAME_*
    values taken from platform.uname(), a fixed selection of builtins, and
    the names 'reduce', 'unicode', 'long' and 'xrange' mapped so that they
    are available on both Python 2 and Python 3.
    """
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins
    from functools import reduce

    base_env = CompileTimeScope()

    uname_names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE')
    # zip() pairs the names with the leading uname fields only
    for uname_name, uname_value in zip(uname_names, platform.uname()):
        base_env.declare(uname_name, uname_value)

    builtin_names = (
        'False', 'True',
        'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
        'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
        'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
        'list', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
        'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
        'sum', 'tuple', 'zip',
        ### defined below in a platform independent way
        # 'long', 'unicode', 'reduce', 'xrange'
    )
    for builtin_name in builtin_names:
        # names missing from the builtins module are skipped (likely Py3)
        if hasattr(builtins, builtin_name):
            base_env.declare(builtin_name, getattr(builtins, builtin_name))

    # Py2/3 adaptations: use the Py2 builtin if it exists, else its Py3 counterpart
    base_env.declare('reduce', reduce)
    for py2_name, py3_name in (('unicode', 'str'), ('long', 'int'), ('xrange', 'range')):
        base_env.declare(py2_name, getattr(builtins, py2_name, getattr(builtins, py3_name)))

    return CompileTimeScope(base_env)
142
143
144#------------------------------------------------------------------
145
class SourceDescriptor(object):
    """
    Common base of the source descriptors.

    A SourceDescriptor should be considered immutable.
    """
    filename = None

    _file_type = 'pyx'

    _escaped_description = None
    _cmp_name = ''

    def __str__(self):
        # Deliberately unusable, to catch all places where a descriptor
        # is used directly as a filename.
        assert False

    def set_file_type_from_name(self, filename):
        # Derive the file type from the extension; anything that is not
        # .pyx/.pxd/.py is treated as 'pyx'.
        ext = os.path.splitext(filename)[1]
        if ext in ('.pyx', '.pxd', '.py'):
            self._file_type = ext[1:]
        else:
            self._file_type = 'pyx'

    def is_cython_file(self):
        file_type = self._file_type
        return file_type in ('pyx', 'pxd')

    def is_python_file(self):
        file_type = self._file_type
        return file_type == 'py'

    def get_escaped_description(self):
        # Computed once from get_description() and then cached.
        cached = self._escaped_description
        if cached is None:
            ascii_only = self.get_description().encode('ASCII', 'replace').decode("ASCII")
            # Use forward slashes on Windows since these paths
            # will be used in the #line directives in the C/C++ files.
            cached = ascii_only.replace('\\', '/')
            self._escaped_description = cached
        return cached

    # The comparison methods below are only used to provide some sort of
    # order; comparing with an object that has no '_cmp_name' gives False.

    def __gt__(self, other):
        try:
            mine, theirs = self._cmp_name, other._cmp_name
            return mine > theirs
        except AttributeError:
            return False

    def __lt__(self, other):
        try:
            mine, theirs = self._cmp_name, other._cmp_name
            return mine < theirs
        except AttributeError:
            return False

    def __le__(self, other):
        try:
            mine, theirs = self._cmp_name, other._cmp_name
            return mine <= theirs
        except AttributeError:
            return False

    def __copy__(self):
        # immutable, no need to copy
        return self

    def __deepcopy__(self, memo):
        # immutable, no need to copy
        return self
204
205
class FileSourceDescriptor(SourceDescriptor):
    """
    Describes a code source that is backed by a file.

    A code source is a more generic abstraction for a "filename" (as
    sometimes the code doesn't come from a file).  Instances of code
    sources are passed to Scanner.__init__ as the optional name argument
    and will be passed back when asking for the position()-tuple.
    """
    def __init__(self, filename, path_description=None):
        filename = Utils.decode_filename(filename)
        self.path_description = path_description or filename
        self.filename = filename
        # Prefer relative paths to current directory (which is most likely the project root) over absolute paths.
        cwd_prefix = os.path.abspath('.') + os.sep
        if filename.startswith(cwd_prefix):
            self.file_path = filename[len(cwd_prefix):]
        else:
            self.file_path = filename
        self.set_file_type_from_name(filename)
        self._cmp_name = filename
        # maps (encoding, error_handling) to the cached lines, or to None
        # while the file has only been read once with that combination
        self._lines = {}

    def get_lines(self, encoding=None, error_handling=None):
        # The lines are only cached the second time they are requested,
        # in order to save memory when they are only used once.
        key = (encoding, error_handling)
        cached = self._lines.get(key)
        if cached is not None:
            return cached

        with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f:
            lines = list(f)

        # A key that is already present means this is (at least) the
        # second read => keep the lines.  Otherwise only remember that
        # the file was read once.
        seen_before = key in self._lines
        self._lines[key] = lines if seen_before else None
        return lines

    def get_description(self):
        try:
            description = os.path.relpath(self.path_description)
        except ValueError:
            # path not under current directory => use complete file path
            description = self.path_description
        return description

    def get_error_description(self):
        # Strip the current working directory from the front of the path, if present.
        cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
        path = self.filename
        return path[len(cwd):] if path.startswith(cwd) else path

    def get_filenametable_entry(self):
        return self.file_path

    def __eq__(self, other):
        if not isinstance(other, FileSourceDescriptor):
            return False
        return self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename
272
273
class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filename if the
    code originates from a string object.
    """
    def __init__(self, name, code):
        """
        name -- description of the source, also used for ordering
        code -- the source code as a single string
        """
        self.name = name
        #self.set_file_type_from_name(name)
        # every piece gets a trailing newline, including the last one
        self.codelines = [x + "\n" for x in code.split("\n")]
        self._cmp_name = name

    def get_lines(self, encoding=None, error_handling=None):
        """
        Return the source as a list of lines.

        Without an encoding, the stored lines are returned as they are.
        With an encoding, every line is passed through an encode/decode
        round trip using 'error_handling' for the encoding step.  An
        'error_handling' of None means 'strict'.
        """
        if not encoding:
            return self.codelines
        if error_handling is None:
            # str.encode() does not accept None for its 'errors' argument
            # (it raises TypeError), so map the default to the codec default.
            error_handling = 'strict'
        return [line.encode(encoding, error_handling).decode(encoding)
                for line in self.codelines]

    def get_description(self):
        return self.name

    get_error_description = get_description

    def get_filenametable_entry(self):
        return "stringsource"

    def __hash__(self):
        # Do not hash on the name, an identical string source should be the
        # same object (name is often defaulted in other places)
        # return hash(self.name)
        return id(self)

    def __eq__(self, other):
        return isinstance(other, StringSourceDescriptor) and self.name == other.name

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name
311
312
313#------------------------------------------------------------------
314
class PyrexScanner(Scanner):
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context

    def __init__(self, file, filename, parent_scanner=None,
                 scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)

        # Python files use the plain Python keyword set, everything else
        # also gets the Cython-specific keywords.
        is_py = True if filename.is_python_file() else False
        self.in_python_file = is_py
        self.keywords = set(py_reserved_words if is_py else pyx_reserved_words)

        self.async_enabled = 0

        if not parent_scanner:
            # top-level scanner: set up a fresh compilation state
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            user_env = getattr(context.options, 'compile_time_env', None)
            if user_env:
                self.compile_time_env.update(user_env)
        else:
            # nested scanner: share the compilation state of the parent
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr

            if parent_scanner.async_enabled:
                self.enter_async()

        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0

        # start in the INDENT state and read the first token
        self.begin('INDENT')
        self.sy = ''
        self.next()

    def commentline(self, text):
        # Comment tokens are only produced when comment parsing is enabled.
        if not self.parse_comments:
            return
        self.produce('commentline', text)

    def strip_underscores(self, text, symbol):
        # Produce 'symbol' with all underscores removed from the text.
        self.produce(symbol, text.replace('_', ''))

    def current_level(self):
        # The innermost indentation level is the top of the stack.
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level + 1
        return text

    def close_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level - 1
        return text

    def newline_action(self, text):
        # Newlines inside brackets do not end the logical line.
        if self.bracket_nesting_level != 0:
            return
        self.begin('INDENT')
        self.produce('NEWLINE', '')

    # opening quote => scanner state used for the string body
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text):
        # Drop the leading string prefix characters, leaving only the quote(s).
        quote = text
        while quote[:1] in any_string_prefix:
            quote = quote[1:]
        self.begin(self.string_states[quote])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        self.begin('')
        # NOTE: indentation inside brackets is not special-cased here
        # (a former check on bracket_nesting_level is disabled).

        # Check that tabs and spaces are being used consistently.
        if text:
            indent_char = text[0]
            if self.indentation_char is None:
                # the first indented line decides the indentation character
                self.indentation_char = indent_char
            elif self.indentation_char != indent_char:
                self.error("Mixed use of tabs and spaces")
            if text.replace(indent_char, ""):
                self.error("Mixed use of tabs and spaces")

        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        if new_level > current_level:
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        elif new_level < current_level:
            while new_level < self.current_level():
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            # the new level must match one of the enclosing levels
            if new_level != self.current_level():
                self.error("Inconsistent indentation")

    def eof_action(self, text):
        # Close all indentation levels that are still open, then signal EOF.
        while len(self.indentation_stack) > 1:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')

    def next(self):
        try:
            token, value = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
            return  # just a marker, error() always raises
        if token == IDENT:
            keywords = self.keywords
            if value in keywords:
                context = self.context
                if value == u'print' and print_function in context.future_directives:
                    # 'print' is an ordinary name from here on
                    keywords.discard('print')
                elif value == u'exec' and context.language_level >= 3:
                    # 'exec' is an ordinary name from here on
                    keywords.discard('exec')
                else:
                    # keywords are reported with the keyword itself as symbol
                    token = value
            value = self.context.intern_ustring(value)
        self.sy = token
        self.systring = value
        if False:  # debug_scanner:
            _, line, col = self.position()
            if self.systring and self.sy != self.systring:
                t = "%s %s" % (self.sy, self.systring)
            else:
                t = self.sy
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        # Read one token ahead, push it back and restore the current token.
        saved_sy, saved_systring = self.sy, self.systring
        self.next()
        upcoming = (self.sy, self.systring)
        self.unread(self.sy, self.systring)
        self.sy, self.systring = saved_sy, saved_systring
        return upcoming

    def put_back(self, sy, systring):
        # Push the current token back and make (sy, systring) current.
        self.unread(self.sy, self.systring)
        self.sy, self.systring = sy, systring

    def unread(self, token, value):
        # This method should be added to Plex
        entry = (token, value)
        self.queue.insert(0, entry)

    def error(self, message, pos=None, fatal=True):
        # Report 'message' at 'pos' (default: the current position) and
        # raise the resulting error unless 'fatal' is false.
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal:
            raise err

    def expect(self, what, message=None):
        # Consume the current token if its symbol is 'what', else report an error.
        if self.sy != what:
            self.expected(what, message)
        else:
            self.next()

    def expect_keyword(self, what, message=None):
        # Like expect(), but for an identifier with the given text.
        if self.sy != IDENT or self.systring != what:
            self.expected(what, message)
        else:
            self.next()

    def expected(self, what, message=None):
        if message:
            self.error(message)
        else:
            found = self.systring if self.sy == IDENT else self.sy
            self.error("Expected '%s', found '%s'" % (what, found))

    def expect_indent(self):
        self.expect('INDENT', "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT', "Expected a decrease in indentation level")

    def expect_newline(self, message="Expected a newline", ignore_semicolon=False):
        # Expect either a newline or end of file
        semicolon_pos = None
        if ignore_semicolon and self.sy == ';':
            semicolon_pos = self.position()
            self.next()
        if self.sy != 'EOF':
            self.expect('NEWLINE', message)
        # warn only after the newline check has passed
        if semicolon_pos is not None:
            warning(semicolon_pos, "useless trailing semicolon")

    def enter_async(self):
        # Calls may nest; the keywords are added on the outermost entry only.
        self.async_enabled += 1
        if self.async_enabled == 1:
            self.keywords.update(('async', 'await'))

    def exit_async(self):
        assert self.async_enabled > 0
        self.async_enabled -= 1
        if self.async_enabled:
            return
        # left the outermost async context: 'async'/'await' are plain names again
        self.keywords.difference_update(('await', 'async'))
        if self.sy in ('async', 'await'):
            # re-classify the current token as an identifier
            name = self.context.intern_ustring(self.sy)
            self.sy = IDENT
            self.systring = name
552