1# cython: auto_cpdef=True, infer_types=True, language_level=3, py2_import=True
2#
3#   Parser
4#
5
6from __future__ import absolute_import
7
8# This should be done automatically
9import cython
10cython.declare(Nodes=object, ExprNodes=object, EncodedString=object,
11               bytes_literal=object, StringEncoding=object,
12               FileSourceDescriptor=object, lookup_unicodechar=object, unicode_category=object,
13               Future=object, Options=object, error=object, warning=object,
14               Builtin=object, ModuleNode=object, Utils=object, _unicode=object, _bytes=object,
15               re=object, sys=object, _parse_escape_sequences=object, _parse_escape_sequences_raw=object,
16               partial=object, reduce=object, _IS_PY3=cython.bint, _IS_2BYTE_UNICODE=cython.bint,
17               _CDEF_MODIFIERS=tuple, COMMON_BINOP_MISTAKES=dict)
18
19from io import StringIO
20import re
21import sys
22from unicodedata import lookup as lookup_unicodechar, category as unicode_category
23from functools import partial, reduce
24
25from .Scanning import PyrexScanner, FileSourceDescriptor, StringSourceDescriptor
26from . import Nodes
27from . import ExprNodes
28from . import Builtin
29from . import StringEncoding
30from .StringEncoding import EncodedString, bytes_literal, _unicode, _bytes
31from .ModuleNode import ModuleNode
32from .Errors import error, warning
33from .. import Utils
34from . import Future
35from . import Options
36
37_IS_PY3 = sys.version_info[0] >= 3
38_IS_2BYTE_UNICODE = sys.maxunicode == 0xffff
39_CDEF_MODIFIERS = ('inline', 'nogil', 'api')
40
41
class Ctx(object):
    #  Parsing context: a bag of parser settings.  The class attributes
    #  below are the defaults; an instance overrides them through the
    #  keywords it was created with.
    level = 'other'
    visibility = 'private'
    cdef_flag = 0
    typedef_flag = 0
    api = 0
    overridable = 0
    nogil = 0
    namespace = None
    templates = None
    allow_struct_enum_decorator = False

    def __init__(self, **kwds):
        # Every keyword becomes an instance attribute, shadowing the
        # class-level default of the same name.
        vars(self).update(kwds)

    def __call__(self, **kwds):
        # Derive a new Ctx: start from this context's instance attributes
        # and apply 'kwds' on top.  'self' is left untouched.
        derived = Ctx()
        settings = vars(derived)
        settings.update(vars(self))
        settings.update(kwds)
        return derived
64
65
def p_ident(s, message="Expected an identifier"):
    # If the current token is an identifier, consume it and return its
    # interned name.  Otherwise report 'message' through s.error().
    if s.sy != 'IDENT':
        s.error(message)
        return None
    ident = s.context.intern_ustring(s.systring)
    s.next()
    return ident
73
def p_ident_list(s):
    # Collect a comma-separated run of identifiers and return their
    # interned names.  A trailing comma is consumed; the list is empty
    # if the current token is not an identifier.
    idents = []
    more = s.sy == 'IDENT'
    while more:
        idents.append(s.context.intern_ustring(s.systring))
        s.next()
        if s.sy == ',':
            s.next()
            more = s.sy == 'IDENT'
        else:
            more = False
    return idents
83
84#------------------------------------------
85#
86#   Expressions
87#
88#------------------------------------------
89
def p_binop_operator(s):
    # Consume the current token as a binary operator.
    # Returns (operator symbol, position of the operator token).
    where = s.position()
    operator = s.sy
    s.next()
    return operator, where
95
def p_binop_expr(s, ops, p_sub_expr):
    # Parse a left-associative chain  sub_expr (op sub_expr)*  where 'op'
    # is any token in 'ops' and operands are parsed by 'p_sub_expr'.
    # Returns the single operand node, or the outermost binop node.
    left = p_sub_expr(s)
    while s.sy in ops:
        op, pos = p_binop_operator(s)
        right = p_sub_expr(s)
        left = ExprNodes.binop_node(pos, op, left, right)
        if op == '/':
            # True under 'from __future__ import division', otherwise
            # None, meaning "unknown".
            left.truedivision = (
                True if Future.division in s.context.future_directives else None)
    return left
108
109#lambdef: 'lambda' [varargslist] ':' test
110
def p_lambdef(s, allow_conditional=True):
    # s.sy == 'lambda'
    # Parse  'lambda' [varargslist] ':' body  into a LambdaNode.  The body
    # is parsed with p_test() when 'allow_conditional' is true and with
    # p_test_nocond() otherwise.
    pos = s.position()
    s.next()
    if s.sy != ':':
        args, star_arg, starstar_arg = p_varargslist(
            s, terminator=':', annotated=False)
    else:
        # no parameters at all
        args = []
        star_arg = None
        starstar_arg = None
    s.expect(':')
    if not allow_conditional:
        body = p_test_nocond(s)
    else:
        body = p_test(s)
    return ExprNodes.LambdaNode(
        pos,
        args=args,
        star_arg=star_arg,
        starstar_arg=starstar_arg,
        result_expr=body)
130
131#lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
132
def p_lambdef_nocond(s):
    # Lambda whose body is parsed with p_test_nocond() instead of p_test().
    return p_lambdef(s, False)
135
136#test: or_test ['if' or_test 'else' test] | lambdef
137
def p_test(s):
    # Parse a lambda, a plain or_test, or a conditional expression
    # "<value> if <condition> else <other>".
    if s.sy == 'lambda':
        return p_lambdef(s)
    pos = s.position()
    value_if_true = p_or_test(s)
    if s.sy != 'if':
        return value_if_true
    s.next()
    condition = p_or_test(s)
    s.expect('else')
    value_if_false = p_test(s)
    return ExprNodes.CondExprNode(
        pos, test=condition, true_val=value_if_true, false_val=value_if_false)
151
152#test_nocond: or_test | lambdef_nocond
153
def p_test_nocond(s):
    # Like p_test(), but never parses a trailing 'if ... else ...'.
    if s.sy != 'lambda':
        return p_or_test(s)
    return p_lambdef_nocond(s)
159
160#or_test: and_test ('or' and_test)*
161
# C boolean operators mapped to the Python operator that is suggested
# in the warning when one of them is found.
COMMON_BINOP_MISTAKES = {
    '||': 'or',
    '&&': 'and',
}
163
def p_or_test(s):
    # 'or' chain of and_test operands, parsed right-associatively.
    operator = u'or'
    return p_rassoc_binop_expr(s, operator, p_and_test)
166
def p_rassoc_binop_expr(s, op, p_subexpr):
    # Parse  subexpr [op <same rule>]  as a right-associative chain.
    # If the operand is instead followed by the C spelling of 'op'
    # ('||' / '&&'), a warning is issued and the token is not consumed.
    left = p_subexpr(s)
    current = s.sy
    if current == op:
        pos = s.position()
        s.next()
        right = p_rassoc_binop_expr(s, current, p_subexpr)
        return ExprNodes.binop_node(pos, current, left, right)
    if current in COMMON_BINOP_MISTAKES and COMMON_BINOP_MISTAKES[current] == op:
        # Only report this for the current operator since we pass through here twice for 'and' and 'or'.
        message = "Found the C operator '%s', did you mean the Python operator '%s'?" % (current, op)
        warning(s.position(), message, level=1)
    return left
181
182#and_test: not_test ('and' not_test)*
183
def p_and_test(s):
    # 'and' chain of not_test operands, parsed right-associatively.
    operator = u'and'
    return p_rassoc_binop_expr(s, operator, p_not_test)
187
188#not_test: 'not' not_test | comparison
189
def p_not_test(s):
    # Parse any number of leading 'not' keywords followed by a comparison.
    if s.sy != 'not':
        return p_comparison(s)
    pos = s.position()
    s.next()
    operand = p_not_test(s)
    return ExprNodes.NotNode(pos, operand=operand)
197
198#comparison: expr (comp_op expr)*
199#comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
200
def p_comparison(s):
    # Parse an operand optionally followed by comparison operators.
    # The first comparison becomes a PrimaryCmpNode; any further ones are
    # attached through its 'cascade' attribute.
    first = p_starred_expr(s)
    if s.sy not in comparison_ops:
        return first
    pos = s.position()
    op = p_cmp_op(s)
    second = p_starred_expr(s)
    node = ExprNodes.PrimaryCmpNode(
        pos, operator=op, operand1=first, operand2=second)
    if s.sy in comparison_ops:
        node.cascade = p_cascaded_cmp(s)
    return node
212
def p_test_or_starred_expr(s):
    # A leading '*' selects the starred-expression parser, anything else
    # is parsed as a regular test.
    if s.sy != '*':
        return p_test(s)
    return p_starred_expr(s)
218
def p_starred_expr(s):
    # Parse a bit expression, optionally preceded by '*'.  The starred
    # form is wrapped in a StarredUnpackingNode positioned at the '*'.
    pos = s.position()
    is_starred = s.sy == '*'
    if is_starred:
        s.next()
    operand = p_bit_expr(s)
    if not is_starred:
        return operand
    return ExprNodes.StarredUnpackingNode(pos, operand)
230
def p_cascaded_cmp(s):
    # Parse the continuation of a chained comparison (e.g. the "< c < d"
    # in "a < b < c < d").  Each further comparison becomes a
    # CascadedCmpNode linked from its predecessor's 'cascade' attribute;
    # the first node of that chain is returned.
    head = None
    tail = None
    while True:
        pos = s.position()
        op = p_cmp_op(s)
        operand = p_starred_expr(s)
        link = ExprNodes.CascadedCmpNode(pos, operator=op, operand2=operand)
        if head is None:
            head = link
        else:
            tail.cascade = link
        tail = link
        if s.sy not in comparison_ops:
            return head
240
def p_cmp_op(s):
    # Consume one comparison operator and return its name.  The two-word
    # operators are returned as 'not_in' and 'is_not'; '<>' is returned
    # as '!='.
    token = s.sy
    s.next()
    if token == 'not':
        s.expect('in')
        return 'not_in'
    if token == 'is':
        if s.sy != 'not':
            return 'is'
        s.next()
        return 'is_not'
    if token == '<>':
        return '!='
    return token
259
# Tokens that can start a comparison operator ('not' opens 'not in').
comparison_ops = cython.declare(frozenset, frozenset([
    '<', '>', '<=', '>=', '==', '!=', '<>',
    'is', 'in', 'not',
]))
264
265#expr: xor_expr ('|' xor_expr)*
266
def p_bit_expr(s):
    # Left-associative chain of xor_expr operands joined by '|'.
    operators = ('|',)
    return p_binop_expr(s, operators, p_xor_expr)
269
270#xor_expr: and_expr ('^' and_expr)*
271
def p_xor_expr(s):
    # Left-associative chain of and_expr operands joined by '^'.
    operators = ('^',)
    return p_binop_expr(s, operators, p_and_expr)
274
275#and_expr: shift_expr ('&' shift_expr)*
276
def p_and_expr(s):
    # Left-associative chain of shift_expr operands joined by '&'.
    operators = ('&',)
    return p_binop_expr(s, operators, p_shift_expr)
279
280#shift_expr: arith_expr (('<<'|'>>') arith_expr)*
281
def p_shift_expr(s):
    # Left-associative chain of arith_expr operands joined by '<<' or '>>'.
    operators = ('<<', '>>')
    return p_binop_expr(s, operators, p_arith_expr)
284
285#arith_expr: term (('+'|'-') term)*
286
def p_arith_expr(s):
    # Left-associative chain of terms joined by '+' or '-'.
    operators = ('+', '-')
    return p_binop_expr(s, operators, p_term)
289
290#term: factor (('*'|'@'|'/'|'%'|'//') factor)*
291
def p_term(s):
    # Left-associative chain of factors joined by a multiplicative
    # operator: '*', '@', '/', '%' or '//'.
    operators = ('*', '@', '/', '%', '//')
    return p_binop_expr(s, operators, p_factor)
294
295#factor: ('+'|'-'|'~'|'&'|typecast|sizeof) factor | power
296
def p_factor(s):
    # Entry point for 'factor': a little indirection over _p_factor(),
    # kept for C-ification purposes.
    factor = _p_factor(s)
    return factor
300
def _p_factor(s):
    # Parse a unary '+', '-' or '~' applied to a factor.  Outside of
    # Python source files this also handles '&' (AmpersandNode), a '<'
    # typecast and 'sizeof'.  Everything else is delegated to p_power().
    token = s.sy
    if token in ('+', '-', '~'):
        pos = s.position()
        s.next()
        return ExprNodes.unop_node(pos, token, p_factor(s))
    if s.in_python_file:
        return p_power(s)
    if token == '&':
        pos = s.position()
        s.next()
        operand = p_factor(s)
        return ExprNodes.AmpersandNode(pos, operand=operand)
    if token == "<":
        return p_typecast(s)
    if token == 'IDENT' and s.systring == "sizeof":
        return p_sizeof(s)
    return p_power(s)
319
def p_typecast(s):
    # s.sy == "<"
    # Parse  '<' base_type declarator ['?'] '>' factor.
    # A memoryview slice base type yields a CythonArrayNode, anything
    # else a TypecastNode; a '?' before the '>' sets its 'typecheck' to 1.
    pos = s.position()
    s.next()
    base_type = p_c_base_type(s)
    is_memslice = isinstance(base_type, Nodes.MemoryViewSliceTypeNode)
    other_unnamed_types = (
        Nodes.TemplatedTypeNode,
        Nodes.CConstOrVolatileTypeNode,
        Nodes.CTupleBaseTypeNode,
    )
    is_other_unnamed_type = isinstance(base_type, other_unnamed_types)
    # Only node types outside the two groups above must carry a name.
    if not is_memslice and not is_other_unnamed_type and base_type.name is None:
        s.error("Unknown type")
    declarator = p_c_declarator(s, empty=1)
    typecheck = 0
    if s.sy == '?':
        s.next()
        typecheck = 1
    s.expect(">")
    operand = p_factor(s)
    if is_memslice:
        return ExprNodes.CythonArrayNode(
            pos, base_type_node=base_type, operand=operand)
    return ExprNodes.TypecastNode(
        pos,
        base_type=base_type,
        declarator=declarator,
        operand=operand,
        typecheck=typecheck)
349
def p_sizeof(s):
    # s.sy == ident "sizeof"
    # Parse  sizeof '(' <expression or type> ')'.
    pos = s.position()
    s.next()
    s.expect('(')
    # Decide whether the operand is an expression or a type.  Something
    # that is really a type but parses as an expression is treated as an
    # expression here.
    if not looking_at_expr(s):
        base_type = p_c_base_type(s)
        declarator = p_c_declarator(s, empty=1)
        node = ExprNodes.SizeofTypeNode(
            pos, base_type=base_type, declarator=declarator)
    else:
        operand = p_test(s)
        node = ExprNodes.SizeofVarNode(pos, operand=operand)
    s.expect(')')
    return node
368
369
def p_yield_expression(s):
    # s.sy == "yield"
    # Parse 'yield [testlist]' or 'yield from test' and return the
    # matching YieldExprNode / YieldFromExprNode.
    pos = s.position()
    s.next()
    is_yield_from = s.sy == 'from'
    if is_yield_from:
        s.next()
    if s.sy == ')' or s.sy in statement_terminators:
        # Nothing follows that could be an argument.
        if is_yield_from:
            s.error("'yield from' requires a source argument",
                    pos=pos, fatal=False)
        arg = None
    elif is_yield_from:
        # "yield from" does not support implicit tuples ...
        arg = p_test(s)
    else:
        # ... but "yield" does ("yield 1,2")
        arg = p_testlist(s)
    if is_yield_from:
        return ExprNodes.YieldFromExprNode(pos, arg=arg)
    return ExprNodes.YieldExprNode(pos, arg=arg)
390
391
def p_yield_statement(s):
    # s.sy == "yield"
    # Wrap the parsed yield expression in an expression statement that
    # shares its position.
    expr = p_yield_expression(s)
    return Nodes.ExprStatNode(expr.pos, expr=expr)
396
397
def p_async_statement(s, ctx, decorators):
    # s.sy >> 'async' ...
    # Dispatch on the token that follows 'async': 'def', 'for' or 'with'.
    # Decorators are only accepted in front of 'async def'.
    token = s.sy
    if token == 'def':
        # 'async def' statements aren't allowed in pxd files
        if 'pxd' in ctx.level:
            s.error('def statement not allowed here')
        s.level = ctx.level
        return p_def_statement(s, decorators, is_async_def=True)
    if decorators:
        s.error("Decorators can only be followed by functions or classes")
        return None
    if token == 'for':
        return p_for_statement(s, is_async=True)
    if token == 'with':
        s.next()
        return p_with_items(s, is_async=True)
    s.error("expected one of 'def', 'for', 'with' after 'async'")
    return None
415
416
417#power: atom_expr ('**' factor)*
418#atom_expr: ['await'] atom trailer*
419
def p_power(s):
    # Parse  ['await'] atom trailer* ['**' factor], or hand over to
    # p_new_expr() when looking at 'new' followed by an identifier.
    if s.systring == 'new' and s.peek()[0] == 'IDENT':
        return p_new_expr(s)
    await_pos = None
    if s.sy == 'await':
        await_pos = s.position()
        s.next()
    node = p_atom(s)
    while s.sy in ('(', '[', '.'):
        node = p_trailer(s, node)
    if await_pos:
        # 'await' wraps the atom together with its trailers, but not the
        # exponent parsed below.
        node = ExprNodes.AwaitExprNode(await_pos, arg=node)
    if s.sy == '**':
        pos = s.position()
        s.next()
        exponent = p_factor(s)
        node = ExprNodes.binop_node(pos, '**', node, exponent)
    return node
438
439
def p_new_expr(s):
    # s.systring == 'new'.
    # Parse  'new' base_type call-arguments  as a call on a NewExprNode.
    pos = s.position()
    s.next()
    cppclass = p_c_base_type(s)
    new_node = ExprNodes.NewExprNode(pos, cppclass=cppclass)
    return p_call(s, new_node)
446
447#trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
448
def p_trailer(s, node1):
    # Apply one trailer (call, subscript or attribute access) to 'node1'
    # and return the resulting node.
    pos = s.position()
    token = s.sy
    if token == '(':
        return p_call(s, node1)
    if token == '[':
        return p_index(s, node1)
    # otherwise: s.sy == '.'
    s.next()
    attribute = p_ident(s)
    return ExprNodes.AttributeNode(pos, obj=node1, attribute=attribute)
460
461
462# arglist:  argument (',' argument)* [',']
463# argument: [test '='] test       # Really [keyword '='] test
464
465# since PEP 448:
466# argument: ( test [comp_for] |
467#             test '=' test |
468#             '**' expr |
469#             star_expr )
470
def p_call_parse_args(s, allow_genexp=True):
    # s.sy == '('
    # Parse the parenthesised argument list of a call, including the
    # closing ')'.
    #
    # 'allow_genexp' is accepted but not consulted by this function.
    #
    # Returns (positional_args, keyword_args):
    #   positional_args - list whose items are either a list of argument
    #       nodes (a run of consecutive non-starred arguments) or a single
    #       node for a '*expr' argument; [[]] when there are none.
    #   keyword_args - list whose items are either a (keyword, value)
    #       tuple for 'name=value' or a single node for a '**expr' argument.
    s.next()
    positional_args = []
    keyword_args = []
    starstar_seen = False
    last_was_tuple_unpack = False
    while s.sy != ')':
        if s.sy == '*':
            if starstar_seen:
                s.error("Non-keyword arg following keyword arg", pos=s.position())
            s.next()
            positional_args.append(p_test(s))
            last_was_tuple_unpack = True
        elif s.sy == '**':
            s.next()
            keyword_args.append(p_test(s))
            starstar_seen = True
        else:
            arg = p_test(s)
            if s.sy == '=':
                # 'name=value': what was just parsed must be a plain name.
                s.next()
                if not arg.is_name:
                    s.error("Expected an identifier before '='",
                            pos=arg.pos)
                encoded_name = s.context.intern_ustring(arg.name)
                keyword = ExprNodes.IdentifierStringNode(
                    arg.pos, value=encoded_name)
                arg = p_test(s)
                keyword_args.append((keyword, arg))
            else:
                if keyword_args:
                    s.error("Non-keyword arg following keyword arg", pos=arg.pos)
                # Extend the current run of plain arguments, or start a
                # new run after a '*expr' (or at the very beginning).
                if positional_args and not last_was_tuple_unpack:
                    positional_args[-1].append(arg)
                else:
                    positional_args.append([arg])
                last_was_tuple_unpack = False
        if s.sy != ',':
            break
        s.next()

    # A lone plain argument followed by 'for'/'async' is the start of a
    # generator expression: f(x for x in y)
    if (s.sy in ('for', 'async')
            and not keyword_args and not last_was_tuple_unpack
            and len(positional_args) == 1 and len(positional_args[0]) == 1):
        positional_args = [[p_genexp(s, positional_args[0][0])]]
    s.expect(')')
    return positional_args or [[]], keyword_args
520
521
def p_call_build_packed_args(pos, positional_args, keyword_args):
    # Turn the argument groups produced by p_call_parse_args() into
    # (arg_tuple, keyword_dict): one node for all positional arguments
    # and one node (or None) for all keyword arguments.

    # Lists of plain arguments become tuple nodes, '*expr' arguments are
    # converted with AsTupleNode.
    subtuples = []
    for group in positional_args:
        if isinstance(group, list):
            subtuples.append(ExprNodes.TupleNode(pos, args=group))
        else:
            subtuples.append(ExprNodes.AsTupleNode(pos, arg=group))
    # TODO: implement a faster way to join tuples than creating each one and adding them
    arg_tuple = reduce(partial(ExprNodes.binop_node, pos, '+'), subtuples)

    keyword_dict = None
    if keyword_args:
        merged_parts = []
        pending_items = []
        for item in keyword_args:
            if isinstance(item, tuple):
                keyword, value = item
                pending_items.append(
                    ExprNodes.DictItemNode(pos=keyword.pos, key=keyword, value=value))
            elif item.is_dict_literal:
                # unpack "**{a:b}" directly
                pending_items.extend(item.key_value_pairs)
            else:
                # An arbitrary '**expr': first close the dict literal
                # collected so far, then add the expression itself.
                if pending_items:
                    merged_parts.append(ExprNodes.DictNode(
                        pending_items[0].pos, key_value_pairs=pending_items,
                        reject_duplicates=True))
                    pending_items = []
                merged_parts.append(item)

        if pending_items:
            merged_parts.append(ExprNodes.DictNode(
                pending_items[0].pos, key_value_pairs=pending_items,
                reject_duplicates=True))

        if merged_parts:
            if len(merged_parts) == 1 and merged_parts[0].is_dict_literal:
                # only simple keyword arguments found -> one dict
                keyword_dict = merged_parts[0]
            else:
                # at least one **kwargs
                keyword_dict = ExprNodes.MergedDictNode(pos, keyword_args=merged_parts)

    return arg_tuple, keyword_dict
562
563
def p_call(s, function):
    # s.sy == '('
    # Parse a call of 'function'.  A SimpleCallNode is built when there
    # are no keyword arguments and the positional arguments form a single
    # plain list; otherwise a GeneralCallNode.
    pos = s.position()
    positional_args, keyword_args = p_call_parse_args(s)

    is_simple_call = (
        not keyword_args
        and len(positional_args) == 1
        and isinstance(positional_args[0], list))
    if is_simple_call:
        return ExprNodes.SimpleCallNode(
            pos, function=function, args=positional_args[0])

    arg_tuple, keyword_dict = p_call_build_packed_args(
        pos, positional_args, keyword_args)
    return ExprNodes.GeneralCallNode(
        pos, function=function,
        positional_args=arg_tuple, keyword_args=keyword_dict)
575
576
577#lambdef: 'lambda' [varargslist] ':' test
578
579#subscriptlist: subscript (',' subscript)* [',']
580
def p_index(s, base):
    # s.sy == '['
    # Parse a subscript of 'base', including the closing ']'.
    # A single two-part slice (base[a:b]) yields a SliceIndexNode; every
    # other form yields an IndexNode.
    pos = s.position()
    s.next()
    subscripts, is_single_value = p_subscript_list(s)
    if is_single_value and len(subscripts[0]) == 2:
        start, stop = subscripts[0]
        result = ExprNodes.SliceIndexNode(
            pos, base=base, start=start, stop=stop)
    else:
        indexes = make_slice_nodes(pos, subscripts)
        if not is_single_value:
            index = ExprNodes.TupleNode(pos, args=indexes)
        else:
            index = indexes[0]
        result = ExprNodes.IndexNode(pos, base=base, index=index)
    s.expect(']')
    return result
600
def p_subscript_list(s):
    # Parse comma-separated subscripts up to (not including) the ']'.
    # Returns (items, is_single_value); is_single_value is False as soon
    # as a comma was seen, even a trailing one.
    items = [p_subscript(s)]
    saw_comma = False
    while s.sy == ',':
        saw_comma = True
        s.next()
        if s.sy == ']':
            break
        items.append(p_subscript(s))
    return items, not saw_comma
611
612#subscript: '.' '.' '.' | test | [test] ':' [test] [':' [test]]
613
def p_subscript(s):
    # Parse a subscript and return a list of
    # 1, 2 or 3 ExprNodes, depending on how
    # many slice elements were encountered.
    # A slice element that was left out is represented by None.
    #
    # The first element may only be omitted in front of ':'; the stop and
    # step elements may also be omitted in front of ',' or ']'.
    elements = [p_slice_element(s, (':',))]
    while s.sy == ':' and len(elements) < 3:
        s.next()
        elements.append(p_slice_element(s, (':', ',', ']')))
    return elements
629
def p_slice_element(s, follow_set):
    # Simple expression which may be missing iff
    # it is followed by something in follow_set.
    # Returns None (consuming nothing) when it is missing.
    if s.sy in follow_set:
        return None
    return p_test(s)
637
def expect_ellipsis(s):
    # An ellipsis is scanned as three consecutive '.' tokens.
    for _ in range(3):
        s.expect('.')
642
def make_slice_nodes(pos, subscripts):
    # Convert a list of subscripts as returned
    # by p_subscript_list into a list of ExprNodes,
    # creating SliceNodes for elements with 2 or
    # more components.  One-element subscripts are
    # passed through as their single node.
    return [
        subscript[0] if len(subscript) == 1 else make_slice_node(pos, *subscript)
        for subscript in subscripts
    ]
655
def make_slice_node(pos, start, stop = None, step = None):
    # Build a SliceNode at 'pos'; every component that is missing (falsy)
    # is replaced by its own NoneNode.
    start = start or ExprNodes.NoneNode(pos)
    stop = stop or ExprNodes.NoneNode(pos)
    step = step or ExprNodes.NoneNode(pos)
    return ExprNodes.SliceNode(pos, start=start, stop=stop, step=step)
665
666#atom: '(' [yield_expr|testlist_comp] ')' | '[' [listmaker] ']' | '{' [dict_or_set_maker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
667
def p_atom(s):
    # Parse one atom: a parenthesised form, list / dict / set display,
    # backquote expression, ellipsis, number, string literal or name.
    # Dispatches on the current token and reports an error for anything
    # else.
    pos = s.position()
    sy = s.sy

    if sy == '(':
        s.next()
        if s.sy == ')':
            # "()" is the empty tuple
            result = ExprNodes.TupleNode(pos, args=[])
        elif s.sy == 'yield':
            result = p_yield_expression(s)
        else:
            result = p_testlist_comp(s)
        s.expect(')')
        return result

    if sy == '[':
        return p_list_maker(s)
    if sy == '{':
        return p_dict_or_set_maker(s)
    if sy == '`':
        return p_backquote_expr(s)
    if sy == '.':
        expect_ellipsis(s)
        return ExprNodes.EllipsisNode(pos)
    if sy == 'INT':
        return p_int_literal(s)

    if sy == 'FLOAT':
        text = s.systring
        s.next()
        return ExprNodes.FloatNode(pos, value=text)

    if sy == 'IMAG':
        # drop the last character of the token (the imaginary suffix)
        text = s.systring[:-1]
        s.next()
        return ExprNodes.ImagNode(pos, value=text)

    if sy == 'BEGIN_STRING':
        kind, bytes_value, unicode_value = p_cat_string_literal(s)
        if kind == 'c':
            return ExprNodes.CharNode(pos, value=bytes_value)
        if kind == 'u':
            return ExprNodes.UnicodeNode(
                pos, value=unicode_value, bytes_value=bytes_value)
        if kind == 'b':
            return ExprNodes.BytesNode(pos, value=bytes_value)
        if kind == 'f':
            return ExprNodes.JoinedStrNode(pos, values=unicode_value)
        if kind == '':
            return ExprNodes.StringNode(
                pos, value=bytes_value, unicode_value=unicode_value)
        s.error("invalid string kind '%s'" % kind)
        return None

    if sy == 'IDENT':
        name = s.systring
        if name == "None":
            result = ExprNodes.NoneNode(pos)
        elif name == "True":
            result = ExprNodes.BoolNode(pos, value=True)
        elif name == "False":
            result = ExprNodes.BoolNode(pos, value=False)
        elif name == "NULL" and not s.in_python_file:
            result = ExprNodes.NullNode(pos)
        else:
            result = p_name(s, name)
        s.next()
        return result

    s.error("Expected an identifier or literal")
730
def p_int_literal(s):
    # Consume an integer token and build an IntNode from it.  Trailing
    # U/u and L/l suffix characters are stripped from the digits and
    # recorded as 'unsigned' ("U" per character) and 'longness' ("L" per
    # character).
    pos = s.position()
    value = s.systring
    s.next()
    unsigned = ""
    longness = ""
    last = value[-1]
    while last in u"UuLl":
        if last in u"Ll":
            longness += "L"
        else:
            unsigned += "U"
        value = value[:-1]
        last = value[-1]
    # '3L' is ambiguous in Py2 but not in Py3.  '3U' and '3LL' are
    # illegal in Py2 Python files.  All suffixes are illegal in Py3
    # Python files.
    if unsigned or (longness and (longness == 'LL' or s.context.language_level >= 3)):
        is_c_literal = True
    else:
        is_c_literal = None
    if s.in_python_file:
        if is_c_literal:
            error(pos, "illegal integer literal syntax in Python source file")
        is_c_literal = False
    return ExprNodes.IntNode(
        pos,
        is_c_literal=is_c_literal,
        value=value,
        unsigned=unsigned,
        longness=longness)
761
762
def p_name(s, name):
    # Build the node for identifier 'name'.  Outside of compile-time
    # expressions, a name found in the compile-time environment is
    # replaced by a constant node for its value, provided the value can
    # be wrapped; otherwise a NameNode is returned.
    pos = s.position()
    substitute_constant = not s.compile_time_expr and name in s.compile_time_env
    if substitute_constant:
        constant = s.compile_time_env.lookup_here(name)
        constant_node = wrap_compile_time_constant(pos, constant)
        if constant_node is not None:
            return constant_node
    return ExprNodes.NameNode(pos, name=name)
771
772
def wrap_compile_time_constant(pos, value):
    # Convert a compile-time Python value into the matching literal node.
    # Handles None, Ellipsis, bool, int (and long on Py2), float, complex,
    # unicode and bytes strings, and tuples of such values.
    # Returns None after reporting an error for any other type, or when
    # an element of a tuple could not be converted.
    #
    # repr() is only evaluated in the numeric branches that need it, so
    # that strings and (nested) tuples are not formatted needlessly.
    if value is None:
        return ExprNodes.NoneNode(pos)
    elif value is Ellipsis:
        return ExprNodes.EllipsisNode(pos)
    elif isinstance(value, bool):
        # must be tested before int, since bool is a subclass of int
        return ExprNodes.BoolNode(pos, value=value)
    elif isinstance(value, int):
        return ExprNodes.IntNode(pos, value=repr(value), constant_result=value)
    elif isinstance(value, float):
        return ExprNodes.FloatNode(pos, value=repr(value), constant_result=value)
    elif isinstance(value, complex):
        node = ExprNodes.ImagNode(pos, value=repr(value.imag), constant_result=complex(0.0, value.imag))
        if value.real:
            # FIXME: should we care about -0.0 ?
            # probably not worth using the '-' operator for negative imag values
            node = ExprNodes.binop_node(
                pos, '+', ExprNodes.FloatNode(pos, value=repr(value.real), constant_result=value.real), node,
                constant_result=value)
        return node
    elif isinstance(value, _unicode):
        return ExprNodes.UnicodeNode(pos, value=EncodedString(value))
    elif isinstance(value, _bytes):
        bvalue = bytes_literal(value, 'ascii')  # actually: unknown encoding, but BytesLiteral requires one
        return ExprNodes.BytesNode(pos, value=bvalue, constant_result=value)
    elif isinstance(value, tuple):
        args = [wrap_compile_time_constant(pos, arg)
                for arg in value]
        if None not in args:
            return ExprNodes.TupleNode(pos, args=args)
        else:
            # error already reported
            return None
    elif not _IS_PY3 and isinstance(value, long):
        # Py2 long: strip the 'L' suffix that repr() appends
        return ExprNodes.IntNode(pos, value=repr(value).rstrip('L'), constant_result=value)
    error(pos, "Invalid type for compile-time constant: %r (type %s)"
               % (value, value.__class__.__name__))
    return None
812
813
def p_cat_string_literal(s):
    # A sequence of one or more adjacent string literals.
    # Returns (kind, bytes_value, unicode_value)
    # where kind in ('b', 'c', 'u', 'f', '')
    first_pos = s.position()
    kind, bytes_value, unicode_value = p_string_literal(s)
    if kind == 'c' or s.sy != 'BEGIN_STRING':
        # a char literal, or nothing to concatenate
        return kind, bytes_value, unicode_value

    bstrings = [bytes_value]
    ustrings = [unicode_value]
    positions = [first_pos]
    bytes_value = None
    unicode_value = None
    while s.sy == 'BEGIN_STRING':
        part_pos = s.position()
        next_kind, next_bytes_value, next_unicode_value = p_string_literal(s)
        if next_kind == 'c':
            error(part_pos, "Cannot concatenate char literal with another string or char literal")
            continue
        if next_kind != kind:
            # concatenating f strings and normal strings is allowed and leads to an f string
            mixes_with_fstring = (
                (kind == 'f' and next_kind in ('u', ''))
                or (next_kind == 'f' and kind in ('u', '')))
            if not mixes_with_fstring:
                error(part_pos, "Cannot mix string literals of different types, expected %s'', got %s''" % (
                    kind, next_kind))
                continue
            kind = 'f'
        bstrings.append(next_bytes_value)
        ustrings.append(next_unicode_value)
        positions.append(part_pos)

    # join and rewrap the partial literals
    if kind in ('b', 'c', '') or (kind == 'u' and None not in bstrings):
        # Py3 enforced unicode literals are parsed as bytes/unicode combination
        bytes_value = bytes_literal(StringEncoding.join_bytes(bstrings), s.source_encoding)
    if kind in ('u', ''):
        text_parts = [part for part in ustrings if part is not None]
        unicode_value = EncodedString(u''.join(text_parts))
    elif kind == 'f':
        unicode_value = []
        for part, part_pos in zip(ustrings, positions):
            if isinstance(part, list):
                # an f-string part is already a list of values
                unicode_value.extend(part)
            else:
                # non-f-string concatenated into the f-string
                unicode_value.append(
                    ExprNodes.UnicodeNode(part_pos, value=EncodedString(part)))
    return kind, bytes_value, unicode_value
856
857
def p_opt_string_literal(s, required_type='u'):
    # Parse a string literal if one starts at the current token and
    # return its unicode value (required_type 'u') or bytes value
    # (required_type 'b').  Returns None if no string literal follows.
    if s.sy != 'BEGIN_STRING':
        return None
    pos = s.position()
    kind, bytes_value, unicode_value = p_string_literal(s, required_type)
    if required_type == 'b':
        return bytes_value
    if required_type != 'u':
        s.error("internal parser configuration error")
        return None
    if kind == 'f':
        s.error("f-string not allowed here", pos)
    return unicode_value
871
872
def check_for_non_ascii_characters(string):
    # Report whether 'string' contains a character at or above U+0080.
    # Scanning stops at the first such character.
    found = False
    for char in string:
        if char >= u'\x80':
            found = True
            break
    return found
878
879
def p_string_literal(s, kind_override=None):
    # Parses a single string or char literal.  Returns (kind, bvalue, uvalue)
    # where kind in ('b', 'c', 'u', 'f', '').  The 'bvalue' is the source
    # code byte sequence of the string literal, 'uvalue' is the
    # decoded Unicode string.  Either of the two may be None depending
    # on the 'kind' of string, only unprefixed strings have both
    # representations. In f-strings, the uvalue is a list of the Unicode
    # strings and f-string expressions that make up the f-string.
    #
    # A 'kind_override' of 'u' or 'b' replaces the kind that was derived
    # from the prefix of the literal itself.
    #
    # Invalid prefix combinations and invalid character literals are
    # reported through error(), unclosed literals and unexpected tokens
    # through s.error().

    # s.sy == 'BEGIN_STRING'
    pos = s.position()
    is_python3_source = s.context.language_level >= 3
    has_non_ascii_literal_characters = False
    # position right behind the prefix and opening quote(s), passed on to p_f_string()
    string_start_pos = (pos[0], pos[1], pos[2] + len(s.systring))
    # lower-cased prefix letters of the literal, with the quote characters stripped off
    kind_string = s.systring.rstrip('"\'').lower()
    if len(kind_string) > 1:
        if len(set(kind_string)) != len(kind_string):
            error(pos, 'Duplicate string prefix character')
        if 'b' in kind_string and 'u' in kind_string:
            error(pos, 'String prefixes b and u cannot be combined')
        if 'b' in kind_string and 'f' in kind_string:
            error(pos, 'String prefixes b and f cannot be combined')
        if 'u' in kind_string and 'f' in kind_string:
            error(pos, 'String prefixes u and f cannot be combined')

    is_raw = 'r' in kind_string

    # reduce the prefix to a single 'kind' character
    if 'c' in kind_string:
        # this should never happen, since the lexer does not allow combining c
        # with other prefix characters
        if len(kind_string) != 1:
            error(pos, 'Invalid string prefix for character literal')
        kind = 'c'
    elif 'f' in kind_string:
        kind = 'f'     # u is ignored
        is_raw = True  # postpone the escape resolution
    elif 'b' in kind_string:
        kind = 'b'
    elif 'u' in kind_string:
        kind = 'u'
    else:
        kind = ''

    # select the builder that collects the content of the literal
    if kind == '' and kind_override is None and Future.unicode_literals in s.context.future_directives:
        # unprefixed literal under 'unicode_literals': reported as kind 'u',
        # but still collected with the str builder
        chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
        kind = 'u'
    else:
        if kind_override is not None and kind_override in 'ub':
            kind = kind_override
        if kind in ('u', 'f'):  # f-strings are scanned exactly like Unicode literals, but are parsed further later
            chars = StringEncoding.UnicodeLiteralBuilder()
        elif kind == '':
            chars = StringEncoding.StrLiteralBuilder(s.source_encoding)
        else:
            chars = StringEncoding.BytesLiteralBuilder(s.source_encoding)

    # consume the tokens of the literal up to (and including) 'END_STRING'
    while 1:
        s.next()
        sy = s.sy
        systr = s.systring
        # print "p_string_literal: sy =", sy, repr(s.systring) ###
        if sy == 'CHARS':
            chars.append(systr)
            if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
                has_non_ascii_literal_characters = True
        elif sy == 'ESCAPE':
            # in Py2, 'ur' raw unicode strings resolve unicode escapes but nothing else
            if is_raw and (is_python3_source or kind != 'u' or systr[1] not in u'Uu'):
                # keep the escape sequence unresolved
                chars.append(systr)
                if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
                    has_non_ascii_literal_characters = True
            else:
                _append_escape_sequence(kind, chars, systr, s)
        elif sy == 'NEWLINE':
            chars.append(u'\n')
        elif sy == 'END_STRING':
            break
        elif sy == 'EOF':
            s.error("Unclosed string literal", pos=pos)
        else:
            s.error("Unexpected token %r:%r in string literal" % (
                sy, s.systring))

    if kind == 'c':
        # character literals only have a byte value, which must be a single character
        unicode_value = None
        bytes_value = chars.getchar()
        if len(bytes_value) != 1:
            error(pos, u"invalid character literal: %r" % bytes_value)
    else:
        bytes_value, unicode_value = chars.getstrings()
        if (has_non_ascii_literal_characters
                and is_python3_source and Future.unicode_literals in s.context.future_directives):
            # Python 3 forbids literal non-ASCII characters in byte strings
            if kind == 'b':
                s.error("bytes can only contain ASCII literal characters.", pos=pos)
            # no byte representation is returned in this case
            bytes_value = None
    if kind == 'f':
        # split the collected text into literal parts and expressions;
        # the f-string is only 'raw' here if it actually had an 'r' prefix
        unicode_value = p_f_string(s, unicode_value, string_start_pos, is_raw='r' in kind_string)
    s.next()  # skip over 'END_STRING'
    return (kind, bytes_value, unicode_value)
980
981
def _append_escape_sequence(kind, builder, escape_sequence, s):
    # Resolves a single escape sequence and appends the result to 'builder'.
    #
    # 'kind' is the kind character of the string literal being parsed,
    # 'escape_sequence' is the complete sequence including its leading
    # backslash, and 's' is the scanner that is used for reporting errors.
    #
    # Unicode escapes (\N{...}, \uXXXX, \UXXXXXXXX) are only resolved for the
    # kinds 'u', 'f' and ''.  Sequences that are not recognised are appended
    # unchanged, including the backslash.
    c = escape_sequence[1]
    if c in u"01234567":
        # octal escape
        builder.append_charval(int(escape_sequence[1:], 8))
    elif c in u"'\"\\":
        builder.append(c)
    elif c in u"abfnrtv":
        builder.append(StringEncoding.char_from_escape_sequence(escape_sequence))
    elif c == u'\n':
        pass  # line continuation
    elif c == u'x':  # \xXX
        if len(escape_sequence) == 4:
            builder.append_charval(int(escape_sequence[2:], 16))
        else:
            s.error("Invalid hex escape '%s'" % escape_sequence, fatal=False)
    elif c in u'NUu' and kind in ('u', 'f', ''):  # \uxxxx, \Uxxxxxxxx, \N{...}
        chrval = -1  # stays negative if the escape could not be resolved
        if c == u'N':
            uchar = None
            try:
                # strip the leading '\N{' and the trailing '}'
                uchar = lookup_unicodechar(escape_sequence[3:-1])
                chrval = ord(uchar)
            except KeyError:
                s.error("Unknown Unicode character name %s" %
                        repr(escape_sequence[3:-1]).lstrip('u'), fatal=False)
            except TypeError:
                # 2-byte unicode build of CPython?
                if (uchar is not None and _IS_2BYTE_UNICODE and len(uchar) == 2 and
                        unicode_category(uchar[0]) == 'Cs' and unicode_category(uchar[1]) == 'Cs'):
                    # surrogate pair instead of single character:
                    # the high surrogate carries the upper 10 bits, the low
                    # surrogate the lower 10 bits of (code point - 0x10000)
                    chrval = 0x10000 + ((ord(uchar[0]) - 0xd800) << 10) + (ord(uchar[1]) - 0xdc00)
                else:
                    raise
        elif len(escape_sequence) in (6, 10):
            chrval = int(escape_sequence[2:], 16)
            if chrval > 1114111:  # sys.maxunicode:
                # NOTE(review): unlike the other errors here, this one does not pass
                # fatal=False - confirm whether that is intended.
                s.error("Invalid unicode escape '%s'" % escape_sequence)
                chrval = -1
        else:
            s.error("Invalid unicode escape '%s'" % escape_sequence, fatal=False)
        if chrval >= 0:
            builder.append_uescape(chrval, escape_sequence)
    else:
        builder.append(escape_sequence)
1026
1027
# Regex matchers that split off the next part of an f-string's content at a
# given index: an escape sequence, a (possibly doubled) brace, or a run of
# other characters.  The "raw" variant only recognises a backslash that is
# optionally followed by a second backslash.  In the non-raw variant, a
# backslash that does not start a known escape sequence is matched on its own.
_parse_escape_sequences_raw, _parse_escape_sequences = [re.compile((
    # escape sequences:
    br'(\\(?:' +
    (br'\\?' if is_raw else (
        br'[\\abfnrtv"\'{]|'
        br'[0-7]{1,3}|'  # octal escapes have one to three digits, e.g. '\0'
        br'N\{[^}]*\}|'
        br'x[0-9a-fA-F]{2}|'
        br'u[0-9a-fA-F]{4}|'
        br'U[0-9a-fA-F]{8}|'
        br'[NxuU]|'  # detect invalid escape sequences that do not match above
    )) +
    br')?|'
    # non-escape sequences:
    br'\{\{?|'
    br'\}\}?|'
    br'[^\\{}]+)'
    ).decode('us-ascii')).match
    for is_raw in (True, False)]
1047
1048
def _f_string_error_pos(pos, string, i):
    # Builds the position tuple for an error at index 'i' of an f-string whose
    # content starts at 'pos'.  The first two entries of 'pos' are kept, the
    # third is advanced by 'i + 1'.  'string' is currently unused.
    shifted = pos[2] + i + 1  # FIXME: handle newlines in string
    return (pos[0], pos[1], shifted)
1051
1052
def p_f_string(s, unicode_value, pos, is_raw):
    # Splits the content of a PEP 498 f-string literal into a list of nodes.
    # Literal text becomes UnicodeNodes, the nodes for the {}-expressions are
    # provided by p_f_string_expr().
    if is_raw:
        _parse_seq = _parse_escape_sequences_raw
    else:
        _parse_seq = _parse_escape_sequences

    values = []
    builder = StringEncoding.UnicodeLiteralBuilder()
    size = len(unicode_value)
    next_start = 0

    while next_start < size:
        end = next_start  # start index of the current part, for error reporting
        match = _parse_seq(unicode_value, next_start)
        if match is None:
            error(_f_string_error_pos(pos, unicode_value, next_start), "Invalid escape sequence")

        next_start = match.end()
        part = match.group()
        c = part[0]

        if part == '{{':
            # doubled braces stand for a single literal brace
            builder.append('{')
        elif part == '}}':
            builder.append('}')
        elif c == '{':
            # start of an expression: flush the literal text collected so far
            if builder.chars:
                values.append(ExprNodes.UnicodeNode(pos, value=builder.getstring()))
                builder = StringEncoding.UnicodeLiteralBuilder()
            next_start, expr_nodes = p_f_string_expr(s, unicode_value, pos, next_start, is_raw)
            values.extend(expr_nodes)
        elif c == '}':
            error(_f_string_error_pos(pos, unicode_value, end),
                  "f-string: single '}' is not allowed")
        elif c == '\\' and not is_raw and len(part) > 1:
            # a complete escape sequence that gets resolved now
            _append_escape_sequence('f', builder, part, s)
        else:
            # plain text, or a backslash sequence that is kept as it is
            builder.append(part)

    # flush the trailing literal text
    if builder.chars:
        values.append(ExprNodes.UnicodeNode(pos, value=builder.getstring()))
    return values
1098
1099
def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
    # Parses a {}-delimited expression inside an f-string.  'starting_index'
    # is the index in 'unicode_value' right behind the opening '{'.
    # Returns a tuple (next_index, nodes), where 'nodes' is the list
    # [UnicodeNode?, FormattedValueNode] and 'next_index' is the index in the
    # string that follows the expression.
    #
    # ? = Optional (only generated for the '=' specifier, holding the expression text)
    i = starting_index
    size = len(unicode_value)
    conversion_char = terminal_char = format_spec = None
    format_spec_str = None
    expr_text = None
    NO_CHAR = 2**30  # marker for "currently not inside of a string literal"

    nested_depth = 0
    quote_char = NO_CHAR
    in_triple_quotes = False
    backslash_reported = False

    # Step 1: find the end of the expression, i.e. the first of '><=!:}' that is
    # neither inside of a nested string literal nor inside of brackets.
    while True:
        if i >= size:
            break  # error will be reported below
        c = unicode_value[i]

        if quote_char != NO_CHAR:
            # inside of a string literal
            if c == '\\':
                # avoid redundant error reports along '\' sequences
                if not backslash_reported:
                    error(_f_string_error_pos(pos, unicode_value, i),
                          "backslashes not allowed in f-strings")
                backslash_reported = True
            elif c == quote_char:
                if in_triple_quotes:
                    # only three quotes in a row close a triple quoted string
                    if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                        in_triple_quotes = False
                        quote_char = NO_CHAR
                        i += 2
                else:
                    quote_char = NO_CHAR
        elif c in '\'"':
            # start of a string literal
            quote_char = c
            if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                in_triple_quotes = True
                i += 2
        elif c in '{[(':
            nested_depth += 1
        elif nested_depth != 0 and c in '}])':
            nested_depth -= 1
        elif c == '#':
            error(_f_string_error_pos(pos, unicode_value, i),
                  "format string cannot include #")
        elif nested_depth == 0 and c in '><=!:}':
            # allow special cases with '!' and '='
            if i + 1 < size and c in '!=><':
                if unicode_value[i + 1] == '=':
                    i += 2  # we checked 2, so we can skip 2: '!=', '==', '>=', '<='
                    continue
                elif c in '><':  # allow single '<' and '>'
                    i += 1
                    continue
            terminal_char = c
            break
        i += 1

    # normalise line endings as the parser expects that
    expr_str = unicode_value[starting_index:i].replace('\r\n', '\n').replace('\r', '\n')
    expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2)  # TODO: find exact code position (concat, multi-line, ...)

    if not expr_str.strip():
        error(_f_string_error_pos(pos, unicode_value, starting_index),
              "empty expression not allowed in f-string")

    # Step 2: the '=' specifier - skip the '=' and any whitespace behind it and
    # remember the text up to that point, which is later emitted in front of the value.
    if terminal_char == '=':
        i += 1
        while i < size and unicode_value[i].isspace():
            i += 1

        if i < size:
            terminal_char = unicode_value[i]
            expr_text = unicode_value[starting_index:i]
        # otherwise: error will be reported below

    # Step 3: the conversion - a single character that follows the '!'
    if terminal_char == '!':
        i += 1
        if i + 2 > size:
            pass  # error will be reported below
        else:
            conversion_char = unicode_value[i]
            i += 1
            terminal_char = unicode_value[i]

    # Step 4: the format spec - everything from behind the ':' up to the '}' that
    # closes the expression, ignoring braces in nested {}-fields and string literals.
    if terminal_char == ':':
        in_triple_quotes = False
        in_string = False
        nested_depth = 0
        start_format_spec = i + 1
        while True:
            if i >= size:
                break  # error will be reported below
            c = unicode_value[i]
            if not in_triple_quotes and not in_string:
                if c == '{':
                    nested_depth += 1
                elif c == '}':
                    if nested_depth > 0:
                        nested_depth -= 1
                    else:
                        terminal_char = c
                        break
            if c in '\'"':
                if not in_string and i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
                    in_triple_quotes = not in_triple_quotes
                    i += 2
                elif not in_triple_quotes:
                    in_string = not in_string
            i += 1

        format_spec_str = unicode_value[start_format_spec:i]

    # the '=' specifier formats with repr() unless a conversion or format spec is given
    if expr_text and conversion_char is None and format_spec_str is None:
        conversion_char = 'r'

    if terminal_char != '}':
        error(_f_string_error_pos(pos, unicode_value, i),
              "missing '}' in format string expression" + (
                  ", found '%s'" % terminal_char if terminal_char else ""))

    # parse the expression as if it was surrounded by parentheses
    buf = StringIO('(%s)' % expr_str)
    scanner = PyrexScanner(buf, expr_pos[0], parent_scanner=s, source_encoding=s.source_encoding, initial_pos=expr_pos)
    expr = p_testlist(scanner)  # TODO is testlist right here?

    # validate the conversion char
    if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char):
        error(expr_pos, "invalid conversion character '%s'" % conversion_char)

    # the format spec is itself treated like an f-string
    if format_spec_str:
        format_spec = ExprNodes.JoinedStrNode(pos, values=p_f_string(s, format_spec_str, pos, is_raw))

    nodes = []
    if expr_text:
        nodes.append(ExprNodes.UnicodeNode(pos, value=StringEncoding.EncodedString(expr_text)))
    nodes.append(ExprNodes.FormattedValueNode(pos, value=expr, conversion_char=conversion_char, format_spec=format_spec))

    # 'i' is the index of the closing '}', so continue right behind it
    return i + 1, nodes
1245
1246
1247# since PEP 448:
1248# list_display  ::=     "[" [listmaker] "]"
1249# listmaker     ::=     (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
1250# comp_iter     ::=     comp_for | comp_if
1251# comp_for      ::=     ["async"] "for" expression_list "in" testlist [comp_iter]
1252# comp_if       ::=     "if" test [comp_iter]
1253
def p_list_maker(s):
    # s.sy == '['
    # Parses a list display: an empty list, a list comprehension or a
    # (merged) list literal, including the closing ']'.
    start = s.position()
    s.next()
    if s.sy == ']':
        # empty list
        s.expect(']')
        return ExprNodes.ListNode(start, args=[])

    first = p_test_or_starred_expr(s)
    if s.sy not in ('for', 'async'):
        # (merged) list literal
        if s.sy == ',':
            s.next()
            items = p_test_or_starred_expr_list(s, first)
        else:
            items = [first]
        s.expect(']')
        return ExprNodes.ListNode(start, args=items)

    # list comprehension
    if first.is_starred:
        s.error("iterable unpacking cannot be used in comprehension")
    append_node = ExprNodes.ComprehensionAppendNode(start, expr=first)
    loop_node = p_comp_for(s, append_node)
    s.expect(']')
    # list comprehensions leak their loop variable in Py2
    is_scoped = s.context.language_level >= 3
    return ExprNodes.ComprehensionNode(
        start, loop=loop_node, append=append_node, type=Builtin.list_type,
        has_local_scope=is_scoped)
1282
1283
def p_comp_iter(s, body):
    # Parses an optional further 'for' or 'if' clause of a comprehension.
    # Without such a clause, 'body' is returned as it is.
    token = s.sy
    if token == 'if':
        return p_comp_if(s, body)
    if token in ('for', 'async'):
        return p_comp_for(s, body)
    # insert the 'append' operation into the loop
    return body
1292
def p_comp_for(s, body):
    # Parses a '[async] for ...' clause of a comprehension, together with all
    # clauses that follow it, and returns it as a ForStatNode.
    pos = s.position()
    # [async] for ...
    is_async = s.sy == 'async'
    if is_async:
        s.next()

    # s.sy == 'for'
    s.expect('for')
    kw = p_for_bounds(s, allow_testlist=False, is_async=is_async)
    # the remaining clauses (or 'body' itself) become the body of this loop
    inner = p_comp_iter(s, body)
    kw.update(else_clause=None, body=inner, is_async=is_async)
    return Nodes.ForStatNode(pos, **kw)
1306
def p_comp_if(s, body):
    # s.sy == 'if'
    # Parses an 'if' clause of a comprehension, together with all clauses that
    # follow it, and returns it as an IfStatNode without an else clause.
    if_pos = s.position()
    s.next()
    condition = p_test_nocond(s)
    inner = p_comp_iter(s, body)
    clause = Nodes.IfClauseNode(if_pos, condition=condition, body=inner)
    return Nodes.IfStatNode(if_pos, if_clauses=[clause], else_clause=None)
1316
1317
1318# since PEP 448:
1319#dictorsetmaker: ( ((test ':' test | '**' expr)
1320#                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
1321#                  ((test | star_expr)
1322#                   (comp_for | (',' (test | star_expr))* [','])) )
1323
def p_dict_or_set_maker(s):
    # s.sy == '{'
    # Parses a dict or set display: an empty dict, a dict/set comprehension,
    # or a (merged) dict/set literal, including the closing '}'.
    pos = s.position()
    s.next()
    if s.sy == '}':
        s.next()
        return ExprNodes.DictNode(pos, key_value_pairs=[])

    # 'parts' collects the items in source order: each entry is either a list
    # of consecutive simple items or a single unpacked ('*'/'**') expression.
    parts = []
    # 0: not known yet, 1: set, 2: dict  (matches the length of '*' and '**')
    target_type = 0
    last_was_simple_item = False
    while True:
        if s.sy in ('*', '**'):
            # merged set/dict literal
            if target_type == 0:
                target_type = 1 if s.sy == '*' else 2  # 'stars'
            elif target_type != len(s.sy):
                # '*' in a dict literal or '**' in a set literal
                s.error("unexpected %sitem found in %s literal" % (
                    s.sy, 'set' if target_type == 1 else 'dict'))
            s.next()
            if s.sy == '*':
                s.error("expected expression, found '*'")
            item = p_starred_expr(s)
            parts.append(item)
            last_was_simple_item = False
        else:
            item = p_test(s)
            if target_type == 0:
                target_type = 2 if s.sy == ':' else 1  # dict vs. set
            if target_type == 2:
                # dict literal
                s.expect(':')
                key = item
                value = p_test(s)
                item = ExprNodes.DictItemNode(key.pos, key=key, value=value)
            # extend the current run of simple items, or start a new one
            if last_was_simple_item:
                parts[-1].append(item)
            else:
                parts.append([item])
                last_was_simple_item = True

        if s.sy == ',':
            s.next()
            if s.sy == '}':
                # trailing comma
                break
        else:
            break

    if s.sy in ('for', 'async'):
        # dict/set comprehension
        if len(parts) == 1 and isinstance(parts[0], list) and len(parts[0]) == 1:
            # exactly one simple item in front of the 'for'
            item = parts[0][0]
            if target_type == 2:
                assert isinstance(item, ExprNodes.DictItemNode), type(item)
                comprehension_type = Builtin.dict_type
                append = ExprNodes.DictComprehensionAppendNode(
                    item.pos, key_expr=item.key, value_expr=item.value)
            else:
                comprehension_type = Builtin.set_type
                append = ExprNodes.ComprehensionAppendNode(item.pos, expr=item)
            loop = p_comp_for(s, append)
            s.expect('}')
            return ExprNodes.ComprehensionNode(pos, loop=loop, append=append, type=comprehension_type)
        else:
            # syntax error, try to find a good error message
            if len(parts) == 1 and not isinstance(parts[0], list):
                s.error("iterable unpacking cannot be used in comprehension")
            else:
                # e.g. "{1,2,3 for ..."
                s.expect('}')
            return ExprNodes.DictNode(pos, key_value_pairs=[])

    s.expect('}')
    if target_type == 1:
        # (merged) set literal
        items = []
        set_items = []
        # wrap each run of simple items in a SetNode, keep unpacked expressions as they are
        for part in parts:
            if isinstance(part, list):
                set_items.extend(part)
            else:
                if set_items:
                    items.append(ExprNodes.SetNode(set_items[0].pos, args=set_items))
                    set_items = []
                items.append(part)
        if set_items:
            items.append(ExprNodes.SetNode(set_items[0].pos, args=set_items))
        if len(items) == 1 and items[0].is_set_literal:
            # plain set literal, nothing to merge
            return items[0]
        return ExprNodes.MergedSequenceNode(pos, args=items, type=Builtin.set_type)
    else:
        # (merged) dict literal
        items = []
        dict_items = []
        # wrap each run of simple items in a DictNode, keep unpacked expressions as they are
        for part in parts:
            if isinstance(part, list):
                dict_items.extend(part)
            else:
                if dict_items:
                    items.append(ExprNodes.DictNode(dict_items[0].pos, key_value_pairs=dict_items))
                    dict_items = []
                items.append(part)
        if dict_items:
            items.append(ExprNodes.DictNode(dict_items[0].pos, key_value_pairs=dict_items))
        if len(items) == 1 and items[0].is_dict_literal:
            # plain dict literal, nothing to merge
            return items[0]
        return ExprNodes.MergedDictNode(pos, keyword_args=items, reject_duplicates=False)
1431
1432
1433# NOTE: no longer in Py3 :)
def p_backquote_expr(s):
    # s.sy == '`'
    # Parses a backquote expression.  Several comma separated expressions
    # between the backquotes are wrapped in a tuple.
    pos = s.position()
    s.next()
    operands = [p_test(s)]
    while s.sy == ',':
        s.next()
        operands.append(p_test(s))
    s.expect('`')
    if len(operands) != 1:
        arg = ExprNodes.TupleNode(pos, args=operands)
    else:
        arg = operands[0]
    return ExprNodes.BackquoteNode(pos, arg=arg)
1448
def p_simple_expr_list(s, expr=None):
    # Parses comma separated 'test' expressions up to the next terminator
    # token and returns them as a list.  An already parsed first expression
    # can be passed in as 'expr'.
    exprs = [] if expr is None else [expr]
    while s.sy not in expr_terminators:
        exprs.append(p_test(s))
        if s.sy == ',':
            s.next()
        else:
            break
    return exprs
1457
1458
def p_test_or_starred_expr_list(s, expr=None):
    # Parses comma separated 'test' or starred expressions up to the next
    # terminator token and returns them as a list.  An already parsed first
    # expression can be passed in as 'expr'.
    exprs = [] if expr is None else [expr]
    while s.sy not in expr_terminators:
        exprs.append(p_test_or_starred_expr(s))
        if s.sy == ',':
            s.next()
        else:
            break
    return exprs
1467
1468
1469#testlist: test (',' test)* [',']
1470
def p_testlist(s):
    # Parses one or more comma separated 'test' expressions.  A single
    # expression without a comma is returned as it is, everything else
    # as a TupleNode.
    start = s.position()
    first = p_test(s)
    if s.sy != ',':
        return first
    s.next()
    items = p_simple_expr_list(s, first)
    return ExprNodes.TupleNode(start, args=items)
1480
# testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
1482
def p_testlist_star_expr(s):
    # Parses one or more comma separated 'test' or starred expressions.
    # A single expression without a comma is returned as it is, everything
    # else as a TupleNode.
    start = s.position()
    first = p_test_or_starred_expr(s)
    if s.sy != ',':
        return first
    s.next()
    items = p_test_or_starred_expr_list(s, first)
    return ExprNodes.TupleNode(start, args=items)
1492
1493# testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
1494
def p_testlist_comp(s):
    # Parses either a generator expression or one or more comma separated
    # 'test' or starred expressions.  A single expression without a comma
    # is returned as it is.
    start = s.position()
    first = p_test_or_starred_expr(s)
    token = s.sy
    if token == ',':
        s.next()
        items = p_test_or_starred_expr_list(s, first)
        return ExprNodes.TupleNode(start, args=items)
    if token in ('for', 'async'):
        return p_genexp(s, first)
    return first
1506
def p_genexp(s, expr):
    # s.sy == 'async' | 'for'
    # Parses the loop clauses of a generator expression whose item expression
    # 'expr' was already parsed.  The innermost loop body yields 'expr'.
    pos = expr.pos
    yield_node = ExprNodes.YieldExprNode(pos, arg=expr)
    body = Nodes.ExprStatNode(pos, expr=yield_node)
    loop = p_comp_for(s, body)
    return ExprNodes.GeneratorExpressionNode(pos, loop=loop)
1512
# Tokens at which p_simple_expr_list() and p_test_or_starred_expr_list()
# stop collecting further expressions.
expr_terminators = cython.declare(frozenset, frozenset([
    ')', ']', '}', ':', '=', 'NEWLINE',
]))
1515
1516
1517#-------------------------------------------------------
1518#
1519#   Statements
1520#
1521#-------------------------------------------------------
1522
def p_global_statement(s):
    # assume s.sy == 'global'
    # Parses a 'global' statement with its list of names.
    start = s.position()
    s.next()  # skip over the keyword
    return Nodes.GlobalNode(start, names=p_ident_list(s))
1529
1530
def p_nonlocal_statement(s):
    # Parses a 'nonlocal' statement with its list of names.
    # The current token is skipped as the statement keyword.
    start = s.position()
    s.next()
    return Nodes.NonlocalNode(start, names=p_ident_list(s))
1536
1537
def p_expression_or_assignment(s):
    # Parses an expression statement, an (optionally annotated) single or
    # cascaded assignment, or an in-place assignment.
    target = p_testlist_star_expr(s)
    has_annotation = False
    if s.sy == ':' and (target.is_name or target.is_subscript or target.is_attribute):
        # annotated name, subscript or attribute
        has_annotation = True
        s.next()
        target.annotation = p_annotation(s)

    if s.sy != '=':
        # not a plain assignment
        if not re.match(r"([-+*/%^&|]|<<|>>|\*\*|//|@)=", s.sy):
            # simple expression statement
            return Nodes.ExprStatNode(target.pos, expr=target)

        # in-place assignment
        lhs = target
        if isinstance(lhs, ExprNodes.SliceIndexNode):
            # implementation requires IndexNode
            lhs = ExprNodes.IndexNode(
                lhs.pos,
                base=lhs.base,
                index=make_slice_node(lhs.pos, lhs.start, lhs.stop))
        elif not isinstance(lhs, (ExprNodes.AttributeNode, ExprNodes.IndexNode, ExprNodes.NameNode)):
            error(lhs.pos, "Illegal operand for inplace operation.")
        operator = s.sy[:-1]  # strip the trailing '='
        s.next()
        if s.sy == 'yield':
            rhs = p_yield_expression(s)
        else:
            rhs = p_testlist(s)
        return Nodes.InPlaceAssignmentNode(lhs.pos, operator=operator, lhs=lhs, rhs=rhs)

    if target.is_starred:
        # This is a common enough error to make when learning Cython to let
        # it fail as early as possible and give a very clear error message.
        s.error("a starred assignment target must be in a list or tuple"
                " - maybe you meant to use an index assignment: var[0] = ...",
                pos=target.pos)

    # collect all '=' separated parts, the last one is the assigned value
    chain = [target]
    while s.sy == '=':
        s.next()
        if s.sy == 'yield':
            chain.append(p_yield_expression(s))
        else:
            chain.append(p_testlist_star_expr(s))

    rhs = chain[-1]
    if len(chain) != 2:
        return Nodes.CascadedAssignmentNode(rhs.pos, lhs_list=chain[:-1], rhs=rhs)
    return Nodes.SingleAssignmentNode(rhs.pos, lhs=chain[0], rhs=rhs, first=has_annotation)
1587
1588
def p_print_statement(s):
    # s.sy == 'print'
    # Parses a print statement with an optional '>> stream' target.
    # A trailing comma suppresses the final newline.
    pos = s.position()
    line_end = ('NEWLINE', 'EOF')
    ends_with_comma = False
    stream = None
    s.next()
    if s.sy == '>>':
        # print >> stream [, ...]
        s.next()
        stream = p_test(s)
        if s.sy == ',':
            s.next()
            ends_with_comma = s.sy in line_end

    args = []
    if s.sy not in line_end:
        args.append(p_test(s))
        while s.sy == ',':
            s.next()
            if s.sy in line_end:
                ends_with_comma = True
                break
            args.append(p_test(s))

    return Nodes.PrintStatNode(
        pos,
        arg_tuple=ExprNodes.TupleNode(pos, args=args),
        stream=stream,
        append_newline=not ends_with_comma)
1615
1616
def p_exec_statement(s):
    # s.sy == 'exec'
    # Parses an exec statement, either as 'exec code [in globals [, locals]]'
    # or in the tuple form 'exec(code, globals [, locals])'.
    pos = s.position()
    s.next()
    code = p_bit_expr(s)
    tuple_variant = isinstance(code, ExprNodes.TupleNode)
    args = [code]
    if tuple_variant:
        # Py3 compatibility syntax
        if len(code.args) in (2, 3):
            args = code.args
        else:
            s.error("expected tuple of length 2 or 3, got length %d" % len(code.args),
                    pos=pos, fatal=False)
    if s.sy == 'in':
        if tuple_variant:
            s.error("tuple variant of exec does not support additional 'in' arguments",
                    fatal=False)
        s.next()
        args.append(p_test(s))
        if s.sy == ',':
            s.next()
            args.append(p_test(s))
    return Nodes.ExecStatNode(pos, args=args)
1643
def p_del_statement(s):
    # s.sy == 'del'
    # Parses a 'del' statement with its comma separated targets.
    start = s.position()
    s.next()
    # FIXME: 'exprlist' in Python
    targets = p_simple_expr_list(s)
    return Nodes.DelStatNode(start, args=targets)
1651
def p_pass_statement(s, with_newline = 0):
    # Parses a 'pass' statement.  If 'with_newline' is true, a newline
    # is also expected behind it.
    start = s.position()
    s.expect('pass')
    if not with_newline:
        return Nodes.PassStatNode(start)
    s.expect_newline("Expected a newline", ignore_semicolon=True)
    return Nodes.PassStatNode(start)
1658
def p_break_statement(s):
    # s.sy == 'break'
    # Skips over the keyword and returns a BreakStatNode for its position.
    start = s.position()
    s.next()
    node = Nodes.BreakStatNode(start)
    return node
1664
def p_continue_statement(s):
    # s.sy == 'continue'
    # Skips over the keyword and returns a ContinueStatNode for its position.
    start = s.position()
    s.next()
    node = Nodes.ContinueStatNode(start)
    return node
1670
def p_return_statement(s):
    # s.sy == 'return'
    # Parses a 'return' statement with its optional value.
    start = s.position()
    s.next()
    value = None
    if s.sy not in statement_terminators:
        value = p_testlist(s)
    return Nodes.ReturnStatNode(start, value=value)
1680
def p_raise_statement(s):
    # s.sy == 'raise'
    # Parses 'raise', 'raise type [, value [, traceback]]' and
    # 'raise type from cause'.  A bare 'raise' gives a ReraiseStatNode.
    pos = s.position()
    s.next()
    exc_type = exc_value = exc_tb = cause = None
    if s.sy not in statement_terminators:
        exc_type = p_test(s)
        if s.sy == 'from':
            s.next()
            cause = p_test(s)
        elif s.sy == ',':
            s.next()
            exc_value = p_test(s)
            if s.sy == ',':
                s.next()
                exc_tb = p_test(s)

    if not (exc_type or exc_value or exc_tb):
        return Nodes.ReraiseStatNode(pos)
    return Nodes.RaiseStatNode(
        pos,
        exc_type=exc_type,
        exc_value=exc_value,
        exc_tb=exc_tb,
        cause=cause)
1708
1709
def p_import_statement(s):
    # s.sy in ('import', 'cimport')
    # Parses 'import a.b [as c], ...' and 'cimport a.b [as c], ...'.
    # Returns a StatListNode at the position of the statement that contains
    # one statement node per imported module.
    pos = s.position()
    kind = s.sy
    s.next()
    items = [p_dotted_name(s, as_allowed=1)]
    while s.sy == ',':
        s.next()
        items.append(p_dotted_name(s, as_allowed=1))
    stats = []
    is_absolute = Future.absolute_import in s.context.future_directives
    # Use a separate name for the item positions: rebinding 'pos' in the loop
    # would make the StatListNode below end up at the position of the last item.
    for item_pos, target_name, dotted_name, as_name in items:
        if kind == 'cimport':
            stat = Nodes.CImportStatNode(
                item_pos,
                module_name=dotted_name,
                as_name=as_name,
                is_absolute=is_absolute)
        else:
            # a Python import is an assignment of the imported module to a name
            stat = Nodes.SingleAssignmentNode(
                item_pos,
                lhs=ExprNodes.NameNode(item_pos, name=as_name or target_name),
                rhs=ExprNodes.ImportNode(
                    item_pos,
                    module_name=ExprNodes.IdentifierStringNode(item_pos, value=dotted_name),
                    level=0 if is_absolute else None,
                    get_top_level_module='.' in dotted_name and as_name is None,
                    name_list=None))
        stats.append(stat)
    return Nodes.StatListNode(pos, stats=stats)
1740
1741
def p_from_import_statement(s, first_statement = 0):
    """Parse ``from [.]*[module] (import|cimport) names``.

    ``first_statement`` is true while no non-pass statement has been seen
    yet; it gates ``from __future__`` imports.

    Returns a PassStatNode for ``from __future__`` imports (after recording
    the directives in ``s.context.future_directives``), a FromCImportStatNode
    for cimports, and a FromImportStatNode otherwise.
    """
    # s.sy == 'from'
    pos = s.position()
    s.next()
    if s.sy == '.':
        # count relative import level
        level = 0
        while s.sy == '.':
            level += 1
            s.next()
    else:
        level = None
    if level is not None and s.sy in ('import', 'cimport'):
        # we are dealing with "from .. import foo, bar"
        dotted_name_pos, dotted_name = s.position(), s.context.intern_ustring('')
    else:
        # No leading dots: absolute_import turns this into an absolute
        # (level 0) import, otherwise the level stays undecided (None).
        if level is None and Future.absolute_import in s.context.future_directives:
            level = 0
        (dotted_name_pos, _, dotted_name, _) = p_dotted_name(s, as_allowed=False)
    if s.sy not in ('import', 'cimport'):
        s.error("Expected 'import' or 'cimport'")
    kind = s.sy
    s.next()

    is_cimport = kind == 'cimport'
    is_parenthesized = False
    if s.sy == '*':
        imported_names = [(s.position(), s.context.intern_ustring("*"), None, None)]
        s.next()
    else:
        if s.sy == '(':
            is_parenthesized = True
            s.next()
        imported_names = [p_imported_name(s, is_cimport)]
    while s.sy == ',':
        s.next()
        # A trailing comma is accepted inside parentheses only.
        if is_parenthesized and s.sy == ')':
            break
        imported_names.append(p_imported_name(s, is_cimport))
    if is_parenthesized:
        s.expect(')')
    if dotted_name == '__future__':
        if not first_statement:
            s.error("from __future__ imports must occur at the beginning of the file")
        elif level:
            s.error("invalid syntax")
        else:
            # Each imported name is looked up as an attribute of the Future
            # module. Note that this loop rebinds 'kind'; that is harmless
            # because this branch returns right after it.
            for (name_pos, name, as_name, kind) in imported_names:
                if name == "braces":
                    s.error("not a chance", name_pos)
                    break
                try:
                    directive = getattr(Future, name)
                except AttributeError:
                    s.error("future feature %s is not defined" % name, name_pos)
                    break
                s.context.future_directives.add(directive)
        return Nodes.PassStatNode(pos)
    elif kind == 'cimport':
        return Nodes.FromCImportStatNode(
            pos, module_name=dotted_name,
            relative_level=level,
            imported_names=imported_names)
    else:
        # Plain Python import: build the list of imported name strings for
        # the ImportNode and the (name, target NameNode) pairs to assign.
        imported_name_strings = []
        items = []
        for (name_pos, name, as_name, kind) in imported_names:
            imported_name_strings.append(
                ExprNodes.IdentifierStringNode(name_pos, value=name))
            items.append(
                (name, ExprNodes.NameNode(name_pos, name=as_name or name)))
        import_list = ExprNodes.ListNode(
            imported_names[0][0], args=imported_name_strings)
        return Nodes.FromImportStatNode(pos,
            module = ExprNodes.ImportNode(dotted_name_pos,
                module_name = ExprNodes.IdentifierStringNode(pos, value = dotted_name),
                level = level,
                name_list = import_list),
            items = items)
1821
1822
# Keywords that may precede a name in a "from ... cimport" list
# (checked by p_imported_name for cimports only).
imported_name_kinds = cython.declare(frozenset, frozenset((
    'class', 'struct', 'union')))
1825
def p_imported_name(s, is_cimport):
    """Parse one entry of a from-import list.

    Accepts ``[class|struct|union] name [as alias]``; the leading kind
    keyword is only recognised when ``is_cimport`` is true.

    Returns: (pos, name, as_name, kind)
    """
    pos = s.position()
    if is_cimport and s.systring in imported_name_kinds:
        kind = s.systring
        s.next()
    else:
        kind = None
    name = p_ident(s)
    alias = p_as_name(s)
    return (pos, name, alias, kind)
1835
1836
def p_dotted_name(s, as_allowed):
    """Parse ``a.b.c`` and, if ``as_allowed``, an optional ``as alias``.

    Returns: (pos, first_name, interned_dotted_name, as_name)
    where as_name is None when no alias was parsed.
    """
    pos = s.position()
    target_name = p_ident(s)
    parts = [target_name]
    while s.sy == '.':
        s.next()
        parts.append(p_ident(s))
    alias = p_as_name(s) if as_allowed else None
    dotted_name = s.context.intern_ustring(u'.'.join(parts))
    return (pos, target_name, dotted_name, alias)
1848
1849
def p_as_name(s):
    """Parse an optional ``as <name>`` suffix.

    Returns the identifier following 'as', or None (consuming nothing)
    when the current token is not the identifier 'as'.
    """
    if s.sy != 'IDENT' or s.systring != 'as':
        return None
    s.next()
    return p_ident(s)
1856
1857
def p_assert_statement(s):
    """Parse ``assert condition [, value]`` into an AssertStatNode."""
    # s.sy == 'assert'
    pos = s.position()
    s.next()
    condition = p_test(s)
    message = None
    if s.sy == ',':
        s.next()
        message = p_test(s)
    return Nodes.AssertStatNode(pos, condition=condition, value=message)
1869
1870
# Token symbols that end a statement: a semicolon, the end of the line,
# or the end of the input.
# NOTE(review): not referenced in this part of the file; consumers are elsewhere.
statement_terminators = cython.declare(frozenset, frozenset((
    ';', 'NEWLINE', 'EOF')))
1873
def p_if_statement(s):
    """Parse an if / elif ... / else statement into an IfStatNode."""
    # s.sy == 'if'
    pos = s.position()
    clauses = []
    while True:
        s.next()  # skip 'if' or 'elif'
        clauses.append(p_if_clause(s))
        if s.sy != 'elif':
            break
    otherwise = p_else_clause(s)
    return Nodes.IfStatNode(pos, if_clauses=clauses, else_clause=otherwise)
1885
def p_if_clause(s):
    """Parse ``<condition>: <suite>`` (the keyword is already consumed)."""
    pos = s.position()
    condition = p_test(s)
    suite = p_suite(s)
    return Nodes.IfClauseNode(pos, condition=condition, body=suite)
1892
def p_else_clause(s):
    """Parse an optional ``else: <suite>``; return the suite or None."""
    if s.sy != 'else':
        return None
    s.next()
    return p_suite(s)
1899
def p_while_statement(s):
    """Parse ``while <condition>: <suite> [else: <suite>]``."""
    # s.sy == 'while'
    pos = s.position()
    s.next()
    condition = p_test(s)
    loop_body = p_suite(s)
    otherwise = p_else_clause(s)
    return Nodes.WhileStatNode(
        pos, condition=condition, body=loop_body, else_clause=otherwise)
1910
1911
def p_for_statement(s, is_async=False):
    """Parse a for loop (for-in or for-from) including its else clause.

    The loop header is parsed by p_for_bounds(); its keyword dict is
    completed with the body, else clause and async flag and passed on to
    Nodes.ForStatNode.
    """
    # s.sy == 'for'
    pos = s.position()
    s.next()
    node_args = p_for_bounds(s, allow_testlist=True, is_async=is_async)
    loop_body = p_suite(s)
    otherwise = p_else_clause(s)
    node_args['body'] = loop_body
    node_args['else_clause'] = otherwise
    node_args['is_async'] = is_async
    return Nodes.ForStatNode(pos, **node_args)
1921
1922
def p_for_bounds(s, allow_testlist=True, is_async=False):
    """Parse the header of a for loop, after the 'for' keyword.

    Handles ``target in iterable`` and, outside of Python files and async
    loops, the ``target from a <= target < b [by step]`` form as well as
    its short variant ``a <= target < b [by step]``.

    Returns a dict of keyword arguments describing the loop header:
    target/iterator for for-in, or target/bound1/relation1/relation2/
    bound2/step for for-from.
    """
    target = p_for_target(s)

    if s.sy == 'in':
        s.next()
        iterator = p_for_iterator(s, allow_testlist, is_async=is_async)
        return dict(target=target, iterator=iterator)

    if s.in_python_file or is_async:
        # Only the for-in form is accepted here; let the scanner complain.
        s.expect('in')
        return {}

    if s.sy == 'from':
        s.next()
        bound1 = p_bit_expr(s)
    else:
        # Support shorter "for a <= x < b" syntax
        bound1, target = target, None
    rel1 = p_for_from_relation(s)
    name2_pos = s.position()
    name2 = p_ident(s)
    rel2_pos = s.position()
    rel2 = p_for_from_relation(s)
    bound2 = p_bit_expr(s)
    step = p_for_from_step(s)

    if target is None:
        # Short form: the loop variable is the name between the relations.
        target = ExprNodes.NameNode(name2_pos, name=name2)
    elif not target.is_name:
        error(target.pos,
              "Target of for-from statement must be a variable name")
    elif name2 != target.name:
        error(name2_pos,
              "Variable name in for-from range does not match target")

    # Both relations must point the same way, e.g. '<' with '<='.
    if rel1[0] != rel2[0]:
        error(rel2_pos,
              "Relation directions in for-from do not match")

    return {
        'target': target,
        'bound1': bound1,
        'relation1': rel1,
        'relation2': rel2,
        'bound2': bound2,
        'step': step,
    }
1965
def p_for_from_relation(s):
    """Parse one relation operator of a for-from loop and return it.

    The current token must be one of the operators in
    ``inequality_relations``; it is consumed and returned as a string.
    Any other token is reported through ``s.error()``.
    """
    if s.sy in inequality_relations:
        op = s.sy
        s.next()
        return op
    else:
        # The list of alternatives is comma separated throughout.
        s.error("Expected one of '<', '<=', '>', '>='")
1973
def p_for_from_step(s):
    """Parse the optional ``by <step>`` suffix of a for-from loop.

    Returns the step expression, or None (consuming nothing) when the
    current token is not the identifier 'by'.
    """
    if s.sy != 'IDENT' or s.systring != 'by':
        return None
    s.next()
    return p_bit_expr(s)
1981
# Relation operators accepted by p_for_from_relation() in for-from loops.
inequality_relations = cython.declare(frozenset, frozenset((
    '<', '<=', '>', '>=')))
1984
def p_target(s, terminator):
    """Parse a target: one starred expression or a comma separated tuple.

    After the first comma, further elements are read until the
    ``terminator`` symbol is reached or an element is not followed by a
    comma. A single expression is returned as is; anything containing a
    comma becomes a TupleNode.
    """
    pos = s.position()
    first = p_starred_expr(s)
    if s.sy != ',':
        return first

    s.next()
    elements = [first]
    while s.sy != terminator:
        elements.append(p_starred_expr(s))
        if s.sy != ',':
            break
        s.next()
    return ExprNodes.TupleNode(pos, args=elements)
1999
2000
def p_for_target(s):
    """Parse the target of a for loop, using 'in' as the terminator symbol."""
    return p_target(s, 'in')
2003
2004
def p_for_iterator(s, allow_testlist=True, is_async=False):
    """Parse the iterable of a for-in loop and wrap it in an iterator node.

    With ``allow_testlist`` the iterable is parsed by p_testlist(),
    otherwise by p_or_test(). The result is an AsyncIteratorNode for async
    loops and an IteratorNode otherwise.
    """
    pos = s.position()
    sequence = p_testlist(s) if allow_testlist else p_or_test(s)
    if is_async:
        node_class = ExprNodes.AsyncIteratorNode
    else:
        node_class = ExprNodes.IteratorNode
    return node_class(pos, sequence=sequence)
2012
2013
def p_try_statement(s):
    """Parse a try statement.

    Returns a TryExceptStatNode, a TryFinallyStatNode, or a
    TryFinallyStatNode wrapping a TryExceptStatNode when both kinds of
    clauses are present.
    """
    # s.sy == 'try'
    pos = s.position()
    s.next()
    body = p_suite(s)

    if s.sy in ('except', 'else'):
        handlers = []
        while s.sy == 'except':
            handlers.append(p_except_clause(s))
        otherwise = None
        if s.sy == 'else':
            s.next()
            otherwise = p_suite(s)
        body = Nodes.TryExceptStatNode(
            pos, body=body, except_clauses=handlers, else_clause=otherwise)
        if s.sy != 'finally':
            return body
        # try-except-finally is equivalent to nested try-except/try-finally

    if s.sy != 'finally':
        s.error("Expected 'except' or 'finally'")
        return None

    s.next()
    cleanup = p_suite(s)
    return Nodes.TryFinallyStatNode(pos, body=body, finally_clause=cleanup)
2040
def p_except_clause(s):
    """Parse one ``except [type [, value | as name]]: <suite>`` clause.

    The exception pattern is normalised into a list of tests (None for a
    bare except). 'as' followed by an arbitrary target is accepted at
    language level 2; otherwise 'as' must be followed by a plain name and
    the clause is flagged with is_except_as.
    """
    # s.sy == 'except'
    pos = s.position()
    s.next()
    patterns = None
    target = None
    is_except_as = False

    if s.sy != ':':
        exc_test = p_test(s)
        # normalise into list of single exception tests
        if isinstance(exc_test, ExprNodes.TupleNode):
            patterns = exc_test.args
        else:
            patterns = [exc_test]

        at_as = s.sy == 'IDENT' and s.systring == 'as'
        if s.sy == ',' or (at_as and s.context.language_level == 2):
            s.next()
            target = p_test(s)
        elif at_as:
            # Py3 syntax requires a name here
            s.next()
            name_pos = s.position()
            target = ExprNodes.NameNode(name_pos, name=p_ident(s))
            is_except_as = True

    handler_body = p_suite(s)
    return Nodes.ExceptClauseNode(
        pos, pattern=patterns, target=target,
        body=handler_body, is_except_as=is_except_as)
2070
def p_include_statement(s, ctx):
    """Parse ``include "file"`` and, if active, the included file itself.

    When compile-time evaluation is switched off, the statement becomes a
    PassStatNode. Otherwise the file is located through the context, parsed
    with a nested scanner and its statement tree is returned; None is
    returned if the file could not be found.
    """
    pos = s.position()
    s.next()  # 'include'
    include_file_name = p_string_literal(s, 'u')[2]
    s.expect_newline("Syntax error in include statement")

    if not s.compile_time_eval:
        return Nodes.PassStatNode(pos)

    include_file_path = s.context.find_include_file(include_file_name, pos)
    if not include_file_path:
        return None

    s.included_files.append(include_file_name)
    with Utils.open_source_file(include_file_path) as f:
        source_desc = FileSourceDescriptor(include_file_path)
        sub_scanner = PyrexScanner(
            f, source_desc, s,
            source_encoding=f.encoding, parse_comments=s.parse_comments)
        return p_statement_list(sub_scanner, ctx)
2090
2091
def p_with_statement(s):
    """Parse a with statement, or ``with template[...]`` in Cython files."""
    s.next()  # 'with'
    if s.systring == 'template' and not s.in_python_file:
        return p_with_template(s)
    return p_with_items(s)
2099
2100
def p_with_items(s, is_async=False):
    """Parse the items and body of a with statement (after 'with').

    Comma separated items are nested recursively, each one becoming the
    body of the one before it. In Cython files, ``gil`` / ``nogil`` items
    (optionally followed by a parenthesised condition) produce a
    GILStatNode; everything else produces a WithStatNode.
    """
    pos = s.position()
    is_gil_item = (not s.in_python_file
                   and s.sy == 'IDENT'
                   and s.systring in ('nogil', 'gil'))

    if not is_gil_item:
        manager = p_test(s)
        target = None
        if s.sy == 'IDENT' and s.systring == 'as':
            s.next()
            target = p_starred_expr(s)
        if s.sy == ',':
            s.next()
            body = p_with_items(s, is_async=is_async)
        else:
            body = p_suite(s)
        return Nodes.WithStatNode(
            pos, manager=manager, target=target, body=body, is_async=is_async)

    if is_async:
        s.error("with gil/nogil cannot be async")
    state = s.systring
    s.next()

    # support conditional gil/nogil
    condition = None
    if s.sy == '(':
        s.next()
        condition = p_test(s)
        s.expect(')')

    if s.sy == ',':
        s.next()
        body = p_with_items(s)
    else:
        body = p_suite(s)
    return Nodes.GILStatNode(pos, state=state, body=body, condition=condition)
2134
2135
def p_with_template(s):
    """Parse ``template[T, ...]:`` followed by an indented C declaration.

    The template names are collected as raw token strings and handed to
    p_c_func_or_var_declaration() through a fresh Ctx. If the colon is
    missing, an error is reported and nothing is returned.
    """
    pos = s.position()
    s.next()
    s.expect('[')
    templates = [s.systring]
    s.next()
    while s.systring == ',':
        s.next()
        templates.append(s.systring)
        s.next()
    s.expect(']')

    if s.sy != ':':
        error(pos, "Syntax error in template function declaration")
        return None

    s.next()
    s.expect_newline("Syntax error in template function declaration")
    s.expect_indent()
    body_ctx = Ctx(templates=templates)
    declaration = p_c_func_or_var_declaration(s, pos, body_ctx)
    s.expect_dedent()
    return declaration
2159
def p_simple_statement(s, first_statement = 0):
    """Parse a single simple statement, selected by the current token.

    Anything that does not start with one of the statement keywords below
    is parsed as an expression or assignment. ``first_statement`` is only
    forwarded to the from-import parser.
    """
    sy = s.sy
    if sy == 'global':
        return p_global_statement(s)
    if sy == 'nonlocal':
        return p_nonlocal_statement(s)
    if sy == 'print':
        return p_print_statement(s)
    if sy == 'exec':
        return p_exec_statement(s)
    if sy == 'del':
        return p_del_statement(s)
    if sy == 'break':
        return p_break_statement(s)
    if sy == 'continue':
        return p_continue_statement(s)
    if sy == 'return':
        return p_return_statement(s)
    if sy == 'raise':
        return p_raise_statement(s)
    if sy in ('import', 'cimport'):
        return p_import_statement(s)
    if sy == 'from':
        return p_from_import_statement(s, first_statement = first_statement)
    if sy == 'yield':
        return p_yield_statement(s)
    if sy == 'assert':
        return p_assert_statement(s)
    if sy == 'pass':
        return p_pass_statement(s)
    return p_expression_or_assignment(s)
2193
def p_simple_statement_list(s, ctx, first_statement = 0):
    """Parse a line of simple statements separated by semicolons.

    Pass statements are dropped. The result is a PassStatNode if nothing
    remains, the statement itself if exactly one remains, and a
    StatListNode otherwise. The line must end with a newline.
    """
    stat = p_simple_statement(s, first_statement = first_statement)
    pos = stat.pos
    stats = [] if isinstance(stat, Nodes.PassStatNode) else [stat]

    while s.sy == ';':
        s.next()
        if s.sy in ('NEWLINE', 'EOF'):
            # trailing semicolon
            break
        stat = p_simple_statement(s, first_statement = first_statement)
        if not isinstance(stat, Nodes.PassStatNode):
            stats.append(stat)
            first_statement = False

    count = len(stats)
    if count == 0:
        stat = Nodes.PassStatNode(pos)
    elif count == 1:
        stat = stats[0]
    else:
        stat = Nodes.StatListNode(pos, stats = stats)

    # provide a better error message for users who accidentally write Cython code in .py files
    if (s.sy not in ('NEWLINE', 'EOF')
            and isinstance(stat, Nodes.ExprStatNode)
            and stat.expr.is_name and stat.expr.name == 'cdef'):
        s.error("The 'cdef' keyword is only allowed in Cython files (pyx/pxi/pxd)", pos)
    s.expect_newline("Syntax error in simple statement list")

    return stat
2228
def p_compile_time_expr(s):
    """Parse a test list with the scanner's compile_time_expr flag set.

    The previous value of ``s.compile_time_expr`` is restored afterwards,
    also when parsing the expression raises.
    """
    old = s.compile_time_expr
    s.compile_time_expr = 1
    try:
        expr = p_testlist(s)
    finally:
        # Never leave the scanner in compile-time mode after a parse error.
        s.compile_time_expr = old
    return expr
2235
def p_DEF_statement(s):
    """Parse ``DEF name = <compile-time expression>``.

    When compile-time evaluation is active, the expression is evaluated in
    the scanner's compile-time environment and the name is declared there.
    The statement itself always becomes a PassStatNode.
    """
    pos = s.position()
    env = s.compile_time_env
    s.next()  # 'DEF'
    name = p_ident(s)
    s.expect('=')
    expr = p_compile_time_expr(s)
    if s.compile_time_eval:
        env.declare(name, expr.compile_time_value(env))
    s.expect_newline("Expected a newline", ignore_semicolon=True)
    return Nodes.PassStatNode(pos)
2249
def p_IF_statement(s, ctx):
    """Parse a compile-time ``IF / ELIF / ELSE`` statement.

    All branches are parsed, but ``s.compile_time_eval`` is switched on
    only for the first branch whose condition is true (and only if it was
    on when the statement started). The body of that branch is returned;
    a PassStatNode is returned if no branch was selected.

    The scanner's ``compile_time_eval`` flag is restored on exit, also
    when parsing or evaluating a branch raises.
    """
    pos = s.position()
    saved_eval = s.compile_time_eval
    current_eval = saved_eval
    denv = s.compile_time_env
    result = None
    try:
        while 1:
            s.next()  # 'IF' or 'ELIF'
            expr = p_compile_time_expr(s)
            # Conditions are only evaluated while no earlier branch matched.
            s.compile_time_eval = current_eval and bool(expr.compile_time_value(denv))
            body = p_suite(s, ctx)
            if s.compile_time_eval:
                result = body
                current_eval = 0
            if s.sy != 'ELIF':
                break
        if s.sy == 'ELSE':
            s.next()
            s.compile_time_eval = current_eval
            body = p_suite(s, ctx)
            if current_eval:
                result = body
    finally:
        # Do not leak a branch-local setting into the enclosing code.
        s.compile_time_eval = saved_eval
    if not result:
        result = Nodes.PassStatNode(pos)
    return result
2276
def p_statement(s, ctx, first_statement = 0):
    """Parse one statement in the parsing context ``ctx``.

    Dispatches on the current token: ctypedef, the compile-time DEF / IF
    statements, decorators, cdef / cpdef declarations, def, class, include,
    property declarations, pass, the compound statements and async
    statements. Everything else is parsed as a line of simple statements,
    to which ``first_statement`` is forwarded.

    ``ctx.level`` restricts which kinds of statement are accepted.
    """
    cdef_flag = ctx.cdef_flag
    decorators = None
    if s.sy == 'ctypedef':
        if ctx.level not in ('module', 'module_pxd'):
            s.error("ctypedef statement not allowed here")
        #if ctx.api:
        #    error(s.position(), "'api' not allowed with 'ctypedef'")
        return p_ctypedef_statement(s, ctx)
    elif s.sy == 'DEF':
        return p_DEF_statement(s)
    elif s.sy == 'IF':
        return p_IF_statement(s, ctx)
    elif s.sy == '@':
        if ctx.level not in ('module', 'class', 'c_class', 'function', 'property', 'module_pxd', 'c_class_pxd', 'other'):
            s.error('decorator not allowed here')
        s.level = ctx.level
        decorators = p_decorators(s)
        if not ctx.allow_struct_enum_decorator and s.sy not in ('def', 'cdef', 'cpdef', 'class', 'async'):
            if s.sy == 'IDENT' and s.systring == 'async':
                pass  # handled below
            else:
                s.error("Decorators can only be followed by functions or classes")
    elif s.sy == 'pass' and cdef_flag:
        # empty cdef block
        return p_pass_statement(s, with_newline=1)

    # An explicit cdef / cpdef keyword switches to declaration parsing,
    # just like a context that already has cdef_flag set.
    overridable = 0
    if s.sy == 'cdef':
        cdef_flag = 1
        s.next()
    elif s.sy == 'cpdef':
        cdef_flag = 1
        overridable = 1
        s.next()
    if cdef_flag:
        if ctx.level not in ('module', 'module_pxd', 'function', 'c_class', 'c_class_pxd'):
            s.error('cdef statement not allowed here')
        s.level = ctx.level
        node = p_cdef_statement(s, ctx(overridable=overridable))
        if decorators is not None:
            # Only these node types may carry decorators; struct/union and
            # enum nodes are added when the context explicitly allows it.
            tup = (Nodes.CFuncDefNode, Nodes.CVarDefNode, Nodes.CClassDefNode)
            if ctx.allow_struct_enum_decorator:
                tup += (Nodes.CStructOrUnionDefNode, Nodes.CEnumDefNode)
            if not isinstance(node, tup):
                s.error("Decorators can only be followed by functions or classes")
            node.decorators = decorators
        return node
    else:
        if ctx.api:
            # NOTE(review): the error is non-fatal and this branch parses and
            # returns nothing; if s.error() returns, the function falls off
            # the end of this chain and implicitly returns None.
            s.error("'api' not allowed with this statement", fatal=False)
        elif s.sy == 'def':
            # def statements aren't allowed in pxd files, except
            # as part of a cdef class
            if ('pxd' in ctx.level) and (ctx.level != 'c_class_pxd'):
                s.error('def statement not allowed here')
            s.level = ctx.level
            return p_def_statement(s, decorators)
        elif s.sy == 'class':
            if ctx.level not in ('module', 'function', 'class', 'other'):
                s.error("class definition not allowed here")
            return p_class_statement(s, decorators)
        elif s.sy == 'include':
            if ctx.level not in ('module', 'module_pxd'):
                s.error("include statement not allowed here")
            return p_include_statement(s, ctx)
        elif ctx.level == 'c_class' and s.sy == 'IDENT' and s.systring == 'property':
            return p_property_decl(s)
        elif s.sy == 'pass' and ctx.level != 'property':
            return p_pass_statement(s, with_newline=True)
        else:
            if ctx.level in ('c_class_pxd', 'property'):
                # Only ignorable statements are accepted at these levels.
                node = p_ignorable_statement(s)
                if node is not None:
                    return node
                s.error("Executable statement not allowed here")
            if s.sy == 'if':
                return p_if_statement(s)
            elif s.sy == 'while':
                return p_while_statement(s)
            elif s.sy == 'for':
                return p_for_statement(s)
            elif s.sy == 'try':
                return p_try_statement(s)
            elif s.sy == 'with':
                return p_with_statement(s)
            elif s.sy == 'async':
                s.next()
                return p_async_statement(s, ctx, decorators)
            else:
                if s.sy == 'IDENT' and s.systring == 'async':
                    ident_name = s.systring
                    # PEP 492 enables the async/await keywords when it spots "async def ..."
                    s.next()
                    if s.sy == 'def':
                        return p_async_statement(s, ctx, decorators)
                    elif decorators:
                        s.error("Decorators can only be followed by functions or classes")
                    s.put_back(u'IDENT', ident_name)  # re-insert original token
                return p_simple_statement_list(s, ctx, first_statement=first_statement)
2377
2378
def p_statement_list(s, ctx, first_statement = 0):
    """Parse statements separated by newlines up to DEDENT or EOF.

    Pass statements are dropped. Returns a PassStatNode if nothing remains,
    the statement itself if exactly one remains, and a StatListNode
    otherwise. ``first_statement`` stays set until the first statement
    that is kept.
    """
    pos = s.position()
    stats = []
    while s.sy not in ('DEDENT', 'EOF'):
        stat = p_statement(s, ctx, first_statement = first_statement)
        if not isinstance(stat, Nodes.PassStatNode):
            stats.append(stat)
            first_statement = False

    count = len(stats)
    if count == 0:
        return Nodes.PassStatNode(pos)
    if count == 1:
        return stats[0]
    return Nodes.StatListNode(pos, stats = stats)
2395
2396
def p_suite(s, ctx=Ctx()):
    """Parse a suite and return only its body.

    Thin wrapper around p_suite_with_docstring() that discards the
    docstring part of the result. The default ``ctx`` is a single Ctx
    instance created when the function is defined.
    """
    return p_suite_with_docstring(s, ctx, with_doc_only=False)[1]
2399
2400
def p_suite_with_docstring(s, ctx, with_doc_only=False):
    """Parse ``: <suite>`` and return ``(doc, body)``.

    The suite is either an indented block of statements or, on the same
    line, a list of simple statements (or just 'pass' at levels where
    nothing else is allowed). With ``with_doc_only`` the docstring is read
    by p_doc_string() from the start of an indented block; otherwise it is
    extracted from the parsed body.
    """
    s.expect(':')
    doc = None
    if s.sy != 'NEWLINE':
        # suite on the same line as the colon
        if ctx.api:
            s.error("'api' not allowed with this statement", fatal=False)
        if ctx.level in ('module', 'class', 'function', 'other'):
            body = p_simple_statement_list(s, ctx)
        else:
            body = p_pass_statement(s)
            s.expect_newline("Syntax error in declarations", ignore_semicolon=True)
    else:
        # indented block
        s.next()
        s.expect_indent()
        if with_doc_only:
            doc = p_doc_string(s)
        body = p_statement_list(s, ctx)
        s.expect_dedent()

    if not with_doc_only:
        doc, body = _extract_docstring(body)
    return doc, body
2422
2423
def _p_expr_or_type_arg(s, templates):
    # Parse one argument value: an expression if one starts at the current
    # token, otherwise a C type with an empty declarator.
    # Returns (node, parsed_type), where parsed_type tells whether the
    # argument was parsed as a type.
    if looking_at_expr(s):
        return p_test(s), False
    base_type = p_c_base_type(s, templates = templates)
    declarator = p_c_declarator(s, empty = 1)
    type_arg = Nodes.CComplexBaseTypeNode(base_type.pos,
        base_type = base_type, declarator = declarator)
    return type_arg, True


def p_positional_and_keyword_args(s, end_sy_set, templates = None):
    """
    Parses positional and keyword arguments. end_sy_set
    should contain any s.sy that terminate the argument list.
    Argument expansion (* and **) are not allowed.

    Each argument may be an expression or a C type. Keyword arguments
    are collected as (IdentifierStringNode, value) pairs.

    Returns: (positional_args, keyword_args)
    """
    positional_args = []
    keyword_args = []

    while s.sy not in end_sy_set:
        if s.sy == '*' or s.sy == '**':
            s.error('Argument expansion not allowed here.', fatal=False)

        if s.sy == 'IDENT' and s.peek()[0] == '=':
            # keyword argument: "name = value"
            ident = s.systring
            s.next()  # s.sy is '='
            s.next()
            arg, parsed_type = _p_expr_or_type_arg(s, templates)
            keyword_node = ExprNodes.IdentifierStringNode(arg.pos, value=ident)
            keyword_args.append((keyword_node, arg))
        else:
            arg, parsed_type = _p_expr_or_type_arg(s, templates)
            positional_args.append(arg)
            if len(keyword_args) > 0:
                s.error("Non-keyword arg following keyword arg",
                        pos=arg.pos)

        if s.sy != ',':
            # A type that is followed by neither a comma nor a terminator
            # means that the closing symbol is missing.
            if s.sy not in end_sy_set and parsed_type:
                s.error("Unmatched %s" % " or ".join(end_sy_set))
            break
        s.next()
    return positional_args, keyword_args
2479
def p_c_base_type(s, nonempty=False, templates=None):
    """Parse a C base type: a parenthesised (complex) one or a simple one."""
    if s.sy != '(':
        return p_c_simple_base_type(s, nonempty=nonempty, templates=templates)
    return p_c_complex_base_type(s, templates = templates)
2485
def p_calling_convention(s):
    """Parse an optional calling convention keyword.

    Returns the keyword if the current token is one of
    ``calling_convention_words``, otherwise the empty string (consuming
    nothing).
    """
    if s.sy != 'IDENT' or s.systring not in calling_convention_words:
        return ""
    convention = s.systring
    s.next()
    return convention
2493
2494
# Identifiers recognised as C calling conventions by p_calling_convention().
calling_convention_words = cython.declare(frozenset, frozenset((
    "__stdcall", "__cdecl", "__fastcall")))
2497
2498
def p_c_complex_base_type(s, templates = None):
    """Parse a parenthesised C base type.

    A single type in parentheses gives a CComplexBaseTypeNode. As soon as
    a comma follows a component (a trailing comma is allowed), the result
    is a CTupleBaseTypeNode of all components. A following '[...]' is
    parsed as memoryview slice access or as buffer/template arguments.
    """
    # s.sy == '('
    pos = s.position()
    s.next()

    components = []
    is_tuple = False
    while True:
        base_type = p_c_base_type(s, templates=templates)
        declarator = p_c_declarator(s, empty=True)
        components.append(Nodes.CComplexBaseTypeNode(
            pos, base_type=base_type, declarator=declarator))
        if s.sy != ',':
            break
        is_tuple = True
        s.next()
        if s.sy == ')':
            # trailing comma
            break

    if is_tuple:
        type_node = Nodes.CTupleBaseTypeNode(pos, components = components)
    else:
        type_node = components[0]

    s.expect(')')
    if s.sy == '[':
        if is_memoryviewslice_access(s):
            type_node = p_memoryviewslice_access(s, type_node)
        else:
            type_node = p_buffer_or_template(s, type_node, templates)
    return type_node
2526
2527
def p_c_simple_base_type(s, nonempty, templates=None):
    """Parse a simple (non-parenthesised) C base type.

    Accepts optional const/volatile qualifiers followed by a basic C type
    (with sign, longness and 'complex'), a dotted name, or a plain
    identifier. A trailing '[...]' is parsed as memoryview slice access or
    as buffer/template arguments, and a trailing '.name' as a nested type.

    With ``nonempty``, a plain identifier that is not followed by another
    identifier may be pushed back onto the scanner, leaving the type name
    as None.

    Returns the resulting type node.
    """
    is_basic = 0
    signed = 1
    longness = 0
    complex = 0
    module_path = []
    pos = s.position()

    # Handle const/volatile
    is_const = is_volatile = 0
    while s.sy == 'IDENT':
        if s.systring == 'const':
            if is_const: error(pos, "Duplicate 'const'")
            is_const = 1
        elif s.systring == 'volatile':
            if is_volatile: error(pos, "Duplicate 'volatile'")
            is_volatile = 1
        else:
            break
        s.next()
    if is_const or is_volatile:
        # The qualifiers wrap whatever base type follows them.
        base_type = p_c_base_type(s, nonempty=nonempty, templates=templates)
        if isinstance(base_type, Nodes.MemoryViewSliceTypeNode):
            # reverse order to avoid having to write "(const int)[:]"
            base_type.base_type_node = Nodes.CConstOrVolatileTypeNode(pos,
                base_type=base_type.base_type_node, is_const=is_const, is_volatile=is_volatile)
            return base_type
        return Nodes.CConstOrVolatileTypeNode(pos,
            base_type=base_type, is_const=is_const, is_volatile=is_volatile)

    if s.sy != 'IDENT':
        error(pos, "Expected an identifier, found '%s'" % s.sy)
    if looking_at_base_type(s):
        #print "p_c_simple_base_type: looking_at_base_type at", s.position()
        is_basic = 1
        if s.sy == 'IDENT' and s.systring in special_basic_c_types:
            signed, longness = special_basic_c_types[s.systring]
            name = s.systring
            s.next()
        else:
            signed, longness = p_sign_and_longness(s)
            if s.sy == 'IDENT' and s.systring in basic_c_type_names:
                name = s.systring
                s.next()
            else:
                name = 'int'  # long [int], short [int], long [int] complex, etc.
        if s.sy == 'IDENT' and s.systring == 'complex':
            complex = 1
            s.next()
    elif looking_at_dotted_name(s):
        #print "p_c_simple_base_type: looking_at_type_name at", s.position()
        # All but the last component of the dotted name form the module path.
        name = s.systring
        s.next()
        while s.sy == '.':
            module_path.append(name)
            s.next()
            name = p_ident(s)
    else:
        name = s.systring
        s.next()
        if nonempty and s.sy != 'IDENT':
            # Make sure this is not a declaration of a variable or function.
            if s.sy == '(':
                # Look one token past the '(' and then restore it.
                s.next()
                if (s.sy == '*' or s.sy == '**' or s.sy == '&'
                        or (s.sy == 'IDENT' and s.systring in calling_convention_words)):
                    s.put_back(u'(', u'(')
                else:
                    # The identifier is pushed back as well and no type
                    # name is recorded.
                    s.put_back(u'(', u'(')
                    s.put_back(u'IDENT', name)
                    name = None
            elif s.sy not in ('*', '**', '[', '&'):
                s.put_back(u'IDENT', name)
                name = None

    type_node = Nodes.CSimpleBaseTypeNode(pos,
        name = name, module_path = module_path,
        is_basic_c_type = is_basic, signed = signed,
        complex = complex, longness = longness,
        templates = templates)

    # A trailing '[' on the type is parsed as memoryview slice access or as
    # buffer/template arguments.
    if s.sy == '[':
        if is_memoryviewslice_access(s):
            type_node = p_memoryviewslice_access(s, type_node)
        else:
            type_node = p_buffer_or_template(s, type_node, templates)

    if s.sy == '.':
        # Nested type: "<type>.<name>"
        s.next()
        name = p_ident(s)
        type_node = Nodes.CNestedBaseTypeNode(pos, base_type = type_node, name = name)

    return type_node
2622
def p_buffer_or_template(s, base_type_node, templates):
    """Parse one bracketed buffer/template argument list after a base type.

    Expects s.sy == '[' and consumes everything up to and including the
    matching ']'.  If another '[' follows directly, it is parsed recursively
    first and its node becomes the base type wrapped by this level.

    Returns a Nodes.TemplatedTypeNode.
    """
    bracket_pos = s.position()
    s.next()  # skip '['
    # Note that buffer_positional_options_count=1, so the only positional argument is dtype.
    # For templated types, all parameters are types.
    args, kwargs = p_positional_and_keyword_args(s, (']',), templates)
    s.expect(']')

    if s.sy == '[':
        base_type_node = p_buffer_or_template(s, base_type_node, templates)

    dict_items = []
    for key, value in kwargs:
        dict_items.append(
            ExprNodes.DictItemNode(pos=key.pos, key=key, value=value))
    options = ExprNodes.DictNode(bracket_pos, key_value_pairs=dict_items)
    return Nodes.TemplatedTypeNode(
        bracket_pos,
        positional_args=args,
        keyword_args=options,
        base_type_node=base_type_node)
2647
def p_bracketed_base_type(s, base_type_node, nonempty, empty):
    """Decide how a '[' that follows a base type is interpreted.

    Expects s.sy == '['.  The outcome depends on the declarator flags:

    * empty only: sizeof-like thing, only anonymous C arrays are allowed
      (int[SIZE]); the base type is returned untouched.
    * nonempty only: a memoryview slice or a buffer/template declaration.
    * neither: a memoryview slice, or the untouched base type.  Buffer
      declarations are disallowed here for now, due to ambiguity with
      anonymous C arrays.
    * both: nothing is parsed and None is returned.
    """
    if empty:
        if nonempty:
            return None
        return base_type_node
    if is_memoryviewslice_access(s):
        return p_memoryviewslice_access(s, base_type_node)
    if nonempty:
        return p_buffer_or_template(s, base_type_node, None)
    return base_type_node
2668
def is_memoryviewslice_access(s):
    """Peek past '[' to tell a memoryview slice from a buffer declaration.

    Expects s.sy == '['.  A memoryview slice declaration is distinguishable
    from a buffer access declaration by the first entry in the bracketed
    list: the buffer will not have an unnested colon in the first entry,
    the memoryview slice will.  All tokens read here are pushed back, so
    the scanner is on '[' again when this returns.
    """
    consumed = [(s.sy, s.systring)]
    s.next()
    if s.systring == ':':
        found = True
    elif s.sy == 'INT':
        consumed.append((s.sy, s.systring))
        s.next()
        found = s.sy == ':'
    else:
        found = False

    # Push back in reverse so that the tokens come out in source order again.
    for sy, systring in reversed(consumed):
        s.put_back(sy, systring)

    return found
2689
def p_memoryviewslice_access(s, base_type_node):
    """Parse the '[...]' axis list of a memoryview slice type.

    Expects s.sy == '[' and consumes through the closing ']'.
    Returns a Nodes.MemoryViewSliceTypeNode wrapping base_type_node.
    """
    start_pos = s.position()
    s.next()  # skip '['
    axis_specs, _ = p_subscript_list(s)
    # Every axis has to be written as a slice, i.e. must contain a ':'.
    for spec in axis_specs:
        if len(spec) < 2:
            s.error("An axis specification in memoryview declaration does not have a ':'.")
    s.expect(']')
    axes = make_slice_nodes(start_pos, axis_specs)
    return Nodes.MemoryViewSliceTypeNode(
        start_pos,
        base_type_node=base_type_node,
        axes=axes)
2705
def looking_at_name(s):
    # True if the current token is an identifier other than a calling convention word.
    if s.sy != 'IDENT':
        return False
    return s.systring not in calling_convention_words
2708
def looking_at_expr(s):
    # Peek ahead to decide whether the upcoming tokens start an expression
    # rather than a C type declaration.  Every token consumed here is
    # pushed back before returning.
    #
    # Returns False when the current token text is a base type start word
    # or the lookahead classifies the name as a type, True for any other
    # non-identifier token, and otherwise the symbol of the token that
    # follows the (possibly dotted) name.  looking_at_call() compares that
    # value against '('.
    if s.systring in base_type_start_words:
        return False
    elif s.sy == 'IDENT':
        is_type = False
        name = s.systring
        dotted_path = []
        s.next()

        # Consume the remaining '.name' components, remembering them so
        # that they can be pushed back below.
        while s.sy == '.':
            s.next()
            dotted_path.append(s.systring)
            s.expect('IDENT')

        # The token right after the name decides between type and expression.
        saved = s.sy, s.systring
        if s.sy == 'IDENT':
            # "name name" is classified as a type.
            is_type = True
        elif s.sy == '*' or s.sy == '**':
            # "name *)" / "name *]" is classified as a type.
            s.next()
            is_type = s.sy in (')', ']')
            s.put_back(*saved)
        elif s.sy == '(':
            # "name (*" is classified as a type.
            s.next()
            is_type = s.sy == '*'
            s.put_back(*saved)
        elif s.sy == '[':
            s.next()
            is_type = s.sy == ']' or not looking_at_expr(s)  # could be a nested template type
            s.put_back(*saved)

        # Restore the consumed name tokens, last component first.
        dotted_path.reverse()
        for p in dotted_path:
            s.put_back(u'IDENT', p)
            s.put_back(u'.', u'.')

        s.put_back(u'IDENT', name)
        return not is_type and saved[0]
    else:
        return True
2748
def looking_at_base_type(s):
    # True if the current token is an identifier listed in base_type_start_words.
    if s.sy != 'IDENT':
        return False
    return s.systring in base_type_start_words
2752
def looking_at_dotted_name(s):
    # True if the current token is an identifier that is directly followed
    # by '.'.  The scanner is left on the identifier.
    if s.sy != 'IDENT':
        return 0
    ident = s.systring
    s.next()
    followed_by_dot = s.sy == '.'
    s.put_back(u'IDENT', ident)
    return followed_by_dot
2762
def looking_at_call(s):
    "See if we're looking at a.b.c("
    # Don't mess up the original position, so save and restore it.
    # Unfortunately there's no good way to handle this, as a subsequent call
    # to next() will not advance the position until it reads a new token.
    saved_line, saved_col = s.start_line, s.start_col
    if looking_at_expr(s) == u'(':
        return True
    s.start_line, s.start_col = saved_line, saved_col
    return False
2773
# Names of the plain C base types that may follow optional sign/longness
# words in p_c_simple_base_type().
basic_c_type_names = cython.declare(frozenset, frozenset((
    "void", "char", "int", "float", "double", "bint")))

# Type names with fixed (signed, longness) values; p_c_simple_base_type()
# takes these values instead of calling p_sign_and_longness().
# 'signed' uses the same encoding as p_sign_and_longness():
# 0 = unsigned, 1 = no explicit sign, 2 = explicitly signed.
special_basic_c_types = cython.declare(dict, {
    # name : (signed, longness)
    "Py_UNICODE" : (0, 0),
    "Py_UCS4"    : (0, 0),
    "Py_hash_t"  : (2, 0),
    "Py_ssize_t" : (2, 0),
    "ssize_t"    : (2, 0),
    "size_t"     : (0, 0),
    "ptrdiff_t"  : (2, 0),
    "Py_tss_t"   : (1, 0),
})

# Modifier words consumed by p_sign_and_longness().
sign_and_longness_words = cython.declare(frozenset, frozenset((
    "short", "long", "signed", "unsigned")))

# Every identifier that can start a basic C type; consulted by
# looking_at_base_type() and looking_at_expr().
base_type_start_words = cython.declare(
    frozenset,
    basic_c_type_names
    | sign_and_longness_words
    | frozenset(special_basic_c_types))

# Identifiers that p_cdef_statement() and p_ctypedef_statement() route to
# p_struct_enum().
struct_enum_union = cython.declare(frozenset, frozenset((
    "struct", "union", "enum", "packed")))
2800
def p_sign_and_longness(s):
    """Consume leading 'signed'/'unsigned'/'short'/'long' words.

    Returns (signed, longness):
    signed is 0 for 'unsigned', 2 for 'signed' and 1 if neither was given;
    longness is -1 for 'short' and is incremented once per 'long'.
    """
    signed = 1
    longness = 0
    while s.sy == 'IDENT' and s.systring in sign_and_longness_words:
        word = s.systring
        if word == 'unsigned':
            signed = 0
        elif word == 'signed':
            signed = 2
        elif word == 'short':
            longness = -1
        elif word == 'long':
            longness += 1
        s.next()
    return signed, longness
2815
def p_opt_cname(s):
    # Parse an optional string literal giving a C name.  Returns it as an
    # EncodedString tagged with the source encoding, or None if absent.
    literal = p_opt_string_literal(s, 'u')
    if literal is None:
        return None
    cname = EncodedString(literal)
    cname.encoding = s.source_encoding
    return cname
2824
def p_c_declarator(s, ctx = Ctx(), empty = 0, is_type = 0, cmethod_flag = 0,
                   assignable = 0, nonempty = 0,
                   calling_convention_allowed = 0):
    # Parse a C declarator: either a parenthesised declarator / nameless
    # function signature or a simple declarator, followed by any number of
    # array ('[...]') and function ('(...)') suffixes.
    #
    # If empty is true, the declarator must be empty. If nonempty is true,
    # the declarator must be nonempty. Otherwise we don't care.
    # If cmethod_flag is true, then if this declarator declares
    # a function, it's a C method of an extension type.
    #
    # NOTE: the default Ctx() is evaluated once at definition time and is
    # therefore shared by all calls that omit ctx; this function only
    # reads it and passes it on.
    pos = s.position()
    if s.sy == '(':
        s.next()
        if s.sy == ')' or looking_at_name(s):
            # '()' or '(name ...': an argument list; the function declarator
            # is built on a base declarator with an empty name.
            base = Nodes.CNameDeclaratorNode(pos, name=s.context.intern_ustring(u""), cname=None)
            result = p_c_func_declarator(s, pos, ctx, base, cmethod_flag)
        else:
            # Parenthesised inner declarator; a calling convention is
            # accepted inside the parentheses.
            result = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
                                    cmethod_flag = cmethod_flag,
                                    nonempty = nonempty,
                                    calling_convention_allowed = 1)
            s.expect(')')
    else:
        result = p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag,
                                       assignable, nonempty)
    # A calling convention is only valid if a function suffix follows.
    if not calling_convention_allowed and result.calling_convention and s.sy != '(':
        error(s.position(), "%s on something that is not a function"
            % result.calling_convention)
    while s.sy in ('[', '('):
        pos = s.position()
        if s.sy == '[':
            result = p_c_array_declarator(s, result)
        else:  # sy == '('
            s.next()
            result = p_c_func_declarator(s, pos, ctx, result, cmethod_flag)
        # Only the first suffix is parsed with the caller's cmethod_flag.
        cmethod_flag = 0
    return result
2859
def p_c_array_declarator(s, base):
    # Parse '[' [dimension] ']' and wrap 'base' in an array declarator.
    # The dimension is None for an empty '[]'.
    pos = s.position()
    s.next()  # '['
    dim = None if s.sy == ']' else p_testlist(s)
    s.expect(']')
    return Nodes.CArrayDeclaratorNode(pos, base=base, dimension=dim)
2869
def p_c_func_declarator(s, pos, ctx, base, cmethod_flag):
    """Parse a C function signature after its opening parenthesis.

    Opening paren has already been skipped.  Parses the argument list, an
    optional ellipsis and the closing ')', followed by the optional
    'nogil', exception value and 'with gil' clauses in that order.
    Returns a Nodes.CFuncDeclaratorNode built on 'base'.
    """
    arg_decls = p_c_arg_list(s, ctx, cmethod_flag=cmethod_flag,
                             nonempty_declarators=0)
    has_ellipsis = p_optional_ellipsis(s)
    s.expect(')')
    is_nogil = p_nogil(s)
    exc_val, exc_check = p_exception_value_clause(s)
    acquires_gil = p_with_gil(s)
    return Nodes.CFuncDeclaratorNode(
        pos,
        base=base, args=arg_decls, has_varargs=has_ellipsis,
        exception_value=exc_val, exception_check=exc_check,
        nogil=is_nogil or ctx.nogil or acquires_gil,
        with_gil=acquires_gil)
2883
# Operator spellings accepted after 'operator' in p_c_simple_declarator();
# any other operator is reported there as "not yet supported".
supported_overloaded_operators = cython.declare(frozenset, frozenset((
    '+', '-', '*', '/', '%',
    '++', '--', '~', '|', '&', '^', '<<', '>>', ',',
    '==', '!=', '>=', '>', '<=', '<',
    '[]', '()', '!', '=',
    'bool',
)))
2891
def p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag,
                          assignable, nonempty):
    # Parse a declarator that does not start with '(': an optional calling
    # convention followed by a pointer ('*', '**'), a reference ('&', or
    # '&&' in C++ mode) or a plain, possibly empty, name.  For names, an
    # optional cname string, an '= value' default (if 'assignable') and the
    # operator symbol of an extern 'operator' declaration are handled too.
    # The returned declarator node gets 'calling_convention' set on it.
    pos = s.position()
    calling_convention = p_calling_convention(s)
    if s.sy in ('*', '**'):
        # scanner returns '**' as a single token
        is_ptrptr = s.sy == '**'
        s.next()

        # Optional 'const' directly after the star(s).
        const_pos = s.position()
        is_const = s.systring == 'const' and s.sy == 'IDENT'
        if is_const:
            s.next()

        base = p_c_declarator(s, ctx, empty=empty, is_type=is_type,
                              cmethod_flag=cmethod_flag,
                              assignable=assignable, nonempty=nonempty)
        if is_const:
            base = Nodes.CConstDeclaratorNode(const_pos, base=base)
        if is_ptrptr:
            # '**' contributes two pointer levels; this is the inner one.
            base = Nodes.CPtrDeclaratorNode(pos, base=base)
        result = Nodes.CPtrDeclaratorNode(pos, base=base)
    elif s.sy == '&' or (s.sy == '&&' and s.context.cpp):
        node_class = Nodes.CppRvalueReferenceDeclaratorNode if s.sy == '&&' else Nodes.CReferenceDeclaratorNode
        s.next()
        base = p_c_declarator(s, ctx, empty=empty, is_type=is_type,
                              cmethod_flag=cmethod_flag,
                              assignable=assignable, nonempty=nonempty)
        result = node_class(pos, base=base)
    else:
        rhs = None
        if s.sy == 'IDENT':
            name = s.systring
            if empty:
                error(s.position(), "Declarator should be empty")
            s.next()
            cname = p_opt_cname(s)
            # 'operator =' is an operator name, not an assignment.
            if name != 'operator' and s.sy == '=' and assignable:
                s.next()
                rhs = p_test(s)
        else:
            if nonempty:
                error(s.position(), "Empty declarator")
            name = ""
            cname = None
        # Without an explicit cname, qualify the name with the namespace of
        # the enclosing context.
        if cname is None and ctx.namespace is not None and nonempty:
            cname = ctx.namespace + "::" + name
        if name == 'operator' and ctx.visibility == 'extern' and nonempty:
            op = s.sy
            # Non-empty list <=> the token contains an operator character.
            if [1 for c in op if c in '+-*/<=>!%&|([^~,']:
                s.next()
                # Handle operators that are spelled with two tokens.
                if op == '(':
                    s.expect(')')
                    op = '()'
                elif op == '[':
                    s.expect(']')
                    op = '[]'
                elif op in ('-', '+', '|', '&') and s.sy == op:
                    op *= 2       # ++, --, ...
                    s.next()
                elif s.sy == '=':
                    op += s.sy    # +=, -=, ...
                    s.next()
                if op not in supported_overloaded_operators:
                    s.error("Overloading operator '%s' not yet supported." % op,
                            fatal=False)
                name += op
            elif op == 'IDENT':
                # Word operators such as 'operator bool'.
                op = s.systring
                if op not in supported_overloaded_operators:
                    s.error("Overloading operator '%s' not yet supported." % op,
                            fatal=False)
                name = name + ' ' + op
                s.next()
        result = Nodes.CNameDeclaratorNode(pos,
            name = name, cname = cname, default = rhs)
    result.calling_convention = calling_convention
    return result
2971
def p_nogil(s):
    # Consume an optional 'nogil' identifier.  Returns 1 if it was present, else 0.
    if s.sy != 'IDENT' or s.systring != 'nogil':
        return 0
    s.next()
    return 1
2978
def p_with_gil(s):
    # Consume an optional 'with gil' clause.  Returns 1 if it was present, else 0.
    if s.sy != 'with':
        return 0
    s.next()
    s.expect_keyword('gil')
    return 1
2986
def p_exception_value_clause(s):
    """Parse an optional 'except ...' clause of a C function signature.

    Returns (exc_val, exc_check):

    * no clause:            (None, 0)
    * 'except *':           (None, 1)
    * 'except +':           (None, '+')
    * 'except + name':      (name node, '+')
    * 'except + *':         (CharNode '*', '+')
    * 'except value':       (value node, 0)
    * 'except ? value':     (value node, 1)
    """
    if s.sy != 'except':
        return None, 0

    exc_val = None
    exc_check = 0
    s.next()
    if s.sy == '*':
        exc_check = 1
        s.next()
    elif s.sy == '+':
        exc_check = '+'
        s.next()
        if s.sy == 'IDENT':
            handler_name = s.systring
            s.next()
            exc_val = p_name(s, handler_name)
        elif s.sy == '*':
            exc_val = ExprNodes.CharNode(s.position(), value=u'*')
            s.next()
    else:
        if s.sy == '?':
            exc_check = 1
            s.next()
        exc_val = p_test(s)
    return exc_val, exc_check
3011
# Tokens at which p_c_arg_list() stops collecting argument declarations.
c_arg_list_terminators = cython.declare(frozenset, frozenset((
    '*', '**', '.', ')', ':', '/')))
3014
def p_c_arg_list(s, ctx = Ctx(), in_pyfunc = 0, cmethod_flag = 0,
                 nonempty_declarators = 0, kw_only = 0, annotated = 1):
    #  Comma-separated list of C argument declarations, possibly empty.
    #  May have a trailing comma.
    #  Only the first argument is parsed with cmethod_flag (the self argument).
    arg_decls = []
    self_flag = cmethod_flag
    while s.sy not in c_arg_list_terminators:
        arg_decl = p_c_arg_decl(
            s, ctx, in_pyfunc, self_flag,
            nonempty=nonempty_declarators, kw_only=kw_only,
            annotated=annotated)
        arg_decls.append(arg_decl)
        if s.sy != ',':
            break
        s.next()
        self_flag = 0
    return arg_decls
3030
def p_optional_ellipsis(s):
    # Consume an optional ellipsis.  Returns 1 if the current token was '.', else 0.
    if s.sy != '.':
        return 0
    expect_ellipsis(s)
    return 1
3037
def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0,
                 kw_only = 0, annotated = 1):
    """Parse a single argument declaration of a function signature.

    Handles, in order: the base type (an empty placeholder type in Python
    files), the declarator, an optional 'not None' / 'or None' clause
    (Cython files only), an optional ': annotation' (if 'annotated') and an
    optional '= default'.

    In contexts whose level contains 'pxd', a default may only be written
    as '*' or '?', unless the context carries the 'inline' modifier.

    Returns a Nodes.CArgDeclNode.
    """
    pos = s.position()
    not_none = or_none = 0
    default = None
    annotation = None
    if s.in_python_file:
        # empty type declaration
        base_type = Nodes.CSimpleBaseTypeNode(pos,
            name = None, module_path = [],
            is_basic_c_type = 0, signed = 0,
            complex = 0, longness = 0,
            is_self_arg = cmethod_flag, templates = None)
    else:
        base_type = p_c_base_type(s, nonempty=nonempty)
    declarator = p_c_declarator(s, ctx, nonempty = nonempty)
    if s.sy in ('not', 'or') and not s.in_python_file:
        kind = s.sy
        s.next()
        if s.sy == 'IDENT' and s.systring == 'None':
            s.next()
        else:
            s.error("Expected 'None'")
        if not in_pyfunc:
            error(pos, "'%s None' only allowed in Python functions" % kind)
        or_none = kind == 'or'
        not_none = kind == 'not'
    if annotated and s.sy == ':':
        s.next()
        annotation = p_annotation(s)
    if s.sy == '=':
        s.next()
        if 'pxd' in ctx.level:
            if s.sy in ('*', '?'):
                # TODO(github/1736): Make this an error for inline declarations.
                default = ExprNodes.NoneNode(pos)
                s.next()
            # Not every context carries 'modifiers' (Ctx defines no such
            # default), so a missing attribute means "no modifiers" instead
            # of crashing with an AttributeError.
            elif 'inline' in getattr(ctx, 'modifiers', ()):
                default = p_test(s)
            else:
                error(pos, "default values cannot be specified in pxd files, use ? or *")
        else:
            default = p_test(s)
    return Nodes.CArgDeclNode(pos,
        base_type = base_type,
        declarator = declarator,
        not_none = not_none,
        or_none = or_none,
        default = default,
        annotation = annotation,
        kw_only = kw_only)
3089
def p_api(s):
    # Consume an optional 'api' identifier.  Returns 1 if it was present, else 0.
    if s.sy != 'IDENT' or s.systring != 'api':
        return 0
    s.next()
    return 1
3096
def p_cdef_statement(s, ctx):
    """Parse what follows the cdef keyword and dispatch on its kind.

    Reads the optional visibility and 'api' markers into 'ctx' (which is
    modified in place), then hands over to the parser for an extern block,
    a cdef block, an extension type, a C++ class, a struct/union/enum, a
    fused type or a plain C function/variable declaration.
    """
    pos = s.position()
    ctx.visibility = p_visibility(s, ctx.visibility)
    ctx.api = ctx.api or p_api(s)
    if ctx.api and ctx.visibility not in ('private', 'public'):
        error(pos, "Cannot combine 'api' with '%s'" % ctx.visibility)

    if ctx.visibility == 'extern' and s.sy == 'from':
        return p_cdef_extern_block(s, pos, ctx)
    if s.sy == 'import':
        s.next()
        return p_cdef_extern_block(s, pos, ctx)

    # 'cdef nogil ...' and 'cdef:' both introduce a cdef block.
    if p_nogil(s):
        ctx.nogil = 1
        opens_block = True
    else:
        opens_block = s.sy == ':'
    if opens_block:
        if ctx.overridable:
            error(pos, "cdef blocks cannot be declared cpdef")
        return p_cdef_block(s, ctx)

    if s.sy == 'class':
        if ctx.level not in ('module', 'module_pxd'):
            error(pos, "Extension type definition not allowed here")
        if ctx.overridable:
            error(pos, "Extension types cannot be declared cpdef")
        return p_c_class_definition(s, pos, ctx)

    if s.sy == 'IDENT':
        word = s.systring
        if word == 'cppclass':
            return p_cpp_class_definition(s, pos, ctx)
        if word in struct_enum_union:
            if ctx.level not in ('module', 'module_pxd'):
                error(pos, "C struct/union/enum definition not allowed here")
            if ctx.overridable and word != 'enum':
                error(pos, "C struct/union cannot be declared cpdef")
            return p_struct_enum(s, pos, ctx)
        if word == 'fused':
            return p_fused_definition(s, pos, ctx)

    return p_c_func_or_var_declaration(s, pos, ctx)
3137
def p_cdef_block(s, ctx):
    # Parse the suite of a cdef block with a copy of ctx that has cdef_flag set.
    block_ctx = ctx(cdef_flag=1)
    return p_suite(s, block_ctx)
3140
def p_cdef_extern_block(s, pos, ctx):
    """Parse 'from <file or *> [namespace <string>] [nogil]: <suite>'.

    Expects s.sy == 'from'.  The suite is parsed in a copy of 'ctx' with
    cdef_flag set and 'extern' visibility; its docstring, if any, is used
    as the verbatim include text.  Returns a Nodes.CDefExternNode.
    """
    if ctx.overridable:
        error(pos, "cdef extern blocks cannot be declared cpdef")
    s.expect('from')
    if s.sy == '*':
        include_file = None
        s.next()
    else:
        include_file = p_string_literal(s, 'u')[2]

    extern_ctx = ctx(cdef_flag=1, visibility='extern')
    if s.systring == "namespace":
        s.next()
        extern_ctx.namespace = p_string_literal(s, 'u')[2]
    if p_nogil(s):
        extern_ctx.nogil = 1

    # Use "docstring" as verbatim string to include
    verbatim_include, body = p_suite_with_docstring(s, extern_ctx, True)

    return Nodes.CDefExternNode(
        pos,
        include_file=include_file,
        verbatim_include=verbatim_include,
        body=body,
        namespace=extern_ctx.namespace)
3165
def p_c_enum_definition(s, pos, ctx):
    """Parse an enum definition, starting at the identifier 'enum'.

    In C++ mode, 'enum class' / 'enum struct' declare a scoped enum, which
    must be named and may give its underlying type in parentheses; in all
    other cases the underlying type is a plain 'int'.  The items follow the
    ':' either on the same line or as an indented block that may begin
    with a docstring.  Returns a Nodes.CEnumDefNode.
    """
    s.next()  # skip 'enum'

    scoped = False
    if s.context.cpp and (s.sy == 'class' or (s.sy == 'IDENT' and s.systring == 'struct')):
        scoped = True
        s.next()

    if s.sy != 'IDENT':
        name = cname = None
        if scoped:
            s.error("Unnamed scoped enum not allowed")
    else:
        name = s.systring
        s.next()
        cname = p_opt_cname(s)
        if cname is None and ctx.namespace is not None:
            cname = ctx.namespace + "::" + name

    if scoped and s.sy == '(':
        s.next()
        underlying_type = p_c_base_type(s)
        s.expect(')')
    else:
        underlying_type = Nodes.CSimpleBaseTypeNode(
            pos,
            name="int",
            module_path=[],
            is_basic_c_type=True,
            signed=1,
            complex=0,
            longness=0)

    s.expect(':')
    items = []
    doc = None
    if s.sy == 'NEWLINE':
        # indented block of item lines
        s.next()  # 'NEWLINE'
        s.expect_indent()
        doc = p_doc_string(s)
        while s.sy not in ('DEDENT', 'EOF'):
            p_c_enum_line(s, ctx, items)
        s.expect_dedent()
    else:
        # items on the same line as the header
        p_c_enum_line(s, ctx, items)

    if not items and ctx.visibility != "extern":
        error(pos, "Empty enum definition not allowed outside a 'cdef extern from' block")

    return Nodes.CEnumDefNode(
        pos, name=name, cname=cname,
        scoped=scoped, items=items,
        underlying_type=underlying_type,
        typedef_flag=ctx.typedef_flag, visibility=ctx.visibility,
        create_wrapper=ctx.overridable,
        api=ctx.api, in_pxd=ctx.level == 'module_pxd', doc=doc)
3227
def p_c_enum_line(s, ctx, items):
    # Parse one line of an enum body: either 'pass' or a comma-separated
    # list of items (a trailing comma is allowed), followed by a newline.
    # Parsed items are appended to 'items'.
    if s.sy == 'pass':
        s.next()
    else:
        p_c_enum_item(s, ctx, items)
        while s.sy == ',':
            s.next()
            if s.sy in ('NEWLINE', 'EOF'):
                break
            p_c_enum_item(s, ctx, items)
    s.expect_newline("Syntax error in enum item list")
3239
def p_c_enum_item(s, ctx, items):
    # Parse 'name ["cname"] [= value]' and append the resulting
    # CEnumDefItemNode to 'items'.  Without an explicit cname, the name is
    # qualified with the namespace of the context, if there is one.
    item_pos = s.position()
    name = p_ident(s)
    cname = p_opt_cname(s)
    if cname is None and ctx.namespace is not None:
        cname = ctx.namespace + "::" + name
    if s.sy == '=':
        s.next()
        value = p_test(s)
    else:
        value = None
    item = Nodes.CEnumDefItemNode(item_pos, name=name, cname=cname, value=value)
    items.append(item)
3252
def p_c_struct_or_union_definition(s, pos, ctx):
    """Parse a '[packed] struct' or 'union' definition or declaration.

    With a ':' after the name, the members are parsed either as a single
    'pass' or as an indented block of C variable/function declarations
    (each parsed in a fresh default Ctx).  Without a ':', this is only a
    declaration and 'attributes' stays None.
    Returns a Nodes.CStructOrUnionDefNode.
    """
    packed = s.systring == 'packed'
    if packed:
        s.next()
        if s.sy != 'IDENT' or s.systring != 'struct':
            s.expected('struct')
    # s.sy == ident 'struct' or 'union'
    kind = s.systring
    s.next()
    name = p_ident(s)
    cname = p_opt_cname(s)
    if cname is None and ctx.namespace is not None:
        cname = ctx.namespace + "::" + name

    if s.sy != ':':
        attributes = None
        s.expect_newline("Syntax error in struct or union definition")
    else:
        s.next()
        attributes = []
        if s.sy == 'pass':
            s.next()
            s.expect_newline("Expected a newline", ignore_semicolon=True)
        else:
            s.expect('NEWLINE')
            s.expect_indent()
            member_ctx = Ctx()
            while s.sy != 'DEDENT':
                if s.sy == 'pass':
                    s.next()
                    s.expect_newline("Expected a newline")
                else:
                    member = p_c_func_or_var_declaration(s, s.position(), member_ctx)
                    attributes.append(member)
            s.expect_dedent()

        if not attributes and ctx.visibility != "extern":
            error(pos, "Empty struct or union definition not allowed outside a 'cdef extern from' block")

    return Nodes.CStructOrUnionDefNode(
        pos,
        name=name, cname=cname, kind=kind, attributes=attributes,
        typedef_flag=ctx.typedef_flag, visibility=ctx.visibility,
        api=ctx.api, in_pxd=ctx.level == 'module_pxd', packed=packed)
3296
def p_fused_definition(s, pos, ctx):
    """
    c(type)def fused my_fused_type:
        ...

    Expects s.systring == 'fused'.  Each line of the indented body is
    either 'pass' or one base type.  Returns a Nodes.FusedTypeNode.
    """
    if ctx.level not in ('module', 'module_pxd'):
        error(pos, "Fused type definition not allowed here")

    s.next()  # skip 'fused'
    name = p_ident(s)

    s.expect(":")
    s.expect_newline()
    s.expect_indent()

    types = []
    while s.sy != 'DEDENT':
        if s.sy == 'pass':
            s.next()
        else:
            types.append(p_c_base_type(s))
        s.expect_newline()

    s.expect_dedent()

    if not types:
        error(pos, "Need at least one type")

    return Nodes.FusedTypeNode(pos, name=name, types=types)
3330
def p_struct_enum(s, pos, ctx):
    # Dispatch on the current word: 'enum' goes to the enum parser,
    # anything else to the struct/union parser.
    if s.systring != 'enum':
        return p_c_struct_or_union_definition(s, pos, ctx)
    return p_c_enum_definition(s, pos, ctx)
3336
def p_visibility(s, prev_visibility):
    """Consume an optional visibility word and return the effective visibility.

    If the current token is one of 'extern', 'public' or 'readonly', it is
    consumed and returned.  A non-fatal error is reported when it conflicts
    with a non-'private' prev_visibility.  Otherwise nothing is consumed
    and prev_visibility is returned unchanged.
    """
    if s.sy != 'IDENT' or s.systring not in ('extern', 'public', 'readonly'):
        return prev_visibility
    visibility = s.systring
    if prev_visibility != 'private' and visibility != prev_visibility:
        s.error("Conflicting visibility options '%s' and '%s'"
            % (prev_visibility, visibility), fatal=False)
    s.next()
    return visibility
3347
def p_c_modifiers(s):
    # Consume consecutive modifier identifiers (currently only 'inline')
    # and return them as a list in source order.
    modifiers = []
    while s.sy == 'IDENT' and s.systring in ('inline',):
        modifiers.append(s.systring)
        s.next()
    return modifiers
3354
def p_c_func_or_var_declaration(s, pos, ctx):
    """Parse a C function definition or a C variable/function declaration.

    After the optional modifiers, the base type and the first declarator:

    * a following ':' starts a function body and yields a
      Nodes.CFuncDefNode;
    * otherwise further comma-separated declarators are collected (a
      trailing comma before the newline is allowed) and a
      Nodes.CVarDefNode is returned.  Inside extension types, a docstring
      on the line directly after the declaration is attached to it.

    A '->' return annotation is reported as a non-fatal error and skipped.
    """
    cmethod_flag = ctx.level in ('c_class', 'c_class_pxd')
    modifiers = p_c_modifiers(s)
    base_type = p_c_base_type(s, nonempty = 1, templates = ctx.templates)
    # All declarators of this statement are parsed in the same context,
    # which carries the modifiers: argument parsing reads ctx.modifiers.
    declarator_ctx = ctx(modifiers=modifiers)
    declarator = p_c_declarator(s, declarator_ctx, cmethod_flag = cmethod_flag,
                                assignable = 1, nonempty = 1)
    declarator.overridable = ctx.overridable
    if s.sy == 'IDENT' and s.systring == 'const' and ctx.level == 'cpp_class':
        s.next()
        is_const_method = 1
    else:
        is_const_method = 0
    if s.sy == '->':
        # Special enough to give a better error message and keep going.
        s.error(
            "Return type annotation is not allowed in cdef/cpdef signatures. "
            "Please define it before the function name, as in C signatures.",
            fatal=False)
        s.next()
        p_test(s)  # Keep going, but ignore result.
    if s.sy == ':':
        if ctx.level not in ('module', 'c_class', 'module_pxd', 'c_class_pxd', 'cpp_class') and not ctx.templates:
            s.error("C function definition not allowed here")
        doc, suite = p_suite_with_docstring(s, Ctx(level='function'))
        result = Nodes.CFuncDefNode(pos,
            visibility = ctx.visibility,
            base_type = base_type,
            declarator = declarator,
            body = suite,
            doc = doc,
            modifiers = modifiers,
            api = ctx.api,
            overridable = ctx.overridable,
            is_const_method = is_const_method)
    else:
        if is_const_method:
            declarator.is_const_method = is_const_method
        declarators = [declarator]
        while s.sy == ',':
            s.next()
            if s.sy == 'NEWLINE':
                break
            declarator = p_c_declarator(s, declarator_ctx, cmethod_flag = cmethod_flag,
                                        assignable = 1, nonempty = 1)
            declarators.append(declarator)
        # A docstring only counts if it starts on the very next line.
        doc_line = s.start_line + 1
        s.expect_newline("Syntax error in C variable declaration", ignore_semicolon=True)
        if ctx.level in ('c_class', 'c_class_pxd') and s.start_line == doc_line:
            doc = p_doc_string(s)
        else:
            doc = None
        result = Nodes.CVarDefNode(pos,
            visibility = ctx.visibility,
            base_type = base_type,
            declarators = declarators,
            in_pxd = ctx.level in ('module_pxd', 'c_class_pxd'),
            doc = doc,
            api = ctx.api,
            modifiers = modifiers,
            overridable = ctx.overridable)
    return result
3418
def p_ctypedef_statement(s, ctx):
    """Parse a ctypedef statement; expects s.sy == 'ctypedef'.

    After the optional visibility and 'api' markers, either hands over to
    the class, struct/union/enum or fused type parser (with typedef_flag
    set in the context) or parses 'base_type declarator' and returns a
    Nodes.CTypeDefNode.
    """
    pos = s.position()
    s.next()  # skip 'ctypedef'
    visibility = p_visibility(s, ctx.visibility)
    api = p_api(s)
    typedef_ctx = ctx(typedef_flag=1, visibility=visibility)
    if api:
        typedef_ctx.api = 1

    if s.sy == 'class':
        return p_c_class_definition(s, pos, typedef_ctx)
    if s.sy == 'IDENT':
        if s.systring in struct_enum_union:
            return p_struct_enum(s, pos, typedef_ctx)
        if s.systring == 'fused':
            return p_fused_definition(s, pos, typedef_ctx)

    base_type = p_c_base_type(s, nonempty=1)
    declarator = p_c_declarator(s, typedef_ctx, is_type=1, nonempty=1)
    s.expect_newline("Syntax error in ctypedef statement", ignore_semicolon=True)
    return Nodes.CTypeDefNode(
        pos, base_type=base_type,
        declarator=declarator,
        visibility=visibility, api=api,
        in_pxd=typedef_ctx.level == 'module_pxd')
3443
def p_decorators(s):
    """Parse consecutive '@decorator' lines.

    Each decorator is a dotted name, optionally followed by a call, and
    must be terminated by a newline.  Returns the list of
    Nodes.DecoratorNode objects in source order (empty if there is none).
    """
    decorators = []
    while s.sy == '@':
        pos = s.position()
        s.next()
        dotted = p_dotted_name(s, as_allowed=0)[2]
        parts = dotted.split('.')
        # Build name.attr.attr... from left to right.
        expr = ExprNodes.NameNode(pos, name=s.context.intern_ustring(parts[0]))
        for attr in parts[1:]:
            expr = ExprNodes.AttributeNode(
                pos, attribute=s.context.intern_ustring(attr), obj=expr)
        if s.sy == '(':
            expr = p_call(s, expr)
        decorators.append(Nodes.DecoratorNode(pos, decorator=expr))
        s.expect_newline("Expected a newline after decorator")
    return decorators
3460
3461
def _reject_cdef_modifier_in_py(s, name):
    """
    Report a cdef modifier (see _CDEF_MODIFIERS) that was misplaced in a
    Python function signature, and step over it.

    If the current token is an identifier and 'name' is one of the cdef
    modifiers, a non-fatal error is reported and the next identifier is
    parsed and returned.  Otherwise 'name' is returned unchanged.
    """
    if s.sy != 'IDENT' or name not in _CDEF_MODIFIERS:
        return name
    # Special enough to provide a good error message.
    s.error("Cannot use cdef modifier '%s' in Python function signature. Use a decorator instead." % name, fatal=False)
    # Keep going, in case there are other errors.
    return p_ident(s)
3470
3471
def p_def_statement(s, decorators=None, is_async_def=False):
    """
    Parse a Python 'def' function definition into a Nodes.DefNode.

    Expects s.sy == 'def'.  The node position is that of the first
    decorator if decorators were given, otherwise that of the 'def'
    keyword.  For 'async def' functions, async parsing mode is entered
    before the signature and left again after the body.
    """
    if decorators:
        pos = decorators[0].pos
    else:
        pos = s.position()

    # PEP 492 switches the async/await keywords on in "async def" functions
    if is_async_def:
        s.enter_async()
    s.next()

    name = _reject_cdef_modifier_in_py(s, p_ident(s))
    found = s.systring if s.sy == 'IDENT' else s.sy
    s.expect(
        '(',
        "Expected '(', found '%s'. Did you use cdef syntax in a Python declaration? "
        "Use decorators and Python type annotations instead." % found)
    args, star_arg, starstar_arg = p_varargslist(s, terminator=')')
    s.expect(')')
    # Cdef modifiers may have been (wrongly) written after the argument list.
    _reject_cdef_modifier_in_py(s, s.systring)

    return_type_annotation = None
    if s.sy == '->':
        s.next()
        return_type_annotation = p_annotation(s)
        _reject_cdef_modifier_in_py(s, s.systring)

    doc, body = p_suite_with_docstring(s, Ctx(level='function'))
    if is_async_def:
        s.exit_async()

    return Nodes.DefNode(
        pos,
        name=name,
        args=args,
        star_arg=star_arg,
        starstar_arg=starstar_arg,
        doc=doc,
        body=body,
        decorators=decorators,
        is_async_def=is_async_def,
        return_type_annotation=return_type_annotation)
3502
3503
def p_varargslist(s, terminator=')', annotated=1):
    """
    Parse the argument list of a Python function signature.

    Handles the positional-only marker '/', the '*' marker or '*args'
    argument, keyword-only arguments, '**kwargs' and one trailing comma.
    Parsing stops before 'terminator', which is not consumed.

    Returns a tuple (args, star_arg, starstar_arg).
    """
    args = p_c_arg_list(
        s, in_pyfunc=1, nonempty_declarators=1, annotated=annotated)
    star_arg = starstar_arg = None

    if s.sy == '/':
        if not args:
            s.error("Got zero positional-only arguments despite presence of "
                    "positional-only specifier '/'")
        s.next()
        # Mark all args to the left as pos only
        for positional in args:
            positional.pos_only = 1
        if s.sy == ',':
            s.next()
            more_args = p_c_arg_list(
                s, in_pyfunc=1, nonempty_declarators=1, annotated=annotated)
            args.extend(more_args)
        elif s.sy != terminator:
            s.error("Syntax error in Python function argument list")

    if s.sy == '*':
        s.next()
        # A bare '*' only separates keyword-only arguments.
        if s.sy == 'IDENT':
            star_arg = p_py_arg_decl(s, annotated=annotated)
        if s.sy == ',':
            s.next()
            kwonly_args = p_c_arg_list(
                s, in_pyfunc=1, nonempty_declarators=1, kw_only=1,
                annotated=annotated)
            args.extend(kwonly_args)
        elif s.sy != terminator:
            s.error("Syntax error in Python function argument list")

    if s.sy == '**':
        s.next()
        starstar_arg = p_py_arg_decl(s, annotated=annotated)

    # Allow a single trailing comma.
    if s.sy == ',':
        s.next()
    return (args, star_arg, starstar_arg)
3539
def p_py_arg_decl(s, annotated=1):
    """
    Parse the name of a '*args' / '**kwargs' style argument, together
    with its ': annotation' if 'annotated' is true and one is present.

    Returns a Nodes.PyArgDeclNode.
    """
    arg_pos = s.position()
    arg_name = p_ident(s)
    if annotated and s.sy == ':':
        s.next()
        annotation = p_annotation(s)
    else:
        annotation = None
    return Nodes.PyArgDeclNode(arg_pos, name=arg_name, annotation=annotation)
3548
3549
def p_class_statement(s, decorators):
    """
    Parse a Python 'class' statement into a Nodes.PyClassDefNode.

    Expects s.sy == 'class'.  Bases and keyword arguments are parsed
    with the function call argument parser; a class without bases gets
    an empty TupleNode.
    """
    pos = s.position()
    s.next()
    class_name = EncodedString(p_ident(s))
    class_name.encoding = s.source_encoding  # FIXME: why is this needed?

    bases = keywords = None
    if s.sy == '(':
        call_args, call_kwargs = p_call_parse_args(s, allow_genexp=False)
        bases, keywords = p_call_build_packed_args(pos, call_args, call_kwargs)
    if bases is None:
        # XXX: empty arg_tuple
        bases = ExprNodes.TupleNode(pos, args=[])

    doc, body = p_suite_with_docstring(s, Ctx(level='class'))
    return Nodes.PyClassDefNode(
        pos,
        name=class_name,
        bases=bases,
        keyword_args=keywords,
        doc=doc,
        body=body,
        decorators=decorators,
        force_py3_semantics=s.context.language_level >= 3)
3571
3572
def p_c_class_definition(s, pos, ctx):
    """
    Parse a C class (extension type) definition into a Nodes.CClassDefNode.

    Expects s.sy == 'class'.  Parses the optionally module-qualified
    class name, an optional 'as' name, the bases, the '[...]' name
    options and the class body, then checks the name specifications
    against the visibility given by 'ctx'.
    """
    s.next()
    visibility = ctx.visibility

    # Dotted name: all parts but the last form the module path.
    module_path = []
    class_name = p_ident(s)
    while s.sy == '.':
        s.next()
        module_path.append(class_name)
        class_name = p_ident(s)

    as_name = class_name
    if module_path:
        if visibility != 'extern':
            error(pos, "Qualified class name only allowed for 'extern' C class")
        if s.sy == 'IDENT' and s.systring == 'as':
            s.next()
            as_name = p_ident(s)

    objstruct_name = typeobj_name = check_size = None
    bases = None
    if s.sy == '(':
        positional_args, keyword_args = p_call_parse_args(s, allow_genexp=False)
        if keyword_args:
            s.error("C classes cannot take keyword bases.")
        bases, _unused_keywords = p_call_build_packed_args(pos, positional_args, keyword_args)
    if bases is None:
        bases = ExprNodes.TupleNode(pos, args=[])

    if s.sy == '[':
        if not (visibility in ('public', 'extern') or ctx.api):
            error(s.position(), "Name options only allowed for 'public', 'api', or 'extern' C class")
        objstruct_name, typeobj_name, check_size = p_c_class_options(s)

    if s.sy == ':':
        body_level = 'c_class_pxd' if ctx.level == 'module_pxd' else 'c_class'
        doc, body = p_suite_with_docstring(s, Ctx(level=body_level))
    else:
        # Declaration without a body.
        s.expect_newline("Syntax error in C class definition")
        doc = body = None

    # Check the name specifications required/allowed for each visibility.
    if visibility == 'extern':
        if not module_path:
            error(pos, "Module name required for 'extern' C class")
        if typeobj_name:
            error(pos, "Type object name specification not allowed for 'extern' C class")
    elif visibility == 'public':
        if not objstruct_name:
            error(pos, "Object struct name specification required for 'public' C class")
        if not typeobj_name:
            error(pos, "Type object name specification required for 'public' C class")
    elif visibility == 'private':
        if ctx.api:
            if not objstruct_name:
                error(pos, "Object struct name specification required for 'api' C class")
            if not typeobj_name:
                error(pos, "Type object name specification required for 'api' C class")
    else:
        error(pos, "Invalid class visibility '%s'" % visibility)

    return Nodes.CClassDefNode(
        pos,
        visibility=visibility,
        typedef_flag=ctx.typedef_flag,
        api=ctx.api,
        module_name=".".join(module_path),
        class_name=class_name,
        as_name=as_name,
        bases=bases,
        objstruct_name=objstruct_name,
        typeobj_name=typeobj_name,
        check_size=check_size,
        in_pxd=(ctx.level == 'module_pxd'),
        doc=doc,
        body=body)
3647
3648
def p_c_class_options(s):
    """
    Parse the '[...]' name options of a C class definition.

    Recognised options are 'object NAME', 'type NAME' and
    'check_size ignore|warn|error', separated by commas.

    Returns a tuple (objstruct_name, typeobj_name, check_size), where
    options that were not given are None.
    """
    objstruct_name = typeobj_name = check_size = None
    s.expect('[')
    while s.sy == 'IDENT':
        option = s.systring
        if option == 'object':
            s.next()
            objstruct_name = p_ident(s)
        elif option == 'type':
            s.next()
            typeobj_name = p_ident(s)
        elif option == 'check_size':
            s.next()
            check_size = p_ident(s)
            if check_size not in ('ignore', 'warn', 'error'):
                s.error("Expected one of ignore, warn or error, found %r" % check_size)
        # An unknown identifier is not consumed and fails the ']' check below.
        if s.sy != ',':
            break
        s.next()
    s.expect(']', "Expected 'object', 'type' or 'check_size'")
    return objstruct_name, typeobj_name, check_size
3673
3674
def p_property_decl(s):
    """
    Parse a 'property NAME:' block into a Nodes.PropertyNode.

    The scanner is expected to be positioned on the 'property' keyword.
    """
    prop_pos = s.position()
    s.next()  # 'property'
    prop_name = p_ident(s)
    prop_ctx = Ctx(level='property')
    doc, body = p_suite_with_docstring(s, prop_ctx, with_doc_only=True)
    return Nodes.PropertyNode(prop_pos, name=prop_name, doc=doc, body=body)
3682
3683
def p_ignorable_statement(s):
    """
    Parses any kind of ignorable statement that is allowed in .pxd files.

    Currently this is a stand-alone string literal, which is returned as
    a Nodes.ExprStatNode.  Returns None if the scanner is not positioned
    at the beginning of a string.
    """
    if s.sy != 'BEGIN_STRING':
        return None
    stat_pos = s.position()
    literal = p_atom(s)
    s.expect_newline("Syntax error in string", ignore_semicolon=True)
    return Nodes.ExprStatNode(stat_pos, expr=literal)
3694
3695
def p_doc_string(s):
    """
    Parse a docstring if the scanner is positioned at a string literal.

    Returns the unicode value for plain and 'u' prefixed strings.  For
    any other string kind, a warning is issued and the bytes value is
    returned.  Returns None if there is no string at this point.
    """
    if s.sy != 'BEGIN_STRING':
        return None
    doc_pos = s.position()
    kind, bytes_result, unicode_result = p_cat_string_literal(s)
    s.expect_newline("Syntax error in doc string", ignore_semicolon=True)
    if kind not in ('u', ''):
        warning(doc_pos, "Python 3 requires docstrings to be unicode strings")
        return bytes_result
    return unicode_result
3707
3708
def _extract_docstring(node):
    """
    Split a leading docstring off a statement or statement list.

    If 'node' is a string literal expression statement, it is replaced
    by an empty StatListNode.  If 'node' is a non-empty StatListNode
    that starts with such a statement, that statement is deleted from
    the list in place.

    Returns a tuple (plain-docstring or None, node).
    """
    doc_node = None
    if node is not None:
        if isinstance(node, Nodes.ExprStatNode):
            expr = node.expr
            if expr.is_string_literal:
                doc_node = expr
                node = Nodes.StatListNode(node.pos, stats=[])
        elif isinstance(node, Nodes.StatListNode) and node.stats:
            stats = node.stats
            first = stats[0]
            if isinstance(first, Nodes.ExprStatNode) and first.expr.is_string_literal:
                doc_node = first.expr
                del stats[0]

    if doc_node is None:
        return None, node
    if isinstance(doc_node, ExprNodes.BytesNode):
        warning(node.pos,
                "Python 3 requires docstrings to be unicode strings")
        return doc_node.value, node
    if isinstance(doc_node, ExprNodes.StringNode):
        # Prefer the unicode value, fall back to the plain value.
        doc = doc_node.unicode_value
        if doc is None:
            doc = doc_node.value
        return doc, node
    return doc_node.value, node
3742
3743
def p_code(s, level=None, ctx=Ctx):
    """
    Parse a complete list of statements up to the end of the input.

    'ctx' is called with the given 'level' to create the parsing
    context.  Reports a syntax error if anything other than EOF follows
    the statements.  Returns the parsed statement list.
    """
    code_ctx = ctx(level=level)
    statements = p_statement_list(s, code_ctx, first_statement=1)
    if s.sy != 'EOF':
        s.error("Syntax error in statement [%s,%s]" % (
            repr(s.sy), repr(s.systring)))
    return statements
3750
3751
# Matcher for "# cython: name=value, ..." comment lines.  Group 1 holds the
# directive text that follows the colon; it must start with a name made of
# word characters and dots, followed by '='.
_match_compiler_directive_comment = cython.declare(
    object,
    re.compile(r"^#\s*cython\s*:\s*((\w|[.])+\s*=.*)$").match)
3754
3755
def p_compiler_directive_comments(s):
    """
    Collect compiler directives from the "# cython: ..." comment lines
    at the current scanner position.

    All consecutive comment lines are consumed.  A directive list that
    cannot be parsed is reported as a non-fatal error and skipped.
    Repeating a directive with the same value gives a warning, with a
    different value an error.  A 'language_level' directive is applied
    to the scanner context right away.

    Returns a dict mapping directive names to their values.
    """
    directives = {}
    while s.sy == 'commentline':
        pos = s.position()
        match = _match_compiler_directive_comment(s.systring)
        if match:
            directives_string = match.group(1).strip()
            try:
                parsed = Options.parse_directive_list(directives_string, ignore_unknown=True)
            except ValueError as e:
                s.error(e.args[0], fatal=False)
            else:
                for name, value in parsed.items():
                    if name not in directives:
                        continue
                    previous = directives[name]
                    if value == previous:
                        warning(pos, "Duplicate directive found: %s" % (name,))
                    else:
                        s.error("Conflicting settings found for top-level directive %s: %r and %r" % (
                            name, previous, value), pos=pos)

                if 'language_level' in parsed:
                    # Make sure we apply the language level already to the first token that follows the comments.
                    s.context.set_language_level(parsed['language_level'])

                directives.update(parsed)

        s.next()
    return directives
3787
3788
def p_module(s, pxd, full_module_name, ctx=Ctx):
    """
    Parse a complete module (or .pxd file if 'pxd' is true) into a
    ModuleNode.

    Reads the leading compiler directive comments, then the module
    docstring and the module body.  If no language level has been set
    by then, '3str' is used and, for sources that have a file name, a
    FutureWarning is issued.
    """
    pos = s.position()

    directive_comments = p_compiler_directive_comments(s)
    s.parse_comments = False

    context = s.context
    if context.language_level is None:
        context.set_language_level('3str')
        source_filename = pos[0].filename
        if source_filename:
            import warnings
            warnings.warn(
                "Cython directive 'language_level' not set, using '3str' for now (Py3). "
                "This has changed from earlier releases! File: %s" % source_filename,
                FutureWarning,
                stacklevel=1 if cython.compiled else 2,
            )

    if pxd:
        level = 'module_pxd'
    else:
        level = 'module'
    doc = p_doc_string(s)
    body = p_statement_list(s, ctx(level=level), first_statement=1)
    if s.sy != 'EOF':
        s.error("Syntax error in statement [%s,%s]" % (
            repr(s.sy), repr(s.systring)))
    return ModuleNode(
        pos,
        doc=doc,
        body=body,
        full_module_name=full_module_name,
        directive_comments=directive_comments)
3815
def p_template_definition(s):
    """
    Parse one template parameter of a C++ class: 'NAME' or 'NAME = *'.

    Returns a tuple (name, required), where 'required' is False if the
    '= *' suffix was given and True otherwise.
    """
    name = p_ident(s)
    if s.sy != '=':
        return name, True
    s.expect('=')
    s.expect('*')
    return name, False
3825
def p_cpp_class_definition(s, pos, ctx):
    """
    Parse a C++ class definition into a Nodes.CppClassNode.

    Expects s.sy == 'cppclass'.  Parses the class name with its optional
    cname, the '[...]' template parameters, the '(...)' base classes,
    an optional 'nogil' marker and the indented class body.  A
    declaration without a body yields 'attributes = None'.
    """
    s.next()
    class_name = p_ident(s)
    cname = p_opt_cname(s)
    if cname is None and ctx.namespace is not None:
        cname = ctx.namespace + "::" + class_name
    if s.sy == '.':
        error(pos, "Qualified class name not allowed C++ class")

    templates = template_names = None
    if s.sy == '[':
        s.next()
        templates = [p_template_definition(s)]
        while s.sy == ',':
            s.next()
            templates.append(p_template_definition(s))
        s.expect(']')
        template_names = [entry[0] for entry in templates]

    base_classes = []
    if s.sy == '(':
        s.next()
        base_classes.append(p_c_base_type(s, templates=template_names))
        while s.sy == ',':
            s.next()
            base_classes.append(p_c_base_type(s, templates=template_names))
        s.expect(')')

    if s.sy == '[':
        error(s.position(), "Name options not allowed for C++ class")
    nogil = p_nogil(s)

    if s.sy != ':':
        attributes = None
        s.expect_newline("Syntax error in C++ class definition")
    else:
        s.next()
        s.expect('NEWLINE')
        s.expect_indent()
        # Allow a cppclass to have docstrings. It will be discarded as comment.
        # The goal of this is consistency: we can make docstrings inside cppclass methods,
        # so why not on the cppclass itself ?
        p_doc_string(s)
        attributes = []
        body_ctx = Ctx(
            visibility=ctx.visibility,
            level='cpp_class',
            nogil=nogil or ctx.nogil,
            templates=template_names)
        while s.sy != 'DEDENT':
            if s.sy == 'pass':
                s.next()
                s.expect_newline("Expected a newline")
            else:
                attributes.append(p_cpp_class_attribute(s, body_ctx))
        s.expect_dedent()

    return Nodes.CppClassNode(
        pos,
        name=class_name,
        cname=cname,
        base_classes=base_classes,
        visibility=ctx.visibility,
        in_pxd=(ctx.level == 'module_pxd'),
        attributes=attributes,
        templates=templates)
3887
def p_cpp_class_attribute(s, ctx):
    """
    Parse one member of a C++ class body.

    Nested 'cppclass', 'struct' and 'union' definitions are parsed as
    C++ classes, 'enum' as an enum and 'ctypedef' as a typedef.
    Anything else is parsed as a C function or variable declaration, to
    which any preceding decorators are attached.
    """
    decorators = p_decorators(s) if s.sy == '@' else None

    keyword = s.systring
    if keyword == 'cppclass':
        return p_cpp_class_definition(s, s.position(), ctx)
    if keyword == 'ctypedef':
        return p_ctypedef_statement(s, ctx)
    if s.sy == 'IDENT' and keyword in struct_enum_union:
        if keyword == 'enum':
            return p_struct_enum(s, s.position(), ctx)
        return p_cpp_class_definition(s, s.position(), ctx)

    node = p_c_func_or_var_declaration(s, s.position(), ctx)
    if decorators is not None:
        allowed = (Nodes.CFuncDefNode, Nodes.CVarDefNode, Nodes.CClassDefNode)
        if ctx.allow_struct_enum_decorator:
            allowed += (Nodes.CStructOrUnionDefNode, Nodes.CEnumDefNode)
        if not isinstance(node, allowed):
            s.error("Decorators can only be followed by functions or classes")
        node.decorators = decorators
    return node
3911
3912
3913#----------------------------------------------
3914#
3915#   Debugging
3916#
3917#----------------------------------------------
3918
def print_parse_tree(f, node, level, key = None):
    """
    Write an indented, human-readable dump of a parse tree to 'f'.

    f      file-like object; only its write() method is used
    node   a tuple (tag, pos, child, ...), a list of children, a
           Nodes.Node instance, or any other value, which is printed
           using '%s' formatting
    level  nesting depth; each level indents by two spaces
    key    optional label (e.g. an attribute name) that is printed as
           "key: " in front of the value

    Every output line starts with exactly one indentation prefix, and
    the key label is printed for all values, including empty ones such
    as None or [].
    """
    ind = "  " * level
    # Build the line prefix once so that leaf values are not indented twice
    # and empty values keep their label.
    prefix = "%s%s: " % (ind, key) if key else ind
    if node:
        t = type(node)
        if t is tuple:
            f.write("%s(%s @ %s\n" % (prefix, node[0], node[1]))
            for child in node[2:]:
                print_parse_tree(f, child, level+1)
            f.write("%s)\n" % ind)
            return
        elif t is list:
            f.write("%s[\n" % prefix)
            for child in node:
                print_parse_tree(f, child, level+1)
            f.write("%s]\n" % ind)
            return
        elif isinstance(node, Nodes.Node):
            try:
                tag = node.tag
            except AttributeError:
                tag = node.__class__.__name__
            f.write("%s%s @ %s\n" % (prefix, tag, node.pos))
            # Dump all instance attributes except those already in the header.
            for name, value in node.__dict__.items():
                if name != 'tag' and name != 'pos':
                    print_parse_tree(f, value, level+1, name)
            return
    # Leaf value (or empty container): a single line.
    f.write("%s%s\n" % (prefix, node))
3949
def p_annotation(s):
    """
    Parse an annotation expression into an ExprNodes.AnnotationNode.

    An annotation has the plain "test" syntax, but the node also stores
    the string it came from.  That string is *allowed* to be
    changed/processed (although it isn't here), so it may not exactly
    match the string generated by Python, and it is not a bug if it
    doesn't.
    """
    annotation_pos = s.position()
    annotation_expr = p_test(s)
    return ExprNodes.AnnotationNode(annotation_pos, expr=annotation_expr)
3960