1from __future__ import unicode_literals
2import re
3from . import ast
4from .stream import EOF, EOL, FluentParserStream
5from .errors import ParseError
6
7
def with_span(fn):
    """Decorate a parser method so that the node it returns gets a span.

    The wrapped method is called as ``fn(self, ps, *args, **kwargs)``. When
    ``self.with_spans`` is false the result is returned untouched. Otherwise
    ``ps.index`` is read before and after the call and passed to the
    returned node's ``add_span``, unless the node already has a span.

    The wrapper keeps the wrapped method's ``__name__`` and ``__doc__`` so
    that tracebacks and introspection show the real method.
    """
    # Imported locally so this decorator does not depend on a module-level
    # import being present.
    import functools

    @functools.wraps(fn)
    def decorated(self, ps, *args, **kwargs):
        if not self.with_spans:
            return fn(self, ps, *args, **kwargs)

        start = ps.index
        node = fn(self, ps, *args, **kwargs)

        # Don't re-add the span if the node already has it. This may happen
        # when one decorated function calls another decorated function.
        if node.span is not None:
            return node

        node.add_span(start, ps.index)
        return node

    return decorated
26
27
28class FluentParser(object):
29    """This class is used to parse Fluent source content.
30
31    ``with_spans`` enables source information in the form of
32    :class:`.ast.Span` objects for each :class:`.ast.SyntaxNode`.
33    """
    def __init__(self, with_spans=True):
        """Store the ``with_spans`` flag.

        The flag is read by the parsing methods and by the ``with_span``
        decorator to decide whether spans are added to the produced nodes.
        """
        self.with_spans = with_spans
36
37    def parse(self, source):
38        """Create a :class:`.ast.Resource` from a Fluent source.
39        """
40        ps = FluentParserStream(source)
41        ps.skip_blank_block()
42
43        entries = []
44        last_comment = None
45
46        while ps.current_char:
47            entry = self.get_entry_or_junk(ps)
48            blank_lines = ps.skip_blank_block()
49
50            # Regular Comments require special logic. Comments may be attached
51            # to Messages or Terms if they are followed immediately by them.
52            # However they should parse as standalone when they're followed by
53            # Junk. Consequently, we only attach Comments once we know that the
54            # Message or the Term parsed successfully.
55            if isinstance(entry, ast.Comment) and len(blank_lines) == 0 \
56                    and ps.current_char:
57                # Stash the comment and decide what to do with it
58                # in the next pass.
59                last_comment = entry
60                continue
61
62            if last_comment is not None:
63                if isinstance(entry, (ast.Message, ast.Term)):
64                    entry.comment = last_comment
65                    if self.with_spans:
66                        entry.span.start = entry.comment.span.start
67                else:
68                    entries.append(last_comment)
69                # In either case, the stashed comment has been dealt with;
70                # clear it.
71                last_comment = None
72
73            entries.append(entry)
74
75        res = ast.Resource(entries)
76
77        if self.with_spans:
78            res.add_span(0, ps.index)
79
80        return res
81
82    def parse_entry(self, source):
83        """Parse the first :class:`.ast.Entry` in source.
84
85        Skip all encountered comments and start parsing at the first :class:`.ast.Message`
86        or :class:`.ast.Term` start. Return :class:`.ast.Junk` if the parsing is not successful.
87
88        Preceding comments are ignored unless they contain syntax errors
89        themselves, in which case :class:`.ast.Junk` for the invalid comment is returned.
90        """
91        ps = FluentParserStream(source)
92        ps.skip_blank_block()
93
94        while ps.current_char == '#':
95            skipped = self.get_entry_or_junk(ps)
96            if isinstance(skipped, ast.Junk):
97                # Don't skip Junk comments.
98                return skipped
99            ps.skip_blank_block()
100
101        return self.get_entry_or_junk(ps)
102
103    def get_entry_or_junk(self, ps):
104        entry_start_pos = ps.index
105
106        try:
107            entry = self.get_entry(ps)
108            ps.expect_line_end()
109            return entry
110        except ParseError as err:
111            error_index = ps.index
112            ps.skip_to_next_entry_start(entry_start_pos)
113            next_entry_start = ps.index
114            if next_entry_start < error_index:
115                # The position of the error must be inside of the Junk's span.
116                error_index = next_entry_start
117
118            # Create a Junk instance
119            slice = ps.string[entry_start_pos:next_entry_start]
120            junk = ast.Junk(slice)
121            if self.with_spans:
122                junk.add_span(entry_start_pos, next_entry_start)
123            annot = ast.Annotation(err.code, err.args, err.message)
124            annot.add_span(error_index, error_index)
125            junk.add_annotation(annot)
126            return junk
127
128    def get_entry(self, ps):
129        if ps.current_char == '#':
130            return self.get_comment(ps)
131
132        if ps.current_char == '-':
133            return self.get_term(ps)
134
135        if ps.is_identifier_start():
136            return self.get_message(ps)
137
138        raise ParseError('E0002')
139
140    @with_span
141    def get_comment(self, ps):
142        # 0 - comment
143        # 1 - group comment
144        # 2 - resource comment
145        level = -1
146        content = ''
147
148        while True:
149            i = -1
150            while ps.current_char == '#' \
151                    and (i < (2 if level == -1 else level)):
152                ps.next()
153                i += 1
154
155            if level == -1:
156                level = i
157
158            if ps.current_char != EOL:
159                ps.expect_char(' ')
160                ch = ps.take_char(lambda x: x != EOL)
161                while ch:
162                    content += ch
163                    ch = ps.take_char(lambda x: x != EOL)
164
165            if ps.is_next_line_comment(level=level):
166                content += ps.current_char
167                ps.next()
168            else:
169                break
170
171        if level == 0:
172            return ast.Comment(content)
173        elif level == 1:
174            return ast.GroupComment(content)
175        elif level == 2:
176            return ast.ResourceComment(content)
177
178    @with_span
179    def get_message(self, ps):
180        id = self.get_identifier(ps)
181        ps.skip_blank_inline()
182        ps.expect_char('=')
183
184        value = self.maybe_get_pattern(ps)
185        attrs = self.get_attributes(ps)
186
187        if value is None and len(attrs) == 0:
188            raise ParseError('E0005', id.name)
189
190        return ast.Message(id, value, attrs)
191
192    @with_span
193    def get_term(self, ps):
194        ps.expect_char('-')
195        id = self.get_identifier(ps)
196
197        ps.skip_blank_inline()
198        ps.expect_char('=')
199
200        value = self.maybe_get_pattern(ps)
201        if value is None:
202            raise ParseError('E0006', id.name)
203
204        attrs = self.get_attributes(ps)
205        return ast.Term(id, value, attrs)
206
207    @with_span
208    def get_attribute(self, ps):
209        ps.expect_char('.')
210
211        key = self.get_identifier(ps)
212
213        ps.skip_blank_inline()
214        ps.expect_char('=')
215
216        value = self.maybe_get_pattern(ps)
217        if value is None:
218            raise ParseError('E0012')
219
220        return ast.Attribute(key, value)
221
222    def get_attributes(self, ps):
223        attrs = []
224        ps.peek_blank()
225
226        while ps.is_attribute_start():
227            ps.skip_to_peek()
228            attr = self.get_attribute(ps)
229            attrs.append(attr)
230            ps.peek_blank()
231
232        return attrs
233
234    @with_span
235    def get_identifier(self, ps):
236        name = ps.take_id_start()
237        ch = ps.take_id_char()
238        while ch:
239            name += ch
240            ch = ps.take_id_char()
241
242        return ast.Identifier(name)
243
244    def get_variant_key(self, ps):
245        ch = ps.current_char
246
247        if ch is EOF:
248            raise ParseError('E0013')
249
250        cc = ord(ch)
251        if ((cc >= 48 and cc <= 57) or cc == 45):  # 0-9, -
252            return self.get_number(ps)
253
254        return self.get_identifier(ps)
255
256    @with_span
257    def get_variant(self, ps, has_default):
258        default_index = False
259
260        if ps.current_char == '*':
261            if has_default:
262                raise ParseError('E0015')
263            ps.next()
264            default_index = True
265
266        ps.expect_char('[')
267        ps.skip_blank()
268
269        key = self.get_variant_key(ps)
270
271        ps.skip_blank()
272        ps.expect_char(']')
273
274        value = self.maybe_get_pattern(ps)
275        if value is None:
276            raise ParseError('E0012')
277
278        return ast.Variant(key, value, default_index)
279
280    def get_variants(self, ps):
281        variants = []
282        has_default = False
283
284        ps.skip_blank()
285        while ps.is_variant_start():
286            variant = self.get_variant(ps, has_default)
287
288            if variant.default:
289                has_default = True
290
291            variants.append(variant)
292            ps.expect_line_end()
293            ps.skip_blank()
294
295        if len(variants) == 0:
296            raise ParseError('E0011')
297
298        if not has_default:
299            raise ParseError('E0010')
300
301        return variants
302
303    def get_digits(self, ps):
304        num = ''
305
306        ch = ps.take_digit()
307        while ch:
308            num += ch
309            ch = ps.take_digit()
310
311        if len(num) == 0:
312            raise ParseError('E0004', '0-9')
313
314        return num
315
316    @with_span
317    def get_number(self, ps):
318        num = ''
319
320        if ps.current_char == '-':
321            num += '-'
322            ps.next()
323
324        num += self.get_digits(ps)
325
326        if ps.current_char == '.':
327            num += '.'
328            ps.next()
329            num += self.get_digits(ps)
330
331        return ast.NumberLiteral(num)
332
333    def maybe_get_pattern(self, ps):
334        '''Parse an inline or a block Pattern, or None
335
336        maybe_get_pattern distinguishes between patterns which start on the
337        same line as the indentifier (aka inline singleline patterns and inline
338        multiline patterns), and patterns which start on a new line (aka block
339        patterns). The distinction is important for the dedentation logic: the
340        indent of the first line of a block pattern must be taken into account
341        when calculating the maximum common indent.
342        '''
343        ps.peek_blank_inline()
344        if ps.is_value_start():
345            ps.skip_to_peek()
346            return self.get_pattern(ps, is_block=False)
347
348        ps.peek_blank_block()
349        if ps.is_value_continuation():
350            ps.skip_to_peek()
351            return self.get_pattern(ps, is_block=True)
352
353        return None
354
355    @with_span
356    def get_pattern(self, ps, is_block):
357        elements = []
358        if is_block:
359            # A block pattern is a pattern which starts on a new line. Measure
360            # the indent of this first line for the dedentation logic.
361            blank_start = ps.index
362            first_indent = ps.skip_blank_inline()
363            elements.append(self.Indent(first_indent, blank_start, ps.index))
364            common_indent_length = len(first_indent)
365        else:
366            common_indent_length = float('infinity')
367
368        while ps.current_char:
369            if ps.current_char == EOL:
370                blank_start = ps.index
371                blank_lines = ps.peek_blank_block()
372                if ps.is_value_continuation():
373                    ps.skip_to_peek()
374                    indent = ps.skip_blank_inline()
375                    common_indent_length = min(common_indent_length, len(indent))
376                    elements.append(self.Indent(blank_lines + indent, blank_start, ps.index))
377                    continue
378
379                # The end condition for get_pattern's while loop is a newline
380                # which is not followed by a valid pattern continuation.
381                ps.reset_peek()
382                break
383
384            if ps.current_char == '}':
385                raise ParseError('E0027')
386
387            if ps.current_char == '{':
388                element = self.get_placeable(ps)
389            else:
390                element = self.get_text_element(ps)
391
392            elements.append(element)
393
394        dedented = self.dedent(elements, common_indent_length)
395        return ast.Pattern(dedented)
396
    class Indent(ast.SyntaxNode):
        # Parser-internal helper node: holds a run of blank text (``value``)
        # together with its span. ``get_pattern`` creates these and
        # ``dedent`` either drops them or turns them into TextElements.
        def __init__(self, value, start, end):
            super(FluentParser.Indent, self).__init__()
            self.value = value
            # The span is always added here, without consulting the parser's
            # ``with_spans`` flag.
            self.add_span(start, end)
402
403    def dedent(self, elements, common_indent):
404        '''Dedent a list of elements by removing the maximum common indent from
405        the beginning of text lines. The common indent is calculated in
406        get_pattern.
407        '''
408        trimmed = []
409
410        for element in elements:
411            if isinstance(element, ast.Placeable):
412                trimmed.append(element)
413                continue
414
415            if isinstance(element, self.Indent):
416                # Strip the common indent.
417                element.value = element.value[:len(element.value) - common_indent]
418                if len(element.value) == 0:
419                    continue
420
421            prev = trimmed[-1] if len(trimmed) > 0 else None
422            if isinstance(prev, ast.TextElement):
423                # Join adjacent TextElements by replacing them with their sum.
424                sum = ast.TextElement(prev.value + element.value)
425                if self.with_spans:
426                    sum.add_span(prev.span.start, element.span.end)
427                trimmed[-1] = sum
428                continue
429
430            if isinstance(element, self.Indent):
431                # If the indent hasn't been merged into a preceding
432                # TextElements, convert it into a new TextElement.
433                text_element = ast.TextElement(element.value)
434                if self.with_spans:
435                    text_element.add_span(element.span.start, element.span.end)
436                element = text_element
437
438            trimmed.append(element)
439
440        # Trim trailing whitespace from the Pattern.
441        last_element = trimmed[-1] if len(trimmed) > 0 else None
442        if isinstance(last_element, ast.TextElement):
443            last_element.value = last_element.value.rstrip(' \t\n\r')
444            if last_element.value == "":
445                trimmed.pop()
446
447        return trimmed
448
449    @with_span
450    def get_text_element(self, ps):
451        buf = ''
452
453        while ps.current_char:
454            ch = ps.current_char
455
456            if ch == '{' or ch == '}':
457                return ast.TextElement(buf)
458
459            if ch == EOL:
460                return ast.TextElement(buf)
461
462            buf += ch
463            ps.next()
464
465        return ast.TextElement(buf)
466
467    def get_escape_sequence(self, ps):
468        next = ps.current_char
469
470        if next == '\\' or next == '"':
471            ps.next()
472            return '\\{}'.format(next)
473
474        if next == 'u':
475            return self.get_unicode_escape_sequence(ps, next, 4)
476
477        if next == 'U':
478            return self.get_unicode_escape_sequence(ps, next, 6)
479
480        raise ParseError('E0025', next)
481
482    def get_unicode_escape_sequence(self, ps, u, digits):
483        ps.expect_char(u)
484        sequence = ''
485        for _ in range(digits):
486            ch = ps.take_hex_digit()
487            if not ch:
488                raise ParseError('E0026', '\\{}{}{}'.format(u, sequence, ps.current_char))
489            sequence += ch
490
491        return '\\{}{}'.format(u, sequence)
492
493    @with_span
494    def get_placeable(self, ps):
495        ps.expect_char('{')
496        ps.skip_blank()
497        expression = self.get_expression(ps)
498        ps.expect_char('}')
499        return ast.Placeable(expression)
500
501    @with_span
502    def get_expression(self, ps):
503        selector = self.get_inline_expression(ps)
504
505        ps.skip_blank()
506
507        if ps.current_char == '-':
508            if ps.peek() != '>':
509                ps.reset_peek()
510                return selector
511
512            if isinstance(selector, ast.MessageReference):
513                if selector.attribute is None:
514                    raise ParseError('E0016')
515                else:
516                    raise ParseError('E0018')
517
518            elif (
519                isinstance(selector, ast.TermReference)
520            ):
521                if selector.attribute is None:
522                    raise ParseError('E0017')
523            elif not (
524                isinstance(selector, (
525                    ast.StringLiteral,
526                    ast.NumberLiteral,
527                    ast.VariableReference,
528                    ast.FunctionReference,
529                ))
530            ):
531                raise ParseError('E0029')
532
533            ps.next()
534            ps.next()
535
536            ps.skip_blank_inline()
537            ps.expect_line_end()
538
539            variants = self.get_variants(ps)
540            return ast.SelectExpression(selector, variants)
541
542        if (
543            isinstance(selector, ast.TermReference)
544            and selector.attribute is not None
545        ):
546            raise ParseError('E0019')
547
548        return selector
549
550    @with_span
551    def get_inline_expression(self, ps):
552        if ps.current_char == '{':
553            return self.get_placeable(ps)
554
555        if ps.is_number_start():
556            return self.get_number(ps)
557
558        if ps.current_char == '"':
559            return self.get_string(ps)
560
561        if ps.current_char == '$':
562            ps.next()
563            id = self.get_identifier(ps)
564            return ast.VariableReference(id)
565
566        if ps.current_char == '-':
567            ps.next()
568            id = self.get_identifier(ps)
569            attribute = None
570            if ps.current_char == '.':
571                ps.next()
572                attribute = self.get_identifier(ps)
573            arguments = None
574            ps.peek_blank()
575            if ps.current_peek == '(':
576                ps.skip_to_peek()
577                arguments = self.get_call_arguments(ps)
578            return ast.TermReference(id, attribute, arguments)
579
580        if ps.is_identifier_start():
581            id = self.get_identifier(ps)
582            ps.peek_blank()
583
584            if ps.current_peek == '(':
585                # It's a Function. Ensure it's all upper-case.
586                if not re.match('^[A-Z][A-Z0-9_-]*$', id.name):
587                    raise ParseError('E0008')
588                ps.skip_to_peek()
589                args = self.get_call_arguments(ps)
590                return ast.FunctionReference(id, args)
591
592            attribute = None
593            if ps.current_char == '.':
594                ps.next()
595                attribute = self.get_identifier(ps)
596
597            return ast.MessageReference(id, attribute)
598
599        raise ParseError('E0028')
600
601    @with_span
602    def get_call_argument(self, ps):
603        exp = self.get_inline_expression(ps)
604
605        ps.skip_blank()
606
607        if ps.current_char != ':':
608            return exp
609
610        if isinstance(exp, ast.MessageReference) and exp.attribute is None:
611            ps.next()
612            ps.skip_blank()
613
614            value = self.get_literal(ps)
615            return ast.NamedArgument(exp.id, value)
616
617        raise ParseError('E0009')
618
619    @with_span
620    def get_call_arguments(self, ps):
621        positional = []
622        named = []
623        argument_names = set()
624
625        ps.expect_char('(')
626        ps.skip_blank()
627
628        while True:
629            if ps.current_char == ')':
630                break
631
632            arg = self.get_call_argument(ps)
633            if isinstance(arg, ast.NamedArgument):
634                if arg.name.name in argument_names:
635                    raise ParseError('E0022')
636                named.append(arg)
637                argument_names.add(arg.name.name)
638            elif len(argument_names) > 0:
639                raise ParseError('E0021')
640            else:
641                positional.append(arg)
642
643            ps.skip_blank()
644
645            if ps.current_char == ',':
646                ps.next()
647                ps.skip_blank()
648                continue
649
650            break
651
652        ps.expect_char(')')
653        return ast.CallArguments(positional, named)
654
655    @with_span
656    def get_string(self, ps):
657        value = ''
658
659        ps.expect_char('"')
660
661        while True:
662            ch = ps.take_char(lambda x: x != '"' and x != EOL)
663            if not ch:
664                break
665            if ch == '\\':
666                value += self.get_escape_sequence(ps)
667            else:
668                value += ch
669
670        if ps.current_char == EOL:
671            raise ParseError('E0020')
672
673        ps.expect_char('"')
674
675        return ast.StringLiteral(value)
676
677    @with_span
678    def get_literal(self, ps):
679        if ps.is_number_start():
680            return self.get_number(ps)
681        if ps.current_char == '"':
682            return self.get_string(ps)
683        raise ParseError('E0014')
684