1from __future__ import unicode_literals
2import re
3from . import ast
4from .stream import EOF, EOL, FluentParserStream
5from .errors import ParseError
6
7
def with_span(fn):
    """Decorate a parser method so that the node it returns gets a span.

    The decorated method must take the parser as ``self`` and the stream as
    ``ps``. When ``self.with_spans`` is false the method is called as is.
    Otherwise ``ps.index`` is sampled before and after the call and handed
    to ``node.add_span``.

    The wrapper keeps the ``__name__`` and ``__doc__`` of the decorated
    method, so tracebacks and introspection show the real method names.
    """
    # Imported here so that the module-level imports stay untouched.
    from functools import wraps

    @wraps(fn)
    def decorated(self, ps, *args, **kwargs):
        if not self.with_spans:
            return fn(self, ps, *args, **kwargs)

        start = ps.index
        node = fn(self, ps, *args, **kwargs)

        # Don't re-add the span if the node already has it. This may happen
        # when one decorated function calls another decorated function.
        if node.span is not None:
            return node

        node.add_span(start, ps.index)
        return node

    return decorated
26
27
28class FluentParser(object):
    def __init__(self, with_spans=True):
        """Create a parser.

        :param with_spans: stored on the instance. When true, the parsing
            methods attach source spans to the nodes they produce.
        """
        self.with_spans = with_spans
31
32    def parse(self, source):
33        ps = FluentParserStream(source)
34        ps.skip_blank_block()
35
36        entries = []
37        last_comment = None
38
39        while ps.current_char:
40            entry = self.get_entry_or_junk(ps)
41            blank_lines = ps.skip_blank_block()
42
43            # Regular Comments require special logic. Comments may be attached
44            # to Messages or Terms if they are followed immediately by them.
45            # However they should parse as standalone when they're followed by
46            # Junk. Consequently, we only attach Comments once we know that the
47            # Message or the Term parsed successfully.
48            if isinstance(entry, ast.Comment) and len(blank_lines) == 0 \
49                    and ps.current_char:
50                # Stash the comment and decide what to do with it
51                # in the next pass.
52                last_comment = entry
53                continue
54
55            if last_comment is not None:
56                if isinstance(entry, (ast.Message, ast.Term)):
57                    entry.comment = last_comment
58                    if self.with_spans:
59                        entry.span.start = entry.comment.span.start
60                else:
61                    entries.append(last_comment)
62                # In either case, the stashed comment has been dealt with;
63                # clear it.
64                last_comment = None
65
66            entries.append(entry)
67
68        res = ast.Resource(entries)
69
70        if self.with_spans:
71            res.add_span(0, ps.index)
72
73        return res
74
75    def parse_entry(self, source):
76        """Parse the first Message or Term in source.
77
78        Skip all encountered comments and start parsing at the first Mesage
79        or Term start. Return Junk if the parsing is not successful.
80
81        Preceding comments are ignored unless they contain syntax errors
82        themselves, in which case Junk for the invalid comment is returned.
83        """
84        ps = FluentParserStream(source)
85        ps.skip_blank_block()
86
87        while ps.current_char == '#':
88            skipped = self.get_entry_or_junk(ps)
89            if isinstance(skipped, ast.Junk):
90                # Don't skip Junk comments.
91                return skipped
92            ps.skip_blank_block()
93
94        return self.get_entry_or_junk(ps)
95
96    def get_entry_or_junk(self, ps):
97        entry_start_pos = ps.index
98
99        try:
100            entry = self.get_entry(ps)
101            ps.expect_line_end()
102            return entry
103        except ParseError as err:
104            error_index = ps.index
105            ps.skip_to_next_entry_start(entry_start_pos)
106            next_entry_start = ps.index
107            if next_entry_start < error_index:
108                # The position of the error must be inside of the Junk's span.
109                error_index = next_entry_start
110
111            # Create a Junk instance
112            slice = ps.string[entry_start_pos:next_entry_start]
113            junk = ast.Junk(slice)
114            if self.with_spans:
115                junk.add_span(entry_start_pos, next_entry_start)
116            annot = ast.Annotation(err.code, err.args, err.message)
117            annot.add_span(error_index, error_index)
118            junk.add_annotation(annot)
119            return junk
120
121    def get_entry(self, ps):
122        if ps.current_char == '#':
123            return self.get_comment(ps)
124
125        if ps.current_char == '-':
126            return self.get_term(ps)
127
128        if ps.is_identifier_start():
129            return self.get_message(ps)
130
131        raise ParseError('E0002')
132
133    @with_span
134    def get_comment(self, ps):
135        # 0 - comment
136        # 1 - group comment
137        # 2 - resource comment
138        level = -1
139        content = ''
140
141        while True:
142            i = -1
143            while ps.current_char == '#' \
144                    and (i < (2 if level == -1 else level)):
145                ps.next()
146                i += 1
147
148            if level == -1:
149                level = i
150
151            if ps.current_char != EOL:
152                ps.expect_char(' ')
153                ch = ps.take_char(lambda x: x != EOL)
154                while ch:
155                    content += ch
156                    ch = ps.take_char(lambda x: x != EOL)
157
158            if ps.is_next_line_comment(level=level):
159                content += ps.current_char
160                ps.next()
161            else:
162                break
163
164        if level == 0:
165            return ast.Comment(content)
166        elif level == 1:
167            return ast.GroupComment(content)
168        elif level == 2:
169            return ast.ResourceComment(content)
170
171    @with_span
172    def get_message(self, ps):
173        id = self.get_identifier(ps)
174        ps.skip_blank_inline()
175        ps.expect_char('=')
176
177        value = self.maybe_get_pattern(ps)
178        attrs = self.get_attributes(ps)
179
180        if value is None and len(attrs) == 0:
181            raise ParseError('E0005', id.name)
182
183        return ast.Message(id, value, attrs)
184
185    @with_span
186    def get_term(self, ps):
187        ps.expect_char('-')
188        id = self.get_identifier(ps)
189
190        ps.skip_blank_inline()
191        ps.expect_char('=')
192
193        value = self.maybe_get_pattern(ps)
194        if value is None:
195            raise ParseError('E0006', id.name)
196
197        attrs = self.get_attributes(ps)
198        return ast.Term(id, value, attrs)
199
200    @with_span
201    def get_attribute(self, ps):
202        ps.expect_char('.')
203
204        key = self.get_identifier(ps)
205
206        ps.skip_blank_inline()
207        ps.expect_char('=')
208
209        value = self.maybe_get_pattern(ps)
210        if value is None:
211            raise ParseError('E0012')
212
213        return ast.Attribute(key, value)
214
215    def get_attributes(self, ps):
216        attrs = []
217        ps.peek_blank()
218
219        while ps.is_attribute_start():
220            ps.skip_to_peek()
221            attr = self.get_attribute(ps)
222            attrs.append(attr)
223            ps.peek_blank()
224
225        return attrs
226
227    @with_span
228    def get_identifier(self, ps):
229        name = ps.take_id_start()
230        ch = ps.take_id_char()
231        while ch:
232            name += ch
233            ch = ps.take_id_char()
234
235        return ast.Identifier(name)
236
237    def get_variant_key(self, ps):
238        ch = ps.current_char
239
240        if ch is EOF:
241            raise ParseError('E0013')
242
243        cc = ord(ch)
244        if ((cc >= 48 and cc <= 57) or cc == 45):  # 0-9, -
245            return self.get_number(ps)
246
247        return self.get_identifier(ps)
248
249    @with_span
250    def get_variant(self, ps, has_default):
251        default_index = False
252
253        if ps.current_char == '*':
254            if has_default:
255                raise ParseError('E0015')
256            ps.next()
257            default_index = True
258
259        ps.expect_char('[')
260        ps.skip_blank()
261
262        key = self.get_variant_key(ps)
263
264        ps.skip_blank()
265        ps.expect_char(']')
266
267        value = self.maybe_get_pattern(ps)
268        if value is None:
269            raise ParseError('E0012')
270
271        return ast.Variant(key, value, default_index)
272
273    def get_variants(self, ps):
274        variants = []
275        has_default = False
276
277        ps.skip_blank()
278        while ps.is_variant_start():
279            variant = self.get_variant(ps, has_default)
280
281            if variant.default:
282                has_default = True
283
284            variants.append(variant)
285            ps.expect_line_end()
286            ps.skip_blank()
287
288        if len(variants) == 0:
289            raise ParseError('E0011')
290
291        if not has_default:
292            raise ParseError('E0010')
293
294        return variants
295
296    def get_digits(self, ps):
297        num = ''
298
299        ch = ps.take_digit()
300        while ch:
301            num += ch
302            ch = ps.take_digit()
303
304        if len(num) == 0:
305            raise ParseError('E0004', '0-9')
306
307        return num
308
309    @with_span
310    def get_number(self, ps):
311        num = ''
312
313        if ps.current_char == '-':
314            num += '-'
315            ps.next()
316
317        num += self.get_digits(ps)
318
319        if ps.current_char == '.':
320            num += '.'
321            ps.next()
322            num += self.get_digits(ps)
323
324        return ast.NumberLiteral(num)
325
326    def maybe_get_pattern(self, ps):
327        '''Parse an inline or a block Pattern, or None
328
329        maybe_get_pattern distinguishes between patterns which start on the
330        same line as the indentifier (aka inline singleline patterns and inline
331        multiline patterns), and patterns which start on a new line (aka block
332        patterns). The distinction is important for the dedentation logic: the
333        indent of the first line of a block pattern must be taken into account
334        when calculating the maximum common indent.
335        '''
336        ps.peek_blank_inline()
337        if ps.is_value_start():
338            ps.skip_to_peek()
339            return self.get_pattern(ps, is_block=False)
340
341        ps.peek_blank_block()
342        if ps.is_value_continuation():
343            ps.skip_to_peek()
344            return self.get_pattern(ps, is_block=True)
345
346        return None
347
348    @with_span
349    def get_pattern(self, ps, is_block):
350        elements = []
351        if is_block:
352            # A block pattern is a pattern which starts on a new line. Measure
353            # the indent of this first line for the dedentation logic.
354            blank_start = ps.index
355            first_indent = ps.skip_blank_inline()
356            elements.append(self.Indent(first_indent, blank_start, ps.index))
357            common_indent_length = len(first_indent)
358        else:
359            common_indent_length = float('infinity')
360
361        while ps.current_char:
362            if ps.current_char == EOL:
363                blank_start = ps.index
364                blank_lines = ps.peek_blank_block()
365                if ps.is_value_continuation():
366                    ps.skip_to_peek()
367                    indent = ps.skip_blank_inline()
368                    common_indent_length = min(common_indent_length, len(indent))
369                    elements.append(self.Indent(blank_lines + indent, blank_start, ps.index))
370                    continue
371
372                # The end condition for get_pattern's while loop is a newline
373                # which is not followed by a valid pattern continuation.
374                ps.reset_peek()
375                break
376
377            if ps.current_char == '}':
378                raise ParseError('E0027')
379
380            if ps.current_char == '{':
381                element = self.get_placeable(ps)
382            else:
383                element = self.get_text_element(ps)
384
385            elements.append(element)
386
387        dedented = self.dedent(elements, common_indent_length)
388        return ast.Pattern(dedented)
389
    class Indent(ast.SyntaxNode):
        """Helper node used while a Pattern is being parsed.

        get_pattern creates one for the blank space at the start of a
        pattern line; dedent then drops it, merges it into a TextElement or
        converts it into one.
        """
        def __init__(self, value, start, end):
            super(FluentParser.Indent, self).__init__()
            self.value = value
            # The span is always set here, whatever the parser's with_spans.
            self.add_span(start, end)
395
396    def dedent(self, elements, common_indent):
397        '''Dedent a list of elements by removing the maximum common indent from
398        the beginning of text lines. The common indent is calculated in
399        get_pattern.
400        '''
401        trimmed = []
402
403        for element in elements:
404            if isinstance(element, ast.Placeable):
405                trimmed.append(element)
406                continue
407
408            if isinstance(element, self.Indent):
409                # Strip the common indent.
410                element.value = element.value[:len(element.value) - common_indent]
411                if len(element.value) == 0:
412                    continue
413
414            prev = trimmed[-1] if len(trimmed) > 0 else None
415            if isinstance(prev, ast.TextElement):
416                # Join adjacent TextElements by replacing them with their sum.
417                sum = ast.TextElement(prev.value + element.value)
418                if self.with_spans:
419                    sum.add_span(prev.span.start, element.span.end)
420                trimmed[-1] = sum
421                continue
422
423            if isinstance(element, self.Indent):
424                # If the indent hasn't been merged into a preceding
425                # TextElements, convert it into a new TextElement.
426                text_element = ast.TextElement(element.value)
427                if self.with_spans:
428                    text_element.add_span(element.span.start, element.span.end)
429                element = text_element
430
431            trimmed.append(element)
432
433        # Trim trailing whitespace from the Pattern.
434        last_element = trimmed[-1] if len(trimmed) > 0 else None
435        if isinstance(last_element, ast.TextElement):
436            last_element.value = last_element.value.rstrip(' \t\n\r')
437            if last_element.value == "":
438                trimmed.pop()
439
440        return trimmed
441
442    @with_span
443    def get_text_element(self, ps):
444        buf = ''
445
446        while ps.current_char:
447            ch = ps.current_char
448
449            if ch == '{' or ch == '}':
450                return ast.TextElement(buf)
451
452            if ch == EOL:
453                return ast.TextElement(buf)
454
455            buf += ch
456            ps.next()
457
458        return ast.TextElement(buf)
459
460    def get_escape_sequence(self, ps):
461        next = ps.current_char
462
463        if next == '\\' or next == '"':
464            ps.next()
465            return '\\{}'.format(next)
466
467        if next == 'u':
468            return self.get_unicode_escape_sequence(ps, next, 4)
469
470        if next == 'U':
471            return self.get_unicode_escape_sequence(ps, next, 6)
472
473        raise ParseError('E0025', next)
474
475    def get_unicode_escape_sequence(self, ps, u, digits):
476        ps.expect_char(u)
477        sequence = ''
478        for _ in range(digits):
479            ch = ps.take_hex_digit()
480            if not ch:
481                raise ParseError('E0026', '\\{}{}{}'.format(u, sequence, ps.current_char))
482            sequence += ch
483
484        return '\\{}{}'.format(u, sequence)
485
486    @with_span
487    def get_placeable(self, ps):
488        ps.expect_char('{')
489        ps.skip_blank()
490        expression = self.get_expression(ps)
491        ps.expect_char('}')
492        return ast.Placeable(expression)
493
494    @with_span
495    def get_expression(self, ps):
496        selector = self.get_inline_expression(ps)
497
498        ps.skip_blank()
499
500        if ps.current_char == '-':
501            if ps.peek() != '>':
502                ps.reset_peek()
503                return selector
504
505            if isinstance(selector, ast.MessageReference):
506                if selector.attribute is None:
507                    raise ParseError('E0016')
508                else:
509                    raise ParseError('E0018')
510
511            elif (
512                isinstance(selector, ast.TermReference)
513            ):
514                if selector.attribute is None:
515                    raise ParseError('E0017')
516            elif not (
517                isinstance(selector, (
518                    ast.StringLiteral,
519                    ast.NumberLiteral,
520                    ast.VariableReference,
521                    ast.FunctionReference,
522                ))
523            ):
524                raise ParseError('E0029')
525
526            ps.next()
527            ps.next()
528
529            ps.skip_blank_inline()
530            ps.expect_line_end()
531
532            variants = self.get_variants(ps)
533            return ast.SelectExpression(selector, variants)
534
535        if (
536            isinstance(selector, ast.TermReference)
537            and selector.attribute is not None
538        ):
539            raise ParseError('E0019')
540
541        return selector
542
543    @with_span
544    def get_inline_expression(self, ps):
545        if ps.current_char == '{':
546            return self.get_placeable(ps)
547
548        if ps.is_number_start():
549            return self.get_number(ps)
550
551        if ps.current_char == '"':
552            return self.get_string(ps)
553
554        if ps.current_char == '$':
555            ps.next()
556            id = self.get_identifier(ps)
557            return ast.VariableReference(id)
558
559        if ps.current_char == '-':
560            ps.next()
561            id = self.get_identifier(ps)
562            attribute = None
563            if ps.current_char == '.':
564                ps.next()
565                attribute = self.get_identifier(ps)
566            arguments = None
567            ps.peek_blank()
568            if ps.current_peek == '(':
569                ps.skip_to_peek()
570                arguments = self.get_call_arguments(ps)
571            return ast.TermReference(id, attribute, arguments)
572
573        if ps.is_identifier_start():
574            id = self.get_identifier(ps)
575            ps.peek_blank()
576
577            if ps.current_peek == '(':
578                # It's a Function. Ensure it's all upper-case.
579                if not re.match('^[A-Z][A-Z0-9_-]*$', id.name):
580                    raise ParseError('E0008')
581                ps.skip_to_peek()
582                args = self.get_call_arguments(ps)
583                return ast.FunctionReference(id, args)
584
585            attribute = None
586            if ps.current_char == '.':
587                ps.next()
588                attribute = self.get_identifier(ps)
589
590            return ast.MessageReference(id, attribute)
591
592        raise ParseError('E0028')
593
594    @with_span
595    def get_call_argument(self, ps):
596        exp = self.get_inline_expression(ps)
597
598        ps.skip_blank()
599
600        if ps.current_char != ':':
601            return exp
602
603        if isinstance(exp, ast.MessageReference) and exp.attribute is None:
604            ps.next()
605            ps.skip_blank()
606
607            value = self.get_literal(ps)
608            return ast.NamedArgument(exp.id, value)
609
610        raise ParseError('E0009')
611
612    @with_span
613    def get_call_arguments(self, ps):
614        positional = []
615        named = []
616        argument_names = set()
617
618        ps.expect_char('(')
619        ps.skip_blank()
620
621        while True:
622            if ps.current_char == ')':
623                break
624
625            arg = self.get_call_argument(ps)
626            if isinstance(arg, ast.NamedArgument):
627                if arg.name.name in argument_names:
628                    raise ParseError('E0022')
629                named.append(arg)
630                argument_names.add(arg.name.name)
631            elif len(argument_names) > 0:
632                raise ParseError('E0021')
633            else:
634                positional.append(arg)
635
636            ps.skip_blank()
637
638            if ps.current_char == ',':
639                ps.next()
640                ps.skip_blank()
641                continue
642
643            break
644
645        ps.expect_char(')')
646        return ast.CallArguments(positional, named)
647
648    @with_span
649    def get_string(self, ps):
650        value = ''
651
652        ps.expect_char('"')
653
654        while True:
655            ch = ps.take_char(lambda x: x != '"' and x != EOL)
656            if not ch:
657                break
658            if ch == '\\':
659                value += self.get_escape_sequence(ps)
660            else:
661                value += ch
662
663        if ps.current_char == EOL:
664            raise ParseError('E0020')
665
666        ps.expect_char('"')
667
668        return ast.StringLiteral(value)
669
670    @with_span
671    def get_literal(self, ps):
672        if ps.is_number_start():
673            return self.get_number(ps)
674        if ps.current_char == '"':
675            return self.get_string(ps)
676        raise ParseError('E0014')
677