from __future__ import unicode_literals
import functools
import re
from . import ast
from .stream import EOF, EOL, FluentParserStream
from .errors import ParseError


# Names of callable references must match this pattern (see
# FluentParser.get_inline_expression). Compiled once at import time.
_FUNCTION_NAME_RE = re.compile(r'^[A-Z][A-Z0-9_-]*$')


def with_span(fn):
    """Decorate a parser method so that the node it returns gets a span.

    The wrapped method must have the signature ``fn(self, ps, ...)`` and
    return a node exposing ``span`` and ``add_span(start, end)``. The span
    runs from ``ps.index`` before the call to ``ps.index`` after it. When
    ``self.with_spans`` is false the method is called unchanged.
    """
    @functools.wraps(fn)
    def decorated(self, ps, *args, **kwargs):
        if not self.with_spans:
            return fn(self, ps, *args, **kwargs)

        start = ps.index
        node = fn(self, ps, *args, **kwargs)

        # Don't re-add the span if the node already has it. This may happen
        # when one decorated function calls another decorated function.
        if node.span is not None:
            return node

        end = ps.index
        node.add_span(start, end)
        return node

    return decorated


class FluentParser(object):
    """This class is used to parse Fluent source content.

    ``with_spans`` enables source information in the form of
    :class:`.ast.Span` objects for each :class:`.ast.SyntaxNode`.
    """
    def __init__(self, with_spans=True):
        self.with_spans = with_spans

    def parse(self, source):
        """Create a :class:`.ast.Resource` from a Fluent source.

        Entries which fail to parse are included in the resource as
        :class:`.ast.Junk` (see :meth:`get_entry_or_junk`).
        """
        ps = FluentParserStream(source)
        ps.skip_blank_block()

        entries = []
        last_comment = None

        while ps.current_char:
            entry = self.get_entry_or_junk(ps)
            blank_lines = ps.skip_blank_block()

            # Regular Comments require special logic. Comments may be attached
            # to Messages or Terms if they are followed immediately by them.
            # However they should parse as standalone when they're followed by
            # Junk. Consequently, we only attach Comments once we know that the
            # Message or the Term parsed successfully.
            if isinstance(entry, ast.Comment) and not blank_lines \
                    and ps.current_char:
                # Stash the comment and decide what to do with it
                # in the next pass.
                last_comment = entry
                continue

            if last_comment is not None:
                if isinstance(entry, (ast.Message, ast.Term)):
                    entry.comment = last_comment
                    if self.with_spans:
                        # The entry now starts where its comment starts.
                        entry.span.start = entry.comment.span.start
                else:
                    entries.append(last_comment)
                # In either case, the stashed comment has been dealt with;
                # clear it.
                last_comment = None

            entries.append(entry)

        res = ast.Resource(entries)

        if self.with_spans:
            res.add_span(0, ps.index)

        return res

    def parse_entry(self, source):
        """Parse the first :class:`.ast.Entry` in source.

        Skip all encountered comments and start parsing at the first
        :class:`.ast.Message` or :class:`.ast.Term` start. Return
        :class:`.ast.Junk` if the parsing is not successful.

        Preceding comments are ignored unless they contain syntax errors
        themselves, in which case :class:`.ast.Junk` for the invalid comment
        is returned.
        """
        ps = FluentParserStream(source)
        ps.skip_blank_block()

        while ps.current_char == '#':
            skipped = self.get_entry_or_junk(ps)
            if isinstance(skipped, ast.Junk):
                # Don't skip Junk comments.
                return skipped
            ps.skip_blank_block()

        return self.get_entry_or_junk(ps)

    def get_entry_or_junk(self, ps):
        """Parse one entry, or return :class:`.ast.Junk` on a syntax error.

        On :class:`ParseError` the stream is advanced to the start of the
        next entry and the skipped source text is wrapped in a Junk node
        carrying one :class:`.ast.Annotation` built from the error.
        """
        entry_start_pos = ps.index

        try:
            entry = self.get_entry(ps)
            ps.expect_line_end()
            return entry
        except ParseError as err:
            error_index = ps.index
            ps.skip_to_next_entry_start(entry_start_pos)
            next_entry_start = ps.index
            # The position of the error must be inside of the Junk's span.
            error_index = min(error_index, next_entry_start)

            # Create a Junk instance
            junk_content = ps.string[entry_start_pos:next_entry_start]
            junk = ast.Junk(junk_content)
            if self.with_spans:
                junk.add_span(entry_start_pos, next_entry_start)
            # Note: the annotation gets its span even when with_spans is off.
            annot = ast.Annotation(err.code, err.args, err.message)
            annot.add_span(error_index, error_index)
            junk.add_annotation(annot)
            return junk

    def get_entry(self, ps):
        """Dispatch on the current character to parse a single entry.

        ``#`` starts a comment, ``-`` a term and an identifier start
        a message. Anything else raises ``ParseError('E0002')``.
        """
        if ps.current_char == '#':
            return self.get_comment(ps)

        if ps.current_char == '-':
            return self.get_term(ps)

        if ps.is_identifier_start():
            return self.get_message(ps)

        raise ParseError('E0002')

    @with_span
    def get_comment(self, ps):
        """Parse consecutive comment lines of the same level into one node.

        Returns :class:`.ast.Comment`, :class:`.ast.GroupComment` or
        :class:`.ast.ResourceComment` for one, two or three leading ``#``
        respectively. The level is fixed by the first line.

        A comment marker directly followed by the end of input is accepted
        as an empty comment line, because the Fluent grammar allows EOF as
        a line end there; any other character must be a space.
        """
        # 0 - comment
        # 1 - group comment
        # 2 - resource comment
        level = -1
        parts = []

        def is_comment_char(ch):
            return ch != EOL

        while True:
            # Count the '#' markers: up to three on the first line, and
            # exactly as many as the first line had on the following ones.
            hashes = -1
            max_hashes = 2 if level == -1 else level
            while ps.current_char == '#' and hashes < max_hashes:
                ps.next()
                hashes += 1

            if level == -1:
                level = hashes

            if ps.current_char != EOL and ps.current_char is not EOF:
                ps.expect_char(' ')
                ch = ps.take_char(is_comment_char)
                while ch:
                    parts.append(ch)
                    ch = ps.take_char(is_comment_char)

            if not ps.is_next_line_comment(level=level):
                break

            # Keep the line break between joined comment lines.
            parts.append(ps.current_char)
            ps.next()

        content = ''.join(parts)

        if level == 0:
            return ast.Comment(content)
        elif level == 1:
            return ast.GroupComment(content)
        elif level == 2:
            return ast.ResourceComment(content)

    @with_span
    def get_message(self, ps):
        """Parse ``identifier = [pattern] [attributes]``.

        Raises ``ParseError('E0005')`` when the message has neither a value
        nor any attributes.
        """
        identifier = self.get_identifier(ps)
        ps.skip_blank_inline()
        ps.expect_char('=')

        value = self.maybe_get_pattern(ps)
        attrs = self.get_attributes(ps)

        if value is None and not attrs:
            raise ParseError('E0005', identifier.name)

        return ast.Message(identifier, value, attrs)

    @with_span
    def get_term(self, ps):
        """Parse ``-identifier = pattern [attributes]``.

        Raises ``ParseError('E0006')`` when the term has no value.
        """
        ps.expect_char('-')
        identifier = self.get_identifier(ps)

        ps.skip_blank_inline()
        ps.expect_char('=')

        value = self.maybe_get_pattern(ps)
        if value is None:
            raise ParseError('E0006', identifier.name)

        attrs = self.get_attributes(ps)
        return ast.Term(identifier, value, attrs)

    @with_span
    def get_attribute(self, ps):
        """Parse ``.identifier = pattern``.

        Raises ``ParseError('E0012')`` when the attribute has no value.
        """
        ps.expect_char('.')

        key = self.get_identifier(ps)

        ps.skip_blank_inline()
        ps.expect_char('=')

        value = self.maybe_get_pattern(ps)
        if value is None:
            raise ParseError('E0012')

        return ast.Attribute(key, value)

    def get_attributes(self, ps):
        """Parse zero or more attributes and return them as a list."""
        attrs = []
        ps.peek_blank()

        while ps.is_attribute_start():
            ps.skip_to_peek()
            attrs.append(self.get_attribute(ps))
            ps.peek_blank()

        return attrs

    @with_span
    def get_identifier(self, ps):
        """Parse an identifier: one start character, then identifier chars."""
        chars = [ps.take_id_start()]

        ch = ps.take_id_char()
        while ch:
            chars.append(ch)
            ch = ps.take_id_char()

        return ast.Identifier(''.join(chars))

    def get_variant_key(self, ps):
        """Parse a variant key: a number literal or an identifier.

        Raises ``ParseError('E0013')`` at the end of input.
        """
        ch = ps.current_char

        if ch is EOF:
            raise ParseError('E0013')

        # A key starting with an ASCII digit or a minus sign is a number.
        if '0' <= ch <= '9' or ch == '-':
            return self.get_number(ps)

        return self.get_identifier(ps)

    @with_span
    def get_variant(self, ps, has_default):
        """Parse one ``[key] pattern`` variant, optionally prefixed by ``*``.

        ``has_default`` tells whether a default variant was already seen;
        a second one raises ``ParseError('E0015')``. A variant without
        a value raises ``ParseError('E0012')``.
        """
        is_default = False

        if ps.current_char == '*':
            if has_default:
                raise ParseError('E0015')
            ps.next()
            is_default = True

        ps.expect_char('[')
        ps.skip_blank()

        key = self.get_variant_key(ps)

        ps.skip_blank()
        ps.expect_char(']')

        value = self.maybe_get_pattern(ps)
        if value is None:
            raise ParseError('E0012')

        return ast.Variant(key, value, is_default)

    def get_variants(self, ps):
        """Parse the variant list of a select expression.

        Raises ``ParseError('E0011')`` when there are no variants and
        ``ParseError('E0010')`` when none of them is the default one.
        """
        variants = []
        has_default = False

        ps.skip_blank()
        while ps.is_variant_start():
            variant = self.get_variant(ps, has_default)

            if variant.default:
                has_default = True

            variants.append(variant)
            ps.expect_line_end()
            ps.skip_blank()

        if not variants:
            raise ParseError('E0011')

        if not has_default:
            raise ParseError('E0010')

        return variants

    def get_digits(self, ps):
        """Take one or more digits and return them as a string.

        Raises ``ParseError('E0004', '0-9')`` when there is no digit.
        """
        digits = []

        ch = ps.take_digit()
        while ch:
            digits.append(ch)
            ch = ps.take_digit()

        if not digits:
            raise ParseError('E0004', '0-9')

        return ''.join(digits)

    @with_span
    def get_number(self, ps):
        """Parse ``[-]digits[.digits]`` into a :class:`.ast.NumberLiteral`."""
        num = ''

        if ps.current_char == '-':
            num += '-'
            ps.next()

        num += self.get_digits(ps)

        if ps.current_char == '.':
            num += '.'
            ps.next()
            num += self.get_digits(ps)

        return ast.NumberLiteral(num)

    def maybe_get_pattern(self, ps):
        """Parse an inline or a block Pattern, or None

        maybe_get_pattern distinguishes between patterns which start on the
        same line as the identifier (aka inline singleline patterns and inline
        multiline patterns), and patterns which start on a new line (aka block
        patterns). The distinction is important for the dedentation logic: the
        indent of the first line of a block pattern must be taken into account
        when calculating the maximum common indent.
        """
        ps.peek_blank_inline()
        if ps.is_value_start():
            ps.skip_to_peek()
            return self.get_pattern(ps, is_block=False)

        ps.peek_blank_block()
        if ps.is_value_continuation():
            ps.skip_to_peek()
            return self.get_pattern(ps, is_block=True)

        return None

    @with_span
    def get_pattern(self, ps, is_block):
        """Parse text elements and placeables into a :class:`.ast.Pattern`.

        ``is_block`` is true when the pattern starts on a new line. Line
        indents are collected as temporary :class:`Indent` elements and
        resolved by :meth:`dedent` before the Pattern is built.

        Raises ``ParseError('E0027')`` on a ``}`` outside of a placeable.
        """
        elements = []
        if is_block:
            # A block pattern is a pattern which starts on a new line. Measure
            # the indent of this first line for the dedentation logic.
            blank_start = ps.index
            first_indent = ps.skip_blank_inline()
            elements.append(self.Indent(first_indent, blank_start, ps.index))
            common_indent_length = len(first_indent)
        else:
            # No indent measured yet; any continuation line lowers this.
            common_indent_length = float('infinity')

        while ps.current_char:
            if ps.current_char == EOL:
                blank_start = ps.index
                blank_lines = ps.peek_blank_block()
                if ps.is_value_continuation():
                    ps.skip_to_peek()
                    indent = ps.skip_blank_inline()
                    common_indent_length = min(
                        common_indent_length, len(indent))
                    elements.append(self.Indent(
                        blank_lines + indent, blank_start, ps.index))
                    continue

                # The end condition for get_pattern's while loop is a newline
                # which is not followed by a valid pattern continuation.
                ps.reset_peek()
                break

            if ps.current_char == '}':
                raise ParseError('E0027')

            if ps.current_char == '{':
                element = self.get_placeable(ps)
            else:
                element = self.get_text_element(ps)

            elements.append(element)

        dedented = self.dedent(elements, common_indent_length)
        return ast.Pattern(dedented)

    class Indent(ast.SyntaxNode):
        """Temporary pattern element holding the blank text before a line.

        Created by ``get_pattern`` and consumed by ``dedent``; it always
        carries a span, independently of the parser's ``with_spans``.
        """
        def __init__(self, value, start, end):
            super(FluentParser.Indent, self).__init__()
            self.value = value
            self.add_span(start, end)

    def dedent(self, elements, common_indent):
        """Dedent a list of elements by removing the maximum common indent from
        the beginning of text lines. The common indent is calculated in
        get_pattern.

        Adjacent text is merged into single :class:`.ast.TextElement` nodes,
        and trailing whitespace is trimmed from the last one. Returns the new
        list of elements.
        """
        trimmed = []

        for element in elements:
            if isinstance(element, ast.Placeable):
                trimmed.append(element)
                continue

            if isinstance(element, self.Indent):
                # Strip the common indent.
                keep = len(element.value) - common_indent
                element.value = element.value[:keep]
                if not element.value:
                    continue

            prev = trimmed[-1] if trimmed else None
            if isinstance(prev, ast.TextElement):
                # Join adjacent TextElements by replacing them with their sum.
                merged = ast.TextElement(prev.value + element.value)
                if self.with_spans:
                    merged.add_span(prev.span.start, element.span.end)
                trimmed[-1] = merged
                continue

            if isinstance(element, self.Indent):
                # If the indent hasn't been merged into a preceding
                # TextElements, convert it into a new TextElement.
                text_element = ast.TextElement(element.value)
                if self.with_spans:
                    text_element.add_span(
                        element.span.start, element.span.end)
                element = text_element

            trimmed.append(element)

        # Trim trailing whitespace from the Pattern.
        last_element = trimmed[-1] if trimmed else None
        if isinstance(last_element, ast.TextElement):
            last_element.value = last_element.value.rstrip(' \t\n\r')
            if last_element.value == "":
                trimmed.pop()

        return trimmed

    @with_span
    def get_text_element(self, ps):
        """Take text up to the next ``{``, ``}``, line end or end of input."""
        chars = []

        while ps.current_char:
            ch = ps.current_char

            if ch == '{' or ch == '}' or ch == EOL:
                break

            chars.append(ch)
            ps.next()

        return ast.TextElement(''.join(chars))

    def get_escape_sequence(self, ps):
        """Parse the part of an escape sequence which follows a backslash.

        Returns the raw sequence including the backslash. Raises
        ``ParseError('E0025')`` for an unknown escape character.
        """
        ch = ps.current_char

        if ch == '\\' or ch == '"':
            ps.next()
            return '\\{}'.format(ch)

        if ch == 'u':
            return self.get_unicode_escape_sequence(ps, ch, 4)

        if ch == 'U':
            return self.get_unicode_escape_sequence(ps, ch, 6)

        raise ParseError('E0025', ch)

    def get_unicode_escape_sequence(self, ps, u, digits):
        """Parse ``u`` followed by exactly ``digits`` hexadecimal digits.

        Returns the raw sequence including the backslash. Raises
        ``ParseError('E0026')`` with the offending sequence when a hex digit
        is missing.
        """
        ps.expect_char(u)
        sequence = ''
        for _ in range(digits):
            ch = ps.take_hex_digit()
            if not ch:
                raise ParseError(
                    'E0026',
                    '\\{}{}{}'.format(u, sequence, ps.current_char))
            sequence += ch

        return '\\{}{}'.format(u, sequence)

    @with_span
    def get_placeable(self, ps):
        """Parse ``{ expression }`` into a :class:`.ast.Placeable`."""
        ps.expect_char('{')
        ps.skip_blank()
        expression = self.get_expression(ps)
        ps.expect_char('}')
        return ast.Placeable(expression)

    @with_span
    def get_expression(self, ps):
        """Parse an inline expression or a select expression.

        When the inline expression is followed by ``->`` it is used as the
        selector of a :class:`.ast.SelectExpression`. Selectors which are
        not allowed raise ``ParseError`` with E0016, E0017, E0018 or E0029.
        Outside of a selector, a term reference with an attribute raises
        ``ParseError('E0019')``.
        """
        selector = self.get_inline_expression(ps)

        ps.skip_blank()

        if ps.current_char == '-':
            if ps.peek() != '>':
                ps.reset_peek()
                return selector

            if isinstance(selector, ast.MessageReference):
                if selector.attribute is None:
                    raise ParseError('E0016')
                raise ParseError('E0018')
            elif isinstance(selector, ast.TermReference):
                if selector.attribute is None:
                    raise ParseError('E0017')
            elif not isinstance(selector, (
                ast.StringLiteral,
                ast.NumberLiteral,
                ast.VariableReference,
                ast.FunctionReference,
            )):
                raise ParseError('E0029')

            # Consume the '->' arrow.
            ps.next()
            ps.next()

            ps.skip_blank_inline()
            ps.expect_line_end()

            variants = self.get_variants(ps)
            return ast.SelectExpression(selector, variants)

        if isinstance(selector, ast.TermReference) \
                and selector.attribute is not None:
            raise ParseError('E0019')

        return selector

    @with_span
    def get_inline_expression(self, ps):
        """Parse a placeable, literal or reference.

        Dispatches on the current character: ``{`` placeable, number start,
        ``"`` string, ``$`` variable reference, ``-`` term reference,
        identifier start for a function or message reference. Raises
        ``ParseError('E0008')`` for a call on a name which is not upper-case
        and ``ParseError('E0028')`` when nothing matches.
        """
        if ps.current_char == '{':
            return self.get_placeable(ps)

        if ps.is_number_start():
            return self.get_number(ps)

        if ps.current_char == '"':
            return self.get_string(ps)

        if ps.current_char == '$':
            ps.next()
            identifier = self.get_identifier(ps)
            return ast.VariableReference(identifier)

        if ps.current_char == '-':
            ps.next()
            identifier = self.get_identifier(ps)
            attribute = None
            if ps.current_char == '.':
                ps.next()
                attribute = self.get_identifier(ps)
            arguments = None
            ps.peek_blank()
            if ps.current_peek == '(':
                ps.skip_to_peek()
                arguments = self.get_call_arguments(ps)
            return ast.TermReference(identifier, attribute, arguments)

        if ps.is_identifier_start():
            identifier = self.get_identifier(ps)
            ps.peek_blank()

            if ps.current_peek == '(':
                # It's a Function. Ensure it's all upper-case.
                if not _FUNCTION_NAME_RE.match(identifier.name):
                    raise ParseError('E0008')
                ps.skip_to_peek()
                args = self.get_call_arguments(ps)
                return ast.FunctionReference(identifier, args)

            attribute = None
            if ps.current_char == '.':
                ps.next()
                attribute = self.get_identifier(ps)

            return ast.MessageReference(identifier, attribute)

        raise ParseError('E0028')

    @with_span
    def get_call_argument(self, ps):
        """Parse one positional or named (``name: literal``) call argument.

        Raises ``ParseError('E0009')`` when the part before ``:`` is not
        a plain message-style identifier.
        """
        exp = self.get_inline_expression(ps)

        ps.skip_blank()

        if ps.current_char != ':':
            return exp

        if isinstance(exp, ast.MessageReference) and exp.attribute is None:
            ps.next()
            ps.skip_blank()

            value = self.get_literal(ps)
            return ast.NamedArgument(exp.id, value)

        raise ParseError('E0009')

    @with_span
    def get_call_arguments(self, ps):
        """Parse ``(arg, arg, name: value, ...)``.

        Raises ``ParseError('E0022')`` for a repeated argument name and
        ``ParseError('E0021')`` for a positional argument after a named one.
        """
        positional = []
        named = []
        argument_names = set()

        ps.expect_char('(')
        ps.skip_blank()

        while ps.current_char != ')':
            arg = self.get_call_argument(ps)
            if isinstance(arg, ast.NamedArgument):
                if arg.name.name in argument_names:
                    raise ParseError('E0022')
                named.append(arg)
                argument_names.add(arg.name.name)
            elif argument_names:
                raise ParseError('E0021')
            else:
                positional.append(arg)

            ps.skip_blank()

            if ps.current_char != ',':
                break

            ps.next()
            ps.skip_blank()

        ps.expect_char(')')
        return ast.CallArguments(positional, named)

    @with_span
    def get_string(self, ps):
        """Parse a double-quoted string into a :class:`.ast.StringLiteral`.

        Escape sequences are kept in their raw form. Raises
        ``ParseError('E0020')`` when the line ends before the closing quote.
        """
        parts = []

        def is_string_char(ch):
            return ch != '"' and ch != EOL

        ps.expect_char('"')

        while True:
            ch = ps.take_char(is_string_char)
            if not ch:
                break
            if ch == '\\':
                parts.append(self.get_escape_sequence(ps))
            else:
                parts.append(ch)

        if ps.current_char == EOL:
            raise ParseError('E0020')

        ps.expect_char('"')

        return ast.StringLiteral(''.join(parts))

    @with_span
    def get_literal(self, ps):
        """Parse a number or string literal.

        Raises ``ParseError('E0014')`` when neither starts here.
        """
        if ps.is_number_start():
            return self.get_number(ps)
        if ps.current_char == '"':
            return self.get_string(ps)
        raise ParseError('E0014')