1from __future__ import unicode_literals 2import re 3from . import ast 4from .stream import EOF, EOL, FluentParserStream 5from .errors import ParseError 6 7 8def with_span(fn): 9 def decorated(self, ps, *args, **kwargs): 10 if not self.with_spans: 11 return fn(self, ps, *args, **kwargs) 12 13 start = ps.index 14 node = fn(self, ps, *args, **kwargs) 15 16 # Don't re-add the span if the node already has it. This may happen 17 # when one decorated function calls another decorated function. 18 if node.span is not None: 19 return node 20 21 end = ps.index 22 node.add_span(start, end) 23 return node 24 25 return decorated 26 27 28class FluentParser(object): 29 def __init__(self, with_spans=True): 30 self.with_spans = with_spans 31 32 def parse(self, source): 33 ps = FluentParserStream(source) 34 ps.skip_blank_block() 35 36 entries = [] 37 last_comment = None 38 39 while ps.current_char: 40 entry = self.get_entry_or_junk(ps) 41 blank_lines = ps.skip_blank_block() 42 43 # Regular Comments require special logic. Comments may be attached 44 # to Messages or Terms if they are followed immediately by them. 45 # However they should parse as standalone when they're followed by 46 # Junk. Consequently, we only attach Comments once we know that the 47 # Message or the Term parsed successfully. 48 if isinstance(entry, ast.Comment) and len(blank_lines) == 0 \ 49 and ps.current_char: 50 # Stash the comment and decide what to do with it 51 # in the next pass. 52 last_comment = entry 53 continue 54 55 if last_comment is not None: 56 if isinstance(entry, (ast.Message, ast.Term)): 57 entry.comment = last_comment 58 if self.with_spans: 59 entry.span.start = entry.comment.span.start 60 else: 61 entries.append(last_comment) 62 # In either case, the stashed comment has been dealt with; 63 # clear it. 64 last_comment = None 65 66 entries.append(entry) 67 68 res = ast.Resource(entries) 69 70 if self.with_spans: 71 res.add_span(0, ps.index) 72 73 return res 74 75 def parse_entry(self, source): 76 """Parse the first Message or Term in source. 77 78 Skip all encountered comments and start parsing at the first Mesage 79 or Term start. Return Junk if the parsing is not successful. 80 81 Preceding comments are ignored unless they contain syntax errors 82 themselves, in which case Junk for the invalid comment is returned. 83 """ 84 ps = FluentParserStream(source) 85 ps.skip_blank_block() 86 87 while ps.current_char == '#': 88 skipped = self.get_entry_or_junk(ps) 89 if isinstance(skipped, ast.Junk): 90 # Don't skip Junk comments. 91 return skipped 92 ps.skip_blank_block() 93 94 return self.get_entry_or_junk(ps) 95 96 def get_entry_or_junk(self, ps): 97 entry_start_pos = ps.index 98 99 try: 100 entry = self.get_entry(ps) 101 ps.expect_line_end() 102 return entry 103 except ParseError as err: 104 error_index = ps.index 105 ps.skip_to_next_entry_start(entry_start_pos) 106 next_entry_start = ps.index 107 if next_entry_start < error_index: 108 # The position of the error must be inside of the Junk's span. 109 error_index = next_entry_start 110 111 # Create a Junk instance 112 slice = ps.string[entry_start_pos:next_entry_start] 113 junk = ast.Junk(slice) 114 if self.with_spans: 115 junk.add_span(entry_start_pos, next_entry_start) 116 annot = ast.Annotation(err.code, err.args, err.message) 117 annot.add_span(error_index, error_index) 118 junk.add_annotation(annot) 119 return junk 120 121 def get_entry(self, ps): 122 if ps.current_char == '#': 123 return self.get_comment(ps) 124 125 if ps.current_char == '-': 126 return self.get_term(ps) 127 128 if ps.is_identifier_start(): 129 return self.get_message(ps) 130 131 raise ParseError('E0002') 132 133 @with_span 134 def get_comment(self, ps): 135 # 0 - comment 136 # 1 - group comment 137 # 2 - resource comment 138 level = -1 139 content = '' 140 141 while True: 142 i = -1 143 while ps.current_char == '#' \ 144 and (i < (2 if level == -1 else level)): 145 ps.next() 146 i += 1 147 148 if level == -1: 149 level = i 150 151 if ps.current_char != EOL: 152 ps.expect_char(' ') 153 ch = ps.take_char(lambda x: x != EOL) 154 while ch: 155 content += ch 156 ch = ps.take_char(lambda x: x != EOL) 157 158 if ps.is_next_line_comment(level=level): 159 content += ps.current_char 160 ps.next() 161 else: 162 break 163 164 if level == 0: 165 return ast.Comment(content) 166 elif level == 1: 167 return ast.GroupComment(content) 168 elif level == 2: 169 return ast.ResourceComment(content) 170 171 @with_span 172 def get_message(self, ps): 173 id = self.get_identifier(ps) 174 ps.skip_blank_inline() 175 ps.expect_char('=') 176 177 value = self.maybe_get_pattern(ps) 178 attrs = self.get_attributes(ps) 179 180 if value is None and len(attrs) == 0: 181 raise ParseError('E0005', id.name) 182 183 return ast.Message(id, value, attrs) 184 185 @with_span 186 def get_term(self, ps): 187 ps.expect_char('-') 188 id = self.get_identifier(ps) 189 190 ps.skip_blank_inline() 191 ps.expect_char('=') 192 193 value = self.maybe_get_pattern(ps) 194 if value is None: 195 raise ParseError('E0006', id.name) 196 197 attrs = self.get_attributes(ps) 198 return ast.Term(id, value, attrs) 199 200 @with_span 201 def get_attribute(self, ps): 202 ps.expect_char('.') 203 204 key = self.get_identifier(ps) 205 206 ps.skip_blank_inline() 207 ps.expect_char('=') 208 209 value = self.maybe_get_pattern(ps) 210 if value is None: 211 raise ParseError('E0012') 212 213 return ast.Attribute(key, value) 214 215 def get_attributes(self, ps): 216 attrs = [] 217 ps.peek_blank() 218 219 while ps.is_attribute_start(): 220 ps.skip_to_peek() 221 attr = self.get_attribute(ps) 222 attrs.append(attr) 223 ps.peek_blank() 224 225 return attrs 226 227 @with_span 228 def get_identifier(self, ps): 229 name = ps.take_id_start() 230 ch = ps.take_id_char() 231 while ch: 232 name += ch 233 ch = ps.take_id_char() 234 235 return ast.Identifier(name) 236 237 def get_variant_key(self, ps): 238 ch = ps.current_char 239 240 if ch is EOF: 241 raise ParseError('E0013') 242 243 cc = ord(ch) 244 if ((cc >= 48 and cc <= 57) or cc == 45): # 0-9, - 245 return self.get_number(ps) 246 247 return self.get_identifier(ps) 248 249 @with_span 250 def get_variant(self, ps, has_default): 251 default_index = False 252 253 if ps.current_char == '*': 254 if has_default: 255 raise ParseError('E0015') 256 ps.next() 257 default_index = True 258 259 ps.expect_char('[') 260 ps.skip_blank() 261 262 key = self.get_variant_key(ps) 263 264 ps.skip_blank() 265 ps.expect_char(']') 266 267 value = self.maybe_get_pattern(ps) 268 if value is None: 269 raise ParseError('E0012') 270 271 return ast.Variant(key, value, default_index) 272 273 def get_variants(self, ps): 274 variants = [] 275 has_default = False 276 277 ps.skip_blank() 278 while ps.is_variant_start(): 279 variant = self.get_variant(ps, has_default) 280 281 if variant.default: 282 has_default = True 283 284 variants.append(variant) 285 ps.expect_line_end() 286 ps.skip_blank() 287 288 if len(variants) == 0: 289 raise ParseError('E0011') 290 291 if not has_default: 292 raise ParseError('E0010') 293 294 return variants 295 296 def get_digits(self, ps): 297 num = '' 298 299 ch = ps.take_digit() 300 while ch: 301 num += ch 302 ch = ps.take_digit() 303 304 if len(num) == 0: 305 raise ParseError('E0004', '0-9') 306 307 return num 308 309 @with_span 310 def get_number(self, ps): 311 num = '' 312 313 if ps.current_char == '-': 314 num += '-' 315 ps.next() 316 317 num += self.get_digits(ps) 318 319 if ps.current_char == '.': 320 num += '.' 321 ps.next() 322 num += self.get_digits(ps) 323 324 return ast.NumberLiteral(num) 325 326 def maybe_get_pattern(self, ps): 327 '''Parse an inline or a block Pattern, or None 328 329 maybe_get_pattern distinguishes between patterns which start on the 330 same line as the indentifier (aka inline singleline patterns and inline 331 multiline patterns), and patterns which start on a new line (aka block 332 patterns). The distinction is important for the dedentation logic: the 333 indent of the first line of a block pattern must be taken into account 334 when calculating the maximum common indent. 335 ''' 336 ps.peek_blank_inline() 337 if ps.is_value_start(): 338 ps.skip_to_peek() 339 return self.get_pattern(ps, is_block=False) 340 341 ps.peek_blank_block() 342 if ps.is_value_continuation(): 343 ps.skip_to_peek() 344 return self.get_pattern(ps, is_block=True) 345 346 return None 347 348 @with_span 349 def get_pattern(self, ps, is_block): 350 elements = [] 351 if is_block: 352 # A block pattern is a pattern which starts on a new line. Measure 353 # the indent of this first line for the dedentation logic. 354 blank_start = ps.index 355 first_indent = ps.skip_blank_inline() 356 elements.append(self.Indent(first_indent, blank_start, ps.index)) 357 common_indent_length = len(first_indent) 358 else: 359 common_indent_length = float('infinity') 360 361 while ps.current_char: 362 if ps.current_char == EOL: 363 blank_start = ps.index 364 blank_lines = ps.peek_blank_block() 365 if ps.is_value_continuation(): 366 ps.skip_to_peek() 367 indent = ps.skip_blank_inline() 368 common_indent_length = min(common_indent_length, len(indent)) 369 elements.append(self.Indent(blank_lines + indent, blank_start, ps.index)) 370 continue 371 372 # The end condition for get_pattern's while loop is a newline 373 # which is not followed by a valid pattern continuation. 374 ps.reset_peek() 375 break 376 377 if ps.current_char == '}': 378 raise ParseError('E0027') 379 380 if ps.current_char == '{': 381 element = self.get_placeable(ps) 382 else: 383 element = self.get_text_element(ps) 384 385 elements.append(element) 386 387 dedented = self.dedent(elements, common_indent_length) 388 return ast.Pattern(dedented) 389 390 class Indent(ast.SyntaxNode): 391 def __init__(self, value, start, end): 392 super(FluentParser.Indent, self).__init__() 393 self.value = value 394 self.add_span(start, end) 395 396 def dedent(self, elements, common_indent): 397 '''Dedent a list of elements by removing the maximum common indent from 398 the beginning of text lines. The common indent is calculated in 399 get_pattern. 400 ''' 401 trimmed = [] 402 403 for element in elements: 404 if isinstance(element, ast.Placeable): 405 trimmed.append(element) 406 continue 407 408 if isinstance(element, self.Indent): 409 # Strip the common indent. 410 element.value = element.value[:len(element.value) - common_indent] 411 if len(element.value) == 0: 412 continue 413 414 prev = trimmed[-1] if len(trimmed) > 0 else None 415 if isinstance(prev, ast.TextElement): 416 # Join adjacent TextElements by replacing them with their sum. 417 sum = ast.TextElement(prev.value + element.value) 418 if self.with_spans: 419 sum.add_span(prev.span.start, element.span.end) 420 trimmed[-1] = sum 421 continue 422 423 if isinstance(element, self.Indent): 424 # If the indent hasn't been merged into a preceding 425 # TextElements, convert it into a new TextElement. 426 text_element = ast.TextElement(element.value) 427 if self.with_spans: 428 text_element.add_span(element.span.start, element.span.end) 429 element = text_element 430 431 trimmed.append(element) 432 433 # Trim trailing whitespace from the Pattern. 434 last_element = trimmed[-1] if len(trimmed) > 0 else None 435 if isinstance(last_element, ast.TextElement): 436 last_element.value = last_element.value.rstrip(' \t\n\r') 437 if last_element.value == "": 438 trimmed.pop() 439 440 return trimmed 441 442 @with_span 443 def get_text_element(self, ps): 444 buf = '' 445 446 while ps.current_char: 447 ch = ps.current_char 448 449 if ch == '{' or ch == '}': 450 return ast.TextElement(buf) 451 452 if ch == EOL: 453 return ast.TextElement(buf) 454 455 buf += ch 456 ps.next() 457 458 return ast.TextElement(buf) 459 460 def get_escape_sequence(self, ps): 461 next = ps.current_char 462 463 if next == '\\' or next == '"': 464 ps.next() 465 return '\\{}'.format(next) 466 467 if next == 'u': 468 return self.get_unicode_escape_sequence(ps, next, 4) 469 470 if next == 'U': 471 return self.get_unicode_escape_sequence(ps, next, 6) 472 473 raise ParseError('E0025', next) 474 475 def get_unicode_escape_sequence(self, ps, u, digits): 476 ps.expect_char(u) 477 sequence = '' 478 for _ in range(digits): 479 ch = ps.take_hex_digit() 480 if not ch: 481 raise ParseError('E0026', '\\{}{}{}'.format(u, sequence, ps.current_char)) 482 sequence += ch 483 484 return '\\{}{}'.format(u, sequence) 485 486 @with_span 487 def get_placeable(self, ps): 488 ps.expect_char('{') 489 ps.skip_blank() 490 expression = self.get_expression(ps) 491 ps.expect_char('}') 492 return ast.Placeable(expression) 493 494 @with_span 495 def get_expression(self, ps): 496 selector = self.get_inline_expression(ps) 497 498 ps.skip_blank() 499 500 if ps.current_char == '-': 501 if ps.peek() != '>': 502 ps.reset_peek() 503 return selector 504 505 if isinstance(selector, ast.MessageReference): 506 if selector.attribute is None: 507 raise ParseError('E0016') 508 else: 509 raise ParseError('E0018') 510 511 elif ( 512 isinstance(selector, ast.TermReference) 513 ): 514 if selector.attribute is None: 515 raise ParseError('E0017') 516 elif not ( 517 isinstance(selector, ( 518 ast.StringLiteral, 519 ast.NumberLiteral, 520 ast.VariableReference, 521 ast.FunctionReference, 522 )) 523 ): 524 raise ParseError('E0029') 525 526 ps.next() 527 ps.next() 528 529 ps.skip_blank_inline() 530 ps.expect_line_end() 531 532 variants = self.get_variants(ps) 533 return ast.SelectExpression(selector, variants) 534 535 if ( 536 isinstance(selector, ast.TermReference) 537 and selector.attribute is not None 538 ): 539 raise ParseError('E0019') 540 541 return selector 542 543 @with_span 544 def get_inline_expression(self, ps): 545 if ps.current_char == '{': 546 return self.get_placeable(ps) 547 548 if ps.is_number_start(): 549 return self.get_number(ps) 550 551 if ps.current_char == '"': 552 return self.get_string(ps) 553 554 if ps.current_char == '$': 555 ps.next() 556 id = self.get_identifier(ps) 557 return ast.VariableReference(id) 558 559 if ps.current_char == '-': 560 ps.next() 561 id = self.get_identifier(ps) 562 attribute = None 563 if ps.current_char == '.': 564 ps.next() 565 attribute = self.get_identifier(ps) 566 arguments = None 567 ps.peek_blank() 568 if ps.current_peek == '(': 569 ps.skip_to_peek() 570 arguments = self.get_call_arguments(ps) 571 return ast.TermReference(id, attribute, arguments) 572 573 if ps.is_identifier_start(): 574 id = self.get_identifier(ps) 575 ps.peek_blank() 576 577 if ps.current_peek == '(': 578 # It's a Function. Ensure it's all upper-case. 579 if not re.match('^[A-Z][A-Z0-9_-]*$', id.name): 580 raise ParseError('E0008') 581 ps.skip_to_peek() 582 args = self.get_call_arguments(ps) 583 return ast.FunctionReference(id, args) 584 585 attribute = None 586 if ps.current_char == '.': 587 ps.next() 588 attribute = self.get_identifier(ps) 589 590 return ast.MessageReference(id, attribute) 591 592 raise ParseError('E0028') 593 594 @with_span 595 def get_call_argument(self, ps): 596 exp = self.get_inline_expression(ps) 597 598 ps.skip_blank() 599 600 if ps.current_char != ':': 601 return exp 602 603 if isinstance(exp, ast.MessageReference) and exp.attribute is None: 604 ps.next() 605 ps.skip_blank() 606 607 value = self.get_literal(ps) 608 return ast.NamedArgument(exp.id, value) 609 610 raise ParseError('E0009') 611 612 @with_span 613 def get_call_arguments(self, ps): 614 positional = [] 615 named = [] 616 argument_names = set() 617 618 ps.expect_char('(') 619 ps.skip_blank() 620 621 while True: 622 if ps.current_char == ')': 623 break 624 625 arg = self.get_call_argument(ps) 626 if isinstance(arg, ast.NamedArgument): 627 if arg.name.name in argument_names: 628 raise ParseError('E0022') 629 named.append(arg) 630 argument_names.add(arg.name.name) 631 elif len(argument_names) > 0: 632 raise ParseError('E0021') 633 else: 634 positional.append(arg) 635 636 ps.skip_blank() 637 638 if ps.current_char == ',': 639 ps.next() 640 ps.skip_blank() 641 continue 642 643 break 644 645 ps.expect_char(')') 646 return ast.CallArguments(positional, named) 647 648 @with_span 649 def get_string(self, ps): 650 value = '' 651 652 ps.expect_char('"') 653 654 while True: 655 ch = ps.take_char(lambda x: x != '"' and x != EOL) 656 if not ch: 657 break 658 if ch == '\\': 659 value += self.get_escape_sequence(ps) 660 else: 661 value += ch 662 663 if ps.current_char == EOL: 664 raise ParseError('E0020') 665 666 ps.expect_char('"') 667 668 return ast.StringLiteral(value) 669 670 @with_span 671 def get_literal(self, ps): 672 if ps.is_number_start(): 673 return self.get_number(ps) 674 if ps.current_char == '"': 675 return self.get_string(ps) 676 raise ParseError('E0014') 677