# -*- coding: utf-8 -*-
"""Implements a Jinja / Python combination lexer. The ``Lexer`` class
is used to do some preprocessing. It filters out invalid operators like
the bitshift operators we don't allow in templates. It separates
template code and python code in expressions.
"""
import re
from ast import literal_eval
from collections import deque
from operator import itemgetter

from ._compat import implements_iterator
from ._compat import intern
from ._compat import iteritems
from ._compat import text_type
from .exceptions import TemplateSyntaxError
from .utils import LRUCache

# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r"\s+", re.U)
newline_re = re.compile(r"(\r\n|\r|\n)")
string_re = re.compile(
    r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
)
integer_re = re.compile(r"(\d+_)*\d+")
float_re = re.compile(
    r"""
    (?<!\.)  # doesn't start with a .
    (\d+_)*\d+  # digits, possibly _ separated
    (
        (\.(\d+_)*\d+)?  # optional fractional part
        e[+\-]?(\d+_)*\d+  # exponent part
    |
        \.(\d+_)*\d+  # required fractional part
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)

try:
    # check if this Python supports Unicode identifiers
    compile("föö", "<unknown>", "eval")
except SyntaxError:
    # Python 2, no Unicode support, use ASCII identifiers
    name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
    check_ident = False
else:
    # Unicode support, import generated re pattern and set flag to use
    # str.isidentifier to validate during lexing.
    from ._identifier import pattern as name_re

    check_ident = True

# intern the token type strings and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")
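
# Because the token type strings above are interned (and ``Token.__new__``
# interns the type it is given), the rest of this module can compare token
# types by identity, e.g. ``token.type is TOKEN_EOF`` in the stream classes
# below.
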
# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), "operators dropped"
operator_re = re.compile(
    "(%s)" % "|".join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))
)

ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == TOKEN_NAME:
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ":" in expr:
        type, value = expr.split(":", 1)
        if type == TOKEN_NAME:
            return value
    else:
        type = expr
    return _describe_token_type(type)
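
# A small illustration (comments only, not executed): with the token types
# defined above, ``describe_token_expr("name:endfor")`` returns ``"endfor"``,
# while ``describe_token_expr("block_end")`` falls through to
# ``_describe_token_type`` and returns ``"end of statement block"``.
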
def count_newlines(value):
    """Count the number of newline characters in the string. This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            e(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            e(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            e(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        rules.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + e(environment.line_statement_prefix),
            )
        )
    if environment.line_comment_prefix is not None:
        rules.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
            )
        )

    return [x[1:] for x in sorted(rules, reverse=True)]


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class."""

    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == "name":
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression. This can either be a
        token type or ``'token_type:token_value'``. This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ":" in expr:
            return expr.split(":", 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return "Token(%r, %r, %r)" % (self.lineno, self.type, self.value)


@implements_iterator
class TokenStreamIterator(object):
    """The iterator for tokenstreams. Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token
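
# Iteration sketch (illustrative comments, not executed; ``environment`` is
# assumed to be a ``jinja2.Environment``): iterating a ``TokenStream`` drives
# the ``TokenStreamIterator`` above and stops once the eof token is reached,
# while ``Token.test`` matches either a type or a ``"type:value"`` expression:
#
#   for token in environment.lexer.tokenize("{{ user.name }}"):
#       if token.test("name:user"):
#           ...
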
@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\\s. The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead. The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    __nonzero__ = __bool__  # py2

    @property
    def eos(self):
        """Are we at the end of the stream?"""
        return not self

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it. This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    "unexpected end of template, expected %r." % expr,
                    self.current.lineno,
                    self.name,
                    self.filename,
                )
            raise TemplateSyntaxError(
                "expected token %r, got %r" % (expr, describe_token(self.current)),
                self.current.lineno,
                self.name,
                self.filename,
            )
        try:
            return self.current
        finally:
            next(self)


def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer
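
# Caching sketch (illustrative comments, not executed): the key above contains
# only the lexer-relevant settings, so two ``jinja2.Environment`` instances
# configured identically share a single ``Lexer``:
#
#   env_a, env_b = Environment(), Environment()
#   assert get_lexer(env_a) is get_lexer(env_b)
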
class OptionalLStrip(tuple):
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # Even though it looks like a no-op, creating instances fails
    # without this.
    def __new__(cls, *members, **kwargs):
        return super(OptionalLStrip, cls).__new__(cls, members)


class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        e = re.escape

        def c(x):
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and "\\n?" or ""

        # If lstrip is enabled, it should not be applied if there is any
        # non-whitespace between the newline and block.
        self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            "root": [
                # directives
                (
                    c(
                        "(.*?)(?:%s)"
                        % "|".join(
                            [
                                r"(?P<raw_begin>%s(\-|\+|)\s*raw\s*(?:\-%s\s*|%s))"
                                % (
                                    e(environment.block_start_string),
                                    e(environment.block_end_string),
                                    e(environment.block_end_string),
                                )
                            ]
                            + [
                                r"(?P<%s>%s(\-|\+|))" % (n, r)
                                for n, r in root_tag_rules
                            ]
                        )
                    ),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),
                    "#bygroup",
                ),
                # data
                (c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (
                    c(
                        r"(.*?)((?:\-%s\s*|%s)%s)"
                        % (
                            e(environment.comment_end_string),
                            e(environment.comment_end_string),
                            block_suffix_re,
                        )
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                (c("(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (
                    c(
                        r"(?:\-%s\s*|%s)%s"
                        % (
                            e(environment.block_end_string),
                            e(environment.block_end_string),
                            block_suffix_re,
                        )
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (
                    c(
                        r"\-%s\s*|%s"
                        % (
                            e(environment.variable_end_string),
                            e(environment.variable_end_string),
                        )
                    ),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (
                    c(
                        r"(.*?)((?:%s(\-|\+|))\s*endraw\s*(?:\-%s\s*|%s%s))"
                        % (
                            e(environment.block_start_string),
                            e(environment.block_end_string),
                            e(environment.block_end_string),
                            block_suffix_re,
                        )
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),
                    "#pop",
                ),
                (c("(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }
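
    # Delimiter note (illustrative): ``compile_rules`` sorts the root prefixes
    # by length, so an environment configured with ``block_start_string="<%"``
    # and ``variable_start_string="<%="`` still lexes correctly because the
    # longer "<%=" prefix is tried before "<%".
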
    def _normalize_newlines(self, value):
        """Replace all newlines with the configured newline sequence. Called
        for strings and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter and wraps the resulting generator in a
        :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value)
            elif token == "keyword":
                token = value
            elif token == TOKEN_NAME:
                value = str(value)
                if check_ident and not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    value = (
                        self._normalize_newlines(value[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
            elif token == TOKEN_INTEGER:
                value = int(value.replace("_", ""))
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.
        """
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            for newline in ("\r\n", "\r", "\n"):
                if source.endswith(newline):
                    lines.append("")
                    break
        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]
        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")
        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack = []
        lstrip_unless_re = self.lstrip_unless_re
        newlines_stripped = 0
        line_starting = True
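
        # The loop below is a small state machine: ``stack`` holds the active
        # lexer states, ``statetokens`` the rules of the topmost state, and a
        # matching rule may switch states via "#pop", "#bygroup" or an
        # explicit state name before matching continues at the new position.
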
        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]

                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = (stripped,) + groups[1:]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and lstrip_unless_re is not None
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1
                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if not lstrip_unless_re.search(text, l_pos):
                                    groups = (text[:l_pos],) + groups[1:]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    "%r wanted to resolve the token dynamically"
                                    " but no group matched" % regex
                                )
                        # normal group
                        else:
                            data = groups[idx]
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # strings as tokens are yielded as-is
                else:
                    data = m.group()

                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    "unexpected '%s'" % data, lineno, name, filename
                                )
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    "unexpected '%s', expected '%s'"
                                    % (data, expected_op),
                                    lineno,
                                    name,
                                    filename,
                                )

                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data

                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                "%r wanted to resolve the new state dynamically"
                                " but no group matched" % regex
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError(
                        "%r yielded empty string without stack change" % regex
                    )
                # publish the new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError(
                    "unexpected char %r at %d" % (source[pos], pos),
                    lineno,
                    name,
                    filename,
                )
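

# A minimal usage sketch, assuming the package is importable as ``jinja2``:
# running ``python -m jinja2.lexer`` prints the raw (lineno, token, value)
# triples produced by ``tokeniter`` for a small template, including the effect
# of the ``-`` whitespace-control sign handled above. Purely illustrative and
# not part of the public API.
if __name__ == "__main__":
    from jinja2 import Environment

    _source = "Hello\n    {%- if user %}{{ user }}{% endif %}"

    for _lineno, _token, _value in Environment().lexer.tokeniter(_source, "<demo>"):
        print("%d %s %r" % (_lineno, _token, _value))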