1"""Implements a Jinja / Python combination lexer. The ``Lexer`` class 2is used to do some preprocessing. It filters out invalid operators like 3the bitshift operators we don't allow in templates. It separates 4template code and python code in expressions. 5""" 6import re 7import typing as t 8from ast import literal_eval 9from collections import deque 10from sys import intern 11 12from ._identifier import pattern as name_re 13from .exceptions import TemplateSyntaxError 14from .utils import LRUCache 15 16if t.TYPE_CHECKING: 17 import typing_extensions as te 18 from .environment import Environment 19 20# cache for the lexers. Exists in order to be able to have multiple 21# environments with the same lexer 22_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50) # type: ignore 23 24# static regular expressions 25whitespace_re = re.compile(r"\s+") 26newline_re = re.compile(r"(\r\n|\r|\n)") 27string_re = re.compile( 28 r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S 29) 30integer_re = re.compile( 31 r""" 32 ( 33 0b(_?[0-1])+ # binary 34 | 35 0o(_?[0-7])+ # octal 36 | 37 0x(_?[\da-f])+ # hex 38 | 39 [1-9](_?\d)* # decimal 40 | 41 0(_?0)* # decimal zero 42 ) 43 """, 44 re.IGNORECASE | re.VERBOSE, 45) 46float_re = re.compile( 47 r""" 48 (?<!\.) # doesn't start with a . 49 (\d+_)*\d+ # digits, possibly _ separated 50 ( 51 (\.(\d+_)*\d+)? # optional fractional part 52 e[+\-]?(\d+_)*\d+ # exponent part 53 | 54 \.(\d+_)*\d+ # required fractional part 55 ) 56 """, 57 re.IGNORECASE | re.VERBOSE, 58) 59 60# internal the tokens and keep references to them 61TOKEN_ADD = intern("add") 62TOKEN_ASSIGN = intern("assign") 63TOKEN_COLON = intern("colon") 64TOKEN_COMMA = intern("comma") 65TOKEN_DIV = intern("div") 66TOKEN_DOT = intern("dot") 67TOKEN_EQ = intern("eq") 68TOKEN_FLOORDIV = intern("floordiv") 69TOKEN_GT = intern("gt") 70TOKEN_GTEQ = intern("gteq") 71TOKEN_LBRACE = intern("lbrace") 72TOKEN_LBRACKET = intern("lbracket") 73TOKEN_LPAREN = intern("lparen") 74TOKEN_LT = intern("lt") 75TOKEN_LTEQ = intern("lteq") 76TOKEN_MOD = intern("mod") 77TOKEN_MUL = intern("mul") 78TOKEN_NE = intern("ne") 79TOKEN_PIPE = intern("pipe") 80TOKEN_POW = intern("pow") 81TOKEN_RBRACE = intern("rbrace") 82TOKEN_RBRACKET = intern("rbracket") 83TOKEN_RPAREN = intern("rparen") 84TOKEN_SEMICOLON = intern("semicolon") 85TOKEN_SUB = intern("sub") 86TOKEN_TILDE = intern("tilde") 87TOKEN_WHITESPACE = intern("whitespace") 88TOKEN_FLOAT = intern("float") 89TOKEN_INTEGER = intern("integer") 90TOKEN_NAME = intern("name") 91TOKEN_STRING = intern("string") 92TOKEN_OPERATOR = intern("operator") 93TOKEN_BLOCK_BEGIN = intern("block_begin") 94TOKEN_BLOCK_END = intern("block_end") 95TOKEN_VARIABLE_BEGIN = intern("variable_begin") 96TOKEN_VARIABLE_END = intern("variable_end") 97TOKEN_RAW_BEGIN = intern("raw_begin") 98TOKEN_RAW_END = intern("raw_end") 99TOKEN_COMMENT_BEGIN = intern("comment_begin") 100TOKEN_COMMENT_END = intern("comment_end") 101TOKEN_COMMENT = intern("comment") 102TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin") 103TOKEN_LINESTATEMENT_END = intern("linestatement_end") 104TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin") 105TOKEN_LINECOMMENT_END = intern("linecomment_end") 106TOKEN_LINECOMMENT = intern("linecomment") 107TOKEN_DATA = intern("data") 108TOKEN_INITIAL = intern("initial") 109TOKEN_EOF = intern("eof") 110 111# bind operators to token types 112operators = { 113 "+": TOKEN_ADD, 114 "-": TOKEN_SUB, 115 "/": TOKEN_DIV, 116 "//": TOKEN_FLOORDIV, 117 "*": TOKEN_MUL, 118 "%": 
TOKEN_MOD, 119 "**": TOKEN_POW, 120 "~": TOKEN_TILDE, 121 "[": TOKEN_LBRACKET, 122 "]": TOKEN_RBRACKET, 123 "(": TOKEN_LPAREN, 124 ")": TOKEN_RPAREN, 125 "{": TOKEN_LBRACE, 126 "}": TOKEN_RBRACE, 127 "==": TOKEN_EQ, 128 "!=": TOKEN_NE, 129 ">": TOKEN_GT, 130 ">=": TOKEN_GTEQ, 131 "<": TOKEN_LT, 132 "<=": TOKEN_LTEQ, 133 "=": TOKEN_ASSIGN, 134 ".": TOKEN_DOT, 135 ":": TOKEN_COLON, 136 "|": TOKEN_PIPE, 137 ",": TOKEN_COMMA, 138 ";": TOKEN_SEMICOLON, 139} 140 141reverse_operators = {v: k for k, v in operators.items()} 142assert len(operators) == len(reverse_operators), "operators dropped" 143operator_re = re.compile( 144 f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})" 145) 146 147ignored_tokens = frozenset( 148 [ 149 TOKEN_COMMENT_BEGIN, 150 TOKEN_COMMENT, 151 TOKEN_COMMENT_END, 152 TOKEN_WHITESPACE, 153 TOKEN_LINECOMMENT_BEGIN, 154 TOKEN_LINECOMMENT_END, 155 TOKEN_LINECOMMENT, 156 ] 157) 158ignore_if_empty = frozenset( 159 [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] 160) 161 162 163def _describe_token_type(token_type: str) -> str: 164 if token_type in reverse_operators: 165 return reverse_operators[token_type] 166 167 return { 168 TOKEN_COMMENT_BEGIN: "begin of comment", 169 TOKEN_COMMENT_END: "end of comment", 170 TOKEN_COMMENT: "comment", 171 TOKEN_LINECOMMENT: "comment", 172 TOKEN_BLOCK_BEGIN: "begin of statement block", 173 TOKEN_BLOCK_END: "end of statement block", 174 TOKEN_VARIABLE_BEGIN: "begin of print statement", 175 TOKEN_VARIABLE_END: "end of print statement", 176 TOKEN_LINESTATEMENT_BEGIN: "begin of line statement", 177 TOKEN_LINESTATEMENT_END: "end of line statement", 178 TOKEN_DATA: "template data / text", 179 TOKEN_EOF: "end of template", 180 }.get(token_type, token_type) 181 182 183def describe_token(token: "Token") -> str: 184 """Returns a description of the token.""" 185 if token.type == TOKEN_NAME: 186 return token.value 187 188 return _describe_token_type(token.type) 189 190 191def describe_token_expr(expr: str) -> str: 192 """Like `describe_token` but for token expressions.""" 193 if ":" in expr: 194 type, value = expr.split(":", 1) 195 196 if type == TOKEN_NAME: 197 return value 198 else: 199 type = expr 200 201 return _describe_token_type(type) 202 203 204def count_newlines(value: str) -> int: 205 """Count the number of newline characters in the string. This is 206 useful for extensions that filter a stream. 
207 """ 208 return len(newline_re.findall(value)) 209 210 211def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]: 212 """Compiles all the rules from the environment into a list of rules.""" 213 e = re.escape 214 rules = [ 215 ( 216 len(environment.comment_start_string), 217 TOKEN_COMMENT_BEGIN, 218 e(environment.comment_start_string), 219 ), 220 ( 221 len(environment.block_start_string), 222 TOKEN_BLOCK_BEGIN, 223 e(environment.block_start_string), 224 ), 225 ( 226 len(environment.variable_start_string), 227 TOKEN_VARIABLE_BEGIN, 228 e(environment.variable_start_string), 229 ), 230 ] 231 232 if environment.line_statement_prefix is not None: 233 rules.append( 234 ( 235 len(environment.line_statement_prefix), 236 TOKEN_LINESTATEMENT_BEGIN, 237 r"^[ \t\v]*" + e(environment.line_statement_prefix), 238 ) 239 ) 240 if environment.line_comment_prefix is not None: 241 rules.append( 242 ( 243 len(environment.line_comment_prefix), 244 TOKEN_LINECOMMENT_BEGIN, 245 r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix), 246 ) 247 ) 248 249 return [x[1:] for x in sorted(rules, reverse=True)] 250 251 252class Failure: 253 """Class that raises a `TemplateSyntaxError` if called. 254 Used by the `Lexer` to specify known errors. 255 """ 256 257 def __init__( 258 self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError 259 ) -> None: 260 self.message = message 261 self.error_class = cls 262 263 def __call__(self, lineno: int, filename: str) -> "te.NoReturn": 264 raise self.error_class(self.message, lineno, filename) 265 266 267class Token(t.NamedTuple): 268 lineno: int 269 type: str 270 value: str 271 272 def __str__(self) -> str: 273 return describe_token(self) 274 275 def test(self, expr: str) -> bool: 276 """Test a token against a token expression. This can either be a 277 token type or ``'token_type:token_value'``. This can only test 278 against string values and types. 279 """ 280 # here we do a regular string equality check as test_any is usually 281 # passed an iterable of not interned strings. 282 if self.type == expr: 283 return True 284 285 if ":" in expr: 286 return expr.split(":", 1) == [self.type, self.value] 287 288 return False 289 290 def test_any(self, *iterable: str) -> bool: 291 """Test against multiple token expressions.""" 292 return any(self.test(expr) for expr in iterable) 293 294 295class TokenStreamIterator: 296 """The iterator for tokenstreams. Iterate over the stream 297 until the eof token is reached. 298 """ 299 300 def __init__(self, stream: "TokenStream") -> None: 301 self.stream = stream 302 303 def __iter__(self) -> "TokenStreamIterator": 304 return self 305 306 def __next__(self) -> Token: 307 token = self.stream.current 308 309 if token.type is TOKEN_EOF: 310 self.stream.close() 311 raise StopIteration 312 313 next(self.stream) 314 return token 315 316 317class TokenStream: 318 """A token stream is an iterable that yields :class:`Token`\\s. The 319 parser however does not iterate over it but calls :meth:`next` to go 320 one token ahead. The current active token is stored as :attr:`current`. 
321 """ 322 323 def __init__( 324 self, 325 generator: t.Iterable[Token], 326 name: t.Optional[str], 327 filename: t.Optional[str], 328 ): 329 self._iter = iter(generator) 330 self._pushed: "te.Deque[Token]" = deque() 331 self.name = name 332 self.filename = filename 333 self.closed = False 334 self.current = Token(1, TOKEN_INITIAL, "") 335 next(self) 336 337 def __iter__(self) -> TokenStreamIterator: 338 return TokenStreamIterator(self) 339 340 def __bool__(self) -> bool: 341 return bool(self._pushed) or self.current.type is not TOKEN_EOF 342 343 @property 344 def eos(self) -> bool: 345 """Are we at the end of the stream?""" 346 return not self 347 348 def push(self, token: Token) -> None: 349 """Push a token back to the stream.""" 350 self._pushed.append(token) 351 352 def look(self) -> Token: 353 """Look at the next token.""" 354 old_token = next(self) 355 result = self.current 356 self.push(result) 357 self.current = old_token 358 return result 359 360 def skip(self, n: int = 1) -> None: 361 """Got n tokens ahead.""" 362 for _ in range(n): 363 next(self) 364 365 def next_if(self, expr: str) -> t.Optional[Token]: 366 """Perform the token test and return the token if it matched. 367 Otherwise the return value is `None`. 368 """ 369 if self.current.test(expr): 370 return next(self) 371 372 return None 373 374 def skip_if(self, expr: str) -> bool: 375 """Like :meth:`next_if` but only returns `True` or `False`.""" 376 return self.next_if(expr) is not None 377 378 def __next__(self) -> Token: 379 """Go one token ahead and return the old one. 380 381 Use the built-in :func:`next` instead of calling this directly. 382 """ 383 rv = self.current 384 385 if self._pushed: 386 self.current = self._pushed.popleft() 387 elif self.current.type is not TOKEN_EOF: 388 try: 389 self.current = next(self._iter) 390 except StopIteration: 391 self.close() 392 393 return rv 394 395 def close(self) -> None: 396 """Close the stream.""" 397 self.current = Token(self.current.lineno, TOKEN_EOF, "") 398 self._iter = iter(()) 399 self.closed = True 400 401 def expect(self, expr: str) -> Token: 402 """Expect a given token type and return it. This accepts the same 403 argument as :meth:`jinja2.lexer.Token.test`. 404 """ 405 if not self.current.test(expr): 406 expr = describe_token_expr(expr) 407 408 if self.current.type is TOKEN_EOF: 409 raise TemplateSyntaxError( 410 f"unexpected end of template, expected {expr!r}.", 411 self.current.lineno, 412 self.name, 413 self.filename, 414 ) 415 416 raise TemplateSyntaxError( 417 f"expected token {expr!r}, got {describe_token(self.current)!r}", 418 self.current.lineno, 419 self.name, 420 self.filename, 421 ) 422 423 return next(self) 424 425 426def get_lexer(environment: "Environment") -> "Lexer": 427 """Return a lexer which is probably cached.""" 428 key = ( 429 environment.block_start_string, 430 environment.block_end_string, 431 environment.variable_start_string, 432 environment.variable_end_string, 433 environment.comment_start_string, 434 environment.comment_end_string, 435 environment.line_statement_prefix, 436 environment.line_comment_prefix, 437 environment.trim_blocks, 438 environment.lstrip_blocks, 439 environment.newline_sequence, 440 environment.keep_trailing_newline, 441 ) 442 lexer = _lexer_cache.get(key) 443 444 if lexer is None: 445 _lexer_cache[key] = lexer = Lexer(environment) 446 447 return lexer 448 449 450class OptionalLStrip(tuple): 451 """A special tuple for marking a point in the state that can have 452 lstrip applied. 
453 """ 454 455 __slots__ = () 456 457 # Even though it looks like a no-op, creating instances fails 458 # without this. 459 def __new__(cls, *members, **kwargs): # type: ignore 460 return super().__new__(cls, members) 461 462 463class _Rule(t.NamedTuple): 464 pattern: t.Pattern[str] 465 tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]] 466 command: t.Optional[str] 467 468 469class Lexer: 470 """Class that implements a lexer for a given environment. Automatically 471 created by the environment class, usually you don't have to do that. 472 473 Note that the lexer is not automatically bound to an environment. 474 Multiple environments can share the same lexer. 475 """ 476 477 def __init__(self, environment: "Environment") -> None: 478 # shortcuts 479 e = re.escape 480 481 def c(x: str) -> t.Pattern[str]: 482 return re.compile(x, re.M | re.S) 483 484 # lexing rules for tags 485 tag_rules: t.List[_Rule] = [ 486 _Rule(whitespace_re, TOKEN_WHITESPACE, None), 487 _Rule(float_re, TOKEN_FLOAT, None), 488 _Rule(integer_re, TOKEN_INTEGER, None), 489 _Rule(name_re, TOKEN_NAME, None), 490 _Rule(string_re, TOKEN_STRING, None), 491 _Rule(operator_re, TOKEN_OPERATOR, None), 492 ] 493 494 # assemble the root lexing rule. because "|" is ungreedy 495 # we have to sort by length so that the lexer continues working 496 # as expected when we have parsing rules like <% for block and 497 # <%= for variables. (if someone wants asp like syntax) 498 # variables are just part of the rules if variable processing 499 # is required. 500 root_tag_rules = compile_rules(environment) 501 502 block_start_re = e(environment.block_start_string) 503 block_end_re = e(environment.block_end_string) 504 comment_end_re = e(environment.comment_end_string) 505 variable_end_re = e(environment.variable_end_string) 506 507 # block suffix if trimming is enabled 508 block_suffix_re = "\\n?" if environment.trim_blocks else "" 509 510 # If lstrip is enabled, it should not be applied if there is any 511 # non-whitespace between the newline and block. 

    def _normalize_newlines(self, value: str) -> str:
        """Replace all newlines with the configured sequence in strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(
        self,
        source: str,
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> TokenStream:
        """Calls :meth:`tokeniter` and :meth:`wrap` and returns the result
        as a :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)
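
    # Usage sketch (comment only; ``lexer`` and the template text are assumed):
    # feeding a small template through ``tokenize`` and collecting the
    # parser-visible token types:
    #
    #     stream = lexer.tokenize("Hello {{ name }}!")
    #     [tok.type for tok in stream]
    #     # ['data', 'variable_begin', 'name', 'variable_end', 'data']
    #
    # Whitespace and comment tokens are filtered out by ``wrap`` below, and the
    # trailing ``eof`` token ends iteration rather than being yielded.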
623 """ 624 for lineno, token, value_str in stream: 625 if token in ignored_tokens: 626 continue 627 628 value: t.Any = value_str 629 630 if token == TOKEN_LINESTATEMENT_BEGIN: 631 token = TOKEN_BLOCK_BEGIN 632 elif token == TOKEN_LINESTATEMENT_END: 633 token = TOKEN_BLOCK_END 634 # we are not interested in those tokens in the parser 635 elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END): 636 continue 637 elif token == TOKEN_DATA: 638 value = self._normalize_newlines(value_str) 639 elif token == "keyword": 640 token = value_str 641 elif token == TOKEN_NAME: 642 value = value_str 643 644 if not value.isidentifier(): 645 raise TemplateSyntaxError( 646 "Invalid character in identifier", lineno, name, filename 647 ) 648 elif token == TOKEN_STRING: 649 # try to unescape string 650 try: 651 value = ( 652 self._normalize_newlines(value_str[1:-1]) 653 .encode("ascii", "backslashreplace") 654 .decode("unicode-escape") 655 ) 656 except Exception as e: 657 msg = str(e).split(":")[-1].strip() 658 raise TemplateSyntaxError(msg, lineno, name, filename) 659 elif token == TOKEN_INTEGER: 660 value = int(value_str.replace("_", ""), 0) 661 elif token == TOKEN_FLOAT: 662 # remove all "_" first to support more Python versions 663 value = literal_eval(value_str.replace("_", "")) 664 elif token == TOKEN_OPERATOR: 665 token = operators[value_str] 666 667 yield Token(lineno, token, value) 668 669 def tokeniter( 670 self, 671 source: str, 672 name: t.Optional[str], 673 filename: t.Optional[str] = None, 674 state: t.Optional[str] = None, 675 ) -> t.Iterator[t.Tuple[int, str, str]]: 676 """This method tokenizes the text and returns the tokens in a 677 generator. Use this method if you just want to tokenize a template. 678 679 .. versionchanged:: 3.0 680 Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line 681 breaks. 682 """ 683 lines = newline_re.split(source)[::2] 684 685 if not self.keep_trailing_newline and lines[-1] == "": 686 del lines[-1] 687 688 source = "\n".join(lines) 689 pos = 0 690 lineno = 1 691 stack = ["root"] 692 693 if state is not None and state != "root": 694 assert state in ("variable", "block"), "invalid state" 695 stack.append(state + "_begin") 696 697 statetokens = self.rules[stack[-1]] 698 source_length = len(source) 699 balancing_stack: t.List[str] = [] 700 lstrip_unless_re = self.lstrip_unless_re 701 newlines_stripped = 0 702 line_starting = True 703 704 while True: 705 # tokenizer loop 706 for regex, tokens, new_state in statetokens: 707 m = regex.match(source, pos) 708 709 # if no match we try again with the next rule 710 if m is None: 711 continue 712 713 # we only match blocks and variables if braces / parentheses 714 # are balanced. continue parsing with the lower rule which 715 # is the operator rule. do this only if the end tags look 716 # like operators 717 if balancing_stack and tokens in ( 718 TOKEN_VARIABLE_END, 719 TOKEN_BLOCK_END, 720 TOKEN_LINESTATEMENT_END, 721 ): 722 continue 723 724 # tuples support more options 725 if isinstance(tokens, tuple): 726 groups = m.groups() 727 728 if isinstance(tokens, OptionalLStrip): 729 # Rule supports lstrip. Match will look like 730 # text, block type, whitespace control, type, control, ... 731 text = groups[0] 732 # Skipping the text and first type, every other group is the 733 # whitespace control for each type. One of the groups will be 734 # -, +, or empty string instead of None. 735 strip_sign = next(g for g in groups[2::2] if g is not None) 736 737 if strip_sign == "-": 738 # Strip all whitespace between the text and the tag. 

    def tokeniter(
        self,
        source: str,
        name: t.Optional[str],
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> t.Iterator[t.Tuple[int, str, str]]:
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.

        .. versionchanged:: 3.0
            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
            breaks.
        """
        lines = newline_re.split(source)[::2]

        if not self.keep_trailing_newline and lines[-1] == "":
            del lines[-1]

        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]

        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")

        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack: t.List[str] = []
        lstrip_unless_re = self.lstrip_unless_re
        newlines_stripped = 0
        line_starting = True

        while True:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)

                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]
                        # Skipping the text and first type, every other group
                        # is the whitespace control for each type. One of the
                        # groups will be -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = [stripped, *groups[1:]]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and lstrip_unless_re is not None
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1

                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline
                                # and the tag, strip it.
                                if not lstrip_unless_re.search(text, l_pos):
                                    groups = [text[:l_pos], *groups[1:]]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == "#bygroup":
                            for key, value in m.groupdict().items():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    f"{regex!r} wanted to resolve the token dynamically"
                                    " but no group matched"
                                )
                        # normal group
                        else:
                            data = groups[idx]

                            if data or token not in ignore_if_empty:
                                yield lineno, token, data

                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # plain string tokens are yielded as-is.
                else:
                    data = m.group()

                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}'", lineno, name, filename
                                )

                            expected_op = balancing_stack.pop()

                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}', expected '{expected_op}'",
                                    lineno,
                                    name,
                                    filename,
                                )

                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data

                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"
                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in m.groupdict().items():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                f"{regex!r} wanted to resolve the new state dynamically"
                                f" but no group matched"
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)

                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and there was no stack
                # change. this would be a loop without a break condition,
                # avoid that and raise an error
                elif pos2 == pos:
                    raise RuntimeError(
                        f"{regex!r} yielded empty string without stack change"
                    )

                # advance to the new position and start matching again
                pos = pos2
                break
            # if the loop terminated without a break we haven't found a single
            # match: either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return

                # something went wrong
                raise TemplateSyntaxError(
                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
                )
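

# End-to-end sketch (comments only; ``lexer`` and the input are assumed):
# ``tokeniter`` yields raw (lineno, token, value) tuples before ``wrap``
# filters and converts them, e.g.
#
#     list(lexer.tokeniter("{{ 1 + 2 }}", "demo"))
#     # [(1, 'variable_begin', '{{'), (1, 'whitespace', ' '),
#     #  (1, 'integer', '1'), (1, 'whitespace', ' '), (1, 'operator', '+'),
#     #  (1, 'whitespace', ' '), (1, 'integer', '2'), (1, 'whitespace', ' '),
#     #  (1, 'variable_end', '}}')]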