1"""Implements a Jinja / Python combination lexer. The ``Lexer`` class
2is used to do some preprocessing. It filters out invalid operators like
3the bitshift operators we don't allow in templates. It separates
4template code and python code in expressions.
5"""
6import re
7import typing as t
8from ast import literal_eval
9from collections import deque
10from sys import intern
11
12from ._identifier import pattern as name_re
13from .exceptions import TemplateSyntaxError
14from .utils import LRUCache
15
16if t.TYPE_CHECKING:
17    import typing_extensions as te
18    from .environment import Environment
19
20# cache for the lexers. Exists in order to be able to have multiple
21# environments with the same lexer
22_lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50)  # type: ignore
23
24# static regular expressions
25whitespace_re = re.compile(r"\s+")
26newline_re = re.compile(r"(\r\n|\r|\n)")
27string_re = re.compile(
28    r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S
29)
30integer_re = re.compile(
31    r"""
32    (
33        0b(_?[0-1])+ # binary
34    |
35        0o(_?[0-7])+ # octal
36    |
37        0x(_?[\da-f])+ # hex
38    |
39        [1-9](_?\d)* # decimal
40    |
41        0(_?0)* # decimal zero
42    )
43    """,
44    re.IGNORECASE | re.VERBOSE,
45)
46float_re = re.compile(
47    r"""
48    (?<!\.)  # doesn't start with a .
49    (\d+_)*\d+  # digits, possibly _ separated
50    (
51        (\.(\d+_)*\d+)?  # optional fractional part
52        e[+\-]?(\d+_)*\d+  # exponent part
53    |
54        \.(\d+_)*\d+  # required fractional part
55    )
56    """,
57    re.IGNORECASE | re.VERBOSE,
58)
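
# A rough illustration (not executed; derived from the patterns above) of what
# the literal regexes accept inside tags:
#
#     integer_re matches "42", "1_000", "0b10", "0o17", "0xff"
#     float_re matches "1.5", "2.5e-3", "1e10", "1_000.5" (but not ".5",
#     because of the (?<!\.) lookbehind and the required leading digits)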

# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
TOKEN_MUL = intern("mul")
TOKEN_NE = intern("ne")
TOKEN_PIPE = intern("pipe")
TOKEN_POW = intern("pow")
TOKEN_RBRACE = intern("rbrace")
TOKEN_RBRACKET = intern("rbracket")
TOKEN_RPAREN = intern("rparen")
TOKEN_SEMICOLON = intern("semicolon")
TOKEN_SUB = intern("sub")
TOKEN_TILDE = intern("tilde")
TOKEN_WHITESPACE = intern("whitespace")
TOKEN_FLOAT = intern("float")
TOKEN_INTEGER = intern("integer")
TOKEN_NAME = intern("name")
TOKEN_STRING = intern("string")
TOKEN_OPERATOR = intern("operator")
TOKEN_BLOCK_BEGIN = intern("block_begin")
TOKEN_BLOCK_END = intern("block_end")
TOKEN_VARIABLE_BEGIN = intern("variable_begin")
TOKEN_VARIABLE_END = intern("variable_end")
TOKEN_RAW_BEGIN = intern("raw_begin")
TOKEN_RAW_END = intern("raw_end")
TOKEN_COMMENT_BEGIN = intern("comment_begin")
TOKEN_COMMENT_END = intern("comment_end")
TOKEN_COMMENT = intern("comment")
TOKEN_LINESTATEMENT_BEGIN = intern("linestatement_begin")
TOKEN_LINESTATEMENT_END = intern("linestatement_end")
TOKEN_LINECOMMENT_BEGIN = intern("linecomment_begin")
TOKEN_LINECOMMENT_END = intern("linecomment_end")
TOKEN_LINECOMMENT = intern("linecomment")
TOKEN_DATA = intern("data")
TOKEN_INITIAL = intern("initial")
TOKEN_EOF = intern("eof")

# bind operators to token types
operators = {
    "+": TOKEN_ADD,
    "-": TOKEN_SUB,
    "/": TOKEN_DIV,
    "//": TOKEN_FLOORDIV,
    "*": TOKEN_MUL,
    "%": TOKEN_MOD,
    "**": TOKEN_POW,
    "~": TOKEN_TILDE,
    "[": TOKEN_LBRACKET,
    "]": TOKEN_RBRACKET,
    "(": TOKEN_LPAREN,
    ")": TOKEN_RPAREN,
    "{": TOKEN_LBRACE,
    "}": TOKEN_RBRACE,
    "==": TOKEN_EQ,
    "!=": TOKEN_NE,
    ">": TOKEN_GT,
    ">=": TOKEN_GTEQ,
    "<": TOKEN_LT,
    "<=": TOKEN_LTEQ,
    "=": TOKEN_ASSIGN,
    ".": TOKEN_DOT,
    ":": TOKEN_COLON,
    "|": TOKEN_PIPE,
    ",": TOKEN_COMMA,
    ";": TOKEN_SEMICOLON,
}

reverse_operators = {v: k for k, v in operators.items()}
assert len(operators) == len(reverse_operators), "operators dropped"
operator_re = re.compile(
    f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})"
)
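
# Sorting the operators longest-first matters because regex alternation takes
# the first branch that matches: if "*" were listed before "**", the power
# operator would be lexed as two consecutive "mul" tokens. Illustrative check:
#
#     operator_re.match("**").group()  # "**", not "*"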

ignored_tokens = frozenset(
    [
        TOKEN_COMMENT_BEGIN,
        TOKEN_COMMENT,
        TOKEN_COMMENT_END,
        TOKEN_WHITESPACE,
        TOKEN_LINECOMMENT_BEGIN,
        TOKEN_LINECOMMENT_END,
        TOKEN_LINECOMMENT,
    ]
)
ignore_if_empty = frozenset(
    [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT]
)


def _describe_token_type(token_type: str) -> str:
    if token_type in reverse_operators:
        return reverse_operators[token_type]

    return {
        TOKEN_COMMENT_BEGIN: "begin of comment",
        TOKEN_COMMENT_END: "end of comment",
        TOKEN_COMMENT: "comment",
        TOKEN_LINECOMMENT: "comment",
        TOKEN_BLOCK_BEGIN: "begin of statement block",
        TOKEN_BLOCK_END: "end of statement block",
        TOKEN_VARIABLE_BEGIN: "begin of print statement",
        TOKEN_VARIABLE_END: "end of print statement",
        TOKEN_LINESTATEMENT_BEGIN: "begin of line statement",
        TOKEN_LINESTATEMENT_END: "end of line statement",
        TOKEN_DATA: "template data / text",
        TOKEN_EOF: "end of template",
    }.get(token_type, token_type)


def describe_token(token: "Token") -> str:
    """Returns a description of the token."""
    if token.type == TOKEN_NAME:
        return token.value

    return _describe_token_type(token.type)


def describe_token_expr(expr: str) -> str:
    """Like `describe_token` but for token expressions."""
    if ":" in expr:
        type, value = expr.split(":", 1)

        if type == TOKEN_NAME:
            return value
    else:
        type = expr

    return _describe_token_type(type)
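
# Illustrative descriptions (not executed), showing how token expressions are
# turned into human-readable error text:
#
#     describe_token_expr("name:endfor")  # -> "endfor"
#     describe_token_expr("block_end")    # -> "end of statement block"
#     describe_token_expr("eq")           # -> "=="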


def count_newlines(value: str) -> int:
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]:
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (
            len(environment.comment_start_string),
            TOKEN_COMMENT_BEGIN,
            e(environment.comment_start_string),
        ),
        (
            len(environment.block_start_string),
            TOKEN_BLOCK_BEGIN,
            e(environment.block_start_string),
        ),
        (
            len(environment.variable_start_string),
            TOKEN_VARIABLE_BEGIN,
            e(environment.variable_start_string),
        ),
    ]

    if environment.line_statement_prefix is not None:
        rules.append(
            (
                len(environment.line_statement_prefix),
                TOKEN_LINESTATEMENT_BEGIN,
                r"^[ \t\v]*" + e(environment.line_statement_prefix),
            )
        )
    if environment.line_comment_prefix is not None:
        rules.append(
            (
                len(environment.line_comment_prefix),
                TOKEN_LINECOMMENT_BEGIN,
                r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix),
            )
        )

    return [x[1:] for x in sorted(rules, reverse=True)]
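
# A rough sketch of the result for an environment using the default delimiters
# ("{%", "{{", "{#") and no line statement/comment prefixes: three
# (token_type, escaped_start_string) pairs. Sorting the (length, type, pattern)
# tuples in reverse means longer start strings are tried first by the root
# regex, so e.g. a hypothetical "<%=" variable delimiter would win over a
# "<%" block delimiter.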


class Failure:
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(
        self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError
    ) -> None:
        self.message = message
        self.error_class = cls

    def __call__(self, lineno: int, filename: str) -> "te.NoReturn":
        raise self.error_class(self.message, lineno, filename)


class Token(t.NamedTuple):
    lineno: int
    type: str
    value: str

    def __str__(self) -> str:
        return describe_token(self)

    def test(self, expr: str) -> bool:
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
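
        For example (illustrative)::

            Token(1, "name", "foo").test("name")      # True
            Token(1, "name", "foo").test("name:foo")  # True
            Token(1, "name", "foo").test("name:bar")  # False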
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True

        if ":" in expr:
            return expr.split(":", 1) == [self.type, self.value]

        return False

    def test_any(self, *iterable: str) -> bool:
        """Test against multiple token expressions."""
        return any(self.test(expr) for expr in iterable)


class TokenStreamIterator:
    """The iterator for token streams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream: "TokenStream") -> None:
        self.stream = stream

    def __iter__(self) -> "TokenStreamIterator":
        return self

    def __next__(self) -> Token:
        token = self.stream.current

        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration

        next(self.stream)
        return token


class TokenStream:
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
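
    A minimal usage sketch (illustrative; ``env`` is assumed to be a
    configured :class:`Environment` with the default delimiters)::

        stream = get_lexer(env).tokenize("{{ foo }}")
        stream.current            # Token(1, 'variable_begin', '{{')
        next(stream)              # returns the current token and advances
        stream.expect("name")     # Token(1, 'name', 'foo')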
    """

    def __init__(
        self,
        generator: t.Iterable[Token],
        name: t.Optional[str],
        filename: t.Optional[str],
    ):
        self._iter = iter(generator)
        self._pushed: "te.Deque[Token]" = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, "")
        next(self)

    def __iter__(self) -> TokenStreamIterator:
        return TokenStreamIterator(self)

    def __bool__(self) -> bool:
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    @property
    def eos(self) -> bool:
        """Are we at the end of the stream?"""
        return not self

    def push(self, token: Token) -> None:
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self) -> Token:
        """Look at the next token without advancing the stream."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n: int = 1) -> None:
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr: str) -> t.Optional[Token]:
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

        return None

    def skip_if(self, expr: str) -> bool:
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self) -> Token:
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current

        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()

        return rv

    def close(self) -> None:
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, "")
        self._iter = iter(())
        self.closed = True

    def expect(self, expr: str) -> Token:
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
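
        For example (illustrative)::

            stream.expect("block_end")
            # returns the matching token and advances, or raises
            # TemplateSyntaxError if the current token does not match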
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)

            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError(
                    f"unexpected end of template, expected {expr!r}.",
                    self.current.lineno,
                    self.name,
                    self.filename,
                )

            raise TemplateSyntaxError(
                f"expected token {expr!r}, got {describe_token(self.current)!r}",
                self.current.lineno,
                self.name,
                self.filename,
            )

        return next(self)


def get_lexer(environment: "Environment") -> "Lexer":
    """Return a lexer which is probably cached."""
    key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    lexer = _lexer_cache.get(key)

    if lexer is None:
        _lexer_cache[key] = lexer = Lexer(environment)

    return lexer


class OptionalLStrip(tuple):
    """A special tuple for marking a point in the state that can have
    lstrip applied.
    """

    __slots__ = ()

    # Even though it looks like a no-op, creating instances fails
    # without this.
    def __new__(cls, *members, **kwargs):  # type: ignore
        return super().__new__(cls, members)


class _Rule(t.NamedTuple):
    pattern: t.Pattern[str]
    tokens: t.Union[str, t.Tuple[str, ...], t.Tuple[Failure]]
    command: t.Optional[str]


class Lexer:
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class; usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment: "Environment") -> None:
        # shortcuts
        e = re.escape

        def c(x: str) -> t.Pattern[str]:
            return re.compile(x, re.M | re.S)

        # lexing rules for tags
        tag_rules: t.List[_Rule] = [
            _Rule(whitespace_re, TOKEN_WHITESPACE, None),
            _Rule(float_re, TOKEN_FLOAT, None),
            _Rule(integer_re, TOKEN_INTEGER, None),
            _Rule(name_re, TOKEN_NAME, None),
            _Rule(string_re, TOKEN_STRING, None),
            _Rule(operator_re, TOKEN_OPERATOR, None),
        ]

        # assemble the root lexing rule. because "|" in a regex alternation
        # is not greedy (the first matching branch wins), we have to sort by
        # length so that the lexer continues working as expected when we have
        # parsing rules like <% for blocks and <%= for variables. (if someone
        # wants ASP-like syntax) variables are just part of the rules if
        # variable processing is required.
        root_tag_rules = compile_rules(environment)

        block_start_re = e(environment.block_start_string)
        block_end_re = e(environment.block_end_string)
        comment_end_re = e(environment.comment_end_string)
        variable_end_re = e(environment.variable_end_string)

        # block suffix if trimming is enabled
        block_suffix_re = "\\n?" if environment.trim_blocks else ""

        # If lstrip is enabled, it should not be applied if there is any
        # non-whitespace between the newline and block.
        self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        root_raw_re = (
            fr"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*"
            fr"(?:\-{block_end_re}\s*|{block_end_re}))"
        )
        root_parts_re = "|".join(
            [root_raw_re] + [fr"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules]
        )

        # global lexing rules
        self.rules: t.Dict[str, t.List[_Rule]] = {
            "root": [
                # directives
                _Rule(
                    c(fr"(.*?)(?:{root_parts_re})"),
                    OptionalLStrip(TOKEN_DATA, "#bygroup"),  # type: ignore
                    "#bygroup",
                ),
                # data
                _Rule(c(".+"), TOKEN_DATA, None),
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                _Rule(
                    c(
                        fr"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*"
                        fr"|{comment_end_re}{block_suffix_re}))"
                    ),
                    (TOKEN_COMMENT, TOKEN_COMMENT_END),
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None),
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                _Rule(
                    c(
                        fr"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        fr"|{block_end_re}{block_suffix_re})"
                    ),
                    TOKEN_BLOCK_END,
                    "#pop",
                ),
            ]
            + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                _Rule(
                    c(fr"\-{variable_end_re}\s*|{variable_end_re}"),
                    TOKEN_VARIABLE_END,
                    "#pop",
                )
            ]
            + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                _Rule(
                    c(
                        fr"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*"
                        fr"(?:\+{block_end_re}|\-{block_end_re}\s*"
                        fr"|{block_end_re}{block_suffix_re}))"
                    ),
                    OptionalLStrip(TOKEN_DATA, TOKEN_RAW_END),  # type: ignore
                    "#pop",
                ),
                _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None),
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop")
            ]
            + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                _Rule(
                    c(r"(.*?)()(?=\n|$)"),
                    (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END),
                    "#pop",
                )
            ],
        }

    def _normalize_newlines(self, value: str) -> str:
        """Replace all newlines with the configured sequence in strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(
        self,
        source: str,
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> TokenStream:
        """Calls :meth:`tokeniter` and wraps the resulting token tuples
        in a :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(
        self,
        stream: t.Iterable[t.Tuple[int, str, str]],
        name: t.Optional[str] = None,
        filename: t.Optional[str] = None,
    ) -> t.Iterator[Token]:
        """This is called with the stream as returned by :meth:`tokeniter`
        and wraps every token in a :class:`Token` and converts the value.
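
        For example (illustrative; ``lexer`` is an existing :class:`Lexer`)::

            next(lexer.wrap(iter([(1, "integer", "1_000")])))
            # -> Token(1, 'integer', 1000)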
        """
        for lineno, token, value_str in stream:
            if token in ignored_tokens:
                continue

            value: t.Any = value_str

            if token == TOKEN_LINESTATEMENT_BEGIN:
                token = TOKEN_BLOCK_BEGIN
            elif token == TOKEN_LINESTATEMENT_END:
                token = TOKEN_BLOCK_END
            # we are not interested in those tokens in the parser
            elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
                continue
            elif token == TOKEN_DATA:
                value = self._normalize_newlines(value_str)
            elif token == "keyword":
                token = value_str
            elif token == TOKEN_NAME:
                value = value_str

                if not value.isidentifier():
                    raise TemplateSyntaxError(
                        "Invalid character in identifier", lineno, name, filename
                    )
            elif token == TOKEN_STRING:
                # try to unescape string
                try:
                    value = (
                        self._normalize_newlines(value_str[1:-1])
                        .encode("ascii", "backslashreplace")
                        .decode("unicode-escape")
                    )
                except Exception as e:
                    msg = str(e).split(":")[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename) from e
            elif token == TOKEN_INTEGER:
                value = int(value_str.replace("_", ""), 0)
            elif token == TOKEN_FLOAT:
                # remove all "_" first to support more Python versions
                value = literal_eval(value_str.replace("_", ""))
            elif token == TOKEN_OPERATOR:
                token = operators[value_str]

            yield Token(lineno, token, value)

    def tokeniter(
        self,
        source: str,
        name: t.Optional[str],
        filename: t.Optional[str] = None,
        state: t.Optional[str] = None,
    ) -> t.Iterator[t.Tuple[int, str, str]]:
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.

        .. versionchanged:: 3.0
            Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
            breaks.
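
        For example (illustrative; ``lexer`` uses the default delimiters)::

            list(lexer.tokeniter("Hello {{ name }}!", None))
            # [(1, 'data', 'Hello '), (1, 'variable_begin', '{{'),
            #  (1, 'whitespace', ' '), (1, 'name', 'name'),
            #  (1, 'whitespace', ' '), (1, 'variable_end', '}}'),
            #  (1, 'data', '!')]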
        """
        lines = newline_re.split(source)[::2]

        if not self.keep_trailing_newline and lines[-1] == "":
            del lines[-1]

        source = "\n".join(lines)
        pos = 0
        lineno = 1
        stack = ["root"]

        if state is not None and state != "root":
            assert state in ("variable", "block"), "invalid state"
            stack.append(state + "_begin")

        statetokens = self.rules[stack[-1]]
        source_length = len(source)
        balancing_stack: t.List[str] = []
        lstrip_unless_re = self.lstrip_unless_re
        newlines_stripped = 0
        line_starting = True

        while True:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)

                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and tokens in (
                    TOKEN_VARIABLE_END,
                    TOKEN_BLOCK_END,
                    TOKEN_LINESTATEMENT_END,
                ):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    groups = m.groups()

                    if isinstance(tokens, OptionalLStrip):
                        # Rule supports lstrip. Match will look like
                        # text, block type, whitespace control, type, control, ...
                        text = groups[0]
                        # Skipping the text and first type, every other group is the
                        # whitespace control for each type. One of the groups will be
                        # -, +, or empty string instead of None.
                        strip_sign = next(g for g in groups[2::2] if g is not None)

                        if strip_sign == "-":
                            # Strip all whitespace between the text and the tag.
                            stripped = text.rstrip()
                            newlines_stripped = text[len(stripped) :].count("\n")
                            groups = [stripped, *groups[1:]]
                        elif (
                            # Not marked for preserving whitespace.
                            strip_sign != "+"
                            # lstrip is enabled.
                            and lstrip_unless_re is not None
                            # Not a variable expression.
                            and not m.groupdict().get(TOKEN_VARIABLE_BEGIN)
                        ):
                            # The start of text between the last newline and the tag.
                            l_pos = text.rfind("\n") + 1

                            if l_pos > 0 or line_starting:
                                # If there's only whitespace between the newline and the
                                # tag, strip it.
                                if not lstrip_unless_re.search(text, l_pos):
                                    groups = [text[:l_pos], *groups[1:]]

                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # "#bygroup" is a bit more complex: in that case we
                        # yield the first named group that matched as the
                        # current token
                        elif token == "#bygroup":
                            for key, value in m.groupdict().items():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count("\n")
                                    break
                            else:
                                raise RuntimeError(
                                    f"{regex!r} wanted to resolve the token dynamically"
                                    " but no group matched"
                                )
                        # normal group
                        else:
                            data = groups[idx]

                            if data or token not in ignore_if_empty:
                                yield lineno, token, data

                            lineno += data.count("\n") + newlines_stripped
                            newlines_stripped = 0

                # plain string token types are yielded as-is.
                else:
                    data = m.group()

                    # update brace/parentheses balance
                    if tokens == TOKEN_OPERATOR:
                        if data == "{":
                            balancing_stack.append("}")
                        elif data == "(":
                            balancing_stack.append(")")
                        elif data == "[":
                            balancing_stack.append("]")
                        elif data in ("}", ")", "]"):
                            if not balancing_stack:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}'", lineno, name, filename
                                )

                            expected_op = balancing_stack.pop()

                            if expected_op != data:
                                raise TemplateSyntaxError(
                                    f"unexpected '{data}', expected '{expected_op}'",
                                    lineno,
                                    name,
                                    filename,
                                )

                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data

                    lineno += data.count("\n")

                line_starting = m.group()[-1:] == "\n"
                # fetch the new position into a new variable so that we can
                # check if there is an internal parsing error which would
                # result in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == "#pop":
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == "#bygroup":
                        for key, value in m.groupdict().items():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError(
                                f"{regex!r} wanted to resolve the new state dynamically"
                                f" but no group matched"
                            )
                    # direct state name given
                    else:
                        stack.append(new_state)

                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and there was no stack
                # change. that would loop forever without a break condition,
                # so raise an error instead
                elif pos2 == pos:
                    raise RuntimeError(
                        f"{regex!r} yielded empty string without stack change"
                    )

                # advance to the new position and restart the rule loop
                pos = pos2
                break
            # if the loop terminated without a break we haven't found a single
            # match; either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return

                # something went wrong
                raise TemplateSyntaxError(
                    f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename
                )
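
# A minimal end-to-end sketch (illustrative; not executed on import).  Given a
# Jinja ``Environment`` named ``env`` with the default delimiters, the cached
# lexer can be fetched and used directly:
#
#     lexer = get_lexer(env)
#     for token in lexer.tokenize("Hello {{ name }}!"):
#         print(token.lineno, token.type, token.value)
#
# which would print something like:
#
#     1 data Hello
#     1 variable_begin {{
#     1 name name
#     1 variable_end }}
#     1 data !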