# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: (c) 2017 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re
from collections import deque
from operator import itemgetter

from jinja2._compat import implements_iterator, intern, iteritems, text_type
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache

# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

try:
    # check if this Python supports Unicode identifiers
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    # no Unicode support, use ASCII identifiers
    name_re = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')
    check_ident = False
else:
    # Unicode support, build a pattern to match valid characters, and set flag
    # to use str.isidentifier to validate during lexing
    from jinja2 import _identifier
    name_re = re.compile(r'[\w{0}]+'.format(_identifier.pattern))
    check_ident = True
    # remove the pattern from memory after building the regex
    import sys
    del sys.modules['jinja2._identifier']
    import jinja2
    del jinja2._identifier
    del _identifier
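# Hedged note (comments only, not executed): on interpreters without Unicode
# identifier support only ASCII names such as `foo_bar` lex as names, while
# with Unicode support `name_re` also matches identifiers like `föö`.  In the
# latter case `check_ident` makes `Lexer.wrap` re-validate every name with
# `str.isidentifier()`, rejecting the few characters the broad `\w`-based
# pattern accepts but Python identifiers may not contain.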

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')

# bind operators to token types
operators = {
    '+':            TOKEN_ADD,
    '-':            TOKEN_SUB,
    '/':            TOKEN_DIV,
    '//':           TOKEN_FLOORDIV,
    '*':            TOKEN_MUL,
    '%':            TOKEN_MOD,
    '**':           TOKEN_POW,
    '~':            TOKEN_TILDE,
    '[':            TOKEN_LBRACKET,
    ']':            TOKEN_RBRACKET,
    '(':            TOKEN_LPAREN,
    ')':            TOKEN_RPAREN,
    '{':            TOKEN_LBRACE,
    '}':            TOKEN_RBRACE,
    '==':           TOKEN_EQ,
    '!=':           TOKEN_NE,
    '>':            TOKEN_GT,
    '>=':           TOKEN_GTEQ,
    '<':            TOKEN_LT,
    '<=':           TOKEN_LTEQ,
    '=':            TOKEN_ASSIGN,
    '.':            TOKEN_DOT,
    ':':            TOKEN_COLON,
    '|':            TOKEN_PIPE,
    ',':            TOKEN_COMMA,
    ';':            TOKEN_SEMICOLON
}

reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
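# Illustrative sketch (comments only): sorting the operators by decreasing
# length before joining them with '|' makes the alternation try longer
# spellings first, e.g.
#   operator_re.match('**').group()  ->  '**'  (not a single '*')
#   operator_re.match('//').group()  ->  '//'  (not a single '/')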

ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN:        'begin of comment',
        TOKEN_COMMENT_END:          'end of comment',
        TOKEN_COMMENT:              'comment',
        TOKEN_LINECOMMENT:          'comment',
        TOKEN_BLOCK_BEGIN:          'begin of statement block',
        TOKEN_BLOCK_END:            'end of statement block',
        TOKEN_VARIABLE_BEGIN:       'begin of print statement',
        TOKEN_VARIABLE_END:         'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
        TOKEN_LINESTATEMENT_END:    'end of line statement',
        TOKEN_DATA:                 'template data / text',
        TOKEN_EOF:                  'end of template'
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == 'name':
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ':' in expr:
        type, value = expr.split(':', 1)
        if type == 'name':
            return value
    else:
        type = expr
    return _describe_token_type(type)
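# Small hedged example (comments only) of how the helpers above describe
# token expressions in error messages:
#   describe_token_expr('name:endfor')  ->  'endfor'
#   describe_token_expr('block_end')    ->  'end of statement block'
#   describe_token_expr('eq')           ->  '=='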


def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    return [x[1:] for x in sorted(rules, reverse=True)]
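# Hedged example (comments only), assuming the default delimiters '{{', '{#'
# and '{%' and no line statement/comment prefixes: all three start strings
# have the same length, so the reverse sort falls back to the rule name and
# the result is roughly
#   [('variable', re.escape('{{')), ('comment', re.escape('{#')),
#    ('block', re.escape('{%'))]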


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of not interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False
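    # Hedged illustration (comments only) of the test helpers above:
    #   tok = Token(1, 'name', 'endfor')
    #   tok.test('name')                        ->  True
    #   tok.test('name:endfor')                 ->  True
    #   tok.test_any('integer', 'name:endfor')  ->  True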

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )


@implements_iterator
class TokenStreamIterator(object):
    """The iterator for token streams.  Iterates over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, '')
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF
    __nonzero__ = __bool__  # py2

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one.

        Use the built-in :func:`next` instead of calling this directly.
        """
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            next(self)
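# Hedged usage sketch (comments only): the parser typically drives a
# TokenStream somewhat like this, `stream` being the result of
# Lexer.tokenize():
#   stream.expect('block_begin')        # raises TemplateSyntaxError otherwise
#   name = stream.expect('name').value  # consume a name token, keep its value
#   if stream.skip_if('comma'):         # optionally consume a comma
#       ...
#   peeked = stream.look()              # inspect the next token, don't consume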


def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (environment.block_start_string,
           environment.block_end_string,
           environment.variable_start_string,
           environment.variable_end_string,
           environment.comment_start_string,
           environment.comment_end_string,
           environment.line_statement_prefix,
           environment.line_comment_prefix,
           environment.trim_blocks,
           environment.lstrip_blocks,
           environment.newline_sequence,
           environment.keep_trailing_newline)
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer
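# Hedged note (comments only): because the cache key is just the tuple of
# syntax options above, two environments configured with identical syntax
# normally end up sharing a single Lexer instance, i.e. roughly
#   get_lexer(env_a) is get_lexer(env_b)  ->  True
# (subject to the LRUCache above evicting old entries).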


class Lexer(object):
    """Class that implements a lexer for a given environment.  Automatically
    created by the environment class, usually you don't have to create one
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule. because "|" in a regex tries its
        # alternatives from left to right, we have to sort the rules by
        # length so that the lexer keeps working as expected when we have
        # parsing rules like <% for blocks and <%= for variables (if
        # someone wants asp-like syntax). variables are just part of the
        # rules if variable processing is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # strip leading spaces if lstrip_blocks is enabled
        prefix_re = {}
        if environment.lstrip_blocks:
            # use '{%+' to manually disable lstrip_blocks behavior
            no_lstrip_re = e('+')
            # detect overlap between block and variable or comment strings
            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
            # make sure we don't mistake a block for a variable or a comment
            m = block_diff.match(environment.comment_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
            m = block_diff.match(environment.variable_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''

            # detect overlap between comment and variable strings
            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
            m = comment_diff.match(environment.variable_start_string)
            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''

            lstrip_re = r'^[ \t]*'
            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
                    lstrip_re,
                    e(environment.block_start_string),
                    no_lstrip_re,
                    e(environment.block_start_string),
                    )
            comment_prefix_re = r'%s%s%s|%s\+?' % (
                    lstrip_re,
                    e(environment.comment_start_string),
                    no_variable_re,
                    e(environment.comment_start_string),
                    )
            prefix_re['block'] = block_prefix_re
            prefix_re['comment'] = comment_prefix_re
        else:
            block_prefix_re = '%s' % e(environment.block_start_string)

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        block_prefix_re,
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n, r))
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c(r'(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c(r'\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    block_prefix_re,
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }
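        # Hedged summary (comments only) of the rule shape consumed by
        # `tokeniter` below: each entry is (compiled regex, token type or
        # tuple of types / '#bygroup' / Failure, new state), where the new
        # state may be None (stay), a state name to push, '#pop' to leave the
        # current state, or '#bygroup' to pick the state from the name of the
        # matched group.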

    def _normalize_newlines(self, value):
        """Called for strings and template data to normalize newlines
        to the configured newline sequence.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter and wraps the resulting generator in a
        :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)
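    # Hedged example (comments only, assuming the default delimiters), where
    # `lexer` is an instance of this class:
    #   stream = lexer.tokenize(u'Hello {{ name }}!')
    #   [(t.type, t.value) for t in stream]
    # would give roughly
    #   [('data', u'Hello '), ('variable_begin', u'{{'), ('name', 'name'),
    #    ('variable_end', u'}}'), ('data', u'!')]
    # with the whitespace inside the delimiters filtered out by `wrap` below.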

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
                if check_ident and not value.isidentifier():
                    raise TemplateSyntaxError(
                        'Invalid character in identifier',
                        lineno, name, filename)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception as e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
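        # Hedged illustration (comments only): for u'{{ 1 }}' this generator
        # yields raw (lineno, token, value) triples before `wrap` filters and
        # converts them, roughly
        #   (1, 'variable_begin', u'{{'), (1, 'whitespace', u' '),
        #   (1, 'integer', u'1'), (1, 'whitespace', u' '),
        #   (1, 'variable_end', u'}}')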
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            for newline in ('\r\n', '\r', '\n'):
                if source.endswith(newline):
                    lines.append('')
                    break
        source = '\n'.join(lines)
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as tokens are just yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch the new position into a new variable so that we can
                # check if there is an internal parsing error which would
                # result in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and there was no stack
                # change: a rule matched the empty string without a break
                # condition.  avoid that and raise an error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # advance to the new position and start over with the rules
                pos = pos2
                break
            # if the loop terminated without a break we haven't found a
            # single match; either we are at the end of the file or we
            # have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)
