# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: (c) 2017 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re
import sys

from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache
from jinja2._compat import iteritems, implements_iterator, text_type, intern


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

def _make_name_re():
    try:
        compile('föö', '<unknown>', 'eval')
    except SyntaxError:
        return re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')

    import jinja2
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

    # Save some memory here
    sys.modules.pop('jinja2._stringdefs')
    del _stringdefs
    del jinja2._stringdefs

    return name_re

# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
name_re = _make_name_re()
del _make_name_re

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')
# intern the tokens and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')

# bind operators to token types
operators = {
    '+':            TOKEN_ADD,
    '-':            TOKEN_SUB,
    '/':            TOKEN_DIV,
    '//':           TOKEN_FLOORDIV,
    '*':            TOKEN_MUL,
    '%':            TOKEN_MOD,
    '**':           TOKEN_POW,
    '~':            TOKEN_TILDE,
    '[':            TOKEN_LBRACKET,
    ']':            TOKEN_RBRACKET,
    '(':            TOKEN_LPAREN,
    ')':            TOKEN_RPAREN,
    '{':            TOKEN_LBRACE,
    '}':            TOKEN_RBRACE,
    '==':           TOKEN_EQ,
    '!=':           TOKEN_NE,
    '>':            TOKEN_GT,
    '>=':           TOKEN_GTEQ,
    '<':            TOKEN_LT,
    '<=':           TOKEN_LTEQ,
    '=':            TOKEN_ASSIGN,
    '.':            TOKEN_DOT,
    ':':            TOKEN_COLON,
    '|':            TOKEN_PIPE,
    ',':            TOKEN_COMMA,
    ';':            TOKEN_SEMICOLON
}

reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

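# Sketch of the effect of the longest-first sort above: regex alternation
# tries alternatives from left to right, so '**' has to appear before '*'
# in ``operator_re`` to be lexed as a single ``pow`` token, e.g.::
#
#     >>> operator_re.match('**').group()
#     '**'
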
ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN:        'begin of comment',
        TOKEN_COMMENT_END:          'end of comment',
        TOKEN_COMMENT:              'comment',
        TOKEN_LINECOMMENT:          'comment',
        TOKEN_BLOCK_BEGIN:          'begin of statement block',
        TOKEN_BLOCK_END:            'end of statement block',
        TOKEN_VARIABLE_BEGIN:       'begin of print statement',
        TOKEN_VARIABLE_END:         'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
        TOKEN_LINESTATEMENT_END:    'end of line statement',
        TOKEN_DATA:                 'template data / text',
        TOKEN_EOF:                  'end of template'
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == 'name':
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ':' in expr:
        type, value = expr.split(':', 1)
        if type == 'name':
            return value
    else:
        type = expr
    return _describe_token_type(type)

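# A small usage sketch of ``describe_token_expr`` (values follow the
# mapping above)::
#
#     >>> describe_token_expr('name:endfor')
#     'endfor'
#     >>> describe_token_expr('block_end')
#     'end of statement block'
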

def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))

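# e.g. (sketch; ``\r\n``, ``\r`` and ``\n`` each count once)::
#
#     >>> count_newlines(u'foo\nbar\r\nbaz')
#     2
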

def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    return [x[1:] for x in sorted(rules, reverse=True)]

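# With the default delimiters (assuming ``from jinja2 import Environment``)
# this yields roughly the following; the exact escaping depends on the
# Python version's ``re.escape`` (a sketch, not a guaranteed repr)::
#
#     >>> compile_rules(Environment())
#     [('variable', '\\{\\{'), ('comment', '\\{\\#'), ('block', '\\{\\%')]
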

class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)

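# e.g. the comment rule below maps a "no match possible" group to an
# instance like ``Failure('Missing end of comment tag')``; calling that
# instance, say ``Failure('Missing end of comment tag')(3, 'index.html')``,
# raises a ``TemplateSyntaxError`` for line 3 (an illustrative sketch of
# how the lexer reports known errors).
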

class Token(tuple):
    """Token class."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )

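# A ``Token`` behaves like its ``(lineno, type, value)`` tuple, e.g.
# (sketch)::
#
#     >>> tok = Token(1, 'name', 'foo')
#     >>> tok.lineno, tok.type, tok.value
#     (1, 'name', 'foo')
#     >>> tok.test('name'), tok.test('name:foo'), tok.test('name:bar')
#     (True, True, False)
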

@implements_iterator
class TokenStreamIterator(object):
    """The iterator for tokenstreams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, '')
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF
    __nonzero__ = __bool__  # py2

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one"""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            next(self)

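# A small sketch of how the parser drives a ``TokenStream``: ``current``
# always holds the active token, advancing returns the old one, and the
# stream closes itself on ``eof``::
#
#     >>> stream = TokenStream(iter([Token(1, 'name', 'foo')]), None, None)
#     >>> stream.current
#     Token(1, 'name', 'foo')
#     >>> next(stream)
#     Token(1, 'name', 'foo')
#     >>> stream.current.type, stream.eos
#     ('eof', True)
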

def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (environment.block_start_string,
           environment.block_end_string,
           environment.variable_start_string,
           environment.variable_end_string,
           environment.comment_start_string,
           environment.comment_end_string,
           environment.line_statement_prefix,
           environment.line_comment_prefix,
           environment.trim_blocks,
           environment.lstrip_blocks,
           environment.newline_sequence,
           environment.keep_trailing_newline)
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer

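# Because the cache key is the tuple of lexer-relevant settings, identically
# configured environments share one ``Lexer`` (a sketch, assuming default
# settings and that the LRU cache has not evicted the entry)::
#
#     >>> from jinja2 import Environment
#     >>> get_lexer(Environment()) is get_lexer(Environment())
#     True
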

class Lexer(object):
    """Class that implements a lexer for a given environment.  Automatically
    created by the environment class; usually you don't have to do that
    yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule. because "|" picks the first
        # alternative that matches, we have to sort the rules by length
        # so that the lexer keeps working as expected when we have parsing
        # rules like <% for blocks and <%= for variables (if someone
        # wants asp-like syntax). variables are just part of the rules
        # if variable processing is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # strip leading spaces if lstrip_blocks is enabled
        prefix_re = {}
        if environment.lstrip_blocks:
            # use '{%+' to manually disable lstrip_blocks behavior
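            # e.g. with the default delimiters, "    {% if x %}" loses its
            # leading indentation while "    {%+ if x %}" keeps it
            # (an illustrative sketch, not part of the regex assembly)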
            no_lstrip_re = e('+')
            # detect overlap between block and variable or comment strings
            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
            # make sure we don't mistake a block for a variable or a comment
            m = block_diff.match(environment.comment_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
            m = block_diff.match(environment.variable_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''

            # detect overlap between comment and variable strings
            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
            m = comment_diff.match(environment.variable_start_string)
            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''

            lstrip_re = r'^[ \t]*'
            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
                    lstrip_re,
                    e(environment.block_start_string),
                    no_lstrip_re,
                    e(environment.block_start_string),
                    )
            comment_prefix_re = r'%s%s%s|%s\+?' % (
                    lstrip_re,
                    e(environment.comment_start_string),
                    no_variable_re,
                    e(environment.comment_start_string),
                    )
            prefix_re['block'] = block_prefix_re
            prefix_re['comment'] = comment_prefix_re
        else:
            block_prefix_re = '%s' % e(environment.block_start_string)

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        block_prefix_re,
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n, r))
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c(r'(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c(r'\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    block_prefix_re,
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }

    def _normalize_newlines(self, value):
        """Called for strings and template data to normalize it to unicode."""
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Tokenizes the source with :meth:`tokeniter`, wraps the result
        via :meth:`wrap` and returns it as a :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception as e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            for newline in ('\r\n', '\r', '\n'):
                if source.endswith(newline):
                    lines.append('')
                    break
        source = '\n'.join(lines)
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        balancing_stack = []
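        # e.g. ``{{ [1, (2)] }}`` keeps ``]`` / ``)`` matched via this stack,
        # while a stray closing bracket such as ``{{ ) }}`` raises a
        # ``TemplateSyntaxError`` further down (illustrative sketch)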

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token are just yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch the new position into a new variable so that we can
                # check for an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and the stack did not
                # change.  this means a loop without a break condition,
                # so avoid that and raise an error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish the new position and start over
                pos = pos2
                break
            # if the loop terminated without a break we haven't found a
            # single match; either we are at the end of the file or we
            # have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)
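

# Usage sketch (assuming the default ``Environment`` delimiters and its
# ``lexer`` property): whitespace tokens are filtered out by ``Lexer.wrap``
# and iteration stops before the ``eof`` token::
#
#     >>> from jinja2 import Environment
#     >>> lexer = Environment().lexer
#     >>> [t.type for t in lexer.tokenize(u'{{ foo + 1 }}')]
#     ['variable_begin', 'name', 'add', 'integer', 'variable_end']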