1# -*- coding: utf-8 -*- 2""" 3 jinja2.lexer 4 ~~~~~~~~~~~~ 5 6 This module implements a Jinja / Python combination lexer. The 7 `Lexer` class provided by this module is used to do some preprocessing 8 for Jinja. 9 10 On the one hand it filters out invalid operators like the bitshift 11 operators we don't allow in templates. On the other hand it separates 12 template code and python code in expressions. 13 14 :copyright: (c) 2017 by the Jinja Team. 15 :license: BSD, see LICENSE for more details. 16""" 17import re 18import sys 19 20from operator import itemgetter 21from collections import deque 22from jinja2.exceptions import TemplateSyntaxError 23from jinja2.utils import LRUCache 24from jinja2._compat import iteritems, implements_iterator, text_type, intern 25 26 27# cache for the lexers. Exists in order to be able to have multiple 28# environments with the same lexer 29_lexer_cache = LRUCache(50) 30 31# static regular expressions 32whitespace_re = re.compile(r'\s+', re.U) 33string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" 34 r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S) 35integer_re = re.compile(r'\d+') 36 37def _make_name_re(): 38 try: 39 compile('föö', '<unknown>', 'eval') 40 except SyntaxError: 41 return re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b') 42 43 import jinja2 44 from jinja2 import _stringdefs 45 name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start, 46 _stringdefs.xid_continue)) 47 48 # Save some memory here 49 sys.modules.pop('jinja2._stringdefs') 50 del _stringdefs 51 del jinja2._stringdefs 52 53 return name_re 54 55# we use the unicode identifier rule if this python version is able 56# to handle unicode identifiers, otherwise the standard ASCII one. 57name_re = _make_name_re() 58del _make_name_re 59 60float_re = re.compile(r'(?<!\.)\d+\.\d+') 61newline_re = re.compile(r'(\r\n|\r|\n)') 62 63# internal the tokens and keep references to them 64TOKEN_ADD = intern('add') 65TOKEN_ASSIGN = intern('assign') 66TOKEN_COLON = intern('colon') 67TOKEN_COMMA = intern('comma') 68TOKEN_DIV = intern('div') 69TOKEN_DOT = intern('dot') 70TOKEN_EQ = intern('eq') 71TOKEN_FLOORDIV = intern('floordiv') 72TOKEN_GT = intern('gt') 73TOKEN_GTEQ = intern('gteq') 74TOKEN_LBRACE = intern('lbrace') 75TOKEN_LBRACKET = intern('lbracket') 76TOKEN_LPAREN = intern('lparen') 77TOKEN_LT = intern('lt') 78TOKEN_LTEQ = intern('lteq') 79TOKEN_MOD = intern('mod') 80TOKEN_MUL = intern('mul') 81TOKEN_NE = intern('ne') 82TOKEN_PIPE = intern('pipe') 83TOKEN_POW = intern('pow') 84TOKEN_RBRACE = intern('rbrace') 85TOKEN_RBRACKET = intern('rbracket') 86TOKEN_RPAREN = intern('rparen') 87TOKEN_SEMICOLON = intern('semicolon') 88TOKEN_SUB = intern('sub') 89TOKEN_TILDE = intern('tilde') 90TOKEN_WHITESPACE = intern('whitespace') 91TOKEN_FLOAT = intern('float') 92TOKEN_INTEGER = intern('integer') 93TOKEN_NAME = intern('name') 94TOKEN_STRING = intern('string') 95TOKEN_OPERATOR = intern('operator') 96TOKEN_BLOCK_BEGIN = intern('block_begin') 97TOKEN_BLOCK_END = intern('block_end') 98TOKEN_VARIABLE_BEGIN = intern('variable_begin') 99TOKEN_VARIABLE_END = intern('variable_end') 100TOKEN_RAW_BEGIN = intern('raw_begin') 101TOKEN_RAW_END = intern('raw_end') 102TOKEN_COMMENT_BEGIN = intern('comment_begin') 103TOKEN_COMMENT_END = intern('comment_end') 104TOKEN_COMMENT = intern('comment') 105TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin') 106TOKEN_LINESTATEMENT_END = intern('linestatement_end') 107TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin') 108TOKEN_LINECOMMENT_END = intern('linecomment_end') 109TOKEN_LINECOMMENT = intern('linecomment') 110TOKEN_DATA = intern('data') 111TOKEN_INITIAL = intern('initial') 112TOKEN_EOF = intern('eof') 113 114# bind operators to token types 115operators = { 116 '+': TOKEN_ADD, 117 '-': TOKEN_SUB, 118 '/': TOKEN_DIV, 119 '//': TOKEN_FLOORDIV, 120 '*': TOKEN_MUL, 121 '%': TOKEN_MOD, 122 '**': TOKEN_POW, 123 '~': TOKEN_TILDE, 124 '[': TOKEN_LBRACKET, 125 ']': TOKEN_RBRACKET, 126 '(': TOKEN_LPAREN, 127 ')': TOKEN_RPAREN, 128 '{': TOKEN_LBRACE, 129 '}': TOKEN_RBRACE, 130 '==': TOKEN_EQ, 131 '!=': TOKEN_NE, 132 '>': TOKEN_GT, 133 '>=': TOKEN_GTEQ, 134 '<': TOKEN_LT, 135 '<=': TOKEN_LTEQ, 136 '=': TOKEN_ASSIGN, 137 '.': TOKEN_DOT, 138 ':': TOKEN_COLON, 139 '|': TOKEN_PIPE, 140 ',': TOKEN_COMMA, 141 ';': TOKEN_SEMICOLON 142} 143 144reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) 145assert len(operators) == len(reverse_operators), 'operators dropped' 146operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in 147 sorted(operators, key=lambda x: -len(x)))) 148 149ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, 150 TOKEN_COMMENT_END, TOKEN_WHITESPACE, 151 TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END, 152 TOKEN_LINECOMMENT]) 153ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA, 154 TOKEN_COMMENT, TOKEN_LINECOMMENT]) 155 156 157def _describe_token_type(token_type): 158 if token_type in reverse_operators: 159 return reverse_operators[token_type] 160 return { 161 TOKEN_COMMENT_BEGIN: 'begin of comment', 162 TOKEN_COMMENT_END: 'end of comment', 163 TOKEN_COMMENT: 'comment', 164 TOKEN_LINECOMMENT: 'comment', 165 TOKEN_BLOCK_BEGIN: 'begin of statement block', 166 TOKEN_BLOCK_END: 'end of statement block', 167 TOKEN_VARIABLE_BEGIN: 'begin of print statement', 168 TOKEN_VARIABLE_END: 'end of print statement', 169 TOKEN_LINESTATEMENT_BEGIN: 'begin of line statement', 170 TOKEN_LINESTATEMENT_END: 'end of line statement', 171 TOKEN_DATA: 'template data / text', 172 TOKEN_EOF: 'end of template' 173 }.get(token_type, token_type) 174 175 176def describe_token(token): 177 """Returns a description of the token.""" 178 if token.type == 'name': 179 return token.value 180 return _describe_token_type(token.type) 181 182 183def describe_token_expr(expr): 184 """Like `describe_token` but for token expressions.""" 185 if ':' in expr: 186 type, value = expr.split(':', 1) 187 if type == 'name': 188 return value 189 else: 190 type = expr 191 return _describe_token_type(type) 192 193 194def count_newlines(value): 195 """Count the number of newline characters in the string. This is 196 useful for extensions that filter a stream. 197 """ 198 return len(newline_re.findall(value)) 199 200 201def compile_rules(environment): 202 """Compiles all the rules from the environment into a list of rules.""" 203 e = re.escape 204 rules = [ 205 (len(environment.comment_start_string), 'comment', 206 e(environment.comment_start_string)), 207 (len(environment.block_start_string), 'block', 208 e(environment.block_start_string)), 209 (len(environment.variable_start_string), 'variable', 210 e(environment.variable_start_string)) 211 ] 212 213 if environment.line_statement_prefix is not None: 214 rules.append((len(environment.line_statement_prefix), 'linestatement', 215 r'^[ \t\v]*' + e(environment.line_statement_prefix))) 216 if environment.line_comment_prefix is not None: 217 rules.append((len(environment.line_comment_prefix), 'linecomment', 218 r'(?:^|(?<=\S))[^\S\r\n]*' + 219 e(environment.line_comment_prefix))) 220 221 return [x[1:] for x in sorted(rules, reverse=True)] 222 223 224class Failure(object): 225 """Class that raises a `TemplateSyntaxError` if called. 226 Used by the `Lexer` to specify known errors. 227 """ 228 229 def __init__(self, message, cls=TemplateSyntaxError): 230 self.message = message 231 self.error_class = cls 232 233 def __call__(self, lineno, filename): 234 raise self.error_class(self.message, lineno, filename) 235 236 237class Token(tuple): 238 """Token class.""" 239 __slots__ = () 240 lineno, type, value = (property(itemgetter(x)) for x in range(3)) 241 242 def __new__(cls, lineno, type, value): 243 return tuple.__new__(cls, (lineno, intern(str(type)), value)) 244 245 def __str__(self): 246 if self.type in reverse_operators: 247 return reverse_operators[self.type] 248 elif self.type == 'name': 249 return self.value 250 return self.type 251 252 def test(self, expr): 253 """Test a token against a token expression. This can either be a 254 token type or ``'token_type:token_value'``. This can only test 255 against string values and types. 256 """ 257 # here we do a regular string equality check as test_any is usually 258 # passed an iterable of not interned strings. 259 if self.type == expr: 260 return True 261 elif ':' in expr: 262 return expr.split(':', 1) == [self.type, self.value] 263 return False 264 265 def test_any(self, *iterable): 266 """Test against multiple token expressions.""" 267 for expr in iterable: 268 if self.test(expr): 269 return True 270 return False 271 272 def __repr__(self): 273 return 'Token(%r, %r, %r)' % ( 274 self.lineno, 275 self.type, 276 self.value 277 ) 278 279 280@implements_iterator 281class TokenStreamIterator(object): 282 """The iterator for tokenstreams. Iterate over the stream 283 until the eof token is reached. 284 """ 285 286 def __init__(self, stream): 287 self.stream = stream 288 289 def __iter__(self): 290 return self 291 292 def __next__(self): 293 token = self.stream.current 294 if token.type is TOKEN_EOF: 295 self.stream.close() 296 raise StopIteration() 297 next(self.stream) 298 return token 299 300 301@implements_iterator 302class TokenStream(object): 303 """A token stream is an iterable that yields :class:`Token`\\s. The 304 parser however does not iterate over it but calls :meth:`next` to go 305 one token ahead. The current active token is stored as :attr:`current`. 306 """ 307 308 def __init__(self, generator, name, filename): 309 self._iter = iter(generator) 310 self._pushed = deque() 311 self.name = name 312 self.filename = filename 313 self.closed = False 314 self.current = Token(1, TOKEN_INITIAL, '') 315 next(self) 316 317 def __iter__(self): 318 return TokenStreamIterator(self) 319 320 def __bool__(self): 321 return bool(self._pushed) or self.current.type is not TOKEN_EOF 322 __nonzero__ = __bool__ # py2 323 324 eos = property(lambda x: not x, doc="Are we at the end of the stream?") 325 326 def push(self, token): 327 """Push a token back to the stream.""" 328 self._pushed.append(token) 329 330 def look(self): 331 """Look at the next token.""" 332 old_token = next(self) 333 result = self.current 334 self.push(result) 335 self.current = old_token 336 return result 337 338 def skip(self, n=1): 339 """Got n tokens ahead.""" 340 for x in range(n): 341 next(self) 342 343 def next_if(self, expr): 344 """Perform the token test and return the token if it matched. 345 Otherwise the return value is `None`. 346 """ 347 if self.current.test(expr): 348 return next(self) 349 350 def skip_if(self, expr): 351 """Like :meth:`next_if` but only returns `True` or `False`.""" 352 return self.next_if(expr) is not None 353 354 def __next__(self): 355 """Go one token ahead and return the old one""" 356 rv = self.current 357 if self._pushed: 358 self.current = self._pushed.popleft() 359 elif self.current.type is not TOKEN_EOF: 360 try: 361 self.current = next(self._iter) 362 except StopIteration: 363 self.close() 364 return rv 365 366 def close(self): 367 """Close the stream.""" 368 self.current = Token(self.current.lineno, TOKEN_EOF, '') 369 self._iter = None 370 self.closed = True 371 372 def expect(self, expr): 373 """Expect a given token type and return it. This accepts the same 374 argument as :meth:`jinja2.lexer.Token.test`. 375 """ 376 if not self.current.test(expr): 377 expr = describe_token_expr(expr) 378 if self.current.type is TOKEN_EOF: 379 raise TemplateSyntaxError('unexpected end of template, ' 380 'expected %r.' % expr, 381 self.current.lineno, 382 self.name, self.filename) 383 raise TemplateSyntaxError("expected token %r, got %r" % 384 (expr, describe_token(self.current)), 385 self.current.lineno, 386 self.name, self.filename) 387 try: 388 return self.current 389 finally: 390 next(self) 391 392 393def get_lexer(environment): 394 """Return a lexer which is probably cached.""" 395 key = (environment.block_start_string, 396 environment.block_end_string, 397 environment.variable_start_string, 398 environment.variable_end_string, 399 environment.comment_start_string, 400 environment.comment_end_string, 401 environment.line_statement_prefix, 402 environment.line_comment_prefix, 403 environment.trim_blocks, 404 environment.lstrip_blocks, 405 environment.newline_sequence, 406 environment.keep_trailing_newline) 407 lexer = _lexer_cache.get(key) 408 if lexer is None: 409 lexer = Lexer(environment) 410 _lexer_cache[key] = lexer 411 return lexer 412 413 414class Lexer(object): 415 """Class that implements a lexer for a given environment. Automatically 416 created by the environment class, usually you don't have to do that. 417 418 Note that the lexer is not automatically bound to an environment. 419 Multiple environments can share the same lexer. 420 """ 421 422 def __init__(self, environment): 423 # shortcuts 424 c = lambda x: re.compile(x, re.M | re.S) 425 e = re.escape 426 427 # lexing rules for tags 428 tag_rules = [ 429 (whitespace_re, TOKEN_WHITESPACE, None), 430 (float_re, TOKEN_FLOAT, None), 431 (integer_re, TOKEN_INTEGER, None), 432 (name_re, TOKEN_NAME, None), 433 (string_re, TOKEN_STRING, None), 434 (operator_re, TOKEN_OPERATOR, None) 435 ] 436 437 # assemble the root lexing rule. because "|" is ungreedy 438 # we have to sort by length so that the lexer continues working 439 # as expected when we have parsing rules like <% for block and 440 # <%= for variables. (if someone wants asp like syntax) 441 # variables are just part of the rules if variable processing 442 # is required. 443 root_tag_rules = compile_rules(environment) 444 445 # block suffix if trimming is enabled 446 block_suffix_re = environment.trim_blocks and '\\n?' or '' 447 448 # strip leading spaces if lstrip_blocks is enabled 449 prefix_re = {} 450 if environment.lstrip_blocks: 451 # use '{%+' to manually disable lstrip_blocks behavior 452 no_lstrip_re = e('+') 453 # detect overlap between block and variable or comment strings 454 block_diff = c(r'^%s(.*)' % e(environment.block_start_string)) 455 # make sure we don't mistake a block for a variable or a comment 456 m = block_diff.match(environment.comment_start_string) 457 no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' 458 m = block_diff.match(environment.variable_start_string) 459 no_lstrip_re += m and r'|%s' % e(m.group(1)) or '' 460 461 # detect overlap between comment and variable strings 462 comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string)) 463 m = comment_diff.match(environment.variable_start_string) 464 no_variable_re = m and r'(?!%s)' % e(m.group(1)) or '' 465 466 lstrip_re = r'^[ \t]*' 467 block_prefix_re = r'%s%s(?!%s)|%s\+?' % ( 468 lstrip_re, 469 e(environment.block_start_string), 470 no_lstrip_re, 471 e(environment.block_start_string), 472 ) 473 comment_prefix_re = r'%s%s%s|%s\+?' % ( 474 lstrip_re, 475 e(environment.comment_start_string), 476 no_variable_re, 477 e(environment.comment_start_string), 478 ) 479 prefix_re['block'] = block_prefix_re 480 prefix_re['comment'] = comment_prefix_re 481 else: 482 block_prefix_re = '%s' % e(environment.block_start_string) 483 484 self.newline_sequence = environment.newline_sequence 485 self.keep_trailing_newline = environment.keep_trailing_newline 486 487 # global lexing rules 488 self.rules = { 489 'root': [ 490 # directives 491 (c('(.*?)(?:%s)' % '|'.join( 492 [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % ( 493 e(environment.block_start_string), 494 block_prefix_re, 495 e(environment.block_end_string), 496 e(environment.block_end_string) 497 )] + [ 498 r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r)) 499 for n, r in root_tag_rules 500 ])), (TOKEN_DATA, '#bygroup'), '#bygroup'), 501 # data 502 (c('.+'), TOKEN_DATA, None) 503 ], 504 # comments 505 TOKEN_COMMENT_BEGIN: [ 506 (c(r'(.*?)((?:\-%s\s*|%s)%s)' % ( 507 e(environment.comment_end_string), 508 e(environment.comment_end_string), 509 block_suffix_re 510 )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'), 511 (c('(.)'), (Failure('Missing end of comment tag'),), None) 512 ], 513 # blocks 514 TOKEN_BLOCK_BEGIN: [ 515 (c(r'(?:\-%s\s*|%s)%s' % ( 516 e(environment.block_end_string), 517 e(environment.block_end_string), 518 block_suffix_re 519 )), TOKEN_BLOCK_END, '#pop'), 520 ] + tag_rules, 521 # variables 522 TOKEN_VARIABLE_BEGIN: [ 523 (c(r'\-%s\s*|%s' % ( 524 e(environment.variable_end_string), 525 e(environment.variable_end_string) 526 )), TOKEN_VARIABLE_END, '#pop') 527 ] + tag_rules, 528 # raw block 529 TOKEN_RAW_BEGIN: [ 530 (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % ( 531 e(environment.block_start_string), 532 block_prefix_re, 533 e(environment.block_end_string), 534 e(environment.block_end_string), 535 block_suffix_re 536 )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'), 537 (c('(.)'), (Failure('Missing end of raw directive'),), None) 538 ], 539 # line statements 540 TOKEN_LINESTATEMENT_BEGIN: [ 541 (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop') 542 ] + tag_rules, 543 # line comments 544 TOKEN_LINECOMMENT_BEGIN: [ 545 (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT, 546 TOKEN_LINECOMMENT_END), '#pop') 547 ] 548 } 549 550 def _normalize_newlines(self, value): 551 """Called for strings and template data to normalize it to unicode.""" 552 return newline_re.sub(self.newline_sequence, value) 553 554 def tokenize(self, source, name=None, filename=None, state=None): 555 """Calls tokeniter + tokenize and wraps it in a token stream. 556 """ 557 stream = self.tokeniter(source, name, filename, state) 558 return TokenStream(self.wrap(stream, name, filename), name, filename) 559 560 def wrap(self, stream, name=None, filename=None): 561 """This is called with the stream as returned by `tokenize` and wraps 562 every token in a :class:`Token` and converts the value. 563 """ 564 for lineno, token, value in stream: 565 if token in ignored_tokens: 566 continue 567 elif token == 'linestatement_begin': 568 token = 'block_begin' 569 elif token == 'linestatement_end': 570 token = 'block_end' 571 # we are not interested in those tokens in the parser 572 elif token in ('raw_begin', 'raw_end'): 573 continue 574 elif token == 'data': 575 value = self._normalize_newlines(value) 576 elif token == 'keyword': 577 token = value 578 elif token == 'name': 579 value = str(value) 580 elif token == 'string': 581 # try to unescape string 582 try: 583 value = self._normalize_newlines(value[1:-1]) \ 584 .encode('ascii', 'backslashreplace') \ 585 .decode('unicode-escape') 586 except Exception as e: 587 msg = str(e).split(':')[-1].strip() 588 raise TemplateSyntaxError(msg, lineno, name, filename) 589 elif token == 'integer': 590 value = int(value) 591 elif token == 'float': 592 value = float(value) 593 elif token == 'operator': 594 token = operators[value] 595 yield Token(lineno, token, value) 596 597 def tokeniter(self, source, name, filename=None, state=None): 598 """This method tokenizes the text and returns the tokens in a 599 generator. Use this method if you just want to tokenize a template. 600 """ 601 source = text_type(source) 602 lines = source.splitlines() 603 if self.keep_trailing_newline and source: 604 for newline in ('\r\n', '\r', '\n'): 605 if source.endswith(newline): 606 lines.append('') 607 break 608 source = '\n'.join(lines) 609 pos = 0 610 lineno = 1 611 stack = ['root'] 612 if state is not None and state != 'root': 613 assert state in ('variable', 'block'), 'invalid state' 614 stack.append(state + '_begin') 615 else: 616 state = 'root' 617 statetokens = self.rules[stack[-1]] 618 source_length = len(source) 619 620 balancing_stack = [] 621 622 while 1: 623 # tokenizer loop 624 for regex, tokens, new_state in statetokens: 625 m = regex.match(source, pos) 626 # if no match we try again with the next rule 627 if m is None: 628 continue 629 630 # we only match blocks and variables if braces / parentheses 631 # are balanced. continue parsing with the lower rule which 632 # is the operator rule. do this only if the end tags look 633 # like operators 634 if balancing_stack and \ 635 tokens in ('variable_end', 'block_end', 636 'linestatement_end'): 637 continue 638 639 # tuples support more options 640 if isinstance(tokens, tuple): 641 for idx, token in enumerate(tokens): 642 # failure group 643 if token.__class__ is Failure: 644 raise token(lineno, filename) 645 # bygroup is a bit more complex, in that case we 646 # yield for the current token the first named 647 # group that matched 648 elif token == '#bygroup': 649 for key, value in iteritems(m.groupdict()): 650 if value is not None: 651 yield lineno, key, value 652 lineno += value.count('\n') 653 break 654 else: 655 raise RuntimeError('%r wanted to resolve ' 656 'the token dynamically' 657 ' but no group matched' 658 % regex) 659 # normal group 660 else: 661 data = m.group(idx + 1) 662 if data or token not in ignore_if_empty: 663 yield lineno, token, data 664 lineno += data.count('\n') 665 666 # strings as token just are yielded as it. 667 else: 668 data = m.group() 669 # update brace/parentheses balance 670 if tokens == 'operator': 671 if data == '{': 672 balancing_stack.append('}') 673 elif data == '(': 674 balancing_stack.append(')') 675 elif data == '[': 676 balancing_stack.append(']') 677 elif data in ('}', ')', ']'): 678 if not balancing_stack: 679 raise TemplateSyntaxError('unexpected \'%s\'' % 680 data, lineno, name, 681 filename) 682 expected_op = balancing_stack.pop() 683 if expected_op != data: 684 raise TemplateSyntaxError('unexpected \'%s\', ' 685 'expected \'%s\'' % 686 (data, expected_op), 687 lineno, name, 688 filename) 689 # yield items 690 if data or tokens not in ignore_if_empty: 691 yield lineno, tokens, data 692 lineno += data.count('\n') 693 694 # fetch new position into new variable so that we can check 695 # if there is a internal parsing error which would result 696 # in an infinite loop 697 pos2 = m.end() 698 699 # handle state changes 700 if new_state is not None: 701 # remove the uppermost state 702 if new_state == '#pop': 703 stack.pop() 704 # resolve the new state by group checking 705 elif new_state == '#bygroup': 706 for key, value in iteritems(m.groupdict()): 707 if value is not None: 708 stack.append(key) 709 break 710 else: 711 raise RuntimeError('%r wanted to resolve the ' 712 'new state dynamically but' 713 ' no group matched' % 714 regex) 715 # direct state name given 716 else: 717 stack.append(new_state) 718 statetokens = self.rules[stack[-1]] 719 # we are still at the same position and no stack change. 720 # this means a loop without break condition, avoid that and 721 # raise error 722 elif pos2 == pos: 723 raise RuntimeError('%r yielded empty string without ' 724 'stack change' % regex) 725 # publish new function and start again 726 pos = pos2 727 break 728 # if loop terminated without break we haven't found a single match 729 # either we are at the end of the file or we have a problem 730 else: 731 # end of text 732 if pos >= source_length: 733 return 734 # something went wrong 735 raise TemplateSyntaxError('unexpected char %r at %d' % 736 (source[pos], pos), lineno, 737 name, filename) 738