# -*- coding: utf-8 -*-
"""
    jinja2.ext
    ~~~~~~~~~~

    Jinja extensions allow adding custom tags, similar to the way django
    custom tags work.  By default two example extensions exist: an i18n and
    a cache extension.

    :copyright: (c) 2010 by the Jinja Team.
    :license: BSD.
"""
from collections import deque
from jinja2 import nodes
from jinja2.defaults import *
from jinja2.environment import get_spontaneous_environment
from jinja2.runtime import Undefined, concat
from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
from jinja2.utils import contextfunction, import_string, Markup, next


# the only real useful gettext functions for a Jinja template.  Note
# that ugettext must be assigned to gettext as Jinja doesn't support
# non unicode strings.
GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')


class ExtensionRegistry(type):
    """Gives the extension an unique identifier."""

    def __new__(cls, name, bases, d):
        """Create the class and store ``<module>.<class name>`` on it as
        the `identifier` attribute.
        """
        rv = type.__new__(cls, name, bases, d)
        rv.identifier = rv.__module__ + '.' + rv.__name__
        return rv


class Extension(object):
    """Extensions can be used to add extra functionality to the Jinja template
    system at the parser level.  Custom extensions are bound to an environment
    but may not store environment specific data on `self`.  The reason for
    this is that an extension can be bound to another environment (for
    overlays) by creating a copy and reassigning the `environment` attribute.

    As extensions are created by the environment they cannot accept any
    arguments for configuration.  One may want to work around that by using
    a factory function, but that is not possible as extensions are identified
    by their import name.  The correct way to configure the extension is
    storing the configuration values on the environment.  Because this way the
    environment ends up acting as central configuration storage the
    attributes may clash which is why extensions have to ensure that the names
    they choose for configuration are not too generic.  ``prefix`` for example
    is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
    name as it includes the name of the extension (fragment cache).
    """
    __metaclass__ = ExtensionRegistry

    #: if this extension parses this is the list of tags it's listening to.
    tags = set()

    #: the priority of that extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment):
        """Remember the environment this extension is bound to."""
        self.environment = environment

    def bind(self, environment):
        """Create a copy of this extension bound to another environment."""
        # object.__new__ skips __init__ on purpose: the copy takes over the
        # instance dict as-is and only the environment is swapped.
        rv = object.__new__(self.__class__)
        rv.__dict__.update(self.__dict__)
        rv.environment = environment
        return rv

    def preprocess(self, source, name, filename=None):
        """This method is called before the actual lexing and can be used to
        preprocess the source.  The `filename` is optional.  The return value
        must be the preprocessed source.
        """
        return source

    def filter_stream(self, stream):
        """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
        to filter tokens returned.  This method has to return an iterable of
        :class:`~jinja2.lexer.Token`\s, but it doesn't have to return a
        :class:`~jinja2.lexer.TokenStream`.

        In the `ext` folder of the Jinja2 source distribution there is a file
        called `inlinegettext.py` which implements a filter that utilizes this
        method.
        """
        return stream

    def parse(self, parser):
        """If any of the :attr:`tags` matched this method is called with the
        parser as first argument.  The token the parser stream is pointing at
        is the name token that matched.  This method has to return one or a
        list of multiple nodes.

        The base implementation raises :exc:`NotImplementedError`.
        """
        raise NotImplementedError()

    def attr(self, name, lineno=None):
        """Return an attribute node for the current extension.  This is useful
        to pass constants on extensions to generated template code::

            self.attr('_my_attribute', lineno=lineno)
        """
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(self, name, args=None, kwargs=None, dyn_args=None,
                    dyn_kwargs=None, lineno=None):
        """Call a method of the extension.  This is a shortcut for
        :meth:`attr` + :class:`jinja2.nodes.Call`.

        `args` and `kwargs` default to empty lists when omitted.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = []
        return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
                          dyn_args, dyn_kwargs, lineno=lineno)


@contextfunction
def _gettext_alias(context, string):
    """Resolve ``gettext`` in the context at call time and apply it to
    `string`.  Installed as the ``_`` global by the i18n extension.
    """
    return context.resolve('gettext')(string)


class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja2."""
    tags = set(['trans'])

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        """Register the ``_`` global and attach the install, uninstall and
        extract helpers to the environment via ``environment.extend``.
        """
        Extension.__init__(self, environment)
        environment.globals['_'] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract
        )

    def _install(self, translations):
        """Install the gettext callables of `translations` as the
        ``gettext`` / ``ngettext`` globals.  The ``ugettext`` /
        ``ungettext`` variants are preferred when the object has them.
        """
        gettext = getattr(translations, 'ugettext', None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, 'ungettext', None)
        if ngettext is None:
            ngettext = translations.ngettext
        self.environment.globals.update(gettext=gettext, ngettext=ngettext)

    def _install_null(self):
        """Install pass-through translations: ``gettext`` returns its
        argument, ``ngettext`` returns the singular for ``n == 1`` and the
        plural otherwise.
        """
        self.environment.globals.update(
            gettext=lambda x: x,
            # the one-element tuples keep the and/or idiom correct even
            # when the selected string is empty (falsy).
            ngettext=lambda s, p, n: (n != 1 and (p,) or (s,))[0]
        )

    def _uninstall(self, translations):
        """Remove the ``gettext`` and ``ngettext`` globals if present.
        `translations` is accepted but not used.
        """
        for key in 'gettext', 'ngettext':
            self.environment.globals.pop(key, None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which may be a
        template source string (parsed first) or an already parsed node.
        Returns the result of :func:`extract_from_ast`.
        """
        if isinstance(source, basestring):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag."""
        lineno = next(parser.stream).lineno

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later state.
        plural_expr = None
        variables = {}
        while parser.stream.current.type != 'block_end':
            if variables:
                parser.stream.expect('comma')

            # skip colon for python compatibility
            if parser.stream.skip_if('colon'):
                break

            name = parser.stream.expect('name')
            if name.value in variables:
                parser.fail('translatable variable %r defined twice.' %
                            name.value, name.lineno,
                            exc=TemplateAssertionError)

            # expressions
            if parser.stream.current.type == 'assign':
                next(parser.stream)
                variables[name.value] = var = parser.parse_expression()
            else:
                variables[name.value] = var = nodes.Name(name.value, 'load')
            # the first variable of the tag is the default plural count
            if plural_expr is None:
                plural_expr = var

        parser.stream.expect('block_end')

        plural = plural_names = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], 'load')

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test('name:pluralize'):
            have_plural = True
            next(parser.stream)
            if parser.stream.current.type != 'block_end':
                name = parser.stream.expect('name')
                if name.value not in variables:
                    parser.fail('unknown variable %r for pluralization' %
                                name.value, name.lineno,
                                exc=TemplateAssertionError)
                plural_expr = variables[name.value]
            parser.stream.expect('block_end')
            plural_names, plural = self._parse_block(parser, False)
            next(parser.stream)
            referenced.update(plural_names)
        else:
            next(parser.stream)

        # register free names as simple name expressions
        for var in referenced:
            if var not in variables:
                variables[var] = nodes.Name(var, 'load')

        # _parse_block doubled every literal '%' so that the message
        # survives the ``%`` interpolation added in _make_node.  That
        # interpolation happens whenever `variables` is non-empty, so the
        # escaping may only be undone when there are no variables at all.
        # Testing `referenced` here instead would unescape messages of tags
        # that declare a variable without using it in the body and then
        # feed a bare '%' into the ``%`` operator at render time.
        if not variables:
            singular = singular.replace('%%', '%')
            if plural:
                plural = plural.replace('%%', '%')

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail('pluralize without variables', lineno)

        if variables:
            variables = nodes.Dict([nodes.Pair(nodes.Const(x, lineno=lineno), y)
                                    for x, y in variables.items()])
        else:
            variables = None

        node = self._make_node(singular, plural, variables, plural_expr)
        node.set_lineno(lineno)
        return node

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Returns a ``(referenced, message)`` tuple: the list of variable
        names used in the body and the message with literal ``%`` doubled
        and every ``{{ name }}`` replaced by ``%(name)s``.  Stops with the
        stream positioned on the ``endtrans`` name token, or on
        ``pluralize`` if `allow_pluralize` is true.
        """
        referenced = []
        buf = []
        while 1:
            if parser.stream.current.type == 'data':
                buf.append(parser.stream.current.value.replace('%', '%%'))
                next(parser.stream)
            elif parser.stream.current.type == 'variable_begin':
                next(parser.stream)
                name = parser.stream.expect('name').value
                referenced.append(name)
                buf.append('%%(%s)s' % name)
                parser.stream.expect('variable_end')
            elif parser.stream.current.type == 'block_begin':
                next(parser.stream)
                if parser.stream.current.test('name:endtrans'):
                    break
                elif parser.stream.current.test('name:pluralize'):
                    if allow_pluralize:
                        break
                    parser.fail('a translatable section can have only one '
                                'pluralize section')
                parser.fail('control structures in translatable sections are '
                            'not allowed')
            elif parser.stream.eos:
                parser.fail('unclosed translation block')
            else:
                assert False, 'internal parser error'

        return referenced, concat(buf)

    def _make_node(self, singular, plural, variables, plural_expr):
        """Generates a useful node from the data provided.

        Builds a ``gettext(singular)`` call when `plural_expr` is `None`
        and an ``ngettext(singular, plural, plural_expr)`` call otherwise,
        optionally wraps it in a mark-safe node and a ``%`` interpolation
        with `variables`, and returns it inside an output node.
        """
        # singular only:
        if plural_expr is None:
            gettext = nodes.Name('gettext', 'load')
            node = nodes.Call(gettext, [nodes.Const(singular)],
                              [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name('ngettext', 'load')
            node = nodes.Call(ngettext, [
                nodes.Const(singular),
                nodes.Const(plural),
                plural_expr
            ], [], None, None)

        # mark the return value as safe if we are in an
        # environment with autoescaping turned on
        if self.environment.autoescape:
            node = nodes.MarkSafe(node)

        if variables:
            node = nodes.Mod(node, variables)
        return nodes.Output([node])


class ExprStmtExtension(Extension):
    """Adds a `do` tag to Jinja2 that works like the print statement just
    that it doesn't print the return value.
    """
    tags = set(['do'])

    def parse(self, parser):
        """Consume the ``do`` token and parse the following tuple
        expression into an expression statement node.
        """
        node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
        node.node = parser.parse_tuple()
        return node


class LoopControlExtension(Extension):
    """Adds break and continue to the template engine."""
    tags = set(['break', 'continue'])

    def parse(self, parser):
        """Return a break node for the ``break`` tag and a continue node
        for any other matched tag.
        """
        token = next(parser.stream)
        if token.value == 'break':
            return nodes.Break(lineno=token.lineno)
        return nodes.Continue(lineno=token.lineno)


class WithExtension(Extension):
    """Adds support for a django-like with block."""
    tags = set(['with'])

    def parse(self, parser):
        """Parse comma separated ``target = expr`` assignments followed by
        the body up to ``endwith`` into a scope node.
        """
        node = nodes.Scope(lineno=next(parser.stream).lineno)
        assignments = []
        while parser.stream.current.type != 'block_end':
            lineno = parser.stream.current.lineno
            if assignments:
                parser.stream.expect('comma')
            target = parser.parse_assign_target()
            parser.stream.expect('assign')
            expr = parser.parse_expression()
            assignments.append(nodes.Assign(target, expr, lineno=lineno))
        # the assignments come first in the scope body so they are in
        # effect for the statements that follow
        node.body = assignments + \
            list(parser.parse_statements(('name:endwith',),
                                         drop_needle=True))
        return node


class AutoEscapeExtension(Extension):
    """Changes auto escape rules for a scope."""
    tags = set(['autoescape'])

    def parse(self, parser):
        """Parse the autoescape expression and the body up to
        ``endautoescape``; the modifier node is returned wrapped in a
        scope node.
        """
        node = nodes.ScopedEvalContextModifier(lineno=next(parser.stream).lineno)
        node.options = [
            nodes.Keyword('autoescape', parser.parse_expression())
        ]
        node.body = parser.parse_statements(('name:endautoescape',),
                                            drop_needle=True)
        return nodes.Scope([node])


def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
                     babel_style=True):
    """Extract localizable strings from the given template node.  Per
    default this function returns matches in babel style that means non string
    parameters as well as keyword arguments are returned as `None`.  This
    allows Babel to figure out what you really meant if you are using
    gettext functions that allow keyword arguments for placeholder expansion.
    If you don't want that behavior set the `babel_style` parameter to `False`
    which causes only strings to be returned and parameters are always stored
    in tuples.  As a consequence invalid gettext calls (calls without a single
    string parameter or string parameters after non-string parameters) are
    skipped.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    For every string found this function yields a ``(lineno, function,
    message)`` tuple, where:

    * ``lineno`` is the number of the line on which the string was found,
    * ``function`` is the name of the ``gettext`` function used (if the
      string was extracted from embedded Python code), and
    *  ``message`` is the string itself (a ``unicode`` object, or a tuple
       of ``unicode`` objects for functions with multiple string arguments).

    This extraction function operates on the AST and is because of that unable
    to extract any comments.  For comment support you have to use the babel
    extraction interface or extract comments yourself.
    """
    for node in node.find_all(nodes.Call):
        # only direct calls to one of the known function names count
        if not isinstance(node.node, nodes.Name) or \
           node.node.name not in gettext_functions:
            continue

        strings = []
        for arg in node.args:
            if isinstance(arg, nodes.Const) and \
               isinstance(arg.value, basestring):
                strings.append(arg.value)
            else:
                strings.append(None)

        # every keyword / dynamic argument is represented by one `None`
        for arg in node.kwargs:
            strings.append(None)
        if node.dyn_args is not None:
            strings.append(None)
        if node.dyn_kwargs is not None:
            strings.append(None)

        if not babel_style:
            strings = tuple(x for x in strings if x is not None)
            if not strings:
                continue
        else:
            if len(strings) == 1:
                strings = strings[0]
            else:
                strings = tuple(strings)
        yield node.lineno, node.node.name, strings


class _CommentFinder(object):
    """Helper class to find comments in a token stream.  Can only
    find comments for gettext calls forwards.  Once the comment
    from line 4 is found, a comment for line 1 will not return a
    usable value.
    """

    def __init__(self, tokens, comment_tags):
        """Store the ``(lineno, type, value)`` token list and the accepted
        comment tags; scanning starts at token offset 0.
        """
        self.tokens = tokens
        self.comment_tags = comment_tags
        self.offset = 0
        # NOTE(review): last_lineno is not reassigned anywhere in this
        # module, so the guard in find_comments compares against 0 only;
        # confirm whether it was meant to track the last lookup.
        self.last_lineno = 0

    def find_backwards(self, offset):
        """Search the tokens between the current offset and `offset` from
        the end for a comment whose first word is one of the comment tags.
        Returns a one-element list with the rest of that comment (right
        stripped) or an empty list.  The current offset is always advanced
        to `offset`.
        """
        try:
            for _, token_type, token_value in \
                    reversed(self.tokens[self.offset:offset]):
                if token_type in ('comment', 'linecomment'):
                    try:
                        prefix, comment = token_value.split(None, 1)
                    except ValueError:
                        # comment consists of less than two words
                        continue
                    if prefix in self.comment_tags:
                        return [comment.rstrip()]
            return []
        finally:
            self.offset = offset

    def find_comments(self, lineno):
        """Return the translator comment for the call on line `lineno`:
        the closest tagged comment among the not yet consumed tokens up to
        and including that line.
        """
        if not self.comment_tags or self.last_lineno > lineno:
            return []
        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
            if token_lineno > lineno:
                return self.find_backwards(self.offset + idx)
        return self.find_backwards(len(self.tokens))


def babel_extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    Templates that fail to parse with a template syntax error yield
    nothing.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
             (``comments`` is a list holding at most one translator comment)
    """
    extensions = set()
    for extension in options.get('extensions', '').split(','):
        extension = extension.strip()
        if not extension:
            continue
        extensions.add(import_string(extension))
    # the i18n extension is always required to understand trans blocks
    if InternationalizationExtension not in extensions:
        extensions.add(InternationalizationExtension)

    environment = get_spontaneous_environment(
        options.get('block_start_string', BLOCK_START_STRING),
        options.get('block_end_string', BLOCK_END_STRING),
        options.get('variable_start_string', VARIABLE_START_STRING),
        options.get('variable_end_string', VARIABLE_END_STRING),
        options.get('comment_start_string', COMMENT_START_STRING),
        options.get('comment_end_string', COMMENT_END_STRING),
        options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
        options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
        str(options.get('trim_blocks', TRIM_BLOCKS)).lower() in \
            ('1', 'on', 'yes', 'true'),
        NEWLINE_SEQUENCE, frozenset(extensions),
        # fill with defaults so that environments are shared
        # with other spontaneous environments.  The rest of the
        # arguments are optimizer, undefined, finalize, autoescape,
        # loader, cache size, auto reloading setting and the
        # bytecode cache
        True, Undefined, None, False, None, 0, False, None
    )

    source = fileobj.read().decode(options.get('encoding', 'utf-8'))
    try:
        node = environment.parse(source)
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        # skip templates with syntax errors
        return

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(node, keywords):
        yield lineno, func, message, finder.find_comments(lineno)


#: nicer import names
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension