1# -*- coding: utf-8 -*-
2"""
3    jinja2.ext
4    ~~~~~~~~~~
5
6    Jinja extensions allow to add custom tags similar to the way django custom
7    tags work.  By default two example extensions exist: an i18n and a cache
8    extension.
9
10    :copyright: (c) 2010 by the Jinja Team.
11    :license: BSD.
12"""
13from collections import deque
14from jinja2 import nodes
15from jinja2.defaults import *
16from jinja2.environment import get_spontaneous_environment
17from jinja2.runtime import Undefined, concat
18from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
19from jinja2.utils import contextfunction, import_string, Markup, next
20
21
# Names of the callables that are treated as gettext functions in a Jinja
# template by default.  Note that ``ugettext`` must be installed under the
# name ``gettext`` because Jinja does not support non-unicode strings.
GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')
26
27
class ExtensionRegistry(type):
    """Metaclass that stamps every class it creates with an ``identifier``
    attribute built from the class' module name and its own name, joined by
    a dot.
    """

    def __new__(cls, name, bases, d):
        klass = super(ExtensionRegistry, cls).__new__(cls, name, bases, d)
        klass.identifier = '.'.join((klass.__module__, klass.__name__))
        return klass
35
36
class Extension(object):
    """Extensions can be used to add extra functionality to the Jinja template
    system at the parser level.  Custom extensions are bound to an environment
    but may not store environment specific data on `self`.  The reason for
    this is that an extension can be bound to another environment (for
    overlays) by creating a copy and reassigning the `environment` attribute.

    As extensions are created by the environment they cannot accept any
    arguments for configuration.  One may want to work around that by using
    a factory function, but that is not possible as extensions are identified
    by their import name.  The correct way to configure the extension is
    storing the configuration values on the environment.  Because this way the
    environment ends up acting as central configuration storage the
    attributes may clash which is why extensions have to ensure that the names
    they choose for configuration are not too generic.  ``prefix`` for example
    is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
    name as it includes the name of the extension (fragment cache).
    """
    # The metaclass assigns the ``identifier`` attribute (module name plus
    # class name) that :meth:`attr` uses to reference the extension.
    __metaclass__ = ExtensionRegistry

    #: if this extension parses this is the list of tags it's listening to.
    tags = set()

    #: the priority of that extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment):
        """Remember the `environment` this extension is bound to."""
        self.environment = environment

    def bind(self, environment):
        """Create a copy of this extension bound to another environment.

        The copy is created without calling ``__init__``; the instance
        dictionary is copied shallowly and only `environment` is replaced.
        """
        rv = object.__new__(self.__class__)
        rv.__dict__.update(self.__dict__)
        rv.environment = environment
        return rv

    def preprocess(self, source, name, filename=None):
        """This method is called before the actual lexing and can be used to
        preprocess the source.  The `filename` is optional.  The return value
        must be the preprocessed source.

        The default implementation returns `source` unchanged.
        """
        return source

    def filter_stream(self, stream):
        """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
        to filter tokens returned.  This method has to return an iterable of
        :class:`~jinja2.lexer.Token`\\s, but it doesn't have to return a
        :class:`~jinja2.lexer.TokenStream`.

        In the `ext` folder of the Jinja2 source distribution there is a file
        called `inlinegettext.py` which implements a filter that utilizes this
        method.

        The default implementation returns `stream` unchanged.
        """
        return stream

    def parse(self, parser):
        """If any of the :attr:`tags` matched this method is called with the
        parser as first argument.  The token the parser stream is pointing at
        is the name token that matched.  This method has to return one or a
        list of multiple nodes.

        The base implementation raises :exc:`NotImplementedError`.
        """
        raise NotImplementedError()

    def attr(self, name, lineno=None):
        """Return an attribute node for the current extension.  This is useful
        to pass constants on extensions to generated template code::

            self.attr('_my_attribute', lineno=lineno)
        """
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(self, name, args=None, kwargs=None, dyn_args=None,
                    dyn_kwargs=None, lineno=None):
        """Call a method of the extension.  This is a shortcut for
        :meth:`attr` + :class:`jinja2.nodes.Call`.

        `args` and `kwargs` default to empty lists when not provided.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = []
        return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
                          dyn_args, dyn_kwargs, lineno=lineno)
123
124
@contextfunction
def _gettext_alias(context, string):
    """Look up ``gettext`` in the template `context` and call it with
    `string`, returning its result.
    """
    translate = context.resolve('gettext')
    return translate(string)
128
129
class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja2.

    It handles the ``trans`` tag (with an optional ``pluralize`` section).
    On creation it registers ``_`` as a template global and extends the
    environment with callables for installing, uninstalling and extracting
    translations.
    """
    tags = set(['trans'])

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        """Bind to `environment` and register the i18n helpers on it."""
        Extension.__init__(self, environment)
        environment.globals['_'] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract
        )

    def _install(self, translations):
        """Install the gettext functions of `translations` as the template
        globals ``gettext`` and ``ngettext``.

        The ``ugettext`` / ``ungettext`` attributes are preferred when the
        object provides them; otherwise ``gettext`` / ``ngettext`` are used.
        """
        gettext = getattr(translations, 'ugettext', None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, 'ungettext', None)
        if ngettext is None:
            ngettext = translations.ngettext
        self.environment.globals.update(gettext=gettext, ngettext=ngettext)

    def _install_null(self):
        """Install no-op translation functions: ``gettext`` returns its
        argument and ``ngettext`` returns the plural form unless ``n == 1``.
        """
        self.environment.globals.update(
            gettext=lambda x: x,
            ngettext=lambda s, p, n: (n != 1 and (p,) or (s,))[0]
        )

    def _uninstall(self, translations):
        """Remove the ``gettext`` and ``ngettext`` globals if present.  The
        `translations` argument is accepted but not used.
        """
        for key in 'gettext', 'ngettext':
            self.environment.globals.pop(key, None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which may be template
        source (a string, parsed with the bound environment) or an already
        parsed node.  Delegates to :func:`extract_from_ast`.
        """
        if isinstance(source, basestring):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag.

        Reads the variable declarations in the ``trans`` tag header, the
        singular body and an optional ``pluralize`` body, and returns the
        output node created by :meth:`_make_node`.
        """
        lineno = next(parser.stream).lineno

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later state.
        plural_expr = None
        variables = {}
        while parser.stream.current.type != 'block_end':
            if variables:
                parser.stream.expect('comma')

            # skip colon for python compatibility
            if parser.stream.skip_if('colon'):
                break

            name = parser.stream.expect('name')
            if name.value in variables:
                parser.fail('translatable variable %r defined twice.' %
                            name.value, name.lineno,
                            exc=TemplateAssertionError)

            # expressions
            if parser.stream.current.type == 'assign':
                next(parser.stream)
                variables[name.value] = var = parser.parse_expression()
            else:
                variables[name.value] = var = nodes.Name(name.value, 'load')
            # the first declared variable is the default pluralization count
            if plural_expr is None:
                plural_expr = var

        parser.stream.expect('block_end')

        plural = plural_names = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            # without declared variables the first name used in the body
            # becomes the default pluralization count
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], 'load')

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test('name:pluralize'):
            have_plural = True
            next(parser.stream)
            if parser.stream.current.type != 'block_end':
                name = parser.stream.expect('name')
                if name.value not in variables:
                    parser.fail('unknown variable %r for pluralization' %
                                name.value, name.lineno,
                                exc=TemplateAssertionError)
                plural_expr = variables[name.value]
            parser.stream.expect('block_end')
            plural_names, plural = self._parse_block(parser, False)
            next(parser.stream)
            referenced.update(plural_names)
        else:
            next(parser.stream)

        # register free names as simple name expressions
        for var in referenced:
            if var not in variables:
                variables[var] = nodes.Name(var, 'load')

        # The ``%%`` escaping added by `_parse_block` may only be undone
        # when the result is not passed through ``%`` formatting.  The
        # formatting is applied whenever *any* variable exists (declared in
        # the tag header or referenced in the body), so the decision has to
        # be based on `variables` and not only on the referenced names.
        # Otherwise a declared but unused variable combined with a literal
        # percent sign would break the formatting at render time.
        if not variables:
            singular = singular.replace('%%', '%')
            if plural:
                plural = plural.replace('%%', '%')

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail('pluralize without variables', lineno)

        if variables:
            variables = nodes.Dict([nodes.Pair(nodes.Const(x, lineno=lineno), y)
                                    for x, y in variables.items()])
        else:
            variables = None

        node = self._make_node(singular, plural, variables, plural_expr)
        node.set_lineno(lineno)
        return node

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Collects template data and ``{{ name }}`` references up to the next
        ``endtrans`` tag, or ``pluralize`` tag if `allow_pluralize` is true.
        Literal ``%`` characters are doubled and references are turned into
        ``%(name)s`` placeholders.  Returns a tuple of the list of referenced
        names and the assembled format string.  The stream is left pointing
        at the name token of the terminating tag.
        """
        referenced = []
        buf = []
        while 1:
            if parser.stream.current.type == 'data':
                buf.append(parser.stream.current.value.replace('%', '%%'))
                next(parser.stream)
            elif parser.stream.current.type == 'variable_begin':
                next(parser.stream)
                name = parser.stream.expect('name').value
                referenced.append(name)
                buf.append('%%(%s)s' % name)
                parser.stream.expect('variable_end')
            elif parser.stream.current.type == 'block_begin':
                next(parser.stream)
                if parser.stream.current.test('name:endtrans'):
                    break
                elif parser.stream.current.test('name:pluralize'):
                    if allow_pluralize:
                        break
                    parser.fail('a translatable section can have only one '
                                'pluralize section')
                parser.fail('control structures in translatable sections are '
                            'not allowed')
            elif parser.stream.eos:
                parser.fail('unclosed translation block')
            else:
                assert False, 'internal parser error'

        return referenced, concat(buf)

    def _make_node(self, singular, plural, variables, plural_expr):
        """Generates a useful node from the data provided.

        Builds a call to ``gettext`` (no `plural_expr`) or ``ngettext``,
        marks it safe if the environment autoescapes, applies ``%``
        formatting with `variables` if given, and wraps the result in an
        output node.
        """
        # singular only:
        if plural_expr is None:
            gettext = nodes.Name('gettext', 'load')
            node = nodes.Call(gettext, [nodes.Const(singular)],
                              [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name('ngettext', 'load')
            node = nodes.Call(ngettext, [
                nodes.Const(singular),
                nodes.Const(plural),
                plural_expr
            ], [], None, None)

        # mark the return value as safe if we are in an
        # environment with autoescaping turned on
        if self.environment.autoescape:
            node = nodes.MarkSafe(node)

        if variables:
            node = nodes.Mod(node, variables)
        return nodes.Output([node])
321
322
class ExprStmtExtension(Extension):
    """Provides the `do` tag: the expression after the tag is evaluated like
    in a print statement, but its return value is not printed.
    """
    tags = set(['do'])

    def parse(self, parser):
        """Consume the ``do`` tag and return an expression statement node
        holding the tuple expression that follows it.
        """
        stmt = nodes.ExprStmt(lineno=next(parser.stream).lineno)
        expression = parser.parse_tuple()
        stmt.node = expression
        return stmt
333
334
class LoopControlExtension(Extension):
    """Provides the ``break`` and ``continue`` tags for template loops."""
    tags = set(['break', 'continue'])

    def parse(self, parser):
        """Consume the tag token and return the matching loop control node
        carrying the tag's line number.
        """
        tag = next(parser.stream)
        if tag.value != 'break':
            return nodes.Continue(lineno=tag.lineno)
        return nodes.Break(lineno=tag.lineno)
344
345
class WithExtension(Extension):
    """Provides a django-like ``with`` block."""
    tags = set(['with'])

    def parse(self, parser):
        """Parse the comma separated ``target = expression`` pairs of the
        tag header followed by the block body up to ``endwith``.  Returns a
        scope node whose body is the assignments followed by the statements.
        """
        scope = nodes.Scope(lineno=next(parser.stream).lineno)
        body = []
        while parser.stream.current.type != 'block_end':
            lineno = parser.stream.current.lineno
            # every assignment but the first is preceded by a comma
            if body:
                parser.stream.expect('comma')
            target = parser.parse_assign_target()
            parser.stream.expect('assign')
            value = parser.parse_expression()
            body.append(nodes.Assign(target, value, lineno=lineno))
        statements = parser.parse_statements(('name:endwith',),
                                             drop_needle=True)
        body.extend(statements)
        scope.body = body
        return scope
365
366
class AutoEscapeExtension(Extension):
    """Provides the ``autoescape`` block which changes the auto escape
    setting for the enclosed scope.
    """
    tags = set(['autoescape'])

    def parse(self, parser):
        """Parse the expression after the tag as the new ``autoescape``
        option, then the body up to ``endautoescape``, and return the
        modifier wrapped in a scope node.
        """
        tag = next(parser.stream)
        modifier = nodes.ScopedEvalContextModifier(lineno=tag.lineno)
        setting = parser.parse_expression()
        modifier.options = [nodes.Keyword('autoescape', setting)]
        modifier.body = parser.parse_statements(('name:endautoescape',),
                                                drop_needle=True)
        return nodes.Scope([modifier])
379
380
def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
                     babel_style=True):
    """Extract localizable strings from the given template node.

    Every call node below `node` whose callee is a plain name listed in
    `gettext_functions` is inspected.  Constant string arguments are kept,
    every other positional argument, every keyword argument and the dynamic
    ``*args`` / ``**kwargs`` are represented by `None`.

    With `babel_style` enabled (the default) a call with exactly one
    argument yields that single value and any other call yields a tuple of
    all collected values, including the `None` placeholders.  This allows
    Babel to figure out what you really meant if you are using gettext
    functions that allow keyword arguments for placeholder expansion.  With
    `babel_style` disabled the `None` placeholders are dropped, the message
    is always a tuple, and calls without any string argument are skipped.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    For every matching call this function yields a ``(lineno, function,
    message)`` tuple, where:

    * ``lineno`` is the line number of the call node,
    * ``function`` is the name of the ``gettext`` function used, and
    * ``message`` is the string itself or a tuple as described above.

    This extraction function operates on the AST and is because of that unable
    to extract any comments.  For comment support you have to use the babel
    extraction interface or extract comments yourself.
    """
    for call in node.find_all(nodes.Call):
        func = call.node
        if not (isinstance(func, nodes.Name) and
                func.name in gettext_functions):
            continue

        # constant strings are kept, anything else becomes a placeholder
        found = []
        for arg in call.args:
            if isinstance(arg, nodes.Const) and \
               isinstance(arg.value, basestring):
                found.append(arg.value)
            else:
                found.append(None)

        # one placeholder per keyword argument and per dynamic argument
        found.extend(None for _kw in call.kwargs)
        for dynamic in (call.dyn_args, call.dyn_kwargs):
            if dynamic is not None:
                found.append(None)

        if babel_style:
            if len(found) == 1:
                message = found[0]
            else:
                message = tuple(found)
        else:
            message = tuple([x for x in found if x is not None])
            if not message:
                continue
        yield call.lineno, func.name, message
447
448
449class _CommentFinder(object):
450    """Helper class to find comments in a token stream.  Can only
451    find comments for gettext calls forwards.  Once the comment
452    from line 4 is found, a comment for line 1 will not return a
453    usable value.
454    """
455
456    def __init__(self, tokens, comment_tags):
457        self.tokens = tokens
458        self.comment_tags = comment_tags
459        self.offset = 0
460        self.last_lineno = 0
461
462    def find_backwards(self, offset):
463        try:
464            for _, token_type, token_value in \
465                    reversed(self.tokens[self.offset:offset]):
466                if token_type in ('comment', 'linecomment'):
467                    try:
468                        prefix, comment = token_value.split(None, 1)
469                    except ValueError:
470                        continue
471                    if prefix in self.comment_tags:
472                        return [comment.rstrip()]
473            return []
474        finally:
475            self.offset = offset
476
477    def find_comments(self, lineno):
478        if not self.comment_tags or self.last_lineno > lineno:
479            return []
480        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
481            if token_lineno > lineno:
482                return self.find_backwards(self.offset + idx)
483        return self.find_backwards(len(self.tokens))
484
485
def babel_extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    Templates that fail to parse with a template syntax error are skipped
    silently, i.e. nothing is yielded for them.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
             ``comments`` is a list that is empty if no matching translator
             comment was found.
    """
    extensions = set()
    for extension in options.get('extensions', '').split(','):
        extension = extension.strip()
        if not extension:
            continue
        extensions.add(import_string(extension))
    # the i18n extension is always needed; adding it twice is harmless
    extensions.add(InternationalizationExtension)

    environment = get_spontaneous_environment(
        options.get('block_start_string', BLOCK_START_STRING),
        options.get('block_end_string', BLOCK_END_STRING),
        options.get('variable_start_string', VARIABLE_START_STRING),
        options.get('variable_end_string', VARIABLE_END_STRING),
        options.get('comment_start_string', COMMENT_START_STRING),
        options.get('comment_end_string', COMMENT_END_STRING),
        options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
        options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
        str(options.get('trim_blocks', TRIM_BLOCKS)).lower() in \
            ('1', 'on', 'yes', 'true'),
        NEWLINE_SEQUENCE, frozenset(extensions),
        # fill with defaults so that environments are shared
        # with other spontaneous environments.  The rest of the
        # arguments are optimizer, undefined, finalize, autoescape,
        # loader, cache size, auto reloading setting and the
        # bytecode cache
        True, Undefined, None, False, None, 0, False, None
    )

    source = fileobj.read().decode(options.get('encoding', 'utf-8'))
    try:
        node = environment.parse(source)
        # the raw tokens are only needed to look up translator comments
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        # skip templates with syntax errors
        return

    finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(node, keywords):
        yield lineno, func, message, finder.find_comments(lineno)
546
547
#: nicer import names: short aliases for the extension classes defined in
#: this module.
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension
554