1"""Extension API for adding custom tags and behavior."""
2import pprint
3import re
4import typing as t
5import warnings
6
7from markupsafe import Markup
8
9from . import defaults
10from . import nodes
11from .environment import Environment
12from .exceptions import TemplateAssertionError
13from .exceptions import TemplateSyntaxError
14from .runtime import concat  # type: ignore
15from .runtime import Context
16from .runtime import Undefined
17from .utils import import_string
18from .utils import pass_context
19
# Everything in this block is only evaluated by static type checkers; at
# runtime ``t.TYPE_CHECKING`` is false and none of these names exist, which
# is why the rest of the module refers to them in quoted annotations.
if t.TYPE_CHECKING:
    import typing_extensions as te
    from .lexer import Token
    from .lexer import TokenStream
    from .parser import Parser

    # Structural type: any object offering ``gettext`` and ``ngettext``.
    class _TranslationsBasic(te.Protocol):
        def gettext(self, message: str) -> str:
            ...

        def ngettext(self, singular: str, plural: str, n: int) -> str:
            pass

    # Structural type: the basic API plus the context-aware variants.
    class _TranslationsContext(_TranslationsBasic):
        def pgettext(self, context: str, message: str) -> str:
            ...

        def npgettext(self, context: str, singular: str, plural: str, n: int) -> str:
            ...

    # Annotation used for the ``translations`` argument of the i18n
    # extension's install/uninstall helpers.
    _SupportedTranslations = t.Union[_TranslationsBasic, _TranslationsContext]
41
42
# I18N functions available in Jinja templates. If the I18N library
# provides ugettext, it will be assigned to gettext.  This tuple is also
# the default set of function names searched for when extracting messages.
GETTEXT_FUNCTIONS: t.Tuple[str, ...] = (
    "_",
    "gettext",
    "ngettext",
    "pgettext",
    "npgettext",
)
# Matches a line break together with any whitespace directly around it.
# InternationalizationExtension._trim_whitespace replaces each match with a
# single space.
_ws_re = re.compile(r"\s*\n\s*")
53
54
class Extension:
    """Base class for extensions, which add functionality to the Jinja
    template system at the parser level.

    An extension is bound to an environment but must not keep environment
    specific data on `self`: for overlays an extension can be bound to
    another environment by copying it and reassigning the `environment`
    attribute (see :meth:`bind`).

    Because the environment creates the extensions, they cannot take
    configuration arguments.  A factory function is not a way around that
    either, since extensions are identified by their import name.
    Configuration values therefore belong on the environment.  As the
    environment then acts as the central configuration storage, attribute
    names may clash, so extensions should pick names that are not too
    generic: ``prefix`` is a terrible name, ``fragment_cache_prefix`` is a
    good one because it contains the name of the extension (fragment cache).
    """

    identifier: t.ClassVar[str]

    def __init_subclass__(cls) -> None:
        # Each subclass is identified by its dotted import name.
        module_name, class_name = cls.__module__, cls.__name__
        cls.identifier = f"{module_name}.{class_name}"

    #: the set of tag names this extension wants to parse, if any.
    tags: t.Set[str] = set()

    #: the priority of that extension.  This is especially useful for
    #: extensions that preprocess values.  A lower value means higher
    #: priority.
    #:
    #: .. versionadded:: 2.4
    priority = 100

    def __init__(self, environment: Environment) -> None:
        self.environment = environment

    def bind(self, environment: Environment) -> "Extension":
        """Return a copy of this extension that is bound to `environment`.

        The copy is created without running ``__init__``; it receives the
        instance attributes of the original and then has its `environment`
        attribute replaced.
        """
        clone = t.cast(Extension, object.__new__(self.__class__))
        clone.__dict__.update(self.__dict__)
        clone.environment = environment
        return clone

    def preprocess(
        self, source: str, name: t.Optional[str], filename: t.Optional[str] = None
    ) -> str:
        """Hook that is called before the actual lexing and may rewrite the
        template source.  `filename` is optional.  The preprocessed source
        has to be returned; the default implementation returns `source`
        unchanged.
        """
        return source

    def filter_stream(
        self, stream: "TokenStream"
    ) -> t.Union["TokenStream", t.Iterable["Token"]]:
        """Hook that receives a :class:`~jinja2.lexer.TokenStream` and can
        filter the tokens it yields.  The return value has to be an iterable
        of :class:`~jinja2.lexer.Token` objects, but it does not have to be
        a :class:`~jinja2.lexer.TokenStream`.  The default implementation
        returns `stream` unchanged.
        """
        return stream

    def parse(self, parser: "Parser") -> t.Union[nodes.Node, t.List[nodes.Node]]:
        """Called with the parser as first argument when one of the
        :attr:`tags` matched.  The parser stream then points at the name
        token that matched.  Implementations have to return one node or a
        list of nodes; the base implementation raises
        :exc:`NotImplementedError`.
        """
        raise NotImplementedError()

    def attr(
        self, name: str, lineno: t.Optional[int] = None
    ) -> nodes.ExtensionAttribute:
        """Build an attribute node that refers to `name` on this extension.
        This is useful to pass constants on extensions to generated template
        code.

        ::

            self.attr('_my_attribute', lineno=lineno)
        """
        return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)

    def call_method(
        self,
        name: str,
        args: t.Optional[t.List[nodes.Expr]] = None,
        kwargs: t.Optional[t.List[nodes.Keyword]] = None,
        dyn_args: t.Optional[nodes.Expr] = None,
        dyn_kwargs: t.Optional[nodes.Expr] = None,
        lineno: t.Optional[int] = None,
    ) -> nodes.Call:
        """Build a node that calls the method `name` of this extension.
        Shortcut for :meth:`attr` + :class:`jinja2.nodes.Call`.

        `args` and `kwargs` default to empty lists when omitted.
        """
        positional = [] if args is None else args
        keywords = [] if kwargs is None else kwargs
        method = self.attr(name, lineno=lineno)
        return nodes.Call(
            method, positional, keywords, dyn_args, dyn_kwargs, lineno=lineno
        )
162
163
@pass_context
def _gettext_alias(
    __context: Context, *args: t.Any, **kwargs: t.Any
) -> t.Union[t.Any, Undefined]:
    """Forward a call to whatever ``gettext`` resolves to in the context.

    Installed as the ``_`` global by the i18n extension, so the lookup of
    ``gettext`` happens on every call rather than once at install time.
    """
    current_gettext = __context.resolve("gettext")
    return __context.call(current_gettext, *args, **kwargs)
169
170
def _make_new_gettext(func: t.Callable[[str], str]) -> t.Callable[..., str]:
    """Wrap `func` into a "newstyle" ``gettext`` callable.

    The wrapper looks up the message through the context's ``call`` method,
    marks the result as :class:`Markup` when autoescaping is active and
    then %-formats it with the keyword arguments it received.
    """

    @pass_context
    def gettext(__context: Context, __string: str, **variables: t.Any) -> str:
        translated = __context.call(func, __string)
        autoescape = __context.eval_ctx.autoescape
        message = Markup(translated) if autoescape else translated
        # The message is formatted even when no variables were passed, so
        # translation strings behave the same in both cases.  As a result a
        # literal percent sign in a message has to be written as "%%".
        return message % variables  # type: ignore

    return gettext
183
184
def _make_new_ngettext(func: t.Callable[[str, str, int], str]) -> t.Callable[..., str]:
    """Wrap `func` into a "newstyle" ``ngettext`` callable.

    The count is made available to the message as ``num`` unless the caller
    passed its own ``num`` keyword argument.  The result is marked as
    :class:`Markup` when autoescaping is active and then %-formatted with
    the variables.
    """

    @pass_context
    def ngettext(
        __context: Context,
        __singular: str,
        __plural: str,
        __num: int,
        **variables: t.Any,
    ) -> str:
        variables.setdefault("num", __num)
        translated = __context.call(func, __singular, __plural, __num)
        autoescape = __context.eval_ctx.autoescape
        message = Markup(translated) if autoescape else translated
        # Formatting is unconditional, so "%" must be escaped as "%%".
        return message % variables  # type: ignore

    return ngettext
202
203
def _make_new_pgettext(func: t.Callable[[str, str], str]) -> t.Callable[..., str]:
    """Wrap `func` into a "newstyle" ``pgettext`` callable.

    The message context is made available to the message as ``context``
    unless the caller passed its own ``context`` keyword argument.  The
    result is marked as :class:`Markup` when autoescaping is active and
    then %-formatted with the variables.
    """

    @pass_context
    def pgettext(
        __context: Context, __string_ctx: str, __string: str, **variables: t.Any
    ) -> str:
        variables.setdefault("context", __string_ctx)
        translated = __context.call(func, __string_ctx, __string)
        autoescape = __context.eval_ctx.autoescape
        message = Markup(translated) if autoescape else translated
        # Formatting is unconditional, so "%" must be escaped as "%%".
        return message % variables  # type: ignore

    return pgettext
219
220
def _make_new_npgettext(
    func: t.Callable[[str, str, str, int], str]
) -> t.Callable[..., str]:
    """Wrap `func` into a "newstyle" ``npgettext`` callable.

    The message context and the count are made available to the message as
    ``context`` and ``num`` unless the caller passed keyword arguments with
    those names.  The result is marked as :class:`Markup` when autoescaping
    is active and then %-formatted with the variables.
    """

    @pass_context
    def npgettext(
        __context: Context,
        __string_ctx: str,
        __singular: str,
        __plural: str,
        __num: int,
        **variables: t.Any,
    ) -> str:
        for key, fallback in (("context", __string_ctx), ("num", __num)):
            variables.setdefault(key, fallback)

        translated = __context.call(func, __string_ctx, __singular, __plural, __num)
        autoescape = __context.eval_ctx.autoescape
        message = Markup(translated) if autoescape else translated
        # Formatting is unconditional, so "%" must be escaped as "%%".
        return message % variables  # type: ignore

    return npgettext
244
245
class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja.

    It parses the ``{% trans %}`` tag (with an optional ``{% pluralize %}``
    section) and, on construction, registers helpers on the environment for
    installing, uninstalling and extracting translations.
    """

    tags = {"trans"}

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment: Environment) -> None:
        super().__init__(environment)
        # ``_`` always forwards to whatever ``gettext`` currently resolves
        # to in the template context.
        environment.globals["_"] = _gettext_alias
        # Register the helper callables and the ``newstyle_gettext`` flag
        # on the environment; the flag is read back later through
        # ``self.environment.newstyle_gettext``.
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            install_gettext_callables=self._install_callables,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract,
            newstyle_gettext=False,
        )

    def _install(
        self, translations: "_SupportedTranslations", newstyle: t.Optional[bool] = None
    ) -> None:
        """Install the callables of a translations object as globals.

        :param translations: object providing ``gettext``/``ngettext`` (or
            ``ugettext``/``ungettext``) and optionally ``pgettext`` and
            ``npgettext``.
        :param newstyle: forwarded to :meth:`_install_callables`.
        """
        # ugettext and ungettext are preferred in case the I18N library
        # is providing compatibility with older Python versions.
        gettext = getattr(translations, "ugettext", None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, "ungettext", None)
        if ngettext is None:
            ngettext = translations.ngettext

        # The context-aware variants are optional; ``None`` is passed on
        # when the translations object does not provide them.
        pgettext = getattr(translations, "pgettext", None)
        npgettext = getattr(translations, "npgettext", None)
        self._install_callables(
            gettext, ngettext, newstyle=newstyle, pgettext=pgettext, npgettext=npgettext
        )

    def _install_null(self, newstyle: t.Optional[bool] = None) -> None:
        """Install the callables of :class:`gettext.NullTranslations`.

        :param newstyle: forwarded to :meth:`_install_callables`.
        """
        import gettext

        translations = gettext.NullTranslations()

        if hasattr(translations, "pgettext"):
            # Python >= 3.8: NullTranslations provides pgettext itself.
            pgettext = translations.pgettext  # type: ignore
        else:
            # Older Python: fall back to returning the message unchanged.

            def pgettext(c: str, s: str) -> str:
                return s

        if hasattr(translations, "npgettext"):
            npgettext = translations.npgettext  # type: ignore
        else:
            # Older Python: pick singular or plural by the count only.

            def npgettext(c: str, s: str, p: str, n: int) -> str:
                return s if n == 1 else p

        self._install_callables(
            gettext=translations.gettext,
            ngettext=translations.ngettext,
            newstyle=newstyle,
            pgettext=pgettext,
            npgettext=npgettext,
        )

    def _install_callables(
        self,
        gettext: t.Callable[[str], str],
        ngettext: t.Callable[[str, str, int], str],
        newstyle: t.Optional[bool] = None,
        pgettext: t.Optional[t.Callable[[str, str], str]] = None,
        npgettext: t.Optional[t.Callable[[str, str, str, int], str]] = None,
    ) -> None:
        """Install the given callables as environment globals.

        :param gettext: callable translating a single message.
        :param ngettext: callable translating a singular/plural pair.
        :param newstyle: if not ``None``, stored as the environment's
            ``newstyle_gettext`` flag before it is consulted.
        :param pgettext: optional context-aware ``gettext``.
        :param npgettext: optional context-aware ``ngettext``.

        When the flag is set the callables are wrapped by the
        ``_make_new_*`` helpers.  All four globals are always written; the
        optional ones are set to ``None`` when not supplied.
        """
        if newstyle is not None:
            self.environment.newstyle_gettext = newstyle  # type: ignore
        if self.environment.newstyle_gettext:  # type: ignore
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)

            if pgettext is not None:
                pgettext = _make_new_pgettext(pgettext)

            if npgettext is not None:
                npgettext = _make_new_npgettext(npgettext)

        self.environment.globals.update(
            gettext=gettext, ngettext=ngettext, pgettext=pgettext, npgettext=npgettext
        )

    def _uninstall(self, translations: "_SupportedTranslations") -> None:
        """Remove the gettext globals from the environment.

        The `translations` argument is not used by this implementation;
        missing globals are ignored.
        """
        for key in ("gettext", "ngettext", "pgettext", "npgettext"):
            self.environment.globals.pop(key, None)

    def _extract(
        self,
        source: t.Union[str, nodes.Template],
        gettext_functions: t.Sequence[str] = GETTEXT_FUNCTIONS,
    ) -> t.Iterator[
        t.Tuple[int, str, t.Union[t.Optional[str], t.Tuple[t.Optional[str], ...]]]
    ]:
        """Extract translatable strings from template source or a parsed
        template node.  A string is parsed with the environment first; the
        work is then delegated to :func:`extract_from_ast`.
        """
        if isinstance(source, str):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser: "Parser") -> t.Union[nodes.Node, t.List[nodes.Node]]:
        """Parse a translatable tag.

        Reads the tag header (variable definitions and the optional
        ``trimmed``/``notrimmed`` modifier), the singular body and an
        optional ``pluralize`` section, then builds the output node with
        :meth:`_make_node`.

        :param parser: the parser; its stream points at the tag name token.
        :return: the output node, or the list ``[assignment, output node]``
            when the first header variable was a call expression that was
            stored under the temporary name ``_trans``.
        """
        # Consume the tag name token and remember its line number.
        lineno = next(parser.stream).lineno
        num_called_num = False

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later state.
        plural_expr: t.Optional[nodes.Expr] = None
        plural_expr_assignment: t.Optional[nodes.Assign] = None
        variables: t.Dict[str, nodes.Expr] = {}
        # ``None`` means the tag did not say; the policy decides later.
        trimmed = None
        while parser.stream.current.type != "block_end":
            # Every variable after the first one is separated by a comma.
            if variables:
                parser.stream.expect("comma")

            # skip colon for python compatibility
            if parser.stream.skip_if("colon"):
                break

            token = parser.stream.expect("name")
            if token.value in variables:
                parser.fail(
                    f"translatable variable {token.value!r} defined twice.",
                    token.lineno,
                    exc=TemplateAssertionError,
                )

            # expressions
            if parser.stream.current.type == "assign":
                # ``name=expression``
                next(parser.stream)
                variables[token.value] = var = parser.parse_expression()
            elif trimmed is None and token.value in ("trimmed", "notrimmed"):
                # The first bare ``trimmed``/``notrimmed`` is a modifier,
                # not a variable.
                trimmed = token.value == "trimmed"
                continue
            else:
                # A bare name is loaded from the context under that name.
                variables[token.value] = var = nodes.Name(token.value, "load")

            # The first variable of the header is the default plural
            # expression.
            if plural_expr is None:
                if isinstance(var, nodes.Call):
                    # Assign the call to the name ``_trans`` and let both
                    # the variable and the plural expression load that name.
                    plural_expr = nodes.Name("_trans", "load")
                    variables[token.value] = plural_expr
                    plural_expr_assignment = nodes.Assign(
                        nodes.Name("_trans", "store"), var
                    )
                else:
                    plural_expr = var
                num_called_num = token.value == "num"

        parser.stream.expect("block_end")

        plural = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            # Without header variables the first name used in the body
            # becomes the plural expression.
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], "load")
                num_called_num = singular_names[0] == "num"

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test("name:pluralize"):
            have_plural = True
            next(parser.stream)
            # ``{% pluralize name %}`` selects which header variable holds
            # the count.
            if parser.stream.current.type != "block_end":
                token = parser.stream.expect("name")
                if token.value not in variables:
                    parser.fail(
                        f"unknown variable {token.value!r} for pluralization",
                        token.lineno,
                        exc=TemplateAssertionError,
                    )
                plural_expr = variables[token.value]
                num_called_num = token.value == "num"
            parser.stream.expect("block_end")
            plural_names, plural = self._parse_block(parser, False)
            # Consume the ``endtrans`` name token _parse_block stopped at.
            next(parser.stream)
            referenced.update(plural_names)
        else:
            # Consume the ``endtrans`` name token _parse_block stopped at.
            next(parser.stream)

        # register free names as simple name expressions
        for name in referenced:
            if name not in variables:
                variables[name] = nodes.Name(name, "load")

        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail("pluralize without variables", lineno)

        if trimmed is None:
            trimmed = self.environment.policies["ext.i18n.trimmed"]
        if trimmed:
            singular = self._trim_whitespace(singular)
            if plural:
                plural = self._trim_whitespace(plural)

        node = self._make_node(
            singular,
            plural,
            variables,
            plural_expr,
            bool(referenced),
            num_called_num and have_plural,
        )
        node.set_lineno(lineno)
        if plural_expr_assignment is not None:
            return [plural_expr_assignment, node]
        else:
            return node

    def _trim_whitespace(self, string: str, _ws_re: t.Pattern[str] = _ws_re) -> str:
        """Strip `string` and replace every line break, including the
        whitespace around it, with a single space.
        """
        return _ws_re.sub(" ", string.strip())

    def _parse_block(
        self, parser: "Parser", allow_pluralize: bool
    ) -> t.Tuple[t.List[str], str]:
        """Parse until the next block tag with a given name.

        :param parser: the parser positioned at the start of the body.
        :param allow_pluralize: whether a ``pluralize`` tag may end the
            body; if not, meeting one is reported as an error.
        :return: ``(referenced, message)`` where `referenced` lists the
            variable names used in the body in order of appearance and
            `message` is the body as a format string: literal ``%`` is
            doubled and each variable becomes ``%(name)s``.

        On return the stream points at the ``endtrans`` or ``pluralize``
        name token; it has not been consumed.
        """
        referenced = []
        buf = []

        while True:
            if parser.stream.current.type == "data":
                # Escape literal percent signs for later %-formatting.
                buf.append(parser.stream.current.value.replace("%", "%%"))
                next(parser.stream)
            elif parser.stream.current.type == "variable_begin":
                # Only a plain name is accepted inside ``{{ ... }}``.
                next(parser.stream)
                name = parser.stream.expect("name").value
                referenced.append(name)
                buf.append(f"%({name})s")
                parser.stream.expect("variable_end")
            elif parser.stream.current.type == "block_begin":
                next(parser.stream)
                if parser.stream.current.test("name:endtrans"):
                    break
                elif parser.stream.current.test("name:pluralize"):
                    if allow_pluralize:
                        break
                    parser.fail(
                        "a translatable section can have only one pluralize section"
                    )
                parser.fail(
                    "control structures in translatable sections are not allowed"
                )
            elif parser.stream.eos:
                parser.fail("unclosed translation block")
            else:
                raise RuntimeError("internal parser error")

        return referenced, concat(buf)

    def _make_node(
        self,
        singular: str,
        plural: t.Optional[str],
        variables: t.Dict[str, nodes.Expr],
        plural_expr: t.Optional[nodes.Expr],
        vars_referenced: bool,
        num_called_num: bool,
    ) -> nodes.Output:
        """Generates a useful node from the data provided.

        :param singular: singular message as produced by
            :meth:`_parse_block` (literal ``%`` doubled).
        :param plural: plural message in the same form, or ``None``.
        :param variables: expressions available for placeholder expansion,
            keyed by name.
        :param plural_expr: expression holding the count; ``None`` selects
            a plain ``gettext`` call instead of ``ngettext``.
        :param vars_referenced: whether the message bodies referenced any
            variable.  Accepted for interface compatibility; whether the
            message is formatted is decided from `variables`.
        :param num_called_num: whether the count variable is named ``num``,
            in which case newstyle calls do not pass it as a keyword.
        :return: an output node wrapping the gettext call.
        """
        newstyle = self.environment.newstyle_gettext  # type: ignore
        node: nodes.Expr

        # The messages arrive with every literal "%" doubled so that they
        # are safe format strings.  Undo that only when no %-formatting
        # will happen: newstyle callables always format, and the old style
        # branch below formats exactly when ``variables`` is non-empty.
        # Deciding on ``variables`` instead of ``vars_referenced`` matters
        # for tags that define a variable the body never uses, such as
        # ``{% trans x=1 %}100%{% endtrans %}``: the message is still
        # formatted, so an unescaped "%" would raise at render time.
        if not variables and not newstyle:
            singular = singular.replace("%%", "%")
            if plural:
                plural = plural.replace("%%", "%")

        # singular only:
        if plural_expr is None:
            gettext = nodes.Name("gettext", "load")
            node = nodes.Call(gettext, [nodes.Const(singular)], [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name("ngettext", "load")
            node = nodes.Call(
                ngettext,
                [nodes.Const(singular), nodes.Const(plural), plural_expr],
                [],
                None,
                None,
            )

        # in case newstyle gettext is used, the method is powerful
        # enough to handle the variable expansion and autoescape
        # handling itself
        if newstyle:
            for key, value in variables.items():
                # the function adds that later anyways in case num was
                # called num, so just skip it.
                if num_called_num and key == "num":
                    continue
                node.kwargs.append(nodes.Keyword(key, value))

        # otherwise do that here
        else:
            # mark the return value as safe if we are in an
            # environment with autoescaping turned on
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                node = nodes.Mod(
                    node,
                    nodes.Dict(
                        [
                            nodes.Pair(nodes.Const(key), value)
                            for key, value in variables.items()
                        ]
                    ),
                )
        return nodes.Output([node])
573
574
class ExprStmtExtension(Extension):
    """Provides the `do` tag, which works like the print statement except
    that the value of the expression is not printed.
    """

    tags = {"do"}

    def parse(self, parser: "Parser") -> nodes.ExprStmt:
        # Consume the tag name token first, then parse the expression that
        # follows it.
        tag_line = next(parser.stream).lineno
        statement = nodes.ExprStmt(lineno=tag_line)
        statement.node = parser.parse_tuple()
        return statement
586
587
class LoopControlExtension(Extension):
    """Provides the ``break`` and ``continue`` tags."""

    tags = {"break", "continue"}

    def parse(self, parser: "Parser") -> t.Union[nodes.Break, nodes.Continue]:
        # The matched tag name decides the node type; any name other than
        # "break" produces a Continue node.
        tag = next(parser.stream)
        node_type = nodes.Break if tag.value == "break" else nodes.Continue
        return node_type(lineno=tag.lineno)
598
599
class WithExtension(Extension):
    # Deprecated: apart from the base initialisation, creating an instance
    # only emits a DeprecationWarning.
    def __init__(self, environment: Environment) -> None:
        super().__init__(environment)
        message = (
            "The 'with' extension is deprecated and will be removed in"
            " Jinja 3.1. This is built in now."
        )
        warnings.warn(message, DeprecationWarning, stacklevel=3)
609
610
class AutoEscapeExtension(Extension):
    # Deprecated: apart from the base initialisation, creating an instance
    # only emits a DeprecationWarning.
    def __init__(self, environment: Environment) -> None:
        super().__init__(environment)
        message = (
            "The 'autoescape' extension is deprecated and will be"
            " removed in Jinja 3.1. This is built in now."
        )
        warnings.warn(message, DeprecationWarning, stacklevel=3)
620
621
class DebugExtension(Extension):
    """Provides a ``{% debug %}`` tag that renders a dump of the variables
    in the current context together with the names of the environment's
    filters and tests.

    .. code-block:: html+jinja

        <pre>{% debug %}</pre>

    .. code-block:: text

        {'context': {'cycler': <class 'jinja2.utils.Cycler'>,
                     ...,
                     'namespace': <class 'jinja2.utils.Namespace'>},
         'filters': ['abs', 'attr', 'batch', 'capitalize', 'center', 'count', 'd',
                     ..., 'urlencode', 'urlize', 'wordcount', 'wordwrap', 'xmlattr'],
         'tests': ['!=', '<', '<=', '==', '>', '>=', 'callable', 'defined',
                   ..., 'odd', 'sameas', 'sequence', 'string', 'undefined', 'upper']}

    .. versionadded:: 2.11.0
    """

    tags = {"debug"}

    def parse(self, parser: "Parser") -> nodes.Output:
        # The tag takes no arguments: emit a call to ``_render`` with a
        # reference to the current context as its only argument.
        tag_line = parser.stream.expect("name:debug").lineno
        context_ref = nodes.ContextReference()
        dump_call = self.call_method("_render", [context_ref], lineno=tag_line)
        return nodes.Output([dump_call], lineno=tag_line)

    def _render(self, context: Context) -> str:
        """Return the pretty-printed debug dump for `context`."""
        env = self.environment
        report = {
            "context": context.get_all(),
            "filters": sorted(env.filters.keys()),
            "tests": sorted(env.tests.keys()),
        }

        # The depth is limited because only the top few levels of names are
        # meant to be shown.
        return pprint.pformat(report, depth=3, compact=True)
660
661
def extract_from_ast(
    ast: nodes.Template,
    gettext_functions: t.Sequence[str] = GETTEXT_FUNCTIONS,
    babel_style: bool = True,
) -> t.Iterator[
    t.Tuple[int, str, t.Union[t.Optional[str], t.Tuple[t.Optional[str], ...]]]
]:
    """Extract localizable strings from the given template node.

    By default matches are returned in babel style: non-string positional
    arguments as well as keyword arguments are reported as `None`.  That
    lets Babel figure out what was meant when gettext functions accept
    keyword arguments for placeholder expansion.  With `babel_style` set to
    `False` only strings are returned and they are always stored in tuples;
    as a consequence calls that yield no string at all are skipped.

    This example explains the behavior:

    >>> from jinja2 import Environment
    >>> env = Environment()
    >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    >>> list(extract_from_ast(node))
    [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    >>> list(extract_from_ast(node, babel_style=False))
    [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]

    For every matching call this function yields a ``(lineno, function,
    message)`` tuple, where:

    * ``lineno`` is the line number of the call node,
    * ``function`` is the name of the ``gettext`` function used, and
    * ``message`` is the string, or a tuple of strings for functions
      with multiple string arguments.

    This extraction function operates on the AST and is because of that
    unable to extract any comments.  For comment support you have to use
    the babel extraction interface or extract comments yourself.
    """
    out: t.Union[t.Optional[str], t.Tuple[t.Optional[str], ...]]

    for call in ast.find_all(nodes.Call):
        target = call.node

        # Only calls to a plain name from ``gettext_functions`` count.
        if not (isinstance(target, nodes.Name) and target.name in gettext_functions):
            continue

        # String constants are kept; every other positional argument is
        # recorded as ``None``.
        strings: t.List[t.Optional[str]] = [
            arg.value
            if isinstance(arg, nodes.Const) and isinstance(arg.value, str)
            else None
            for arg in call.args
        ]

        # Keyword and dynamic arguments each contribute a ``None`` slot.
        strings.extend(None for _ in call.kwargs)
        if call.dyn_args is not None:
            strings.append(None)
        if call.dyn_kwargs is not None:
            strings.append(None)

        if babel_style:
            # A single argument is reported bare instead of as a 1-tuple.
            out = strings[0] if len(strings) == 1 else tuple(strings)
        else:
            out = tuple(text for text in strings if text is not None)

            if not out:
                continue

        yield call.lineno, target.name, out
739
740
741class _CommentFinder:
742    """Helper class to find comments in a token stream.  Can only
743    find comments for gettext calls forwards.  Once the comment
744    from line 4 is found, a comment for line 1 will not return a
745    usable value.
746    """
747
748    def __init__(
749        self, tokens: t.Sequence[t.Tuple[int, str, str]], comment_tags: t.Sequence[str]
750    ) -> None:
751        self.tokens = tokens
752        self.comment_tags = comment_tags
753        self.offset = 0
754        self.last_lineno = 0
755
756    def find_backwards(self, offset: int) -> t.List[str]:
757        try:
758            for _, token_type, token_value in reversed(
759                self.tokens[self.offset : offset]
760            ):
761                if token_type in ("comment", "linecomment"):
762                    try:
763                        prefix, comment = token_value.split(None, 1)
764                    except ValueError:
765                        continue
766                    if prefix in self.comment_tags:
767                        return [comment.rstrip()]
768            return []
769        finally:
770            self.offset = offset
771
772    def find_comments(self, lineno: int) -> t.List[str]:
773        if not self.comment_tags or self.last_lineno > lineno:
774            return []
775        for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset :]):
776            if token_lineno > lineno:
777                return self.find_backwards(self.offset + idx)
778        return self.find_backwards(len(self.tokens))
779
780
def babel_extract(
    fileobj: t.BinaryIO,
    keywords: t.Sequence[str],
    comment_tags: t.Sequence[str],
    options: t.Dict[str, t.Any],
) -> t.Iterator[
    t.Tuple[
        int, str, t.Union[t.Optional[str], t.Tuple[t.Optional[str], ...]], t.List[str]
    ]
]:
    """Babel extraction method for Jinja templates.

    .. versionchanged:: 2.3
       Basic support for translation comments was added.  If `comment_tags`
       is now set to a list of keywords for extraction, the extractor will
       try to find the best preceding comment that begins with one of the
       keywords.  For best results, make sure to not have more than one
       gettext call in one line of code and the matching comment in the
       same line or the line before.

    .. versionchanged:: 2.5.1
       The `newstyle_gettext` flag can be set to `True` to enable newstyle
       gettext calls.

    .. versionchanged:: 2.7
       A `silent` option can now be provided.  If set to `False` template
       syntax errors are propagated instead of being ignored.

    :param fileobj: the file-like object the messages should be extracted
                    from; it is read as bytes and decoded with the
                    ``encoding`` option (``utf-8`` by default)
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results.
    :param options: a dictionary of additional options
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples.
    """

    def option_enabled(
        options: t.Mapping[str, str], key: str, default: bool = False
    ) -> bool:
        # Options are strings; a missing key falls back to the default.
        raw = options.get(key, str(default))
        return raw.lower() in {"1", "on", "yes", "true"}

    # A dict serves as an ordered set of extension classes.  The i18n
    # extension is always part of it.
    requested = (name.strip() for name in options.get("extensions", "").split(","))
    extensions: t.Dict[t.Type[Extension], None] = {
        import_string(name): None for name in requested if name
    }
    extensions.setdefault(InternationalizationExtension, None)

    silent = option_enabled(options, "silent", True)
    environment = Environment(
        options.get("block_start_string", defaults.BLOCK_START_STRING),
        options.get("block_end_string", defaults.BLOCK_END_STRING),
        options.get("variable_start_string", defaults.VARIABLE_START_STRING),
        options.get("variable_end_string", defaults.VARIABLE_END_STRING),
        options.get("comment_start_string", defaults.COMMENT_START_STRING),
        options.get("comment_end_string", defaults.COMMENT_END_STRING),
        options.get("line_statement_prefix") or defaults.LINE_STATEMENT_PREFIX,
        options.get("line_comment_prefix") or defaults.LINE_COMMENT_PREFIX,
        option_enabled(options, "trim_blocks", defaults.TRIM_BLOCKS),
        option_enabled(options, "lstrip_blocks", defaults.LSTRIP_BLOCKS),
        defaults.NEWLINE_SEQUENCE,
        option_enabled(
            options, "keep_trailing_newline", defaults.KEEP_TRAILING_NEWLINE
        ),
        tuple(extensions),
        cache_size=0,
        auto_reload=False,
    )

    if option_enabled(options, "trimmed"):
        environment.policies["ext.i18n.trimmed"] = True
    if option_enabled(options, "newstyle_gettext"):
        environment.newstyle_gettext = True  # type: ignore

    encoding = options.get("encoding", "utf-8")
    source = fileobj.read().decode(encoding)

    try:
        template_node = environment.parse(source)
        tokens = list(environment.lex(environment.preprocess(source)))
    except TemplateSyntaxError:
        if silent:
            # skip templates with syntax errors
            return
        raise

    comment_finder = _CommentFinder(tokens, comment_tags)
    for lineno, func, message in extract_from_ast(template_node, keywords):
        yield lineno, func, message, comment_finder.find_comments(lineno)
871
872
#: nicer import names: short module-level aliases for the extension
#: classes defined above.
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension
debug = DebugExtension
880