1"""
2    pygments.formatters.latex
3    ~~~~~~~~~~~~~~~~~~~~~~~~~
4
5    Formatter for LaTeX fancyvrb output.
6
7    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10
11from io import StringIO
12
13from pygments.formatter import Formatter
14from pygments.lexer import Lexer, do_insertions
15from pygments.token import Token, STANDARD_TYPES
16from pygments.util import get_bool_opt, get_int_opt
17
18
# Names exported by a star-import of this module.  Only the formatter is
# listed; the LatexEmbeddedLexer class defined further down is not.
__all__ = ['LatexFormatter']
20
21
def escape_tex(text, commandprefix):
    r"""Replace characters that are special to LaTeX with escape macros.

    Each special character in *text* becomes ``\<commandprefix>Zxx{}``,
    where ``xx`` is a two-letter code identifying the character (for
    example ``us`` for an underscore).  The resulting string is returned.
    """
    escaped = text

    # Backslash and braces occur in the replacement macros themselves, so
    # they are first parked on control characters; otherwise the later
    # substitutions would re-escape the macros that were just inserted.
    for char, placeholder in (('\\', '\x00'), ('{', '\x01'), ('}', '\x02')):
        escaped = escaped.replace(char, placeholder)

    # Order matters and mirrors the macro list in STYLE_TEMPLATE.
    macro_codes = (
        ('\x00', 'bs'),
        ('\x01', 'ob'),
        ('\x02', 'cb'),
        ('^', 'ca'),
        ('_', 'us'),
        ('&', 'am'),
        ('<', 'lt'),
        ('>', 'gt'),
        ('#', 'sh'),
        ('%', 'pc'),
        ('$', 'dl'),
        ('-', 'hy'),
        ("'", 'sq'),
        ('"', 'dq'),
        ('~', 'ti'),
    )
    for char, code in macro_codes:
        escaped = escaped.replace(char, '\\%sZ%s{}' % (commandprefix, code))
    return escaped
41
42
# Skeleton of the complete LaTeX document.  It is %-formatted with the keys
# docclass, encoding, preamble, styledefs, title and code.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''
59
60## Small explanation of the mess below :)
61#
62# The previous version of the LaTeX formatter just assigned a command to
63# each token type defined in the current style.  That obviously is
64# problematic if the highlighted code is produced for a different style
65# than the style commands themselves.
66#
67# This version works much like the HTML formatter which assigns multiple
68# CSS classes to each <span> tag, from the most specific to the least
69# specific token type, thus falling back to the parent token type if one
70# is not defined.  Here, the classes are there too and use the same short
71# forms given in token.STANDARD_TYPES.
72#
73# Highlighted code now only uses one custom command, which by default is
74# \PY and selectable by the commandprefix option (and in addition the
75# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
76# backwards compatibility purposes).
77#
78# \PY has two arguments: the classes, separated by +, and the text to
79# render in that style.  The classes are resolved into the respective
80# style commands by magic, which serves to ignore unknown classes.
81#
82# The magic macros are:
83# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
84#   to render in \PY@do.  Their definition determines the style.
85# * \PY@reset resets \PY@it etc. to do nothing.
86# * \PY@toks parses the list of classes, using magic inspired by the
87#   keyval package (but modified to use plusses instead of commas
88#   because fancyvrb redefines commas inside its environments).
89# * \PY@tok processes one class, calling the \PY@tok@classname command
90#   if it exists.
91# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
92#   for its class.
93# * \PY resets the style, parses the classnames and then calls \PY@do.
94#
95# Tip: to read this code, print it out in substituted form using e.g.
# >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''})
97
# LaTeX macro definitions for highlighted output.  The template is
# %-formatted with two keys: 'cp' (the command prefix) and 'styles' (the
# per-token-class definitions).  Because of the %-formatting, every literal
# percent sign in the LaTeX code below is written as '%%'.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
    \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
    \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
    \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
    \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
133
134
def _get_ttype_name(ttype):
    """Return the short class name for the token type *ttype*.

    If *ttype* has a non-empty entry in ``STANDARD_TYPES`` that entry is
    returned.  Otherwise the parent chain is walked upwards until a type
    with an entry is found; the result is that ancestor's short name
    followed by the last name component of every type passed on the way,
    outermost first.
    """
    short = STANDARD_TYPES.get(ttype)
    if short:
        return short

    # Components are gathered innermost-first and reversed when joined.
    components = []
    current = ttype
    while short is None:
        components.append(current[-1])
        current = current.parent
        short = STANDARD_TYPES.get(current)
    return short + ''.join(reversed(components))
145
146
class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """Read the formatter options and precompute the style commands."""
        Formatter.__init__(self, **options)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # abs() silently turns negative numbers into positive ones.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # Parsed and stored, but not consulted anywhere else in this class.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            # Anything that is not exactly two characters disables the feature.
            self.escapeinside = ''
        self.envname = options.get('envname', 'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """Build the token-type and command tables for the current style.

        Fills ``self.ttype2name`` (token type -> short class name) and
        ``self.cmd2def`` (short class name -> LaTeX code that sets up the
        ``\\<prefix>@..`` hooks).  Token types whose style definition yields
        no LaTeX code are left out of both tables.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # 'rrggbb' hex string -> 'r,g,b' with components in [0, 1];
            # a missing colour falls back to white.
            if col:
                return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
                                 for i in (0, 2, 4)])
            else:
                return '1,1,1'

        # Style flags that map to a plain \let of a hook onto a LaTeX font
        # command.  '$$' stands for the command prefix and is substituted at
        # the end.  roman/sans/mono share the @ff hook, so a later entry
        # overrides an earlier one when several flags are set.
        simple_switches = (
            ('bold', r'\let\$$@bf=\textbf'),
            ('italic', r'\let\$$@it=\textit'),
            ('underline', r'\let\$$@ul=\underline'),
            ('roman', r'\let\$$@ff=\textrm'),
            ('sans', r'\let\$$@ff=\textsf'),
            # Monospace is \texttt; this used to emit \textsf (sans serif).
            ('mono', r'\let\$$@ff=\texttt'),
        )

        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            cmndef = ''
            for flag, latex in simple_switches:
                if ndef[flag]:
                    cmndef += latex
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = [r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition)
                  for name, definition in self.cmd2def.items()]
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def format_unencoded(self, tokensource, outfile):
        """Write the tokens from *tokensource* to *outfile* as LaTeX.

        *tokensource* is an iterable of ``(tokentype, value)`` pairs.  The
        output is one fancyvrb environment; when ``self.full`` is set it is
        wrapped in a complete document built from ``DOC_TEMPLATE``.
        """
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Collect the environment in memory so it can be embedded in
            # the document template at the end.
            realoutfile = outfile
            outfile = StringIO()

        outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
        if self.linenos:
            start, step = self.linenostart, self.linenostep
            # A value of 0 leaves the corresponding option out.
            outfile.write(',numbers=left' +
                          (',firstnumber=%d' % start if start else '') +
                          (',stepnumber=%d' % step if step else ''))
        if self.mathescape or self.texcomments or self.escapeinside:
            # Restore the usual catcodes of $, ^ and _ inside the environment.
            outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
                          '\\catcode`\\_=8\\relax}')
        if self.verboptions:
            outfile.write(',' + self.verboptions)
        outfile.write(']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                if self.texcomments:
                    # Try to guess comment starting lexeme and escape it ...
                    start = value[0:1]
                    for i in range(1, len(value)):
                        if start[0] != value[i]:
                            break
                        start += value[i]

                    value = value[len(start):]
                    start = escape_tex(start, cp)

                    # ... but do not escape inside comment.
                    value = start + value
                elif self.mathescape:
                    # Only escape parts not inside a math environment.
                    parts = value.split('$')
                    in_math = False
                    for i, part in enumerate(parts):
                        if not in_math:
                            parts[i] = escape_tex(part, cp)
                        in_math = not in_math
                    value = '$'.join(parts)
                elif self.escapeinside:
                    text = value
                    value = ''
                    while text:
                        a, sep1, text = text.partition(self.left)
                        if sep1:
                            b, sep2, text = text.partition(self.right)
                            if sep2:
                                # Delimited span: emit it raw, without the
                                # delimiters themselves.
                                value += escape_tex(a, cp) + b
                            else:
                                # Unterminated span: treat it as plain text.
                                value += escape_tex(a + sep1 + b, cp)
                        else:
                            value += escape_tex(a, cp)
                else:
                    value = escape_tex(value, cp)
            elif ttype not in Token.Escape:
                value = escape_tex(value, cp)

            # Class list from the most general to the most specific type.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))
            if styleval:
                # Wrap each line separately so the newlines stay outside
                # the macro arguments.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
            else:
                outfile.write(value)

        outfile.write('\\end{' + self.envname + '}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # Map known encodings to their inputenc names.  The key is
            # normalised (dashes to underscores, lower case) so spellings
            # such as 'UTF-8' or 'Latin-1' are recognised too; unknown
            # names are passed through unchanged.
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_').lower(), encoding)
            realoutfile.write(DOC_TEMPLATE %
                dict(docclass  = self.docclass,
                     preamble  = self.preamble,
                     title     = self.title,
                     encoding  = encoding,
                     styledefs = self.get_style_defs(),
                     code      = outfile.getvalue()))
428
429
class LatexEmbeddedLexer(Lexer):
    """
    Wrap a language lexer so that delimited spans are passed through as
    ``Token.Escape``.

    The constructor takes the left and right delimiters and the lexer for
    the language being formatted.  The input is lexed once with that lexer
    to locate comments and strings; everything else is merged and searched
    for escaped segments.  The text that is not escaped is then lexed again
    with the language lexer and the escape tokens are spliced back in.
    """
    def __init__(self, left, right, lang, **options):
        self.left, self.right, self.lang = left, right, lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        # Strip the escape tokens out of the text, remembering where each
        # group belongs, then lex what remains and re-insert them.  This is
        # very similar to DelegatingLexer.get_tokens_unprocessed.
        plain_chunks = []
        plain_length = 0
        insertions = []
        pending = []
        for pos, ttype, val in self._find_safe_escape_tokens(text):
            if ttype is not None:
                pending.append((pos, ttype, val))
                continue
            if pending:
                insertions.append((plain_length, pending))
                pending = []
            plain_chunks.append(val)
            plain_length += len(val)
        if pending:
            insertions.append((plain_length, pending))
        return do_insertions(
            insertions,
            self.lang.get_tokens_unprocessed(''.join(plain_chunks)))

    def _find_safe_escape_tokens(self, text):
        """ find escape tokens that are not in strings or comments """
        def is_protected(ttype):
            return ttype in Token.Comment or ttype in Token.String

        merged = self._filter_to(self.lang.get_tokens_unprocessed(text),
                                 is_protected)
        for pos, ttype, val in merged:
            if ttype is not None:
                # Comment or string: hand it on as plain text.
                yield pos, None, val
                continue
            for rel_pos, sub_type, sub_val in self._find_escape_tokens(val):
                yield pos + rel_pos, sub_type, sub_val

    def _filter_to(self, it, pred):
        """ Keep only the tokens that match `pred`, merge the others together """
        merged = ''
        merged_start = 0
        for pos, ttype, val in it:
            if not pred(ttype):
                if not merged:
                    merged_start = pos
                merged += val
                continue
            if merged:
                yield merged_start, None, merged
                merged = ''
            yield pos, ttype, val
        if merged:
            yield merged_start, None, merged

    def _find_escape_tokens(self, text):
        """ Find escape tokens within text, give token=None otherwise """
        offset = 0
        remaining = text
        while remaining:
            before, opener, remaining = remaining.partition(self.left)
            if before:
                yield offset, None, before
                offset += len(before)
            if not opener:
                continue
            inner, closer, remaining = remaining.partition(self.right)
            if closer:
                yield offset + len(opener), Token.Escape, inner
                offset += len(opener) + len(inner) + len(closer)
            else:
                # No closing delimiter: flag the opener as an error and
                # keep scanning the text that followed it.
                yield offset, Token.Error, opener
                offset += len(opener)
                remaining = inner
512