1""" 2 pygments.formatters.latex 3 ~~~~~~~~~~~~~~~~~~~~~~~~~ 4 5 Formatter for LaTeX fancyvrb output. 6 7 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. 8 :license: BSD, see LICENSE for details. 9""" 10 11from io import StringIO 12 13from pygments.formatter import Formatter 14from pygments.lexer import Lexer, do_insertions 15from pygments.token import Token, STANDARD_TYPES 16from pygments.util import get_bool_opt, get_int_opt 17 18 19__all__ = ['LatexFormatter'] 20 21 22def escape_tex(text, commandprefix): 23 return text.replace('\\', '\x00'). \ 24 replace('{', '\x01'). \ 25 replace('}', '\x02'). \ 26 replace('\x00', r'\%sZbs{}' % commandprefix). \ 27 replace('\x01', r'\%sZob{}' % commandprefix). \ 28 replace('\x02', r'\%sZcb{}' % commandprefix). \ 29 replace('^', r'\%sZca{}' % commandprefix). \ 30 replace('_', r'\%sZus{}' % commandprefix). \ 31 replace('&', r'\%sZam{}' % commandprefix). \ 32 replace('<', r'\%sZlt{}' % commandprefix). \ 33 replace('>', r'\%sZgt{}' % commandprefix). \ 34 replace('#', r'\%sZsh{}' % commandprefix). \ 35 replace('%', r'\%sZpc{}' % commandprefix). \ 36 replace('$', r'\%sZdl{}' % commandprefix). \ 37 replace('-', r'\%sZhy{}' % commandprefix). \ 38 replace("'", r'\%sZsq{}' % commandprefix). \ 39 replace('"', r'\%sZdq{}' % commandprefix). \ 40 replace('~', r'\%sZti{}' % commandprefix) 41 42 43DOC_TEMPLATE = r''' 44\documentclass{%(docclass)s} 45\usepackage{fancyvrb} 46\usepackage{color} 47\usepackage[%(encoding)s]{inputenc} 48%(preamble)s 49 50%(styledefs)s 51 52\begin{document} 53 54\section*{%(title)s} 55 56%(code)s 57\end{document} 58''' 59 60## Small explanation of the mess below :) 61# 62# The previous version of the LaTeX formatter just assigned a command to 63# each token type defined in the current style. That obviously is 64# problematic if the highlighted code is produced for a different style 65# than the style commands themselves. 
#
# This version works much like the HTML formatter which assigns multiple
# CSS classes to each <span> tag, from the most specific to the least
# specific token type, thus falling back to the parent token type if one
# is not defined.  Here, the classes are there too and use the same short
# forms given in token.STANDARD_TYPES.
#
# Highlighted code now only uses one custom command, which by default is
# \PY and selectable by the commandprefix option (and in addition the
# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
# backwards compatibility purposes).
#
# \PY has two arguments: the classes, separated by +, and the text to
# render in that style.  The classes are resolved into the respective
# style commands by magic, which serves to ignore unknown classes.
#
# The magic macros are:
# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
#   to render in \PY@do.  Their definition determines the style.
# * \PY@reset resets \PY@it etc. to do nothing.
# * \PY@toks parses the list of classes, using magic inspired by the
#   keyval package (but modified to use plus signs instead of commas
#   because fancyvrb redefines commas inside its environments).
# * \PY@tok processes one class, calling the \PY@tok@classname command
#   if it exists.
# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
#   for its class.
# * \PY resets the style, parses the classnames and then calls \PY@do.
#
# Tip: to read this code, print it out in substituted form using e.g.
# >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''})

STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
    \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
    \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
    \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
    \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''


def _get_ttype_name(ttype):
    """
    Return the short class name to use for `ttype` in the LaTeX output.

    If `ttype` has a non-empty entry in ``STANDARD_TYPES`` that entry is
    returned.  Otherwise the token type's parents are walked until one with
    an entry is found; the result is that entry followed by the names of
    the token type components that had to be skipped on the way up.
    """
    fname = STANDARD_TYPES.get(ttype)
    if fname:
        return fname
    aname = ''
    while fname is None:
        # ttype[-1] is the last component of the token type's name.
        aname = ttype[-1] + aname
        ttype = ttype.parent
        fname = STANDARD_TYPES.get(ttype)
    return fname + aname


class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """Read the formatter options and precompute the style commands."""
        Formatter.__init__(self, **options)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # NOTE(review): this option is stored but not consulted anywhere in
        # the code of this module.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            # Anything that is not exactly two delimiters disables escaping.
            self.escapeinside = ''
        self.envname = options.get('envname', 'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """
        Build ``self.ttype2name`` and ``self.cmd2def`` from the style.

        ``ttype2name`` maps each token type that has any styling to its
        short class name; ``cmd2def`` maps that class name to the LaTeX
        code which sets up the corresponding style.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            """Turn an ``rrggbb`` hex string into ``r,g,b`` in 0..1."""
            if col:
                return ','.join(['%.2f' % (int(col[i:i + 2], 16) / 255.0)
                                 for i in (0, 2, 4)])
            else:
                # No color given: fall back to white.
                return '1,1,1'

        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            # '$$' stands for the command prefix and is substituted below.
            cmndef = ''
            if ndef['bold']:
                cmndef += r'\let\$$@bf=\textbf'
            if ndef['italic']:
                cmndef += r'\let\$$@it=\textit'
            if ndef['underline']:
                cmndef += r'\let\$$@ul=\underline'
            if ndef['roman']:
                cmndef += r'\let\$$@ff=\textrm'
            if ndef['sans']:
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['mono']:
                # Monospace is \texttt; this used to emit \textsf, which
                # selected the sans-serif family instead.
                cmndef += r'\let\$$@ff=\texttt'
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                # Token types without any styling get no command at all.
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = [r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition)
                  for name, definition in self.cmd2def.items()]
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def _escape_comment(self, value):
        """
        Escape the text of a comment token according to the options.

        `texcomments` takes precedence over `mathescape`, which takes
        precedence over `escapeinside`; if none is set the whole text is
        escaped.
        """
        cp = self.commandprefix

        if self.texcomments:
            # Try to guess the comment starting lexeme (the leading run of
            # the first character) and escape it, but do not escape the
            # rest of the comment.
            lead_len = len(value) - len(value.lstrip(value[:1]))
            return escape_tex(value[:lead_len], cp) + value[lead_len:]

        if self.mathescape:
            # Only escape parts not inside a math environment: after
            # splitting on '$', the odd-numbered parts are inside math.
            parts = value.split('$')
            return '$'.join(part if i % 2 else escape_tex(part, cp)
                            for i, part in enumerate(parts))

        if self.escapeinside:
            pieces = []
            text = value
            while text:
                before, sep1, text = text.partition(self.left)
                if not sep1:
                    pieces.append(escape_tex(before, cp))
                    continue
                inner, sep2, text = text.partition(self.right)
                if sep2:
                    # Properly delimited: pass the inner text through raw.
                    pieces.append(escape_tex(before, cp) + inner)
                else:
                    # Unterminated escape: treat everything as plain text.
                    pieces.append(escape_tex(before + sep1 + inner, cp))
            return ''.join(pieces)

        return escape_tex(value, cp)

    def format_unencoded(self, tokensource, outfile):
        """
        Write the tokens from `tokensource` to `outfile` as LaTeX.

        The output is one environment named by the `envname` option; with
        the `full` option it is wrapped into a complete document built from
        ``DOC_TEMPLATE``.
        """
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Collect the environment first; it is embedded in the document
            # template at the end.
            realoutfile = outfile
            outfile = StringIO()

        outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
        if self.linenos:
            outfile.write(',numbers=left')
            # A value of 0 leaves the respective option out.
            if self.linenostart:
                outfile.write(',firstnumber=%d' % self.linenostart)
            if self.linenostep:
                outfile.write(',stepnumber=%d' % self.linenostep)
        if self.mathescape or self.texcomments or self.escapeinside:
            outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
                          '\\catcode`\\_=8\\relax}')
        if self.verboptions:
            outfile.write(',' + self.verboptions)
        outfile.write(']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                value = self._escape_comment(value)
            elif ttype not in Token.Escape:
                # Token.Escape text is written out unescaped.
                value = escape_tex(value, cp)

            # Collect the class names from the most specific token type up
            # to (but excluding) the root type.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))

            if styleval:
                # Each line gets its own command; empty lines get none.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
            else:
                outfile.write(value)

        outfile.write('\\end{' + self.envname + '}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # map known existing encodings from LaTeX distribution; the
            # lookup is case-insensitive so that e.g. 'UTF-8' also maps to
            # the lowercase name.
            encoding = {
                'utf_8': 'utf8',
                'utf8': 'utf8',
                'latin_1': 'latin1',
                'latin1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_').lower(), encoding)
            realoutfile.write(DOC_TEMPLATE %
                dict(docclass  = self.docclass,
                     preamble  = self.preamble,
                     title     = self.title,
                     encoding  = encoding,
                     styledefs = self.get_style_defs(),
                     code      = outfile.getvalue()))


class LatexEmbeddedLexer(Lexer):
    """
    This lexer takes one lexer as argument, the lexer for the language
    being formatted, and the left and right delimiters for escaped text.

    First everything is scanned using the language lexer to obtain
    strings and comments. All other consecutive tokens are merged and
    the resulting text is scanned for escaped segments, which are given
    the Token.Escape type. Finally text that is not escaped is scanned
    again with the language lexer.
    """
    def __init__(self, left, right, lang, **options):
        """Store the delimiters `left` and `right` and the lexer `lang`."""
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Lex `text` with the language lexer, keeping escapes separate.

        Returns the result of ``do_insertions`` applied to the escape
        tokens and the token stream of the remaining text.
        """
        # find and remove all the escape tokens (replace with an empty string)
        # this is very similar to DelegatingLexer.get_tokens_unprocessed.
        buffered_parts = []
        buffered_len = 0
        insertions = []
        insertion_buf = []
        for i, t, v in self._find_safe_escape_tokens(text):
            if t is None:
                if insertion_buf:
                    # The pending tokens belong at the current end of the
                    # buffered text.
                    insertions.append((buffered_len, insertion_buf))
                    insertion_buf = []
                buffered_parts.append(v)
                buffered_len += len(v)
            else:
                insertion_buf.append((i, t, v))
        if insertion_buf:
            insertions.append((buffered_len, insertion_buf))
        buffered = ''.join(buffered_parts)
        return do_insertions(insertions,
                             self.lang.get_tokens_unprocessed(buffered))

    def _find_safe_escape_tokens(self, text):
        """ find escape tokens that are not in strings or comments """
        for i, t, v in self._filter_to(
            self.lang.get_tokens_unprocessed(text),
            lambda t: t in Token.Comment or t in Token.String
        ):
            if t is None:
                # Merged non-string, non-comment text: look for escapes.
                for i2, t2, v2 in self._find_escape_tokens(v):
                    yield i + i2, t2, v2
            else:
                # Strings and comments are handed on with type None, i.e.
                # as text for the language lexer.
                yield i, None, v

    def _filter_to(self, it, pred):
        """ Keep only the tokens that match `pred`, merge the others together """
        buf = ''
        idx = 0
        for i, t, v in it:
            if pred(t):
                if buf:
                    yield idx, None, buf
                    buf = ''
                yield i, t, v
            else:
                if not buf:
                    # Remember where the merged run starts.
                    idx = i
                buf += v
        if buf:
            yield idx, None, buf

    def _find_escape_tokens(self, text):
        """ Find escape tokens within text, give token=None otherwise """
        index = 0
        while text:
            a, sep1, text = text.partition(self.left)
            if a:
                yield index, None, a
                index += len(a)
            if sep1:
                b, sep2, text = text.partition(self.right)
                if sep2:
                    yield index + len(sep1), Token.Escape, b
                    index += len(sep1) + len(b) + len(sep2)
                else:
                    # No closing delimiter: flag the opening one as an error
                    # and keep scanning the text that follows it.
                    yield index, Token.Error, sep1
                    index += len(sep1)
                    text = b