1"""
2    sphinx.highlighting
3    ~~~~~~~~~~~~~~~~~~~
4
5    Highlight code blocks using Pygments.
6
7    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10
11from functools import partial
12from importlib import import_module
13from typing import Any, Dict
14
15from pygments import highlight
16from pygments.filters import ErrorToken
17from pygments.formatter import Formatter
18from pygments.formatters import HtmlFormatter, LatexFormatter
19from pygments.lexer import Lexer
20from pygments.lexers import (CLexer, Python3Lexer, PythonConsoleLexer, PythonLexer, RstLexer,
21                             TextLexer, get_lexer_by_name, guess_lexer)
22from pygments.style import Style
23from pygments.styles import get_style_by_name
24from pygments.util import ClassNotFound
25
26from sphinx.locale import __
27from sphinx.pygments_styles import NoneStyle, SphinxStyle
28from sphinx.util import logging, texescape
29
# Module-level logger obtained from ``sphinx.util.logging``; the warnings
# emitted in this module pass a ``location`` keyword to it.
logger = logging.getLogger(__name__)
31
# Registry of ready-made lexer instances keyed by language name.  Entries
# found here are returned as-is by ``PygmentsBridge.get_lexer``.
lexers = {}  # type: Dict[str, Lexer]

# Lexer factories for the languages handled specially by this module.  Each
# factory is pre-bound with ``stripnl=False`` and is called with the
# per-block options to build a fresh lexer.
lexer_classes = dict(
    none=partial(TextLexer, stripnl=False),
    python=partial(PythonLexer, stripnl=False),
    python3=partial(Python3Lexer, stripnl=False),
    pycon=partial(PythonConsoleLexer, stripnl=False),
    pycon3=partial(PythonConsoleLexer, python3=True, stripnl=False),
    rest=partial(RstLexer, stripnl=False),
    c=partial(CLexer, stripnl=False),
)  # type: Dict[str, Lexer]
42
43
# Translation table (usable with ``str.translate``) mapping the backslash
# and brace characters to their ``\PYGZ..{}`` macro spellings.
escape_hl_chars = {
    ord(char): macro
    for char, macro in (
        ('\\', '\\PYGZbs{}'),
        ('{', '\\PYGZob{}'),
        ('}', '\\PYGZcb{}'),
    )
}
47
# Extra LaTeX definitions appended to the Pygments-generated style sheet for
# non-HTML output (see ``PygmentsBridge.get_stylesheet``).
# Redefines \PYGZsq to use the textcomp quote to get a true single quote.
_LATEX_ADD_STYLES = r'''
\renewcommand\PYGZsq{\textquotesingle}
'''
53
54
class PygmentsBridge:
    """Highlight source code with Pygments for HTML or LaTeX output.

    The bridge is configured once with an output destination and a style
    name; it then builds lexers and formatters on demand.
    """

    # Set these attributes if you want to have different Pygments formatters
    # than the default ones.
    html_formatter = HtmlFormatter
    latex_formatter = LatexFormatter

    def __init__(self, dest: str = 'html', stylename: str = 'sphinx',
                 latex_engine: str = None) -> None:
        """Select the formatter class and base formatter options.

        :param dest: ``'html'`` selects :attr:`html_formatter`; any other
                     value selects :attr:`latex_formatter`.
        :param stylename: style name, resolved through :meth:`get_style`.
        :param latex_engine: passed on to ``texescape.hlescape`` when
                             highlighting for a non-HTML destination.
        """
        self.dest = dest
        self.latex_engine = latex_engine

        pygments_style = self.get_style(stylename)
        self.formatter_args = {'style': pygments_style}  # type: Dict[str, Any]
        if dest != 'html':
            self.formatter = self.latex_formatter
            self.formatter_args['commandprefix'] = 'PYG'
        else:
            self.formatter = self.html_formatter

    def get_style(self, stylename: str) -> Style:
        """Return the Pygments style for *stylename*.

        ``None`` and ``'sphinx'`` give ``SphinxStyle``, ``'none'`` gives
        ``NoneStyle``.  A dotted name is split at its last dot into a module
        to import and an attribute to fetch from it.  Anything else is
        looked up with ``get_style_by_name``.
        """
        if stylename is None or stylename == 'sphinx':
            return SphinxStyle
        if stylename == 'none':
            return NoneStyle
        if '.' not in stylename:
            return get_style_by_name(stylename)

        module_name, _, attr_name = stylename.rpartition('.')
        return getattr(import_module(module_name), attr_name)

    def get_formatter(self, **kwargs: Any) -> Formatter:
        """Instantiate the configured formatter class.

        The bridge's own ``formatter_args`` take precedence over *kwargs*
        when both provide the same option.
        """
        options = {**kwargs, **self.formatter_args}
        return self.formatter(**options)

    def get_lexer(self, source: str, lang: str, opts: Dict = None,
                  force: bool = False, location: Any = None) -> Lexer:
        """Return a lexer for *source* written in *lang*.

        :param source: the code to be highlighted; inspected for a leading
                       ``>>>`` prompt and passed to ``guess_lexer``.
        :param lang: language name; ``'guess'`` asks Pygments to guess.
        :param opts: keyword options handed to the lexer constructor.
        :param force: when true, the ``raiseonerror`` filter is not added.
        :param location: location passed along with the warning that is
                         logged for an unknown lexer name.
        """
        opts = opts or {}

        # Python aliases: code opening with a prompt is an interactive
        # session and gets the matching console lexer.
        if lang in ('py', 'python'):
            lang = 'pycon' if source.startswith('>>>') else 'python'
        elif lang in ('py3', 'python3', 'default'):
            lang = 'pycon3' if source.startswith('>>>') else 'python3'

        if lang in lexers:
            # just return custom lexers here (without installing raiseonerror filter)
            return lexers[lang]

        if lang in lexer_classes:
            lexer = lexer_classes[lang](**opts)
        else:
            try:
                lexer = (guess_lexer(source, **opts) if lang == 'guess'
                         else get_lexer_by_name(lang, **opts))
            except ClassNotFound:
                logger.warning(__('Pygments lexer name %r is not known'), lang,
                               location=location)
                # unknown language: fall back to the plain-text lexer
                lexer = lexer_classes['none'](**opts)

        if not force:
            lexer.add_filter('raiseonerror')

        return lexer

    def highlight_block(self, source: str, lang: str, opts: Dict = None,
                        force: bool = False, location: Any = None, **kwargs: Any) -> str:
        """Highlight *source* and return the formatted markup.

        If lexing raises ``ErrorToken`` the block is highlighted again with
        the ``'none'`` lexer; a warning is logged unless *lang* is
        ``'default'``.  Extra *kwargs* are passed to :meth:`get_formatter`.
        For a non-HTML destination the result is additionally run through
        ``texescape.hlescape``.
        """
        if not isinstance(source, str):
            source = source.decode()

        formatter = self.get_formatter(**kwargs)
        lexer = self.get_lexer(source, lang, opts, force, location)

        # highlight via Pygments
        try:
            highlighted = highlight(source, lexer, formatter)
        except ErrorToken:
            # this is most probably not the selected language,
            # so let it pass unhighlighted
            if lang != 'default':
                # for 'default', automatic highlighting simply failed; stay quiet
                logger.warning(__('Could not lex literal_block as "%s". '
                                  'Highlighting skipped.'), lang,
                               type='misc', subtype='highlighting_failure',
                               location=location)
            fallback_lexer = self.get_lexer(source, 'none', opts, force, location)
            highlighted = highlight(source, fallback_lexer, formatter)

        if self.dest != 'html':
            # MEMO: this is done to escape Unicode chars with non-Unicode engines
            return texescape.hlescape(highlighted, self.latex_engine)
        return highlighted

    def get_stylesheet(self) -> str:
        """Return the style definitions for the configured destination.

        HTML output gets the formatter's definitions for the ``.highlight``
        selector; other output gets the formatter's default definitions
        followed by ``_LATEX_ADD_STYLES``.
        """
        formatter = self.get_formatter()
        if self.dest != 'html':
            return formatter.get_style_defs() + _LATEX_ADD_STYLES
        return formatter.get_style_defs('.highlight')
164