1"""
2CodeHilite Extension for Python-Markdown
3========================================
4
5Adds code/syntax highlighting to standard Python-Markdown code blocks.
6
7See <https://pythonhosted.org/Markdown/extensions/code_hilite.html>
8for documentation.
9
10Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
11
12All changes Copyright 2008-2014 The Python Markdown Project
13
14License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
15
16"""
17
18from __future__ import absolute_import
19from __future__ import unicode_literals
20from . import Extension
21from ..treeprocessors import Treeprocessor
22
23try:
24    from pygments import highlight
25    from pygments.lexers import get_lexer_by_name, guess_lexer
26    from pygments.formatters import get_formatter_by_name
27    pygments = True
28except ImportError:
29    pygments = False
30
31
def parse_hl_lines(expr):
    """Turn a highlight-lines expression into a list of line numbers.

    expr is a whitespace-separated string of integers, e.g. '1 2' to
    emphasize lines 1 and 2 of a code block.

    Returns the parsed line numbers as a list of ints. A falsy expr, or an
    expr containing any token that is not an integer, yields an empty list.
    """
    if expr:
        try:
            return [int(token) for token in expr.split()]
        except ValueError:
            # One bad token invalidates the whole expression.
            pass
    return []
45
46
47# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite(object):
    """
    Determine language of source code, and pass it into pygments hilighter.

    Basic Usage:
        >>> code = CodeHilite(src = 'some text')
        >>> html = code.hilite()

    * src: Source string to highlight.

    * linenums: (Boolean) Set line numbering to 'on' (True),
      'off' (False) or 'auto'(None). Set to 'auto' by default.

    * guess_lang: (Boolean) Turn language auto-detection
      'on' or 'off' (on by default).

    * css_class: Set class name of the wrapper element
      ('codehilite' by default).

    * lang: Language name. When left as None, the first line of src is
      inspected for a shebang or ':::lang' header.

    * style: Value handed to the Pygments HTML formatter as `style`
      ('default' by default).

    * noclasses: (Boolean) Value handed to the Pygments HTML formatter as
      `noclasses` (False by default).

    * tab_length: Stored on the instance; not otherwise used by this class.

    * hl_lines: (List of integers) Lines to emphasize, 1-indexed.

    * use_pygments: (Boolean) When False, or when Pygments could not be
      imported, emit escaped <pre><code> markup instead of highlighted
      html (True by default).

    Low Level Usage:
        >>> code = CodeHilite()
        >>> code.src = 'some text' # Source string.
        >>> code.linenums = True  # Turns line numbering on or off.
        >>> html = code.hilite()

    """

    def __init__(self, src=None, linenums=None, guess_lang=True,
                 css_class="codehilite", lang=None, style='default',
                 noclasses=False, tab_length=4, hl_lines=None, use_pygments=True):
        self.src = src
        self.lang = lang
        self.linenums = linenums
        self.guess_lang = guess_lang
        self.css_class = css_class
        self.style = style
        self.noclasses = noclasses
        self.tab_length = tab_length
        # Never share a caller's (or a default) list between instances.
        self.hl_lines = hl_lines or []
        self.use_pygments = use_pygments

    def hilite(self):
        """
        Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with
        optional line numbers. The output should then be styled with css to
        your liking. No styles are applied by default - only styling hooks
        (i.e.: <span class="k">).

        Leading and trailing newlines are stripped from the source first,
        and when no language was given the first line is parsed for a
        header (see `_parseHeader`). If Pygments is unavailable or
        `use_pygments` is False, the source is html-escaped and wrapped in
        <pre><code> markup carrying `language-<lang>` and `linenums`
        classes where applicable.

        returns : A string of html.

        """

        self.src = self.src.strip('\n')

        if self.lang is None:
            self._parseHeader()

        if self.use_pygments and pygments:
            # Lexer lookup falls back in stages: the named language, then a
            # guess from the content (if enabled), and finally plain text.
            try:
                lexer = get_lexer_by_name(self.lang)
            except ValueError:
                try:
                    if self.guess_lang:
                        lexer = guess_lexer(self.src)
                    else:
                        lexer = get_lexer_by_name('text')
                except ValueError:
                    lexer = get_lexer_by_name('text')
            formatter = get_formatter_by_name('html',
                                              linenos=self.linenums,
                                              cssclass=self.css_class,
                                              style=self.style,
                                              noclasses=self.noclasses,
                                              hl_lines=self.hl_lines)
            return highlight(self.src, lexer, formatter)
        else:
            # just escape and build markup usable by JS highlighting libs
            # ('&' has to be replaced first so later entities stay intact)
            txt = self.src.replace('&', '&amp;')
            txt = txt.replace('<', '&lt;')
            txt = txt.replace('>', '&gt;')
            txt = txt.replace('"', '&quot;')
            classes = []
            if self.lang:
                classes.append('language-%s' % self.lang)
            if self.linenums:
                classes.append('linenums')
            class_str = ''
            if classes:
                class_str = ' class="%s"' % ' '.join(classes)
            return '<pre class="%s"><code%s>%s</code></pre>\n' % \
                   (self.css_class, class_str, txt)

    def _parseHeader(self):
        """
        Determines language of a code block from shebang line and whether said
        line should be removed or left in place. If the shebang line contains a
        path (even a single /) then it is assumed to be a real shebang line and
        left alone. However, if no path is given (i.e.: #!python or :::python)
        then it is assumed to be a mock shebang for language identification of
        a code fragment and removed from the code block prior to processing for
        code highlighting. When a shebang (i.e.: #!python) is found and line
        numbering is still on 'auto' (None), line numbering is turned on. When
        colons are found in place of a shebang (i.e.: :::python), line
        numbering is left in the current state - off by default.

        Also parses optional list of highlight lines, like:

            :::python hl_lines="1 3"

        A header without an hl_lines spec leaves any previously configured
        `hl_lines` untouched.
        """

        import re

        # split text into lines
        lines = self.src.split("\n")
        # pull first line to examine
        fl = lines.pop(0)

        c = re.compile(r'''
            (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w+-]*)               # The language
            \s*                             # Arbitrary whitespace
            # Optional highlight lines, single- or double-quote-delimited
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
            ''',  re.VERBOSE)
        # search first line for shebang
        m = c.search(fl)
        if m:
            # we have a match; the lang group always participates (it may
            # be empty), so no fallback is needed here
            self.lang = m.group('lang').lower()
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
            if self.linenums is None and m.group('shebang'):
                # Overridable and Shebang exists - use line numbers
                self.linenums = True

            # Only override hl_lines when the header really carries a spec,
            # so a value passed to the constructor is not silently dropped.
            hl_spec = m.group('hl_lines')
            if hl_spec is not None:
                self.hl_lines = parse_hl_lines(hl_spec)
        else:
            # No match
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")
195
196
197# ------------------ The Markdown Extension -------------------------------
198
199
class HiliteTreeprocessor(Treeprocessor):
    """ Treeprocessor that highlights the source code held in code blocks. """

    def run(self, root):
        """
        Highlight every `pre` element whose only child is a `code` element.

        The html produced by CodeHilite is stored in the Markdown instance's
        htmlStash, and the `pre` element is turned into a `p` element holding
        just the stash placeholder.
        """
        for pre in root.iter('pre'):
            # Anything other than exactly one <code> child is not ours.
            if len(pre) != 1 or pre[0].tag != 'code':
                continue

            highlighter = CodeHilite(
                pre[0].text,
                linenums=self.config['linenums'],
                guess_lang=self.config['guess_lang'],
                css_class=self.config['css_class'],
                style=self.config['pygments_style'],
                noclasses=self.config['noclasses'],
                tab_length=self.markdown.tab_length,
                use_pygments=self.config['use_pygments']
            )
            html = highlighter.hilite()
            placeholder = self.markdown.htmlStash.store(html, safe=True)

            # Empty the element, then reuse it as a p element, which will
            # later be removed when the raw html is inserted.
            pre.clear()
            pre.tag = 'p'
            pre.text = placeholder
226
227
class CodeHiliteExtension(Extension):
    """ Markdown extension adding source code hilighting to codeblocks. """

    def __init__(self, *args, **kwargs):
        # Default configs: each option maps to [default value, help text].
        defaults = {
            'linenums': [
                None,
                "Use lines numbers. True=yes, False=no, None=auto"
            ],
            'guess_lang': [
                True,
                "Automatic language detection - Default: True"
            ],
            'css_class': [
                "codehilite",
                "Set class name for wrapper <div> - "
                "Default: codehilite"
            ],
            'pygments_style': [
                'default',
                'Pygments HTML Formatter Style '
                '(Colorscheme) - Default: default'
            ],
            'noclasses': [
                False,
                'Use inline styles instead of CSS classes - '
                'Default false'
            ],
            'use_pygments': [
                True,
                'Use Pygments to Highlight code blocks. '
                'Disable if using a JavaScript library. '
                'Default: True'
            ]
        }
        self.config = defaults

        super(CodeHiliteExtension, self).__init__(*args, **kwargs)

    def extendMarkdown(self, md, md_globals):
        """
        Add a HiliteTreeprocessor to the Markdown instance.

        The processor receives this extension's configs, is added to
        md.treeprocessors under the name "hilite" at position "<inline",
        and the extension is then registered with md.
        """
        processor = HiliteTreeprocessor(md)
        processor.config = self.getConfigs()
        md.treeprocessors.add("hilite", processor, "<inline")

        md.registerExtension(self)
262
263
def makeExtension(*args, **kwargs):
    """ Build a CodeHiliteExtension, forwarding every argument to it. """
    extension = CodeHiliteExtension(*args, **kwargs)
    return extension
266