1""" 2CodeHilite Extension for Python-Markdown 3======================================== 4 5Adds code/syntax highlighting to standard Python-Markdown code blocks. 6 7See <https://pythonhosted.org/Markdown/extensions/code_hilite.html> 8for documentation. 9 10Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). 11 12All changes Copyright 2008-2014 The Python Markdown Project 13 14License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 15 16""" 17 18from __future__ import absolute_import 19from __future__ import unicode_literals 20from . import Extension 21from ..treeprocessors import Treeprocessor 22 23try: 24 from pygments import highlight 25 from pygments.lexers import get_lexer_by_name, guess_lexer 26 from pygments.formatters import get_formatter_by_name 27 pygments = True 28except ImportError: 29 pygments = False 30 31 32def parse_hl_lines(expr): 33 """Support our syntax for emphasizing certain lines of code. 34 35 expr should be like '1 2' to emphasize lines 1 and 2 of a code block. 36 Returns a list of ints, the line numbers to emphasize. 37 """ 38 if not expr: 39 return [] 40 41 try: 42 return list(map(int, expr.split())) 43 except ValueError: 44 return [] 45 46 47# ------------------ The Main CodeHilite Class ---------------------- 48class CodeHilite(object): 49 """ 50 Determine language of source code, and pass it into pygments hilighter. 51 52 Basic Usage: 53 >>> code = CodeHilite(src = 'some text') 54 >>> html = code.hilite() 55 56 * src: Source string or any object with a .readline attribute. 57 58 * linenums: (Boolean) Set line numbering to 'on' (True), 59 'off' (False) or 'auto'(None). Set to 'auto' by default. 60 61 * guess_lang: (Boolean) Turn language auto-detection 62 'on' or 'off' (on by default). 63 64 * css_class: Set class name of wrapper div ('codehilite' by default). 65 66 * hl_lines: (List of integers) Lines to emphasize, 1-indexed. 67 68 Low Level Usage: 69 >>> code = CodeHilite() 70 >>> code.src = 'some text' # String or anything with a .readline attr. 71 >>> code.linenos = True # Turns line numbering on or of. 72 >>> html = code.hilite() 73 74 """ 75 76 def __init__(self, src=None, linenums=None, guess_lang=True, 77 css_class="codehilite", lang=None, style='default', 78 noclasses=False, tab_length=4, hl_lines=None, use_pygments=True): 79 self.src = src 80 self.lang = lang 81 self.linenums = linenums 82 self.guess_lang = guess_lang 83 self.css_class = css_class 84 self.style = style 85 self.noclasses = noclasses 86 self.tab_length = tab_length 87 self.hl_lines = hl_lines or [] 88 self.use_pygments = use_pygments 89 90 def hilite(self): 91 """ 92 Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with 93 optional line numbers. The output should then be styled with css to 94 your liking. No styles are applied by default - only styling hooks 95 (i.e.: <span class="k">). 96 97 returns : A string of html. 98 99 """ 100 101 self.src = self.src.strip('\n') 102 103 if self.lang is None: 104 self._parseHeader() 105 106 if pygments and self.use_pygments: 107 try: 108 lexer = get_lexer_by_name(self.lang) 109 except ValueError: 110 try: 111 if self.guess_lang: 112 lexer = guess_lexer(self.src) 113 else: 114 lexer = get_lexer_by_name('text') 115 except ValueError: 116 lexer = get_lexer_by_name('text') 117 formatter = get_formatter_by_name('html', 118 linenos=self.linenums, 119 cssclass=self.css_class, 120 style=self.style, 121 noclasses=self.noclasses, 122 hl_lines=self.hl_lines) 123 return highlight(self.src, lexer, formatter) 124 else: 125 # just escape and build markup usable by JS highlighting libs 126 txt = self.src.replace('&', '&') 127 txt = txt.replace('<', '<') 128 txt = txt.replace('>', '>') 129 txt = txt.replace('"', '"') 130 classes = [] 131 if self.lang: 132 classes.append('language-%s' % self.lang) 133 if self.linenums: 134 classes.append('linenums') 135 class_str = '' 136 if classes: 137 class_str = ' class="%s"' % ' '.join(classes) 138 return '<pre class="%s"><code%s>%s</code></pre>\n' % \ 139 (self.css_class, class_str, txt) 140 141 def _parseHeader(self): 142 """ 143 Determines language of a code block from shebang line and whether said 144 line should be removed or left in place. If the sheband line contains a 145 path (even a single /) then it is assumed to be a real shebang line and 146 left alone. However, if no path is given (e.i.: #!python or :::python) 147 then it is assumed to be a mock shebang for language identifitation of 148 a code fragment and removed from the code block prior to processing for 149 code highlighting. When a mock shebang (e.i: #!python) is found, line 150 numbering is turned on. When colons are found in place of a shebang 151 (e.i.: :::python), line numbering is left in the current state - off 152 by default. 153 154 Also parses optional list of highlight lines, like: 155 156 :::python hl_lines="1 3" 157 """ 158 159 import re 160 161 # split text into lines 162 lines = self.src.split("\n") 163 # pull first line to examine 164 fl = lines.pop(0) 165 166 c = re.compile(r''' 167 (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons 168 (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path 169 (?P<lang>[\w+-]*) # The language 170 \s* # Arbitrary whitespace 171 # Optional highlight lines, single- or double-quote-delimited 172 (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))? 173 ''', re.VERBOSE) 174 # search first line for shebang 175 m = c.search(fl) 176 if m: 177 # we have a match 178 try: 179 self.lang = m.group('lang').lower() 180 except IndexError: 181 self.lang = None 182 if m.group('path'): 183 # path exists - restore first line 184 lines.insert(0, fl) 185 if self.linenums is None and m.group('shebang'): 186 # Overridable and Shebang exists - use line numbers 187 self.linenums = True 188 189 self.hl_lines = parse_hl_lines(m.group('hl_lines')) 190 else: 191 # No match 192 lines.insert(0, fl) 193 194 self.src = "\n".join(lines).strip("\n") 195 196 197# ------------------ The Markdown Extension ------------------------------- 198 199 200class HiliteTreeprocessor(Treeprocessor): 201 """ Hilight source code in code blocks. """ 202 203 def run(self, root): 204 """ Find code blocks and store in htmlStash. """ 205 blocks = root.iter('pre') 206 for block in blocks: 207 if len(block) == 1 and block[0].tag == 'code': 208 code = CodeHilite( 209 block[0].text, 210 linenums=self.config['linenums'], 211 guess_lang=self.config['guess_lang'], 212 css_class=self.config['css_class'], 213 style=self.config['pygments_style'], 214 noclasses=self.config['noclasses'], 215 tab_length=self.markdown.tab_length, 216 use_pygments=self.config['use_pygments'] 217 ) 218 placeholder = self.markdown.htmlStash.store(code.hilite(), 219 safe=True) 220 # Clear codeblock in etree instance 221 block.clear() 222 # Change to p element which will later 223 # be removed when inserting raw html 224 block.tag = 'p' 225 block.text = placeholder 226 227 228class CodeHiliteExtension(Extension): 229 """ Add source code hilighting to markdown codeblocks. """ 230 231 def __init__(self, *args, **kwargs): 232 # define default configs 233 self.config = { 234 'linenums': [None, 235 "Use lines numbers. True=yes, False=no, None=auto"], 236 'guess_lang': [True, 237 "Automatic language detection - Default: True"], 238 'css_class': ["codehilite", 239 "Set class name for wrapper <div> - " 240 "Default: codehilite"], 241 'pygments_style': ['default', 242 'Pygments HTML Formatter Style ' 243 '(Colorscheme) - Default: default'], 244 'noclasses': [False, 245 'Use inline styles instead of CSS classes - ' 246 'Default false'], 247 'use_pygments': [True, 248 'Use Pygments to Highlight code blocks. ' 249 'Disable if using a JavaScript library. ' 250 'Default: True'] 251 } 252 253 super(CodeHiliteExtension, self).__init__(*args, **kwargs) 254 255 def extendMarkdown(self, md, md_globals): 256 """ Add HilitePostprocessor to Markdown instance. """ 257 hiliter = HiliteTreeprocessor(md) 258 hiliter.config = self.getConfigs() 259 md.treeprocessors.add("hilite", hiliter, "<inline") 260 261 md.registerExtension(self) 262 263 264def makeExtension(*args, **kwargs): 265 return CodeHiliteExtension(*args, **kwargs) 266