1""" 2Footnotes Extension for Python-Markdown 3======================================= 4 5Adds footnote handling to Python-Markdown. 6 7See <https://Python-Markdown.github.io/extensions/footnotes> 8for documentation. 9 10Copyright The Python Markdown Project 11 12License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 13 14""" 15 16from __future__ import absolute_import 17from __future__ import unicode_literals 18from . import Extension 19from ..preprocessors import Preprocessor 20from ..inlinepatterns import Pattern 21from ..treeprocessors import Treeprocessor 22from ..postprocessors import Postprocessor 23from .. import util 24from ..odict import OrderedDict 25import re 26import copy 27 28FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX 29NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX 30DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)') 31TABBED_RE = re.compile(r'((\t)|( ))(.*)') 32RE_REF_ID = re.compile(r'(fnref)(\d+)') 33 34 35class FootnoteExtension(Extension): 36 """ Footnote Extension. """ 37 38 def __init__(self, *args, **kwargs): 39 """ Setup configs. """ 40 41 self.config = { 42 'PLACE_MARKER': 43 ["///Footnotes Go Here///", 44 "The text string that marks where the footnotes go"], 45 'UNIQUE_IDS': 46 [False, 47 "Avoid name collisions across " 48 "multiple calls to reset()."], 49 "BACKLINK_TEXT": 50 ["↩", 51 "The text string that links from the footnote " 52 "to the reader's place."], 53 "BACKLINK_TITLE": 54 ["Jump back to footnote %d in the text", 55 "The text string used for the title HTML attribute " 56 "of the backlink. %d will be replaced by the " 57 "footnote number."] 58 } 59 super(FootnoteExtension, self).__init__(*args, **kwargs) 60 61 # In multiple invocations, emit links that don't get tangled. 62 self.unique_prefix = 0 63 self.found_refs = {} 64 self.used_refs = set() 65 66 self.reset() 67 68 def extendMarkdown(self, md, md_globals): 69 """ Add pieces to Markdown. """ 70 md.registerExtension(self) 71 self.parser = md.parser 72 self.md = md 73 # Insert a preprocessor before ReferencePreprocessor 74 md.preprocessors.add( 75 "footnote", FootnotePreprocessor(self), "<reference" 76 ) 77 # Insert an inline pattern before ImageReferencePattern 78 FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah 79 md.inlinePatterns.add( 80 "footnote", FootnotePattern(FOOTNOTE_RE, self), "<reference" 81 ) 82 # Insert a tree-processor that would actually add the footnote div 83 # This must be before all other treeprocessors (i.e., inline and 84 # codehilite) so they can run on the the contents of the div. 85 md.treeprocessors.add( 86 "footnote", FootnoteTreeprocessor(self), "_begin" 87 ) 88 89 # Insert a tree-processor that will run after inline is done. 90 # In this tree-processor we want to check our duplicate footnote tracker 91 # And add additional backrefs to the footnote pointing back to the 92 # duplicated references. 93 md.treeprocessors.add( 94 "footnote-duplicate", FootnotePostTreeprocessor(self), '>inline' 95 ) 96 97 # Insert a postprocessor after amp_substitute oricessor 98 md.postprocessors.add( 99 "footnote", FootnotePostprocessor(self), ">amp_substitute" 100 ) 101 102 def reset(self): 103 """ Clear footnotes on reset, and prepare for distinct document. """ 104 self.footnotes = OrderedDict() 105 self.unique_prefix += 1 106 self.found_refs = {} 107 self.used_refs = set() 108 109 def unique_ref(self, reference, found=False): 110 """ Get a unique reference if there are duplicates. """ 111 if not found: 112 return reference 113 114 original_ref = reference 115 while reference in self.used_refs: 116 ref, rest = reference.split(self.get_separator(), 1) 117 m = RE_REF_ID.match(ref) 118 if m: 119 reference = '%s%d%s%s' % (m.group(1), int(m.group(2))+1, self.get_separator(), rest) 120 else: 121 reference = '%s%d%s%s' % (ref, 2, self.get_separator(), rest) 122 123 self.used_refs.add(reference) 124 if original_ref in self.found_refs: 125 self.found_refs[original_ref] += 1 126 else: 127 self.found_refs[original_ref] = 1 128 return reference 129 130 def findFootnotesPlaceholder(self, root): 131 """ Return ElementTree Element that contains Footnote placeholder. """ 132 def finder(element): 133 for child in element: 134 if child.text: 135 if child.text.find(self.getConfig("PLACE_MARKER")) > -1: 136 return child, element, True 137 if child.tail: 138 if child.tail.find(self.getConfig("PLACE_MARKER")) > -1: 139 return child, element, False 140 child_res = finder(child) 141 if child_res is not None: 142 return child_res 143 return None 144 145 res = finder(root) 146 return res 147 148 def setFootnote(self, id, text): 149 """ Store a footnote for later retrieval. """ 150 self.footnotes[id] = text 151 152 def get_separator(self): 153 if self.md.output_format in ['html5', 'xhtml5']: 154 return '-' 155 return ':' 156 157 def makeFootnoteId(self, id): 158 """ Return footnote link id. """ 159 if self.getConfig("UNIQUE_IDS"): 160 return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id) 161 else: 162 return 'fn%s%s' % (self.get_separator(), id) 163 164 def makeFootnoteRefId(self, id, found=False): 165 """ Return footnote back-link id. """ 166 if self.getConfig("UNIQUE_IDS"): 167 return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found) 168 else: 169 return self.unique_ref('fnref%s%s' % (self.get_separator(), id), found) 170 171 def makeFootnotesDiv(self, root): 172 """ Return div of footnotes as et Element. """ 173 174 if not list(self.footnotes.keys()): 175 return None 176 177 div = util.etree.Element("div") 178 div.set('class', 'footnote') 179 util.etree.SubElement(div, "hr") 180 ol = util.etree.SubElement(div, "ol") 181 surrogate_parent = util.etree.Element("div") 182 183 for id in self.footnotes.keys(): 184 li = util.etree.SubElement(ol, "li") 185 li.set("id", self.makeFootnoteId(id)) 186 # Parse footnote with surrogate parent as li cannot be used. 187 # List block handlers have special logic to deal with li. 188 # When we are done parsing, we will copy everything over to li. 189 self.parser.parseChunk(surrogate_parent, self.footnotes[id]) 190 for el in list(surrogate_parent): 191 li.append(el) 192 surrogate_parent.remove(el) 193 backlink = util.etree.Element("a") 194 backlink.set("href", "#" + self.makeFootnoteRefId(id)) 195 if self.md.output_format not in ['html5', 'xhtml5']: 196 backlink.set("rev", "footnote") # Invalid in HTML5 197 backlink.set("class", "footnote-backref") 198 backlink.set( 199 "title", 200 self.getConfig("BACKLINK_TITLE") % 201 (self.footnotes.index(id)+1) 202 ) 203 backlink.text = FN_BACKLINK_TEXT 204 205 if len(li): 206 node = li[-1] 207 if node.tag == "p": 208 node.text = node.text + NBSP_PLACEHOLDER 209 node.append(backlink) 210 else: 211 p = util.etree.SubElement(li, "p") 212 p.append(backlink) 213 return div 214 215 216class FootnotePreprocessor(Preprocessor): 217 """ Find all footnote references and store for later use. """ 218 219 def __init__(self, footnotes): 220 self.footnotes = footnotes 221 222 def run(self, lines): 223 """ 224 Loop through lines and find, set, and remove footnote definitions. 225 226 Keywords: 227 228 * lines: A list of lines of text 229 230 Return: A list of lines of text with footnote definitions removed. 231 232 """ 233 newlines = [] 234 i = 0 235 while True: 236 m = DEF_RE.match(lines[i]) 237 if m: 238 fn, _i = self.detectTabbed(lines[i+1:]) 239 fn.insert(0, m.group(2)) 240 i += _i-1 # skip past footnote 241 footnote = "\n".join(fn) 242 self.footnotes.setFootnote(m.group(1), footnote.rstrip()) 243 # Preserve a line for each block to prevent raw HTML indexing issue. 244 # https://github.com/Python-Markdown/markdown/issues/584 245 num_blocks = (len(footnote.split('\n\n')) * 2) 246 newlines.extend([''] * (num_blocks)) 247 else: 248 newlines.append(lines[i]) 249 if len(lines) > i+1: 250 i += 1 251 else: 252 break 253 return newlines 254 255 def detectTabbed(self, lines): 256 """ Find indented text and remove indent before further proccesing. 257 258 Keyword arguments: 259 260 * lines: an array of strings 261 262 Returns: a list of post processed items and the index of last line. 263 264 """ 265 items = [] 266 blank_line = False # have we encountered a blank line yet? 267 i = 0 # to keep track of where we are 268 269 def detab(line): 270 match = TABBED_RE.match(line) 271 if match: 272 return match.group(4) 273 274 for line in lines: 275 if line.strip(): # Non-blank line 276 detabbed_line = detab(line) 277 if detabbed_line: 278 items.append(detabbed_line) 279 i += 1 280 continue 281 elif not blank_line and not DEF_RE.match(line): 282 # not tabbed but still part of first par. 283 items.append(line) 284 i += 1 285 continue 286 else: 287 return items, i+1 288 289 else: # Blank line: _maybe_ we are done. 290 blank_line = True 291 i += 1 # advance 292 293 # Find the next non-blank line 294 for j in range(i, len(lines)): 295 if lines[j].strip(): 296 next_line = lines[j] 297 break 298 else: 299 # Include extreaneous padding to prevent raw HTML 300 # parsing issue: https://github.com/Python-Markdown/markdown/issues/584 301 items.append("") 302 i += 1 303 else: 304 break # There is no more text; we are done. 305 306 # Check if the next non-blank line is tabbed 307 if detab(next_line): # Yes, more work to do. 308 items.append("") 309 continue 310 else: 311 break # No, we are done. 312 else: 313 i += 1 314 315 return items, i 316 317 318class FootnotePattern(Pattern): 319 """ InlinePattern for footnote markers in a document's body text. """ 320 321 def __init__(self, pattern, footnotes): 322 super(FootnotePattern, self).__init__(pattern) 323 self.footnotes = footnotes 324 325 def handleMatch(self, m): 326 id = m.group(2) 327 if id in self.footnotes.footnotes.keys(): 328 sup = util.etree.Element("sup") 329 a = util.etree.SubElement(sup, "a") 330 sup.set('id', self.footnotes.makeFootnoteRefId(id, found=True)) 331 a.set('href', '#' + self.footnotes.makeFootnoteId(id)) 332 if self.footnotes.md.output_format not in ['html5', 'xhtml5']: 333 a.set('rel', 'footnote') # invalid in HTML5 334 a.set('class', 'footnote-ref') 335 a.text = util.text_type(self.footnotes.footnotes.index(id) + 1) 336 return sup 337 else: 338 return None 339 340 341class FootnotePostTreeprocessor(Treeprocessor): 342 """ Ammend footnote div with duplicates. """ 343 344 def __init__(self, footnotes): 345 self.footnotes = footnotes 346 347 def add_duplicates(self, li, duplicates): 348 """ Adjust current li and add the duplicates: fnref2, fnref3, etc. """ 349 for link in li.iter('a'): 350 # Find the link that needs to be duplicated. 351 if link.attrib.get('class', '') == 'footnote-backref': 352 ref, rest = link.attrib['href'].split(self.footnotes.get_separator(), 1) 353 # Duplicate link the number of times we need to 354 # and point the to the appropriate references. 355 links = [] 356 for index in range(2, duplicates + 1): 357 sib_link = copy.deepcopy(link) 358 sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, self.footnotes.get_separator(), rest) 359 links.append(sib_link) 360 self.offset += 1 361 # Add all the new duplicate links. 362 el = list(li)[-1] 363 for l in links: 364 el.append(l) 365 break 366 367 def get_num_duplicates(self, li): 368 """ Get the number of duplicate refs of the footnote. """ 369 fn, rest = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1) 370 link_id = '%sref%s%s' % (fn, self.footnotes.get_separator(), rest) 371 return self.footnotes.found_refs.get(link_id, 0) 372 373 def handle_duplicates(self, parent): 374 """ Find duplicate footnotes and format and add the duplicates. """ 375 for li in list(parent): 376 # Check number of duplicates footnotes and insert 377 # additional links if needed. 378 count = self.get_num_duplicates(li) 379 if count > 1: 380 self.add_duplicates(li, count) 381 382 def run(self, root): 383 """ Crawl the footnote div and add missing duplicate footnotes. """ 384 self.offset = 0 385 for div in root.iter('div'): 386 if div.attrib.get('class', '') == 'footnote': 387 # Footnotes shoul be under the first orderd list under 388 # the footnote div. So once we find it, quit. 389 for ol in div.iter('ol'): 390 self.handle_duplicates(ol) 391 break 392 393 394class FootnoteTreeprocessor(Treeprocessor): 395 """ Build and append footnote div to end of document. """ 396 397 def __init__(self, footnotes): 398 self.footnotes = footnotes 399 400 def run(self, root): 401 footnotesDiv = self.footnotes.makeFootnotesDiv(root) 402 if footnotesDiv is not None: 403 result = self.footnotes.findFootnotesPlaceholder(root) 404 if result: 405 child, parent, isText = result 406 ind = list(parent).index(child) 407 if isText: 408 parent.remove(child) 409 parent.insert(ind, footnotesDiv) 410 else: 411 parent.insert(ind + 1, footnotesDiv) 412 child.tail = None 413 else: 414 root.append(footnotesDiv) 415 416 417class FootnotePostprocessor(Postprocessor): 418 """ Replace placeholders with html entities. """ 419 def __init__(self, footnotes): 420 self.footnotes = footnotes 421 422 def run(self, text): 423 text = text.replace( 424 FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT") 425 ) 426 return text.replace(NBSP_PLACEHOLDER, " ") 427 428 429def makeExtension(*args, **kwargs): 430 """ Return an instance of the FootnoteExtension """ 431 return FootnoteExtension(*args, **kwargs) 432