"""
Footnotes Extension for Python-Markdown
=======================================

Adds footnote handling to Python-Markdown.

See <https://Python-Markdown.github.io/extensions/footnotes>
for documentation.

Copyright The Python Markdown Project

License: [BSD](https://opensource.org/licenses/bsd-license.php)

"""

from . import Extension
from ..blockprocessors import BlockProcessor
from ..inlinepatterns import InlineProcessor
from ..treeprocessors import Treeprocessor
from ..postprocessors import Postprocessor
from .. import util
from collections import OrderedDict
import re
import copy
import xml.etree.ElementTree as etree

# Placeholders written into the tree while it is being built; they are
# swapped for their final text by `FootnotePostprocessor.run`.
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
# Matches the leading part of a reference id that already carries a
# duplicate counter, e.g. the `fnref2` in `fnref2:name`.
RE_REF_ID = re.compile(r'(fnref)(\d+)')


class FootnoteExtension(Extension):
    """ Footnote Extension. """

    def __init__(self, **kwargs):
        """ Setup configs. """

        self.config = {
            'PLACE_MARKER':
                ["///Footnotes Go Here///",
                 "The text string that marks where the footnotes go"],
            'UNIQUE_IDS':
                [False,
                 "Avoid name collisions across "
                 "multiple calls to reset()."],
            "BACKLINK_TEXT":
                ["↩",
                 "The text string that links from the footnote "
                 "to the reader's place."],
            "BACKLINK_TITLE":
                ["Jump back to footnote %d in the text",
                 "The text string used for the title HTML attribute "
                 "of the backlink. %d will be replaced by the "
                 "footnote number."],
            "SEPARATOR":
                [":",
                 "Footnote separator."]
        }
        super().__init__(**kwargs)

        # In multiple invocations, emit links that don't get tangled.
        self.unique_prefix = 0
        self.found_refs = {}
        self.used_refs = set()

        self.reset()

    def extendMarkdown(self, md):
        """ Add pieces to Markdown. """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a blockprocessor before ReferencePreprocessor
        md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)

        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other treeprocessors (i.e., inline and
        # codehilite) so they can run on the contents of the div.
        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional backrefs to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)

        # Insert a postprocessor after amp_substitute processor
        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)

    def reset(self):
        """ Clear footnotes on reset, and prepare for distinct document. """
        self.footnotes = OrderedDict()
        self.unique_prefix += 1
        self.found_refs = {}
        self.used_refs = set()

    def unique_ref(self, reference, found=False):
        """ Get a unique reference if there are duplicates.

        When `found` is false the reference is returned untouched and nothing
        is recorded. Otherwise the part before the separator is given an
        incrementing numeric suffix (`fnref` -> `fnref2` -> `fnref3` ...)
        until the id is not in `used_refs`; the result is added to
        `used_refs` and the counter for the original reference in
        `found_refs` is incremented.
        """
        if not found:
            return reference

        sep = self.get_separator()
        original_ref = reference
        while reference in self.used_refs:
            ref, rest = reference.split(sep, 1)
            m = RE_REF_ID.match(ref)
            if m:
                reference = '%s%d%s%s' % (m.group(1), int(m.group(2)) + 1, sep, rest)
            else:
                reference = '%s%d%s%s' % (ref, 2, sep, rest)

        self.used_refs.add(reference)
        self.found_refs[original_ref] = self.found_refs.get(original_ref, 0) + 1
        return reference

    def findFootnotesPlaceholder(self, root):
        """ Return ElementTree Element that contains Footnote placeholder.

        Searches the descendants of `root` depth-first. Returns a tuple
        `(child, parent, is_text)` where `is_text` is True when the marker
        was found in `child.text` and False when it was found in
        `child.tail`, or `None` when the marker does not occur.
        """
        marker = self.getConfig("PLACE_MARKER")

        def finder(element):
            for child in element:
                if child.text and marker in child.text:
                    return child, element, True
                if child.tail and marker in child.tail:
                    return child, element, False
                child_res = finder(child)
                if child_res is not None:
                    return child_res
            return None

        return finder(root)

    def setFootnote(self, id, text):
        """ Store a footnote for later retrieval. """
        self.footnotes[id] = text

    def get_separator(self):
        """ Get the footnote separator. """
        return self.getConfig("SEPARATOR")

    def makeFootnoteId(self, id):
        """ Return footnote link id. """
        if self.getConfig("UNIQUE_IDS"):
            return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
        else:
            return 'fn{}{}'.format(self.get_separator(), id)

    def makeFootnoteRefId(self, id, found=False):
        """ Return footnote back-link id. """
        if self.getConfig("UNIQUE_IDS"):
            return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
        else:
            return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found)

    def makeFootnotesDiv(self, root):
        """ Return div of footnotes as et Element.

        Returns `None` when no footnotes have been stored. `root` is not
        used by this method.
        """

        if not self.footnotes:
            return None

        div = etree.Element("div")
        div.set('class', 'footnote')
        etree.SubElement(div, "hr")
        ol = etree.SubElement(div, "ol")
        surrogate_parent = etree.Element("div")
        title_template = self.getConfig("BACKLINK_TITLE")

        for index, fn_id in enumerate(self.footnotes, start=1):
            li = etree.SubElement(ol, "li")
            li.set("id", self.makeFootnoteId(fn_id))
            # Parse footnote with surrogate parent as li cannot be used.
            # List block handlers have special logic to deal with li.
            # When we are done parsing, we will copy everything over to li.
            self.parser.parseChunk(surrogate_parent, self.footnotes[fn_id])
            for el in list(surrogate_parent):
                li.append(el)
                surrogate_parent.remove(el)

            # A user supplied title without a `%d` placeholder would make the
            # `%` operator raise; use such a title verbatim instead.
            try:
                title = title_template % (index)
            except (TypeError, ValueError):
                title = title_template

            backlink = etree.Element("a")
            backlink.set("href", "#" + self.makeFootnoteRefId(fn_id))
            backlink.set("class", "footnote-backref")
            backlink.set("title", title)
            backlink.text = FN_BACKLINK_TEXT

            if len(li):
                node = li[-1]
                if node.tag == "p":
                    # `text` is None for an element without leading text.
                    node.text = (node.text or '') + NBSP_PLACEHOLDER
                    node.append(backlink)
                else:
                    p = etree.SubElement(li, "p")
                    p.append(backlink)
        return div


class FootnoteBlockProcessor(BlockProcessor):
    """ Find all footnote references and store for later use. """

    RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)

    def __init__(self, footnotes):
        super().__init__(footnotes.parser)
        self.footnotes = footnotes

    def test(self, parent, block):
        # Every block is handed to `run`, which restores the block and
        # returns False when it holds no footnote definition.
        return True

    def run(self, parent, blocks):
        """ Find, set, and remove footnote definitions.

        Returns True when a definition was consumed from the first block and
        False (with `blocks` left as it was) otherwise.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            fn_id = m.group(1)
            fn_blocks = [m.group(2)]

            # Handle rest of block
            therest = block[m.end():].lstrip('\n')
            m2 = self.RE.search(therest)
            if m2:
                # Another footnote exists in the rest of this block.
                # Any content before match is continuation of this footnote, which may be lazily indented.
                before = therest[:m2.start()].rstrip('\n')
                fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n')
                # Add back to blocks everything from beginning of match forward for next iteration.
                blocks.insert(0, therest[m2.start():])
            else:
                # All remaining lines of block are continuation of this footnote, which may be lazily indented.
                fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n')

                # Check for child elements in remaining blocks.
                fn_blocks.extend(self.detectTabbed(blocks))

            footnote = "\n\n".join(fn_blocks)
            self.footnotes.setFootnote(fn_id, footnote.rstrip())

            if block[:m.start()].strip():
                # Add any content before match back to blocks as separate block
                blocks.insert(0, block[:m.start()].rstrip('\n'))
            return True
        # No match. Restore block.
        blocks.insert(0, block)
        return False

    def detectTabbed(self, blocks):
        """ Find indented text and remove indent before further processing.

        Consumes leading blocks from `blocks` for as long as they start with
        four spaces, stopping early when a new footnote definition is found.

        Returns: a list of blocks with indentation removed.
        """
        fn_blocks = []
        while blocks:
            if blocks[0].startswith(' '*4):
                block = blocks.pop(0)
                # Check for new footnotes within this block and split at new footnote.
                m = self.RE.search(block)
                if m:
                    # Another footnote exists in this block.
                    # Any content before match is continuation of this footnote, which may be lazily indented.
                    before = block[:m.start()].rstrip('\n')
                    fn_blocks.append(self.detab(before))
                    # Add back to blocks everything from beginning of match forward for next iteration.
                    blocks.insert(0, block[m.start():])
                    # End of this footnote.
                    break
                else:
                    # Entire block is part of this footnote.
                    fn_blocks.append(self.detab(block))
            else:
                # End of this footnote.
                break
        return fn_blocks

    def detab(self, block):
        """ Remove one level of indent from a block.

        Preserve lazily indented blocks by only removing indent from indented lines.
        """
        lines = block.split('\n')
        for i, line in enumerate(lines):
            if line.startswith(' '*4):
                lines[i] = line[4:]
        return '\n'.join(lines)


class FootnoteInlineProcessor(InlineProcessor):
    """ InlinePattern for footnote markers in a document's body text. """

    def __init__(self, pattern, footnotes):
        super().__init__(pattern)
        self.footnotes = footnotes

    def handleMatch(self, m, data):
        """ Build the `<sup><a>` reference for a marker with a known footnote.

        Returns `(element, start, end)` for a marker whose id has a stored
        footnote, and `(None, None, None)` otherwise.
        """
        fn_id = m.group(1)
        if fn_id not in self.footnotes.footnotes:
            return None, None, None

        sup = etree.Element("sup")
        a = etree.SubElement(sup, "a")
        sup.set('id', self.footnotes.makeFootnoteRefId(fn_id, found=True))
        a.set('href', '#' + self.footnotes.makeFootnoteId(fn_id))
        a.set('class', 'footnote-ref')
        # The visible number is the 1-based position of the definition.
        a.text = str(list(self.footnotes.footnotes.keys()).index(fn_id) + 1)
        return sup, m.start(0), m.end(0)


class FootnotePostTreeprocessor(Treeprocessor):
    """ Amend footnote div with duplicates. """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def add_duplicates(self, li, duplicates):
        """ Adjust current li and add the duplicates: fnref2, fnref3, etc. """
        sep = self.footnotes.get_separator()
        for link in li.iter('a'):
            # Find the link that needs to be duplicated.
            if link.attrib.get('class', '') == 'footnote-backref':
                ref, rest = link.attrib['href'].split(sep, 1)
                # Duplicate link the number of times we need to
                # and point them to the appropriate references.
                links = []
                for index in range(2, duplicates + 1):
                    sib_link = copy.deepcopy(link)
                    sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, sep, rest)
                    links.append(sib_link)
                    self.offset += 1
                # Add all the new duplicate links.
                el = li[-1]
                for new_link in links:
                    el.append(new_link)
                break

    def get_num_duplicates(self, li):
        """ Get the number of duplicate refs of the footnote.

        Returns 0 for an `li` whose `id` is missing or does not contain the
        separator, instead of failing to unpack the split result.
        """
        sep = self.footnotes.get_separator()
        fn, found_sep, rest = li.attrib.get('id', '').partition(sep)
        if not found_sep:
            return 0
        link_id = '{}ref{}{}'.format(fn, sep, rest)
        return self.footnotes.found_refs.get(link_id, 0)

    def handle_duplicates(self, parent):
        """ Find duplicate footnotes and format and add the duplicates. """
        for li in list(parent):
            # Check number of duplicates footnotes and insert
            # additional links if needed.
            count = self.get_num_duplicates(li)
            if count > 1:
                self.add_duplicates(li, count)

    def run(self, root):
        """ Crawl the footnote div and add missing duplicate footnotes. """
        self.offset = 0
        for div in root.iter('div'):
            if div.attrib.get('class', '') == 'footnote':
                # Footnotes should be under the first ordered list under
                # the footnote div. So once we find it, quit.
                for ol in div.iter('ol'):
                    self.handle_duplicates(ol)
                    break


class FootnoteTreeprocessor(Treeprocessor):
    """ Build and append footnote div to end of document. """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def run(self, root):
        """ Insert the footnotes div at the placeholder, or append it to root. """
        footnotesDiv = self.footnotes.makeFootnotesDiv(root)
        if footnotesDiv is not None:
            result = self.footnotes.findFootnotesPlaceholder(root)
            if result:
                child, parent, isText = result
                ind = list(parent).index(child)
                if isText:
                    # Marker is the element's own text: swap the element out.
                    parent.remove(child)
                    parent.insert(ind, footnotesDiv)
                else:
                    # Marker is in the tail: place the div right after the
                    # element and drop the tail that held the marker.
                    parent.insert(ind + 1, footnotesDiv)
                    child.tail = None
            else:
                root.append(footnotesDiv)


class FootnotePostprocessor(Postprocessor):
    """ Replace placeholders with html entities. """
    def __init__(self, footnotes):
        self.footnotes = footnotes

    def run(self, text):
        text = text.replace(
            FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")
        )
        # Emit the numeric entity explicitly: a literal whitespace character
        # here is indistinguishable from a plain space in most editors and
        # would silently lose the non-breaking behaviour.
        return text.replace(NBSP_PLACEHOLDER, "&#160;")


def makeExtension(**kwargs):  # pragma: no cover
    """ Return an instance of the FootnoteExtension """
    return FootnoteExtension(**kwargs)