1"""
2Footnotes Extension for Python-Markdown
3=======================================
4
5Adds footnote handling to Python-Markdown.
6
7See <https://Python-Markdown.github.io/extensions/footnotes>
8for documentation.
9
10Copyright The Python Markdown Project
11
12License: [BSD](https://opensource.org/licenses/bsd-license.php)
13
14"""
15
16from . import Extension
17from ..blockprocessors import BlockProcessor
18from ..inlinepatterns import InlineProcessor
19from ..treeprocessors import Treeprocessor
20from ..postprocessors import Postprocessor
21from .. import util
22from collections import OrderedDict
23import re
24import copy
25import xml.etree.ElementTree as etree
26
# Placeholder stored as the text of each footnote backlink while the tree is
# built; `FootnotePostprocessor.run` swaps it for the configured BACKLINK_TEXT.
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
# Placeholder appended to a footnote's final paragraph ahead of the backlink;
# `FootnotePostprocessor.run` replaces it with the "&#160;" entity.
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
# Splits a back-reference prefix such as "fnref2" into the "fnref" stem and its
# numeric suffix; used by `FootnoteExtension.unique_ref` to number duplicates.
RE_REF_ID = re.compile(r'(fnref)(\d+)')
30
31
class FootnoteExtension(Extension):
    """ Footnote Extension.

    Stores the footnote definitions found in a document, generates the ids
    used to link references and footnotes together, and builds the
    `<div class="footnote">` element that holds the rendered footnotes.
    """

    def __init__(self, **kwargs):
        """ Setup configs. """

        self.config = {
            'PLACE_MARKER':
                ["///Footnotes Go Here///",
                 "The text string that marks where the footnotes go"],
            'UNIQUE_IDS':
                [False,
                 "Avoid name collisions across "
                 "multiple calls to reset()."],
            "BACKLINK_TEXT":
                ["&#8617;",
                 "The text string that links from the footnote "
                 "to the reader's place."],
            "BACKLINK_TITLE":
                ["Jump back to footnote %d in the text",
                 "The text string used for the title HTML attribute "
                 "of the backlink. %d will be replaced by the "
                 "footnote number."],
            "SEPARATOR":
                [":",
                 "Footnote separator."]
        }
        super().__init__(**kwargs)

        # In multiple invocations, emit links that don't get tangled.
        self.unique_prefix = 0
        # Maps an un-numbered back-reference id to how many times it was used.
        self.found_refs = {}
        # Every back-reference id handed out so far for the current document.
        self.used_refs = set()

        self.reset()

    def extendMarkdown(self, md):
        """ Add pieces to Markdown. """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a blockprocessor before ReferencePreprocessor
        md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)

        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other treeprocessors (i.e., inline and
        # codehilite) so they can run on the contents of the div.
        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional backrefs to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)

        # Insert a postprocessor after amp_substitute processor
        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)

    def reset(self):
        """ Clear footnotes on reset, and prepare for distinct document. """
        self.footnotes = OrderedDict()
        self.unique_prefix += 1
        self.found_refs = {}
        self.used_refs = set()

    def unique_ref(self, reference, found=False):
        """ Get a unique reference if there are duplicates.

        When `found` is false the reference is returned untouched and nothing
        is recorded. Otherwise the number in front of the separator is bumped
        (`fnref` -> `fnref2` -> `fnref3` ...) until an unused id is reached;
        that id is remembered in `used_refs` and the usage count of the
        original reference in `found_refs` is incremented.
        """
        if not found:
            return reference

        original_ref = reference
        separator = self.get_separator()
        while reference in self.used_refs:
            ref, rest = reference.split(separator, 1)
            m = RE_REF_ID.match(ref)
            if m:
                # Already numbered: move on to the next number.
                reference = '%s%d%s%s' % (m.group(1), int(m.group(2))+1, separator, rest)
            else:
                # First duplicate of an un-numbered reference.
                reference = '%s%d%s%s' % (ref, 2, separator, rest)

        self.used_refs.add(reference)
        self.found_refs[original_ref] = self.found_refs.get(original_ref, 0) + 1
        return reference

    def findFootnotesPlaceholder(self, root):
        """ Return ElementTree Element that contains Footnote placeholder.

        Searches the descendants of `root` depth-first for the PLACE_MARKER
        text. Returns a `(child, parent, is_text)` tuple where `is_text` is
        True when the marker was found in `child.text` and False when it was
        found in `child.tail`, or None when the marker is absent.
        """
        marker = self.getConfig("PLACE_MARKER")

        def finder(element):
            for child in element:
                if child.text and marker in child.text:
                    return child, element, True
                if child.tail and marker in child.tail:
                    return child, element, False
                child_res = finder(child)
                if child_res is not None:
                    return child_res
            return None

        return finder(root)

    def setFootnote(self, id, text):
        """ Store a footnote for later retrieval. """
        self.footnotes[id] = text

    def get_separator(self):
        """ Get the footnote separator. """
        return self.getConfig("SEPARATOR")

    def makeFootnoteId(self, id):
        """ Return footnote link id. """
        if self.getConfig("UNIQUE_IDS"):
            return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
        else:
            return 'fn{}{}'.format(self.get_separator(), id)

    def makeFootnoteRefId(self, id, found=False):
        """ Return footnote back-link id.

        `found` is forwarded to `unique_ref`; pass True when the id is being
        assigned to an actual reference in the text so duplicates get numbered.
        """
        if self.getConfig("UNIQUE_IDS"):
            return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
        else:
            return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found)

    def makeFootnotesDiv(self, root):
        """ Return div of footnotes as et Element.

        Returns None when no footnotes have been stored. `root` is accepted
        for interface compatibility and is not used.
        """

        if not self.footnotes:
            return None

        div = etree.Element("div")
        div.set('class', 'footnote')
        etree.SubElement(div, "hr")
        ol = etree.SubElement(div, "ol")
        surrogate_parent = etree.Element("div")

        # Parsing a footnote body runs the block processors again, so a
        # definition nested inside a footnote registers a new entry while we
        # are looping. Iterating the mapping directly would then raise
        # "mutated during iteration"; walk a growing snapshot of the keys
        # instead so late arrivals are rendered after the existing ones.
        pending = list(self.footnotes)
        number = 0
        while number < len(pending):
            fn_id = pending[number]
            number += 1  # Footnotes are numbered from 1.
            li = etree.SubElement(ol, "li")
            li.set("id", self.makeFootnoteId(fn_id))
            # Parse footnote with surrogate parent as li cannot be used.
            # List block handlers have special logic to deal with li.
            # When we are done parsing, we will copy everything over to li.
            self.parser.parseChunk(surrogate_parent, self.footnotes[fn_id])
            for el in list(surrogate_parent):
                li.append(el)
                surrogate_parent.remove(el)
            if len(self.footnotes) > len(pending):
                # New keys are appended at the end of the ordered mapping.
                pending.extend(list(self.footnotes)[len(pending):])
            backlink = etree.Element("a")
            backlink.set("href", "#" + self.makeFootnoteRefId(fn_id))
            backlink.set("class", "footnote-backref")
            backlink.set(
                "title",
                self.getConfig("BACKLINK_TITLE") % (number)
            )
            backlink.text = FN_BACKLINK_TEXT

            if len(li):
                node = li[-1]
                if node.tag == "p":
                    # `text` is None for a paragraph without leading text.
                    node.text = (node.text or '') + NBSP_PLACEHOLDER
                    node.append(backlink)
                else:
                    p = etree.SubElement(li, "p")
                    p.append(backlink)
        return div
201
202
class FootnoteBlockProcessor(BlockProcessor):
    """ Locate footnote definitions in the block stream and hand them to the extension. """

    RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)

    def __init__(self, footnotes):
        super().__init__(footnotes.parser)
        self.footnotes = footnotes

    def test(self, parent, block):
        # Every block is accepted; `run` reports whether a definition was found.
        return True

    def run(self, parent, blocks):
        """ Extract one footnote definition from the first block, if present.

        Returns True when a definition was stored (the remaining pieces of the
        block are pushed back onto `blocks`), or False after restoring the
        untouched block when it holds no definition.
        """
        block = blocks.pop(0)
        match = self.RE.search(block)
        if match is None:
            # Nothing to do here; give the block back unchanged.
            blocks.insert(0, block)
            return False

        label = match.group(1)
        first_line = match.group(2)
        leading = block[:match.start()]
        remainder = block[match.end():].lstrip('\n')

        following = self.RE.search(remainder)
        if following is not None:
            # A second definition starts later in this block. The lines in
            # between continue the current footnote (possibly lazily indented).
            continuation = remainder[:following.start()].rstrip('\n')
            parts = ['\n'.join([first_line, self.detab(continuation)]).lstrip('\n')]
            # Queue the next definition for the following iteration.
            blocks.insert(0, remainder[following.start():])
        else:
            # The rest of the block belongs to this footnote, and so may any
            # indented blocks that come after it.
            parts = ['\n'.join([first_line, self.detab(remainder)]).strip('\n')]
            parts.extend(self.detectTabbed(blocks))

        self.footnotes.setFootnote(label, "\n\n".join(parts).rstrip())

        if leading.strip():
            # Text ahead of the definition goes back as a block of its own.
            blocks.insert(0, leading.rstrip('\n'))
        return True

    def detectTabbed(self, blocks):
        """ Consume the indented blocks that continue the current footnote.

        Blocks are popped from the front of `blocks` for as long as they start
        with four spaces. A block containing a new footnote definition is split:
        the text before it is kept, the rest is pushed back and scanning stops.

        Returns: a list of blocks with indentation removed.
        """
        indent = ' ' * 4
        collected = []
        while blocks and blocks[0].startswith(indent):
            candidate = blocks.pop(0)
            found = self.RE.search(candidate)
            if found is None:
                # The whole block is part of this footnote.
                collected.append(self.detab(candidate))
                continue
            # Keep what precedes the new definition, return the remainder.
            collected.append(self.detab(candidate[:found.start()].rstrip('\n')))
            blocks.insert(0, candidate[found.start():])
            break
        return collected

    def detab(self, block):
        """ Strip one four-space indent from each indented line of a block.

        Lines that are not indented are left as they are, which keeps lazily
        indented continuation lines intact.
        """
        indent = ' ' * 4
        return '\n'.join(
            line[4:] if line.startswith(indent) else line
            for line in block.split('\n')
        )
289
290
class FootnoteInlineProcessor(InlineProcessor):
    """ Inline processor that turns footnote markers in body text into links. """

    def __init__(self, pattern, footnotes):
        super().__init__(pattern)
        self.footnotes = footnotes

    def handleMatch(self, m, data):
        """ Build the `<sup><a>` reference for a marker with a known definition.

        Returns `(element, start, end)` for a defined footnote and
        `(None, None, None)` when the label has no stored definition.
        """
        label = m.group(1)
        definitions = self.footnotes.footnotes
        if label not in definitions:
            return None, None, None

        sup = etree.Element("sup")
        anchor = etree.SubElement(sup, "a")
        sup.set('id', self.footnotes.makeFootnoteRefId(label, found=True))
        anchor.set('href', '#' + self.footnotes.makeFootnoteId(label))
        anchor.set('class', 'footnote-ref')
        # The visible number is the 1-based position of the definition.
        position = list(definitions.keys()).index(label)
        anchor.text = str(position + 1)
        return sup, m.start(0), m.end(0)
310
311
class FootnotePostTreeprocessor(Treeprocessor):
    """ Amend footnote div with duplicates. """

    def __init__(self, footnotes):
        self.footnotes = footnotes
        # Number of extra backlinks added; reset at the start of every `run`.
        self.offset = 0

    def add_duplicates(self, li, duplicates):
        """ Adjust current li and add the duplicates: fnref2, fnref3, etc.

        The first `footnote-backref` link inside `li` is copied once for each
        additional reference (numbers 2 through `duplicates`) and the copies
        are appended to the last child element of `li`.
        """
        separator = self.footnotes.get_separator()
        for link in li.iter('a'):
            # Find the link that needs to be duplicated.
            if link.attrib.get('class', '') == 'footnote-backref':
                ref, rest = link.attrib['href'].split(separator, 1)
                # Duplicate link the number of times we need to
                # and point them to the appropriate references.
                links = []
                for index in range(2, duplicates + 1):
                    sib_link = copy.deepcopy(link)
                    sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, separator, rest)
                    links.append(sib_link)
                    self.offset += 1
                # Add all the new duplicate links.
                li[-1].extend(links)
                break

    def get_num_duplicates(self, li):
        """ Get the number of duplicate refs of the footnote.

        Returns 0 for a list item whose id is missing or does not contain the
        separator, i.e. an item that was not generated by this extension.
        """
        li_id = li.attrib.get('id', '')
        separator = self.footnotes.get_separator()
        if not separator or separator not in li_id:
            # Unpacking the split result would fail for such an id.
            return 0
        fn, rest = li_id.split(separator, 1)
        link_id = '{}ref{}{}'.format(fn, separator, rest)
        return self.footnotes.found_refs.get(link_id, 0)

    def handle_duplicates(self, parent):
        """ Find duplicate footnotes and format and add the duplicates. """
        for li in list(parent):
            # Check number of duplicates footnotes and insert
            # additional links if needed.
            count = self.get_num_duplicates(li)
            if count > 1:
                self.add_duplicates(li, count)

    def run(self, root):
        """ Crawl the footnote div and add missing duplicate footnotes. """
        self.offset = 0
        for div in root.iter('div'):
            if div.attrib.get('class', '') == 'footnote':
                # Footnotes should be under the first ordered list under
                # the footnote div.  So once we find it, quit.
                for ol in div.iter('ol'):
                    self.handle_duplicates(ol)
                    break
363
364
class FootnoteTreeprocessor(Treeprocessor):
    """ Create the footnote div and place it at the marker or at the document end. """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def run(self, root):
        """ Insert the rendered footnotes into `root`; do nothing without footnotes. """
        notes_div = self.footnotes.makeFootnotesDiv(root)
        if notes_div is None:
            return

        placeholder = self.footnotes.findFootnotesPlaceholder(root)
        if not placeholder:
            # No marker in the document: footnotes go last.
            root.append(notes_div)
            return

        child, parent, in_text = placeholder
        position = list(parent).index(child)
        if in_text:
            # The marker is the element's own text: swap the element for the div.
            parent.remove(child)
            parent.insert(position, notes_div)
        else:
            # The marker trails the element: put the div after it and drop the tail.
            parent.insert(position + 1, notes_div)
            child.tail = None
386
387
class FootnotePostprocessor(Postprocessor):
    """ Swap the footnote placeholders in the serialized output for their final text. """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def run(self, text):
        """ Return `text` with backlink and non-breaking-space placeholders substituted. """
        backlink_text = self.footnotes.getConfig("BACKLINK_TEXT")
        with_backlinks = text.replace(FN_BACKLINK_TEXT, backlink_text)
        return with_backlinks.replace(NBSP_PLACEHOLDER, "&#160;")
398
399
def makeExtension(**kwargs):  # pragma: no cover
    """ Create a `FootnoteExtension`, forwarding all keyword options to it. """
    extension = FootnoteExtension(**kwargs)
    return extension
403