1"""
2Footnotes Extension for Python-Markdown
3=======================================
4
5Adds footnote handling to Python-Markdown.
6
7See <https://Python-Markdown.github.io/extensions/footnotes>
8for documentation.
9
10Copyright The Python Markdown Project
11
12License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
13
14"""
15
16from __future__ import absolute_import
17from __future__ import unicode_literals
18from . import Extension
19from ..preprocessors import Preprocessor
20from ..inlinepatterns import Pattern
21from ..treeprocessors import Treeprocessor
22from ..postprocessors import Postprocessor
23from .. import util
24from ..odict import OrderedDict
25import re
26import copy
27
# Placeholder stored as the backlink text while the tree is built;
# FootnotePostprocessor swaps it for the BACKLINK_TEXT config value.
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
# Placeholder inserted before the backlink; FootnotePostprocessor
# replaces it with the "&#160;" entity.
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
# A footnote definition line: up to three leading spaces, "[^label]:",
# then the first line of the footnote text.
# Group 1 is the label, group 2 the text.
DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)')
# A line indented by one tab or four spaces; group 4 is the rest of the line.
TABBED_RE = re.compile(r'((\t)|(    ))(.*)')
# The "fnref" prefix followed by a number (e.g. "fnref2"), as produced for
# repeated references; group 1 is the prefix, group 2 the number.
RE_REF_ID = re.compile(r'(fnref)(\d+)')
33
34
class FootnoteExtension(Extension):
    """ Footnote Extension.

    Registers the footnote processors with a Markdown instance and holds the
    state they share:

    * footnotes: ordered mapping of footnote label -> raw footnote text.
    * unique_prefix: counter incremented on every reset(); embedded in ids
      when the UNIQUE_IDS option is enabled.
    * used_refs: set of reference ids already handed out for this document.
    * found_refs: mapping of base reference id -> number of times that
      footnote was referenced in the document body.

    """

    def __init__(self, *args, **kwargs):
        """ Setup configs. """

        self.config = {
            'PLACE_MARKER':
                ["///Footnotes Go Here///",
                 "The text string that marks where the footnotes go"],
            'UNIQUE_IDS':
                [False,
                 "Avoid name collisions across "
                 "multiple calls to reset()."],
            "BACKLINK_TEXT":
                ["&#8617;",
                 "The text string that links from the footnote "
                 "to the reader's place."],
            "BACKLINK_TITLE":
                ["Jump back to footnote %d in the text",
                 "The text string used for the title HTML attribute "
                 "of the backlink. %d will be replaced by the "
                 "footnote number."]
        }
        super(FootnoteExtension, self).__init__(*args, **kwargs)

        # In multiple invocations, emit links that don't get tangled.
        self.unique_prefix = 0
        self.found_refs = {}
        self.used_refs = set()

        # reset() bumps unique_prefix, so the first document uses prefix 1.
        self.reset()

    def extendMarkdown(self, md, md_globals):
        """ Add pieces to Markdown.

        Keyword arguments:

        * md: the Markdown instance to extend.
        * md_globals: accepted for interface compatibility; not used here.

        """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a preprocessor before ReferencePreprocessor
        md.preprocessors.add(
            "footnote", FootnotePreprocessor(self), "<reference"
        )
        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.add(
            "footnote", FootnotePattern(FOOTNOTE_RE, self), "<reference"
        )
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other treeprocessors (i.e., inline and
        # codehilite) so they can run on the contents of the div.
        md.treeprocessors.add(
            "footnote", FootnoteTreeprocessor(self), "_begin"
        )

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote tracker
        # And add additional backrefs to the footnote pointing back to the
        # duplicated references.
        md.treeprocessors.add(
            "footnote-duplicate", FootnotePostTreeprocessor(self), '>inline'
        )

        # Insert a postprocessor after the amp_substitute postprocessor
        md.postprocessors.add(
            "footnote", FootnotePostprocessor(self), ">amp_substitute"
        )

    def reset(self):
        """ Clear footnotes on reset, and prepare for distinct document. """
        self.footnotes = OrderedDict()
        self.unique_prefix += 1
        self.found_refs = {}
        self.used_refs = set()

    def unique_ref(self, reference, found=False):
        """ Get a unique reference if there are duplicates.

        Keyword arguments:

        * reference: the candidate reference id.
        * found: True when the id is being assigned to a reference found in
          the document body. When False the id is returned untouched and
          nothing is recorded.

        Returns: the id to use. If `reference` was already handed out, the
        number after the "fnref" prefix is bumped (fnref -> fnref2 -> fnref3
        ...) until an unused id is found.

        """
        if not found:
            return reference

        original_ref = reference
        while reference in self.used_refs:
            separator = self.get_separator()
            ref, rest = reference.split(separator, 1)
            m = RE_REF_ID.match(ref)
            if m:
                # Already numbered: move on to the next number.
                reference = '%s%d%s%s' % (m.group(1), int(m.group(2)) + 1, separator, rest)
            else:
                # First duplicate: numbering starts at 2.
                reference = '%s%d%s%s' % (ref, 2, separator, rest)

        self.used_refs.add(reference)
        # Count how many times the base reference has been used; the
        # duplicate tree-processor reads this to add extra backlinks.
        self.found_refs[original_ref] = self.found_refs.get(original_ref, 0) + 1
        return reference

    def findFootnotesPlaceholder(self, root):
        """ Return ElementTree Element that contains Footnote placeholder.

        Returns: None when the PLACE_MARKER text is not found; otherwise a
        tuple `(child, parent, is_text)` where `is_text` is True if the
        marker was found in `child.text` and False if it was in `child.tail`.

        """
        marker = self.getConfig("PLACE_MARKER")

        def finder(element):
            # Depth-first search; a child's own text/tail is checked before
            # descending into it.
            for child in element:
                if child.text and marker in child.text:
                    return child, element, True
                if child.tail and marker in child.tail:
                    return child, element, False
                child_res = finder(child)
                if child_res is not None:
                    return child_res
            return None

        return finder(root)

    def setFootnote(self, id, text):
        """ Store a footnote for later retrieval. """
        self.footnotes[id] = text

    def get_separator(self):
        """ Return the id separator: '-' for html5/xhtml5 output, else ':'. """
        if self.md.output_format in ['html5', 'xhtml5']:
            return '-'
        return ':'

    def makeFootnoteId(self, id):
        """ Return footnote link id. """
        if self.getConfig("UNIQUE_IDS"):
            return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
        else:
            return 'fn%s%s' % (self.get_separator(), id)

    def makeFootnoteRefId(self, id, found=False):
        """ Return footnote back-link id.

        When `found` is True the id is passed through unique_ref() so that
        repeated references to the same footnote get distinct ids.

        """
        if self.getConfig("UNIQUE_IDS"):
            return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
        else:
            return self.unique_ref('fnref%s%s' % (self.get_separator(), id), found)

    def makeFootnotesDiv(self, root):
        """ Return div of footnotes as et Element.

        Keyword arguments:

        * root: accepted for interface compatibility; not used here.

        Returns: a `div.footnote` element holding an `hr` and an `ol` with
        one `li` per stored footnote, or None if there are no footnotes.

        """
        # Snapshot the keys once; position in this list is the footnote number.
        keys = list(self.footnotes.keys())
        if not keys:
            return None

        is_html5 = self.md.output_format in ['html5', 'xhtml5']
        backlink_title = self.getConfig("BACKLINK_TITLE")

        div = util.etree.Element("div")
        div.set('class', 'footnote')
        util.etree.SubElement(div, "hr")
        ol = util.etree.SubElement(div, "ol")
        surrogate_parent = util.etree.Element("div")

        for number, key in enumerate(keys, 1):
            li = util.etree.SubElement(ol, "li")
            li.set("id", self.makeFootnoteId(key))
            # Parse footnote with surrogate parent as li cannot be used.
            # List block handlers have special logic to deal with li.
            # When we are done parsing, we will copy everything over to li.
            self.parser.parseChunk(surrogate_parent, self.footnotes[key])
            for el in list(surrogate_parent):
                li.append(el)
                surrogate_parent.remove(el)
            backlink = util.etree.Element("a")
            backlink.set("href", "#" + self.makeFootnoteRefId(key))
            if not is_html5:
                backlink.set("rev", "footnote")  # Invalid in HTML5
            backlink.set("class", "footnote-backref")
            backlink.set("title", backlink_title % number)
            backlink.text = FN_BACKLINK_TEXT

            # A footnote that parsed to no elements gets no backlink.
            if len(li):
                node = li[-1]
                if node.tag == "p":
                    # Element text may be None; treat that as empty so the
                    # concatenation cannot raise TypeError.
                    node.text = (node.text or "") + NBSP_PLACEHOLDER
                    node.append(backlink)
                else:
                    # Last block is not a paragraph: give the backlink a
                    # paragraph of its own.
                    p = util.etree.SubElement(li, "p")
                    p.append(backlink)
        return div
214
215
class FootnotePreprocessor(Preprocessor):
    """ Find all footnote references and store for later use. """

    def __init__(self, footnotes):
        # The owning FootnoteExtension; definitions are stored on it.
        self.footnotes = footnotes

    def run(self, lines):
        """
        Loop through lines and find, set, and remove footnote definitions.

        Keywords:

        * lines: A list of lines of text

        Return: A list of lines of text with footnote definitions removed.
        An empty input list yields an empty list.

        """
        newlines = []
        i = 0
        # Bounds-checked loop so that an empty `lines` is handled cleanly.
        while i < len(lines):
            m = DEF_RE.match(lines[i])
            if m:
                # Collect the continuation lines that follow the definition.
                fn, _i = self.detectTabbed(lines[i+1:])
                fn.insert(0, m.group(2))
                i += _i-1  # skip past footnote
                footnote = "\n".join(fn)
                self.footnotes.setFootnote(m.group(1), footnote.rstrip())
                # Preserve a line for each block to prevent raw HTML indexing issue.
                # https://github.com/Python-Markdown/markdown/issues/584
                num_blocks = (len(footnote.split('\n\n')) * 2)
                newlines.extend([''] * (num_blocks))
            else:
                newlines.append(lines[i])
            i += 1
        return newlines

    def detectTabbed(self, lines):
        """ Find indented text and remove indent before further processing.

        Keyword arguments:

        * lines: an array of strings

        Returns: a list of post processed items and the index of last line.

        """
        items = []
        blank_line = False  # have we encountered a blank line yet?
        i = 0  # to keep track of where we are

        def detab(line):
            # Return the line with one indent level removed, or None when
            # the line is not indented by a tab or four spaces.
            match = TABBED_RE.match(line)
            if match:
                return match.group(4)

        # NOTE(review): `i` is also advanced by the inner blank-line scan
        # below while this loop still visits those same blank lines; confirm
        # the returned count is right when several consecutive blank lines
        # precede an indented continuation.
        for line in lines:
            if line.strip():  # Non-blank line
                detabbed_line = detab(line)
                if detabbed_line:
                    items.append(detabbed_line)
                    i += 1
                    continue
                elif not blank_line and not DEF_RE.match(line):
                    # not tabbed but still part of first par.
                    items.append(line)
                    i += 1
                    continue
                else:
                    # Unindented text after a blank line, or the start of
                    # another footnote definition: this footnote is done.
                    return items, i+1

            else:  # Blank line: _maybe_ we are done.
                blank_line = True
                i += 1  # advance

                # Find the next non-blank line
                for j in range(i, len(lines)):
                    if lines[j].strip():
                        next_line = lines[j]
                        break
                    else:
                        # Include extraneous padding to prevent raw HTML
                        # parsing issue: https://github.com/Python-Markdown/markdown/issues/584
                        items.append("")
                        i += 1
                else:
                    break  # There is no more text; we are done.

                # Check if the next non-blank line is tabbed
                if detab(next_line):  # Yes, more work to do.
                    items.append("")
                    continue
                else:
                    break  # No, we are done.
        else:
            # Every line was consumed without an early exit.
            i += 1

        return items, i
316
317
class FootnotePattern(Pattern):
    """ Inline pattern that turns `[^label]` markers into footnote links. """

    def __init__(self, pattern, footnotes):
        super(FootnotePattern, self).__init__(pattern)
        # The owning FootnoteExtension instance.
        self.footnotes = footnotes

    def handleMatch(self, m):
        """ Build the `sup > a` reference element for a matched marker.

        Returns None when the label has no stored footnote definition.

        """
        extension = self.footnotes
        label = m.group(2)
        if label not in extension.footnotes:
            return None

        sup = util.etree.Element("sup")
        anchor = util.etree.SubElement(sup, "a")
        # found=True records this use so repeated references get unique ids.
        sup.set('id', extension.makeFootnoteRefId(label, found=True))
        anchor.set('href', '#' + extension.makeFootnoteId(label))
        html5_output = extension.md.output_format in ['html5', 'xhtml5']
        if not html5_output:
            anchor.set('rel', 'footnote')  # invalid in HTML5
        anchor.set('class', 'footnote-ref')
        # The visible text is the footnote's one-based position.
        position = extension.footnotes.index(label) + 1
        anchor.text = util.text_type(position)
        return sup
339
340
class FootnotePostTreeprocessor(Treeprocessor):
    """ Amend footnote div with duplicates. """

    def __init__(self, footnotes):
        # The owning FootnoteExtension instance.
        self.footnotes = footnotes
        # Running count of backlinks added; reset at the start of each run().
        self.offset = 0

    def add_duplicates(self, li, duplicates):
        """ Adjust current li and add the duplicates: fnref2, fnref3, etc.

        Keyword arguments:

        * li: the footnote list item element.
        * duplicates: total number of references made to this footnote.

        """
        separator = self.footnotes.get_separator()
        for link in li.iter('a'):
            # Find the link that needs to be duplicated.
            if link.attrib.get('class', '') != 'footnote-backref':
                continue
            ref, found_sep, rest = link.attrib.get('href', '').partition(separator)
            if not found_sep:
                # Not a backlink in the expected "<ref><sep><rest>" form;
                # skip it instead of failing on the unpack.
                continue
            # Duplicate link the number of times we need to
            # and point them to the appropriate references.
            links = []
            for index in range(2, duplicates + 1):
                sib_link = copy.deepcopy(link)
                sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, separator, rest)
                links.append(sib_link)
                self.offset += 1
            # Add all the new duplicate links to the last child of the li.
            el = li[-1]
            for sib_link in links:
                el.append(sib_link)
            break

    def get_num_duplicates(self, li):
        """ Get the number of duplicate refs of the footnote.

        Returns 0 when the li has no id of the form "<fn><sep><rest>" or
        when no reference to it was recorded.

        """
        separator = self.footnotes.get_separator()
        fn, found_sep, rest = li.attrib.get('id', '').partition(separator)
        if not found_sep:
            # Missing or foreign id: nothing to duplicate.
            return 0
        # "fn<sep>x" -> "fnref<sep>x", the key used in found_refs.
        link_id = '%sref%s%s' % (fn, separator, rest)
        return self.footnotes.found_refs.get(link_id, 0)

    def handle_duplicates(self, parent):
        """ Find duplicate footnotes and format and add the duplicates. """
        for li in list(parent):
            # Check number of duplicates footnotes and insert
            # additional links if needed.
            count = self.get_num_duplicates(li)
            if count > 1:
                self.add_duplicates(li, count)

    def run(self, root):
        """ Crawl the footnote div and add missing duplicate footnotes. """
        self.offset = 0
        for div in root.iter('div'):
            if div.attrib.get('class', '') == 'footnote':
                # Footnotes should be under the first ordered list under
                # the footnote div.  So once we find it, quit.
                for ol in div.iter('ol'):
                    self.handle_duplicates(ol)
                    break
392
393
class FootnoteTreeprocessor(Treeprocessor):
    """ Insert the rendered footnotes div into the document tree. """

    def __init__(self, footnotes):
        # The owning FootnoteExtension instance.
        self.footnotes = footnotes

    def run(self, root):
        """ Place the footnotes div at the marker, or at the end of `root`.

        Does nothing when there are no footnotes to render.

        """
        extension = self.footnotes
        notes_div = extension.makeFootnotesDiv(root)
        if notes_div is None:
            return

        placeholder = extension.findFootnotesPlaceholder(root)
        if not placeholder:
            # No marker in the document: footnotes go last.
            root.append(notes_div)
            return

        child, parent, marker_in_text = placeholder
        position = list(parent).index(child)
        if marker_in_text:
            # The marker is the child's own text: swap the child for the div.
            parent.remove(child)
            parent.insert(position, notes_div)
        else:
            # The marker is in the child's tail: put the div right after the
            # child and drop the tail text.
            parent.insert(position + 1, notes_div)
            child.tail = None
415
416
class FootnotePostprocessor(Postprocessor):
    """ Swap the footnote placeholders in the output for their final text. """
    def __init__(self, footnotes):
        # The owning FootnoteExtension instance.
        self.footnotes = footnotes

    def run(self, text):
        """ Return `text` with both placeholders substituted. """
        backlink_text = self.footnotes.getConfig("BACKLINK_TEXT")
        with_backlinks = text.replace(FN_BACKLINK_TEXT, backlink_text)
        return with_backlinks.replace(NBSP_PLACEHOLDER, "&#160;")
427
428
def makeExtension(*args, **kwargs):
    """ Build a FootnoteExtension, forwarding all arguments to it. """
    extension = FootnoteExtension(*args, **kwargs)
    return extension
432