1"""
Attribute List Extension for Python-Markdown
============================================

Adds attribute list syntax. Inspired by
[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.

See <https://Python-Markdown.github.io/extensions/attr_list>
for documentation.

Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).

All changes Copyright 2011-2014 The Python Markdown Project

License: [BSD](http://www.opensource.org/licenses/bsd-license.php)

18"""
19
from __future__ import absolute_import
from __future__ import unicode_literals

import re

from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import isBlockLevel
26
# Resolve the tokenizer class used to build the attribute scanner.
try:
    # Normal case: the class is available as an attribute of ``re``.
    Scanner = re.Scanner
except AttributeError:  # pragma: no cover
    # must be on Python 2.4, where the class is imported from ``sre`` instead
    from sre import Scanner
32
33
def _handle_double_quote(s, t):
    """Scanner callback for a ``key="value"`` token.

    ``s`` is the first callback argument supplied by the scanner and is not
    used. ``t`` is the matched token text.

    Returns a ``(key, value)`` tuple. The token is split on its first ``=``
    only, so the value may itself contain ``=``; every leading and trailing
    double-quote character is stripped from the value.
    """
    key, value = t.split('=', 1)
    return key, value.strip('"')
37
38
def _handle_single_quote(s, t):
    """Scanner callback for a ``key='value'`` token.

    ``s`` is the first callback argument supplied by the scanner and is not
    used. ``t`` is the matched token text.

    Returns a ``(key, value)`` tuple. The token is split on its first ``=``
    only, so the value may itself contain ``=``; every leading and trailing
    single-quote character is stripped from the value.
    """
    key, value = t.split('=', 1)
    return key, value.strip("'")
42
43
def _handle_key_value(s, t):
    """Scanner callback for an unquoted ``key=value`` token.

    ``s`` is the first callback argument supplied by the scanner and is not
    used. ``t`` is the matched token text.

    Returns the two-item *list* ``[key, value]`` produced by splitting the
    token on its first ``=``. Unlike the quoted handlers this is a list, not
    a tuple; it is kept that way so existing callers see the same type.
    """
    return t.split('=', 1)
46
47
def _handle_word(s, t):
    """Scanner callback for a bare word (a token without ``=``).

    ``s`` is the first callback argument supplied by the scanner and is not
    used. ``t`` is the matched token text.

    Returns a ``(key, value)`` tuple:

    * ``.name`` -> ``('.', 'name')``  (the ``'.'`` key marks a CSS class)
    * ``#name`` -> ``('id', 'name')``
    * ``name``  -> ``('name', 'name')``  (the word is used as key and value)
    """
    if t.startswith('.'):
        return '.', t[1:]
    if t.startswith('#'):
        return 'id', t[1:]
    return t, t
54
55
# Tokenizer for the text inside an attribute list. Each entry pairs a regular
# expression with the callback that turns a matched token into a key/value
# pair. Rules are listed from most to least specific: the quoted forms come
# before the unquoted ``key=value`` form, which comes before the bare-word
# form, so that a more general pattern does not claim a token first.
_scanner = Scanner([
    (r'[^ =]+=".*?"', _handle_double_quote),   # key="value with spaces"
    (r"[^ =]+='.*?'", _handle_single_quote),   # key='value with spaces'
    (r'[^ =]+=[^ =]+', _handle_key_value),     # key=value
    (r'[^ =]+', _handle_word),                 # .class, #id or a bare word
    (r' ', None)                               # separator: no callback, nothing emitted
])
63
64
def get_attrs(str):
    """ Parse attribute list and return a list of attribute pairs.

    ``str`` is the raw text found between the braces of an attribute list.
    (The parameter name shadows the builtin; it is kept unchanged because it
    is part of the function's public signature.)

    Each item of the returned list is a two-item ``key, value`` sequence as
    produced by the scanner callbacks. Text the scanner cannot tokenize is
    discarded without raising an error.
    """
    # ``scan`` returns ``(results, unmatched_remainder)``; only the results
    # are of interest here.
    results, _remainder = _scanner.scan(str)
    return results
68
69
def isheader(elem):
    """Return True when ``elem.tag`` is one of ``h1`` through ``h6``."""
    return elem.tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
72
73
class AttrListTreeprocessor(Treeprocessor):
    """Tree processor that turns ``{: ... }`` attribute lists into attributes.

    For every element in the tree it looks for an attribute list in the
    element's text (or in the tail of one of its children), applies the
    parsed attributes to the element and removes the attribute-list markup
    from the text.
    """

    # An attribute list: "{" or "{:" followed by anything except "}" or a
    # newline, then "}". Group 1 captures the raw attribute text.
    BASE_RE = r'\{\:?([^\}\n]*)\}'
    # Headers and definition terms: list at the end of the line, preceded by
    # at least one space.
    HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
    # Other block-level elements: list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Inline elements: list at the very start of the element's tail.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # Matches runs of characters that are NOT allowed in an attribute name;
    # used by ``sanitize_name`` to replace them.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """Apply attribute lists to every element under ``doc``, in place.

        Block-level elements (as reported by ``isBlockLevel``) are handled
        by ``_process_block``. Any other element takes its attribute list
        from the start of its own tail. Nothing is returned.
        """
        for elem in doc.iter():
            if isBlockLevel(elem.tag):
                self._process_block(elem)
            elif elem.tail:
                # inline: check for attrs at start of tail
                m = self.INLINE_RE.match(elem.tail)
                if m:
                    self.assign_attrs(elem, m.group(1))
                    elem.tail = elem.tail[m.end():]

    def _select_target(self, elem):
        """Pick the string of block element ``elem`` to search for attrs.

        Returns ``(node, field)`` where ``field`` is ``'text'`` or ``'tail'``
        and ``getattr(node, field)`` is the non-empty string to search, or
        ``None`` when the element has no suitable string.
        """
        if len(elem) and elem.tag == 'li':
            # special case list items. children may include a ul or ol.
            pos = None
            # find the ul or ol position
            for i, child in enumerate(elem):
                if child.tag in ('ul', 'ol'):
                    pos = i
                    break
            if pos is None and elem[-1].tail:
                # no ul or ol: use tail of last child.
                return elem[-1], 'tail'
            if pos is not None and pos > 0 and elem[pos - 1].tail:
                # use tail of last child before ul or ol
                return elem[pos - 1], 'tail'
            if elem.text:
                # fall back to the item's own text (e.g. ul is first child).
                return elem, 'text'
            return None
        if len(elem) and elem[-1].tail:
            # has children. Get from tail of last child
            return elem[-1], 'tail'
        if elem.text:
            # no children, or the last child has no tail. Get from text.
            return elem, 'text'
        return None

    def _process_block(self, elem):
        """Find, apply and strip the attribute list of one block element."""
        header = isheader(elem)
        if header or elem.tag == 'dt':
            # header or def-term: check for attrs at end of line
            pattern = self.HEADER_RE
        else:
            # Block level: check for attrs on last line of text
            pattern = self.BLOCK_RE

        target = self._select_target(elem)
        if target is None:
            return
        node, field = target
        content = getattr(node, field)

        m = pattern.search(content)
        if not m and field == 'text' and elem.tag == 'td':
            # table cells: accept an attribute list anywhere in the text
            m = re.search(self.BASE_RE, content)
        if not m:
            return

        self.assign_attrs(elem, m.group(1))
        # Drop the attribute list and everything after it.
        remaining = content[:m.start()]
        if header:
            # clean up trailing #s
            remaining = remaining.rstrip('#').rstrip()
        setattr(node, field, remaining)

    def assign_attrs(self, elem, attrs):
        """ Assign attrs to element.

        ``attrs`` is the raw attribute-list text; it is parsed with
        ``get_attrs``. A ``'.'`` key appends its value to the element's
        ``class`` attribute (space separated). Any other key is sanitized
        with ``sanitize_name`` and set on the element, replacing an existing
        attribute of the same name.
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '%s %s' % (cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See http://www.w3.org/TR/REC-xml-names/#NT-NCName

        Every run of characters matched by ``NAME_RE`` is replaced with a
        single underscore.
        """
        return self.NAME_RE.sub('_', name)
168
169
class AttrListExtension(Extension):
    """Markdown extension that installs ``AttrListTreeprocessor``."""

    def extendMarkdown(self, md, md_globals):
        """Register the tree processor on ``md`` under the name 'attr_list'.

        ``md_globals`` is accepted as part of the method signature but not
        used here. The ``'>prettify'`` location string presumably places the
        processor after the one named 'prettify' -- confirm against the
        Markdown registry API.
        """
        md.treeprocessors.add(
            'attr_list', AttrListTreeprocessor(md), '>prettify'
        )
175
176
def makeExtension(*args, **kwargs):
    """Return a new ``AttrListExtension``, forwarding all arguments to it."""
    return AttrListExtension(*args, **kwargs)
179