1""" 2Attribute List Extension for Python-Markdown 3============================================ 4 5Adds attribute list syntax. Inspired by 6[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s 7feature of the same name. 8 9See <https://Python-Markdown.github.io/extensions/attr_list> 10for documentation. 11 12Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). 13 14All changes Copyright 2011-2014 The Python Markdown Project 15 16License: [BSD](http://www.opensource.org/licenses/bsd-license.php) 17 18""" 19 20from __future__ import absolute_import 21from __future__ import unicode_literals 22from . import Extension 23from ..treeprocessors import Treeprocessor 24from ..util import isBlockLevel 25import re 26 27try: 28 Scanner = re.Scanner 29except AttributeError: # pragma: no cover 30 # must be on Python 2.4 31 from sre import Scanner 32 33 34def _handle_double_quote(s, t): 35 k, v = t.split('=', 1) 36 return k, v.strip('"') 37 38 39def _handle_single_quote(s, t): 40 k, v = t.split('=', 1) 41 return k, v.strip("'") 42 43 44def _handle_key_value(s, t): 45 return t.split('=', 1) 46 47 48def _handle_word(s, t): 49 if t.startswith('.'): 50 return '.', t[1:] 51 if t.startswith('#'): 52 return 'id', t[1:] 53 return t, t 54 55 56_scanner = Scanner([ 57 (r'[^ =]+=".*?"', _handle_double_quote), 58 (r"[^ =]+='.*?'", _handle_single_quote), 59 (r'[^ =]+=[^ =]+', _handle_key_value), 60 (r'[^ =]+', _handle_word), 61 (r' ', None) 62]) 63 64 65def get_attrs(str): 66 """ Parse attribute list and return a list of attribute tuples. """ 67 return _scanner.scan(str)[0] 68 69 70def isheader(elem): 71 return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] 72 73 74class AttrListTreeprocessor(Treeprocessor): 75 76 BASE_RE = r'\{\:?([^\}\n]*)\}' 77 HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE) 78 BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE) 79 INLINE_RE = re.compile(r'^%s' % BASE_RE) 80 NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff' 81 r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d' 82 r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff' 83 r'\uf900-\ufdcf\ufdf0-\ufffd' 84 r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') 85 86 def run(self, doc): 87 for elem in doc.iter(): 88 if isBlockLevel(elem.tag): 89 # Block level: check for attrs on last line of text 90 RE = self.BLOCK_RE 91 if isheader(elem) or elem.tag == 'dt': 92 # header or def-term: check for attrs at end of line 93 RE = self.HEADER_RE 94 if len(elem) and elem.tag == 'li': 95 # special case list items. children may include a ul or ol. 96 pos = None 97 # find the ul or ol position 98 for i, child in enumerate(elem): 99 if child.tag in ['ul', 'ol']: 100 pos = i 101 break 102 if pos is None and elem[-1].tail: 103 # use tail of last child. no ul or ol. 104 m = RE.search(elem[-1].tail) 105 if m: 106 self.assign_attrs(elem, m.group(1)) 107 elem[-1].tail = elem[-1].tail[:m.start()] 108 elif pos is not None and pos > 0 and elem[pos-1].tail: 109 # use tail of last child before ul or ol 110 m = RE.search(elem[pos-1].tail) 111 if m: 112 self.assign_attrs(elem, m.group(1)) 113 elem[pos-1].tail = elem[pos-1].tail[:m.start()] 114 elif elem.text: 115 # use text. ul is first child. 116 m = RE.search(elem.text) 117 if m: 118 self.assign_attrs(elem, m.group(1)) 119 elem.text = elem.text[:m.start()] 120 elif len(elem) and elem[-1].tail: 121 # has children. Get from tail of last child 122 m = RE.search(elem[-1].tail) 123 if m: 124 self.assign_attrs(elem, m.group(1)) 125 elem[-1].tail = elem[-1].tail[:m.start()] 126 if isheader(elem): 127 # clean up trailing #s 128 elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() 129 elif elem.text: 130 # no children. Get from text. 131 m = RE.search(elem.text) 132 if not m and elem.tag == 'td': 133 m = re.search(self.BASE_RE, elem.text) 134 if m: 135 self.assign_attrs(elem, m.group(1)) 136 elem.text = elem.text[:m.start()] 137 if isheader(elem): 138 # clean up trailing #s 139 elem.text = elem.text.rstrip('#').rstrip() 140 else: 141 # inline: check for attrs at start of tail 142 if elem.tail: 143 m = self.INLINE_RE.match(elem.tail) 144 if m: 145 self.assign_attrs(elem, m.group(1)) 146 elem.tail = elem.tail[m.end():] 147 148 def assign_attrs(self, elem, attrs): 149 """ Assign attrs to element. """ 150 for k, v in get_attrs(attrs): 151 if k == '.': 152 # add to class 153 cls = elem.get('class') 154 if cls: 155 elem.set('class', '%s %s' % (cls, v)) 156 else: 157 elem.set('class', v) 158 else: 159 # assign attr k with v 160 elem.set(self.sanitize_name(k), v) 161 162 def sanitize_name(self, name): 163 """ 164 Sanitize name as 'an XML Name, minus the ":"'. 165 See http://www.w3.org/TR/REC-xml-names/#NT-NCName 166 """ 167 return self.NAME_RE.sub('_', name) 168 169 170class AttrListExtension(Extension): 171 def extendMarkdown(self, md, md_globals): 172 md.treeprocessors.add( 173 'attr_list', AttrListTreeprocessor(md), '>prettify' 174 ) 175 176 177def makeExtension(*args, **kwargs): 178 return AttrListExtension(*args, **kwargs) 179