"""
Tables Extension for Python-Markdown
====================================

Added parsing of tables to Python-Markdown.

See <https://Python-Markdown.github.io/extensions/tables>
for documentation.

Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)

All changes Copyright 2008-2014 The Python Markdown Project

License: [BSD](http://www.opensource.org/licenses/bsd-license.php)

"""

from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
from ..blockprocessors import BlockProcessor
from ..util import etree
import re

# Bit flags recording which outer borders the header row carries.
PIPE_NONE = 0
PIPE_LEFT = 1
PIPE_RIGHT = 2


class TableProcessor(BlockProcessor):
    """ Process Tables. """

    # Alternatives, in order: escaped backslash, escaped backtick run,
    # backtick run, escaped pipe, plain pipe.
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # An unescaped pipe at the end of the row, optionally preceded by
    # escaped backslashes (pairs of backslashes).
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')

    def __init__(self, parser):
        """ Set up the state shared between `test` and `run`. """
        # Use the flag constant rather than `False` so the attribute always
        # holds a combination of the PIPE_* values.
        self.border = PIPE_NONE
        self.separator = ''
        super(TableProcessor, self).__init__(parser)

    def test(self, parent, block):
        """
        Ensure first two rows (column header and separator row) are valid table rows.

        Keep border check and separator row to avoid repeating the work:
        `self.border` and `self.separator` are stored for `run` to reuse.

        Returns True when the block looks like a table, False otherwise.
        """
        is_table = False
        rows = [row.strip() for row in block.split('\n')]
        if len(rows) > 1:
            header0 = rows[0]
            self.border = PIPE_NONE
            if header0.startswith('|'):
                self.border |= PIPE_LEFT
            if self.RE_END_BORDER.search(header0) is not None:
                self.border |= PIPE_RIGHT
            row = self._split_row(header0)
            row0_len = len(row)
            is_table = row0_len > 1

            # Each row in a single column table needs at least one pipe.
            if not is_table and row0_len == 1 and self.border:
                is_table = all(
                    line.startswith('|') or
                    self.RE_END_BORDER.search(line) is not None
                    for line in rows[1:]
                )

            if is_table:
                # The separator row must have as many cells as the header
                # and consist only of pipes, colons, dashes and spaces.
                row = self._split_row(rows[1])
                is_table = (len(row) == row0_len) and set(''.join(row)) <= set('|:- ')
                if is_table:
                    self.separator = row

        return is_table

    def run(self, parent, blocks):
        """ Parse a table block and build table. """
        block = blocks.pop(0).split('\n')
        header = block[0].strip()
        # Row 0 is the header and row 1 the separator; slicing yields an
        # empty list when there are no body rows.
        rows = block[2:]

        # Get alignment of columns
        align = []
        for c in self.separator:
            c = c.strip()
            if c.startswith(':') and c.endswith(':'):
                align.append('center')
            elif c.startswith(':'):
                align.append('left')
            elif c.endswith(':'):
                align.append('right')
            else:
                align.append(None)

        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header, thead, align)
        tbody = etree.SubElement(table, 'tbody')
        if not rows:
            # Handle empty table
            self._build_empty_row(tbody, align)
        else:
            for row in rows:
                self._build_row(row.strip(), tbody, align)

    def _build_empty_row(self, parent, align):
        """ Build a row of empty `td` cells, one per entry in `align`. """
        tr = etree.SubElement(parent, 'tr')
        for _ in align:
            etree.SubElement(tr, 'td')

    def _build_row(self, row, parent, align):
        """ Given a row of text, build table cells. """
        tr = etree.SubElement(parent, 'tr')
        tag = 'th' if parent.tag == 'thead' else 'td'
        cells = self._split_row(row)
        # We use align here rather than cells to ensure every row
        # contains the same number of columns.
        for i, a in enumerate(align):
            c = etree.SubElement(tr, tag)
            try:
                c.text = cells[i].strip()
            except IndexError:  # pragma: no cover
                # Row has fewer cells than the header: pad with empty text.
                c.text = ""
            if a:
                c.set('align', a)

    def _split_row(self, row):
        """ split a row of text into list of cells. """
        if self.border:
            if row.startswith('|'):
                row = row[1:]
            # RE_END_BORDER also matches any escaped backslashes directly in
            # front of the closing pipe. Those belong to the last cell, so
            # remove only the final pipe character instead of the whole match.
            row = self.RE_END_BORDER.sub(lambda m: m.group(0)[:-1], row)
        return self._split(row)

    def _split(self, row):
        """
        split a row of text with some code into a list of cells.

        Pipes that are escaped or that fall inside a paired backtick region
        are not treated as cell delimiters.
        """
        elements = []
        pipes = []
        tics = []
        tic_points = []
        tic_region = []

        # Parse row
        # Throw out \\, and \|
        for m in self.RE_CODE_PIPES.finditer(row):
            # Store ` data (len, start_pos, end_pos)
            if m.group(2):
                # \`+
                # Store length of each tic group: subtract \
                tics.append(len(m.group(2)) - 1)
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(2), m.end(2) - 1, 1))
            elif m.group(3):
                # `+
                # Store length of each tic group
                tics.append(len(m.group(3)))
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(3), m.end(3) - 1, 0))
            # Store pipe location
            elif m.group(5):
                pipes.append(m.start(5))

        # Pair up tics according to size if possible
        # Subtract the escape length *only* from the opening.
        # Walk through tic list and see if tic has a close.
        # Store the tic region (start of region, end of region).
        pos = 0
        tic_len = len(tics)
        while pos < tic_len:
            try:
                tic_size = tics[pos] - tic_points[pos][2]
                if tic_size == 0:
                    raise ValueError
                index = tics[pos + 1:].index(tic_size) + 1
                tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
                pos += index + 1
            except ValueError:
                # No closing group of the same size: not a code region.
                pos += 1

        # Resolve pipes. A pipe that lies within a tic pair region is part
        # of inline code, so throw it out; every other pipe is a table pipe.
        good_pipes = [
            pipe for pipe in pipes
            if not any(start <= pipe <= end for start, end in tic_region)
        ]

        # Split row according to table delimiters.
        pos = 0
        for pipe in good_pipes:
            elements.append(row[pos:pipe])
            pos = pipe + 1
        elements.append(row[pos:])
        return elements


class TableExtension(Extension):
    """ Add tables to Markdown. """

    def extendMarkdown(self, md, md_globals):
        """ Add an instance of TableProcessor to BlockParser. """
        # Make sure the pipe is listed among the escapable characters.
        if '|' not in md.ESCAPED_CHARS:
            md.ESCAPED_CHARS.append('|')
        md.parser.blockprocessors.add('table',
                                      TableProcessor(md.parser),
                                      '<hashheader')


def makeExtension(*args, **kwargs):
    """ Return a `TableExtension` built from the given arguments. """
    return TableExtension(*args, **kwargs)