2Tables Extension for Python-Markdown
5Added parsing of tables to Python-Markdown.
7See <https://Python-Markdown.github.io/extensions/tables>
8for documentation.
10Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
12All changes Copyright 2008-2014 The Python Markdown Project
14License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
18from __future__ import absolute_import
19from __future__ import unicode_literals
20from . import Extension
21from ..blockprocessors import BlockProcessor
22from ..util import etree
23import re
class TableProcessor(BlockProcessor):
    """ Process Tables.

    ``test`` validates the first two rows of a block (column header and
    separator row) and stores what it found in ``self.border`` and
    ``self.separator``; ``run`` then reuses that state to build the
    ``<table>`` element.
    """

    # Alternation of: escaped backslash (1), escaped run of backticks (2),
    # run of backticks (3), escaped pipe (4) and plain pipe (5).
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # A pipe at the very end of the row that is not escaped, i.e. preceded
    # by an even number of backslashes (those backslash pairs are part of
    # the match).
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')

    def __init__(self, parser):
        """ Initialise the state shared between ``test`` and ``run``. """
        # Falsy until ``test`` replaces it with a PIPE_* bitmask describing
        # the outer pipes of the header row.
        self.border = False
        # Cells of the separator row, filled in by a successful ``test``.
        self.separator = ''
        super(TableProcessor, self).__init__(parser)

    def test(self, parent, block):
        """
        Ensure first two rows (column header and separator row) are valid table rows.

        Keep border check and separator row to avoid repeating the work.

        Returns True when ``block`` looks like a table, False otherwise.
        ``parent`` is not used.
        """
        rows = [row.strip() for row in block.split('\n')]
        if len(rows) < 2:
            # A table needs at least a header row and a separator row.
            return False

        header0 = rows[0]
        self.border = PIPE_NONE
        if header0.startswith('|'):
            self.border |= PIPE_LEFT
        if self.RE_END_BORDER.search(header0) is not None:
            self.border |= PIPE_RIGHT
        row0_len = len(self._split_row(header0))
        is_table = row0_len > 1

        # Each row in a single column table needs at least one pipe.
        if not is_table and row0_len == 1 and self.border:
            is_table = all(
                row.startswith('|') or self.RE_END_BORDER.search(row) is not None
                for row in rows[1:]
            )

        if is_table:
            # The separator must have as many cells as the header and may
            # only contain pipes, colons, dashes and spaces.
            separator = self._split_row(rows[1])
            is_table = (len(separator) == row0_len) and set(''.join(separator)) <= set('|:- ')
            if is_table:
                self.separator = separator

        return is_table

    def run(self, parent, blocks):
        """
        Parse a table block and build table.

        Pops the first entry of ``blocks`` and appends a ``table`` element
        (with ``thead`` and ``tbody``) to ``parent``.  Relies on
        ``self.separator`` as stored by ``test``.
        """
        block = blocks.pop(0).split('\n')
        header = block[0].strip()
        # Line 0 is the header and line 1 the separator; the slice is simply
        # empty when the block has no body rows.
        rows = block[2:]

        # Get alignment of columns
        align = []
        for cell in self.separator:
            cell = cell.strip()
            starts = cell.startswith(':')
            ends = cell.endswith(':')
            if starts and ends:
                align.append('center')
            elif starts:
                align.append('left')
            elif ends:
                align.append('right')
            else:
                align.append(None)

        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header, thead, align)
        tbody = etree.SubElement(table, 'tbody')
        if not rows:
            # Handle empty table
            self._build_empty_row(tbody, align)
        else:
            for row in rows:
                self._build_row(row.strip(), tbody, align)

    def _build_empty_row(self, parent, align):
        """ Build an empty row: one text-less ``td`` per entry in ``align``. """
        tr = etree.SubElement(parent, 'tr')
        for _ in align:
            etree.SubElement(tr, 'td')

    def _build_row(self, row, parent, align):
        """
        Given a row of text, build table cells.

        Cells are ``th`` when ``parent`` is a ``thead`` element and ``td``
        otherwise.  Each truthy entry of ``align`` is set as the ``align``
        attribute of the cell in the same column.
        """
        tr = etree.SubElement(parent, 'tr')
        tag = 'th' if parent.tag == 'thead' else 'td'
        cells = self._split_row(row)
        # We use align here rather than cells to ensure every row
        # contains the same number of columns: surplus cells are dropped
        # and missing cells are emitted with empty text.
        for i, a in enumerate(align):
            c = etree.SubElement(tr, tag)
            c.text = cells[i].strip() if i < len(cells) else ""
            if a:
                c.set('align', a)

    def _split_row(self, row):
        """ Split a row of text into a list of cells, dropping border pipes. """
        if self.border:
            if row.startswith('|'):
                row = row[1:]
            row = self.RE_END_BORDER.sub('', row)
        return self._split(row)

    def _split(self, row):
        """
        Split a row of text with some code into a list of cells.

        The row is cut at every pipe that is neither backslash-escaped nor
        located inside a region delimited by a pair of backtick runs.
        Always returns at least one element.
        """
        pipes = []
        tics = []
        tic_points = []
        tic_region = []

        # Parse row
        # Throw out \\, and \| (groups 1 and 4 are matched but ignored).
        for m in self.RE_CODE_PIPES.finditer(row):
            if m.group(2):
                # \`+
                # Store length of each tic group: subtract \
                tics.append(len(m.group(2)) - 1)
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(2), m.end(2) - 1, 1))
            elif m.group(3):
                # `+
                # Store length of each tic group
                tics.append(len(m.group(3)))
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(3), m.end(3) - 1, 0))
            elif m.group(5):
                # Store pipe location
                pipes.append(m.start(5))

        # Pair up tics according to size if possible
        # Subtract the escape length *only* from the opening.
        # Walk through tic list and see if tic has a close.
        # Store the tic region (start of region, end of region).
        pos = 0
        tic_len = len(tics)
        while pos < tic_len:
            tic_size = tics[pos] - tic_points[pos][2]
            try:
                # Every entry of ``tics`` is at least 1, so an opening of
                # size 0 (a lone escaped tic) never finds a close.
                close = tics.index(tic_size, pos + 1)
            except ValueError:
                pos += 1
            else:
                tic_region.append((tic_points[pos][0], tic_points[close][1]))
                pos = close + 1

        # Resolve pipes.  A pipe within a tic pair region belongs to code
        # and is thrown out; every other pipe is a table pipe.
        good_pipes = [
            pipe for pipe in pipes
            if not any(start <= pipe <= end for start, end in tic_region)
        ]

        # Split row according to table delimiters.
        elements = []
        pos = 0
        for pipe in good_pipes:
            elements.append(row[pos:pipe])
            pos = pipe + 1
        elements.append(row[pos:])
        return elements
class TableExtension(Extension):
    """ Extension that adds table parsing to Markdown. """

    def extendMarkdown(self, md, md_globals):
        """
        Register a TableProcessor with the block parser of ``md``.

        Also makes sure ``'|'`` is listed in ``md.ESCAPED_CHARS``.
        ``md_globals`` is not used.
        """
        escaped = md.ESCAPED_CHARS
        if '|' not in escaped:
            escaped.append('|')
        processor = TableProcessor(md.parser)
        # '<hashheader' is handed to ``add`` as the location argument.
        md.parser.blockprocessors.add('table', processor, '<hashheader')
def makeExtension(*args, **kwargs):
    """ Return a TableExtension built from the given arguments. """
    extension = TableExtension(*args, **kwargs)
    return extension