2# vim:fileencoding=utf-8
5__license__ = 'GPL v3'
6__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
8from lxml.html.builder import TABLE, TR, TD
10from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle, border_to_css
11from calibre.ebooks.docx.char_styles import RunStyle
12from polyglot.builtins import iteritems, itervalues
14# Read from XML {{{
# Expose the imported shading reader under the name ``read_shd`` so that the
# ``globals()['read_%s' % x]`` lookups used by the style classes can find it.
read_shd = rs
# The four box edges iterated when reading and emitting per-edge padding and
# border properties.
edges = ('left', 'top', 'right', 'bottom')
def _read_width(elem, get):
    '''
    Convert a width-like element into a CSS length string.

    The ``w:w`` attribute is parsed as an integer (0 when it is missing or
    not a number) and interpreted according to ``w:type``, which is treated
    as ``'auto'`` when absent:

    * ``nil``  -> ``'0'``
    * ``auto`` -> ``'auto'``
    * ``dxa``  -> ``w/20`` formatted as points
    * ``pct``  -> ``w/50`` formatted as a percentage

    Any other type yields the ``inherit`` sentinel.
    '''
    try:
        amount = int(get(elem, 'w:w'))
    except (TypeError, ValueError):
        amount = 0
    kind = get(elem, 'w:type', 'auto')
    if kind == 'nil':
        return '0'
    if kind == 'auto':
        return 'auto'
    if kind == 'dxa':
        return '%.3gpt' % (amount/20)
    if kind == 'pct':
        return '%.3g%%' % (amount/50)
    return inherit
def read_width(parent, dest, XPath, get):
    '''
    Set ``dest.width`` from the ``w:tblW`` children of ``parent``.

    Every matching child is converted; the last one wins. Without any match
    ``dest.width`` becomes the ``inherit`` sentinel.
    '''
    widths = [_read_width(node, get) for node in XPath('./w:tblW')(parent)]
    dest.width = widths[-1] if widths else inherit
def read_cell_width(parent, dest, XPath, get):
    '''
    Set ``dest.width`` from the ``w:tcW`` children of ``parent``.

    Every matching child is converted; the last one wins. Without any match
    ``dest.width`` becomes the ``inherit`` sentinel.
    '''
    widths = [_read_width(node, get) for node in XPath('./w:tcW')(parent)]
    dest.width = widths[-1] if widths else inherit
def read_padding(parent, dest, XPath, get):
    '''
    Read per-edge cell margins into ``dest.cell_padding_<edge>``.

    The margins are looked up under ``w:tblCellMar`` when ``parent`` is a
    ``tblPr`` element and under ``w:tcMar`` otherwise. Edges that are not
    specified are set to the ``inherit`` sentinel.
    '''
    container = 'tcMar'
    if parent.tag.endswith('}tblPr'):
        container = 'tblCellMar'
    padding = dict.fromkeys(edges, inherit)
    for margins in XPath('./w:%s' % container)(parent):
        for side in edges:
            for node in XPath('./w:%s' % side)(margins):
                padding[side] = _read_width(node, get)
    for side, value in padding.items():
        setattr(dest, 'cell_padding_%s' % side, value)
def read_justification(parent, dest, XPath, get):
    '''
    Translate ``w:jc`` alignment into auto margins on ``dest``.

    ``left`` makes the right margin ``'auto'``, ``right`` makes the left
    margin ``'auto'`` and ``center`` makes both ``'auto'``. Margins that are
    not touched by any ``w:jc`` element are set to the ``inherit`` sentinel.
    Empty or unrecognised values are ignored.
    '''
    # Which margins become 'auto' for each alignment value
    auto_sides = {
        'left': ('right',),
        'right': ('left',),
        'center': ('left', 'right'),
    }
    margins = {'left': inherit, 'right': inherit}
    for jc in XPath('./w:jc[@w:val]')(parent):
        for side in auto_sides.get(get(jc, 'w:val'), ()):
            margins[side] = 'auto'
    dest.margin_left = margins['left']
    dest.margin_right = margins['right']
def read_spacing(parent, dest, XPath, get):
    '''
    Set ``dest.spacing`` from the ``w:tblCellSpacing`` children of ``parent``.

    The last matching child wins; without any match ``dest.spacing`` becomes
    the ``inherit`` sentinel.
    '''
    values = [_read_width(node, get) for node in XPath('./w:tblCellSpacing')(parent)]
    dest.spacing = values[-1] if values else inherit
def read_float(parent, dest, XPath, get):
    '''
    Set ``dest.float`` from the ``w:tblpPr`` children of ``parent``.

    The result is a dict of the element's attributes keyed by local name
    (the ``{namespace}`` prefix is stripped). The last matching child wins;
    without any match ``dest.float`` becomes the ``inherit`` sentinel.
    '''
    positioning = inherit
    for tblpPr in XPath('./w:tblpPr')(parent):
        positioning = {}
        for qname, value in iteritems(tblpPr.attrib):
            positioning[qname.rpartition('}')[-1]] = value
    dest.float = positioning
def read_indent(parent, dest, XPath, get):
    '''
    Set ``dest.indent`` from the ``w:tblInd`` children of ``parent``.

    The last matching child wins; without any match ``dest.indent`` becomes
    the ``inherit`` sentinel.
    '''
    values = [_read_width(node, get) for node in XPath('./w:tblInd')(parent)]
    dest.indent = values[-1] if values else inherit
# Edge names for which border properties are read: the four outer edges plus
# the inside-horizontal and inside-vertical edges.
border_edges = ('left', 'top', 'right', 'bottom', 'insideH', 'insideV')
def read_borders(parent, dest, XPath, get):
    '''
    Read the border properties of a table or a cell into ``dest``.

    Delegates to ``read_border`` for every edge in ``border_edges``, looking
    under ``w:tblBorders`` when ``parent`` is a ``tblPr`` element and under
    ``w:tcBorders`` otherwise.
    '''
    if parent.tag.endswith('}tblPr'):
        container = 'tblBorders'
    else:
        container = 'tcBorders'
    read_border(parent, dest, XPath, get, border_edges, container)
def read_height(parent, dest, XPath, get):
    '''
    Set ``dest.height`` from the ``w:trHeight`` children of ``parent``.

    The value is a ``(rule, val)`` tuple where ``rule`` is the ``w:hRule``
    attribute (``'auto'`` when absent) and ``val`` the raw ``w:val``
    attribute. Elements with an unrecognised rule are skipped. Without any
    usable element ``dest.height`` becomes the ``inherit`` sentinel.
    '''
    known_rules = frozenset(('auto', 'atLeast', 'exact'))
    height = inherit
    for node in XPath('./w:trHeight')(parent):
        rule = get(node, 'w:hRule', 'auto')
        if rule not in known_rules:
            continue
        height = (rule, get(node, 'w:val'))
    dest.height = height
def read_vertical_align(parent, dest, XPath, get):
    '''
    Set ``dest.vertical_align`` from the ``w:vAlign`` children of ``parent``.

    ``center`` maps to ``'middle'``; ``top`` and ``bottom`` map to
    themselves; any other value also maps to ``'middle'``. The last matching
    child wins; without any match the ``inherit`` sentinel is stored.
    '''
    css_values = {'center': 'middle', 'top': 'top', 'bottom': 'bottom'}
    aligns = [
        css_values.get(get(node, 'w:val'), 'middle')
        for node in XPath('./w:vAlign')(parent)
    ]
    dest.vertical_align = aligns[-1] if aligns else inherit
def read_col_span(parent, dest, XPath, get):
    '''
    Set ``dest.col_span`` from the ``w:gridSpan`` children of ``parent``.

    The ``w:val`` attribute is parsed as an integer; elements whose value is
    missing or not a number are ignored. Without any usable element the
    ``inherit`` sentinel is stored.
    '''
    span = inherit
    for node in XPath('./w:gridSpan')(parent):
        try:
            span = int(get(node, 'w:val'))
        except (TypeError, ValueError):
            # Keep whatever was read so far
            pass
    dest.col_span = span
def read_merge(parent, dest, XPath, get):
    '''
    Set ``dest.hMerge`` and ``dest.vMerge`` from the children of ``parent``.

    For each of ``w:hMerge`` and ``w:vMerge`` the ``w:val`` attribute of the
    last matching child is stored, defaulting to ``'continue'`` when the
    attribute is absent. Without a matching child the ``inherit`` sentinel
    is stored.
    '''
    for attr in ('hMerge', 'vMerge'):
        values = [
            get(node, 'w:val', 'continue')
            for node in XPath('./w:%s' % attr)(parent)
        ]
        setattr(dest, attr, values[-1] if values else inherit)
def read_band_size(parent, dest, XPath, get):
    '''
    Set ``dest.col_band_size`` and ``dest.row_band_size``.

    The values come from the ``w:val`` attribute of the
    ``w:tblStyleColBandSize`` and ``w:tblStyleRowBandSize`` children of
    ``parent``. Each defaults to 1; values that are missing or not a number
    are ignored.
    '''
    for axis in ('Col', 'Row'):
        size = 1
        for node in XPath('./w:tblStyle%sBandSize' % axis)(parent):
            try:
                size = int(get(node, 'w:val'))
            except (TypeError, ValueError):
                # Keep the default / previously read value
                pass
        setattr(dest, '%s_band_size' % axis.lower(), size)
def read_look(parent, dest, XPath, get):
    '''
    Set ``dest.look`` from the ``w:tblLook`` children of ``parent``.

    The ``w:val`` attribute is parsed as a hexadecimal integer. Values that
    are missing or not valid hexadecimal are ignored; the default is 0.
    '''
    look = 0
    for node in XPath('./w:tblLook')(parent):
        try:
            look = int(get(node, 'w:val'), 16)
        except (ValueError, TypeError):
            # Keep the default / previously read value
            pass
    dest.look = look
163# }}}
def clone(style):
    '''
    Return a copy of ``style``, or None when it cannot be copied.

    A new object of the same type is created from ``style.namespace`` and
    then filled in via its ``update()`` method. None is returned when
    ``style`` is None or when the constructor raises TypeError.
    '''
    if style is not None:
        try:
            duplicate = type(style)(style.namespace)
        except TypeError:
            pass
        else:
            duplicate.update(style)
            return duplicate
    return None
class Style:
    '''
    Shared behaviour of the row, cell and table style classes.

    Subclasses supply ``all_properties`` (the attribute names that take part
    in ``update()``) and the attributes read by the ``convert_*`` helpers.
    '''

    is_bidi = False

    def update(self, other):
        'Copy every property of other that is not inherit onto this style'
        for name in self.all_properties:
            value = getattr(other, name)
            if value is inherit:
                continue
            setattr(self, name, value)

    def apply_bidi(self):
        'Mark this style as bidi so that convert_border() mirrors left/right'
        self.is_bidi = True

    def convert_spacing(self):
        'Return the border-collapse/border-spacing CSS implied by self.spacing'
        if self.spacing is inherit:
            return {}
        if self.spacing in {'auto', '0'}:
            return {'border-collapse': 'collapse'}
        return {'border-collapse': 'separate', 'border-spacing': self.spacing}

    def convert_border(self):
        '''
        Return the border and border-padding CSS for the four box edges.

        For bidi styles the left value of each property is copied to the
        right and the right value to the left, whenever that value exists.
        '''
        css = {}
        for edge in edges:
            border_to_css(edge, self, css)
            padding = getattr(self, 'padding_%s' % edge)
            if padding is not inherit:
                css['padding-%s' % edge] = '%.3gpt' % padding
        if self.is_bidi:
            for template in ('padding-%s', 'border-%s-style', 'border-%s-color', 'border-%s-width'):
                left_key, right_key = template % 'left', template % 'right'
                # Read both values before writing either of them
                left_val, right_val = css.get(left_key), css.get(right_key)
                if left_val is not None:
                    css[right_key] = left_val
                if right_val is not None:
                    css[left_key] = right_val
        return css
class RowStyle(Style):
    '''
    Style of a table row, optionally read from a ``w:trPr`` element.

    Without a ``trPr`` every property is the ``inherit`` sentinel.
    '''

    all_properties = ('height', 'cantSplit', 'hidden', 'spacing',)

    def __init__(self, namespace, trPr=None):
        self.namespace = namespace
        self._css = None
        if trPr is None:
            for name in self.all_properties:
                setattr(self, name, inherit)
            return
        XPath, get = namespace.XPath, namespace.get
        for flag in ('hidden', 'cantSplit'):
            setattr(self, flag, binary_property(trPr, flag, XPath, get))
        read_spacing(trPr, self, XPath, get)
        read_height(trPr, self, XPath, get)

    @property
    def css(self):
        'The CSS declarations for this row as a dict, computed once and cached'
        if self._css is not None:
            return self._css
        c = self._css = {}
        if self.hidden is True:
            c['display'] = 'none'
        if self.cantSplit is True:
            c['page-break-inside'] = 'avoid'
        if self.height is not inherit:
            rule, raw = self.height
            if rule != 'auto':
                prop = 'min-height' if rule == 'atLeast' else 'height'
                try:
                    c[prop] = '%.3gpt' % (int(raw)/20)
                except (ValueError, TypeError):
                    # Missing or non-numeric height: emit nothing
                    pass
        c.update(self.convert_spacing())
        return c
class CellStyle(Style):
    '''
    Style of a table cell, optionally read from a ``w:tcPr`` element.

    Without a ``tcPr`` every property is the ``inherit`` sentinel.
    '''

    all_properties = (
        'background_color', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
        'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge', 'row_span',
    ) + tuple(template % edge for edge in border_edges for template in border_props)

    def __init__(self, namespace, tcPr=None):
        self.namespace = namespace
        self._css = None
        if tcPr is None:
            for name in self.all_properties:
                setattr(self, name, inherit)
            return
        XPath, get = namespace.XPath, namespace.get
        readers = (
            read_borders, read_shd, read_padding, read_cell_width,
            read_vertical_align, read_col_span, read_merge,
        )
        for reader in readers:
            reader(tcPr, self, XPath, get)
        # Never read from the XML; filled in later when merged cells are handled
        self.row_span = inherit

    @property
    def css(self):
        'The CSS declarations for this cell as a dict, computed once and cached'
        if self._css is not None:
            return self._css
        self._css = c = {}
        if self.background_color is not inherit:
            c['background-color'] = self.background_color
        if self.width not in (inherit, 'auto'):
            c['width'] = self.width
        if self.vertical_align is inherit:
            c['vertical-align'] = 'top'
        else:
            c['vertical-align'] = self.vertical_align
        for edge in edges:
            key = 'padding-%s' % edge
            padding = getattr(self, 'cell_padding_%s' % edge)
            if padding not in (inherit, 'auto'):
                c[key] = padding
            elif padding is inherit and edge in {'left', 'right'}:
                # Default horizontal padding when none is specified
                c[key] = '%.3gpt' % (115/20)
        # In Word, tables are apparently rendered with some default top and
        # bottom padding irrespective of the cellMargin values. Simulate
        # that here.
        for edge in ('top', 'bottom'):
            key = 'padding-%s' % edge
            if c.get(key, '0pt') == '0pt':
                c[key] = '0.5ex'
        c.update(self.convert_border())
        return c
class TableStyle(Style):
    '''
    Style of a whole table, optionally read from a ``w:tblPr`` element.

    Without a ``tblPr`` every property is the ``inherit`` sentinel. When the
    ``tblPr`` is a child of a ``w:style`` element, the conditional
    formatting in its ``w:tblStylePr`` children is collected into
    ``self.overrides``, a dict mapping the ``w:type`` of each
    ``w:tblStylePr`` to a dict with any of the keys ``table``, ``row``,
    ``cell``, ``para`` and ``run``.
    '''

    all_properties = (
        'width', 'float', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
        'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color',
        'spacing', 'indent', 'overrides', 'col_band_size', 'row_band_size', 'look', 'bidi',
    ) + tuple(template % edge for edge in border_edges for template in border_props)

    def __init__(self, namespace, tblPr=None):
        self.namespace = namespace
        if tblPr is None:
            for name in self.all_properties:
                setattr(self, name, inherit)
        else:
            XPath, get = namespace.XPath, namespace.get
            self.overrides = inherit
            self.bidi = binary_property(tblPr, 'bidiVisual', XPath, get)
            readers = (
                read_width, read_float, read_padding, read_shd, read_justification,
                read_spacing, read_indent, read_borders, read_band_size, read_look,
            )
            for reader in readers:
                reader(tblPr, self, XPath, get)
            style_elem = tblPr.getparent()
            if namespace.is_tag(style_elem, 'w:style'):
                # (child tag, key in the override dict, class used to parse it)
                kinds = (
                    ('w:tblPr', 'table', TableStyle),
                    ('w:trPr', 'row', RowStyle),
                    ('w:tcPr', 'cell', CellStyle),
                    ('w:pPr', 'para', ParagraphStyle),
                    ('w:rPr', 'run', RunStyle),
                )
                self.overrides = {}
                for tblStylePr in XPath('./w:tblStylePr[@w:type]')(style_elem):
                    entry = self.overrides[get(tblStylePr, 'w:type')] = {}
                    for tag, key, factory in kinds:
                        for props in XPath('./' + tag)(tblStylePr):
                            entry[key] = factory(namespace, props)
        self._css = None

    def resolve_based_on(self, parent):
        'Fill in every property that is still inherit from the parent style'
        for name in self.all_properties:
            if getattr(self, name) is inherit:
                setattr(self, name, getattr(parent, name))

    @property
    def css(self):
        '''
        The CSS declarations for the table as a dict, computed once and
        cached.

        For floating tables without a ``tblpXSpec`` this reads ``self.page``,
        which is not set by ``__init__`` and must be assigned beforehand.
        '''
        if self._css is not None:
            return self._css
        c = self._css = {}
        if self.width not in (inherit, 'auto'):
            c['width'] = self.width
        for attr in ('background_color', 'margin_left', 'margin_right'):
            value = getattr(self, attr)
            if value is not inherit:
                c[attr.replace('_', '-')] = value
        if self.indent not in (inherit, 'auto') and self.margin_left != 'auto':
            c['margin-left'] = self.indent
        if self.float is not inherit:
            for side in ('left', 'top', 'right', 'bottom'):
                distance = self.float.get('%sFromText' % side, 0)
                try:
                    distance = '%.3gpt' % (int(distance) / 20)
                except (ValueError, TypeError):
                    distance = '0'
                c['margin-%s' % side] = distance
            if 'tblpXSpec' in self.float:
                c['float'] = 'right' if self.float['tblpXSpec'] in {'right', 'outside'} else 'left'
            else:
                # No explicit side given: choose one from the horizontal
                # offset relative to the width between the page margins
                page = self.page
                page_width = page.width - page.margin_left - page.margin_right
                try:
                    offset = int(self.float['tblpX']) / 20
                except (KeyError, ValueError, TypeError):
                    offset = 0
                c['float'] = 'left' if (offset/page_width) < 0.65 else 'right'
        c.update(self.convert_spacing())
        c.setdefault('border-collapse', 'collapse')
        c.update(self.convert_border())
        return c
class Table:
    '''
    A single ``w:tbl`` element together with the resolved styles of its rows,
    cells and paragraphs.

    On construction the table style is resolved, a style is computed for
    every row, cell and paragraph (stored in ``self.style_map`` keyed by the
    XML element), merged cells are collapsed and a ``Table`` is created for
    every directly nested ``w:tbl`` (stored in ``self.sub_tables``). Every
    paragraph that is a direct child of a cell is also registered in
    ``para_map``, mapping it to this table.
    '''

    def __init__(self, namespace, tbl, styles, para_map, is_sub_table=False):
        self.namespace = namespace
        self.tbl = tbl
        self.styles = styles
        self.is_sub_table = is_sub_table

        # Read Table Style
        style = {'table':TableStyle(self.namespace)}
        for tblPr in self.namespace.XPath('./w:tblPr')(tbl):
            for ts in self.namespace.XPath('./w:tblStyle[@w:val]')(tblPr):
                style_id = self.namespace.get(ts, 'w:val')
                s = styles.get(style_id)
                if s is not None:
                    if s.table_style is not None:
                        style['table'].update(s.table_style)
                    if s.paragraph_style is not None:
                        if 'paragraph' in style:
                            style['paragraph'].update(s.paragraph_style)
                        else:
                            style['paragraph'] = s.paragraph_style
                    if s.character_style is not None:
                        if 'run' in style:
                            style['run'].update(s.character_style)
                        else:
                            style['run'] = s.character_style
            # Direct formatting on the table is applied after the named style
            style['table'].update(TableStyle(self.namespace, tblPr))
        self.table_style, self.paragraph_style = style['table'], style.get('paragraph', None)
        self.run_style = style.get('run', None)
        self.overrides = self.table_style.overrides
        if self.overrides is inherit:
            self.overrides = {}
        if 'wholeTable' in self.overrides and 'table' in self.overrides['wholeTable']:
            self.table_style.update(self.overrides['wholeTable']['table'])

        self.style_map = {}
        self.paragraphs = []
        self.cell_map = []

        rows = self.namespace.XPath('./w:tr')(tbl)
        for r, tr in enumerate(rows):
            overrides = self.get_overrides(r, None, len(rows), None)
            self.resolve_row_style(tr, overrides)
            cells = self.namespace.XPath('./w:tc')(tr)
            self.cell_map.append([])
            for c, tc in enumerate(cells):
                overrides = self.get_overrides(r, c, len(rows), len(cells))
                self.resolve_cell_style(tc, overrides, r, c, len(rows), len(cells))
                self.cell_map[-1].append(tc)
                for p in self.namespace.XPath('./w:p')(tc):
                    para_map[p] = self
                    self.paragraphs.append(p)
                    self.resolve_para_style(p, overrides)

        self.handle_merged_cells()
        self.sub_tables = {x:Table(namespace, x, styles, para_map, is_sub_table=True) for x in self.namespace.XPath('./w:tr/w:tc/w:tbl')(tbl)}

    @property
    def bidi(self):
        'True only when the resolved table style has bidi set to True'
        return self.table_style.bidi is True

    def override_allowed(self, name):
        'Check if the named override is allowed by the tblLook element'
        if name.endswith('Cell') or name == 'wholeTable':
            return True
        look = self.table_style.look
        if (look & 0x0020 and name == 'firstRow') or (look & 0x0040 and name == 'lastRow') or \
           (look & 0x0080 and name == 'firstCol') or (look & 0x0100 and name == 'lastCol'):
            return True
        if name.startswith('band'):
            # For bands the look bits disable banding rather than enable it
            if name.endswith('Horz'):
                return not bool(look & 0x0200)
            if name.endswith('Vert'):
                return not bool(look & 0x0400)
        return False

    def get_overrides(self, r, c, num_of_rows, num_of_cols_in_row):
        '''
        Return the names of the overrides that apply at the given position.

        :param r: Zero based row index
        :param c: Zero based cell index within the row, or None to get the
            overrides for the row as a whole
        :param num_of_rows: Number of rows in the table
        :param num_of_cols_in_row: Number of cells in the row, ignored when
            ``c`` is None
        :return: Tuple of override names, in the order in which they should
            be applied, filtered by ``override_allowed()``
        '''
        overrides = ['wholeTable']

        def add_band(index, band_size, orientation):
            # A band size that is not a positive integer (zero, negative, or
            # never read from the document) cannot define a band. Emit no
            # band override for it instead of failing on the division.
            if not isinstance(band_size, int) or band_size < 1:
                return
            is_odd_band = (index // band_size) % 2 == 1
            overrides.append('band%d%s' % (1 if is_odd_band else 2, orientation))

        if c is not None:
            add_band(c, self.table_style.col_band_size, 'Vert')
        add_band(r, self.table_style.row_band_size, 'Horz')

        # According to the OOXML spec columns should have higher override
        # priority than rows, but Word seems to do it the other way around.
        if c is not None:
            if c == 0:
                overrides.append('firstCol')
            if c >= num_of_cols_in_row - 1:
                overrides.append('lastCol')
        if r == 0:
            overrides.append('firstRow')
        if r >= num_of_rows - 1:
            overrides.append('lastRow')
        if c is not None:
            if r == 0:
                if c == 0:
                    overrides.append('nwCell')
                if c == num_of_cols_in_row - 1:
                    overrides.append('neCell')
            if r == num_of_rows - 1:
                if c == 0:
                    overrides.append('swCell')
                if c == num_of_cols_in_row - 1:
                    overrides.append('seCell')
        return tuple(filter(self.override_allowed, overrides))

    def resolve_row_style(self, tr, overrides):
        '''
        Compute the style of the row ``tr`` and store it in ``style_map``.

        The row overrides are applied in order, followed by the row's own
        ``w:trPr``.
        '''
        rs = RowStyle(self.namespace)
        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                ors = ovr.get('row', None)
                if ors is not None:
                    rs.update(ors)

        for trPr in self.namespace.XPath('./w:trPr')(tr):
            rs.update(RowStyle(self.namespace, trPr))
        if self.bidi:
            rs.apply_bidi()
        self.style_map[tr] = rs

    def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
        '''
        Compute the style of the cell ``tc`` and store it in ``style_map``.

        The cell overrides are applied in order, followed by the cell's own
        ``w:tcPr``. Padding that is still unspecified is taken from the table
        style, and unspecified borders on edges that are inside the table
        fall back to the insideH/insideV borders of the cell and then of the
        table.
        '''
        cs = CellStyle(self.namespace)
        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                ors = ovr.get('cell', None)
                if ors is not None:
                    cs.update(ors)

        for tcPr in self.namespace.XPath('./w:tcPr')(tc):
            cs.update(CellStyle(self.namespace, tcPr))

        for x in edges:
            p = 'cell_padding_%s' % x
            val = getattr(cs, p)
            if val is inherit:
                setattr(cs, p, getattr(self.table_style, p))

            is_inside_edge = (
                (x == 'left' and col > 0) or
                (x == 'top' and row > 0) or
                (x == 'right' and col < cols_in_row - 1) or
                (x == 'bottom' and row < rows -1)
            )
            inside_edge = ('insideH' if x in {'top', 'bottom'} else 'insideV') if is_inside_edge else None
            for prop in border_props:
                if not prop.startswith('border'):
                    continue
                eprop = prop % x
                iprop = (prop % inside_edge) if inside_edge else None
                val = getattr(cs, eprop)
                if val is inherit and iprop is not None:
                    # Use the insideX borders if the main cell borders are not
                    # specified
                    val = getattr(cs, iprop)
                    if val is inherit:
                        val = getattr(self.table_style, iprop)
                if not is_inside_edge and val == 'none':
                    # Cell borders must override table borders even when the
                    # table border is not null and the cell border is null.
                    val = 'hidden'
                setattr(cs, eprop, val)

        if self.bidi:
            cs.apply_bidi()
        self.style_map[tc] = cs

    def resolve_para_style(self, p, overrides):
        '''
        Compute the [paragraph style, run style] pair that the table
        contributes to the paragraph ``p`` and store it in ``style_map``.

        Either entry is None when neither the table style nor any applicable
        override provides a style of that kind.
        '''
        text_styles = [clone(self.paragraph_style), clone(self.run_style)]

        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                for i, name in enumerate(('para', 'run')):
                    ops = ovr.get(name, None)
                    if ops is not None:
                        if text_styles[i] is None:
                            # Work on a copy: later overrides are merged in
                            # with update(), which must not modify the
                            # override object stored in self.overrides.
                            copied = clone(ops)
                            text_styles[i] = ops if copied is None else copied
                        else:
                            text_styles[i].update(ops)
        self.style_map[p] = text_styles

    @staticmethod
    def _detach(tc):
        'Remove tc from its parent element, if it still has one'
        parent = tc.getparent()
        if parent is not None:
            parent.remove(tc)

    def handle_merged_cells(self):
        '''
        Collapse runs of merged cells.

        A run starts at a cell whose merge value is ``restart`` and extends
        over the following ``continue`` cells. The first cell of a run gets
        the run length as its ``row_span`` (vMerge) or ``col_span`` (hMerge)
        and the remaining cells are removed from the XML tree.
        '''
        if not self.cell_map:
            return
        # Handle vMerge
        max_col_num = max(len(r) for r in self.cell_map)
        for c in range(max_col_num):
            cells = [row[c] if c < len(row) else None for row in self.cell_map]
            runs = [[]]
            for cell in cells:
                try:
                    s = self.style_map[cell]
                except KeyError:  # cell is None
                    s = CellStyle(self.namespace)
                if s.vMerge == 'restart':
                    runs.append([cell])
                elif s.vMerge == 'continue':
                    runs[-1].append(cell)
                else:
                    runs.append([])
            for run in runs:
                if len(run) > 1:
                    self.style_map[run[0]].row_span = len(run)
                    for tc in run[1:]:
                        self._detach(tc)

        # Handle hMerge
        for cells in self.cell_map:
            runs = [[]]
            for cell in cells:
                try:
                    s = self.style_map[cell]
                except KeyError:  # cell is None
                    s = CellStyle(self.namespace)
                if s.col_span is not inherit:
                    # An explicit gridSpan takes precedence over hMerge
                    runs.append([])
                    continue
                if s.hMerge == 'restart':
                    runs.append([cell])
                elif s.hMerge == 'continue':
                    runs[-1].append(cell)
                else:
                    runs.append([])

            for run in runs:
                if len(run) > 1:
                    self.style_map[run[0]].col_span = len(run)
                    for tc in run[1:]:
                        # The cell may already have been removed by the
                        # vMerge pass above
                        self._detach(tc)

    def __iter__(self):
        'Yield the paragraphs of this table followed by those of its sub-tables'
        yield from self.paragraphs
        for t in itervalues(self.sub_tables):
            yield from t

    def apply_markup(self, rmap, page, parent=None):
        '''
        Build the HTML table for this table and insert it into the tree.

        :param rmap: Mapping from the XML paragraph elements to the HTML
            elements generated for them
        :param page: Page object, stored on the table style for use when the
            table CSS is computed
        :param parent: Element to append the table to. When None, the table
            is inserted in front of the HTML element of its first paragraph;
            if the table has no paragraphs at all nothing is done.
        '''
        table = TABLE('\n\t\t')
        if self.bidi:
            table.set('dir', 'rtl')
        self.table_style.page = page
        style_map = {}
        if parent is None:
            try:
                first_para = rmap[next(iter(self))]
            except StopIteration:
                return
            parent = first_para.getparent()
            idx = parent.index(first_para)
            parent.insert(idx, table)
        else:
            parent.append(table)
        for row in self.namespace.XPath('./w:tr')(self.tbl):
            tr = TR('\n\t\t\t')
            style_map[tr] = self.style_map[row]
            tr.tail = '\n\t\t'
            table.append(tr)
            for tc in self.namespace.XPath('./w:tc')(row):
                td = TD()
                style_map[td] = s = self.style_map[tc]
                if s.col_span is not inherit:
                    td.set('colspan', str(s.col_span))
                if s.row_span is not inherit:
                    td.set('rowspan', str(s.row_span))
                td.tail = '\n\t\t\t'
                tr.append(td)
                for x in self.namespace.XPath('./w:p|./w:tbl')(tc):
                    if x.tag.endswith('}p'):
                        td.append(rmap[x])
                    else:
                        self.sub_tables[x].apply_markup(rmap, page, parent=td)
            if len(tr):
                tr[-1].tail = '\n\t\t'
        if len(table):
            table[-1].tail = '\n\t'

        table_style = self.table_style.css
        if table_style:
            table.set('class', self.styles.register(table_style, 'table'))
        for elem, style in iteritems(style_map):
            css = style.css
            if css:
                elem.set('class', self.styles.register(css, elem.tag))
class Tables:
    '''
    Registry of the top level tables of a document.

    ``para_map`` maps each paragraph inside a table to the ``Table`` that
    contains it and ``sub_tables`` holds the nested ``w:tbl`` elements that
    are already handled by a registered table.
    '''

    def __init__(self, namespace):
        self.namespace = namespace
        self.tables = []
        self.para_map = {}
        self.sub_tables = set()

    def register(self, tbl, styles):
        'Create a Table for tbl, unless it is a nested table of a registered one'
        if tbl not in self.sub_tables:
            table = Table(self.namespace, tbl, styles, self.para_map)
            self.tables.append(table)
            self.sub_tables.update(table.sub_tables)

    def apply_markup(self, object_map, page_map):
        '''
        Generate the HTML markup for every registered table.

        ``object_map`` is inverted (values become keys) before being handed
        to the tables and ``page_map`` provides the page for each ``w:tbl``.
        '''
        reverse_map = {}
        for key, value in iteritems(object_map):
            reverse_map[value] = key
        for table in self.tables:
            table.apply_markup(reverse_map, page_map[table.tbl])

    def para_style(self, p):
        'The paragraph style the containing table gives to p, or None'
        table = self.para_map.get(p)
        if table is None:
            return None
        return table.style_map.get(p, (None, None))[0]

    def run_style(self, p):
        'The run style the containing table gives to p, or None'
        table = self.para_map.get(p)
        if table is None:
            return None
        return table.style_map.get(p, (None, None))[1]