1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3
4
5__license__ = 'GPL v3'
6__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
7
8import numbers
9from collections import Counter, defaultdict
10from operator import attrgetter
11
12from lxml import etree
13
14from calibre.ebooks import parse_css_length
15from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
16from calibre.utils.localization import lang_as_iso639_1
17from polyglot.builtins import iteritems
18from tinycss.css21 import CSS21Parser
19
# Module-wide tinycss parser, used to tokenize CSS font-family values
css_parser = CSS21Parser()

# The four box edges, in the order they are iterated throughout this module
border_edges = 'left', 'top', 'right', 'bottom'
# Attribute-name templates: interpolate an edge name to get the per-edge
# attribute stored on block styles
border_props = (
    'padding_%s',
    'border_%s_width',
    'border_%s_style',
    'border_%s_color',
)
# Sentinel used while merging per-edge values that turn out to disagree
ignore = object()
25
26
def parse_css_font_family(raw):
    """Yield the font family names found in a CSS font-family value.

    :param raw: The value part of a font-family declaration (without the
        property name), as a string.
    :return: A generator of the values of the STRING and IDENT tokens, in
        document order. Iteration stops at the first ``inherit`` keyword and
        nothing is yielded if the declaration could not be parsed.
    """
    decl, errs = css_parser.parse_style_attr('font-family:' + raw)
    if not decl:
        return
    for token in decl[0].value:
        # Compare against the exact token types. A substring test against
        # 'STRING IDENT' would also accept the one letter token type 'S'
        # (whitespace in tinycss) and yield the whitespace between names as
        # if it were a font family.
        if token.type in ('STRING', 'IDENT'):
            val = token.value
            if val == 'inherit':
                break
            yield val
36
37
def css_font_family_to_docx(raw):
    """Return the font name to use in DOCX for a CSS font-family value.

    Only the first family reported by parse_css_font_family() is considered.
    Generic CSS families are replaced by a concrete font name, anything else
    is returned unchanged. Returns None when no family is found.
    """
    generic = {'serif':'Cambria', 'sansserif':'Candara', 'sans-serif':'Candara', 'fantasy':'Comic Sans', 'cursive':'Segoe Script'}
    families = parse_css_font_family(raw)
    try:
        first = next(families)
    except StopIteration:
        return None
    # The lookup is case-insensitive, but a non-generic name keeps its case
    return generic.get(first.lower(), first)
42
43
def bmap(x):
    """Map a truthy/falsy value to the strings 'on'/'off'."""
    if x:
        return 'on'
    return 'off'
46
47
def is_dropcaps(html_tag, tag_style):
    """Decide whether a floated tag should be treated as a drop cap.

    The tag qualifies when it has fewer than two children, its text content
    (excluding the tail) is shorter than five characters and it is floated
    to the left. The checks are made in that order and stop at the first
    one that fails.
    """
    if len(html_tag) >= 2:
        return False
    text = etree.tostring(html_tag, method='text', encoding='unicode', with_tail=False)
    if len(text) >= 5:
        return False
    return tag_style['float'] == 'left'
50
51
class CombinedStyle:
    """A block style paired with a run style, written as one paragraph style.

    ``bs`` and ``rs`` are the block and run style objects, ``blocks`` the
    blocks that use this pair. ``id``, ``name``, ``seq`` and
    ``outline_level`` start out as None and are expected to be assigned by
    the code that creates the instance before serialize() is called.
    """

    def __init__(self, bs, rs, blocks, namespace):
        self.bs = bs
        self.rs = rs
        self.blocks = blocks
        self.namespace = namespace
        self.id = None
        self.name = None
        self.seq = None
        self.outline_level = None

    def apply(self):
        """Link every block to this style and every run to its run style."""
        for block in self.blocks:
            block.linked_style = self
            for run in block.runs:
                run.parent_style = self.rs

    def serialize(self, styles, normal_style):
        """Write this style as a w:style element under ``styles``.

        :param styles: The parent element the style is created under.
        :param normal_style: The CombinedStyle every other style is based
            on. Properties are serialized relative to its ``bs``/``rs``.
        """
        ns = self.namespace
        make = ns.makeelement
        style_el = make(styles, 'w:style', w_styleId=self.id, w_type='paragraph')
        make(style_el, 'w:name', w_val=self.name)
        make(style_el, 'w:qFormat')
        if self is not normal_style:
            make(style_el, 'w:basedOn', w_val=normal_style.id)
        if self.seq == 0:
            # The first style in sequence is flagged as the default one
            style_el.set('{%s}%s' % (ns.namespaces['w'], 'default'), '1')
        ppr = make(style_el, 'w:pPr')
        self.bs.serialize_properties(ppr, normal_style.bs)
        if self.outline_level is not None:
            make(ppr, 'w:outlineLvl', w_val=str(self.outline_level + 1))
        rpr = make(style_el, 'w:rPr')
        self.rs.serialize_properties(rpr, normal_style.rs)
82
83
class FloatSpec:
    """Frame settings for a floated HTML tag, written as a w:framePr element.

    A float is either a drop cap (see is_dropcaps()) or a generic frame whose
    alignment, size and spacing are read from the style of the floated tag.
    Border and padding values are read with read_css_block_borders() and are
    applied to every block that belongs to the float.
    """

    def __init__(self, namespace, html_tag, tag_style):
        self.makeelement = namespace.makeelement
        self.is_dropcaps = is_dropcaps(html_tag, tag_style)
        self.blocks = []
        if self.is_dropcaps:
            self.dropcaps_lines = 3
        else:
            def scaled(value):
                # All lengths are multiplied by 20 and truncated to an int
                # (presumably points to twips, as for block margins)
                return int(20 * value)

            self.x_align = tag_style['float']
            self.w = None
            self.h = None
            if tag_style._get('width') != 'auto':
                self.w = scaled(max(tag_style['min-width'], tag_style['width']))
            if tag_style._get('height') == 'auto':
                self.h_rule = 'auto'
            else:
                min_height = tag_style['min-height']
                if min_height > 0:
                    self.h_rule, height = 'atLeast', min_height
                else:
                    self.h_rule, height = 'exact', tag_style['height']
                self.h = scaled(height)
            self.h_space = scaled(max(tag_style['margin-right'], tag_style['margin-left']))
            self.v_space = scaled(max(tag_style['margin-top'], tag_style['margin-bottom']))

        read_css_block_borders(self, tag_style)

    def serialize(self, block, parent):
        """Write the frame properties for ``block`` under ``parent``.

        :param block: One of the blocks in ``self.blocks``. The first and
            last block get their before/after spacing zeroed respectively.
        :param parent: The element the frame, indent, spacing and border
            elements are created under.
        """
        make = self.makeelement
        if self.is_dropcaps:
            frame = {
                'w_dropCap': 'drop', 'w_lines': str(self.dropcaps_lines),
                'w_wrap': 'around', 'w_vAnchor': 'text', 'w_hAnchor': 'text',
            }
        else:
            frame = {
                'w_wrap': 'around', 'w_vAnchor': 'text', 'w_hAnchor': 'text',
                'w_xAlign': self.x_align, 'w_y': '1',
                'w_hSpace': str(self.h_space), 'w_vSpace': str(self.v_space),
                'w_hRule': self.h_rule,
            }
            # Width and height are only written when they were not 'auto'
            for key, value in (('w_w', self.w), ('w_h', self.h)):
                if value is not None:
                    frame[key] = str(value)
        make(parent, 'w:framePr', **frame)
        # Margins are already applied by the frame style, so override them to
        # be zero on individual blocks
        make(parent, 'w:ind', w_left='0', w_leftChars='0', w_right='0', w_rightChars='0')
        spacing = {}
        if block is self.blocks[0]:
            spacing.update({'w_before': '0', 'w_beforeLines': '0'})
        if block is self.blocks[-1]:
            spacing.update({'w_after': '0', 'w_afterLines': '0'})
        if spacing:
            make(parent, 'w:spacing', **spacing)
        # Similarly apply the same border and padding properties to all blocks
        # in this floatspec
        borders = make(parent, 'w:pBdr')
        for edge in border_edges:
            make(
                borders, 'w:' + edge,
                w_space=str(getattr(self, 'padding_' + edge)),
                w_val=getattr(self, 'border_%s_style' % edge),
                w_sz=str(getattr(self, 'border_%s_width' % edge)),
                w_color=getattr(self, 'border_%s_color' % edge))
142
143
class DOCXStyle:
    """Base class for hashable style objects that serialize to w:style.

    Subclasses list the attributes that define the style in ``ALL_PROPS``
    and must set all of them before calling ``DOCXStyle.__init__``, because
    the hash is computed there. Equality and hashing are based only on
    those attributes, which lets equal styles be de-duplicated through a
    dict.
    """

    ALL_PROPS = ()
    TYPE = 'paragraph'

    def __init__(self, namespace):
        self.namespace = namespace
        # Helper that qualifies a local name with the 'w' namespace URI
        self.w = lambda x: '{%s}%s' % (namespace.namespaces['w'], x)
        self.id = self.name = None
        self.next_style = None
        self.calculate_hash()

    def calculate_hash(self):
        """Cache the hash of the current values of all ALL_PROPS attributes."""
        self._hash = hash(tuple(
            getattr(self, x) for x in self.ALL_PROPS))

    def makeelement(self, parent, name, **attrs):
        """Create an element via ``parent.makeelement``.

        The tag name and all attribute names are qualified with the 'w'
        namespace. The new element is returned, appending it is left to
        the caller.
        """
        return parent.makeelement(self.w(name), **{self.w(k):v for k, v in attrs.items()})

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        for x in self.ALL_PROPS:
            if getattr(self, x) != getattr(other, x, None):
                return False
        return True

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        # The previous implementation called serialize() without its
        # required normal_style argument and passed the bytes returned by
        # etree.tostring() on, so it could never produce a result. Describe
        # the style through its defining properties instead, which also
        # works before an id has been assigned.
        props = ', '.join('%s=%r' % (x, getattr(self, x, None)) for x in self.ALL_PROPS)
        ans = '%s(id=%r' % (self.__class__.__name__, getattr(self, 'id', None))
        if props:
            ans += ', ' + props
        return ans + ')'
    __str__ = __repr__

    def serialize(self, styles, normal_style):
        """Append a w:style element for this style to ``styles``.

        The element carries the style id and type, a name child and, unless
        this is the normal style itself, a basedOn child that refers to
        ``normal_style``.

        :return: The newly created style element.
        """
        makeelement = self.makeelement
        style = makeelement(styles, 'style', styleId=self.id, type=self.TYPE)
        style.append(makeelement(style, 'name', val=self.name))
        if self is not normal_style:
            style.append(makeelement(style, 'basedOn', val=normal_style.id))
        styles.append(style)
        return style
187
188
# Maps CSS border-style keywords to the border values written by this module
LINE_STYLES = dict(
    none='none',
    hidden='none',
    dotted='dotted',
    dashed='dashed',
    solid='single',
    double='double',
    groove='threeDEngrave',
    ridge='threeDEmboss',
    inset='inset',
    outset='outset',
)
201
202
class TextStyle(DOCXStyle):
    """Character (run) formatting computed from a CSS style.

    All attributes listed in ``ALL_PROPS`` are read from ``css`` in
    ``__init__``. Properties are serialized relative to a "normal" style:
    only the values that differ from it are written, except when the style
    is the normal style itself, in which case everything is written.
    """

    ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
                 'background_color', 'underline', 'strike', 'dstrike', 'caps',
                 'shadow', 'small_caps', 'spacing', 'vertical_align', 'padding',
                 'border_style', 'border_width', 'border_color')
    TYPE = 'character'

    def __init__(self, namespace, css, is_parent_style=False):
        """Read the run properties from ``css``.

        :param namespace: Object providing the ``namespaces`` mapping used
            to qualify element names.
        :param css: The computed style to read from.
        :param is_parent_style: When True, background color, borders and
            padding are not read and keep their neutral defaults.
        """
        self.font_family = css_font_family_to_docx(css['font-family'])
        try:
            self.font_size = max(0, int(float(css['font-size']) * 2))  # stylizer normalizes all font sizes into pts
        except (ValueError, TypeError, AttributeError):
            # Unusable font size: leave it unset, serialization skips it
            self.font_size = None

        fw = css['font-weight']
        self.bold = (fw.lower() if hasattr(fw, 'lower') else fw) in {'bold', 'bolder'} or int_or_zero(fw) >= 700
        self.italic = css['font-style'].lower() in {'italic', 'oblique'}
        self.color = convert_color(css['color'])
        self.background_color = None if is_parent_style else convert_color(css.backgroundColor)
        td = set((css.effective_text_decoration or '').split())
        self.underline = 'underline' in td
        # line-through combined with overline is rendered as a double strike
        self.dstrike = 'line-through' in td and 'overline' in td
        self.strike = not self.dstrike and 'line-through' in td
        self.text_transform = css['text-transform']  # TODO: If lowercase or capitalize, transform the actual text
        self.caps = self.text_transform == 'uppercase'
        self.small_caps = css['font-variant'].lower() in {'small-caps', 'smallcaps'}
        self.shadow = css['text-shadow'] not in {'none', None}
        try:
            self.spacing = int(float(css['letter-spacing']) * 20)
        except (ValueError, TypeError, AttributeError):
            self.spacing = None
        va = css.first_vertical_align
        if isinstance(va, numbers.Number):
            # Numeric offsets are stored as a string and written as w:position
            self.vertical_align = str(int(va * 2))
        else:
            val = {
                'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'super':'superscript',
                'bottom':'subscript', 'text-bottom':'subscript', 'sub':'subscript'}.get(va)
            self.vertical_align = val or 'baseline'

        def merge(current, new):
            # The first value seen is taken as is; once two edges disagree
            # the property collapses to the ignore sentinel
            if current is None:
                return new
            return current if current == new else ignore

        self.padding = self.border_color = self.border_width = self.border_style = None
        if not is_parent_style:
            # DOCX does not support individual borders/padding for inline content
            for edge in border_edges:
                # In DOCX padding can only be a positive integer
                try:
                    padding = max(0, int(css['padding-' + edge]))
                except ValueError:
                    padding = 0
                self.padding = merge(self.padding, padding)
                val = css['border-%s-width' % edge]
                if not isinstance(val, numbers.Number):
                    val = {'thin':0.2, 'medium':1, 'thick':2}.get(val, 0)
                val = min(96, max(2, int(val * 8)))
                self.border_width = merge(self.border_width, val)
                color = convert_color(css['border-%s-color' % edge])
                self.border_color = merge(self.border_color, color)
                style = LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none')
                self.border_style = merge(self.border_style, style)

        # Unset or inconsistent values fall back to "no border, no padding"
        if self.padding in (None, ignore):
            self.padding = 0
        if self.border_width in (None, ignore):
            self.border_width = 0
        if self.border_style in (None, ignore):
            self.border_style = 'none'
        if self.border_color in (None, ignore):
            self.border_color = 'auto'
        if self.border_style == 'none':
            self.border_width, self.border_color = 0, 'auto'

        DOCXStyle.__init__(self, namespace)

    def serialize_borders(self, bdr, normal_style):
        """Set the border attributes that differ from ``normal_style``.

        All four attributes are set when this is the normal style itself.
        Returns ``bdr``.
        """
        w = self.w
        is_normal_style = self is normal_style
        if is_normal_style or self.padding != normal_style.padding:
            bdr.set(w('space'), str(self.padding))
        if is_normal_style or self.border_width != normal_style.border_width:
            bdr.set(w('sz'), str(self.border_width))
        if is_normal_style or self.border_style != normal_style.border_style:
            bdr.set(w('val'), self.border_style)
        if is_normal_style or self.border_color != normal_style.border_color:
            bdr.set(w('color'), self.border_color)
        return bdr

    def serialize(self, styles, normal_style):
        """Serialize as a character style, returning the style element.

        The rPr child is only attached when it has at least one property.
        """
        makeelement = self.makeelement
        style_root = DOCXStyle.serialize(self, styles, normal_style)
        style = makeelement(style_root, 'rPr')
        self.serialize_properties(style, normal_style)
        if len(style) > 0:
            style_root.append(style)
        return style_root

    def serialize_properties(self, rPr, normal_style):
        """Append the run properties that differ from ``normal_style``.

        :param rPr: The element the property elements are appended to.
        :param normal_style: The TextStyle to compare against. When it is
            this very object, every property is written.
        """
        makeelement = self.makeelement
        is_normal_style = self is normal_style
        if is_normal_style or self.font_family != normal_style.font_family:
            rPr.append(makeelement(
                rPr, 'rFonts', **{k:self.font_family for k in 'ascii cs eastAsia hAnsi'.split()}))

        for name, attr, vmap in (('sz', 'font_size', str), ('b', 'bold', bmap), ('i', 'italic', bmap)):
            val = getattr(self, attr)
            if val is None:
                # Only font_size can be None (unparsable CSS value). Writing
                # str(None) would produce val="None", which is not a valid
                # measure, so leave the property to be inherited instead.
                continue
            if is_normal_style or getattr(normal_style, attr) != val:
                for suffix in ('', 'Cs'):
                    rPr.append(makeelement(rPr, name + suffix, val=vmap(val)))

        def check_attr(attr):
            val = getattr(self, attr)
            return is_normal_style or (val != getattr(normal_style, attr))

        if check_attr('color'):
            rPr.append(makeelement(rPr, 'color', val=self.color or 'auto'))
        if check_attr('background_color'):
            rPr.append(makeelement(rPr, 'shd', fill=self.background_color or 'auto'))
        if check_attr('underline'):
            rPr.append(makeelement(rPr, 'u', val='single' if self.underline else 'none'))
        if check_attr('dstrike'):
            rPr.append(makeelement(rPr, 'dstrike', val=bmap(self.dstrike)))
        if check_attr('strike'):
            rPr.append(makeelement(rPr, 'strike', val=bmap(self.strike)))
        if check_attr('caps'):
            rPr.append(makeelement(rPr, 'caps', val=bmap(self.caps)))
        if check_attr('small_caps'):
            rPr.append(makeelement(rPr, 'smallCaps', val=bmap(self.small_caps)))
        if check_attr('shadow'):
            rPr.append(makeelement(rPr, 'shadow', val=bmap(self.shadow)))
        if check_attr('spacing'):
            rPr.append(makeelement(rPr, 'spacing', val=str(self.spacing or 0)))
        if is_normal_style:
            # The normal style never carries a numeric offset
            rPr.append(makeelement(rPr, 'vertAlign', val=self.vertical_align if self.vertical_align in {'superscript', 'subscript'} else 'baseline'))
        elif self.vertical_align != normal_style.vertical_align:
            if self.vertical_align in {'superscript', 'subscript', 'baseline'}:
                rPr.append(makeelement(rPr, 'vertAlign', val=self.vertical_align))
            else:
                rPr.append(makeelement(rPr, 'position', val=self.vertical_align))

        bdr = self.serialize_borders(makeelement(rPr, 'bdr'), normal_style)
        if bdr.attrib:
            rPr.append(bdr)
357
358
class DescendantTextStyle:
    """The run properties by which a child text style differs from a parent.

    The differences are stored in ``properties`` as a tuple of
    ``(element name, frozenset of (attribute, value) pairs)`` entries, which
    is also what equality and hashing are based on. ``id`` and ``name`` are
    None until assigned by the code that numbers the styles.
    """

    def __init__(self, parent_style, child_style):
        """Compute the differences between ``parent_style`` and ``child_style``.

        Both arguments must provide the text style attributes compared
        below. ``child_style.makeelement`` is reused for serialization.
        """
        self.id = self.name = None
        self.makeelement = child_style.makeelement

        p = []

        def add(name, **props):
            p.append((name, frozenset(props.items())))

        def vals(attr):
            return getattr(parent_style, attr), getattr(child_style, attr)

        def check(attr):
            pval, cval = vals(attr)
            return pval != cval

        if parent_style.font_family != child_style.font_family:
            add('rFonts', **{k:child_style.font_family for k in 'ascii cs eastAsia hAnsi'.split()})

        for name, attr in (('sz', 'font_size'), ('b', 'bold'), ('i', 'italic')):
            pval, cval = vals(attr)
            if pval != cval:
                if attr == 'font_size' and cval is None:
                    # An unset font size cannot be written as a measure
                    # (str(None) would give val="None"), so leave the size
                    # to be inherited from the parent style
                    continue
                val = 'on' if attr in {'bold', 'italic'} else str(cval)  # bold, italic are toggle properties
                for suffix in ('', 'Cs'):
                    add(name + suffix, val=val)

        if check('color'):
            add('color', val=child_style.color or 'auto')
        if check('background_color'):
            add('shd', fill=child_style.background_color or 'auto')
        if check('underline'):
            add('u', val='single' if child_style.underline else 'none')
        if check('dstrike'):
            add('dstrike', val=bmap(child_style.dstrike))
        if check('strike'):
            add('strike', val='on')  # toggle property
        if check('caps'):
            add('caps', val='on')  # toggle property
        if check('small_caps'):
            add('smallCaps', val='on')  # toggle property
        if check('shadow'):
            add('shadow', val='on')  # toggle property
        if check('spacing'):
            add('spacing', val=str(child_style.spacing or 0))
        if check('vertical_align'):
            val = child_style.vertical_align
            if val in {'superscript', 'subscript', 'baseline'}:
                add('vertAlign', val=val)
            else:
                add('position', val=val)

        # All differing border attributes go into a single bdr element
        bdr = {}
        if check('padding'):
            bdr['space'] = str(child_style.padding)
        if check('border_width'):
            bdr['sz'] = str(child_style.border_width)
        if check('border_style'):
            bdr['val'] = child_style.border_style
        if check('border_color'):
            bdr['color'] = child_style.border_color
        if bdr:
            add('bdr', **bdr)
        self.properties = tuple(p)
        self._hash = hash(self.properties)

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        return self.properties == other.properties

    def __ne__(self, other):
        return self.properties != other.properties

    def serialize(self, styles):
        """Append a character style holding the stored properties to ``styles``.

        :return: The newly created style element.
        """
        makeelement = self.makeelement
        style = makeelement(styles, 'style', styleId=self.id, type='character')
        style.append(makeelement(style, 'name', val=self.name))
        rpr = makeelement(style, 'rPr')
        style.append(rpr)
        for name, attrs in self.properties:
            rpr.append(makeelement(style, name, **dict(attrs)))
        styles.append(style)
        return style
445
446
def read_css_block_borders(self, css, store_css_style=False):
    """Store per-edge padding, margin and border values on ``self``.

    For every edge in border_edges the attributes ``padding_<edge>``,
    ``margin_<edge>``, ``css_margin_<edge>``, ``border_<edge>_width``,
    ``border_<edge>_color`` and ``border_<edge>_style`` are set.

    :param self: The object to set the attributes on.
    :param css: The style to read from, or None to store defaults.
    :param store_css_style: When True, additionally store the lower-cased
        CSS border style as ``border_<edge>_css_style``.
    """
    if css is None:
        for edge in border_edges:
            defaults = [
                ('padding_' + edge, 0),
                ('margin_' + edge, 0),
                ('css_margin_' + edge, ''),
                ('border_%s_width' % edge, 2),
                ('border_%s_color' % edge, None),
                ('border_%s_style' % edge, 'none'),
            ]
            if store_css_style:
                defaults.append(('border_%s_css_style' % edge, 'none'))
            for attr, value in defaults:
                setattr(self, attr, value)
        return

    for edge in border_edges:
        # In DOCX padding can only be a positive integer
        try:
            padding = max(0, int(css['padding-' + edge]))
        except ValueError:
            padding = 0  # invalid value for padding
        setattr(self, 'padding_' + edge, padding)

        # In DOCX margin must be a positive integer in twips (twentieth of a point)
        try:
            margin = max(0, int(css['margin-' + edge] * 20))
        except ValueError:
            margin = 0  # e.g.: margin: auto
        setattr(self, 'margin_' + edge, margin)
        # The margin as declared is kept as well
        setattr(self, 'css_margin_' + edge, css._style.get('margin-' + edge, ''))

        width = css['border-%s-width' % edge]
        if not isinstance(width, numbers.Number):
            width = {'thin':0.2, 'medium':1, 'thick':2}.get(width, 0)
        # Scale by 8 and clamp to the range 2..96
        setattr(self, 'border_%s_width' % edge, min(96, max(2, int(width * 8))))

        setattr(self, 'border_%s_color' % edge, convert_color(css['border-%s-color' % edge]) or 'auto')

        css_style = css['border-%s-style' % edge].lower()
        setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(css_style, 'none'))
        if store_css_style:
            setattr(self, 'border_%s_css_style' % edge, css['border-%s-style' % edge].lower())
479
480
class BlockStyle(DOCXStyle):
    """Paragraph level formatting computed from a CSS style.

    Holds alignment, indentation, line height, background color and the
    per-edge margin, padding and border values. Like the other style
    classes, properties are serialized relative to a "normal" style.
    """

    ALL_PROPS = tuple(
        ['text_align', 'css_text_indent', 'text_indent', 'line_height', 'background_color'] +
        ['margin_' + edge for edge in border_edges] +
        ['css_margin_' + edge for edge in border_edges] +
        [prop % edge for edge in border_edges for prop in border_props]
    )

    def __init__(self, namespace, css, html_block, is_table_cell=False, parent_bg=None):
        """Read the block properties from ``css``.

        :param namespace: Object providing the ``namespaces`` mapping.
        :param css: The style to read from, or None to use defaults.
        :param html_block: Not used by this method.
        :param is_table_cell: When True, borders, padding and margins are
            zeroed and no background color is stored.
        :param parent_bg: Background color used when ``css`` has none.
        """
        read_css_block_borders(self, css)
        if is_table_cell:
            for edge in border_edges:
                overrides = (
                    ('border_%s_style' % edge, 'none'),
                    ('border_%s_width' % edge, 0),
                    ('padding_' + edge, 0),
                    ('margin_' + edge, 0),
                )
                for attr, value in overrides:
                    setattr(self, attr, value)

        if css is None:
            self.text_indent, self.css_text_indent = 0, None
            self.line_height = 280
            self.background_color = None
            self.text_align = 'left'
        else:
            try:
                self.text_indent = int(css['text-indent'] * 20)
                self.css_text_indent = css._get('text-indent')
            except (TypeError, ValueError):
                self.text_indent, self.css_text_indent = 0, None

            try:
                self.line_height = max(0, int(css.lineHeight * 20))
            except (TypeError, ValueError):
                # Fall back to 1.2 times the font size
                self.line_height = max(0, int(1.2 * css.fontSize * 20))

            if is_table_cell:
                self.background_color = None
            else:
                own_bg = convert_color(css['background-color'])
                self.background_color = parent_bg if own_bg is None else own_bg

            try:
                preserve_whitespace = css['white-space'].lower() in {'pre', 'pre-wrap'}
            except Exception:
                preserve_whitespace = False

            alignments = {'start':'left', 'left':'left', 'end':'right', 'right':'right', 'center':'center', 'justify':'both', 'centre':'center'}
            try:
                declared = css['text-align'].lower()
            except AttributeError:
                self.text_align = 'left'
            else:
                # Whitespace preserving blocks are always start aligned
                key = 'start' if preserve_whitespace else declared
                self.text_align = alignments.get(key, 'left')

        DOCXStyle.__init__(self, namespace)

    def serialize_borders(self, bdr, normal_style):
        """Append one child per edge that differs from ``normal_style``.

        Padding is written as the ``space`` attribute; style, width and
        color are always written together. Edges without any attribute are
        not appended. Returns ``bdr``.
        """
        w = self.w
        is_normal = self is normal_style
        for edge in border_edges:
            pad_name = 'padding_' + edge
            width_name = 'border_%s_width' % edge
            style_name = 'border_%s_style' % edge
            edge_el = bdr.makeelement(w(edge))

            padding = getattr(self, pad_name)
            if (is_normal and padding > 0) or padding != getattr(normal_style, pad_name):
                edge_el.set(w('space'), str(padding))

            width = getattr(self, width_name)
            bstyle = getattr(self, style_name)
            visible_on_normal = is_normal and width > 0 and bstyle != 'none'
            if visible_on_normal or width != getattr(normal_style, width_name) or bstyle != getattr(normal_style, style_name):
                edge_el.set(w('val'), bstyle)
                edge_el.set(w('sz'), str(width))
                edge_el.set(w('color'), getattr(self, 'border_%s_color' % edge))

            if edge_el.attrib:
                bdr.append(edge_el)
        return bdr

    def serialize(self, styles, normal_style):
        """Serialize as a paragraph style, returning the style element.

        The pPr child is only attached when it has at least one property.
        """
        style_root = DOCXStyle.serialize(self, styles, normal_style)
        ppr = self.makeelement(style_root, 'pPr')
        self.serialize_properties(ppr, normal_style)
        if len(ppr) > 0:
            style_root.append(ppr)
        return style_root

    def serialize_properties(self, pPr, normal_style):
        """Append the paragraph properties that differ from ``normal_style``.

        Written in order: spacing, indentation, shading, borders, alignment
        and the next style. Margins and text indent declared in em/ex units
        are written through the ``*Lines``/``*Chars`` attributes, all other
        values through the plain attributes.
        """
        makeelement, w = self.makeelement, self.w
        is_normal = self is normal_style

        def hundredths(value, unit):
            # An ex counts as half an em
            return int(value * (50 if unit == 'ex' else 100))

        spacing = makeelement(pPr, 'spacing')
        for edge, attr in (('top', 'before'), ('bottom', 'after')):
            css_margin = getattr(self, 'css_margin_' + edge)
            css_val, css_unit = parse_css_length(css_margin)
            if css_unit in ('em', 'ex'):
                lines = max(0, hundredths(css_val, css_unit))
                if (is_normal and lines > 0) or css_margin != getattr(normal_style, 'css_margin_' + edge):
                    spacing.set(w(attr + 'Lines'), str(lines))
            else:
                margin = getattr(self, 'margin_' + edge)
                if (is_normal and margin > 0) or margin != getattr(normal_style, 'margin_' + edge):
                    spacing.set(w(attr), str(margin))

        if is_normal or self.line_height != normal_style.line_height:
            spacing.set(w('line'), str(self.line_height))
            spacing.set(w('lineRule'), 'atLeast')

        if spacing.attrib:
            pPr.append(spacing)

        ind = makeelement(pPr, 'ind')
        for edge in ('left', 'right'):
            css_margin = getattr(self, 'css_margin_' + edge)
            css_val, css_unit = parse_css_length(css_margin)
            if css_unit in ('em', 'ex'):
                chars = max(0, hundredths(css_val, css_unit))
                if (is_normal and chars > 0) or css_margin != getattr(normal_style, 'css_margin_' + edge):
                    ind.set(w(edge + 'Chars'), str(chars))
            else:
                margin = getattr(self, 'margin_' + edge)
                if (is_normal and margin > 0) or margin != getattr(normal_style, 'margin_' + edge):
                    ind.set(w(edge), str(margin))
                    ind.set(w(edge + 'Chars'), '0')  # This is needed to override any declaration in the parent style

        # A non-negative indent is a first line indent, a negative one a
        # hanging indent; the magnitude is what gets written
        css_val, css_unit = parse_css_length(self.css_text_indent)
        if css_unit in ('em', 'ex'):
            chars = hundredths(css_val, css_unit)
            if css_val >= 0:
                attr, non_zero = 'firstLineChars', chars > 0
            else:
                attr, non_zero = 'hangingChars', chars < 0
            if (is_normal and non_zero) or self.css_text_indent != normal_style.css_text_indent:
                ind.set(w(attr), str(abs(chars)))
        else:
            indent = self.text_indent
            if indent >= 0:
                attr, non_zero = 'firstLine', indent > 0
            else:
                attr, non_zero = 'hanging', indent < 0
            if (is_normal and non_zero) or indent != normal_style.text_indent:
                ind.set(w(attr), str(abs(indent)))
                ind.set(w(attr + 'Chars'), '0')  # This is needed to override any declaration in the parent style
        if ind.attrib:
            pPr.append(ind)

        if (is_normal and self.background_color) or self.background_color != normal_style.background_color:
            pPr.append(makeelement(pPr, 'shd', val='clear', color='auto', fill=self.background_color or 'auto'))

        pbdr = self.serialize_borders(pPr.makeelement(w('pBdr')), normal_style)
        if len(pbdr):
            pPr.append(pbdr)

        if is_normal or self.text_align != normal_style.text_align:
            pPr.append(makeelement(pPr, 'jc', val=self.text_align))

        if not is_normal and self.next_style is not None:
            pPr.append(makeelement(pPr, 'next', val=self.next_style))
633
634
class StylesManager:
    """Creates, de-duplicates, names and serializes the styles of a document.

    Text and block styles are interned through create_text_style() and
    create_block_style(). finalize() then groups them into combined
    paragraph styles and descendant character styles and assigns ids, and
    serialize() writes the result.
    """

    def __init__(self, namespace, log, document_lang):
        self.namespace = namespace
        self.document_lang = lang_as_iso639_1(document_lang) or 'en'
        self.log = log
        # Both maps are keyed and valued by the style itself, so that an
        # equal style created later resolves to the first instance
        self.block_styles, self.text_styles = {}, {}
        self.styles_for_html_blocks = {}

    def create_text_style(self, css_style, is_parent_style=False):
        """Return the shared TextStyle for ``css_style``."""
        ans = TextStyle(self.namespace, css_style, is_parent_style=is_parent_style)
        existing = self.text_styles.get(ans, None)
        if existing is None:
            self.text_styles[ans] = ans
        else:
            ans = existing
        return ans

    def create_block_style(self, css_style, html_block, is_table_cell=False, parent_bg=None):
        """Return the shared BlockStyle for ``css_style``.

        The style is also recorded for ``html_block`` in
        ``styles_for_html_blocks``.
        """
        ans = BlockStyle(self.namespace, css_style, html_block, is_table_cell=is_table_cell, parent_bg=parent_bg)
        existing = self.block_styles.get(ans, None)
        if existing is None:
            self.block_styles[ans] = ans
        else:
            ans = existing
        self.styles_for_html_blocks[html_block] = ans
        return ans

    def finalize(self, all_blocks):
        """Derive the final set of styles from the blocks of the document.

        Sets ``pure_block_styles``, ``combined_styles``,
        ``descendant_text_styles`` and ``primary_heading_style``, plus
        ``normal_pure_block_style`` and ``normal_style`` when at least one
        style of the respective kind exists.

        :param all_blocks: The blocks of the document. It is iterated
            twice, so it must not be a one-shot iterator.
        """
        block_counts = Counter()
        used_pairs = defaultdict(list)
        heading_styles = defaultdict(list)
        headings = frozenset('h1 h2 h3 h4 h5 h6'.split())
        pure_block_styles = set()

        for block in all_blocks:
            bs = block.style
            block_counts[bs] += 1
            local_run_counts = Counter()
            for run in block.runs:
                local_run_counts[run.style] += run.style_weight
            if local_run_counts:
                # A block is represented by its block style together with
                # the most heavily weighted run style it contains
                rs = local_run_counts.most_common(1)[0][0]
                used_pairs[(bs, rs)].append(block)
                if block.html_tag in headings:
                    heading_styles[block.html_tag].append((bs, rs))
            else:
                pure_block_styles.add(bs)

        self.pure_block_styles = sorted(pure_block_styles, key=block_counts.__getitem__)
        bnum = len(str(max(1, len(pure_block_styles) - 1)))
        for i, bs in enumerate(self.pure_block_styles):
            bs.id = bs.name = '%0{}d Block'.format(bnum) % i
            bs.seq = i
            if i == 0:
                self.normal_pure_block_style = bs

        counts = Counter()
        smap = {}
        for (bs, rs), blocks in used_pairs.items():
            s = CombinedStyle(bs, rs, blocks, self.namespace)
            smap[(bs, rs)] = s
            counts[s] += sum(1 for b in blocks if not b.is_empty())
        # For each heading tag, the most used style that has no outline
        # level yet becomes the style for that heading level
        for i, heading_tag in enumerate(sorted(heading_styles)):
            styles = sorted((smap[k] for k in heading_styles[heading_tag]), key=counts.__getitem__)
            styles = list(filter(lambda s:s.outline_level is None, styles))
            if styles:
                heading_style = styles[-1]
                heading_style.outline_level = i

        snum = len(str(max(1, len(counts) - 1)))
        heading_styles = []
        for i, (style, count) in enumerate(counts.most_common()):
            if i == 0:
                # The most common combined style is the normal style
                self.normal_style = style
                style.id = style.name = 'Normal'
            else:
                if style.outline_level is None:
                    val = 'Para %0{}d'.format(snum) % i
                else:
                    val = 'Heading %d' % (style.outline_level + 1)
                    heading_styles.append(style)
                style.id = style.name = val
            style.seq = i
        self.combined_styles = sorted(counts, key=attrgetter('seq'))
        for ls in self.combined_styles:
            ls.apply()

        descendant_style_map = {}
        ds_counts = Counter()
        for block in all_blocks:
            for run in block.runs:
                if run.parent_style is not run.style and run.parent_style and run.style:
                    ds = DescendantTextStyle(run.parent_style, run.style)
                    if ds.properties:
                        run.descendant_style = descendant_style_map.get(ds)
                        if run.descendant_style is None:
                            run.descendant_style = descendant_style_map[ds] = ds
                        ds_counts[run.descendant_style] += run.style_weight
        rnum = len(str(max(1, len(ds_counts) - 1)))
        for i, (text_style, count) in enumerate(ds_counts.most_common()):
            text_style.id = 'Text%d' % i
            text_style.name = '%0{}d Text'.format(rnum) % i
            text_style.seq = i
        self.descendant_text_styles = sorted(descendant_style_map, key=attrgetter('seq'))

        self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, (
            self.descendant_text_styles, self.combined_styles))))

        self.primary_heading_style = None
        if heading_styles:
            heading_styles.sort(key=attrgetter('outline_level'))
            self.primary_heading_style = heading_styles[0]
        else:
            # Without heading styles, fall back to the style with the
            # largest font
            ms = 0
            for s in self.combined_styles:
                # font_size is None when the CSS value could not be parsed;
                # comparing None with an int would raise a TypeError
                font_size = s.rs.font_size or 0
                if font_size > ms:
                    self.primary_heading_style = s
                    ms = font_size

    def serialize(self, styles):
        """Write all finalized styles into the ``styles`` element.

        Every attribute of the first ``lang`` descendant is set to the
        document language first. Raises IndexError if there is no such
        element.
        """
        lang = styles.xpath('descendant::*[local-name()="lang"]')[0]
        for k in tuple(lang.attrib):
            lang.attrib[k] = self.document_lang
        for style in self.combined_styles:
            style.serialize(styles, self.normal_style)
        for style in self.descendant_text_styles:
            style.serialize(styles)
        for style in sorted(self.pure_block_styles, key=attrgetter('seq')):
            style.serialize(styles, self.normal_pure_block_style)
769