1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3
4
5__license__ = 'GPL v3'
6__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
7
8import re
9
10from lxml.etree import XPath as X
11
12from calibre.utils.filenames import ascii_text
13from polyglot.builtins import iteritems
14
15# Names {{{
# Symbolic name -> relationship-type URI, in the transitional flavour of the
# format. Code looks relationships up by the short symbolic key.
TRANSITIONAL_NAMES = {
    'DOCUMENT': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument',
    'DOCPROPS': 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties',
    'APPPROPS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties',
    'STYLES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles',
    'NUMBERING': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering',
    'FONTS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable',
    'EMBEDDED_FONT': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font',
    'IMAGES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image',
    'LINKS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink',
    'FOOTNOTES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes',
    'ENDNOTES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes',
    'THEMES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme',
    'SETTINGS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings',
    'WEB_SETTINGS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings',
}

# Strict flavour of the same table, derived by swapping the officeDocument URI
# prefix. Entries that do not start with that prefix (e.g. DOCPROPS, which
# lives under .../package/2006/) are carried over unchanged.
STRICT_NAMES = {
    key: uri.replace(
        'http://schemas.openxmlformats.org/officeDocument/2006',
        'http://purl.oclc.org/ooxml/officeDocument')
    for key, uri in TRANSITIONAL_NAMES.items()
}
37
# Prefix -> XML namespace URI, in the transitional flavour of the format.
# These prefixes are the ones used in XPath expressions and in the
# "prefix:name" strings accepted by the helpers in this module.
TRANSITIONAL_NAMESPACES = {
    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
    'o': 'urn:schemas-microsoft-com:office:office',
    # 've' and 'mc' are two prefixes for the same namespace URI.
    've': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    # Text Content
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'w10': 'urn:schemas-microsoft-com:office:word',
    'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
    'xml': 'http://www.w3.org/XML/1998/namespace',
    # Drawing
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
    'mv': 'urn:schemas-microsoft-com:mac:vml',
    'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
    'v': 'urn:schemas-microsoft-com:vml',
    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
    # Properties (core and extended)
    'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'ep': 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    # Content Types
    'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
    # Package Relationships
    'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',
    # Dublin Core document properties
    'dcmitype': 'http://purl.org/dc/dcmitype/',
    'dcterms': 'http://purl.org/dc/terms/',
}

# Strict flavour of the same table. Only URIs under the officeDocument,
# wordprocessingml and drawingml prefixes are rewritten; every other entry
# (package, Dublin Core, vendor urn: namespaces, ...) is kept as-is.
STRICT_NAMESPACES = {
    prefix: uri.replace(
        'http://schemas.openxmlformats.org/officeDocument/2006', 'http://purl.oclc.org/ooxml/officeDocument'
    ).replace(
        'http://schemas.openxmlformats.org/wordprocessingml/2006', 'http://purl.oclc.org/ooxml/wordprocessingml'
    ).replace(
        'http://schemas.openxmlformats.org/drawingml/2006', 'http://purl.oclc.org/ooxml/drawingml'
    )
    for prefix, uri in TRANSITIONAL_NAMESPACES.items()
}
77# }}}
78
79
def barename(x):
    """Return *x* with any leading ``{namespace}`` part removed.

    Everything up to and including the last ``}`` is dropped; a string
    containing no ``}`` is returned unchanged.
    """
    _namespace, _brace, local = x.rpartition('}')
    return local
82
83
def XML(x):
    """Return *x* qualified with the ``xml`` namespace as ``{uri}x``.

    The URI is the ``'xml'`` entry of TRANSITIONAL_NAMESPACES.
    """
    xml_namespace = TRANSITIONAL_NAMESPACES['xml']
    qualified = '{%s}%s' % (xml_namespace, x)
    return qualified
86
87
def generate_anchor(name, existing):
    """Build an ``id_``-prefixed anchor from *name* that is not in *existing*.

    *name* is passed through ``ascii_text``, every character other than
    ``0-9``, ``a-z``, ``A-Z`` and ``_`` is removed, and leading underscores
    are stripped. If the result is already in *existing*, ``_1``, ``_2``, ...
    is appended until an unused value is found. *existing* is only tested for
    membership; it is not modified.
    """
    cleaned = re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name))
    base = 'id_' + cleaned.lstrip('_')
    candidate = base
    suffix = 0
    while candidate in existing:
        suffix += 1
        candidate = '%s_%d' % (base, suffix)
    return candidate
95
96
class DOCXNamespace:
    """Prefix-aware helpers bound to one set of DOCX namespaces.

    Each instance carries its own copy of a prefix -> namespace URI table
    (``namespaces``), a copy of the relationship-type table (``names``) and a
    cache of XPath expressions compiled against that prefix table.
    """

    def __init__(self, transitional=True):
        """Pick the transitional (default) or the strict tables.

        The module-level tables are copied, so changes made to an instance's
        ``namespaces``/``names`` do not affect the shared constants.
        """
        # Compiled XPath objects, keyed by their expression string.
        self.xpath_cache = {}
        if transitional:
            self.namespaces = TRANSITIONAL_NAMESPACES.copy()
            self.names = TRANSITIONAL_NAMES.copy()
        else:
            self.namespaces = STRICT_NAMESPACES.copy()
            self.names = STRICT_NAMES.copy()

    def XPath(self, expr):
        """Return the compiled XPath for *expr*, compiling it on first use.

        The expression is compiled with this instance's prefix table and the
        result is remembered in ``xpath_cache``.
        """
        ans = self.xpath_cache.get(expr)
        if ans is None:
            self.xpath_cache[expr] = ans = X(expr, namespaces=self.namespaces)
        return ans

    def is_tag(self, x, q):
        """Return True if *x* names the element given by ``prefix:name`` *q*.

        *x* may be an object with a ``tag`` attribute or the tag value itself.
        A prefix missing from the table never raises; it simply produces a
        qualified name that will not match a real tag.
        """
        tag = getattr(x, 'tag', x)
        prefix, _, local = q.partition(':')
        return '{%s}%s' % (self.namespaces.get(prefix), local) == tag

    def expand(self, name, sep=':'):
        """Turn ``prefix<sep>local`` into ``{namespace-uri}local``.

        The split happens at the first occurrence of *sep*. When either side
        of the separator is empty (including when *sep* is absent) no
        namespace lookup is done and the non-empty part is returned as-is.

        Raises KeyError if the prefix is not in this instance's table.
        """
        prefix, _, local = name.partition(sep)
        if prefix and local:
            return '{%s}%s' % (self.namespaces[prefix], local)
        return local or prefix

    def get(self, x, attr, default=None):
        """Return attribute *attr* (``prefix:name``) of *x*, or *default*."""
        return x.attrib.get(self.expand(attr), default)

    def ancestor(self, elem, name):
        """Return the nearest ancestor of *elem* matching *name*, or None."""
        matches = self.XPath('ancestor::%s[1]' % name)(elem)
        # Only the indexing can signal "no such ancestor"; keep the try
        # narrow so unrelated errors from XPath evaluation are not masked.
        try:
            return matches[0]
        except IndexError:
            return None

    def children(self, elem, *args):
        """Evaluate the union of ``child::<arg>`` for every arg on *elem*."""
        expr = '|'.join('child::%s' % a for a in args)
        return self.XPath(expr)(elem)

    def descendants(self, elem, *args):
        """Evaluate the union of ``descendant::<arg>`` for every arg on *elem*."""
        expr = '|'.join('descendant::%s' % a for a in args)
        return self.XPath(expr)(elem)

    def makeelement(self, root, tag, append=True, **attrs):
        """Create an element via ``root.makeelement`` and optionally append it.

        *tag* is a ``prefix:name`` string. Attribute names are given as
        keyword arguments using ``_`` in place of ``:`` (``w_val`` means
        ``w:val``); only the first underscore acts as the separator. The new
        element is appended to *root* unless *append* is false, and is
        returned in either case.
        """
        qualified_tag = self.expand(tag)
        qualified_attrs = {self.expand(key, sep='_'): value for key, value in attrs.items()}
        ans = root.makeelement(qualified_tag, **qualified_attrs)
        if append:
            root.append(ans)
        return ans
145