#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8


__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import re

from lxml.etree import XPath as X

from calibre.utils.filenames import ascii_text
from polyglot.builtins import iteritems

# Names {{{
# Relationship-type URIs, keyed by a short symbolic name.
TRANSITIONAL_NAMES = {
    'DOCUMENT': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument',
    'DOCPROPS': 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties',
    'APPPROPS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties',
    'STYLES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles',
    'NUMBERING': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering',
    'FONTS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable',
    'EMBEDDED_FONT': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font',
    'IMAGES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image',
    'LINKS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink',
    'FOOTNOTES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes',
    'ENDNOTES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes',
    'THEMES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme',
    'SETTINGS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings',
    'WEB_SETTINGS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings',
}

# Same keys as TRANSITIONAL_NAMES; only the officeDocument/2006 URI prefix is
# rewritten, every other URI is carried over unchanged.
STRICT_NAMES = {
    key: uri.replace(
        'http://schemas.openxmlformats.org/officeDocument/2006',
        'http://purl.oclc.org/ooxml/officeDocument')
    for key, uri in iteritems(TRANSITIONAL_NAMES)
}

# Namespace prefix -> namespace URI.
TRANSITIONAL_NAMESPACES = {
    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
    'o': 'urn:schemas-microsoft-com:office:office',
    've': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    # Text Content
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'w10': 'urn:schemas-microsoft-com:office:word',
    'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
    'xml': 'http://www.w3.org/XML/1998/namespace',
    # Drawing
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
    'mv': 'urn:schemas-microsoft-com:mac:vml',
    'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
    'v': 'urn:schemas-microsoft-com:vml',
    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
    # Properties (core and extended)
    'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'ep': 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    # Content Types
    'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
    # Package Relationships
    'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',
    # Dublin Core document properties
    'dcmitype': 'http://purl.org/dc/dcmitype/',
    'dcterms': 'http://purl.org/dc/terms/'
}

# Same prefixes as TRANSITIONAL_NAMESPACES with the officeDocument,
# wordprocessingml and drawingml URI prefixes rewritten (in that order).
STRICT_NAMESPACES = {
    prefix: uri.replace(
        'http://schemas.openxmlformats.org/officeDocument/2006',
        'http://purl.oclc.org/ooxml/officeDocument'
    ).replace(
        'http://schemas.openxmlformats.org/wordprocessingml/2006',
        'http://purl.oclc.org/ooxml/wordprocessingml'
    ).replace(
        'http://schemas.openxmlformats.org/drawingml/2006',
        'http://purl.oclc.org/ooxml/drawingml'
    )
    for prefix, uri in iteritems(TRANSITIONAL_NAMESPACES)
}
# }}}


def barename(x):
    '''Return the part of ``x`` after its last ``}``, or all of ``x`` if it
    contains no ``}`` (i.e. strip a leading ``{namespace}`` from a tag).'''
    _, _, local = x.rpartition('}')
    return local


def XML(x):
    '''Return ``x`` qualified with the ``xml`` namespace in ``{uri}name`` form.'''
    xml_uri = TRANSITIONAL_NAMESPACES['xml']
    return '{%s}%s' % (xml_uri, x)


def generate_anchor(name, existing):
    '''Build an anchor id derived from ``name`` that is not in ``existing``.

    The id is ``id_`` followed by the characters of ``ascii_text(name)`` that
    are ASCII letters, digits or underscores, with leading underscores
    dropped. If that id is already in ``existing``, ``_1``, ``_2``, ... is
    appended until an unused id is found. ``existing`` is only queried with
    ``in``; it is not modified.
    '''
    # NOTE(review): ascii_text presumably transliterates to ASCII -- confirm
    # against calibre.utils.filenames.
    cleaned = re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name))
    base = 'id_' + cleaned.lstrip('_')
    candidate = base
    suffix = 1
    while candidate in existing:
        candidate = '%s_%d' % (base, suffix)
        suffix += 1
    return candidate


class DOCXNamespace:
    '''Namespace-aware helpers (XPath compilation, qualified-name expansion,
    element creation) bound to either the transitional or the strict set of
    namespace and relationship URIs defined in this module.'''

    def __init__(self, transitional=True):
        '''Select the URI tables: transitional when ``transitional`` is
        truthy, strict otherwise. Each instance works on its own copies.'''
        self.xpath_cache = {}
        if transitional:
            namespaces, names = TRANSITIONAL_NAMESPACES, TRANSITIONAL_NAMES
        else:
            namespaces, names = STRICT_NAMESPACES, STRICT_NAMES
        self.namespaces = namespaces.copy()
        self.names = names.copy()

    def XPath(self, expr):
        '''Return a compiled XPath for ``expr`` using this instance's
        namespace prefixes; compiled objects are cached per expression.'''
        compiled = self.xpath_cache.get(expr)
        if compiled is not None:
            return compiled
        compiled = X(expr, namespaces=self.namespaces)
        self.xpath_cache[expr] = compiled
        return compiled

    def is_tag(self, x, q):
        '''Return True if ``x`` (an object with a ``tag`` attribute, or the
        tag itself) equals the ``prefix:name`` tag ``q`` once expanded.'''
        actual = getattr(x, 'tag', x)
        prefix, _, local = q.partition(':')
        # An unknown prefix expands to '{None}...', which simply won't match.
        wanted = '{%s}%s' % (self.namespaces.get(prefix), local)
        return wanted == actual

    def expand(self, name, sep=':'):
        '''Convert ``prefix<sep>local`` into ``{namespace-uri}local``.

        When either side of ``sep`` is empty (including when ``sep`` does not
        occur in ``name``) the non-empty part is returned unchanged. Raises
        ``KeyError`` for a prefix that is not in ``self.namespaces``.
        '''
        prefix, _, local = name.partition(sep)
        if not (prefix and local):
            return local or prefix
        return '{%s}%s' % (self.namespaces[prefix], local)

    def get(self, x, attr, default=None):
        '''Look up the attribute ``attr`` (in ``prefix:name`` form) on element
        ``x``, returning ``default`` when it is absent.'''
        attributes = x.attrib
        return attributes.get(self.expand(attr), default)

    def ancestor(self, elem, name):
        '''Return the first result of ``ancestor::<name>[1]`` evaluated on
        ``elem``, or None if there is no such ancestor.'''
        try:
            nearest = self.XPath('ancestor::%s[1]' % name)(elem)[0]
        except IndexError:
            nearest = None
        return nearest

    def children(self, elem, *args):
        '''Evaluate the union of ``child::<a>`` for every ``a`` in ``args``
        against ``elem`` and return the XPath result.'''
        steps = ['child::%s' % a for a in args]
        return self.XPath('|'.join(steps))(elem)

    def descendants(self, elem, *args):
        '''Evaluate the union of ``descendant::<a>`` for every ``a`` in
        ``args`` against ``elem`` and return the XPath result.'''
        steps = ['descendant::%s' % a for a in args]
        return self.XPath('|'.join(steps))(elem)

    def makeelement(self, root, tag, append=True, **attrs):
        '''Create an element via ``root.makeelement`` and return it.

        ``tag`` is given as ``prefix:name``. Attribute keywords use ``_`` in
        place of ``:`` (split at the first underscore), e.g. ``w_val``. The
        new element is appended to ``root`` unless ``append`` is false.
        '''
        factory = root.makeelement
        qualified_tag = self.expand(tag)
        qualified_attrs = {
            self.expand(key, sep='_'): value for key, value in iteritems(attrs)
        }
        element = factory(qualified_tag, **qualified_attrs)
        if append:
            root.append(element)
        return element