#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>


from lxml import etree

from calibre.ebooks.metadata.opf3 import (
    DC, OPF, XPath, create_rating, create_series, create_timestamp,
    encode_is_multiple, ensure_id, normalize_whitespace, parse_date, read_prefixes,
    read_refines, read_user_metadata2, refdef, remove_element, set_last_modified,
    set_refines, set_user_metadata3
)
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf
from polyglot.builtins import itervalues


class Data:
    """Plain attribute holder shared by the upgrade steps.

    ``upgrade_metadata`` stores ``prefixes`` and ``refines`` on an instance
    and passes it to every ``upgrade_*`` function.
    """


def upgrade_identifiers(root, data):
    """Rewrite each ``dc:identifier`` so its text is ``scheme:value``.

    The ``opf:scheme`` attribute is removed. When the text has the form
    ``urn:<prefix>:<rest>`` (case-insensitive ``urn:``) with both parts
    non-empty, ``<prefix>`` is used as the scheme and ``<rest>`` as the value.
    Afterwards every attribute except ``id`` is deleted from the element.
    """
    for ident in XPath('./opf:metadata/dc:identifier')(root):
        val = (ident.text or '').strip()
        scheme = ident.attrib.pop(OPF('scheme'), None)
        if val.lower().startswith('urn:'):
            prefix, _, rest = val[4:].partition(':')
            if prefix and rest:
                scheme, val = prefix, rest
        if scheme and val:
            ident.text = '{}:{}'.format(scheme, val)
        for attr in tuple(ident.attrib):
            if attr != 'id':
                del ident.attrib[attr]


def upgrade_title(root, data):
    """Drop empty ``dc:title`` elements and mark the first real one as main.

    Any ``<meta name="calibre:title_sort">`` elements are removed; the last
    non-empty ``content`` among them is attached to the first title as a
    ``file-as`` refinement, next to ``title-type = main``.
    """
    first_title = None
    for title in XPath('./opf:metadata/dc:title')(root):
        if not (title.text and title.text.strip()):
            remove_element(title, data.refines)
        elif first_title is None:
            first_title = title

    title_sort = None
    for m in XPath('./opf:metadata/opf:meta[@name="calibre:title_sort"]')(root):
        title_sort = m.get('content') or title_sort
        remove_element(m, data.refines)

    if first_title is None:
        return
    extra = [refdef('file-as', title_sort)] if title_sort else ()
    set_refines(first_title, data.refines, refdef('title-type', 'main'), *extra)


def upgrade_languages(root, data):
    """Strip all attributes from ``dc:language``; add ``und`` if none exists."""
    langs = XPath('./opf:metadata/dc:language')(root)
    if not langs:
        # EPUB spec says dc:language is required
        metadata = XPath('./opf:metadata')(root)[0]
        lang = metadata.makeelement(DC('language'))
        lang.text = 'und'
        metadata.append(lang)
        return
    for lang in langs:
        lang.attrib.clear()


def upgrade_authors(root, data):
    """Convert ``opf:role`` / ``opf:file-as`` attributes into refining metas.

    For every ``dc:creator`` and ``dc:contributor`` the two attributes are
    removed and, when non-empty, re-expressed as ``<meta refines="#id">``
    elements appended to the parent of the element.
    """
    for which in 'creator', 'contributor':
        for elem in XPath('./opf:metadata/dc:' + which)(root):
            role = elem.attrib.pop(OPF('role'), None)
            sort = elem.attrib.pop(OPF('file-as'), None)
            if not (role or sort):
                continue
            aid = ensure_id(elem)
            metadata = elem.getparent()
            # Role first, then file-as: the order in which metas are appended.
            refinements = (
                (role, {'refines': '#' + aid, 'property': 'role', 'scheme': 'marc:relators'}),
                (sort, {'refines': '#' + aid, 'property': 'file-as'}),
            )
            for text, attrib in refinements:
                if text:
                    m = metadata.makeelement(OPF('meta'), attrib=attrib)
                    m.text = text
                    metadata.append(m)


def upgrade_timestamp(root, data):
    """Replace ``<meta name="calibre:timestamp">`` via ``create_timestamp``.

    Each such meta is removed. If its ``content`` parses as a date, the
    parsed value is handed to ``create_timestamp``; unparseable values are
    dropped on purpose (best-effort conversion).
    """
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:timestamp"]')(root):
        # Grab the parent before the element is detached from the tree.
        parent = meta.getparent()
        remove_element(meta, data.refines)
        raw = meta.get('content')
        if not raw:
            continue
        try:
            when = parse_date(raw, is_w3cdtf=True)
        except Exception:
            # Deliberately ignored: an invalid timestamp is simply discarded.
            continue
        create_timestamp(root, data.prefixes, parent, when)


def upgrade_date(root, data):
    """Keep only the first ``dc:date`` that has text; remove all others."""
    found = False
    for date in XPath('./opf:metadata/dc:date')(root):
        if date.text and not found:
            found = True
        else:
            # Either empty, or a second date: only one dc:date allowed
            remove_element(date, data.refines)


def upgrade_rating(root, data):
    """Remove ``<meta name="calibre:rating">`` and re-create the rating.

    When several such metas exist, the ``content`` of the last one is used.
    """
    rating = None
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:rating"]')(root):
        remove_element(meta, data.refines)
        rating = meta.get('content')
    if rating is not None:
        create_rating(root, data.prefixes, rating)


def upgrade_series(root, data):
    """Remove the calibre series metas and re-create them via ``create_series``.

    The series index defaults to ``'1.0'`` when no
    ``<meta name="calibre:series_index">`` is present. Nothing is created
    unless a non-empty series name was found.
    """
    series, series_index = None, '1.0'
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:series"]')(root):
        remove_element(meta, data.refines)
        series = meta.get('content')
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:series_index"]')(root):
        remove_element(meta, data.refines)
        series_index = meta.get('content')

    if series:
        create_series(root, data.refines, series, series_index)


def upgrade_custom(root, data):
    """Move user metadata read by ``read_user_metadata2`` to the v3 form."""
    user_metadata = read_user_metadata2(root, remove_tags=True)
    if not user_metadata:
        return
    for field_metadata in itervalues(user_metadata):
        encode_is_multiple(field_metadata)
    set_user_metadata3(root, data.prefixes, data.refines, user_metadata)


def upgrade_meta(root, data):
    """Turn old-style rendition ``<meta name=... content=...>`` into properties.

    Recognised names (with or without a ``rendition:`` prefix) are
    ``orientation``, ``layout``, ``spread``, ``fixed-layout`` and
    ``orientation-lock``. Matching elements lose their ``name`` and
    ``content`` attributes and gain a ``property`` attribute, with the value
    stored as element text. Other metas are left untouched.
    """
    for meta in XPath('./opf:metadata/opf:meta[@name]')(root):
        name, content = meta.get('name'), meta.get('content') or ''
        if name.startswith('rendition:'):
            name = name.partition(':')[-1]
        prop = None
        if name in ('orientation', 'layout', 'spread'):
            prop = 'rendition:' + name
        elif name == 'fixed-layout':
            prop = 'rendition:layout'
            content = {'true': 'pre-paginated'}.get(content.lower(), 'reflowable')
        elif name == 'orientation-lock':
            prop = 'rendition:orientation'
            content = {'portrait': 'portrait', 'landscape': 'landscape'}.get(content.lower(), 'auto')
        if prop:
            del meta.attrib['name']
            # The XPath only guarantees @name, so @content may be absent;
            # a plain ``del`` would raise KeyError for such elements.
            meta.attrib.pop('content', None)
            meta.set('property', prop)
            meta.text = content


def upgrade_cover(root, data):
    """Flag the manifest item referenced by ``<meta name="cover">`` as cover.

    The matching manifest item gets ``cover-image`` added to its
    ``properties`` attribute, unless its media type mentions ``xml`` or
    ``html`` or the token is already present.
    """
    for cover_meta in XPath('./opf:metadata/opf:meta[@name="cover"]')(root):
        # Google Play Books does not recognize covers unless the old style
        # <meta name="cover"> is present, so leave it in
        # remove_element(cover_meta, data.refines)
        cover_id = cover_meta.get('content')
        for item in XPath('./opf:manifest/opf:item[@id and @href and @media-type]')(root):
            if item.get('id') != cover_id:
                continue
            mt = (item.get('media-type') or '').lower()
            if not mt or 'xml' in mt or 'html' in mt:
                continue
            props = item.get('properties') or ''
            # Avoid writing a duplicated "cover-image" token.
            if 'cover-image' not in props.split():
                item.set('properties', normalize_whitespace(props + ' cover-image'))


def remove_invalid_attrs_in_dc_metadata(root, data):
    """Delete every attribute except ``id`` from all Dublin Core elements."""
    # DC('') yields '{namespace}'; slicing strips the surrounding braces.
    dc_namespace = DC('')[1:-1]
    for tag in XPath('//*[namespace-uri() = "{}"]'.format(dc_namespace))(root):
        for k in tuple(tag.attrib):
            if k != 'id':
                del tag.attrib[k]


def upgrade_metadata(root):
    """Upgrade the metadata of the parsed OPF tree *root* in place.

    Reads the prefixes and refines of *root* once, runs each ``upgrade_*``
    step with them, strips invalid attributes from Dublin Core elements,
    updates the last-modified stamp and finally pretty-prints the tree.
    Returns ``None``.
    """
    data = Data()
    data.prefixes = read_prefixes(root)
    data.refines = read_refines(root)

    steps = (
        upgrade_identifiers,
        upgrade_title,
        upgrade_languages,
        upgrade_authors,
        upgrade_timestamp,
        upgrade_date,
        upgrade_rating,
        upgrade_series,
        upgrade_custom,
        upgrade_meta,
        upgrade_cover,
    )
    for step in steps:
        step(root, data)

    remove_invalid_attrs_in_dc_metadata(root, data)
    set_last_modified(root, data.prefixes, data.refines)
    pretty_print_opf(root)


if __name__ == '__main__':
    import sys

    # Use a context manager so the input file is closed once it is parsed.
    with open(sys.argv[-1], 'rb') as opf_file:
        root = parse_opf(opf_file)
    upgrade_metadata(root)
    print(etree.tostring(root))