1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3# License: GPLv3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
4
5
6from lxml import etree
7
8from calibre.ebooks.metadata.opf3 import (
9    DC, OPF, XPath, create_rating, create_series, create_timestamp,
10    encode_is_multiple, ensure_id, normalize_whitespace, parse_date, read_prefixes,
11    read_refines, read_user_metadata2, refdef, remove_element, set_last_modified,
12    set_refines, set_user_metadata3
13)
14from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf
15from polyglot.builtins import itervalues
16
17
class Data:
    """Plain attribute bag passed to every upgrade_* pass.

    upgrade_metadata() creates one instance and stores the results of
    read_prefixes() and read_refines() on it as ``prefixes`` and ``refines``.
    """
20
21
def upgrade_identifiers(root, data):
    """Rewrite each dc:identifier so its text has the ``scheme:value`` form.

    The scheme is taken from the opf:scheme attribute, unless the text is a
    ``urn:<scheme>:<value>`` string with both parts non-empty, in which case
    the parts of the URN win. Afterwards every attribute except ``id`` is
    dropped from the element.

    :param root: the OPF package element
    :param data: shared upgrade state (not used by this pass)
    """
    for identifier in XPath('./opf:metadata/dc:identifier')(root):
        value = (identifier.text or '').strip()
        scheme = identifier.attrib.pop(OPF('scheme'), None)
        if value.lower().startswith('urn:'):
            # Skip the "urn:" prefix and split on the first remaining colon
            urn_scheme, _, urn_value = value[4:].partition(':')
            if urn_scheme and urn_value:
                scheme, value = urn_scheme, urn_value
        if scheme and value:
            identifier.text = '{}:{}'.format(scheme, value)
        # Snapshot the names first: the mapping is mutated while deleting
        stale = [name for name in identifier.attrib if name != 'id']
        for name in stale:
            del identifier.attrib[name]
36
37
def upgrade_title(root, data):
    """Mark the first non-blank dc:title as the main title.

    Blank dc:title elements are removed. Every legacy
    ``<meta name="calibre:title_sort">`` is removed as well; the last one
    with a non-empty ``content`` supplies the ``file-as`` refinement of the
    main title.

    :param root: the OPF package element
    :param data: shared upgrade state; ``data.refines`` is used
    """
    main_title = None
    for candidate in XPath('./opf:metadata/dc:title')(root):
        text = candidate.text
        if text and text.strip():
            if main_title is None:
                main_title = candidate
        else:
            remove_element(candidate, data.refines)

    sort_value = None
    for legacy in XPath('./opf:metadata/opf:meta[@name="calibre:title_sort"]')(root):
        # An empty/missing content keeps whatever was found earlier
        sort_value = legacy.get('content') or sort_value
        remove_element(legacy, data.refines)

    if main_title is None:
        return
    refinements = [refdef('title-type', 'main')]
    if sort_value:
        refinements.append(refdef('file-as', sort_value))
    set_refines(main_title, data.refines, *refinements)
57
58
def upgrade_languages(root, data):
    """Strip all attributes from dc:language, adding one if none exists.

    When the metadata has no dc:language at all, a new element with the
    text ``und`` is appended to the first opf:metadata element.

    :param root: the OPF package element
    :param data: shared upgrade state (not used by this pass)
    """
    existing = XPath('./opf:metadata/dc:language')(root)
    if not existing:
        # EPUB spec says dc:language is required
        parent = XPath('./opf:metadata')(root)[0]
        placeholder = parent.makeelement(DC('language'))
        placeholder.text = 'und'
        parent.append(placeholder)
        return
    for language in existing:
        language.attrib.clear()
70
71
def upgrade_authors(root, data):
    """Move opf:role / opf:file-as attributes into refining meta elements.

    For every dc:creator and dc:contributor the two attributes are removed
    from the element. Each non-empty one becomes a ``<meta refines="#id">``
    appended to the element's parent: first ``role`` (with the
    ``marc:relators`` scheme), then ``file-as``.

    :param root: the OPF package element
    :param data: shared upgrade state (not used by this pass)
    """
    for tag in ('creator', 'contributor'):
        for person in XPath('./opf:metadata/dc:' + tag)(root):
            role = person.attrib.pop(OPF('role'), None)
            file_as = person.attrib.pop(OPF('file-as'), None)
            if not (role or file_as):
                continue
            target = '#' + ensure_id(person)
            parent = person.getparent()
            pending = []
            if role:
                pending.append((role, {'refines':target, 'property':'role', 'scheme':'marc:relators'}))
            if file_as:
                pending.append((file_as, {'refines':target, 'property':'file-as'}))
            for text, attrs in pending:
                refinement = parent.makeelement(OPF('meta'), attrib=attrs)
                refinement.text = text
                parent.append(refinement)
88
89
def upgrade_timestamp(root, data):
    """Replace legacy ``calibre:timestamp`` metas via create_timestamp().

    Every matching meta is removed. When its ``content`` is non-empty and
    parse_date() accepts it, create_timestamp() is called with the meta's
    former parent and the parsed value.

    :param root: the OPF package element
    :param data: shared upgrade state; ``prefixes`` and ``refines`` are used
    """
    for legacy in XPath('./opf:metadata/opf:meta[@name="calibre:timestamp"]')(root):
        # Grab the parent before the element is removed
        parent = legacy.getparent()
        remove_element(legacy, data.refines)
        raw = legacy.get('content')
        if not raw:
            continue
        try:
            when = parse_date(raw, is_w3cdtf=True)
        except Exception:
            # Best effort: a value that cannot be parsed is simply dropped
            continue
        create_timestamp(root, data.prefixes, parent, when)
102
103
def upgrade_date(root, data):
    """Keep only the first dc:date that has text; remove all the others.

    :param root: the OPF package element
    :param data: shared upgrade state; ``data.refines`` is used
    """
    kept_one = False
    for element in XPath('./opf:metadata/dc:date')(root):
        if element.text and not kept_one:
            kept_one = True
        else:
            # Either empty, or a surplus date: only one dc:date allowed
            remove_element(element, data.refines)
116
117
def upgrade_rating(root, data):
    """Replace legacy ``calibre:rating`` metas via create_rating().

    All matching metas are removed; the ``content`` of the last one is
    handed to create_rating(), unless it is missing (None).

    :param root: the OPF package element
    :param data: shared upgrade state; ``prefixes`` and ``refines`` are used
    """
    latest = None
    legacy_metas = XPath('./opf:metadata/opf:meta[@name="calibre:rating"]')(root)
    for legacy in legacy_metas:
        remove_element(legacy, data.refines)
        latest = legacy.get('content')
    if latest is None:
        return
    create_rating(root, data.prefixes, latest)
125
126
def upgrade_series(root, data):
    """Replace legacy calibre series metas via create_series().

    All ``calibre:series`` and ``calibre:series_index`` metas are removed.
    The ``content`` of the last series meta names the series; when it is
    non-empty, create_series() is called with it and the series index.

    The index defaults to ``'1.0'``. A series_index meta whose ``content``
    is missing or empty does not override the value found so far, so
    create_series() always receives a non-empty string.

    :param root: the OPF package element
    :param data: shared upgrade state; ``data.refines`` is used
    """
    series, series_index = None, '1.0'
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:series"]')(root):
        remove_element(meta, data.refines)
        series = meta.get('content')
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:series_index"]')(root):
        remove_element(meta, data.refines)
        # Do not let a meta without usable content clobber the default
        series_index = meta.get('content') or series_index

    if series:
        create_series(root, data.refines, series, series_index)
138
139
def upgrade_custom(root, data):
    """Re-store user metadata read by read_user_metadata2().

    The metadata is read with ``remove_tags=True``. If anything was found,
    encode_is_multiple() is applied to every value of the mapping and the
    result is written back with set_user_metadata3().

    :param root: the OPF package element
    :param data: shared upgrade state; ``prefixes`` and ``refines`` are used
    """
    user_metadata = read_user_metadata2(root, remove_tags=True)
    if not user_metadata:
        return
    for field_metadata in itervalues(user_metadata):
        encode_is_multiple(field_metadata)
    set_user_metadata3(root, data.prefixes, data.refines, user_metadata)
146
147
def upgrade_meta(root, data):
    """Convert legacy rendition ``<meta name=...>`` elements to properties.

    Names are matched with an optional ``rendition:`` prefix stripped:

    * ``orientation``, ``layout``, ``spread`` become ``rendition:<name>``
      with the content carried over unchanged.
    * ``fixed-layout`` becomes ``rendition:layout``: ``pre-paginated`` when
      the content is ``true`` (case-insensitive), otherwise ``reflowable``.
    * ``orientation-lock`` becomes ``rendition:orientation``: ``portrait``
      or ``landscape`` when the content says so, otherwise ``auto``.

    Converted elements lose their ``name``/``content`` attributes, gain a
    ``property`` attribute and carry the value as element text. Other metas
    are left untouched.

    :param root: the OPF package element
    :param data: shared upgrade state (not used by this pass)
    """
    for meta in XPath('./opf:metadata/opf:meta[@name]')(root):
        name, content = meta.get('name'), meta.get('content') or ''
        if name.startswith('rendition:'):
            name = name.partition(':')[-1]
        prop = None
        if name in ('orientation', 'layout', 'spread'):
            prop = 'rendition:' + name
        elif name == 'fixed-layout':
            prop = 'rendition:layout'
            content = {'true': 'pre-paginated'}.get(content.lower(), 'reflowable')
        elif name == 'orientation-lock':
            prop = 'rendition:orientation'
            content = {'portrait': 'portrait', 'landscape': 'landscape'}.get(content.lower(), 'auto')
        if prop:
            # The XPath guarantees @name exists; @content may be absent (it
            # was read with a default above), so it must not be del'ed blindly
            del meta.attrib['name']
            meta.attrib.pop('content', None)
            meta.set('property', prop)
            meta.text = content
167
168
def upgrade_cover(root, data):
    """Flag the manifest item named by ``<meta name="cover">`` as cover-image.

    For every legacy cover meta, each manifest item (having id, href and
    media-type) whose id equals the meta's ``content`` gets ``cover-image``
    added to its ``properties`` attribute, provided its media type is
    non-empty and contains neither ``xml`` nor ``html``. Items that already
    list ``cover-image`` are left unchanged so the token is never duplicated.

    The legacy meta itself is deliberately kept: Google Play Books does not
    recognize covers unless the old style <meta name="cover"> is present.

    :param root: the OPF package element
    :param data: shared upgrade state (not used by this pass)
    """
    # Build the manifest query once; it is evaluated per cover meta below
    manifest_items = XPath('./opf:manifest/opf:item[@id and @href and @media-type]')
    for cover_meta in XPath('./opf:metadata/opf:meta[@name="cover"]')(root):
        cover_id = cover_meta.get('content')
        for item in manifest_items(root):
            if item.get('id') != cover_id:
                continue
            mt = (item.get('media-type') or '').lower()
            if not mt or 'xml' in mt or 'html' in mt:
                continue
            props = item.get('properties') or ''
            if 'cover-image' in props.split():
                continue
            item.set('properties', normalize_whitespace(props + ' cover-image'))
180
181
def remove_invalid_attrs_in_dc_metadata(root, data):
    """Drop every attribute except ``id`` from the selected DC elements.

    Elements are selected anywhere under ``root`` by comparing their
    namespace-uri() with ``DC('')`` minus its first and last character
    (presumably the braces of a ``{namespace}`` prefix -- confirm against
    the DC helper).

    :param root: the OPF package element
    :param data: shared upgrade state (not used by this pass)
    """
    dc_namespace = DC('')[1:-1]
    selector = XPath('//*[namespace-uri() = "{}"]'.format(dc_namespace))
    for element in selector(root):
        attributes = element.attrib
        # Collect the names first: the mapping is mutated while deleting
        for name in [key for key in attributes if key != 'id']:
            del attributes[name]
187
188
def upgrade_metadata(root):
    """Run all metadata upgrade passes on ``root`` in place.

    A Data instance holding the results of read_prefixes() and
    read_refines() is shared by all passes. After the passes have run in
    the fixed order below, set_last_modified() and pretty_print_opf() are
    applied.

    :param root: the OPF package element
    """
    data = Data()
    data.prefixes = read_prefixes(root)
    data.refines = read_refines(root)

    # Order matters: the passes are applied exactly in this sequence
    passes = (
        upgrade_identifiers,
        upgrade_title,
        upgrade_languages,
        upgrade_authors,
        upgrade_timestamp,
        upgrade_date,
        upgrade_rating,
        upgrade_series,
        upgrade_custom,
        upgrade_meta,
        upgrade_cover,
        remove_invalid_attrs_in_dc_metadata,
    )
    for apply_pass in passes:
        apply_pass(root, data)

    set_last_modified(root, data.prefixes, data.refines)
    pretty_print_opf(root)
209
210
if __name__ == '__main__':
    # Command line use: upgrade the OPF file named by the last argument and
    # write the resulting XML to stdout.
    import sys
    # Use a context manager so the input file is closed once parsed
    # (assumes parse_opf() fully consumes the stream before returning)
    with open(sys.argv[-1], 'rb') as opf_file:
        root = parse_opf(opf_file)
    upgrade_metadata(root)
    # Serialize to text: printing the default bytes result would show a
    # b'...' repr with escaped newlines on Python 3
    print(etree.tostring(root, encoding='unicode'))
216