1#!/usr/local/bin/python3.8
2# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
3
4
5__license__   = 'GPL v3'
6__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
7__docformat__ = 'restructuredtext en'
8
9import os, re
10from calibre.utils.date import isoformat, now
11from calibre import guess_type
12from polyglot.builtins import iteritems
13
14
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
    """
    Copy the fields that are set on ``mi`` into the OEB metadata ``m``.

    For every field that ``mi`` reports as non-null (via ``mi.is_null``) the
    corresponding entries in ``m`` are replaced. Nothing is returned; ``m``
    is updated in place.

    :param mi: Source metadata object. Must provide ``is_null()``,
        ``get_identifiers()`` and ``format_series_index()`` plus the plain
        attributes read below.
    :param m: Target metadata container. Must provide ``add()``, ``clear()``
        and ``filter()`` and expose ``title``, ``identifier`` and
        ``timestamp``. ``filter()`` is assumed to drop the entries for which
        the predicate is true -- confirm against its definition.
    :param log: Accepted for interface compatibility; not used here.
    :param override_input_metadata: When true, the book producer,
        description, publisher, series, series index, rating, subjects and
        ISBN identifier already present in ``m`` are discarded even if ``mi``
        supplies no replacement for them.
    """
    from calibre.ebooks.oeb.base import OPF

    def is_set(field):
        return not mi.is_null(field)

    def is_book_producer(entry):
        return entry.role.lower() == 'bkp'

    def is_isbn(entry):
        return entry.scheme.lower() == 'isbn'

    # Title and its sort form
    if is_set('title'):
        m.clear('title')
        m.add('title', mi.title)

    title_sort = mi.title_sort
    if title_sort:
        if not m.title:
            # No title at all: fall back to the sort form as the title
            m.add('title', title_sort)
        m.clear('title_sort')
        m.add('title_sort', title_sort)

    # Authors: existing creators with role 'aut' or an empty role are filtered
    if is_set('authors'):
        m.filter('creator', lambda entry: entry.role.lower() in ['aut', ''])
        for author in mi.authors:
            attrib = {'role': 'aut'}
            author_sort = mi.author_sort
            if author_sort:
                attrib[OPF('file-as')] = author_sort
            m.add('creator', author, attrib=attrib)

    # Book producer is stored as a contributor with role 'bkp'
    producer_given = is_set('book_producer')
    if producer_given or override_input_metadata:
        m.filter('contributor', is_book_producer)
    if producer_given:
        m.add('contributor', mi.book_producer, role='bkp')

    # Plain fields that are wiped when overriding and no value was supplied
    overridable = (
        ('comments', 'description'),
        ('publisher', 'publisher'),
        ('series', 'series'),
    )
    for field, term in overridable:
        given = is_set(field)
        if given or override_input_metadata:
            m.clear(term)
        if given:
            m.add(term, getattr(mi, field))

    # Identifiers: update entries whose scheme matches (case-insensitively),
    # otherwise append a new one with an upper-cased scheme
    isbn_supplied = False
    for scheme, value in iteritems(mi.get_identifiers()):
        wanted = scheme.lower()
        if wanted == 'isbn':
            isbn_supplied = True
        matching = [entry for entry in m.identifier
                    if entry.scheme.lower() == wanted]
        for entry in matching:
            entry.content = value
        if not matching:
            m.add('identifier', value, scheme=scheme.upper())
    if override_input_metadata and not isbn_supplied:
        m.filter('identifier', is_isbn)

    # Languages: empty and 'und' codes are skipped
    if is_set('languages'):
        m.clear('language')
        for code in mi.languages:
            if code and code.lower() not in ('und', ''):
                m.add('language', code)

    index_given = is_set('series_index')
    if index_given or override_input_metadata:
        m.clear('series_index')
    if index_given:
        m.add('series_index', mi.format_series_index())

    rating_given = is_set('rating')
    if rating_given or override_input_metadata:
        m.clear('rating')
    if rating_given:
        m.add('rating', '%.2f' % mi.rating)

    tags_given = is_set('tags')
    if tags_given or override_input_metadata:
        m.clear('subject')
    if tags_given:
        for tag in mi.tags:
            m.add('subject', tag)

    # Dates are serialised with isoformat(); never cleared by the override flag
    for field, term in (('pubdate', 'date'), ('timestamp', 'timestamp')):
        if is_set(field):
            m.clear(term)
            m.add(term, isoformat(getattr(mi, field)))

    for field in ('rights', 'publication_type'):
        if is_set(field):
            m.clear(field)
            m.add(field, getattr(mi, field))

    # Make sure some timestamp is always present
    if not m.timestamp:
        m.add('timestamp', isoformat(now()))
102
103
class MergeMetadata:
    """Merge in user metadata, including the cover."""

    def __call__(self, oeb, mi, opts, override_input_metadata=False):
        """
        Merge the metadata ``mi`` into the book ``oeb``.

        Stores ``oeb`` and its log on the instance, copies the metadata
        fields, installs the cover chosen by :meth:`set_cover` and refreshes
        the uuid and calibre identifiers when ``mi`` carries them.

        :param oeb: The book being processed; its ``metadata``, ``guide``,
            ``manifest`` and ``spine`` are modified in place.
        :param mi: The user supplied metadata.
        :param opts: Options object; only ``prefer_metadata_cover`` is read.
        :param override_input_metadata: Passed through unchanged to
            ``meta_info_to_oeb_metadata``.
        """
        self.oeb = oeb
        self.log = oeb.log
        metadata = oeb.metadata
        self.log('Merging user specified metadata...')
        meta_info_to_oeb_metadata(
            mi, metadata, oeb.log,
            override_input_metadata=override_input_metadata)

        # The cover entry is always reset, then re-added if a cover exists
        cover_id = self.set_cover(mi, opts.prefer_metadata_cover)
        metadata.clear('cover')
        if cover_id is not None:
            metadata.add('cover', cover_id)

        uuid = mi.uuid
        if uuid is not None:
            metadata.filter('identifier', lambda entry: entry.id == 'uuid_id')
            metadata.add('identifier', uuid, id='uuid_id', scheme='uuid')
            # The freshly added identifier becomes the book's unique id
            self.oeb.uid = metadata.identifier[-1]

        application_id = mi.application_id
        if application_id is not None:
            metadata.filter('identifier',
                            lambda entry: entry.scheme == 'calibre')
            metadata.add('identifier', application_id, scheme='calibre')

    def set_cover(self, mi, prefer_metadata_cover):
        """
        Decide which cover the book gets and register it.

        Cover data is taken from the readable file ``mi.cover`` if there is
        one, otherwise from ``mi.cover_data`` (an ``(extension, data)``
        pair). It is discarded again when ``prefer_metadata_cover`` is true
        and the guide already has a ``cover`` entry.

        :param mi: The user supplied metadata.
        :param prefer_metadata_cover: Keep the cover already referenced by
            the guide, if any, instead of the one supplied through ``mi``.
        :return: The manifest id of the cover to use, or ``None`` when the
            book has neither an existing cover nor new cover data.
        """
        oeb = self.oeb

        # Gather the new cover data, if any
        cover_bytes, extension = b'', 'jpg'
        cover_path = mi.cover
        if cover_path and os.access(cover_path, os.R_OK):
            with open(cover_path, 'rb') as stream:
                cover_bytes = stream.read()
            extension = cover_path.rpartition('.')[-1].lower().strip()
        else:
            embedded = mi.cover_data
            if embedded and embedded[-1]:
                cover_bytes = embedded[1]
                extension = embedded[0]
        if extension not in ('png', 'jpg', 'jpeg'):
            extension = 'jpg'

        guide = oeb.guide
        previous = guide['cover'] if 'cover' in guide else None
        if prefer_metadata_cover and previous is not None:
            cover_bytes = b''

        if cover_bytes:
            guide.remove('cover')
            guide.remove('titlepage')
        elif oeb.plumber_output_format in {'mobi', 'azw3'} and previous is not None:
            # The amazon formats dont support html cover pages, so remove
            # them even if no cover was specified.
            guide.remove('titlepage')

        stale_item = None
        replace_stale = False
        if previous is not None:
            manifest = oeb.manifest
            if previous.href in manifest.hrefs:
                stale_item = manifest.hrefs[previous.href]
                if not cover_bytes:
                    # Nothing new to install: keep using the existing item
                    return stale_item.id
                replace_stale = True
            elif not cover_bytes:
                # The guide points at a file the manifest does not know yet
                cover_id = manifest.generate(id='cover')[0]
                manifest.add(cover_id, previous.href, 'image/jpeg')
                return cover_id

        if not cover_bytes:
            return None

        cover_id, href = oeb.manifest.generate('cover', 'cover.' + extension)
        new_item = oeb.manifest.add(
            cover_id, href, guess_type('cover.' + extension)[0],
            data=cover_bytes)
        oeb.guide.add('cover', 'Cover', href)
        if replace_stale:
            self.remove_old_cover(stale_item, new_item.href)
        return cover_id

    def remove_old_cover(self, cover_item, new_cover_href=None):
        """
        Drop ``cover_item`` from the manifest and fix up references to it.

        Every ``<img src>`` and SVG ``<image xlink:href>`` in the spine that
        resolves to the old cover is either re-pointed at ``new_cover_href``
        or, when that is ``None``, removed (together with its enclosing
        ``<svg>`` element if it has one). Spine items that lost an image and
        are left without text, ``<img>`` or ``<svg>`` are removed from the
        spine, the manifest and the guide.

        :param cover_item: Manifest item of the cover being replaced.
        :param new_cover_href: href of the replacement cover, or ``None`` to
            delete the references instead of rewriting them.
        """
        from calibre.ebooks.oeb.base import XPath, XLINK
        from lxml import etree

        self.oeb.manifest.remove(cover_item)

        # Remove any references to the cover in the HTML
        find_images = XPath('//h:img[@src]|//svg:image[@xl:href]')
        stripped_items = set()
        for spine_item in self.oeb.spine:
            try:
                images = find_images(spine_item.data)
            except Exception:
                images = ()
            for image in images:
                target = image.get('src') or image.get(XLINK('href'))
                try:
                    target = spine_item.abshref(target)
                except Exception:
                    continue  # Invalid URL, ignore
                if target != cover_item.href:
                    continue
                if new_cover_href is None:
                    parent = image.getparent()
                    if parent.tag.endswith('}svg'):
                        parent.getparent().remove(parent)
                    else:
                        parent.remove(image)
                    stripped_items.add(spine_item)
                else:
                    replacement_href = spine_item.relhref(new_cover_href)
                    attr = 'src' if image.tag.endswith('img') else XLINK('href')
                    image.set(attr, replacement_href)

        # Check if the resulting HTML has no content, if so remove it
        find_body = XPath('//h:body')
        find_visuals = XPath('//h:img|//svg:svg')
        for spine_item in stripped_items:
            bodies = find_body(spine_item.data)
            text = ''
            if bodies:
                text = etree.tostring(
                    bodies[0], method='text', encoding='unicode')
            if re.sub(r'\s+', '', text):
                continue
            if find_visuals(spine_item.data):
                continue
            self.log('Removing %s as it is a wrapper around the cover image'
                     % spine_item.href)
            self.oeb.spine.remove(spine_item)
            self.oeb.manifest.remove(spine_item)
            self.oeb.guide.remove_by_href(spine_item.href)
219