#!/usr/local/bin/python3.8
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai


__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, re
from calibre.utils.date import isoformat, now
from calibre import guess_type
from polyglot.builtins import iteritems


def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
    '''
    Copy the non-null fields of ``mi`` onto the OEB metadata object ``m``.

    Each field that is set in ``mi`` replaces the corresponding entry in
    ``m``. For book producer, comments, publisher, series, series index,
    rating and tags, a null value in ``mi`` additionally clears the existing
    entry in ``m`` when ``override_input_metadata`` is true. ISBN identifiers
    are dropped from ``m`` when ``override_input_metadata`` is true and ``mi``
    carries no ISBN. If ``m`` ends up without a timestamp, the current time
    is added.

    :param mi: Source metadata; must provide ``is_null()``,
        ``get_identifiers()``, ``format_series_index()`` and the field
        attributes read below.
    :param m: Destination OEB metadata; modified in place through its
        ``clear()``, ``add()`` and ``filter()`` methods.
    :param log: Accepted for interface compatibility; not used here.
    :param override_input_metadata: When true, null fields in ``mi`` remove
        the matching fields from ``m`` (see above for which fields).
    '''
    from calibre.ebooks.oeb.base import OPF
    if not mi.is_null('title'):
        m.clear('title')
        m.add('title', mi.title)
    if mi.title_sort:
        if not m.title:
            # No title at all: fall back to the sort title as the title
            m.add('title', mi.title_sort)
        m.clear('title_sort')
        m.add('title_sort', mi.title_sort)
    if not mi.is_null('authors'):
        # Drop existing creators that are authors (or have no role)
        m.filter('creator', lambda x: x.role.lower() in ['aut', ''])
        for a in mi.authors:
            attrib = {'role': 'aut'}
            if mi.author_sort:
                attrib[OPF('file-as')] = mi.author_sort
            m.add('creator', a, attrib=attrib)
    if not mi.is_null('book_producer'):
        m.filter('contributor', lambda x: x.role.lower() == 'bkp')
        m.add('contributor', mi.book_producer, role='bkp')
    elif override_input_metadata:
        m.filter('contributor', lambda x: x.role.lower() == 'bkp')
    if not mi.is_null('comments'):
        m.clear('description')
        m.add('description', mi.comments)
    elif override_input_metadata:
        m.clear('description')
    if not mi.is_null('publisher'):
        m.clear('publisher')
        m.add('publisher', mi.publisher)
    elif override_input_metadata:
        m.clear('publisher')
    if not mi.is_null('series'):
        m.clear('series')
        m.add('series', mi.series)
    elif override_input_metadata:
        m.clear('series')

    identifiers = mi.get_identifiers()
    set_isbn = False
    for typ, val in iteritems(identifiers):
        scheme = typ.lower()
        if scheme == 'isbn':
            set_isbn = True
        # Update every existing identifier with this scheme in place; only
        # add a new one when none matched.
        has = False
        for x in m.identifier:
            if x.scheme.lower() == scheme:
                x.content = val
                has = True
        if not has:
            m.add('identifier', val, scheme=typ.upper())
    if override_input_metadata and not set_isbn:
        m.filter('identifier', lambda x: x.scheme.lower() == 'isbn')

    if not mi.is_null('languages'):
        m.clear('language')
        for lang in mi.languages:
            # Skip empty and "undetermined" language codes
            if lang and lang.lower() not in ('und', ''):
                m.add('language', lang)
    if not mi.is_null('series_index'):
        m.clear('series_index')
        m.add('series_index', mi.format_series_index())
    elif override_input_metadata:
        m.clear('series_index')
    if not mi.is_null('rating'):
        m.clear('rating')
        m.add('rating', '%.2f'%mi.rating)
    elif override_input_metadata:
        m.clear('rating')
    if not mi.is_null('tags'):
        m.clear('subject')
        for t in mi.tags:
            m.add('subject', t)
    elif override_input_metadata:
        m.clear('subject')
    if not mi.is_null('pubdate'):
        m.clear('date')
        m.add('date', isoformat(mi.pubdate))
    if not mi.is_null('timestamp'):
        m.clear('timestamp')
        m.add('timestamp', isoformat(mi.timestamp))
    if not mi.is_null('rights'):
        m.clear('rights')
        m.add('rights', mi.rights)
    if not mi.is_null('publication_type'):
        m.clear('publication_type')
        m.add('publication_type', mi.publication_type)

    if not m.timestamp:
        m.add('timestamp', isoformat(now()))


class MergeMetadata:
    '''Merge in user metadata, including cover'''

    def __call__(self, oeb, mi, opts, override_input_metadata=False):
        '''
        Merge the metadata in ``mi`` into ``oeb.metadata`` and set the cover.

        :param oeb: The OEB book; its ``metadata``, ``guide``, ``manifest``,
            ``spine`` and ``log`` attributes are used and modified.
        :param mi: The user supplied metadata.
        :param opts: Conversion options; only ``prefer_metadata_cover`` is
            read.
        :param override_input_metadata: Passed through to
            :func:`meta_info_to_oeb_metadata`.
        '''
        self.oeb, self.log = oeb, oeb.log
        m = self.oeb.metadata
        self.log('Merging user specified metadata...')
        meta_info_to_oeb_metadata(mi, m, oeb.log,
                override_input_metadata=override_input_metadata)
        cover_id = self.set_cover(mi, opts.prefer_metadata_cover)
        m.clear('cover')
        if cover_id is not None:
            m.add('cover', cover_id)
        if mi.uuid is not None:
            m.filter('identifier', lambda x: x.id == 'uuid_id')
            m.add('identifier', mi.uuid, id='uuid_id', scheme='uuid')
            # The identifier just added becomes the unique id of the book
            self.oeb.uid = m.identifier[-1]
        if mi.application_id is not None:
            m.filter('identifier', lambda x: x.scheme == 'calibre')
            m.add('identifier', mi.application_id, scheme='calibre')

    def set_cover(self, mi, prefer_metadata_cover):
        '''
        Install the cover from ``mi`` into the book, or keep the existing one.

        The cover data is read from the file ``mi.cover`` if it is readable,
        otherwise taken from ``mi.cover_data``. When
        ``prefer_metadata_cover`` is true and the guide already has a
        ``cover`` entry, the cover from ``mi`` is ignored.

        :return: The manifest id of the cover item, or ``None`` if there is
            neither a new cover nor a ``cover`` entry in the guide.
        '''
        cdata, ext = b'', 'jpg'
        if mi.cover and os.access(mi.cover, os.R_OK):
            with open(mi.cover, 'rb') as f:
                cdata = f.read()
            ext = mi.cover.rpartition('.')[-1].lower().strip()
        elif mi.cover_data and mi.cover_data[-1]:
            cdata = mi.cover_data[1]
            ext = mi.cover_data[0]
            # Normalise like the file path branch above, so that e.g. 'PNG'
            # is not mistaken for an unknown type and stored as a JPEG.
            ext = ext.lower().strip() if isinstance(ext, str) else ''
        if ext not in ('png', 'jpg', 'jpeg'):
            ext = 'jpg'

        old_cover = None
        if 'cover' in self.oeb.guide:
            old_cover = self.oeb.guide['cover']
        if prefer_metadata_cover and old_cover is not None:
            cdata = b''
        if cdata:
            self.oeb.guide.remove('cover')
            self.oeb.guide.remove('titlepage')
        elif self.oeb.plumber_output_format in {'mobi', 'azw3'} and old_cover is not None:
            # The amazon formats don't support html cover pages, so remove them
            # even if no cover was specified.
            self.oeb.guide.remove('titlepage')

        old_item = None
        if old_cover is not None:
            if old_cover.href in self.oeb.manifest.hrefs:
                old_item = self.oeb.manifest.hrefs[old_cover.href]
                if not cdata:
                    # Keep using the existing cover
                    return old_item.id
            elif not cdata:
                # The guide points at a cover that is missing from the
                # manifest: register it and use it.
                cover_id = self.oeb.manifest.generate(id='cover')[0]
                self.oeb.manifest.add(cover_id, old_cover.href, 'image/jpeg')
                return cover_id

        if not cdata:
            return None

        cover_id, href = self.oeb.manifest.generate('cover', 'cover.'+ext)
        new_cover_item = self.oeb.manifest.add(
            cover_id, href, guess_type('cover.'+ext)[0], data=cdata)
        self.oeb.guide.add('cover', 'Cover', href)
        if old_item is not None:
            self.remove_old_cover(old_item, new_cover_item.href)
        return cover_id

    def remove_old_cover(self, cover_item, new_cover_href=None):
        '''
        Remove ``cover_item`` from the manifest and fix up references to it.

        Every ``<img>`` and SVG ``<image>`` in the spine that points at the
        old cover is either re-pointed at ``new_cover_href`` or, when
        ``new_cover_href`` is ``None``, removed (together with its enclosing
        ``<svg>`` element, if that is its parent). Spine items from which an
        image was removed and that are left with no text and no images are
        removed from the spine, manifest and guide.

        :param cover_item: The manifest item of the old cover.
        :param new_cover_href: The href of the replacement cover, or
            ``None`` to remove the references instead.
        '''
        from calibre.ebooks.oeb.base import XPath, XLINK
        from lxml import etree

        self.oeb.manifest.remove(cover_item)

        # Remove any references to the cover in the HTML
        affected_items = set()
        xp = XPath('//h:img[@src]|//svg:image[@xl:href]')
        for item in self.oeb.spine:
            try:
                images = xp(item.data)
            except Exception:
                images = ()
            removed = False
            for img in images:
                href = img.get('src') or img.get(XLINK('href'))
                try:
                    href = item.abshref(href)
                except Exception:
                    continue  # Invalid URL, ignore
                if href != cover_item.href:
                    continue
                if new_cover_href is not None:
                    replacement_href = item.relhref(new_cover_href)
                    attr = 'src' if img.tag.endswith('img') else XLINK('href')
                    img.set(attr, replacement_href)
                else:
                    p = img.getparent()
                    if p.tag.endswith('}svg'):
                        p.getparent().remove(p)
                    else:
                        p.remove(img)
                    removed = True
            if removed:
                affected_items.add(item)

        # Check if the resulting HTML has no content, if so remove it
        find_body = XPath('//h:body')
        find_images = XPath('//h:img|//svg:svg')
        for item in affected_items:
            body = find_body(item.data)
            if body:
                text = etree.tostring(body[0], method='text', encoding='unicode')
            else:
                text = ''
            text = re.sub(r'\s+', '', text)
            if not text and not find_images(item.data):
                self.log('Removing %s as it is a wrapper around'
                        ' the cover image'%item.href)
                self.oeb.spine.remove(item)
                self.oeb.manifest.remove(item)
                self.oeb.guide.remove_by_href(item.href)