#!/usr/local/bin/python3.8
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai


__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import posixpath

from lxml import etree

from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
from polyglot.urllib import urldefrag, urlparse


class RenameFiles:  # {{{

    '''
    Rewrite every link that points at a renamed file. The manifest and the
    spine themselves are left alone by this transform; only link targets in
    item content, the guide and the ToC are updated.
    '''

    def __init__(self, rename_map, renamed_items_map=None):
        # rename_map: old href -> new href.
        # renamed_items_map (optional): new href -> the item as it was before
        # the rename; used to resolve relative links against the old location.
        self.rename_map = rename_map
        self.renamed_items_map = renamed_items_map

    def __call__(self, oeb, opts):
        import css_parser
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        for item in oeb.manifest.items:
            # url_replacer reads self.current_item, so it must be set before
            # any link in this item is rewritten.
            self.current_item = item
            if etree.iselement(item.data):
                rewrite_links(self.current_item.data, self.url_replacer)
            elif hasattr(item.data, 'cssText'):
                css_parser.replaceUrls(item.data, self.url_replacer)

        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                renamed = self._renamed_href(ref.href)
                if renamed is not None:
                    ref.href = renamed

        if self.oeb.toc:
            self.fix_toc_entry(self.oeb.toc)

    def _renamed_href(self, href):
        '''
        Return the new href (fragment preserved) for ``href`` if its
        fragment-less, normalized form is a key of the rename map, else None.
        '''
        target, fragment = urldefrag(urlnormalize(href))
        renamed = self.rename_map.get(target, None)
        if renamed is None:
            return None
        if fragment:
            renamed = renamed + '#' + fragment
        return renamed

    def fix_toc_entry(self, toc):
        '''
        Update the href of ``toc`` if it points at a renamed file, then do
        the same for every entry nested below it.
        '''
        if toc.href:
            renamed = self._renamed_href(toc.href)
            if renamed is not None:
                toc.href = renamed

        for child in toc:
            self.fix_toc_entry(child)

    def url_replacer(self, orig_url):
        '''
        Link-rewriting callback: map ``orig_url`` (as found in the current
        item) to the URL it should have after the renames.
        '''
        url = urlnormalize(orig_url)
        if urlparse(url).scheme:
            # Only rewrite local URLs
            return orig_url

        path, frag = urldefrag(url)
        current = self.current_item
        # When the current item itself was renamed, resolve the link against
        # the item recorded for it in renamed_items_map; otherwise (or when
        # no such map was supplied) resolve against the current item.
        resolver = (self.renamed_items_map or {}).get(current.href, current)

        old_target = resolver.abshref(path)
        new_target = self.rename_map.get(old_target, old_target)
        rewritten = current.relhref(new_target)
        return rewritten + '#' + frag if frag else rewritten

# }}}


class UniqueFilenames:  # {{{

    'Ensure that every item in the manifest has a unique filename'

    def __call__(self, oeb, opts):
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        self.seen_filenames = set()
        self.rename_map = {}

        # Iterate over a snapshot: the manifest is modified inside the loop.
        for item in list(oeb.manifest.items):
            fname = posixpath.basename(item.href)
            if fname not in self.seen_filenames:
                self.seen_filenames.add(fname)
                continue

            # Basename already taken by an earlier item: re-add this item
            # under an href whose basename carries a distinguishing suffix.
            suffix = self.unique_suffix(fname)
            data = item.data
            stem, extension = posixpath.splitext(item.href)
            # Use element [1] of what generate() returns as the new href.
            nhref = oeb.manifest.generate(href=stem + suffix + extension)[1]
            spine_pos = item.spine_position
            oeb.manifest.remove(item)
            nitem = oeb.manifest.add(
                item.id, nhref, item.media_type,
                data=data, fallback=item.fallback)
            self.seen_filenames.add(posixpath.basename(nhref))
            self.rename_map[item.href] = nhref
            if spine_pos is not None:
                # Put the replacement back at the original spine position.
                oeb.spine.insert(spine_pos, nitem, item.linear)

        if not self.rename_map:
            return

        self.log('Found non-unique filenames, renaming to support broken'
                ' EPUB readers like FBReader, Aldiko and Stanza...')
        from pprint import pformat
        self.log.debug(pformat(self.rename_map))

        RenameFiles(self.rename_map)(oeb, opts)

    def unique_suffix(self, fname):
        '''
        Return the first suffix ``_u1``, ``_u2``, ... which, when inserted
        between the stem and extension of ``fname``, gives a name that is not
        in ``self.seen_filenames``.
        '''
        stem, extension = posixpath.splitext(fname)
        counter = 1
        while stem + '_u%d' % counter + extension in self.seen_filenames:
            counter += 1
        return '_u%d' % counter
# }}}


class FlatFilenames:  # {{{

    'Ensure that every item in the manifest has a unique filename without subfolders.'

    def __call__(self, oeb, opts):
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        self.rename_map = {}
        self.renamed_items_map = {}

        # Iterate over a snapshot: the manifest is modified inside the loop.
        for item in list(oeb.manifest.items):
            # Flatten URL by removing directories.
            # Example: a/b/c/index.html -> a_b_c_index.html
            flattened = item.href.replace("/", "_")
            if flattened == item.href:
                # URL hasn't changed, skip item.
                continue

            data = item.data
            isp = item.spine_position
            # Use element [1] of what generate() returns as the new href.
            nhref = oeb.manifest.generate(href=flattened)[1]
            if isp is not None:
                oeb.spine.remove(item)
            oeb.manifest.remove(item)

            nitem = oeb.manifest.add(
                item.id, nhref, item.media_type,
                data=data, fallback=item.fallback)
            self.rename_map[item.href] = nhref
            # Keep the pre-rename item so relative links inside it can still
            # be resolved against its old location by RenameFiles.
            self.renamed_items_map[nhref] = item
            if isp is not None:
                # Put the replacement back at the original spine position.
                oeb.spine.insert(isp, nitem, item.linear)

        if not self.rename_map:
            return

        self.log('Found non-flat filenames, renaming to support broken'
                ' EPUB readers like FBReader...')
        from pprint import pformat
        self.log.debug(pformat(self.rename_map))
        self.log.debug(pformat(self.renamed_items_map))

        RenameFiles(self.rename_map, self.renamed_items_map)(oeb, opts)
# }}}