1#!/usr/local/bin/python3.8 2# -*- coding: utf-8 -*- 3# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab 4 5# Copyright (c) 2014-2020 Kevin B. Hendricks and Doug Massay 6# All rights reserved. 7# 8# Redistribution and use in source and binary forms, with or without modification, 9# are permitted provided that the following conditions are met: 10# 11# 1. Redistributions of source code must retain the above copyright notice, this list of 12# conditions and the following disclaimer. 13# 14# 2. Redistributions in binary form must reproduce the above copyright notice, this list 15# of conditions and the following disclaimer in the documentation and/or other materials 16# provided with the distribution. 17# 18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 19# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 21# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 23# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 24# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 26# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from collections import OrderedDict
import sys
import os
import re
from hrefutils import urldecodepart, urlencodepart
from hrefutils import buildBookPath, startingDir, buildRelativePath
from hrefutils import ext_mime_map, mime_group_map

import unicodedata


def _utf8str(p):
    """Return p as UTF-8 encoded bytes; None and bytes are returned unchanged."""
    if p is None:
        return None
    if isinstance(p, bytes):
        return p
    return p.encode('utf-8', errors='replace')


def _unicodestr(p):
    """Return p as str (bytes are decoded as UTF-8); None and str are returned unchanged."""
    if p is None:
        return None
    if isinstance(p, str):
        return p
    return p.decode('utf-8', errors='replace')


_launcher_version = 20211025

_PKG_VER = re.compile(r'''<\s*package[^>]*version\s*=\s*["']([^'"]*)['"][^>]*>''', re.IGNORECASE)

# Wrapper Class is used to perform record keeping for Sigil.  It keeps track of modified,
# added, and deleted files while providing some degree of protection against files under
# Sigil's control from being directly manipulated.
# Uses "write-on-modify" and so removes the need for wholesale copying of files

# 'foreword' is the spelling used by the OPF guide vocabulary; the historical
# misspelling 'foreward' is kept so existing callers keep their behaviour.
_guide_types = ['cover', 'title-page', 'toc', 'index', 'glossary', 'acknowledgements',
                'bibliography', 'colophon', 'copyright-page', 'dedication',
                'epigraph', 'foreword', 'foreward', 'loi', 'lot', 'notes', 'preface', 'text']

PROTECTED_FILES = [
    'mimetype',
    'META-INF/container.xml',
]

# Every entry needs its own trailing comma: two adjacent string literals are
# silently concatenated into one (wrong) media type.
TEXT_MIMETYPES = [
    'image/svg+xml',
    'application/xhtml+xml',
    'text/css',
    'application/x-dtbncx+xml',
    'application/oebps-package+xml',
    'application/oebs-page-map+xml',
    'application/smil+xml',
    'application/adobe-page-template+xml',
    'application/vnd.adobe-page-template+xml',
    'text/javascript',
    'application/javascript',
    'application/pls+xml',
]


def _epub_file_walk(top):
    """Return the paths, relative to top, of every file found under top."""
    top = os.fsdecode(top)
    rv = []
    for base, _dnames, names in os.walk(top):
        for name in names:
            rv.append(os.path.relpath(os.path.join(base, name), top))
    return rv


class WrapperException(Exception):
    """Raised when a Wrapper routine is misused or refers to something unknown."""
    pass


class Wrapper(object):
    """Keeps track of the manifest, spine, guide and files of an unpacked epub.

    Files that are written, added or deleted through this class are recorded
    in the modified, added and deleted attributes; new content is written
    under outdir while the original files are read from ebook_root.
    """

    def __init__(self, ebook_root, outdir, op, plugin_dir, plugin_name, debug=False):
        """Load sigil.cfg from outdir, copy the opf data from op and index the book files.

        ebook_root  -- folder that is walked to find the files of the book
        outdir      -- folder holding sigil.cfg; changed files are written below it
        op          -- opf parser object whose getters supply manifest, spine,
                       guide, bindings and metadata (may be None)
        plugin_dir  -- user plugin folder
        plugin_name -- name of the running plugin
        debug       -- stored in self._debug
        """
        self._debug = debug
        self.ebook_root = os.fsdecode(ebook_root)
        # plugins and plugin containers can get name and user plugin dir
        self.plugin_dir = os.fsdecode(plugin_dir)
        self.plugin_name = plugin_name
        self.outdir = os.fsdecode(outdir)

        # initialize the sigil configuration info passed in outdir with sigil.cfg
        self.opfbookpath = None
        self.appdir = None
        self.usrsupdir = None
        # Location of directory containing hunspell dictionaries on Linux
        self.linux_hunspell_dict_dirs = []
        # Sigil interface language code
        self.sigil_ui_lang = None
        # Default Sigil spell check dictionary
        self.sigil_spellcheck_lang = None
        # status of epub inside Sigil (isDirty) and CurrentFilePath of current epub file
        self.epub_isDirty = False
        self.epub_filepath = ""
        self.colormode = None
        self.colors = None
        # always defined, even when sigil.cfg is too short to provide them
        self.highdpi = None
        self.uifont = None
        # File selected in Sigil's Book Browser
        self.selected = []
        with open(os.path.join(self.outdir, 'sigil.cfg'), 'rb') as f:
            cfg = f.read().decode('utf-8')
        cfg_lst = cfg.replace("\r", "").split("\n")
        if len(cfg_lst) >= 7:
            self.opfbookpath = cfg_lst.pop(0)
            self.appdir = cfg_lst.pop(0)
            self.usrsupdir = cfg_lst.pop(0)
            if not sys.platform.startswith(('darwin', 'win')):
                self.linux_hunspell_dict_dirs = cfg_lst.pop(0).split(":")
            self.sigil_ui_lang = cfg_lst.pop(0)
            self.sigil_spellcheck_lang = cfg_lst.pop(0)
            self.epub_isDirty = (cfg_lst.pop(0) == "True")
            # Only seven lines are guaranteed by the check above, so the
            # remaining fields keep their defaults when the file ends early
            # instead of raising IndexError.
            for attr in ('epub_filepath', 'colormode', 'colors', 'highdpi', 'uifont'):
                if not cfg_lst:
                    break
                setattr(self, attr, cfg_lst.pop(0))
            # whatever is left lists the files selected in the Book Browser
            self.selected = cfg_lst
        os.environ['SigilGumboLibPath'] = self.get_gumbo_path()

        # dictionaries used to map opf manifest information
        self.id_to_href = OrderedDict()
        self.id_to_mime = OrderedDict()
        self.id_to_props = OrderedDict()
        self.id_to_fall = OrderedDict()
        self.id_to_over = OrderedDict()
        self.id_to_bookpath = OrderedDict()
        self.href_to_id = OrderedDict()
        self.bookpath_to_id = OrderedDict()
        self.spine_ppd = None
        self.spine = []
        self.guide = []
        self.bindings = []
        self.package_tag = None
        self.epub_version = None
        # self.metadata_attr = None
        # self.metadata = []
        self.metadataxml = ''
        self.op = op
        if self.op is not None:
            # copy in data from parsing of initial opf
            self.opf_dir = op.opf_dir
            # Note: manifest hrefs may only point to files (there are no fragments)
            # all manifest relative hrefs have already had their path component url decoded
            self.id_to_href = op.get_manifest_id_to_href_dict().copy()
            self.id_to_mime = op.get_manifest_id_to_mime_dict().copy()
            self.id_to_props = op.get_manifest_id_to_properties_dict().copy()
            self.id_to_fall = op.get_manifest_id_to_fallback_dict().copy()
            self.id_to_over = op.get_manifest_id_to_overlay_dict().copy()
            self.id_to_bookpath = op.get_manifest_id_to_bookpath_dict().copy()
            self.group_paths = op.get_group_paths().copy()
            self.spine_ppd = op.get_spine_ppd()
            self.spine = op.get_spine()
            # since guide hrefs may contain fragments they are kept in url encoded form
            self.guide = op.get_guide()
            self.package_tag = op.get_package_tag()
            self.epub_version = op.get_epub_version()
            self.bindings = op.get_bindings()
            self.metadataxml = op.get_metadataxml()
            # invert key dictionaries to allow for reverse access
            for k, v in self.id_to_href.items():
                self.href_to_id[v] = k
            for k, v in self.id_to_bookpath.items():
                self.bookpath_to_id[v] = k
            # self.href_to_id = {v: k for k, v in self.id_to_href.items()}
            # self.bookpath_to_id = {v: k for k, v in self.id_to_bookpath.items()}
            # self.metadata = op.get_metadata()
            # self.metadata_attr = op.get_metadata_attr()
        self.other = []  # non-manifest file information
        self.id_to_filepath = OrderedDict()
        self.book_href_to_filepath = OrderedDict()
        self.modified = OrderedDict()
        self.added = []
        self.deleted = []

        # walk the ebook directory tree building up initial list of
        # all unmanifested (other) files
        for filepath in _epub_file_walk(ebook_root):
            book_href = filepath.replace(os.sep, "/")
            # OS X file names and paths use NFD form. The EPUB
            # spec requires all text including filenames to be in NFC form.
            book_href = unicodedata.normalize('NFC', book_href)
            # if book_href file in manifest convert to manifest id
            manifest_id = self.bookpath_to_id.get(book_href, None)
            if manifest_id is None:
                self.other.append(book_href)
                self.book_href_to_filepath[book_href] = filepath
            else:
                self.id_to_filepath[manifest_id] = filepath

    def getversion(self):
        """Return the launcher version number."""
        return _launcher_version

    def getepubversion(self):
        """Return the epub version taken from the opf (None when no opf was parsed)."""
        return self.epub_version

    # utility routine to get mime from href (book href or opf href)
    # no fragments present
    def getmime(self, href):
        """Return the media type mapped to the extension of href, or "" if unknown."""
        href = _unicodestr(href)
        href = urldecodepart(href)
        filename = os.path.basename(href)
        ext = os.path.splitext(filename)[1].lower()
        return ext_mime_map.get(ext, "")

    # New in Sigil 1.1
    # ------------------

    # returns color mode of Sigil "light" or "dark"
    def colorMode(self):
        """Return the colour mode read from sigil.cfg (None if it was not provided)."""
        return _unicodestr(self.colormode)

    # returns color as css or javascript hex color string #xxxxxx
    # Accepts the following color roles "Window", "Base", "Text", "Highlight", "HighlightedText"
    def color(self, role):
        """Return the colour stored for role (case-insensitive), or None.

        None is returned for an unknown role and also when sigil.cfg did not
        supply a colour list or the list has no entry for that role.
        """
        role = _unicodestr(role)
        if role is None or not self.colors:
            return None
        color_roles = ["window", "base", "text", "highlight", "highlightedtext"]
        role = role.lower()
        if role not in color_roles:
            return None
        colors = self.colors.split(',')
        idx = color_roles.index(role)
        if idx >= len(colors):
            return None
        return _unicodestr(colors[idx])

    # New in Sigil 1.0
    # ----------------

    # A book path (aka "bookpath" or "book_path") is a unique relative path
    # from the ebook root to a specific file.  As a relative path meant to
    # be used in an href or src link it only uses forward slashes "/"
    # as path segment separators.  Since all files exist inside the
    # epub root (folder the epub was unzipped into), book paths will NEVER
    # have or use "./" or "../" ie they are always in canonical form

    # We will use the terms book_href (aka "bookhref") interchangeably
    # with bookpath with the following convention:
    #    - use book_href when working with "other" files outside of the manifest
    #    - use bookpath when working with files in the manifest
    #    - use either when the file in question is the OPF as it exists in the intersection

    # returns the bookpath/book_href to the opf file
    def get_opfbookpath(self):
        """Return the bookpath of the opf file as read from sigil.cfg."""
        return self.opfbookpath

    # returns the book path to the folder containing this bookpath
    def get_startingdir(self, bookpath):
        """Return startingDir() of bookpath."""
        bookpath = _unicodestr(bookpath)
        return startingDir(bookpath)

    # return a bookpath for the file pointed to by the href from
    # the specified bookpath starting directory
    # no fragments allowed in href (must have been previously split off)
    def build_bookpath(self, href, starting_dir):
        """Return buildBookPath() of the url-decoded href and starting_dir."""
        href = _unicodestr(href)
        href = urldecodepart(href)
        starting_dir = _unicodestr(starting_dir)
        return buildBookPath(href, starting_dir)

    # returns the href relative path from source bookpath to target bookpath
    def get_relativepath(self, from_bookpath, to_bookpath):
        """Return buildRelativePath() from from_bookpath to to_bookpath."""
        from_bookpath = _unicodestr(from_bookpath)
        to_bookpath = _unicodestr(to_bookpath)
        return buildRelativePath(from_bookpath, to_bookpath)

    # ----------

    # routine to detect if the current epub is in Sigil standard epub form
    def epub_is_standard(self):
        """Return True when the opf, the ncx and every group folder sit in the OEBPS layout."""
        groups = ["Text", "Styles", "Fonts", "Images", "Audio", "Video", "Misc"]
        paths = ["OEBPS/Text", "OEBPS/Styles", "OEBPS/Fonts", "OEBPS/Images",
                 "OEBPS/Audio", "OEBPS/Video", "OEBPS/Misc"]
        std_epub = self.opfbookpath == "OEBPS/content.opf"
        tocid = self.gettocid()
        if tocid is not None:
            std_epub = std_epub and self.id_to_bookpath[tocid] == "OEBPS/toc.ncx"
        if self.epub_version.startswith("2"):
            std_epub = std_epub and tocid is not None
        for g, p in zip(groups, paths):
            folders = self.group_paths.get(g) or []
            # length is tested first so an empty folder list cannot raise IndexError
            std_epub = std_epub and len(folders) == 1 and folders[0] == p
        return std_epub

    # routines to rebuild the opf on the fly from current information
    def build_package_starttag(self):
        """Return the stored package start tag."""
        return self.package_tag

    def build_manifest_xml(self):
        """Return the manifest element as a string, items ordered by sorted id."""
        manout = []
        manout.append(' <manifest>\n')
        for mid in sorted(self.id_to_mime):
            href = self.id_to_href[mid]
            # relative manifest hrefs must have no fragments
            if href.find(':') == -1:
                href = urlencodepart(href)
            mime = self.id_to_mime[mid]
            props = ''
            properties = self.id_to_props[mid]
            if properties is not None:
                props = ' properties="%s"' % properties
            fall = ''
            fallback = self.id_to_fall[mid]
            if fallback is not None:
                fall = ' fallback="%s"' % fallback
            over = ''
            overlay = self.id_to_over[mid]
            if overlay is not None:
                over = ' media-overlay="%s"' % overlay
            manout.append(' <item id="%s" href="%s" media-type="%s"%s%s%s />\n' % (mid, href, mime, props, fall, over))
        manout.append(' </manifest>\n')
        return "".join(manout)

    def build_spine_xml(self):
        """Return the spine element, including ppd, toc and page-map attributes when set."""
        spineout = []
        ppd = ''
        ncx = ''
        pagemap = ''
        if self.spine_ppd is not None:
            ppd = ' page-progression-direction="%s"' % self.spine_ppd
        tocid = self.gettocid()
        if tocid is not None:
            ncx = ' toc="%s"' % tocid
        pagemapid = self.getpagemapid()
        if pagemapid is not None:
            pagemap = ' page-map="%s"' % pagemapid
        spineout.append(' <spine%s%s%s>\n' % (ppd, ncx, pagemap))
        for (sid, linear, properties) in self.spine:
            lin = ''
            if linear is not None:
                lin = ' linear="%s"' % linear
            props = ''
            if properties is not None:
                props = ' properties="%s"' % properties
            spineout.append(' <itemref idref="%s"%s%s/>\n' % (sid, lin, props))
        spineout.append(' </spine>\n')
        return "".join(spineout)

    def build_guide_xml(self):
        """Return the guide element, or "" when the guide is empty."""
        guideout = []
        if self.guide:
            guideout.append(' <guide>\n')
            for (gtype, title, href) in self.guide:
                # note guide hrefs may have fragments so must be kept
                # in url encoded form at all times until splitting into component parts
                guideout.append(' <reference type="%s" href="%s" title="%s"/>\n' % (gtype, href, title))
            guideout.append(' </guide>\n')
        return "".join(guideout)

    def build_bindings_xml(self):
        """Return the bindings element; "" unless this is an epub3 with bindings."""
        bindout = []
        if self.bindings and self.epub_version.startswith('3'):
            bindout.append(' <bindings>\n')
            for (mtype, handler) in self.bindings:
                bindout.append(' <mediaType media-type="%s" handler="%s"/>\n' % (mtype, handler))
            bindout.append(' </bindings>\n')
        return "".join(bindout)

    def build_opf(self):
        """Return the complete opf text assembled from the current state."""
        data = []
        data.append('<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n')
        data.append(self.build_package_starttag())
        data.append(self.metadataxml)
        data.append(self.build_manifest_xml())
        data.append(self.build_spine_xml())
        data.append(self.build_guide_xml())
        data.append(self.build_bindings_xml())
        data.append('</package>\n')
        return "".join(data)

    def write_opf(self):
        """Write the rebuilt opf below outdir; does nothing when no opf was parsed."""
        if self.op is not None:
            platpath = self.opfbookpath.replace('/', os.sep)
            filepath = os.path.join(self.outdir, platpath)
            os.makedirs(os.path.dirname(filepath), exist_ok=True)
            with open(filepath, 'wb') as fp:
                fp.write(_utf8str(self.build_opf()))

    # routines to help find the manifest id of toc.ncx and page-map.xml

    def gettocid(self):
        """Return the id of the first manifest item with the ncx media type, else None."""
        for mid, mime in self.id_to_mime.items():
            if mime == "application/x-dtbncx+xml":
                return mid
        return None

    def getpagemapid(self):
        """Return the id of the first manifest item with the page-map media type, else None."""
        for mid, mime in self.id_to_mime.items():
            if mime == "application/oebs-page-map+xml":
                return mid
        return None

    # routines to help find the manifest id of the nav
    def getnavid(self):
        """Return the id of the xhtml item whose properties contain "nav", else None.

        Always None when the epub version is "2.0".
        """
        if self.epub_version == "2.0":
            return None
        for mid, mime in self.id_to_mime.items():
            if mime == "application/xhtml+xml":
                properties = self.id_to_props[mid]
                if properties is not None and "nav" in properties:
                    return mid
        return None

    # routines to manipulate the spine

    def getspine(self):
        """Return the spine as a list of (id, linear) tuples."""
        return [(sid, linear) for (sid, linear, _properties) in self.spine]

    def setspine(self, new_spine):
        """Replace the spine with new_spine, an iterable of (id, linear) pairs.

        Raises WrapperException when an id is not in the manifest or when
        linear is neither None nor "yes"/"no" (case-insensitive).
        """
        spine = []
        for (sid, linear) in new_spine:
            sid = _unicodestr(sid)
            linear = _unicodestr(linear)
            if sid not in self.id_to_href:
                raise WrapperException('Spine Id not in Manifest')
            if linear is not None:
                linear = linear.lower()
                if linear not in ['yes', 'no']:
                    # WrapperException is an Exception subclass, so callers
                    # that caught the former bare Exception still work
                    raise WrapperException('Improper Spine Linear Attribute')
            spine.append((sid, linear, None))
        self.spine = spine
        self.modified[self.opfbookpath] = 'file'

    def getspine_epub3(self):
        """Return the spine as a list of (id, linear, properties) tuples."""
        return self.spine

    def setspine_epub3(self, new_spine):
        """Replace the spine with new_spine, an iterable of (id, linear, properties).

        Empty properties become None and other values are lower-cased.
        Raises WrapperException when an id is not in the manifest or when
        linear is neither None nor "yes"/"no" (case-insensitive).
        """
        spine = []
        for (sid, linear, properties) in new_spine:
            sid = _unicodestr(sid)
            linear = _unicodestr(linear)
            properties = _unicodestr(properties)
            if properties == "":
                properties = None
            if sid not in self.id_to_href:
                raise WrapperException('Spine Id not in Manifest')
            if linear is not None:
                linear = linear.lower()
                if linear not in ['yes', 'no']:
                    raise WrapperException('Improper Spine Linear Attribute')
            if properties is not None:
                properties = properties.lower()
            spine.append((sid, linear, properties))
        self.spine = spine
        self.modified[self.opfbookpath] = 'file'

    def getbindings_epub3(self):
        """Return the list of (media-type, handler id) bindings."""
        return self.bindings

    def setbindings_epub3(self, new_bindings):
        """Replace the bindings; pairs with an empty media type or handler are skipped.

        Raises WrapperException when a handler id is not in the manifest.
        """
        bindings = []
        for (mtype, handler) in new_bindings:
            mtype = _unicodestr(mtype)
            handler = _unicodestr(handler)
            if mtype is None or mtype == "":
                continue
            if handler is None or handler == "":
                continue
            if handler not in self.id_to_href:
                raise WrapperException('Handler not in Manifest')
            bindings.append((mtype, handler))
        self.bindings = bindings
        self.modified[self.opfbookpath] = 'file'

    def spine_insert_before(self, pos, sid, linear, properties=None):
        """Insert a spine entry before index pos (-1 or pos >= len appends).

        Raises WrapperException when sid is not in the manifest.
        """
        sid = _unicodestr(sid)
        linear = _unicodestr(linear)
        properties = _unicodestr(properties)
        if properties == "":
            properties = None
        if sid not in self.id_to_mime:
            raise WrapperException('that spine idref does not exist in manifest')
        entry = (sid, linear, properties)
        n = len(self.spine)
        if pos == 0:
            self.spine = [entry] + self.spine
        elif pos == -1 or pos >= n:
            self.spine.append(entry)
        else:
            self.spine = self.spine[0:pos] + [entry] + self.spine[pos:]
        self.modified[self.opfbookpath] = 'file'

    def getspine_ppd(self):
        """Return the page-progression-direction of the spine (may be None)."""
        return self.spine_ppd

    def setspine_ppd(self, ppd):
        """Set the page-progression-direction; only 'rtl', 'ltr' or None are accepted."""
        ppd = _unicodestr(ppd)
        if ppd not in ['rtl', 'ltr', None]:
            raise WrapperException('incorrect page-progression direction')
        self.spine_ppd = ppd
        self.modified[self.opfbookpath] = 'file'

    def setspine_itemref_epub3_attributes(self, idref, linear, properties):
        """Replace linear and properties of the first spine entry whose id is idref.

        Raises WrapperException when idref is not in the spine.
        """
        idref = _unicodestr(idref)
        linear = _unicodestr(linear)
        properties = _unicodestr(properties)
        if properties == "":
            properties = None
        pos = -1
        for i, (sid, _slinear, _sproperties) in enumerate(self.spine):
            if sid == idref:
                pos = i
                break
        if pos == -1:
            raise WrapperException('that idref is not exist in the spine')
        self.spine[pos] = (idref, linear, properties)
        self.modified[self.opfbookpath] = 'file'

    # routines to get and set the guide

    def getguide(self):
        """Return the guide as a list of (type, title, href) tuples."""
        return self.guide

    # guide hrefs must be in urlencoded form (percent encodings present if needed)
    # as they may include fragments and # is a valid url path character
    def setguide(self, new_guide):
        """Replace the guide with new_guide, an iterable of (type, title, href).

        Types not in _guide_types are prefixed with "other." and a missing
        title becomes 'title missing'.  Raises WrapperException when the
        href (without its fragment) is not a manifest href.
        """
        guide = []
        for (gtype, title, href) in new_guide:
            gtype = _unicodestr(gtype)
            title = _unicodestr(title)
            href = _unicodestr(href)
            if gtype not in _guide_types:
                gtype = "other." + gtype
            if title is None:
                title = 'title missing'
            thref = urldecodepart(href.split('#')[0])
            if thref not in self.href_to_id:
                raise WrapperException('guide href not in manifest')
            guide.append((gtype, title, href))
        self.guide = guide
        self.modified[self.opfbookpath] = 'file'

    # routines to get and set metadata xml fragment

    def getmetadataxml(self):
        """Return the metadata xml fragment."""
        return self.metadataxml

    def setmetadataxml(self, new_metadata):
        """Replace the metadata xml fragment and mark the opf as modified."""
        self.metadataxml = _unicodestr(new_metadata)
        self.modified[self.opfbookpath] = 'file'

    # routines to get and set the package tag
    def getpackagetag(self):
        """Return the package start tag."""
        return self.package_tag

    def setpackagetag(self, new_packagetag):
        """Replace the package start tag.

        Raises WrapperException when the version attribute found in the new
        tag differs from the current epub version.
        """
        pkgtag = _unicodestr(new_packagetag)
        version = ""
        mo = _PKG_VER.search(pkgtag)
        if mo:
            version = mo.group(1)
        if version != self.epub_version:
            raise WrapperException('Illegal to change the package version attribute')
        self.package_tag = pkgtag
        self.modified[self.opfbookpath] = 'file'

    # routines to manipulate files in the manifest (updates the opf automagically)

    def readfile(self, id):
        """Return the content of the manifest file with this id.

        Text media types (TEXT_MIMETYPES) are returned as str, everything
        else as bytes.  Files already added or modified are read from outdir.
        Raises WrapperException for an unknown id or a missing file.
        """
        id = _unicodestr(id)
        if id not in self.id_to_href:
            raise WrapperException('Id does not exist in manifest')
        filepath = self.id_to_filepath.get(id, None)
        if filepath is None:
            raise WrapperException('Id does not exist in manifest')
        # already added or modified it will be in outdir
        basedir = self.ebook_root
        if id in self.added or id in self.modified:
            basedir = self.outdir
        filepath = os.path.join(basedir, filepath)
        if not os.path.exists(filepath):
            raise WrapperException('File Does Not Exist')
        with open(filepath, 'rb') as fp:
            data = fp.read()
        mime = self.id_to_mime.get(id, '')
        if mime in TEXT_MIMETYPES:
            data = _unicodestr(data)
        return data

    def writefile(self, id, data):
        """Write data for the manifest file with this id below outdir and mark it modified.

        Raises WrapperException for an unknown id.
        """
        id = _unicodestr(id)
        if id not in self.id_to_href:
            raise WrapperException('Id does not exist in manifest')
        filepath = self.id_to_filepath.get(id, None)
        if filepath is None:
            raise WrapperException('Id does not exist in manifest')
        mime = self.id_to_mime.get(id, '')
        filepath = os.path.join(self.outdir, filepath)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        if mime in TEXT_MIMETYPES or isinstance(data, str):
            data = _utf8str(data)
        with open(filepath, 'wb') as fp:
            fp.write(data)
        self.modified[id] = 'file'

    def addfile(self, uniqueid, basename, data, mime=None, properties=None, fallback=None, overlay=None):
        """Add a new manifest file named basename in the default folder of its group.

        The media type is looked up from the extension when mime is None.
        Returns uniqueid.  Raises WrapperException when the id or the
        resulting href is already used, when no media type can be found, or
        when an ncx is added to an epub2.
        """
        uniqueid = _unicodestr(uniqueid)
        if uniqueid in self.id_to_href:
            raise WrapperException('Manifest Id is not unique')
        basename = _unicodestr(basename)
        mime = _unicodestr(mime)
        if mime is None:
            ext = os.path.splitext(basename)[1].lower()
            mime = ext_mime_map.get(ext, None)
        if mime is None:
            raise WrapperException("Mime Type Missing")
        if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
            raise WrapperException('Can not add or remove an ncx under epub2')
        group = mime_group_map.get(mime, "Misc")
        default_path = self.group_paths[group][0]
        bookpath = basename
        if default_path != "":
            bookpath = default_path + "/" + basename
        href = buildRelativePath(self.opfbookpath, bookpath)
        if href in self.href_to_id:
            raise WrapperException('Basename already exists')
        # now actually write out the new file
        relpath = bookpath.replace("/", os.sep)
        filepath = os.path.join(self.outdir, relpath)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        if mime in TEXT_MIMETYPES or isinstance(data, str):
            data = _utf8str(data)
        with open(filepath, 'wb') as fp:
            fp.write(data)
        # record the new item only after the file was written successfully
        self.id_to_filepath[uniqueid] = relpath
        self.id_to_href[uniqueid] = href
        self.id_to_mime[uniqueid] = mime
        # normalised like set_manifest_epub3_attributes does, so bytes values
        # do not end up in the opf as "b'...'"
        self.id_to_props[uniqueid] = _unicodestr(properties)
        self.id_to_fall[uniqueid] = _unicodestr(fallback)
        self.id_to_over[uniqueid] = _unicodestr(overlay)
        self.id_to_bookpath[uniqueid] = bookpath
        self.href_to_id[href] = uniqueid
        self.bookpath_to_id[bookpath] = uniqueid
        self.added.append(uniqueid)
        self.modified[self.opfbookpath] = 'file'
        return uniqueid

    # new in Sigil 1.0

    # adds bookpath specified file to the manifest with given uniqueid data, and mime
    def addbookpath(self, uniqueid, bookpath, data, mime=None):
        """Add a new manifest file at exactly bookpath.

        The media type is looked up from the extension when mime is None.
        Returns uniqueid.  Raises WrapperException when the id or the
        resulting href is already used, when no media type can be found, or
        when an ncx is added to an epub2.
        """
        uniqueid = _unicodestr(uniqueid)
        if uniqueid in self.id_to_href:
            raise WrapperException('Manifest Id is not unique')
        bookpath = _unicodestr(bookpath)
        basename = bookpath.split("/")[-1]
        mime = _unicodestr(mime)
        if mime is None:
            ext = os.path.splitext(basename)[1].lower()
            mime = ext_mime_map.get(ext, None)
        if mime is None:
            raise WrapperException("Mime Type Missing")
        if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
            raise WrapperException('Can not add or remove an ncx under epub2')
        href = buildRelativePath(self.opfbookpath, bookpath)
        if href in self.href_to_id:
            raise WrapperException('bookpath already exists')
        # now actually write out the new file
        relpath = bookpath.replace("/", os.sep)
        filepath = os.path.join(self.outdir, relpath)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        if mime in TEXT_MIMETYPES or isinstance(data, str):
            data = _utf8str(data)
        with open(filepath, 'wb') as fp:
            fp.write(data)
        # record the new item only after the file was written successfully
        self.id_to_filepath[uniqueid] = relpath
        self.id_to_href[uniqueid] = href
        self.id_to_mime[uniqueid] = mime
        self.id_to_props[uniqueid] = None
        self.id_to_fall[uniqueid] = None
        self.id_to_over[uniqueid] = None
        self.id_to_bookpath[uniqueid] = bookpath
        self.href_to_id[href] = uniqueid
        self.bookpath_to_id[bookpath] = uniqueid
        self.added.append(uniqueid)
        self.modified[self.opfbookpath] = 'file'
        return uniqueid

    def deletefile(self, id):
        """Remove the manifest file with this id from manifest, spine and bookkeeping.

        A copy previously written to outdir is deleted from disk.  Files that
        were not added in this session are recorded in self.deleted.
        Raises WrapperException for an unknown id, for an id without a known
        file, or when the ncx of an epub2 would be removed.
        """
        id = _unicodestr(id)
        if id not in self.id_to_href:
            raise WrapperException('Id does not exist in manifest')
        filepath = self.id_to_filepath.get(id, None)
        # the looked-up path is what must be checked here (id can no longer
        # be None at this point); failing early keeps the manifest untouched
        if filepath is None:
            raise WrapperException('Id does not exist in manifest')
        if self.epub_version.startswith("2") and id == self.gettocid():
            raise WrapperException('Can not add or remove an ncx under epub2')
        add_to_deleted = True
        # if file was added or modified, delete file from outdir
        if id in self.added or id in self.modified:
            filepath = os.path.join(self.outdir, filepath)
            if os.path.exists(filepath) and os.path.isfile(filepath):
                os.remove(filepath)
            if id in self.added:
                self.added.remove(id)
                # a file added in this session never existed in the original book
                add_to_deleted = False
            if id in self.modified:
                del self.modified[id]
        # remove from manifest
        href = self.id_to_href[id]
        bookpath = self.id_to_bookpath[id]
        del self.id_to_href[id]
        del self.id_to_mime[id]
        del self.id_to_props[id]
        del self.id_to_fall[id]
        del self.id_to_over[id]
        del self.id_to_bookpath[id]
        del self.href_to_id[href]
        del self.bookpath_to_id[bookpath]
        # remove from spine
        new_spine = [entry for entry in self.spine if entry[0] != id]
        if len(new_spine) != len(self.spine):
            self.setspine_epub3(new_spine)
        if add_to_deleted:
            self.deleted.append(('manifest', id, bookpath))
        self.modified[self.opfbookpath] = 'file'
        del self.id_to_filepath[id]

    def set_manifest_epub3_attributes(self, id, properties=None, fallback=None, overlay=None):
        """Set properties, fallback and media-overlay of a manifest item.

        Empty strings are stored as None.  Raises WrapperException for an
        unknown id.
        """
        id = _unicodestr(id)
        properties = _unicodestr(properties)
        if properties == "":
            properties = None
        fallback = _unicodestr(fallback)
        if fallback == "":
            fallback = None
        overlay = _unicodestr(overlay)
        if overlay == "":
            overlay = None
        if id not in self.id_to_href:
            raise WrapperException('Id does not exist in manifest')
        # delete before assigning so the key moves to the end of the ordered
        # dictionaries exactly as before
        del self.id_to_props[id]
        del self.id_to_fall[id]
        del self.id_to_over[id]
        self.id_to_props[id] = properties
        self.id_to_fall[id] = fallback
        self.id_to_over[id] = overlay
        self.modified[self.opfbookpath] = 'file'

    # helpful mapping routines for file info from the opf manifest

    def map_href_to_id(self, href, ow):
        """Return the manifest id for the url-decoded href, or ow when unknown."""
        href = _unicodestr(href)
        href = urldecodepart(href)
        return self.href_to_id.get(href, ow)

    # new in Sigil 1.0
    def map_bookpath_to_id(self, bookpath, ow):
        """Return the manifest id for bookpath, or ow when unknown."""
        bookpath = _unicodestr(bookpath)
        return self.bookpath_to_id.get(bookpath, ow)

    def map_basename_to_id(self, basename, ow):
        """Return the id of the first manifest bookpath whose last segment is basename, else ow."""
        # normalised like every other map_* routine so bytes input can match
        basename = _unicodestr(basename)
        for bookpath, mid in self.bookpath_to_id.items():
            if bookpath.split("/")[-1] == basename:
                return mid
        return ow

    def map_id_to_href(self, id, ow):
        """Return the manifest href for id, or ow when unknown."""
        id = _unicodestr(id)
        return self.id_to_href.get(id, ow)

    # new in Sigil 1.0
    def map_id_to_bookpath(self, id, ow):
        """Return the bookpath for id, or ow when unknown."""
        id = _unicodestr(id)
        return self.id_to_bookpath.get(id, ow)

    def map_id_to_mime(self, id, ow):
        """Return the media type for id, or ow when unknown."""
        id = _unicodestr(id)
        return self.id_to_mime.get(id, ow)

    def map_id_to_properties(self, id, ow):
        """Return the properties attribute for id, or ow when unknown."""
        id = _unicodestr(id)
        return self.id_to_props.get(id, ow)

    def map_id_to_fallback(self, id, ow):
        """Return the fallback attribute for id, or ow when unknown."""
        id = _unicodestr(id)
        return self.id_to_fall.get(id, ow)

    def map_id_to_overlay(self, id, ow):
        """Return the media-overlay attribute for id, or ow when unknown."""
        id = _unicodestr(id)
        return self.id_to_over.get(id, ow)

    # new in Sigil 1.0
    # returns a sorted folder list for that group
    # valid groups: Text, Styles, Images, Fonts, Audio, Video, ncx, opf, Misc
    def map_group_to_folders(self, group, ow):
        """Return the folder list stored for group, or ow when unknown."""
        group = _unicodestr(group)
        return self.group_paths.get(group, ow)

    # new in Sigil 1.0
    def map_mediatype_to_group(self, mtype, ow):
        """Return the group mapped to media type mtype, or ow when unknown."""
        mtype = _unicodestr(mtype)
        return mime_group_map.get(mtype, ow)

    # routines to work on ebook files that are not part of an opf manifest
    # their "id" is actually their unique relative path from book root
    # this is called either a book href or a book path
    # we use book_href or bookhref when working with "other" files
    # we use bookpath when working with files in the manifest

    def readotherfile(self, book_href):
        """Return the content of a non-manifest file identified by its book href.

        Files whose extension maps to a text media type are returned as str,
        everything else as bytes.  Reading the opf after it was modified
        returns the rebuilt opf text.  Raises WrapperException for None, for
        manifest files and for unknown or missing files.
        """
        href = _unicodestr(book_href)
        if href is None:
            raise WrapperException('None is not a valid book href')
        href = urldecodepart(href)
        if href is None:
            raise WrapperException('None is not a valid book href')
        # a book href of a manifest file is found in bookpath_to_id; the
        # id_to_href test is kept for callers that pass a manifest id
        if href not in self.other and (href in self.id_to_href or href in self.bookpath_to_id):
            raise WrapperException('Incorrect interface routine - use readfile')
        # handle special case of trying to read the opf after it has been modified
        if href == self.opfbookpath and href in self.modified:
            return self.build_opf()
        filepath = self.book_href_to_filepath.get(href, None)
        if filepath is None:
            raise WrapperException('Book href does not exist')
        basedir = self.ebook_root
        if href in self.added or href in self.modified:
            basedir = self.outdir
        filepath = os.path.join(basedir, filepath)
        if not os.path.exists(filepath):
            raise WrapperException('File Does Not Exist')
        ext = os.path.splitext(os.path.basename(filepath))[1].lower()
        mime = ext_mime_map.get(ext, "")
        with open(filepath, 'rb') as fp:
            data = fp.read()
        if mime in TEXT_MIMETYPES:
            data = _unicodestr(data)
        return data

    def writeotherfile(self, book_href, data):
        """Write data for an existing non-manifest file below outdir and mark it modified.

        Raises WrapperException for None, for manifest files, for unknown
        book hrefs and for protected files (PROTECTED_FILES and the opf).
        """
        href = _unicodestr(book_href)
        if href is None:
            raise WrapperException('None is not a valid book href')
        href = urldecodepart(href)
        if href is None:
            raise WrapperException('None is not a valid book href')
        # a book href of a manifest file is found in bookpath_to_id; the
        # id_to_href test is kept for callers that pass a manifest id
        if href not in self.other and (href in self.id_to_href or href in self.bookpath_to_id):
            raise WrapperException('Incorrect interface routine - use writefile')
        filepath = self.book_href_to_filepath.get(href, None)
        if filepath is None:
            raise WrapperException('Book href does not exist')
        if href in PROTECTED_FILES or href == self.opfbookpath:
            raise WrapperException('Attempt to modify protected file')
        filepath = os.path.join(self.outdir, filepath)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        if isinstance(data, str):
            data = _utf8str(data)
        with open(filepath, 'wb') as fp:
            fp.write(data)
        self.modified[href] = 'file'

    def addotherfile(self, book_href, data):
        """Create a new non-manifest file at book_href below outdir.

        Raises WrapperException for None, for a book href that is already
        known, or when the target file already exists in outdir.
        """
        href = _unicodestr(book_href)
        if href is None:
            raise WrapperException('None is not a valid book href')
        href = urldecodepart(href)
        if href is None:
            raise WrapperException('None is not a valid book href')
        if href in self.other:
            raise WrapperException('Book href must be unique')
        desired_path = href.replace("/", os.sep)
        filepath = os.path.join(self.outdir, desired_path)
        if os.path.isfile(filepath):
            raise WrapperException('Desired path already exists')
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        if isinstance(data, str):
            data = _utf8str(data)
        with open(filepath, 'wb') as fp:
            fp.write(data)
        self.other.append(href)
        self.added.append(href)
        self.book_href_to_filepath[href] = desired_path

942 def deleteotherfile(self, book_href): 943 id = _unicodestr(book_href) 944 id = urldecodepart(id) 945 if id is None: 946 raise WrapperException('None is not a valid book hrefbook href') 947 if id not in self.other and id in self.id_to_href: 948 raise WrapperException('Incorrect interface routine - use deletefile') 949 filepath = self.book_href_to_filepath.get(id, None) 950 if filepath is None: 951 raise WrapperException('Book href does not exist') 952 if id in PROTECTED_FILES or id == self.opfbookpath: 953 raise WrapperException('attempt to delete protected file') 954 add_to_deleted = True 955 # if file was added or modified delete file from outdir 956 if id in self.added or id in self.modified: 957 filepath = os.path.join(self.outdir, filepath) 958 if os.path.exists(filepath) and os.path.isfile(filepath): 959 os.remove(filepath) 960 if id in self.added: 961 self.added.remove(id) 962 add_to_deleted = False 963 if id in self.other: 964 self.other.remove(id) 965 if id in self.modified: 966 del self.modified[id] 967 if add_to_deleted: 968 self.deleted.append(('other', id, book_href)) 969 del self.book_href_to_filepath[id] 970 971 972 # utility routine to copy entire ebook to a destination directory 973 # including the any prior updates and changes to the opf 974 975 def copy_book_contents_to(self, destdir): 976 destdir = _unicodestr(destdir) 977 if destdir is None or not os.path.isdir(destdir): 978 raise WrapperException('destination directory does not exist') 979 for id in self.id_to_filepath: 980 rpath = self.id_to_filepath[id] 981 data = self.readfile(id) 982 filepath = os.path.join(destdir, rpath) 983 base = os.path.dirname(filepath) 984 if not os.path.exists(base): 985 os.makedirs(base) 986 if isinstance(data, str): 987 data = _utf8str(data) 988 with open(filepath, 'wb') as fp: 989 fp.write(data) 990 for id in self.book_href_to_filepath: 991 rpath = self.book_href_to_filepath[id] 992 data = self.readotherfile(id) 993 filepath = os.path.join(destdir, rpath) 994 
base = os.path.dirname(filepath) 995 if not os.path.exists(base): 996 os.makedirs(base) 997 if isinstance(data, str): 998 data = _utf8str(data) 999 with open(filepath, 'wb') as fp: 1000 fp.write(data) 1001 1002 def get_dictionary_dirs(self): 1003 apaths = [] 1004 if sys.platform.startswith('darwin'): 1005 apaths.append(os.path.abspath(os.path.join(self.appdir, "..", "hunspell_dictionaries"))) 1006 apaths.append(os.path.abspath(os.path.join(self.usrsupdir, "hunspell_dictionaries"))) 1007 elif sys.platform.startswith('win'): 1008 apaths.append(os.path.abspath(os.path.join(self.appdir, "hunspell_dictionaries"))) 1009 apaths.append(os.path.abspath(os.path.join(self.usrsupdir, "hunspell_dictionaries"))) 1010 else: 1011 # Linux 1012 for path in self.linux_hunspell_dict_dirs: 1013 apaths.append(os.path.abspath(path.strip())) 1014 apaths.append(os.path.abspath(os.path.join(self.usrsupdir, "hunspell_dictionaries"))) 1015 return apaths 1016 1017 def get_gumbo_path(self): 1018 if sys.platform.startswith('darwin'): 1019 lib_dir = os.path.abspath(os.path.join(self.appdir, "..", "lib")) 1020 lib_name = 'libsigilgumbo.dylib' 1021 elif sys.platform.startswith('win'): 1022 lib_dir = os.path.abspath(self.appdir) 1023 lib_name = 'sigilgumbo.dll' 1024 else: 1025 lib_dir = os.path.abspath(self.appdir) 1026 lib_name = 'libsigilgumbo.so' 1027 return os.path.join(lib_dir, lib_name) 1028 1029 def get_hunspell_path(self): 1030 if sys.platform.startswith('darwin'): 1031 lib_dir = os.path.abspath(os.path.join(self.appdir, "..", "lib")) 1032 lib_name = 'libhunspell.dylib' 1033 elif sys.platform.startswith('win'): 1034 lib_dir = os.path.abspath(self.appdir) 1035 lib_name = 'hunspell.dll' 1036 else: 1037 lib_dir = os.path.abspath(self.appdir) 1038 lib_name = 'libhunspell.so' 1039 return os.path.join(lib_dir, lib_name) 1040