1#!/usr/local/bin/python3.8
2# -*- coding: utf-8 -*-
3# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
4
5# Copyright (c) 2014-2020 Kevin B. Hendricks and Doug Massay
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without modification,
9# are permitted provided that the following conditions are met:
10#
11# 1. Redistributions of source code must retain the above copyright notice, this list of
12# conditions and the following disclaimer.
13#
14# 2. Redistributions in binary form must reproduce the above copyright notice, this list
15# of conditions and the following disclaimer in the documentation and/or other materials
16# provided with the distribution.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
19# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
21# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
26# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28from collections import OrderedDict
29import sys
30import os
31import re
32from hrefutils import urldecodepart, urlencodepart
33from hrefutils import buildBookPath, startingDir, buildRelativePath
34from hrefutils import ext_mime_map, mime_group_map
35
36import unicodedata
37
def _utf8str(p):
    """Return p as utf-8 encoded bytes.

    None and bytes values are handed back untouched; anything else is
    encoded, with unencodable characters replaced.
    """
    if p is None or isinstance(p, bytes):
        return p
    return p.encode('utf-8', errors='replace')
44
def _unicodestr(p):
    """Return p as a str.

    None and str values are handed back untouched; anything else is
    decoded as utf-8, with undecodable bytes replaced.
    """
    if p is None or isinstance(p, str):
        return p
    return p.decode('utf-8', errors='replace')
51
# version stamp handed out by Wrapper.getversion()
_launcher_version = 20211025

# matches an opf <package ...> start tag and captures the value of its version attribute
_PKG_VER = re.compile(r'''<\s*package[^>]*version\s*=\s*["']([^'"]*)['"][^>]*>''', re.IGNORECASE)
55
# Wrapper Class is used to perform record keeping for Sigil.  It keeps track of modified,
# added, and deleted files while providing some degree of protection against files under
# Sigil's control from being directly manipulated.
# Uses "write-on-modify" and so removes the need for wholesale copying of files
60
# guide reference types defined by the OPF 2.0 specification; setguide() stores
# any other type with an "other." prefix
_guide_types = ['cover', 'title-page', 'toc', 'index', 'glossary', 'acknowledgements',
                'bibliography', 'colophon', 'copyright-page', 'dedication',
                'epigraph', 'foreword', 'loi', 'lot', 'notes', 'preface', 'text']
64
# book paths of the epub container files
# NOTE(review): not referenced in the visible part of this file; presumably these
# are the files plugins are not allowed to change - confirm against the callers
PROTECTED_FILES = [
    'mimetype',
    'META-INF/container.xml',
]
69
# media types whose file contents are exchanged with plugins as text (str):
# readfile() decodes them from utf-8 and the write routines encode them to utf-8
TEXT_MIMETYPES = [
    'image/svg+xml',
    'application/xhtml+xml',
    'text/css',
    'application/x-dtbncx+xml',
    'application/oebps-package+xml',
    'application/oebs-page-map+xml',
    'application/smil+xml',
    'application/adobe-page-template+xml',
    'application/vnd.adobe-page-template+xml',
    'text/javascript',
    'application/javascript',
    'application/pls+xml',
]
84
85
def _epub_file_walk(top):
    """Return the path, relative to top, of every file found beneath top."""
    root = os.fsdecode(top)
    return [
        os.path.relpath(os.path.join(folder, fname), root)
        for folder, _subdirs, fnames in os.walk(root)
        for fname in fnames
    ]
93
94
class WrapperException(Exception):
    """Error raised by Wrapper when a request can not be honoured."""
97
98class Wrapper(object):
99
100    def __init__(self, ebook_root, outdir, op, plugin_dir, plugin_name, debug=False):
101        self._debug = debug
102        self.ebook_root = os.fsdecode(ebook_root)
103        # plugins and plugin containers can get name and user plugin dir
104        self.plugin_dir = os.fsdecode(plugin_dir)
105        self.plugin_name = plugin_name
106        self.outdir = os.fsdecode(outdir)
107
108        # initialize the sigil cofiguration info passed in outdir with sigil.cfg
109        self.opfbookpath = None
110        self.appdir = None
111        self.usrsupdir = None
112        # Location of directory containing hunspell dictionaries on Linux
113        self.linux_hunspell_dict_dirs = []
114        # Sigil interface language code
115        self.sigil_ui_lang = None
116        # Default Sigil spell check dictionary
117        self.sigil_spellcheck_lang = None
118        # status of epub inside Sigil (isDirty) and CurrentFilePath of current epub file
119        self.epub_isDirty = False
120        self.epub_filepath = ""
121        self.colormode = None
122        self.colors = None
123        # File selected in Sigil's Book Browser
124        self.selected = []
125        cfg = ''
126        with open(os.path.join(self.outdir, 'sigil.cfg'), 'rb') as f:
127            cfg = f.read().decode('utf-8')
128        cfg = cfg.replace("\r", "")
129        cfg_lst = cfg.split("\n")
130        if len(cfg_lst) >= 7:
131            self.opfbookpath = cfg_lst.pop(0)
132            self.appdir = cfg_lst.pop(0)
133            self.usrsupdir = cfg_lst.pop(0)
134            if not sys.platform.startswith('darwin') and not sys.platform.startswith('win'):
135                self.linux_hunspell_dict_dirs = cfg_lst.pop(0).split(":")
136            self.sigil_ui_lang = cfg_lst.pop(0)
137            self.sigil_spellcheck_lang = cfg_lst.pop(0)
138            self.epub_isDirty = (cfg_lst.pop(0) == "True")
139            self.epub_filepath = cfg_lst.pop(0)
140            self.colormode = cfg_lst.pop(0)
141            self.colors = cfg_lst.pop(0)
142            self.highdpi = cfg_lst.pop(0)
143            self.uifont = cfg_lst.pop(0)
144            self.selected = cfg_lst
145        os.environ['SigilGumboLibPath'] = self.get_gumbo_path()
146
147        # dictionaries used to map opf manifest information
148        self.id_to_href = OrderedDict()
149        self.id_to_mime = OrderedDict()
150        self.id_to_props = OrderedDict()
151        self.id_to_fall = OrderedDict()
152        self.id_to_over = OrderedDict()
153        self.id_to_bookpath = OrderedDict()
154        self.href_to_id = OrderedDict()
155        self.bookpath_to_id = OrderedDict()
156        self.spine_ppd = None
157        self.spine = []
158        self.guide = []
159        self.bindings = []
160        self.package_tag = None
161        self.epub_version = None
162        # self.metadata_attr = None
163        # self.metadata = []
164        self.metadataxml = ''
165        self.op = op
166        if self.op is not None:
167            # copy in data from parsing of initial opf
168            self.opf_dir = op.opf_dir
169            # Note: manifest hrefs may only point to files (there are no fragments)
170            # all manifest relative hrefs have already had their path component url decoded
171            self.id_to_href = op.get_manifest_id_to_href_dict().copy()
172            self.id_to_mime = op.get_manifest_id_to_mime_dict().copy()
173            self.id_to_props = op.get_manifest_id_to_properties_dict().copy()
174            self.id_to_fall = op.get_manifest_id_to_fallback_dict().copy()
175            self.id_to_over = op.get_manifest_id_to_overlay_dict().copy()
176            self.id_to_bookpath = op.get_manifest_id_to_bookpath_dict().copy()
177            self.group_paths = op.get_group_paths().copy()
178            self.spine_ppd = op.get_spine_ppd()
179            self.spine = op.get_spine()
180            # since guide hrefs may contain framents they are kept in url encoded form
181            self.guide = op.get_guide()
182            self.package_tag = op.get_package_tag()
183            self.epub_version = op.get_epub_version()
184            self.bindings = op.get_bindings()
185            self.metadataxml = op.get_metadataxml()
186            # invert key dictionaries to allow for reverse access
187            for k, v in self.id_to_href.items():
188                self.href_to_id[v] = k
189            for k, v in self.id_to_bookpath.items():
190                self.bookpath_to_id[v] = k
191            # self.href_to_id = {v: k for k, v in self.id_to_href.items()}
192            # self.bookpath_to_id = {v: k for k, v in self.id_to_bookpath.items()}
193            # self.metadata = op.get_metadata()
194            # self.metadata_attr = op.get_metadata_attr()
195        self.other = []  # non-manifest file information
196        self.id_to_filepath = OrderedDict()
197        self.book_href_to_filepath = OrderedDict()
198        self.modified = OrderedDict()
199        self.added = []
200        self.deleted = []
201
202        # walk the ebook directory tree building up initial list of
203        # all unmanifested (other) files
204        for filepath in _epub_file_walk(ebook_root):
205            book_href = filepath.replace(os.sep, "/")
206            # OS X file names and paths use NFD form. The EPUB
207            # spec requires all text including filenames to be in NFC form.
208            book_href = unicodedata.normalize('NFC', book_href)
209            # if book_href file in manifest convert to manifest id
210            id = self.bookpath_to_id.get(book_href, None)
211            if id is None:
212                self.other.append(book_href)
213                self.book_href_to_filepath[book_href] = filepath
214            else:
215                self.id_to_filepath[id] = filepath
216
217    def getversion(self):
218        global _launcher_version
219        return _launcher_version
220
    def getepubversion(self):
        """Return the epub version taken from the parsed opf (None when no opf was supplied)."""
        return self.epub_version
223
224    # utility routine to get mime from href (book href or opf href)
225    # no fragments present
226    def getmime(self, href):
227        href = _unicodestr(href)
228        href = urldecodepart(href)
229        filename = os.path.basename(href)
230        ext = os.path.splitext(filename)[1]
231        ext = ext.lower()
232        return ext_mime_map.get(ext, "")
233
234
235    # New in Sigil 1.1
236    # ------------------
237
238    # returns color mode of Sigil "light" or "dark"
    def colorMode(self):
        """Return the color mode value read from sigil.cfg as a str (None if it was not set)."""
        return _unicodestr(self.colormode)
241
242    # returns color as css or javascript hex color string #xxxxxx
243    # Accepts the following color roles "Window", "Base", "Text", "Highlight", "HighlightedText"
244    def color(self, role):
245        role = _unicodestr(role)
246        role = role.lower()
247        color_roles = ["window", "base", "text", "highlight", "highlightedtext"]
248        colors = self.colors.split(',')
249        if role in color_roles:
250            idx = color_roles.index(role)
251            return _unicodestr(colors[idx])
252        return None
253
254    # New in Sigil 1.0
255    # ----------------
256
    # A book path (aka "bookpath" or "book_path") is a unique relative path
    # from the ebook root to a specific file.  As a relative path meant to
    # be used in an href or src link it only uses forward slashes "/"
    # as path segment separators.  Since all files exist inside the
    # epub root (folder the epub was unzipped into), book paths will NEVER
    # have or use "./" or "../", i.e. they are always in canonical form

    # We will use the term book_href (aka "bookhref") interchangeably
    # with bookpath with the following convention:
    #   - use book_href when working with "other" files outside of the manifest
    #   - use bookpath when working with files in the manifest
    #   - use either when the file in question is the OPF, as it exists in the intersection
269
270    # returns the bookpath/book_href to the opf file
    def get_opfbookpath(self):
        """Return the book path of the opf file as read from sigil.cfg (None if it was not set)."""
        return self.opfbookpath
273
274    # returns the book path to the folder containing this bookpath
275    def get_startingdir(self, bookpath):
276        bookpath = _unicodestr(bookpath)
277        return startingDir(bookpath)
278
279    # return a bookpath for the file pointed to by the href from
280    # the specified bookpath starting directory
281    # no fragments allowed in href (must have been previously split off)
282    def build_bookpath(self, href, starting_dir):
283        href = _unicodestr(href)
284        href = urldecodepart(href)
285        starting_dir = _unicodestr(starting_dir)
286        return buildBookPath(href, starting_dir)
287
288    # returns the href relative path from source bookpath to target bookpath
289    def get_relativepath(self, from_bookpath, to_bookpath):
290        from_bookpath = _unicodestr(from_bookpath)
291        to_bookpath = _unicodestr(to_bookpath)
292        return buildRelativePath(from_bookpath, to_bookpath)
293
294    # ----------
295
296    # routine to detect if the current epub is in Sigil standard epub form
297    def epub_is_standard(self):
298        groups = ["Text", "Styles", "Fonts", "Images", "Audio", "Video", "Misc"]
299        paths = ["OEBPS/Text", "OEBPS/Styles", "OEBPS/Fonts", "OEBPS/Images", "OEBPS/Audio", "OEBPS/Video", "OEBPS/Misc"]
300        std_epub = self.opfbookpath == "OEBPS/content.opf"
301        tocid = self.gettocid()
302        if tocid is not None:
303            std_epub = std_epub and self.id_to_bookpath[tocid] == "OEBPS/toc.ncx"
304        if self.epub_version.startswith("2"):
305            std_epub = std_epub and tocid is not None
306        for g, p in zip(groups, paths):
307            folders = self.group_paths[g]
308            std_epub = std_epub and folders[0] == p and len(folders) == 1
309        return std_epub
310
311
312    # routines to rebuild the opf on the fly from current information
    def build_package_starttag(self):
        """Return the stored package start tag unchanged."""
        return self.package_tag
315
316    def build_manifest_xml(self):
317        manout = []
318        manout.append('  <manifest>\n')
319        for id in sorted(self.id_to_mime):
320            href = self.id_to_href[id]
321            # relative manifest hrefs must have no fragments
322            if href.find(':') == -1:
323                href = urlencodepart(href)
324            mime = self.id_to_mime[id]
325            props = ''
326            properties = self.id_to_props[id]
327            if properties is not None:
328                props = ' properties="%s"' % properties
329            fall = ''
330            fallback = self.id_to_fall[id]
331            if fallback is not None:
332                fall = ' fallback="%s"' % fallback
333            over = ''
334            overlay = self.id_to_over[id]
335            if overlay is not None:
336                over = ' media-overlay="%s"' % overlay
337            manout.append('    <item id="%s" href="%s" media-type="%s"%s%s%s />\n' % (id, href, mime, props, fall, over))
338        manout.append('  </manifest>\n')
339        return "".join(manout)
340
341    def build_spine_xml(self):
342        spineout = []
343        ppd = ''
344        ncx = ''
345        map = ''
346        if self.spine_ppd is not None:
347            ppd = ' page-progression-direction="%s"' % self.spine_ppd
348        tocid = self.gettocid()
349        if tocid is not None:
350            ncx = ' toc="%s"' % tocid
351        pagemapid = self.getpagemapid()
352        if pagemapid is not None:
353            map = ' page-map="%s"' % pagemapid
354        spineout.append('  <spine%s%s%s>\n' % (ppd, ncx, map))
355        for (id, linear, properties) in self.spine:
356            lin = ''
357            if linear is not None:
358                lin = ' linear="%s"' % linear
359            props = ''
360            if properties is not None:
361                props = ' properties="%s"' % properties
362            spineout.append('    <itemref idref="%s"%s%s/>\n' % (id, lin, props))
363        spineout.append('  </spine>\n')
364        return "".join(spineout)
365
366    def build_guide_xml(self):
367        guideout = []
368        if len(self.guide) > 0:
369            guideout.append('  <guide>\n')
370            for (type, title, href) in self.guide:
371                # note guide hrefs may have fragments so must be kept
372                # in url encoded form at all times until splitting into component parts
373                guideout.append('    <reference type="%s" href="%s" title="%s"/>\n' % (type, href, title))
374            guideout.append('  </guide>\n')
375        return "".join(guideout)
376
377    def build_bindings_xml(self):
378        bindout = []
379        if len(self.bindings) > 0 and self.epub_version.startswith('3'):
380            bindout.append('  <bindings>\n')
381            for (mtype, handler) in self.bindings:
382                bindout.append('    <mediaType media-type="%s" handler="%s"/>\n' % (mtype, handler))
383            bindout.append('  </bindings>\n')
384        return "".join(bindout)
385
386    def build_opf(self):
387        data = []
388        data.append('<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n')
389        data.append(self.build_package_starttag())
390        data.append(self.metadataxml)
391        data.append(self.build_manifest_xml())
392        data.append(self.build_spine_xml())
393        data.append(self.build_guide_xml())
394        data.append(self.build_bindings_xml())
395        data.append('</package>\n')
396        return "".join(data)
397
398    def write_opf(self):
399        if self.op is not None:
400            platpath = self.opfbookpath.replace('/', os.sep)
401            filepath = os.path.join(self.outdir, platpath)
402            base = os.path.dirname(filepath)
403            if not os.path.exists(base):
404                os.makedirs(base)
405            with open(filepath, 'wb') as fp:
406                data = _utf8str(self.build_opf())
407                fp.write(data)
408
409
410    # routines to help find the manifest id of toc.ncx and page-map.xml
411
412    def gettocid(self):
413        for id in self.id_to_mime:
414            mime = self.id_to_mime[id]
415            if mime == "application/x-dtbncx+xml":
416                return id
417        return None
418
419    def getpagemapid(self):
420        for id in self.id_to_mime:
421            mime = self.id_to_mime[id]
422            if mime == "application/oebs-page-map+xml":
423                return id
424        return None
425
426
427    # routines to help find the manifest id of the nav
428    def getnavid(self):
429        if self.epub_version == "2.0":
430            return None
431        for id in self.id_to_mime:
432            mime = self.id_to_mime[id]
433            if mime == "application/xhtml+xml":
434                properties = self.id_to_props[id]
435                if properties is not None and "nav" in properties:
436                    return id
437        return None
438
439
440    # routines to manipulate the spine
441
442    def getspine(self):
443        osp = []
444        for (sid, linear, properties) in self.spine:
445            osp.append((sid, linear))
446        return osp
447
448    def setspine(self, new_spine):
449        spine = []
450        for (sid, linear) in new_spine:
451            properties = None
452            sid = _unicodestr(sid)
453            linear = _unicodestr(linear)
454            if sid not in self.id_to_href:
455                raise WrapperException('Spine Id not in Manifest')
456            if linear is not None:
457                linear = linear.lower()
458                if linear not in ['yes', 'no']:
459                    raise Exception('Improper Spine Linear Attribute')
460            spine.append((sid, linear, properties))
461        self.spine = spine
462        self.modified[self.opfbookpath] = 'file'
463
    def getspine_epub3(self):
        """Return the spine list of (idref, linear, properties) tuples.

        This is the internal list itself, not a copy.
        """
        return self.spine
466
467    def setspine_epub3(self, new_spine):
468        spine = []
469        for (sid, linear, properties) in new_spine:
470            sid = _unicodestr(sid)
471            linear = _unicodestr(linear)
472            properties = _unicodestr(properties)
473            if properties is not None and properties == "":
474                properties = None
475            if sid not in self.id_to_href:
476                raise WrapperException('Spine Id not in Manifest')
477            if linear is not None:
478                linear = linear.lower()
479                if linear not in ['yes', 'no']:
480                    raise Exception('Improper Spine Linear Attribute')
481            if properties is not None:
482                properties = properties.lower()
483            spine.append((sid, linear, properties))
484        self.spine = spine
485        self.modified[self.opfbookpath] = 'file'
486
    def getbindings_epub3(self):
        """Return the bindings list of (media-type, handler) tuples.

        This is the internal list itself, not a copy.
        """
        return self.bindings
489
490    def setbindings_epub3(self, new_bindings):
491        bindings = []
492        for (mtype, handler) in new_bindings:
493            mtype = _unicodestr(mtype)
494            handler = _unicodestr(handler)
495            if mtype is None or mtype == "":
496                continue
497            if handler is None or handler == "":
498                continue
499            if handler not in self.id_to_href:
500                raise WrapperException('Handler not in Manifest')
501            bindings.append((mtype, handler))
502        self.bindings = bindings
503        self.modified[self.opfbookpath] = 'file'
504
505    def spine_insert_before(self, pos, sid, linear, properties=None):
506        sid = _unicodestr(sid)
507        linear = _unicodestr(linear)
508        properties = _unicodestr(properties)
509        if properties is not None and properties == "":
510            properties = None
511        if sid not in self.id_to_mime:
512            raise WrapperException('that spine idref does not exist in manifest')
513        n = len(self.spine)
514        if pos == 0:
515            self.spine = [(sid, linear, properties)] + self.spine
516        elif pos == -1 or pos >= n:
517            self.spine.append((sid, linear, properties))
518        else:
519            self.spine = self.spine[0:pos] + [(sid, linear, properties)] + self.spine[pos:]
520        self.modified[self.opfbookpath] = 'file'
521
    def getspine_ppd(self):
        """Return the spine page-progression-direction value (None when not set)."""
        return self.spine_ppd
524
525    def setspine_ppd(self, ppd):
526        ppd = _unicodestr(ppd)
527        if ppd not in ['rtl', 'ltr', None]:
528            raise WrapperException('incorrect page-progression direction')
529        self.spine_ppd = ppd
530        self.modified[self.opfbookpath] = 'file'
531
532    def setspine_itemref_epub3_attributes(self, idref, linear, properties):
533        idref = _unicodestr(idref)
534        linear = _unicodestr(linear)
535        properties = _unicodestr(properties)
536        if properties is not None and properties == "":
537            properties = None
538        pos = -1
539        i = 0
540        for (sid, slinear, sproperties) in self.spine:
541            if sid == idref:
542                pos = i
543                break
544            i += 1
545        if pos == -1:
546            raise WrapperException('that idref is not exist in the spine')
547        self.spine[pos] = (sid, linear, properties)
548        self.modified[self.opfbookpath] = 'file'
549
550
551    # routines to get and set the guide
552
    def getguide(self):
        """Return the guide list of (type, title, href) tuples.

        This is the internal list itself, not a copy.
        """
        return self.guide
555
556    # guide hrefs must be in urlencoded form (percent encodings present if needed)
557    # as they may include fragments and # is a valid url path character
558    def setguide(self, new_guide):
559        guide = []
560        for (type, title, href) in new_guide:
561            type = _unicodestr(type)
562            title = _unicodestr(title)
563            href = _unicodestr(href)
564            if type not in _guide_types:
565                type = "other." + type
566            if title is None:
567                title = 'title missing'
568            thref = urldecodepart(href.split('#')[0])
569            if thref not in self.href_to_id:
570                raise WrapperException('guide href not in manifest')
571            guide.append((type, title, href))
572        self.guide = guide
573        self.modified[self.opfbookpath] = 'file'
574
575
576    # routines to get and set metadata xml fragment
577
    def getmetadataxml(self):
        """Return the stored metadata xml fragment."""
        return self.metadataxml
580
    def setmetadataxml(self, new_metadata):
        """Store new_metadata (str or utf-8 bytes) as the metadata xml fragment.

        The value is not validated here.  Marks the opf as modified.
        """
        self.metadataxml = _unicodestr(new_metadata)
        self.modified[self.opfbookpath] = 'file'
584
585
586    # routines to get and set the package tag
    def getpackagetag(self):
        """Return the stored package start tag."""
        return self.package_tag
589
590    def setpackagetag(self, new_packagetag):
591        pkgtag = _unicodestr(new_packagetag)
592        version = ""
593        mo = _PKG_VER.search(pkgtag)
594        if mo:
595            version = mo.group(1)
596        if version != self.epub_version:
597            raise WrapperException('Illegal to change the package version attribute')
598        self.package_tag = pkgtag
599        self.modified[self.opfbookpath] = 'file'
600
601
602    # routines to manipulate files in the manifest (updates the opf automagically)
603
604    def readfile(self, id):
605        id = _unicodestr(id)
606        if id not in self.id_to_href:
607            raise WrapperException('Id does not exist in manifest')
608        filepath = self.id_to_filepath.get(id, None)
609        if filepath is None:
610            raise WrapperException('Id does not exist in manifest')
611        # already added or modified it will be in outdir
612        basedir = self.ebook_root
613        if id in self.added or id in self.modified:
614            basedir = self.outdir
615        filepath = os.path.join(basedir, filepath)
616        if not os.path.exists(filepath):
617            raise WrapperException('File Does Not Exist')
618        data = ''
619        with open(filepath, 'rb') as fp:
620            data = fp.read()
621        mime = self.id_to_mime.get(id, '')
622        if mime in TEXT_MIMETYPES:
623            data = _unicodestr(data)
624        return data
625
626    def writefile(self, id, data):
627        id = _unicodestr(id)
628        if id not in self.id_to_href:
629            raise WrapperException('Id does not exist in manifest')
630        filepath = self.id_to_filepath.get(id, None)
631        if filepath is None:
632            raise WrapperException('Id does not exist in manifest')
633        mime = self.id_to_mime.get(id, '')
634        filepath = os.path.join(self.outdir, filepath)
635        base = os.path.dirname(filepath)
636        if not os.path.exists(base):
637            os.makedirs(base)
638        if mime in TEXT_MIMETYPES or isinstance(data, str):
639            data = _utf8str(data)
640        with open(filepath, 'wb') as fp:
641            fp.write(data)
642        self.modified[id] = 'file'
643
644
645    def addfile(self, uniqueid, basename, data, mime=None, properties=None, fallback=None, overlay=None):
646        uniqueid = _unicodestr(uniqueid)
647        if uniqueid in self.id_to_href:
648            raise WrapperException('Manifest Id is not unique')
649        basename = _unicodestr(basename)
650        mime = _unicodestr(mime)
651        if mime is None:
652            ext = os.path.splitext(basename)[1]
653            ext = ext.lower()
654            mime = ext_mime_map.get(ext, None)
655        if mime is None:
656            raise WrapperException("Mime Type Missing")
657        if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
658            raise WrapperException('Can not add or remove an ncx under epub2')
659        group = mime_group_map.get(mime, "Misc")
660        default_path = self.group_paths[group][0]
661        bookpath = basename
662        if default_path != "":
663            bookpath = default_path + "/" + basename
664        href = buildRelativePath(self.opfbookpath, bookpath)
665        if href in self.href_to_id:
666            raise WrapperException('Basename already exists')
667        # now actually write out the new file
668        filepath = bookpath.replace("/", os.sep)
669        self.id_to_filepath[uniqueid] = filepath
670        filepath = os.path.join(self.outdir, filepath)
671        base = os.path.dirname(filepath)
672        if not os.path.exists(base):
673            os.makedirs(base)
674        if mime in TEXT_MIMETYPES or isinstance(data, str):
675            data = _utf8str(data)
676        with open(filepath, 'wb') as fp:
677            fp.write(data)
678        self.id_to_href[uniqueid] = href
679        self.id_to_mime[uniqueid] = mime
680        self.id_to_props[uniqueid] = properties
681        self.id_to_fall[uniqueid] = fallback
682        self.id_to_over[uniqueid] = overlay
683        self.id_to_bookpath[uniqueid] = bookpath
684        self.href_to_id[href] = uniqueid
685        self.bookpath_to_id[bookpath] = uniqueid
686        self.added.append(uniqueid)
687        self.modified[self.opfbookpath] = 'file'
688        return uniqueid
689
690
691    # new in Sigil 1.0
692
693    # adds bookpath specified file to the manifest with given uniqueid data, and mime
694    def addbookpath(self, uniqueid, bookpath, data, mime=None):
695        uniqueid = _unicodestr(uniqueid)
696        if uniqueid in self.id_to_href:
697            raise WrapperException('Manifest Id is not unique')
698        bookpath = _unicodestr(bookpath)
699        basename = bookpath.split("/")[-1]
700        mime = _unicodestr(mime)
701        if mime is None:
702            ext = os.path.splitext(basename)[1]
703            ext = ext.lower()
704            mime = ext_mime_map.get(ext, None)
705        if mime is None:
706            raise WrapperException("Mime Type Missing")
707        if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
708            raise WrapperException('Can not add or remove an ncx under epub2')
709        href = buildRelativePath(self.opfbookpath, bookpath)
710        if href in self.href_to_id:
711            raise WrapperException('bookpath already exists')
712        # now actually write out the new file
713        filepath = bookpath.replace("/", os.sep)
714        self.id_to_filepath[uniqueid] = filepath
715        filepath = os.path.join(self.outdir, filepath)
716        base = os.path.dirname(filepath)
717        if not os.path.exists(base):
718            os.makedirs(base)
719        if mime in TEXT_MIMETYPES or isinstance(data, str):
720            data = _utf8str(data)
721        with open(filepath, 'wb') as fp:
722            fp.write(data)
723        self.id_to_href[uniqueid] = href
724        self.id_to_mime[uniqueid] = mime
725        self.id_to_props[uniqueid] = None
726        self.id_to_fall[uniqueid] = None
727        self.id_to_over[uniqueid] = None
728        self.id_to_bookpath[uniqueid] = bookpath
729        self.href_to_id[href] = uniqueid
730        self.bookpath_to_id[bookpath] = uniqueid
731        self.added.append(uniqueid)
732        self.modified[self.opfbookpath] = 'file'
733        return uniqueid
734
735
736    def deletefile(self, id):
737        id = _unicodestr(id)
738        if id not in self.id_to_href:
739            raise WrapperException('Id does not exist in manifest')
740        filepath = self.id_to_filepath.get(id, None)
741        if id is None:
742            raise WrapperException('Id does not exist in manifest')
743        if self.epub_version.startswith("2") and id == self.gettocid():
744            raise WrapperException('Can not add or remove an ncx under epub2')
745        add_to_deleted = True
746        # if file was added or modified, delete file from outdir
747        if id in self.added or id in self.modified:
748            filepath = os.path.join(self.outdir, filepath)
749            if os.path.exists(filepath) and os.path.isfile(filepath):
750                os.remove(filepath)
751            if id in self.added:
752                self.added.remove(id)
753                add_to_deleted = False
754            if id in self.modified:
755                del self.modified[id]
756        # remove from manifest
757        href = self.id_to_href[id]
758        bookpath = self.id_to_bookpath[id]
759        del self.id_to_href[id]
760        del self.id_to_mime[id]
761        del self.id_to_props[id]
762        del self.id_to_fall[id]
763        del self.id_to_over[id]
764        del self.id_to_bookpath[id]
765        del self.href_to_id[href]
766        del self.bookpath_to_id[bookpath]
767        # remove from spine
768        new_spine = []
769        was_modified = False
770        for sid, linear, properties in self.spine:
771            if sid != id:
772                new_spine.append((sid, linear, properties))
773            else:
774                was_modified = True
775        if was_modified:
776            self.setspine_epub3(new_spine)
777        if add_to_deleted:
778            self.deleted.append(('manifest', id, bookpath))
779            self.modified[self.opfbookpath] = 'file'
780        del self.id_to_filepath[id]
781
782    def set_manifest_epub3_attributes(self, id, properties=None, fallback=None, overlay=None):
783        id = _unicodestr(id)
784        properties = _unicodestr(properties)
785        if properties is not None and properties == "":
786            properties = None
787        fallback = _unicodestr(fallback)
788        if fallback is not None and fallback == "":
789            fallback = None
790        overlay = _unicodestr(overlay)
791        if overlay is not None and overlay == "":
792            overlay = None
793        if id not in self.id_to_href:
794            raise WrapperException('Id does not exist in manifest')
795        del self.id_to_props[id]
796        del self.id_to_fall[id]
797        del self.id_to_over[id]
798        self.id_to_props[id] = properties
799        self.id_to_fall[id] = fallback
800        self.id_to_over[id] = overlay
801        self.modified[self.opfbookpath] = 'file'
802
803
804    # helpful mapping routines for file info from the opf manifest
805
806    def map_href_to_id(self, href, ow):
807        href = _unicodestr(href)
808        href = urldecodepart(href)
809        return self.href_to_id.get(href, ow)
810
811    # new in Sigil 1.0
812    def map_bookpath_to_id(self, bookpath, ow):
813        bookpath = _unicodestr(bookpath)
814        return self.bookpath_to_id.get(bookpath, ow)
815
816    def map_basename_to_id(self, basename, ow):
817        for bookpath in self.bookpath_to_id:
818            filename = bookpath.split("/")[-1]
819            if filename == basename:
820                return self.bookpath_to_id[bookpath]
821        return ow
822
823    def map_id_to_href(self, id, ow):
824        id = _unicodestr(id)
825        return self.id_to_href.get(id, ow)
826
827    # new in Sigil 1.0
828    def map_id_to_bookpath(self, id, ow):
829        id = _unicodestr(id)
830        return self.id_to_bookpath.get(id, ow)
831
832    def map_id_to_mime(self, id, ow):
833        id = _unicodestr(id)
834        return self.id_to_mime.get(id, ow)
835
836    def map_id_to_properties(self, id, ow):
837        id = _unicodestr(id)
838        return self.id_to_props.get(id, ow)
839
840    def map_id_to_fallback(self, id, ow):
841        id = _unicodestr(id)
842        return self.id_to_fall.get(id, ow)
843
844    def map_id_to_overlay(self, id, ow):
845        id = _unicodestr(id)
846        return self.id_to_over.get(id, ow)
847
848    # new in Sigil 1.0
849    # returns a sorted folder list for that group
850    # valid groups: Text, Styles, Images, Fonts, Audio, Video, ncx, opf, Misc
851    def map_group_to_folders(self, group, ow):
852        group = _unicodestr(group)
853        return self.group_paths.get(group, ow)
854
855    # new in Sigil 1.0
856    def map_mediatype_to_group(self, mtype, ow):
857        mtype = _unicodestr(mtype)
858        return mime_group_map.get(mtype, ow)
859
860
861    # routines to work on ebook files that are not part of an opf manifest
862    # their "id" is actually their unique relative path from book root
863    # this is called either a book href or a book path
864    # we use book_href or bookhref  when working with "other" files
865    # we use bookpath when working with files in the manifest
866
867    def readotherfile(self, book_href):
868        id = _unicodestr(book_href)
869        id = urldecodepart(id)
870        if id is None:
871            raise WrapperException('None is not a valid book href')
872        if id not in self.other and id in self.id_to_href:
873            raise WrapperException('Incorrect interface routine - use readfile')
874        # handle special case of trying to read the opf after it has been modified
875        if id == self.opfbookpath:
876            if id in self.modified:
877                return self.build_opf()
878        filepath = self.book_href_to_filepath.get(id, None)
879        if filepath is None:
880            raise WrapperException('Book href does not exist')
881        basedir = self.ebook_root
882        if id in self.added or id in self.modified:
883            basedir = self.outdir
884        filepath = os.path.join(basedir, filepath)
885        if not os.path.exists(filepath):
886            raise WrapperException('File Does Not Exist')
887        basename = os.path.basename(filepath)
888        ext = os.path.splitext(basename)[1]
889        ext = ext.lower()
890        mime = ext_mime_map.get(ext, "")
891        data = b''
892        with open(filepath, 'rb') as fp:
893            data = fp.read()
894        if mime in TEXT_MIMETYPES:
895            data = _unicodestr(data)
896        return data
897
898    def writeotherfile(self, book_href, data):
899        id = _unicodestr(book_href)
900        id = urldecodepart(id)
901        if id is None:
902            raise WrapperException('None is not a valid book href')
903        if id not in self.other and id in self.id_to_href:
904            raise WrapperException('Incorrect interface routine - use writefile')
905        filepath = self.book_href_to_filepath.get(id, None)
906        if filepath is None:
907            raise WrapperException('Book href does not exist')
908        if id in PROTECTED_FILES or id == self.opfbookpath:
909            raise WrapperException('Attempt to modify protected file')
910        filepath = os.path.join(self.outdir, filepath)
911        base = os.path.dirname(filepath)
912        if not os.path.exists(base):
913            os.makedirs(base)
914        if isinstance(data, str):
915            data = _utf8str(data)
916        with open(filepath, 'wb') as fp:
917            fp.write(data)
918        self.modified[id] = 'file'
919
920    def addotherfile(self, book_href, data) :
921        id = _unicodestr(book_href)
922        id = urldecodepart(id)
923        if id is None:
924            raise WrapperException('None is not a valid book href')
925        if id in self.other:
926            raise WrapperException('Book href must be unique')
927        desired_path = id.replace("/", os.sep)
928        filepath = os.path.join(self.outdir, desired_path)
929        if os.path.isfile(filepath):
930            raise WrapperException('Desired path already exists')
931        base = os.path.dirname(filepath)
932        if not os.path.exists(base):
933            os.makedirs(base)
934        if isinstance(data, str):
935            data = _utf8str(data)
936        with open(filepath, 'wb')as fp:
937            fp.write(data)
938        self.other.append(id)
939        self.added.append(id)
940        self.book_href_to_filepath[id] = desired_path
941
942    def deleteotherfile(self, book_href):
943        id = _unicodestr(book_href)
944        id = urldecodepart(id)
945        if id is None:
946            raise WrapperException('None is not a valid book hrefbook href')
947        if id not in self.other and id in self.id_to_href:
948            raise WrapperException('Incorrect interface routine - use deletefile')
949        filepath = self.book_href_to_filepath.get(id, None)
950        if filepath is None:
951            raise WrapperException('Book href does not exist')
952        if id in PROTECTED_FILES or id == self.opfbookpath:
953            raise WrapperException('attempt to delete protected file')
954        add_to_deleted = True
955        # if file was added or modified delete file from outdir
956        if id in self.added or id in self.modified:
957            filepath = os.path.join(self.outdir, filepath)
958            if os.path.exists(filepath) and os.path.isfile(filepath):
959                os.remove(filepath)
960            if id in self.added:
961                self.added.remove(id)
962                add_to_deleted = False
963            if id in self.other:
964                self.other.remove(id)
965            if id in self.modified:
966                del self.modified[id]
967        if add_to_deleted:
968            self.deleted.append(('other', id, book_href))
969        del self.book_href_to_filepath[id]
970
971
    # utility routine to copy entire ebook to a destination directory
    # including any prior updates and changes to the opf
974
975    def copy_book_contents_to(self, destdir):
976        destdir = _unicodestr(destdir)
977        if destdir is None or not os.path.isdir(destdir):
978            raise WrapperException('destination directory does not exist')
979        for id in self.id_to_filepath:
980            rpath = self.id_to_filepath[id]
981            data = self.readfile(id)
982            filepath = os.path.join(destdir, rpath)
983            base = os.path.dirname(filepath)
984            if not os.path.exists(base):
985                os.makedirs(base)
986            if isinstance(data, str):
987                data = _utf8str(data)
988            with open(filepath, 'wb') as fp:
989                fp.write(data)
990        for id in self.book_href_to_filepath:
991            rpath = self.book_href_to_filepath[id]
992            data = self.readotherfile(id)
993            filepath = os.path.join(destdir, rpath)
994            base = os.path.dirname(filepath)
995            if not os.path.exists(base):
996                os.makedirs(base)
997            if isinstance(data, str):
998                data = _utf8str(data)
999            with open(filepath, 'wb') as fp:
1000                fp.write(data)
1001
1002    def get_dictionary_dirs(self):
1003        apaths = []
1004        if sys.platform.startswith('darwin'):
1005            apaths.append(os.path.abspath(os.path.join(self.appdir, "..", "hunspell_dictionaries")))
1006            apaths.append(os.path.abspath(os.path.join(self.usrsupdir, "hunspell_dictionaries")))
1007        elif sys.platform.startswith('win'):
1008            apaths.append(os.path.abspath(os.path.join(self.appdir, "hunspell_dictionaries")))
1009            apaths.append(os.path.abspath(os.path.join(self.usrsupdir, "hunspell_dictionaries")))
1010        else:
1011            # Linux
1012            for path in self.linux_hunspell_dict_dirs:
1013                apaths.append(os.path.abspath(path.strip()))
1014            apaths.append(os.path.abspath(os.path.join(self.usrsupdir, "hunspell_dictionaries")))
1015        return apaths
1016
1017    def get_gumbo_path(self):
1018        if sys.platform.startswith('darwin'):
1019            lib_dir = os.path.abspath(os.path.join(self.appdir, "..", "lib"))
1020            lib_name = 'libsigilgumbo.dylib'
1021        elif sys.platform.startswith('win'):
1022            lib_dir = os.path.abspath(self.appdir)
1023            lib_name = 'sigilgumbo.dll'
1024        else:
1025            lib_dir = os.path.abspath(self.appdir)
1026            lib_name = 'libsigilgumbo.so'
1027        return os.path.join(lib_dir, lib_name)
1028
1029    def get_hunspell_path(self):
1030        if sys.platform.startswith('darwin'):
1031            lib_dir = os.path.abspath(os.path.join(self.appdir, "..", "lib"))
1032            lib_name = 'libhunspell.dylib'
1033        elif sys.platform.startswith('win'):
1034            lib_dir = os.path.abspath(self.appdir)
1035            lib_name = 'hunspell.dll'
1036        else:
1037            lib_dir = os.path.abspath(self.appdir)
1038            lib_name = 'libhunspell.so'
1039        return os.path.join(lib_dir, lib_name)
1040