1__license__ = 'GPL 3'
2__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
3__docformat__ = 'restructuredtext en'
4
5import os
6
7from calibre.customize.conversion import InputFormatPlugin
8
9
class MOBIInput(InputFormatPlugin):
    """Input plugin that converts MOBI-family files to HTML plus an OPF.

    After :meth:`convert` has run, the instance exposes:

    * ``is_kf8`` -- ``True`` when the book was handed to ``Mobi8Reader``.
    * ``mobi_is_joint`` -- ``True`` when the reader reported a KF8 type of
      ``'joint'``.
    * ``encrypted_fonts`` -- copied from the ``Mobi8Reader``; only set on the
      KF8 code path.
    """

    name        = 'MOBI Input'
    author      = 'Kovid Goyal'
    description = _('Convert MOBI files (.mobi, .prc, .azw) to HTML')
    file_types  = {'mobi', 'prc', 'azw', 'azw3', 'pobi'}
    commit_name = 'mobi_input'

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Parse ``stream`` as a MOBI book and return the path of its OPF.

        The stream is first parsed with a default ``MobiReader``. If that
        raises, the failure is logged and parsing is retried once with
        ``try_extra_data_fix=True``; an exception from the retry propagates
        to the caller.

        :param stream: Source book, passed straight to ``MobiReader``.
        :param options: Conversion options; ``input_encoding`` and
            ``debug_pipeline`` are read from it.
        :param file_ext: Not used by this method.
        :param log: Callable that accepts a message string.
        :param accelerators: Mapping that receives a ``'pagebreaks'`` entry
            on the non-KF8 path.
        :return: For KF8 books, the absolute path of the OPF returned by
            ``Mobi8Reader``; otherwise ``created_opf_path`` of the
            ``MobiReader``.
        """
        self.is_kf8 = False
        self.mobi_is_joint = False

        from calibre.ebooks.mobi.reader.mobi6 import MobiReader
        from lxml import html
        parse_cache = {}

        def read_mobi(**reader_kwargs):
            # Build a reader and, unless the book is KF8, extract its content
            # into parse_cache. '.' is presumably the output directory (the
            # current working directory) -- TODO confirm against MobiReader.
            reader = MobiReader(stream, log, options.input_encoding,
                                options.debug_pipeline, **reader_kwargs)
            if reader.kf8_type is None:
                reader.extract_content('.', parse_cache)
            return reader

        try:
            mr = read_mobi()
        except Exception as err:
            # Catch Exception rather than everything: KeyboardInterrupt and
            # SystemExit must propagate instead of triggering a second parse.
            log('MOBI parsing failed (%r), retrying with '
                'try_extra_data_fix' % (err,))
            mr = read_mobi(try_extra_data_fix=True)

        if mr.kf8_type is not None:
            log('Found KF8 MOBI of type %r'%mr.kf8_type)
            if mr.kf8_type == 'joint':
                self.mobi_is_joint = True
            from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
            mr = Mobi8Reader(mr, log)
            # Calling the Mobi8Reader returns the OPF path.
            opf = os.path.abspath(mr())
            self.encrypted_fonts = mr.encrypted_fonts
            self.is_kf8 = True
            return opf

        # The raw markup entry is not a parsed tree, so remove it from the
        # cache before the serialisation loop below.
        raw = parse_cache.pop('calibre_raw_mobi_markup', False)
        if raw:
            if isinstance(raw, str):
                raw = raw.encode('utf-8')
            with lopen('debug-raw.html', 'wb') as f:
                f.write(raw)

        from calibre.ebooks.oeb.base import close_self_closing_tags
        # Every remaining cache entry maps an output file name to a parsed
        # tree; serialise each tree as XML and write it to that file.
        for path, root in parse_cache.items():
            markup = html.tostring(root, encoding='utf-8', method='xml',
                    include_meta_content_type=False)
            markup = close_self_closing_tags(markup)
            with lopen(path, 'wb') as out:
                out.write(markup)

        accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
        return mr.created_opf_path
64