__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os

from calibre.customize.conversion import InputFormatPlugin


class MOBIInput(InputFormatPlugin):
    """Input plugin that converts MOBI-family files to HTML plus an OPF.

    After :meth:`convert` runs, the instance carries ``is_kf8`` and
    ``mobi_is_joint`` flags describing the book that was read and, for KF8
    books only, ``encrypted_fonts`` copied from the KF8 reader.
    """

    name = 'MOBI Input'
    author = 'Kovid Goyal'
    description = _('Convert MOBI files (.mobi, .prc, .azw) to HTML')
    file_types = {'mobi', 'prc', 'azw', 'azw3', 'pobi'}
    commit_name = 'mobi_input'

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Read the MOBI book in *stream* and return the path to its OPF.

        :param stream: Source book, handed unchanged to ``MobiReader``.
        :param options: Conversion options; ``input_encoding`` and
            ``debug_pipeline`` are forwarded to the reader.
        :param file_ext: Not used by this method.
        :param log: Callable log object; also handed to the readers.
        :param accelerators: Dict updated in place with a ``'pagebreaks'``
            XPath expression when the book is not KF8.
        :return: Absolute path of the OPF produced by ``Mobi8Reader`` for
            KF8 books, otherwise ``created_opf_path`` of the ``MobiReader``.
        """
        self.is_kf8 = False
        self.mobi_is_joint = False

        from calibre.ebooks.mobi.reader.mobi6 import MobiReader
        from lxml import html
        parse_cache = {}

        def read_book(**reader_kwargs):
            # Build a reader and, unless the book is KF8, extract its
            # content into the current directory, filling parse_cache.
            reader = MobiReader(stream, log, options.input_encoding,
                                options.debug_pipeline, **reader_kwargs)
            if reader.kf8_type is None:
                reader.extract_content('.', parse_cache)
            return reader

        try:
            mr = read_book()
        except Exception:
            # Any parsing failure gets exactly one retry with the reader's
            # try_extra_data_fix mode switched on. Catching Exception (not a
            # bare except) lets KeyboardInterrupt/SystemExit propagate
            # instead of kicking off a second full parse.
            mr = read_book(try_extra_data_fix=True)

        if mr.kf8_type is not None:
            log('Found KF8 MOBI of type %r'%mr.kf8_type)
            if mr.kf8_type == 'joint':
                self.mobi_is_joint = True
            from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
            mr = Mobi8Reader(mr, log)
            # Calling the KF8 reader yields the OPF path.
            opf = os.path.abspath(mr())
            self.encrypted_fonts = mr.encrypted_fonts
            self.is_kf8 = True
            return opf

        # The raw markup is removed from the cache before the loop below so
        # it is not serialized as if it were a parsed document.
        raw_markup = parse_cache.pop('calibre_raw_mobi_markup', False)
        if raw_markup:
            if isinstance(raw_markup, str):
                raw_markup = raw_markup.encode('utf-8')
            with lopen('debug-raw.html', 'wb') as debug_file:
                debug_file.write(raw_markup)

        from calibre.ebooks.oeb.base import close_self_closing_tags
        # Every remaining cache entry maps an output file name to a parsed
        # tree; serialize each tree as XML and write it to that file.
        for path, root in parse_cache.items():
            data = html.tostring(root, encoding='utf-8', method='xml',
                                 include_meta_content_type=False)
            data = close_self_closing_tags(data)
            with lopen(path, 'wb') as out:
                out.write(data)

        accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
        return mr.created_opf_path