__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''''''

import sys, array, os, re, codecs, logging
from itertools import chain

from calibre import setup_cli_handlers
from calibre.utils.config import OptionParser
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
        Font, Text, TOCObject, BookAttr, ruby_tags
from polyglot.builtins import itervalues


class LRFDocument(LRFMetaFile):
    '''
    An LRF file whose object table has been parsed into Python objects and
    which can be serialised to LRS XML via :meth:`to_xml`.

    Typical use: construct with an open binary stream, call :meth:`parse`,
    then call :meth:`to_xml`.
    '''

    class temp:
        # Plain attribute bag used for ``metadata``, ``doc_info`` and
        # ``device_info``.
        pass

    def __init__(self, stream):
        '''
        :param stream: Stream of the LRF file; handed to ``LRFMetaFile``.
        '''
        LRFMetaFile.__init__(self, stream)
        self.scramble_key = self.xor_key
        self.page_trees = []
        self.font_map = {}
        self.image_map = {}
        self.toc = ''
        # Checked between objects while parsing; nothing in this module sets
        # it to False -- presumably a hook for callers to abort a parse.
        self.keep_parsing = True

    def parse(self):
        '''
        Parse every object in the file, then snapshot the header fields into
        the ``metadata``, ``doc_info`` and ``device_info`` attribute bags.
        '''
        self._parse_objects()
        self.metadata = LRFDocument.temp()
        for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id',
                  'classification', 'free_text', 'publisher', 'label', 'category'):
            setattr(self.metadata, a, getattr(self, a))
        self.doc_info = LRFDocument.temp()
        for a in ('thumbnail', 'language', 'creator', 'producer', 'page'):
            setattr(self.doc_info, a, getattr(self, a))
        # NOTE(review): 'thumbail' (sic) -- the method comes from the base
        # class, which is not visible here, so the spelling is left untouched.
        self.doc_info.thumbnail_extension = self.thumbail_extension()
        self.device_info = LRFDocument.temp()
        for a in ('dpi', 'width', 'height'):
            setattr(self.device_info, a, getattr(self, a))

    def _parse_objects(self):
        '''
        Read the object index and build ``self.objects`` (object id -> object),
        then call ``initialize()`` on every object that defines it.
        '''
        self.objects = {}
        self._file.seek(self.object_index_offset)
        # Each index entry is four 4-byte words; only the first three
        # (id, offset, size) are used.
        obj_array = array.array("I", self._file.read(4*4*self.number_of_objects))
        if sys.byteorder == 'big':
            # The values are swapped on big-endian hosts, i.e. the on-disk
            # index is treated as little-endian.
            obj_array.byteswap()
        for i in range(self.number_of_objects):
            if not self.keep_parsing:
                break
            objid, objoff, objsize = obj_array[i*4:i*4+3]
            self._parse_object(objid, objoff, objsize)
        for obj in self.objects.values():
            if not self.keep_parsing:
                break
            if hasattr(obj, 'initialize'):
                obj.initialize()

    def _parse_object(self, objid, objoff, objsize):
        '''
        Instantiate one object via ``get_object``, register it in
        ``self.objects`` and record it specially if it is a page tree, the
        table of contents, or the book attributes.
        '''
        obj = get_object(self, self._file, objid, objoff, objsize, self.scramble_key)
        self.objects[objid] = obj
        if isinstance(obj, PageTree):
            self.page_trees.append(obj)
        elif isinstance(obj, TOCObject):
            self.toc = obj
        elif isinstance(obj, BookAttr):
            # Copy over whichever ruby-related attributes the BookAttr carries.
            self.ruby_tags = {}
            for h in ruby_tags.values():
                attr = h[0]
                if hasattr(obj, attr):
                    self.ruby_tags[attr] = getattr(obj, attr)

    def __iter__(self):
        '''Iterate over the page trees in the order they were parsed.'''
        yield from self.page_trees

    def write_files(self):
        '''
        Write every entry of ``image_map`` and ``font_map`` to the path held
        in its ``file`` attribute.
        '''
        for obj in chain(itervalues(self.image_map), itervalues(self.font_map)):
            with open(obj.file, 'wb') as f:
                f.write(obj.stream)

    def to_xml(self, write_files=True):
        '''
        Serialise the parsed document to an LRS XML string.

        :param write_files: If True, also write the thumbnail (when present)
            and the image/font resources to disk (see :meth:`write_files`).
        :return: The document as a string, without an XML declaration.
        '''
        meta, info = self.metadata, self.doc_info
        # NOTE(review): metadata values are interpolated without XML escaping;
        # confirm whether the base class already returns escaped text.
        bookinfo = [
            '<BookInformation>\n<Info version="1.1">\n<BookInfo>\n',
            '<Title reading="%s">%s</Title>\n'%(meta.title_reading, meta.title),
            '<Author reading="%s">%s</Author>\n'%(meta.author_reading, meta.author),
            '<BookID>%s</BookID>\n'%(meta.book_id,),
            '<Publisher reading="">%s</Publisher>\n'%(meta.publisher,),
            '<Label reading="">%s</Label>\n'%(meta.label,),
            '<Category reading="">%s</Category>\n'%(meta.category,),
            '<Classification reading="">%s</Classification>\n'%(meta.classification,),
            '<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n'%(meta.free_text,),
        ]
        th = info.thumbnail
        if th:
            thumbnail_name = ascii_filename(meta.title)+'_thumbnail.'+info.thumbnail_extension
            bookinfo.append('<CThumbnail file="%s" />\n'%(thumbnail_name,))
            if write_files:
                with open(thumbnail_name, 'wb') as f:
                    f.write(th)
        bookinfo.append('<Language reading="">%s</Language>\n'%(info.language,))
        bookinfo.append('<Creator reading="">%s</Creator>\n'%(info.creator,))
        bookinfo.append('<Producer reading="">%s</Producer>\n'%(info.producer,))
        bookinfo.append('<SumPage>%s</SumPage>\n</DocInfo>\n</Info>\n%s</BookInformation>\n'%(info.page, self.toc))
        bookinfo = ''.join(bookinfo)

        # The first page tree is emitted as <Main>; any further ones as
        # <PageTree objid="...">.
        pages = []
        pt_id = -1
        for index, page_tree in enumerate(self):
            if index == 0:
                pages.append('<Main>\n')
                close = '</Main>\n'
                pt_id = page_tree.id
            else:
                pages.append('<PageTree objid="%d">\n'%(page_tree.id,))
                close = '</PageTree>\n'
            pages.extend(str(page) for page in page_tree)
            pages.append(close)
        pages = ''.join(pages)

        # Ids already serialised as part of the page markup (plus the main
        # page tree); a set keeps the membership test below O(1).
        traversed_objects = {int(i) for i in re.findall(r'objid="(\w+)"', pages)}
        traversed_objects.add(pt_id)

        objects = ['\n<Objects>\n']
        styles = ['\n<Style>\n']
        for obj in self.objects.values():
            if obj.id in traversed_objects:
                continue
            if isinstance(obj, (Font, Text, TOCObject)):
                continue
            if isinstance(obj, StyleObject):
                styles.append(str(obj))
            else:
                objects.append(str(obj))
        styles.append('</Style>\n')
        objects.append('</Objects>\n')
        if write_files:
            self.write_files()
        return ('<BBeBXylog version="1.0">\n' + bookinfo + pages +
                ''.join(styles) + ''.join(objects) + '</BBeBXylog>')


def option_parser():
    '''Build the command line option parser for the LRF to LRS converter.'''
    parser = OptionParser(usage=_('%prog book.lrf\nConvert an LRF file into an LRS (XML UTF-8 encoded) file'))
    parser.add_option('--output', '-o', default=None, help=_('Output LRS file'), dest='out')
    parser.add_option('--dont-output-resources', default=True, action='store_false',
                      help=_('Do not save embedded image and font files to disk'),
                      dest='output_resources')
    parser.add_option('--verbose', default=False, action='store_true', dest='verbose', help=_('Be more verbose'))
    return parser


def main(args=sys.argv, logger=None):
    '''
    Command line entry point: convert the LRF file named in ``args[1]`` to LRS.

    :param args: Argument vector, program name first.
    :param logger: Logger to report progress on; when None a ``lrf2lrs``
        logger is created and configured from ``--verbose``.
    :return: 0 on success, 1 if the wrong number of arguments was given.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if logger is None:
        level = logging.DEBUG if opts.verbose else logging.INFO
        logger = logging.getLogger('lrf2lrs')
        setup_cli_handlers(logger, level)
    if len(args) != 2:
        parser.print_help()
        return 1
    if opts.out is None:
        # Default: same directory and base name as the input, .lrs extension.
        opts.out = os.path.join(os.path.dirname(args[1]), os.path.splitext(os.path.basename(args[1]))[0]+".lrs")
    logger.info(_('Parsing LRF...'))
    # Keep the input open until the XML has been generated, then close it
    # deterministically instead of leaking the handle.
    with open(args[1], 'rb') as stream:
        d = LRFDocument(stream)
        d.parse()
        logger.info(_('Creating XML...'))
        xml = d.to_xml(write_files=opts.output_resources)
    # Only create the output file once conversion succeeded, so a failure
    # does not leave a header-only .lrs behind.
    with codecs.open(os.path.abspath(os.path.expanduser(opts.out)), 'wb', 'utf-8') as f:
        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        f.write(xml)
    logger.info(_('LRS written to ')+opts.out)
    return 0


if __name__ == '__main__':
    sys.exit(main())