1__license__   = 'GPL v3'
2__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
3''''''
4
5import sys, array, os, re, codecs, logging
6from itertools import chain
7
8from calibre import setup_cli_handlers
9from calibre.utils.config import OptionParser
10from calibre.utils.filenames import ascii_filename
11from calibre.ebooks.lrf.meta import LRFMetaFile
12from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
13                                         Font, Text, TOCObject, BookAttr, ruby_tags
14from polyglot.builtins import itervalues
15
16
class LRFDocument(LRFMetaFile):
    """An LRF file whose objects can be parsed and serialised as LRS XML.

    Construct it with a binary stream, call :meth:`parse` to load the object
    table and snapshot the metadata, then call :meth:`to_xml` to obtain the
    XML text. Iterating over the document yields its page trees.
    """

    class temp:
        """Empty attribute holder used for the ``metadata``, ``doc_info`` and
        ``device_info`` groups created by :meth:`LRFDocument.parse`."""

    def __init__(self, stream):
        """Initialise the document from *stream*, which is handed unchanged
        to ``LRFMetaFile.__init__``."""
        LRFMetaFile.__init__(self, stream)
        self.scramble_key = self.xor_key
        self.page_trees = []
        self.font_map = {}
        self.image_map = {}
        self.toc = ''
        # Defined up front so the attribute exists even when the file has no
        # BookAttr object (it used to be created only in _parse_object).
        self.ruby_tags = {}
        # The parsing loops stop as soon as this becomes false.
        self.keep_parsing = True

    def parse(self):
        """Parse all objects and snapshot the header fields.

        After this call ``self.metadata``, ``self.doc_info`` and
        ``self.device_info`` hold copies of the correspondingly named
        attributes of this document.
        """
        self._parse_objects()
        self.metadata = LRFDocument.temp()
        for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id',
                  'classification', 'free_text', 'publisher', 'label', 'category'):
            setattr(self.metadata, a, getattr(self, a))
        self.doc_info = LRFDocument.temp()
        for a in ('thumbnail', 'language', 'creator', 'producer', 'page'):
            setattr(self.doc_info, a, getattr(self, a))
        self.doc_info.thumbnail_extension = self.thumbail_extension()
        self.device_info = LRFDocument.temp()
        for a in ('dpi', 'width', 'height'):
            setattr(self.device_info, a, getattr(self, a))

    def _parse_objects(self):
        """Read the object index and build every object it lists.

        The index starts at ``self.object_index_offset`` and is read as
        ``4*4`` bytes per object. Of the four array items that make up an
        entry only the first three (id, offset, size) are used. Once all
        objects exist, ``initialize()`` is called on those that define it.
        Both loops stop early when ``self.keep_parsing`` is false.
        """
        self.objects = {}
        self._file.seek(self.object_index_offset)
        obj_array = array.array("I", self._file.read(4*4*self.number_of_objects))
        if sys.byteorder == 'big':
            # The values were read in native order; swap them on big-endian hosts.
            obj_array.byteswap()
        for i in range(self.number_of_objects):
            if not self.keep_parsing:
                break
            objid, objoff, objsize = obj_array[i*4:i*4+3]
            self._parse_object(objid, objoff, objsize)
        for obj in self.objects.values():
            if not self.keep_parsing:
                break
            if hasattr(obj, 'initialize'):
                obj.initialize()

    def _parse_object(self, objid, objoff, objsize):
        """Create one object via ``get_object`` and register it.

        The object is stored in ``self.objects`` under *objid*. Page trees
        are additionally appended to ``self.page_trees``, a TOC object
        replaces ``self.toc``, and a BookAttr object resets
        ``self.ruby_tags`` to the ruby attributes it defines.
        """
        obj = get_object(self, self._file, objid, objoff, objsize, self.scramble_key)
        self.objects[objid] = obj
        if isinstance(obj, PageTree):
            self.page_trees.append(obj)
        elif isinstance(obj, TOCObject):
            self.toc = obj
        elif isinstance(obj, BookAttr):
            self.ruby_tags = {}
            for h in ruby_tags.values():
                attr = h[0]
                if hasattr(obj, attr):
                    self.ruby_tags[attr] = getattr(obj, attr)

    def __iter__(self):
        """Yield the page trees in the order in which they were parsed."""
        yield from self.page_trees

    def write_files(self):
        """Write every entry of ``image_map`` and ``font_map`` to disk.

        Each entry's ``stream`` is written in binary mode to the path given
        by its ``file`` attribute.
        """
        for obj in chain(self.image_map.values(), self.font_map.values()):
            with open(obj.file, 'wb') as f:
                f.write(obj.stream)

    def to_xml(self, write_files=True):
        """Return the document serialised as a ``<BBeBXylog>`` XML string.

        :param write_files: When true, the thumbnail (if there is one) is
            written to ``<ascii title>_thumbnail.<extension>`` using a
            relative path, and :meth:`write_files` is called to save the
            images and fonts.
        :return: The XML text, without an XML declaration.

        :meth:`parse` must have been called first, since this method reads
        ``self.metadata`` and ``self.doc_info``.
        """
        md, di = self.metadata, self.doc_info
        bookinfo = '<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
        bookinfo += '<Title reading="%s">%s</Title>\n'%(md.title_reading, md.title)
        bookinfo += '<Author reading="%s">%s</Author>\n'%(md.author_reading, md.author)
        bookinfo += '<BookID>%s</BookID>\n'%(md.book_id,)
        bookinfo += '<Publisher reading="">%s</Publisher>\n'%(md.publisher,)
        bookinfo += '<Label reading="">%s</Label>\n'%(md.label,)
        bookinfo += '<Category reading="">%s</Category>\n'%(md.category,)
        bookinfo += '<Classification reading="">%s</Classification>\n'%(md.classification,)
        bookinfo += '<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n'%(md.free_text,)
        th = di.thumbnail
        if th:
            thumb_name = ascii_filename(md.title)+'_thumbnail.'+di.thumbnail_extension
            bookinfo += '<CThumbnail file="%s" />\n'%(thumb_name,)
            if write_files:
                with open(thumb_name, 'wb') as f:
                    f.write(th)
        bookinfo += '<Language reading="">%s</Language>\n'%(di.language,)
        bookinfo += '<Creator reading="">%s</Creator>\n'%(di.creator,)
        bookinfo += '<Producer reading="">%s</Producer>\n'%(di.producer,)
        bookinfo += '<SumPage>%s</SumPage>\n</DocInfo>\n</Info>\n%s</BookInformation>\n'%(di.page,self.toc)

        # Collect the page markup in a list and join once, rather than
        # growing one large string page by page.
        page_chunks = []
        pt_id = -1
        for index, page_tree in enumerate(self):
            if index == 0:
                # The first page tree becomes the <Main> element.
                page_chunks.append('<Main>\n')
                close = '</Main>\n'
                pt_id = page_tree.id
            else:
                page_chunks.append('<PageTree objid="%d">\n'%(page_tree.id,))
                close = '</PageTree>\n'
            page_chunks.extend(str(page) for page in page_tree)
            page_chunks.append(close)
        pages = ''.join(page_chunks)

        # Ids already emitted as part of the pages; a set keeps the
        # membership test below constant-time.
        traversed_objects = {int(i) for i in re.findall(r'objid="(\w+)"', pages)}
        traversed_objects.add(pt_id)

        object_chunks = ['\n<Objects>\n']
        style_chunks = ['\n<Style>\n']
        for obj in self.objects.values():
            if obj.id in traversed_objects:
                continue
            if isinstance(obj, (Font, Text, TOCObject)):
                continue
            if isinstance(obj, StyleObject):
                style_chunks.append(str(obj))
            else:
                object_chunks.append(str(obj))
        style_chunks.append('</Style>\n')
        object_chunks.append('</Objects>\n')
        styles = ''.join(style_chunks)
        objects = ''.join(object_chunks)
        if write_files:
            self.write_files()
        return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>'
139
140
def option_parser():
    """Build the command-line option parser for the LRF to LRS converter.

    The parser provides ``--output``/``-o`` (stored as ``out``),
    ``--dont-output-resources`` (clears ``output_resources``, which is true
    by default) and ``--verbose``.
    """
    usage = _('%prog book.lrf\nConvert an LRF file into an LRS (XML UTF-8 encoded) file')
    cli = OptionParser(usage=usage)
    cli.add_option(
        '--output', '-o',
        dest='out',
        default=None,
        help=_('Output LRS file'),
    )
    cli.add_option(
        '--dont-output-resources',
        dest='output_resources',
        action='store_false',
        default=True,
        help=_('Do not save embedded image and font files to disk'),
    )
    cli.add_option(
        '--verbose',
        dest='verbose',
        action='store_true',
        default=False,
        help=_('Be more verbose'),
    )
    return cli
149
150
def main(args=sys.argv, logger=None):
    """Command-line entry point: convert one LRF file to an LRS file.

    :param args: Argument list including the program name; exactly one
        positional argument (the LRF path) must remain after option parsing.
    :param logger: Logger to report progress on. When ``None``, a logger
        named ``lrf2lrs`` is created and configured with
        ``setup_cli_handlers``.
    :return: ``0`` on success, ``1`` if the arguments are wrong (the help
        text is printed in that case).
    """
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if logger is None:
        level = logging.DEBUG if opts.verbose else logging.INFO
        logger = logging.getLogger('lrf2lrs')
        setup_cli_handlers(logger, level)
    if len(args) != 2:
        parser.print_help()
        return 1
    if opts.out is None:
        # Default output: same directory and base name as the input, with .lrs
        opts.out = os.path.join(os.path.dirname(args[1]), os.path.splitext(os.path.basename(args[1]))[0]+".lrs")
    logger.info(_('Parsing LRF...'))
    # Keep the input open for both parsing and XML generation, and make sure
    # it is closed afterwards, including when an error occurs.
    with open(args[1], 'rb') as stream:
        d = LRFDocument(stream)
        d.parse()
        logger.info(_('Creating XML...'))
        with codecs.open(os.path.abspath(os.path.expanduser(opts.out)), 'wb', 'utf-8') as f:
            f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
            f.write(d.to_xml(write_files=opts.output_resources))
    logger.info(_('LRS written to ')+opts.out)
    return 0
172
173
# Script entry point: exit with the status code returned by main().
if __name__ == '__main__':
    raise SystemExit(main())
176