1# -*- coding: utf-8 -*-
2
3
4'''
5Write content to ereader pdb file.
6'''
7
8__license__   = 'GPL v3'
9__copyright__ = '2009, John Schember <john@nachtimwald.com>'
10__docformat__ = 'restructuredtext en'
11
12import io
13import re
14import struct
15import zlib
16
17from PIL import Image
18
19from calibre.ebooks.pdb.formatwriter import FormatWriter
20from calibre.ebooks.pdb.header import PdbHeaderBuilder
21from calibre.ebooks.pml.pmlml import PMLMLizer
22from polyglot.builtins import as_bytes
23
# Identity string handed to PdbHeaderBuilder when the PDB header is built.
# NOTE(review): presumably the PDB type/creator code for eReader books - confirm.
IDENTITY = 'PNRdPPrs'

# Upper bound, in bytes, on the uncompressed text stored in one text record.
# This is an arbitrary number that is small enough to work. The actual maximum
# record size is unknown.
MAX_RECORD_SIZE = 8192
29
30
31class Writer(FormatWriter):
32
33    def __init__(self, opts, log):
34        self.opts = opts
35        self.log = log
36
37    def write_content(self, oeb_book, out_stream, metadata=None):
38        pmlmlizer = PMLMLizer(self.log)
39        pml = str(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
40
41        text, text_sizes = self._text(pml)
42        chapter_index = self._index_item(br'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
43        chapter_index += self._index_item(br'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
44        chapter_index += self._index_item(br'(?s)\\x(?P<text>.+?)\\x', pml)
45        link_index = self._index_item(br'(?s)\\Q="(?P<text>.+?)"', pml)
46        images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
47        metadata = [self._metadata(metadata)]
48        hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]
49
50        '''
51        Record order as generated by Dropbook.
52            1. eReader Header
53            2. Compressed text
54            3. Small font page index
55            4. Large font page index
56            5. Chapter index
57            6. Links index
58            7. Images
59            8. (Extrapolation: there should be one more record type here though yet uncovered what it might be).
60            9. Metadata
61           10. Sidebar records
62           11. Footnote records
63           12. Text block size record
64           13. "MeTaInFo\x00" word record
65        '''
66        sections = hr+text+chapter_index+link_index+images+metadata+[text_sizes]+[b'MeTaInFo\x00']
67
68        lengths = [len(i) if i not in images else len(i[0]) + len(i[1]) for i in sections]
69
70        pdbHeaderBuilder = PdbHeaderBuilder(IDENTITY, metadata[0].partition(b'\x00')[0])
71        pdbHeaderBuilder.build_header(lengths, out_stream)
72
73        for item in sections:
74            if item in images:
75                out_stream.write(item[0])
76                out_stream.write(item[1])
77            else:
78                out_stream.write(item)
79
80    def _text(self, pml):
81        pml_pages = []
82        text_sizes = b''
83        index = 0
84        while index < len(pml):
85            '''
86            Split on the space character closest to MAX_RECORD_SIZE when possible.
87            '''
88            split = pml.rfind(b' ', index, MAX_RECORD_SIZE)
89            if split == -1:
90                len_end = len(pml[index:])
91                if len_end > MAX_RECORD_SIZE:
92                    split = MAX_RECORD_SIZE
93                else:
94                    split = len_end
95            if split == 0:
96                split = 1
97            pml_pages.append(zlib.compress(pml[index:index+split]))
98            text_sizes += struct.pack('>H', split)
99            index += split
100
101        return pml_pages, text_sizes
102
103    def _index_item(self, regex, pml):
104        index = []
105        for mo in re.finditer(regex, pml):
106            item = b''
107            if 'text' in mo.groupdict().keys():
108                item += struct.pack('>L', mo.start())
109                text = mo.group('text')
110                # Strip all PML tags from text
111                text = re.sub(br'\\U[0-9a-z]{4}', '', text)
112                text = re.sub(br'\\a\d{3}', '', text)
113                text = re.sub(br'\\.', '', text)
114                # Add appropriate spacing to denote the various levels of headings
115                if 'val' in mo.groupdict().keys():
116                    text = b'%s%s' % (b' ' * 4 * int(mo.group('val')), text)
117                item += text
118                item += b'\x00'
119            if item:
120                index.append(item)
121        return index
122
123    def _images(self, manifest, image_hrefs):
124        '''
125        Image format.
126
127        0-4   : 'PNG '. There must be a space after PNG.
128        4-36  : Image name. Must be exactly 32 bytes long. Pad with \x00 for names shorter than 32 bytes
129        36-58 : Unknown.
130        58-60 : Width.
131        60-62 : Height.
132        62-...: Raw image data in 8 bit PNG format.
133        '''
134        images = []
135        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
136
137        for item in manifest:
138            if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
139                try:
140                    im = Image.open(io.BytesIO(item.data)).convert('P')
141                    im.thumbnail((300,300), Image.ANTIALIAS)
142
143                    data = io.BytesIO()
144                    im.save(data, 'PNG')
145                    data = data.getvalue()
146                    href = as_bytes(image_hrefs[item.href])
147
148                    header = b'PNG '
149                    header += href.ljust(32, b'\x00')[:32]
150                    header = header.ljust(58, b'\x00')
151                    header += struct.pack('>HH', im.size[0], im.size[1])
152                    header = header.ljust(62, b'\x00')
153
154                    if len(data) + len(header) < 65505:
155                        images.append((header, data))
156                except Exception as e:
157                    self.log.error('Error: Could not include file %s because '
158                        '%s.' % (item.href, e))
159
160        return images
161
162    def _metadata(self, metadata):
163        '''
164        Metadata takes the form:
165        title\x00
166        author\x00
167        copyright\x00
168        publisher\x00
169        isbn\x00
170        '''
171
172        title = _('Unknown')
173        author = _('Unknown')
174        copyright = ''
175        publisher = ''
176        isbn = ''
177
178        if metadata:
179            if len(metadata.title) >= 1:
180                title = metadata.title[0].value
181            if len(metadata.creator) >= 1:
182                from calibre.ebooks.metadata import authors_to_string
183                author = authors_to_string([x.value for x in metadata.creator])
184            if len(metadata.rights) >= 1:
185                copyright = metadata.rights[0].value
186            if len(metadata.publisher) >= 1:
187                publisher = metadata.publisher[0].value
188
189        return as_bytes('%s\x00%s\x00%s\x00%s\x00%s\x00' % (title, author, copyright, publisher, isbn))
190
191    def _header_record(self, text_count, chapter_count, link_count, image_count):
192        '''
193        text_count = the number of text pages
194        image_count = the number of images
195        '''
196        compression = 10  # zlib compression.
197        non_text_offset = text_count + 1
198
199        chapter_offset = non_text_offset
200        link_offset = chapter_offset + chapter_count
201
202        if image_count > 0:
203            image_data_offset = link_offset + link_count
204            meta_data_offset = image_data_offset + image_count
205            last_data_offset = meta_data_offset + 1
206        else:
207            meta_data_offset = link_offset + link_count
208            last_data_offset = meta_data_offset + 1
209            image_data_offset = last_data_offset
210
211        if chapter_count == 0:
212            chapter_offset = last_data_offset
213        if link_count == 0:
214            link_offset = last_data_offset
215
216        record = b''
217
218        record += struct.pack('>H', compression)            # [0:2]    # Compression. Specifies compression and drm. 2 = palmdoc, 10 = zlib. 260 and 272 = DRM
219        record += struct.pack('>H', 0)                      # [2:4]    # Unknown.
220        record += struct.pack('>H', 0)                      # [4:6]    # Unknown.
221        record += struct.pack('>H', 25152)                  # [6:8]    # 25152 is MAGIC. Somehow represents the cp1252 encoding of the text
222        record += struct.pack('>H', 0)                      # [8:10]   # Number of small font pages. 0 if page index is not built.
223        record += struct.pack('>H', 0)                      # [10:12]  # Number of large font pages. 0 if page index is not built.
224        record += struct.pack('>H', non_text_offset)        # [12:14]  # Non-Text record start.
225        record += struct.pack('>H', chapter_count)          # [14:16]  # Number of chapter index records.
226        record += struct.pack('>H', 0)                      # [16:18]  # Number of small font page index records.
227        record += struct.pack('>H', 0)                      # [18:20]  # Number of large font page index records.
228        record += struct.pack('>H', image_count)            # [20:22]  # Number of images.
229        record += struct.pack('>H', link_count)             # [22:24]  # Number of links.
230        record += struct.pack('>H', 1)                      # [24:26]  # 1 if has metadata, 0 if not.
231        record += struct.pack('>H', 0)                      # [26:28]  # Unknown.
232        record += struct.pack('>H', 0)                      # [28:30]  # Number of Footnotes.
233        record += struct.pack('>H', 0)                      # [30:32]  # Number of Sidebars.
234        record += struct.pack('>H', chapter_offset)         # [32:34]  # Chapter index offset.
235        record += struct.pack('>H', 2560)                   # [34:36]  # 2560 is MAGIC.
236        record += struct.pack('>H', last_data_offset)       # [36:38]  # Small font page offset. This will be the last data offset if there are none.
237        record += struct.pack('>H', last_data_offset)       # [38:40]  # Large font page offset. This will be the last data offset if there are none.
238        record += struct.pack('>H', image_data_offset)      # [40:42]  # Image offset. This will be the last data offset if there are none.
239        record += struct.pack('>H', link_offset)            # [42:44]  # Links offset. This will be the last data offset if there are none.
240        record += struct.pack('>H', meta_data_offset)       # [44:46]  # Metadata offset. This will be the last data offset if there are none.
241        record += struct.pack('>H', 0)                      # [46:48]  # Unknown.
242        record += struct.pack('>H', last_data_offset)       # [48:50]  # Footnote offset. This will be the last data offset if there are none.
243        record += struct.pack('>H', last_data_offset)       # [50:52]  # Sidebar offset. This will be the last data offset if there are none.
244        record += struct.pack('>H', last_data_offset)       # [52:54]  # Last data offset.
245
246        for i in range(54, 132, 2):
247            record += struct.pack('>H', 0)                  # [54:132]
248
249        return record
250