1# -*- coding: utf-8 -*-
2
3
4__license__ = 'GPL 3'
5__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
6__docformat__ = 'restructuredtext en'
7
8'''
9Convert OEB ebook format to PDF.
10'''
11
12import glob, os
13
14from calibre.customize.conversion import (OutputFormatPlugin,
15    OptionRecommendation)
16from calibre.ptempfile import TemporaryDirectory
17from polyglot.builtins import iteritems
18
# Units of measure offered as choices for the ``unit`` option.
UNITS = (
    'millimeter',
    'centimeter',
    'point',
    'inch',
    'pica',
    'didot',
    'cicero',
    'devicepixel',
)

# Named paper sizes offered as choices for the ``paper_size`` option.
PAPER_SIZES = (
    'a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6',
    'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6',
    'legal', 'letter',
)
24
25
class PDFOutput(OutputFormatPlugin):
    '''
    Output plugin that converts an OEB book to PDF.

    Text based books are first written out as an OEB directory and then
    converted by ``calibre.ebooks.pdf.html_writer``; image collections are
    handed to ``calibre.ebooks.pdf.image_writer``.
    '''

    name = 'PDF Output'
    author = 'Kovid Goyal'
    file_type = 'pdf'
    commit_name = 'pdf_output'
    # Choice lists exposed alongside the plugin; 'font_types' is also used
    # below as the choices for the pdf_standard_font option.
    ui_data = {'paper_sizes': PAPER_SIZES, 'units': UNITS, 'font_types': ('serif', 'sans', 'mono')}

    options = {
        OptionRecommendation(name='use_profile_size', recommended_value=False,
            help=_('Instead of using the paper size specified in the PDF Output options,'
                   ' use a paper size corresponding to the current output profile.'
                   ' Useful if you want to generate a PDF for viewing on a specific device.')),
        OptionRecommendation(name='unit', recommended_value='inch',
            level=OptionRecommendation.LOW, short_switch='u', choices=UNITS,
            help=_('The unit of measure for page sizes. Default is inch. Choices '
            'are {} '
            'Note: This does not override the unit for margins!').format(', '.join(UNITS))),
        OptionRecommendation(name='paper_size', recommended_value='letter',
            level=OptionRecommendation.LOW, choices=PAPER_SIZES,
            help=_('The size of the paper. This size will be overridden when a '
            'non default output profile is used. Default is letter. Choices '
            'are {}').format(', '.join(PAPER_SIZES))),
        OptionRecommendation(name='custom_size', recommended_value=None,
            help=_('Custom size of the document. Use the form width x height '
            'e.g. `123x321` to specify the width and height. '
            'This overrides any specified paper-size.')),
        OptionRecommendation(name='preserve_cover_aspect_ratio',
            recommended_value=False,
            help=_('Preserve the aspect ratio of the cover, instead'
                ' of stretching it to fill the full first page of the'
                ' generated PDF.')),
        OptionRecommendation(name='pdf_serif_family',
            recommended_value='Times', help=_(
                'The font family used to render serif fonts. Will work only if the font is available system-wide.')),
        OptionRecommendation(name='pdf_sans_family',
            recommended_value='Helvetica', help=_(
                'The font family used to render sans-serif fonts. Will work only if the font is available system-wide.')),
        OptionRecommendation(name='pdf_mono_family',
            recommended_value='Courier', help=_(
                'The font family used to render monospace fonts. Will work only if the font is available system-wide.')),
        # The help text here used to be a copy of the monospace family
        # description, which does not describe this option.
        OptionRecommendation(name='pdf_standard_font', choices=ui_data['font_types'],
            recommended_value='serif', help=_(
                'The font family type (serif, sans or mono) to use as the standard font')),
        OptionRecommendation(name='pdf_default_font_size',
            recommended_value=20, help=_(
                'The default font size (in pixels)')),
        OptionRecommendation(name='pdf_mono_font_size',
            recommended_value=16, help=_(
                'The default font size for monospaced text (in pixels)')),
        OptionRecommendation(name='pdf_hyphenate', recommended_value=False,
            help=_('Break long words at the end of lines. This can give the text at the right margin a more even appearance.'
                   ' Note that depending on the fonts used this option can break the copying of text from the PDF file.')),
        OptionRecommendation(name='pdf_mark_links', recommended_value=False,
            help=_('Surround all links with a red box, useful for debugging.')),
        OptionRecommendation(name='pdf_page_numbers', recommended_value=False,
            help=_('Add page numbers to the bottom of every page in the generated PDF file. If you '
                   'specify a footer template, it will take precedence '
                   'over this option.')),
        OptionRecommendation(name='pdf_footer_template', recommended_value=None,
            help=_('An HTML template used to generate %s on every page.'
                   ' The strings _PAGENUM_, _TITLE_, _AUTHOR_ and _SECTION_ will be replaced by their current values.')%_('footers')),
        OptionRecommendation(name='pdf_header_template', recommended_value=None,
            help=_('An HTML template used to generate %s on every page.'
                   ' The strings _PAGENUM_, _TITLE_, _AUTHOR_ and _SECTION_ will be replaced by their current values.')%_('headers')),
        OptionRecommendation(name='pdf_add_toc', recommended_value=False,
            help=_('Add a Table of Contents at the end of the PDF that lists page numbers. '
                   'Useful if you want to print out the PDF. If this PDF is intended for electronic use, use the PDF Outline instead.')),
        OptionRecommendation(name='toc_title', recommended_value=None,
            help=_('Title for generated table of contents.')
        ),

        # NOTE(review): unlike the other three margins, the left margin help
        # does not say "unless set to zero" -- confirm which wording matches
        # the actual behaviour before harmonising the strings.
        OptionRecommendation(name='pdf_page_margin_left', recommended_value=72.0,
            level=OptionRecommendation.LOW,
            help=_('The size of the left page margin, in pts. Default is 72pt.'
                   ' Overrides the common left page margin setting.')
        ),

        OptionRecommendation(name='pdf_page_margin_top', recommended_value=72.0,
            level=OptionRecommendation.LOW,
            help=_('The size of the top page margin, in pts. Default is 72pt.'
                   ' Overrides the common top page margin setting, unless set to zero.')
        ),

        OptionRecommendation(name='pdf_page_margin_right', recommended_value=72.0,
            level=OptionRecommendation.LOW,
            help=_('The size of the right page margin, in pts. Default is 72pt.'
                   ' Overrides the common right page margin setting, unless set to zero.')
        ),

        OptionRecommendation(name='pdf_page_margin_bottom', recommended_value=72.0,
            level=OptionRecommendation.LOW,
            help=_('The size of the bottom page margin, in pts. Default is 72pt.'
                   ' Overrides the common bottom page margin setting, unless set to zero.')
        ),
        OptionRecommendation(name='pdf_use_document_margins', recommended_value=False,
            help=_('Use the page margins specified in the input document via @page CSS rules.'
            ' This will cause the margins specified in the conversion settings to be ignored.'
            ' If the document does not specify page margins, the conversion settings will be used as a fallback.')
        ),
        OptionRecommendation(name='pdf_page_number_map', recommended_value=None,
            help=_('Adjust page numbers, as needed. Syntax is a JavaScript expression for the page number.'
                ' For example, "if (n < 3) 0; else n - 3;", where n is current page number.')
        ),
        OptionRecommendation(name='uncompressed_pdf',
            recommended_value=False, help=_(
                'Generate an uncompressed PDF, useful for debugging.')
        ),
        OptionRecommendation(name='pdf_odd_even_offset', recommended_value=0.0,
            level=OptionRecommendation.LOW,
            help=_(
                'Shift the text horizontally by the specified offset (in pts).'
                ' On odd numbered pages, it is shifted to the right and on even'
                ' numbered pages to the left. Use negative numbers for the opposite'
                ' effect. Note that this setting is ignored on pages where the margins'
                ' are smaller than the specified offset. Shifting is done by setting'
                ' the PDF CropBox, not all software respects the CropBox.'
            )
        ),

    }

    def specialize_options(self, log, opts, input_fmt):
        '''
        Prepare Qt WebEngine and adjust options before conversion starts.

        Registers the ``FAKE_PROTOCOL`` URL scheme with Qt WebEngine, calls
        ``must_use_qt()``, remembers ``input_fmt`` on the plugin and, when
        ``opts.pdf_use_document_margins`` is set, sets the four generic
        margin options to -1.
        '''
        # Ensure Qt is setup to be used with WebEngine
        # specialize_options is called early enough in the pipeline
        # that hopefully no Qt application has been constructed as yet
        from qt.webengine import QWebEngineUrlScheme
        from qt.webengine import QWebEnginePage  # noqa
        from calibre.gui2 import must_use_qt
        from calibre.constants import FAKE_PROTOCOL
        scheme = QWebEngineUrlScheme(FAKE_PROTOCOL.encode('ascii'))
        scheme.setSyntax(QWebEngineUrlScheme.Syntax.Host)
        scheme.setFlags(QWebEngineUrlScheme.Flag.SecureScheme)
        QWebEngineUrlScheme.registerScheme(scheme)
        must_use_qt()
        self.input_fmt = input_fmt

        if opts.pdf_use_document_margins:
            # Prevent the conversion pipeline from overwriting document margins
            opts.margin_left = opts.margin_right = opts.margin_top = opts.margin_bottom = -1

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Convert ``oeb_book`` to a PDF written at ``output_path``.

        Stores the conversion state on the plugin instance, derives book
        metadata from the OEB metadata and then dispatches to
        :meth:`convert_images` when ``input_plugin.is_image_collection`` is
        true, or to :meth:`convert_text` otherwise.
        '''
        from io import BytesIO
        from lxml import etree
        # Two different objects are both called OPF in their modules: alias
        # the tag-name helper so the metadata class is not shadowed.
        from calibre.ebooks.oeb.base import OPF as opf_tag, OPF2_NS
        from calibre.ebooks.metadata.opf2 import OPF

        # Per-file page margins, if something earlier attached them to opts
        self.stored_page_margins = getattr(opts, '_stored_page_margins', {})

        self.oeb = oeb_book
        self.input_plugin, self.opts, self.log = input_plugin, opts, log
        self.output_path = output_path

        # Serialize the OEB metadata into a throwaway OPF 2.0 package element
        # and parse it back to obtain a book metadata object.
        package = etree.Element(opf_tag('package'),
            attrib={'version': '2.0', 'unique-identifier': 'dummy'},
            nsmap={None: OPF2_NS})
        self.oeb.metadata.to_opf2(package)
        self.metadata = OPF(BytesIO(etree.tostring(package))).to_book_metadata()
        self.cover_data = None

        if input_plugin.is_image_collection:
            log.debug('Converting input as an image collection...')
            self.convert_images(input_plugin.get_images())
        else:
            log.debug('Converting input as a text based book...')
            self.convert_text(oeb_book)

    def convert_images(self, images):
        ''' Write ``images`` to the output path using the PDF image writer '''
        from calibre.ebooks.pdf.image_writer import convert
        convert(images, self.output_path, self.opts, self.metadata, self.report_progress)

    def get_cover_data(self):
        '''
        Store the raw cover bytes in ``self.cover_data``.

        ``self.cover_data`` is left untouched when the book declares no
        cover, the cover id is not in the manifest, or the manifest item's
        data is not ``bytes``.
        '''
        oeb = self.oeb
        cover = oeb.metadata.cover
        if not cover:
            return
        cover_id = str(cover[0])
        if cover_id not in oeb.manifest.ids:
            return
        item = oeb.manifest.ids[cover_id]
        if isinstance(item.data, bytes):
            self.cover_data = item.data

    def process_fonts(self):
        ''' Make sure all fonts are embeddable '''
        from calibre.ebooks.oeb.base import urlnormalize
        from calibre.utils.fonts.utils import remove_embed_restriction

        processed = set()
        # Iterate over a snapshot of the manifest, as the original code did
        for item in list(self.oeb.manifest):
            if not hasattr(item.data, 'cssRules'):
                continue
            for rule in item.data.cssRules:
                if rule.type != rule.FONT_FACE_RULE:
                    continue
                try:
                    src = rule.style.getProperty('src').propertyValue[0].uri
                except Exception:
                    # Best effort: skip @font-face rules without a usable
                    # src URI. Exception (not a bare except) so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    continue
                path = item.abshref(src)
                ff = self.oeb.manifest.hrefs.get(urlnormalize(path), None)
                if ff is None or path in processed:
                    continue
                # Mark the font as seen before touching it so that a font
                # that fails below is not retried for every rule using it.
                processed.add(path)

                raw = ff.data
                try:
                    nraw = remove_embed_restriction(raw)
                except Exception:
                    # Best effort: leave fonts that cannot be processed as-is
                    continue
                if nraw != raw:
                    ff.data = nraw
                    self.oeb.container.write(path, nraw)

    def convert_text(self, oeb_book):
        '''
        Convert a text based book to PDF.

        Collects the cover data, processes embedded fonts, optionally tags
        document roots with their stored page margins, writes the book out
        as OEB into a temporary directory and runs the HTML to PDF
        converter on the resulting OPF file.
        '''
        import json
        from calibre.ebooks.pdf.html_writer import convert
        self.get_cover_data()
        self.process_fonts()

        if self.opts.pdf_use_document_margins and self.stored_page_margins:
            for href, margins in iteritems(self.stored_page_margins):
                item = oeb_book.manifest.hrefs.get(href)
                if item is not None:
                    root = item.data
                    # Only parsed documents (anything with xpath) can carry
                    # the margins attribute
                    if hasattr(root, 'xpath') and margins:
                        root.set('data-calibre-pdf-output-page-margins', json.dumps(margins))

        with TemporaryDirectory('_pdf_out') as oeb_dir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_dir = os.path.realpath(oeb_dir)
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)
            # Use the first OPF found in the directory the OEB output
            # plugin just wrote to
            opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
            convert(
                opfpath, self.opts, metadata=self.metadata, output_path=self.output_path,
                log=self.log, cover_data=self.cover_data, report_progress=self.report_progress
            )
259