1# © 2018 James R. Barlow: github.com/jbarlow83
2#
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this
5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7
8from os import fspath
9from pathlib import Path
10from unittest.mock import patch
11
12import img2pdf
13import pikepdf
14import pytest
15from PIL import Image, ImageDraw
16
17from ocrmypdf import optimize as opt
18from ocrmypdf._exec import jbig2enc, pngquant
19from ocrmypdf._exec.ghostscript import rasterize_pdf
20from ocrmypdf.helpers import Resolution
21
22from .conftest import check_ocrmypdf
23
# Skip markers for tests that depend on optional external tools. A test
# decorated with one of these is skipped when the corresponding wrapper
# module reports the tool as unavailable.
needs_pngquant = pytest.mark.skipif(
    not pngquant.available(),
    reason="pngquant not installed",
)
needs_jbig2enc = pytest.mark.skipif(
    not jbig2enc.available(),
    reason="jbig2enc not installed",
)
30
31
@needs_pngquant
@pytest.mark.parametrize('pdf', ['multipage.pdf', 'palette.pdf'])
def test_basic(resources, pdf, outpdf):
    """Optimize a sample PDF at level 3 and compare file sizes.

    The test passes when 98% of the output size does not exceed the size
    of the input file.
    """
    source = resources / pdf
    opt.main(source, outpdf, level=3)

    optimized_size = Path(outpdf).stat().st_size
    original_size = Path(source).stat().st_size
    assert 0.98 * optimized_size <= original_size
39
40
@needs_pngquant
def test_mono_not_inverted(resources, outdir):
    """Optimize '2400dpi.pdf' and check the rasterized result is not inverted.

    The optimized PDF is rendered to an 8-bit grayscale PNG at 10x10 dpi and
    the top-left pixel is expected to be white (255).
    """
    optimized_pdf = outdir / 'out.pdf'
    rendered_png = outdir / 'im.png'

    opt.main(resources / '2400dpi.pdf', optimized_pdf, level=3)

    rasterize_pdf(
        optimized_pdf,
        rendered_png,
        raster_device='pnggray',
        raster_dpi=Resolution(10, 10),
    )

    with Image.open(fspath(rendered_png)) as page_image:
        top_left = page_image.getpixel((0, 0))
        assert top_left == 255, "Expected white background"
55
56
@needs_pngquant
def test_jpg_png_params(resources, outpdf):
    """Run ocrmypdf on 'crom.png' with explicit JPEG and PNG quality options.

    Only successful completion of ``check_ocrmypdf`` is verified here; the
    output file is not inspected further.
    """
    options = [
        '--image-dpi',
        '200',
        '--optimize',
        '3',
        '--jpg-quality',
        '50',
        '--png-quality',
        '20',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    ]
    check_ocrmypdf(resources / 'crom.png', outpdf, *options)
73
74
@needs_jbig2enc
@pytest.mark.parametrize('lossy', [False, True])
def test_jbig2_lossy(lossy, resources, outpdf):
    """Check the JBIG2 encoding of 'ccitt.pdf' with and without --jbig2-lossy.

    In both modes the first image on the first output page must use the
    /JBIG2Decode filter. With ``lossy`` set, its decode parameters must
    contain /JBIG2Globals; otherwise no decode parameters are expected.
    """
    args = [
        resources / 'ccitt.pdf',
        outpdf,
        '--image-dpi',
        '200',
        '--optimize',
        # Passed as a string, like every other command line argument in
        # this module, rather than relying on the helper to convert an int.
        '3',
        '--jpg-quality',
        '50',
        '--png-quality',
        '20',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    ]
    if lossy:
        args.append('--jbig2-lossy')

    check_ocrmypdf(*args)

    # Open the result in a context manager so the file handle is released
    # even when one of the assertions below fails.
    with pikepdf.open(outpdf) as pdf:
        pim = pikepdf.PdfImage(next(iter(pdf.pages[0].images.values())))
        assert pim.filters[0] == '/JBIG2Decode'

        if lossy:
            assert '/JBIG2Globals' in pim.decode_parms[0]
        else:
            assert len(pim.decode_parms) == 0
105
106
@needs_pngquant
@needs_jbig2enc
def test_flate_to_jbig2(resources, outdir):
    """Check that an 8-bit rendition of a bilevel image ends up as JBIG2.

    'typewriter.png' is converted to mode 'L', run through ocrmypdf at
    optimize level 3, and the first image on the first output page is
    expected to use the /JBIG2Decode filter.
    """
    # This test requires an image that pngquant is capable of converting
    # to 1bpp - so use an existing 1bpp image, convert up, confirm it can
    # convert down
    with Image.open(fspath(resources / 'typewriter.png')) as im:
        assert im.mode in ('1', 'P')
        # Use a separate name so the context-managed source image is not
        # shadowed inside its own ``with`` block.
        gray = im.convert('L')
        gray.save(fspath(outdir / 'type8.png'))

    check_ocrmypdf(
        outdir / 'type8.png',
        outdir / 'out.pdf',
        '--image-dpi',
        '100',
        '--png-quality',
        '50',
        '--optimize',
        '3',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )

    # Open the result in a context manager so the file handle is released
    # even when the assertion fails.
    with pikepdf.open(outdir / 'out.pdf') as pdf:
        pim = pikepdf.PdfImage(next(iter(pdf.pages[0].images.values())))
        assert pim.filters[0] == '/JBIG2Decode'
134
135
@needs_pngquant
def test_multiple_pngs(resources, outdir):
    """Optimize a two-image PDF with a stubbed pngquant and compare sizes.

    A PDF is assembled from two PNG files, ``pngquant.quantize`` is replaced
    by a stub that paints the whole image with a single fill value, and the
    raw bytes of the first image on every output page must be shorter than
    those of the matching input page.
    """
    source_pdf = outdir / 'in.pdf'
    result_pdf = outdir / 'out.pdf'

    with source_pdf.open('wb') as stream:
        img2pdf.convert(
            fspath(resources / 'baiona_colormapped.png'),
            fspath(resources / 'baiona_gray.png'),
            with_pdfrw=False,
            outputstream=stream,
        )

    def fake_quantize(input_file, output_file, *_args):
        # Stand-in for pngquant: cover the full image with fill value 128
        # and write it to the requested output path.
        with Image.open(input_file) as picture:
            canvas = ImageDraw.Draw(picture)
            canvas.rectangle((0, 0, picture.width, picture.height), fill=128)
            picture.save(output_file)

    cli_options = [
        '--optimize',
        '3',
        '--jobs',
        '1',
        '--use-threads',
        '--output-type',
        'pdf',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    ]
    with patch(
        'ocrmypdf.optimize.pngquant.quantize', side_effect=fake_quantize
    ) as quantize_mock:
        check_ocrmypdf(source_pdf, result_pdf, *cli_options)
        quantize_mock.assert_called()

    with pikepdf.open(source_pdf) as before, pikepdf.open(result_pdf) as after:
        for n, before_page in enumerate(before.pages):
            before_image = next(iter(before_page.images.values()))
            after_image = next(iter(after.pages[n].images.values()))
            after_len = len(after_image.read_raw_bytes())
            before_len = len(before_image.read_raw_bytes())
            assert after_len < before_len, n
176
177
def test_optimize_off(resources, outpdf):
    """Run ocrmypdf on 'trivial.pdf' with ``--optimize=0`` and PDF output.

    Only successful completion of ``check_ocrmypdf`` is verified.
    """
    options = [
        '--optimize=0',
        '--output-type',
        'pdf',
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    ]
    check_ocrmypdf(resources / 'trivial.pdf', outpdf, *options)
188
189
def test_group3(resources, outdir):
    """Check which CCITT variants ``extract_image_filter`` accepts.

    The image '/Im1' from 'ccitt.pdf' must yield a result as stored; after
    its /K decode parameter is set to 0 the same call must return None.
    """
    with pikepdf.open(resources / 'ccitt.pdf') as pdf:
        image = pdf.pages[0].Resources.XObject['/Im1']

        as_stored = opt.extract_image_filter(pdf, outdir, image, image.objgen[0])
        assert as_stored is not None, "Group 4 should be allowed"

        # Rewrite the decode parameters before extracting a second time.
        image.DecodeParms['/K'] = 0
        after_edit = opt.extract_image_filter(
            pdf, outdir, image, image.objgen[0]
        )
        assert after_edit is None, "Group 3 should be disallowed"
201