1 /*
2  * This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5  *
6  * Copyright (C) 2017, James R. Barlow (https://github.com/jbarlow83/)
7  */
8 
9 #include <sstream>
10 #include <type_traits>
11 #include <cerrno>
12 #include <cstring>
13 
14 #include "pikepdf.h"
15 
16 #include <qpdf/QPDFExc.hh>
17 #include <qpdf/QPDFSystemError.hh>
18 #include <qpdf/QPDFObjGen.hh>
19 #include <qpdf/QPDFXRefEntry.hh>
20 #include <qpdf/Buffer.hh>
21 #include <qpdf/BufferInputSource.hh>
22 #include <qpdf/QPDFWriter.hh>
23 #include <qpdf/QPDFPageDocumentHelper.hh>
24 #include <qpdf/Pl_Discard.hh>
25 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
26 
27 #include <pybind11/stl.h>
28 #include <pybind11/iostream.h>
29 #include <pybind11/buffer_info.h>
30 
31 #include "qpdf_pagelist.h"
32 #include "qpdf_inputsource-inl.h"
33 #include "mmap_inputsource-inl.h"
34 #include "pipeline.h"
35 #include "utils.h"
36 #include "gsl.h"
37 
38 extern bool MMAP_DEFAULT;
39 
// Strategy open_pdf() uses to read the underlying file or stream.
enum access_mode_e {
    // Resolved inside open_pdf(): becomes access_mmap when MMAP_DEFAULT is
    // true, otherwise access_stream.
    access_default,
    // Read through a PythonStreamInputSource.
    access_stream,
    // Try a MmapInputSource first; on a Python error fall back to
    // access_stream.
    access_mmap,
    // Use a MmapInputSource only; a Python error is rethrown.
    access_mmap_only
};
41 
check_stream_is_usable(py::object stream)42 void check_stream_is_usable(py::object stream)
43 {
44     auto TextIOBase = py::module_::import("io").attr("TextIOBase");
45 
46     if (py::isinstance(stream, TextIOBase)) {
47         throw py::type_error("stream must be binary (no transcoding) and seekable");
48     }
49 }
50 
qpdf_basic_settings(QPDF & q)51 void qpdf_basic_settings(QPDF &q)
52 {
53     q.setSuppressWarnings(true);
54     q.setImmediateCopyFrom(true);
55 }
56 
open_pdf(py::object filename_or_stream,std::string password,bool hex_password=false,bool ignore_xref_streams=false,bool suppress_warnings=true,bool attempt_recovery=true,bool inherit_page_attributes=true,access_mode_e access_mode=access_mode_e::access_default)57 std::shared_ptr<QPDF> open_pdf(py::object filename_or_stream,
58     std::string password,
59     bool hex_password            = false,
60     bool ignore_xref_streams     = false,
61     bool suppress_warnings       = true,
62     bool attempt_recovery        = true,
63     bool inherit_page_attributes = true,
64     access_mode_e access_mode    = access_mode_e::access_default)
65 {
66     auto q = std::make_shared<QPDF>();
67 
68     qpdf_basic_settings(*q);
69     q->setSuppressWarnings(suppress_warnings);
70     q->setPasswordIsHexKey(hex_password);
71     q->setIgnoreXRefStreams(ignore_xref_streams);
72     q->setAttemptRecovery(attempt_recovery);
73 
74     py::object stream;
75     bool closing_stream;
76     std::string description;
77 
78     if (py::hasattr(filename_or_stream, "read") &&
79         py::hasattr(filename_or_stream, "seek")) {
80         // Python code gave us an object with a stream interface
81         stream = filename_or_stream;
82         check_stream_is_usable(stream);
83         closing_stream = false;
84         description    = py::repr(stream);
85     } else {
86         if (py::isinstance<py::int_>(filename_or_stream))
87             throw py::type_error("expected str, bytes or os.PathLike object");
88         auto filename  = fspath(filename_or_stream);
89         auto io_open   = py::module_::import("io").attr("open");
90         stream         = io_open(filename, "rb");
91         closing_stream = true;
92         description    = py::str(filename);
93     }
94 
95     bool success = false;
96     if (access_mode == access_default)
97         access_mode = MMAP_DEFAULT ? access_mmap : access_stream;
98 
99     if (access_mode == access_mmap || access_mode == access_mmap_only) {
100         try {
101             py::gil_scoped_release release;
102             auto mmap_input_source =
103                 std::make_unique<MmapInputSource>(stream, description, closing_stream);
104             auto input_source = PointerHolder<InputSource>(mmap_input_source.release());
105             q->processInputSource(input_source, password.c_str());
106             success = true;
107         } catch (const py::error_already_set &e) {
108             if (access_mode == access_mmap) {
109                 // Prepare to fallback to stream access
110                 stream.attr("seek")(0);
111                 access_mode = access_stream;
112             } else {
113                 throw;
114             }
115         }
116     }
117 
118     if (!success && access_mode == access_stream) {
119         py::gil_scoped_release release;
120         auto stream_input_source = std::make_unique<PythonStreamInputSource>(
121             stream, description, closing_stream);
122         auto input_source = PointerHolder<InputSource>(stream_input_source.release());
123         q->processInputSource(input_source, password.c_str());
124         success = true;
125     }
126 
127     if (!success) {
128         // LCOV_EXCL_LINE
129         throw std::logic_error(
130             "open_pdf: should have succeeded or thrown a Python exception");
131     }
132 
133     if (inherit_page_attributes) {
134         // This could be expensive for a large file, plausibly (not tested),
135         // so release the GIL again.
136         py::gil_scoped_release release;
137         q->pushInheritedAttributesToPage();
138     }
139 
140     return q;
141 }
142 
143 class PikeProgressReporter : public QPDFWriter::ProgressReporter {
144 public:
PikeProgressReporter(py::function callback)145     PikeProgressReporter(py::function callback) { this->callback = callback; }
146 
147     virtual ~PikeProgressReporter() = default;
148 
reportProgress(int percent)149     virtual void reportProgress(int percent) override
150     {
151         py::gil_scoped_acquire acquire;
152         this->callback(percent);
153     }
154 
155 private:
156     py::function callback;
157 };
158 
update_xmp_pdfversion(QPDF & q,std::string version)159 void update_xmp_pdfversion(QPDF &q, std::string version)
160 {
161     auto impl =
162         py::module_::import("pikepdf._cpphelpers").attr("update_xmp_pdfversion");
163     auto pypdf = py::cast(q);
164     impl(pypdf, version);
165 }
166 
setup_encryption(QPDFWriter & w,py::object encryption)167 void setup_encryption(QPDFWriter &w, py::object encryption)
168 {
169     std::string owner;
170     std::string user;
171 
172     bool aes      = true;
173     bool metadata = true;
174     std::map<std::string, bool> allow;
175     int encryption_level = 6;
176 
177     if (encryption.contains("R")) {
178         if (!py::isinstance<py::int_>(encryption["R"]))
179             throw py::type_error("Encryption level 'R' must be an integer");
180         encryption_level = py::int_(encryption["R"]);
181     }
182     if (encryption_level < 2 || encryption_level > 6)
183         throw py::value_error("Invalid encryption level: must be 2, 3, 4 or 6");
184 
185     if (encryption_level == 5) {
186         auto warn = py::module_::import("warnings").attr("warn");
187         warn("Encryption R=5 is deprecated");
188     }
189 
190     if (encryption.contains("owner")) {
191         if (encryption_level <= 4) {
192             auto success =
193                 QUtil::utf8_to_pdf_doc(encryption["owner"].cast<std::string>(), owner);
194             if (!success)
195                 throw py::value_error("Encryption level is R3/R4 and password is not "
196                                       "encodable as PDFDocEncoding");
197         } else {
198             owner = encryption["owner"].cast<std::string>();
199         }
200     }
201     if (encryption.contains("user")) {
202         if (encryption_level <= 4) {
203             auto success =
204                 QUtil::utf8_to_pdf_doc(encryption["user"].cast<std::string>(), user);
205             if (!success)
206                 throw py::value_error("Encryption level is R3/R4 and password is not "
207                                       "encodable as PDFDocEncoding");
208         } else {
209             user = encryption["user"].cast<std::string>();
210         }
211     }
212     if (encryption.contains("allow")) {
213         auto pyallow               = encryption["allow"];
214         allow["accessibility"]     = pyallow.attr("accessibility").cast<bool>();
215         allow["extract"]           = pyallow.attr("extract").cast<bool>();
216         allow["modify_assembly"]   = pyallow.attr("modify_assembly").cast<bool>();
217         allow["modify_annotation"] = pyallow.attr("modify_annotation").cast<bool>();
218         allow["modify_form"]       = pyallow.attr("modify_form").cast<bool>();
219         allow["modify_other"]      = pyallow.attr("modify_other").cast<bool>();
220         allow["print_lowres"]      = pyallow.attr("print_lowres").cast<bool>();
221         allow["print_highres"]     = pyallow.attr("print_highres").cast<bool>();
222     }
223     if (encryption.contains("aes")) {
224         if (py::isinstance<py::bool_>(encryption["aes"]))
225             aes = py::bool_(encryption["aes"]);
226         else
227             throw py::type_error("aes must be bool");
228     } else {
229         aes = (encryption_level >= 4);
230     }
231     if (encryption.contains("metadata")) {
232         if (py::isinstance<py::bool_>(encryption["metadata"]))
233             metadata = py::bool_(encryption["metadata"]);
234         else
235             throw py::type_error("metadata must be bool");
236     } else {
237         metadata = (encryption_level >= 4);
238     }
239 
240     if (metadata && encryption_level < 4) {
241         throw py::value_error("Cannot encrypt metadata when R < 4");
242     }
243     if (aes && encryption_level < 4) {
244         throw py::value_error("Cannot encrypt with AES when R < 4");
245     }
246     if (encryption_level == 6 && !aes) {
247         throw py::value_error("When R = 6, AES encryption must be enabled");
248     }
249     if (metadata && !aes) {
250         throw py::value_error(
251             "Cannot encrypt metadata unless AES encryption is enabled");
252     }
253 
254     qpdf_r3_print_e print;
255     if (allow["print_highres"])
256         print = qpdf_r3p_full;
257     else if (allow["print_lowres"])
258         print = qpdf_r3p_low;
259     else
260         print = qpdf_r3p_none;
261 
262     if (encryption_level == 6) {
263         w.setR6EncryptionParameters(user.c_str(),
264             owner.c_str(),
265             allow["accessibility"],
266             allow["extract"],
267             allow["modify_assembly"],
268             allow["modify_annotation"],
269             allow["modify_form"],
270             allow["modify_other"],
271             print,
272             metadata);
273     } else if (encryption_level == 5) {
274         // TODO WARNING
275         w.setR5EncryptionParameters(user.c_str(),
276             owner.c_str(),
277             allow["accessibility"],
278             allow["extract"],
279             allow["modify_assembly"],
280             allow["modify_annotation"],
281             allow["modify_form"],
282             allow["modify_other"],
283             print,
284             metadata);
285     } else if (encryption_level == 4) {
286         w.setR4EncryptionParameters(user.c_str(),
287             owner.c_str(),
288             allow["accessibility"],
289             allow["extract"],
290             allow["modify_assembly"],
291             allow["modify_annotation"],
292             allow["modify_form"],
293             allow["modify_other"],
294             print,
295             metadata,
296             aes);
297     } else if (encryption_level == 3) {
298         w.setR3EncryptionParameters(user.c_str(),
299             owner.c_str(),
300             allow["accessibility"],
301             allow["extract"],
302             allow["modify_assembly"],
303             allow["modify_annotation"],
304             allow["modify_form"],
305             allow["modify_other"],
306             print);
307     } else if (encryption_level == 2) {
308         w.setR2EncryptionParameters(user.c_str(),
309             owner.c_str(),
310             (print != qpdf_r3p_none),
311             allow["modify_assembly"],
312             allow["extract"],
313             allow["modify_annotation"]);
314     }
315 }
316 
317 typedef std::pair<std::string, int> pdf_version_extension;
318 
get_version_extension(py::object ver_ext)319 pdf_version_extension get_version_extension(py::object ver_ext)
320 {
321     std::string version = "";
322     int extension       = 0;
323     try {
324         version   = ver_ext.cast<std::string>();
325         extension = 0;
326     } catch (const py::cast_error &) {
327         try {
328             auto version_ext = ver_ext.cast<pdf_version_extension>();
329             version          = version_ext.first;
330             extension        = version_ext.second;
331         } catch (const py::cast_error &) {
332             throw py::type_error("PDF version must be a tuple: (str, int)");
333         }
334     }
335     return pdf_version_extension(version, extension);
336 }
337 
save_pdf(QPDF & q,py::object filename_or_stream,bool static_id=false,bool preserve_pdfa=true,py::object min_version=py::none (),py::object force_version=py::none (),bool fix_metadata_version=true,bool compress_streams=true,py::object stream_decode_level=py::none (),qpdf_object_stream_e object_stream_mode=qpdf_o_preserve,bool normalize_content=false,bool linearize=false,bool qdf=false,py::object progress=py::none (),py::object encryption=py::none (),bool samefile_check=true,bool recompress_flate=false)338 void save_pdf(QPDF &q,
339     py::object filename_or_stream,
340     bool static_id                          = false,
341     bool preserve_pdfa                      = true,
342     py::object min_version                  = py::none(),
343     py::object force_version                = py::none(),
344     bool fix_metadata_version               = true,
345     bool compress_streams                   = true,
346     py::object stream_decode_level          = py::none(),
347     qpdf_object_stream_e object_stream_mode = qpdf_o_preserve,
348     bool normalize_content                  = false,
349     bool linearize                          = false,
350     bool qdf                                = false,
351     py::object progress                     = py::none(),
352     py::object encryption                   = py::none(),
353     bool samefile_check                     = true,
354     bool recompress_flate                   = false)
355 {
356     std::string description;
357     QPDFWriter w(q);
358 
359     if (static_id) {
360         w.setStaticID(true);
361     }
362     w.setNewlineBeforeEndstream(preserve_pdfa);
363 
364     if (!min_version.is_none()) {
365         auto version_ext = get_version_extension(min_version);
366         w.setMinimumPDFVersion(version_ext.first, version_ext.second);
367     }
368     w.setCompressStreams(compress_streams);
369     if (!stream_decode_level.is_none()) {
370         // Unconditionally calling setDecodeLevel has side effects, disabling
371         // preserve encryption in particular
372         w.setDecodeLevel(stream_decode_level.cast<qpdf_stream_decode_level_e>());
373     }
374     w.setObjectStreamMode(object_stream_mode);
375     w.setRecompressFlate(recompress_flate);
376 
377     py::object stream;
378     bool should_close_stream = false;
379     auto close_stream        = gsl::finally([&stream, &should_close_stream] {
380         if (should_close_stream && !stream.is_none() && py::hasattr(stream, "close"))
381             stream.attr("close")();
382     });
383 
384     if (py::hasattr(filename_or_stream, "write") &&
385         py::hasattr(filename_or_stream, "seek")) {
386         // Python code gave us an object with a stream interface
387         stream = filename_or_stream;
388         check_stream_is_usable(stream);
389         description = py::repr(stream);
390     } else {
391         if (py::isinstance<py::int_>(filename_or_stream))
392             throw py::type_error("expected str, bytes or os.PathLike object");
393         py::object output_filename = fspath(filename_or_stream);
394         if (samefile_check) {
395             auto input_filename = q.getFilename();
396 
397             py::object ospath   = py::module_::import("os").attr("path");
398             py::object samefile = ospath.attr("samefile");
399             try {
400                 if (samefile(output_filename, input_filename).cast<bool>()) {
401                     throw py::value_error(
402                         "Cannot overwrite input file. Open the file with "
403                         "pikepdf.open(..., allow_overwriting_input=True) to "
404                         "allow overwriting the input file.");
405                 }
406             } catch (const py::error_already_set &e) {
407                 // We expect FileNotFoundError if filename refers to a file that does
408                 // not exist, or if q.getFilename indicates a memory file. Suppress
409                 // that, and rethrow all others.
410                 if (!e.matches(PyExc_FileNotFoundError))
411                     throw;
412             }
413         }
414         stream = py::module_::import("io").attr("open")(output_filename, "wb");
415         should_close_stream = true;
416         description         = py::str(output_filename);
417     }
418 
419     // We must set up the output pipeline before we configure encryption
420     Pl_PythonOutput output_pipe(description.c_str(), stream);
421     w.setOutputPipeline(&output_pipe);
422 
423     if (encryption.is(py::bool_(true)) && !q.isEncrypted()) {
424         throw py::value_error(
425             "can't perserve encryption parameters on a file with no encryption");
426     }
427 
428     if ((encryption.is(py::bool_(true)) || py::isinstance<py::dict>(encryption)) &&
429         (normalize_content || !stream_decode_level.is_none())) {
430         throw py::value_error(
431             "cannot save with encryption and normalize_content or stream_decode_level");
432     }
433 
434     if (encryption.is(py::bool_(true))) {
435         w.setPreserveEncryption(true); // Keep existing encryption
436     } else if (encryption.is_none() || encryption.is(py::bool_(false))) {
437         w.setPreserveEncryption(false); // Remove encryption
438     } else {
439         setup_encryption(w, encryption);
440     }
441 
442     if (normalize_content && linearize) {
443         throw py::value_error("cannot save with both normalize_content and linearize");
444     }
445     w.setContentNormalization(normalize_content);
446     w.setLinearization(linearize);
447     w.setQDFMode(qdf);
448 
449     if (!force_version.is_none()) {
450         auto version_ext = get_version_extension(force_version);
451         w.forcePDFVersion(version_ext.first, version_ext.second);
452     }
453     if (fix_metadata_version) {
454         update_xmp_pdfversion(q, w.getFinalVersion());
455     }
456 
457     if (!progress.is_none()) {
458         auto reporter = PointerHolder<QPDFWriter::ProgressReporter>(
459             new PikeProgressReporter(progress));
460         w.registerProgressReporter(reporter);
461     }
462 
463     w.write();
464 }
465 
init_qpdf(py::module_ & m)466 void init_qpdf(py::module_ &m)
467 {
468     py::enum_<qpdf_object_stream_e>(m, "ObjectStreamMode")
469         .value("disable", qpdf_object_stream_e::qpdf_o_disable)
470         .value("preserve", qpdf_object_stream_e::qpdf_o_preserve)
471         .value("generate", qpdf_object_stream_e::qpdf_o_generate);
472 
473     py::enum_<qpdf_stream_decode_level_e>(m, "StreamDecodeLevel")
474         .value("none", qpdf_stream_decode_level_e::qpdf_dl_none)
475         .value("generalized", qpdf_stream_decode_level_e::qpdf_dl_generalized)
476         .value("specialized", qpdf_stream_decode_level_e::qpdf_dl_specialized)
477         .value("all", qpdf_stream_decode_level_e::qpdf_dl_all);
478 
479     py::enum_<QPDF::encryption_method_e>(m, "EncryptionMethod")
480         .value("none", QPDF::encryption_method_e::e_none)
481         .value("unknown", QPDF::encryption_method_e::e_unknown)
482         .value("rc4", QPDF::encryption_method_e::e_rc4)
483         .value("aes", QPDF::encryption_method_e::e_aes)
484         .value("aesv3", QPDF::encryption_method_e::e_aesv3);
485 
486     py::enum_<access_mode_e>(m, "AccessMode")
487         .value("default", access_mode_e::access_default)
488         .value("stream", access_mode_e::access_stream)
489         .value("mmap", access_mode_e::access_mmap)
490         .value("mmap_only", access_mode_e::access_mmap_only);
491 
492     py::class_<QPDF, std::shared_ptr<QPDF>>(
493         m, "Pdf", "In-memory representation of a PDF", py::dynamic_attr())
494         .def_static(
495             "new",
496             []() {
497                 auto q = std::make_shared<QPDF>();
498                 q->emptyPDF();
499                 qpdf_basic_settings(*q);
500                 return q;
501             },
502             "Create a new empty PDF from stratch.")
503         .def_static("_open",
504             open_pdf,
505             "",
506             py::arg("filename_or_stream"),
507             py::arg("password")                = "",
508             py::arg("hex_password")            = false,
509             py::arg("ignore_xref_streams")     = false,
510             py::arg("suppress_warnings")       = true,
511             py::arg("attempt_recovery")        = true,
512             py::arg("inherit_page_attributes") = true,
513             py::arg("access_mode")             = access_mode_e::access_default)
514         .def("__repr__",
515             [](QPDF &q) {
516                 return std::string("<pikepdf.Pdf description='") + q.getFilename() +
517                        std::string("'>");
518             })
519         .def_property_readonly("filename",
520             &QPDF::getFilename,
521             "The source filename of an existing PDF, when available.")
522         .def_property_readonly("pdf_version",
523             &QPDF::getPDFVersion,
524             "The version of the PDF specification used for this file, such as '1.7'.")
525         .def_property_readonly("extension_level", &QPDF::getExtensionLevel)
526         .def_property_readonly("Root", &QPDF::getRoot, "The /Root object of the PDF.")
527         .def_property_readonly("trailer",
528             &QPDF::getTrailer,
529             R"~~~(
530             Provides access to the PDF trailer object.
531 
532             See |pdfrm| section 7.5.5. Generally speaking,
533             the trailer should not be modified with pikepdf, and modifying it
534             may not work. Some of the values in the trailer are automatically
535             changed when a file is saved.
536             )~~~")
537         .def_property_readonly(
538             "pages",
539             [](std::shared_ptr<QPDF> q) { return PageList(q); },
540             R"~~~(
541             Returns the list of pages.
542 
543             Return type:
544                 pikepdf._qpdf.PageList
545             )~~~",
546             py::return_value_policy::reference_internal)
547         .def_property_readonly("_pages", &QPDF::getAllPages)
548         .def_property_readonly("is_encrypted",
549             &QPDF::isEncrypted,
550             R"~~~(
551             Returns True if the PDF is encrypted.
552 
553             For information about the nature of the encryption, see
554             :attr:`Pdf.encryption`.
555             )~~~")
556         .def_property_readonly("is_linearized",
557             &QPDF::isLinearized,
558             R"~~~(
559             Returns True if the PDF is linearized.
560 
561             Specifically returns True iff the file starts with a linearization
562             parameter dictionary.  Does no additional validation.
563             )~~~")
564         .def(
565             "check_linearization",
566             [](QPDF &q, py::object stream) {
567                 py::scoped_estream_redirect redirector(std::cerr, stream);
568                 return q.checkLinearization();
569             },
570             R"~~~(
571             Reports information on the PDF's linearization.
572 
573             Args:
574                 stream: A stream to write this information too; must
575                     implement ``.write()`` and ``.flush()`` method. Defaults to
576                     :data:`sys.stderr`.
577 
578             Returns:
579                 ``True`` if the file is correctly linearized, and ``False`` if
580                 the file is linearized but the linearization data contains errors
581                 or was incorrectly generated.
582 
583             Raises:
584                 RuntimeError: If the PDF in question is not linearized at all.
585             )~~~",
586             py::arg_v(
587                 "stream", py::module_::import("sys").attr("stderr"), "sys.stderr"))
588         .def("get_warnings", // this is a def because it modifies state by clearing
589                              // warnings
590             [](QPDF &q) {
591                 py::list warnings;
592                 for (auto w : q.getWarnings()) {
593                     warnings.append(w.what());
594                 }
595                 return warnings;
596             })
597         .def("show_xref_table",
598             &QPDF::showXRefTable,
599             R"~~~(
600             Pretty-print the Pdf's xref (cross-reference table)
601             )~~~",
602             py::call_guard<py::scoped_ostream_redirect>())
603         .def(
604             "_add_page",
605             [](QPDF &q, QPDFObjectHandle &page, bool first = false) {
606                 q.addPage(page, first);
607             },
608             R"~~~(
609             Attach a page to this PDF.
610 
611             The page can be either be a newly constructed PDF object or it can
612             be obtained from another PDF.
613 
614             Args:
615                 page (pikepdf.Object): The page object to attach
616                 first (bool): If True, prepend this before the first page; if False append after last page
617             )~~~",
618             py::arg("page"),
619             py::arg("first") = false,
620             py::keep_alive<1, 2>())
621         .def("_add_page_at", &QPDF::addPageAt, py::keep_alive<1, 2>())
622         .def("_remove_page", &QPDF::removePage)
623         .def(
624             "remove_unreferenced_resources",
625             [](QPDF &q) {
626                 QPDFPageDocumentHelper helper(q);
627                 helper.removeUnreferencedResources();
628             },
629             R"~~~(
630             Remove from /Resources of each page any object not referenced in page's contents
631 
632             PDF pages may share resource dictionaries with other pages. If
633             pikepdf is used for page splitting, pages may reference resources
634             in their /Resources dictionary that are not actually required.
635             This purges all unnecessary resource entries.
636 
637             For clarity, if all references to any type of object are removed, that
638             object will be excluded from the output PDF on save. (Conversely, only
639             objects that are discoverable from the PDF's root object are included.)
640             This function removes objects that are referenced from the page /Resources
641             dictionary, but never called for in the content stream, making them
642             unnecessary.
643 
644             Suggested before saving, if content streams or /Resources dictionaries
645             are edited.
646             )~~~")
647         .def("_save",
648             save_pdf,
649             "",
650             py::arg("filename"),
651             py::arg("static_id")            = false,
652             py::arg("preserve_pdfa")        = true,
653             py::arg("min_version")          = "",
654             py::arg("force_version")        = "",
655             py::arg("fix_metadata_version") = true,
656             py::arg("compress_streams")     = true,
657             py::arg("stream_decode_level")  = py::none(),
658             py::arg("object_stream_mode")   = qpdf_object_stream_e::qpdf_o_preserve,
659             py::arg("normalize_content")    = false,
660             py::arg("linearize")            = false,
661             py::arg("qdf")                  = false,
662             py::arg("progress")             = py::none(),
663             py::arg("encryption")           = py::none(),
664             py::arg("samefile_check")       = true,
665             py::arg("recompress_flate")     = false)
666         .def("_get_object_id", &QPDF::getObjectByID)
667         .def(
668             "get_object",
669             [](QPDF &q, std::pair<int, int> objgen) {
670                 return q.getObjectByID(objgen.first, objgen.second);
671             },
672             R"~~~(
673             Look up an object by ID and generation number
674 
675             Return type:
676                 pikepdf.Object
677             )~~~",
678             py::return_value_policy::reference_internal,
679             py::arg("objgen"))
680         .def(
681             "get_object",
682             [](QPDF &q, int objid, int gen) { return q.getObjectByID(objid, gen); },
683             R"~~~(
684             Look up an object by ID and generation number
685 
686             Return type:
687                 pikepdf.Object
688             )~~~",
689             py::return_value_policy::reference_internal,
690             py::arg("objid"),
691             py::arg("gen"))
692         .def_property_readonly(
693             "objects",
694             [](QPDF &q) { return q.getAllObjects(); },
695             R"~~~(
696             Return an iterable list of all objects in the PDF.
697 
698             After deleting content from a PDF such as pages, objects related
699             to that page, such as images on the page, may still be present.
700 
701             Retun type:
702                 pikepdf._ObjectList
703             )~~~",
704             py::return_value_policy::reference_internal)
705         .def("make_indirect",
706             &QPDF::makeIndirectObject,
707             R"~~~(
708             Attach an object to the Pdf as an indirect object
709 
710             Direct objects appear inline in the binary encoding of the PDF.
711             Indirect objects appear inline as references (in English, "look
712             up object 4 generation 0") and then read from another location in
713             the file. The PDF specification requires that certain objects
714             are indirect - consult the PDF specification to confirm.
715 
716             Generally a resource that is shared should be attached as an
717             indirect object. :class:`pikepdf.Stream` objects are always
718             indirect, and creating them will automatically attach it to the
719             Pdf.
720 
721             See Also:
722                 :meth:`pikepdf.Object.is_indirect`
723 
724             Return type:
725                 pikepdf.Object
726             )~~~",
727             py::arg("h"))
728         .def(
729             "make_indirect",
730             [](QPDF &q, py::object obj) -> QPDFObjectHandle {
731                 return q.makeIndirectObject(objecthandle_encode(obj));
732             },
733             R"~~~(
734             Encode a Python object and attach to this Pdf as an indirect object.
735 
736             Return type:
737                 pikepdf.Object
738             )~~~",
739             py::arg("obj"))
740         .def(
741             "copy_foreign",
742             [](QPDF &q, QPDFObjectHandle &h) -> QPDFObjectHandle {
743                 return q.copyForeignObject(h);
744             },
745             R"~~~(
746             Copy an ``Object`` from a foreign ``Pdf`` to this one.
747 
748             This function is used to copy a :class:`pikepdf.Object` that is owned by
749             some other ``Pdf`` into this one. This is performs a deep (recursive) copy
750             and preserves circular references that may exist in the foreign object.
751             It also copies all :class:`pikepdf.Stream` objects. Since this may copy
752             a large amount of data, it is not done implicitly. This function does
753             not copy references to pages in the foreign PDF - it stops at page
754             boundaries. Thus, if you use ``copy_foreign()`` on a table of contents
755             (``/Outlines`` dictionary), you may have to update references to pages.
756 
757             Direct objects, including dictionaries, do not need ``copy_foreign()``.
758             pikepdf will automatically convert and construct them.
759 
760             Note:
761                 pikepdf automatically treats incoming pages from a foreign PDF as
762                 foreign objects, so :attr:`Pdf.pages` does not require this treatment.
763 
764             See also:
765                 `QPDF::copyForeignObject <http://qpdf.sourceforge.net/files/qpdf-manual.html#ref.foreign-objects>`_
766 
767             .. versionchanged:: 2.1
768                 Error messages improved.
769             )~~~",
770             py::return_value_policy::reference_internal,
771             py::keep_alive<1, 2>(),
772             py::arg("h"))
773         .def("_replace_object",
774             [](QPDF &q, std::pair<int, int> objgen, QPDFObjectHandle &h) {
775                 q.replaceObject(objgen.first, objgen.second, h);
776             })
777         .def("_swap_objects",
778             [](QPDF &q, std::pair<int, int> objgen1, std::pair<int, int> objgen2) {
779                 QPDFObjGen o1(objgen1.first, objgen1.second);
780                 QPDFObjGen o2(objgen2.first, objgen2.second);
781                 q.swapObjects(o1, o2);
782             })
783         .def(
784             "_process",
785             [](QPDF &q, std::string description, py::bytes data) {
786                 std::string s = data;
787                 q.processMemoryFile(description.c_str(), s.data(), s.size());
788             },
789             R"~~~(
790             Process a new in-memory PDF, replacing the existing PDF
791 
792             Used to implement Pdf.close().
793             )~~~")
794         .def("_decode_all_streams_and_discard",
795             [](QPDF &q) {
796                 QPDFWriter w(q);
797                 Pl_Discard discard;
798                 w.setOutputPipeline(&discard);
799                 w.setDecodeLevel(qpdf_dl_all);
800                 w.write();
801             })
802         .def_property_readonly(
803             "_allow_accessibility", [](QPDF &q) { return q.allowAccessibility(); })
804         .def_property_readonly(
805             "_allow_extract", [](QPDF &q) { return q.allowExtractAll(); })
806         .def_property_readonly(
807             "_allow_print_lowres", [](QPDF &q) { return q.allowPrintLowRes(); })
808         .def_property_readonly(
809             "_allow_print_highres", [](QPDF &q) { return q.allowPrintHighRes(); })
810         .def_property_readonly(
811             "_allow_modify_assembly", [](QPDF &q) { return q.allowModifyAssembly(); })
812         .def_property_readonly(
813             "_allow_modify_form", [](QPDF &q) { return q.allowModifyForm(); })
814         .def_property_readonly("_allow_modify_annotation",
815             [](QPDF &q) { return q.allowModifyAnnotation(); })
816         .def_property_readonly(
817             "_allow_modify_other", [](QPDF &q) { return q.allowModifyOther(); })
818         .def_property_readonly(
819             "_allow_modify_all", [](QPDF &q) { return q.allowModifyAll(); })
820         .def_property_readonly("_encryption_data",
821             [](QPDF &q) {
822                 int R                                   = 0;
823                 int P                                   = 0;
824                 int V                                   = 0;
825                 QPDF::encryption_method_e stream_method = QPDF::e_unknown;
826                 QPDF::encryption_method_e string_method = QPDF::e_unknown;
827                 QPDF::encryption_method_e file_method   = QPDF::e_unknown;
828                 if (!q.isEncrypted(R, P, V, stream_method, string_method, file_method))
829                     return py::dict();
830 
831                 auto user_passwd    = q.getTrimmedUserPassword();
832                 auto encryption_key = q.getEncryptionKey();
833 
834                 return py::dict(py::arg("R")  = R,
835                     py::arg("P")              = P,
836                     py::arg("V")              = V,
837                     py::arg("stream")         = stream_method,
838                     py::arg("string")         = string_method,
839                     py::arg("file")           = file_method,
840                     py::arg("user_passwd")    = py::bytes(user_passwd),
841                     py::arg("encryption_key") = py::bytes(encryption_key));
842             })
        // Read-only property bound directly to QPDF::userPasswordMatched
        // (no wrapper lambda).
        .def_property_readonly("user_password_matched",
            &QPDF::userPasswordMatched,
            R"~~~(
            Returns True if the user password matched when the ``Pdf`` was opened.

            It is possible for both the user and owner passwords to match.

            .. versionadded:: 2.10
            )~~~")
        // Read-only property bound directly to QPDF::ownerPasswordMatched
        // (no wrapper lambda).
        .def_property_readonly("owner_password_matched",
            &QPDF::ownerPasswordMatched,
            R"~~~(
            Returns True if the owner password matched when the ``Pdf`` was opened.

            It is possible for both the user and owner passwords to match.

            .. versionadded:: 2.10
            )~~~")
861         .def(
862             "generate_appearance_streams",
863             [](QPDF &q) {
864                 QPDFAcroFormDocumentHelper afdh(q);
865                 afdh.generateAppearancesIfNeeded();
866             },
867             R"~~~(
868             Generates appearance streams for AcroForm forms and form fields.
869 
870             Appearance streams describe exactly how annotations and form fields
871             should appear to the user. If omitted, the PDF viewer is free to
872             render the annotations and form fields according to its own settings,
873             as needed.
874 
875             For every form field in the document, this generates appearance
876             streams, subject to the limitations of QPDF's ability to create
877             appearance streams.
878 
879             When invoked, this method will modify the ``Pdf`` in memory. It may be
880             best to do this after the ``Pdf`` is opened, or before it is saved,
881             because it may modify objects that the user does not expect to be
882             modified.
883 
884             See:
885                 https://github.com/qpdf/qpdf/blob/bf6b9ba1c681a6fac6d585c6262fb2778d4bb9d2/include/qpdf/QPDFFormFieldObjectHelper.hh#L216
886 
887             .. versionadded:: 2.11
888             )~~~")
889         .def(
890             "flatten_annotations",
891             [](QPDF &q, std::string mode) {
892                 QPDFPageDocumentHelper dh(q);
893                 auto required  = 0;
894                 auto forbidden = an_invisible | an_hidden;
895 
896                 if (mode == "screen") {
897                     forbidden |= an_no_view;
898                 } else if (mode == "print") {
899                     required |= an_print;
900                 } else if (mode == "" || mode == "all") {
901                     // No op
902                 } else {
903                     throw py::value_error(
904                         "Mode must be one of 'all', 'screen', 'print'.");
905                 }
906 
907                 dh.flattenAnnotations(required, forbidden);
908             },
909             R"~~~(
910             Flattens all PDF annotations into regular PDF content.
911 
912             Annotations are markup such as review comments, highlights, proofreading
913             marks. User data entered into interactive form fields also counts as an
914             annotation.
915 
916             When annotations are flattened, they are "burned into" the regular
917             content stream of the document and the fact that they were once annotations
918             is deleted. This can be useful when preparing a document for printing,
919             to ensure annotations are printed, or to finalize a form that should
920             no longer be changed.
921 
922             Args:
923                 mode: One of the strings ``'all'``, ``'screen'``, ``'print'``. If
924                     omitted or  set to empty, treated as ``'all'``. ``'screen'``
925                     flattens all except those marked with the PDF flag /NoView.
926                     ``'print'`` flattens only those marked for printing.
927 
928             .. versionadded:: 2.11
929             )~~~",
930             py::arg("mode") = "all"); // class Pdf
931 }
932