1 /*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright (C) 2017, James R. Barlow (https://github.com/jbarlow83/)
7 */
8
9 #include <sstream>
10 #include <type_traits>
11 #include <cerrno>
12 #include <cstring>
13
14 #include "pikepdf.h"
15
16 #include <qpdf/QPDFExc.hh>
17 #include <qpdf/QPDFSystemError.hh>
18 #include <qpdf/QPDFObjGen.hh>
19 #include <qpdf/QPDFXRefEntry.hh>
20 #include <qpdf/Buffer.hh>
21 #include <qpdf/BufferInputSource.hh>
22 #include <qpdf/QPDFWriter.hh>
23 #include <qpdf/QPDFPageDocumentHelper.hh>
24 #include <qpdf/Pl_Discard.hh>
25 #include <qpdf/QPDFAcroFormDocumentHelper.hh>
26
27 #include <pybind11/stl.h>
28 #include <pybind11/iostream.h>
29 #include <pybind11/buffer_info.h>
30
31 #include "qpdf_pagelist.h"
32 #include "qpdf_inputsource-inl.h"
33 #include "mmap_inputsource-inl.h"
34 #include "pipeline.h"
35 #include "utils.h"
36 #include "gsl.h"
37
// Process-wide default consulted by open_pdf() when the caller requests
// access_default. Only declared here; the definition lives elsewhere.
extern bool MMAP_DEFAULT;

// Strategy open_pdf() uses to read the underlying file object.
enum access_mode_e {
    // Resolve to access_mmap or access_stream according to MMAP_DEFAULT.
    access_default,
    // Read through the Python stream interface.
    access_stream,
    // Try mmap first; fall back to access_stream if a Python error is raised.
    access_mmap,
    // Use mmap and propagate the Python error if it fails.
    access_mmap_only
};
41
check_stream_is_usable(py::object stream)42 void check_stream_is_usable(py::object stream)
43 {
44 auto TextIOBase = py::module_::import("io").attr("TextIOBase");
45
46 if (py::isinstance(stream, TextIOBase)) {
47 throw py::type_error("stream must be binary (no transcoding) and seekable");
48 }
49 }
50
qpdf_basic_settings(QPDF & q)51 void qpdf_basic_settings(QPDF &q)
52 {
53 q.setSuppressWarnings(true);
54 q.setImmediateCopyFrom(true);
55 }
56
open_pdf(py::object filename_or_stream,std::string password,bool hex_password=false,bool ignore_xref_streams=false,bool suppress_warnings=true,bool attempt_recovery=true,bool inherit_page_attributes=true,access_mode_e access_mode=access_mode_e::access_default)57 std::shared_ptr<QPDF> open_pdf(py::object filename_or_stream,
58 std::string password,
59 bool hex_password = false,
60 bool ignore_xref_streams = false,
61 bool suppress_warnings = true,
62 bool attempt_recovery = true,
63 bool inherit_page_attributes = true,
64 access_mode_e access_mode = access_mode_e::access_default)
65 {
66 auto q = std::make_shared<QPDF>();
67
68 qpdf_basic_settings(*q);
69 q->setSuppressWarnings(suppress_warnings);
70 q->setPasswordIsHexKey(hex_password);
71 q->setIgnoreXRefStreams(ignore_xref_streams);
72 q->setAttemptRecovery(attempt_recovery);
73
74 py::object stream;
75 bool closing_stream;
76 std::string description;
77
78 if (py::hasattr(filename_or_stream, "read") &&
79 py::hasattr(filename_or_stream, "seek")) {
80 // Python code gave us an object with a stream interface
81 stream = filename_or_stream;
82 check_stream_is_usable(stream);
83 closing_stream = false;
84 description = py::repr(stream);
85 } else {
86 if (py::isinstance<py::int_>(filename_or_stream))
87 throw py::type_error("expected str, bytes or os.PathLike object");
88 auto filename = fspath(filename_or_stream);
89 auto io_open = py::module_::import("io").attr("open");
90 stream = io_open(filename, "rb");
91 closing_stream = true;
92 description = py::str(filename);
93 }
94
95 bool success = false;
96 if (access_mode == access_default)
97 access_mode = MMAP_DEFAULT ? access_mmap : access_stream;
98
99 if (access_mode == access_mmap || access_mode == access_mmap_only) {
100 try {
101 py::gil_scoped_release release;
102 auto mmap_input_source =
103 std::make_unique<MmapInputSource>(stream, description, closing_stream);
104 auto input_source = PointerHolder<InputSource>(mmap_input_source.release());
105 q->processInputSource(input_source, password.c_str());
106 success = true;
107 } catch (const py::error_already_set &e) {
108 if (access_mode == access_mmap) {
109 // Prepare to fallback to stream access
110 stream.attr("seek")(0);
111 access_mode = access_stream;
112 } else {
113 throw;
114 }
115 }
116 }
117
118 if (!success && access_mode == access_stream) {
119 py::gil_scoped_release release;
120 auto stream_input_source = std::make_unique<PythonStreamInputSource>(
121 stream, description, closing_stream);
122 auto input_source = PointerHolder<InputSource>(stream_input_source.release());
123 q->processInputSource(input_source, password.c_str());
124 success = true;
125 }
126
127 if (!success) {
128 // LCOV_EXCL_LINE
129 throw std::logic_error(
130 "open_pdf: should have succeeded or thrown a Python exception");
131 }
132
133 if (inherit_page_attributes) {
134 // This could be expensive for a large file, plausibly (not tested),
135 // so release the GIL again.
136 py::gil_scoped_release release;
137 q->pushInheritedAttributesToPage();
138 }
139
140 return q;
141 }
142
143 class PikeProgressReporter : public QPDFWriter::ProgressReporter {
144 public:
PikeProgressReporter(py::function callback)145 PikeProgressReporter(py::function callback) { this->callback = callback; }
146
147 virtual ~PikeProgressReporter() = default;
148
reportProgress(int percent)149 virtual void reportProgress(int percent) override
150 {
151 py::gil_scoped_acquire acquire;
152 this->callback(percent);
153 }
154
155 private:
156 py::function callback;
157 };
158
update_xmp_pdfversion(QPDF & q,std::string version)159 void update_xmp_pdfversion(QPDF &q, std::string version)
160 {
161 auto impl =
162 py::module_::import("pikepdf._cpphelpers").attr("update_xmp_pdfversion");
163 auto pypdf = py::cast(q);
164 impl(pypdf, version);
165 }
166
setup_encryption(QPDFWriter & w,py::object encryption)167 void setup_encryption(QPDFWriter &w, py::object encryption)
168 {
169 std::string owner;
170 std::string user;
171
172 bool aes = true;
173 bool metadata = true;
174 std::map<std::string, bool> allow;
175 int encryption_level = 6;
176
177 if (encryption.contains("R")) {
178 if (!py::isinstance<py::int_>(encryption["R"]))
179 throw py::type_error("Encryption level 'R' must be an integer");
180 encryption_level = py::int_(encryption["R"]);
181 }
182 if (encryption_level < 2 || encryption_level > 6)
183 throw py::value_error("Invalid encryption level: must be 2, 3, 4 or 6");
184
185 if (encryption_level == 5) {
186 auto warn = py::module_::import("warnings").attr("warn");
187 warn("Encryption R=5 is deprecated");
188 }
189
190 if (encryption.contains("owner")) {
191 if (encryption_level <= 4) {
192 auto success =
193 QUtil::utf8_to_pdf_doc(encryption["owner"].cast<std::string>(), owner);
194 if (!success)
195 throw py::value_error("Encryption level is R3/R4 and password is not "
196 "encodable as PDFDocEncoding");
197 } else {
198 owner = encryption["owner"].cast<std::string>();
199 }
200 }
201 if (encryption.contains("user")) {
202 if (encryption_level <= 4) {
203 auto success =
204 QUtil::utf8_to_pdf_doc(encryption["user"].cast<std::string>(), user);
205 if (!success)
206 throw py::value_error("Encryption level is R3/R4 and password is not "
207 "encodable as PDFDocEncoding");
208 } else {
209 user = encryption["user"].cast<std::string>();
210 }
211 }
212 if (encryption.contains("allow")) {
213 auto pyallow = encryption["allow"];
214 allow["accessibility"] = pyallow.attr("accessibility").cast<bool>();
215 allow["extract"] = pyallow.attr("extract").cast<bool>();
216 allow["modify_assembly"] = pyallow.attr("modify_assembly").cast<bool>();
217 allow["modify_annotation"] = pyallow.attr("modify_annotation").cast<bool>();
218 allow["modify_form"] = pyallow.attr("modify_form").cast<bool>();
219 allow["modify_other"] = pyallow.attr("modify_other").cast<bool>();
220 allow["print_lowres"] = pyallow.attr("print_lowres").cast<bool>();
221 allow["print_highres"] = pyallow.attr("print_highres").cast<bool>();
222 }
223 if (encryption.contains("aes")) {
224 if (py::isinstance<py::bool_>(encryption["aes"]))
225 aes = py::bool_(encryption["aes"]);
226 else
227 throw py::type_error("aes must be bool");
228 } else {
229 aes = (encryption_level >= 4);
230 }
231 if (encryption.contains("metadata")) {
232 if (py::isinstance<py::bool_>(encryption["metadata"]))
233 metadata = py::bool_(encryption["metadata"]);
234 else
235 throw py::type_error("metadata must be bool");
236 } else {
237 metadata = (encryption_level >= 4);
238 }
239
240 if (metadata && encryption_level < 4) {
241 throw py::value_error("Cannot encrypt metadata when R < 4");
242 }
243 if (aes && encryption_level < 4) {
244 throw py::value_error("Cannot encrypt with AES when R < 4");
245 }
246 if (encryption_level == 6 && !aes) {
247 throw py::value_error("When R = 6, AES encryption must be enabled");
248 }
249 if (metadata && !aes) {
250 throw py::value_error(
251 "Cannot encrypt metadata unless AES encryption is enabled");
252 }
253
254 qpdf_r3_print_e print;
255 if (allow["print_highres"])
256 print = qpdf_r3p_full;
257 else if (allow["print_lowres"])
258 print = qpdf_r3p_low;
259 else
260 print = qpdf_r3p_none;
261
262 if (encryption_level == 6) {
263 w.setR6EncryptionParameters(user.c_str(),
264 owner.c_str(),
265 allow["accessibility"],
266 allow["extract"],
267 allow["modify_assembly"],
268 allow["modify_annotation"],
269 allow["modify_form"],
270 allow["modify_other"],
271 print,
272 metadata);
273 } else if (encryption_level == 5) {
274 // TODO WARNING
275 w.setR5EncryptionParameters(user.c_str(),
276 owner.c_str(),
277 allow["accessibility"],
278 allow["extract"],
279 allow["modify_assembly"],
280 allow["modify_annotation"],
281 allow["modify_form"],
282 allow["modify_other"],
283 print,
284 metadata);
285 } else if (encryption_level == 4) {
286 w.setR4EncryptionParameters(user.c_str(),
287 owner.c_str(),
288 allow["accessibility"],
289 allow["extract"],
290 allow["modify_assembly"],
291 allow["modify_annotation"],
292 allow["modify_form"],
293 allow["modify_other"],
294 print,
295 metadata,
296 aes);
297 } else if (encryption_level == 3) {
298 w.setR3EncryptionParameters(user.c_str(),
299 owner.c_str(),
300 allow["accessibility"],
301 allow["extract"],
302 allow["modify_assembly"],
303 allow["modify_annotation"],
304 allow["modify_form"],
305 allow["modify_other"],
306 print);
307 } else if (encryption_level == 2) {
308 w.setR2EncryptionParameters(user.c_str(),
309 owner.c_str(),
310 (print != qpdf_r3p_none),
311 allow["modify_assembly"],
312 allow["extract"],
313 allow["modify_annotation"]);
314 }
315 }
316
317 typedef std::pair<std::string, int> pdf_version_extension;
318
get_version_extension(py::object ver_ext)319 pdf_version_extension get_version_extension(py::object ver_ext)
320 {
321 std::string version = "";
322 int extension = 0;
323 try {
324 version = ver_ext.cast<std::string>();
325 extension = 0;
326 } catch (const py::cast_error &) {
327 try {
328 auto version_ext = ver_ext.cast<pdf_version_extension>();
329 version = version_ext.first;
330 extension = version_ext.second;
331 } catch (const py::cast_error &) {
332 throw py::type_error("PDF version must be a tuple: (str, int)");
333 }
334 }
335 return pdf_version_extension(version, extension);
336 }
337
save_pdf(QPDF & q,py::object filename_or_stream,bool static_id=false,bool preserve_pdfa=true,py::object min_version=py::none (),py::object force_version=py::none (),bool fix_metadata_version=true,bool compress_streams=true,py::object stream_decode_level=py::none (),qpdf_object_stream_e object_stream_mode=qpdf_o_preserve,bool normalize_content=false,bool linearize=false,bool qdf=false,py::object progress=py::none (),py::object encryption=py::none (),bool samefile_check=true,bool recompress_flate=false)338 void save_pdf(QPDF &q,
339 py::object filename_or_stream,
340 bool static_id = false,
341 bool preserve_pdfa = true,
342 py::object min_version = py::none(),
343 py::object force_version = py::none(),
344 bool fix_metadata_version = true,
345 bool compress_streams = true,
346 py::object stream_decode_level = py::none(),
347 qpdf_object_stream_e object_stream_mode = qpdf_o_preserve,
348 bool normalize_content = false,
349 bool linearize = false,
350 bool qdf = false,
351 py::object progress = py::none(),
352 py::object encryption = py::none(),
353 bool samefile_check = true,
354 bool recompress_flate = false)
355 {
356 std::string description;
357 QPDFWriter w(q);
358
359 if (static_id) {
360 w.setStaticID(true);
361 }
362 w.setNewlineBeforeEndstream(preserve_pdfa);
363
364 if (!min_version.is_none()) {
365 auto version_ext = get_version_extension(min_version);
366 w.setMinimumPDFVersion(version_ext.first, version_ext.second);
367 }
368 w.setCompressStreams(compress_streams);
369 if (!stream_decode_level.is_none()) {
370 // Unconditionally calling setDecodeLevel has side effects, disabling
371 // preserve encryption in particular
372 w.setDecodeLevel(stream_decode_level.cast<qpdf_stream_decode_level_e>());
373 }
374 w.setObjectStreamMode(object_stream_mode);
375 w.setRecompressFlate(recompress_flate);
376
377 py::object stream;
378 bool should_close_stream = false;
379 auto close_stream = gsl::finally([&stream, &should_close_stream] {
380 if (should_close_stream && !stream.is_none() && py::hasattr(stream, "close"))
381 stream.attr("close")();
382 });
383
384 if (py::hasattr(filename_or_stream, "write") &&
385 py::hasattr(filename_or_stream, "seek")) {
386 // Python code gave us an object with a stream interface
387 stream = filename_or_stream;
388 check_stream_is_usable(stream);
389 description = py::repr(stream);
390 } else {
391 if (py::isinstance<py::int_>(filename_or_stream))
392 throw py::type_error("expected str, bytes or os.PathLike object");
393 py::object output_filename = fspath(filename_or_stream);
394 if (samefile_check) {
395 auto input_filename = q.getFilename();
396
397 py::object ospath = py::module_::import("os").attr("path");
398 py::object samefile = ospath.attr("samefile");
399 try {
400 if (samefile(output_filename, input_filename).cast<bool>()) {
401 throw py::value_error(
402 "Cannot overwrite input file. Open the file with "
403 "pikepdf.open(..., allow_overwriting_input=True) to "
404 "allow overwriting the input file.");
405 }
406 } catch (const py::error_already_set &e) {
407 // We expect FileNotFoundError if filename refers to a file that does
408 // not exist, or if q.getFilename indicates a memory file. Suppress
409 // that, and rethrow all others.
410 if (!e.matches(PyExc_FileNotFoundError))
411 throw;
412 }
413 }
414 stream = py::module_::import("io").attr("open")(output_filename, "wb");
415 should_close_stream = true;
416 description = py::str(output_filename);
417 }
418
419 // We must set up the output pipeline before we configure encryption
420 Pl_PythonOutput output_pipe(description.c_str(), stream);
421 w.setOutputPipeline(&output_pipe);
422
423 if (encryption.is(py::bool_(true)) && !q.isEncrypted()) {
424 throw py::value_error(
425 "can't perserve encryption parameters on a file with no encryption");
426 }
427
428 if ((encryption.is(py::bool_(true)) || py::isinstance<py::dict>(encryption)) &&
429 (normalize_content || !stream_decode_level.is_none())) {
430 throw py::value_error(
431 "cannot save with encryption and normalize_content or stream_decode_level");
432 }
433
434 if (encryption.is(py::bool_(true))) {
435 w.setPreserveEncryption(true); // Keep existing encryption
436 } else if (encryption.is_none() || encryption.is(py::bool_(false))) {
437 w.setPreserveEncryption(false); // Remove encryption
438 } else {
439 setup_encryption(w, encryption);
440 }
441
442 if (normalize_content && linearize) {
443 throw py::value_error("cannot save with both normalize_content and linearize");
444 }
445 w.setContentNormalization(normalize_content);
446 w.setLinearization(linearize);
447 w.setQDFMode(qdf);
448
449 if (!force_version.is_none()) {
450 auto version_ext = get_version_extension(force_version);
451 w.forcePDFVersion(version_ext.first, version_ext.second);
452 }
453 if (fix_metadata_version) {
454 update_xmp_pdfversion(q, w.getFinalVersion());
455 }
456
457 if (!progress.is_none()) {
458 auto reporter = PointerHolder<QPDFWriter::ProgressReporter>(
459 new PikeProgressReporter(progress));
460 w.registerProgressReporter(reporter);
461 }
462
463 w.write();
464 }
465
init_qpdf(py::module_ & m)466 void init_qpdf(py::module_ &m)
467 {
468 py::enum_<qpdf_object_stream_e>(m, "ObjectStreamMode")
469 .value("disable", qpdf_object_stream_e::qpdf_o_disable)
470 .value("preserve", qpdf_object_stream_e::qpdf_o_preserve)
471 .value("generate", qpdf_object_stream_e::qpdf_o_generate);
472
473 py::enum_<qpdf_stream_decode_level_e>(m, "StreamDecodeLevel")
474 .value("none", qpdf_stream_decode_level_e::qpdf_dl_none)
475 .value("generalized", qpdf_stream_decode_level_e::qpdf_dl_generalized)
476 .value("specialized", qpdf_stream_decode_level_e::qpdf_dl_specialized)
477 .value("all", qpdf_stream_decode_level_e::qpdf_dl_all);
478
479 py::enum_<QPDF::encryption_method_e>(m, "EncryptionMethod")
480 .value("none", QPDF::encryption_method_e::e_none)
481 .value("unknown", QPDF::encryption_method_e::e_unknown)
482 .value("rc4", QPDF::encryption_method_e::e_rc4)
483 .value("aes", QPDF::encryption_method_e::e_aes)
484 .value("aesv3", QPDF::encryption_method_e::e_aesv3);
485
486 py::enum_<access_mode_e>(m, "AccessMode")
487 .value("default", access_mode_e::access_default)
488 .value("stream", access_mode_e::access_stream)
489 .value("mmap", access_mode_e::access_mmap)
490 .value("mmap_only", access_mode_e::access_mmap_only);
491
492 py::class_<QPDF, std::shared_ptr<QPDF>>(
493 m, "Pdf", "In-memory representation of a PDF", py::dynamic_attr())
494 .def_static(
495 "new",
496 []() {
497 auto q = std::make_shared<QPDF>();
498 q->emptyPDF();
499 qpdf_basic_settings(*q);
500 return q;
501 },
502 "Create a new empty PDF from stratch.")
503 .def_static("_open",
504 open_pdf,
505 "",
506 py::arg("filename_or_stream"),
507 py::arg("password") = "",
508 py::arg("hex_password") = false,
509 py::arg("ignore_xref_streams") = false,
510 py::arg("suppress_warnings") = true,
511 py::arg("attempt_recovery") = true,
512 py::arg("inherit_page_attributes") = true,
513 py::arg("access_mode") = access_mode_e::access_default)
514 .def("__repr__",
515 [](QPDF &q) {
516 return std::string("<pikepdf.Pdf description='") + q.getFilename() +
517 std::string("'>");
518 })
519 .def_property_readonly("filename",
520 &QPDF::getFilename,
521 "The source filename of an existing PDF, when available.")
522 .def_property_readonly("pdf_version",
523 &QPDF::getPDFVersion,
524 "The version of the PDF specification used for this file, such as '1.7'.")
525 .def_property_readonly("extension_level", &QPDF::getExtensionLevel)
526 .def_property_readonly("Root", &QPDF::getRoot, "The /Root object of the PDF.")
527 .def_property_readonly("trailer",
528 &QPDF::getTrailer,
529 R"~~~(
530 Provides access to the PDF trailer object.
531
532 See |pdfrm| section 7.5.5. Generally speaking,
533 the trailer should not be modified with pikepdf, and modifying it
534 may not work. Some of the values in the trailer are automatically
535 changed when a file is saved.
536 )~~~")
537 .def_property_readonly(
538 "pages",
539 [](std::shared_ptr<QPDF> q) { return PageList(q); },
540 R"~~~(
541 Returns the list of pages.
542
543 Return type:
544 pikepdf._qpdf.PageList
545 )~~~",
546 py::return_value_policy::reference_internal)
547 .def_property_readonly("_pages", &QPDF::getAllPages)
548 .def_property_readonly("is_encrypted",
549 &QPDF::isEncrypted,
550 R"~~~(
551 Returns True if the PDF is encrypted.
552
553 For information about the nature of the encryption, see
554 :attr:`Pdf.encryption`.
555 )~~~")
556 .def_property_readonly("is_linearized",
557 &QPDF::isLinearized,
558 R"~~~(
559 Returns True if the PDF is linearized.
560
561 Specifically returns True iff the file starts with a linearization
562 parameter dictionary. Does no additional validation.
563 )~~~")
564 .def(
565 "check_linearization",
566 [](QPDF &q, py::object stream) {
567 py::scoped_estream_redirect redirector(std::cerr, stream);
568 return q.checkLinearization();
569 },
570 R"~~~(
571 Reports information on the PDF's linearization.
572
573 Args:
574 stream: A stream to write this information too; must
575 implement ``.write()`` and ``.flush()`` method. Defaults to
576 :data:`sys.stderr`.
577
578 Returns:
579 ``True`` if the file is correctly linearized, and ``False`` if
580 the file is linearized but the linearization data contains errors
581 or was incorrectly generated.
582
583 Raises:
584 RuntimeError: If the PDF in question is not linearized at all.
585 )~~~",
586 py::arg_v(
587 "stream", py::module_::import("sys").attr("stderr"), "sys.stderr"))
588 .def("get_warnings", // this is a def because it modifies state by clearing
589 // warnings
590 [](QPDF &q) {
591 py::list warnings;
592 for (auto w : q.getWarnings()) {
593 warnings.append(w.what());
594 }
595 return warnings;
596 })
597 .def("show_xref_table",
598 &QPDF::showXRefTable,
599 R"~~~(
600 Pretty-print the Pdf's xref (cross-reference table)
601 )~~~",
602 py::call_guard<py::scoped_ostream_redirect>())
603 .def(
604 "_add_page",
605 [](QPDF &q, QPDFObjectHandle &page, bool first = false) {
606 q.addPage(page, first);
607 },
608 R"~~~(
609 Attach a page to this PDF.
610
611 The page can be either be a newly constructed PDF object or it can
612 be obtained from another PDF.
613
614 Args:
615 page (pikepdf.Object): The page object to attach
616 first (bool): If True, prepend this before the first page; if False append after last page
617 )~~~",
618 py::arg("page"),
619 py::arg("first") = false,
620 py::keep_alive<1, 2>())
621 .def("_add_page_at", &QPDF::addPageAt, py::keep_alive<1, 2>())
622 .def("_remove_page", &QPDF::removePage)
623 .def(
624 "remove_unreferenced_resources",
625 [](QPDF &q) {
626 QPDFPageDocumentHelper helper(q);
627 helper.removeUnreferencedResources();
628 },
629 R"~~~(
630 Remove from /Resources of each page any object not referenced in page's contents
631
632 PDF pages may share resource dictionaries with other pages. If
633 pikepdf is used for page splitting, pages may reference resources
634 in their /Resources dictionary that are not actually required.
635 This purges all unnecessary resource entries.
636
637 For clarity, if all references to any type of object are removed, that
638 object will be excluded from the output PDF on save. (Conversely, only
639 objects that are discoverable from the PDF's root object are included.)
640 This function removes objects that are referenced from the page /Resources
641 dictionary, but never called for in the content stream, making them
642 unnecessary.
643
644 Suggested before saving, if content streams or /Resources dictionaries
645 are edited.
646 )~~~")
647 .def("_save",
648 save_pdf,
649 "",
650 py::arg("filename"),
651 py::arg("static_id") = false,
652 py::arg("preserve_pdfa") = true,
653 py::arg("min_version") = "",
654 py::arg("force_version") = "",
655 py::arg("fix_metadata_version") = true,
656 py::arg("compress_streams") = true,
657 py::arg("stream_decode_level") = py::none(),
658 py::arg("object_stream_mode") = qpdf_object_stream_e::qpdf_o_preserve,
659 py::arg("normalize_content") = false,
660 py::arg("linearize") = false,
661 py::arg("qdf") = false,
662 py::arg("progress") = py::none(),
663 py::arg("encryption") = py::none(),
664 py::arg("samefile_check") = true,
665 py::arg("recompress_flate") = false)
666 .def("_get_object_id", &QPDF::getObjectByID)
667 .def(
668 "get_object",
669 [](QPDF &q, std::pair<int, int> objgen) {
670 return q.getObjectByID(objgen.first, objgen.second);
671 },
672 R"~~~(
673 Look up an object by ID and generation number
674
675 Return type:
676 pikepdf.Object
677 )~~~",
678 py::return_value_policy::reference_internal,
679 py::arg("objgen"))
680 .def(
681 "get_object",
682 [](QPDF &q, int objid, int gen) { return q.getObjectByID(objid, gen); },
683 R"~~~(
684 Look up an object by ID and generation number
685
686 Return type:
687 pikepdf.Object
688 )~~~",
689 py::return_value_policy::reference_internal,
690 py::arg("objid"),
691 py::arg("gen"))
692 .def_property_readonly(
693 "objects",
694 [](QPDF &q) { return q.getAllObjects(); },
695 R"~~~(
696 Return an iterable list of all objects in the PDF.
697
698 After deleting content from a PDF such as pages, objects related
699 to that page, such as images on the page, may still be present.
700
701 Retun type:
702 pikepdf._ObjectList
703 )~~~",
704 py::return_value_policy::reference_internal)
705 .def("make_indirect",
706 &QPDF::makeIndirectObject,
707 R"~~~(
708 Attach an object to the Pdf as an indirect object
709
710 Direct objects appear inline in the binary encoding of the PDF.
711 Indirect objects appear inline as references (in English, "look
712 up object 4 generation 0") and then read from another location in
713 the file. The PDF specification requires that certain objects
714 are indirect - consult the PDF specification to confirm.
715
716 Generally a resource that is shared should be attached as an
717 indirect object. :class:`pikepdf.Stream` objects are always
718 indirect, and creating them will automatically attach it to the
719 Pdf.
720
721 See Also:
722 :meth:`pikepdf.Object.is_indirect`
723
724 Return type:
725 pikepdf.Object
726 )~~~",
727 py::arg("h"))
728 .def(
729 "make_indirect",
730 [](QPDF &q, py::object obj) -> QPDFObjectHandle {
731 return q.makeIndirectObject(objecthandle_encode(obj));
732 },
733 R"~~~(
734 Encode a Python object and attach to this Pdf as an indirect object.
735
736 Return type:
737 pikepdf.Object
738 )~~~",
739 py::arg("obj"))
740 .def(
741 "copy_foreign",
742 [](QPDF &q, QPDFObjectHandle &h) -> QPDFObjectHandle {
743 return q.copyForeignObject(h);
744 },
745 R"~~~(
746 Copy an ``Object`` from a foreign ``Pdf`` to this one.
747
748 This function is used to copy a :class:`pikepdf.Object` that is owned by
749 some other ``Pdf`` into this one. This is performs a deep (recursive) copy
750 and preserves circular references that may exist in the foreign object.
751 It also copies all :class:`pikepdf.Stream` objects. Since this may copy
752 a large amount of data, it is not done implicitly. This function does
753 not copy references to pages in the foreign PDF - it stops at page
754 boundaries. Thus, if you use ``copy_foreign()`` on a table of contents
755 (``/Outlines`` dictionary), you may have to update references to pages.
756
757 Direct objects, including dictionaries, do not need ``copy_foreign()``.
758 pikepdf will automatically convert and construct them.
759
760 Note:
761 pikepdf automatically treats incoming pages from a foreign PDF as
762 foreign objects, so :attr:`Pdf.pages` does not require this treatment.
763
764 See also:
765 `QPDF::copyForeignObject <http://qpdf.sourceforge.net/files/qpdf-manual.html#ref.foreign-objects>`_
766
767 .. versionchanged:: 2.1
768 Error messages improved.
769 )~~~",
770 py::return_value_policy::reference_internal,
771 py::keep_alive<1, 2>(),
772 py::arg("h"))
773 .def("_replace_object",
774 [](QPDF &q, std::pair<int, int> objgen, QPDFObjectHandle &h) {
775 q.replaceObject(objgen.first, objgen.second, h);
776 })
777 .def("_swap_objects",
778 [](QPDF &q, std::pair<int, int> objgen1, std::pair<int, int> objgen2) {
779 QPDFObjGen o1(objgen1.first, objgen1.second);
780 QPDFObjGen o2(objgen2.first, objgen2.second);
781 q.swapObjects(o1, o2);
782 })
783 .def(
784 "_process",
785 [](QPDF &q, std::string description, py::bytes data) {
786 std::string s = data;
787 q.processMemoryFile(description.c_str(), s.data(), s.size());
788 },
789 R"~~~(
790 Process a new in-memory PDF, replacing the existing PDF
791
792 Used to implement Pdf.close().
793 )~~~")
794 .def("_decode_all_streams_and_discard",
795 [](QPDF &q) {
796 QPDFWriter w(q);
797 Pl_Discard discard;
798 w.setOutputPipeline(&discard);
799 w.setDecodeLevel(qpdf_dl_all);
800 w.write();
801 })
802 .def_property_readonly(
803 "_allow_accessibility", [](QPDF &q) { return q.allowAccessibility(); })
804 .def_property_readonly(
805 "_allow_extract", [](QPDF &q) { return q.allowExtractAll(); })
806 .def_property_readonly(
807 "_allow_print_lowres", [](QPDF &q) { return q.allowPrintLowRes(); })
808 .def_property_readonly(
809 "_allow_print_highres", [](QPDF &q) { return q.allowPrintHighRes(); })
810 .def_property_readonly(
811 "_allow_modify_assembly", [](QPDF &q) { return q.allowModifyAssembly(); })
812 .def_property_readonly(
813 "_allow_modify_form", [](QPDF &q) { return q.allowModifyForm(); })
814 .def_property_readonly("_allow_modify_annotation",
815 [](QPDF &q) { return q.allowModifyAnnotation(); })
816 .def_property_readonly(
817 "_allow_modify_other", [](QPDF &q) { return q.allowModifyOther(); })
818 .def_property_readonly(
819 "_allow_modify_all", [](QPDF &q) { return q.allowModifyAll(); })
820 .def_property_readonly("_encryption_data",
821 [](QPDF &q) {
822 int R = 0;
823 int P = 0;
824 int V = 0;
825 QPDF::encryption_method_e stream_method = QPDF::e_unknown;
826 QPDF::encryption_method_e string_method = QPDF::e_unknown;
827 QPDF::encryption_method_e file_method = QPDF::e_unknown;
828 if (!q.isEncrypted(R, P, V, stream_method, string_method, file_method))
829 return py::dict();
830
831 auto user_passwd = q.getTrimmedUserPassword();
832 auto encryption_key = q.getEncryptionKey();
833
834 return py::dict(py::arg("R") = R,
835 py::arg("P") = P,
836 py::arg("V") = V,
837 py::arg("stream") = stream_method,
838 py::arg("string") = string_method,
839 py::arg("file") = file_method,
840 py::arg("user_passwd") = py::bytes(user_passwd),
841 py::arg("encryption_key") = py::bytes(encryption_key));
842 })
843 .def_property_readonly("user_password_matched",
844 &QPDF::userPasswordMatched,
845 R"~~~(
846 Returns True if the user password matched when the ``Pdf`` was opened.
847
848 It is possible for both the user and owner passwords to match.
849
850 .. versionadded:: 2.10
851 )~~~")
852 .def_property_readonly("owner_password_matched",
853 &QPDF::ownerPasswordMatched,
854 R"~~~(
855 Returns True if the owner password matched when the ``Pdf`` was opened.
856
857 It is possible for both the user and owner passwords to match.
858
859 .. versionadded:: 2.10
860 )~~~")
861 .def(
862 "generate_appearance_streams",
863 [](QPDF &q) {
864 QPDFAcroFormDocumentHelper afdh(q);
865 afdh.generateAppearancesIfNeeded();
866 },
867 R"~~~(
868 Generates appearance streams for AcroForm forms and form fields.
869
870 Appearance streams describe exactly how annotations and form fields
871 should appear to the user. If omitted, the PDF viewer is free to
872 render the annotations and form fields according to its own settings,
873 as needed.
874
875 For every form field in the document, this generates appearance
876 streams, subject to the limitations of QPDF's ability to create
877 appearance streams.
878
879 When invoked, this method will modify the ``Pdf`` in memory. It may be
880 best to do this after the ``Pdf`` is opened, or before it is saved,
881 because it may modify objects that the user does not expect to be
882 modified.
883
884 See:
885 https://github.com/qpdf/qpdf/blob/bf6b9ba1c681a6fac6d585c6262fb2778d4bb9d2/include/qpdf/QPDFFormFieldObjectHelper.hh#L216
886
887 .. versionadded:: 2.11
888 )~~~")
889 .def(
890 "flatten_annotations",
891 [](QPDF &q, std::string mode) {
892 QPDFPageDocumentHelper dh(q);
893 auto required = 0;
894 auto forbidden = an_invisible | an_hidden;
895
896 if (mode == "screen") {
897 forbidden |= an_no_view;
898 } else if (mode == "print") {
899 required |= an_print;
900 } else if (mode == "" || mode == "all") {
901 // No op
902 } else {
903 throw py::value_error(
904 "Mode must be one of 'all', 'screen', 'print'.");
905 }
906
907 dh.flattenAnnotations(required, forbidden);
908 },
909 R"~~~(
910 Flattens all PDF annotations into regular PDF content.
911
912 Annotations are markup such as review comments, highlights, proofreading
913 marks. User data entered into interactive form fields also counts as an
914 annotation.
915
916 When annotations are flattened, they are "burned into" the regular
917 content stream of the document and the fact that they were once annotations
918 is deleted. This can be useful when preparing a document for printing,
919 to ensure annotations are printed, or to finalize a form that should
920 no longer be changed.
921
922 Args:
923 mode: One of the strings ``'all'``, ``'screen'``, ``'print'``. If
924 omitted or set to empty, treated as ``'all'``. ``'screen'``
925 flattens all except those marked with the PDF flag /NoView.
926 ``'print'`` flattens only those marked for printing.
927
928 .. versionadded:: 2.11
929 )~~~",
930 py::arg("mode") = "all"); // class Pdf
931 }
932