1 #include <qpdf/qpdf-config.h> // include first for large file support
2 #include <qpdf/QPDFWriter.hh>
3
4 #include <assert.h>
5 #include <qpdf/Pl_StdioFile.hh>
6 #include <qpdf/Pl_Count.hh>
7 #include <qpdf/Pl_Discard.hh>
8 #include <qpdf/Pl_RC4.hh>
9 #include <qpdf/Pl_AES_PDF.hh>
10 #include <qpdf/Pl_Flate.hh>
11 #include <qpdf/Pl_PNGFilter.hh>
12 #include <qpdf/Pl_MD5.hh>
13 #include <qpdf/QUtil.hh>
14 #include <qpdf/MD5.hh>
15 #include <qpdf/RC4.hh>
16 #include <qpdf/QTC.hh>
17
18 #include <qpdf/QPDF.hh>
19 #include <qpdf/QPDFObjectHandle.hh>
20 #include <qpdf/QPDF_Name.hh>
21 #include <qpdf/QPDF_String.hh>
22 #include <qpdf/QIntC.hh>
23
24 #include <algorithm>
25 #include <stdlib.h>
26
Members(QPDF & pdf)27 QPDFWriter::Members::Members(QPDF& pdf) :
28 pdf(pdf),
29 filename("unspecified"),
30 file(0),
31 close_file(false),
32 buffer_pipeline(0),
33 output_buffer(0),
34 normalize_content_set(false),
35 normalize_content(false),
36 compress_streams(true),
37 compress_streams_set(false),
38 stream_decode_level(qpdf_dl_none),
39 stream_decode_level_set(false),
40 recompress_flate(false),
41 qdf_mode(false),
42 preserve_unreferenced_objects(false),
43 newline_before_endstream(false),
44 static_id(false),
45 suppress_original_object_ids(false),
46 direct_stream_lengths(true),
47 encrypted(false),
48 preserve_encryption(true),
49 linearized(false),
50 pclm(false),
51 object_stream_mode(qpdf_o_preserve),
52 encrypt_metadata(true),
53 encrypt_use_aes(false),
54 encryption_V(0),
55 encryption_R(0),
56 final_extension_level(0),
57 min_extension_level(0),
58 forced_extension_level(0),
59 encryption_dict_objid(0),
60 pipeline(0),
61 next_objid(1),
62 cur_stream_length_id(0),
63 cur_stream_length(0),
64 added_newline(false),
65 max_ostream_index(0),
66 next_stack_id(0),
67 deterministic_id(false),
68 md5_pipeline(0),
69 did_write_setup(false),
70 events_expected(0),
71 events_seen(0),
72 next_progress_report(0)
73 {
74 }
75
~Members()76 QPDFWriter::Members::~Members()
77 {
78 if (file && close_file)
79 {
80 fclose(file);
81 }
82 delete output_buffer;
83 }
84
QPDFWriter(QPDF & pdf)85 QPDFWriter::QPDFWriter(QPDF& pdf) :
86 m(new Members(pdf))
87 {
88 }
89
QPDFWriter(QPDF & pdf,char const * filename)90 QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
91 m(new Members(pdf))
92 {
93 setOutputFilename(filename);
94 }
95
QPDFWriter(QPDF & pdf,char const * description,FILE * file,bool close_file)96 QPDFWriter::QPDFWriter(QPDF& pdf, char const* description,
97 FILE *file, bool close_file) :
98 m(new Members(pdf))
99 {
100 setOutputFile(description, file, close_file);
101 }
102
~QPDFWriter()103 QPDFWriter::~QPDFWriter()
104 {
105 }
106
107 void
setOutputFilename(char const * filename)108 QPDFWriter::setOutputFilename(char const* filename)
109 {
110 char const* description = filename;
111 FILE* f = 0;
112 bool close_file = false;
113 if (filename == 0)
114 {
115 description = "standard output";
116 QTC::TC("qpdf", "QPDFWriter write to stdout");
117 f = stdout;
118 QUtil::binary_stdout();
119 }
120 else
121 {
122 QTC::TC("qpdf", "QPDFWriter write to file");
123 f = QUtil::safe_fopen(filename, "wb+");
124 close_file = true;
125 }
126 setOutputFile(description, f, close_file);
127 }
128
129 void
setOutputFile(char const * description,FILE * file,bool close_file)130 QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
131 {
132 this->m->filename = description;
133 this->m->file = file;
134 this->m->close_file = close_file;
135 Pipeline* p = new Pl_StdioFile("qpdf output", file);
136 this->m->to_delete.push_back(p);
137 initializePipelineStack(p);
138 }
139
140 void
setOutputMemory()141 QPDFWriter::setOutputMemory()
142 {
143 this->m->filename = "memory buffer";
144 this->m->buffer_pipeline = new Pl_Buffer("qpdf output");
145 this->m->to_delete.push_back(this->m->buffer_pipeline);
146 initializePipelineStack(this->m->buffer_pipeline);
147 }
148
149 Buffer*
getBuffer()150 QPDFWriter::getBuffer()
151 {
152 Buffer* result = this->m->output_buffer;
153 this->m->output_buffer = 0;
154 return result;
155 }
156
157 void
setOutputPipeline(Pipeline * p)158 QPDFWriter::setOutputPipeline(Pipeline* p)
159 {
160 this->m->filename = "custom pipeline";
161 initializePipelineStack(p);
162 }
163
164 void
setObjectStreamMode(qpdf_object_stream_e mode)165 QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
166 {
167 this->m->object_stream_mode = mode;
168 }
169
170 void
setStreamDataMode(qpdf_stream_data_e mode)171 QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
172 {
173 switch (mode)
174 {
175 case qpdf_s_uncompress:
176 this->m->stream_decode_level =
177 std::max(qpdf_dl_generalized, this->m->stream_decode_level);
178 this->m->compress_streams = false;
179 break;
180
181 case qpdf_s_preserve:
182 this->m->stream_decode_level = qpdf_dl_none;
183 this->m->compress_streams = false;
184 break;
185
186 case qpdf_s_compress:
187 this->m->stream_decode_level =
188 std::max(qpdf_dl_generalized, this->m->stream_decode_level);
189 this->m->compress_streams = true;
190 break;
191 }
192 this->m->stream_decode_level_set = true;
193 this->m->compress_streams_set = true;
194 }
195
196
197 void
setCompressStreams(bool val)198 QPDFWriter::setCompressStreams(bool val)
199 {
200 this->m->compress_streams = val;
201 this->m->compress_streams_set = true;
202 }
203
204 void
setDecodeLevel(qpdf_stream_decode_level_e val)205 QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
206 {
207 this->m->stream_decode_level = val;
208 this->m->stream_decode_level_set = true;
209 }
210
211 void
setRecompressFlate(bool val)212 QPDFWriter::setRecompressFlate(bool val)
213 {
214 this->m->recompress_flate = val;
215 }
216
217 void
setContentNormalization(bool val)218 QPDFWriter::setContentNormalization(bool val)
219 {
220 this->m->normalize_content_set = true;
221 this->m->normalize_content = val;
222 }
223
224 void
setQDFMode(bool val)225 QPDFWriter::setQDFMode(bool val)
226 {
227 this->m->qdf_mode = val;
228 }
229
230 void
setPreserveUnreferencedObjects(bool val)231 QPDFWriter::setPreserveUnreferencedObjects(bool val)
232 {
233 this->m->preserve_unreferenced_objects = val;
234 }
235
236 void
setNewlineBeforeEndstream(bool val)237 QPDFWriter::setNewlineBeforeEndstream(bool val)
238 {
239 this->m->newline_before_endstream = val;
240 }
241
242 void
setMinimumPDFVersion(std::string const & version,int extension_level)243 QPDFWriter::setMinimumPDFVersion(std::string const& version,
244 int extension_level)
245 {
246 bool set_version = false;
247 bool set_extension_level = false;
248 if (this->m->min_pdf_version.empty())
249 {
250 set_version = true;
251 set_extension_level = true;
252 }
253 else
254 {
255 int old_major = 0;
256 int old_minor = 0;
257 int min_major = 0;
258 int min_minor = 0;
259 parseVersion(version, old_major, old_minor);
260 parseVersion(this->m->min_pdf_version, min_major, min_minor);
261 int compare = compareVersions(
262 old_major, old_minor, min_major, min_minor);
263 if (compare > 0)
264 {
265 QTC::TC("qpdf", "QPDFWriter increasing minimum version",
266 extension_level == 0 ? 0 : 1);
267 set_version = true;
268 set_extension_level = true;
269 }
270 else if (compare == 0)
271 {
272 if (extension_level > this->m->min_extension_level)
273 {
274 QTC::TC("qpdf", "QPDFWriter increasing extension level");
275 set_extension_level = true;
276 }
277 }
278 }
279
280 if (set_version)
281 {
282 this->m->min_pdf_version = version;
283 }
284 if (set_extension_level)
285 {
286 this->m->min_extension_level = extension_level;
287 }
288 }
289
290 void
forcePDFVersion(std::string const & version,int extension_level)291 QPDFWriter::forcePDFVersion(std::string const& version,
292 int extension_level)
293 {
294 this->m->forced_pdf_version = version;
295 this->m->forced_extension_level = extension_level;
296 }
297
298 void
setExtraHeaderText(std::string const & text)299 QPDFWriter::setExtraHeaderText(std::string const& text)
300 {
301 this->m->extra_header_text = text;
302 if ((this->m->extra_header_text.length() > 0) &&
303 (*(this->m->extra_header_text.rbegin()) != '\n'))
304 {
305 QTC::TC("qpdf", "QPDFWriter extra header text add newline");
306 this->m->extra_header_text += "\n";
307 }
308 else
309 {
310 QTC::TC("qpdf", "QPDFWriter extra header text no newline");
311 }
312 }
313
314 void
setStaticID(bool val)315 QPDFWriter::setStaticID(bool val)
316 {
317 this->m->static_id = val;
318 }
319
320 void
setDeterministicID(bool val)321 QPDFWriter::setDeterministicID(bool val)
322 {
323 this->m->deterministic_id = val;
324 }
325
326 void
setStaticAesIV(bool val)327 QPDFWriter::setStaticAesIV(bool val)
328 {
329 if (val)
330 {
331 Pl_AES_PDF::useStaticIV();
332 }
333 }
334
335 void
setSuppressOriginalObjectIDs(bool val)336 QPDFWriter::setSuppressOriginalObjectIDs(bool val)
337 {
338 this->m->suppress_original_object_ids = val;
339 }
340
341 void
setPreserveEncryption(bool val)342 QPDFWriter::setPreserveEncryption(bool val)
343 {
344 this->m->preserve_encryption = val;
345 }
346
347 void
setLinearization(bool val)348 QPDFWriter::setLinearization(bool val)
349 {
350 this->m->linearized = val;
351 if (val)
352 {
353 this->m->pclm = false;
354 }
355 }
356
357 void
setLinearizationPass1Filename(std::string const & filename)358 QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
359 {
360 this->m->lin_pass1_filename = filename;
361 }
362
363 void
setPCLm(bool val)364 QPDFWriter::setPCLm(bool val)
365 {
366 this->m->pclm = val;
367 if (val)
368 {
369 this->m->linearized = false;
370 }
371 }
372
373 void
setR2EncryptionParameters(char const * user_password,char const * owner_password,bool allow_print,bool allow_modify,bool allow_extract,bool allow_annotate)374 QPDFWriter::setR2EncryptionParameters(
375 char const* user_password, char const* owner_password,
376 bool allow_print, bool allow_modify,
377 bool allow_extract, bool allow_annotate)
378 {
379 std::set<int> clear;
380 if (! allow_print)
381 {
382 clear.insert(3);
383 }
384 if (! allow_modify)
385 {
386 clear.insert(4);
387 }
388 if (! allow_extract)
389 {
390 clear.insert(5);
391 }
392 if (! allow_annotate)
393 {
394 clear.insert(6);
395 }
396
397 setEncryptionParameters(user_password, owner_password, 1, 2, 5, clear);
398 }
399
400 void
setR3EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,qpdf_r3_print_e print,qpdf_r3_modify_e modify)401 QPDFWriter::setR3EncryptionParameters(
402 char const* user_password, char const* owner_password,
403 bool allow_accessibility, bool allow_extract,
404 qpdf_r3_print_e print, qpdf_r3_modify_e modify)
405 {
406 std::set<int> clear;
407 interpretR3EncryptionParameters(
408 clear, user_password, owner_password,
409 allow_accessibility, allow_extract,
410 true, true, true, true, print, modify);
411 setEncryptionParameters(user_password, owner_password, 2, 3, 16, clear);
412 }
413
414 void
setR3EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print)415 QPDFWriter::setR3EncryptionParameters(
416 char const* user_password, char const* owner_password,
417 bool allow_accessibility, bool allow_extract,
418 bool allow_assemble, bool allow_annotate_and_form,
419 bool allow_form_filling, bool allow_modify_other,
420 qpdf_r3_print_e print)
421 {
422 std::set<int> clear;
423 interpretR3EncryptionParameters(
424 clear, user_password, owner_password,
425 allow_accessibility, allow_extract,
426 allow_assemble, allow_annotate_and_form,
427 allow_form_filling, allow_modify_other,
428 print, qpdf_r3m_all);
429 setEncryptionParameters(user_password, owner_password, 2, 3, 16, clear);
430 }
431
432 void
setR4EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,qpdf_r3_print_e print,qpdf_r3_modify_e modify,bool encrypt_metadata,bool use_aes)433 QPDFWriter::setR4EncryptionParameters(
434 char const* user_password, char const* owner_password,
435 bool allow_accessibility, bool allow_extract,
436 qpdf_r3_print_e print, qpdf_r3_modify_e modify,
437 bool encrypt_metadata, bool use_aes)
438 {
439 std::set<int> clear;
440 interpretR3EncryptionParameters(
441 clear, user_password, owner_password,
442 allow_accessibility, allow_extract,
443 true, true, true, true, print, modify);
444 this->m->encrypt_use_aes = use_aes;
445 this->m->encrypt_metadata = encrypt_metadata;
446 setEncryptionParameters(user_password, owner_password, 4, 4, 16, clear);
447 }
448
449 void
setR4EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print,bool encrypt_metadata,bool use_aes)450 QPDFWriter::setR4EncryptionParameters(
451 char const* user_password, char const* owner_password,
452 bool allow_accessibility, bool allow_extract,
453 bool allow_assemble, bool allow_annotate_and_form,
454 bool allow_form_filling, bool allow_modify_other,
455 qpdf_r3_print_e print,
456 bool encrypt_metadata, bool use_aes)
457 {
458 std::set<int> clear;
459 interpretR3EncryptionParameters(
460 clear, user_password, owner_password,
461 allow_accessibility, allow_extract,
462 allow_assemble, allow_annotate_and_form,
463 allow_form_filling, allow_modify_other,
464 print, qpdf_r3m_all);
465 this->m->encrypt_use_aes = use_aes;
466 this->m->encrypt_metadata = encrypt_metadata;
467 setEncryptionParameters(user_password, owner_password, 4, 4, 16, clear);
468 }
469
470 void
setR5EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,qpdf_r3_print_e print,qpdf_r3_modify_e modify,bool encrypt_metadata)471 QPDFWriter::setR5EncryptionParameters(
472 char const* user_password, char const* owner_password,
473 bool allow_accessibility, bool allow_extract,
474 qpdf_r3_print_e print, qpdf_r3_modify_e modify,
475 bool encrypt_metadata)
476 {
477 std::set<int> clear;
478 interpretR3EncryptionParameters(
479 clear, user_password, owner_password,
480 allow_accessibility, allow_extract,
481 true, true, true, true, print, modify);
482 this->m->encrypt_use_aes = true;
483 this->m->encrypt_metadata = encrypt_metadata;
484 setEncryptionParameters(user_password, owner_password, 5, 5, 32, clear);
485 }
486
487 void
setR5EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print,bool encrypt_metadata)488 QPDFWriter::setR5EncryptionParameters(
489 char const* user_password, char const* owner_password,
490 bool allow_accessibility, bool allow_extract,
491 bool allow_assemble, bool allow_annotate_and_form,
492 bool allow_form_filling, bool allow_modify_other,
493 qpdf_r3_print_e print,
494 bool encrypt_metadata)
495 {
496 std::set<int> clear;
497 interpretR3EncryptionParameters(
498 clear, user_password, owner_password,
499 allow_accessibility, allow_extract,
500 allow_assemble, allow_annotate_and_form,
501 allow_form_filling, allow_modify_other,
502 print, qpdf_r3m_all);
503 this->m->encrypt_use_aes = true;
504 this->m->encrypt_metadata = encrypt_metadata;
505 setEncryptionParameters(user_password, owner_password, 5, 5, 32, clear);
506 }
507
508 void
setR6EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,qpdf_r3_print_e print,qpdf_r3_modify_e modify,bool encrypt_metadata)509 QPDFWriter::setR6EncryptionParameters(
510 char const* user_password, char const* owner_password,
511 bool allow_accessibility, bool allow_extract,
512 qpdf_r3_print_e print, qpdf_r3_modify_e modify,
513 bool encrypt_metadata)
514 {
515 std::set<int> clear;
516 interpretR3EncryptionParameters(
517 clear, user_password, owner_password,
518 allow_accessibility, allow_extract,
519 true, true, true, true, print, modify);
520 this->m->encrypt_use_aes = true;
521 this->m->encrypt_metadata = encrypt_metadata;
522 setEncryptionParameters(user_password, owner_password, 5, 6, 32, clear);
523 }
524
525 void
setR6EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print,bool encrypt_metadata)526 QPDFWriter::setR6EncryptionParameters(
527 char const* user_password, char const* owner_password,
528 bool allow_accessibility, bool allow_extract,
529 bool allow_assemble, bool allow_annotate_and_form,
530 bool allow_form_filling, bool allow_modify_other,
531 qpdf_r3_print_e print,
532 bool encrypt_metadata)
533 {
534 std::set<int> clear;
535 interpretR3EncryptionParameters(
536 clear, user_password, owner_password,
537 allow_accessibility, allow_extract,
538 allow_assemble, allow_annotate_and_form,
539 allow_form_filling, allow_modify_other,
540 print, qpdf_r3m_all);
541 this->m->encrypt_use_aes = true;
542 this->m->encrypt_metadata = encrypt_metadata;
543 setEncryptionParameters(user_password, owner_password, 5, 6, 32, clear);
544 }
545
546 void
interpretR3EncryptionParameters(std::set<int> & clear,char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print,qpdf_r3_modify_e modify)547 QPDFWriter::interpretR3EncryptionParameters(
548 std::set<int>& clear,
549 char const* user_password, char const* owner_password,
550 bool allow_accessibility, bool allow_extract,
551 bool allow_assemble, bool allow_annotate_and_form,
552 bool allow_form_filling, bool allow_modify_other,
553 qpdf_r3_print_e print, qpdf_r3_modify_e modify)
554 {
555 // Acrobat 5 security options:
556
557 // Checkboxes:
558 // Enable Content Access for the Visually Impaired
559 // Allow Content Copying and Extraction
560
561 // Allowed changes menu:
562 // None
563 // Only Document Assembly
564 // Only Form Field Fill-in or Signing
565 // Comment Authoring, Form Field Fill-in or Signing
566 // General Editing, Comment and Form Field Authoring
567
568 // Allowed printing menu:
569 // None
570 // Low Resolution
571 // Full printing
572
573 // Meanings of bits in P when R >= 3
574 //
575 // 3: low-resolution printing
576 // 4: document modification except as controlled by 6, 9, and 11
577 // 5: extraction
578 // 6: add/modify annotations (comment), fill in forms
579 // if 4+6 are set, also allows modification of form fields
580 // 9: fill in forms even if 6 is clear
581 // 10: accessibility; ignored by readers, should always be set
582 // 11: document assembly even if 4 is clear
583 // 12: high-resolution printing
584
585 if (! allow_accessibility)
586 {
587 // setEncryptionParameters sets this if R > 3
588 clear.insert(10);
589 }
590 if (! allow_extract)
591 {
592 clear.insert(5);
593 }
594
595 // Note: these switch statements all "fall through" (no break
596 // statements). Each option clears successively more access bits.
597 switch (print)
598 {
599 case qpdf_r3p_none:
600 clear.insert(3); // any printing
601
602 case qpdf_r3p_low:
603 clear.insert(12); // high resolution printing
604
605 case qpdf_r3p_full:
606 break;
607
608 // no default so gcc warns for missing cases
609 }
610
611 // Modify options. The qpdf_r3_modify_e options control groups of
612 // bits and lack the full flexibility of the spec. This is
613 // unfortunate, but it's been in the API for ages, and we're stuck
614 // with it. See also allow checks below to control the bits
615 // individually.
616
617 // NOT EXERCISED IN TEST SUITE
618 switch (modify)
619 {
620 case qpdf_r3m_none:
621 clear.insert(11); // document assembly
622
623 case qpdf_r3m_assembly:
624 clear.insert(9); // filling in form fields
625
626 case qpdf_r3m_form:
627 clear.insert(6); // modify annotations, fill in form fields
628
629 case qpdf_r3m_annotate:
630 clear.insert(4); // other modifications
631
632 case qpdf_r3m_all:
633 break;
634
635 // no default so gcc warns for missing cases
636 }
637 // END NOT EXERCISED IN TEST SUITE
638
639 if (! allow_assemble)
640 {
641 clear.insert(11);
642 }
643 if (! allow_annotate_and_form)
644 {
645 clear.insert(6);
646 }
647 if (! allow_form_filling)
648 {
649 clear.insert(9);
650 }
651 if (! allow_modify_other)
652 {
653 clear.insert(4);
654 }
655 }
656
657 void
setEncryptionParameters(char const * user_password,char const * owner_password,int V,int R,int key_len,std::set<int> & bits_to_clear)658 QPDFWriter::setEncryptionParameters(
659 char const* user_password, char const* owner_password,
660 int V, int R, int key_len, std::set<int>& bits_to_clear)
661 {
662 // PDF specification refers to bits with the low bit numbered 1.
663 // We have to convert this into a bit field.
664
665 // Specification always requires bits 1 and 2 to be cleared.
666 bits_to_clear.insert(1);
667 bits_to_clear.insert(2);
668
669 if (R > 3)
670 {
671 // Bit 10 is deprecated and should always be set. This used
672 // to mean accessibility. There is no way to disable
673 // accessibility with R > 3.
674 bits_to_clear.erase(10);
675 }
676
677 int P = 0;
678 // Create the complement of P, then invert.
679 for (std::set<int>::iterator iter = bits_to_clear.begin();
680 iter != bits_to_clear.end(); ++iter)
681 {
682 P |= (1 << ((*iter) - 1));
683 }
684 P = ~P;
685
686 generateID();
687 std::string O;
688 std::string U;
689 std::string OE;
690 std::string UE;
691 std::string Perms;
692 std::string encryption_key;
693 if (V < 5)
694 {
695 QPDF::compute_encryption_O_U(
696 user_password, owner_password, V, R, key_len, P,
697 this->m->encrypt_metadata, this->m->id1, O, U);
698 }
699 else
700 {
701 QPDF::compute_encryption_parameters_V5(
702 user_password, owner_password, V, R, key_len, P,
703 this->m->encrypt_metadata, this->m->id1,
704 encryption_key, O, U, OE, UE, Perms);
705 }
706 setEncryptionParametersInternal(
707 V, R, key_len, P, O, U, OE, UE, Perms,
708 this->m->id1, user_password, encryption_key);
709 }
710
711 void
copyEncryptionParameters(QPDF & qpdf)712 QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
713 {
714 this->m->preserve_encryption = false;
715 QPDFObjectHandle trailer = qpdf.getTrailer();
716 if (trailer.hasKey("/Encrypt"))
717 {
718 generateID();
719 this->m->id1 =
720 trailer.getKey("/ID").getArrayItem(0).getStringValue();
721 QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
722 int V = encrypt.getKey("/V").getIntValueAsInt();
723 int key_len = 5;
724 if (V > 1)
725 {
726 key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
727 }
728 if (encrypt.hasKey("/EncryptMetadata") &&
729 encrypt.getKey("/EncryptMetadata").isBool())
730 {
731 this->m->encrypt_metadata =
732 encrypt.getKey("/EncryptMetadata").getBoolValue();
733 }
734 if (V >= 4)
735 {
736 // When copying encryption parameters, use AES even if the
737 // original file did not. Acrobat doesn't create files
738 // with V >= 4 that don't use AES, and the logic of
739 // figuring out whether AES is used or not is complicated
740 // with /StmF, /StrF, and /EFF all potentially having
741 // different values.
742 this->m->encrypt_use_aes = true;
743 }
744 QTC::TC("qpdf", "QPDFWriter copy encrypt metadata",
745 this->m->encrypt_metadata ? 0 : 1);
746 QTC::TC("qpdf", "QPDFWriter copy use_aes",
747 this->m->encrypt_use_aes ? 0 : 1);
748 std::string OE;
749 std::string UE;
750 std::string Perms;
751 std::string encryption_key;
752 if (V >= 5)
753 {
754 QTC::TC("qpdf", "QPDFWriter copy V5");
755 OE = encrypt.getKey("/OE").getStringValue();
756 UE = encrypt.getKey("/UE").getStringValue();
757 Perms = encrypt.getKey("/Perms").getStringValue();
758 encryption_key = qpdf.getEncryptionKey();
759 }
760
761 setEncryptionParametersInternal(
762 V,
763 encrypt.getKey("/R").getIntValueAsInt(),
764 key_len,
765 static_cast<int>(encrypt.getKey("/P").getIntValue()),
766 encrypt.getKey("/O").getStringValue(),
767 encrypt.getKey("/U").getStringValue(),
768 OE,
769 UE,
770 Perms,
771 this->m->id1, // this->m->id1 == the other file's id1
772 qpdf.getPaddedUserPassword(),
773 encryption_key);
774 }
775 }
776
777 void
disableIncompatibleEncryption(int major,int minor,int extension_level)778 QPDFWriter::disableIncompatibleEncryption(int major, int minor,
779 int extension_level)
780 {
781 if (! this->m->encrypted)
782 {
783 return;
784 }
785
786 bool disable = false;
787 if (compareVersions(major, minor, 1, 3) < 0)
788 {
789 disable = true;
790 }
791 else
792 {
793 int V = QUtil::string_to_int(
794 this->m->encryption_dictionary["/V"].c_str());
795 int R = QUtil::string_to_int(
796 this->m->encryption_dictionary["/R"].c_str());
797 if (compareVersions(major, minor, 1, 4) < 0)
798 {
799 if ((V > 1) || (R > 2))
800 {
801 disable = true;
802 }
803 }
804 else if (compareVersions(major, minor, 1, 5) < 0)
805 {
806 if ((V > 2) || (R > 3))
807 {
808 disable = true;
809 }
810 }
811 else if (compareVersions(major, minor, 1, 6) < 0)
812 {
813 if (this->m->encrypt_use_aes)
814 {
815 disable = true;
816 }
817 }
818 else if ((compareVersions(major, minor, 1, 7) < 0) ||
819 ((compareVersions(major, minor, 1, 7) == 0) &&
820 extension_level < 3))
821 {
822 if ((V >= 5) || (R >= 5))
823 {
824 disable = true;
825 }
826 }
827 }
828 if (disable)
829 {
830 QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
831 this->m->encrypted = false;
832 }
833 }
834
835 void
parseVersion(std::string const & version,int & major,int & minor) const836 QPDFWriter::parseVersion(std::string const& version,
837 int& major, int& minor) const
838 {
839 major = QUtil::string_to_int(version.c_str());
840 minor = 0;
841 size_t p = version.find('.');
842 if ((p != std::string::npos) && (version.length() > p))
843 {
844 minor = QUtil::string_to_int(version.substr(p + 1).c_str());
845 }
846 std::string tmp = QUtil::int_to_string(major) + "." +
847 QUtil::int_to_string(minor);
848 if (tmp != version)
849 {
850 // The version number in the input is probably invalid. This
851 // happens with some files that are designed to exercise bugs,
852 // such as files in the fuzzer corpus. Unfortunately
853 // QPDFWriter doesn't have a way to give a warning, so we just
854 // ignore this case.
855 }
856 }
857
858 int
compareVersions(int major1,int minor1,int major2,int minor2) const859 QPDFWriter::compareVersions(int major1, int minor1,
860 int major2, int minor2) const
861 {
862 if (major1 < major2)
863 {
864 return -1;
865 }
866 else if (major1 > major2)
867 {
868 return 1;
869 }
870 else if (minor1 < minor2)
871 {
872 return -1;
873 }
874 else if (minor1 > minor2)
875 {
876 return 1;
877 }
878 else
879 {
880 return 0;
881 }
882 }
883
884 void
setEncryptionParametersInternal(int V,int R,int key_len,int P,std::string const & O,std::string const & U,std::string const & OE,std::string const & UE,std::string const & Perms,std::string const & id1,std::string const & user_password,std::string const & encryption_key)885 QPDFWriter::setEncryptionParametersInternal(
886 int V, int R, int key_len, int P,
887 std::string const& O, std::string const& U,
888 std::string const& OE, std::string const& UE, std::string const& Perms,
889 std::string const& id1, std::string const& user_password,
890 std::string const& encryption_key)
891 {
892 this->m->encryption_V = V;
893 this->m->encryption_R = R;
894 this->m->encryption_dictionary["/Filter"] = "/Standard";
895 this->m->encryption_dictionary["/V"] = QUtil::int_to_string(V);
896 this->m->encryption_dictionary["/Length"] =
897 QUtil::int_to_string(key_len * 8);
898 this->m->encryption_dictionary["/R"] = QUtil::int_to_string(R);
899 this->m->encryption_dictionary["/P"] = QUtil::int_to_string(P);
900 this->m->encryption_dictionary["/O"] = QPDF_String(O).unparse(true);
901 this->m->encryption_dictionary["/U"] = QPDF_String(U).unparse(true);
902 if (V >= 5)
903 {
904 this->m->encryption_dictionary["/OE"] = QPDF_String(OE).unparse(true);
905 this->m->encryption_dictionary["/UE"] = QPDF_String(UE).unparse(true);
906 this->m->encryption_dictionary["/Perms"] =
907 QPDF_String(Perms).unparse(true);
908 }
909 if (R >= 6)
910 {
911 setMinimumPDFVersion("1.7", 8);
912 }
913 else if (R == 5)
914 {
915 setMinimumPDFVersion("1.7", 3);
916 }
917 else if (R == 4)
918 {
919 setMinimumPDFVersion(this->m->encrypt_use_aes ? "1.6" : "1.5");
920 }
921 else if (R == 3)
922 {
923 setMinimumPDFVersion("1.4");
924 }
925 else
926 {
927 setMinimumPDFVersion("1.3");
928 }
929
930 if ((R >= 4) && (! this->m->encrypt_metadata))
931 {
932 this->m->encryption_dictionary["/EncryptMetadata"] = "false";
933 }
934 if ((V == 4) || (V == 5))
935 {
936 // The spec says the value for the crypt filter key can be
937 // anything, and xpdf seems to agree. However, Adobe Reader
938 // won't open our files unless we use /StdCF.
939 this->m->encryption_dictionary["/StmF"] = "/StdCF";
940 this->m->encryption_dictionary["/StrF"] = "/StdCF";
941 std::string method = (this->m->encrypt_use_aes
942 ? ((V < 5) ? "/AESV2" : "/AESV3")
943 : "/V2");
944 // The PDF spec says the /Length key is optional, but the PDF
945 // previewer on some versions of MacOS won't open encrypted
946 // files without it.
947 this->m->encryption_dictionary["/CF"] =
948 "<< /StdCF << /AuthEvent /DocOpen /CFM " + method +
949 " /Length " + std::string((V < 5) ? "16" : "32") + " >> >>";
950 }
951
952 this->m->encrypted = true;
953 QPDF::EncryptionData encryption_data(
954 V, R, key_len, P, O, U, OE, UE, Perms, id1, this->m->encrypt_metadata);
955 if (V < 5)
956 {
957 this->m->encryption_key = QPDF::compute_encryption_key(
958 user_password, encryption_data);
959 }
960 else
961 {
962 this->m->encryption_key = encryption_key;
963 }
964 }
965
966 void
setDataKey(int objid)967 QPDFWriter::setDataKey(int objid)
968 {
969 this->m->cur_data_key = QPDF::compute_data_key(
970 this->m->encryption_key, objid, 0,
971 this->m->encrypt_use_aes, this->m->encryption_V, this->m->encryption_R);
972 }
973
974 unsigned int
bytesNeeded(long long n)975 QPDFWriter::bytesNeeded(long long n)
976 {
977 unsigned int bytes = 0;
978 while (n)
979 {
980 ++bytes;
981 n >>= 8;
982 }
983 return bytes;
984 }
985
986 void
writeBinary(unsigned long long val,unsigned int bytes)987 QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
988 {
989 if (bytes > sizeof(unsigned long long))
990 {
991 throw std::logic_error(
992 "QPDFWriter::writeBinary called with too many bytes");
993 }
994 unsigned char data[sizeof(unsigned long long)];
995 for (unsigned int i = 0; i < bytes; ++i)
996 {
997 data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
998 val >>= 8;
999 }
1000 this->m->pipeline->write(data, bytes);
1001 }
1002
1003 void
writeString(std::string const & str)1004 QPDFWriter::writeString(std::string const& str)
1005 {
1006 this->m->pipeline->write(QUtil::unsigned_char_pointer(str), str.length());
1007 }
1008
1009 void
writeBuffer(PointerHolder<Buffer> & b)1010 QPDFWriter::writeBuffer(PointerHolder<Buffer>& b)
1011 {
1012 this->m->pipeline->write(b->getBuffer(), b->getSize());
1013 }
1014
1015 void
writeStringQDF(std::string const & str)1016 QPDFWriter::writeStringQDF(std::string const& str)
1017 {
1018 if (this->m->qdf_mode)
1019 {
1020 writeString(str);
1021 }
1022 }
1023
1024 void
writeStringNoQDF(std::string const & str)1025 QPDFWriter::writeStringNoQDF(std::string const& str)
1026 {
1027 if (! this->m->qdf_mode)
1028 {
1029 writeString(str);
1030 }
1031 }
1032
1033 void
writePad(int nspaces)1034 QPDFWriter::writePad(int nspaces)
1035 {
1036 for (int i = 0; i < nspaces; ++i)
1037 {
1038 writeString(" ");
1039 }
1040 }
1041
1042 Pipeline*
pushPipeline(Pipeline * p)1043 QPDFWriter::pushPipeline(Pipeline* p)
1044 {
1045 assert(dynamic_cast<Pl_Count*>(p) == 0);
1046 this->m->pipeline_stack.push_back(p);
1047 return p;
1048 }
1049
1050 void
initializePipelineStack(Pipeline * p)1051 QPDFWriter::initializePipelineStack(Pipeline *p)
1052 {
1053 this->m->pipeline = new Pl_Count("pipeline stack base", p);
1054 this->m->to_delete.push_back(this->m->pipeline);
1055 this->m->pipeline_stack.push_back(this->m->pipeline);
1056 }
1057
1058 void
activatePipelineStack(PipelinePopper & pp)1059 QPDFWriter::activatePipelineStack(PipelinePopper& pp)
1060 {
1061 std::string stack_id(
1062 "stack " + QUtil::uint_to_string(this->m->next_stack_id));
1063 Pl_Count* c = new Pl_Count(stack_id.c_str(),
1064 this->m->pipeline_stack.back());
1065 ++this->m->next_stack_id;
1066 this->m->pipeline_stack.push_back(c);
1067 this->m->pipeline = c;
1068 pp.stack_id = stack_id;
1069 }
1070
~PipelinePopper()1071 QPDFWriter::PipelinePopper::~PipelinePopper()
1072 {
1073 if (stack_id.empty())
1074 {
1075 return;
1076 }
1077 assert(qw->m->pipeline_stack.size() >= 2);
1078 qw->m->pipeline->finish();
1079 assert(dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back()) ==
1080 qw->m->pipeline);
1081 // It might be possible for this assertion to fail if
1082 // writeLinearized exits by exception when deterministic ID, but I
1083 // don't think so. As of this writing, this is the only case in
1084 // which two dynamically allocated PipelinePopper objects ever
1085 // exist at the same time, so the assertion will fail if they get
1086 // popped out of order from automatic destruction.
1087 assert(qw->m->pipeline->getIdentifier() == stack_id);
1088 delete qw->m->pipeline_stack.back();
1089 qw->m->pipeline_stack.pop_back();
1090 while (dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back()) == 0)
1091 {
1092 Pipeline* p = qw->m->pipeline_stack.back();
1093 if (dynamic_cast<Pl_MD5*>(p) == qw->m->md5_pipeline)
1094 {
1095 qw->m->md5_pipeline = 0;
1096 }
1097 qw->m->pipeline_stack.pop_back();
1098 Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p);
1099 if (bp && buf)
1100 {
1101 *bp = buf->getBuffer();
1102 }
1103 delete p;
1104 }
1105 qw->m->pipeline = dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back());
1106 }
1107
1108 void
adjustAESStreamLength(size_t & length)1109 QPDFWriter::adjustAESStreamLength(size_t& length)
1110 {
1111 if (this->m->encrypted && (! this->m->cur_data_key.empty()) &&
1112 this->m->encrypt_use_aes)
1113 {
1114 // Stream length will be padded with 1 to 16 bytes to end up
1115 // as a multiple of 16. It will also be prepended by 16 bits
1116 // of random data.
1117 length += 32 - (length & 0xf);
1118 }
1119 }
1120
1121 void
pushEncryptionFilter(PipelinePopper & pp)1122 QPDFWriter::pushEncryptionFilter(PipelinePopper& pp)
1123 {
1124 if (this->m->encrypted && (! this->m->cur_data_key.empty()))
1125 {
1126 Pipeline* p = 0;
1127 if (this->m->encrypt_use_aes)
1128 {
1129 p = new Pl_AES_PDF(
1130 "aes stream encryption", this->m->pipeline, true,
1131 QUtil::unsigned_char_pointer(this->m->cur_data_key),
1132 this->m->cur_data_key.length());
1133 }
1134 else
1135 {
1136 p = new Pl_RC4("rc4 stream encryption", this->m->pipeline,
1137 QUtil::unsigned_char_pointer(this->m->cur_data_key),
1138 QIntC::to_int(this->m->cur_data_key.length()));
1139 }
1140 pushPipeline(p);
1141 }
1142 // Must call this unconditionally so we can call popPipelineStack
1143 // to balance pushEncryptionFilter().
1144 activatePipelineStack(pp);
1145 }
1146
1147 void
pushDiscardFilter(PipelinePopper & pp)1148 QPDFWriter::pushDiscardFilter(PipelinePopper& pp)
1149 {
1150 pushPipeline(new Pl_Discard());
1151 activatePipelineStack(pp);
1152 }
1153
1154 void
pushMD5Pipeline(PipelinePopper & pp)1155 QPDFWriter::pushMD5Pipeline(PipelinePopper& pp)
1156 {
1157 if (! this->m->id2.empty())
1158 {
1159 // Can't happen in the code
1160 throw std::logic_error(
1161 "Deterministic ID computation enabled after ID"
1162 " generation has already occurred.");
1163 }
1164 assert(this->m->deterministic_id);
1165 assert(this->m->md5_pipeline == 0);
1166 assert(this->m->pipeline->getCount() == 0);
1167 this->m->md5_pipeline = new Pl_MD5("qpdf md5", this->m->pipeline);
1168 this->m->md5_pipeline->persistAcrossFinish(true);
1169 // Special case code in popPipelineStack clears this->m->md5_pipeline
1170 // upon deletion.
1171 pushPipeline(this->m->md5_pipeline);
1172 activatePipelineStack(pp);
1173 }
1174
1175 void
computeDeterministicIDData()1176 QPDFWriter::computeDeterministicIDData()
1177 {
1178 assert(this->m->md5_pipeline != 0);
1179 assert(this->m->deterministic_id_data.empty());
1180 this->m->deterministic_id_data = this->m->md5_pipeline->getHexDigest();
1181 this->m->md5_pipeline->enable(false);
1182 }
1183
1184 int
openObject(int objid)1185 QPDFWriter::openObject(int objid)
1186 {
1187 if (objid == 0)
1188 {
1189 objid = this->m->next_objid++;
1190 }
1191 this->m->xref[objid] = QPDFXRefEntry(1, this->m->pipeline->getCount(), 0);
1192 writeString(QUtil::int_to_string(objid));
1193 writeString(" 0 obj\n");
1194 return objid;
1195 }
1196
1197 void
closeObject(int objid)1198 QPDFWriter::closeObject(int objid)
1199 {
1200 // Write a newline before endobj as it makes the file easier to
1201 // repair.
1202 writeString("\nendobj\n");
1203 writeStringQDF("\n");
1204 this->m->lengths[objid] = this->m->pipeline->getCount() -
1205 this->m->xref[objid].getOffset();
1206 }
1207
1208 void
assignCompressedObjectNumbers(QPDFObjGen const & og)1209 QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
1210 {
1211 int objid = og.getObj();
1212 if ((og.getGen() != 0) ||
1213 (this->m->object_stream_to_objects.count(objid) == 0))
1214 {
1215 // This is not an object stream.
1216 return;
1217 }
1218
1219 // Reserve numbers for the objects that belong to this object
1220 // stream.
1221 for (std::set<QPDFObjGen>::iterator iter =
1222 this->m->object_stream_to_objects[objid].begin();
1223 iter != this->m->object_stream_to_objects[objid].end();
1224 ++iter)
1225 {
1226 this->m->obj_renumber[*iter] = this->m->next_objid++;
1227 }
1228 }
1229
1230 void
enqueueObject(QPDFObjectHandle object)1231 QPDFWriter::enqueueObject(QPDFObjectHandle object)
1232 {
1233 if (object.isIndirect())
1234 {
1235 if (object.getOwningQPDF() != &(this->m->pdf))
1236 {
1237 QTC::TC("qpdf", "QPDFWriter foreign object");
1238 throw std::logic_error(
1239 "QPDFObjectHandle from different QPDF found while writing."
1240 " Use QPDF::copyForeignObject to add objects from"
1241 " another file.");
1242 }
1243
1244 if (this->m->qdf_mode &&
1245 object.isStream() && object.getDict().getKey("/Type").isName() &&
1246 (object.getDict().getKey("/Type").getName() == "/XRef"))
1247 {
1248 // As a special case, do not output any extraneous XRef
1249 // streams in QDF mode. Doing so will confuse fix-qdf,
1250 // which expects to see only one XRef stream at the end of
1251 // the file. This case can occur when creating a QDF from
1252 // a file with object streams when preserving unreferenced
1253 // objects since the old cross reference streams are not
1254 // actually referenced by object number.
1255 QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1256 return;
1257 }
1258
1259 QPDFObjGen og = object.getObjGen();
1260
1261 if (this->m->obj_renumber.count(og) == 0)
1262 {
1263 if (this->m->object_to_object_stream.count(og))
1264 {
1265 // This is in an object stream. Don't process it
1266 // here. Instead, enqueue the object stream. Object
1267 // streams always have generation 0.
1268 int stream_id = this->m->object_to_object_stream[og];
1269 // Detect loops by storing invalid object ID 0, which
1270 // will get overwritten later.
1271 this->m->obj_renumber[og] = 0;
1272 enqueueObject(this->m->pdf.getObjectByID(stream_id, 0));
1273 }
1274 else
1275 {
1276 this->m->object_queue.push_back(object);
1277 this->m->obj_renumber[og] = this->m->next_objid++;
1278
1279 if ((og.getGen() == 0) &&
1280 this->m->object_stream_to_objects.count(og.getObj()))
1281 {
1282 // For linearized files, uncompressed objects go
1283 // at end, and we take care of assigning numbers
1284 // to them elsewhere.
1285 if (! this->m->linearized)
1286 {
1287 assignCompressedObjectNumbers(og);
1288 }
1289 }
1290 else if ((! this->m->direct_stream_lengths) &&
1291 object.isStream())
1292 {
1293 // reserve next object ID for length
1294 ++this->m->next_objid;
1295 }
1296 }
1297 }
1298 else if (this->m->obj_renumber[og] == 0)
1299 {
1300 // This can happen if a specially constructed file
1301 // indicates that an object stream is inside itself.
1302 QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream");
1303 }
1304 }
1305 else if (object.isArray())
1306 {
1307 int n = object.getArrayNItems();
1308 for (int i = 0; i < n; ++i)
1309 {
1310 if (! this->m->linearized)
1311 {
1312 enqueueObject(object.getArrayItem(i));
1313 }
1314 }
1315 }
1316 else if (object.isDictionary())
1317 {
1318 std::set<std::string> keys = object.getKeys();
1319 for (std::set<std::string>::iterator iter = keys.begin();
1320 iter != keys.end(); ++iter)
1321 {
1322 if (! this->m->linearized)
1323 {
1324 enqueueObject(object.getKey(*iter));
1325 }
1326 }
1327 }
1328 else
1329 {
1330 // ignore
1331 }
1332 }
1333
1334 void
unparseChild(QPDFObjectHandle child,int level,int flags)1335 QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1336 {
1337 if (! this->m->linearized)
1338 {
1339 enqueueObject(child);
1340 }
1341 if (child.isIndirect())
1342 {
1343 QPDFObjGen old_og = child.getObjGen();
1344 int new_id = this->m->obj_renumber[old_og];
1345 writeString(QUtil::int_to_string(new_id));
1346 writeString(" 0 R");
1347 }
1348 else
1349 {
1350 unparseObject(child, level, flags);
1351 }
1352 }
1353
1354 void
writeTrailer(trailer_e which,int size,bool xref_stream,qpdf_offset_t prev,int linearization_pass)1355 QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
1356 qpdf_offset_t prev, int linearization_pass)
1357 {
1358 QPDFObjectHandle trailer = getTrimmedTrailer();
1359 if (xref_stream)
1360 {
1361 this->m->cur_data_key.clear();
1362 }
1363 else
1364 {
1365 writeString("trailer <<");
1366 }
1367 writeStringQDF("\n");
1368 if (which == t_lin_second)
1369 {
1370 writeString(" /Size ");
1371 writeString(QUtil::int_to_string(size));
1372 }
1373 else
1374 {
1375 std::set<std::string> keys = trailer.getKeys();
1376 for (std::set<std::string>::iterator iter = keys.begin();
1377 iter != keys.end(); ++iter)
1378 {
1379 std::string const& key = *iter;
1380 writeStringQDF(" ");
1381 writeStringNoQDF(" ");
1382 writeString(QPDF_Name::normalizeName(key));
1383 writeString(" ");
1384 if (key == "/Size")
1385 {
1386 writeString(QUtil::int_to_string(size));
1387 if (which == t_lin_first)
1388 {
1389 writeString(" /Prev ");
1390 qpdf_offset_t pos = this->m->pipeline->getCount();
1391 writeString(QUtil::int_to_string(prev));
1392 int nspaces =
1393 QIntC::to_int(pos - this->m->pipeline->getCount() + 21);
1394 if (nspaces < 0)
1395 {
1396 throw std::logic_error(
1397 "QPDFWriter: no padding required in trailer");
1398 }
1399 writePad(nspaces);
1400 }
1401 }
1402 else
1403 {
1404 unparseChild(trailer.getKey(key), 1, 0);
1405 }
1406 writeStringQDF("\n");
1407 }
1408 }
1409
1410 // Write ID
1411 writeStringQDF(" ");
1412 writeString(" /ID [");
1413 if (linearization_pass == 1)
1414 {
1415 std::string original_id1 = getOriginalID1();
1416 if (original_id1.empty())
1417 {
1418 writeString("<00000000000000000000000000000000>");
1419 }
1420 else
1421 {
1422 // Write a string of zeroes equal in length to the
1423 // representation of the original ID. While writing the
1424 // original ID would have the same number of bytes, it
1425 // would cause a change to the deterministic ID generated
1426 // by older versions of the software that hard-coded the
1427 // length of the ID to 16 bytes.
1428 writeString("<");
1429 size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
1430 for (size_t i = 0; i < len; ++i)
1431 {
1432 writeString("0");
1433 }
1434 writeString(">");
1435 }
1436 writeString("<00000000000000000000000000000000>");
1437 }
1438 else
1439 {
1440 if ((linearization_pass == 0) && (this->m->deterministic_id))
1441 {
1442 computeDeterministicIDData();
1443 }
1444 generateID();
1445 writeString(QPDF_String(this->m->id1).unparse(true));
1446 writeString(QPDF_String(this->m->id2).unparse(true));
1447 }
1448 writeString("]");
1449
1450 if (which != t_lin_second)
1451 {
1452 // Write reference to encryption dictionary
1453 if (this->m->encrypted)
1454 {
1455 writeString(" /Encrypt ");
1456 writeString(QUtil::int_to_string(this->m->encryption_dict_objid));
1457 writeString(" 0 R");
1458 }
1459 }
1460
1461 writeStringQDF("\n");
1462 writeStringNoQDF(" ");
1463 writeString(">>");
1464 }
1465
1466 bool
willFilterStream(QPDFObjectHandle stream,bool & compress_stream,bool & is_metadata,PointerHolder<Buffer> * stream_data)1467 QPDFWriter::willFilterStream(QPDFObjectHandle stream,
1468 bool& compress_stream, bool& is_metadata,
1469 PointerHolder<Buffer>* stream_data)
1470 {
1471 compress_stream = false;
1472 is_metadata = false;
1473
1474 QPDFObjGen old_og = stream.getObjGen();
1475 QPDFObjectHandle stream_dict = stream.getDict();
1476
1477 if (stream_dict.getKey("/Type").isName() &&
1478 (stream_dict.getKey("/Type").getName() == "/Metadata"))
1479 {
1480 is_metadata = true;
1481 }
1482 bool filter = (stream.isDataModified() ||
1483 this->m->compress_streams ||
1484 this->m->stream_decode_level);
1485 bool filter_on_write = stream.getFilterOnWrite();
1486 if (! filter_on_write)
1487 {
1488 QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
1489 filter = false;
1490 }
1491 if (filter_on_write && this->m->compress_streams)
1492 {
1493 // Don't filter if the stream is already compressed with
1494 // FlateDecode. This way we don't make it worse if the
1495 // original file used a better Flate algorithm, and we
1496 // don't spend time and CPU cycles uncompressing and
1497 // recompressing stuff. This can be overridden with
1498 // setRecompressFlate(true).
1499 QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
1500 if ((! this->m->recompress_flate) &&
1501 (! stream.isDataModified()) &&
1502 filter_obj.isName() &&
1503 ((filter_obj.getName() == "/FlateDecode") ||
1504 (filter_obj.getName() == "/Fl")))
1505 {
1506 QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
1507 filter = false;
1508 }
1509 }
1510 bool normalize = false;
1511 bool uncompress = false;
1512 if (filter_on_write && is_metadata &&
1513 ((! this->m->encrypted) || (this->m->encrypt_metadata == false)))
1514 {
1515 QTC::TC("qpdf", "QPDFWriter not compressing metadata");
1516 filter = true;
1517 compress_stream = false;
1518 uncompress = true;
1519 }
1520 else if (filter_on_write && this->m->normalize_content &&
1521 this->m->normalized_streams.count(old_og))
1522 {
1523 normalize = true;
1524 filter = true;
1525 }
1526 else if (filter_on_write && filter && this->m->compress_streams)
1527 {
1528 compress_stream = true;
1529 QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
1530 }
1531
1532 bool filtered = false;
1533 for (int attempt = 1; attempt <= 2; ++attempt)
1534 {
1535 pushPipeline(new Pl_Buffer("stream data"));
1536 PipelinePopper pp_stream_data(this, stream_data);
1537 activatePipelineStack(pp_stream_data);
1538 filtered =
1539 stream.pipeStreamData(
1540 this->m->pipeline,
1541 (((filter && normalize) ? qpdf_ef_normalize : 0) |
1542 ((filter && compress_stream) ? qpdf_ef_compress : 0)),
1543 (filter
1544 ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level)
1545 : qpdf_dl_none), false, (attempt == 1));
1546 if (filter && (! filtered))
1547 {
1548 // Try again
1549 filter = false;
1550 }
1551 else
1552 {
1553 break;
1554 }
1555 }
1556 if (! filtered)
1557 {
1558 compress_stream = false;
1559 }
1560 return filtered;
1561 }
1562
1563 void
unparseObject(QPDFObjectHandle object,int level,int flags,size_t stream_length,bool compress)1564 QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1565 int flags, size_t stream_length,
1566 bool compress)
1567 {
1568 QPDFObjGen old_og = object.getObjGen();
1569 int child_flags = flags & ~f_stream;
1570
1571 std::string indent;
1572 for (int i = 0; i < level; ++i)
1573 {
1574 indent += " ";
1575 }
1576
1577 if (object.isArray())
1578 {
1579 // Note: PDF spec 1.4 implementation note 121 states that
1580 // Acrobat requires a space after the [ in the /H key of the
1581 // linearization parameter dictionary. We'll do this
1582 // unconditionally for all arrays because it looks nicer and
1583 // doesn't make the files that much bigger.
1584 writeString("[");
1585 writeStringQDF("\n");
1586 int n = object.getArrayNItems();
1587 for (int i = 0; i < n; ++i)
1588 {
1589 writeStringQDF(indent);
1590 writeStringQDF(" ");
1591 writeStringNoQDF(" ");
1592 unparseChild(object.getArrayItem(i), level + 1, child_flags);
1593 writeStringQDF("\n");
1594 }
1595 writeStringQDF(indent);
1596 writeStringNoQDF(" ");
1597 writeString("]");
1598 }
1599 else if (object.isDictionary())
1600 {
1601 // Make a shallow copy of this object so we can modify it
1602 // safely without affecting the original. This code has logic
1603 // to skip certain keys in agreement with prepareFileForWrite
1604 // and with skip_stream_parameters so that replacing them
1605 // doesn't leave unreferenced objects in the output. We can
1606 // use unsafeShallowCopy here because we are all we are doing
1607 // is removing or replacing top-level keys.
1608 object = object.unsafeShallowCopy();
1609
1610 // Handle special cases for specific dictionaries.
1611
1612 // Extensions dictionaries.
1613
1614 // We have one of several cases:
1615 //
1616 // * We need ADBE
1617 // - We already have Extensions
1618 // - If it has the right ADBE, preserve it
1619 // - Otherwise, replace ADBE
1620 // - We don't have Extensions: create one from scratch
1621 // * We don't want ADBE
1622 // - We already have Extensions
1623 // - If it only has ADBE, remove it
1624 // - If it has other things, keep those and remove ADBE
1625 // - We have no extensions: no action required
1626 //
1627 // Before writing, we guarantee that /Extensions, if present,
1628 // is direct through the ADBE dictionary, so we can modify in
1629 // place.
1630
1631 bool is_root = false;
1632 bool have_extensions_other = false;
1633 bool have_extensions_adbe = false;
1634
1635 QPDFObjectHandle extensions;
1636 if ((old_og.getObj() != 0) &&
1637 (old_og == this->m->pdf.getRoot().getObjGen()))
1638 {
1639 is_root = true;
1640 if (object.hasKey("/Extensions") &&
1641 object.getKey("/Extensions").isDictionary())
1642 {
1643 extensions = object.getKey("/Extensions");
1644 }
1645 }
1646
1647 if (extensions.isInitialized())
1648 {
1649 std::set<std::string> keys = extensions.getKeys();
1650 if (keys.count("/ADBE") > 0)
1651 {
1652 have_extensions_adbe = true;
1653 keys.erase("/ADBE");
1654 }
1655 if (keys.size() > 0)
1656 {
1657 have_extensions_other = true;
1658 }
1659 }
1660
1661 bool need_extensions_adbe = (this->m->final_extension_level > 0);
1662
1663 if (is_root)
1664 {
1665 if (need_extensions_adbe)
1666 {
1667 if (! (have_extensions_other || have_extensions_adbe))
1668 {
1669 // We need Extensions and don't have it. Create
1670 // it here.
1671 QTC::TC("qpdf", "QPDFWriter create Extensions",
1672 this->m->qdf_mode ? 0 : 1);
1673 extensions = QPDFObjectHandle::newDictionary();
1674 object.replaceKey("/Extensions", extensions);
1675 }
1676 }
1677 else if (! have_extensions_other)
1678 {
1679 // We have Extensions dictionary and don't want one.
1680 if (have_extensions_adbe)
1681 {
1682 QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1683 object.removeKey("/Extensions");
1684 extensions = QPDFObjectHandle(); // uninitialized
1685 }
1686 }
1687 }
1688
1689 if (extensions.isInitialized())
1690 {
1691 QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1692 QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1693 if (adbe.isDictionary() &&
1694 adbe.hasKey("/BaseVersion") &&
1695 adbe.getKey("/BaseVersion").isName() &&
1696 (adbe.getKey("/BaseVersion").getName() ==
1697 "/" + this->m->final_pdf_version) &&
1698 adbe.hasKey("/ExtensionLevel") &&
1699 adbe.getKey("/ExtensionLevel").isInteger() &&
1700 (adbe.getKey("/ExtensionLevel").getIntValue() ==
1701 this->m->final_extension_level))
1702 {
1703 QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1704 }
1705 else
1706 {
1707 if (need_extensions_adbe)
1708 {
1709 extensions.replaceKey(
1710 "/ADBE",
1711 QPDFObjectHandle::parse(
1712 "<< /BaseVersion /" + this->m->final_pdf_version +
1713 " /ExtensionLevel " +
1714 QUtil::int_to_string(
1715 this->m->final_extension_level) +
1716 " >>"));
1717 }
1718 else
1719 {
1720 QTC::TC("qpdf", "QPDFWriter remove ADBE");
1721 extensions.removeKey("/ADBE");
1722 }
1723 }
1724 }
1725
1726 // Stream dictionaries.
1727
1728 if (flags & f_stream)
1729 {
1730 // Suppress /Length since we will write it manually
1731 object.removeKey("/Length");
1732
1733 // If /DecodeParms is an empty list, remove it.
1734 if (object.getKey("/DecodeParms").isArray() &&
1735 (0 == object.getKey("/DecodeParms").getArrayNItems()))
1736 {
1737 QTC::TC("qpdf", "QPDFWriter remove empty DecodeParms");
1738 object.removeKey("/DecodeParms");
1739 }
1740
1741 if (flags & f_filtered)
1742 {
1743 // We will supply our own filter and decode
1744 // parameters.
1745 object.removeKey("/Filter");
1746 object.removeKey("/DecodeParms");
1747 }
1748 else
1749 {
1750 // Make sure, no matter what else we have, that we
1751 // don't have /Crypt in the output filters.
1752 QPDFObjectHandle filter = object.getKey("/Filter");
1753 QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1754 if (filter.isOrHasName("/Crypt"))
1755 {
1756 if (filter.isName())
1757 {
1758 object.removeKey("/Filter");
1759 object.removeKey("/DecodeParms");
1760 }
1761 else
1762 {
1763 int idx = -1;
1764 for (int i = 0; i < filter.getArrayNItems(); ++i)
1765 {
1766 QPDFObjectHandle item = filter.getArrayItem(i);
1767 if (item.isName() && item.getName() == "/Crypt")
1768 {
1769 idx = i;
1770 break;
1771 }
1772 }
1773 if (idx >= 0)
1774 {
1775 // If filter is an array, then the code in
1776 // QPDF_Stream has already verified that
1777 // DecodeParms and Filters are arrays of
1778 // the same length, but if they weren't
1779 // for some reason, eraseItem does type
1780 // and bounds checking.
1781 QTC::TC("qpdf", "QPDFWriter remove Crypt");
1782 filter.eraseItem(idx);
1783 decode_parms.eraseItem(idx);
1784 }
1785 }
1786 }
1787 }
1788 }
1789
1790 writeString("<<");
1791 writeStringQDF("\n");
1792
1793 std::set<std::string> keys = object.getKeys();
1794 for (std::set<std::string>::iterator iter = keys.begin();
1795 iter != keys.end(); ++iter)
1796 {
1797 std::string const& key = *iter;
1798
1799 writeStringQDF(indent);
1800 writeStringQDF(" ");
1801 writeStringNoQDF(" ");
1802 writeString(QPDF_Name::normalizeName(key));
1803 writeString(" ");
1804 if (key == "/Contents" &&
1805 object.hasKey("/Type") &&
1806 object.getKey("/Type").isName() &&
1807 object.getKey("/Type").getName() == "/Sig" &&
1808 object.hasKey("/ByteRange"))
1809 {
1810 QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1811 unparseChild(object.getKey(key), level + 1,
1812 child_flags | f_hex_string | f_no_encryption);
1813 }
1814 else
1815 {
1816 unparseChild(object.getKey(key), level + 1, child_flags);
1817 }
1818 writeStringQDF("\n");
1819 }
1820
1821 if (flags & f_stream)
1822 {
1823 writeStringQDF(indent);
1824 writeStringQDF(" ");
1825 writeString(" /Length ");
1826
1827 if (this->m->direct_stream_lengths)
1828 {
1829 writeString(QUtil::uint_to_string(stream_length));
1830 }
1831 else
1832 {
1833 writeString(
1834 QUtil::int_to_string(this->m->cur_stream_length_id));
1835 writeString(" 0 R");
1836 }
1837 writeStringQDF("\n");
1838 if (compress && (flags & f_filtered))
1839 {
1840 writeStringQDF(indent);
1841 writeStringQDF(" ");
1842 writeString(" /Filter /FlateDecode");
1843 writeStringQDF("\n");
1844 }
1845 }
1846
1847 writeStringQDF(indent);
1848 writeStringNoQDF(" ");
1849 writeString(">>");
1850 }
1851 else if (object.isStream())
1852 {
1853 // Write stream data to a buffer.
1854 int new_id = this->m->obj_renumber[old_og];
1855 if (! this->m->direct_stream_lengths)
1856 {
1857 this->m->cur_stream_length_id = new_id + 1;
1858 }
1859
1860 flags |= f_stream;
1861 bool compress_stream = false;
1862 bool is_metadata = false;
1863 PointerHolder<Buffer> stream_data;
1864 if (willFilterStream(object, compress_stream,
1865 is_metadata, &stream_data))
1866 {
1867 flags |= f_filtered;
1868 }
1869 QPDFObjectHandle stream_dict = object.getDict();
1870
1871 this->m->cur_stream_length = stream_data->getSize();
1872 if (is_metadata && this->m->encrypted && (! this->m->encrypt_metadata))
1873 {
1874 // Don't encrypt stream data for the metadata stream
1875 this->m->cur_data_key.clear();
1876 }
1877 adjustAESStreamLength(this->m->cur_stream_length);
1878 unparseObject(stream_dict, 0, flags,
1879 this->m->cur_stream_length, compress_stream);
1880 unsigned char last_char = '\0';
1881 writeString("\nstream\n");
1882 {
1883 PipelinePopper pp_enc(this);
1884 pushEncryptionFilter(pp_enc);
1885 writeBuffer(stream_data);
1886 last_char = this->m->pipeline->getLastChar();
1887 }
1888
1889 if (this->m->newline_before_endstream ||
1890 (this->m->qdf_mode && (last_char != '\n')))
1891 {
1892 writeString("\n");
1893 this->m->added_newline = true;
1894 }
1895 else
1896 {
1897 this->m->added_newline = false;
1898 }
1899 writeString("endstream");
1900 }
1901 else if (object.isString())
1902 {
1903 std::string val;
1904 if (this->m->encrypted &&
1905 (! (flags & f_in_ostream)) &&
1906 (! (flags & f_no_encryption)) &&
1907 (! this->m->cur_data_key.empty()))
1908 {
1909 val = object.getStringValue();
1910 if (this->m->encrypt_use_aes)
1911 {
1912 Pl_Buffer bufpl("encrypted string");
1913 Pl_AES_PDF pl(
1914 "aes encrypt string", &bufpl, true,
1915 QUtil::unsigned_char_pointer(this->m->cur_data_key),
1916 this->m->cur_data_key.length());
1917 pl.write(QUtil::unsigned_char_pointer(val), val.length());
1918 pl.finish();
1919 PointerHolder<Buffer> buf = bufpl.getBuffer();
1920 val = QPDF_String(
1921 std::string(reinterpret_cast<char*>(buf->getBuffer()),
1922 buf->getSize())).unparse(true);
1923 }
1924 else
1925 {
1926 PointerHolder<char> tmp_ph =
1927 PointerHolder<char>(true, QUtil::copy_string(val));
1928 char* tmp = tmp_ph.getPointer();
1929 size_t vlen = val.length();
1930 RC4 rc4(QUtil::unsigned_char_pointer(this->m->cur_data_key),
1931 QIntC::to_int(this->m->cur_data_key.length()));
1932 rc4.process(QUtil::unsigned_char_pointer(tmp), vlen);
1933 val = QPDF_String(std::string(tmp, vlen)).unparse();
1934 }
1935 }
1936 else if (flags & f_hex_string)
1937 {
1938 val = QPDF_String(object.getStringValue()).unparse(true);
1939 }
1940 else
1941 {
1942 val = object.unparseResolved();
1943 }
1944 writeString(val);
1945 }
1946 else
1947 {
1948 writeString(object.unparseResolved());
1949 }
1950 }
1951
1952 void
writeObjectStreamOffsets(std::vector<qpdf_offset_t> & offsets,int first_obj)1953 QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets,
1954 int first_obj)
1955 {
1956 for (size_t i = 0; i < offsets.size(); ++i)
1957 {
1958 if (i != 0)
1959 {
1960 writeStringQDF("\n");
1961 writeStringNoQDF(" ");
1962 }
1963 writeString(QUtil::uint_to_string(i + QIntC::to_size(first_obj)));
1964 writeString(" ");
1965 writeString(QUtil::int_to_string(offsets.at(i)));
1966 }
1967 writeString("\n");
1968 }
1969
1970 void
writeObjectStream(QPDFObjectHandle object)1971 QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1972 {
1973 // Note: object might be null if this is a place-holder for an
1974 // object stream that we are generating from scratch.
1975
1976 QPDFObjGen old_og = object.getObjGen();
1977 assert(old_og.getGen() == 0);
1978 int old_id = old_og.getObj();
1979 int new_id = this->m->obj_renumber[old_og];
1980
1981 std::vector<qpdf_offset_t> offsets;
1982 qpdf_offset_t first = 0;
1983
1984 // Generate stream itself. We have to do this in two passes so we
1985 // can calculate offsets in the first pass.
1986 PointerHolder<Buffer> stream_buffer;
1987 int first_obj = -1;
1988 bool compressed = false;
1989 for (int pass = 1; pass <= 2; ++pass)
1990 {
1991 // stream_buffer will be initialized only for pass 2
1992 PipelinePopper pp_ostream(this, &stream_buffer);
1993 if (pass == 1)
1994 {
1995 pushDiscardFilter(pp_ostream);
1996 }
1997 else
1998 {
1999 // Adjust offsets to skip over comment before first object
2000
2001 first = offsets.at(0);
2002 for (std::vector<qpdf_offset_t>::iterator iter = offsets.begin();
2003 iter != offsets.end(); ++iter)
2004 {
2005 *iter -= first;
2006 }
2007
2008 // Take one pass at writing pairs of numbers so we can get
2009 // their size information
2010 {
2011 PipelinePopper pp_discard(this);
2012 pushDiscardFilter(pp_discard);
2013 writeObjectStreamOffsets(offsets, first_obj);
2014 first += this->m->pipeline->getCount();
2015 }
2016
2017 // Set up a stream to write the stream data into a buffer.
2018 Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));
2019 if ((this->m->compress_streams ||
2020 (this->m->stream_decode_level == qpdf_dl_none)) &&
2021 (! this->m->qdf_mode))
2022 {
2023 compressed = true;
2024 next = pushPipeline(
2025 new Pl_Flate("compress object stream", next,
2026 Pl_Flate::a_deflate));
2027 }
2028 activatePipelineStack(pp_ostream);
2029 writeObjectStreamOffsets(offsets, first_obj);
2030 }
2031
2032 int count = 0;
2033 for (std::set<QPDFObjGen>::iterator iter =
2034 this->m->object_stream_to_objects[old_id].begin();
2035 iter != this->m->object_stream_to_objects[old_id].end();
2036 ++iter, ++count)
2037 {
2038 QPDFObjGen obj = *iter;
2039 int new_obj = this->m->obj_renumber[obj];
2040 if (first_obj == -1)
2041 {
2042 first_obj = new_obj;
2043 }
2044 if (this->m->qdf_mode)
2045 {
2046 writeString("%% Object stream: object " +
2047 QUtil::int_to_string(new_obj) + ", index " +
2048 QUtil::int_to_string(count));
2049 if (! this->m->suppress_original_object_ids)
2050 {
2051 writeString("; original object ID: " +
2052 QUtil::int_to_string(obj.getObj()));
2053 // For compatibility, only write the generation if
2054 // non-zero. While object streams only allow
2055 // objects with generation 0, if we are generating
2056 // object streams, the old object could have a
2057 // non-zero generation.
2058 if (obj.getGen() != 0)
2059 {
2060 QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
2061 writeString(" " + QUtil::int_to_string(obj.getGen()));
2062 }
2063 }
2064 writeString("\n");
2065 }
2066 if (pass == 1)
2067 {
2068 offsets.push_back(this->m->pipeline->getCount());
2069 // To avoid double-counting objects being written in
2070 // object streams for progress reporting, decrement in
2071 // pass 1.
2072 indicateProgress(true, false);
2073 }
2074 QPDFObjectHandle obj_to_write =
2075 this->m->pdf.getObjectByObjGen(obj);
2076 if (obj_to_write.isStream())
2077 {
2078 // This condition occurred in a fuzz input. Ideally we
2079 // should block it at at parse time, but it's not
2080 // clear to me how to construct a case for this.
2081 QTC::TC("qpdf", "QPDFWriter stream in ostream");
2082 obj_to_write.warnIfPossible(
2083 "stream found inside object stream; treating as null");
2084 obj_to_write = QPDFObjectHandle::newNull();
2085 }
2086 writeObject(obj_to_write, count);
2087
2088 this->m->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
2089 }
2090 }
2091
2092 // Write the object
2093 openObject(new_id);
2094 setDataKey(new_id);
2095 writeString("<<");
2096 writeStringQDF("\n ");
2097 writeString(" /Type /ObjStm");
2098 writeStringQDF("\n ");
2099 size_t length = stream_buffer->getSize();
2100 adjustAESStreamLength(length);
2101 writeString(" /Length " + QUtil::uint_to_string(length));
2102 writeStringQDF("\n ");
2103 if (compressed)
2104 {
2105 writeString(" /Filter /FlateDecode");
2106 }
2107 writeString(" /N " + QUtil::uint_to_string(offsets.size()));
2108 writeStringQDF("\n ");
2109 writeString(" /First " + QUtil::int_to_string(first));
2110 if (! object.isNull())
2111 {
2112 // If the original object has an /Extends key, preserve it.
2113 QPDFObjectHandle dict = object.getDict();
2114 QPDFObjectHandle extends = dict.getKey("/Extends");
2115 if (extends.isIndirect())
2116 {
2117 QTC::TC("qpdf", "QPDFWriter copy Extends");
2118 writeStringQDF("\n ");
2119 writeString(" /Extends ");
2120 unparseChild(extends, 1, f_in_ostream);
2121 }
2122 }
2123 writeStringQDF("\n");
2124 writeStringNoQDF(" ");
2125 writeString(">>\nstream\n");
2126 if (this->m->encrypted)
2127 {
2128 QTC::TC("qpdf", "QPDFWriter encrypt object stream");
2129 }
2130 {
2131 PipelinePopper pp_enc(this);
2132 pushEncryptionFilter(pp_enc);
2133 writeBuffer(stream_buffer);
2134 }
2135 if (this->m->newline_before_endstream)
2136 {
2137 writeString("\n");
2138 }
2139 writeString("endstream");
2140 this->m->cur_data_key.clear();
2141 closeObject(new_id);
2142 }
2143
2144 void
writeObject(QPDFObjectHandle object,int object_stream_index)2145 QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
2146 {
2147 QPDFObjGen old_og = object.getObjGen();
2148
2149 if ((object_stream_index == -1) &&
2150 (old_og.getGen() == 0) &&
2151 (this->m->object_stream_to_objects.count(old_og.getObj())))
2152 {
2153 writeObjectStream(object);
2154 return;
2155 }
2156
2157 indicateProgress(false, false);
2158 int new_id = this->m->obj_renumber[old_og];
2159 if (this->m->qdf_mode)
2160 {
2161 if (this->m->page_object_to_seq.count(old_og))
2162 {
2163 writeString("%% Page ");
2164 writeString(
2165 QUtil::int_to_string(
2166 this->m->page_object_to_seq[old_og]));
2167 writeString("\n");
2168 }
2169 if (this->m->contents_to_page_seq.count(old_og))
2170 {
2171 writeString("%% Contents for page ");
2172 writeString(
2173 QUtil::int_to_string(
2174 this->m->contents_to_page_seq[old_og]));
2175 writeString("\n");
2176 }
2177 }
2178 if (object_stream_index == -1)
2179 {
2180 if (this->m->qdf_mode && (! this->m->suppress_original_object_ids))
2181 {
2182 writeString("%% Original object ID: " +
2183 QUtil::int_to_string(object.getObjectID()) + " " +
2184 QUtil::int_to_string(object.getGeneration()) + "\n");
2185 }
2186 openObject(new_id);
2187 setDataKey(new_id);
2188 unparseObject(object, 0, 0);
2189 this->m->cur_data_key.clear();
2190 closeObject(new_id);
2191 }
2192 else
2193 {
2194 unparseObject(object, 0, f_in_ostream);
2195 writeString("\n");
2196 }
2197
2198 if ((! this->m->direct_stream_lengths) && object.isStream())
2199 {
2200 if (this->m->qdf_mode)
2201 {
2202 if (this->m->added_newline)
2203 {
2204 writeString("%QDF: ignore_newline\n");
2205 }
2206 }
2207 openObject(new_id + 1);
2208 writeString(QUtil::uint_to_string(this->m->cur_stream_length));
2209 closeObject(new_id + 1);
2210 }
2211 }
2212
2213 std::string
getOriginalID1()2214 QPDFWriter::getOriginalID1()
2215 {
2216 QPDFObjectHandle trailer = this->m->pdf.getTrailer();
2217 if (trailer.hasKey("/ID"))
2218 {
2219 return trailer.getKey("/ID").getArrayItem(0).getStringValue();
2220 }
2221 else
2222 {
2223 return "";
2224 }
2225 }
2226
2227 void
generateID()2228 QPDFWriter::generateID()
2229 {
2230 // Generate the ID lazily so that we can handle the user's
2231 // preference to use static or deterministic ID generation.
2232
2233 if (! this->m->id2.empty())
2234 {
2235 return;
2236 }
2237
2238 QPDFObjectHandle trailer = this->m->pdf.getTrailer();
2239
2240 std::string result;
2241
2242 if (this->m->static_id)
2243 {
2244 // For test suite use only...
2245 static unsigned char tmp[] = {0x31, 0x41, 0x59, 0x26,
2246 0x53, 0x58, 0x97, 0x93,
2247 0x23, 0x84, 0x62, 0x64,
2248 0x33, 0x83, 0x27, 0x95,
2249 0x00};
2250 result = reinterpret_cast<char*>(tmp);
2251 }
2252 else
2253 {
2254 // The PDF specification has guidelines for creating IDs, but
2255 // it states clearly that the only thing that's really
2256 // important is that it is very likely to be unique. We can't
2257 // really follow the guidelines in the spec exactly because we
2258 // haven't written the file yet. This scheme should be fine
2259 // though. The deterministic ID case uses a digest of a
2260 // sufficient portion of the file's contents such no two
2261 // non-matching files would match in the subsets used for this
2262 // computation. Note that we explicitly omit the filename from
2263 // the digest calculation for deterministic ID so that the same
2264 // file converted with qpdf, in that case, would have the same
2265 // ID regardless of the output file's name.
2266
2267 std::string seed;
2268 if (this->m->deterministic_id)
2269 {
2270 if (this->m->deterministic_id_data.empty())
2271 {
2272 QTC::TC("qpdf", "QPDFWriter deterministic with no data");
2273 throw std::logic_error(
2274 "INTERNAL ERROR: QPDFWriter::generateID has no"
2275 " data for deterministic ID. This may happen if"
2276 " deterministic ID and file encryption are requested"
2277 " together.");
2278 }
2279 seed += this->m->deterministic_id_data;
2280 }
2281 else
2282 {
2283 seed += QUtil::int_to_string(QUtil::get_current_time());
2284 seed += this->m->filename;
2285 seed += " ";
2286 }
2287 seed += " QPDF ";
2288 if (trailer.hasKey("/Info"))
2289 {
2290 QPDFObjectHandle info = trailer.getKey("/Info");
2291 std::set<std::string> keys = info.getKeys();
2292 for (std::set<std::string>::iterator iter = keys.begin();
2293 iter != keys.end(); ++iter)
2294 {
2295 QPDFObjectHandle obj = info.getKey(*iter);
2296 if (obj.isString())
2297 {
2298 seed += " ";
2299 seed += obj.getStringValue();
2300 }
2301 }
2302 }
2303
2304 MD5 m;
2305 m.encodeString(seed.c_str());
2306 MD5::Digest digest;
2307 m.digest(digest);
2308 result = std::string(reinterpret_cast<char*>(digest),
2309 sizeof(MD5::Digest));
2310 }
2311
2312 // If /ID already exists, follow the spec: use the original first
2313 // word and generate a new second word. Otherwise, we'll use the
2314 // generated ID for both.
2315
2316 this->m->id2 = result;
2317 // Note: keep /ID from old file even if --static-id was given.
2318 this->m->id1 = getOriginalID1();
2319 if (this->m->id1.empty())
2320 {
2321 this->m->id1 = this->m->id2;
2322 }
2323 }
2324
2325 void
initializeSpecialStreams()2326 QPDFWriter::initializeSpecialStreams()
2327 {
2328 // Mark all page content streams in case we are filtering or
2329 // normalizing.
2330 std::vector<QPDFObjectHandle> pages = this->m->pdf.getAllPages();
2331 int num = 0;
2332 for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin();
2333 iter != pages.end(); ++iter)
2334 {
2335 QPDFObjectHandle& page = *iter;
2336 this->m->page_object_to_seq[page.getObjGen()] = ++num;
2337 QPDFObjectHandle contents = page.getKey("/Contents");
2338 std::vector<QPDFObjGen> contents_objects;
2339 if (contents.isArray())
2340 {
2341 int n = contents.getArrayNItems();
2342 for (int i = 0; i < n; ++i)
2343 {
2344 contents_objects.push_back(
2345 contents.getArrayItem(i).getObjGen());
2346 }
2347 }
2348 else if (contents.isStream())
2349 {
2350 contents_objects.push_back(contents.getObjGen());
2351 }
2352
2353 for (auto const& c: contents_objects)
2354 {
2355 this->m->contents_to_page_seq[c] = num;
2356 this->m->normalized_streams.insert(c);
2357 }
2358 }
2359 }
2360
2361 void
preserveObjectStreams()2362 QPDFWriter::preserveObjectStreams()
2363 {
2364 std::map<int, int> omap;
2365 QPDF::Writer::getObjectStreamData(this->m->pdf, omap);
2366 if (omap.empty())
2367 {
2368 return;
2369 }
2370 // Our object_to_object_stream map has to map ObjGen -> ObjGen
2371 // since we may be generating object streams out of old objects
2372 // that have generation numbers greater than zero. However in an
2373 // existing PDF, all object stream objects and all objects in them
2374 // must have generation 0 because the PDF spec does not provide
2375 // any way to do otherwise. This code filters out objects that are
2376 // not allowed to be in object streams. In addition to removing
2377 // objects that were erroneously included in object streams in the
2378 // source PDF, it also prevents unreferenced objects from being
2379 // included.
2380 std::set<QPDFObjGen> eligible;
2381 if (! this->m->preserve_unreferenced_objects)
2382 {
2383 std::vector<QPDFObjGen> eligible_v =
2384 QPDF::Writer::getCompressibleObjGens(this->m->pdf);
2385 eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end());
2386 }
2387 QTC::TC("qpdf", "QPDFWriter preserve object streams",
2388 this->m->preserve_unreferenced_objects ? 0 : 1);
2389 for (auto iter: omap)
2390 {
2391 QPDFObjGen og(iter.first, 0);
2392 if (eligible.count(og) || this->m->preserve_unreferenced_objects)
2393 {
2394 this->m->object_to_object_stream[og] = iter.second;
2395 }
2396 else
2397 {
2398 QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2399 }
2400 }
2401 }
2402
2403 void
generateObjectStreams()2404 QPDFWriter::generateObjectStreams()
2405 {
2406 // Basic strategy: make a list of objects that can go into an
2407 // object stream. Then figure out how many object streams are
2408 // needed so that we can distribute objects approximately evenly
2409 // without having any object stream exceed 100 members. We don't
2410 // have to worry about linearized files here -- if the file is
2411 // linearized, we take care of excluding things that aren't
2412 // allowed here later.
2413
2414 // This code doesn't do anything with /Extends.
2415
2416 std::vector<QPDFObjGen> eligible =
2417 QPDF::Writer::getCompressibleObjGens(this->m->pdf);
2418 size_t n_object_streams = (eligible.size() + 99U) / 100U;
2419 if (n_object_streams == 0)
2420 {
2421 return;
2422 }
2423 size_t n_per = eligible.size() / n_object_streams;
2424 if (n_per * n_object_streams < eligible.size())
2425 {
2426 ++n_per;
2427 }
2428 unsigned int n = 0;
2429 int cur_ostream = 0;
2430 for (std::vector<QPDFObjGen>::const_iterator iter = eligible.begin();
2431 iter != eligible.end(); ++iter)
2432 {
2433 if ((n % n_per) == 0)
2434 {
2435 if (n > 0)
2436 {
2437 QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2438 }
2439 n = 0;
2440 }
2441 if (n == 0)
2442 {
2443 // Construct a new null object as the "original" object
2444 // stream. The rest of the code knows that this means
2445 // we're creating the object stream from scratch.
2446 cur_ostream = this->m->pdf.makeIndirectObject(
2447 QPDFObjectHandle::newNull()).getObjectID();
2448 }
2449 this->m->object_to_object_stream[*iter] = cur_ostream;
2450 ++n;
2451 }
2452 }
2453
2454 QPDFObjectHandle
getTrimmedTrailer()2455 QPDFWriter::getTrimmedTrailer()
2456 {
2457 // Remove keys from the trailer that necessarily have to be
2458 // replaced when writing the file.
2459
2460 QPDFObjectHandle trailer = this->m->pdf.getTrailer().unsafeShallowCopy();
2461
2462 // Remove encryption keys
2463 trailer.removeKey("/ID");
2464 trailer.removeKey("/Encrypt");
2465
2466 // Remove modification information
2467 trailer.removeKey("/Prev");
2468
2469 // Remove all trailer keys that potentially come from a
2470 // cross-reference stream
2471 trailer.removeKey("/Index");
2472 trailer.removeKey("/W");
2473 trailer.removeKey("/Length");
2474 trailer.removeKey("/Filter");
2475 trailer.removeKey("/DecodeParms");
2476 trailer.removeKey("/Type");
2477 trailer.removeKey("/XRefStm");
2478
2479 return trailer;
2480 }
2481
2482 void
prepareFileForWrite()2483 QPDFWriter::prepareFileForWrite()
2484 {
2485 // Make document extension level information direct as required by
2486 // the spec.
2487
2488 this->m->pdf.fixDanglingReferences(true);
2489 QPDFObjectHandle root = this->m->pdf.getRoot();
2490 for (auto const& key: root.getKeys())
2491 {
2492 QPDFObjectHandle oh = root.getKey(key);
2493 if ((key == "/Extensions") && (oh.isDictionary()))
2494 {
2495 bool extensions_indirect = false;
2496 if (oh.isIndirect())
2497 {
2498 QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2499 extensions_indirect = true;
2500 oh = oh.shallowCopy();
2501 root.replaceKey(key, oh);
2502 }
2503 if (oh.hasKey("/ADBE"))
2504 {
2505 QPDFObjectHandle adbe = oh.getKey("/ADBE");
2506 if (adbe.isIndirect())
2507 {
2508 QTC::TC("qpdf", "QPDFWriter make ADBE direct",
2509 extensions_indirect ? 0 : 1);
2510 adbe.makeDirect();
2511 oh.replaceKey("/ADBE", adbe);
2512 }
2513 }
2514 }
2515 }
2516 }
2517
2518 void
doWriteSetup()2519 QPDFWriter::doWriteSetup()
2520 {
2521 if (this->m->did_write_setup)
2522 {
2523 return;
2524 }
2525 this->m->did_write_setup = true;
2526
2527 // Do preliminary setup
2528
2529 if (this->m->linearized)
2530 {
2531 this->m->qdf_mode = false;
2532 }
2533
2534 if (this->m->pclm)
2535 {
2536 this->m->stream_decode_level = qpdf_dl_none;
2537 this->m->compress_streams = false;
2538 this->m->encrypted = false;
2539 }
2540
2541 if (this->m->qdf_mode)
2542 {
2543 if (! this->m->normalize_content_set)
2544 {
2545 this->m->normalize_content = true;
2546 }
2547 if (! this->m->compress_streams_set)
2548 {
2549 this->m->compress_streams = false;
2550 }
2551 if (! this->m->stream_decode_level_set)
2552 {
2553 this->m->stream_decode_level = qpdf_dl_generalized;
2554 }
2555 }
2556
2557 if (this->m->encrypted)
2558 {
2559 // Encryption has been explicitly set
2560 this->m->preserve_encryption = false;
2561 }
2562 else if (this->m->normalize_content ||
2563 this->m->stream_decode_level ||
2564 this->m->pclm ||
2565 this->m->qdf_mode)
2566 {
2567 // Encryption makes looking at contents pretty useless. If
2568 // the user explicitly encrypted though, we still obey that.
2569 this->m->preserve_encryption = false;
2570 }
2571
2572 if (this->m->preserve_encryption)
2573 {
2574 copyEncryptionParameters(this->m->pdf);
2575 }
2576
2577 if (! this->m->forced_pdf_version.empty())
2578 {
2579 int major = 0;
2580 int minor = 0;
2581 parseVersion(this->m->forced_pdf_version, major, minor);
2582 disableIncompatibleEncryption(major, minor,
2583 this->m->forced_extension_level);
2584 if (compareVersions(major, minor, 1, 5) < 0)
2585 {
2586 QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2587 this->m->object_stream_mode = qpdf_o_disable;
2588 }
2589 }
2590
2591 if (this->m->qdf_mode || this->m->normalize_content ||
2592 this->m->stream_decode_level)
2593 {
2594 initializeSpecialStreams();
2595 }
2596
2597 if (this->m->qdf_mode)
2598 {
2599 // Generate indirect stream lengths for qdf mode since fix-qdf
2600 // uses them for storing recomputed stream length data.
2601 // Certain streams such as object streams, xref streams, and
2602 // hint streams always get direct stream lengths.
2603 this->m->direct_stream_lengths = false;
2604 }
2605
2606 switch (this->m->object_stream_mode)
2607 {
2608 case qpdf_o_disable:
2609 // no action required
2610 break;
2611
2612 case qpdf_o_preserve:
2613 preserveObjectStreams();
2614 break;
2615
2616 case qpdf_o_generate:
2617 generateObjectStreams();
2618 break;
2619
2620 // no default so gcc will warn for missing case tag
2621 }
2622
2623 if (this->m->linearized)
2624 {
2625 // Page dictionaries are not allowed to be compressed objects.
2626 std::vector<QPDFObjectHandle> pages = this->m->pdf.getAllPages();
2627 for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin();
2628 iter != pages.end(); ++iter)
2629 {
2630 QPDFObjectHandle& page = *iter;
2631 QPDFObjGen og = page.getObjGen();
2632 if (this->m->object_to_object_stream.count(og))
2633 {
2634 QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2635 this->m->object_to_object_stream.erase(og);
2636 }
2637 }
2638 }
2639
2640 if (this->m->linearized || this->m->encrypted)
2641 {
2642 // The document catalog is not allowed to be compressed in
2643 // linearized files either. It also appears that Adobe Reader
2644 // 8.0.0 has a bug that prevents it from being able to handle
2645 // encrypted files with compressed document catalogs, so we
2646 // disable them in that case as well.
2647 QPDFObjGen og = this->m->pdf.getRoot().getObjGen();
2648 if (this->m->object_to_object_stream.count(og))
2649 {
2650 QTC::TC("qpdf", "QPDFWriter uncompressing root");
2651 this->m->object_to_object_stream.erase(og);
2652 }
2653 }
2654
2655 // Generate reverse mapping from object stream to objects
2656 for (std::map<QPDFObjGen, int>::iterator iter =
2657 this->m->object_to_object_stream.begin();
2658 iter != this->m->object_to_object_stream.end(); ++iter)
2659 {
2660 QPDFObjGen obj = (*iter).first;
2661 int stream = (*iter).second;
2662 this->m->object_stream_to_objects[stream].insert(obj);
2663 this->m->max_ostream_index =
2664 std::max(this->m->max_ostream_index,
2665 QIntC::to_int(
2666 this->m->object_stream_to_objects[stream].size()) - 1);
2667 }
2668
2669 if (! this->m->object_stream_to_objects.empty())
2670 {
2671 setMinimumPDFVersion("1.5");
2672 }
2673
2674 setMinimumPDFVersion(this->m->pdf.getPDFVersion(),
2675 this->m->pdf.getExtensionLevel());
2676 this->m->final_pdf_version = this->m->min_pdf_version;
2677 this->m->final_extension_level = this->m->min_extension_level;
2678 if (! this->m->forced_pdf_version.empty())
2679 {
2680 QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2681 this->m->final_pdf_version = this->m->forced_pdf_version;
2682 this->m->final_extension_level = this->m->forced_extension_level;
2683 }
2684 }
2685
2686 void
write()2687 QPDFWriter::write()
2688 {
2689 doWriteSetup();
2690
2691 // Set up progress reporting. For linearized files, we write two
2692 // passes. events_expected is an approximation, but it's good
2693 // enough for progress reporting, which is mostly a guess anyway.
2694 this->m->events_expected = QIntC::to_int(
2695 this->m->pdf.getObjectCount() * (this->m->linearized ? 2 : 1));
2696
2697 prepareFileForWrite();
2698
2699 if (this->m->linearized)
2700 {
2701 writeLinearized();
2702 }
2703 else
2704 {
2705 writeStandard();
2706 }
2707
2708 this->m->pipeline->finish();
2709 if (this->m->close_file)
2710 {
2711 fclose(this->m->file);
2712 }
2713 this->m->file = 0;
2714 if (this->m->buffer_pipeline)
2715 {
2716 this->m->output_buffer = this->m->buffer_pipeline->getBuffer();
2717 this->m->buffer_pipeline = 0;
2718 }
2719 indicateProgress(false, true);
2720 }
2721
2722 QPDFObjGen
getRenumberedObjGen(QPDFObjGen og)2723 QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
2724 {
2725 return QPDFObjGen(this->m->obj_renumber[og], 0);
2726 }
2727
2728 std::map<QPDFObjGen, QPDFXRefEntry>
getWrittenXRefTable()2729 QPDFWriter::getWrittenXRefTable()
2730 {
2731 std::map<QPDFObjGen, QPDFXRefEntry> result;
2732
2733 for (std::map<int, QPDFXRefEntry>::iterator iter = this->m->xref.begin();
2734 iter != this->m->xref.end(); ++iter)
2735 {
2736 if (iter->first != 0 && iter->second.getType() != 0)
2737 {
2738 result[QPDFObjGen(iter->first, 0)] = iter->second;
2739 }
2740 }
2741
2742 return result;
2743 }
2744
2745 void
enqueuePart(std::vector<QPDFObjectHandle> & part)2746 QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2747 {
2748 for (std::vector<QPDFObjectHandle>::iterator iter = part.begin();
2749 iter != part.end(); ++iter)
2750 {
2751 enqueueObject(*iter);
2752 }
2753 }
2754
2755 void
writeEncryptionDictionary()2756 QPDFWriter::writeEncryptionDictionary()
2757 {
2758 this->m->encryption_dict_objid = openObject(this->m->encryption_dict_objid);
2759 writeString("<<");
2760 for (std::map<std::string, std::string>::iterator iter =
2761 this->m->encryption_dictionary.begin();
2762 iter != this->m->encryption_dictionary.end(); ++iter)
2763 {
2764 writeString(" ");
2765 writeString((*iter).first);
2766 writeString(" ");
2767 writeString((*iter).second);
2768 }
2769 writeString(" >>");
2770 closeObject(this->m->encryption_dict_objid);
2771 }
2772
2773 std::string
getFinalVersion()2774 QPDFWriter::getFinalVersion()
2775 {
2776 doWriteSetup();
2777 return this->m->final_pdf_version;
2778 }
2779
2780 void
writeHeader()2781 QPDFWriter::writeHeader()
2782 {
2783 writeString("%PDF-");
2784 writeString(this->m->final_pdf_version);
2785 if (this->m->pclm)
2786 {
2787 // PCLm version
2788 writeString("\n%PCLm 1.0\n");
2789 }
2790 else
2791 {
2792 // This string of binary characters would not be valid UTF-8, so
2793 // it really should be treated as binary.
2794 writeString("\n%\xbf\xf7\xa2\xfe\n");
2795 }
2796 writeStringQDF("%QDF-1.0\n\n");
2797
2798 // Note: do not write extra header text here. Linearized PDFs
2799 // must include the entire linearization parameter dictionary
2800 // within the first 1024 characters of the PDF file, so for
2801 // linearized files, we have to write extra header text after the
2802 // linearization parameter dictionary.
2803 }
2804
2805 void
writeHintStream(int hint_id)2806 QPDFWriter::writeHintStream(int hint_id)
2807 {
2808 PointerHolder<Buffer> hint_buffer;
2809 int S = 0;
2810 int O = 0;
2811 QPDF::Writer::generateHintStream(
2812 this->m->pdf, this->m->xref, this->m->lengths,
2813 this->m->obj_renumber_no_gen,
2814 hint_buffer, S, O);
2815
2816 openObject(hint_id);
2817 setDataKey(hint_id);
2818
2819 size_t hlen = hint_buffer->getSize();
2820
2821 writeString("<< /Filter /FlateDecode /S ");
2822 writeString(QUtil::int_to_string(S));
2823 if (O)
2824 {
2825 writeString(" /O ");
2826 writeString(QUtil::int_to_string(O));
2827 }
2828 writeString(" /Length ");
2829 adjustAESStreamLength(hlen);
2830 writeString(QUtil::uint_to_string(hlen));
2831 writeString(" >>\nstream\n");
2832
2833 if (this->m->encrypted)
2834 {
2835 QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2836 }
2837 unsigned char last_char = '\0';
2838 {
2839 PipelinePopper pp_enc(this);
2840 pushEncryptionFilter(pp_enc);
2841 writeBuffer(hint_buffer);
2842 last_char = this->m->pipeline->getLastChar();
2843 }
2844
2845 if (last_char != '\n')
2846 {
2847 writeString("\n");
2848 }
2849 writeString("endstream");
2850 closeObject(hint_id);
2851 }
2852
2853 qpdf_offset_t
writeXRefTable(trailer_e which,int first,int last,int size)2854 QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
2855 {
2856 // There are too many extra arguments to replace overloaded
2857 // function with defaults in the header file...too much risk of
2858 // leaving something off.
2859 return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0);
2860 }
2861
2862 qpdf_offset_t
writeXRefTable(trailer_e which,int first,int last,int size,qpdf_offset_t prev,bool suppress_offsets,int hint_id,qpdf_offset_t hint_offset,qpdf_offset_t hint_length,int linearization_pass)2863 QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
2864 qpdf_offset_t prev, bool suppress_offsets,
2865 int hint_id, qpdf_offset_t hint_offset,
2866 qpdf_offset_t hint_length, int linearization_pass)
2867 {
2868 writeString("xref\n");
2869 writeString(QUtil::int_to_string(first));
2870 writeString(" ");
2871 writeString(QUtil::int_to_string(last - first + 1));
2872 qpdf_offset_t space_before_zero = this->m->pipeline->getCount();
2873 writeString("\n");
2874 for (int i = first; i <= last; ++i)
2875 {
2876 if (i == 0)
2877 {
2878 writeString("0000000000 65535 f \n");
2879 }
2880 else
2881 {
2882 qpdf_offset_t offset = 0;
2883 if (! suppress_offsets)
2884 {
2885 offset = this->m->xref[i].getOffset();
2886 if ((hint_id != 0) &&
2887 (i != hint_id) &&
2888 (offset >= hint_offset))
2889 {
2890 offset += hint_length;
2891 }
2892 }
2893 writeString(QUtil::int_to_string(offset, 10));
2894 writeString(" 00000 n \n");
2895 }
2896 }
2897 writeTrailer(which, size, false, prev, linearization_pass);
2898 writeString("\n");
2899 return space_before_zero;
2900 }
2901
2902 qpdf_offset_t
writeXRefStream(int objid,int max_id,qpdf_offset_t max_offset,trailer_e which,int first,int last,int size)2903 QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
2904 trailer_e which, int first, int last, int size)
2905 {
2906 // There are too many extra arguments to replace overloaded
2907 // function with defaults in the header file...too much risk of
2908 // leaving something off.
2909 return writeXRefStream(objid, max_id, max_offset,
2910 which, first, last, size, 0, 0, 0, 0, false, 0);
2911 }
2912
2913 qpdf_offset_t
writeXRefStream(int xref_id,int max_id,qpdf_offset_t max_offset,trailer_e which,int first,int last,int size,qpdf_offset_t prev,int hint_id,qpdf_offset_t hint_offset,qpdf_offset_t hint_length,bool skip_compression,int linearization_pass)2914 QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
2915 trailer_e which, int first, int last, int size,
2916 qpdf_offset_t prev, int hint_id,
2917 qpdf_offset_t hint_offset,
2918 qpdf_offset_t hint_length,
2919 bool skip_compression,
2920 int linearization_pass)
2921 {
2922 qpdf_offset_t xref_offset = this->m->pipeline->getCount();
2923 qpdf_offset_t space_before_zero = xref_offset - 1;
2924
2925 // field 1 contains offsets and object stream identifiers
2926 unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length),
2927 bytesNeeded(max_id));
2928
2929 // field 2 contains object stream indices
2930 unsigned int f2_size = bytesNeeded(this->m->max_ostream_index);
2931
2932 unsigned int esize = 1 + f1_size + f2_size;
2933
2934 // Must store in xref table in advance of writing the actual data
2935 // rather than waiting for openObject to do it.
2936 this->m->xref[xref_id] = QPDFXRefEntry(1, this->m->pipeline->getCount(), 0);
2937
2938 Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
2939 bool compressed = false;
2940 if ((this->m->compress_streams ||
2941 (this->m->stream_decode_level == qpdf_dl_none)) &&
2942 (! this->m->qdf_mode))
2943 {
2944 compressed = true;
2945 if (! skip_compression)
2946 {
2947 // Write the stream dictionary for compression but don't
2948 // actually compress. This helps us with computation of
2949 // padding for pass 1 of linearization.
2950 p = pushPipeline(
2951 new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));
2952 }
2953 p = pushPipeline(
2954 new Pl_PNGFilter(
2955 "pngify xref", p, Pl_PNGFilter::a_encode, esize));
2956 }
2957 PointerHolder<Buffer> xref_data;
2958 {
2959 PipelinePopper pp_xref(this, &xref_data);
2960 activatePipelineStack(pp_xref);
2961 for (int i = first; i <= last; ++i)
2962 {
2963 QPDFXRefEntry& e = this->m->xref[i];
2964 switch (e.getType())
2965 {
2966 case 0:
2967 writeBinary(0, 1);
2968 writeBinary(0, f1_size);
2969 writeBinary(0, f2_size);
2970 break;
2971
2972 case 1:
2973 {
2974 qpdf_offset_t offset = e.getOffset();
2975 if ((hint_id != 0) &&
2976 (i != hint_id) &&
2977 (offset >= hint_offset))
2978 {
2979 offset += hint_length;
2980 }
2981 writeBinary(1, 1);
2982 writeBinary(QIntC::to_ulonglong(offset), f1_size);
2983 writeBinary(0, f2_size);
2984 }
2985 break;
2986
2987 case 2:
2988 writeBinary(2, 1);
2989 writeBinary(QIntC::to_ulonglong(e.getObjStreamNumber()), f1_size);
2990 writeBinary(QIntC::to_ulonglong(e.getObjStreamIndex()), f2_size);
2991 break;
2992
2993 default:
2994 throw std::logic_error("invalid type writing xref stream");
2995 break;
2996 }
2997 }
2998 }
2999
3000 openObject(xref_id);
3001 writeString("<<");
3002 writeStringQDF("\n ");
3003 writeString(" /Type /XRef");
3004 writeStringQDF("\n ");
3005 writeString(" /Length " + QUtil::uint_to_string(xref_data->getSize()));
3006 if (compressed)
3007 {
3008 writeStringQDF("\n ");
3009 writeString(" /Filter /FlateDecode");
3010 writeStringQDF("\n ");
3011 writeString(" /DecodeParms << /Columns " +
3012 QUtil::int_to_string(esize) + " /Predictor 12 >>");
3013 }
3014 writeStringQDF("\n ");
3015 writeString(" /W [ 1 " +
3016 QUtil::int_to_string(f1_size) + " " +
3017 QUtil::int_to_string(f2_size) + " ]");
3018 if (! ((first == 0) && (last == size - 1)))
3019 {
3020 writeString(" /Index [ " +
3021 QUtil::int_to_string(first) + " " +
3022 QUtil::int_to_string(last - first + 1) + " ]");
3023 }
3024 writeTrailer(which, size, true, prev, linearization_pass);
3025 writeString("\nstream\n");
3026 writeBuffer(xref_data);
3027 writeString("\nendstream");
3028 closeObject(xref_id);
3029 return space_before_zero;
3030 }
3031
3032 int
calculateXrefStreamPadding(qpdf_offset_t xref_bytes)3033 QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
3034 {
3035 // This routine is called right after a linearization first pass
3036 // xref stream has been written without compression. Calculate
3037 // the amount of padding that would be required in the worst case,
3038 // assuming the number of uncompressed bytes remains the same.
3039 // The worst case for zlib is that the output is larger than the
3040 // input by 6 bytes plus 5 bytes per 16K, and then we'll add 10
3041 // extra bytes for number length increases.
3042
3043 return QIntC::to_int(16 + (5 * ((xref_bytes + 16383) / 16384)));
3044 }
3045
3046 void
discardGeneration(std::map<QPDFObjGen,int> const & in,std::map<int,int> & out)3047 QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in,
3048 std::map<int, int>& out)
3049 {
3050 // There are deep assumptions in the linearization code in QPDF
3051 // that there is only one object with each object number; i.e.,
3052 // you can't have two objects with the same object number and
3053 // different generations. This is a pretty safe assumption
3054 // because Adobe Reader and Acrobat can't actually handle this
3055 // case. There is not much if any code in QPDF outside
3056 // linearization that assumes this, but the linearization code as
3057 // currently implemented would do weird things if we found such a
3058 // case. In order to avoid breaking ABI changes in QPDF, we will
3059 // first assert that this condition holds. Then we can create new
3060 // maps for QPDF that throw away generation numbers.
3061
3062 out.clear();
3063 for (std::map<QPDFObjGen, int>::const_iterator iter = in.begin();
3064 iter != in.end(); ++iter)
3065 {
3066 if (out.count((*iter).first.getObj()))
3067 {
3068 throw std::runtime_error(
3069 "QPDF cannot currently linearize files that contain"
3070 " multiple objects with the same object ID and different"
3071 " generations. If you see this error message, please file"
3072 " a bug report and attach the file if possible. As a"
3073 " workaround, first convert the file with qpdf without"
3074 " linearizing, and then linearize the result of that"
3075 " conversion.");
3076 }
3077 out[(*iter).first.getObj()] = (*iter).second;
3078 }
3079 }
3080
void
QPDFWriter::writeLinearized()
{
    // Write the document in linearized form. The function first
    // reserves object numbers for every part of the file, enqueues
    // the parts, and then emits the whole file twice. Pass 1 goes to
    // a discard pipeline (or to lin_pass1_filename if one was given)
    // and records offsets and sizes; pass 2 re-emits the same
    // sequence using the values recorded during pass 1.

    // Optimize file and enqueue objects in order

    discardGeneration(this->m->object_to_object_stream,
                      this->m->object_to_object_stream_no_gen);

    // Callback handed to optimize(): yields 2 when willFilterStream()
    // returns true for the stream and 1 otherwise.
    // NOTE(review): the meaning of these two codes is defined by
    // QPDF::optimize, which is not visible here -- confirm there.
    auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
        bool compress_stream;
        bool is_metadata;
        if (willFilterStream(stream, compress_stream, is_metadata, nullptr))
        {
            return 2;
        }
        else
        {
            return 1;
        }
    };

    this->m->pdf.optimize(this->m->object_to_object_stream_no_gen,
                          true, skip_stream_parameters);

    // Object lists for linearization parts 4, 6, 7, 8, and 9, filled
    // in by getLinearizedParts.
    std::vector<QPDFObjectHandle> part4;
    std::vector<QPDFObjectHandle> part6;
    std::vector<QPDFObjectHandle> part7;
    std::vector<QPDFObjectHandle> part8;
    std::vector<QPDFObjectHandle> part9;
    QPDF::Writer::getLinearizedParts(
        this->m->pdf, this->m->object_to_object_stream_no_gen,
        part4, part6, part7, part8, part9);

    // Object number sequence:
    //
    //  second half
    //    second half uncompressed objects
    //    second half xref stream, if any
    //    second half compressed objects
    //  first half
    //    linearization dictionary
    //    first half xref stream, if any
    //    part 4 uncompressed objects
    //    encryption dictionary, if any
    //    hint stream
    //    part 6 uncompressed objects
    //    first half compressed objects
    //

    // Second half objects
    int second_half_uncompressed =
        QIntC::to_int(part7.size() + part8.size() + part9.size());
    int second_half_first_obj = 1;
    int after_second_half = 1 + second_half_uncompressed;
    // Skip past the numbers reserved for the uncompressed second-half
    // objects; those are handed out later when parts 7-9 are enqueued.
    this->m->next_objid = after_second_half;
    int second_half_xref = 0;
    // An xref stream is used whenever any object is assigned to an
    // object stream; otherwise plain xref tables are written.
    bool need_xref_stream = (! this->m->object_to_object_stream.empty());
    if (need_xref_stream)
    {
        second_half_xref = this->m->next_objid++;
    }
    // Assign numbers to all compressed objects in the second half.
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
    for (int i = 0; i < 3; ++i)
    {
        for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
             iter != (*vecs2[i]).end(); ++iter)
        {
            assignCompressedObjectNumbers((*iter).getObjGen());
        }
    }
    int second_half_end = this->m->next_objid - 1;
    int second_trailer_size = this->m->next_objid;

    // First half objects
    int first_half_start = this->m->next_objid;
    int lindict_id = this->m->next_objid++;
    int first_half_xref = 0;
    if (need_xref_stream)
    {
        first_half_xref = this->m->next_objid++;
    }
    // Reserve one object number per part 4 object.
    int part4_first_obj = this->m->next_objid;
    this->m->next_objid += QIntC::to_int(part4.size());
    int after_part4 = this->m->next_objid;
    if (this->m->encrypted)
    {
        this->m->encryption_dict_objid = this->m->next_objid++;
    }
    int hint_id = this->m->next_objid++;
    // Reserve one object number per part 6 object.
    int part6_first_obj = this->m->next_objid;
    this->m->next_objid += QIntC::to_int(part6.size());
    int after_part6 = this->m->next_objid;
    // Assign numbers to all compressed objects in the first half
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
    for (int i = 0; i < 2; ++i)
    {
        for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
             iter != (*vecs1[i]).end(); ++iter)
        {
            assignCompressedObjectNumbers((*iter).getObjGen());
        }
    }
    int first_half_end = this->m->next_objid - 1;
    int first_trailer_size = this->m->next_objid;

    // Object IDs of the last object in part 4 and part 6. The write
    // loop below compares against these to know when each part ends.
    // NOTE(review): back() on an empty vector is undefined behavior;
    // this presumably relies on getLinearizedParts always populating
    // part4 and part6 -- confirm.
    int part4_end_marker = part4.back().getObjectID();
    int part6_end_marker = part6.back().getObjectID();
    // Values measured during pass 1 and consumed during pass 2.
    qpdf_offset_t space_before_zero = 0;
    qpdf_offset_t file_size = 0;
    qpdf_offset_t part6_end_offset = 0;
    qpdf_offset_t first_half_max_obj_offset = 0;
    qpdf_offset_t second_xref_offset = 0;
    qpdf_offset_t first_xref_end = 0;
    qpdf_offset_t second_xref_end = 0;

    // Enqueue each part with next_objid rewound to the start of the
    // range reserved for it above, and verify afterwards that
    // enqueuing consumed exactly the reserved range.
    this->m->next_objid = part4_first_obj;
    enqueuePart(part4);
    if (this->m->next_objid != after_part4)
    {
        // This can happen with very botched files as in the fuzzer
        // test. There are likely some faulty assumptions in
        // calculateLinearizationData
        throw std::runtime_error(
            "error encountered after"
            " writing part 4 of linearized data");
    }
    this->m->next_objid = part6_first_obj;
    enqueuePart(part6);
    if (this->m->next_objid != after_part6)
    {
        throw std::runtime_error(
            "error encountered after"
            " writing part 6 of linearized data");
    }
    this->m->next_objid = second_half_first_obj;
    enqueuePart(part7);
    enqueuePart(part8);
    enqueuePart(part9);
    if (this->m->next_objid != after_second_half)
    {
        throw std::runtime_error(
            "error encountered after"
            " writing part 9 of linearized data");
    }

    // Both remain empty/zero throughout pass 1; they are filled in at
    // the end of pass 1 once the hint stream has been generated.
    qpdf_offset_t hint_length = 0;
    PointerHolder<Buffer> hint_buffer;

    // Write file in two passes.  Part numbers refer to PDF spec 1.4.

    // NOTE(review): lin_pass1_file is a raw FILE* that is closed only
    // at the end of pass 1 below. If anything called during pass 1
    // throws, this function does not close it -- confirm whether
    // Pl_StdioFile or a caller takes care of that.
    FILE* lin_pass1_file = 0;
    PointerHolder<PipelinePopper> pp_pass1 = new PipelinePopper(this);
    PointerHolder<PipelinePopper> pp_md5 = new PipelinePopper(this);
    for (int pass = 1; pass <= 2; ++pass)
    {
        if (pass == 1)
        {
            // Pass 1 output goes either to the user-requested debug
            // file or to a discard filter.
            if (! this->m->lin_pass1_filename.empty())
            {
                lin_pass1_file =
                    QUtil::safe_fopen(
                        this->m->lin_pass1_filename.c_str(), "wb");
                pushPipeline(
                    new Pl_StdioFile("linearization pass1", lin_pass1_file));
                activatePipelineStack(*pp_pass1);
            }
            else
            {
                pushDiscardFilter(*pp_pass1);
            }
            if (this->m->deterministic_id)
            {
                pushMD5Pipeline(*pp_md5);
            }
        }

        // Part 1: header

        writeHeader();

        // Part 2: linearization parameter dictionary.  Save enough
        // space to write real dictionary.  200 characters is enough
        // space if all numerical values in the parameter dictionary
        // that contain offsets are 20 digits long plus a few extra
        // characters for safety.  The entire linearization parameter
        // dictionary must appear within the first 1024 characters of
        // the file.

        qpdf_offset_t pos = this->m->pipeline->getCount();
        openObject(lindict_id);
        writeString("<<");
        if (pass == 2)
        {
            // The dictionary's contents are only written in pass 2;
            // pass 1 emits an empty dictionary followed by padding.
            std::vector<QPDFObjectHandle> const& pages =
                this->m->pdf.getAllPages();
            int first_page_object =
                this->m->obj_renumber[pages.at(0).getObjGen()];
            int npages = QIntC::to_int(pages.size());

            // Offsets and sizes measured in pass 1 have hint_length
            // added to them here.
            writeString(" /Linearized 1 /L ");
            writeString(QUtil::int_to_string(file_size + hint_length));
            // Implementation note 121 states that a space is
            // mandatory after this open bracket.
            writeString(" /H [ ");
            writeString(QUtil::int_to_string(
                            this->m->xref[hint_id].getOffset()));
            writeString(" ");
            writeString(QUtil::int_to_string(hint_length));
            writeString(" ] /O ");
            writeString(QUtil::int_to_string(first_page_object));
            writeString(" /E ");
            writeString(QUtil::int_to_string(part6_end_offset + hint_length));
            writeString(" /N ");
            writeString(QUtil::int_to_string(npages));
            writeString(" /T ");
            writeString(QUtil::int_to_string(space_before_zero + hint_length));
        }
        writeString(" >>");
        closeObject(lindict_id);
        // Pad so that the dictionary object plus padding always
        // occupies exactly `pad` bytes counted from `pos`, in both
        // passes.
        static int const pad = 200;
        int spaces = QIntC::to_int(pos - this->m->pipeline->getCount() + pad);
        assert(spaces >= 0);
        writePad(spaces);
        writeString("\n");

        // If the user supplied any additional header text, write it
        // here after the linearization parameter dictionary.
        writeString(this->m->extra_header_text);

        // Part 3: first page cross reference table and trailer.

        qpdf_offset_t first_xref_offset = this->m->pipeline->getCount();
        qpdf_offset_t hint_offset = 0;
        if (pass == 2)
        {
            // Recorded in pass 1 when the end of part 4 was reached.
            hint_offset = this->m->xref[hint_id].getOffset();
        }
        if (need_xref_stream)
        {
            // Must pad here too.
            if (pass == 1)
            {
                // Set first_half_max_obj_offset to a value large
                // enough to force four bytes to be reserved for each
                // file offset.  This would provide adequate space for
                // the xref stream as long as the last object in page
                // 1 starts within the first 4 GB of the file, which
                // is extremely likely.  In the second pass, we will
                // know the actual value for this, but it's okay if
                // it's smaller.
                first_half_max_obj_offset = 1 << 25;
            }
            pos = this->m->pipeline->getCount();
            writeXRefStream(first_half_xref, first_half_end,
                            first_half_max_obj_offset,
                            t_lin_first, first_half_start, first_half_end,
                            first_trailer_size,
                            hint_length + second_xref_offset,
                            hint_id, hint_offset, hint_length,
                            (pass == 1), pass);
            qpdf_offset_t endpos = this->m->pipeline->getCount();
            if (pass == 1)
            {
                // Pad so we have enough room for the real xref
                // stream.
                writePad(calculateXrefStreamPadding(endpos - pos));
                first_xref_end = this->m->pipeline->getCount();
            }
            else
            {
                // Pad so that the next object starts at the same
                // place as in pass 1.
                writePad(QIntC::to_int(first_xref_end - endpos));

                if (this->m->pipeline->getCount() != first_xref_end)
                {
                    throw std::logic_error(
                        "insufficient padding for first pass xref stream; "
                        "first_xref_end=" +
                        QUtil::int_to_string(first_xref_end) +
                        "; endpos=" + QUtil::int_to_string(endpos));
                }
            }
            writeString("\n");
        }
        else
        {
            writeXRefTable(t_lin_first, first_half_start, first_half_end,
                           first_trailer_size, hint_length + second_xref_offset,
                           (pass == 1), hint_id, hint_offset, hint_length,
                           pass);
            writeString("startxref\n0\n%%EOF\n");
        }

        // Parts 4 through 9

        // The queue is iterated rather than drained so that the same
        // objects can be written again in pass 2.
        for (std::list<QPDFObjectHandle>::iterator iter =
                 this->m->object_queue.begin();
             iter != this->m->object_queue.end(); ++iter)
        {
            QPDFObjectHandle cur_object = (*iter);
            if (cur_object.getObjectID() == part6_end_marker)
            {
                // Offset at which the last part 6 object begins.
                first_half_max_obj_offset = this->m->pipeline->getCount();
            }
            writeObject(cur_object);
            if (cur_object.getObjectID() == part4_end_marker)
            {
                // End of part 4: the encryption dictionary (if any)
                // and the hint stream go here.
                if (this->m->encrypted)
                {
                    writeEncryptionDictionary();
                }
                if (pass == 1)
                {
                    // Nothing is written for the hint stream in pass
                    // 1; only the offset where it will go is recorded.
                    this->m->xref[hint_id] =
                        QPDFXRefEntry(1, this->m->pipeline->getCount(), 0);
                }
                else
                {
                    // Part 5: hint stream
                    writeBuffer(hint_buffer);
                }
            }
            if (cur_object.getObjectID() == part6_end_marker)
            {
                // Offset just past the last part 6 object.
                part6_end_offset = this->m->pipeline->getCount();
            }
        }

        // Part 10: overflow hint stream -- not used

        // Part 11: main cross reference table and trailer

        second_xref_offset = this->m->pipeline->getCount();
        if (need_xref_stream)
        {
            pos = this->m->pipeline->getCount();
            space_before_zero =
                writeXRefStream(second_half_xref,
                                second_half_end, second_xref_offset,
                                t_lin_second, 0, second_half_end,
                                second_trailer_size,
                                0, 0, 0, 0, (pass == 1), pass);
            qpdf_offset_t endpos = this->m->pipeline->getCount();

            if (pass == 1)
            {
                // Pad so we have enough room for the real xref
                // stream.  See comments for previous xref stream on
                // how we calculate the padding.
                writePad(calculateXrefStreamPadding(endpos - pos));
                writeString("\n");
                second_xref_end = this->m->pipeline->getCount();
            }
            else
            {
                // Make the file size the same.
                // The "- 1" leaves room for the newline written just
                // below.
                writePad(
                    QIntC::to_int(second_xref_end + hint_length -
                                  1 - this->m->pipeline->getCount()));
                writeString("\n");

                // If this assertion fails, maybe we didn't have
                // enough padding above.
                if (this->m->pipeline->getCount() !=
                    second_xref_end + hint_length)
                {
                    throw std::logic_error(
                        "count mismatch after xref stream;"
                        " possible insufficient padding?");
                }
            }
        }
        else
        {
            space_before_zero =
                writeXRefTable(t_lin_second, 0, second_half_end,
                               second_trailer_size, 0, false, 0, 0, 0, pass);
        }
        writeString("startxref\n");
        writeString(QUtil::int_to_string(first_xref_offset));
        writeString("\n%%EOF\n");

        discardGeneration(this->m->obj_renumber, this->m->obj_renumber_no_gen);

        if (pass == 1)
        {
            if (this->m->deterministic_id)
            {
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID",
                        need_xref_stream ? 0 : 1);
                computeDeterministicIDData();
                // Assigning 0 releases the popper; the assert checks
                // that the MD5 pipeline is gone afterwards.
                pp_md5 = 0;
                assert(this->m->md5_pipeline == 0);
            }

            // Close first pass pipeline
            file_size = this->m->pipeline->getCount();
            pp_pass1 = 0;

            // Save hint offset since it will be set to zero by
            // calling openObject.
            qpdf_offset_t hint_offset1 = this->m->xref[hint_id].getOffset();

            // Write hint stream to a buffer
            {
                // pp_hint goes out of scope at the end of this block;
                // hint_buffer is read immediately afterwards.
                pushPipeline(new Pl_Buffer("hint buffer"));
                PipelinePopper pp_hint(this, &hint_buffer);
                activatePipelineStack(pp_hint);
                writeHintStream(hint_id);
            }
            hint_length = QIntC::to_offset(hint_buffer->getSize());

            // Restore hint offset
            this->m->xref[hint_id] = QPDFXRefEntry(1, hint_offset1, 0);
            if (lin_pass1_file)
            {
                // Write some debugging information
                fprintf(lin_pass1_file, "%% hint_offset=%s\n",
                        QUtil::int_to_string(hint_offset1).c_str());
                fprintf(lin_pass1_file, "%% hint_length=%s\n",
                        QUtil::int_to_string(hint_length).c_str());
                fprintf(lin_pass1_file, "%% second_xref_offset=%s\n",
                        QUtil::int_to_string(second_xref_offset).c_str());
                fprintf(lin_pass1_file, "%% second_xref_end=%s\n",
                        QUtil::int_to_string(second_xref_end).c_str());
                fclose(lin_pass1_file);
                lin_pass1_file = 0;
            }
        }
    }
}
3514
3515 void
enqueueObjectsStandard()3516 QPDFWriter::enqueueObjectsStandard()
3517 {
3518 if (this->m->preserve_unreferenced_objects)
3519 {
3520 QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
3521 std::vector<QPDFObjectHandle> all = this->m->pdf.getAllObjects();
3522 for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
3523 iter != all.end(); ++iter)
3524 {
3525 enqueueObject(*iter);
3526 }
3527 }
3528
3529 // Put root first on queue.
3530 QPDFObjectHandle trailer = getTrimmedTrailer();
3531 enqueueObject(trailer.getKey("/Root"));
3532
3533 // Next place any other objects referenced from the trailer
3534 // dictionary into the queue, handling direct objects recursively.
3535 // Root is already there, so enqueuing it a second time is a
3536 // no-op.
3537 std::set<std::string> keys = trailer.getKeys();
3538 for (std::set<std::string>::iterator iter = keys.begin();
3539 iter != keys.end(); ++iter)
3540 {
3541 enqueueObject(trailer.getKey(*iter));
3542 }
3543 }
3544
3545 void
enqueueObjectsPCLm()3546 QPDFWriter::enqueueObjectsPCLm()
3547 {
3548 // Image transform stream content for page strip images.
3549 // Each of this new stream has to come after every page image
3550 // strip written in the pclm file.
3551 std::string image_transform_content = "q /image Do Q\n";
3552
3553 // enqueue all pages first
3554 std::vector<QPDFObjectHandle> all = this->m->pdf.getAllPages();
3555 for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
3556 iter != all.end(); ++iter)
3557 {
3558 // enqueue page
3559 enqueueObject(*iter);
3560
3561 // enqueue page contents stream
3562 enqueueObject((*iter).getKey("/Contents"));
3563
3564 // enqueue all the strips for each page
3565 QPDFObjectHandle strips =
3566 (*iter).getKey("/Resources").getKey("/XObject");
3567 std::set<std::string> keys = strips.getKeys();
3568 for (std::set<std::string>::iterator image = keys.begin();
3569 image != keys.end(); ++image)
3570 {
3571 enqueueObject(strips.getKey(*image));
3572 enqueueObject(QPDFObjectHandle::newStream(
3573 &this->m->pdf, image_transform_content));
3574 }
3575 }
3576
3577 // Put root in queue.
3578 QPDFObjectHandle trailer = getTrimmedTrailer();
3579 enqueueObject(trailer.getKey("/Root"));
3580 }
3581
3582 void
indicateProgress(bool decrement,bool finished)3583 QPDFWriter::indicateProgress(bool decrement, bool finished)
3584 {
3585 if (decrement)
3586 {
3587 --this->m->events_seen;
3588 return;
3589 }
3590
3591 ++this->m->events_seen;
3592
3593 if (! this->m->progress_reporter.getPointer())
3594 {
3595 return;
3596 }
3597
3598 if (finished || (this->m->events_seen >= this->m->next_progress_report))
3599 {
3600 int percentage = (
3601 finished
3602 ? 100
3603 : this->m->next_progress_report == 0
3604 ? 0
3605 : std::min(99, 1 + ((100 * this->m->events_seen) /
3606 this->m->events_expected)));
3607 this->m->progress_reporter->reportProgress(percentage);
3608 }
3609 int increment = std::max(1, (this->m->events_expected / 100));
3610 while (this->m->events_seen >= this->m->next_progress_report)
3611 {
3612 this->m->next_progress_report += increment;
3613 }
3614 }
3615
3616 void
registerProgressReporter(PointerHolder<ProgressReporter> pr)3617 QPDFWriter::registerProgressReporter(PointerHolder<ProgressReporter> pr)
3618 {
3619 this->m->progress_reporter = pr;
3620 }
3621
3622 void
writeStandard()3623 QPDFWriter::writeStandard()
3624 {
3625 PointerHolder<PipelinePopper> pp_md5 = new PipelinePopper(this);
3626 if (this->m->deterministic_id)
3627 {
3628 pushMD5Pipeline(*pp_md5);
3629 }
3630
3631 // Start writing
3632
3633 writeHeader();
3634 writeString(this->m->extra_header_text);
3635
3636 if (this->m->pclm)
3637 {
3638 enqueueObjectsPCLm();
3639 }
3640 else
3641 {
3642 enqueueObjectsStandard();
3643 }
3644
3645 // Now start walking queue, outputting each object.
3646 while (this->m->object_queue.size())
3647 {
3648 QPDFObjectHandle cur_object = this->m->object_queue.front();
3649 this->m->object_queue.pop_front();
3650 writeObject(cur_object);
3651 }
3652
3653 // Write out the encryption dictionary, if any
3654 if (this->m->encrypted)
3655 {
3656 writeEncryptionDictionary();
3657 }
3658
3659 // Now write out xref. next_objid is now the number of objects.
3660 qpdf_offset_t xref_offset = this->m->pipeline->getCount();
3661 if (this->m->object_stream_to_objects.empty())
3662 {
3663 // Write regular cross-reference table
3664 writeXRefTable(t_normal, 0, this->m->next_objid - 1,
3665 this->m->next_objid);
3666 }
3667 else
3668 {
3669 // Write cross-reference stream.
3670 int xref_id = this->m->next_objid++;
3671 writeXRefStream(xref_id, xref_id, xref_offset, t_normal,
3672 0, this->m->next_objid - 1, this->m->next_objid);
3673 }
3674 writeString("startxref\n");
3675 writeString(QUtil::int_to_string(xref_offset));
3676 writeString("\n%%EOF\n");
3677
3678 if (this->m->deterministic_id)
3679 {
3680 QTC::TC("qpdf", "QPDFWriter standard deterministic ID",
3681 this->m->object_stream_to_objects.empty() ? 0 : 1);
3682 pp_md5 = 0;
3683 assert(this->m->md5_pipeline == 0);
3684 }
3685 }
3686