1 #include <qpdf/qpdf-config.h>  // include first for large file support
2 #include <qpdf/QPDFWriter.hh>
3 
4 #include <assert.h>
5 #include <qpdf/Pl_StdioFile.hh>
6 #include <qpdf/Pl_Count.hh>
7 #include <qpdf/Pl_Discard.hh>
8 #include <qpdf/Pl_RC4.hh>
9 #include <qpdf/Pl_AES_PDF.hh>
10 #include <qpdf/Pl_Flate.hh>
11 #include <qpdf/Pl_PNGFilter.hh>
12 #include <qpdf/Pl_MD5.hh>
13 #include <qpdf/QUtil.hh>
14 #include <qpdf/MD5.hh>
15 #include <qpdf/RC4.hh>
16 #include <qpdf/QTC.hh>
17 
18 #include <qpdf/QPDF.hh>
19 #include <qpdf/QPDFObjectHandle.hh>
20 #include <qpdf/QPDF_Name.hh>
21 #include <qpdf/QPDF_String.hh>
22 #include <qpdf/QIntC.hh>
23 
24 #include <algorithm>
25 #include <stdlib.h>
26 
Members(QPDF & pdf)27 QPDFWriter::Members::Members(QPDF& pdf) :
28     pdf(pdf),
29     filename("unspecified"),
30     file(0),
31     close_file(false),
32     buffer_pipeline(0),
33     output_buffer(0),
34     normalize_content_set(false),
35     normalize_content(false),
36     compress_streams(true),
37     compress_streams_set(false),
38     stream_decode_level(qpdf_dl_none),
39     stream_decode_level_set(false),
40     recompress_flate(false),
41     qdf_mode(false),
42     preserve_unreferenced_objects(false),
43     newline_before_endstream(false),
44     static_id(false),
45     suppress_original_object_ids(false),
46     direct_stream_lengths(true),
47     encrypted(false),
48     preserve_encryption(true),
49     linearized(false),
50     pclm(false),
51     object_stream_mode(qpdf_o_preserve),
52     encrypt_metadata(true),
53     encrypt_use_aes(false),
54     encryption_V(0),
55     encryption_R(0),
56     final_extension_level(0),
57     min_extension_level(0),
58     forced_extension_level(0),
59     encryption_dict_objid(0),
60     pipeline(0),
61     next_objid(1),
62     cur_stream_length_id(0),
63     cur_stream_length(0),
64     added_newline(false),
65     max_ostream_index(0),
66     next_stack_id(0),
67     deterministic_id(false),
68     md5_pipeline(0),
69     did_write_setup(false),
70     events_expected(0),
71     events_seen(0),
72     next_progress_report(0)
73 {
74 }
75 
~Members()76 QPDFWriter::Members::~Members()
77 {
78     if (file && close_file)
79     {
80 	fclose(file);
81     }
82     delete output_buffer;
83 }
84 
QPDFWriter(QPDF & pdf)85 QPDFWriter::QPDFWriter(QPDF& pdf) :
86     m(new Members(pdf))
87 {
88 }
89 
QPDFWriter(QPDF & pdf,char const * filename)90 QPDFWriter::QPDFWriter(QPDF& pdf, char const* filename) :
91     m(new Members(pdf))
92 {
93     setOutputFilename(filename);
94 }
95 
QPDFWriter(QPDF & pdf,char const * description,FILE * file,bool close_file)96 QPDFWriter::QPDFWriter(QPDF& pdf, char const* description,
97                        FILE *file, bool close_file) :
98     m(new Members(pdf))
99 {
100     setOutputFile(description, file, close_file);
101 }
102 
~QPDFWriter()103 QPDFWriter::~QPDFWriter()
104 {
105 }
106 
107 void
setOutputFilename(char const * filename)108 QPDFWriter::setOutputFilename(char const* filename)
109 {
110     char const* description = filename;
111     FILE* f = 0;
112     bool close_file = false;
113     if (filename == 0)
114     {
115 	description = "standard output";
116 	QTC::TC("qpdf", "QPDFWriter write to stdout");
117 	f = stdout;
118 	QUtil::binary_stdout();
119     }
120     else
121     {
122 	QTC::TC("qpdf", "QPDFWriter write to file");
123 	f = QUtil::safe_fopen(filename, "wb+");
124 	close_file = true;
125     }
126     setOutputFile(description, f, close_file);
127 }
128 
129 void
setOutputFile(char const * description,FILE * file,bool close_file)130 QPDFWriter::setOutputFile(char const* description, FILE* file, bool close_file)
131 {
132     this->m->filename = description;
133     this->m->file = file;
134     this->m->close_file = close_file;
135     Pipeline* p = new Pl_StdioFile("qpdf output", file);
136     this->m->to_delete.push_back(p);
137     initializePipelineStack(p);
138 }
139 
140 void
setOutputMemory()141 QPDFWriter::setOutputMemory()
142 {
143     this->m->filename = "memory buffer";
144     this->m->buffer_pipeline = new Pl_Buffer("qpdf output");
145     this->m->to_delete.push_back(this->m->buffer_pipeline);
146     initializePipelineStack(this->m->buffer_pipeline);
147 }
148 
149 Buffer*
getBuffer()150 QPDFWriter::getBuffer()
151 {
152     Buffer* result = this->m->output_buffer;
153     this->m->output_buffer = 0;
154     return result;
155 }
156 
157 void
setOutputPipeline(Pipeline * p)158 QPDFWriter::setOutputPipeline(Pipeline* p)
159 {
160     this->m->filename = "custom pipeline";
161     initializePipelineStack(p);
162 }
163 
164 void
setObjectStreamMode(qpdf_object_stream_e mode)165 QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
166 {
167     this->m->object_stream_mode = mode;
168 }
169 
170 void
setStreamDataMode(qpdf_stream_data_e mode)171 QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
172 {
173     switch (mode)
174     {
175       case qpdf_s_uncompress:
176         this->m->stream_decode_level =
177             std::max(qpdf_dl_generalized, this->m->stream_decode_level);
178         this->m->compress_streams = false;
179         break;
180 
181       case qpdf_s_preserve:
182         this->m->stream_decode_level = qpdf_dl_none;
183         this->m->compress_streams = false;
184         break;
185 
186       case qpdf_s_compress:
187         this->m->stream_decode_level =
188             std::max(qpdf_dl_generalized, this->m->stream_decode_level);
189         this->m->compress_streams = true;
190         break;
191     }
192     this->m->stream_decode_level_set = true;
193     this->m->compress_streams_set = true;
194 }
195 
196 
197 void
setCompressStreams(bool val)198 QPDFWriter::setCompressStreams(bool val)
199 {
200     this->m->compress_streams = val;
201     this->m->compress_streams_set = true;
202 }
203 
204 void
setDecodeLevel(qpdf_stream_decode_level_e val)205 QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
206 {
207     this->m->stream_decode_level = val;
208     this->m->stream_decode_level_set = true;
209 }
210 
211 void
setRecompressFlate(bool val)212 QPDFWriter::setRecompressFlate(bool val)
213 {
214     this->m->recompress_flate = val;
215 }
216 
217 void
setContentNormalization(bool val)218 QPDFWriter::setContentNormalization(bool val)
219 {
220     this->m->normalize_content_set = true;
221     this->m->normalize_content = val;
222 }
223 
224 void
setQDFMode(bool val)225 QPDFWriter::setQDFMode(bool val)
226 {
227     this->m->qdf_mode = val;
228 }
229 
230 void
setPreserveUnreferencedObjects(bool val)231 QPDFWriter::setPreserveUnreferencedObjects(bool val)
232 {
233     this->m->preserve_unreferenced_objects = val;
234 }
235 
236 void
setNewlineBeforeEndstream(bool val)237 QPDFWriter::setNewlineBeforeEndstream(bool val)
238 {
239     this->m->newline_before_endstream = val;
240 }
241 
242 void
setMinimumPDFVersion(std::string const & version,int extension_level)243 QPDFWriter::setMinimumPDFVersion(std::string const& version,
244                                  int extension_level)
245 {
246     bool set_version = false;
247     bool set_extension_level = false;
248     if (this->m->min_pdf_version.empty())
249     {
250 	set_version = true;
251         set_extension_level = true;
252     }
253     else
254     {
255 	int old_major = 0;
256 	int old_minor = 0;
257 	int min_major = 0;
258 	int min_minor = 0;
259 	parseVersion(version, old_major, old_minor);
260 	parseVersion(this->m->min_pdf_version, min_major, min_minor);
261         int compare = compareVersions(
262             old_major, old_minor, min_major, min_minor);
263 	if (compare > 0)
264 	{
265 	    QTC::TC("qpdf", "QPDFWriter increasing minimum version",
266                     extension_level == 0 ? 0 : 1);
267 	    set_version = true;
268             set_extension_level = true;
269 	}
270         else if (compare == 0)
271         {
272             if (extension_level > this->m->min_extension_level)
273             {
274                 QTC::TC("qpdf", "QPDFWriter increasing extension level");
275                 set_extension_level = true;
276             }
277 	}
278     }
279 
280     if (set_version)
281     {
282 	this->m->min_pdf_version = version;
283     }
284     if (set_extension_level)
285     {
286         this->m->min_extension_level = extension_level;
287     }
288 }
289 
290 void
forcePDFVersion(std::string const & version,int extension_level)291 QPDFWriter::forcePDFVersion(std::string const& version,
292                             int extension_level)
293 {
294     this->m->forced_pdf_version = version;
295     this->m->forced_extension_level = extension_level;
296 }
297 
298 void
setExtraHeaderText(std::string const & text)299 QPDFWriter::setExtraHeaderText(std::string const& text)
300 {
301     this->m->extra_header_text = text;
302     if ((this->m->extra_header_text.length() > 0) &&
303         (*(this->m->extra_header_text.rbegin()) != '\n'))
304     {
305         QTC::TC("qpdf", "QPDFWriter extra header text add newline");
306         this->m->extra_header_text += "\n";
307     }
308     else
309     {
310         QTC::TC("qpdf", "QPDFWriter extra header text no newline");
311     }
312 }
313 
314 void
setStaticID(bool val)315 QPDFWriter::setStaticID(bool val)
316 {
317     this->m->static_id = val;
318 }
319 
320 void
setDeterministicID(bool val)321 QPDFWriter::setDeterministicID(bool val)
322 {
323     this->m->deterministic_id = val;
324 }
325 
326 void
setStaticAesIV(bool val)327 QPDFWriter::setStaticAesIV(bool val)
328 {
329     if (val)
330     {
331 	Pl_AES_PDF::useStaticIV();
332     }
333 }
334 
335 void
setSuppressOriginalObjectIDs(bool val)336 QPDFWriter::setSuppressOriginalObjectIDs(bool val)
337 {
338     this->m->suppress_original_object_ids = val;
339 }
340 
341 void
setPreserveEncryption(bool val)342 QPDFWriter::setPreserveEncryption(bool val)
343 {
344     this->m->preserve_encryption = val;
345 }
346 
347 void
setLinearization(bool val)348 QPDFWriter::setLinearization(bool val)
349 {
350     this->m->linearized = val;
351     if (val)
352     {
353         this->m->pclm = false;
354     }
355 }
356 
357 void
setLinearizationPass1Filename(std::string const & filename)358 QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
359 {
360     this->m->lin_pass1_filename = filename;
361 }
362 
363 void
setPCLm(bool val)364 QPDFWriter::setPCLm(bool val)
365 {
366     this->m->pclm = val;
367     if (val)
368     {
369         this->m->linearized = false;
370     }
371 }
372 
373 void
setR2EncryptionParameters(char const * user_password,char const * owner_password,bool allow_print,bool allow_modify,bool allow_extract,bool allow_annotate)374 QPDFWriter::setR2EncryptionParameters(
375     char const* user_password, char const* owner_password,
376     bool allow_print, bool allow_modify,
377     bool allow_extract, bool allow_annotate)
378 {
379     std::set<int> clear;
380     if (! allow_print)
381     {
382 	clear.insert(3);
383     }
384     if (! allow_modify)
385     {
386 	clear.insert(4);
387     }
388     if (! allow_extract)
389     {
390 	clear.insert(5);
391     }
392     if (! allow_annotate)
393     {
394 	clear.insert(6);
395     }
396 
397     setEncryptionParameters(user_password, owner_password, 1, 2, 5, clear);
398 }
399 
400 void
setR3EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,qpdf_r3_print_e print,qpdf_r3_modify_e modify)401 QPDFWriter::setR3EncryptionParameters(
402     char const* user_password, char const* owner_password,
403     bool allow_accessibility, bool allow_extract,
404     qpdf_r3_print_e print, qpdf_r3_modify_e modify)
405 {
406     std::set<int> clear;
407     interpretR3EncryptionParameters(
408 	clear, user_password, owner_password,
409 	allow_accessibility, allow_extract,
410         true, true, true, true, print, modify);
411     setEncryptionParameters(user_password, owner_password, 2, 3, 16, clear);
412 }
413 
414 void
setR3EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print)415 QPDFWriter::setR3EncryptionParameters(
416     char const* user_password, char const* owner_password,
417     bool allow_accessibility, bool allow_extract,
418     bool allow_assemble, bool allow_annotate_and_form,
419     bool allow_form_filling, bool allow_modify_other,
420     qpdf_r3_print_e print)
421 {
422     std::set<int> clear;
423     interpretR3EncryptionParameters(
424 	clear, user_password, owner_password,
425 	allow_accessibility, allow_extract,
426         allow_assemble, allow_annotate_and_form,
427         allow_form_filling, allow_modify_other,
428         print, qpdf_r3m_all);
429     setEncryptionParameters(user_password, owner_password, 2, 3, 16, clear);
430 }
431 
432 void
setR4EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,qpdf_r3_print_e print,qpdf_r3_modify_e modify,bool encrypt_metadata,bool use_aes)433 QPDFWriter::setR4EncryptionParameters(
434     char const* user_password, char const* owner_password,
435     bool allow_accessibility, bool allow_extract,
436     qpdf_r3_print_e print, qpdf_r3_modify_e modify,
437     bool encrypt_metadata, bool use_aes)
438 {
439     std::set<int> clear;
440     interpretR3EncryptionParameters(
441 	clear, user_password, owner_password,
442 	allow_accessibility, allow_extract,
443         true, true, true, true, print, modify);
444     this->m->encrypt_use_aes = use_aes;
445     this->m->encrypt_metadata = encrypt_metadata;
446     setEncryptionParameters(user_password, owner_password, 4, 4, 16, clear);
447 }
448 
449 void
setR4EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print,bool encrypt_metadata,bool use_aes)450 QPDFWriter::setR4EncryptionParameters(
451     char const* user_password, char const* owner_password,
452     bool allow_accessibility, bool allow_extract,
453     bool allow_assemble, bool allow_annotate_and_form,
454     bool allow_form_filling, bool allow_modify_other,
455     qpdf_r3_print_e print,
456     bool encrypt_metadata, bool use_aes)
457 {
458     std::set<int> clear;
459     interpretR3EncryptionParameters(
460 	clear, user_password, owner_password,
461 	allow_accessibility, allow_extract,
462         allow_assemble, allow_annotate_and_form,
463         allow_form_filling, allow_modify_other,
464         print, qpdf_r3m_all);
465     this->m->encrypt_use_aes = use_aes;
466     this->m->encrypt_metadata = encrypt_metadata;
467     setEncryptionParameters(user_password, owner_password, 4, 4, 16, clear);
468 }
469 
470 void
setR5EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,qpdf_r3_print_e print,qpdf_r3_modify_e modify,bool encrypt_metadata)471 QPDFWriter::setR5EncryptionParameters(
472     char const* user_password, char const* owner_password,
473     bool allow_accessibility, bool allow_extract,
474     qpdf_r3_print_e print, qpdf_r3_modify_e modify,
475     bool encrypt_metadata)
476 {
477     std::set<int> clear;
478     interpretR3EncryptionParameters(
479 	clear, user_password, owner_password,
480 	allow_accessibility, allow_extract,
481         true, true, true, true, print, modify);
482     this->m->encrypt_use_aes = true;
483     this->m->encrypt_metadata = encrypt_metadata;
484     setEncryptionParameters(user_password, owner_password, 5, 5, 32, clear);
485 }
486 
487 void
setR5EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print,bool encrypt_metadata)488 QPDFWriter::setR5EncryptionParameters(
489     char const* user_password, char const* owner_password,
490     bool allow_accessibility, bool allow_extract,
491     bool allow_assemble, bool allow_annotate_and_form,
492     bool allow_form_filling, bool allow_modify_other,
493     qpdf_r3_print_e print,
494     bool encrypt_metadata)
495 {
496     std::set<int> clear;
497     interpretR3EncryptionParameters(
498 	clear, user_password, owner_password,
499 	allow_accessibility, allow_extract,
500         allow_assemble, allow_annotate_and_form,
501         allow_form_filling, allow_modify_other,
502         print, qpdf_r3m_all);
503     this->m->encrypt_use_aes = true;
504     this->m->encrypt_metadata = encrypt_metadata;
505     setEncryptionParameters(user_password, owner_password, 5, 5, 32, clear);
506 }
507 
508 void
setR6EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,qpdf_r3_print_e print,qpdf_r3_modify_e modify,bool encrypt_metadata)509 QPDFWriter::setR6EncryptionParameters(
510     char const* user_password, char const* owner_password,
511     bool allow_accessibility, bool allow_extract,
512     qpdf_r3_print_e print, qpdf_r3_modify_e modify,
513     bool encrypt_metadata)
514 {
515     std::set<int> clear;
516     interpretR3EncryptionParameters(
517 	clear, user_password, owner_password,
518 	allow_accessibility, allow_extract,
519         true, true, true, true, print, modify);
520     this->m->encrypt_use_aes = true;
521     this->m->encrypt_metadata = encrypt_metadata;
522     setEncryptionParameters(user_password, owner_password, 5, 6, 32, clear);
523 }
524 
525 void
setR6EncryptionParameters(char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print,bool encrypt_metadata)526 QPDFWriter::setR6EncryptionParameters(
527     char const* user_password, char const* owner_password,
528     bool allow_accessibility, bool allow_extract,
529     bool allow_assemble, bool allow_annotate_and_form,
530     bool allow_form_filling, bool allow_modify_other,
531     qpdf_r3_print_e print,
532     bool encrypt_metadata)
533 {
534     std::set<int> clear;
535     interpretR3EncryptionParameters(
536 	clear, user_password, owner_password,
537 	allow_accessibility, allow_extract,
538         allow_assemble, allow_annotate_and_form,
539         allow_form_filling, allow_modify_other,
540         print, qpdf_r3m_all);
541     this->m->encrypt_use_aes = true;
542     this->m->encrypt_metadata = encrypt_metadata;
543     setEncryptionParameters(user_password, owner_password, 5, 6, 32, clear);
544 }
545 
546 void
interpretR3EncryptionParameters(std::set<int> & clear,char const * user_password,char const * owner_password,bool allow_accessibility,bool allow_extract,bool allow_assemble,bool allow_annotate_and_form,bool allow_form_filling,bool allow_modify_other,qpdf_r3_print_e print,qpdf_r3_modify_e modify)547 QPDFWriter::interpretR3EncryptionParameters(
548     std::set<int>& clear,
549     char const* user_password, char const* owner_password,
550     bool allow_accessibility, bool allow_extract,
551     bool allow_assemble, bool allow_annotate_and_form,
552     bool allow_form_filling, bool allow_modify_other,
553     qpdf_r3_print_e print, qpdf_r3_modify_e modify)
554 {
555     // Acrobat 5 security options:
556 
557     // Checkboxes:
558     //   Enable Content Access for the Visually Impaired
559     //   Allow Content Copying and Extraction
560 
561     // Allowed changes menu:
562     //   None
563     //   Only Document Assembly
564     //   Only Form Field Fill-in or Signing
565     //   Comment Authoring, Form Field Fill-in or Signing
566     //   General Editing, Comment and Form Field Authoring
567 
568     // Allowed printing menu:
569     //   None
570     //   Low Resolution
571     //   Full printing
572 
573     // Meanings of bits in P when R >= 3
574     //
575     //  3: low-resolution printing
576     //  4: document modification except as controlled by 6, 9, and 11
577     //  5: extraction
578     //  6: add/modify annotations (comment), fill in forms
579     //     if 4+6 are set, also allows modification of form fields
580     //  9: fill in forms even if 6 is clear
581     // 10: accessibility; ignored by readers, should always be set
582     // 11: document assembly even if 4 is clear
583     // 12: high-resolution printing
584 
585     if (! allow_accessibility)
586     {
587         // setEncryptionParameters sets this if R > 3
588 	clear.insert(10);
589     }
590     if (! allow_extract)
591     {
592 	clear.insert(5);
593     }
594 
595     // Note: these switch statements all "fall through" (no break
596     // statements).  Each option clears successively more access bits.
597     switch (print)
598     {
599       case qpdf_r3p_none:
600 	clear.insert(3);	// any printing
601 
602       case qpdf_r3p_low:
603 	clear.insert(12);	// high resolution printing
604 
605       case qpdf_r3p_full:
606 	break;
607 
608 	// no default so gcc warns for missing cases
609     }
610 
611     // Modify options. The qpdf_r3_modify_e options control groups of
612     // bits and lack the full flexibility of the spec. This is
613     // unfortunate, but it's been in the API for ages, and we're stuck
614     // with it. See also allow checks below to control the bits
615     // individually.
616 
617     // NOT EXERCISED IN TEST SUITE
618     switch (modify)
619     {
620       case qpdf_r3m_none:
621 	clear.insert(11);	// document assembly
622 
623       case qpdf_r3m_assembly:
624 	clear.insert(9);	// filling in form fields
625 
626       case qpdf_r3m_form:
627 	clear.insert(6);	// modify annotations, fill in form fields
628 
629       case qpdf_r3m_annotate:
630 	clear.insert(4);	// other modifications
631 
632       case qpdf_r3m_all:
633 	break;
634 
635 	// no default so gcc warns for missing cases
636     }
637     // END NOT EXERCISED IN TEST SUITE
638 
639     if (! allow_assemble)
640     {
641         clear.insert(11);
642     }
643     if (! allow_annotate_and_form)
644     {
645         clear.insert(6);
646     }
647     if (! allow_form_filling)
648     {
649         clear.insert(9);
650     }
651     if (! allow_modify_other)
652     {
653         clear.insert(4);
654     }
655 }
656 
657 void
setEncryptionParameters(char const * user_password,char const * owner_password,int V,int R,int key_len,std::set<int> & bits_to_clear)658 QPDFWriter::setEncryptionParameters(
659     char const* user_password, char const* owner_password,
660     int V, int R, int key_len, std::set<int>& bits_to_clear)
661 {
662     // PDF specification refers to bits with the low bit numbered 1.
663     // We have to convert this into a bit field.
664 
665     // Specification always requires bits 1 and 2 to be cleared.
666     bits_to_clear.insert(1);
667     bits_to_clear.insert(2);
668 
669     if (R > 3)
670     {
671         // Bit 10 is deprecated and should always be set.  This used
672         // to mean accessibility.  There is no way to disable
673         // accessibility with R > 3.
674         bits_to_clear.erase(10);
675     }
676 
677     int P = 0;
678     // Create the complement of P, then invert.
679     for (std::set<int>::iterator iter = bits_to_clear.begin();
680 	 iter != bits_to_clear.end(); ++iter)
681     {
682 	P |= (1 << ((*iter) - 1));
683     }
684     P = ~P;
685 
686     generateID();
687     std::string O;
688     std::string U;
689     std::string OE;
690     std::string UE;
691     std::string Perms;
692     std::string encryption_key;
693     if (V < 5)
694     {
695         QPDF::compute_encryption_O_U(
696             user_password, owner_password, V, R, key_len, P,
697             this->m->encrypt_metadata, this->m->id1, O, U);
698     }
699     else
700     {
701         QPDF::compute_encryption_parameters_V5(
702             user_password, owner_password, V, R, key_len, P,
703             this->m->encrypt_metadata, this->m->id1,
704             encryption_key, O, U, OE, UE, Perms);
705     }
706     setEncryptionParametersInternal(
707 	V, R, key_len, P, O, U, OE, UE, Perms,
708         this->m->id1, user_password, encryption_key);
709 }
710 
711 void
copyEncryptionParameters(QPDF & qpdf)712 QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
713 {
714     this->m->preserve_encryption = false;
715     QPDFObjectHandle trailer = qpdf.getTrailer();
716     if (trailer.hasKey("/Encrypt"))
717     {
718         generateID();
719         this->m->id1 =
720             trailer.getKey("/ID").getArrayItem(0).getStringValue();
721 	QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
722 	int V = encrypt.getKey("/V").getIntValueAsInt();
723 	int key_len = 5;
724 	if (V > 1)
725 	{
726 	    key_len = encrypt.getKey("/Length").getIntValueAsInt() / 8;
727 	}
728 	if (encrypt.hasKey("/EncryptMetadata") &&
729 	    encrypt.getKey("/EncryptMetadata").isBool())
730 	{
731 	    this->m->encrypt_metadata =
732 		encrypt.getKey("/EncryptMetadata").getBoolValue();
733 	}
734         if (V >= 4)
735         {
736             // When copying encryption parameters, use AES even if the
737             // original file did not.  Acrobat doesn't create files
738             // with V >= 4 that don't use AES, and the logic of
739             // figuring out whether AES is used or not is complicated
740             // with /StmF, /StrF, and /EFF all potentially having
741             // different values.
742             this->m->encrypt_use_aes = true;
743         }
744 	QTC::TC("qpdf", "QPDFWriter copy encrypt metadata",
745 		this->m->encrypt_metadata ? 0 : 1);
746         QTC::TC("qpdf", "QPDFWriter copy use_aes",
747                 this->m->encrypt_use_aes ? 0 : 1);
748         std::string OE;
749         std::string UE;
750         std::string Perms;
751         std::string encryption_key;
752         if (V >= 5)
753         {
754             QTC::TC("qpdf", "QPDFWriter copy V5");
755 	    OE = encrypt.getKey("/OE").getStringValue();
756             UE = encrypt.getKey("/UE").getStringValue();
757 	    Perms = encrypt.getKey("/Perms").getStringValue();
758             encryption_key = qpdf.getEncryptionKey();
759         }
760 
761 	setEncryptionParametersInternal(
762 	    V,
763 	    encrypt.getKey("/R").getIntValueAsInt(),
764     	    key_len,
765 	    static_cast<int>(encrypt.getKey("/P").getIntValue()),
766 	    encrypt.getKey("/O").getStringValue(),
767 	    encrypt.getKey("/U").getStringValue(),
768             OE,
769             UE,
770             Perms,
771 	    this->m->id1,		// this->m->id1 == the other file's id1
772 	    qpdf.getPaddedUserPassword(),
773             encryption_key);
774     }
775 }
776 
777 void
disableIncompatibleEncryption(int major,int minor,int extension_level)778 QPDFWriter::disableIncompatibleEncryption(int major, int minor,
779                                           int extension_level)
780 {
781     if (! this->m->encrypted)
782     {
783 	return;
784     }
785 
786     bool disable = false;
787     if (compareVersions(major, minor, 1, 3) < 0)
788     {
789 	disable = true;
790     }
791     else
792     {
793 	int V = QUtil::string_to_int(
794             this->m->encryption_dictionary["/V"].c_str());
795 	int R = QUtil::string_to_int(
796             this->m->encryption_dictionary["/R"].c_str());
797 	if (compareVersions(major, minor, 1, 4) < 0)
798 	{
799 	    if ((V > 1) || (R > 2))
800 	    {
801 		disable = true;
802 	    }
803 	}
804 	else if (compareVersions(major, minor, 1, 5) < 0)
805 	{
806 	    if ((V > 2) || (R > 3))
807 	    {
808 		disable = true;
809 	    }
810 	}
811 	else if (compareVersions(major, minor, 1, 6) < 0)
812 	{
813 	    if (this->m->encrypt_use_aes)
814 	    {
815 		disable = true;
816 	    }
817 	}
818         else if ((compareVersions(major, minor, 1, 7) < 0) ||
819                  ((compareVersions(major, minor, 1, 7) == 0) &&
820                   extension_level < 3))
821         {
822             if ((V >= 5) || (R >= 5))
823             {
824                 disable = true;
825             }
826         }
827     }
828     if (disable)
829     {
830 	QTC::TC("qpdf", "QPDFWriter forced version disabled encryption");
831 	this->m->encrypted = false;
832     }
833 }
834 
835 void
parseVersion(std::string const & version,int & major,int & minor) const836 QPDFWriter::parseVersion(std::string const& version,
837 			 int& major, int& minor) const
838 {
839     major = QUtil::string_to_int(version.c_str());
840     minor = 0;
841     size_t p = version.find('.');
842     if ((p != std::string::npos) && (version.length() > p))
843     {
844 	minor = QUtil::string_to_int(version.substr(p + 1).c_str());
845     }
846     std::string tmp = QUtil::int_to_string(major) + "." +
847 	QUtil::int_to_string(minor);
848     if (tmp != version)
849     {
850         // The version number in the input is probably invalid. This
851         // happens with some files that are designed to exercise bugs,
852         // such as files in the fuzzer corpus. Unfortunately
853         // QPDFWriter doesn't have a way to give a warning, so we just
854         // ignore this case.
855     }
856 }
857 
858 int
compareVersions(int major1,int minor1,int major2,int minor2) const859 QPDFWriter::compareVersions(int major1, int minor1,
860 			    int major2, int minor2) const
861 {
862     if (major1 < major2)
863     {
864 	return -1;
865     }
866     else if (major1 > major2)
867     {
868 	return 1;
869     }
870     else if (minor1 < minor2)
871     {
872 	return -1;
873     }
874     else if (minor1 > minor2)
875     {
876 	return 1;
877     }
878     else
879     {
880 	return 0;
881     }
882 }
883 
884 void
setEncryptionParametersInternal(int V,int R,int key_len,int P,std::string const & O,std::string const & U,std::string const & OE,std::string const & UE,std::string const & Perms,std::string const & id1,std::string const & user_password,std::string const & encryption_key)885 QPDFWriter::setEncryptionParametersInternal(
886     int V, int R, int key_len, int P,
887     std::string const& O, std::string const& U,
888     std::string const& OE, std::string const& UE, std::string const& Perms,
889     std::string const& id1, std::string const& user_password,
890     std::string const& encryption_key)
891 {
892     this->m->encryption_V = V;
893     this->m->encryption_R = R;
894     this->m->encryption_dictionary["/Filter"] = "/Standard";
895     this->m->encryption_dictionary["/V"] = QUtil::int_to_string(V);
896     this->m->encryption_dictionary["/Length"] =
897         QUtil::int_to_string(key_len * 8);
898     this->m->encryption_dictionary["/R"] = QUtil::int_to_string(R);
899     this->m->encryption_dictionary["/P"] = QUtil::int_to_string(P);
900     this->m->encryption_dictionary["/O"] = QPDF_String(O).unparse(true);
901     this->m->encryption_dictionary["/U"] = QPDF_String(U).unparse(true);
902     if (V >= 5)
903     {
904         this->m->encryption_dictionary["/OE"] = QPDF_String(OE).unparse(true);
905         this->m->encryption_dictionary["/UE"] = QPDF_String(UE).unparse(true);
906         this->m->encryption_dictionary["/Perms"] =
907             QPDF_String(Perms).unparse(true);
908     }
909     if (R >= 6)
910     {
911         setMinimumPDFVersion("1.7", 8);
912     }
913     else if (R == 5)
914     {
915         setMinimumPDFVersion("1.7", 3);
916     }
917     else if (R == 4)
918     {
919         setMinimumPDFVersion(this->m->encrypt_use_aes ? "1.6" : "1.5");
920     }
921     else if (R == 3)
922     {
923         setMinimumPDFVersion("1.4");
924     }
925     else
926     {
927         setMinimumPDFVersion("1.3");
928     }
929 
930     if ((R >= 4) && (! this->m->encrypt_metadata))
931     {
932 	this->m->encryption_dictionary["/EncryptMetadata"] = "false";
933     }
934     if ((V == 4) || (V == 5))
935     {
936 	// The spec says the value for the crypt filter key can be
937 	// anything, and xpdf seems to agree.  However, Adobe Reader
938 	// won't open our files unless we use /StdCF.
939 	this->m->encryption_dictionary["/StmF"] = "/StdCF";
940 	this->m->encryption_dictionary["/StrF"] = "/StdCF";
941 	std::string method = (this->m->encrypt_use_aes
942                               ? ((V < 5) ? "/AESV2" : "/AESV3")
943                               : "/V2");
944         // The PDF spec says the /Length key is optional, but the PDF
945         // previewer on some versions of MacOS won't open encrypted
946         // files without it.
947 	this->m->encryption_dictionary["/CF"] =
948 	    "<< /StdCF << /AuthEvent /DocOpen /CFM " + method +
949             " /Length " + std::string((V < 5) ? "16" : "32") + " >> >>";
950     }
951 
952     this->m->encrypted = true;
953     QPDF::EncryptionData encryption_data(
954 	V, R, key_len, P, O, U, OE, UE, Perms, id1, this->m->encrypt_metadata);
955     if (V < 5)
956     {
957         this->m->encryption_key = QPDF::compute_encryption_key(
958             user_password, encryption_data);
959     }
960     else
961     {
962         this->m->encryption_key = encryption_key;
963     }
964 }
965 
966 void
setDataKey(int objid)967 QPDFWriter::setDataKey(int objid)
968 {
969     this->m->cur_data_key = QPDF::compute_data_key(
970 	this->m->encryption_key, objid, 0,
971         this->m->encrypt_use_aes, this->m->encryption_V, this->m->encryption_R);
972 }
973 
974 unsigned int
bytesNeeded(long long n)975 QPDFWriter::bytesNeeded(long long n)
976 {
977     unsigned int bytes = 0;
978     while (n)
979     {
980 	++bytes;
981 	n >>= 8;
982     }
983     return bytes;
984 }
985 
986 void
writeBinary(unsigned long long val,unsigned int bytes)987 QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
988 {
989     if (bytes > sizeof(unsigned long long))
990     {
991         throw std::logic_error(
992             "QPDFWriter::writeBinary called with too many bytes");
993     }
994     unsigned char data[sizeof(unsigned long long)];
995     for (unsigned int i = 0; i < bytes; ++i)
996     {
997 	data[bytes - i - 1] = static_cast<unsigned char>(val & 0xff);
998 	val >>= 8;
999     }
1000     this->m->pipeline->write(data, bytes);
1001 }
1002 
1003 void
writeString(std::string const & str)1004 QPDFWriter::writeString(std::string const& str)
1005 {
1006     this->m->pipeline->write(QUtil::unsigned_char_pointer(str), str.length());
1007 }
1008 
1009 void
writeBuffer(PointerHolder<Buffer> & b)1010 QPDFWriter::writeBuffer(PointerHolder<Buffer>& b)
1011 {
1012     this->m->pipeline->write(b->getBuffer(), b->getSize());
1013 }
1014 
1015 void
writeStringQDF(std::string const & str)1016 QPDFWriter::writeStringQDF(std::string const& str)
1017 {
1018     if (this->m->qdf_mode)
1019     {
1020 	writeString(str);
1021     }
1022 }
1023 
1024 void
writeStringNoQDF(std::string const & str)1025 QPDFWriter::writeStringNoQDF(std::string const& str)
1026 {
1027     if (! this->m->qdf_mode)
1028     {
1029 	writeString(str);
1030     }
1031 }
1032 
1033 void
writePad(int nspaces)1034 QPDFWriter::writePad(int nspaces)
1035 {
1036     for (int i = 0; i < nspaces; ++i)
1037     {
1038 	writeString(" ");
1039     }
1040 }
1041 
1042 Pipeline*
pushPipeline(Pipeline * p)1043 QPDFWriter::pushPipeline(Pipeline* p)
1044 {
1045     assert(dynamic_cast<Pl_Count*>(p) == 0);
1046     this->m->pipeline_stack.push_back(p);
1047     return p;
1048 }
1049 
1050 void
initializePipelineStack(Pipeline * p)1051 QPDFWriter::initializePipelineStack(Pipeline *p)
1052 {
1053     this->m->pipeline = new Pl_Count("pipeline stack base", p);
1054     this->m->to_delete.push_back(this->m->pipeline);
1055     this->m->pipeline_stack.push_back(this->m->pipeline);
1056 }
1057 
1058 void
activatePipelineStack(PipelinePopper & pp)1059 QPDFWriter::activatePipelineStack(PipelinePopper& pp)
1060 {
1061     std::string stack_id(
1062         "stack " + QUtil::uint_to_string(this->m->next_stack_id));
1063     Pl_Count* c = new Pl_Count(stack_id.c_str(),
1064                                this->m->pipeline_stack.back());
1065     ++this->m->next_stack_id;
1066     this->m->pipeline_stack.push_back(c);
1067     this->m->pipeline = c;
1068     pp.stack_id = stack_id;
1069 }
1070 
// Unwind the pipeline stack frame that was recorded in this popper
// by activatePipelineStack(): finish the current pipeline, delete
// the Pl_Count on top of the stack, then delete every pipeline
// beneath it down to (but not including) the next Pl_Count, which
// becomes the current pipeline again. If bp is set and a Pl_Buffer
// is among the deleted pipelines, its accumulated data is stored in
// *bp first.
QPDFWriter::PipelinePopper::~PipelinePopper()
{
    // stack_id is assigned by activatePipelineStack(); if it is
    // still empty there is no frame for this popper to unwind.
    if (stack_id.empty())
    {
        return;
    }
    assert(qw->m->pipeline_stack.size() >= 2);
    qw->m->pipeline->finish();
    assert(dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back()) ==
	   qw->m->pipeline);
    // It might be possible for this assertion to fail if
    // writeLinearized exits by exception when deterministic ID, but I
    // don't think so. As of this writing, this is the only case in
    // which two dynamically allocated PipelinePopper objects ever
    // exist at the same time, so the assertion will fail if they get
    // popped out of order from automatic destruction.
    assert(qw->m->pipeline->getIdentifier() == stack_id);
    delete qw->m->pipeline_stack.back();
    qw->m->pipeline_stack.pop_back();
    // Everything between this frame's Pl_Count and the previous one
    // was pushed with pushPipeline() and belongs to this frame.
    while (dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back()) == 0)
    {
	Pipeline* p = qw->m->pipeline_stack.back();
        // Don't leave md5_pipeline pointing at a pipeline that is
        // about to be deleted. (When p is not a Pl_MD5 the cast
        // yields 0, so this matches only if md5_pipeline is already
        // 0 and the assignment changes nothing.)
        if (dynamic_cast<Pl_MD5*>(p) == qw->m->md5_pipeline)
        {
            qw->m->md5_pipeline = 0;
        }
	qw->m->pipeline_stack.pop_back();
	Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p);
	// Hand the buffered data to the caller before the buffer
	// pipeline is destroyed.
	if (bp && buf)
	{
	    *bp = buf->getBuffer();
	}
	delete p;
    }
    // The loop stops at a Pl_Count, which is the current pipeline of
    // the enclosing frame.
    qw->m->pipeline = dynamic_cast<Pl_Count*>(qw->m->pipeline_stack.back());
}
1107 
1108 void
adjustAESStreamLength(size_t & length)1109 QPDFWriter::adjustAESStreamLength(size_t& length)
1110 {
1111     if (this->m->encrypted && (! this->m->cur_data_key.empty()) &&
1112 	this->m->encrypt_use_aes)
1113     {
1114 	// Stream length will be padded with 1 to 16 bytes to end up
1115 	// as a multiple of 16.  It will also be prepended by 16 bits
1116 	// of random data.
1117 	length += 32 - (length & 0xf);
1118     }
1119 }
1120 
1121 void
pushEncryptionFilter(PipelinePopper & pp)1122 QPDFWriter::pushEncryptionFilter(PipelinePopper& pp)
1123 {
1124     if (this->m->encrypted && (! this->m->cur_data_key.empty()))
1125     {
1126 	Pipeline* p = 0;
1127 	if (this->m->encrypt_use_aes)
1128 	{
1129 	    p = new Pl_AES_PDF(
1130 		"aes stream encryption", this->m->pipeline, true,
1131 		QUtil::unsigned_char_pointer(this->m->cur_data_key),
1132                 this->m->cur_data_key.length());
1133 	}
1134 	else
1135 	{
1136 	    p = new Pl_RC4("rc4 stream encryption", this->m->pipeline,
1137 			   QUtil::unsigned_char_pointer(this->m->cur_data_key),
1138 			   QIntC::to_int(this->m->cur_data_key.length()));
1139 	}
1140 	pushPipeline(p);
1141     }
1142     // Must call this unconditionally so we can call popPipelineStack
1143     // to balance pushEncryptionFilter().
1144     activatePipelineStack(pp);
1145 }
1146 
1147 void
pushDiscardFilter(PipelinePopper & pp)1148 QPDFWriter::pushDiscardFilter(PipelinePopper& pp)
1149 {
1150     pushPipeline(new Pl_Discard());
1151     activatePipelineStack(pp);
1152 }
1153 
1154 void
pushMD5Pipeline(PipelinePopper & pp)1155 QPDFWriter::pushMD5Pipeline(PipelinePopper& pp)
1156 {
1157     if (! this->m->id2.empty())
1158     {
1159         // Can't happen in the code
1160         throw std::logic_error(
1161             "Deterministic ID computation enabled after ID"
1162             " generation has already occurred.");
1163     }
1164     assert(this->m->deterministic_id);
1165     assert(this->m->md5_pipeline == 0);
1166     assert(this->m->pipeline->getCount() == 0);
1167     this->m->md5_pipeline = new Pl_MD5("qpdf md5", this->m->pipeline);
1168     this->m->md5_pipeline->persistAcrossFinish(true);
1169     // Special case code in popPipelineStack clears this->m->md5_pipeline
1170     // upon deletion.
1171     pushPipeline(this->m->md5_pipeline);
1172     activatePipelineStack(pp);
1173 }
1174 
1175 void
computeDeterministicIDData()1176 QPDFWriter::computeDeterministicIDData()
1177 {
1178     assert(this->m->md5_pipeline != 0);
1179     assert(this->m->deterministic_id_data.empty());
1180     this->m->deterministic_id_data = this->m->md5_pipeline->getHexDigest();
1181     this->m->md5_pipeline->enable(false);
1182 }
1183 
1184 int
openObject(int objid)1185 QPDFWriter::openObject(int objid)
1186 {
1187     if (objid == 0)
1188     {
1189 	objid = this->m->next_objid++;
1190     }
1191     this->m->xref[objid] = QPDFXRefEntry(1, this->m->pipeline->getCount(), 0);
1192     writeString(QUtil::int_to_string(objid));
1193     writeString(" 0 obj\n");
1194     return objid;
1195 }
1196 
1197 void
closeObject(int objid)1198 QPDFWriter::closeObject(int objid)
1199 {
1200     // Write a newline before endobj as it makes the file easier to
1201     // repair.
1202     writeString("\nendobj\n");
1203     writeStringQDF("\n");
1204     this->m->lengths[objid] = this->m->pipeline->getCount() -
1205         this->m->xref[objid].getOffset();
1206 }
1207 
1208 void
assignCompressedObjectNumbers(QPDFObjGen const & og)1209 QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
1210 {
1211     int objid = og.getObj();
1212     if ((og.getGen() != 0) ||
1213         (this->m->object_stream_to_objects.count(objid) == 0))
1214     {
1215         // This is not an object stream.
1216 	return;
1217     }
1218 
1219     // Reserve numbers for the objects that belong to this object
1220     // stream.
1221     for (std::set<QPDFObjGen>::iterator iter =
1222 	     this->m->object_stream_to_objects[objid].begin();
1223 	 iter != this->m->object_stream_to_objects[objid].end();
1224 	 ++iter)
1225     {
1226 	this->m->obj_renumber[*iter] = this->m->next_objid++;
1227     }
1228 }
1229 
1230 void
enqueueObject(QPDFObjectHandle object)1231 QPDFWriter::enqueueObject(QPDFObjectHandle object)
1232 {
1233     if (object.isIndirect())
1234     {
1235         if (object.getOwningQPDF() != &(this->m->pdf))
1236         {
1237             QTC::TC("qpdf", "QPDFWriter foreign object");
1238             throw std::logic_error(
1239                 "QPDFObjectHandle from different QPDF found while writing."
1240                 "  Use QPDF::copyForeignObject to add objects from"
1241                 " another file.");
1242         }
1243 
1244         if (this->m->qdf_mode &&
1245             object.isStream() && object.getDict().getKey("/Type").isName() &&
1246             (object.getDict().getKey("/Type").getName() == "/XRef"))
1247         {
1248             // As a special case, do not output any extraneous XRef
1249             // streams in QDF mode. Doing so will confuse fix-qdf,
1250             // which expects to see only one XRef stream at the end of
1251             // the file. This case can occur when creating a QDF from
1252             // a file with object streams when preserving unreferenced
1253             // objects since the old cross reference streams are not
1254             // actually referenced by object number.
1255             QTC::TC("qpdf", "QPDFWriter ignore XRef in qdf mode");
1256             return;
1257         }
1258 
1259 	QPDFObjGen og = object.getObjGen();
1260 
1261 	if (this->m->obj_renumber.count(og) == 0)
1262 	{
1263 	    if (this->m->object_to_object_stream.count(og))
1264 	    {
1265 		// This is in an object stream.  Don't process it
1266 		// here.  Instead, enqueue the object stream.  Object
1267 		// streams always have generation 0.
1268 		int stream_id = this->m->object_to_object_stream[og];
1269                 // Detect loops by storing invalid object ID 0, which
1270                 // will get overwritten later.
1271                 this->m->obj_renumber[og] = 0;
1272 		enqueueObject(this->m->pdf.getObjectByID(stream_id, 0));
1273 	    }
1274 	    else
1275 	    {
1276 		this->m->object_queue.push_back(object);
1277 		this->m->obj_renumber[og] = this->m->next_objid++;
1278 
1279 		if ((og.getGen() == 0) &&
1280                     this->m->object_stream_to_objects.count(og.getObj()))
1281 		{
1282 		    // For linearized files, uncompressed objects go
1283 		    // at end, and we take care of assigning numbers
1284 		    // to them elsewhere.
1285 		    if (! this->m->linearized)
1286 		    {
1287 			assignCompressedObjectNumbers(og);
1288 		    }
1289 		}
1290 		else if ((! this->m->direct_stream_lengths) &&
1291                          object.isStream())
1292 		{
1293 		    // reserve next object ID for length
1294 		    ++this->m->next_objid;
1295 		}
1296 	    }
1297 	}
1298         else if (this->m->obj_renumber[og] == 0)
1299         {
1300             // This can happen if a specially constructed file
1301             // indicates that an object stream is inside itself.
1302             QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream");
1303         }
1304     }
1305     else if (object.isArray())
1306     {
1307 	int n = object.getArrayNItems();
1308 	for (int i = 0; i < n; ++i)
1309 	{
1310 	    if (! this->m->linearized)
1311 	    {
1312 		enqueueObject(object.getArrayItem(i));
1313 	    }
1314 	}
1315     }
1316     else if (object.isDictionary())
1317     {
1318 	std::set<std::string> keys = object.getKeys();
1319 	for (std::set<std::string>::iterator iter = keys.begin();
1320 	     iter != keys.end(); ++iter)
1321 	{
1322 	    if (! this->m->linearized)
1323 	    {
1324 		enqueueObject(object.getKey(*iter));
1325 	    }
1326 	}
1327     }
1328     else
1329     {
1330 	// ignore
1331     }
1332 }
1333 
1334 void
unparseChild(QPDFObjectHandle child,int level,int flags)1335 QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
1336 {
1337     if (! this->m->linearized)
1338     {
1339 	enqueueObject(child);
1340     }
1341     if (child.isIndirect())
1342     {
1343 	QPDFObjGen old_og = child.getObjGen();
1344 	int new_id = this->m->obj_renumber[old_og];
1345 	writeString(QUtil::int_to_string(new_id));
1346 	writeString(" 0 R");
1347     }
1348     else
1349     {
1350 	unparseObject(child, level, flags);
1351     }
1352 }
1353 
// Write the contents of a trailer dictionary through the closing
// ">>".
//
// which: t_lin_second writes only /Size (plus /ID); any other value
//   writes every key of the trimmed trailer, substituting `size` for
//   /Size. For t_lin_first a padded /Prev entry is written right
//   after /Size.
// size: value written for /Size.
// xref_stream: when true, the "trailer <<" opener is not written and
//   the current data key is cleared. NOTE(review): presumably the
//   caller has already opened the cross-reference stream dictionary
//   -- confirm against callers.
// prev: value written for /Prev (used only with t_lin_first).
// linearization_pass: 1 writes all-zero placeholder IDs; any other
//   value generates and writes the real IDs. With 0 and deterministic
//   ID enabled, the deterministic ID data is computed first.
//
// Throws std::logic_error if the /Prev value is wider than the space
// reserved for it.
void
QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
                         qpdf_offset_t prev, int linearization_pass)
{
    QPDFObjectHandle trailer = getTrimmedTrailer();
    if (xref_stream)
    {
        this->m->cur_data_key.clear();
    }
    else
    {
	writeString("trailer <<");
    }
    writeStringQDF("\n");
    if (which == t_lin_second)
    {
	writeString(" /Size ");
	writeString(QUtil::int_to_string(size));
    }
    else
    {
	std::set<std::string> keys = trailer.getKeys();
	for (std::set<std::string>::iterator iter = keys.begin();
	     iter != keys.end(); ++iter)
	{
	    std::string const& key = *iter;
	    writeStringQDF("  ");
	    writeStringNoQDF(" ");
	    writeString(QPDF_Name::normalizeName(key));
	    writeString(" ");
	    if (key == "/Size")
	    {
		writeString(QUtil::int_to_string(size));
		if (which == t_lin_first)
		{
		    writeString(" /Prev ");
		    qpdf_offset_t pos = this->m->pipeline->getCount();
		    writeString(QUtil::int_to_string(prev));
		    // Pad so that the /Prev value plus padding always
		    // occupies exactly 21 bytes regardless of how
		    // many digits were just written.
		    int nspaces =
                        QIntC::to_int(pos - this->m->pipeline->getCount() + 21);
		    if (nspaces < 0)
                    {
                        throw std::logic_error(
                            "QPDFWriter: no padding required in trailer");
                    }
		    writePad(nspaces);
		}
	    }
	    else
	    {
		unparseChild(trailer.getKey(key), 1, 0);
	    }
	    writeStringQDF("\n");
	}
    }

    // Write ID
    writeStringQDF(" ");
    writeString(" /ID [");
    if (linearization_pass == 1)
    {
	std::string original_id1 = getOriginalID1();
        if (original_id1.empty())
        {
            writeString("<00000000000000000000000000000000>");
        }
        else
        {
            // Write a string of zeroes equal in length to the
            // representation of the original ID. While writing the
            // original ID would have the same number of bytes, it
            // would cause a change to the deterministic ID generated
            // by older versions of the software that hard-coded the
            // length of the ID to 16 bytes.
            writeString("<");
            // Subtract 2 for the delimiters that surround the
            // unparsed string.
            size_t len = QPDF_String(original_id1).unparse(true).length() - 2;
            for (size_t i = 0; i < len; ++i)
            {
                writeString("0");
            }
            writeString(">");
        }
        writeString("<00000000000000000000000000000000>");
    }
    else
    {
        if ((linearization_pass == 0) && (this->m->deterministic_id))
        {
            computeDeterministicIDData();
        }
        generateID();
        writeString(QPDF_String(this->m->id1).unparse(true));
        writeString(QPDF_String(this->m->id2).unparse(true));
    }
    writeString("]");

    if (which != t_lin_second)
    {
	// Write reference to encryption dictionary
	if (this->m->encrypted)
	{
	    writeString(" /Encrypt ");
	    writeString(QUtil::int_to_string(this->m->encryption_dict_objid));
	    writeString(" 0 R");
	}
    }

    writeStringQDF("\n");
    writeStringNoQDF(" ");
    writeString(">>");
}
1465 
// Decide how the data of `stream` is to be written and pipe that
// data into a temporary "stream data" Pl_Buffer.
//
// stream: the stream object whose data is examined.
// compress_stream (out): set to true when the stream data is to be
//   compressed on output; always false when the function returns
//   false.
// is_metadata (out): set to true when the stream dictionary's /Type
//   is the name /Metadata.
// stream_data: passed to the PipelinePopper that unwinds the
//   temporary buffer pipeline. NOTE(review): presumably this is how
//   the buffered data reaches the caller, and it may be null --
//   confirm against the PipelinePopper constructor.
//
// Returns the result of pipeStreamData(), i.e. whether the stream
// data was actually filtered.
bool
QPDFWriter::willFilterStream(QPDFObjectHandle stream,
                             bool& compress_stream, bool& is_metadata,
                             PointerHolder<Buffer>* stream_data)
{
    compress_stream = false;
    is_metadata = false;

    QPDFObjGen old_og = stream.getObjGen();
    QPDFObjectHandle stream_dict = stream.getDict();

    if (stream_dict.getKey("/Type").isName() &&
        (stream_dict.getKey("/Type").getName() == "/Metadata"))
    {
        is_metadata = true;
    }
    // Filtering is attempted if the data was modified, if streams
    // are being compressed, or if any decode level other than the
    // zero-valued one is set.
    bool filter = (stream.isDataModified() ||
                   this->m->compress_streams ||
                   this->m->stream_decode_level);
    bool filter_on_write = stream.getFilterOnWrite();
    if (! filter_on_write)
    {
        QTC::TC("qpdf", "QPDFWriter getFilterOnWrite false");
        filter = false;
    }
    if (filter_on_write && this->m->compress_streams)
    {
        // Don't filter if the stream is already compressed with
        // FlateDecode. This way we don't make it worse if the
        // original file used a better Flate algorithm, and we
        // don't spend time and CPU cycles uncompressing and
        // recompressing stuff. This can be overridden with
        // setRecompressFlate(true).
        QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
        if ((! this->m->recompress_flate) &&
            (! stream.isDataModified()) &&
            filter_obj.isName() &&
            ((filter_obj.getName() == "/FlateDecode") ||
             (filter_obj.getName() == "/Fl")))
        {
            QTC::TC("qpdf", "QPDFWriter not recompressing /FlateDecode");
            filter = false;
        }
    }
    bool normalize = false;
    bool uncompress = false;
    // Metadata that will not be encrypted is fully decoded and left
    // uncompressed; this overrides the decisions made above.
    if (filter_on_write && is_metadata &&
        ((! this->m->encrypted) || (this->m->encrypt_metadata == false)))
    {
        QTC::TC("qpdf", "QPDFWriter not compressing metadata");
        filter = true;
        compress_stream = false;
        uncompress = true;
    }
    else if (filter_on_write && this->m->normalize_content &&
             this->m->normalized_streams.count(old_og))
    {
        normalize = true;
        filter = true;
    }
    else if (filter_on_write && filter && this->m->compress_streams)
    {
        compress_stream = true;
        QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
    }

    bool filtered = false;
    // At most two attempts: first with the filtering chosen above,
    // and, if that was requested but could not be done, once more
    // with filtering turned off.
    for (int attempt = 1; attempt <= 2; ++attempt)
    {
        pushPipeline(new Pl_Buffer("stream data"));
        // pp_stream_data unwinds the buffer pipeline when it goes
        // out of scope at the end of each iteration.
        PipelinePopper pp_stream_data(this, stream_data);
        activatePipelineStack(pp_stream_data);
        filtered =
            stream.pipeStreamData(
                this->m->pipeline,
                (((filter && normalize) ? qpdf_ef_normalize : 0) |
                 ((filter && compress_stream) ? qpdf_ef_compress : 0)),
                (filter
                 ? (uncompress ? qpdf_dl_all : this->m->stream_decode_level)
                 : qpdf_dl_none), false, (attempt == 1));
        if (filter && (! filtered))
        {
            // Try again
            filter = false;
        }
        else
        {
            break;
        }
    }
    if (! filtered)
    {
        compress_stream = false;
    }
    return filtered;
}
1562 
1563 void
unparseObject(QPDFObjectHandle object,int level,int flags,size_t stream_length,bool compress)1564 QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1565 			  int flags, size_t stream_length,
1566                           bool compress)
1567 {
1568     QPDFObjGen old_og = object.getObjGen();
1569     int child_flags = flags & ~f_stream;
1570 
1571     std::string indent;
1572     for (int i = 0; i < level; ++i)
1573     {
1574 	indent += "  ";
1575     }
1576 
1577     if (object.isArray())
1578     {
1579 	// Note: PDF spec 1.4 implementation note 121 states that
1580 	// Acrobat requires a space after the [ in the /H key of the
1581 	// linearization parameter dictionary.  We'll do this
1582 	// unconditionally for all arrays because it looks nicer and
1583 	// doesn't make the files that much bigger.
1584 	writeString("[");
1585 	writeStringQDF("\n");
1586 	int n = object.getArrayNItems();
1587 	for (int i = 0; i < n; ++i)
1588 	{
1589 	    writeStringQDF(indent);
1590 	    writeStringQDF("  ");
1591 	    writeStringNoQDF(" ");
1592 	    unparseChild(object.getArrayItem(i), level + 1, child_flags);
1593 	    writeStringQDF("\n");
1594 	}
1595 	writeStringQDF(indent);
1596 	writeStringNoQDF(" ");
1597 	writeString("]");
1598     }
1599     else if (object.isDictionary())
1600     {
1601         // Make a shallow copy of this object so we can modify it
1602         // safely without affecting the original. This code has logic
1603         // to skip certain keys in agreement with prepareFileForWrite
1604         // and with skip_stream_parameters so that replacing them
1605         // doesn't leave unreferenced objects in the output. We can
        // use unsafeShallowCopy here because all we are doing is
        // removing or replacing top-level keys.
1608         object = object.unsafeShallowCopy();
1609 
1610         // Handle special cases for specific dictionaries.
1611 
1612         // Extensions dictionaries.
1613 
1614         // We have one of several cases:
1615         //
1616         // * We need ADBE
1617         //    - We already have Extensions
1618         //       - If it has the right ADBE, preserve it
1619         //       - Otherwise, replace ADBE
1620         //    - We don't have Extensions: create one from scratch
1621         // * We don't want ADBE
1622         //    - We already have Extensions
1623         //       - If it only has ADBE, remove it
1624         //       - If it has other things, keep those and remove ADBE
1625         //    - We have no extensions: no action required
1626         //
1627         // Before writing, we guarantee that /Extensions, if present,
1628         // is direct through the ADBE dictionary, so we can modify in
1629         // place.
1630 
1631         bool is_root = false;
1632         bool have_extensions_other = false;
1633         bool have_extensions_adbe = false;
1634 
1635         QPDFObjectHandle extensions;
1636         if ((old_og.getObj() != 0) &&
1637             (old_og == this->m->pdf.getRoot().getObjGen()))
1638         {
1639             is_root = true;
1640             if (object.hasKey("/Extensions") &&
1641                 object.getKey("/Extensions").isDictionary())
1642             {
1643                 extensions = object.getKey("/Extensions");
1644             }
1645         }
1646 
1647         if (extensions.isInitialized())
1648         {
1649             std::set<std::string> keys = extensions.getKeys();
1650             if (keys.count("/ADBE") > 0)
1651             {
1652                 have_extensions_adbe = true;
1653                 keys.erase("/ADBE");
1654             }
1655             if (keys.size() > 0)
1656             {
1657                 have_extensions_other = true;
1658             }
1659         }
1660 
1661         bool need_extensions_adbe = (this->m->final_extension_level > 0);
1662 
1663         if (is_root)
1664         {
1665             if (need_extensions_adbe)
1666             {
1667                 if (! (have_extensions_other || have_extensions_adbe))
1668                 {
1669                     // We need Extensions and don't have it.  Create
1670                     // it here.
1671                     QTC::TC("qpdf", "QPDFWriter create Extensions",
1672                             this->m->qdf_mode ? 0 : 1);
1673                     extensions = QPDFObjectHandle::newDictionary();
1674                     object.replaceKey("/Extensions", extensions);
1675                 }
1676             }
1677             else if (! have_extensions_other)
1678             {
1679                 // We have Extensions dictionary and don't want one.
1680                 if (have_extensions_adbe)
1681                 {
1682                     QTC::TC("qpdf", "QPDFWriter remove existing Extensions");
1683                     object.removeKey("/Extensions");
1684                     extensions = QPDFObjectHandle(); // uninitialized
1685                 }
1686             }
1687         }
1688 
1689         if (extensions.isInitialized())
1690         {
1691             QTC::TC("qpdf", "QPDFWriter preserve Extensions");
1692             QPDFObjectHandle adbe = extensions.getKey("/ADBE");
1693             if (adbe.isDictionary() &&
1694                 adbe.hasKey("/BaseVersion") &&
1695                 adbe.getKey("/BaseVersion").isName() &&
1696                 (adbe.getKey("/BaseVersion").getName() ==
1697                  "/" + this->m->final_pdf_version) &&
1698                 adbe.hasKey("/ExtensionLevel") &&
1699                 adbe.getKey("/ExtensionLevel").isInteger() &&
1700                 (adbe.getKey("/ExtensionLevel").getIntValue() ==
1701                  this->m->final_extension_level))
1702             {
1703                 QTC::TC("qpdf", "QPDFWriter preserve ADBE");
1704             }
1705             else
1706             {
1707                 if (need_extensions_adbe)
1708                 {
1709                     extensions.replaceKey(
1710                         "/ADBE",
1711                         QPDFObjectHandle::parse(
1712                             "<< /BaseVersion /" + this->m->final_pdf_version +
1713                             " /ExtensionLevel " +
1714                             QUtil::int_to_string(
1715                                 this->m->final_extension_level) +
1716                             " >>"));
1717                 }
1718                 else
1719                 {
1720                     QTC::TC("qpdf", "QPDFWriter remove ADBE");
1721                     extensions.removeKey("/ADBE");
1722                 }
1723             }
1724         }
1725 
1726         // Stream dictionaries.
1727 
1728         if (flags & f_stream)
1729         {
1730             // Suppress /Length since we will write it manually
1731             object.removeKey("/Length");
1732 
1733             // If /DecodeParms is an empty list, remove it.
1734             if (object.getKey("/DecodeParms").isArray() &&
1735                 (0 == object.getKey("/DecodeParms").getArrayNItems()))
1736             {
1737                 QTC::TC("qpdf", "QPDFWriter remove empty DecodeParms");
1738                 object.removeKey("/DecodeParms");
1739             }
1740 
1741 	    if (flags & f_filtered)
1742             {
1743                 // We will supply our own filter and decode
1744                 // parameters.
1745                 object.removeKey("/Filter");
1746                 object.removeKey("/DecodeParms");
1747             }
1748             else
1749             {
1750                 // Make sure, no matter what else we have, that we
1751                 // don't have /Crypt in the output filters.
1752                 QPDFObjectHandle filter = object.getKey("/Filter");
1753                 QPDFObjectHandle decode_parms = object.getKey("/DecodeParms");
1754                 if (filter.isOrHasName("/Crypt"))
1755                 {
1756                     if (filter.isName())
1757                     {
1758                         object.removeKey("/Filter");
1759                         object.removeKey("/DecodeParms");
1760                     }
1761                     else
1762                     {
1763                         int idx = -1;
1764                         for (int i = 0; i < filter.getArrayNItems(); ++i)
1765                         {
1766                             QPDFObjectHandle item = filter.getArrayItem(i);
1767                             if (item.isName() && item.getName() == "/Crypt")
1768                             {
1769                                 idx = i;
1770                                 break;
1771                             }
1772                         }
1773                         if (idx >= 0)
1774                         {
1775                             // If filter is an array, then the code in
1776                             // QPDF_Stream has already verified that
1777                             // DecodeParms and Filters are arrays of
1778                             // the same length, but if they weren't
1779                             // for some reason, eraseItem does type
1780                             // and bounds checking.
1781                             QTC::TC("qpdf", "QPDFWriter remove Crypt");
1782                             filter.eraseItem(idx);
1783                             decode_parms.eraseItem(idx);
1784                         }
1785                     }
1786                 }
1787             }
1788         }
1789 
1790 	writeString("<<");
1791 	writeStringQDF("\n");
1792 
1793 	std::set<std::string> keys = object.getKeys();
1794 	for (std::set<std::string>::iterator iter = keys.begin();
1795 	     iter != keys.end(); ++iter)
1796 	{
1797 	    std::string const& key = *iter;
1798 
1799 	    writeStringQDF(indent);
1800 	    writeStringQDF("  ");
1801 	    writeStringNoQDF(" ");
1802 	    writeString(QPDF_Name::normalizeName(key));
1803 	    writeString(" ");
1804 	    if (key == "/Contents" &&
1805 		object.hasKey("/Type") &&
1806 		object.getKey("/Type").isName() &&
1807 		object.getKey("/Type").getName() == "/Sig" &&
1808 		object.hasKey("/ByteRange"))
1809 	    {
1810                 QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1811 		unparseChild(object.getKey(key), level + 1,
1812 			     child_flags | f_hex_string | f_no_encryption);
1813 	    }
1814 	    else
1815 	    {
1816 		unparseChild(object.getKey(key), level + 1, child_flags);
1817 	    }
1818 	    writeStringQDF("\n");
1819 	}
1820 
1821 	if (flags & f_stream)
1822 	{
1823 	    writeStringQDF(indent);
1824 	    writeStringQDF(" ");
1825 	    writeString(" /Length ");
1826 
1827 	    if (this->m->direct_stream_lengths)
1828 	    {
1829 		writeString(QUtil::uint_to_string(stream_length));
1830 	    }
1831 	    else
1832 	    {
1833 		writeString(
1834 		    QUtil::int_to_string(this->m->cur_stream_length_id));
1835 		writeString(" 0 R");
1836 	    }
1837 	    writeStringQDF("\n");
1838 	    if (compress && (flags & f_filtered))
1839 	    {
1840 		writeStringQDF(indent);
1841 		writeStringQDF(" ");
1842 		writeString(" /Filter /FlateDecode");
1843 		writeStringQDF("\n");
1844 	    }
1845 	}
1846 
1847 	writeStringQDF(indent);
1848 	writeStringNoQDF(" ");
1849 	writeString(">>");
1850     }
1851     else if (object.isStream())
1852     {
1853 	// Write stream data to a buffer.
1854 	int new_id = this->m->obj_renumber[old_og];
1855 	if (! this->m->direct_stream_lengths)
1856 	{
1857 	    this->m->cur_stream_length_id = new_id + 1;
1858 	}
1859 
1860 	flags |= f_stream;
1861 	bool compress_stream = false;
1862         bool is_metadata = false;
1863         PointerHolder<Buffer> stream_data;
1864         if (willFilterStream(object, compress_stream,
1865                              is_metadata, &stream_data))
1866 	{
1867 	    flags |= f_filtered;
1868 	}
1869         QPDFObjectHandle stream_dict = object.getDict();
1870 
1871 	this->m->cur_stream_length = stream_data->getSize();
1872 	if (is_metadata && this->m->encrypted && (! this->m->encrypt_metadata))
1873 	{
1874 	    // Don't encrypt stream data for the metadata stream
1875 	    this->m->cur_data_key.clear();
1876 	}
1877 	adjustAESStreamLength(this->m->cur_stream_length);
1878 	unparseObject(stream_dict, 0, flags,
1879                       this->m->cur_stream_length, compress_stream);
1880 	unsigned char last_char = '\0';
1881 	writeString("\nstream\n");
1882         {
1883             PipelinePopper pp_enc(this);
1884             pushEncryptionFilter(pp_enc);
1885             writeBuffer(stream_data);
1886             last_char = this->m->pipeline->getLastChar();
1887         }
1888 
1889         if (this->m->newline_before_endstream ||
1890             (this->m->qdf_mode && (last_char != '\n')))
1891         {
1892             writeString("\n");
1893             this->m->added_newline = true;
1894         }
1895         else
1896         {
1897             this->m->added_newline = false;
1898         }
1899 	writeString("endstream");
1900     }
1901     else if (object.isString())
1902     {
1903 	std::string val;
1904 	if (this->m->encrypted &&
1905 	    (! (flags & f_in_ostream)) &&
1906 	    (! (flags & f_no_encryption)) &&
1907 	    (! this->m->cur_data_key.empty()))
1908 	{
1909 	    val = object.getStringValue();
1910 	    if (this->m->encrypt_use_aes)
1911 	    {
1912 		Pl_Buffer bufpl("encrypted string");
1913 		Pl_AES_PDF pl(
1914                     "aes encrypt string", &bufpl, true,
1915                     QUtil::unsigned_char_pointer(this->m->cur_data_key),
1916                     this->m->cur_data_key.length());
1917 		pl.write(QUtil::unsigned_char_pointer(val), val.length());
1918 		pl.finish();
1919 		PointerHolder<Buffer> buf = bufpl.getBuffer();
1920 		val = QPDF_String(
1921 		    std::string(reinterpret_cast<char*>(buf->getBuffer()),
1922 				buf->getSize())).unparse(true);
1923 	    }
1924 	    else
1925 	    {
1926 		PointerHolder<char> tmp_ph =
1927                     PointerHolder<char>(true, QUtil::copy_string(val));
1928                 char* tmp = tmp_ph.getPointer();
1929 		size_t vlen = val.length();
1930 		RC4 rc4(QUtil::unsigned_char_pointer(this->m->cur_data_key),
1931 			QIntC::to_int(this->m->cur_data_key.length()));
1932 		rc4.process(QUtil::unsigned_char_pointer(tmp), vlen);
1933 		val = QPDF_String(std::string(tmp, vlen)).unparse();
1934 	    }
1935 	}
1936 	else if (flags & f_hex_string)
1937 	{
1938 	    val = QPDF_String(object.getStringValue()).unparse(true);
1939 	}
1940 	else
1941 	{
1942 	    val = object.unparseResolved();
1943 	}
1944 	writeString(val);
1945     }
1946     else
1947     {
1948 	writeString(object.unparseResolved());
1949     }
1950 }
1951 
1952 void
writeObjectStreamOffsets(std::vector<qpdf_offset_t> & offsets,int first_obj)1953 QPDFWriter::writeObjectStreamOffsets(std::vector<qpdf_offset_t>& offsets,
1954 				     int first_obj)
1955 {
1956     for (size_t i = 0; i < offsets.size(); ++i)
1957     {
1958 	if (i != 0)
1959 	{
1960 	    writeStringQDF("\n");
1961 	    writeStringNoQDF(" ");
1962 	}
1963 	writeString(QUtil::uint_to_string(i + QIntC::to_size(first_obj)));
1964 	writeString(" ");
1965 	writeString(QUtil::int_to_string(offsets.at(i)));
1966     }
1967     writeString("\n");
1968 }
1969 
1970 void
writeObjectStream(QPDFObjectHandle object)1971 QPDFWriter::writeObjectStream(QPDFObjectHandle object)
1972 {
1973     // Note: object might be null if this is a place-holder for an
1974     // object stream that we are generating from scratch.
1975 
1976     QPDFObjGen old_og = object.getObjGen();
1977     assert(old_og.getGen() == 0);
1978     int old_id = old_og.getObj();
1979     int new_id = this->m->obj_renumber[old_og];
1980 
1981     std::vector<qpdf_offset_t> offsets;
1982     qpdf_offset_t first = 0;
1983 
1984     // Generate stream itself.  We have to do this in two passes so we
1985     // can calculate offsets in the first pass.
1986     PointerHolder<Buffer> stream_buffer;
1987     int first_obj = -1;
1988     bool compressed = false;
1989     for (int pass = 1; pass <= 2; ++pass)
1990     {
1991 	// stream_buffer will be initialized only for pass 2
1992         PipelinePopper pp_ostream(this, &stream_buffer);
1993 	if (pass == 1)
1994 	{
1995 	    pushDiscardFilter(pp_ostream);
1996 	}
1997 	else
1998 	{
1999 	    // Adjust offsets to skip over comment before first object
2000 
2001 	    first = offsets.at(0);
2002 	    for (std::vector<qpdf_offset_t>::iterator iter = offsets.begin();
2003 		 iter != offsets.end(); ++iter)
2004 	    {
2005 		*iter -= first;
2006 	    }
2007 
2008 	    // Take one pass at writing pairs of numbers so we can get
2009 	    // their size information
2010             {
2011                 PipelinePopper pp_discard(this);
2012                 pushDiscardFilter(pp_discard);
2013                 writeObjectStreamOffsets(offsets, first_obj);
2014                 first += this->m->pipeline->getCount();
2015             }
2016 
2017 	    // Set up a stream to write the stream data into a buffer.
2018 	    Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));
2019             if ((this->m->compress_streams ||
2020                  (this->m->stream_decode_level == qpdf_dl_none)) &&
2021                 (! this->m->qdf_mode))
2022 	    {
2023 		compressed = true;
2024 		next = pushPipeline(
2025 		    new Pl_Flate("compress object stream", next,
2026 				 Pl_Flate::a_deflate));
2027 	    }
2028 	    activatePipelineStack(pp_ostream);
2029 	    writeObjectStreamOffsets(offsets, first_obj);
2030 	}
2031 
2032 	int count = 0;
2033 	for (std::set<QPDFObjGen>::iterator iter =
2034 		 this->m->object_stream_to_objects[old_id].begin();
2035 	     iter != this->m->object_stream_to_objects[old_id].end();
2036 	     ++iter, ++count)
2037 	{
2038 	    QPDFObjGen obj = *iter;
2039 	    int new_obj = this->m->obj_renumber[obj];
2040 	    if (first_obj == -1)
2041 	    {
2042 		first_obj = new_obj;
2043 	    }
2044 	    if (this->m->qdf_mode)
2045 	    {
2046 		writeString("%% Object stream: object " +
2047 			    QUtil::int_to_string(new_obj) + ", index " +
2048 			    QUtil::int_to_string(count));
2049 		if (! this->m->suppress_original_object_ids)
2050 		{
2051 		    writeString("; original object ID: " +
2052 				QUtil::int_to_string(obj.getObj()));
2053                     // For compatibility, only write the generation if
2054                     // non-zero.  While object streams only allow
2055                     // objects with generation 0, if we are generating
2056                     // object streams, the old object could have a
2057                     // non-zero generation.
2058                     if (obj.getGen() != 0)
2059                     {
2060                         QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
2061                         writeString(" " + QUtil::int_to_string(obj.getGen()));
2062                     }
2063 		}
2064 		writeString("\n");
2065 	    }
2066 	    if (pass == 1)
2067 	    {
2068 		offsets.push_back(this->m->pipeline->getCount());
2069                 // To avoid double-counting objects being written in
2070                 // object streams for progress reporting, decrement in
2071                 // pass 1.
2072                 indicateProgress(true, false);
2073 	    }
2074             QPDFObjectHandle obj_to_write =
2075                 this->m->pdf.getObjectByObjGen(obj);
2076             if (obj_to_write.isStream())
2077             {
2078                 // This condition occurred in a fuzz input. Ideally we
2079                 // should block it at at parse time, but it's not
2080                 // clear to me how to construct a case for this.
2081                 QTC::TC("qpdf", "QPDFWriter stream in ostream");
2082                 obj_to_write.warnIfPossible(
2083                     "stream found inside object stream; treating as null");
2084                 obj_to_write = QPDFObjectHandle::newNull();
2085             }
2086 	    writeObject(obj_to_write, count);
2087 
2088 	    this->m->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
2089 	}
2090     }
2091 
2092     // Write the object
2093     openObject(new_id);
2094     setDataKey(new_id);
2095     writeString("<<");
2096     writeStringQDF("\n ");
2097     writeString(" /Type /ObjStm");
2098     writeStringQDF("\n ");
2099     size_t length = stream_buffer->getSize();
2100     adjustAESStreamLength(length);
2101     writeString(" /Length " + QUtil::uint_to_string(length));
2102     writeStringQDF("\n ");
2103     if (compressed)
2104     {
2105 	writeString(" /Filter /FlateDecode");
2106     }
2107     writeString(" /N " + QUtil::uint_to_string(offsets.size()));
2108     writeStringQDF("\n ");
2109     writeString(" /First " + QUtil::int_to_string(first));
2110     if (! object.isNull())
2111     {
2112 	// If the original object has an /Extends key, preserve it.
2113 	QPDFObjectHandle dict = object.getDict();
2114 	QPDFObjectHandle extends = dict.getKey("/Extends");
2115 	if (extends.isIndirect())
2116 	{
2117 	    QTC::TC("qpdf", "QPDFWriter copy Extends");
2118 	    writeStringQDF("\n ");
2119 	    writeString(" /Extends ");
2120 	    unparseChild(extends, 1, f_in_ostream);
2121 	}
2122     }
2123     writeStringQDF("\n");
2124     writeStringNoQDF(" ");
2125     writeString(">>\nstream\n");
2126     if (this->m->encrypted)
2127     {
2128 	QTC::TC("qpdf", "QPDFWriter encrypt object stream");
2129     }
2130     {
2131         PipelinePopper pp_enc(this);
2132         pushEncryptionFilter(pp_enc);
2133         writeBuffer(stream_buffer);
2134     }
2135     if (this->m->newline_before_endstream)
2136     {
2137         writeString("\n");
2138     }
2139     writeString("endstream");
2140     this->m->cur_data_key.clear();
2141     closeObject(new_id);
2142 }
2143 
2144 void
writeObject(QPDFObjectHandle object,int object_stream_index)2145 QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
2146 {
2147     QPDFObjGen old_og = object.getObjGen();
2148 
2149     if ((object_stream_index == -1) &&
2150         (old_og.getGen() == 0) &&
2151 	(this->m->object_stream_to_objects.count(old_og.getObj())))
2152     {
2153 	writeObjectStream(object);
2154 	return;
2155     }
2156 
2157     indicateProgress(false, false);
2158     int new_id = this->m->obj_renumber[old_og];
2159     if (this->m->qdf_mode)
2160     {
2161 	if (this->m->page_object_to_seq.count(old_og))
2162 	{
2163 	    writeString("%% Page ");
2164 	    writeString(
2165 		QUtil::int_to_string(
2166 		    this->m->page_object_to_seq[old_og]));
2167 	    writeString("\n");
2168 	}
2169 	if (this->m->contents_to_page_seq.count(old_og))
2170 	{
2171 	    writeString("%% Contents for page ");
2172 	    writeString(
2173 		QUtil::int_to_string(
2174 		    this->m->contents_to_page_seq[old_og]));
2175 	    writeString("\n");
2176 	}
2177     }
2178     if (object_stream_index == -1)
2179     {
2180 	if (this->m->qdf_mode && (! this->m->suppress_original_object_ids))
2181 	{
2182 	    writeString("%% Original object ID: " +
2183 			QUtil::int_to_string(object.getObjectID()) + " " +
2184 			QUtil::int_to_string(object.getGeneration()) + "\n");
2185 	}
2186 	openObject(new_id);
2187 	setDataKey(new_id);
2188 	unparseObject(object, 0, 0);
2189 	this->m->cur_data_key.clear();
2190 	closeObject(new_id);
2191     }
2192     else
2193     {
2194 	unparseObject(object, 0, f_in_ostream);
2195 	writeString("\n");
2196     }
2197 
2198     if ((! this->m->direct_stream_lengths) && object.isStream())
2199     {
2200 	if (this->m->qdf_mode)
2201 	{
2202 	    if (this->m->added_newline)
2203 	    {
2204 		writeString("%QDF: ignore_newline\n");
2205 	    }
2206 	}
2207 	openObject(new_id + 1);
2208 	writeString(QUtil::uint_to_string(this->m->cur_stream_length));
2209 	closeObject(new_id + 1);
2210     }
2211 }
2212 
2213 std::string
getOriginalID1()2214 QPDFWriter::getOriginalID1()
2215 {
2216     QPDFObjectHandle trailer = this->m->pdf.getTrailer();
2217     if (trailer.hasKey("/ID"))
2218     {
2219         return trailer.getKey("/ID").getArrayItem(0).getStringValue();
2220     }
2221     else
2222     {
2223         return "";
2224     }
2225 }
2226 
2227 void
generateID()2228 QPDFWriter::generateID()
2229 {
2230     // Generate the ID lazily so that we can handle the user's
2231     // preference to use static or deterministic ID generation.
2232 
2233     if (! this->m->id2.empty())
2234     {
2235 	return;
2236     }
2237 
2238     QPDFObjectHandle trailer = this->m->pdf.getTrailer();
2239 
2240     std::string result;
2241 
2242     if (this->m->static_id)
2243     {
2244 	// For test suite use only...
2245 	static unsigned char tmp[] = {0x31, 0x41, 0x59, 0x26,
2246                                       0x53, 0x58, 0x97, 0x93,
2247                                       0x23, 0x84, 0x62, 0x64,
2248                                       0x33, 0x83, 0x27, 0x95,
2249                                       0x00};
2250 	result = reinterpret_cast<char*>(tmp);
2251     }
2252     else
2253     {
2254 	// The PDF specification has guidelines for creating IDs, but
2255 	// it states clearly that the only thing that's really
2256 	// important is that it is very likely to be unique.  We can't
2257 	// really follow the guidelines in the spec exactly because we
2258 	// haven't written the file yet.  This scheme should be fine
2259 	// though.  The deterministic ID case uses a digest of a
2260 	// sufficient portion of the file's contents such no two
2261 	// non-matching files would match in the subsets used for this
2262 	// computation.  Note that we explicitly omit the filename from
2263 	// the digest calculation for deterministic ID so that the same
2264 	// file converted with qpdf, in that case, would have the same
2265 	// ID regardless of the output file's name.
2266 
2267 	std::string seed;
2268         if (this->m->deterministic_id)
2269         {
2270             if (this->m->deterministic_id_data.empty())
2271             {
2272                 QTC::TC("qpdf", "QPDFWriter deterministic with no data");
2273                 throw std::logic_error(
2274                     "INTERNAL ERROR: QPDFWriter::generateID has no"
2275                     " data for deterministic ID.  This may happen if"
2276                     " deterministic ID and file encryption are requested"
2277                     " together.");
2278             }
2279             seed += this->m->deterministic_id_data;
2280         }
2281         else
2282         {
2283             seed += QUtil::int_to_string(QUtil::get_current_time());
2284             seed += this->m->filename;
2285             seed += " ";
2286         }
2287 	seed += " QPDF ";
2288 	if (trailer.hasKey("/Info"))
2289 	{
2290             QPDFObjectHandle info = trailer.getKey("/Info");
2291 	    std::set<std::string> keys = info.getKeys();
2292 	    for (std::set<std::string>::iterator iter = keys.begin();
2293 		 iter != keys.end(); ++iter)
2294 	    {
2295 		QPDFObjectHandle obj = info.getKey(*iter);
2296 		if (obj.isString())
2297 		{
2298 		    seed += " ";
2299 		    seed += obj.getStringValue();
2300 		}
2301 	    }
2302 	}
2303 
2304 	MD5 m;
2305 	m.encodeString(seed.c_str());
2306 	MD5::Digest digest;
2307 	m.digest(digest);
2308 	result = std::string(reinterpret_cast<char*>(digest),
2309                              sizeof(MD5::Digest));
2310     }
2311 
2312     // If /ID already exists, follow the spec: use the original first
2313     // word and generate a new second word.  Otherwise, we'll use the
2314     // generated ID for both.
2315 
2316     this->m->id2 = result;
2317     // Note: keep /ID from old file even if --static-id was given.
2318     this->m->id1 = getOriginalID1();
2319     if (this->m->id1.empty())
2320     {
2321 	this->m->id1 = this->m->id2;
2322     }
2323 }
2324 
2325 void
initializeSpecialStreams()2326 QPDFWriter::initializeSpecialStreams()
2327 {
2328     // Mark all page content streams in case we are filtering or
2329     // normalizing.
2330     std::vector<QPDFObjectHandle> pages = this->m->pdf.getAllPages();
2331     int num = 0;
2332     for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin();
2333 	 iter != pages.end(); ++iter)
2334     {
2335 	QPDFObjectHandle& page = *iter;
2336 	this->m->page_object_to_seq[page.getObjGen()] = ++num;
2337 	QPDFObjectHandle contents = page.getKey("/Contents");
2338 	std::vector<QPDFObjGen> contents_objects;
2339 	if (contents.isArray())
2340 	{
2341 	    int n = contents.getArrayNItems();
2342 	    for (int i = 0; i < n; ++i)
2343 	    {
2344 		contents_objects.push_back(
2345 		    contents.getArrayItem(i).getObjGen());
2346 	    }
2347 	}
2348 	else if (contents.isStream())
2349 	{
2350 	    contents_objects.push_back(contents.getObjGen());
2351 	}
2352 
2353 	for (auto const& c: contents_objects)
2354 	{
2355 	    this->m->contents_to_page_seq[c] = num;
2356 	    this->m->normalized_streams.insert(c);
2357 	}
2358     }
2359 }
2360 
2361 void
preserveObjectStreams()2362 QPDFWriter::preserveObjectStreams()
2363 {
2364     std::map<int, int> omap;
2365     QPDF::Writer::getObjectStreamData(this->m->pdf, omap);
2366     if (omap.empty())
2367     {
2368         return;
2369     }
2370     // Our object_to_object_stream map has to map ObjGen -> ObjGen
2371     // since we may be generating object streams out of old objects
2372     // that have generation numbers greater than zero. However in an
2373     // existing PDF, all object stream objects and all objects in them
2374     // must have generation 0 because the PDF spec does not provide
2375     // any way to do otherwise. This code filters out objects that are
2376     // not allowed to be in object streams. In addition to removing
2377     // objects that were erroneously included in object streams in the
2378     // source PDF, it also prevents unreferenced objects from being
2379     // included.
2380     std::set<QPDFObjGen> eligible;
2381     if (! this->m->preserve_unreferenced_objects)
2382     {
2383         std::vector<QPDFObjGen> eligible_v =
2384             QPDF::Writer::getCompressibleObjGens(this->m->pdf);
2385         eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end());
2386     }
2387     QTC::TC("qpdf", "QPDFWriter preserve object streams",
2388             this->m->preserve_unreferenced_objects ? 0 : 1);
2389     for (auto iter: omap)
2390     {
2391         QPDFObjGen og(iter.first, 0);
2392         if (eligible.count(og) || this->m->preserve_unreferenced_objects)
2393         {
2394             this->m->object_to_object_stream[og] = iter.second;
2395         }
2396         else
2397         {
2398             QTC::TC("qpdf", "QPDFWriter exclude from object stream");
2399         }
2400     }
2401 }
2402 
2403 void
generateObjectStreams()2404 QPDFWriter::generateObjectStreams()
2405 {
2406     // Basic strategy: make a list of objects that can go into an
2407     // object stream.  Then figure out how many object streams are
2408     // needed so that we can distribute objects approximately evenly
2409     // without having any object stream exceed 100 members.  We don't
2410     // have to worry about linearized files here -- if the file is
2411     // linearized, we take care of excluding things that aren't
2412     // allowed here later.
2413 
2414     // This code doesn't do anything with /Extends.
2415 
2416     std::vector<QPDFObjGen> eligible =
2417         QPDF::Writer::getCompressibleObjGens(this->m->pdf);
2418     size_t n_object_streams = (eligible.size() + 99U) / 100U;
2419     if (n_object_streams == 0)
2420     {
2421         return;
2422     }
2423     size_t n_per = eligible.size() / n_object_streams;
2424     if (n_per * n_object_streams < eligible.size())
2425     {
2426 	++n_per;
2427     }
2428     unsigned int n = 0;
2429     int cur_ostream = 0;
2430     for (std::vector<QPDFObjGen>::const_iterator iter = eligible.begin();
2431 	 iter != eligible.end(); ++iter)
2432     {
2433 	if ((n % n_per) == 0)
2434 	{
2435 	    if (n > 0)
2436 	    {
2437 		QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
2438 	    }
2439 	    n = 0;
2440 	}
2441 	if (n == 0)
2442 	{
2443 	    // Construct a new null object as the "original" object
2444 	    // stream.  The rest of the code knows that this means
2445 	    // we're creating the object stream from scratch.
2446 	    cur_ostream = this->m->pdf.makeIndirectObject(
2447 		QPDFObjectHandle::newNull()).getObjectID();
2448 	}
2449 	this->m->object_to_object_stream[*iter] = cur_ostream;
2450 	++n;
2451     }
2452 }
2453 
2454 QPDFObjectHandle
getTrimmedTrailer()2455 QPDFWriter::getTrimmedTrailer()
2456 {
2457     // Remove keys from the trailer that necessarily have to be
2458     // replaced when writing the file.
2459 
2460     QPDFObjectHandle trailer = this->m->pdf.getTrailer().unsafeShallowCopy();
2461 
2462     // Remove encryption keys
2463     trailer.removeKey("/ID");
2464     trailer.removeKey("/Encrypt");
2465 
2466     // Remove modification information
2467     trailer.removeKey("/Prev");
2468 
2469     // Remove all trailer keys that potentially come from a
2470     // cross-reference stream
2471     trailer.removeKey("/Index");
2472     trailer.removeKey("/W");
2473     trailer.removeKey("/Length");
2474     trailer.removeKey("/Filter");
2475     trailer.removeKey("/DecodeParms");
2476     trailer.removeKey("/Type");
2477     trailer.removeKey("/XRefStm");
2478 
2479     return trailer;
2480 }
2481 
2482 void
prepareFileForWrite()2483 QPDFWriter::prepareFileForWrite()
2484 {
2485     // Make document extension level information direct as required by
2486     // the spec.
2487 
2488     this->m->pdf.fixDanglingReferences(true);
2489     QPDFObjectHandle root = this->m->pdf.getRoot();
2490     for (auto const& key: root.getKeys())
2491     {
2492         QPDFObjectHandle oh = root.getKey(key);
2493         if ((key == "/Extensions") && (oh.isDictionary()))
2494         {
2495             bool extensions_indirect = false;
2496             if (oh.isIndirect())
2497             {
2498                 QTC::TC("qpdf", "QPDFWriter make Extensions direct");
2499                 extensions_indirect = true;
2500                 oh = oh.shallowCopy();
2501                 root.replaceKey(key, oh);
2502             }
2503             if (oh.hasKey("/ADBE"))
2504             {
2505                 QPDFObjectHandle adbe = oh.getKey("/ADBE");
2506                 if (adbe.isIndirect())
2507                 {
2508                     QTC::TC("qpdf", "QPDFWriter make ADBE direct",
2509                             extensions_indirect ? 0 : 1);
2510                     adbe.makeDirect();
2511                     oh.replaceKey("/ADBE", adbe);
2512                 }
2513             }
2514         }
2515     }
2516 }
2517 
2518 void
doWriteSetup()2519 QPDFWriter::doWriteSetup()
2520 {
2521     if (this->m->did_write_setup)
2522     {
2523         return;
2524     }
2525     this->m->did_write_setup = true;
2526 
2527     // Do preliminary setup
2528 
2529     if (this->m->linearized)
2530     {
2531 	this->m->qdf_mode = false;
2532     }
2533 
2534     if (this->m->pclm)
2535     {
2536         this->m->stream_decode_level = qpdf_dl_none;
2537         this->m->compress_streams = false;
2538         this->m->encrypted = false;
2539     }
2540 
2541     if (this->m->qdf_mode)
2542     {
2543 	if (! this->m->normalize_content_set)
2544 	{
2545 	    this->m->normalize_content = true;
2546 	}
2547 	if (! this->m->compress_streams_set)
2548 	{
2549 	    this->m->compress_streams = false;
2550 	}
2551         if (! this->m->stream_decode_level_set)
2552         {
2553             this->m->stream_decode_level = qpdf_dl_generalized;
2554         }
2555     }
2556 
2557     if (this->m->encrypted)
2558     {
2559 	// Encryption has been explicitly set
2560 	this->m->preserve_encryption = false;
2561     }
2562     else if (this->m->normalize_content ||
2563 	     this->m->stream_decode_level ||
2564              this->m->pclm ||
2565 	     this->m->qdf_mode)
2566     {
2567 	// Encryption makes looking at contents pretty useless.  If
2568 	// the user explicitly encrypted though, we still obey that.
2569 	this->m->preserve_encryption = false;
2570     }
2571 
2572     if (this->m->preserve_encryption)
2573     {
2574 	copyEncryptionParameters(this->m->pdf);
2575     }
2576 
2577     if (! this->m->forced_pdf_version.empty())
2578     {
2579 	int major = 0;
2580 	int minor = 0;
2581 	parseVersion(this->m->forced_pdf_version, major, minor);
2582 	disableIncompatibleEncryption(major, minor,
2583                                       this->m->forced_extension_level);
2584 	if (compareVersions(major, minor, 1, 5) < 0)
2585 	{
2586 	    QTC::TC("qpdf", "QPDFWriter forcing object stream disable");
2587 	    this->m->object_stream_mode = qpdf_o_disable;
2588 	}
2589     }
2590 
2591     if (this->m->qdf_mode || this->m->normalize_content ||
2592         this->m->stream_decode_level)
2593     {
2594 	initializeSpecialStreams();
2595     }
2596 
2597     if (this->m->qdf_mode)
2598     {
2599 	// Generate indirect stream lengths for qdf mode since fix-qdf
2600 	// uses them for storing recomputed stream length data.
2601 	// Certain streams such as object streams, xref streams, and
2602 	// hint streams always get direct stream lengths.
2603 	this->m->direct_stream_lengths = false;
2604     }
2605 
2606     switch (this->m->object_stream_mode)
2607     {
2608       case qpdf_o_disable:
2609 	// no action required
2610 	break;
2611 
2612       case qpdf_o_preserve:
2613 	preserveObjectStreams();
2614 	break;
2615 
2616       case qpdf_o_generate:
2617 	generateObjectStreams();
2618 	break;
2619 
2620 	// no default so gcc will warn for missing case tag
2621     }
2622 
2623     if (this->m->linearized)
2624     {
2625 	// Page dictionaries are not allowed to be compressed objects.
2626 	std::vector<QPDFObjectHandle> pages = this->m->pdf.getAllPages();
2627 	for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin();
2628 	     iter != pages.end(); ++iter)
2629 	{
2630 	    QPDFObjectHandle& page = *iter;
2631 	    QPDFObjGen og = page.getObjGen();
2632 	    if (this->m->object_to_object_stream.count(og))
2633 	    {
2634 		QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
2635 		this->m->object_to_object_stream.erase(og);
2636 	    }
2637 	}
2638     }
2639 
2640     if (this->m->linearized || this->m->encrypted)
2641     {
2642     	// The document catalog is not allowed to be compressed in
2643     	// linearized files either.  It also appears that Adobe Reader
2644     	// 8.0.0 has a bug that prevents it from being able to handle
2645     	// encrypted files with compressed document catalogs, so we
2646     	// disable them in that case as well.
2647 	QPDFObjGen og = this->m->pdf.getRoot().getObjGen();
2648 	if (this->m->object_to_object_stream.count(og))
2649 	{
2650 	    QTC::TC("qpdf", "QPDFWriter uncompressing root");
2651 	    this->m->object_to_object_stream.erase(og);
2652 	}
2653     }
2654 
2655     // Generate reverse mapping from object stream to objects
2656     for (std::map<QPDFObjGen, int>::iterator iter =
2657 	     this->m->object_to_object_stream.begin();
2658 	 iter != this->m->object_to_object_stream.end(); ++iter)
2659     {
2660 	QPDFObjGen obj = (*iter).first;
2661 	int stream = (*iter).second;
2662 	this->m->object_stream_to_objects[stream].insert(obj);
2663 	this->m->max_ostream_index =
2664 	    std::max(this->m->max_ostream_index,
2665 		     QIntC::to_int(
2666                          this->m->object_stream_to_objects[stream].size()) - 1);
2667     }
2668 
2669     if (! this->m->object_stream_to_objects.empty())
2670     {
2671 	setMinimumPDFVersion("1.5");
2672     }
2673 
2674     setMinimumPDFVersion(this->m->pdf.getPDFVersion(),
2675                          this->m->pdf.getExtensionLevel());
2676     this->m->final_pdf_version = this->m->min_pdf_version;
2677     this->m->final_extension_level = this->m->min_extension_level;
2678     if (! this->m->forced_pdf_version.empty())
2679     {
2680 	QTC::TC("qpdf", "QPDFWriter using forced PDF version");
2681 	this->m->final_pdf_version = this->m->forced_pdf_version;
2682         this->m->final_extension_level = this->m->forced_extension_level;
2683     }
2684 }
2685 
2686 void
write()2687 QPDFWriter::write()
2688 {
2689     doWriteSetup();
2690 
2691     // Set up progress reporting. For linearized files, we write two
2692     // passes. events_expected is an approximation, but it's good
2693     // enough for progress reporting, which is mostly a guess anyway.
2694     this->m->events_expected = QIntC::to_int(
2695         this->m->pdf.getObjectCount() * (this->m->linearized ? 2 : 1));
2696 
2697     prepareFileForWrite();
2698 
2699     if (this->m->linearized)
2700     {
2701 	writeLinearized();
2702     }
2703     else
2704     {
2705 	writeStandard();
2706     }
2707 
2708     this->m->pipeline->finish();
2709     if (this->m->close_file)
2710     {
2711 	fclose(this->m->file);
2712     }
2713     this->m->file = 0;
2714     if (this->m->buffer_pipeline)
2715     {
2716 	this->m->output_buffer = this->m->buffer_pipeline->getBuffer();
2717 	this->m->buffer_pipeline = 0;
2718     }
2719     indicateProgress(false, true);
2720 }
2721 
2722 QPDFObjGen
getRenumberedObjGen(QPDFObjGen og)2723 QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
2724 {
2725     return QPDFObjGen(this->m->obj_renumber[og], 0);
2726 }
2727 
2728 std::map<QPDFObjGen, QPDFXRefEntry>
getWrittenXRefTable()2729 QPDFWriter::getWrittenXRefTable()
2730 {
2731     std::map<QPDFObjGen, QPDFXRefEntry> result;
2732 
2733     for (std::map<int, QPDFXRefEntry>::iterator iter = this->m->xref.begin();
2734          iter != this->m->xref.end(); ++iter)
2735     {
2736         if (iter->first != 0 && iter->second.getType() != 0)
2737         {
2738             result[QPDFObjGen(iter->first, 0)] = iter->second;
2739         }
2740     }
2741 
2742     return result;
2743 }
2744 
2745 void
enqueuePart(std::vector<QPDFObjectHandle> & part)2746 QPDFWriter::enqueuePart(std::vector<QPDFObjectHandle>& part)
2747 {
2748     for (std::vector<QPDFObjectHandle>::iterator iter = part.begin();
2749 	 iter != part.end(); ++iter)
2750     {
2751 	enqueueObject(*iter);
2752     }
2753 }
2754 
2755 void
writeEncryptionDictionary()2756 QPDFWriter::writeEncryptionDictionary()
2757 {
2758     this->m->encryption_dict_objid = openObject(this->m->encryption_dict_objid);
2759     writeString("<<");
2760     for (std::map<std::string, std::string>::iterator iter =
2761 	     this->m->encryption_dictionary.begin();
2762 	 iter != this->m->encryption_dictionary.end(); ++iter)
2763     {
2764 	writeString(" ");
2765 	writeString((*iter).first);
2766 	writeString(" ");
2767 	writeString((*iter).second);
2768     }
2769     writeString(" >>");
2770     closeObject(this->m->encryption_dict_objid);
2771 }
2772 
2773 std::string
getFinalVersion()2774 QPDFWriter::getFinalVersion()
2775 {
2776     doWriteSetup();
2777     return this->m->final_pdf_version;
2778 }
2779 
2780 void
writeHeader()2781 QPDFWriter::writeHeader()
2782 {
2783     writeString("%PDF-");
2784     writeString(this->m->final_pdf_version);
2785     if (this->m->pclm)
2786     {
2787         // PCLm version
2788         writeString("\n%PCLm 1.0\n");
2789     }
2790     else
2791     {
2792         // This string of binary characters would not be valid UTF-8, so
2793         // it really should be treated as binary.
2794         writeString("\n%\xbf\xf7\xa2\xfe\n");
2795     }
2796     writeStringQDF("%QDF-1.0\n\n");
2797 
2798     // Note: do not write extra header text here.  Linearized PDFs
2799     // must include the entire linearization parameter dictionary
2800     // within the first 1024 characters of the PDF file, so for
2801     // linearized files, we have to write extra header text after the
2802     // linearization parameter dictionary.
2803 }
2804 
2805 void
writeHintStream(int hint_id)2806 QPDFWriter::writeHintStream(int hint_id)
2807 {
2808     PointerHolder<Buffer> hint_buffer;
2809     int S = 0;
2810     int O = 0;
2811     QPDF::Writer::generateHintStream(
2812         this->m->pdf, this->m->xref, this->m->lengths,
2813         this->m->obj_renumber_no_gen,
2814         hint_buffer, S, O);
2815 
2816     openObject(hint_id);
2817     setDataKey(hint_id);
2818 
2819     size_t hlen = hint_buffer->getSize();
2820 
2821     writeString("<< /Filter /FlateDecode /S ");
2822     writeString(QUtil::int_to_string(S));
2823     if (O)
2824     {
2825 	writeString(" /O ");
2826 	writeString(QUtil::int_to_string(O));
2827     }
2828     writeString(" /Length ");
2829     adjustAESStreamLength(hlen);
2830     writeString(QUtil::uint_to_string(hlen));
2831     writeString(" >>\nstream\n");
2832 
2833     if (this->m->encrypted)
2834     {
2835 	QTC::TC("qpdf", "QPDFWriter encrypted hint stream");
2836     }
2837     unsigned char last_char = '\0';
2838     {
2839         PipelinePopper pp_enc(this);
2840         pushEncryptionFilter(pp_enc);
2841         writeBuffer(hint_buffer);
2842         last_char = this->m->pipeline->getLastChar();
2843     }
2844 
2845     if (last_char != '\n')
2846     {
2847 	writeString("\n");
2848     }
2849     writeString("endstream");
2850     closeObject(hint_id);
2851 }
2852 
2853 qpdf_offset_t
writeXRefTable(trailer_e which,int first,int last,int size)2854 QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
2855 {
2856     // There are too many extra arguments to replace overloaded
2857     // function with defaults in the header file...too much risk of
2858     // leaving something off.
2859     return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0, 0);
2860 }
2861 
2862 qpdf_offset_t
writeXRefTable(trailer_e which,int first,int last,int size,qpdf_offset_t prev,bool suppress_offsets,int hint_id,qpdf_offset_t hint_offset,qpdf_offset_t hint_length,int linearization_pass)2863 QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
2864 			   qpdf_offset_t prev, bool suppress_offsets,
2865 			   int hint_id, qpdf_offset_t hint_offset,
2866                            qpdf_offset_t hint_length, int linearization_pass)
2867 {
2868     writeString("xref\n");
2869     writeString(QUtil::int_to_string(first));
2870     writeString(" ");
2871     writeString(QUtil::int_to_string(last - first + 1));
2872     qpdf_offset_t space_before_zero = this->m->pipeline->getCount();
2873     writeString("\n");
2874     for (int i = first; i <= last; ++i)
2875     {
2876 	if (i == 0)
2877 	{
2878 	    writeString("0000000000 65535 f \n");
2879 	}
2880 	else
2881 	{
2882 	    qpdf_offset_t offset = 0;
2883 	    if (! suppress_offsets)
2884 	    {
2885 		offset = this->m->xref[i].getOffset();
2886 		if ((hint_id != 0) &&
2887 		    (i != hint_id) &&
2888 		    (offset >= hint_offset))
2889 		{
2890 		    offset += hint_length;
2891 		}
2892 	    }
2893 	    writeString(QUtil::int_to_string(offset, 10));
2894 	    writeString(" 00000 n \n");
2895 	}
2896     }
2897     writeTrailer(which, size, false, prev, linearization_pass);
2898     writeString("\n");
2899     return space_before_zero;
2900 }
2901 
2902 qpdf_offset_t
writeXRefStream(int objid,int max_id,qpdf_offset_t max_offset,trailer_e which,int first,int last,int size)2903 QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
2904 			    trailer_e which, int first, int last, int size)
2905 {
2906     // There are too many extra arguments to replace overloaded
2907     // function with defaults in the header file...too much risk of
2908     // leaving something off.
2909     return writeXRefStream(objid, max_id, max_offset,
2910 			   which, first, last, size, 0, 0, 0, 0, false, 0);
2911 }
2912 
// Write a cross-reference stream as object xref_id, covering object
// numbers first..last.
//
//   max_id, max_offset: together with hint_length, determine how many
//     bytes field 1 of each entry occupies (via bytesNeeded).
//   which, size, prev, linearization_pass: passed to writeTrailer,
//     which is called while the stream dictionary is being written.
//   hint_id, hint_offset, hint_length: when hint_id is non-zero,
//     type 1 entries other than hint_id whose offset is at or beyond
//     hint_offset are shifted by hint_length.
//   skip_compression: when true, no deflate pipeline is pushed even
//     though the dictionary may still name /FlateDecode.
//
// Returns the pipeline byte count at entry minus one.  Throws
// std::logic_error if an xref entry in range has a type other than
// 0, 1, or 2.
qpdf_offset_t
QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
			    trailer_e which, int first, int last, int size,
			    qpdf_offset_t prev, int hint_id,
			    qpdf_offset_t hint_offset,
                            qpdf_offset_t hint_length,
			    bool skip_compression,
                            int linearization_pass)
{
    qpdf_offset_t xref_offset = this->m->pipeline->getCount();
    qpdf_offset_t space_before_zero = xref_offset - 1;

    // field 1 contains offsets and object stream identifiers
    unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length),
                                    bytesNeeded(max_id));

    // field 2 contains object stream indices
    unsigned int f2_size = bytesNeeded(this->m->max_ostream_index);

    // Size of one entry: one type byte plus the two fields.
    unsigned int esize = 1 + f1_size + f2_size;

    // Must store in xref table in advance of writing the actual data
    // rather than waiting for openObject to do it.
    this->m->xref[xref_id] = QPDFXRefEntry(1, this->m->pipeline->getCount(), 0);

    // The entry data is collected in a buffer first so that its
    // length is known before the stream dictionary is written.
    Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
    bool compressed = false;
    if ((this->m->compress_streams ||
         (this->m->stream_decode_level == qpdf_dl_none)) &&
        (! this->m->qdf_mode))
    {
	compressed = true;
	if (! skip_compression)
	{
	    // Push the deflate pipeline only when compression is not
	    // being skipped.  When skip_compression is set, the
	    // stream dictionary for compression is still written
	    // (compressed stays true) but the data is not actually
	    // compressed.  This helps us with computation of padding
	    // for pass 1 of linearization.
	    p = pushPipeline(
		new Pl_Flate("compress xref", p, Pl_Flate::a_deflate));
	}
	p = pushPipeline(
	    new Pl_PNGFilter(
		"pngify xref", p, Pl_PNGFilter::a_encode, esize));
    }
    PointerHolder<Buffer> xref_data;
    {
        // pp_xref is constructed with the address of xref_data and
        // goes out of scope at the end of this block; xref_data is
        // read only after that.
        PipelinePopper pp_xref(this, &xref_data);
        activatePipelineStack(pp_xref);
        for (int i = first; i <= last; ++i)
        {
            QPDFXRefEntry& e = this->m->xref[i];
            switch (e.getType())
            {
              case 0:
                // Type 0: all three fields are written as zero.
                writeBinary(0, 1);
                writeBinary(0, f1_size);
                writeBinary(0, f2_size);
                break;

              case 1:
                // Type 1: field 1 is the (possibly hint-adjusted)
                // offset; field 2 is zero.
                {
                    qpdf_offset_t offset = e.getOffset();
                    if ((hint_id != 0) &&
                        (i != hint_id) &&
                        (offset >= hint_offset))
                    {
                        offset += hint_length;
                    }
                    writeBinary(1, 1);
                    writeBinary(QIntC::to_ulonglong(offset), f1_size);
                    writeBinary(0, f2_size);
                }
                break;

              case 2:
                // Type 2: field 1 is the object stream number and
                // field 2 the index within that object stream.
                writeBinary(2, 1);
                writeBinary(QIntC::to_ulonglong(e.getObjStreamNumber()), f1_size);
                writeBinary(QIntC::to_ulonglong(e.getObjStreamIndex()), f2_size);
                break;

              default:
                throw std::logic_error("invalid type writing xref stream");
                break;
            }
        }
    }

    // Stream dictionary, then the buffered entry data.
    openObject(xref_id);
    writeString("<<");
    writeStringQDF("\n ");
    writeString(" /Type /XRef");
    writeStringQDF("\n ");
    writeString(" /Length " + QUtil::uint_to_string(xref_data->getSize()));
    if (compressed)
    {
	writeStringQDF("\n ");
	writeString(" /Filter /FlateDecode");
	writeStringQDF("\n ");
	writeString(" /DecodeParms << /Columns " +
		    QUtil::int_to_string(esize) + " /Predictor 12 >>");
    }
    writeStringQDF("\n ");
    writeString(" /W [ 1 " +
		QUtil::int_to_string(f1_size) + " " +
		QUtil::int_to_string(f2_size) + " ]");
    // /Index is written only when the stream does not cover the
    // whole range 0..size-1.
    if (! ((first == 0) && (last == size - 1)))
    {
	writeString(" /Index [ " +
		    QUtil::int_to_string(first) + " " +
		    QUtil::int_to_string(last - first + 1) + " ]");
    }
    writeTrailer(which, size, true, prev, linearization_pass);
    writeString("\nstream\n");
    writeBuffer(xref_data);
    writeString("\nendstream");
    closeObject(xref_id);
    return space_before_zero;
}
3031 
3032 int
calculateXrefStreamPadding(qpdf_offset_t xref_bytes)3033 QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
3034 {
3035     // This routine is called right after a linearization first pass
3036     // xref stream has been written without compression.  Calculate
3037     // the amount of padding that would be required in the worst case,
3038     // assuming the number of uncompressed bytes remains the same.
3039     // The worst case for zlib is that the output is larger than the
3040     // input by 6 bytes plus 5 bytes per 16K, and then we'll add 10
3041     // extra bytes for number length increases.
3042 
3043     return QIntC::to_int(16 + (5 * ((xref_bytes + 16383) / 16384)));
3044 }
3045 
3046 void
discardGeneration(std::map<QPDFObjGen,int> const & in,std::map<int,int> & out)3047 QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in,
3048                               std::map<int, int>& out)
3049 {
3050     // There are deep assumptions in the linearization code in QPDF
3051     // that there is only one object with each object number; i.e.,
3052     // you can't have two objects with the same object number and
3053     // different generations.  This is a pretty safe assumption
3054     // because Adobe Reader and Acrobat can't actually handle this
3055     // case.  There is not much if any code in QPDF outside
3056     // linearization that assumes this, but the linearization code as
3057     // currently implemented would do weird things if we found such a
3058     // case.  In order to avoid breaking ABI changes in QPDF, we will
3059     // first assert that this condition holds.  Then we can create new
3060     // maps for QPDF that throw away generation numbers.
3061 
3062     out.clear();
3063     for (std::map<QPDFObjGen, int>::const_iterator iter = in.begin();
3064          iter != in.end(); ++iter)
3065     {
3066         if (out.count((*iter).first.getObj()))
3067         {
3068             throw std::runtime_error(
3069                 "QPDF cannot currently linearize files that contain"
3070                 " multiple objects with the same object ID and different"
3071                 " generations.  If you see this error message, please file"
3072                 " a bug report and attach the file if possible.  As a"
3073                 " workaround, first convert the file with qpdf without"
3074                 " linearizing, and then linearize the result of that"
3075                 " conversion.");
3076         }
3077         out[(*iter).first.getObj()] = (*iter).second;
3078     }
3079 }
3080 
// Write the file in linearized form.
//
// Object numbers are assigned up front following the sequence
// documented below, and the objects are enqueued.  The file is then
// written in two passes.  Pass 1 goes to a discard pipeline (or to
// the file named by lin_pass1_filename when that is set) and records
// offsets; at the end of pass 1 the hint stream is written to a
// buffer.  Pass 2 writes through the pipeline that remains after the
// pass 1 pipeline is popped, using the offsets and hint stream length
// learned in pass 1.
//
// Throws std::runtime_error if the object numbers consumed while
// enqueuing a part do not match the numbers reserved for it, and
// std::logic_error if pass 2 does not line up with the padding
// reserved in pass 1.
void
QPDFWriter::writeLinearized()
{
    // Optimize file and enqueue objects in order

    discardGeneration(this->m->object_to_object_stream,
                      this->m->object_to_object_stream_no_gen);

    // Callback handed to QPDF::optimize: yields 2 when
    // willFilterStream returns true for the stream and 1 otherwise.
    // NOTE(review): the meaning of 1 and 2 is defined by optimize's
    // callback contract -- confirm there.
    auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
        bool compress_stream;
        bool is_metadata;
        if (willFilterStream(stream, compress_stream, is_metadata, nullptr))
        {
            return 2;
        }
        else
        {
            return 1;
        }
    };

    this->m->pdf.optimize(this->m->object_to_object_stream_no_gen,
                          true, skip_stream_parameters);

    std::vector<QPDFObjectHandle> part4;
    std::vector<QPDFObjectHandle> part6;
    std::vector<QPDFObjectHandle> part7;
    std::vector<QPDFObjectHandle> part8;
    std::vector<QPDFObjectHandle> part9;
    QPDF::Writer::getLinearizedParts(
        this->m->pdf, this->m->object_to_object_stream_no_gen,
        part4, part6, part7, part8, part9);

    // Object number sequence:
    //
    //  second half
    //    second half uncompressed objects
    //    second half xref stream, if any
    //    second half compressed objects
    //  first half
    //    linearization dictionary
    //    first half xref stream, if any
    //    part 4 uncompressed objects
    //    encryption dictionary, if any
    //    hint stream
    //    part 6 uncompressed objects
    //    first half compressed objects
    //

    // Second half objects
    int second_half_uncompressed =
        QIntC::to_int(part7.size() + part8.size() + part9.size());
    int second_half_first_obj = 1;
    int after_second_half = 1 + second_half_uncompressed;
    this->m->next_objid = after_second_half;
    int second_half_xref = 0;
    // An xref stream is used whenever any object is assigned to an
    // object stream.
    bool need_xref_stream = (! this->m->object_to_object_stream.empty());
    if (need_xref_stream)
    {
	second_half_xref = this->m->next_objid++;
    }
    // Assign numbers to all compressed objects in the second half.
    std::vector<QPDFObjectHandle>* vecs2[] = {&part7, &part8, &part9};
    for (int i = 0; i < 3; ++i)
    {
	for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
	     iter != (*vecs2[i]).end(); ++iter)
	{
	    assignCompressedObjectNumbers((*iter).getObjGen());
	}
    }
    int second_half_end = this->m->next_objid - 1;
    int second_trailer_size = this->m->next_objid;

    // First half objects
    int first_half_start = this->m->next_objid;
    int lindict_id = this->m->next_objid++;
    int first_half_xref = 0;
    if (need_xref_stream)
    {
	first_half_xref = this->m->next_objid++;
    }
    int part4_first_obj = this->m->next_objid;
    this->m->next_objid += QIntC::to_int(part4.size());
    int after_part4 = this->m->next_objid;
    if (this->m->encrypted)
    {
	this->m->encryption_dict_objid = this->m->next_objid++;
    }
    int hint_id = this->m->next_objid++;
    int part6_first_obj = this->m->next_objid;
    this->m->next_objid += QIntC::to_int(part6.size());
    int after_part6 = this->m->next_objid;
    // Assign numbers to all compressed objects in the first half
    std::vector<QPDFObjectHandle>* vecs1[] = {&part4, &part6};
    for (int i = 0; i < 2; ++i)
    {
	for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
	     iter != (*vecs1[i]).end(); ++iter)
	{
	    assignCompressedObjectNumbers((*iter).getObjGen());
	}
    }
    int first_half_end = this->m->next_objid - 1;
    int first_trailer_size = this->m->next_objid;

    // The last objects of part 4 and part 6 mark where the hint
    // stream goes and where the first-half offsets are sampled while
    // the object queue is written.
    // NOTE(review): back() is undefined on an empty vector; this
    // presumably relies on getLinearizedParts always populating
    // part4 and part6 -- confirm.
    int part4_end_marker = part4.back().getObjectID();
    int part6_end_marker = part6.back().getObjectID();
    qpdf_offset_t space_before_zero = 0;
    qpdf_offset_t file_size = 0;
    qpdf_offset_t part6_end_offset = 0;
    qpdf_offset_t first_half_max_obj_offset = 0;
    qpdf_offset_t second_xref_offset = 0;
    qpdf_offset_t first_xref_end = 0;
    qpdf_offset_t second_xref_end = 0;

    // Enqueue each part starting from its reserved first object
    // number and verify that exactly the reserved range was used.
    this->m->next_objid = part4_first_obj;
    enqueuePart(part4);
    if (this->m->next_objid != after_part4)
    {
        // This can happen with very botched files as in the fuzzer
        // test. There are likely some faulty assumptions in
        // calculateLinearizationData
        throw std::runtime_error(
            "error encountered after"
            " writing part 4 of linearized data");
    }
    this->m->next_objid = part6_first_obj;
    enqueuePart(part6);
    if (this->m->next_objid != after_part6)
    {
        throw std::runtime_error(
            "error encountered after"
            " writing part 6 of linearized data");
    }
    this->m->next_objid = second_half_first_obj;
    enqueuePart(part7);
    enqueuePart(part8);
    enqueuePart(part9);
    if (this->m->next_objid != after_second_half)
    {
        throw std::runtime_error(
            "error encountered after"
            " writing part 9 of linearized data");
    }

    // Both remain zero/empty throughout pass 1 and are filled in at
    // the end of pass 1 for use in pass 2.
    qpdf_offset_t hint_length = 0;
    PointerHolder<Buffer> hint_buffer;

    // Write file in two passes.  Part numbers refer to PDF spec 1.4.

    FILE* lin_pass1_file = 0;
    PointerHolder<PipelinePopper> pp_pass1 = new PipelinePopper(this);
    PointerHolder<PipelinePopper> pp_md5 = new PipelinePopper(this);
    for (int pass = 1; pass <= 2; ++pass)
    {
	if (pass == 1)
	{
            // Pass 1 output goes to the pass 1 file when a filename
            // was given and is discarded otherwise.
            if (! this->m->lin_pass1_filename.empty())
            {
                lin_pass1_file =
                    QUtil::safe_fopen(
                        this->m->lin_pass1_filename.c_str(), "wb");
                pushPipeline(
                    new Pl_StdioFile("linearization pass1", lin_pass1_file));
                activatePipelineStack(*pp_pass1);
            }
            else
            {
                pushDiscardFilter(*pp_pass1);
            }
            if (this->m->deterministic_id)
            {
                pushMD5Pipeline(*pp_md5);
            }
	}

	// Part 1: header

	writeHeader();

	// Part 2: linearization parameter dictionary.  Save enough
	// space to write real dictionary.  200 characters is enough
	// space if all numerical values in the parameter dictionary
	// that contain offsets are 20 digits long plus a few extra
	// characters for safety.  The entire linearization parameter
	// dictionary must appear within the first 1024 characters of
	// the file.

	qpdf_offset_t pos = this->m->pipeline->getCount();
	openObject(lindict_id);
	writeString("<<");
	// The dictionary contents are only written in pass 2; pass 1
	// writes an empty dictionary and relies on the padding below.
	if (pass == 2)
	{
	    std::vector<QPDFObjectHandle> const& pages =
                this->m->pdf.getAllPages();
	    int first_page_object =
                this->m->obj_renumber[pages.at(0).getObjGen()];
	    int npages = QIntC::to_int(pages.size());

	    writeString(" /Linearized 1 /L ");
	    writeString(QUtil::int_to_string(file_size + hint_length));
	    // Implementation note 121 states that a space is
	    // mandatory after this open bracket.
	    writeString(" /H [ ");
	    writeString(QUtil::int_to_string(
                            this->m->xref[hint_id].getOffset()));
	    writeString(" ");
	    writeString(QUtil::int_to_string(hint_length));
	    writeString(" ] /O ");
	    writeString(QUtil::int_to_string(first_page_object));
	    writeString(" /E ");
	    writeString(QUtil::int_to_string(part6_end_offset + hint_length));
	    writeString(" /N ");
	    writeString(QUtil::int_to_string(npages));
	    writeString(" /T ");
	    writeString(QUtil::int_to_string(space_before_zero + hint_length));
	}
	writeString(" >>");
	closeObject(lindict_id);
	// Pad so that the dictionary object always occupies exactly
	// pad bytes measured from pos, in both passes.
	static int const pad = 200;
	int spaces = QIntC::to_int(pos - this->m->pipeline->getCount() + pad);
	assert(spaces >= 0);
	writePad(spaces);
	writeString("\n");

        // If the user supplied any additional header text, write it
        // here after the linearization parameter dictionary.
        writeString(this->m->extra_header_text);

	// Part 3: first page cross reference table and trailer.

	qpdf_offset_t first_xref_offset = this->m->pipeline->getCount();
	qpdf_offset_t hint_offset = 0;
	if (pass == 2)
	{
	    hint_offset = this->m->xref[hint_id].getOffset();
	}
	if (need_xref_stream)
	{
	    // Must pad here too.
	    if (pass == 1)
	    {
		// Set first_half_max_obj_offset to a value large
		// enough to force four bytes to be reserved for each
		// file offset.  This would provide adequate space for
		// the xref stream as long as the last object in page
		// 1 starts within the first 4 GB of the file, which
		// is extremely likely.  In the second pass, we will
		// know the actual value for this, but it's okay if
		// it's smaller.
		first_half_max_obj_offset = 1 << 25;
	    }
	    pos = this->m->pipeline->getCount();
	    writeXRefStream(first_half_xref, first_half_end,
			    first_half_max_obj_offset,
			    t_lin_first, first_half_start, first_half_end,
			    first_trailer_size,
			    hint_length + second_xref_offset,
			    hint_id, hint_offset, hint_length,
			    (pass == 1), pass);
	    qpdf_offset_t endpos = this->m->pipeline->getCount();
	    if (pass == 1)
	    {
		// Pad so we have enough room for the real xref
		// stream.
		writePad(calculateXrefStreamPadding(endpos - pos));
		first_xref_end = this->m->pipeline->getCount();
	    }
	    else
	    {
		// Pad so that the next object starts at the same
		// place as in pass 1.
		writePad(QIntC::to_int(first_xref_end - endpos));

		if (this->m->pipeline->getCount() != first_xref_end)
                {
                    throw std::logic_error(
                        "insufficient padding for first pass xref stream; "
                        "first_xref_end=" +
                        QUtil::int_to_string(first_xref_end) +
                        "; endpos=" + QUtil::int_to_string(endpos));
                }
	    }
	    writeString("\n");
	}
	else
	{
	    // Offsets are suppressed in pass 1 (suppress_offsets is
	    // pass == 1).
	    writeXRefTable(t_lin_first, first_half_start, first_half_end,
			   first_trailer_size, hint_length + second_xref_offset,
			   (pass == 1), hint_id, hint_offset, hint_length,
                           pass);
	    writeString("startxref\n0\n%%EOF\n");
	}

	// Parts 4 through 9

	for (std::list<QPDFObjectHandle>::iterator iter =
		 this->m->object_queue.begin();
	     iter != this->m->object_queue.end(); ++iter)
	{
	    QPDFObjectHandle cur_object = (*iter);
	    if (cur_object.getObjectID() == part6_end_marker)
	    {
		// Offset at which the last part 6 object starts.
		first_half_max_obj_offset = this->m->pipeline->getCount();
	    }
	    writeObject(cur_object);
	    if (cur_object.getObjectID() == part4_end_marker)
	    {
		if (this->m->encrypted)
		{
		    writeEncryptionDictionary();
		}
		if (pass == 1)
		{
		    // Pass 1 only records where the hint stream will
		    // go; nothing is written for it here.
		    this->m->xref[hint_id] =
			QPDFXRefEntry(1, this->m->pipeline->getCount(), 0);
		}
		else
		{
		    // Part 5: hint stream
		    writeBuffer(hint_buffer);
		}
	    }
	    if (cur_object.getObjectID() == part6_end_marker)
	    {
		part6_end_offset = this->m->pipeline->getCount();
	    }
	}

	// Part 10: overflow hint stream -- not used

	// Part 11: main cross reference table and trailer

	second_xref_offset = this->m->pipeline->getCount();
	if (need_xref_stream)
	{
	    pos = this->m->pipeline->getCount();
	    space_before_zero =
		writeXRefStream(second_half_xref,
				second_half_end, second_xref_offset,
				t_lin_second, 0, second_half_end,
				second_trailer_size,
				0, 0, 0, 0, (pass == 1), pass);
	    qpdf_offset_t endpos = this->m->pipeline->getCount();

	    if (pass == 1)
	    {
		// Pad so we have enough room for the real xref
		// stream.  See comments for previous xref stream on
		// how we calculate the padding.
		writePad(calculateXrefStreamPadding(endpos - pos));
		writeString("\n");
		second_xref_end = this->m->pipeline->getCount();
	    }
	    else
	    {
		// Make the file size the same.
		writePad(
                    QIntC::to_int(second_xref_end + hint_length -
                                  1 - this->m->pipeline->getCount()));
		writeString("\n");

		// If this assertion fails, maybe we didn't have
		// enough padding above.
		if (this->m->pipeline->getCount() !=
                    second_xref_end + hint_length)
                {
                    throw std::logic_error(
                        "count mismatch after xref stream;"
                        " possible insufficient padding?");
                }
	    }
	}
	else
	{
	    space_before_zero =
		writeXRefTable(t_lin_second, 0, second_half_end,
			       second_trailer_size, 0, false, 0, 0, 0, pass);
	}
	writeString("startxref\n");
	writeString(QUtil::int_to_string(first_xref_offset));
	writeString("\n%%EOF\n");

        // obj_renumber_no_gen is what writeHintStream passes to
        // generateHintStream.
        discardGeneration(this->m->obj_renumber, this->m->obj_renumber_no_gen);

	if (pass == 1)
	{
            if (this->m->deterministic_id)
            {
                QTC::TC("qpdf", "QPDFWriter linearized deterministic ID",
                        need_xref_stream ? 0 : 1);
                computeDeterministicIDData();
                pp_md5 = 0;
                assert(this->m->md5_pipeline == 0);
            }

	    // Close first pass pipeline
	    file_size = this->m->pipeline->getCount();
	    pp_pass1 = 0;

	    // Save hint offset since it will be set to zero by
	    // calling openObject.
	    qpdf_offset_t hint_offset1 = this->m->xref[hint_id].getOffset();

	    // Write hint stream to a buffer
            {
                pushPipeline(new Pl_Buffer("hint buffer"));
                PipelinePopper pp_hint(this, &hint_buffer);
                activatePipelineStack(pp_hint);
                writeHintStream(hint_id);
            }
	    hint_length = QIntC::to_offset(hint_buffer->getSize());

	    // Restore hint offset
	    this->m->xref[hint_id] = QPDFXRefEntry(1, hint_offset1, 0);
            if (lin_pass1_file)
            {
                // Write some debugging information
                fprintf(lin_pass1_file, "%% hint_offset=%s\n",
                        QUtil::int_to_string(hint_offset1).c_str());
                fprintf(lin_pass1_file, "%% hint_length=%s\n",
                        QUtil::int_to_string(hint_length).c_str());
                fprintf(lin_pass1_file, "%% second_xref_offset=%s\n",
                        QUtil::int_to_string(second_xref_offset).c_str());
                fprintf(lin_pass1_file, "%% second_xref_end=%s\n",
                        QUtil::int_to_string(second_xref_end).c_str());
                fclose(lin_pass1_file);
                lin_pass1_file = 0;
            }
	}
    }
}
3514 
3515 void
enqueueObjectsStandard()3516 QPDFWriter::enqueueObjectsStandard()
3517 {
3518     if (this->m->preserve_unreferenced_objects)
3519     {
3520         QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
3521         std::vector<QPDFObjectHandle> all = this->m->pdf.getAllObjects();
3522         for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
3523              iter != all.end(); ++iter)
3524         {
3525             enqueueObject(*iter);
3526         }
3527     }
3528 
3529     // Put root first on queue.
3530     QPDFObjectHandle trailer = getTrimmedTrailer();
3531     enqueueObject(trailer.getKey("/Root"));
3532 
3533     // Next place any other objects referenced from the trailer
3534     // dictionary into the queue, handling direct objects recursively.
3535     // Root is already there, so enqueuing it a second time is a
3536     // no-op.
3537     std::set<std::string> keys = trailer.getKeys();
3538     for (std::set<std::string>::iterator iter = keys.begin();
3539 	 iter != keys.end(); ++iter)
3540     {
3541 	enqueueObject(trailer.getKey(*iter));
3542     }
3543 }
3544 
3545 void
enqueueObjectsPCLm()3546 QPDFWriter::enqueueObjectsPCLm()
3547 {
3548     // Image transform stream content for page strip images.
3549     // Each of this new stream has to come after every page image
3550     // strip written in the pclm file.
3551     std::string image_transform_content = "q /image Do Q\n";
3552 
3553     // enqueue all pages first
3554     std::vector<QPDFObjectHandle> all = this->m->pdf.getAllPages();
3555     for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
3556          iter != all.end(); ++iter)
3557     {
3558         // enqueue page
3559         enqueueObject(*iter);
3560 
3561         // enqueue page contents stream
3562         enqueueObject((*iter).getKey("/Contents"));
3563 
3564         // enqueue all the strips for each page
3565         QPDFObjectHandle strips =
3566             (*iter).getKey("/Resources").getKey("/XObject");
3567         std::set<std::string> keys = strips.getKeys();
3568         for (std::set<std::string>::iterator image = keys.begin();
3569              image != keys.end(); ++image)
3570         {
3571             enqueueObject(strips.getKey(*image));
3572             enqueueObject(QPDFObjectHandle::newStream(
3573                               &this->m->pdf, image_transform_content));
3574         }
3575     }
3576 
3577     // Put root in queue.
3578     QPDFObjectHandle trailer = getTrimmedTrailer();
3579     enqueueObject(trailer.getKey("/Root"));
3580 }
3581 
3582 void
indicateProgress(bool decrement,bool finished)3583 QPDFWriter::indicateProgress(bool decrement, bool finished)
3584 {
3585     if (decrement)
3586     {
3587         --this->m->events_seen;
3588         return;
3589     }
3590 
3591     ++this->m->events_seen;
3592 
3593     if (! this->m->progress_reporter.getPointer())
3594     {
3595         return;
3596     }
3597 
3598     if (finished || (this->m->events_seen >= this->m->next_progress_report))
3599     {
3600         int percentage = (
3601             finished
3602             ? 100
3603             : this->m->next_progress_report == 0
3604             ? 0
3605             : std::min(99, 1 + ((100 * this->m->events_seen) /
3606                                 this->m->events_expected)));
3607         this->m->progress_reporter->reportProgress(percentage);
3608     }
3609     int increment = std::max(1, (this->m->events_expected / 100));
3610     while (this->m->events_seen >= this->m->next_progress_report)
3611     {
3612         this->m->next_progress_report += increment;
3613     }
3614 }
3615 
3616 void
registerProgressReporter(PointerHolder<ProgressReporter> pr)3617 QPDFWriter::registerProgressReporter(PointerHolder<ProgressReporter> pr)
3618 {
3619     this->m->progress_reporter = pr;
3620 }
3621 
3622 void
writeStandard()3623 QPDFWriter::writeStandard()
3624 {
3625     PointerHolder<PipelinePopper> pp_md5 = new PipelinePopper(this);
3626     if (this->m->deterministic_id)
3627     {
3628         pushMD5Pipeline(*pp_md5);
3629     }
3630 
3631     // Start writing
3632 
3633     writeHeader();
3634     writeString(this->m->extra_header_text);
3635 
3636     if (this->m->pclm)
3637     {
3638         enqueueObjectsPCLm();
3639     }
3640     else
3641     {
3642         enqueueObjectsStandard();
3643     }
3644 
3645     // Now start walking queue, outputting each object.
3646     while (this->m->object_queue.size())
3647     {
3648 	QPDFObjectHandle cur_object = this->m->object_queue.front();
3649 	this->m->object_queue.pop_front();
3650 	writeObject(cur_object);
3651     }
3652 
3653     // Write out the encryption dictionary, if any
3654     if (this->m->encrypted)
3655     {
3656 	writeEncryptionDictionary();
3657     }
3658 
3659     // Now write out xref.  next_objid is now the number of objects.
3660     qpdf_offset_t xref_offset = this->m->pipeline->getCount();
3661     if (this->m->object_stream_to_objects.empty())
3662     {
3663 	// Write regular cross-reference table
3664 	writeXRefTable(t_normal, 0, this->m->next_objid - 1,
3665                        this->m->next_objid);
3666     }
3667     else
3668     {
3669 	// Write cross-reference stream.
3670 	int xref_id = this->m->next_objid++;
3671 	writeXRefStream(xref_id, xref_id, xref_offset, t_normal,
3672 			0, this->m->next_objid - 1, this->m->next_objid);
3673     }
3674     writeString("startxref\n");
3675     writeString(QUtil::int_to_string(xref_offset));
3676     writeString("\n%%EOF\n");
3677 
3678     if (this->m->deterministic_id)
3679     {
3680 	QTC::TC("qpdf", "QPDFWriter standard deterministic ID",
3681                 this->m->object_stream_to_objects.empty() ? 0 : 1);
3682         pp_md5 = 0;
3683         assert(this->m->md5_pipeline == 0);
3684     }
3685 }
3686