1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3
4 #include <zlib.h>
5
6 #include <assert.h>
7 #include <limits.h>
8 #include <string.h>
9
10 #include <stdio.h> /* for debug printing */
11 /* #define DEBUG_LINEARIZATION */
12 /* #define DEBUG_HEAP_SORT */
13 /* #define DEBUG_WRITING */
14
15 #define SIG_EXTRAS_SIZE (1024)
16
17 #define SLASH_BYTE_RANGE ("/ByteRange")
18 #define SLASH_CONTENTS ("/Contents")
19 #define SLASH_FILTER ("/Filter")
20
21
22 /*
23 As part of linearization, we need to keep a list of what objects are used
24 by what page. We do this by recording the objects used in a given page
25 in a page_objects structure. We have a list of these structures (one per
26 page) in the page_objects_list structure.
27
28 The page_objects structure maintains a heap in the object array, so
29 insertion takes log n time, and we can heapsort and dedupe at the end for
	a total worst case n log n time.
31
32 The magic heap invariant is that:
33 entry[n] >= entry[(n+1)*2-1] & entry[n] >= entry[(n+1)*2]
34 or equivalently:
35 entry[(n-1)>>1] >= entry[n]
36
37 For a discussion of the heap data structure (and heapsort) see Kingston,
38 "Algorithms and Data Structures".
39 */
40
/* Record of the objects used by a single page, together with the
 * per-page figures collected for it. The structure is allocated with
 * extra trailing space so that 'object' can hold 'cap' entries (see
 * page_objects_create / page_objects_insert). */
typedef struct
{
	int num_shared;
	int page_object_number;
	int num_objects;
	int min_ofs;
	int max_ofs;
	/* Growable array of the object numbers used on this page */
	int cap;	/* number of entries allocated in object[] */
	int len;	/* number of entries currently in use */
	int object[1];	/* first entry; the allocation extends past it */
} page_objects;
52
53 typedef struct {
54 int cap;
55 int len;
56 page_objects *page[1];
57 } page_objects_list;
58
59 typedef struct
60 {
61 fz_output *out;
62
63 int do_incremental;
64 int do_tight;
65 int do_ascii;
66 int do_expand;
67 int do_compress;
68 int do_compress_images;
69 int do_compress_fonts;
70 int do_garbage;
71 int do_linear;
72 int do_clean;
73 int do_encrypt;
74
75 int list_len;
76 int *use_list;
77 int64_t *ofs_list;
78 int *gen_list;
79 int *renumber_map;
80
81 /* The following extras are required for linearization */
82 int *rev_renumber_map;
83 int start;
84 int64_t first_xref_offset;
85 int64_t main_xref_offset;
86 int64_t first_xref_entry_offset;
87 int64_t file_len;
88 int hints_shared_offset;
89 int hintstream_len;
90 pdf_obj *linear_l;
91 pdf_obj *linear_h0;
92 pdf_obj *linear_h1;
93 pdf_obj *linear_o;
94 pdf_obj *linear_e;
95 pdf_obj *linear_n;
96 pdf_obj *linear_t;
97 pdf_obj *hints_s;
98 pdf_obj *hints_length;
99 int page_count;
100 page_objects_list *page_object_lists;
101 int crypt_object_number;
102 char opwd_utf8[128];
103 char upwd_utf8[128];
104 int permissions;
105 pdf_crypt *crypt;
106 } pdf_write_state;
107
108 /*
109 * Constants for use with use_list.
110 *
111 * If use_list[num] = 0, then object num is unused.
112 * If use_list[num] & PARAMS, then object num is the linearisation params obj.
113 * If use_list[num] & CATALOGUE, then object num is used by the catalogue.
114 * If use_list[num] & PAGE1, then object num is used by page 1.
115 * If use_list[num] & SHARED, then object num is shared between pages.
116 * If use_list[num] & PAGE_OBJECT then this must be the first object in a page.
 * If use_list[num] & OTHER_OBJECTS then this object should appear in section 9.
118 * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT).
119 */
/* Bit flags stored in use_list[]. The low 8 bits are section flags;
 * everything from USE_PAGE_SHIFT upwards holds a page number. */
enum
{
	USE_CATALOGUE = 1 << 1,
	USE_PAGE1 = 1 << 2,
	USE_SHARED = 1 << 3,
	USE_PARAMS = 1 << 4,
	USE_HINTS = 1 << 5,
	USE_PAGE_OBJECT = 1 << 6,
	USE_OTHER_OBJECTS = 1 << 7,
	USE_PAGE_SHIFT = 8,
	USE_PAGE_MASK = ~((1 << USE_PAGE_SHIFT) - 1)
};
132
133 static void
expand_lists(fz_context * ctx,pdf_write_state * opts,int num)134 expand_lists(fz_context *ctx, pdf_write_state *opts, int num)
135 {
136 int i;
137
138 /* objects are numbered 0..num and maybe two additional objects for linearization */
139 num += 3;
140 opts->use_list = fz_realloc_array(ctx, opts->use_list, num, int);
141 opts->ofs_list = fz_realloc_array(ctx, opts->ofs_list, num, int64_t);
142 opts->gen_list = fz_realloc_array(ctx, opts->gen_list, num, int);
143 opts->renumber_map = fz_realloc_array(ctx, opts->renumber_map, num, int);
144 opts->rev_renumber_map = fz_realloc_array(ctx, opts->rev_renumber_map, num, int);
145
146 for (i = opts->list_len; i < num; i++)
147 {
148 opts->use_list[i] = 0;
149 opts->ofs_list[i] = 0;
150 opts->gen_list[i] = 0;
151 opts->renumber_map[i] = i;
152 opts->rev_renumber_map[i] = i;
153 }
154 opts->list_len = num;
155 }
156
157 /*
158 * page_objects and page_object_list handling functions
159 */
160 static page_objects_list *
page_objects_list_create(fz_context * ctx)161 page_objects_list_create(fz_context *ctx)
162 {
163 page_objects_list *pol = fz_calloc(ctx, 1, sizeof(*pol));
164
165 pol->cap = 1;
166 pol->len = 0;
167 return pol;
168 }
169
170 static void
page_objects_list_destroy(fz_context * ctx,page_objects_list * pol)171 page_objects_list_destroy(fz_context *ctx, page_objects_list *pol)
172 {
173 int i;
174
175 if (!pol)
176 return;
177 for (i = 0; i < pol->len; i++)
178 {
179 fz_free(ctx, pol->page[i]);
180 }
181 fz_free(ctx, pol);
182 }
183
184 static void
page_objects_list_ensure(fz_context * ctx,page_objects_list ** pol,int newcap)185 page_objects_list_ensure(fz_context *ctx, page_objects_list **pol, int newcap)
186 {
187 int oldcap = (*pol)->cap;
188 if (newcap <= oldcap)
189 return;
190 *pol = fz_realloc(ctx, *pol, sizeof(page_objects_list) + (newcap-1)*sizeof(page_objects *));
191 memset(&(*pol)->page[oldcap], 0, (newcap-oldcap)*sizeof(page_objects *));
192 (*pol)->cap = newcap;
193 }
194
195 static page_objects *
page_objects_create(fz_context * ctx)196 page_objects_create(fz_context *ctx)
197 {
198 int initial_cap = 8;
199 page_objects *po = fz_calloc(ctx, 1, sizeof(*po) + (initial_cap-1) * sizeof(int));
200
201 po->cap = initial_cap;
202 po->len = 0;
203 return po;
204 }
205
206 static void
page_objects_insert(fz_context * ctx,page_objects ** ppo,int i)207 page_objects_insert(fz_context *ctx, page_objects **ppo, int i)
208 {
209 page_objects *po;
210
211 /* Make a page_objects if we don't have one */
212 if (*ppo == NULL)
213 *ppo = page_objects_create(ctx);
214
215 po = *ppo;
216 /* page_objects insertion: extend the page_objects by 1, and put us on the end */
217 if (po->len == po->cap)
218 {
219 po = fz_realloc(ctx, po, sizeof(page_objects) + (po->cap*2 - 1)*sizeof(int));
220 po->cap *= 2;
221 *ppo = po;
222 }
223 po->object[po->len++] = i;
224 }
225
226 static void
page_objects_list_insert(fz_context * ctx,pdf_write_state * opts,int page,int object)227 page_objects_list_insert(fz_context *ctx, pdf_write_state *opts, int page, int object)
228 {
229 page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
230 if (object >= opts->list_len)
231 expand_lists(ctx, opts, object);
232 if (opts->page_object_lists->len < page+1)
233 opts->page_object_lists->len = page+1;
234 page_objects_insert(ctx, &opts->page_object_lists->page[page], object);
235 }
236
237 static void
page_objects_list_set_page_object(fz_context * ctx,pdf_write_state * opts,int page,int object)238 page_objects_list_set_page_object(fz_context *ctx, pdf_write_state *opts, int page, int object)
239 {
240 page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
241 if (object >= opts->list_len)
242 expand_lists(ctx, opts, object);
243 opts->page_object_lists->page[page]->page_object_number = object;
244 }
245
246 static void
page_objects_sort(fz_context * ctx,page_objects * po)247 page_objects_sort(fz_context *ctx, page_objects *po)
248 {
249 int i, j;
250 int n = po->len;
251
252 /* Step 1: Make a heap */
253 /* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
254 for (i = 1; i < n; i++)
255 {
256 /* Now bubble backwards to maintain heap invariant */
257 j = i;
258 while (j != 0)
259 {
260 int tmp;
261 int k = (j-1)>>1;
262 if (po->object[k] >= po->object[j])
263 break;
264 tmp = po->object[k];
265 po->object[k] = po->object[j];
266 po->object[j] = tmp;
267 j = k;
268 }
269 }
270
271 /* Step 2: Heap sort */
272 /* Invariant: valid heap in [0..i), sorted list in [i..n) */
273 /* Initially: i = n */
274 for (i = n-1; i > 0; i--)
275 {
276 /* Swap the maximum (0th) element from the page_objects into its place
277 * in the sorted list (position i). */
278 int tmp = po->object[0];
279 po->object[0] = po->object[i];
280 po->object[i] = tmp;
281 /* Now, the page_objects is invalid because the 0th element is out
282 * of place. Bubble it until the page_objects is valid. */
283 j = 0;
284 while (1)
285 {
286 /* Children are k and k+1 */
287 int k = (j+1)*2-1;
288 /* If both children out of the page_objects, we're done */
289 if (k > i-1)
290 break;
291 /* If both are in the page_objects, pick the larger one */
292 if (k < i-1 && po->object[k] < po->object[k+1])
293 k++;
294 /* If j is bigger than k (i.e. both of its children),
295 * we're done */
296 if (po->object[j] > po->object[k])
297 break;
298 tmp = po->object[k];
299 po->object[k] = po->object[j];
300 po->object[j] = tmp;
301 j = k;
302 }
303 }
304 }
305
306 static int
order_ge(int ui,int uj)307 order_ge(int ui, int uj)
308 {
309 /*
310 For linearization, we need to order the sections as follows:
311
312 Remaining pages (Part 7)
313 Shared objects (Part 8)
314 Objects not associated with any page (Part 9)
315 Any "other" objects
316 (Header)(Part 1)
317 (Linearization params) (Part 2)
318 (1st page Xref/Trailer) (Part 3)
319 Catalogue (and other document level objects) (Part 4)
320 First page (Part 6)
321 (Primary Hint stream) (*) (Part 5)
322 Any free objects
323
324 Note, this is NOT the same order they appear in
325 the final file!
326
327 (*) The PDF reference gives us the option of putting the hint stream
328 after the first page, and we take it, for simplicity.
329 */
330
331 /* If the 2 objects are in the same section, then page object comes first. */
332 if (((ui ^ uj) & ~USE_PAGE_OBJECT) == 0)
333 return ((ui & USE_PAGE_OBJECT) == 0);
334 /* Put unused objects last */
335 else if (ui == 0)
336 return 1;
337 else if (uj == 0)
338 return 0;
339 /* Put the hint stream before that... */
340 else if (ui & USE_HINTS)
341 return 1;
342 else if (uj & USE_HINTS)
343 return 0;
344 /* Put page 1 before that... */
345 else if (ui & USE_PAGE1)
346 return 1;
347 else if (uj & USE_PAGE1)
348 return 0;
349 /* Put the catalogue before that... */
350 else if (ui & USE_CATALOGUE)
351 return 1;
352 else if (uj & USE_CATALOGUE)
353 return 0;
354 /* Put the linearization params before that... */
355 else if (ui & USE_PARAMS)
356 return 1;
357 else if (uj & USE_PARAMS)
358 return 0;
359 /* Put other objects before that */
360 else if (ui & USE_OTHER_OBJECTS)
361 return 1;
362 else if (uj & USE_OTHER_OBJECTS)
363 return 0;
364 /* Put shared objects before that... */
365 else if (ui & USE_SHARED)
366 return 1;
367 else if (uj & USE_SHARED)
368 return 0;
369 /* And otherwise, order by the page number on which
370 * they are used. */
371 return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT);
372 }
373
/*
 * Heapsort the 'n' indices in 'list' in place.
 *
 * The entries of 'list' are not compared directly; instead entry x is
 * ranked by val[x], and two ranks are compared with 'ge', which must
 * return non-zero when its first argument sorts at or after its
 * second. On return, list is ordered so that ge(val[list[i+1]],
 * val[list[i]]) holds for neighbouring entries (given a consistent
 * 'ge').
 */
static void
heap_sort(int *list, int n, const int *val, int (*ge)(int, int))
{
	int end, pos, t;

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Initially:\n");
	for (pos = 0; pos < n; pos++)
	{
		fprintf(stderr, "%d: %d %x\n", pos, list[pos], val[list[pos]]);
	}
#endif

	/* Phase 1: build a heap by adding one element at a time.
	 * Before each pass list[0..end) is a valid heap; list[end] is
	 * sifted up until its parent is 'ge' it. */
	for (end = 1; end < n; end++)
	{
		for (pos = end; pos != 0; )
		{
			int parent = (pos-1)>>1;

			if (ge(val[list[parent]], val[list[pos]]))
				break;
			t = list[parent];
			list[parent] = list[pos];
			list[pos] = t;
			pos = parent;
		}
	}

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Valid heap:\n");
	for (pos = 0; pos < n; pos++)
	{
		int child = 2*pos + 1;

		fprintf(stderr, "%d: %d %x ", pos, list[pos], val[list[pos]]);
		if (child < n)
		{
			if (ge(val[list[pos]], val[list[child]]))
				fprintf(stderr, "OK ");
			else
				fprintf(stderr, "BAD ");
		}
		if (child+1 < n)
		{
			if (ge(val[list[pos]], val[list[child+1]]))
				fprintf(stderr, "OK\n");
			else
				fprintf(stderr, "BAD\n");
		}
		else
			fprintf(stderr, "\n");
	}
#endif

	/* Phase 2: repeatedly move the top of the heap to the end.
	 * Before each pass list[0..end] is a heap and list(end..n) is sorted. */
	for (end = n-1; end > 0; end--)
	{
		t = list[0];
		list[0] = list[end];
		list[end] = t;

		/* The new root may be out of place; sift it down within list[0..end). */
		pos = 0;
		for (;;)
		{
			int child = 2*pos + 1;

			/* No children left inside the heap */
			if (child >= end)
				break;
			/* With two children, follow the one that sorts later */
			if (child + 1 < end && ge(val[list[child+1]], val[list[child]]))
				child++;
			/* Already at or after its later child: heap restored */
			if (ge(val[list[pos]], val[list[child]]))
				break;
			t = list[child];
			list[child] = list[pos];
			list[pos] = t;
			pos = child;
		}
	}

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Sorted:\n");
	for (pos = 0; pos < n; pos++)
	{
		fprintf(stderr, "%d: %d %x ", pos, list[pos], val[list[pos]]);
		if (pos+1 < n)
		{
			if (ge(val[list[pos+1]], val[list[pos]]))
				fprintf(stderr, "OK");
			else
				fprintf(stderr, "BAD");
		}
		fprintf(stderr, "\n");
	}
#endif
}
479
480 static void
page_objects_dedupe(fz_context * ctx,page_objects * po)481 page_objects_dedupe(fz_context *ctx, page_objects *po)
482 {
483 int i, j;
484 int n = po->len-1;
485
486 for (i = 0; i < n; i++)
487 {
488 if (po->object[i] == po->object[i+1])
489 break;
490 }
491 j = i; /* j points to the last valid one */
492 i++; /* i points to the first one we haven't looked at */
493 for (; i < n; i++)
494 {
495 if (po->object[j] != po->object[i])
496 po->object[++j] = po->object[i];
497 }
498 po->len = j+1;
499 }
500
501 static void
page_objects_list_sort_and_dedupe(fz_context * ctx,page_objects_list * pol)502 page_objects_list_sort_and_dedupe(fz_context *ctx, page_objects_list *pol)
503 {
504 int i;
505 int n = pol->len;
506
507 for (i = 0; i < n; i++)
508 {
509 page_objects_sort(ctx, pol->page[i]);
510 page_objects_dedupe(ctx, pol->page[i]);
511 }
512 }
513
514 #ifdef DEBUG_LINEARIZATION
515 static void
page_objects_dump(pdf_write_state * opts)516 page_objects_dump(pdf_write_state *opts)
517 {
518 page_objects_list *pol = opts->page_object_lists;
519 int i, j;
520
521 for (i = 0; i < pol->len; i++)
522 {
523 page_objects *p = pol->page[i];
524 fprintf(stderr, "Page %d\n", i+1);
525 for (j = 0; j < p->len; j++)
526 {
527 int o = p->object[j];
528 fprintf(stderr, "\tObject %d: use=%x\n", o, opts->use_list[o]);
529 }
530 fprintf(stderr, "Byte range=%d->%d\n", p->min_ofs, p->max_ofs);
531 fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", p->num_objects, p->num_shared);
532 fprintf(stderr, "Page object number=%d\n", p->page_object_number);
533 }
534 }
535
536 static void
objects_dump(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)537 objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
538 {
539 int i;
540
541 for (i=0; i < pdf_xref_len(ctx, doc); i++)
542 {
543 fprintf(stderr, "Object %d use=%x offset=%d\n", i, opts->use_list[i], (int)opts->ofs_list[i]);
544 }
545 }
546 #endif
547
548 /*
549 * Garbage collect objects not reachable from the trailer.
550 */
551
552 /* Mark a reference. If it's been marked already, return NULL (as no further
553 * processing is required). If it's not, return the resolved object so
554 * that we can continue our recursive marking. If it's a duff reference
555 * return the fact so that we can remove the reference at source.
556 */
markref(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * obj,int * duff)557 static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj, int *duff)
558 {
559 int num = pdf_to_num(ctx, obj);
560
561 if (num <= 0 || num >= pdf_xref_len(ctx, doc))
562 {
563 *duff = 1;
564 return NULL;
565 }
566 *duff = 0;
567 if (opts->use_list[num])
568 return NULL;
569
570 opts->use_list[num] = 1;
571
572 /* Bake in /Length in stream objects */
573 fz_try(ctx)
574 {
575 if (pdf_obj_num_is_stream(ctx, doc, num))
576 {
577 pdf_obj *len = pdf_dict_get(ctx, obj, PDF_NAME(Length));
578 if (pdf_is_indirect(ctx, len))
579 {
580 opts->use_list[pdf_to_num(ctx, len)] = 0;
581 len = pdf_resolve_indirect(ctx, len);
582 pdf_dict_put(ctx, obj, PDF_NAME(Length), len);
583 }
584 }
585 }
586 fz_catch(ctx)
587 {
588 /* Leave broken */
589 }
590
591 obj = pdf_resolve_indirect(ctx, obj);
592 if (obj == NULL || pdf_is_null(ctx, obj))
593 {
594 *duff = 1;
595 opts->use_list[num] = 0;
596 }
597
598 return obj;
599 }
600
601 #ifdef DEBUG_MARK_AND_SWEEP
/* Current nesting depth of the marking recursion (debug tracing only) */
static int depth = 0;

/* Print 'depth' spaces to stdout as indentation for debug tracing.
 * The depth counter itself is only read, never modified, so the
 * increments and decrements made around recursive calls stay
 * balanced. Nothing is printed when depth is zero or negative. */
static void indent(void)
{
	int width = depth;

	if (width > 0)
		printf("%*s", width, "");
}
616 #define DEBUGGING_MARKING(A) do { A; } while (0)
617 #else
618 #define DEBUGGING_MARKING(A) do { } while (0)
619 #endif
620
621 /* Recursively mark an object. If any references found are duff, then
622 * replace them with nulls. */
markobj(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * obj)623 static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
624 {
625 int i;
626
627 DEBUGGING_MARKING(depth++);
628
629 while (pdf_is_indirect(ctx, obj))
630 {
631 int duff;
632 DEBUGGING_MARKING(indent(); printf("Marking object %d\n", pdf_to_num(ctx, obj)));
633 obj = markref(ctx, doc, opts, obj, &duff);
634 if (duff)
635 {
636 DEBUGGING_MARKING(depth--);
637 return 1;
638 }
639 }
640
641 if (pdf_is_dict(ctx, obj))
642 {
643 int n = pdf_dict_len(ctx, obj);
644 for (i = 0; i < n; i++)
645 {
646 DEBUGGING_MARKING(indent(); printf("DICT[%d/%d] = %s\n", i, n, pdf_to_name(ctx, pdf_dict_get_key(ctx, obj, i))));
647 if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)))
648 pdf_dict_put_val_null(ctx, obj, i);
649 }
650 }
651
652 else if (pdf_is_array(ctx, obj))
653 {
654 int n = pdf_array_len(ctx, obj);
655 for (i = 0; i < n; i++)
656 {
657 DEBUGGING_MARKING(indent(); printf("ARRAY[%d/%d]\n", i, n));
658 if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)))
659 pdf_array_put(ctx, obj, i, PDF_NULL);
660 }
661 }
662
663 DEBUGGING_MARKING(depth--);
664
665 return 0;
666 }
667
668 /*
669 * Scan for and remove duplicate objects (slow)
670 */
671
removeduplicateobjs(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)672 static void removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
673 {
674 int num, other, max_num;
675 int xref_len = pdf_xref_len(ctx, doc);
676
677 for (num = 1; num < xref_len; num++)
678 {
679 /* Only compare an object to objects preceding it */
680 for (other = 1; other < num; other++)
681 {
682 pdf_obj *a, *b;
683 int newnum, streama = 0, streamb = 0, differ = 0;
684
685 if (num == other || !opts->use_list[num] || !opts->use_list[other])
686 continue;
687
688 /* TODO: resolve indirect references to see if we can omit them */
689
690 /*
691 * Comparing stream objects data contents would take too long.
692 *
693 * pdf_obj_num_is_stream calls pdf_cache_object and ensures
694 * that the xref table has the objects loaded.
695 */
696 fz_try(ctx)
697 {
698 streama = pdf_obj_num_is_stream(ctx, doc, num);
699 streamb = pdf_obj_num_is_stream(ctx, doc, other);
700 differ = streama || streamb;
701 if (streama && streamb && opts->do_garbage >= 4)
702 differ = 0;
703 }
704 fz_catch(ctx)
705 {
706 /* Assume different */
707 differ = 1;
708 }
709 if (differ)
710 continue;
711
712 a = pdf_get_xref_entry(ctx, doc, num)->obj;
713 b = pdf_get_xref_entry(ctx, doc, other)->obj;
714
715 if (pdf_objcmp(ctx, a, b))
716 continue;
717
718 if (streama && streamb)
719 {
720 /* Check to see if streams match too. */
721 fz_buffer *sa = NULL;
722 fz_buffer *sb = NULL;
723
724 fz_var(sa);
725 fz_var(sb);
726
727 differ = 1;
728 fz_try(ctx)
729 {
730 unsigned char *dataa, *datab;
731 size_t lena, lenb;
732 sa = pdf_load_raw_stream_number(ctx, doc, num);
733 sb = pdf_load_raw_stream_number(ctx, doc, other);
734 lena = fz_buffer_storage(ctx, sa, &dataa);
735 lenb = fz_buffer_storage(ctx, sb, &datab);
736 if (lena == lenb && memcmp(dataa, datab, lena) == 0)
737 differ = 0;
738 }
739 fz_always(ctx)
740 {
741 fz_drop_buffer(ctx, sa);
742 fz_drop_buffer(ctx, sb);
743 }
744 fz_catch(ctx)
745 {
746 fz_rethrow(ctx);
747 }
748 if (differ)
749 continue;
750 }
751
752 /* Keep the lowest numbered object */
753 newnum = fz_mini(num, other);
754 max_num = fz_maxi(num, other);
755 if (max_num >= opts->list_len)
756 expand_lists(ctx, opts, max_num);
757 opts->renumber_map[num] = newnum;
758 opts->renumber_map[other] = newnum;
759 opts->rev_renumber_map[newnum] = num; /* Either will do */
760 opts->use_list[fz_maxi(num, other)] = 0;
761
762 /* One duplicate was found, do not look for another */
763 break;
764 }
765 }
766 }
767
768 /*
769 * Renumber objects sequentially so the xref is more compact
770 *
771 * This code assumes that any opts->renumber_map[n] <= n for all n.
772 */
773
compactxref(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)774 static void compactxref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
775 {
776 int num, newnum;
777 int xref_len = pdf_xref_len(ctx, doc);
778
779 /*
780 * Update renumber_map in-place, clustering all used
781 * objects together at low object ids. Objects that
782 * already should be renumbered will have their new
783 * object ids be updated to reflect the compaction.
784 */
785
786 if (xref_len > opts->list_len)
787 expand_lists(ctx, opts, xref_len-1);
788
789 newnum = 1;
790 for (num = 1; num < xref_len; num++)
791 {
792 /* If it's not used, map it to zero */
793 if (!opts->use_list[opts->renumber_map[num]])
794 {
795 opts->renumber_map[num] = 0;
796 }
797 /* If it's not moved, compact it. */
798 else if (opts->renumber_map[num] == num)
799 {
800 opts->rev_renumber_map[newnum] = opts->rev_renumber_map[num];
801 opts->renumber_map[num] = newnum++;
802 }
803 /* Otherwise it's used, and moved. We know that it must have
804 * moved down, so the place it's moved to will be in the right
805 * place already. */
806 else
807 {
808 opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]];
809 }
810 }
811 }
812
813 /*
814 * Update indirect objects according to renumbering established when
815 * removing duplicate objects and compacting the xref.
816 */
817
renumberobj(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * obj)818 static void renumberobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
819 {
820 int i;
821 int xref_len = pdf_xref_len(ctx, doc);
822
823 if (pdf_is_dict(ctx, obj))
824 {
825 int n = pdf_dict_len(ctx, obj);
826 for (i = 0; i < n; i++)
827 {
828 pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
829 pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
830 if (pdf_is_indirect(ctx, val))
831 {
832 int o = pdf_to_num(ctx, val);
833 if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
834 val = PDF_NULL;
835 else
836 val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
837 pdf_dict_put_drop(ctx, obj, key, val);
838 }
839 else
840 {
841 renumberobj(ctx, doc, opts, val);
842 }
843 }
844 }
845
846 else if (pdf_is_array(ctx, obj))
847 {
848 int n = pdf_array_len(ctx, obj);
849 for (i = 0; i < n; i++)
850 {
851 pdf_obj *val = pdf_array_get(ctx, obj, i);
852 if (pdf_is_indirect(ctx, val))
853 {
854 int o = pdf_to_num(ctx, val);
855 if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
856 val = PDF_NULL;
857 else
858 val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
859 pdf_array_put_drop(ctx, obj, i, val);
860 }
861 else
862 {
863 renumberobj(ctx, doc, opts, val);
864 }
865 }
866 }
867 }
868
/* Apply renumber_map to the whole document: rewrite the references
 * held in the trailer and in every kept object, move the used xref
 * entries into a new compacted xref table, drop the unused ones, and
 * rebuild use_list to match the new numbering. renumber_map is reset
 * to the identity afterwards. */
static void renumberobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	pdf_xref_entry *newxref = NULL;
	int xref_len = pdf_xref_len(ctx, doc);
	int *new_use_list;
	int newlen;
	int num;

	new_use_list = fz_calloc(ctx, xref_len+3, sizeof(int));

	fz_var(newxref);
	fz_try(ctx)
	{
		/* Apply renumber map to indirect references in all objects in xref */
		renumberobj(ctx, doc, opts, pdf_trailer(ctx, doc));
		for (num = 0; num < xref_len; num++)
		{
			int to = opts->renumber_map[num];
			pdf_obj *obj;

			/* If object is going to be dropped, don't bother renumbering */
			if (to == 0)
				continue;

			obj = pdf_get_xref_entry(ctx, doc, num)->obj;

			if (!pdf_is_indirect(ctx, obj))
			{
				renumberobj(ctx, doc, opts, obj);
				continue;
			}

			/* The entry is itself a reference: point it at the new number */
			obj = pdf_new_indirect(ctx, doc, to, 0);
			fz_try(ctx)
			{
				pdf_update_object(ctx, doc, num, obj);
			}
			fz_always(ctx)
			{
				pdf_drop_obj(ctx, obj);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}

		/* Create new table for the reordered, compacted xref */
		newxref = Memento_label(fz_malloc_array(ctx, xref_len + 3, pdf_xref_entry), "pdf_xref_entries");
		newxref[0] = *pdf_get_xref_entry(ctx, doc, 0);

		/* Move used objects into the new compacted xref */
		newlen = 0;
		for (num = 1; num < xref_len; num++)
		{
			pdf_xref_entry *e = pdf_get_xref_entry(ctx, doc, num);
			int to;

			if (!opts->use_list[num])
			{
				/* Unused: release whatever the old entry holds */
				pdf_drop_obj(ctx, e->obj);
				e->obj = NULL;
				fz_drop_buffer(ctx, e->stm_buf);
				e->stm_buf = NULL;
				continue;
			}

			to = opts->renumber_map[num];
			if (to > newlen)
				newlen = to;
			newxref[to] = *e;
			if (e->obj)
			{
				/* The copy in newxref now holds the object */
				pdf_set_obj_parent(ctx, e->obj, to);
				e->obj = NULL;
			}
			new_use_list[to] = opts->use_list[num];
		}

		pdf_replace_xref(ctx, doc, newxref, newlen + 1);
		newxref = NULL;
	}
	fz_catch(ctx)
	{
		fz_free(ctx, newxref);
		fz_free(ctx, new_use_list);
		fz_rethrow(ctx);
	}

	fz_free(ctx, opts->use_list);
	opts->use_list = new_use_list;

	/* Everything has been renumbered; the map is the identity again */
	for (num = 1; num < xref_len; num++)
		opts->renumber_map[num] = num;
}
960
/* Translate every object number stored in the per-page object lists
 * (including each page's own page object number) through
 * renumber_map. */
static void page_objects_list_renumber(pdf_write_state *opts)
{
	int page;

	for (page = 0; page < opts->page_object_lists->len; page++)
	{
		page_objects *po = opts->page_object_lists->page[page];
		int k = 0;

		while (k < po->len)
		{
			po->object[k] = opts->renumber_map[po->object[k]];
			k++;
		}
		po->page_object_number = opts->renumber_map[po->page_object_number];
	}
}
975
976 static void
mark_all(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * val,int flag,int page)977 mark_all(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *val, int flag, int page)
978 {
979 if (pdf_mark_obj(ctx, val))
980 return;
981
982 fz_try(ctx)
983 {
984 if (pdf_is_indirect(ctx, val))
985 {
986 int num = pdf_to_num(ctx, val);
987 if (num >= opts->list_len)
988 expand_lists(ctx, opts, num);
989 if (opts->use_list[num] & USE_PAGE_MASK)
990 /* Already used */
991 opts->use_list[num] |= USE_SHARED;
992 else
993 opts->use_list[num] |= flag;
994 if (page >= 0)
995 page_objects_list_insert(ctx, opts, page, num);
996 }
997
998 if (pdf_is_dict(ctx, val))
999 {
1000 int i, n;
1001 n = pdf_dict_len(ctx, val);
1002
1003 for (i = 0; i < n; i++)
1004 {
1005 pdf_obj *v = pdf_dict_get_val(ctx, val, i);
1006 pdf_obj *type = pdf_dict_get(ctx, v, PDF_NAME(Type));
1007
1008 /* Don't walk through the Page tree, or direct to a page. */
1009 if (pdf_name_eq(ctx, PDF_NAME(Pages), type) || pdf_name_eq(ctx, PDF_NAME(Page), type))
1010 continue;
1011
1012 mark_all(ctx, doc, opts, v, flag, page);
1013 }
1014 }
1015 else if (pdf_is_array(ctx, val))
1016 {
1017 int i, n = pdf_array_len(ctx, val);
1018
1019 for (i = 0; i < n; i++)
1020 {
1021 pdf_obj *v = pdf_array_get(ctx, val, i);
1022 pdf_obj *type = pdf_dict_get(ctx, v, PDF_NAME(Type));
1023
1024 /* Don't walk through the Page tree, or direct to a page. */
1025 if (pdf_name_eq(ctx, PDF_NAME(Pages), type) || pdf_name_eq(ctx, PDF_NAME(Page), type))
1026 continue;
1027
1028 mark_all(ctx, doc, opts, v, flag, page);
1029 }
1030 }
1031 }
1032 fz_always(ctx)
1033 {
1034 pdf_unmark_obj(ctx, val);
1035 }
1036 fz_catch(ctx)
1037 {
1038 fz_rethrow(ctx);
1039 }
1040 }
1041
1042 static int
mark_pages(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * val,int pagenum)1043 mark_pages(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *val, int pagenum)
1044 {
1045 if (pdf_mark_obj(ctx, val))
1046 return pagenum;
1047
1048 fz_try(ctx)
1049 {
1050 if (pdf_is_dict(ctx, val))
1051 {
1052 if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, val, PDF_NAME(Type))))
1053 {
1054 int num = pdf_to_num(ctx, val);
1055 pdf_unmark_obj(ctx, val);
1056 mark_all(ctx, doc, opts, val, pagenum == 0 ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum);
1057 page_objects_list_set_page_object(ctx, opts, pagenum, num);
1058 pagenum++;
1059 opts->use_list[num] |= USE_PAGE_OBJECT;
1060 }
1061 else
1062 {
1063 int i, n = pdf_dict_len(ctx, val);
1064
1065 for (i = 0; i < n; i++)
1066 {
1067 pdf_obj *key = pdf_dict_get_key(ctx, val, i);
1068 pdf_obj *obj = pdf_dict_get_val(ctx, val, i);
1069
1070 if (pdf_name_eq(ctx, PDF_NAME(Kids), key))
1071 pagenum = mark_pages(ctx, doc, opts, obj, pagenum);
1072 else
1073 mark_all(ctx, doc, opts, obj, USE_CATALOGUE, -1);
1074 }
1075
1076 if (pdf_is_indirect(ctx, val))
1077 {
1078 int num = pdf_to_num(ctx, val);
1079 opts->use_list[num] |= USE_CATALOGUE;
1080 }
1081 }
1082 }
1083 else if (pdf_is_array(ctx, val))
1084 {
1085 int i, n = pdf_array_len(ctx, val);
1086
1087 for (i = 0; i < n; i++)
1088 {
1089 pagenum = mark_pages(ctx, doc, opts, pdf_array_get(ctx, val, i), pagenum);
1090 }
1091 if (pdf_is_indirect(ctx, val))
1092 {
1093 int num = pdf_to_num(ctx, val);
1094 opts->use_list[num] |= USE_CATALOGUE;
1095 }
1096 }
1097 }
1098 fz_always(ctx)
1099 {
1100 pdf_unmark_obj(ctx, val);
1101 }
1102 fz_catch(ctx)
1103 {
1104 fz_rethrow(ctx);
1105 }
1106 return pagenum;
1107 }
1108
1109 static void
mark_root(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * dict)1110 mark_root(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *dict)
1111 {
1112 int i, n = pdf_dict_len(ctx, dict);
1113
1114 if (pdf_mark_obj(ctx, dict))
1115 return;
1116
1117 fz_try(ctx)
1118 {
1119 if (pdf_is_indirect(ctx, dict))
1120 {
1121 int num = pdf_to_num(ctx, dict);
1122 opts->use_list[num] |= USE_CATALOGUE;
1123 }
1124
1125 for (i = 0; i < n; i++)
1126 {
1127 pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1128 pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1129
1130 if (pdf_name_eq(ctx, PDF_NAME(Pages), key))
1131 opts->page_count = mark_pages(ctx, doc, opts, val, 0);
1132 else if (pdf_name_eq(ctx, PDF_NAME(Names), key))
1133 mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -1);
1134 else if (pdf_name_eq(ctx, PDF_NAME(Dests), key))
1135 mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -1);
1136 else if (pdf_name_eq(ctx, PDF_NAME(Outlines), key))
1137 {
1138 int section;
1139 /* Look at PageMode to decide whether to
1140 * USE_OTHER_OBJECTS or USE_PAGE1 here. */
1141 if (pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(PageMode)), PDF_NAME(UseOutlines)))
1142 section = USE_PAGE1;
1143 else
1144 section = USE_OTHER_OBJECTS;
1145 mark_all(ctx, doc, opts, val, section, -1);
1146 }
1147 else
1148 mark_all(ctx, doc, opts, val, USE_CATALOGUE, -1);
1149 }
1150 }
1151 fz_always(ctx)
1152 {
1153 pdf_unmark_obj(ctx, dict);
1154 }
1155 fz_catch(ctx)
1156 {
1157 fz_rethrow(ctx);
1158 }
1159 }
1160
1161 static void
mark_trailer(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * dict)1162 mark_trailer(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *dict)
1163 {
1164 int i, n = pdf_dict_len(ctx, dict);
1165
1166 if (pdf_mark_obj(ctx, dict))
1167 return;
1168
1169 fz_try(ctx)
1170 {
1171 for (i = 0; i < n; i++)
1172 {
1173 pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1174 pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1175
1176 if (pdf_name_eq(ctx, PDF_NAME(Root), key))
1177 mark_root(ctx, doc, opts, val);
1178 else
1179 mark_all(ctx, doc, opts, val, USE_CATALOGUE, -1);
1180 }
1181 }
1182 fz_always(ctx)
1183 {
1184 pdf_unmark_obj(ctx, dict);
1185 }
1186 fz_catch(ctx)
1187 {
1188 fz_rethrow(ctx);
1189 }
1190 }
1191
1192 static void
add_linearization_objs(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)1193 add_linearization_objs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1194 {
1195 pdf_obj *params_obj = NULL;
1196 pdf_obj *params_ref = NULL;
1197 pdf_obj *hint_obj = NULL;
1198 pdf_obj *hint_ref = NULL;
1199 pdf_obj *o;
1200 int params_num, hint_num;
1201
1202 fz_var(params_obj);
1203 fz_var(params_ref);
1204 fz_var(hint_obj);
1205 fz_var(hint_ref);
1206
1207 fz_try(ctx)
1208 {
1209 /* Linearization params */
1210 params_obj = pdf_new_dict(ctx, doc, 10);
1211 params_ref = pdf_add_object(ctx, doc, params_obj);
1212 params_num = pdf_to_num(ctx, params_ref);
1213
1214 opts->use_list[params_num] = USE_PARAMS;
1215 opts->renumber_map[params_num] = params_num;
1216 opts->rev_renumber_map[params_num] = params_num;
1217 opts->gen_list[params_num] = 0;
1218 pdf_dict_put_real(ctx, params_obj, PDF_NAME(Linearized), 1.0f);
1219 opts->linear_l = pdf_new_int(ctx, INT_MIN);
1220 pdf_dict_put(ctx, params_obj, PDF_NAME(L), opts->linear_l);
1221 opts->linear_h0 = pdf_new_int(ctx, INT_MIN);
1222 o = pdf_new_array(ctx, doc, 2);
1223 pdf_dict_put_drop(ctx, params_obj, PDF_NAME(H), o);
1224 pdf_array_push(ctx, o, opts->linear_h0);
1225 opts->linear_h1 = pdf_new_int(ctx, INT_MIN);
1226 pdf_array_push(ctx, o, opts->linear_h1);
1227 opts->linear_o = pdf_new_int(ctx, INT_MIN);
1228 pdf_dict_put(ctx, params_obj, PDF_NAME(O), opts->linear_o);
1229 opts->linear_e = pdf_new_int(ctx, INT_MIN);
1230 pdf_dict_put(ctx, params_obj, PDF_NAME(E), opts->linear_e);
1231 opts->linear_n = pdf_new_int(ctx, INT_MIN);
1232 pdf_dict_put(ctx, params_obj, PDF_NAME(N), opts->linear_n);
1233 opts->linear_t = pdf_new_int(ctx, INT_MIN);
1234 pdf_dict_put(ctx, params_obj, PDF_NAME(T), opts->linear_t);
1235
1236 /* Primary hint stream */
1237 hint_obj = pdf_new_dict(ctx, doc, 10);
1238 hint_ref = pdf_add_object(ctx, doc, hint_obj);
1239 hint_num = pdf_to_num(ctx, hint_ref);
1240
1241 opts->use_list[hint_num] = USE_HINTS;
1242 opts->renumber_map[hint_num] = hint_num;
1243 opts->rev_renumber_map[hint_num] = hint_num;
1244 opts->gen_list[hint_num] = 0;
1245 pdf_dict_put_int(ctx, hint_obj, PDF_NAME(P), 0);
1246 opts->hints_s = pdf_new_int(ctx, INT_MIN);
1247 pdf_dict_put(ctx, hint_obj, PDF_NAME(S), opts->hints_s);
1248 /* FIXME: Do we have thumbnails? Do a T entry */
1249 /* FIXME: Do we have outlines? Do an O entry */
1250 /* FIXME: Do we have article threads? Do an A entry */
1251 /* FIXME: Do we have named destinations? Do a E entry */
1252 /* FIXME: Do we have interactive forms? Do a V entry */
1253 /* FIXME: Do we have document information? Do an I entry */
1254 /* FIXME: Do we have logical structure hierarchy? Do a C entry */
1255 /* FIXME: Do L, Page Label hint table */
1256 pdf_dict_put(ctx, hint_obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1257 opts->hints_length = pdf_new_int(ctx, INT_MIN);
1258 pdf_dict_put(ctx, hint_obj, PDF_NAME(Length), opts->hints_length);
1259 pdf_get_xref_entry(ctx, doc, hint_num)->stm_ofs = 0;
1260 }
1261 fz_always(ctx)
1262 {
1263 pdf_drop_obj(ctx, params_obj);
1264 pdf_drop_obj(ctx, params_ref);
1265 pdf_drop_obj(ctx, hint_ref);
1266 pdf_drop_obj(ctx, hint_obj);
1267 }
1268 fz_catch(ctx)
1269 {
1270 fz_rethrow(ctx);
1271 }
1272 }
1273
1274 static void
lpr_inherit_res_contents(fz_context * ctx,pdf_obj * res,pdf_obj * dict,pdf_obj * text)1275 lpr_inherit_res_contents(fz_context *ctx, pdf_obj *res, pdf_obj *dict, pdf_obj *text)
1276 {
1277 pdf_obj *o, *r;
1278 int i, n;
1279
1280 /* If the parent node doesn't have an entry of this type, give up. */
1281 o = pdf_dict_get(ctx, dict, text);
1282 if (!o)
1283 return;
1284
1285 /* If the resources dict we are building doesn't have an entry of this
1286 * type yet, then just copy it (ensuring it's not a reference) */
1287 r = pdf_dict_get(ctx, res, text);
1288 if (r == NULL)
1289 {
1290 o = pdf_resolve_indirect(ctx, o);
1291 if (pdf_is_dict(ctx, o))
1292 o = pdf_copy_dict(ctx, o);
1293 else if (pdf_is_array(ctx, o))
1294 o = pdf_copy_array(ctx, o);
1295 else
1296 o = NULL;
1297 if (o)
1298 pdf_dict_put_drop(ctx, res, text, o);
1299 return;
1300 }
1301
1302 /* Otherwise we need to merge o into r */
1303 if (pdf_is_dict(ctx, o))
1304 {
1305 n = pdf_dict_len(ctx, o);
1306 for (i = 0; i < n; i++)
1307 {
1308 pdf_obj *key = pdf_dict_get_key(ctx, o, i);
1309 pdf_obj *val = pdf_dict_get_val(ctx, o, i);
1310
1311 if (pdf_dict_get(ctx, res, key))
1312 continue;
1313 pdf_dict_put(ctx, res, key, val);
1314 }
1315 }
1316 }
1317
1318 static void
lpr_inherit_res(fz_context * ctx,pdf_obj * node,int depth,pdf_obj * dict)1319 lpr_inherit_res(fz_context *ctx, pdf_obj *node, int depth, pdf_obj *dict)
1320 {
1321 while (1)
1322 {
1323 pdf_obj *o;
1324
1325 node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1326 depth--;
1327 if (!node || depth < 0)
1328 break;
1329
1330 o = pdf_dict_get(ctx, node, PDF_NAME(Resources));
1331 if (o)
1332 {
1333 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ExtGState));
1334 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ColorSpace));
1335 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Pattern));
1336 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Shading));
1337 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(XObject));
1338 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Font));
1339 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ProcSet));
1340 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Properties));
1341 }
1342 }
1343 }
1344
1345 static pdf_obj *
lpr_inherit(fz_context * ctx,pdf_obj * node,char * text,int depth)1346 lpr_inherit(fz_context *ctx, pdf_obj *node, char *text, int depth)
1347 {
1348 do
1349 {
1350 pdf_obj *o = pdf_dict_gets(ctx, node, text);
1351
1352 if (o)
1353 return pdf_resolve_indirect(ctx, o);
1354 node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1355 depth--;
1356 }
1357 while (depth >= 0 && node);
1358
1359 return NULL;
1360 }
1361
1362 static int
lpr(fz_context * ctx,pdf_document * doc,pdf_obj * node,int depth,int page)1363 lpr(fz_context *ctx, pdf_document *doc, pdf_obj *node, int depth, int page)
1364 {
1365 pdf_obj *kids;
1366 pdf_obj *o = NULL;
1367 int i, n;
1368
1369 if (pdf_mark_obj(ctx, node))
1370 return page;
1371
1372 fz_var(o);
1373
1374 fz_try(ctx)
1375 {
1376 if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, node, PDF_NAME(Type))))
1377 {
1378 pdf_obj *r; /* r is deliberately not cleaned up */
1379
1380 /* Copy resources down to the child */
1381 o = pdf_keep_obj(ctx, pdf_dict_get(ctx, node, PDF_NAME(Resources)));
1382 if (!o)
1383 {
1384 o = pdf_keep_obj(ctx, pdf_new_dict(ctx, doc, 2));
1385 pdf_dict_put(ctx, node, PDF_NAME(Resources), o);
1386 }
1387 lpr_inherit_res(ctx, node, depth, o);
1388 r = lpr_inherit(ctx, node, "MediaBox", depth);
1389 if (r)
1390 pdf_dict_put(ctx, node, PDF_NAME(MediaBox), r);
1391 r = lpr_inherit(ctx, node, "CropBox", depth);
1392 if (r)
1393 pdf_dict_put(ctx, node, PDF_NAME(CropBox), r);
1394 r = lpr_inherit(ctx, node, "BleedBox", depth);
1395 if (r)
1396 pdf_dict_put(ctx, node, PDF_NAME(BleedBox), r);
1397 r = lpr_inherit(ctx, node, "TrimBox", depth);
1398 if (r)
1399 pdf_dict_put(ctx, node, PDF_NAME(TrimBox), r);
1400 r = lpr_inherit(ctx, node, "ArtBox", depth);
1401 if (r)
1402 pdf_dict_put(ctx, node, PDF_NAME(ArtBox), r);
1403 r = lpr_inherit(ctx, node, "Rotate", depth);
1404 if (r)
1405 pdf_dict_put(ctx, node, PDF_NAME(Rotate), r);
1406 page++;
1407 }
1408 else
1409 {
1410 kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
1411 n = pdf_array_len(ctx, kids);
1412 for(i = 0; i < n; i++)
1413 {
1414 page = lpr(ctx, doc, pdf_array_get(ctx, kids, i), depth+1, page);
1415 }
1416 pdf_dict_del(ctx, node, PDF_NAME(Resources));
1417 pdf_dict_del(ctx, node, PDF_NAME(MediaBox));
1418 pdf_dict_del(ctx, node, PDF_NAME(CropBox));
1419 pdf_dict_del(ctx, node, PDF_NAME(BleedBox));
1420 pdf_dict_del(ctx, node, PDF_NAME(TrimBox));
1421 pdf_dict_del(ctx, node, PDF_NAME(ArtBox));
1422 pdf_dict_del(ctx, node, PDF_NAME(Rotate));
1423 }
1424 }
1425 fz_always(ctx)
1426 {
1427 pdf_drop_obj(ctx, o);
1428 }
1429 fz_catch(ctx)
1430 {
1431 fz_rethrow(ctx);
1432 }
1433
1434 pdf_unmark_obj(ctx, node);
1435
1436 return page;
1437 }
1438
1439 void
pdf_localise_page_resources(fz_context * ctx,pdf_document * doc)1440 pdf_localise_page_resources(fz_context *ctx, pdf_document *doc)
1441 {
1442 if (doc->resources_localised)
1443 return;
1444
1445 lpr(ctx, doc, pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Pages), NULL), 0, 0);
1446
1447 doc->resources_localised = 1;
1448 }
1449
1450 static void
linearize(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)1451 linearize(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1452 {
1453 int i;
1454 int n = pdf_xref_len(ctx, doc) + 2;
1455 int *reorder;
1456 int *rev_renumber_map;
1457
1458 opts->page_object_lists = page_objects_list_create(ctx);
1459
1460 /* Ensure that every page has local references of its resources */
1461 /* FIXME: We could 'thin' the resources according to what is actually
1462 * required for each page, but this would require us to run the page
1463 * content streams. */
1464 pdf_localise_page_resources(ctx, doc);
1465
1466 /* Walk the objects for each page, marking which ones are used, where */
1467 memset(opts->use_list, 0, n * sizeof(int));
1468 mark_trailer(ctx, doc, opts, pdf_trailer(ctx, doc));
1469
1470 /* Add new objects required for linearization */
1471 add_linearization_objs(ctx, doc, opts);
1472
1473 #ifdef DEBUG_WRITING
1474 fprintf(stderr, "Usage calculated:\n");
1475 for (i=0; i < pdf_xref_len(ctx, doc); i++)
1476 {
1477 fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]);
1478 }
1479 #endif
1480
1481 /* Allocate/init the structures used for renumbering the objects */
1482 reorder = fz_calloc(ctx, n, sizeof(int));
1483 rev_renumber_map = fz_calloc(ctx, n, sizeof(int));
1484 for (i = 0; i < n; i++)
1485 {
1486 reorder[i] = i;
1487 }
1488
1489 /* Heap sort the reordering */
1490 heap_sort(reorder+1, n-1, opts->use_list, &order_ge);
1491
1492 #ifdef DEBUG_WRITING
1493 fprintf(stderr, "Reordered:\n");
1494 for (i=1; i < pdf_xref_len(ctx, doc); i++)
1495 {
1496 fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]);
1497 }
1498 #endif
1499
1500 /* Find the split point */
1501 for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++) {}
1502 opts->start = i;
1503
1504 /* Roll the reordering into the renumber_map */
1505 for (i = 0; i < n; i++)
1506 {
1507 opts->renumber_map[reorder[i]] = i;
1508 rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]];
1509 }
1510 fz_free(ctx, opts->rev_renumber_map);
1511 opts->rev_renumber_map = rev_renumber_map;
1512 fz_free(ctx, reorder);
1513
1514 /* Apply the renumber_map */
1515 page_objects_list_renumber(opts);
1516 renumberobjs(ctx, doc, opts);
1517
1518 page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists);
1519 }
1520
1521 static void
update_linearization_params(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)1522 update_linearization_params(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1523 {
1524 int64_t offset;
1525 pdf_set_int(ctx, opts->linear_l, opts->file_len);
1526 /* Primary hint stream offset (of object, not stream!) */
1527 pdf_set_int(ctx, opts->linear_h0, opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1528 /* Primary hint stream length (of object, not stream!) */
1529 offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1530 pdf_set_int(ctx, opts->linear_h1, offset - opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1531 /* Object number of first pages page object (the first object of page 0) */
1532 pdf_set_int(ctx, opts->linear_o, opts->page_object_lists->page[0]->object[0]);
1533 /* Offset of end of first page (first page is followed by primary
1534 * hint stream (object n-1) then remaining pages (object 1...). The
1535 * primary hint stream counts as part of the first pages data, I think.
1536 */
1537 offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1538 pdf_set_int(ctx, opts->linear_e, offset);
1539 /* Number of pages in document */
1540 pdf_set_int(ctx, opts->linear_n, opts->page_count);
1541 /* Offset of first entry in main xref table */
1542 pdf_set_int(ctx, opts->linear_t, opts->first_xref_entry_offset + opts->hintstream_len);
1543 /* Offset of shared objects hint table in the primary hint stream */
1544 pdf_set_int(ctx, opts->hints_s, opts->hints_shared_offset);
1545 /* Primary hint stream length */
1546 pdf_set_int(ctx, opts->hints_length, opts->hintstream_len);
1547 }
1548
1549 /*
1550 * Make sure we have loaded objects from object streams.
1551 */
1552
preloadobjstms(fz_context * ctx,pdf_document * doc)1553 static void preloadobjstms(fz_context *ctx, pdf_document *doc)
1554 {
1555 pdf_obj *obj;
1556 int num;
1557
1558 /* xref_len may change due to repair, so check it every iteration */
1559 for (num = 0; num < pdf_xref_len(ctx, doc); num++)
1560 {
1561 if (pdf_get_xref_entry(ctx, doc, num)->type == 'o')
1562 {
1563 obj = pdf_load_object(ctx, doc, num);
1564 pdf_drop_obj(ctx, obj);
1565 }
1566 }
1567 }
1568
1569 /*
1570 * Save streams and objects to the output
1571 */
1572
/*
	Decide whether a stream holds an unfiltered 1 bit-per-pixel image
	whose data is exactly len bytes long.

	obj: the stream dictionary.
	len: number of bytes of stream data.
	w,h: receive the Width and Height entries whenever the Subtype is
	     Image (even if the function then returns 0).

	Returns 1 if the Subtype is Image, Width and Height are positive,
	len equals ceil(Width/8) * Height, and either ImageMask is true or
	BitsPerComponent is the integer 1 with ColorSpace DeviceGray.
	Returns 0 otherwise.
*/
static int is_bitmap_stream(fz_context *ctx, pdf_obj *obj, size_t len, int *w, int *h)
{
	pdf_obj *bpc;
	pdf_obj *cs;
	size_t stride;

	if (pdf_dict_get(ctx, obj, PDF_NAME(Subtype)) != PDF_NAME(Image))
		return 0;
	*w = pdf_dict_get_int(ctx, obj, PDF_NAME(Width));
	*h = pdf_dict_get_int(ctx, obj, PDF_NAME(Height));

	/* Reject empty or negative dimensions up front: they cannot describe
	 * a bitmap, and they would otherwise lead to shifting a negative
	 * value or to a zero sized image matching an empty stream. */
	if (*w <= 0 || *h <= 0)
		return 0;

	/* Bytes per row, computed in size_t so that Width close to INT_MAX
	 * cannot overflow a signed int. */
	stride = ((size_t)*w + 7) >> 3;

	/* len must equal stride * Height. Checked by division (stride is at
	 * least 1 here) so the product cannot wrap where size_t is 32 bits. */
	if (len % stride != 0 || len / stride != (size_t)*h)
		return 0;

	if (pdf_dict_get_bool(ctx, obj, PDF_NAME(ImageMask)))
	{
		return 1;
	}
	else
	{
		bpc = pdf_dict_get(ctx, obj, PDF_NAME(BitsPerComponent));
		if (!pdf_is_int(ctx, bpc))
			return 0;
		if (pdf_to_int(ctx, bpc) != 1)
			return 0;
		cs = pdf_dict_get(ctx, obj, PDF_NAME(ColorSpace));
		if (!pdf_name_eq(ctx, cs, PDF_NAME(DeviceGray)))
			return 0;
		return 1;
	}
}
1602
/*
	Classify a byte value: returns 0 for newline, carriage return, tab
	and for any value in the range 32..127; returns 1 for everything else.
*/
static inline int isbinary(int c)
{
	switch (c)
	{
	case '\n':
	case '\r':
	case '\t':
		return 0;
	default:
		return !(c >= 32 && c <= 127);
	}
}
1609
/*
	Returns 1 if any of the len bytes at data is classified as binary by
	isbinary, 0 otherwise (including when len is 0).
*/
static int isbinarystream(fz_context *ctx, const unsigned char *data, size_t len)
{
	const unsigned char *cursor = data;
	size_t remaining = len;

	while (remaining-- > 0)
	{
		if (isbinary(*cursor++))
			return 1;
	}
	return 0;
}
1618
hexbuf(fz_context * ctx,const unsigned char * p,size_t n)1619 static fz_buffer *hexbuf(fz_context *ctx, const unsigned char *p, size_t n)
1620 {
1621 static const char hex[17] = "0123456789abcdef";
1622 int x = 0;
1623 size_t len = n * 2 + (n / 32) + 1;
1624 unsigned char *data = Memento_label(fz_malloc(ctx, len), "hexbuf");
1625 fz_buffer *buf = fz_new_buffer_from_data(ctx, data, len);
1626
1627 while (n--)
1628 {
1629 *data++ = hex[*p >> 4];
1630 *data++ = hex[*p & 15];
1631 if (++x == 32)
1632 {
1633 *data++ = '\n';
1634 x = 0;
1635 }
1636 p++;
1637 }
1638
1639 *data++ = '>';
1640
1641 return buf;
1642 }
1643
/*
	Prepend ASCIIHexDecode to the Filter entry of a stream dictionary,
	keeping DecodeParms aligned with it.

	- Filter is a name: it becomes [ASCIIHexDecode name]; a dictionary
	  DecodeParms becomes [null dict].
	- Filter is an array: ASCIIHexDecode is inserted at index 0; an
	  array DecodeParms gets null inserted at index 0.
	- Otherwise: Filter is set to ASCIIHexDecode.
*/
static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
	pdf_obj *wrapped_filter = NULL;
	pdf_obj *wrapped_parms = NULL;
	pdf_obj *filter = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
	pdf_obj *parms = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));

	fz_var(wrapped_filter);
	fz_var(wrapped_parms);

	fz_try(ctx)
	{
		if (pdf_is_name(ctx, filter))
		{
			/* Single filter: promote to a two element array. */
			wrapped_filter = pdf_new_array(ctx, doc, 2);
			pdf_array_push(ctx, wrapped_filter, PDF_NAME(ASCIIHexDecode));
			pdf_array_push(ctx, wrapped_filter, filter);
			filter = wrapped_filter;
			if (pdf_is_dict(ctx, parms))
			{
				wrapped_parms = pdf_new_array(ctx, doc, 2);
				pdf_array_push(ctx, wrapped_parms, PDF_NULL);
				pdf_array_push(ctx, wrapped_parms, parms);
				parms = wrapped_parms;
			}
		}
		else if (pdf_is_array(ctx, filter))
		{
			/* Filter chain: put the hex filter in front. */
			pdf_array_insert(ctx, filter, PDF_NAME(ASCIIHexDecode), 0);
			if (pdf_is_array(ctx, parms))
				pdf_array_insert(ctx, parms, PDF_NULL, 0);
		}
		else
		{
			filter = PDF_NAME(ASCIIHexDecode);
		}

		pdf_dict_put(ctx, dict, PDF_NAME(Filter), filter);
		if (parms)
			pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), parms);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, wrapped_filter);
		pdf_drop_obj(ctx, wrapped_parms);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
}
1692
deflatebuf(fz_context * ctx,const unsigned char * p,size_t n)1693 static fz_buffer *deflatebuf(fz_context *ctx, const unsigned char *p, size_t n)
1694 {
1695 fz_buffer *buf;
1696 uLongf csize;
1697 int t;
1698 uLong longN = (uLong)n;
1699 unsigned char *data;
1700 size_t cap;
1701
1702 if (n != (size_t)longN)
1703 fz_throw(ctx, FZ_ERROR_GENERIC, "Buffer too large to deflate");
1704
1705 cap = compressBound(longN);
1706 data = Memento_label(fz_malloc(ctx, cap), "pdf_write_deflate");
1707 buf = fz_new_buffer_from_data(ctx, data, cap);
1708 csize = (uLongf)cap;
1709 t = compress(data, &csize, p, longN);
1710 if (t != Z_OK)
1711 {
1712 fz_drop_buffer(ctx, buf);
1713 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot deflate buffer");
1714 }
1715 fz_resize_buffer(ctx, buf, csize);
1716 return buf;
1717 }
1718
/*
	Remove a leading ASCIIHexDecode filter from a stream dictionary.

	Returns 1 if ASCIIHexDecode was the (first) filter and has been
	removed, 0 otherwise.

	When Filter is an array, the array is additionally normalised
	whether or not a hex filter was removed: a one element array is
	replaced by its single element (and DecodeParms, if present, by its
	first element), and an empty array causes Filter and DecodeParms to
	be deleted.
*/
static int striphexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
	pdf_obj *filter = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
	pdf_obj *parms = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
	int stripped = 0;
	int remaining;

	if (!pdf_is_array(ctx, filter))
	{
		/* Single (or no) filter. */
		if (filter != PDF_NAME(ASCIIHexDecode))
			return 0;
		pdf_dict_del(ctx, dict, PDF_NAME(Filter));
		pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
		return 1;
	}

	/* Remove ASCIIHexDecode from head of filter list */
	if (pdf_array_get(ctx, filter, 0) == PDF_NAME(ASCIIHexDecode))
	{
		stripped = 1;
		pdf_array_delete(ctx, filter, 0);
		if (pdf_is_array(ctx, parms))
			pdf_array_delete(ctx, parms, 0);
	}

	remaining = pdf_array_len(ctx, filter);
	if (remaining == 1)
	{
		/* Unpack array if only one filter remains */
		filter = pdf_array_get(ctx, filter, 0);
		pdf_dict_put(ctx, dict, PDF_NAME(Filter), filter);
		if (parms)
		{
			parms = pdf_array_get(ctx, parms, 0);
			pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), parms);
		}
	}
	else if (remaining == 0)
	{
		/* Remove array if no filters remain */
		pdf_dict_del(ctx, dict, PDF_NAME(Filter));
		pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
	}

	return stripped;
}
1764
unhexbuf(fz_context * ctx,const unsigned char * p,size_t n)1765 static fz_buffer *unhexbuf(fz_context *ctx, const unsigned char *p, size_t n)
1766 {
1767 fz_stream *mstm = NULL;
1768 fz_stream *xstm = NULL;
1769 fz_buffer *out = NULL;
1770 fz_var(mstm);
1771 fz_var(xstm);
1772 fz_try(ctx)
1773 {
1774 mstm = fz_open_memory(ctx, p, n);
1775 xstm = fz_open_ahxd(ctx, mstm);
1776 out = fz_read_all(ctx, xstm, n/2);
1777 }
1778 fz_always(ctx)
1779 {
1780 fz_drop_stream(ctx, xstm);
1781 fz_drop_stream(ctx, mstm);
1782 }
1783 fz_catch(ctx)
1784 fz_rethrow(ctx);
1785 return out;
1786 }
1787
/*
	Callback adapter: arg is the fz_output to which the len bytes at
	data are written. Passed to pdf_encrypt_data by the stream writers.
*/
static void write_data(fz_context *ctx, void *arg, const unsigned char *data, size_t len)
{
	fz_output *out = (fz_output *)arg;

	fz_write_data(ctx, out, data, len);
}
1792
/*
	Write stream object num/gen to opts->out, starting from its raw
	(still filtered) data.

	obj_orig:   the stream dictionary; a copy is modified and written,
	            via pdf_copy_dict.
	do_deflate: if set, a leading ASCIIHexDecode filter is stripped and
	            its data decoded; then, if no Filter remains, the data is
	            compressed (CCITT G4 for streams accepted by
	            is_bitmap_stream, Flate otherwise).
	unenc:      if set, the object is written unencrypted; otherwise the
	            dictionary and data go through opts->crypt.

	If opts->do_ascii is set and the resulting data contains binary
	bytes, it is hex encoded and ASCIIHexDecode is added to the filters.
	Length is always rewritten to match the data actually emitted.
*/
static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
{
	fz_buffer *tmp_unhex = NULL, *tmp_comp = NULL, *tmp_hex = NULL, *buf = NULL;
	pdf_obj *obj = NULL;
	pdf_obj *dp;
	size_t len;
	unsigned char *data;
	int w, h;

	/* Everything assigned inside fz_try and released in fz_always must
	 * be protected with fz_var, tmp_unhex included. */
	fz_var(buf);
	fz_var(tmp_unhex);
	fz_var(tmp_comp);
	fz_var(tmp_hex);
	fz_var(obj);

	fz_try(ctx)
	{
		buf = pdf_load_raw_stream_number(ctx, doc, num);
		obj = pdf_copy_dict(ctx, obj_orig);

		/* data/len always describe the bytes that will be written; they
		 * are re-pointed at each temporary buffer as it is produced. */
		len = fz_buffer_storage(ctx, buf, &data);

		if (do_deflate && striphexfilter(ctx, doc, obj))
		{
			tmp_unhex = unhexbuf(ctx, data, len);
			len = fz_buffer_storage(ctx, tmp_unhex, &data);
		}

		if (do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME(Filter)))
		{
			if (is_bitmap_stream(ctx, obj, len, &w, &h))
			{
				tmp_comp = fz_compress_ccitt_fax_g4(ctx, data, w, h);
				pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
				dp = pdf_dict_put_dict(ctx, obj, PDF_NAME(DecodeParms), 1);
				pdf_dict_put_int(ctx, dp, PDF_NAME(K), -1);
				pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), w);
			}
			else
			{
				tmp_comp = deflatebuf(ctx, data, len);
				pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
			}
			len = fz_buffer_storage(ctx, tmp_comp, &data);
		}

		if (opts->do_ascii && isbinarystream(ctx, data, len))
		{
			tmp_hex = hexbuf(ctx, data, len);
			len = fz_buffer_storage(ctx, tmp_hex, &data);
			addhexfilter(ctx, doc, obj);
		}

		fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);

		if (unenc)
		{
			pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
			pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
			fz_write_string(ctx, opts->out, "\nstream\n");
			fz_write_data(ctx, opts->out, data, len);
		}
		else
		{
			/* Length must describe the encrypted size. */
			pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, len));
			pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen);
			fz_write_string(ctx, opts->out, "\nstream\n");
			pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
		}

		fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, tmp_unhex);
		fz_drop_buffer(ctx, tmp_hex);
		fz_drop_buffer(ctx, tmp_comp);
		fz_drop_buffer(ctx, buf);
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
1877
/*
	Write stream object num/gen to opts->out, starting from its fully
	decoded data (all existing filters removed).

	obj_orig:   the stream dictionary; a copy is modified and written,
	            with Filter and DecodeParms deleted from the copy.
	do_deflate: if set, the decoded data is recompressed (CCITT G4 for
	            streams accepted by is_bitmap_stream, Flate otherwise).
	unenc:      if set, the object is written unencrypted; otherwise the
	            dictionary and data go through opts->crypt.

	If opts->do_ascii is set and the resulting data contains binary
	bytes, it is hex encoded and ASCIIHexDecode is added to the filters.
*/
static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
{
	fz_buffer *decoded = NULL;
	fz_buffer *packed = NULL;
	fz_buffer *hexed = NULL;
	pdf_obj *copy = NULL;
	pdf_obj *parms;
	unsigned char *bytes;
	size_t size;
	int width, height;

	fz_var(decoded);
	fz_var(packed);
	fz_var(hexed);
	fz_var(copy);

	fz_try(ctx)
	{
		decoded = pdf_load_stream_number(ctx, doc, num);
		copy = pdf_copy_dict(ctx, obj_orig);
		pdf_dict_del(ctx, copy, PDF_NAME(Filter));
		pdf_dict_del(ctx, copy, PDF_NAME(DecodeParms));

		/* bytes/size track whichever buffer is to be written. */
		size = fz_buffer_storage(ctx, decoded, &bytes);

		if (do_deflate)
		{
			if (!is_bitmap_stream(ctx, copy, size, &width, &height))
			{
				packed = deflatebuf(ctx, bytes, size);
				pdf_dict_put(ctx, copy, PDF_NAME(Filter), PDF_NAME(FlateDecode));
			}
			else
			{
				packed = fz_compress_ccitt_fax_g4(ctx, bytes, width, height);
				pdf_dict_put(ctx, copy, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
				parms = pdf_dict_put_dict(ctx, copy, PDF_NAME(DecodeParms), 1);
				pdf_dict_put_int(ctx, parms, PDF_NAME(K), -1);
				pdf_dict_put_int(ctx, parms, PDF_NAME(Columns), width);
			}
			size = fz_buffer_storage(ctx, packed, &bytes);
		}

		if (opts->do_ascii && isbinarystream(ctx, bytes, size))
		{
			hexed = hexbuf(ctx, bytes, size);
			size = fz_buffer_storage(ctx, hexed, &bytes);
			addhexfilter(ctx, doc, copy);
		}

		fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);

		/* Dictionary, with Length matching what will be emitted. */
		if (unenc)
		{
			pdf_dict_put_int(ctx, copy, PDF_NAME(Length), size);
			pdf_print_obj(ctx, opts->out, copy, opts->do_tight, opts->do_ascii);
		}
		else
		{
			pdf_dict_put_int(ctx, copy, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, (int)size));
			pdf_print_encrypted_obj(ctx, opts->out, copy, opts->do_tight, opts->do_ascii, opts->crypt, num, gen);
		}

		fz_write_string(ctx, opts->out, "\nstream\n");

		/* Stream data, plain or encrypted. */
		if (unenc)
			fz_write_data(ctx, opts->out, bytes, size);
		else
			pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, bytes, size);

		fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, hexed);
		fz_drop_buffer(ctx, packed);
		fz_drop_buffer(ctx, decoded);
		pdf_drop_obj(ctx, copy);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
1956
/*
	Returns 1 if s is one of the filter names CCITTFaxDecode/CCF,
	DCTDecode/DCT, RunLengthDecode/RL, JBIG2Decode or JPXDecode;
	0 otherwise. The comparison is by pointer identity.
*/
static int is_image_filter(pdf_obj *s)
{
	if (s == PDF_NAME(CCITTFaxDecode) || s == PDF_NAME(CCF))
		return 1;
	if (s == PDF_NAME(DCTDecode) || s == PDF_NAME(DCT))
		return 1;
	if (s == PDF_NAME(RunLengthDecode) || s == PDF_NAME(RL))
		return 1;
	if (s == PDF_NAME(JBIG2Decode))
		return 1;
	if (s == PDF_NAME(JPXDecode))
		return 1;
	return 0;
}
1966
/*
	Returns non-zero if the Filter value o names an image filter:
	either o is itself such a name, or o is an array containing one
	(see is_image_filter). Returns 0 for anything else.
*/
static int filter_implies_image(fz_context *ctx, pdf_obj *o)
{
	int idx, count;

	if (pdf_is_name(ctx, o))
		return is_image_filter(o);

	if (!pdf_is_array(ctx, o))
		return 0;

	count = pdf_array_len(ctx, o);
	idx = 0;
	while (idx < count)
	{
		if (is_image_filter(pdf_array_get(ctx, o, idx)))
			return 1;
		idx++;
	}
	return 0;
}
1981
/*
	Returns 1 if the Filter value o is JPXDecode, or is an array with
	JPXDecode among its elements; 0 otherwise. The comparison is by
	pointer identity.
*/
static int is_jpx_filter(fz_context *ctx, pdf_obj *o)
{
	int idx, count;

	if (o == PDF_NAME(JPXDecode))
		return 1;

	if (!pdf_is_array(ctx, o))
		return 0;

	count = pdf_array_len(ctx, o);
	idx = 0;
	while (idx < count)
	{
		if (pdf_array_get(ctx, o, idx) == PDF_NAME(JPXDecode))
			return 1;
		idx++;
	}
	return 0;
}
1996
/*
	Heuristic test for an image stream. Returns 1 if any of these holds:
	  - Type is XObject and Subtype is Image
	  - the Filter entry implies an image (see filter_implies_image)
	  - both Width and Height entries are present
	Returns 0 otherwise.
*/
static int is_image_stream(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));

	if (pdf_name_eq(ctx, type, PDF_NAME(XObject)))
	{
		pdf_obj *subtype = pdf_dict_get(ctx, obj, PDF_NAME(Subtype));

		if (pdf_name_eq(ctx, subtype, PDF_NAME(Image)))
			return 1;
	}

	if (filter_implies_image(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Filter))))
		return 1;

	return pdf_dict_get(ctx, obj, PDF_NAME(Width)) != NULL &&
		pdf_dict_get(ctx, obj, PDF_NAME(Height)) != NULL;
}
2009
/*
	Heuristic test for a font stream. Returns 1 if any of these holds:
	  - Type is Font or FontDescriptor
	  - a Length1, Length2 or Length3 entry is present
	  - Subtype is Type1C or CIDFontType0C
	Returns 0 otherwise.
*/
static int is_font_stream(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
	pdf_obj *subtype;

	if (pdf_name_eq(ctx, type, PDF_NAME(Font)) ||
		pdf_name_eq(ctx, type, PDF_NAME(FontDescriptor)))
		return 1;

	if (pdf_dict_get(ctx, obj, PDF_NAME(Length1)) != NULL ||
		pdf_dict_get(ctx, obj, PDF_NAME(Length2)) != NULL ||
		pdf_dict_get(ctx, obj, PDF_NAME(Length3)) != NULL)
		return 1;

	subtype = pdf_dict_get(ctx, obj, PDF_NAME(Subtype));
	if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1C)) ||
		pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0C)))
		return 1;

	return 0;
}
2029
/*
	Returns 1 if the stream dictionary's Filter entry is, or contains,
	JPXDecode (see is_jpx_filter); 0 otherwise.
*/
static int is_jpx_stream(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *filter = pdf_dict_get(ctx, obj, PDF_NAME(Filter));

	return is_jpx_filter(ctx, filter) ? 1 : 0;
}
2037
2038
/*
	Returns 1 if the dictionary has Type Metadata and Subtype XML,
	0 otherwise.
*/
static int is_xml_metadata(fz_context *ctx, pdf_obj *obj)
{
	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Type)), PDF_NAME(Metadata)))
		return 0;
	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(XML)))
		return 0;
	return 1;
}
2046
/*
	Write object num/gen to opts->out.

	skip_xrefs: if set, dictionaries whose Type is XRef are not written.
	unenc:      if set, the object is written unencrypted. It is forced
	            on when opts->do_encrypt is PDF_ENCRYPT_NONE.

	Dictionaries whose Type is ObjStm are never written. For a skipped
	object opts->use_list[num] is cleared.

	Streams are routed to expandstream or copystream. The compress and
	expand choices start from opts->do_compress / opts->do_expand and
	are then overridden, in this order:
	  - image streams, when opts->do_compress_images: compress, no expand
	  - font streams, when opts->do_compress_fonts: compress, no expand
	  - XML metadata: neither
	  - JPX streams: neither
	Non-stream objects are printed directly.
*/
static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs, int unenc)
{
	pdf_obj *obj = NULL;
	int do_deflate = 0;
	int do_expand = 0;
	int skip = 0;

	fz_var(obj);

	if (opts->do_encrypt == PDF_ENCRYPT_NONE)
		unenc = 1;

	fz_try(ctx)
	{
		obj = pdf_load_object(ctx, doc, num);

		/* skip ObjStm and XRef objects */
		if (pdf_is_dict(ctx, obj))
		{
			pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
			if (type == PDF_NAME(ObjStm))
			{
				opts->use_list[num] = 0;
				skip = 1;
			}
			if (skip_xrefs && type == PDF_NAME(XRef))
			{
				opts->use_list[num] = 0;
				skip = 1;
			}
		}

		if (!skip)
		{
			if (pdf_obj_num_is_stream(ctx, doc, num))
			{
				do_deflate = opts->do_compress;
				do_expand = opts->do_expand;
				if (opts->do_compress_images && is_image_stream(ctx, obj))
				{
					do_deflate = 1;
					do_expand = 0;
				}
				if (opts->do_compress_fonts && is_font_stream(ctx, obj))
				{
					do_deflate = 1;
					do_expand = 0;
				}
				/* These two override the per-type settings above. */
				if (is_xml_metadata(ctx, obj))
				{
					do_deflate = 0;
					do_expand = 0;
				}
				if (is_jpx_stream(ctx, obj))
				{
					do_deflate = 0;
					do_expand = 0;
				}

				if (do_expand)
					expandstream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
				else
					copystream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
			}
			else
			{
				fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
				/* A NULL crypt means the object is printed unencrypted. */
				pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, unenc ? NULL : opts->crypt, num, gen);
				fz_write_string(ctx, opts->out, "\nendobj\n\n");
			}
		}
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
2119
/*
	Write one xref subsection covering object numbers from..to-1: a
	header line "from count", then one entry per object giving its
	offset and generation. Entries are tagged 'n' when
	opts->use_list[num] is non-zero and 'f' otherwise.
*/
static void writexrefsubsect(fz_context *ctx, pdf_write_state *opts, int from, int to)
{
	int num = from;

	fz_write_printf(ctx, opts->out, "%d %d\n", from, to - from);

	while (num < to)
	{
		if (!opts->use_list[num])
			fz_write_printf(ctx, opts->out, "%010lu %05d f \n", opts->ofs_list[num], opts->gen_list[num]);
		else
			fz_write_printf(ctx, opts->out, "%010lu %05d n \n", opts->ofs_list[num], opts->gen_list[num]);
		num++;
	}
}
2133
/*
	Write a classic cross reference table for the objects numbered from
	'from' up to (but not including) 'to', followed by the trailer
	dictionary, the startxref pointer and the %%EOF marker.

	Incremental saves list only the runs of objects for which
	pdf_xref_is_incremental is true, reuse the document's own trailer
	(updating Size and Prev) and move doc->startxref on to 'startxref'.
	Other saves build a fresh trailer; when 'first' is non-zero it copies
	Info, Root, ID and Encrypt from the document trailer, and a non-zero
	main_xref_offset is stored as Prev.

	The offset just after the "xref" keyword line is recorded in
	opts->first_xref_entry_offset. The trailer reference is released on
	every path, including when one of the dictionary or output calls
	throws.
*/
static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t main_xref_offset, int64_t startxref)
{
	pdf_obj *trailer = NULL;
	pdf_obj *obj;

	fz_write_string(ctx, opts->out, "xref\n");
	opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);

	if (opts->do_incremental)
	{
		int subfrom = from;
		int subto;

		while (subfrom < to)
		{
			/* Skip objects that are not part of the incremental update. */
			while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
				subfrom++;

			/* Find the end of the run of incremental objects. */
			subto = subfrom;
			while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
				subto++;

			if (subfrom < subto)
				writexrefsubsect(ctx, opts, subfrom, subto);

			subfrom = subto;
		}
	}
	else
	{
		writexrefsubsect(ctx, opts, from, to);
	}

	fz_write_string(ctx, opts->out, "\n");

	fz_var(trailer);

	fz_try(ctx)
	{
		if (opts->do_incremental)
		{
			trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
			pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), pdf_xref_len(ctx, doc));
			pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), doc->startxref);
			doc->startxref = startxref;
		}
		else
		{
			trailer = pdf_new_dict(ctx, doc, 5);

			pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), to);

			if (first)
			{
				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME(Info), obj);

				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME(Root), obj);

				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME(ID), obj);

				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME(Encrypt), obj);
			}
			if (main_xref_offset != 0)
				pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), main_xref_offset);
		}

		fz_write_string(ctx, opts->out, "trailer\n");
		/* Trailer is NOT encrypted */
		pdf_print_obj(ctx, opts->out, trailer, opts->do_tight, opts->do_ascii);
		fz_write_string(ctx, opts->out, "\n");
	}
	fz_always(ctx)
	{
		/* Runs on success and on error, so the trailer cannot leak. */
		pdf_drop_obj(ctx, trailer);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);

	doc->has_xref_streams = 0;
}
2222
/*
	Append one subsection of a cross reference stream for the objects
	numbered from 'from' up to (but not including) 'to'.

	The pair (first object, count) is pushed onto the 'index' array, and
	six bytes per object are appended to 'fzbuf': one type byte (1 if the
	object's use_list entry is non-zero, else 0), four offset bytes with
	the most significant first, and one generation byte.
*/
static void writexrefstreamsubsect(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *index, fz_buffer *fzbuf, int from, int to)
{
	int obj_num = from;

	pdf_array_push_int(ctx, index, from);
	pdf_array_push_int(ctx, index, to - from);

	while (obj_num < to)
	{
		int64_t offset = opts->ofs_list[obj_num];
		int in_use = opts->use_list[obj_num] != 0;

		fz_append_byte(ctx, fzbuf, in_use);
		fz_append_byte(ctx, fzbuf, offset >> 24);
		fz_append_byte(ctx, fzbuf, offset >> 16);
		fz_append_byte(ctx, fzbuf, offset >> 8);
		fz_append_byte(ctx, fzbuf, offset);
		fz_append_byte(ctx, fzbuf, opts->gen_list[obj_num]);

		obj_num++;
	}
}
2239
/*
	Write the cross reference data for the objects numbered from 'from'
	up to 'to' as a cross reference stream object, followed by the
	startxref pointer and the %%EOF marker.

	A new object is created to hold the stream and 'to' is advanced by one
	before it is used (presumably so that the range takes in that new
	object - confirm against the callers). When 'first' is non-zero, Info,
	Root and ID are copied from the document trailer; Encrypt is copied as
	well, but only for incremental saves. Incremental saves store
	doc->startxref as Prev and then move it on to 'startxref'; other saves
	store a non-zero main_xref_offset as Prev.

	Each entry uses the field widths [1 4 1], matching what
	writexrefstreamsubsect appends.
*/
static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t main_xref_offset, int64_t startxref)
{
	int xref_num;
	pdf_obj *xref_dict = NULL;
	pdf_obj *copied;
	pdf_obj *widths = NULL;
	pdf_obj *index;
	fz_buffer *entries = NULL;

	fz_var(xref_dict);
	fz_var(widths);
	fz_var(entries);
	fz_try(ctx)
	{
		xref_num = pdf_create_object(ctx, doc);
		xref_dict = pdf_new_dict(ctx, doc, 6);
		pdf_update_object(ctx, doc, xref_num, xref_dict);

		opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);

		to++;

		if (first)
		{
			/* The trailer is fetched afresh for every key, exactly as
			 * it is needed, rather than being cached across updates. */
			copied = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
			if (copied)
				pdf_dict_put(ctx, xref_dict, PDF_NAME(Info), copied);

			copied = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
			if (copied)
				pdf_dict_put(ctx, xref_dict, PDF_NAME(Root), copied);

			copied = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
			if (copied)
				pdf_dict_put(ctx, xref_dict, PDF_NAME(ID), copied);

			if (opts->do_incremental)
			{
				copied = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
				if (copied)
					pdf_dict_put(ctx, xref_dict, PDF_NAME(Encrypt), copied);
			}
		}

		pdf_dict_put_int(ctx, xref_dict, PDF_NAME(Size), to);

		if (opts->do_incremental)
		{
			pdf_dict_put_int(ctx, xref_dict, PDF_NAME(Prev), doc->startxref);
			doc->startxref = startxref;
		}
		else if (main_xref_offset != 0)
		{
			pdf_dict_put_int(ctx, xref_dict, PDF_NAME(Prev), main_xref_offset);
		}

		pdf_dict_put(ctx, xref_dict, PDF_NAME(Type), PDF_NAME(XRef));

		/* Field widths: 1 byte type, 4 bytes offset, 1 byte generation. */
		widths = pdf_new_array(ctx, doc, 3);
		pdf_dict_put(ctx, xref_dict, PDF_NAME(W), widths);
		pdf_array_push_int(ctx, widths, 1);
		pdf_array_push_int(ctx, widths, 4);
		pdf_array_push_int(ctx, widths, 1);

		index = pdf_new_array(ctx, doc, 2);
		pdf_dict_put_drop(ctx, xref_dict, PDF_NAME(Index), index);

		/* opts->gen_list[xref_num] is already initialized by fz_calloc. */
		opts->use_list[xref_num] = 1;
		opts->ofs_list[xref_num] = opts->first_xref_entry_offset;

		entries = fz_new_buffer(ctx, (1 + 4 + 1) * (to-from));

		if (opts->do_incremental)
		{
			/* Emit one subsection per run of consecutive objects that
			 * belong to the incremental update. */
			int run_start = from;

			while (run_start < to)
			{
				int run_end;

				if (!pdf_xref_is_incremental(ctx, doc, run_start))
				{
					run_start++;
					continue;
				}

				run_end = run_start + 1;
				while (run_end < to && pdf_xref_is_incremental(ctx, doc, run_end))
					run_end++;

				writexrefstreamsubsect(ctx, doc, opts, index, entries, run_start, run_end);

				run_start = run_end;
			}
		}
		else
		{
			writexrefstreamsubsect(ctx, doc, opts, index, entries, from, to);
		}

		pdf_update_stream(ctx, doc, xref_dict, entries, 0);

		writeobject(ctx, doc, opts, xref_num, 0, 0, 1);
		fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, xref_dict);
		pdf_drop_obj(ctx, widths);
		fz_drop_buffer(ctx, entries);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	doc->has_old_style_xrefs = 0;
}
2357
2358 static void
padto(fz_context * ctx,fz_output * out,int64_t target)2359 padto(fz_context *ctx, fz_output *out, int64_t target)
2360 {
2361 int64_t pos = fz_tell_output(ctx, out);
2362
2363 assert(pos <= target);
2364 while (pos < target)
2365 {
2366 fz_write_byte(ctx, out, '\n');
2367 pos++;
2368 }
2369 }
2370
2371 static void
dowriteobject(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,int num,int pass)2372 dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int pass)
2373 {
2374 pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
2375 if (entry->type == 'f')
2376 opts->gen_list[num] = entry->gen;
2377 if (entry->type == 'n')
2378 opts->gen_list[num] = entry->gen;
2379 if (entry->type == 'o')
2380 opts->gen_list[num] = 0;
2381
2382 /* If we are renumbering, then make sure all generation numbers are
2383 * zero (except object 0 which must be free, and have a gen number of
2384 * 65535). Changing the generation numbers (and indeed object numbers)
2385 * will break encryption - so only do this if we are renumbering
2386 * anyway. */
2387 if (opts->do_garbage >= 2)
2388 opts->gen_list[num] = (num == 0 ? 65535 : 0);
2389
2390 if (opts->do_garbage && !opts->use_list[num])
2391 return;
2392
2393 if (entry->type == 'n' || entry->type == 'o')
2394 {
2395 if (pass > 0)
2396 padto(ctx, opts->out, opts->ofs_list[num]);
2397 if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
2398 {
2399 opts->ofs_list[num] = fz_tell_output(ctx, opts->out);
2400 writeobject(ctx, doc, opts, num, opts->gen_list[num], 1, num == opts->crypt_object_number);
2401 }
2402 }
2403 else
2404 opts->use_list[num] = 0;
2405 }
2406
2407 static void
writeobjects(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,int pass)2408 writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int pass)
2409 {
2410 int num;
2411 int xref_len = pdf_xref_len(ctx, doc);
2412
2413 if (!opts->do_incremental)
2414 {
2415 int version = pdf_version(ctx, doc);
2416 fz_write_printf(ctx, opts->out, "%%PDF-%d.%d\n", version / 10, version % 10);
2417 fz_write_string(ctx, opts->out, "%\xC2\xB5\xC2\xB6\n\n");
2418 }
2419
2420 dowriteobject(ctx, doc, opts, opts->start, pass);
2421
2422 if (opts->do_linear)
2423 {
2424 /* Write first xref */
2425 if (pass == 0)
2426 opts->first_xref_offset = fz_tell_output(ctx, opts->out);
2427 else
2428 padto(ctx, opts->out, opts->first_xref_offset);
2429 writexref(ctx, doc, opts, opts->start, pdf_xref_len(ctx, doc), 1, opts->main_xref_offset, 0);
2430 }
2431
2432 for (num = opts->start+1; num < xref_len; num++)
2433 dowriteobject(ctx, doc, opts, num, pass);
2434 if (opts->do_linear && pass == 1)
2435 {
2436 int64_t offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
2437 padto(ctx, opts->out, offset);
2438 }
2439 for (num = 1; num < opts->start; num++)
2440 {
2441 if (pass == 1)
2442 opts->ofs_list[num] += opts->hintstream_len;
2443 dowriteobject(ctx, doc, opts, num, pass);
2444 }
2445 }
2446
/*
	Return the number of bits needed to hold the positive value x, i.e.
	the smallest i for which (1<<i) > x.

	Returns 0 when x <= 0. Also returns 0 when no such i exists below the
	sign bit position of an int (x >= 1<<(sign_bit-1)), which callers see
	as "zero bits".
*/
static int
my_log2(int x)
{
	const int sign_bit = (int)(sizeof(int) * CHAR_BIT) - 1;
	int i = 0;

	if (x <= 0)
		return 0;

	/* Check the shift count before shifting: evaluating 1<<sign_bit
	 * would shift into the sign bit, which is undefined behaviour. */
	while (i < sign_bit && (1<<i) <= x)
		i++;

	if (i >= sign_bit)
		return 0;

	return i;
}
2464
2465 static void
make_page_offset_hints(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,fz_buffer * buf)2466 make_page_offset_hints(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, fz_buffer *buf)
2467 {
2468 int i, j;
2469 int min_objs_per_page, max_objs_per_page;
2470 int min_page_length, max_page_length;
2471 int objs_per_page_bits;
2472 int min_shared_object, max_shared_object;
2473 int max_shared_object_refs = 0;
2474 int min_shared_length, max_shared_length;
2475 page_objects **pop = &opts->page_object_lists->page[0];
2476 int page_len_bits, shared_object_bits, shared_object_id_bits;
2477 int shared_length_bits;
2478 int xref_len = pdf_xref_len(ctx, doc);
2479
2480 min_shared_object = pdf_xref_len(ctx, doc);
2481 max_shared_object = 1;
2482 min_shared_length = opts->file_len;
2483 max_shared_length = 0;
2484 for (i=1; i < xref_len; i++)
2485 {
2486 int min, max, page;
2487
2488 min = opts->ofs_list[i];
2489 if (i == opts->start-1 || (opts->start == 1 && i == xref_len-1))
2490 max = opts->main_xref_offset;
2491 else if (i == xref_len-1)
2492 max = opts->ofs_list[1];
2493 else
2494 max = opts->ofs_list[i+1];
2495
2496 assert(max > min);
2497
2498 if (opts->use_list[i] & USE_SHARED)
2499 {
2500 page = -1;
2501 if (i < min_shared_object)
2502 min_shared_object = i;
2503 if (i > max_shared_object)
2504 max_shared_object = i;
2505 if (min_shared_length > max - min)
2506 min_shared_length = max - min;
2507 if (max_shared_length < max - min)
2508 max_shared_length = max - min;
2509 }
2510 else if (opts->use_list[i] & (USE_CATALOGUE | USE_HINTS | USE_PARAMS))
2511 page = -1;
2512 else if (opts->use_list[i] & USE_PAGE1)
2513 {
2514 page = 0;
2515 if (min_shared_length > max - min)
2516 min_shared_length = max - min;
2517 if (max_shared_length < max - min)
2518 max_shared_length = max - min;
2519 }
2520 else if (opts->use_list[i] == 0)
2521 page = -1;
2522 else
2523 page = opts->use_list[i]>>USE_PAGE_SHIFT;
2524
2525 if (page >= 0)
2526 {
2527 pop[page]->num_objects++;
2528 if (pop[page]->min_ofs > min)
2529 pop[page]->min_ofs = min;
2530 if (pop[page]->max_ofs < max)
2531 pop[page]->max_ofs = max;
2532 }
2533 }
2534
2535 min_objs_per_page = max_objs_per_page = pop[0]->num_objects;
2536 min_page_length = max_page_length = pop[0]->max_ofs - pop[0]->min_ofs;
2537 for (i=1; i < opts->page_count; i++)
2538 {
2539 int tmp;
2540 if (min_objs_per_page > pop[i]->num_objects)
2541 min_objs_per_page = pop[i]->num_objects;
2542 if (max_objs_per_page < pop[i]->num_objects)
2543 max_objs_per_page = pop[i]->num_objects;
2544 tmp = pop[i]->max_ofs - pop[i]->min_ofs;
2545 if (tmp < min_page_length)
2546 min_page_length = tmp;
2547 if (tmp > max_page_length)
2548 max_page_length = tmp;
2549 }
2550
2551 for (i=0; i < opts->page_count; i++)
2552 {
2553 int count = 0;
2554 page_objects *po = opts->page_object_lists->page[i];
2555 for (j = 0; j < po->len; j++)
2556 {
2557 if (i == 0 && opts->use_list[po->object[j]] & USE_PAGE1)
2558 count++;
2559 else if (i != 0 && opts->use_list[po->object[j]] & USE_SHARED)
2560 count++;
2561 }
2562 po->num_shared = count;
2563 if (i == 0 || count > max_shared_object_refs)
2564 max_shared_object_refs = count;
2565 }
2566 if (min_shared_object > max_shared_object)
2567 min_shared_object = max_shared_object = 0;
2568
2569 /* Table F.3 - Header */
2570 /* Header Item 1: Least number of objects in a page */
2571 fz_append_bits(ctx, buf, min_objs_per_page, 32);
2572 /* Header Item 2: Location of first pages page object */
2573 fz_append_bits(ctx, buf, opts->ofs_list[pop[0]->page_object_number], 32);
2574 /* Header Item 3: Number of bits required to represent the difference
2575 * between the greatest and least number of objects in a page. */
2576 objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page);
2577 fz_append_bits(ctx, buf, objs_per_page_bits, 16);
2578 /* Header Item 4: Least length of a page. */
2579 fz_append_bits(ctx, buf, min_page_length, 32);
2580 /* Header Item 5: Number of bits needed to represent the difference
2581 * between the greatest and least length of a page. */
2582 page_len_bits = my_log2(max_page_length - min_page_length);
2583 fz_append_bits(ctx, buf, page_len_bits, 16);
2584 /* Header Item 6: Least offset to start of content stream (Acrobat
2585 * sets this to always be 0) */
2586 fz_append_bits(ctx, buf, 0, 32);
2587 /* Header Item 7: Number of bits needed to represent the difference
2588 * between the greatest and least offset to content stream (Acrobat
2589 * sets this to always be 0) */
2590 fz_append_bits(ctx, buf, 0, 16);
2591 /* Header Item 8: Least content stream length. (Acrobat
2592 * sets this to always be 0) */
2593 fz_append_bits(ctx, buf, 0, 32);
2594 /* Header Item 9: Number of bits needed to represent the difference
2595 * between the greatest and least content stream length (Acrobat
2596 * sets this to always be the same as item 5) */
2597 fz_append_bits(ctx, buf, page_len_bits, 16);
2598 /* Header Item 10: Number of bits needed to represent the greatest
2599 * number of shared object references. */
2600 shared_object_bits = my_log2(max_shared_object_refs);
2601 fz_append_bits(ctx, buf, shared_object_bits, 16);
2602 /* Header Item 11: Number of bits needed to represent the greatest
2603 * shared object identifier. */
2604 shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[0]->num_shared);
2605 fz_append_bits(ctx, buf, shared_object_id_bits, 16);
2606 /* Header Item 12: Number of bits needed to represent the numerator
2607 * of the fractions. We always send 0. */
2608 fz_append_bits(ctx, buf, 0, 16);
2609 /* Header Item 13: Number of bits needed to represent the denominator
2610 * of the fractions. We always send 0. */
2611 fz_append_bits(ctx, buf, 0, 16);
2612
2613 /* Table F.4 - Page offset hint table (per page) */
2614 /* Item 1: A number that, when added to the least number of objects
2615 * on a page, gives the number of objects in the page. */
2616 for (i = 0; i < opts->page_count; i++)
2617 {
2618 fz_append_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits);
2619 }
2620 fz_append_bits_pad(ctx, buf);
2621 /* Item 2: A number that, when added to the least page length, gives
2622 * the length of the page in bytes. */
2623 for (i = 0; i < opts->page_count; i++)
2624 {
2625 fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
2626 }
2627 fz_append_bits_pad(ctx, buf);
2628 /* Item 3: The number of shared objects referenced from the page. */
2629 for (i = 0; i < opts->page_count; i++)
2630 {
2631 fz_append_bits(ctx, buf, pop[i]->num_shared, shared_object_bits);
2632 }
2633 fz_append_bits_pad(ctx, buf);
2634 /* Item 4: Shared object id for each shared object ref in every page.
2635 * Spec says "not for page 1", but acrobat does send page 1's - all
2636 * as zeros. */
2637 for (i = 0; i < opts->page_count; i++)
2638 {
2639 for (j = 0; j < pop[i]->len; j++)
2640 {
2641 int o = pop[i]->object[j];
2642 if (i == 0 && opts->use_list[o] & USE_PAGE1)
2643 fz_append_bits(ctx, buf, 0 /* o - pop[0]->page_object_number */, shared_object_id_bits);
2644 if (i != 0 && opts->use_list[o] & USE_SHARED)
2645 fz_append_bits(ctx, buf, o - min_shared_object + pop[0]->num_shared, shared_object_id_bits);
2646 }
2647 }
2648 fz_append_bits_pad(ctx, buf);
2649 /* Item 5: Numerator of fractional position for each shared object reference. */
2650 /* We always send 0 in 0 bits */
2651 /* Item 6: A number that, when added to the least offset to the start
2652 * of the content stream (F.3 Item 6), gives the offset in bytes of
2653 * start of the pages content stream object relative to the beginning
2654 * of the page. Always 0 in 0 bits. */
2655 /* Item 7: A number that, when added to the least content stream length
2656 * (F.3 Item 8), gives the length of the pages content stream object.
2657 * Always == Item 2 as least content stream length = least page stream
2658 * length.
2659 */
2660 for (i = 0; i < opts->page_count; i++)
2661 {
2662 fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
2663 }
2664
2665 /* Pad, and then do shared object hint table */
2666 fz_append_bits_pad(ctx, buf);
2667 opts->hints_shared_offset = (int)fz_buffer_storage(ctx, buf, NULL);
2668
2669 /* Table F.5: */
2670 /* Header Item 1: Object number of the first object in the shared
2671 * objects section. */
2672 fz_append_bits(ctx, buf, min_shared_object, 32);
2673 /* Header Item 2: Location of first object in the shared objects
2674 * section. */
2675 fz_append_bits(ctx, buf, opts->ofs_list[min_shared_object], 32);
2676 /* Header Item 3: The number of shared object entries for the first
2677 * page. */
2678 fz_append_bits(ctx, buf, pop[0]->num_shared, 32);
2679 /* Header Item 4: The number of shared object entries for the shared
2680 * objects section + first page. */
2681 fz_append_bits(ctx, buf, max_shared_object - min_shared_object + pop[0]->num_shared, 32);
2682 /* Header Item 5: The number of bits needed to represent the greatest
2683 * number of objects in a shared object group (Always 0). */
2684 fz_append_bits(ctx, buf, 0, 16);
2685 /* Header Item 6: The least length of a shared object group in bytes. */
2686 fz_append_bits(ctx, buf, min_shared_length, 32);
2687 /* Header Item 7: The number of bits required to represent the
2688 * difference between the greatest and least length of a shared object
2689 * group. */
2690 shared_length_bits = my_log2(max_shared_length - min_shared_length);
2691 fz_append_bits(ctx, buf, shared_length_bits, 16);
2692
2693 /* Table F.6 */
2694 /* Item 1: Shared object group length (page 1 objects) */
2695 for (j = 0; j < pop[0]->len; j++)
2696 {
2697 int o = pop[0]->object[j];
2698 int64_t min, max;
2699 min = opts->ofs_list[o];
2700 if (o == opts->start-1)
2701 max = opts->main_xref_offset;
2702 else if (o < xref_len-1)
2703 max = opts->ofs_list[o+1];
2704 else
2705 max = opts->ofs_list[1];
2706 if (opts->use_list[o] & USE_PAGE1)
2707 fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
2708 }
2709 /* Item 1: Shared object group length (shared objects) */
2710 for (i = min_shared_object; i <= max_shared_object; i++)
2711 {
2712 int min, max;
2713 min = opts->ofs_list[i];
2714 if (i == opts->start-1)
2715 max = opts->main_xref_offset;
2716 else if (i < xref_len-1)
2717 max = opts->ofs_list[i+1];
2718 else
2719 max = opts->ofs_list[1];
2720 fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
2721 }
2722 fz_append_bits_pad(ctx, buf);
2723
2724 /* Item 2: MD5 presence flags */
2725 for (i = max_shared_object - min_shared_object + pop[0]->num_shared; i > 0; i--)
2726 {
2727 fz_append_bits(ctx, buf, 0, 1);
2728 }
2729 fz_append_bits_pad(ctx, buf);
2730 /* Item 3: MD5 sums (not present) */
2731 fz_append_bits_pad(ctx, buf);
2732 /* Item 4: Number of objects in the group (not present) */
2733 }
2734
2735 static void
make_hint_stream(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)2736 make_hint_stream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
2737 {
2738 fz_buffer *buf;
2739 pdf_obj *obj = NULL;
2740
2741 fz_var(obj);
2742
2743 buf = fz_new_buffer(ctx, 100);
2744 fz_try(ctx)
2745 {
2746 make_page_offset_hints(ctx, doc, opts, buf);
2747 obj = pdf_load_object(ctx, doc, pdf_xref_len(ctx, doc)-1);
2748 pdf_update_stream(ctx, doc, obj, buf, 0);
2749 opts->hintstream_len = (int)fz_buffer_storage(ctx, buf, NULL);
2750 }
2751 fz_always(ctx)
2752 {
2753 pdf_drop_obj(ctx, obj);
2754 fz_drop_buffer(ctx, buf);
2755 }
2756 fz_catch(ctx)
2757 fz_rethrow(ctx);
2758 }
2759
#ifdef DEBUG_WRITING
/*
	Debug aid (only built when DEBUG_WRITING is defined): print the
	recorded file offset and use flags of every object in the xref to
	stderr, one object per line.
*/
static void dump_object_details(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	int i;

	for (i = 0; i < pdf_xref_len(ctx, doc); i++)
	{
		/* ofs_list holds int64_t values; cast so that the argument
		 * matches the %lld conversion on every platform. */
		fprintf(stderr, "%d@%lld: use=%d\n", i, (long long)opts->ofs_list[i], opts->use_list[i]);
	}
}
#endif
2771
/*
	Reserve file space for the ByteRange arrays of signatures that have
	not been saved yet.

	ByteRange arrays are first written with placeholder values and are
	overwritten in place once the real offsets are known, so the
	placeholder form must be at least as long as the final one. For every
	xref section with unsaved signatures, each signature's ByteRange array
	is extended with one (INT_MAX, INT_MAX) pair per byte segment.
*/
static void presize_unsaved_signature_byteranges(fz_context *ctx, pdf_document *doc)
{
	int section;

	for (section = 0; section < doc->num_incremental_sections; section++)
	{
		pdf_xref *xref = &doc->xref_sections[section];
		pdf_unsaved_sig *usig;
		int sig_count = 0;
		int segment_count;

		if (!xref->unsaved_sigs)
			continue;

		for (usig = xref->unsaved_sigs; usig; usig = usig->next)
			sig_count++;

		/* There is a segment of bytes before the first signature, one
		 * after the last, and one between each consecutive pair. */
		segment_count = sig_count + 1;

		for (usig = xref->unsaved_sigs; usig; usig = usig->next)
		{
			pdf_obj *byte_range = pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
			int remaining;

			for (remaining = segment_count; remaining > 0; remaining--)
			{
				pdf_array_push_int(ctx, byte_range, INT_MAX);
				pdf_array_push_int(ctx, byte_range, INT_MAX);
			}
		}
	}
}
2808
/*
	Finish the signatures that were added to the document but had not
	been saved before this write.

	For each xref section with unsaved signatures (visited from the
	highest index of doc->xref_sections downwards) this:
	 - reads back the start of each signature's written object and locates
	   the /ByteRange, /Contents and /Filter keys to learn the file
	   positions of the ByteRange and Contents values;
	 - rebuilds the ByteRange array from those positions and stores it in
	   every signature's value dictionary;
	 - overwrites the placeholder ByteRange in the file with the real one,
	   padded with spaces, and writes each digest;
	 - releases the unsaved signature records.

	Throws if the keys cannot be located in the expected order, or if the
	serialized ByteRange does not fit in the scratch buffer.
*/
static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	char *buf = NULL, *ptr;
	int s;
	fz_stream *stm = NULL;
	fz_var(stm);
	fz_var(buf);

	fz_try(ctx)
	{
		for (s = 0; s < doc->num_incremental_sections; s++)
		{
			pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];

			if (xref->unsaved_sigs)
			{
				pdf_unsaved_sig *usig;
				pdf_obj *byte_range;
				size_t buf_size = 0;
				size_t i;
				size_t last_end;

				/* Size the scratch buffer from the largest digest. */
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
				{
					size_t size = usig->signer->max_digest_size(ctx, usig->signer);

					buf_size = fz_maxz(buf_size, size);
				}

				buf_size = buf_size * 2 + SIG_EXTRAS_SIZE;

				buf = fz_calloc(ctx, buf_size, 1);

				stm = fz_stream_from_output(ctx, opts->out);
				/* Locate the byte ranges and contents in the saved file */
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
				{
					char *bstr, *cstr, *fstr;
					size_t bytes_read;
					int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL));
					fz_seek(ctx, stm, opts->ofs_list[pnum], SEEK_SET);
					/* SIG_EXTRAS_SIZE is an arbitrary value and its addition above to buf_size
					 * could cause an attempt to read off the end of the file. That's not an
					 * error, but we need to keep track of how many bytes are read and search
					 * for markers only in defined data */
					bytes_read = fz_read(ctx, stm, (unsigned char *)buf, buf_size);
					assert(bytes_read <= buf_size);

					bstr = fz_memmem(buf, bytes_read, SLASH_BYTE_RANGE, sizeof(SLASH_BYTE_RANGE)-1);
					cstr = fz_memmem(buf, bytes_read, SLASH_CONTENTS, sizeof(SLASH_CONTENTS)-1);
					fstr = fz_memmem(buf, bytes_read, SLASH_FILTER, sizeof(SLASH_FILTER)-1);

					if (!(bstr && cstr && fstr && bstr < cstr && cstr < fstr))
						fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to determine byte ranges while writing signature");

					/* Convert positions within buf into file offsets. */
					usig->byte_range_start = bstr - buf + sizeof(SLASH_BYTE_RANGE)-1 + opts->ofs_list[pnum];
					usig->byte_range_end = cstr - buf + opts->ofs_list[pnum];
					usig->contents_start = cstr - buf + sizeof(SLASH_CONTENTS)-1 + opts->ofs_list[pnum];
					usig->contents_end = fstr - buf + opts->ofs_list[pnum];
				}

				fz_drop_stream(ctx, stm);
				stm = NULL;

				/* Recreate ByteRange with correct values. Initially store the
				 * recreated object in the first of the unsaved signatures */
				byte_range = pdf_new_array(ctx, doc, 4);
				pdf_dict_putl_drop(ctx, xref->unsaved_sigs->field, byte_range, PDF_NAME(V), PDF_NAME(ByteRange), NULL);

				last_end = 0;
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
				{
					pdf_array_push_int(ctx, byte_range, last_end);
					pdf_array_push_int(ctx, byte_range, usig->contents_start - last_end);
					last_end = usig->contents_end;
				}
				pdf_array_push_int(ctx, byte_range, last_end);
				pdf_array_push_int(ctx, byte_range, xref->end_ofs - last_end);

				/* Copy the new ByteRange to the other unsaved signatures */
				for (usig = xref->unsaved_sigs->next; usig; usig = usig->next)
					pdf_dict_putl_drop(ctx, usig->field, pdf_copy_array(ctx, byte_range), PDF_NAME(V), PDF_NAME(ByteRange), NULL);

				/* Write the byte range into buf, padding with spaces*/
				ptr = pdf_sprint_obj(ctx, buf, buf_size, &i, byte_range, 1, 0);
				if (ptr != buf)
				{
					/* The data did not fit in buf, so buf does not hold
					 * it and buf_size-i below would wrap around. This
					 * should never happen; fail cleanly if it does. */
					fz_free(ctx, ptr);
					fz_throw(ctx, FZ_ERROR_GENERIC, "ByteRange too large while writing signature");
				}
				memset(buf+i, ' ', buf_size-i);

				/* Write the byte range to the file */
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
				{
					fz_seek_output(ctx, opts->out, usig->byte_range_start, SEEK_SET);
					fz_write_data(ctx, opts->out, buf, usig->byte_range_end - usig->byte_range_start);
				}

				/* Write the digests into the file */
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
					pdf_write_digest(ctx, opts->out, byte_range, usig->contents_start, usig->contents_end - usig->contents_start, usig->signer);

				/* delete the unsaved_sigs records */
				while ((usig = xref->unsaved_sigs) != NULL)
				{
					xref->unsaved_sigs = usig->next;
					pdf_drop_obj(ctx, usig->field);
					pdf_drop_signer(ctx, usig->signer);
					fz_free(ctx, usig);
				}

				xref->unsaved_sigs_end = NULL;

				fz_free(ctx, buf);
				buf = NULL;
			}
		}
	}
	fz_catch(ctx)
	{
		fz_drop_stream(ctx, stm);
		fz_free(ctx, buf);
		fz_rethrow(ctx);
	}
}
2932
/*
	Run the content stream filter over every page of the document and
	over each annotation on those pages.

	The filter is set to recurse, with its 'sanitize' and 'ascii' settings
	taken from the arguments. Each page is dropped again once it has been
	processed, whether or not filtering threw.
*/
static void clean_content_streams(fz_context *ctx, pdf_document *doc, int sanitize, int ascii)
{
	pdf_filter_options filter;
	int page_count = pdf_count_pages(ctx, doc);
	int page_num;

	memset(&filter, 0, sizeof filter);
	filter.recurse = 1;
	filter.sanitize = sanitize;
	filter.ascii = ascii;

	for (page_num = 0; page_num < page_count; page_num++)
	{
		pdf_page *page = pdf_load_page(ctx, doc, page_num);

		fz_try(ctx)
		{
			pdf_annot *annot;

			pdf_filter_page_contents(ctx, doc, page, &filter);

			annot = pdf_first_annot(ctx, page);
			while (annot != NULL)
			{
				pdf_filter_annot_contents(ctx, doc, annot, &filter);
				annot = pdf_next_annot(ctx, annot);
			}
		}
		fz_always(ctx)
		{
			fz_drop_page(ctx, &page->super);
		}
		fz_catch(ctx)
		{
			fz_rethrow(ctx);
		}
	}
}
2963
2964 /* Initialise the pdf_write_state, used dynamically during the write, from the static
2965 * pdf_write_options, passed into pdf_save_document */
initialise_write_state(fz_context * ctx,pdf_document * doc,const pdf_write_options * in_opts,pdf_write_state * opts)2966 static void initialise_write_state(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts, pdf_write_state *opts)
2967 {
2968 int xref_len = pdf_xref_len(ctx, doc);
2969
2970 opts->do_incremental = in_opts->do_incremental;
2971 opts->do_ascii = in_opts->do_ascii;
2972 opts->do_tight = !in_opts->do_pretty;
2973 opts->do_expand = in_opts->do_decompress;
2974 opts->do_compress = in_opts->do_compress;
2975 opts->do_compress_images = in_opts->do_compress_images;
2976 opts->do_compress_fonts = in_opts->do_compress_fonts;
2977
2978 opts->do_garbage = in_opts->do_garbage;
2979 opts->do_linear = in_opts->do_linear;
2980 opts->do_clean = in_opts->do_clean;
2981 opts->do_encrypt = in_opts->do_encrypt;
2982 opts->start = 0;
2983 opts->main_xref_offset = INT_MIN;
2984
2985 opts->permissions = in_opts->permissions;
2986 memcpy(opts->opwd_utf8, in_opts->opwd_utf8, nelem(opts->opwd_utf8));
2987 memcpy(opts->upwd_utf8, in_opts->upwd_utf8, nelem(opts->upwd_utf8));
2988
2989 /* We deliberately make these arrays long enough to cope with
2990 * 1 to n access rather than 0..n-1, and add space for 2 new
2991 * extra entries that may be required for linearization. */
2992 opts->list_len = 0;
2993 opts->use_list = NULL;
2994 opts->ofs_list = NULL;
2995 opts->gen_list = NULL;
2996 opts->renumber_map = NULL;
2997 opts->rev_renumber_map = NULL;
2998
2999 expand_lists(ctx, opts, xref_len);
3000 }
3001
3002 /* Free the resources held by the dynamic write options */
finalise_write_state(fz_context * ctx,pdf_write_state * opts)3003 static void finalise_write_state(fz_context *ctx, pdf_write_state *opts)
3004 {
3005 fz_free(ctx, opts->use_list);
3006 fz_free(ctx, opts->ofs_list);
3007 fz_free(ctx, opts->gen_list);
3008 fz_free(ctx, opts->renumber_map);
3009 fz_free(ctx, opts->rev_renumber_map);
3010 pdf_drop_obj(ctx, opts->linear_l);
3011 pdf_drop_obj(ctx, opts->linear_h0);
3012 pdf_drop_obj(ctx, opts->linear_h1);
3013 pdf_drop_obj(ctx, opts->linear_o);
3014 pdf_drop_obj(ctx, opts->linear_e);
3015 pdf_drop_obj(ctx, opts->linear_n);
3016 pdf_drop_obj(ctx, opts->linear_t);
3017 pdf_drop_obj(ctx, opts->hints_s);
3018 pdf_drop_obj(ctx, opts->hints_length);
3019 page_objects_list_destroy(ctx, opts->page_object_lists);
3020 }
3021
3022 const pdf_write_options pdf_default_write_options = {
3023 0, /* do_incremental */
3024 0, /* do_pretty */
3025 0, /* do_ascii */
3026 0, /* do_compress */
3027 0, /* do_compress_images */
3028 0, /* do_compress_fonts */
3029 0, /* do_decompress */
3030 0, /* do_garbage */
3031 0, /* do_linear */
3032 0, /* do_clean */
3033 0, /* do_sanitize */
3034 0, /* do_appearance */
3035 0, /* do_encrypt */
3036 ~0, /* permissions */
3037 "", /* opwd_utf8[128] */
3038 "", /* upwd_utf8[128] */
3039 };
3040
/*
	Help text describing the option keywords understood when writing PDF
	output. The "or garbage=..." lines are continuations of the "garbage"
	entry and must stay directly beneath it.
*/
const char *fz_pdf_write_options_usage =
	"PDF output options:\n"
	"\tdecompress: decompress all streams (except compress-fonts/images)\n"
	"\tcompress: compress all streams\n"
	"\tcompress-fonts: compress embedded fonts\n"
	"\tcompress-images: compress images\n"
	"\tascii: ASCII hex encode binary streams\n"
	"\tpretty: pretty-print objects with indentation\n"
	"\tlinearize: optimize for web browsers\n"
	"\tclean: pretty-print graphics commands in content streams\n"
	"\tsanitize: sanitize graphics commands in content streams\n"
	"\tgarbage: garbage collect unused objects\n"
	"\tor garbage=compact: ... and compact cross reference table\n"
	"\tor garbage=deduplicate: ... and remove duplicate objects\n"
	"\tincremental: write changes as incremental update\n"
	"\tcontinue-on-error: continue saving the document even if there is an error\n"
	"\tdecrypt: write unencrypted document\n"
	"\tencrypt=rc4-40|rc4-128|aes-128|aes-256: write encrypted document\n"
	"\tpermissions=NUMBER: document permissions to grant when encrypting\n"
	"\tuser-password=PASSWORD: password required to read document\n"
	"\towner-password=PASSWORD: password required to edit document\n"
	"\n";
3063
3064 pdf_write_options *
pdf_parse_write_options(fz_context * ctx,pdf_write_options * opts,const char * args)3065 pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args)
3066 {
3067 const char *val;
3068
3069 memset(opts, 0, sizeof *opts);
3070
3071 if (fz_has_option(ctx, args, "decompress", &val))
3072 opts->do_decompress = fz_option_eq(val, "yes");
3073 if (fz_has_option(ctx, args, "compress", &val))
3074 opts->do_compress = fz_option_eq(val, "yes");
3075 if (fz_has_option(ctx, args, "compress-fonts", &val))
3076 opts->do_compress_fonts = fz_option_eq(val, "yes");
3077 if (fz_has_option(ctx, args, "compress-images", &val))
3078 opts->do_compress_images = fz_option_eq(val, "yes");
3079 if (fz_has_option(ctx, args, "ascii", &val))
3080 opts->do_ascii = fz_option_eq(val, "yes");
3081 if (fz_has_option(ctx, args, "pretty", &val))
3082 opts->do_pretty = fz_option_eq(val, "yes");
3083 if (fz_has_option(ctx, args, "linearize", &val))
3084 opts->do_linear = fz_option_eq(val, "yes");
3085 if (fz_has_option(ctx, args, "clean", &val))
3086 opts->do_clean = fz_option_eq(val, "yes");
3087 if (fz_has_option(ctx, args, "sanitize", &val))
3088 opts->do_sanitize = fz_option_eq(val, "yes");
3089 if (fz_has_option(ctx, args, "incremental", &val))
3090 opts->do_incremental = fz_option_eq(val, "yes");
3091 if (fz_has_option(ctx, args, "decrypt", &val))
3092 opts->do_encrypt = fz_option_eq(val, "yes") ? PDF_ENCRYPT_NONE : PDF_ENCRYPT_KEEP;
3093 if (fz_has_option(ctx, args, "encrypt", &val))
3094 {
3095 opts->do_encrypt = PDF_ENCRYPT_UNKNOWN;
3096 if (fz_option_eq(val, "none") || fz_option_eq(val, "no"))
3097 opts->do_encrypt = PDF_ENCRYPT_NONE;
3098 if (fz_option_eq(val, "keep"))
3099 opts->do_encrypt = PDF_ENCRYPT_KEEP;
3100 if (fz_option_eq(val, "rc4-40") || fz_option_eq(val, "yes"))
3101 opts->do_encrypt = PDF_ENCRYPT_RC4_40;
3102 if (fz_option_eq(val, "rc4-128"))
3103 opts->do_encrypt = PDF_ENCRYPT_RC4_128;
3104 if (fz_option_eq(val, "aes-128"))
3105 opts->do_encrypt = PDF_ENCRYPT_AES_128;
3106 if (fz_option_eq(val, "aes-256"))
3107 opts->do_encrypt = PDF_ENCRYPT_AES_256;
3108 }
3109 if (fz_has_option(ctx, args, "owner-password", &val))
3110 fz_copy_option(ctx, val, opts->opwd_utf8, nelem(opts->opwd_utf8));
3111 if (fz_has_option(ctx, args, "user-password", &val))
3112 fz_copy_option(ctx, val, opts->upwd_utf8, nelem(opts->upwd_utf8));
3113 if (fz_has_option(ctx, args, "permissions", &val))
3114 opts->permissions = fz_atoi(val);
3115 else
3116 opts->permissions = ~0;
3117 if (fz_has_option(ctx, args, "garbage", &val))
3118 {
3119 if (fz_option_eq(val, "yes"))
3120 opts->do_garbage = 1;
3121 else if (fz_option_eq(val, "compact"))
3122 opts->do_garbage = 2;
3123 else if (fz_option_eq(val, "deduplicate"))
3124 opts->do_garbage = 3;
3125 else
3126 opts->do_garbage = fz_atoi(val);
3127 }
3128 if (fz_has_option(ctx, args, "appearance", &val))
3129 {
3130 if (fz_option_eq(val, "yes"))
3131 opts->do_appearance = 1;
3132 else if (fz_option_eq(val, "all"))
3133 opts->do_appearance = 2;
3134 }
3135
3136 return opts;
3137 }
3138
/*
	Return 1 if the document may be written as an incremental update,
	0 otherwise. Incremental saving is refused when a repair has been
	attempted, when the document has been redacted, or when it has both
	xref streams and old style xrefs.
*/
int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc)
{
	int mixed_xrefs = doc->has_xref_streams && doc->has_old_style_xrefs;

	return !(doc->repair_attempted || doc->redacted || mixed_xrefs);
}
3149
3150 static void
prepare_for_save(fz_context * ctx,pdf_document * doc,pdf_write_options * in_opts)3151 prepare_for_save(fz_context *ctx, pdf_document *doc, pdf_write_options *in_opts)
3152 {
3153 /* Rewrite (and possibly sanitize) the operator streams */
3154 if (in_opts->do_clean || in_opts->do_sanitize)
3155 clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii);
3156
3157 /* When saving a PDF with signatures the file will
3158 first be written once, then the file will have its
3159 digests and byte ranges calculated and and then the
3160 signature dictionary containing them will be updated
3161 both in memory and in the saved file. By setting this
3162 flag we avoid a new xref section from being created when
3163 the signature dictionary is updated. */
3164 doc->save_in_progress = 1;
3165
3166 presize_unsaved_signature_byteranges(ctx, doc);
3167 }
3168
3169 static pdf_obj *
new_identity(fz_context * ctx,pdf_document * doc)3170 new_identity(fz_context *ctx, pdf_document *doc)
3171 {
3172 unsigned char rnd[32];
3173 pdf_obj *id;
3174
3175 fz_memrnd(ctx, rnd, nelem(rnd));
3176
3177 id = pdf_dict_put_array(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), 2);
3178 pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + 0, nelem(rnd) / 2));
3179 pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + 16, nelem(rnd) / 2));
3180
3181 return id;
3182 }
3183
3184 static void
change_identity(fz_context * ctx,pdf_document * doc,pdf_obj * id)3185 change_identity(fz_context *ctx, pdf_document *doc, pdf_obj *id)
3186 {
3187 unsigned char rnd[16];
3188 if (pdf_array_len(ctx, id) >= 2)
3189 {
3190 /* Update second half of ID array with new random data. */
3191 fz_memrnd(ctx, rnd, 16);
3192 pdf_array_put_drop(ctx, id, 1, pdf_new_string(ctx, (char *)rnd, 16));
3193 }
3194 }
3195
3196 static void
create_encryption_dictionary(fz_context * ctx,pdf_document * doc,pdf_crypt * crypt)3197 create_encryption_dictionary(fz_context *ctx, pdf_document *doc, pdf_crypt *crypt)
3198 {
3199 unsigned char *o, *u;
3200 pdf_obj *encrypt;
3201 int r;
3202
3203 r = pdf_crypt_revision(ctx, crypt);
3204
3205 encrypt = pdf_dict_put_dict(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), 10);
3206
3207 pdf_dict_put_name(ctx, encrypt, PDF_NAME(Filter), "Standard");
3208 pdf_dict_put_int(ctx, encrypt, PDF_NAME(R), r);
3209 pdf_dict_put_int(ctx, encrypt, PDF_NAME(V), pdf_crypt_version(ctx, crypt));
3210 pdf_dict_put_int(ctx, encrypt, PDF_NAME(Length), pdf_crypt_length(ctx, crypt));
3211 pdf_dict_put_int(ctx, encrypt, PDF_NAME(P), pdf_crypt_permissions(ctx, crypt));
3212 pdf_dict_put_bool(ctx, encrypt, PDF_NAME(EncryptMetadata), pdf_crypt_encrypt_metadata(ctx, crypt));
3213
3214 o = pdf_crypt_owner_password(ctx, crypt);
3215 u = pdf_crypt_user_password(ctx, crypt);
3216
3217 if (r < 4)
3218 {
3219 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
3220 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
3221 }
3222 else if (r == 4)
3223 {
3224 pdf_obj *cf;
3225
3226 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3227 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3228
3229 cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
3230 cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
3231 pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3232 pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV2");
3233 pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 16);
3234 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
3235 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
3236 }
3237 else if (r == 6)
3238 {
3239 unsigned char *oe = pdf_crypt_owner_encryption(ctx, crypt);
3240 unsigned char *ue = pdf_crypt_user_encryption(ctx, crypt);
3241 pdf_obj *cf;
3242
3243 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3244 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3245
3246 cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
3247 cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
3248 pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3249 pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV3");
3250 pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 32);
3251 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 48);
3252 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 48);
3253 pdf_dict_put_string(ctx, encrypt, PDF_NAME(OE), (char *) oe, 32);
3254 pdf_dict_put_string(ctx, encrypt, PDF_NAME(UE), (char *) ue, 32);
3255 pdf_dict_put_string(ctx, encrypt, PDF_NAME(Perms), (char *) pdf_crypt_permissions_encryption(ctx, crypt), 16);
3256 }
3257 }
3258
3259 static void
ensure_initial_incremental_contents(fz_context * ctx,fz_stream * in,fz_output * out)3260 ensure_initial_incremental_contents(fz_context *ctx, fz_stream *in, fz_output *out)
3261 {
3262 fz_stream *verify;
3263 unsigned char buf0[256];
3264 unsigned char buf1[256];
3265 size_t n0, n1;
3266 int64_t off = 0;
3267 int same;
3268
3269 if (!in)
3270 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't copy contents for incremental write");
3271
3272 verify = fz_stream_from_output(ctx, out);
3273 if (!verify)
3274 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't incrementally write pdf to this type of output");
3275
3276 fz_var(verify);
3277
3278 fz_try(ctx)
3279 {
3280 do
3281 {
3282 fz_seek(ctx, in, off, SEEK_SET);
3283 n0 = fz_read(ctx, in, buf0, sizeof(buf0));
3284 fz_seek(ctx, verify, off, SEEK_SET);
3285 n1 = fz_read(ctx, verify, buf1, sizeof(buf1));
3286 same = (n0 == n1 && !memcmp(buf0, buf1, n0));
3287 off += n0;
3288 }
3289 while (same && n0 > 0);
3290
3291 if (same)
3292 break;
3293
3294 fz_drop_stream(ctx, verify);
3295 verify = NULL;
3296
3297 /* Copy old contents into new file */
3298 fz_seek(ctx, in, 0, SEEK_SET);
3299 fz_seek_output(ctx, out, 0, SEEK_SET);
3300 do
3301 {
3302 n0 = fz_read(ctx, in, buf0, sizeof(buf0));
3303 if (n0)
3304 fz_write_data(ctx, out, buf0, n0);
3305 }
3306 while (n0);
3307 fz_truncate_output(ctx, out);
3308 }
3309 fz_always(ctx)
3310 fz_drop_stream(ctx, verify);
3311 fz_catch(ctx)
3312 fz_rethrow(ctx);
3313 }
3314
/*
	Core of the save operation: write doc to opts->out as directed by
	in_opts. opts->out must already be set by the caller.

	In outline:
	- for incremental saves, return at once if there are no incremental
	  sections; otherwise make sure the output starts with the original
	  file and position at its end;
	- initialise the write state, refresh the trailer ID and set up the
	  requested encryption;
	- mark objects in use, then optionally deduplicate, compact,
	  renumber and linearize;
	- write the objects and the xref (one xref per incremental section
	  for incremental saves);
	- complete any signatures and clear doc->dirty.

	The write state is finalised and doc->save_in_progress is cleared
	whenever the fz_try block is left, whether normally or by an error.
	Errors are rethrown to the caller.

	NOTE(review): the incremental prelude runs before the fz_try, so an
	error thrown from it propagates without doc->save_in_progress being
	cleared here.
*/
static void
do_pdf_save_document(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_write_options *in_opts)
{
	int lastfree;
	int num;
	int xref_len;
	pdf_obj *id, *id1;

	if (in_opts->do_incremental)
	{
		/* If no changes, nothing to write */
		if (doc->num_incremental_sections == 0)
		{
			doc->save_in_progress = 0;
			return;
		}

		ensure_initial_incremental_contents(ctx, doc->file, opts->out);

		/* New data is appended after the existing contents. */
		fz_seek_output(ctx, opts->out, 0, SEEK_END);
		fz_write_string(ctx, opts->out, "\n");
	}

	xref_len = pdf_xref_len(ctx, doc);

	fz_try(ctx)
	{
		initialise_write_state(ctx, doc, in_opts, opts);

		/* Update second half of ID array if it exists. */
		id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
		if (id)
			change_identity(ctx, doc, id);

		/* Remove encryption dictionary if saving without encryption. */
		if (opts->do_encrypt == PDF_ENCRYPT_NONE)
		{
			pdf_dict_del(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
		}

		/* Keep encryption dictionary if saving with old encryption. */
		else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
		{
			/* Borrowed from the document; the cleanup below only
			 * drops opts->crypt when it differs from doc->crypt. */
			opts->crypt = doc->crypt;
		}

		/* Create encryption dictionary if saving with new encryption. */
		else
		{
			/* The first ID entry is passed to pdf_new_encrypt, so
			 * an ID array has to exist. */
			if (!id)
				id = new_identity(ctx, doc);
			id1 = pdf_array_get(ctx, id, 0);
			opts->crypt = pdf_new_encrypt(ctx, opts->opwd_utf8, opts->upwd_utf8, id1, opts->permissions, opts->do_encrypt);
			create_encryption_dictionary(ctx, doc, opts->crypt);
		}

		/* Make sure any objects hidden in compressed streams have been loaded */
		if (!opts->do_incremental)
		{
			pdf_ensure_solid_xref(ctx, doc, xref_len);
			preloadobjstms(ctx, doc);
			xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
			expand_lists(ctx, opts, xref_len);
		}

		/* Sweep & mark objects from the trailer */
		if (opts->do_garbage >= 1 || opts->do_linear)
			(void)markobj(ctx, doc, opts, pdf_trailer(ctx, doc));
		else
		{
			/* No garbage collection: treat every object as used. */
			xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
			expand_lists(ctx, opts, xref_len);
			for (num = 0; num < xref_len; num++)
				opts->use_list[num] = 1;
		}

		/* Coalesce and renumber duplicate objects */
		if (opts->do_garbage >= 3)
			removeduplicateobjs(ctx, doc, opts);

		/* Compact xref by renumbering and removing unused objects */
		if (opts->do_garbage >= 2 || opts->do_linear)
			compactxref(ctx, doc, opts);

		/* Record the (renumbered) object number of the Encrypt
		 * dictionary; 0 when there is no encryption. */
		opts->crypt_object_number = 0;
		if (opts->crypt)
		{
			pdf_obj *crypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
			int crypt_num = pdf_to_num(ctx, crypt);
			opts->crypt_object_number = opts->renumber_map[crypt_num];
		}

		/* Make renumbering affect all indirect references and update xref */
		if (opts->do_garbage >= 2 || opts->do_linear)
			renumberobjs(ctx, doc, opts);

		/* Truncate the xref after compacting and renumbering */
		if ((opts->do_garbage >= 2 || opts->do_linear) && !opts->do_incremental)
		{
			xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
			expand_lists(ctx, opts, xref_len);
			/* Trim unused entries off the end. */
			while (xref_len > 0 && !opts->use_list[xref_len-1])
				xref_len--;
		}

		if (opts->do_linear)
			linearize(ctx, doc, opts);

		if (opts->do_incremental)
		{
			int i;

			doc->disallow_new_increments = 1;

			/* Write one set of objects plus an xref for each
			 * incremental section, counting xref_base down from
			 * num_incremental_sections - 1 to 0. */
			for (i = 0; i < doc->num_incremental_sections; i++)
			{
				doc->xref_base = doc->num_incremental_sections - i - 1;

				writeobjects(ctx, doc, opts, 0);

#ifdef DEBUG_WRITING
				dump_object_details(ctx, doc, opts);
#endif

				for (num = 0; num < xref_len; num++)
				{
					if (!opts->use_list[num] && pdf_xref_is_incremental(ctx, doc, num))
					{
						/* Make unreusable. FIXME: would be better to link to existing free list */
						opts->gen_list[num] = 65535;
						opts->ofs_list[num] = 0;
					}
				}

				opts->first_xref_offset = fz_tell_output(ctx, opts->out);
				/* Use the same kind of xref as the document has. */
				if (doc->has_xref_streams)
					writexrefstream(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
				else
					writexref(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);

				doc->xref_sections[doc->xref_base].end_ofs = fz_tell_output(ctx, opts->out);
			}

			doc->xref_base = 0;
			doc->disallow_new_increments = 0;
		}
		else
		{
			writeobjects(ctx, doc, opts, 0);

#ifdef DEBUG_WRITING
			dump_object_details(ctx, doc, opts);
#endif

			/* Construct linked list of free object slots */
			/* Each free slot's ofs_list entry holds the number of
			 * the next free slot, starting from slot 0. */
			lastfree = 0;
			for (num = 0; num < xref_len; num++)
			{
				if (!opts->use_list[num])
				{
					opts->gen_list[num]++;
					opts->ofs_list[lastfree] = num;
					lastfree = num;
				}
			}

			if (opts->do_linear && opts->page_count > 0)
			{
				/* Linearized output is written twice: the objects
				 * and xref written so far establish the lengths;
				 * after the hint stream has been made and the
				 * offsets adjusted for it, the output is rewound
				 * and everything is written again. */
				opts->main_xref_offset = fz_tell_output(ctx, opts->out);
				writexref(ctx, doc, opts, 0, opts->start, 0, 0, opts->first_xref_offset);
				opts->file_len = fz_tell_output(ctx, opts->out);

				make_hint_stream(ctx, doc, opts);
				if (opts->do_ascii)
				{
					/* presumably the size of the hint stream once
					 * ASCII hex encoded: two characters per byte
					 * plus line breaks and a terminator - TODO
					 * confirm against the stream writer */
					opts->hintstream_len *= 2;
					opts->hintstream_len += 1 + ((opts->hintstream_len+63)>>6);
				}
				opts->file_len += opts->hintstream_len;
				opts->main_xref_offset += opts->hintstream_len;
				update_linearization_params(ctx, doc, opts);
				fz_seek_output(ctx, opts->out, 0, 0);
				writeobjects(ctx, doc, opts, 1);

				padto(ctx, opts->out, opts->main_xref_offset);
				writexref(ctx, doc, opts, 0, opts->start, 0, 0, opts->first_xref_offset);
			}
			else
			{
				opts->first_xref_offset = fz_tell_output(ctx, opts->out);
				writexref(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
			}

			doc->xref_sections[0].end_ofs = fz_tell_output(ctx, opts->out);
		}

		complete_signatures(ctx, doc, opts);

		doc->dirty = 0;
	}
	fz_always(ctx)
	{
#ifdef DEBUG_LINEARIZATION
		page_objects_dump(opts);
		objects_dump(ctx, doc, opts);
#endif
		finalise_write_state(ctx, opts);
		/* Only drop a crypt created for this save, never the
		 * document's own. */
		if (opts->crypt != doc->crypt)
			pdf_drop_crypt(ctx, opts->crypt);
		doc->save_in_progress = 0;
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
3531
/*
	Return 1 if any incremental xref section of the document is flagged
	as having unsaved signatures, 0 otherwise.
*/
int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc)
{
	int i;

	/* Walk the incremental sections from the highest index down to 0. */
	for (i = doc->num_incremental_sections - 1; i >= 0; i--)
	{
		if (doc->xref_sections[i].unsaved_sigs)
			return 1;
	}
	return 0;
}
3544
/*
	Write doc to an already opened output.

	doc: the document to write; nothing happens if NULL.
	out: the output to write to; nothing happens if NULL. The output is
	neither closed nor dropped here.
	in_opts: the write options, or NULL for pdf_default_write_options.

	Throws if an incremental write is requested together with a repaired
	file, garbage collection, linearization or a change of encryption,
	or if the document has unsaved signatures and the output cannot be
	read back as a stream. Errors raised while saving are rethrown.

	doc->save_in_progress is cleared again if the save fails at any
	point after it has been set, so that a failed save does not leave
	the document flagged as still being saved.
*/
void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, pdf_write_options *in_opts)
{
	pdf_write_options opts_defaults = pdf_default_write_options;
	pdf_write_state opts = { 0 };

	if (!doc || !out)
		return;

	if (!in_opts)
		in_opts = &opts_defaults;

	if (in_opts->do_incremental && doc->repair_attempted)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a repaired file");
	if (in_opts->do_incremental && in_opts->do_garbage)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
	if (in_opts->do_incremental && in_opts->do_linear)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
	if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes when changing encryption");
	if (pdf_has_unsaved_sigs(ctx, doc) && !fz_output_supports_stream(ctx, out))
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't write pdf that has unsaved sigs to a fz_output unless it supports fz_stream_from_output!");

	opts.out = out;

	fz_try(ctx)
	{
		prepare_for_save(ctx, doc, in_opts);
		do_pdf_save_document(ctx, doc, &opts, in_opts);
	}
	fz_catch(ctx)
	{
		/* prepare_for_save sets the flag; errors thrown before
		 * do_pdf_save_document reaches its own cleanup would
		 * otherwise leave it set. */
		doc->save_in_progress = 0;
		fz_rethrow(ctx);
	}
}
3573
/*
	Save doc to the file called filename.

	doc: the document to save; nothing happens if NULL.
	filename: path of the file to write. For incremental saves the
	output is opened for appending, otherwise it is opened afresh.
	in_opts: the write options, or NULL for pdf_default_write_options.

	Throws if an incremental save is requested for a document with no
	underlying file, for a repaired file, or together with garbage
	collection, linearization or a change of encryption. Errors raised
	while saving are rethrown after the output has been dropped.

	If do_appearance is set, the annotations of every page are updated
	before saving (with do_appearance > 1 all annotations and widgets
	are first flagged as needing a new appearance). Failures there only
	produce a warning.

	An incremental save of a document with no incremental sections
	writes nothing and does not open the file.

	doc->save_in_progress is always cleared on the way out, including
	when opening the output file fails.
*/
void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *in_opts)
{
	pdf_write_options opts_defaults = pdf_default_write_options;
	pdf_write_state opts = { 0 };

	if (!doc)
		return;

	if (!in_opts)
		in_opts = &opts_defaults;

	if (in_opts->do_incremental && !doc->file)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a new document");
	if (in_opts->do_incremental && doc->repair_attempted)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a repaired file");
	if (in_opts->do_incremental && in_opts->do_garbage)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
	if (in_opts->do_incremental && in_opts->do_linear)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
	if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes when changing encryption");

	if (in_opts->do_appearance > 0)
	{
		int i, n = pdf_count_pages(ctx, doc);
		for (i = 0; i < n; ++i)
		{
			pdf_page *page = pdf_load_page(ctx, doc, i);
			fz_try(ctx)
			{
				if (in_opts->do_appearance > 1)
				{
					pdf_annot *annot;
					for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
						annot->needs_new_ap = 1;
					for (annot = pdf_first_widget(ctx, page); annot; annot = pdf_next_widget(ctx, annot))
						annot->needs_new_ap = 1;
				}
				pdf_update_page(ctx, page);
			}
			fz_always(ctx)
				fz_drop_page(ctx, &page->super);
			fz_catch(ctx)
				fz_warn(ctx, "could not create annotation appearances");
		}
	}

	fz_var(opts.out);

	fz_try(ctx)
	{
		prepare_for_save(ctx, doc, in_opts);

		/* If no changes, nothing to write */
		if (!in_opts->do_incremental || doc->num_incremental_sections != 0)
		{
			/* Opened inside the try block so that a failure to
			 * open the file still resets save_in_progress. */
			opts.out = fz_new_output_with_path(ctx, filename, in_opts->do_incremental ? 1 : 0);
			do_pdf_save_document(ctx, doc, &opts, in_opts);
			fz_close_output(ctx, opts.out);
		}
	}
	fz_always(ctx)
	{
		fz_drop_output(ctx, opts.out);
		opts.out = NULL;
		doc->save_in_progress = 0;
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
3652
3653 char *
pdf_format_write_options(fz_context * ctx,char * buffer,size_t buffer_len,const pdf_write_options * opts)3654 pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts)
3655 {
3656 #define ADD_OPT(S) do { if (!first) fz_strlcat(buffer, ",", buffer_len); fz_strlcat(buffer, (S), buffer_len); first = 0; } while (0)
3657
3658 int first = 1;
3659 *buffer = 0;
3660 if (opts->do_decompress)
3661 ADD_OPT("decompress=yes");
3662 if (opts->do_compress)
3663 ADD_OPT("compress=yes");
3664 if (opts->do_compress_fonts)
3665 ADD_OPT("compress-fonts=yes");
3666 if (opts->do_compress_images)
3667 ADD_OPT("compress-images=yes");
3668 if (opts->do_ascii)
3669 ADD_OPT("ascii=yes");
3670 if (opts->do_pretty)
3671 ADD_OPT("pretty=yes");
3672 if (opts->do_linear)
3673 ADD_OPT("linearize=yes");
3674 if (opts->do_clean)
3675 ADD_OPT("clean=yes");
3676 if (opts->do_sanitize)
3677 ADD_OPT("sanitize=yes");
3678 if (opts->do_incremental)
3679 ADD_OPT("incremental=yes");
3680 if (opts->do_encrypt == PDF_ENCRYPT_NONE)
3681 ADD_OPT("decrypt=yes");
3682 else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
3683 ADD_OPT("decrypt=no");
3684 switch(opts->do_encrypt)
3685 {
3686 default:
3687 case PDF_ENCRYPT_UNKNOWN:
3688 break;
3689 case PDF_ENCRYPT_NONE:
3690 ADD_OPT("encrypt=no");
3691 break;
3692 case PDF_ENCRYPT_KEEP:
3693 ADD_OPT("encrypt=keep");
3694 break;
3695 case PDF_ENCRYPT_RC4_40:
3696 ADD_OPT("encrypt=rc4-40");
3697 break;
3698 case PDF_ENCRYPT_RC4_128:
3699 ADD_OPT("encrypt=rc4-128");
3700 break;
3701 case PDF_ENCRYPT_AES_128:
3702 ADD_OPT("encrypt=aes-128");
3703 break;
3704 case PDF_ENCRYPT_AES_256:
3705 ADD_OPT("encrypt=aes-256");
3706 break;
3707 }
3708 if (strlen(opts->opwd_utf8)) {
3709 ADD_OPT("owner-password=");
3710 fz_strlcat(buffer, opts->opwd_utf8, buffer_len);
3711 }
3712 if (strlen(opts->upwd_utf8)) {
3713 ADD_OPT("user-password=");
3714 fz_strlcat(buffer, opts->upwd_utf8, buffer_len);
3715 }
3716 {
3717 char temp[32];
3718 ADD_OPT("permissions=");
3719 fz_snprintf(temp, sizeof(temp), "%d", opts->permissions);
3720 fz_strlcat(buffer, temp, buffer_len);
3721 }
3722 switch(opts->do_garbage)
3723 {
3724 case 0:
3725 break;
3726 case 1:
3727 ADD_OPT("garbage=yes");
3728 break;
3729 case 2:
3730 ADD_OPT("garbage=compact");
3731 break;
3732 case 3:
3733 ADD_OPT("garbage=deduplicate");
3734 break;
3735 default:
3736 {
3737 char temp[32];
3738 fz_snprintf(temp, sizeof(temp), "%d", opts->do_garbage);
3739 ADD_OPT("garbage=");
3740 fz_strlcat(buffer, temp, buffer_len);
3741 break;
3742 }
3743 }
3744 switch(opts->do_appearance)
3745 {
3746 case 1:
3747 ADD_OPT("appearance=yes");
3748 break;
3749 case 2:
3750 ADD_OPT("appearance=all");
3751 break;
3752 }
3753
3754 #undef ADD_OPT
3755
3756 return buffer;
3757 }
3758
/*
	State of the PDF document writer.
*/
typedef struct
{
	/* Base writer; kept as the first member because the callbacks
	 * cast the fz_document_writer pointer they are given to a
	 * pdf_writer pointer. */
	fz_document_writer super;
	pdf_document *pdf; /* Document the pages are added to. */
	pdf_write_options opts; /* Options parsed from the option string. */
	fz_output *out; /* Where the document is written on close. */

	/* State of the page in progress: set up by begin_page, used and
	 * released by end_page. */
	fz_rect mediabox;
	pdf_obj *resources;
	fz_buffer *contents;
} pdf_writer;
3770
3771 static fz_device *
pdf_writer_begin_page(fz_context * ctx,fz_document_writer * wri_,fz_rect mediabox)3772 pdf_writer_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
3773 {
3774 pdf_writer *wri = (pdf_writer*)wri_;
3775 wri->mediabox = mediabox;
3776 return pdf_page_write(ctx, wri->pdf, wri->mediabox, &wri->resources, &wri->contents);
3777 }
3778
3779 static void
pdf_writer_end_page(fz_context * ctx,fz_document_writer * wri_,fz_device * dev)3780 pdf_writer_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
3781 {
3782 pdf_writer *wri = (pdf_writer*)wri_;
3783 pdf_obj *obj = NULL;
3784
3785 fz_var(obj);
3786
3787 fz_try(ctx)
3788 {
3789 fz_close_device(ctx, dev);
3790 obj = pdf_add_page(ctx, wri->pdf, wri->mediabox, 0, wri->resources, wri->contents);
3791 pdf_insert_page(ctx, wri->pdf, -1, obj);
3792 }
3793 fz_always(ctx)
3794 {
3795 fz_drop_device(ctx, dev);
3796 pdf_drop_obj(ctx, obj);
3797 fz_drop_buffer(ctx, wri->contents);
3798 wri->contents = NULL;
3799 pdf_drop_obj(ctx, wri->resources);
3800 wri->resources = NULL;
3801 }
3802 fz_catch(ctx)
3803 fz_rethrow(ctx);
3804 }
3805
3806 static void
pdf_writer_close_writer(fz_context * ctx,fz_document_writer * wri_)3807 pdf_writer_close_writer(fz_context *ctx, fz_document_writer *wri_)
3808 {
3809 pdf_writer *wri = (pdf_writer*)wri_;
3810 pdf_write_document(ctx, wri->pdf, wri->out, &wri->opts);
3811 fz_close_output(ctx, wri->out);
3812 }
3813
3814 static void
pdf_writer_drop_writer(fz_context * ctx,fz_document_writer * wri_)3815 pdf_writer_drop_writer(fz_context *ctx, fz_document_writer *wri_)
3816 {
3817 pdf_writer *wri = (pdf_writer*)wri_;
3818 fz_drop_buffer(ctx, wri->contents);
3819 pdf_drop_obj(ctx, wri->resources);
3820 pdf_drop_document(ctx, wri->pdf);
3821 fz_drop_output(ctx, wri->out);
3822 }
3823
3824 fz_document_writer *
fz_new_pdf_writer_with_output(fz_context * ctx,fz_output * out,const char * options)3825 fz_new_pdf_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
3826 {
3827 pdf_writer *wri = fz_new_derived_document_writer(ctx, pdf_writer, pdf_writer_begin_page, pdf_writer_end_page, pdf_writer_close_writer, pdf_writer_drop_writer);
3828
3829 fz_try(ctx)
3830 {
3831 pdf_parse_write_options(ctx, &wri->opts, options);
3832 wri->out = out;
3833 wri->pdf = pdf_create_document(ctx);
3834 }
3835 fz_catch(ctx)
3836 {
3837 pdf_drop_document(ctx, wri->pdf);
3838 fz_free(ctx, wri);
3839 fz_rethrow(ctx);
3840 }
3841
3842 return (fz_document_writer*)wri;
3843 }
3844
3845 fz_document_writer *
fz_new_pdf_writer(fz_context * ctx,const char * path,const char * options)3846 fz_new_pdf_writer(fz_context *ctx, const char *path, const char *options)
3847 {
3848 fz_output *out = fz_new_output_with_path(ctx, path ? path : "out.pdf", 0);
3849 fz_document_writer *wri = NULL;
3850 fz_try(ctx)
3851 wri = fz_new_pdf_writer_with_output(ctx, out, options);
3852 fz_catch(ctx)
3853 {
3854 fz_drop_output(ctx, out);
3855 fz_rethrow(ctx);
3856 }
3857 return wri;
3858 }
3859