1 #include "mupdf/fitz.h"
2 #include "mupdf/pdf.h"
3 
4 #include <zlib.h>
5 
6 #include <assert.h>
7 #include <limits.h>
8 #include <string.h>
9 
10 #include <stdio.h> /* for debug printing */
11 /* #define DEBUG_LINEARIZATION */
12 /* #define DEBUG_HEAP_SORT */
13 /* #define DEBUG_WRITING */
14 
15 #define SIG_EXTRAS_SIZE (1024)
16 
17 #define SLASH_BYTE_RANGE ("/ByteRange")
18 #define SLASH_CONTENTS ("/Contents")
19 #define SLASH_FILTER ("/Filter")
20 
21 
22 /*
23 	As part of linearization, we need to keep a list of what objects are used
24 	by what page. We do this by recording the objects used in a given page
25 	in a page_objects structure. We have a list of these structures (one per
26 	page) in the page_objects_list structure.
27 
	The page_objects structure collects object numbers in the object array.
	Insertion simply appends (growing the array when it is full); we then
	heapsort and dedupe at the end for a total worst case n log n time.
31 
32 	The magic heap invariant is that:
33 		entry[n] >= entry[(n+1)*2-1] & entry[n] >= entry[(n+1)*2]
34 	or equivalently:
35 		entry[(n-1)>>1] >= entry[n]
36 
37 	For a discussion of the heap data structure (and heapsort) see Kingston,
38 	"Algorithms and Data Structures".
39 */
40 
/*
 * Per-page record of the objects a page uses.
 *
 * Instances are allocated with extra room after the struct so that
 * object[] can hold 'cap' entries; the declared length of 1 is only a
 * placeholder for that trailing storage.
 */
typedef struct
{
	int num_shared;		/* dumped as "Number of shared objects" */
	int page_object_number;	/* object number of the page object itself */
	int num_objects;	/* dumped as "Number of objects" */
	int min_ofs;		/* start of the page's byte range */
	int max_ofs;		/* end of the page's byte range */

	/* Extensible list of objects used on this page */
	int cap;		/* entries allocated in object[] */
	int len;		/* entries currently in use in object[] */
	int object[1];
} page_objects;
52 
53 typedef struct {
54 	int cap;
55 	int len;
56 	page_objects *page[1];
57 } page_objects_list;
58 
59 typedef struct
60 {
61 	fz_output *out;
62 
63 	int do_incremental;
64 	int do_tight;
65 	int do_ascii;
66 	int do_expand;
67 	int do_compress;
68 	int do_compress_images;
69 	int do_compress_fonts;
70 	int do_garbage;
71 	int do_linear;
72 	int do_clean;
73 	int do_encrypt;
74 
75 	int list_len;
76 	int *use_list;
77 	int64_t *ofs_list;
78 	int *gen_list;
79 	int *renumber_map;
80 
81 	/* The following extras are required for linearization */
82 	int *rev_renumber_map;
83 	int start;
84 	int64_t first_xref_offset;
85 	int64_t main_xref_offset;
86 	int64_t first_xref_entry_offset;
87 	int64_t file_len;
88 	int hints_shared_offset;
89 	int hintstream_len;
90 	pdf_obj *linear_l;
91 	pdf_obj *linear_h0;
92 	pdf_obj *linear_h1;
93 	pdf_obj *linear_o;
94 	pdf_obj *linear_e;
95 	pdf_obj *linear_n;
96 	pdf_obj *linear_t;
97 	pdf_obj *hints_s;
98 	pdf_obj *hints_length;
99 	int page_count;
100 	page_objects_list *page_object_lists;
101 	int crypt_object_number;
102 	char opwd_utf8[128];
103 	char upwd_utf8[128];
104 	int permissions;
105 	pdf_crypt *crypt;
106 } pdf_write_state;
107 
/*
 * Constants for use with use_list.
 *
 * If use_list[num] = 0, then object num is unused.
 * If use_list[num] & PARAMS, then object num is the linearisation params obj.
 * If use_list[num] & CATALOGUE, then object num is used by the catalogue.
 * If use_list[num] & PAGE1, then object num is used by page 1.
 * If use_list[num] & SHARED, then object num is shared between pages.
 * If use_list[num] & PAGE_OBJECT then this must be the first object in a page.
 * If use_list[num] & OTHER_OBJECTS then this should appear in section 9.
 * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT).
 *
 * The low 8 bits hold the flags; everything above them holds a page number.
 * (Bit 0 has no named constant; the garbage collector stores a plain 1 to
 * mean "reachable".)
 */
enum
{
	USE_CATALOGUE = 1 << 1,
	USE_PAGE1 = 1 << 2,
	USE_SHARED = 1 << 3,
	USE_PARAMS = 1 << 4,
	USE_HINTS = 1 << 5,
	USE_PAGE_OBJECT = 1 << 6,
	USE_OTHER_OBJECTS = 1 << 7,
	USE_PAGE_SHIFT = 8,
	USE_PAGE_MASK = ~0xff
};
132 
133 static void
expand_lists(fz_context * ctx,pdf_write_state * opts,int num)134 expand_lists(fz_context *ctx, pdf_write_state *opts, int num)
135 {
136 	int i;
137 
138 	/* objects are numbered 0..num and maybe two additional objects for linearization */
139 	num += 3;
140 	opts->use_list = fz_realloc_array(ctx, opts->use_list, num, int);
141 	opts->ofs_list = fz_realloc_array(ctx, opts->ofs_list, num, int64_t);
142 	opts->gen_list = fz_realloc_array(ctx, opts->gen_list, num, int);
143 	opts->renumber_map = fz_realloc_array(ctx, opts->renumber_map, num, int);
144 	opts->rev_renumber_map = fz_realloc_array(ctx, opts->rev_renumber_map, num, int);
145 
146 	for (i = opts->list_len; i < num; i++)
147 	{
148 		opts->use_list[i] = 0;
149 		opts->ofs_list[i] = 0;
150 		opts->gen_list[i] = 0;
151 		opts->renumber_map[i] = i;
152 		opts->rev_renumber_map[i] = i;
153 	}
154 	opts->list_len = num;
155 }
156 
157 /*
158  * page_objects and page_object_list handling functions
159  */
160 static page_objects_list *
page_objects_list_create(fz_context * ctx)161 page_objects_list_create(fz_context *ctx)
162 {
163 	page_objects_list *pol = fz_calloc(ctx, 1, sizeof(*pol));
164 
165 	pol->cap = 1;
166 	pol->len = 0;
167 	return pol;
168 }
169 
170 static void
page_objects_list_destroy(fz_context * ctx,page_objects_list * pol)171 page_objects_list_destroy(fz_context *ctx, page_objects_list *pol)
172 {
173 	int i;
174 
175 	if (!pol)
176 		return;
177 	for (i = 0; i < pol->len; i++)
178 	{
179 		fz_free(ctx, pol->page[i]);
180 	}
181 	fz_free(ctx, pol);
182 }
183 
184 static void
page_objects_list_ensure(fz_context * ctx,page_objects_list ** pol,int newcap)185 page_objects_list_ensure(fz_context *ctx, page_objects_list **pol, int newcap)
186 {
187 	int oldcap = (*pol)->cap;
188 	if (newcap <= oldcap)
189 		return;
190 	*pol = fz_realloc(ctx, *pol, sizeof(page_objects_list) + (newcap-1)*sizeof(page_objects *));
191 	memset(&(*pol)->page[oldcap], 0, (newcap-oldcap)*sizeof(page_objects *));
192 	(*pol)->cap = newcap;
193 }
194 
195 static page_objects *
page_objects_create(fz_context * ctx)196 page_objects_create(fz_context *ctx)
197 {
198 	int initial_cap = 8;
199 	page_objects *po = fz_calloc(ctx, 1, sizeof(*po) + (initial_cap-1) * sizeof(int));
200 
201 	po->cap = initial_cap;
202 	po->len = 0;
203 	return po;
204 }
205 
206 static void
page_objects_insert(fz_context * ctx,page_objects ** ppo,int i)207 page_objects_insert(fz_context *ctx, page_objects **ppo, int i)
208 {
209 	page_objects *po;
210 
211 	/* Make a page_objects if we don't have one */
212 	if (*ppo == NULL)
213 		*ppo = page_objects_create(ctx);
214 
215 	po = *ppo;
216 	/* page_objects insertion: extend the page_objects by 1, and put us on the end */
217 	if (po->len == po->cap)
218 	{
219 		po = fz_realloc(ctx, po, sizeof(page_objects) + (po->cap*2 - 1)*sizeof(int));
220 		po->cap *= 2;
221 		*ppo = po;
222 	}
223 	po->object[po->len++] = i;
224 }
225 
226 static void
page_objects_list_insert(fz_context * ctx,pdf_write_state * opts,int page,int object)227 page_objects_list_insert(fz_context *ctx, pdf_write_state *opts, int page, int object)
228 {
229 	page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
230 	if (object >= opts->list_len)
231 		expand_lists(ctx, opts, object);
232 	if (opts->page_object_lists->len < page+1)
233 		opts->page_object_lists->len = page+1;
234 	page_objects_insert(ctx, &opts->page_object_lists->page[page], object);
235 }
236 
237 static void
page_objects_list_set_page_object(fz_context * ctx,pdf_write_state * opts,int page,int object)238 page_objects_list_set_page_object(fz_context *ctx, pdf_write_state *opts, int page, int object)
239 {
240 	page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
241 	if (object >= opts->list_len)
242 		expand_lists(ctx, opts, object);
243 	opts->page_object_lists->page[page]->page_object_number = object;
244 }
245 
246 static void
page_objects_sort(fz_context * ctx,page_objects * po)247 page_objects_sort(fz_context *ctx, page_objects *po)
248 {
249 	int i, j;
250 	int n = po->len;
251 
252 	/* Step 1: Make a heap */
253 	/* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
254 	for (i = 1; i < n; i++)
255 	{
256 		/* Now bubble backwards to maintain heap invariant */
257 		j = i;
258 		while (j != 0)
259 		{
260 			int tmp;
261 			int k = (j-1)>>1;
262 			if (po->object[k] >= po->object[j])
263 				break;
264 			tmp = po->object[k];
265 			po->object[k] = po->object[j];
266 			po->object[j] = tmp;
267 			j = k;
268 		}
269 	}
270 
271 	/* Step 2: Heap sort */
272 	/* Invariant: valid heap in [0..i), sorted list in [i..n) */
273 	/* Initially: i = n */
274 	for (i = n-1; i > 0; i--)
275 	{
276 		/* Swap the maximum (0th) element from the page_objects into its place
277 		 * in the sorted list (position i). */
278 		int tmp = po->object[0];
279 		po->object[0] = po->object[i];
280 		po->object[i] = tmp;
281 		/* Now, the page_objects is invalid because the 0th element is out
282 		 * of place. Bubble it until the page_objects is valid. */
283 		j = 0;
284 		while (1)
285 		{
286 			/* Children are k and k+1 */
287 			int k = (j+1)*2-1;
288 			/* If both children out of the page_objects, we're done */
289 			if (k > i-1)
290 				break;
291 			/* If both are in the page_objects, pick the larger one */
292 			if (k < i-1 && po->object[k] < po->object[k+1])
293 				k++;
294 			/* If j is bigger than k (i.e. both of its children),
295 			 * we're done */
296 			if (po->object[j] > po->object[k])
297 				break;
298 			tmp = po->object[k];
299 			po->object[k] = po->object[j];
300 			po->object[j] = tmp;
301 			j = k;
302 		}
303 	}
304 }
305 
306 static int
order_ge(int ui,int uj)307 order_ge(int ui, int uj)
308 {
309 	/*
310 	For linearization, we need to order the sections as follows:
311 
312 		Remaining pages					(Part 7)
313 		Shared objects					(Part 8)
314 		Objects not associated with any page		(Part 9)
315 		Any "other" objects
316 							(Header)(Part 1)
317 		(Linearization params)				(Part 2)
318 					(1st page Xref/Trailer)	(Part 3)
319 		Catalogue (and other document level objects)	(Part 4)
320 		First page					(Part 6)
321 		(Primary Hint stream)			(*)	(Part 5)
322 		Any free objects
323 
324 	Note, this is NOT the same order they appear in
325 	the final file!
326 
327 	(*) The PDF reference gives us the option of putting the hint stream
328 	after the first page, and we take it, for simplicity.
329 	*/
330 
331 	/* If the 2 objects are in the same section, then page object comes first. */
332 	if (((ui ^ uj) & ~USE_PAGE_OBJECT) == 0)
333 		return ((ui & USE_PAGE_OBJECT) == 0);
334 	/* Put unused objects last */
335 	else if (ui == 0)
336 		return 1;
337 	else if (uj == 0)
338 		return 0;
339 	/* Put the hint stream before that... */
340 	else if (ui & USE_HINTS)
341 		return 1;
342 	else if (uj & USE_HINTS)
343 		return 0;
344 	/* Put page 1 before that... */
345 	else if (ui & USE_PAGE1)
346 		return 1;
347 	else if (uj & USE_PAGE1)
348 		return 0;
349 	/* Put the catalogue before that... */
350 	else if (ui & USE_CATALOGUE)
351 		return 1;
352 	else if (uj & USE_CATALOGUE)
353 		return 0;
354 	/* Put the linearization params before that... */
355 	else if (ui & USE_PARAMS)
356 		return 1;
357 	else if (uj & USE_PARAMS)
358 		return 0;
359 	/* Put other objects before that */
360 	else if (ui & USE_OTHER_OBJECTS)
361 		return 1;
362 	else if (uj & USE_OTHER_OBJECTS)
363 		return 0;
364 	/* Put shared objects before that... */
365 	else if (ui & USE_SHARED)
366 		return 1;
367 	else if (uj & USE_SHARED)
368 		return 0;
369 	/* And otherwise, order by the page number on which
370 	 * they are used. */
371 	return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT);
372 }
373 
/*
 * Heapsort an array of indices by the values they refer to.
 *
 * list	n indices into val; reordered in place.
 * val	values being compared; never modified.
 * ge	ordering predicate: ge(a, b) is non-zero when a sorts at or
 *	after b.
 *
 * On return, ge(val[list[i+1]], val[list[i]]) holds for adjacent entries.
 */
static void
heap_sort(int *list, int n, const int *val, int (*ge)(int, int))
{
	int idx, pos, held;

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Initially:\n");
	for (idx = 0; idx < n; idx++)
	{
		fprintf(stderr, "%d: %d %x\n", idx, list[idx], val[list[idx]]);
	}
#endif
	/* Phase 1: build a heap.
	 * [0..idx) is a valid heap; element idx is sifted up into it. */
	for (idx = 1; idx < n; idx++)
	{
		for (pos = idx; pos != 0; )
		{
			int parent = (pos-1)>>1;

			if (ge(val[list[parent]], val[list[pos]]))
				break;
			held = list[parent];
			list[parent] = list[pos];
			list[pos] = held;
			pos = parent;
		}
	}
#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Valid heap:\n");
	for (idx = 0; idx < n; idx++)
	{
		int child = (idx+1)*2-1;

		fprintf(stderr, "%d: %d %x ", idx, list[idx], val[list[idx]]);
		if (child < n)
		{
			if (ge(val[list[idx]], val[list[child]]))
				fprintf(stderr, "OK ");
			else
				fprintf(stderr, "BAD ");
		}
		if (child+1 < n)
		{
			if (ge(val[list[idx]], val[list[child+1]]))
				fprintf(stderr, "OK\n");
			else
				fprintf(stderr, "BAD\n");
		}
		else
			fprintf(stderr, "\n");
	}
#endif

	/* Phase 2: repeatedly move the heap's top entry to the end.
	 * [0..idx) is a valid heap; [idx..n) is sorted. */
	for (idx = n-1; idx > 0; idx--)
	{
		/* The root is the greatest remaining entry; park it at idx. */
		held = list[0];
		list[0] = list[idx];
		list[idx] = held;

		/* The new root may be out of place: sift it down. */
		pos = 0;
		for (;;)
		{
			/* Children are child and child+1 */
			int child = (pos+1)*2-1;

			/* No children left inside the heap */
			if (child >= idx)
				break;
			/* Two children inside the heap: follow the greater */
			if (child+1 < idx && ge(val[list[child+1]], val[list[child]]))
				child++;
			/* Parent already at or after both children */
			if (ge(val[list[pos]], val[list[child]]))
				break;
			held = list[child];
			list[child] = list[pos];
			list[pos] = held;
			pos = child;
		}
	}
#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Sorted:\n");
	for (idx = 0; idx < n; idx++)
	{
		fprintf(stderr, "%d: %d %x ", idx, list[idx], val[list[idx]]);
		if (idx+1 < n)
		{
			if (ge(val[list[idx+1]], val[list[idx]]))
				fprintf(stderr, "OK");
			else
				fprintf(stderr, "BAD");
		}
		fprintf(stderr, "\n");
	}
#endif
}
479 
480 static void
page_objects_dedupe(fz_context * ctx,page_objects * po)481 page_objects_dedupe(fz_context *ctx, page_objects *po)
482 {
483 	int i, j;
484 	int n = po->len-1;
485 
486 	for (i = 0; i < n; i++)
487 	{
488 		if (po->object[i] == po->object[i+1])
489 			break;
490 	}
491 	j = i; /* j points to the last valid one */
492 	i++; /* i points to the first one we haven't looked at */
493 	for (; i < n; i++)
494 	{
495 		if (po->object[j] != po->object[i])
496 			po->object[++j] = po->object[i];
497 	}
498 	po->len = j+1;
499 }
500 
501 static void
page_objects_list_sort_and_dedupe(fz_context * ctx,page_objects_list * pol)502 page_objects_list_sort_and_dedupe(fz_context *ctx, page_objects_list *pol)
503 {
504 	int i;
505 	int n = pol->len;
506 
507 	for (i = 0; i < n; i++)
508 	{
509 		page_objects_sort(ctx, pol->page[i]);
510 		page_objects_dedupe(ctx, pol->page[i]);
511 	}
512 }
513 
514 #ifdef DEBUG_LINEARIZATION
515 static void
page_objects_dump(pdf_write_state * opts)516 page_objects_dump(pdf_write_state *opts)
517 {
518 	page_objects_list *pol = opts->page_object_lists;
519 	int i, j;
520 
521 	for (i = 0; i < pol->len; i++)
522 	{
523 		page_objects *p = pol->page[i];
524 		fprintf(stderr, "Page %d\n", i+1);
525 		for (j = 0; j < p->len; j++)
526 		{
527 			int o = p->object[j];
528 			fprintf(stderr, "\tObject %d: use=%x\n", o, opts->use_list[o]);
529 		}
530 		fprintf(stderr, "Byte range=%d->%d\n", p->min_ofs, p->max_ofs);
531 		fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", p->num_objects, p->num_shared);
532 		fprintf(stderr, "Page object number=%d\n", p->page_object_number);
533 	}
534 }
535 
536 static void
objects_dump(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)537 objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
538 {
539 	int i;
540 
541 	for (i=0; i < pdf_xref_len(ctx, doc); i++)
542 	{
543 		fprintf(stderr, "Object %d use=%x offset=%d\n", i, opts->use_list[i], (int)opts->ofs_list[i]);
544 	}
545 }
546 #endif
547 
548 /*
549  * Garbage collect objects not reachable from the trailer.
550  */
551 
552 /* Mark a reference. If it's been marked already, return NULL (as no further
553  * processing is required). If it's not, return the resolved object so
554  * that we can continue our recursive marking. If it's a duff reference
555  * return the fact so that we can remove the reference at source.
556  */
markref(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * obj,int * duff)557 static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj, int *duff)
558 {
559 	int num = pdf_to_num(ctx, obj);
560 
561 	if (num <= 0 || num >= pdf_xref_len(ctx, doc))
562 	{
563 		*duff = 1;
564 		return NULL;
565 	}
566 	*duff = 0;
567 	if (opts->use_list[num])
568 		return NULL;
569 
570 	opts->use_list[num] = 1;
571 
572 	/* Bake in /Length in stream objects */
573 	fz_try(ctx)
574 	{
575 		if (pdf_obj_num_is_stream(ctx, doc, num))
576 		{
577 			pdf_obj *len = pdf_dict_get(ctx, obj, PDF_NAME(Length));
578 			if (pdf_is_indirect(ctx, len))
579 			{
580 				opts->use_list[pdf_to_num(ctx, len)] = 0;
581 				len = pdf_resolve_indirect(ctx, len);
582 				pdf_dict_put(ctx, obj, PDF_NAME(Length), len);
583 			}
584 		}
585 	}
586 	fz_catch(ctx)
587 	{
588 		/* Leave broken */
589 	}
590 
591 	obj = pdf_resolve_indirect(ctx, obj);
592 	if (obj == NULL || pdf_is_null(ctx, obj))
593 	{
594 		*duff = 1;
595 		opts->use_list[num] = 0;
596 	}
597 
598 	return obj;
599 }
600 
601 #ifdef DEBUG_MARK_AND_SWEEP
/* Current nesting level of markobj, used to indent debug output. */
static int depth = 0;

/*
 * Print 'depth' spaces to stdout.
 *
 * The global depth is only read here: markobj's matching depth++ and
 * depth-- pairs rely on it keeping its value across calls.
 */
static
void indent(void)
{
	if (depth > 0)
		printf("%*s", depth, "");
}
616 #define DEBUGGING_MARKING(A) do { A; } while (0)
617 #else
618 #define DEBUGGING_MARKING(A) do { } while (0)
619 #endif
620 
621 /* Recursively mark an object. If any references found are duff, then
622  * replace them with nulls. */
markobj(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * obj)623 static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
624 {
625 	int i;
626 
627 	DEBUGGING_MARKING(depth++);
628 
629 	while (pdf_is_indirect(ctx, obj))
630 	{
631 		int duff;
632 		DEBUGGING_MARKING(indent(); printf("Marking object %d\n", pdf_to_num(ctx, obj)));
633 		obj = markref(ctx, doc, opts, obj, &duff);
634 		if (duff)
635 		{
636 			DEBUGGING_MARKING(depth--);
637 			return 1;
638 		}
639 	}
640 
641 	if (pdf_is_dict(ctx, obj))
642 	{
643 		int n = pdf_dict_len(ctx, obj);
644 		for (i = 0; i < n; i++)
645 		{
646 			DEBUGGING_MARKING(indent(); printf("DICT[%d/%d] = %s\n", i, n, pdf_to_name(ctx, pdf_dict_get_key(ctx, obj, i))));
647 			if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)))
648 				pdf_dict_put_val_null(ctx, obj, i);
649 		}
650 	}
651 
652 	else if (pdf_is_array(ctx, obj))
653 	{
654 		int n = pdf_array_len(ctx, obj);
655 		for (i = 0; i < n; i++)
656 		{
657 			DEBUGGING_MARKING(indent(); printf("ARRAY[%d/%d]\n", i, n));
658 			if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)))
659 				pdf_array_put(ctx, obj, i, PDF_NULL);
660 		}
661 	}
662 
663 	DEBUGGING_MARKING(depth--);
664 
665 	return 0;
666 }
667 
668 /*
669  * Scan for and remove duplicate objects (slow)
670  */
671 
removeduplicateobjs(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)672 static void removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
673 {
674 	int num, other, max_num;
675 	int xref_len = pdf_xref_len(ctx, doc);
676 
677 	for (num = 1; num < xref_len; num++)
678 	{
679 		/* Only compare an object to objects preceding it */
680 		for (other = 1; other < num; other++)
681 		{
682 			pdf_obj *a, *b;
683 			int newnum, streama = 0, streamb = 0, differ = 0;
684 
685 			if (num == other || !opts->use_list[num] || !opts->use_list[other])
686 				continue;
687 
688 			/* TODO: resolve indirect references to see if we can omit them */
689 
690 			/*
691 			 * Comparing stream objects data contents would take too long.
692 			 *
693 			 * pdf_obj_num_is_stream calls pdf_cache_object and ensures
694 			 * that the xref table has the objects loaded.
695 			 */
696 			fz_try(ctx)
697 			{
698 				streama = pdf_obj_num_is_stream(ctx, doc, num);
699 				streamb = pdf_obj_num_is_stream(ctx, doc, other);
700 				differ = streama || streamb;
701 				if (streama && streamb && opts->do_garbage >= 4)
702 					differ = 0;
703 			}
704 			fz_catch(ctx)
705 			{
706 				/* Assume different */
707 				differ = 1;
708 			}
709 			if (differ)
710 				continue;
711 
712 			a = pdf_get_xref_entry(ctx, doc, num)->obj;
713 			b = pdf_get_xref_entry(ctx, doc, other)->obj;
714 
715 			if (pdf_objcmp(ctx, a, b))
716 				continue;
717 
718 			if (streama && streamb)
719 			{
720 				/* Check to see if streams match too. */
721 				fz_buffer *sa = NULL;
722 				fz_buffer *sb = NULL;
723 
724 				fz_var(sa);
725 				fz_var(sb);
726 
727 				differ = 1;
728 				fz_try(ctx)
729 				{
730 					unsigned char *dataa, *datab;
731 					size_t lena, lenb;
732 					sa = pdf_load_raw_stream_number(ctx, doc, num);
733 					sb = pdf_load_raw_stream_number(ctx, doc, other);
734 					lena = fz_buffer_storage(ctx, sa, &dataa);
735 					lenb = fz_buffer_storage(ctx, sb, &datab);
736 					if (lena == lenb && memcmp(dataa, datab, lena) == 0)
737 						differ = 0;
738 				}
739 				fz_always(ctx)
740 				{
741 					fz_drop_buffer(ctx, sa);
742 					fz_drop_buffer(ctx, sb);
743 				}
744 				fz_catch(ctx)
745 				{
746 					fz_rethrow(ctx);
747 				}
748 				if (differ)
749 					continue;
750 			}
751 
752 			/* Keep the lowest numbered object */
753 			newnum = fz_mini(num, other);
754 			max_num = fz_maxi(num, other);
755 			if (max_num >= opts->list_len)
756 				expand_lists(ctx, opts, max_num);
757 			opts->renumber_map[num] = newnum;
758 			opts->renumber_map[other] = newnum;
759 			opts->rev_renumber_map[newnum] = num; /* Either will do */
760 			opts->use_list[fz_maxi(num, other)] = 0;
761 
762 			/* One duplicate was found, do not look for another */
763 			break;
764 		}
765 	}
766 }
767 
768 /*
769  * Renumber objects sequentially so the xref is more compact
770  *
771  * This code assumes that any opts->renumber_map[n] <= n for all n.
772  */
773 
compactxref(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)774 static void compactxref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
775 {
776 	int num, newnum;
777 	int xref_len = pdf_xref_len(ctx, doc);
778 
779 	/*
780 	 * Update renumber_map in-place, clustering all used
781 	 * objects together at low object ids. Objects that
782 	 * already should be renumbered will have their new
783 	 * object ids be updated to reflect the compaction.
784 	 */
785 
786 	if (xref_len > opts->list_len)
787 		expand_lists(ctx, opts, xref_len-1);
788 
789 	newnum = 1;
790 	for (num = 1; num < xref_len; num++)
791 	{
792 		/* If it's not used, map it to zero */
793 		if (!opts->use_list[opts->renumber_map[num]])
794 		{
795 			opts->renumber_map[num] = 0;
796 		}
797 		/* If it's not moved, compact it. */
798 		else if (opts->renumber_map[num] == num)
799 		{
800 			opts->rev_renumber_map[newnum] = opts->rev_renumber_map[num];
801 			opts->renumber_map[num] = newnum++;
802 		}
803 		/* Otherwise it's used, and moved. We know that it must have
804 		 * moved down, so the place it's moved to will be in the right
805 		 * place already. */
806 		else
807 		{
808 			opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]];
809 		}
810 	}
811 }
812 
813 /*
814  * Update indirect objects according to renumbering established when
815  * removing duplicate objects and compacting the xref.
816  */
817 
renumberobj(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * obj)818 static void renumberobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
819 {
820 	int i;
821 	int xref_len = pdf_xref_len(ctx, doc);
822 
823 	if (pdf_is_dict(ctx, obj))
824 	{
825 		int n = pdf_dict_len(ctx, obj);
826 		for (i = 0; i < n; i++)
827 		{
828 			pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
829 			pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
830 			if (pdf_is_indirect(ctx, val))
831 			{
832 				int o = pdf_to_num(ctx, val);
833 				if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
834 					val = PDF_NULL;
835 				else
836 					val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
837 				pdf_dict_put_drop(ctx, obj, key, val);
838 			}
839 			else
840 			{
841 				renumberobj(ctx, doc, opts, val);
842 			}
843 		}
844 	}
845 
846 	else if (pdf_is_array(ctx, obj))
847 	{
848 		int n = pdf_array_len(ctx, obj);
849 		for (i = 0; i < n; i++)
850 		{
851 			pdf_obj *val = pdf_array_get(ctx, obj, i);
852 			if (pdf_is_indirect(ctx, val))
853 			{
854 				int o = pdf_to_num(ctx, val);
855 				if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
856 					val = PDF_NULL;
857 				else
858 					val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
859 				pdf_array_put_drop(ctx, obj, i, val);
860 			}
861 			else
862 			{
863 				renumberobj(ctx, doc, opts, val);
864 			}
865 		}
866 	}
867 }
868 
/*
 * Apply renumber_map to the whole document.
 *
 * References in the trailer and in every surviving object are
 * rewritten, the surviving xref entries are moved into a new, compacted
 * table that replaces the document's xref, use_list is rebuilt to match
 * the new numbering, and renumber_map is reset to the identity.
 */
static void renumberobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	pdf_xref_entry *newxref = NULL;
	int newlen;
	int num;
	int *new_use_list;
	int xref_len = pdf_xref_len(ctx, doc);

	new_use_list = fz_calloc(ctx, xref_len+3, sizeof(int));

	fz_var(newxref);
	fz_try(ctx)
	{
		/* Apply renumber map to indirect references in all objects in xref */
		renumberobj(ctx, doc, opts, pdf_trailer(ctx, doc));
		for (num = 0; num < xref_len; num++)
		{
			pdf_obj *obj;
			int to = opts->renumber_map[num];

			/* If object is going to be dropped, don't bother renumbering */
			if (to == 0)
				continue;

			obj = pdf_get_xref_entry(ctx, doc, num)->obj;

			if (!pdf_is_indirect(ctx, obj))
			{
				renumberobj(ctx, doc, opts, obj);
				continue;
			}

			/* The entry itself is just a reference: replace it with
			 * one that points at the new number. */
			obj = pdf_new_indirect(ctx, doc, to, 0);
			fz_try(ctx)
			{
				pdf_update_object(ctx, doc, num, obj);
			}
			fz_always(ctx)
			{
				pdf_drop_obj(ctx, obj);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}

		/* Create new table for the reordered, compacted xref */
		newxref = Memento_label(fz_malloc_array(ctx, xref_len + 3, pdf_xref_entry), "pdf_xref_entries");
		newxref[0] = *pdf_get_xref_entry(ctx, doc, 0);

		/* Move used objects into the new compacted xref */
		newlen = 0;
		for (num = 1; num < xref_len; num++)
		{
			pdf_xref_entry *entry;

			if (!opts->use_list[num])
			{
				/* Unused: release whatever the old entry was holding */
				entry = pdf_get_xref_entry(ctx, doc, num);
				pdf_drop_obj(ctx, entry->obj);
				entry->obj = NULL;
				fz_drop_buffer(ctx, entry->stm_buf);
				entry->stm_buf = NULL;
				continue;
			}

			/* Track the highest new number handed out */
			if (opts->renumber_map[num] > newlen)
				newlen = opts->renumber_map[num];
			entry = pdf_get_xref_entry(ctx, doc, num);
			newxref[opts->renumber_map[num]] = *entry;
			if (entry->obj)
			{
				/* The copy in newxref now holds the object; clear the
				 * old entry's pointer. */
				pdf_set_obj_parent(ctx, entry->obj, opts->renumber_map[num]);
				entry->obj = NULL;
			}
			new_use_list[opts->renumber_map[num]] = opts->use_list[num];
		}

		pdf_replace_xref(ctx, doc, newxref, newlen + 1);
		newxref = NULL;
	}
	fz_catch(ctx)
	{
		fz_free(ctx, newxref);
		fz_free(ctx, new_use_list);
		fz_rethrow(ctx);
	}
	fz_free(ctx, opts->use_list);
	opts->use_list = new_use_list;

	/* Everything now lives at its final number */
	num = 1;
	while (num < xref_len)
	{
		opts->renumber_map[num] = num;
		num++;
	}
}
960 
/*
 * Translate every object number held in the per-page lists (the object
 * arrays and each page_object_number) through renumber_map.
 */
static void page_objects_list_renumber(pdf_write_state *opts)
{
	int pg;

	for (pg = 0; pg < opts->page_object_lists->len; pg++)
	{
		page_objects *entry = opts->page_object_lists->page[pg];
		int k = 0;

		while (k < entry->len)
		{
			entry->object[k] = opts->renumber_map[entry->object[k]];
			k++;
		}
		entry->page_object_number = opts->renumber_map[entry->page_object_number];
	}
}
975 
976 static void
mark_all(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * val,int flag,int page)977 mark_all(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *val, int flag, int page)
978 {
979 	if (pdf_mark_obj(ctx, val))
980 		return;
981 
982 	fz_try(ctx)
983 	{
984 		if (pdf_is_indirect(ctx, val))
985 		{
986 			int num = pdf_to_num(ctx, val);
987 			if (num >= opts->list_len)
988 				expand_lists(ctx, opts, num);
989 			if (opts->use_list[num] & USE_PAGE_MASK)
990 				/* Already used */
991 				opts->use_list[num] |= USE_SHARED;
992 			else
993 				opts->use_list[num] |= flag;
994 			if (page >= 0)
995 				page_objects_list_insert(ctx, opts, page, num);
996 		}
997 
998 		if (pdf_is_dict(ctx, val))
999 		{
1000 			int i, n;
1001 			n = pdf_dict_len(ctx, val);
1002 
1003 			for (i = 0; i < n; i++)
1004 			{
1005 				pdf_obj *v = pdf_dict_get_val(ctx, val, i);
1006 				pdf_obj *type = pdf_dict_get(ctx, v, PDF_NAME(Type));
1007 
1008 				/* Don't walk through the Page tree, or direct to a page. */
1009 				if (pdf_name_eq(ctx, PDF_NAME(Pages), type) || pdf_name_eq(ctx, PDF_NAME(Page), type))
1010 					continue;
1011 
1012 				mark_all(ctx, doc, opts, v, flag, page);
1013 			}
1014 		}
1015 		else if (pdf_is_array(ctx, val))
1016 		{
1017 			int i, n = pdf_array_len(ctx, val);
1018 
1019 			for (i = 0; i < n; i++)
1020 			{
1021 				pdf_obj *v = pdf_array_get(ctx, val, i);
1022 				pdf_obj *type = pdf_dict_get(ctx, v, PDF_NAME(Type));
1023 
1024 				/* Don't walk through the Page tree, or direct to a page. */
1025 				if (pdf_name_eq(ctx, PDF_NAME(Pages), type) || pdf_name_eq(ctx, PDF_NAME(Page), type))
1026 					continue;
1027 
1028 				mark_all(ctx, doc, opts, v, flag, page);
1029 			}
1030 		}
1031 	}
1032 	fz_always(ctx)
1033 	{
1034 		pdf_unmark_obj(ctx, val);
1035 	}
1036 	fz_catch(ctx)
1037 	{
1038 		fz_rethrow(ctx);
1039 	}
1040 }
1041 
1042 static int
mark_pages(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * val,int pagenum)1043 mark_pages(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *val, int pagenum)
1044 {
1045 	if (pdf_mark_obj(ctx, val))
1046 		return pagenum;
1047 
1048 	fz_try(ctx)
1049 	{
1050 		if (pdf_is_dict(ctx, val))
1051 		{
1052 			if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, val, PDF_NAME(Type))))
1053 			{
1054 				int num = pdf_to_num(ctx, val);
1055 				pdf_unmark_obj(ctx, val);
1056 				mark_all(ctx, doc, opts, val, pagenum == 0 ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum);
1057 				page_objects_list_set_page_object(ctx, opts, pagenum, num);
1058 				pagenum++;
1059 				opts->use_list[num] |= USE_PAGE_OBJECT;
1060 			}
1061 			else
1062 			{
1063 				int i, n = pdf_dict_len(ctx, val);
1064 
1065 				for (i = 0; i < n; i++)
1066 				{
1067 					pdf_obj *key = pdf_dict_get_key(ctx, val, i);
1068 					pdf_obj *obj = pdf_dict_get_val(ctx, val, i);
1069 
1070 					if (pdf_name_eq(ctx, PDF_NAME(Kids), key))
1071 						pagenum = mark_pages(ctx, doc, opts, obj, pagenum);
1072 					else
1073 						mark_all(ctx, doc, opts, obj, USE_CATALOGUE, -1);
1074 				}
1075 
1076 				if (pdf_is_indirect(ctx, val))
1077 				{
1078 					int num = pdf_to_num(ctx, val);
1079 					opts->use_list[num] |= USE_CATALOGUE;
1080 				}
1081 			}
1082 		}
1083 		else if (pdf_is_array(ctx, val))
1084 		{
1085 			int i, n = pdf_array_len(ctx, val);
1086 
1087 			for (i = 0; i < n; i++)
1088 			{
1089 				pagenum = mark_pages(ctx, doc, opts, pdf_array_get(ctx, val, i), pagenum);
1090 			}
1091 			if (pdf_is_indirect(ctx, val))
1092 			{
1093 				int num = pdf_to_num(ctx, val);
1094 				opts->use_list[num] |= USE_CATALOGUE;
1095 			}
1096 		}
1097 	}
1098 	fz_always(ctx)
1099 	{
1100 		pdf_unmark_obj(ctx, val);
1101 	}
1102 	fz_catch(ctx)
1103 	{
1104 		fz_rethrow(ctx);
1105 	}
1106 	return pagenum;
1107 }
1108 
1109 static void
mark_root(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * dict)1110 mark_root(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *dict)
1111 {
1112 	int i, n = pdf_dict_len(ctx, dict);
1113 
1114 	if (pdf_mark_obj(ctx, dict))
1115 		return;
1116 
1117 	fz_try(ctx)
1118 	{
1119 		if (pdf_is_indirect(ctx, dict))
1120 		{
1121 			int num = pdf_to_num(ctx, dict);
1122 			opts->use_list[num] |= USE_CATALOGUE;
1123 		}
1124 
1125 		for (i = 0; i < n; i++)
1126 		{
1127 			pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1128 			pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1129 
1130 			if (pdf_name_eq(ctx, PDF_NAME(Pages), key))
1131 				opts->page_count = mark_pages(ctx, doc, opts, val, 0);
1132 			else if (pdf_name_eq(ctx, PDF_NAME(Names), key))
1133 				mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -1);
1134 			else if (pdf_name_eq(ctx, PDF_NAME(Dests), key))
1135 				mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -1);
1136 			else if (pdf_name_eq(ctx, PDF_NAME(Outlines), key))
1137 			{
1138 				int section;
1139 				/* Look at PageMode to decide whether to
1140 				 * USE_OTHER_OBJECTS or USE_PAGE1 here. */
1141 				if (pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(PageMode)), PDF_NAME(UseOutlines)))
1142 					section = USE_PAGE1;
1143 				else
1144 					section = USE_OTHER_OBJECTS;
1145 				mark_all(ctx, doc, opts, val, section, -1);
1146 			}
1147 			else
1148 				mark_all(ctx, doc, opts, val, USE_CATALOGUE, -1);
1149 		}
1150 	}
1151 	fz_always(ctx)
1152 	{
1153 		pdf_unmark_obj(ctx, dict);
1154 	}
1155 	fz_catch(ctx)
1156 	{
1157 		fz_rethrow(ctx);
1158 	}
1159 }
1160 
1161 static void
mark_trailer(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_obj * dict)1162 mark_trailer(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *dict)
1163 {
1164 	int i, n = pdf_dict_len(ctx, dict);
1165 
1166 	if (pdf_mark_obj(ctx, dict))
1167 		return;
1168 
1169 	fz_try(ctx)
1170 	{
1171 		for (i = 0; i < n; i++)
1172 		{
1173 			pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1174 			pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1175 
1176 			if (pdf_name_eq(ctx, PDF_NAME(Root), key))
1177 				mark_root(ctx, doc, opts, val);
1178 			else
1179 				mark_all(ctx, doc, opts, val, USE_CATALOGUE, -1);
1180 		}
1181 	}
1182 	fz_always(ctx)
1183 	{
1184 		pdf_unmark_obj(ctx, dict);
1185 	}
1186 	fz_catch(ctx)
1187 	{
1188 		fz_rethrow(ctx);
1189 	}
1190 }
1191 
1192 static void
add_linearization_objs(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)1193 add_linearization_objs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1194 {
1195 	pdf_obj *params_obj = NULL;
1196 	pdf_obj *params_ref = NULL;
1197 	pdf_obj *hint_obj = NULL;
1198 	pdf_obj *hint_ref = NULL;
1199 	pdf_obj *o;
1200 	int params_num, hint_num;
1201 
1202 	fz_var(params_obj);
1203 	fz_var(params_ref);
1204 	fz_var(hint_obj);
1205 	fz_var(hint_ref);
1206 
1207 	fz_try(ctx)
1208 	{
1209 		/* Linearization params */
1210 		params_obj = pdf_new_dict(ctx, doc, 10);
1211 		params_ref = pdf_add_object(ctx, doc, params_obj);
1212 		params_num = pdf_to_num(ctx, params_ref);
1213 
1214 		opts->use_list[params_num] = USE_PARAMS;
1215 		opts->renumber_map[params_num] = params_num;
1216 		opts->rev_renumber_map[params_num] = params_num;
1217 		opts->gen_list[params_num] = 0;
1218 		pdf_dict_put_real(ctx, params_obj, PDF_NAME(Linearized), 1.0f);
1219 		opts->linear_l = pdf_new_int(ctx, INT_MIN);
1220 		pdf_dict_put(ctx, params_obj, PDF_NAME(L), opts->linear_l);
1221 		opts->linear_h0 = pdf_new_int(ctx, INT_MIN);
1222 		o = pdf_new_array(ctx, doc, 2);
1223 		pdf_dict_put_drop(ctx, params_obj, PDF_NAME(H), o);
1224 		pdf_array_push(ctx, o, opts->linear_h0);
1225 		opts->linear_h1 = pdf_new_int(ctx, INT_MIN);
1226 		pdf_array_push(ctx, o, opts->linear_h1);
1227 		opts->linear_o = pdf_new_int(ctx, INT_MIN);
1228 		pdf_dict_put(ctx, params_obj, PDF_NAME(O), opts->linear_o);
1229 		opts->linear_e = pdf_new_int(ctx, INT_MIN);
1230 		pdf_dict_put(ctx, params_obj, PDF_NAME(E), opts->linear_e);
1231 		opts->linear_n = pdf_new_int(ctx, INT_MIN);
1232 		pdf_dict_put(ctx, params_obj, PDF_NAME(N), opts->linear_n);
1233 		opts->linear_t = pdf_new_int(ctx, INT_MIN);
1234 		pdf_dict_put(ctx, params_obj, PDF_NAME(T), opts->linear_t);
1235 
1236 		/* Primary hint stream */
1237 		hint_obj = pdf_new_dict(ctx, doc, 10);
1238 		hint_ref = pdf_add_object(ctx, doc, hint_obj);
1239 		hint_num = pdf_to_num(ctx, hint_ref);
1240 
1241 		opts->use_list[hint_num] = USE_HINTS;
1242 		opts->renumber_map[hint_num] = hint_num;
1243 		opts->rev_renumber_map[hint_num] = hint_num;
1244 		opts->gen_list[hint_num] = 0;
1245 		pdf_dict_put_int(ctx, hint_obj, PDF_NAME(P), 0);
1246 		opts->hints_s = pdf_new_int(ctx, INT_MIN);
1247 		pdf_dict_put(ctx, hint_obj, PDF_NAME(S), opts->hints_s);
1248 		/* FIXME: Do we have thumbnails? Do a T entry */
1249 		/* FIXME: Do we have outlines? Do an O entry */
1250 		/* FIXME: Do we have article threads? Do an A entry */
1251 		/* FIXME: Do we have named destinations? Do a E entry */
1252 		/* FIXME: Do we have interactive forms? Do a V entry */
1253 		/* FIXME: Do we have document information? Do an I entry */
1254 		/* FIXME: Do we have logical structure hierarchy? Do a C entry */
1255 		/* FIXME: Do L, Page Label hint table */
1256 		pdf_dict_put(ctx, hint_obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1257 		opts->hints_length = pdf_new_int(ctx, INT_MIN);
1258 		pdf_dict_put(ctx, hint_obj, PDF_NAME(Length), opts->hints_length);
1259 		pdf_get_xref_entry(ctx, doc, hint_num)->stm_ofs = 0;
1260 	}
1261 	fz_always(ctx)
1262 	{
1263 		pdf_drop_obj(ctx, params_obj);
1264 		pdf_drop_obj(ctx, params_ref);
1265 		pdf_drop_obj(ctx, hint_ref);
1266 		pdf_drop_obj(ctx, hint_obj);
1267 	}
1268 	fz_catch(ctx)
1269 	{
1270 		fz_rethrow(ctx);
1271 	}
1272 }
1273 
1274 static void
lpr_inherit_res_contents(fz_context * ctx,pdf_obj * res,pdf_obj * dict,pdf_obj * text)1275 lpr_inherit_res_contents(fz_context *ctx, pdf_obj *res, pdf_obj *dict, pdf_obj *text)
1276 {
1277 	pdf_obj *o, *r;
1278 	int i, n;
1279 
1280 	/* If the parent node doesn't have an entry of this type, give up. */
1281 	o = pdf_dict_get(ctx, dict, text);
1282 	if (!o)
1283 		return;
1284 
1285 	/* If the resources dict we are building doesn't have an entry of this
1286 	 * type yet, then just copy it (ensuring it's not a reference) */
1287 	r = pdf_dict_get(ctx, res, text);
1288 	if (r == NULL)
1289 	{
1290 		o = pdf_resolve_indirect(ctx, o);
1291 		if (pdf_is_dict(ctx, o))
1292 			o = pdf_copy_dict(ctx, o);
1293 		else if (pdf_is_array(ctx, o))
1294 			o = pdf_copy_array(ctx, o);
1295 		else
1296 			o = NULL;
1297 		if (o)
1298 			pdf_dict_put_drop(ctx, res, text, o);
1299 		return;
1300 	}
1301 
1302 	/* Otherwise we need to merge o into r */
1303 	if (pdf_is_dict(ctx, o))
1304 	{
1305 		n = pdf_dict_len(ctx, o);
1306 		for (i = 0; i < n; i++)
1307 		{
1308 			pdf_obj *key = pdf_dict_get_key(ctx, o, i);
1309 			pdf_obj *val = pdf_dict_get_val(ctx, o, i);
1310 
1311 			if (pdf_dict_get(ctx, res, key))
1312 				continue;
1313 			pdf_dict_put(ctx, res, key, val);
1314 		}
1315 	}
1316 }
1317 
1318 static void
lpr_inherit_res(fz_context * ctx,pdf_obj * node,int depth,pdf_obj * dict)1319 lpr_inherit_res(fz_context *ctx, pdf_obj *node, int depth, pdf_obj *dict)
1320 {
1321 	while (1)
1322 	{
1323 		pdf_obj *o;
1324 
1325 		node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1326 		depth--;
1327 		if (!node || depth < 0)
1328 			break;
1329 
1330 		o = pdf_dict_get(ctx, node, PDF_NAME(Resources));
1331 		if (o)
1332 		{
1333 			lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ExtGState));
1334 			lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ColorSpace));
1335 			lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Pattern));
1336 			lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Shading));
1337 			lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(XObject));
1338 			lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Font));
1339 			lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ProcSet));
1340 			lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Properties));
1341 		}
1342 	}
1343 }
1344 
1345 static pdf_obj *
lpr_inherit(fz_context * ctx,pdf_obj * node,char * text,int depth)1346 lpr_inherit(fz_context *ctx, pdf_obj *node, char *text, int depth)
1347 {
1348 	do
1349 	{
1350 		pdf_obj *o = pdf_dict_gets(ctx, node, text);
1351 
1352 		if (o)
1353 			return pdf_resolve_indirect(ctx, o);
1354 		node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1355 		depth--;
1356 	}
1357 	while (depth >= 0 && node);
1358 
1359 	return NULL;
1360 }
1361 
1362 static int
lpr(fz_context * ctx,pdf_document * doc,pdf_obj * node,int depth,int page)1363 lpr(fz_context *ctx, pdf_document *doc, pdf_obj *node, int depth, int page)
1364 {
1365 	pdf_obj *kids;
1366 	pdf_obj *o = NULL;
1367 	int i, n;
1368 
1369 	if (pdf_mark_obj(ctx, node))
1370 		return page;
1371 
1372 	fz_var(o);
1373 
1374 	fz_try(ctx)
1375 	{
1376 		if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, node, PDF_NAME(Type))))
1377 		{
1378 			pdf_obj *r; /* r is deliberately not cleaned up */
1379 
1380 			/* Copy resources down to the child */
1381 			o = pdf_keep_obj(ctx, pdf_dict_get(ctx, node, PDF_NAME(Resources)));
1382 			if (!o)
1383 			{
1384 				o = pdf_keep_obj(ctx, pdf_new_dict(ctx, doc, 2));
1385 				pdf_dict_put(ctx, node, PDF_NAME(Resources), o);
1386 			}
1387 			lpr_inherit_res(ctx, node, depth, o);
1388 			r = lpr_inherit(ctx, node, "MediaBox", depth);
1389 			if (r)
1390 				pdf_dict_put(ctx, node, PDF_NAME(MediaBox), r);
1391 			r = lpr_inherit(ctx, node, "CropBox", depth);
1392 			if (r)
1393 				pdf_dict_put(ctx, node, PDF_NAME(CropBox), r);
1394 			r = lpr_inherit(ctx, node, "BleedBox", depth);
1395 			if (r)
1396 				pdf_dict_put(ctx, node, PDF_NAME(BleedBox), r);
1397 			r = lpr_inherit(ctx, node, "TrimBox", depth);
1398 			if (r)
1399 				pdf_dict_put(ctx, node, PDF_NAME(TrimBox), r);
1400 			r = lpr_inherit(ctx, node, "ArtBox", depth);
1401 			if (r)
1402 				pdf_dict_put(ctx, node, PDF_NAME(ArtBox), r);
1403 			r = lpr_inherit(ctx, node, "Rotate", depth);
1404 			if (r)
1405 				pdf_dict_put(ctx, node, PDF_NAME(Rotate), r);
1406 			page++;
1407 		}
1408 		else
1409 		{
1410 			kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
1411 			n = pdf_array_len(ctx, kids);
1412 			for(i = 0; i < n; i++)
1413 			{
1414 				page = lpr(ctx, doc, pdf_array_get(ctx, kids, i), depth+1, page);
1415 			}
1416 			pdf_dict_del(ctx, node, PDF_NAME(Resources));
1417 			pdf_dict_del(ctx, node, PDF_NAME(MediaBox));
1418 			pdf_dict_del(ctx, node, PDF_NAME(CropBox));
1419 			pdf_dict_del(ctx, node, PDF_NAME(BleedBox));
1420 			pdf_dict_del(ctx, node, PDF_NAME(TrimBox));
1421 			pdf_dict_del(ctx, node, PDF_NAME(ArtBox));
1422 			pdf_dict_del(ctx, node, PDF_NAME(Rotate));
1423 		}
1424 	}
1425 	fz_always(ctx)
1426 	{
1427 		pdf_drop_obj(ctx, o);
1428 	}
1429 	fz_catch(ctx)
1430 	{
1431 		fz_rethrow(ctx);
1432 	}
1433 
1434 	pdf_unmark_obj(ctx, node);
1435 
1436 	return page;
1437 }
1438 
1439 void
pdf_localise_page_resources(fz_context * ctx,pdf_document * doc)1440 pdf_localise_page_resources(fz_context *ctx, pdf_document *doc)
1441 {
1442 	if (doc->resources_localised)
1443 		return;
1444 
1445 	lpr(ctx, doc, pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Pages), NULL), 0, 0);
1446 
1447 	doc->resources_localised = 1;
1448 }
1449 
1450 static void
linearize(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)1451 linearize(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1452 {
1453 	int i;
1454 	int n = pdf_xref_len(ctx, doc) + 2;
1455 	int *reorder;
1456 	int *rev_renumber_map;
1457 
1458 	opts->page_object_lists = page_objects_list_create(ctx);
1459 
1460 	/* Ensure that every page has local references of its resources */
1461 	/* FIXME: We could 'thin' the resources according to what is actually
1462 	 * required for each page, but this would require us to run the page
1463 	 * content streams. */
1464 	pdf_localise_page_resources(ctx, doc);
1465 
1466 	/* Walk the objects for each page, marking which ones are used, where */
1467 	memset(opts->use_list, 0, n * sizeof(int));
1468 	mark_trailer(ctx, doc, opts, pdf_trailer(ctx, doc));
1469 
1470 	/* Add new objects required for linearization */
1471 	add_linearization_objs(ctx, doc, opts);
1472 
1473 #ifdef DEBUG_WRITING
1474 	fprintf(stderr, "Usage calculated:\n");
1475 	for (i=0; i < pdf_xref_len(ctx, doc); i++)
1476 	{
1477 		fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]);
1478 	}
1479 #endif
1480 
1481 	/* Allocate/init the structures used for renumbering the objects */
1482 	reorder = fz_calloc(ctx, n, sizeof(int));
1483 	rev_renumber_map = fz_calloc(ctx, n, sizeof(int));
1484 	for (i = 0; i < n; i++)
1485 	{
1486 		reorder[i] = i;
1487 	}
1488 
1489 	/* Heap sort the reordering */
1490 	heap_sort(reorder+1, n-1, opts->use_list, &order_ge);
1491 
1492 #ifdef DEBUG_WRITING
1493 	fprintf(stderr, "Reordered:\n");
1494 	for (i=1; i < pdf_xref_len(ctx, doc); i++)
1495 	{
1496 		fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]);
1497 	}
1498 #endif
1499 
1500 	/* Find the split point */
1501 	for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++) {}
1502 	opts->start = i;
1503 
1504 	/* Roll the reordering into the renumber_map */
1505 	for (i = 0; i < n; i++)
1506 	{
1507 		opts->renumber_map[reorder[i]] = i;
1508 		rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]];
1509 	}
1510 	fz_free(ctx, opts->rev_renumber_map);
1511 	opts->rev_renumber_map = rev_renumber_map;
1512 	fz_free(ctx, reorder);
1513 
1514 	/* Apply the renumber_map */
1515 	page_objects_list_renumber(opts);
1516 	renumberobjs(ctx, doc, opts);
1517 
1518 	page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists);
1519 }
1520 
1521 static void
update_linearization_params(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)1522 update_linearization_params(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1523 {
1524 	int64_t offset;
1525 	pdf_set_int(ctx, opts->linear_l, opts->file_len);
1526 	/* Primary hint stream offset (of object, not stream!) */
1527 	pdf_set_int(ctx, opts->linear_h0, opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1528 	/* Primary hint stream length (of object, not stream!) */
1529 	offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1530 	pdf_set_int(ctx, opts->linear_h1, offset - opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1531 	/* Object number of first pages page object (the first object of page 0) */
1532 	pdf_set_int(ctx, opts->linear_o, opts->page_object_lists->page[0]->object[0]);
1533 	/* Offset of end of first page (first page is followed by primary
1534 	 * hint stream (object n-1) then remaining pages (object 1...). The
1535 	 * primary hint stream counts as part of the first pages data, I think.
1536 	 */
1537 	offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1538 	pdf_set_int(ctx, opts->linear_e, offset);
1539 	/* Number of pages in document */
1540 	pdf_set_int(ctx, opts->linear_n, opts->page_count);
1541 	/* Offset of first entry in main xref table */
1542 	pdf_set_int(ctx, opts->linear_t, opts->first_xref_entry_offset + opts->hintstream_len);
1543 	/* Offset of shared objects hint table in the primary hint stream */
1544 	pdf_set_int(ctx, opts->hints_s, opts->hints_shared_offset);
1545 	/* Primary hint stream length */
1546 	pdf_set_int(ctx, opts->hints_length, opts->hintstream_len);
1547 }
1548 
1549 /*
1550  * Make sure we have loaded objects from object streams.
1551  */
1552 
/*
	Load (and immediately release) every object whose xref entry has
	type 'o'.
*/
static void preloadobjstms(fz_context *ctx, pdf_document *doc)
{
	int num = 0;

	/* xref_len may change due to repair, so check it every iteration */
	while (num < pdf_xref_len(ctx, doc))
	{
		if (pdf_get_xref_entry(ctx, doc, num)->type == 'o')
			pdf_drop_obj(ctx, pdf_load_object(ctx, doc, num));
		num++;
	}
}
1568 
1569 /*
1570  * Save streams and objects to the output
1571  */
1572 
/*
	Decide whether a stream of len bytes described by dictionary obj is
	a 1 bit per pixel image.

	The stream qualifies when its Subtype is Image, its Width and Height
	are positive, len is exactly Height rows of ceil(Width / 8) bytes,
	and it is either an ImageMask or has BitsPerComponent 1 with
	ColorSpace DeviceGray.

	*w and *h receive the Width and Height read from obj whenever the
	Subtype check passes, even if the function goes on to return 0.
	Returns 1 for a bitmap, 0 otherwise.
*/
static int is_bitmap_stream(fz_context *ctx, pdf_obj *obj, size_t len, int *w, int *h)
{
	pdf_obj *bpc;
	pdf_obj *cs;
	size_t stride;
	if (pdf_dict_get(ctx, obj, PDF_NAME(Subtype)) != PDF_NAME(Image))
		return 0;
	*w = pdf_dict_get_int(ctx, obj, PDF_NAME(Width));
	*h = pdf_dict_get_int(ctx, obj, PDF_NAME(Height));
	/* The dimensions come straight from the file. Reject nonsensical
	 * values before doing arithmetic on them: a negative value would
	 * wrap when converted to size_t, and Width close to INT_MAX would
	 * overflow a signed "+ 7". */
	if (*w <= 0 || *h <= 0)
		return 0;
	stride = ((size_t)*w + 7) >> 3;
	/* Compare by division rather than multiplying stride by height, so
	 * the check cannot wrap around. stride is at least 1 here. */
	if (len % stride != 0 || len / stride != (size_t)*h)
		return 0;
	if (pdf_dict_get_bool(ctx, obj, PDF_NAME(ImageMask)))
	{
		return 1;
	}
	else
	{
		bpc = pdf_dict_get(ctx, obj, PDF_NAME(BitsPerComponent));
		if (!pdf_is_int(ctx, bpc))
			return 0;
		if (pdf_to_int(ctx, bpc) != 1)
			return 0;
		cs = pdf_dict_get(ctx, obj, PDF_NAME(ColorSpace));
		if (!pdf_name_eq(ctx, cs, PDF_NAME(DeviceGray)))
			return 0;
		return 1;
	}
}
1602 
/*
	Classify a byte value: returns 0 for newline, carriage return, tab
	and anything in the range 32..127 inclusive, and 1 for everything
	else.
*/
static inline int isbinary(int c)
{
	switch (c)
	{
	case '\n':
	case '\r':
	case '\t':
		return 0;
	default:
		return !(c >= 32 && c <= 127);
	}
}
1609 
/*
	Returns 1 if any of the len bytes at data is classified as binary by
	isbinary, 0 if none is (including when len is 0).
*/
static int isbinarystream(fz_context *ctx, const unsigned char *data, size_t len)
{
	const unsigned char *cursor = data;
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		if (isbinary(*cursor++))
			return 1;
	}
	return 0;
}
1618 
hexbuf(fz_context * ctx,const unsigned char * p,size_t n)1619 static fz_buffer *hexbuf(fz_context *ctx, const unsigned char *p, size_t n)
1620 {
1621 	static const char hex[17] = "0123456789abcdef";
1622 	int x = 0;
1623 	size_t len = n * 2 + (n / 32) + 1;
1624 	unsigned char *data = Memento_label(fz_malloc(ctx, len), "hexbuf");
1625 	fz_buffer *buf = fz_new_buffer_from_data(ctx, data, len);
1626 
1627 	while (n--)
1628 	{
1629 		*data++ = hex[*p >> 4];
1630 		*data++ = hex[*p & 15];
1631 		if (++x == 32)
1632 		{
1633 			*data++ = '\n';
1634 			x = 0;
1635 		}
1636 		p++;
1637 	}
1638 
1639 	*data++ = '>';
1640 
1641 	return buf;
1642 }
1643 
/*
	Record in stream dictionary dict that its data has been hex encoded,
	by placing ASCIIHexDecode at the front of the Filter chain.

	- Filter is a name: it becomes [ASCIIHexDecode name]; a dictionary
	  DecodeParms becomes [null dict].
	- Filter is an array: ASCIIHexDecode is inserted at index 0 of that
	  array, and null at index 0 of an array DecodeParms.
	- Otherwise: Filter is set to ASCIIHexDecode.

	NOTE(review): in the array case the existing array objects are
	modified in place; whether they are shared with another dictionary
	depends on how the caller obtained dict - confirm.
*/
static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
	pdf_obj *filter = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
	pdf_obj *parms = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
	pdf_obj *filter_chain = NULL;
	pdf_obj *parms_chain = NULL;

	fz_var(filter_chain);
	fz_var(parms_chain);

	fz_try(ctx)
	{
		if (pdf_is_array(ctx, filter))
		{
			pdf_array_insert(ctx, filter, PDF_NAME(ASCIIHexDecode), 0);
			if (pdf_is_array(ctx, parms))
				pdf_array_insert(ctx, parms, PDF_NULL, 0);
		}
		else if (pdf_is_name(ctx, filter))
		{
			filter_chain = pdf_new_array(ctx, doc, 2);
			pdf_array_push(ctx, filter_chain, PDF_NAME(ASCIIHexDecode));
			pdf_array_push(ctx, filter_chain, filter);
			filter = filter_chain;

			if (pdf_is_dict(ctx, parms))
			{
				parms_chain = pdf_new_array(ctx, doc, 2);
				pdf_array_push(ctx, parms_chain, PDF_NULL);
				pdf_array_push(ctx, parms_chain, parms);
				parms = parms_chain;
			}
		}
		else
		{
			filter = PDF_NAME(ASCIIHexDecode);
		}

		pdf_dict_put(ctx, dict, PDF_NAME(Filter), filter);
		if (parms != NULL)
			pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), parms);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, filter_chain);
		pdf_drop_obj(ctx, parms_chain);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
1692 
deflatebuf(fz_context * ctx,const unsigned char * p,size_t n)1693 static fz_buffer *deflatebuf(fz_context *ctx, const unsigned char *p, size_t n)
1694 {
1695 	fz_buffer *buf;
1696 	uLongf csize;
1697 	int t;
1698 	uLong longN = (uLong)n;
1699 	unsigned char *data;
1700 	size_t cap;
1701 
1702 	if (n != (size_t)longN)
1703 		fz_throw(ctx, FZ_ERROR_GENERIC, "Buffer too large to deflate");
1704 
1705 	cap = compressBound(longN);
1706 	data = Memento_label(fz_malloc(ctx, cap), "pdf_write_deflate");
1707 	buf = fz_new_buffer_from_data(ctx, data, cap);
1708 	csize = (uLongf)cap;
1709 	t = compress(data, &csize, p, longN);
1710 	if (t != Z_OK)
1711 	{
1712 		fz_drop_buffer(ctx, buf);
1713 		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot deflate buffer");
1714 	}
1715 	fz_resize_buffer(ctx, buf, csize);
1716 	return buf;
1717 }
1718 
/*
	Remove a leading ASCIIHexDecode from the Filter entry of dict (and
	the matching leading DecodeParms element), then tidy what is left:
	a Filter array reduced to a single element is replaced by that
	element, and an empty one is deleted together with DecodeParms.

	Returns 1 if an ASCIIHexDecode filter was removed, 0 otherwise.
	Note that a one element Filter array is unpacked even when it did
	not start with ASCIIHexDecode.
*/
static int striphexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
	pdf_obj *filter = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
	pdf_obj *parms = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
	int was_hex = 0;
	int remaining;

	if (!pdf_is_array(ctx, filter))
	{
		/* A lone filter: either it is the hex filter or there is
		 * nothing to do. */
		if (filter != PDF_NAME(ASCIIHexDecode))
			return 0;
		pdf_dict_del(ctx, dict, PDF_NAME(Filter));
		pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
		return 1;
	}

	/* Remove ASCIIHexDecode from head of filter list */
	if (pdf_array_get(ctx, filter, 0) == PDF_NAME(ASCIIHexDecode))
	{
		was_hex = 1;
		pdf_array_delete(ctx, filter, 0);
		if (pdf_is_array(ctx, parms))
			pdf_array_delete(ctx, parms, 0);
	}

	remaining = pdf_array_len(ctx, filter);
	if (remaining == 1)
	{
		/* Unpack array if only one filter remains */
		filter = pdf_array_get(ctx, filter, 0);
		pdf_dict_put(ctx, dict, PDF_NAME(Filter), filter);
		if (parms)
		{
			parms = pdf_array_get(ctx, parms, 0);
			pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), parms);
		}
	}
	else if (remaining == 0)
	{
		/* Remove array if no filters remain */
		pdf_dict_del(ctx, dict, PDF_NAME(Filter));
		pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
	}

	return was_hex;
}
1764 
unhexbuf(fz_context * ctx,const unsigned char * p,size_t n)1765 static fz_buffer *unhexbuf(fz_context *ctx, const unsigned char *p, size_t n)
1766 {
1767 	fz_stream *mstm = NULL;
1768 	fz_stream *xstm = NULL;
1769 	fz_buffer *out = NULL;
1770 	fz_var(mstm);
1771 	fz_var(xstm);
1772 	fz_try(ctx)
1773 	{
1774 		mstm = fz_open_memory(ctx, p, n);
1775 		xstm = fz_open_ahxd(ctx, mstm);
1776 		out = fz_read_all(ctx, xstm, n/2);
1777 	}
1778 	fz_always(ctx)
1779 	{
1780 		fz_drop_stream(ctx, xstm);
1781 		fz_drop_stream(ctx, mstm);
1782 	}
1783 	fz_catch(ctx)
1784 		fz_rethrow(ctx);
1785 	return out;
1786 }
1787 
/*
	Callback adapter: arg is the fz_output to which the len bytes at
	data are written.
*/
static void write_data(fz_context *ctx, void *arg, const unsigned char *data, size_t len)
{
	fz_output *sink = (fz_output *)arg;

	fz_write_data(ctx, sink, data, len);
}
1792 
/*
	Write stream object num/gen to opts->out, starting from its raw
	(still encoded) data.

	A copy of the stream dictionary obj_orig is adjusted to match what
	is written:
	  - with do_deflate set, a leading ASCIIHexDecode filter is stripped
	    and the data un-hexed;
	  - with do_deflate set and no Filter (left), the data is compressed:
	    CCITT G4 for 1 bit bitmaps, Flate otherwise;
	  - with opts->do_ascii set and binary data, the data is hex encoded
	    and ASCIIHexDecode is added to the filter chain.
	Length is then set, and the dictionary and data are written either
	in the clear (unenc non-zero) or encrypted with opts->crypt.
*/
static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
{
	fz_buffer *tmp_unhex = NULL, *tmp_comp = NULL, *tmp_hex = NULL, *buf = NULL;
	pdf_obj *obj = NULL;
	pdf_obj *dp;
	size_t len;
	unsigned char *data;
	int w, h;

	/* Everything assigned inside fz_try and released in fz_always must
	 * be declared with fz_var, tmp_unhex included. */
	fz_var(buf);
	fz_var(tmp_unhex);
	fz_var(tmp_comp);
	fz_var(tmp_hex);
	fz_var(obj);

	fz_try(ctx)
	{
		buf = pdf_load_raw_stream_number(ctx, doc, num);
		obj = pdf_copy_dict(ctx, obj_orig);

		/* data/len always describe the bytes that will be written;
		 * they are re-pointed at each temporary as it is produced. */
		len = fz_buffer_storage(ctx, buf, &data);

		if (do_deflate && striphexfilter(ctx, doc, obj))
		{
			tmp_unhex = unhexbuf(ctx, data, len);
			len = fz_buffer_storage(ctx, tmp_unhex, &data);
		}

		/* Only compress data that is not already filtered. */
		if (do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME(Filter)))
		{
			if (is_bitmap_stream(ctx, obj, len, &w, &h))
			{
				tmp_comp = fz_compress_ccitt_fax_g4(ctx, data, w, h);
				pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
				dp = pdf_dict_put_dict(ctx, obj, PDF_NAME(DecodeParms), 1);
				pdf_dict_put_int(ctx, dp, PDF_NAME(K), -1);
				pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), w);
			}
			else
			{
				tmp_comp = deflatebuf(ctx, data, len);
				pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
			}
			len = fz_buffer_storage(ctx, tmp_comp, &data);
		}

		if (opts->do_ascii && isbinarystream(ctx, data, len))
		{
			tmp_hex = hexbuf(ctx, data, len);
			len = fz_buffer_storage(ctx, tmp_hex, &data);
			addhexfilter(ctx, doc, obj);
		}

		fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);

		if (unenc)
		{
			pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
			pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
			fz_write_string(ctx, opts->out, "\nstream\n");
			fz_write_data(ctx, opts->out, data, len);
		}
		else
		{
			/* Length must describe the encrypted size. */
			pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, len));
			pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen);
			fz_write_string(ctx, opts->out, "\nstream\n");
			pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
		}

		fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, tmp_unhex);
		fz_drop_buffer(ctx, tmp_hex);
		fz_drop_buffer(ctx, tmp_comp);
		fz_drop_buffer(ctx, buf);
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
1877 
/*
	Write stream object num/gen to opts->out, starting from its fully
	decoded data.

	A copy of the stream dictionary obj_orig has its Filter and
	DecodeParms removed. With do_deflate set the decoded data is then
	recompressed (CCITT G4 for 1 bit bitmaps, Flate otherwise); with
	opts->do_ascii set, binary data is hex encoded. Length is set and
	the dictionary and data are written either in the clear (unenc
	non-zero) or encrypted with opts->crypt.
*/
static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
{
	fz_buffer *decoded = NULL;
	fz_buffer *packed = NULL;
	fz_buffer *hexed = NULL;
	pdf_obj *hdr = NULL;
	unsigned char *bytes;
	size_t nbytes;

	fz_var(decoded);
	fz_var(packed);
	fz_var(hexed);
	fz_var(hdr);

	fz_try(ctx)
	{
		decoded = pdf_load_stream_number(ctx, doc, num);
		hdr = pdf_copy_dict(ctx, obj_orig);
		pdf_dict_del(ctx, hdr, PDF_NAME(Filter));
		pdf_dict_del(ctx, hdr, PDF_NAME(DecodeParms));

		/* bytes/nbytes track whichever buffer holds the data to write. */
		nbytes = fz_buffer_storage(ctx, decoded, &bytes);

		if (do_deflate)
		{
			int width, height;

			if (!is_bitmap_stream(ctx, hdr, nbytes, &width, &height))
			{
				packed = deflatebuf(ctx, bytes, nbytes);
				pdf_dict_put(ctx, hdr, PDF_NAME(Filter), PDF_NAME(FlateDecode));
			}
			else
			{
				pdf_obj *parms;

				packed = fz_compress_ccitt_fax_g4(ctx, bytes, width, height);
				pdf_dict_put(ctx, hdr, PDF_NAME(Filter), PDF_NAME(CCITTFaxDecode));
				parms = pdf_dict_put_dict(ctx, hdr, PDF_NAME(DecodeParms), 1);
				pdf_dict_put_int(ctx, parms, PDF_NAME(K), -1);
				pdf_dict_put_int(ctx, parms, PDF_NAME(Columns), width);
			}
			nbytes = fz_buffer_storage(ctx, packed, &bytes);
		}

		if (opts->do_ascii && isbinarystream(ctx, bytes, nbytes))
		{
			hexed = hexbuf(ctx, bytes, nbytes);
			nbytes = fz_buffer_storage(ctx, hexed, &bytes);
			addhexfilter(ctx, doc, hdr);
		}

		fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);

		if (!unenc)
		{
			/* Length must describe the encrypted size. */
			pdf_dict_put_int(ctx, hdr, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, (int)nbytes));
			pdf_print_encrypted_obj(ctx, opts->out, hdr, opts->do_tight, opts->do_ascii, opts->crypt, num, gen);
			fz_write_string(ctx, opts->out, "\nstream\n");
			pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, bytes, nbytes);
		}
		else
		{
			pdf_dict_put_int(ctx, hdr, PDF_NAME(Length), nbytes);
			pdf_print_obj(ctx, opts->out, hdr, opts->do_tight, opts->do_ascii);
			fz_write_string(ctx, opts->out, "\nstream\n");
			fz_write_data(ctx, opts->out, bytes, nbytes);
		}

		fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, hexed);
		fz_drop_buffer(ctx, packed);
		fz_drop_buffer(ctx, decoded);
		pdf_drop_obj(ctx, hdr);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
1956 
/*
	Returns 1 if s is one of the filter names (or abbreviations) in the
	list below, 0 otherwise. The comparison is by pointer against the
	PDF_NAME constants.
*/
static int is_image_filter(pdf_obj *s)
{
	if (s == PDF_NAME(CCITTFaxDecode) || s == PDF_NAME(CCF))
		return 1;
	if (s == PDF_NAME(DCTDecode) || s == PDF_NAME(DCT))
		return 1;
	if (s == PDF_NAME(RunLengthDecode) || s == PDF_NAME(RL))
		return 1;
	if (s == PDF_NAME(JBIG2Decode))
		return 1;
	return s == PDF_NAME(JPXDecode);
}
1966 
/*
	Returns non-zero if the Filter value o - a single name or an array
	of names - contains a filter accepted by is_image_filter. Any other
	kind of value yields 0.
*/
static int filter_implies_image(fz_context *ctx, pdf_obj *o)
{
	int idx, count;

	if (pdf_is_name(ctx, o))
		return is_image_filter(o);

	if (!pdf_is_array(ctx, o))
		return 0;

	count = pdf_array_len(ctx, o);
	idx = 0;
	while (idx < count)
	{
		if (is_image_filter(pdf_array_get(ctx, o, idx)))
			return 1;
		idx++;
	}
	return 0;
}
1981 
/*
	Returns 1 if the Filter value o is JPXDecode, or is an array with
	JPXDecode among its elements; 0 otherwise.
*/
static int is_jpx_filter(fz_context *ctx, pdf_obj *o)
{
	int idx, count;

	if (o == PDF_NAME(JPXDecode))
		return 1;

	if (!pdf_is_array(ctx, o))
		return 0;

	count = pdf_array_len(ctx, o);
	idx = 0;
	while (idx < count)
	{
		if (pdf_array_get(ctx, o, idx) == PDF_NAME(JPXDecode))
			return 1;
		idx++;
	}
	return 0;
}
1996 
/*
	Heuristic test for image streams. Returns 1 when the stream
	dictionary obj satisfies any of:
	  - Type is XObject and Subtype is Image;
	  - its Filter is accepted by filter_implies_image;
	  - it has both a Width and a Height entry.
	Returns 0 otherwise.
*/
static int is_image_stream(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));

	if (pdf_name_eq(ctx, type, PDF_NAME(XObject)) &&
		pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(Image)))
		return 1;

	if (filter_implies_image(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Filter))))
		return 1;

	if (pdf_dict_get(ctx, obj, PDF_NAME(Width)) == NULL)
		return 0;
	if (pdf_dict_get(ctx, obj, PDF_NAME(Height)) == NULL)
		return 0;
	return 1;
}
2009 
/*
	Heuristic test for font streams. Returns 1 when the stream
	dictionary obj satisfies any of:
	  - Type is Font or FontDescriptor;
	  - it has a Length1, Length2 or Length3 entry;
	  - Subtype is Type1C or CIDFontType0C.
	Returns 0 otherwise.
*/
static int is_font_stream(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *type;
	pdf_obj *subtype;

	type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
	if (pdf_name_eq(ctx, type, PDF_NAME(Font)) || pdf_name_eq(ctx, type, PDF_NAME(FontDescriptor)))
		return 1;

	if (pdf_dict_get(ctx, obj, PDF_NAME(Length1)) != NULL ||
		pdf_dict_get(ctx, obj, PDF_NAME(Length2)) != NULL ||
		pdf_dict_get(ctx, obj, PDF_NAME(Length3)) != NULL)
		return 1;

	subtype = pdf_dict_get(ctx, obj, PDF_NAME(Subtype));
	if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1C)) || pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0C)))
		return 1;

	return 0;
}
2029 
/*
	Returns 1 if the Filter entry of stream dictionary obj is, or
	contains, JPXDecode; 0 otherwise.
*/
static int is_jpx_stream(fz_context *ctx, pdf_obj *obj)
{
	pdf_obj *filter = pdf_dict_get(ctx, obj, PDF_NAME(Filter));

	return is_jpx_filter(ctx, filter) ? 1 : 0;
}
2037 
2038 
/*
	Returns 1 if obj has Type Metadata and Subtype XML, 0 otherwise.
*/
static int is_xml_metadata(fz_context *ctx, pdf_obj *obj)
{
	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Type)), PDF_NAME(Metadata)))
		return 0;
	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(XML)))
		return 0;
	return 1;
}
2046 
/*
	Write object 'num' with generation 'gen' to opts->out.

	Dictionaries whose Type is ObjStm are never written; dictionaries
	whose Type is XRef are not written when skip_xrefs is non-zero. In
	both cases use_list[num] is cleared and nothing is output.

	Stream objects are emitted through expandstream() or copystream(),
	depending on the expand/compress options and on what kind of stream
	this looks like. All other objects are printed directly between
	"N G obj" and "endobj".

	unenc: non-zero to write the object without encryption. It is forced
	to 1 when opts->do_encrypt is PDF_ENCRYPT_NONE.

	Any exception is rethrown after the loaded object has been dropped.
*/
static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs, int unenc)
{
	pdf_obj *obj = NULL;
	int skip = 0;

	fz_var(obj);

	if (opts->do_encrypt == PDF_ENCRYPT_NONE)
		unenc = 1;

	fz_try(ctx)
	{
		obj = pdf_load_object(ctx, doc, num);

		/* skip ObjStm and XRef objects */
		if (pdf_is_dict(ctx, obj))
		{
			pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
			if (type == PDF_NAME(ObjStm) || (skip_xrefs && type == PDF_NAME(XRef)))
			{
				opts->use_list[num] = 0;
				skip = 1;
			}
		}

		if (!skip)
		{
			if (pdf_obj_num_is_stream(ctx, doc, num))
			{
				int do_deflate = opts->do_compress;
				int do_expand = opts->do_expand;

				/* Later checks override earlier ones, so the order matters. */
				if (opts->do_compress_images && is_image_stream(ctx, obj))
					do_deflate = 1, do_expand = 0;
				if (opts->do_compress_fonts && is_font_stream(ctx, obj))
					do_deflate = 1, do_expand = 0;
				if (is_xml_metadata(ctx, obj))
					do_deflate = 0, do_expand = 0;
				if (is_jpx_stream(ctx, obj))
					do_deflate = 0, do_expand = 0;

				if (do_expand)
					expandstream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
				else
					copystream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
			}
			else
			{
				fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
				pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, unenc ? NULL : opts->crypt, num, gen);
				fz_write_string(ctx, opts->out, "\nendobj\n\n");
			}
		}
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, obj);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
2119 
/*
	Write one subsection of an old-style xref table covering object
	numbers [from, to): a "first count" header line followed by one
	entry per object, marked 'n' when use_list[] is non-zero for that
	object and 'f' otherwise.
*/
static void writexrefsubsect(fz_context *ctx, pdf_write_state *opts, int from, int to)
{
	fz_output *out = opts->out;
	int i;

	fz_write_printf(ctx, out, "%d %d\n", from, to - from);

	for (i = from; i < to; i++)
	{
		int64_t ofs = opts->ofs_list[i];
		int gen = opts->gen_list[i];

		if (!opts->use_list[i])
			fz_write_printf(ctx, out, "%010lu %05d f \n", ofs, gen);
		else
			fz_write_printf(ctx, out, "%010lu %05d n \n", ofs, gen);
	}
}
2133 
/*
	Write an old-style (plain text) xref table and trailer to opts->out.

	from, to: range of object numbers [from, to) to cover. For an
		incremental save only runs of objects that
		pdf_xref_is_incremental() reports are written, one
		subsection per run.
	first: non-zero to copy Info, Root, ID and Encrypt from the document
		trailer into the new trailer (non-incremental saves only).
	main_xref_offset: written as Prev when non-zero (non-incremental
		saves only).
	startxref: value written after the "startxref" keyword. For an
		incremental save it also becomes the new doc->startxref.

	Side effects: records the offset just after "xref\n" in
	opts->first_xref_entry_offset and clears doc->has_xref_streams.
	For an incremental save the document's own trailer is updated in
	place (Size, Prev) and reused.
*/
static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t main_xref_offset, int64_t startxref)
{
	pdf_obj *trailer = NULL;
	pdf_obj *obj;

	fz_write_string(ctx, opts->out, "xref\n");
	opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);

	if (opts->do_incremental)
	{
		int subfrom = from;
		int subto;

		while (subfrom < to)
		{
			while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
				subfrom++;

			subto = subfrom;
			while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
				subto++;

			if (subfrom < subto)
				writexrefsubsect(ctx, opts, subfrom, subto);

			subfrom = subto;
		}
	}
	else
	{
		writexrefsubsect(ctx, opts, from, to);
	}

	fz_write_string(ctx, opts->out, "\n");

	fz_var(trailer);

	/* Hold the trailer inside a try block so that it is dropped even if
	 * one of the dictionary updates or the print throws. */
	fz_try(ctx)
	{
		if (opts->do_incremental)
		{
			trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
			pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), pdf_xref_len(ctx, doc));
			pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), doc->startxref);
			doc->startxref = startxref;
		}
		else
		{
			trailer = pdf_new_dict(ctx, doc, 5);

			pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), to);

			if (first)
			{
				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME(Info), obj);

				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME(Root), obj);

				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME(ID), obj);

				obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
				if (obj)
					pdf_dict_put(ctx, trailer, PDF_NAME(Encrypt), obj);
			}
			if (main_xref_offset != 0)
				pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), main_xref_offset);
		}

		fz_write_string(ctx, opts->out, "trailer\n");
		/* Trailer is NOT encrypted */
		pdf_print_obj(ctx, opts->out, trailer, opts->do_tight, opts->do_ascii);
		fz_write_string(ctx, opts->out, "\n");
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, trailer);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);

	doc->has_xref_streams = 0;
}
2222 
/*
	Append one subsection covering object numbers [from, to) to an xref
	stream under construction.

	Pushes the pair (from, to - from) onto the 'index' array, then
	appends six bytes per object to 'fzbuf': one type byte (1 if
	use_list[] is non-zero, else 0), the low four bytes of the offset,
	most significant first, and one generation byte.
*/
static void writexrefstreamsubsect(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *index, fz_buffer *fzbuf, int from, int to)
{
	int n = from;

	pdf_array_push_int(ctx, index, from);
	pdf_array_push_int(ctx, index, to - from);

	while (n < to)
	{
		int64_t ofs = opts->ofs_list[n];
		int in_use = opts->use_list[n] ? 1 : 0;

		fz_append_byte(ctx, fzbuf, in_use);
		fz_append_byte(ctx, fzbuf, ofs>>24);
		fz_append_byte(ctx, fzbuf, ofs>>16);
		fz_append_byte(ctx, fzbuf, ofs>>8);
		fz_append_byte(ctx, fzbuf, ofs);
		fz_append_byte(ctx, fzbuf, opts->gen_list[n]);
		n++;
	}
}
2239 
/*
	Write the cross reference information as an xref stream object.

	A new object is created to hold the stream, so the range actually
	covered is [from, to + 1). The stream dictionary receives Size, Type,
	W ([1 4 1]) and Index, plus Prev and (when 'first' is set) Info,
	Root, ID and, for incremental saves only, Encrypt copied from the
	document trailer. The object is then written unencrypted via
	writeobject(), followed by the "startxref" footer.

	Side effects: sets opts->first_xref_entry_offset, marks the new
	object as used at that offset, updates doc->startxref on incremental
	saves and clears doc->has_old_style_xrefs.
*/
static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t main_xref_offset, int64_t startxref)
{
	pdf_obj *xref_dict = NULL;
	pdf_obj *widths = NULL;
	fz_buffer *entries = NULL;
	pdf_obj *index;
	pdf_obj *val;
	int xref_num;

	fz_var(xref_dict);
	fz_var(widths);
	fz_var(entries);

	fz_try(ctx)
	{
		int end;

		xref_num = pdf_create_object(ctx, doc);
		xref_dict = pdf_new_dict(ctx, doc, 6);
		pdf_update_object(ctx, doc, xref_num, xref_dict);

		opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);

		/* One more entry than requested, for the object created above. */
		end = to + 1;

		if (first)
		{
			pdf_obj *copied[3];
			int k;

			copied[0] = PDF_NAME(Info);
			copied[1] = PDF_NAME(Root);
			copied[2] = PDF_NAME(ID);

			for (k = 0; k < 3; k++)
			{
				val = pdf_dict_get(ctx, pdf_trailer(ctx, doc), copied[k]);
				if (val)
					pdf_dict_put(ctx, xref_dict, copied[k], val);
			}

			if (opts->do_incremental)
			{
				val = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
				if (val)
					pdf_dict_put(ctx, xref_dict, PDF_NAME(Encrypt), val);
			}
		}

		pdf_dict_put_int(ctx, xref_dict, PDF_NAME(Size), end);

		if (opts->do_incremental)
		{
			pdf_dict_put_int(ctx, xref_dict, PDF_NAME(Prev), doc->startxref);
			doc->startxref = startxref;
		}
		else if (main_xref_offset != 0)
		{
			pdf_dict_put_int(ctx, xref_dict, PDF_NAME(Prev), main_xref_offset);
		}

		pdf_dict_put(ctx, xref_dict, PDF_NAME(Type), PDF_NAME(XRef));

		/* Field widths: 1 byte type, 4 bytes offset, 1 byte generation. */
		widths = pdf_new_array(ctx, doc, 3);
		pdf_dict_put(ctx, xref_dict, PDF_NAME(W), widths);
		pdf_array_push_int(ctx, widths, 1);
		pdf_array_push_int(ctx, widths, 4);
		pdf_array_push_int(ctx, widths, 1);

		/* The dictionary takes over our reference to the Index array. */
		index = pdf_new_array(ctx, doc, 2);
		pdf_dict_put_drop(ctx, xref_dict, PDF_NAME(Index), index);

		/* opts->gen_list[xref_num] is already initialized by fz_calloc. */
		opts->use_list[xref_num] = 1;
		opts->ofs_list[xref_num] = opts->first_xref_entry_offset;

		entries = fz_new_buffer(ctx, (1 + 4 + 1) * (end-from));

		if (!opts->do_incremental)
		{
			writexrefstreamsubsect(ctx, doc, opts, index, entries, from, end);
		}
		else
		{
			int lo, hi;

			/* Emit one subsection per run of incremental objects. */
			for (lo = from; lo < end; lo = hi)
			{
				while (lo < end && !pdf_xref_is_incremental(ctx, doc, lo))
					lo++;

				hi = lo;
				while (hi < end && pdf_xref_is_incremental(ctx, doc, hi))
					hi++;

				if (lo < hi)
					writexrefstreamsubsect(ctx, doc, opts, index, entries, lo, hi);
			}
		}

		pdf_update_stream(ctx, doc, xref_dict, entries, 0);

		writeobject(ctx, doc, opts, xref_num, 0, 0, 1);
		fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, xref_dict);
		pdf_drop_obj(ctx, widths);
		fz_drop_buffer(ctx, entries);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	doc->has_old_style_xrefs = 0;
}
2357 
2358 static void
padto(fz_context * ctx,fz_output * out,int64_t target)2359 padto(fz_context *ctx, fz_output *out, int64_t target)
2360 {
2361 	int64_t pos = fz_tell_output(ctx, out);
2362 
2363 	assert(pos <= target);
2364 	while (pos < target)
2365 	{
2366 		fz_write_byte(ctx, out, '\n');
2367 		pos++;
2368 	}
2369 }
2370 
2371 static void
dowriteobject(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,int num,int pass)2372 dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int pass)
2373 {
2374 	pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
2375 	if (entry->type == 'f')
2376 		opts->gen_list[num] = entry->gen;
2377 	if (entry->type == 'n')
2378 		opts->gen_list[num] = entry->gen;
2379 	if (entry->type == 'o')
2380 		opts->gen_list[num] = 0;
2381 
2382 	/* If we are renumbering, then make sure all generation numbers are
2383 	 * zero (except object 0 which must be free, and have a gen number of
2384 	 * 65535). Changing the generation numbers (and indeed object numbers)
2385 	 * will break encryption - so only do this if we are renumbering
2386 	 * anyway. */
2387 	if (opts->do_garbage >= 2)
2388 		opts->gen_list[num] = (num == 0 ? 65535 : 0);
2389 
2390 	if (opts->do_garbage && !opts->use_list[num])
2391 		return;
2392 
2393 	if (entry->type == 'n' || entry->type == 'o')
2394 	{
2395 		if (pass > 0)
2396 			padto(ctx, opts->out, opts->ofs_list[num]);
2397 		if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
2398 		{
2399 			opts->ofs_list[num] = fz_tell_output(ctx, opts->out);
2400 			writeobject(ctx, doc, opts, num, opts->gen_list[num], 1, num == opts->crypt_object_number);
2401 		}
2402 	}
2403 	else
2404 		opts->use_list[num] = 0;
2405 }
2406 
2407 static void
writeobjects(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,int pass)2408 writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int pass)
2409 {
2410 	int num;
2411 	int xref_len = pdf_xref_len(ctx, doc);
2412 
2413 	if (!opts->do_incremental)
2414 	{
2415 		int version = pdf_version(ctx, doc);
2416 		fz_write_printf(ctx, opts->out, "%%PDF-%d.%d\n", version / 10, version % 10);
2417 		fz_write_string(ctx, opts->out, "%\xC2\xB5\xC2\xB6\n\n");
2418 	}
2419 
2420 	dowriteobject(ctx, doc, opts, opts->start, pass);
2421 
2422 	if (opts->do_linear)
2423 	{
2424 		/* Write first xref */
2425 		if (pass == 0)
2426 			opts->first_xref_offset = fz_tell_output(ctx, opts->out);
2427 		else
2428 			padto(ctx, opts->out, opts->first_xref_offset);
2429 		writexref(ctx, doc, opts, opts->start, pdf_xref_len(ctx, doc), 1, opts->main_xref_offset, 0);
2430 	}
2431 
2432 	for (num = opts->start+1; num < xref_len; num++)
2433 		dowriteobject(ctx, doc, opts, num, pass);
2434 	if (opts->do_linear && pass == 1)
2435 	{
2436 		int64_t offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
2437 		padto(ctx, opts->out, offset);
2438 	}
2439 	for (num = 1; num < opts->start; num++)
2440 	{
2441 		if (pass == 1)
2442 			opts->ofs_list[num] += opts->hintstream_len;
2443 		dowriteobject(ctx, doc, opts, num, pass);
2444 	}
2445 }
2446 
/*
	Return the number of bits needed to represent x, i.e. the smallest i
	such that (1<<i) > x.

	Returns 0 for x <= 0. Also returns 0 when the answer would reach the
	position of the sign bit (x >= 1<<(sign_bit-1)); that is the existing
	behaviour, kept unchanged for callers.
*/
static int
my_log2(int x)
{
	const int sign_bit = sizeof(int)*8-1;
	int i = 0;

	if (x <= 0)
		return 0;

	/* Check the shift count before shifting: 1<<sign_bit is not
	 * representable in an int, so evaluating it is undefined. */
	while (i < sign_bit && (1<<i) <= x)
		i++;

	if (i >= sign_bit)
		return 0;

	return i;
}
2464 
/*
	Append the linearization hint tables to 'buf'.

	Two tables are emitted back to back: the page offset hint table
	(header labelled Table F.3 below, per-page items Table F.4) and the
	shared object hint table (Tables F.5 and F.6).

	Inputs are the per-object offsets and use flags in opts->ofs_list[]
	and opts->use_list[], and the per-page object lists in
	opts->page_object_lists.

	Side effects: updates num_objects, min_ofs, max_ofs and num_shared in
	each page_objects entry, and stores in opts->hints_shared_offset the
	value fz_buffer_storage() reports for 'buf' at the point where the
	shared object table begins.

	NOTE(review): num_objects/min_ofs/max_ofs are only accumulated here,
	so they are presumably initialised by the caller - confirm.
*/
static void
make_page_offset_hints(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, fz_buffer *buf)
{
	int i, j;
	int min_objs_per_page, max_objs_per_page;
	int min_page_length, max_page_length;
	int objs_per_page_bits;
	int min_shared_object, max_shared_object;
	int max_shared_object_refs = 0;
	int min_shared_length, max_shared_length;
	page_objects **pop = &opts->page_object_lists->page[0];
	int page_len_bits, shared_object_bits, shared_object_id_bits;
	int shared_length_bits;
	int xref_len = pdf_xref_len(ctx, doc);

	/* Start the min/max trackers at values any real object will improve on. */
	min_shared_object = pdf_xref_len(ctx, doc);
	max_shared_object = 1;
	min_shared_length = opts->file_len;
	max_shared_length = 0;
	/* Pass 1: for each object, work out the byte range it occupies and
	 * fold that into the statistics for shared objects and for the page
	 * (if any) that the object belongs to. */
	for (i=1; i < xref_len; i++)
	{
		int min, max, page;

		/* The object's extent runs from its own offset up to the offset
		 * of the following object number; the last object of each run
		 * ends at the main xref offset or wraps round to object 1. */
		min = opts->ofs_list[i];
		if (i == opts->start-1 || (opts->start == 1 && i == xref_len-1))
			max = opts->main_xref_offset;
		else if (i == xref_len-1)
			max = opts->ofs_list[1];
		else
			max = opts->ofs_list[i+1];

		assert(max > min);

		if (opts->use_list[i] & USE_SHARED)
		{
			page = -1;
			if (i < min_shared_object)
				min_shared_object = i;
			if (i > max_shared_object)
				max_shared_object = i;
			if (min_shared_length > max - min)
				min_shared_length = max - min;
			if (max_shared_length < max - min)
				max_shared_length = max - min;
		}
		else if (opts->use_list[i] & (USE_CATALOGUE | USE_HINTS | USE_PARAMS))
			page = -1;
		else if (opts->use_list[i] & USE_PAGE1)
		{
			/* Page 1 objects count towards both page 0 and the shared
			 * length statistics. */
			page = 0;
			if (min_shared_length > max - min)
				min_shared_length = max - min;
			if (max_shared_length < max - min)
				max_shared_length = max - min;
		}
		else if (opts->use_list[i] == 0)
			page = -1;
		else
			page = opts->use_list[i]>>USE_PAGE_SHIFT;

		if (page >= 0)
		{
			pop[page]->num_objects++;
			if (pop[page]->min_ofs > min)
				pop[page]->min_ofs = min;
			if (pop[page]->max_ofs < max)
				pop[page]->max_ofs = max;
		}
	}

	/* Pass 2: find the extremes of object count and byte length over all pages. */
	min_objs_per_page = max_objs_per_page = pop[0]->num_objects;
	min_page_length = max_page_length = pop[0]->max_ofs - pop[0]->min_ofs;
	for (i=1; i < opts->page_count; i++)
	{
		int tmp;
		if (min_objs_per_page > pop[i]->num_objects)
			min_objs_per_page = pop[i]->num_objects;
		if (max_objs_per_page < pop[i]->num_objects)
			max_objs_per_page = pop[i]->num_objects;
		tmp = pop[i]->max_ofs - pop[i]->min_ofs;
		if (tmp < min_page_length)
			min_page_length = tmp;
		if (tmp > max_page_length)
			max_page_length = tmp;
	}

	/* Pass 3: count the shared object references made by each page. For
	 * page 0 the objects flagged USE_PAGE1 are counted; for every other
	 * page the objects flagged USE_SHARED are counted. */
	for (i=0; i < opts->page_count; i++)
	{
		int count = 0;
		page_objects *po = opts->page_object_lists->page[i];
		for (j = 0; j < po->len; j++)
		{
			if (i == 0 && opts->use_list[po->object[j]] & USE_PAGE1)
				count++;
			else if (i != 0 && opts->use_list[po->object[j]] & USE_SHARED)
				count++;
		}
		po->num_shared = count;
		if (i == 0 || count > max_shared_object_refs)
			max_shared_object_refs = count;
	}
	/* No shared objects were found: collapse the range to object 0. */
	if (min_shared_object > max_shared_object)
		min_shared_object = max_shared_object = 0;

	/* Table F.3 - Header */
	/* Header Item 1: Least number of objects in a page */
	fz_append_bits(ctx, buf, min_objs_per_page, 32);
	/* Header Item 2: Location of the first page's page object */
	fz_append_bits(ctx, buf, opts->ofs_list[pop[0]->page_object_number], 32);
	/* Header Item 3: Number of bits required to represent the difference
	 * between the greatest and least number of objects in a page. */
	objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page);
	fz_append_bits(ctx, buf, objs_per_page_bits, 16);
	/* Header Item 4: Least length of a page. */
	fz_append_bits(ctx, buf, min_page_length, 32);
	/* Header Item 5: Number of bits needed to represent the difference
	 * between the greatest and least length of a page. */
	page_len_bits = my_log2(max_page_length - min_page_length);
	fz_append_bits(ctx, buf, page_len_bits, 16);
	/* Header Item 6: Least offset to start of content stream (Acrobat
	 * sets this to always be 0) */
	fz_append_bits(ctx, buf, 0, 32);
	/* Header Item 7: Number of bits needed to represent the difference
	 * between the greatest and least offset to content stream (Acrobat
	 * sets this to always be 0) */
	fz_append_bits(ctx, buf, 0, 16);
	/* Header Item 8: Least content stream length. (Acrobat
	 * sets this to always be 0) */
	fz_append_bits(ctx, buf, 0, 32);
	/* Header Item 9: Number of bits needed to represent the difference
	 * between the greatest and least content stream length (Acrobat
	 * sets this to always be the same as item 5) */
	fz_append_bits(ctx, buf, page_len_bits, 16);
	/* Header Item 10: Number of bits needed to represent the greatest
	 * number of shared object references. */
	shared_object_bits = my_log2(max_shared_object_refs);
	fz_append_bits(ctx, buf, shared_object_bits, 16);
	/* Header Item 11: Number of bits needed to represent the greatest
	 * shared object identifier. */
	shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[0]->num_shared);
	fz_append_bits(ctx, buf, shared_object_id_bits, 16);
	/* Header Item 12: Number of bits needed to represent the numerator
	 * of the fractions. We always send 0. */
	fz_append_bits(ctx, buf, 0, 16);
	/* Header Item 13: Number of bits needed to represent the denominator
	 * of the fractions. We always send 0. */
	fz_append_bits(ctx, buf, 0, 16);

	/* Table F.4 - Page offset hint table (per page) */
	/* Item 1: A number that, when added to the least number of objects
	 * on a page, gives the number of objects in the page. */
	for (i = 0; i < opts->page_count; i++)
	{
		fz_append_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits);
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 2: A number that, when added to the least page length, gives
	 * the length of the page in bytes. */
	for (i = 0; i < opts->page_count; i++)
	{
		fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 3: The number of shared objects referenced from the page. */
	for (i = 0; i < opts->page_count; i++)
	{
		fz_append_bits(ctx, buf, pop[i]->num_shared, shared_object_bits);
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 4: Shared object id for each shared object ref in every page.
	 * Spec says "not for page 1", but acrobat does send page 1's - all
	 * as zeros. */
	for (i = 0; i < opts->page_count; i++)
	{
		for (j = 0; j < pop[i]->len; j++)
		{
			int o = pop[i]->object[j];
			if (i == 0 && opts->use_list[o] & USE_PAGE1)
				fz_append_bits(ctx, buf, 0 /* o - pop[0]->page_object_number */, shared_object_id_bits);
			if (i != 0 && opts->use_list[o] & USE_SHARED)
				fz_append_bits(ctx, buf, o - min_shared_object + pop[0]->num_shared, shared_object_id_bits);
		}
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 5: Numerator of fractional position for each shared object reference. */
	/* We always send 0 in 0 bits */
	/* Item 6: A number that, when added to the least offset to the start
	 * of the content stream (F.3 Item 6), gives the offset in bytes of
	 * start of the pages content stream object relative to the beginning
	 * of the page. Always 0 in 0 bits. */
	/* Item 7: A number that, when added to the least content stream length
	 * (F.3 Item 8), gives the length of the pages content stream object.
	 * Always == Item 2 as least content stream length = least page stream
	 * length.
	 */
	for (i = 0; i < opts->page_count; i++)
	{
		fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
	}

	/* Pad, and then do shared object hint table */
	fz_append_bits_pad(ctx, buf);
	opts->hints_shared_offset = (int)fz_buffer_storage(ctx, buf, NULL);

	/* Table F.5: */
	/* Header Item 1: Object number of the first object in the shared
	 * objects section. */
	fz_append_bits(ctx, buf, min_shared_object, 32);
	/* Header Item 2: Location of first object in the shared objects
	 * section. */
	fz_append_bits(ctx, buf, opts->ofs_list[min_shared_object], 32);
	/* Header Item 3: The number of shared object entries for the first
	 * page. */
	fz_append_bits(ctx, buf, pop[0]->num_shared, 32);
	/* Header Item 4: The number of shared object entries for the shared
	 * objects section + first page. */
	fz_append_bits(ctx, buf, max_shared_object - min_shared_object + pop[0]->num_shared, 32);
	/* Header Item 5: The number of bits needed to represent the greatest
	 * number of objects in a shared object group (Always 0). */
	fz_append_bits(ctx, buf, 0, 16);
	/* Header Item 6: The least length of a shared object group in bytes. */
	fz_append_bits(ctx, buf, min_shared_length, 32);
	/* Header Item 7: The number of bits required to represent the
	 * difference between the greatest and least length of a shared object
	 * group. */
	shared_length_bits = my_log2(max_shared_length - min_shared_length);
	fz_append_bits(ctx, buf, shared_length_bits, 16);

	/* Table F.6 */
	/* Item 1: Shared object group length (page 1 objects) */
	for (j = 0; j < pop[0]->len; j++)
	{
		int o = pop[0]->object[j];
		int64_t min, max;
		min = opts->ofs_list[o];
		if (o == opts->start-1)
			max = opts->main_xref_offset;
		else if (o < xref_len-1)
			max = opts->ofs_list[o+1];
		else
			max = opts->ofs_list[1];
		if (opts->use_list[o] & USE_PAGE1)
			fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
	}
	/* Item 1: Shared object group length (shared objects) */
	for (i = min_shared_object; i <= max_shared_object; i++)
	{
		int min, max;
		min = opts->ofs_list[i];
		if (i == opts->start-1)
			max = opts->main_xref_offset;
		else if (i < xref_len-1)
			max = opts->ofs_list[i+1];
		else
			max = opts->ofs_list[1];
		fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
	}
	fz_append_bits_pad(ctx, buf);

	/* Item 2: MD5 presence flags */
	for (i = max_shared_object - min_shared_object + pop[0]->num_shared; i > 0; i--)
	{
		fz_append_bits(ctx, buf, 0, 1);
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 3: MD5 sums (not present) */
	fz_append_bits_pad(ctx, buf);
	/* Item 4: Number of objects in the group (not present) */
}
2734 
2735 static void
make_hint_stream(fz_context * ctx,pdf_document * doc,pdf_write_state * opts)2736 make_hint_stream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
2737 {
2738 	fz_buffer *buf;
2739 	pdf_obj *obj = NULL;
2740 
2741 	fz_var(obj);
2742 
2743 	buf = fz_new_buffer(ctx, 100);
2744 	fz_try(ctx)
2745 	{
2746 		make_page_offset_hints(ctx, doc, opts, buf);
2747 		obj = pdf_load_object(ctx, doc, pdf_xref_len(ctx, doc)-1);
2748 		pdf_update_stream(ctx, doc, obj, buf, 0);
2749 		opts->hintstream_len = (int)fz_buffer_storage(ctx, buf, NULL);
2750 	}
2751 	fz_always(ctx)
2752 	{
2753 		pdf_drop_obj(ctx, obj);
2754 		fz_drop_buffer(ctx, buf);
2755 	}
2756 	fz_catch(ctx)
2757 		fz_rethrow(ctx);
2758 }
2759 
2760 #ifdef DEBUG_WRITING
/*
	Debug helper: print the recorded offset and use flags of every
	object in the xref to stderr, one line per object.
*/
static void dump_object_details(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	int i;
	int xref_len = pdf_xref_len(ctx, doc);

	for (i = 0; i < xref_len; i++)
	{
		/* ofs_list[] holds int64_t values, so a plain %d would not match
		 * the argument; widen explicitly and print with %lld. */
		fprintf(stderr, "%d@%lld: use=%d\n", i, (long long)opts->ofs_list[i], opts->use_list[i]);
	}
}
2770 #endif
2771 
/*
	Pad the ByteRange array of every unsaved signature with placeholder
	values.

	ByteRange arrays are first written with dummy values and overwritten
	later, so the initial form has to occupy at least as much file space
	as the final one. With n unsaved signatures in a section there is a
	run of bytes before the first, after the last and between each
	consecutive pair, i.e. n + 1 runs; each run needs two integers, and
	INT_MAX is pushed for each.
*/
static void presize_unsaved_signature_byteranges(fz_context *ctx, pdf_document *doc)
{
	int sect;

	for (sect = 0; sect < doc->num_incremental_sections; sect++)
	{
		pdf_xref *xref = &doc->xref_sections[sect];
		pdf_unsaved_sig *sig;
		int sig_count = 0;

		if (!xref->unsaved_sigs)
			continue;

		for (sig = xref->unsaved_sigs; sig != NULL; sig = sig->next)
			sig_count++;

		for (sig = xref->unsaved_sigs; sig != NULL; sig = sig->next)
		{
			pdf_obj *byte_range = pdf_dict_getl(ctx, sig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
			int runs;

			for (runs = sig_count + 1; runs > 0; runs--)
			{
				pdf_array_push_int(ctx, byte_range, INT_MAX);
				pdf_array_push_int(ctx, byte_range, INT_MAX);
			}
		}
	}
}
2808 
/*
	Finish every unsaved signature once the file body has been written.

	For each incremental xref section that has unsaved signatures, the
	output is read back to locate each signature's /ByteRange, /Contents
	and /Filter markers. A ByteRange array covering everything except the
	Contents values is then built, written over the placeholder in the
	file (padded with spaces), and the digests are written into the
	Contents slots. Finally the unsaved signature records are freed.

	Throws if the markers cannot be found in the expected order, or if
	the printed ByteRange array does not fit in the scratch buffer.
*/
static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	char *buf = NULL, *ptr;
	int s;
	fz_stream *stm = NULL;
	fz_var(stm);
	fz_var(buf);

	fz_try(ctx)
	{
		for (s = 0; s < doc->num_incremental_sections; s++)
		{
			pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];

			if (xref->unsaved_sigs)
			{
				pdf_unsaved_sig *usig;
				pdf_obj *byte_range;
				size_t buf_size = 0;
				size_t i;
				size_t last_end;

				/* Size the scratch buffer from the largest digest any signer may produce. */
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
				{
					size_t size = usig->signer->max_digest_size(ctx, usig->signer);

					buf_size = fz_maxz(buf_size, size);
				}

				buf_size = buf_size * 2 + SIG_EXTRAS_SIZE;

				buf = fz_calloc(ctx, buf_size, 1);

				stm = fz_stream_from_output(ctx, opts->out);
				/* Locate the byte ranges and contents in the saved file */
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
				{
					char *bstr, *cstr, *fstr;
					size_t bytes_read;
					int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL));
					fz_seek(ctx, stm, opts->ofs_list[pnum], SEEK_SET);
					/* SIG_EXTRAS_SIZE is an arbitrary value and its addition above to buf_size
					 * could cause an attempt to read off the end of the file. That's not an
					 * error, but we need to keep track of how many bytes are read and search
					 * for markers only in defined data */
					bytes_read = fz_read(ctx, stm, (unsigned char *)buf, buf_size);
					assert(bytes_read <= buf_size);

					bstr = fz_memmem(buf, bytes_read, SLASH_BYTE_RANGE, sizeof(SLASH_BYTE_RANGE)-1);
					cstr = fz_memmem(buf, bytes_read, SLASH_CONTENTS, sizeof(SLASH_CONTENTS)-1);
					fstr = fz_memmem(buf, bytes_read, SLASH_FILTER, sizeof(SLASH_FILTER)-1);

					if (!(bstr && cstr && fstr && bstr < cstr && cstr < fstr))
						fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to determine byte ranges while writing signature");

					/* Convert positions within buf into absolute file offsets. */
					usig->byte_range_start = bstr - buf + sizeof(SLASH_BYTE_RANGE)-1 + opts->ofs_list[pnum];
					usig->byte_range_end = cstr - buf + opts->ofs_list[pnum];
					usig->contents_start = cstr - buf + sizeof(SLASH_CONTENTS)-1 + opts->ofs_list[pnum];
					usig->contents_end = fstr - buf + opts->ofs_list[pnum];
				}

				fz_drop_stream(ctx, stm);
				stm = NULL;

				/* Recreate ByteRange with correct values. Initially store the
				* recreated object in the first of the unsaved signatures */
				byte_range = pdf_new_array(ctx, doc, 4);
				pdf_dict_putl_drop(ctx, xref->unsaved_sigs->field, byte_range, PDF_NAME(V), PDF_NAME(ByteRange), NULL);

				last_end = 0;
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
				{
					pdf_array_push_int(ctx, byte_range, last_end);
					pdf_array_push_int(ctx, byte_range, usig->contents_start - last_end);
					last_end = usig->contents_end;
				}
				pdf_array_push_int(ctx, byte_range, last_end);
				pdf_array_push_int(ctx, byte_range, xref->end_ofs - last_end);

				/* Copy the new ByteRange to the other unsaved signatures */
				for (usig = xref->unsaved_sigs->next; usig; usig = usig->next)
					pdf_dict_putl_drop(ctx, usig->field, pdf_copy_array(ctx, byte_range), PDF_NAME(V), PDF_NAME(ByteRange), NULL);

				/* Write the byte range into buf, padding with spaces*/
				ptr = pdf_sprint_obj(ctx, buf, buf_size, &i, byte_range, 1, 0);
				if (ptr != buf)
				{
					/* Should never happen, since the data should fit in
					 * buf_size. If it does, the text is not in buf, so
					 * padding from buf+i below would run past the end of
					 * buf. Fail cleanly instead. */
					fz_free(ctx, ptr);
					fz_throw(ctx, FZ_ERROR_GENERIC, "ByteRange too large while writing signature");
				}
				memset(buf+i, ' ', buf_size-i);

				/* Write the byte range to the file */
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
				{
					fz_seek_output(ctx, opts->out, usig->byte_range_start, SEEK_SET);
					fz_write_data(ctx, opts->out, buf, usig->byte_range_end - usig->byte_range_start);
				}

				/* Write the digests into the file */
				for (usig = xref->unsaved_sigs; usig; usig = usig->next)
					pdf_write_digest(ctx, opts->out, byte_range, usig->contents_start, usig->contents_end - usig->contents_start, usig->signer);

				/* delete the unsaved_sigs records */
				while ((usig = xref->unsaved_sigs) != NULL)
				{
					xref->unsaved_sigs = usig->next;
					pdf_drop_obj(ctx, usig->field);
					pdf_drop_signer(ctx, usig->signer);
					fz_free(ctx, usig);
				}

				xref->unsaved_sigs_end = NULL;

				fz_free(ctx, buf);
				buf = NULL;
			}
		}
	}
	fz_catch(ctx)
	{
		fz_drop_stream(ctx, stm);
		fz_free(ctx, buf);
		fz_rethrow(ctx);
	}
}
2932 
/*
	Run the content stream filter over the contents of every page in the
	document, and over the contents of each annotation on those pages.

	sanitize: stored in the filter options' 'sanitize' field.
	ascii: stored in the filter options' 'ascii' field.

	Each page is dropped again before moving on; an error raised while
	filtering a page is rethrown after that page has been dropped.
*/
static void clean_content_streams(fz_context *ctx, pdf_document *doc, int sanitize, int ascii)
{
	pdf_filter_options filter;
	int page_count = pdf_count_pages(ctx, doc);
	int page_no;

	/* Every option we do not set explicitly is left zeroed. */
	memset(&filter, 0, sizeof filter);
	filter.recurse = 1;
	filter.sanitize = sanitize;
	filter.ascii = ascii;

	for (page_no = 0; page_no < page_count; page_no++)
	{
		pdf_page *page = pdf_load_page(ctx, doc, page_no);
		pdf_annot *annot;

		fz_try(ctx)
		{
			pdf_filter_page_contents(ctx, doc, page, &filter);

			annot = pdf_first_annot(ctx, page);
			while (annot != NULL)
			{
				pdf_filter_annot_contents(ctx, doc, annot, &filter);
				annot = pdf_next_annot(ctx, annot);
			}
		}
		fz_always(ctx)
			fz_drop_page(ctx, &page->super);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}
}
2963 
2964 /* Initialise the pdf_write_state, used dynamically during the write, from the static
2965  * pdf_write_options, passed into pdf_save_document */
initialise_write_state(fz_context * ctx,pdf_document * doc,const pdf_write_options * in_opts,pdf_write_state * opts)2966 static void initialise_write_state(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts, pdf_write_state *opts)
2967 {
2968 	int xref_len = pdf_xref_len(ctx, doc);
2969 
2970 	opts->do_incremental = in_opts->do_incremental;
2971 	opts->do_ascii = in_opts->do_ascii;
2972 	opts->do_tight = !in_opts->do_pretty;
2973 	opts->do_expand = in_opts->do_decompress;
2974 	opts->do_compress = in_opts->do_compress;
2975 	opts->do_compress_images = in_opts->do_compress_images;
2976 	opts->do_compress_fonts = in_opts->do_compress_fonts;
2977 
2978 	opts->do_garbage = in_opts->do_garbage;
2979 	opts->do_linear = in_opts->do_linear;
2980 	opts->do_clean = in_opts->do_clean;
2981 	opts->do_encrypt = in_opts->do_encrypt;
2982 	opts->start = 0;
2983 	opts->main_xref_offset = INT_MIN;
2984 
2985 	opts->permissions = in_opts->permissions;
2986 	memcpy(opts->opwd_utf8, in_opts->opwd_utf8, nelem(opts->opwd_utf8));
2987 	memcpy(opts->upwd_utf8, in_opts->upwd_utf8, nelem(opts->upwd_utf8));
2988 
2989 	/* We deliberately make these arrays long enough to cope with
2990 	* 1 to n access rather than 0..n-1, and add space for 2 new
2991 	* extra entries that may be required for linearization. */
2992 	opts->list_len = 0;
2993 	opts->use_list = NULL;
2994 	opts->ofs_list = NULL;
2995 	opts->gen_list = NULL;
2996 	opts->renumber_map = NULL;
2997 	opts->rev_renumber_map = NULL;
2998 
2999 	expand_lists(ctx, opts, xref_len);
3000 }
3001 
3002 /* Free the resources held by the dynamic write options */
finalise_write_state(fz_context * ctx,pdf_write_state * opts)3003 static void finalise_write_state(fz_context *ctx, pdf_write_state *opts)
3004 {
3005 	fz_free(ctx, opts->use_list);
3006 	fz_free(ctx, opts->ofs_list);
3007 	fz_free(ctx, opts->gen_list);
3008 	fz_free(ctx, opts->renumber_map);
3009 	fz_free(ctx, opts->rev_renumber_map);
3010 	pdf_drop_obj(ctx, opts->linear_l);
3011 	pdf_drop_obj(ctx, opts->linear_h0);
3012 	pdf_drop_obj(ctx, opts->linear_h1);
3013 	pdf_drop_obj(ctx, opts->linear_o);
3014 	pdf_drop_obj(ctx, opts->linear_e);
3015 	pdf_drop_obj(ctx, opts->linear_n);
3016 	pdf_drop_obj(ctx, opts->linear_t);
3017 	pdf_drop_obj(ctx, opts->hints_s);
3018 	pdf_drop_obj(ctx, opts->hints_length);
3019 	page_objects_list_destroy(ctx, opts->page_object_lists);
3020 }
3021 
/*
	Default write options. pdf_write_document and pdf_save_document take
	a copy of these when they are called without an options structure.

	The initializer is positional: each value is labelled with the
	pdf_write_options member it is intended for, so the order here has
	to follow the order of the members in the structure declaration.
*/
const pdf_write_options pdf_default_write_options = {
	0, /* do_incremental */
	0, /* do_pretty */
	0, /* do_ascii */
	0, /* do_compress */
	0, /* do_compress_images */
	0, /* do_compress_fonts */
	0, /* do_decompress */
	0, /* do_garbage */
	0, /* do_linear */
	0, /* do_clean */
	0, /* do_sanitize */
	0, /* do_appearance */
	0, /* do_encrypt */
	~0, /* permissions (all bits set) */
	"", /* opwd_utf8[128] */
	"", /* upwd_utf8[128] */
};
3040 
/*
	Help text describing the options understood by
	pdf_parse_write_options.

	Lines beginning "or" are continuations of the option documented
	directly above them, so they must stay adjacent to that option.
*/
const char *fz_pdf_write_options_usage =
	"PDF output options:\n"
	"\tdecompress: decompress all streams (except compress-fonts/images)\n"
	"\tcompress: compress all streams\n"
	"\tcompress-fonts: compress embedded fonts\n"
	"\tcompress-images: compress images\n"
	"\tascii: ASCII hex encode binary streams\n"
	"\tpretty: pretty-print objects with indentation\n"
	"\tlinearize: optimize for web browsers\n"
	"\tclean: pretty-print graphics commands in content streams\n"
	"\tsanitize: sanitize graphics commands in content streams\n"
	"\tgarbage: garbage collect unused objects\n"
	"\tor garbage=compact: ... and compact cross reference table\n"
	"\tor garbage=deduplicate: ... and remove duplicate objects\n"
	"\tincremental: write changes as incremental update\n"
	"\tcontinue-on-error: continue saving the document even if there is an error\n"
	"\tappearance=yes|all: create annotation appearance streams (all: recreate every one)\n"
	"\tdecrypt: write unencrypted document\n"
	"\tencrypt=rc4-40|rc4-128|aes-128|aes-256: write encrypted document\n"
	"\tpermissions=NUMBER: document permissions to grant when encrypting\n"
	"\tuser-password=PASSWORD: password required to read document\n"
	"\towner-password=PASSWORD: password required to edit document\n"
	"\n";
3063 
3064 pdf_write_options *
pdf_parse_write_options(fz_context * ctx,pdf_write_options * opts,const char * args)3065 pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args)
3066 {
3067 	const char *val;
3068 
3069 	memset(opts, 0, sizeof *opts);
3070 
3071 	if (fz_has_option(ctx, args, "decompress", &val))
3072 		opts->do_decompress = fz_option_eq(val, "yes");
3073 	if (fz_has_option(ctx, args, "compress", &val))
3074 		opts->do_compress = fz_option_eq(val, "yes");
3075 	if (fz_has_option(ctx, args, "compress-fonts", &val))
3076 		opts->do_compress_fonts = fz_option_eq(val, "yes");
3077 	if (fz_has_option(ctx, args, "compress-images", &val))
3078 		opts->do_compress_images = fz_option_eq(val, "yes");
3079 	if (fz_has_option(ctx, args, "ascii", &val))
3080 		opts->do_ascii = fz_option_eq(val, "yes");
3081 	if (fz_has_option(ctx, args, "pretty", &val))
3082 		opts->do_pretty = fz_option_eq(val, "yes");
3083 	if (fz_has_option(ctx, args, "linearize", &val))
3084 		opts->do_linear = fz_option_eq(val, "yes");
3085 	if (fz_has_option(ctx, args, "clean", &val))
3086 		opts->do_clean = fz_option_eq(val, "yes");
3087 	if (fz_has_option(ctx, args, "sanitize", &val))
3088 		opts->do_sanitize = fz_option_eq(val, "yes");
3089 	if (fz_has_option(ctx, args, "incremental", &val))
3090 		opts->do_incremental = fz_option_eq(val, "yes");
3091 	if (fz_has_option(ctx, args, "decrypt", &val))
3092 		opts->do_encrypt = fz_option_eq(val, "yes") ? PDF_ENCRYPT_NONE : PDF_ENCRYPT_KEEP;
3093 	if (fz_has_option(ctx, args, "encrypt", &val))
3094 	{
3095 		opts->do_encrypt = PDF_ENCRYPT_UNKNOWN;
3096 		if (fz_option_eq(val, "none") || fz_option_eq(val, "no"))
3097 			opts->do_encrypt = PDF_ENCRYPT_NONE;
3098 		if (fz_option_eq(val, "keep"))
3099 			opts->do_encrypt = PDF_ENCRYPT_KEEP;
3100 		if (fz_option_eq(val, "rc4-40") || fz_option_eq(val, "yes"))
3101 			opts->do_encrypt = PDF_ENCRYPT_RC4_40;
3102 		if (fz_option_eq(val, "rc4-128"))
3103 			opts->do_encrypt = PDF_ENCRYPT_RC4_128;
3104 		if (fz_option_eq(val, "aes-128"))
3105 			opts->do_encrypt = PDF_ENCRYPT_AES_128;
3106 		if (fz_option_eq(val, "aes-256"))
3107 			opts->do_encrypt = PDF_ENCRYPT_AES_256;
3108 	}
3109 	if (fz_has_option(ctx, args, "owner-password", &val))
3110 		fz_copy_option(ctx, val, opts->opwd_utf8, nelem(opts->opwd_utf8));
3111 	if (fz_has_option(ctx, args, "user-password", &val))
3112 		fz_copy_option(ctx, val, opts->upwd_utf8, nelem(opts->upwd_utf8));
3113 	if (fz_has_option(ctx, args, "permissions", &val))
3114 		opts->permissions = fz_atoi(val);
3115 	else
3116 		opts->permissions = ~0;
3117 	if (fz_has_option(ctx, args, "garbage", &val))
3118 	{
3119 		if (fz_option_eq(val, "yes"))
3120 			opts->do_garbage = 1;
3121 		else if (fz_option_eq(val, "compact"))
3122 			opts->do_garbage = 2;
3123 		else if (fz_option_eq(val, "deduplicate"))
3124 			opts->do_garbage = 3;
3125 		else
3126 			opts->do_garbage = fz_atoi(val);
3127 	}
3128 	if (fz_has_option(ctx, args, "appearance", &val))
3129 	{
3130 		if (fz_option_eq(val, "yes"))
3131 			opts->do_appearance = 1;
3132 		else if (fz_option_eq(val, "all"))
3133 			opts->do_appearance = 2;
3134 	}
3135 
3136 	return opts;
3137 }
3138 
/*
	Report whether the document may be saved as an incremental update.

	Returns 0 if a repair has been attempted on the document, if it has
	been redacted, or if it has both xref streams and old style xrefs;
	returns 1 otherwise.
*/
int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc)
{
	int mixed_xref_styles = (doc->has_xref_streams && doc->has_old_style_xrefs);

	return !(doc->repair_attempted || doc->redacted || mixed_xref_styles);
}
3149 
3150 static void
prepare_for_save(fz_context * ctx,pdf_document * doc,pdf_write_options * in_opts)3151 prepare_for_save(fz_context *ctx, pdf_document *doc, pdf_write_options *in_opts)
3152 {
3153 	/* Rewrite (and possibly sanitize) the operator streams */
3154 	if (in_opts->do_clean || in_opts->do_sanitize)
3155 		clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii);
3156 
3157 	/* When saving a PDF with signatures the file will
3158 	first be written once, then the file will have its
3159 	digests and byte ranges calculated and and then the
3160 	signature dictionary containing them will be updated
3161 	both in memory and in the saved file. By setting this
3162 	flag we avoid a new xref section from being created when
3163 	the signature dictionary is updated. */
3164 	doc->save_in_progress = 1;
3165 
3166 	presize_unsaved_signature_byteranges(ctx, doc);
3167 }
3168 
3169 static pdf_obj *
new_identity(fz_context * ctx,pdf_document * doc)3170 new_identity(fz_context *ctx, pdf_document *doc)
3171 {
3172 	unsigned char rnd[32];
3173 	pdf_obj *id;
3174 
3175 	fz_memrnd(ctx, rnd, nelem(rnd));
3176 
3177 	id = pdf_dict_put_array(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), 2);
3178 	pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + 0, nelem(rnd) / 2));
3179 	pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + 16, nelem(rnd) / 2));
3180 
3181 	return id;
3182 }
3183 
3184 static void
change_identity(fz_context * ctx,pdf_document * doc,pdf_obj * id)3185 change_identity(fz_context *ctx, pdf_document *doc, pdf_obj *id)
3186 {
3187 	unsigned char rnd[16];
3188 	if (pdf_array_len(ctx, id) >= 2)
3189 	{
3190 		/* Update second half of ID array with new random data. */
3191 		fz_memrnd(ctx, rnd, 16);
3192 		pdf_array_put_drop(ctx, id, 1, pdf_new_string(ctx, (char *)rnd, 16));
3193 	}
3194 }
3195 
3196 static void
create_encryption_dictionary(fz_context * ctx,pdf_document * doc,pdf_crypt * crypt)3197 create_encryption_dictionary(fz_context *ctx, pdf_document *doc, pdf_crypt *crypt)
3198 {
3199 	unsigned char *o, *u;
3200 	pdf_obj *encrypt;
3201 	int r;
3202 
3203 	r = pdf_crypt_revision(ctx, crypt);
3204 
3205 	encrypt = pdf_dict_put_dict(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), 10);
3206 
3207 	pdf_dict_put_name(ctx, encrypt, PDF_NAME(Filter), "Standard");
3208 	pdf_dict_put_int(ctx, encrypt, PDF_NAME(R), r);
3209 	pdf_dict_put_int(ctx, encrypt, PDF_NAME(V), pdf_crypt_version(ctx, crypt));
3210 	pdf_dict_put_int(ctx, encrypt, PDF_NAME(Length), pdf_crypt_length(ctx, crypt));
3211 	pdf_dict_put_int(ctx, encrypt, PDF_NAME(P), pdf_crypt_permissions(ctx, crypt));
3212 	pdf_dict_put_bool(ctx, encrypt, PDF_NAME(EncryptMetadata), pdf_crypt_encrypt_metadata(ctx, crypt));
3213 
3214 	o = pdf_crypt_owner_password(ctx, crypt);
3215 	u = pdf_crypt_user_password(ctx, crypt);
3216 
3217 	if (r < 4)
3218 	{
3219 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
3220 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
3221 	}
3222 	else if (r == 4)
3223 	{
3224 		pdf_obj *cf;
3225 
3226 		pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3227 		pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3228 
3229 		cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
3230 		cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
3231 		pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3232 		pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV2");
3233 		pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 16);
3234 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
3235 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
3236 	}
3237 	else if (r == 6)
3238 	{
3239 		unsigned char *oe = pdf_crypt_owner_encryption(ctx, crypt);
3240 		unsigned char *ue = pdf_crypt_user_encryption(ctx, crypt);
3241 		pdf_obj *cf;
3242 
3243 		pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3244 		pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3245 
3246 		cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
3247 		cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
3248 		pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3249 		pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV3");
3250 		pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 32);
3251 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 48);
3252 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 48);
3253 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(OE), (char *) oe, 32);
3254 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(UE), (char *) ue, 32);
3255 		pdf_dict_put_string(ctx, encrypt, PDF_NAME(Perms), (char *) pdf_crypt_permissions_encryption(ctx, crypt), 16);
3256 	}
3257 }
3258 
3259 static void
ensure_initial_incremental_contents(fz_context * ctx,fz_stream * in,fz_output * out)3260 ensure_initial_incremental_contents(fz_context *ctx, fz_stream *in, fz_output *out)
3261 {
3262 	fz_stream *verify;
3263 	unsigned char buf0[256];
3264 	unsigned char buf1[256];
3265 	size_t n0, n1;
3266 	int64_t off = 0;
3267 	int same;
3268 
3269 	if (!in)
3270 		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't copy contents for incremental write");
3271 
3272 	verify = fz_stream_from_output(ctx, out);
3273 	if (!verify)
3274 		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't incrementally write pdf to this type of output");
3275 
3276 	fz_var(verify);
3277 
3278 	fz_try(ctx)
3279 	{
3280 		do
3281 		{
3282 			fz_seek(ctx, in, off, SEEK_SET);
3283 			n0 = fz_read(ctx, in, buf0, sizeof(buf0));
3284 			fz_seek(ctx, verify, off, SEEK_SET);
3285 			n1 = fz_read(ctx, verify, buf1, sizeof(buf1));
3286 			same = (n0 == n1 && !memcmp(buf0, buf1, n0));
3287 			off += n0;
3288 		}
3289 		while (same && n0 > 0);
3290 
3291 		if (same)
3292 			break;
3293 
3294 		fz_drop_stream(ctx, verify);
3295 		verify = NULL;
3296 
3297 		/* Copy old contents into new file */
3298 		fz_seek(ctx, in, 0, SEEK_SET);
3299 		fz_seek_output(ctx, out, 0, SEEK_SET);
3300 		do
3301 		{
3302 			n0 = fz_read(ctx, in, buf0, sizeof(buf0));
3303 			if (n0)
3304 				fz_write_data(ctx, out, buf0, n0);
3305 		}
3306 		while (n0);
3307 		fz_truncate_output(ctx, out);
3308 	}
3309 	fz_always(ctx)
3310 		fz_drop_stream(ctx, verify);
3311 	fz_catch(ctx)
3312 		fz_rethrow(ctx);
3313 }
3314 
3315 static void
do_pdf_save_document(fz_context * ctx,pdf_document * doc,pdf_write_state * opts,pdf_write_options * in_opts)3316 do_pdf_save_document(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_write_options *in_opts)
3317 {
3318 	int lastfree;
3319 	int num;
3320 	int xref_len;
3321 	pdf_obj *id, *id1;
3322 
3323 	if (in_opts->do_incremental)
3324 	{
3325 		/* If no changes, nothing to write */
3326 		if (doc->num_incremental_sections == 0)
3327 		{
3328 			doc->save_in_progress = 0;
3329 			return;
3330 		}
3331 
3332 		ensure_initial_incremental_contents(ctx, doc->file, opts->out);
3333 
3334 		fz_seek_output(ctx, opts->out, 0, SEEK_END);
3335 		fz_write_string(ctx, opts->out, "\n");
3336 	}
3337 
3338 	xref_len = pdf_xref_len(ctx, doc);
3339 
3340 	fz_try(ctx)
3341 	{
3342 		initialise_write_state(ctx, doc, in_opts, opts);
3343 
3344 		/* Update second half of ID array if it exists. */
3345 		id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
3346 		if (id)
3347 			change_identity(ctx, doc, id);
3348 
3349 		/* Remove encryption dictionary if saving without encryption. */
3350 		if (opts->do_encrypt == PDF_ENCRYPT_NONE)
3351 		{
3352 			pdf_dict_del(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
3353 		}
3354 
3355 		/* Keep encryption dictionary if saving with old encryption. */
3356 		else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
3357 		{
3358 			opts->crypt = doc->crypt;
3359 		}
3360 
3361 		/* Create encryption dictionary if saving with new encryption. */
3362 		else
3363 		{
3364 			if (!id)
3365 				id = new_identity(ctx, doc);
3366 			id1 = pdf_array_get(ctx, id, 0);
3367 			opts->crypt = pdf_new_encrypt(ctx, opts->opwd_utf8, opts->upwd_utf8, id1, opts->permissions, opts->do_encrypt);
3368 			create_encryption_dictionary(ctx, doc, opts->crypt);
3369 		}
3370 
3371 		/* Make sure any objects hidden in compressed streams have been loaded */
3372 		if (!opts->do_incremental)
3373 		{
3374 			pdf_ensure_solid_xref(ctx, doc, xref_len);
3375 			preloadobjstms(ctx, doc);
3376 			xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
3377 			expand_lists(ctx, opts, xref_len);
3378 		}
3379 
3380 		/* Sweep & mark objects from the trailer */
3381 		if (opts->do_garbage >= 1 || opts->do_linear)
3382 			(void)markobj(ctx, doc, opts, pdf_trailer(ctx, doc));
3383 		else
3384 		{
3385 			xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
3386 			expand_lists(ctx, opts, xref_len);
3387 			for (num = 0; num < xref_len; num++)
3388 				opts->use_list[num] = 1;
3389 		}
3390 
3391 		/* Coalesce and renumber duplicate objects */
3392 		if (opts->do_garbage >= 3)
3393 			removeduplicateobjs(ctx, doc, opts);
3394 
3395 		/* Compact xref by renumbering and removing unused objects */
3396 		if (opts->do_garbage >= 2 || opts->do_linear)
3397 			compactxref(ctx, doc, opts);
3398 
3399 		opts->crypt_object_number = 0;
3400 		if (opts->crypt)
3401 		{
3402 			pdf_obj *crypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
3403 			int crypt_num = pdf_to_num(ctx, crypt);
3404 			opts->crypt_object_number = opts->renumber_map[crypt_num];
3405 		}
3406 
3407 		/* Make renumbering affect all indirect references and update xref */
3408 		if (opts->do_garbage >= 2 || opts->do_linear)
3409 			renumberobjs(ctx, doc, opts);
3410 
3411 		/* Truncate the xref after compacting and renumbering */
3412 		if ((opts->do_garbage >= 2 || opts->do_linear) && !opts->do_incremental)
3413 		{
3414 			xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
3415 			expand_lists(ctx, opts, xref_len);
3416 			while (xref_len > 0 && !opts->use_list[xref_len-1])
3417 				xref_len--;
3418 		}
3419 
3420 		if (opts->do_linear)
3421 			linearize(ctx, doc, opts);
3422 
3423 		if (opts->do_incremental)
3424 		{
3425 			int i;
3426 
3427 			doc->disallow_new_increments = 1;
3428 
3429 			for (i = 0; i < doc->num_incremental_sections; i++)
3430 			{
3431 				doc->xref_base = doc->num_incremental_sections - i - 1;
3432 
3433 				writeobjects(ctx, doc, opts, 0);
3434 
3435 #ifdef DEBUG_WRITING
3436 				dump_object_details(ctx, doc, opts);
3437 #endif
3438 
3439 				for (num = 0; num < xref_len; num++)
3440 				{
3441 					if (!opts->use_list[num] && pdf_xref_is_incremental(ctx, doc, num))
3442 					{
3443 						/* Make unreusable. FIXME: would be better to link to existing free list */
3444 						opts->gen_list[num] = 65535;
3445 						opts->ofs_list[num] = 0;
3446 					}
3447 				}
3448 
3449 				opts->first_xref_offset = fz_tell_output(ctx, opts->out);
3450 				if (doc->has_xref_streams)
3451 					writexrefstream(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3452 				else
3453 					writexref(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3454 
3455 				doc->xref_sections[doc->xref_base].end_ofs = fz_tell_output(ctx, opts->out);
3456 			}
3457 
3458 			doc->xref_base = 0;
3459 			doc->disallow_new_increments = 0;
3460 		}
3461 		else
3462 		{
3463 			writeobjects(ctx, doc, opts, 0);
3464 
3465 #ifdef DEBUG_WRITING
3466 			dump_object_details(ctx, doc, opts);
3467 #endif
3468 
3469 			/* Construct linked list of free object slots */
3470 			lastfree = 0;
3471 			for (num = 0; num < xref_len; num++)
3472 			{
3473 				if (!opts->use_list[num])
3474 				{
3475 					opts->gen_list[num]++;
3476 					opts->ofs_list[lastfree] = num;
3477 					lastfree = num;
3478 				}
3479 			}
3480 
3481 			if (opts->do_linear && opts->page_count > 0)
3482 			{
3483 				opts->main_xref_offset = fz_tell_output(ctx, opts->out);
3484 				writexref(ctx, doc, opts, 0, opts->start, 0, 0, opts->first_xref_offset);
3485 				opts->file_len = fz_tell_output(ctx, opts->out);
3486 
3487 				make_hint_stream(ctx, doc, opts);
3488 				if (opts->do_ascii)
3489 				{
3490 					opts->hintstream_len *= 2;
3491 					opts->hintstream_len += 1 + ((opts->hintstream_len+63)>>6);
3492 				}
3493 				opts->file_len += opts->hintstream_len;
3494 				opts->main_xref_offset += opts->hintstream_len;
3495 				update_linearization_params(ctx, doc, opts);
3496 				fz_seek_output(ctx, opts->out, 0, 0);
3497 				writeobjects(ctx, doc, opts, 1);
3498 
3499 				padto(ctx, opts->out, opts->main_xref_offset);
3500 				writexref(ctx, doc, opts, 0, opts->start, 0, 0, opts->first_xref_offset);
3501 			}
3502 			else
3503 			{
3504 				opts->first_xref_offset = fz_tell_output(ctx, opts->out);
3505 				writexref(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3506 			}
3507 
3508 			doc->xref_sections[0].end_ofs = fz_tell_output(ctx, opts->out);
3509 		}
3510 
3511 		complete_signatures(ctx, doc, opts);
3512 
3513 		doc->dirty = 0;
3514 	}
3515 	fz_always(ctx)
3516 	{
3517 #ifdef DEBUG_LINEARIZATION
3518 		page_objects_dump(opts);
3519 		objects_dump(ctx, doc, opts);
3520 #endif
3521 		finalise_write_state(ctx, opts);
3522 		if (opts->crypt != doc->crypt)
3523 			pdf_drop_crypt(ctx, opts->crypt);
3524 		doc->save_in_progress = 0;
3525 	}
3526 	fz_catch(ctx)
3527 	{
3528 		fz_rethrow(ctx);
3529 	}
3530 }
3531 
/*
	Report whether any of the document's incremental xref sections has
	unsaved signatures attached.

	Returns 1 if at least one does, 0 otherwise.
*/
int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc)
{
	int idx = doc->num_incremental_sections;

	/* Visit the incremental sections from the last one down to section 0. */
	while (idx-- > 0)
	{
		if (doc->xref_sections[idx].unsaved_sigs)
			return 1;
	}
	return 0;
}
3544 
/*
	Write the document to an existing output.

	Does nothing if either doc or out is NULL. The default write options
	are used when in_opts is NULL.

	Throws if an incremental write is requested for a repaired file, or
	together with garbage collection, linearisation or a change of
	encryption. Also throws if the document has unsaved signatures and
	the output does not support being read back as a stream.
*/
void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, pdf_write_options *in_opts)
{
	pdf_write_options defaults = pdf_default_write_options;
	pdf_write_state state = { 0 };

	if (!doc || !out)
		return;

	if (!in_opts)
		in_opts = &defaults;

	/* Incremental writes cannot be combined with anything that would
	 * alter what has already been written. */
	if (in_opts->do_incremental)
	{
		if (doc->repair_attempted)
			fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a repaired file");
		if (in_opts->do_garbage)
			fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
		if (in_opts->do_linear)
			fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
		if (in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
			fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes when changing encryption");
	}

	if (pdf_has_unsaved_sigs(ctx, doc) && !fz_output_supports_stream(ctx, out))
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't write pdf that has unsaved sigs to a fz_output unless it supports fz_stream_from_output!");

	state.out = out;

	prepare_for_save(ctx, doc, in_opts);
	do_pdf_save_document(ctx, doc, &state, in_opts);
}
3573 
/*
	Save the document to the file named by 'filename'.

	Does nothing if doc is NULL. The default write options are used when
	in_opts is NULL.

	Throws if an incremental save is requested for a new document (one
	with no file), for a repaired file, or together with garbage
	collection, linearisation or a change of encryption.

	If in_opts->do_appearance is greater than 0, every page is updated
	before saving; if it is greater than 1, every annotation and widget
	is first marked as needing a new appearance. Failures in this step
	are reported as warnings and do not stop the save.

	For an incremental save the file is opened in append mode. If there
	are no incremental sections to write, the file is not opened at all.

	doc->save_in_progress is cleared again whether the save succeeds or
	fails.
*/
void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *in_opts)
{
	pdf_write_options opts_defaults = pdf_default_write_options;
	pdf_write_state opts = { 0 };

	if (!doc)
		return;

	if (!in_opts)
		in_opts = &opts_defaults;

	if (in_opts->do_incremental && !doc->file)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a new document");
	if (in_opts->do_incremental && doc->repair_attempted)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a repaired file");
	if (in_opts->do_incremental && in_opts->do_garbage)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
	if (in_opts->do_incremental && in_opts->do_linear)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
	if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
		fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes when changing encryption");

	if (in_opts->do_appearance > 0)
	{
		int i, n = pdf_count_pages(ctx, doc);
		for (i = 0; i < n; ++i)
		{
			pdf_page *page = pdf_load_page(ctx, doc, i);
			fz_try(ctx)
			{
				if (in_opts->do_appearance > 1)
				{
					pdf_annot *annot;
					for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
						annot->needs_new_ap = 1;
					for (annot = pdf_first_widget(ctx, page); annot; annot = pdf_next_widget(ctx, annot))
						annot->needs_new_ap = 1;
				}
				pdf_update_page(ctx, page);
			}
			fz_always(ctx)
				fz_drop_page(ctx, &page->super);
			fz_catch(ctx)
				fz_warn(ctx, "could not create annotation appearances");
		}
	}

	prepare_for_save(ctx, doc, in_opts);

	/* If no changes, nothing to write */
	if (in_opts->do_incremental && doc->num_incremental_sections == 0)
	{
		doc->save_in_progress = 0;
		return;
	}

	/* prepare_for_save has marked the save as being in progress. Opening
	 * the file can fail, in which case do_pdf_save_document never runs
	 * to clear that mark, so clear it here. */
	fz_try(ctx)
		opts.out = fz_new_output_with_path(ctx, filename, in_opts->do_incremental ? 1 : 0);
	fz_catch(ctx)
	{
		doc->save_in_progress = 0;
		fz_rethrow(ctx);
	}

	fz_try(ctx)
	{
		do_pdf_save_document(ctx, doc, &opts, in_opts);
		fz_close_output(ctx, opts.out);
	}
	fz_always(ctx)
	{
		fz_drop_output(ctx, opts.out);
		opts.out = NULL;
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
3652 
3653 char *
pdf_format_write_options(fz_context * ctx,char * buffer,size_t buffer_len,const pdf_write_options * opts)3654 pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts)
3655 {
3656 #define ADD_OPT(S) do { if (!first) fz_strlcat(buffer, ",", buffer_len); fz_strlcat(buffer, (S), buffer_len); first = 0; } while (0)
3657 
3658 	int first = 1;
3659 	*buffer = 0;
3660 	if (opts->do_decompress)
3661 		ADD_OPT("decompress=yes");
3662 	if (opts->do_compress)
3663 		ADD_OPT("compress=yes");
3664 	if (opts->do_compress_fonts)
3665 		ADD_OPT("compress-fonts=yes");
3666 	if (opts->do_compress_images)
3667 		ADD_OPT("compress-images=yes");
3668 	if (opts->do_ascii)
3669 		ADD_OPT("ascii=yes");
3670 	if (opts->do_pretty)
3671 		ADD_OPT("pretty=yes");
3672 	if (opts->do_linear)
3673 		ADD_OPT("linearize=yes");
3674 	if (opts->do_clean)
3675 		ADD_OPT("clean=yes");
3676 	if (opts->do_sanitize)
3677 		ADD_OPT("sanitize=yes");
3678 	if (opts->do_incremental)
3679 		ADD_OPT("incremental=yes");
3680 	if (opts->do_encrypt == PDF_ENCRYPT_NONE)
3681 		ADD_OPT("decrypt=yes");
3682 	else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
3683 		ADD_OPT("decrypt=no");
3684 	switch(opts->do_encrypt)
3685 	{
3686 	default:
3687 	case PDF_ENCRYPT_UNKNOWN:
3688 		break;
3689 	case PDF_ENCRYPT_NONE:
3690 		ADD_OPT("encrypt=no");
3691 		break;
3692 	case PDF_ENCRYPT_KEEP:
3693 		ADD_OPT("encrypt=keep");
3694 		break;
3695 	case PDF_ENCRYPT_RC4_40:
3696 		ADD_OPT("encrypt=rc4-40");
3697 		break;
3698 	case PDF_ENCRYPT_RC4_128:
3699 		ADD_OPT("encrypt=rc4-128");
3700 		break;
3701 	case PDF_ENCRYPT_AES_128:
3702 		ADD_OPT("encrypt=aes-128");
3703 		break;
3704 	case PDF_ENCRYPT_AES_256:
3705 		ADD_OPT("encrypt=aes-256");
3706 		break;
3707 	}
3708 	if (strlen(opts->opwd_utf8)) {
3709 		ADD_OPT("owner-password=");
3710 		fz_strlcat(buffer, opts->opwd_utf8, buffer_len);
3711 	}
3712 	if (strlen(opts->upwd_utf8)) {
3713 		ADD_OPT("user-password=");
3714 		fz_strlcat(buffer, opts->upwd_utf8, buffer_len);
3715 	}
3716 	{
3717 		char temp[32];
3718 		ADD_OPT("permissions=");
3719 		fz_snprintf(temp, sizeof(temp), "%d", opts->permissions);
3720 		fz_strlcat(buffer, temp, buffer_len);
3721 	}
3722 	switch(opts->do_garbage)
3723 	{
3724 	case 0:
3725 		break;
3726 	case 1:
3727 		ADD_OPT("garbage=yes");
3728 		break;
3729 	case 2:
3730 		ADD_OPT("garbage=compact");
3731 		break;
3732 	case 3:
3733 		ADD_OPT("garbage=deduplicate");
3734 		break;
3735 	default:
3736 	{
3737 		char temp[32];
3738 		fz_snprintf(temp, sizeof(temp), "%d", opts->do_garbage);
3739 		ADD_OPT("garbage=");
3740 		fz_strlcat(buffer, temp, buffer_len);
3741 		break;
3742 	}
3743 	}
3744 	switch(opts->do_appearance)
3745 	{
3746 	case 1:
3747 		ADD_OPT("appearance=yes");
3748 		break;
3749 	case 2:
3750 		ADD_OPT("appearance=all");
3751 		break;
3752 	}
3753 
3754 #undef ADD_OPT
3755 
3756 	return buffer;
3757 }
3758 
/* A document writer that builds a new PDF document one page at a time
 * and writes it to an output when the writer is closed. */
typedef struct
{
	fz_document_writer super; /* base writer; the writer callbacks cast between the two types */
	pdf_document *pdf; /* the document being built */
	pdf_write_options opts; /* parsed from the option string given at creation */
	fz_output *out; /* where the document is written on close; dropped with the writer */

	/* State for the page currently being written: set up by begin_page,
	 * used and released by end_page. */
	fz_rect mediabox;
	pdf_obj *resources;
	fz_buffer *contents;
} pdf_writer;
3770 
3771 static fz_device *
pdf_writer_begin_page(fz_context * ctx,fz_document_writer * wri_,fz_rect mediabox)3772 pdf_writer_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
3773 {
3774 	pdf_writer *wri = (pdf_writer*)wri_;
3775 	wri->mediabox = mediabox;
3776 	return pdf_page_write(ctx, wri->pdf, wri->mediabox, &wri->resources, &wri->contents);
3777 }
3778 
3779 static void
pdf_writer_end_page(fz_context * ctx,fz_document_writer * wri_,fz_device * dev)3780 pdf_writer_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
3781 {
3782 	pdf_writer *wri = (pdf_writer*)wri_;
3783 	pdf_obj *obj = NULL;
3784 
3785 	fz_var(obj);
3786 
3787 	fz_try(ctx)
3788 	{
3789 		fz_close_device(ctx, dev);
3790 		obj = pdf_add_page(ctx, wri->pdf, wri->mediabox, 0, wri->resources, wri->contents);
3791 		pdf_insert_page(ctx, wri->pdf, -1, obj);
3792 	}
3793 	fz_always(ctx)
3794 	{
3795 		fz_drop_device(ctx, dev);
3796 		pdf_drop_obj(ctx, obj);
3797 		fz_drop_buffer(ctx, wri->contents);
3798 		wri->contents = NULL;
3799 		pdf_drop_obj(ctx, wri->resources);
3800 		wri->resources = NULL;
3801 	}
3802 	fz_catch(ctx)
3803 		fz_rethrow(ctx);
3804 }
3805 
3806 static void
pdf_writer_close_writer(fz_context * ctx,fz_document_writer * wri_)3807 pdf_writer_close_writer(fz_context *ctx, fz_document_writer *wri_)
3808 {
3809 	pdf_writer *wri = (pdf_writer*)wri_;
3810 	pdf_write_document(ctx, wri->pdf, wri->out, &wri->opts);
3811 	fz_close_output(ctx, wri->out);
3812 }
3813 
3814 static void
pdf_writer_drop_writer(fz_context * ctx,fz_document_writer * wri_)3815 pdf_writer_drop_writer(fz_context *ctx, fz_document_writer *wri_)
3816 {
3817 	pdf_writer *wri = (pdf_writer*)wri_;
3818 	fz_drop_buffer(ctx, wri->contents);
3819 	pdf_drop_obj(ctx, wri->resources);
3820 	pdf_drop_document(ctx, wri->pdf);
3821 	fz_drop_output(ctx, wri->out);
3822 }
3823 
3824 fz_document_writer *
fz_new_pdf_writer_with_output(fz_context * ctx,fz_output * out,const char * options)3825 fz_new_pdf_writer_with_output(fz_context *ctx, fz_output *out, const char *options)
3826 {
3827 	pdf_writer *wri = fz_new_derived_document_writer(ctx, pdf_writer, pdf_writer_begin_page, pdf_writer_end_page, pdf_writer_close_writer, pdf_writer_drop_writer);
3828 
3829 	fz_try(ctx)
3830 	{
3831 		pdf_parse_write_options(ctx, &wri->opts, options);
3832 		wri->out = out;
3833 		wri->pdf = pdf_create_document(ctx);
3834 	}
3835 	fz_catch(ctx)
3836 	{
3837 		pdf_drop_document(ctx, wri->pdf);
3838 		fz_free(ctx, wri);
3839 		fz_rethrow(ctx);
3840 	}
3841 
3842 	return (fz_document_writer*)wri;
3843 }
3844 
3845 fz_document_writer *
fz_new_pdf_writer(fz_context * ctx,const char * path,const char * options)3846 fz_new_pdf_writer(fz_context *ctx, const char *path, const char *options)
3847 {
3848 	fz_output *out = fz_new_output_with_path(ctx, path ? path : "out.pdf", 0);
3849 	fz_document_writer *wri = NULL;
3850 	fz_try(ctx)
3851 		wri = fz_new_pdf_writer_with_output(ctx, out, options);
3852 	fz_catch(ctx)
3853 	{
3854 		fz_drop_output(ctx, out);
3855 		fz_rethrow(ctx);
3856 	}
3857 	return wri;
3858 }
3859