1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2
3 Copyright (C) 2007-2014 by Jin-Hwan Cho and Shunsaku Hirata,
4 the dvipdfmx project team.
5
6 Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21 */
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26
27 #include <ctype.h>
28 #include <string.h>
29
30 #include "system.h"
31 #include "mem.h"
32 #include "error.h"
33 #include "mfileio.h"
34 #include "dpxutil.h"
35 #include "pdflimits.h"
36 #include "pdfencrypt.h"
37 #include "pdfparse.h"
38
39 #ifdef HAVE_ZLIB
40 #include <zlib.h>
41 #endif /* HAVE_ZLIB */
42
43 #include "pdfobj.h"
44 #include "pdfdev.h"
45
46 #define STREAM_ALLOC_SIZE 4096u
47 #define ARRAY_ALLOC_SIZE 256
48 #define IND_OBJECTS_ALLOC_SIZE 512
49
50 #define OBJ_NO_OBJSTM (1 << 0)
51 /* Objects with this flag will not be put into an object stream.
52 For instance, all stream objects have this flag set. */
53 #define OBJ_NO_ENCRYPT (1 << 1)
54 /* Objects with this flag will not be encrypted.
55 This implies OBJ_NO_OBJSTM if encryption is turned on. */
56
57 /* Any of these types can be represented as follows */
/*
 * Generic PDF object header.  "type" selects which of the payload
 * structures below "data" points to.
 */
struct pdf_obj
{
  int type;                  /* object type tag; INVALIDOBJ() treats values <= 0
                                or > PDF_UNDEFINED as invalid */

  unsigned long  label;      /* Only used for indirect objects
                                all other "label" to zero */
  unsigned short generation; /* Only used if "label" is used */
  unsigned refcount;         /* Number of links to this object */
  int      flags;            /* bitwise OR of the OBJ_NO_* flags */
  void    *data;             /* type-specific payload; NULL for null and
                                undefined objects */
};
69
/* Payload of a PDF_BOOLEAN object. */
struct pdf_boolean
{
  char  value;  /* non-zero is written as "true", zero as "false" */
};
74
/* Payload of a PDF_NUMBER object. */
struct pdf_number
{
  double value;  /* numeric value; formatted by pdf_sprint_number() on output */
};
79
/*
 * Payload of a PDF_STRING object.  "string" holds "length" bytes of binary
 * data followed by one extra '\0', or is NULL when "length" is zero.
 *
 * "length" is a full unsigned so that it can hold every value accepted by
 * pdf_new_string() and pdf_set_string(), which both take an unsigned length.
 * With a narrower field, strings of 65536 bytes or more were silently
 * truncated on output.
 */
struct pdf_string
{
  unsigned char *string;
  unsigned       length;
};
85
/* Payload of a PDF_NAME object. */
struct pdf_name
{
  char *name;  /* NUL-terminated name without the leading '/';
                  NULL for the empty name */
};
90
/* Payload of a PDF_ARRAY object: a growable vector of object pointers. */
struct pdf_array
{
  unsigned long max;        /* number of slots allocated in "values" */
  unsigned long size;       /* number of slots in use */
  struct pdf_obj **values;  /* NULL until the first element is added */
};
97
/*
 * Payload of a PDF_DICT object: one key/value pair plus a link to the
 * next node.
 */
struct pdf_dict
{
  struct pdf_obj  *key;
  struct pdf_obj  *value;
  struct pdf_dict *next;
};
104
/*
 * Payload of a PDF_STREAM object.
 * NOTE(review): the code that fills these fields is outside this view; the
 * per-field notes marked "presumably" are inferred from the names only.
 */
struct pdf_stream
{
  struct pdf_obj *dict;          /* presumably the stream dictionary */
  unsigned char  *stream;        /* presumably the stream data buffer */
  long           *objstm_data;   /* used for object streams */
  unsigned long   stream_length; /* presumably bytes used in "stream" */
  unsigned long   max_length;    /* presumably bytes allocated for "stream" */
  unsigned char   _flags;
};
114
/*
 * Payload of an indirect reference.  write_indirect() emits it as
 * "label generation R" and asserts that "pf" is NULL.
 */
struct pdf_indirect
{
  pdf_file      *pf;
  pdf_obj       *obj;             /* used when PF == NULL */
  unsigned long  label;
  unsigned short generation;
};
122
/* Short names for the payload types; a null object carries no payload. */
typedef void                pdf_null;
typedef struct pdf_boolean  pdf_boolean;
typedef struct pdf_number   pdf_number;
typedef struct pdf_string   pdf_string;
typedef struct pdf_name     pdf_name;
typedef struct pdf_array    pdf_array;
typedef struct pdf_dict     pdf_dict;
typedef struct pdf_stream   pdf_stream;
typedef struct pdf_indirect pdf_indirect;
132
/* Destination of the PDF being written; set by pdf_out_init(). */
static FILE *pdf_output_file = NULL;

/* Tallies maintained by pdf_out() and pdf_out_char() for pdf_output_file. */
static long pdf_output_file_position = 0; /* bytes written so far */
static long pdf_output_line_position = 0; /* reset to 0 when a newline ends a write */
static long compression_saved = 0;        /* reported by pdf_out_flush() when verbose */

/* Scratch buffer shared by the routines that format short numeric output. */
#define FORMAT_BUF_SIZE 4096
static char format_buffer[FORMAT_BUF_SIZE];
141
/*
 * One cross-reference entry.  In a classic xref table only types 0 ('f')
 * and 1 ('n') can be written; see dump_xref_table().
 */
typedef struct xref_entry
{
  unsigned char  type;       /* object storage type */
  unsigned long  field2;     /* offset in file or object stream */
  unsigned short field3;     /* generation or index */
  pdf_obj       *direct;     /* used for imported objects */
  pdf_obj       *indirect;   /* used for imported objects */
} xref_entry;
150
/* Cross-reference table of the output file, indexed by object label. */
static xref_entry *output_xref;

static unsigned long pdf_max_ind_objects; /* entries allocated in output_xref */
static unsigned long next_label;          /* next label pdf_label_obj() hands out */

/* File offset of the xref section, recorded by pdf_out_flush(). */
static unsigned long startxref;
157
/*
 * State of a PDF file.
 * NOTE(review): the code that uses this structure is outside this view;
 * presumably it describes an existing PDF opened for reading -- confirm.
 */
struct pdf_file
{
  FILE       *file;
  pdf_obj    *trailer;
  xref_entry *xref_table;
  pdf_obj    *catalog;
  long        num_obj;
  long        file_size;
  int         version;
};
168
/*
 * While non-NULL, pdf_out() and pdf_out_char() append data meant for
 * pdf_output_file to this stream object instead of writing it to the file.
 */
static pdf_obj *output_stream;

#define OBJSTM_MAX_OBJS  200
/* the limit is only 100 for linearized PDF */

static int enc_mode;     /* non-zero: write_string() encrypts strings on output */
static int doc_enc_mode; /* the do_encryption argument given to pdf_out_init() */

static pdf_obj *trailer_dict; /* trailer dictionary (the xref stream's dictionary
                                 when an xref stream is used) */
static pdf_obj *xref_stream;  /* NULL when a classic xref table is written */
179
/* Internal static routines */

static int check_for_pdf_version (FILE *file);

/* Object output. */
static void pdf_flush_obj (pdf_obj *object, FILE *file);
static void pdf_label_obj (pdf_obj *object);
static void pdf_write_obj (pdf_obj *object, FILE *file);

/* Object streams. */
static void  set_objstm_data (pdf_obj *objstm, long *data);
static long *get_objstm_data (pdf_obj *objstm);
static void  release_objstm  (pdf_obj *objstm);

/* Low-level output with position tracking. */
static void pdf_out_char (FILE *file, char c);
static void pdf_out      (FILE *file, const void *buffer, long length);

/* Per-type writers and destructors. */
static pdf_obj *pdf_new_ref (pdf_obj *object);
static void release_indirect (pdf_indirect *data);
static void write_indirect   (pdf_indirect *indirect, FILE *file);

static void release_boolean (pdf_obj *data);
static void write_boolean   (pdf_boolean *data, FILE *file);

static void write_null (FILE *file);

static void release_number (pdf_number *number);
static void write_number   (pdf_number *number, FILE *file);

static void write_string   (pdf_string *str, FILE *file);
static void release_string (pdf_string *str);

static void write_name   (pdf_name *name, FILE *file);
static void release_name (pdf_name *name);

static void write_array   (pdf_array *array, FILE *file);
static void release_array (pdf_array *array);

static void write_dict   (pdf_dict *dict, FILE *file);
static void release_dict (pdf_dict *dict);

static void write_stream   (pdf_stream *stream, FILE *file);
static void release_stream (pdf_stream *stream);
221
static int  verbose = 0;           /* incremented by pdf_obj_set_verbose() */
static char compression_level = 9; /* 0..9, changed by pdf_set_compression() */
224
/*
 * Select the compression level used for stream data.
 *
 * level must lie in 0..9; any other value is reported through ERROR().
 * Without zlib support the call always reports an error.  With a zlib that
 * lacks compress2(), a non-zero level additionally triggers a warning.
 */
void
pdf_set_compression (int level)
{
#ifdef HAVE_ZLIB
#  ifndef HAVE_ZLIB_COMPRESS2
  if (level != 0) {
    WARN("Unable to set compression level -- your zlib doesn't have compress2().");
  }
#  endif
  if (level < 0 || level > 9) {
    ERROR("set_compression: invalid compression level: %d", level);
  } else {
    compression_level = level;
  }
#else  /* !HAVE_ZLIB */
  ERROR("You don't have compression compiled in. Possibly libz wasn't found by configure.");
#endif /* HAVE_ZLIB */
}
244
245 static unsigned pdf_version = PDF_VERSION_DEFAULT;
246
247 void
pdf_set_version(unsigned version)248 pdf_set_version (unsigned version)
249 {
250 /* Don't forget to update CIDFont_stdcc_def[] in cid.c too! */
251 if (version >= PDF_VERSION_MIN && version <= PDF_VERSION_MAX) {
252 pdf_version = version;
253 }
254 }
255
256 unsigned
pdf_get_version(void)257 pdf_get_version (void)
258 {
259 return pdf_version;
260 }
261
262 int
pdf_obj_get_verbose(void)263 pdf_obj_get_verbose(void)
264 {
265 return verbose;
266 }
267
268 void
pdf_obj_set_verbose(void)269 pdf_obj_set_verbose(void)
270 {
271 verbose++;
272 }
273
/* Object stream currently being filled; flushed by pdf_out_flush(). */
static pdf_obj *current_objstm = NULL;
/* Set to 1 by pdf_out_init() for PDF version >= 5, otherwise 0. */
static int do_objstm;
276
277 static void
add_xref_entry(unsigned long label,unsigned char type,unsigned long field2,unsigned short field3)278 add_xref_entry (unsigned long label, unsigned char type, unsigned long field2, unsigned short field3)
279 {
280 if (label >= pdf_max_ind_objects) {
281 pdf_max_ind_objects = (label/IND_OBJECTS_ALLOC_SIZE+1)*IND_OBJECTS_ALLOC_SIZE;
282 output_xref = RENEW(output_xref, pdf_max_ind_objects, xref_entry);
283 }
284
285 output_xref[label].type = type;
286 output_xref[label].field2 = field2;
287 output_xref[label].field3 = field3;
288 output_xref[label].direct = NULL;
289 output_xref[label].indirect = NULL;
290 }
291
292 #define BINARY_MARKER "%\344\360\355\370\n"
293 void
pdf_out_init(const char * filename,int do_encryption)294 pdf_out_init (const char *filename, int do_encryption)
295 {
296 char v;
297
298 output_xref = NULL;
299 pdf_max_ind_objects = 0;
300 add_xref_entry(0, 0, 0, 0xffff);
301 next_label = 1;
302
303 if (pdf_version >= 5) {
304 xref_stream = pdf_new_stream(STREAM_COMPRESS);
305 xref_stream->flags |= OBJ_NO_ENCRYPT;
306 trailer_dict = pdf_stream_dict(xref_stream);
307 pdf_add_dict(trailer_dict, pdf_new_name("Type"), pdf_new_name("XRef"));
308 do_objstm = 1;
309 } else {
310 xref_stream = NULL;
311 trailer_dict = pdf_new_dict();
312 do_objstm = 0;
313 }
314
315 output_stream = NULL;
316
317 if (filename == NULL) { /* no filename: writing to stdout */
318 #ifdef WIN32
319 setmode(fileno(stdout), _O_BINARY);
320 #endif
321 pdf_output_file = stdout;
322 } else {
323 pdf_output_file = MFOPEN(filename, FOPEN_WBIN_MODE);
324 if (!pdf_output_file) {
325 if (strlen(filename) < 128)
326 ERROR("Unable to open \"%s\".", filename);
327 else
328 ERROR("Unable to open file.");
329 }
330 }
331 pdf_out(pdf_output_file, "%PDF-1.", strlen("%PDF-1."));
332 v = '0' + pdf_version;
333 pdf_out(pdf_output_file, &v, 1);
334 pdf_out(pdf_output_file, "\n", 1);
335 pdf_out(pdf_output_file, BINARY_MARKER, strlen(BINARY_MARKER));
336
337 enc_mode = 0;
338 doc_enc_mode = do_encryption;
339 }
340
341 static void
dump_xref_table(void)342 dump_xref_table (void)
343 {
344 long length;
345 unsigned long i;
346
347 pdf_out(pdf_output_file, "xref\n", 5);
348
349 length = sprintf(format_buffer, "%d %lu\n", 0, next_label);
350 pdf_out(pdf_output_file, format_buffer, length);
351
352 /*
353 * Every space counts. The space after the 'f' and 'n' is * *essential*.
354 * The PDF spec says the lines must be 20 characters long including the
355 * end of line character.
356 */
357 for (i = 0; i < next_label; i++) {
358 unsigned char type = output_xref[i].type;
359 if (type > 1)
360 ERROR("object type %hu not allowed in xref table", type);
361 length = sprintf(format_buffer, "%010lu %05hu %c \n",
362 output_xref[i].field2, output_xref[i].field3,
363 type ? 'n' : 'f');
364 pdf_out(pdf_output_file, format_buffer, length);
365 }
366 }
367
368 static void
dump_trailer_dict(void)369 dump_trailer_dict (void)
370 {
371 pdf_out(pdf_output_file, "trailer\n", 8);
372 enc_mode = 0;
373 write_dict(trailer_dict->data, pdf_output_file);
374 pdf_release_obj(trailer_dict);
375 pdf_out_char(pdf_output_file, '\n');
376 }
377
378 /*
379 * output a PDF 1.5 cross-reference stream;
380 * contributed by Matthias Franz (March 21, 2007)
381 */
382 static void
dump_xref_stream(void)383 dump_xref_stream (void)
384 {
385 unsigned long pos, i;
386 unsigned poslen;
387 unsigned char buf[7] = {0, 0, 0, 0, 0};
388
389 pdf_obj *w;
390
391 /* determine the necessary size of the offset field */
392 pos = startxref; /* maximal offset value */
393 poslen = 1;
394 while (pos >>= 8)
395 poslen++;
396
397 w = pdf_new_array();
398 pdf_add_array(w, pdf_new_number(1)); /* type */
399 pdf_add_array(w, pdf_new_number(poslen)); /* offset (big-endian) */
400 pdf_add_array(w, pdf_new_number(2)); /* generation */
401 pdf_add_dict(trailer_dict, pdf_new_name("W"), w);
402
403 /* We need the xref entry for the xref stream right now */
404 add_xref_entry(next_label-1, 1, startxref, 0);
405
406 for (i = 0; i < next_label; i++) {
407 unsigned j;
408 unsigned short f3;
409 buf[0] = output_xref[i].type;
410 pos = output_xref[i].field2;
411 for (j = poslen; j--; ) {
412 buf[1+j] = (unsigned char) pos;
413 pos >>= 8;
414 }
415 f3 = output_xref[i].field3;
416 buf[poslen+1] = (unsigned char) (f3 >> 8);
417 buf[poslen+2] = (unsigned char) (f3);
418 pdf_add_stream(xref_stream, &buf, poslen+3);
419 }
420
421 pdf_release_obj(xref_stream);
422 }
423
424 void
pdf_out_flush(void)425 pdf_out_flush (void)
426 {
427 if (pdf_output_file) {
428 long length;
429
430 /* Flush current object stream */
431 if (current_objstm) {
432 release_objstm(current_objstm);
433 current_objstm =NULL;
434 }
435
436 /*
437 * Label xref stream - we need the number of correct objects
438 * for the xref stream dictionary (= trailer).
439 * Labelling it in pdf_out_init (with 1) does not work (why?).
440 */
441 if (xref_stream)
442 pdf_label_obj(xref_stream);
443
444 /* Record where this xref is for trailer */
445 startxref = pdf_output_file_position;
446
447 pdf_add_dict(trailer_dict, pdf_new_name("Size"),
448 pdf_new_number(next_label));
449
450 if (xref_stream)
451 dump_xref_stream();
452 else {
453 dump_xref_table();
454 dump_trailer_dict();
455 }
456
457 /* Done with xref table */
458 RELEASE(output_xref);
459
460 pdf_out(pdf_output_file, "startxref\n", 10);
461 length = sprintf(format_buffer, "%lu\n", startxref);
462 pdf_out(pdf_output_file, format_buffer, length);
463 pdf_out(pdf_output_file, "%%EOF\n", 6);
464
465 MESG("\n");
466 if (verbose) {
467 if (compression_level > 0) {
468 MESG("Compression saved %ld bytes%s\n", compression_saved,
469 pdf_version < 5 ? ". Try \"-V 5\" for better compression" : "");
470 }
471 }
472 MESG("%ld bytes written", pdf_output_file_position);
473
474 MFCLOSE(pdf_output_file);
475 }
476 }
477
478 void
pdf_error_cleanup(void)479 pdf_error_cleanup (void)
480 {
481 /*
482 * This routine is the cleanup required for an abnormal exit.
483 * For now, simply close the file.
484 */
485 if (pdf_output_file)
486 MFCLOSE(pdf_output_file);
487 }
488
489
490 void
pdf_set_root(pdf_obj * object)491 pdf_set_root (pdf_obj *object)
492 {
493 if (pdf_add_dict(trailer_dict, pdf_new_name("Root"), pdf_ref_obj(object))) {
494 ERROR("Root object already set!");
495 }
496 /* Adobe Readers don't like a document catalog inside an encrypted
497 * object stream, although the PDF v1.5 spec seems to allow this.
498 * Note that we don't set OBJ_NO_ENCRYPT since the name dictionary in
499 * a document catalog may contain strings, which should be encrypted.
500 */
501 if (doc_enc_mode)
502 object->flags |= OBJ_NO_OBJSTM;
503 }
504
505 void
pdf_set_info(pdf_obj * object)506 pdf_set_info (pdf_obj *object)
507 {
508 if (pdf_add_dict(trailer_dict, pdf_new_name("Info"), pdf_ref_obj(object))) {
509 ERROR ("Info object already set!");
510 }
511 }
512
513 void
pdf_set_id(pdf_obj * id)514 pdf_set_id (pdf_obj *id)
515 {
516 if (pdf_add_dict(trailer_dict, pdf_new_name("ID"), id)) {
517 ERROR ("ID already set!");
518 }
519 }
520
521 void
pdf_set_encrypt(pdf_obj * encrypt)522 pdf_set_encrypt (pdf_obj *encrypt)
523 {
524 if (pdf_add_dict(trailer_dict, pdf_new_name("Encrypt"), pdf_ref_obj(encrypt))) {
525 ERROR("Encrypt object already set!");
526 }
527 encrypt->flags |= OBJ_NO_ENCRYPT;
528 }
529
530 static
pdf_out_char(FILE * file,char c)531 void pdf_out_char (FILE *file, char c)
532 {
533 if (output_stream && file == pdf_output_file)
534 pdf_add_stream(output_stream, &c, 1);
535 else {
536 fputc(c, file);
537 /* Keep tallys for xref table *only* if writing a pdf file. */
538 if (file == pdf_output_file) {
539 pdf_output_file_position += 1;
540 if (c == '\n')
541 pdf_output_line_position = 0;
542 else
543 pdf_output_line_position += 1;
544 }
545 }
546 }
547
/* Lower-case hexadecimal digits, indexed by nibble value. */
static char xchar[] = "0123456789abcdef";

/*
 * Write the low 8 bits of c to f as two hex digits, high nibble first.
 * Both arguments are evaluated twice, so they must be free of side effects.
 */
#define pdf_out_xchar(f,c) do {\
  pdf_out_char((f), xchar[((c) >> 4) & 0x0f]);\
  pdf_out_char((f), xchar[(c) & 0x0f]);\
} while (0)
554
555 static
pdf_out(FILE * file,const void * buffer,long length)556 void pdf_out (FILE *file, const void *buffer, long length)
557 {
558 if (output_stream && file == pdf_output_file)
559 pdf_add_stream(output_stream, buffer, length);
560 else {
561 fwrite(buffer, 1, length, file);
562 /* Keep tallys for xref table *only* if writing a pdf file */
563 if (file == pdf_output_file) {
564 pdf_output_file_position += length;
565 pdf_output_line_position += length;
566 /* "foo\nbar\n "... */
567 if (length > 0 &&
568 ((const char *)buffer)[length-1] == '\n')
569 pdf_output_line_position = 0;
570 }
571 }
572 }
573
574 /* returns 1 if a white-space character is necessary to separate
575 an object of type1 followed by an object of type2 */
576 static
pdf_need_white(int type1,int type2)577 int pdf_need_white (int type1, int type2)
578 {
579 return !(type1 == PDF_STRING || type1 == PDF_ARRAY || type1 == PDF_DICT ||
580 type2 == PDF_STRING || type2 == PDF_NAME ||
581 type2 == PDF_ARRAY || type2 == PDF_DICT);
582 }
583
584 static
pdf_out_white(FILE * file)585 void pdf_out_white (FILE *file)
586 {
587 if (file == pdf_output_file && pdf_output_line_position >= 80) {
588 pdf_out_char(file, '\n');
589 } else {
590 pdf_out_char(file, ' ');
591 }
592 }
593
/*
 * Report through ERROR() when o is NULL or is not of type t.  The message
 * shows the actual type (-1 for NULL), the expected type and the line.
 * Expands to a bare if statement, so do not use it as the unbraced body of
 * an if that has an else.  o and t are evaluated more than once.
 */
#define TYPECHECK(o,t) if (!(o) || (o)->type != (t)) {\
  ERROR("typecheck: Invalid object type: %d %d (line %d)", (o) ? (o)->type : -1, (t), __LINE__);\
}

/* True when o is NULL or its type tag lies outside 1..PDF_UNDEFINED. */
#define INVALIDOBJ(o)  ((o) == NULL || (o)->type <= 0 || (o)->type > PDF_UNDEFINED)
599
600 static pdf_obj *
pdf_new_obj(int type)601 pdf_new_obj(int type)
602 {
603 pdf_obj *result;
604
605 if (type > PDF_UNDEFINED || type < 0)
606 ERROR("Invalid object type: %d", type);
607
608 result = NEW(1, pdf_obj);
609 result->type = type;
610 result->data = NULL;
611 result->label = 0;
612 result->generation = 0;
613 result->refcount = 1;
614 result->flags = 0;
615
616 return result;
617 }
618
619 int
pdf_obj_typeof(pdf_obj * object)620 pdf_obj_typeof (pdf_obj *object)
621 {
622 if (INVALIDOBJ(object))
623 return PDF_OBJ_INVALID;
624
625 return object->type;
626 }
627
628 static void
pdf_label_obj(pdf_obj * object)629 pdf_label_obj (pdf_obj *object)
630 {
631 if (INVALIDOBJ(object))
632 ERROR("pdf_label_obj(): passed invalid object.");
633
634 /*
635 * Don't change label on an already labeled object. Ignore such calls.
636 */
637 if (object->label == 0) {
638 object->label = next_label++;
639 object->generation = 0;
640 }
641 }
642
643 /*
644 * Transfer the label assigned to the object src to the object dst.
645 * The object dst must not yet have been labeled.
646 */
647 void
pdf_transfer_label(pdf_obj * dst,pdf_obj * src)648 pdf_transfer_label (pdf_obj *dst, pdf_obj *src)
649 {
650 ASSERT(dst && !dst->label && src);
651
652 dst->label = src->label;
653 dst->generation = src->generation;
654 src->label = 0;
655 src->generation = 0;
656 }
657
658 /*
659 * This doesn't really copy the object, but allows it to be used without
660 * fear that somebody else will free it.
661 */
662 pdf_obj *
pdf_link_obj(pdf_obj * object)663 pdf_link_obj (pdf_obj *object)
664 {
665 if (INVALIDOBJ(object))
666 ERROR("pdf_link_obj(): passed invalid object.");
667
668 object->refcount += 1;
669
670 return object;
671 }
672
673
674 pdf_obj *
pdf_ref_obj(pdf_obj * object)675 pdf_ref_obj (pdf_obj *object)
676 {
677 if (INVALIDOBJ(object))
678 ERROR("pdf_ref_obj(): passed invalid object.");
679
680 if (object->refcount == 0) {
681 MESG("\nTrying to refer already released object!!!\n");
682 pdf_write_obj(object, stderr);
683 ERROR("Cannot continue...");
684 }
685
686 if (PDF_OBJ_INDIRECTTYPE(object)) {
687 return pdf_link_obj(object);
688 } else {
689 return pdf_new_ref(object);
690 }
691 }
692
693 static void
release_indirect(pdf_indirect * data)694 release_indirect (pdf_indirect *data)
695 {
696 RELEASE(data);
697 }
698
699 static void
write_indirect(pdf_indirect * indirect,FILE * file)700 write_indirect (pdf_indirect *indirect, FILE *file)
701 {
702 long length;
703
704 ASSERT(!indirect->pf);
705
706 length = sprintf(format_buffer, "%lu %hu R", indirect->label, indirect->generation);
707 pdf_out(file, format_buffer, length);
708 }
709
710 /* The undefined object is used as a placeholder in pdfnames.c
711 * for objects which are referenced before they are defined.
712 */
713 pdf_obj *
pdf_new_undefined(void)714 pdf_new_undefined (void)
715 {
716 pdf_obj *result;
717
718 result = pdf_new_obj(PDF_UNDEFINED);
719 result->data = NULL;
720
721 return result;
722 }
723
724 pdf_obj *
pdf_new_null(void)725 pdf_new_null (void)
726 {
727 pdf_obj *result;
728
729 result = pdf_new_obj(PDF_NULL);
730 result->data = NULL;
731
732 return result;
733 }
734
/* Write the keyword "null". */
static void
write_null (FILE *file)
{
  static const char keyword[] = "null";

  pdf_out(file, keyword, (long) (sizeof(keyword) - 1));
}
740
741 pdf_obj *
pdf_new_boolean(char value)742 pdf_new_boolean (char value)
743 {
744 pdf_obj *result;
745 pdf_boolean *data;
746
747 result = pdf_new_obj(PDF_BOOLEAN);
748 data = NEW(1, pdf_boolean);
749 data->value = value;
750 result->data = data;
751
752 return result;
753 }
754
755 static void
release_boolean(pdf_obj * data)756 release_boolean (pdf_obj *data)
757 {
758 RELEASE (data);
759 }
760
761 static void
write_boolean(pdf_boolean * data,FILE * file)762 write_boolean (pdf_boolean *data, FILE *file)
763 {
764 if (data->value) {
765 pdf_out(file, "true", 4);
766 } else {
767 pdf_out(file, "false", 5);
768 }
769 }
770
771 char
pdf_boolean_value(pdf_obj * object)772 pdf_boolean_value (pdf_obj *object)
773 {
774 pdf_boolean *data;
775
776 TYPECHECK(object, PDF_BOOLEAN);
777
778 data = object->data;
779
780 return data->value;
781 }
782
783 pdf_obj *
pdf_new_number(double value)784 pdf_new_number (double value)
785 {
786 pdf_obj *result;
787 pdf_number *data;
788
789 result = pdf_new_obj(PDF_NUMBER);
790 data = NEW(1, pdf_number);
791 data->value = value;
792 result->data = data;
793
794 return result;
795 }
796
797 static void
release_number(pdf_number * data)798 release_number (pdf_number *data)
799 {
800 RELEASE (data);
801 }
802
803 static void
write_number(pdf_number * number,FILE * file)804 write_number (pdf_number *number, FILE *file)
805 {
806 int count;
807
808 count = pdf_sprint_number(format_buffer, number->value);
809
810 pdf_out(file, format_buffer, count);
811 }
812
813
814 void
pdf_set_number(pdf_obj * object,double value)815 pdf_set_number (pdf_obj *object, double value)
816 {
817 pdf_number *data;
818
819 TYPECHECK(object, PDF_NUMBER);
820
821 data = object->data;
822 data->value = value;
823 }
824
825 double
pdf_number_value(pdf_obj * object)826 pdf_number_value (pdf_obj *object)
827 {
828 pdf_number *data;
829
830 TYPECHECK(object, PDF_NUMBER);
831
832 data = object->data;
833
834 return data->value;
835 }
836
837 pdf_obj *
pdf_new_string(const void * str,unsigned length)838 pdf_new_string (const void *str, unsigned length)
839 {
840 pdf_obj *result;
841 pdf_string *data;
842
843 ASSERT(str);
844
845 result = pdf_new_obj(PDF_STRING);
846 data = NEW(1, pdf_string);
847 result->data = data;
848 data->length = length;
849
850 if (length) {
851 data->string = NEW(length+1, unsigned char);
852 memcpy(data->string, str, length);
853 /* Shouldn't assume NULL terminated. */
854 data->string[length] = '\0';
855 } else
856 data->string = NULL;
857
858 return result;
859 }
860
861 void *
pdf_string_value(pdf_obj * object)862 pdf_string_value (pdf_obj *object)
863 {
864 pdf_string *data;
865
866 TYPECHECK(object, PDF_STRING);
867
868 data = object->data;
869
870 return data->string;
871 }
872
873 unsigned
pdf_string_length(pdf_obj * object)874 pdf_string_length (pdf_obj *object)
875 {
876 pdf_string *data;
877
878 TYPECHECK(object, PDF_STRING);
879
880 data = object->data;
881
882 return (unsigned) (data->length);
883 }
884
/*
 * This routine escapes non printable characters and control
 * characters in an output string.
 *
 * buffer  - receives the escaped text; it is NOT NUL terminated.
 * bufsize - size of buffer in bytes.
 * s, len  - the bytes to escape.
 *
 * Bytes below 32 or above 126 become a backslash followed by three octal
 * digits; '(', ')' and '\' are preceded by a backslash; everything else
 * is copied unchanged.  Returns the number of bytes stored in buffer.
 *
 * Before each input byte the routine checks that the longest possible
 * escape (4 bytes) still fits and reports "Buffer overflow" through
 * ERROR() otherwise.  The octal digits are stored directly rather than
 * with sprintf(), because sprintf() also writes a terminating '\0' and
 * could therefore store one byte past the end of the checked space.
 */
int
pdfobj_escape_str (char *buffer, int bufsize, const unsigned char *s, int len)
{
  int result = 0;
  int i;

  for (i = 0; i < len; i++) {
    unsigned char ch = s[i];

    if (result > bufsize - 4)
      ERROR("pdfobj_escape_str: Buffer overflow");

    /*
     * We always write three octal digits. Optimization only gives few Kb
     * smaller size for most documents when zlib compressed.
     */
    if (ch < 32 || ch > 126) {
      buffer[result++] = '\\';
      buffer[result++] = (char) ('0' + ((ch >> 6) & 07));
      buffer[result++] = (char) ('0' + ((ch >> 3) & 07));
      buffer[result++] = (char) ('0' + (ch & 07));
    } else {
      switch (ch) {
      case '(':
      case ')':
      case '\\':
        buffer[result++] = '\\';
        buffer[result++] = (char) ch;
        break;
      default:
        buffer[result++] = (char) ch;
        break;
      }
    }
  }

  return result;
}
938
939 static void
write_string(pdf_string * str,FILE * file)940 write_string (pdf_string *str, FILE *file)
941 {
942 unsigned char *s;
943 char wbuf[FORMAT_BUF_SIZE]; /* Shouldn't use format_buffer[]. */
944 int nescc = 0, i, count;
945
946 s = str->string;
947
948 if (enc_mode)
949 pdf_encrypt_data(s, str->length);
950
951 /*
952 * Count all ASCII non-printable characters.
953 */
954 for (i = 0; i < str->length; i++) {
955 if (!isprint(s[i]))
956 nescc++;
957 }
958 /*
959 * If the string contains much escaped chars, then we write it as
960 * ASCII hex string.
961 */
962 if (nescc > str->length / 3) {
963 pdf_out_char(file, '<');
964 for (i = 0; i < str->length; i++) {
965 pdf_out_xchar(file, s[i]);
966 }
967 pdf_out_char(file, '>');
968 } else {
969 pdf_out_char(file, '(');
970 /*
971 * This section of code probably isn't speed critical. Escaping the
972 * characters in the string one at a time may seem slow, but it's
973 * safe if the formatted string length exceeds FORMAT_BUF_SIZE.
974 * Occasionally you see some long strings in PDF. pdfobj_escape_str
975 * is also used for strings of text with no kerning. These must be
976 * handled as quickly as possible since there are so many of them.
977 */
978 for (i = 0; i < str->length; i++) {
979 count = pdfobj_escape_str(wbuf, FORMAT_BUF_SIZE, &(s[i]), 1);
980 pdf_out(file, wbuf, count);
981 }
982 pdf_out_char(file, ')');
983 }
984 }
985
986 static void
release_string(pdf_string * data)987 release_string (pdf_string *data)
988 {
989 if (data->string != NULL) {
990 RELEASE(data->string);
991 data->string = NULL;
992 }
993 RELEASE(data);
994 }
995
996 void
pdf_set_string(pdf_obj * object,unsigned char * str,unsigned length)997 pdf_set_string (pdf_obj *object, unsigned char *str, unsigned length)
998 {
999 pdf_string *data;
1000
1001 TYPECHECK(object, PDF_STRING);
1002
1003 data = object->data;
1004 if (data->string != 0) {
1005 RELEASE(data->string);
1006 }
1007 if (length != 0) {
1008 data->length = length;
1009 data->string = NEW(length + 1, unsigned char);
1010 memcpy(data->string, str, length);
1011 data->string[length] = '\0';
1012 } else {
1013 data->length = 0;
1014 data->string = NULL;
1015 }
1016 }
1017
1018 /* Name does *not* include the /. */
1019 pdf_obj *
pdf_new_name(const char * name)1020 pdf_new_name (const char *name)
1021 {
1022 pdf_obj *result;
1023 unsigned length;
1024 pdf_name *data;
1025
1026 result = pdf_new_obj(PDF_NAME);
1027 data = NEW (1, pdf_name);
1028 result->data = data;
1029 length = strlen(name);
1030 if (length != 0) {
1031 data->name = NEW(length+1, char);
1032 memcpy(data->name, name, length);
1033 data->name[length] = '\0';
1034 } else {
1035 data->name = NULL;
1036 }
1037
1038 return result;
1039 }
1040
1041 static void
write_name(pdf_name * name,FILE * file)1042 write_name (pdf_name *name, FILE *file)
1043 {
1044 char *s;
1045 int i, length;
1046
1047 s = name->name;
1048 length = name->name ? strlen(name->name) : 0;
1049 /*
1050 * From PDF Reference, 3rd ed., p.33:
1051 *
1052 * Beginning with PDF 1.2, any character except null (character code 0)
1053 * may be included in a name by writing its 2-digit hexadecimal code,
1054 * preceded bythe number sign character (#); see implementation notes 3
1055 * and 4 in Appendix H. This syntax is required in order to represent
1056 * any of the delimiter or white-space characters or the number sign
1057 * character itself; it is recommended but not required for characters
1058 * whose codes are outside the range 33 (!) to 126 (~).
1059 */
1060 #ifndef is_delim
1061 /* Avoid '{' and '}' for PostScript compatibility? */
1062 #define is_delim(c) ((c) == '(' || (c) == '/' || \
1063 (c) == '<' || (c) == '>' || \
1064 (c) == '[' || (c) == ']' || \
1065 (c) == '{' || (c) == '}' || \
1066 (c) == '%')
1067 #endif
1068 pdf_out_char(file, '/');
1069 for (i = 0; i < length; i++) {
1070 if (s[i] < '!' || s[i] > '~' || s[i] == '#' || is_delim(s[i])) {
1071 /* ^ "space" is here. */
1072 pdf_out_char (file, '#');
1073 pdf_out_xchar(file, s[i]);
1074 } else {
1075 pdf_out_char (file, s[i]);
1076 }
1077 }
1078 }
1079
1080 static void
release_name(pdf_name * data)1081 release_name (pdf_name *data)
1082 {
1083 if (data->name != NULL) {
1084 RELEASE(data->name);
1085 data->name = NULL;
1086 }
1087 RELEASE(data);
1088 }
1089
1090 char *
pdf_name_value(pdf_obj * object)1091 pdf_name_value (pdf_obj *object)
1092 {
1093 pdf_name *data;
1094
1095 TYPECHECK(object, PDF_NAME);
1096
1097 data = object->data;
1098
1099 return data->name;
1100 }
1101
1102 /*
1103 * We do not have pdf_name_length() since '\0' is not allowed
1104 * in PDF name object.
1105 */
1106
1107 pdf_obj *
pdf_new_array(void)1108 pdf_new_array (void)
1109 {
1110 pdf_obj *result;
1111 pdf_array *data;
1112
1113 result = pdf_new_obj(PDF_ARRAY);
1114 data = NEW(1, pdf_array);
1115 data->values = NULL;
1116 data->max = 0;
1117 data->size = 0;
1118 result->data = data;
1119
1120 return result;
1121 }
1122
1123 static void
write_array(pdf_array * array,FILE * file)1124 write_array (pdf_array *array, FILE *file)
1125 {
1126 pdf_out_char(file, '[');
1127 if (array->size > 0) {
1128 unsigned long i;
1129 int type1 = PDF_UNDEFINED, type2;
1130
1131 for (i = 0; i < array->size; i++) {
1132 if (array->values[i]) {
1133 type2 = array->values[i]->type;
1134 if (type1 != PDF_UNDEFINED && pdf_need_white(type1, type2))
1135 pdf_out_white(file);
1136 type1 = type2;
1137 pdf_write_obj(array->values[i], file);
1138 } else
1139 WARN("PDF array element #ld undefined.", i);
1140 }
1141 }
1142 pdf_out_char(file, ']');
1143 }
1144
1145 pdf_obj *
pdf_get_array(pdf_obj * array,long idx)1146 pdf_get_array (pdf_obj *array, long idx)
1147 {
1148 pdf_obj *result = NULL;
1149 pdf_array *data;
1150
1151 TYPECHECK(array, PDF_ARRAY);
1152
1153 data = array->data;
1154 if (idx < 0)
1155 result = data->values[idx + data->size];
1156 else if (idx < data->size) {
1157 result = data->values[idx];
1158 }
1159
1160 return result;
1161 }
1162
1163 unsigned int
pdf_array_length(pdf_obj * array)1164 pdf_array_length (pdf_obj *array)
1165 {
1166 pdf_array *data;
1167
1168 TYPECHECK(array, PDF_ARRAY);
1169
1170 data = (pdf_array *) array->data;
1171
1172 return (unsigned int) data->size;
1173 }
1174
1175 static void
release_array(pdf_array * data)1176 release_array (pdf_array *data)
1177 {
1178 unsigned long i;
1179
1180 if (data->values) {
1181 for (i = 0; i < data->size; i++) {
1182 pdf_release_obj(data->values[i]);
1183 data->values[i] = NULL;
1184 }
1185 RELEASE(data->values);
1186 data->values = NULL;
1187 }
1188 RELEASE(data);
1189 }
1190
1191 /*
1192 * The name pdf_add_array is misleading. It behaves differently than
1193 * pdf_add_dict(). This should be pdf_push_array().
1194 */
1195 void
pdf_add_array(pdf_obj * array,pdf_obj * object)1196 pdf_add_array (pdf_obj *array, pdf_obj *object)
1197 {
1198 pdf_array *data;
1199
1200 TYPECHECK(array, PDF_ARRAY);
1201
1202 data = array->data;
1203 if (data->size >= data->max) {
1204 data->max += ARRAY_ALLOC_SIZE;
1205 data->values = RENEW(data->values, data->max, pdf_obj *);
1206 }
1207 data->values[data->size] = object;
1208 data->size++;
1209
1210 return;
1211 }
1212
/* Disabled code: pdf_put_array() and pdf_shift_array() are compiled out. */
#if 0
/*
 * Store "object" at position idx, padding any gap with null objects and
 * releasing a previously stored element.
 * NOTE(review): storage grows by a single ARRAY_ALLOC_SIZE step, which is
 * not enough when idx lies more than ARRAY_ALLOC_SIZE past "max" -- fix
 * before re-enabling.
 */
void
pdf_put_array (pdf_obj *array, unsigned idx, pdf_obj *object)
{
  pdf_array *data;
  long       i;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (idx + 1 > data->max) {
    data->max   += ARRAY_ALLOC_SIZE;
    data->values = RENEW(data->values, data->max, pdf_obj *);
  }
  /*
   * Rangecheck error in PostScript interpreters if
   * idx > data->size - 1. But pdf_new_array() doesn't set
   * array size, pdf_add_array() dynamically increases size
   * of array. This might be confusing...
   */
  if (idx + 1 > data->size) {
    for (i = data->size; i < idx; i++)
      data->values[i] = pdf_new_null(); /* release_array() won't work without this */
    data->values[idx] = object;
    data->size = idx + 1;
  } else {
    if (data->values[idx])
      pdf_release_obj(data->values[idx]);
    data->values[idx] = object;
  }
}

/*
 * Remove and return the first element, or NULL for an empty array.
 * Easily leaks memory: the caller becomes responsible for the result.
 */
pdf_obj *
pdf_shift_array (pdf_obj *array)
{
  pdf_obj   *result = NULL;
  pdf_array *data;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (data->size > 0) {
    int i;

    result = data->values[0];
    for (i = 1; i < data->size; i++)
      data->values[i-1] = data->values[i];
    data->size--;
  }

  return result;
}
#endif
1267
1268 /* Prepend an object to an array */
1269 static void
pdf_unshift_array(pdf_obj * array,pdf_obj * object)1270 pdf_unshift_array (pdf_obj *array, pdf_obj *object)
1271 {
1272 pdf_array *data;
1273
1274 TYPECHECK(array, PDF_ARRAY);
1275
1276 data = array->data;
1277 if (data->size >= data->max) {
1278 data->max += ARRAY_ALLOC_SIZE;
1279 data->values = RENEW(data->values, data->max, pdf_obj *);
1280 }
1281 memmove(&data->values[1], data->values, data->size * sizeof(pdf_obj *));
1282 data->values[0] = object;
1283 data->size++;
1284 }
1285
#if 0
/*
 * (Disabled code.)  Remove and return the last element of "array",
 * or NULL when the array is empty.  The element is handed to the
 * caller as-is; it is not released here.
 */
pdf_obj *
pdf_pop_array (pdf_obj *array)
{
  pdf_obj *result;
  pdf_array *data;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (data->size > 0) {
    result = data->values[data->size - 1];
    data->size--;
  } else {
    result = NULL;
  }

  return result;
}
#endif
1306
1307 static void
write_dict(pdf_dict * dict,FILE * file)1308 write_dict (pdf_dict *dict, FILE *file)
1309 {
1310 #if 0
1311 pdf_out (file, "<<\n", 3); /* dropping \n saves few kb. */
1312 #else
1313 pdf_out (file, "<<", 2);
1314 #endif
1315 while (dict->key != NULL) {
1316 pdf_write_obj(dict->key, file);
1317 if (pdf_need_white(PDF_NAME, (dict->value)->type)) {
1318 pdf_out_white(file);
1319 }
1320 pdf_write_obj(dict->value, file);
1321 #if 0
1322 pdf_out_char (file, '\n'); /* removing this saves few kb. */
1323 #endif
1324 dict = dict->next;
1325 }
1326 pdf_out (file, ">>", 2);
1327 }
1328
1329 pdf_obj *
pdf_new_dict(void)1330 pdf_new_dict (void)
1331 {
1332 pdf_obj *result;
1333 pdf_dict *data;
1334
1335 result = pdf_new_obj(PDF_DICT);
1336 data = NEW(1, pdf_dict);
1337 data->key = NULL;
1338 data->value = NULL;
1339 data->next = NULL;
1340 result->data = data;
1341
1342 return result;
1343 }
1344
1345 static void
release_dict(pdf_dict * data)1346 release_dict (pdf_dict *data)
1347 {
1348 pdf_dict *next;
1349
1350 while (data != NULL && data->key != NULL) {
1351 pdf_release_obj(data->key);
1352 pdf_release_obj(data->value);
1353 data->key = NULL;
1354 data->value = NULL;
1355 next = data->next;
1356 RELEASE(data);
1357 data = next;
1358 }
1359 RELEASE(data);
1360 }
1361
1362 /* Array is ended by a node with NULL this pointer */
1363 /* pdf_add_dict returns 0 if the key is new and non-zero otherwise */
1364 int
pdf_add_dict(pdf_obj * dict,pdf_obj * key,pdf_obj * value)1365 pdf_add_dict (pdf_obj *dict, pdf_obj *key, pdf_obj *value)
1366 {
1367 pdf_dict *data, *new_node;
1368
1369 TYPECHECK(dict, PDF_DICT);
1370 TYPECHECK(key, PDF_NAME);
1371
1372 /* It seems that NULL is sometimes used for null object... */
1373 if (value != NULL && INVALIDOBJ(value))
1374 ERROR("pdf_add_dict(): Passed invalid value");
1375
1376 /* If this key already exists, simply replace the value */
1377 for (data = dict->data; data->key != NULL; data = data->next) {
1378 if (!strcmp(pdf_name_value(key), pdf_name_value(data->key))) {
1379 /* Release the old value */
1380 pdf_release_obj(data->value);
1381 /* Release the new key (we don't need it) */
1382 pdf_release_obj(key);
1383 data->value = value;
1384 return 1;
1385 }
1386 }
1387 /*
1388 * We didn't find the key. We build a new "end" node and add
1389 * the new key just before the end
1390 */
1391 new_node = NEW (1, pdf_dict);
1392 new_node->key = NULL;
1393 new_node->value = NULL;
1394 new_node->next = NULL;
1395 data->next = new_node;
1396 data->key = key;
1397 data->value = value;
1398 return 0;
1399 }
1400
#if 0
/*
 * (Disabled code.)  Variant of pdf_add_dict() that takes the key as a
 * C string.  An existing entry gets its value replaced (the old value
 * is released); otherwise a new entry with a freshly created name
 * object is appended in front of the list terminator.
 */
void
pdf_put_dict (pdf_obj *dict, const char *key, pdf_obj *value)
{
  pdf_dict *data;

  TYPECHECK(dict, PDF_DICT);

  if (!key) {
    ERROR("pdf_put_dict(): Passed invalid key.");
  }
  /* It seems that NULL is sometimes used for null object... */
  if (value != NULL && INVALIDOBJ(value)) {
    ERROR("pdf_add_dict(): Passed invalid value.");
  }

  data = dict->data;

  /* Look for an existing entry; on a hit, "data" stays on that entry. */
  while (data->key != NULL) {
    if (!strcmp(key, pdf_name_value(data->key))) {
      pdf_release_obj(data->value);
      data->value = value;
      break;
    }
    data = data->next;
  }

  /*
   * If we didn't find the key, "data" is the terminator node: build a
   * new "end" node and add the new key just before the end.
   */
  if (data->key == NULL) {
    pdf_dict *new_node;

    new_node = NEW (1, pdf_dict);
    new_node->key = NULL;
    new_node->value = NULL;
    new_node->next = NULL;
    data->next = new_node;
    data->key = pdf_new_name(key);
    data->value = value;
  }
}
#endif
1445
1446 /* pdf_merge_dict makes a link for each item in dict2 before stealing it */
1447 void
pdf_merge_dict(pdf_obj * dict1,pdf_obj * dict2)1448 pdf_merge_dict (pdf_obj *dict1, pdf_obj *dict2)
1449 {
1450 pdf_dict *data;
1451
1452 TYPECHECK(dict1, PDF_DICT);
1453 TYPECHECK(dict2, PDF_DICT);
1454
1455 data = dict2->data;
1456 while (data->key != NULL) {
1457 pdf_add_dict(dict1, pdf_link_obj(data->key), pdf_link_obj(data->value));
1458 data = data->next;
1459 }
1460 }
1461
1462 int
pdf_foreach_dict(pdf_obj * dict,int (* proc)(pdf_obj *,pdf_obj *,void *),void * pdata)1463 pdf_foreach_dict (pdf_obj *dict,
1464 int (*proc) (pdf_obj *, pdf_obj *, void *), void *pdata)
1465 {
1466 int error = 0;
1467 pdf_dict *data;
1468
1469 ASSERT(proc);
1470
1471 TYPECHECK(dict, PDF_DICT);
1472
1473 data = dict->data;
1474 while (!error &&
1475 data->key != NULL) {
1476 error = proc(data->key, data->value, pdata);
1477 data = data->next;
1478 }
1479
1480 return error;
1481 }
1482
1483 #define pdf_match_name(o,s) ((o) && (s) && !strcmp(((pdf_name *)(o)->data)->name, (s)))
1484 pdf_obj *
pdf_lookup_dict(pdf_obj * dict,const char * name)1485 pdf_lookup_dict (pdf_obj *dict, const char *name)
1486 {
1487 pdf_dict *data;
1488
1489 ASSERT(name);
1490
1491 TYPECHECK(dict, PDF_DICT);
1492
1493 data = dict->data;
1494 while (data->key != NULL) {
1495 if (!strcmp(name, pdf_name_value(data->key))) {
1496 return data->value;
1497 }
1498 data = data->next;
1499 }
1500
1501 return NULL;
1502 }
1503
1504 /* Returns array of dictionary keys */
1505 pdf_obj *
pdf_dict_keys(pdf_obj * dict)1506 pdf_dict_keys (pdf_obj *dict)
1507 {
1508 pdf_obj *keys;
1509 pdf_dict *data;
1510
1511 TYPECHECK(dict, PDF_DICT);
1512
1513 keys = pdf_new_array();
1514 for (data = dict->data; (data &&
1515 data->key != NULL); data = data->next) {
1516 /* We duplicate name object rather than linking keys.
1517 * If we forget to free keys, broken PDF is generated.
1518 */
1519 pdf_add_array(keys, pdf_new_name(pdf_name_value(data->key)));
1520 }
1521
1522 return keys;
1523 }
1524
1525 void
pdf_remove_dict(pdf_obj * dict,const char * name)1526 pdf_remove_dict (pdf_obj *dict, const char *name)
1527 {
1528 pdf_dict *data, **data_p;
1529
1530 TYPECHECK(dict, PDF_DICT);
1531
1532 data = dict->data;
1533 data_p = (pdf_dict **) (void *) &(dict->data);
1534 while (data->key != NULL) {
1535 if (pdf_match_name(data->key, name)) {
1536 pdf_release_obj(data->key);
1537 pdf_release_obj(data->value);
1538 *data_p = data->next;
1539 RELEASE(data);
1540 break;
1541 }
1542 data_p = &(data->next);
1543 data = data->next;
1544 }
1545 }
1546
1547 pdf_obj *
pdf_new_stream(int flags)1548 pdf_new_stream (int flags)
1549 {
1550 pdf_obj *result;
1551 pdf_stream *data;
1552
1553 result = pdf_new_obj(PDF_STREAM);
1554 data = NEW(1, pdf_stream);
1555 /*
1556 * Although we are using an arbitrary pdf_object here, it must have
1557 * type=PDF_DICT and cannot be an indirect reference. This will be
1558 * checked by the output routine.
1559 */
1560 data->dict = pdf_new_dict();
1561 data->_flags = flags;
1562 data->stream = NULL;
1563 data->stream_length = 0;
1564 data->max_length = 0;
1565 data->objstm_data = NULL;
1566
1567 result->data = data;
1568 result->flags |= OBJ_NO_OBJSTM;
1569
1570 return result;
1571 }
1572
1573 static void
write_stream(pdf_stream * stream,FILE * file)1574 write_stream (pdf_stream *stream, FILE *file)
1575 {
1576 unsigned char *filtered;
1577 unsigned long filtered_length;
1578 unsigned long buffer_length;
1579 unsigned char *buffer;
1580
1581 /*
1582 * Always work from a copy of the stream. All filters read from
1583 * "filtered" and leave their result in "filtered".
1584 */
1585 #if 0
1586 filtered = NEW(stream->stream_length + 1, unsigned char);
1587 #endif
1588 filtered = NEW(stream->stream_length, unsigned char);
1589 memcpy(filtered, stream->stream, stream->stream_length);
1590 filtered_length = stream->stream_length;
1591
1592 #if 0
1593 if (stream->stream_length < 10)
1594 stream->_flags &= ^STREAM_COMPRESS;
1595 #endif
1596
1597 #ifdef HAVE_ZLIB
1598 /* Apply compression filter if requested */
1599 if (stream->stream_length > 0 &&
1600 (stream->_flags & STREAM_COMPRESS) &&
1601 compression_level > 0) {
1602
1603 pdf_obj *filters = pdf_lookup_dict(stream->dict, "Filter");
1604
1605 buffer_length = filtered_length + filtered_length/1000 + 14;
1606 buffer = NEW(buffer_length, unsigned char);
1607 {
1608 pdf_obj *filter_name = pdf_new_name("FlateDecode");
1609
1610 if (filters)
1611 /*
1612 * FlateDecode is the first filter to be applied to the stream.
1613 */
1614 pdf_unshift_array(filters, filter_name);
1615 else
1616 /*
1617 * Adding the filter as a name instead of a one-element array
1618 * is crucial because otherwise Adobe Reader cannot read the
1619 * cross-reference stream any more, cf. the PDF v1.5 Errata.
1620 */
1621 pdf_add_dict(stream->dict, pdf_new_name("Filter"), filter_name);
1622 }
1623 #ifdef HAVE_ZLIB_COMPRESS2
1624 if (compress2(buffer, &buffer_length, filtered,
1625 filtered_length, compression_level)) {
1626 ERROR("Zlib error");
1627 }
1628 #else
1629 if (compress(buffer, &buffer_length, filtered,
1630 filtered_length)) {
1631 ERROR ("Zlib error");
1632 }
1633 #endif /* HAVE_ZLIB_COMPRESS2 */
1634 RELEASE(filtered);
1635 compression_saved += filtered_length - buffer_length
1636 - (filters ? strlen("/FlateDecode "): strlen("/Filter/FlateDecode\n"));
1637
1638 filtered = buffer;
1639 filtered_length = buffer_length;
1640 }
1641 #endif /* HAVE_ZLIB */
1642
1643 #if 0
1644 /*
1645 * An optional end-of-line marker preceding the "endstream" is
1646 * not part of stream data. See, PDF Reference 4th ed., p. 38.
1647 */
1648 /* Add a '\n' if the last character wasn't one */
1649 if (filtered_length > 0 &&
1650 filtered[filtered_length-1] != '\n') {
1651 filtered[filtered_length] = '\n';
1652 filtered_length++;
1653 }
1654 #endif
1655 pdf_add_dict(stream->dict,
1656 pdf_new_name("Length"), pdf_new_number(filtered_length));
1657
1658 pdf_write_obj(stream->dict, file);
1659
1660 pdf_out(file, "\nstream\n", 8);
1661
1662 if (enc_mode)
1663 pdf_encrypt_data(filtered, filtered_length);
1664
1665 if (filtered_length > 0) {
1666 pdf_out(file, filtered, filtered_length);
1667 }
1668 RELEASE(filtered);
1669
1670 /*
1671 * This stream length "object" gets reset every time write_stream is
1672 * called for the stream object.
1673 * If this stream gets written more than once with different
1674 * filters, this could be a problem.
1675 */
1676
1677 pdf_out(file, "\n", 1);
1678 pdf_out(file, "endstream", 9);
1679 }
1680
1681 static void
release_stream(pdf_stream * stream)1682 release_stream (pdf_stream *stream)
1683 {
1684 pdf_release_obj(stream->dict);
1685 stream->dict = NULL;
1686
1687 if (stream->stream) {
1688 RELEASE(stream->stream);
1689 stream->stream = NULL;
1690 }
1691
1692 if (stream->objstm_data) {
1693 RELEASE(stream->objstm_data);
1694 stream->objstm_data = NULL;
1695 }
1696
1697 RELEASE(stream);
1698 }
1699
1700 pdf_obj *
pdf_stream_dict(pdf_obj * stream)1701 pdf_stream_dict (pdf_obj *stream)
1702 {
1703 pdf_stream *data;
1704
1705 TYPECHECK(stream, PDF_STREAM);
1706
1707 data = stream->data;
1708
1709 return data->dict;
1710 }
1711
1712 const void *
pdf_stream_dataptr(pdf_obj * stream)1713 pdf_stream_dataptr (pdf_obj *stream)
1714 {
1715 pdf_stream *data;
1716
1717 TYPECHECK(stream, PDF_STREAM);
1718
1719 data = stream->data;
1720
1721 return (const void *) data->stream;
1722 }
1723
1724 long
pdf_stream_length(pdf_obj * stream)1725 pdf_stream_length (pdf_obj *stream)
1726 {
1727 pdf_stream *data;
1728
1729 TYPECHECK(stream, PDF_STREAM);
1730
1731 data = stream->data;
1732
1733 return (long) data->stream_length;
1734 }
1735
1736 static void
set_objstm_data(pdf_obj * objstm,long * data)1737 set_objstm_data (pdf_obj *objstm, long *data) {
1738 TYPECHECK(objstm, PDF_STREAM);
1739
1740 ((pdf_stream *) objstm->data)->objstm_data = data;
1741 }
1742
1743 static long *
get_objstm_data(pdf_obj * objstm)1744 get_objstm_data (pdf_obj *objstm) {
1745 TYPECHECK(objstm, PDF_STREAM);
1746
1747 return ((pdf_stream *) objstm->data)->objstm_data;
1748 }
1749
1750 void
pdf_add_stream(pdf_obj * stream,const void * stream_data,long length)1751 pdf_add_stream (pdf_obj *stream, const void *stream_data, long length)
1752 {
1753 pdf_stream *data;
1754
1755 TYPECHECK(stream, PDF_STREAM);
1756
1757 if (length < 1)
1758 return;
1759 data = stream->data;
1760 if (data->stream_length + length > data->max_length) {
1761 data->max_length += length + STREAM_ALLOC_SIZE;
1762 data->stream = RENEW(data->stream, data->max_length, unsigned char);
1763 }
1764 memcpy(data->stream + data->stream_length, stream_data, length);
1765 data->stream_length += length;
1766 }
1767
1768 #if HAVE_ZLIB
1769 #define WBUF_SIZE 4096
1770 int
pdf_add_stream_flate(pdf_obj * dst,const void * data,long len)1771 pdf_add_stream_flate (pdf_obj *dst, const void *data, long len)
1772 {
1773 z_stream z;
1774 Bytef wbuf[WBUF_SIZE];
1775
1776 z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL;
1777
1778 z.next_in = (z_const Bytef *) data; z.avail_in = len;
1779 z.next_out = (Bytef *) wbuf; z.avail_out = WBUF_SIZE;
1780
1781 if (inflateInit(&z) != Z_OK) {
1782 WARN("inflateInit() failed.");
1783 return -1;
1784 }
1785
1786 for (;;) {
1787 int status;
1788 status = inflate(&z, Z_NO_FLUSH);
1789 if (status == Z_STREAM_END)
1790 break;
1791 else if (status != Z_OK) {
1792 WARN("inflate() failed. Broken PDF file?");
1793 inflateEnd(&z);
1794 return -1;
1795 }
1796
1797 if (z.avail_out == 0) {
1798 pdf_add_stream(dst, wbuf, WBUF_SIZE);
1799 z.next_out = wbuf;
1800 z.avail_out = WBUF_SIZE;
1801 }
1802 }
1803
1804 if (WBUF_SIZE - z.avail_out > 0)
1805 pdf_add_stream(dst, wbuf, WBUF_SIZE - z.avail_out);
1806
1807 return (inflateEnd(&z) == Z_OK ? 0 : -1);
1808 }
1809
1810
/* DecodeParms for FlateDecode
 *
 * Predictor settings taken from a stream's DecodeParms dictionary.
 * get_decode_parms() fills this structure (using the defaults noted
 * below when an entry is absent) and filter_decoded() consumes it.
 */
struct decode_parms {
  int predictor;          /* "Predictor" entry; default 1 (no prediction) */
  int colors;             /* "Colors" entry; default 1 */
  int bits_per_component; /* "BitsPerComponent" entry; default 8 */
  int columns;            /* "Columns" entry; default 1 */
  /* EarlyChange unsupported */
};
1821
1822 static int
get_decode_parms(struct decode_parms * parms,pdf_obj * dict)1823 get_decode_parms (struct decode_parms *parms, pdf_obj *dict)
1824 {
1825 pdf_obj *tmp;
1826
1827 ASSERT(dict && parms);
1828 ASSERT(PDF_OBJ_DICTTYPE(dict));
1829
1830 /* Fill with default values */
1831 parms->predictor = 1;
1832 parms->colors = 1;
1833 parms->bits_per_component = 8;
1834 parms->columns = 1;
1835
1836 tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Predictor"));
1837 if (tmp)
1838 parms->predictor = pdf_number_value(tmp);
1839 tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Colors"));
1840 if (tmp)
1841 parms->colors = pdf_number_value(tmp);
1842 tmp = pdf_deref_obj(pdf_lookup_dict(dict, "BitsPerComponent"));
1843 if (tmp)
1844 parms->bits_per_component = pdf_number_value(tmp);
1845 tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Columns"));
1846 if (tmp)
1847 parms->columns = pdf_number_value(tmp);
1848
1849 if (parms->bits_per_component != 1 &&
1850 parms->bits_per_component != 2 &&
1851 parms->bits_per_component != 4 &&
1852 parms->bits_per_component != 8 &&
1853 parms->bits_per_component != 16) {
1854 WARN("Invalid BPC value in DecodeParms: %d", parms->bits_per_component);
1855 return -1;
1856 } else if (parms->predictor <= 0 || parms->colors <= 0 ||
1857 parms->columns <= 0)
1858 return -1;
1859 return 0;
1860 }
1861
1862 /* From Xpdf version 3.04
1863 * I'm not sure if I properly ported... Untested.
1864 */
1865 #define PREDICTOR_TIFF2_MAX_COLORS 32
1866 static int
filter_row_TIFF2(unsigned char * dst,const unsigned char * src,struct decode_parms * parms)1867 filter_row_TIFF2 (unsigned char *dst, const unsigned char *src,
1868 struct decode_parms *parms)
1869 {
1870 const unsigned char *p = src;
1871 unsigned char col[PREDICTOR_TIFF2_MAX_COLORS];
1872 /* bits_per_component < 8 here */
1873 long mask = (1 << parms->bits_per_component) - 1;
1874 long inbuf, outbuf; /* 2 bytes buffer */
1875 int i, ci, j, k, inbits, outbits;
1876
1877 if (parms->colors > PREDICTOR_TIFF2_MAX_COLORS) {
1878 WARN("Sorry, Colors value > %d not supported for TIFF 2 predictor",
1879 PREDICTOR_TIFF2_MAX_COLORS);
1880 return -1;
1881 }
1882
1883 memset(col, 0, parms->colors);
1884 inbuf = outbuf = 0; inbits = outbits = 0;
1885 j = k = 0;
1886 for (i = 0; i < parms->columns; i++) {
1887 /* expanding each color component into an 8-bits bytes array */
1888 for (ci = 0; ci < parms->colors; ci++) {
1889 if (inbits < parms->bits_per_component) {
1890 /* need more byte */
1891 inbuf = (inbuf << 8) | p[j++];
1892 inbits += 8;
1893 }
1894 /* predict current color component */
1895 col[ci] = (unsigned char) ((col[ci] +
1896 (inbuf >> (inbits - parms->bits_per_component))) & mask);
1897 inbits -= parms->bits_per_component; /* consumed bpc bits */
1898 /* append newly predicted color component value */
1899 outbuf = (outbuf << parms->bits_per_component) | col[ci];
1900 outbits += parms->bits_per_component;
1901 if (outbits >= 8) { /* flush */
1902 dst[k++] = (unsigned char) (outbuf >> (outbits - 8));
1903 outbits -= 8;
1904 }
1905 }
1906 }
1907 if (outbits > 0) {
1908 dst[k] = (unsigned char) (outbuf << (8 - outbits));
1909 }
1910
1911 return 0;
1912 }
1913
1914 /* This routine is inefficient. Length is typically 4 for Xref streams.
1915 * Especially, calling pdf_add_stream() for each 4 bytes append is highly
1916 * inefficient.
1917 */
1918 static int
filter_decoded(pdf_obj * dst,const void * src,long srclen,struct decode_parms * parms)1919 filter_decoded (pdf_obj *dst, const void *src, long srclen,
1920 struct decode_parms *parms)
1921 {
1922 const unsigned char *p = (const unsigned char *) src;
1923 const unsigned char *endptr = p + srclen;
1924 unsigned char *prev, *buf;
1925 int bits_per_pixel = parms->colors * parms->bits_per_component;
1926 int bytes_per_pixel = (bits_per_pixel + 7) / 8;
1927 int length = (parms->columns * bits_per_pixel + 7) / 8;
1928 int i, error = 0;
1929
1930 prev = NEW(length, unsigned char);
1931 buf = NEW(length, unsigned char);
1932
1933 memset(prev, 0, length);
1934 switch (parms->predictor) {
1935 case 1 : /* No prediction */
1936 pdf_add_stream(dst, src, srclen); /* Just copy */
1937 break;
1938 case 2: /* TIFF Predictor 2 */
1939 {
1940 if (parms->bits_per_component == 8) {
1941 while (p + length < endptr) {
1942 /* Same as PNG Sub */
1943 for (i = 0; i < length; i++) {
1944 int pv = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
1945 buf[i] = (unsigned char)(((int) p[i] + pv) & 0xff);
1946 }
1947 pdf_add_stream(dst, buf, length);
1948 p += length;
1949 }
1950 } else if (parms->bits_per_component == 16) {
1951 while (p + length < endptr) {
1952 for (i = 0; i < length; i += 2) {
1953 int b = i - bytes_per_pixel;
1954 char hi = b >= 0 ? buf[b] : 0;
1955 char lo = b >= 0 ? buf[b + 1] : 0;
1956 long pv = (hi << 8) | lo;
1957 long cv = (p[i] << 8) | p[i + 1];
1958 long c = pv + cv;
1959 buf[i] = (unsigned char) (c >> 8);
1960 buf[i + 1] = (unsigned char) (c & 0xff);
1961 }
1962 pdf_add_stream(dst, buf, length);
1963 p += length;
1964 }
1965 } else { /* bits per component 1, 2, 4 */
1966 while (!error && p + length < endptr) {
1967 error = filter_row_TIFF2(buf, p, parms);
1968 if (!error) {
1969 pdf_add_stream(dst, buf, length);
1970 p += length;
1971 }
1972 }
1973 }
1974 }
1975 break;
1976 /* PNG predictors: first byte of each rows is predictor type */
1977 case 10: /* PNG None */
1978 case 11: /* PNG Sub on all rows */
1979 case 12: /* PNG UP on all rows */
1980 case 13: /* PNG Average on all rows */
1981 case 14: /* PNG Paeth on all rows */
1982 case 15: /* PNG optimun: prediction algorithm can change from line to line. */
1983 {
1984 int type = parms->predictor - 10;
1985
1986 while (!error && p + length < endptr) {
1987 if (parms->predictor == 15)
1988 type = *p;
1989 else if (*p != type) {
1990 WARN("Mismatched Predictor type in data stream.");
1991 error = -1;
1992 }
1993 p++;
1994 switch (type) {
1995 case 0: /* Do nothing just skip first byte */
1996 memcpy(buf, p, length);
1997 break;
1998 case 1:
1999 for (i = 0; i < length; i++) {
2000 int pv = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
2001 buf[i] = (unsigned char)(((int) p[i] + pv) & 0xff);
2002 }
2003 break;
2004 case 2:
2005 for (i = 0; i < length; i++) {
2006 buf[i] = (unsigned char)(((int) p[i] + (int) prev[i]) & 0xff);
2007 }
2008 break;
2009 case 3:
2010 for (i = 0; i < length; i++) {
2011 int up = prev[i];
2012 int left = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
2013 int tmp = floor((up + left) / 2);
2014 buf[i] = (unsigned char)((p[i] + tmp) & 0xff);
2015 }
2016 break;
2017 case 4:
2018 for (i = 0; i < length; i++) {
2019 int a = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0; /* left */
2020 int b = prev[i]; /* above */
2021 int c = i - bytes_per_pixel >= 0 ? prev[i - bytes_per_pixel] : 0; /* upper left */
2022 int q = a + b - c;
2023 int qa = q - a, qb = q - b, qc = q - c;
2024 qa = qa < 0 ? -qa : qa;
2025 qb = qb < 0 ? -qb : qb;
2026 qc = qc < 0 ? -qc : qc;
2027 if (qa <= qb && qa <= qc)
2028 buf[i] = (unsigned char) (((int) p[i] + a) & 0xff);
2029 else if (qb <= qc)
2030 buf[i] = (unsigned char) (((int) p[i] + b) & 0xff);
2031 else
2032 buf[i] = (unsigned char) (((int) p[i] + c) & 0xff);
2033 }
2034 break;
2035 default:
2036 WARN("Unknown PNG predictor type: %d", type);
2037 error = -1;
2038 }
2039 if (!error) {
2040 pdf_add_stream(dst, buf, length); /* highly inefficient */
2041 memcpy(prev, buf, length);
2042 p += length;
2043 }
2044 }
2045 }
2046 break;
2047 default:
2048 WARN("Unknown Predictor type value :%d", parms->predictor);
2049 error = -1;
2050 }
2051
2052 RELEASE(prev);
2053 RELEASE(buf);
2054
2055 return error;
2056 }
2057
2058 static int
pdf_add_stream_flate_filtered(pdf_obj * dst,const void * data,long len,struct decode_parms * parms)2059 pdf_add_stream_flate_filtered (pdf_obj *dst, const void *data, long len, struct decode_parms *parms)
2060 {
2061 pdf_obj *tmp;
2062 z_stream z;
2063 Bytef wbuf[WBUF_SIZE];
2064 int error;
2065
2066 z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL;
2067
2068 z.next_in = (z_const Bytef *) data; z.avail_in = len;
2069 z.next_out = (Bytef *) wbuf; z.avail_out = WBUF_SIZE;
2070
2071 if (inflateInit(&z) != Z_OK) {
2072 WARN("inflateInit() failed.");
2073 return -1;
2074 }
2075
2076 tmp = pdf_new_stream(0);
2077 for (;;) {
2078 int status;
2079 status = inflate(&z, Z_NO_FLUSH);
2080 if (status == Z_STREAM_END)
2081 break;
2082 else if (status != Z_OK) {
2083 WARN("inflate() failed. Broken PDF file?");
2084 inflateEnd(&z);
2085 return -1;
2086 }
2087
2088 if (z.avail_out == 0) {
2089 pdf_add_stream(tmp, wbuf, WBUF_SIZE);
2090 z.next_out = wbuf;
2091 z.avail_out = WBUF_SIZE;
2092 }
2093 }
2094
2095 if (WBUF_SIZE - z.avail_out > 0)
2096 pdf_add_stream(tmp, wbuf, WBUF_SIZE - z.avail_out);
2097
2098 error = filter_decoded(dst, pdf_stream_dataptr(tmp), pdf_stream_length(tmp), parms);
2099 pdf_release_obj(tmp);
2100
2101 return ((!error && inflateEnd(&z) == Z_OK) ? 0 : -1);
2102 }
2103 #endif
2104
2105 int
pdf_concat_stream(pdf_obj * dst,pdf_obj * src)2106 pdf_concat_stream (pdf_obj *dst, pdf_obj *src)
2107 {
2108 const char *stream_data;
2109 long stream_length;
2110 pdf_obj *stream_dict;
2111 pdf_obj *filter;
2112 int error = 0;
2113
2114 if (!PDF_OBJ_STREAMTYPE(dst) || !PDF_OBJ_STREAMTYPE(src))
2115 ERROR("Invalid type.");
2116
2117 stream_data = pdf_stream_dataptr(src);
2118 stream_length = pdf_stream_length (src);
2119 stream_dict = pdf_stream_dict (src);
2120
2121 filter = pdf_lookup_dict(stream_dict, "Filter");
2122 if (!filter)
2123 pdf_add_stream(dst, stream_data, stream_length);
2124 #if HAVE_ZLIB
2125 else {
2126 struct decode_parms parms;
2127 int have_parms = 0;
2128
2129 if (pdf_lookup_dict(stream_dict, "DecodeParms")) {
2130 pdf_obj *tmp;
2131
2132 /* Dictionary or array */
2133 tmp = pdf_deref_obj(pdf_lookup_dict(stream_dict, "DecodeParms"));
2134 if (PDF_OBJ_ARRAYTYPE(tmp)) {
2135 if (pdf_array_length(tmp) > 1) {
2136 WARN("Unexpected size for DecodeParms array.");
2137 return -1;
2138 }
2139 tmp = pdf_deref_obj(pdf_get_array(tmp, 0));
2140 }
2141 if (!PDF_OBJ_DICTTYPE(tmp)) {
2142 WARN("PDF dict expected for DecodeParms...");
2143 return -1;
2144 }
2145 error = get_decode_parms(&parms, tmp);
2146 if (error)
2147 ERROR("Invalid value(s) in DecodeParms dictionary.");
2148 have_parms = 1;
2149 }
2150 if (PDF_OBJ_ARRAYTYPE(filter)) {
2151 if (pdf_array_length(filter) > 1) {
2152 WARN("Multiple DecodeFilter not supported.");
2153 return -1;
2154 }
2155 filter = pdf_get_array(filter, 0);
2156 }
2157 if (PDF_OBJ_NAMETYPE(filter)) {
2158 char *filter_name = pdf_name_value(filter);
2159 if (filter_name && !strcmp(filter_name, "FlateDecode")) {
2160 if (have_parms)
2161 error = pdf_add_stream_flate_filtered(dst, stream_data, stream_length, &parms);
2162 else
2163 error = pdf_add_stream_flate(dst, stream_data, stream_length);
2164 } else {
2165 WARN("DecodeFilter \"%s\" not supported.", filter_name);
2166 error = -1;
2167 }
2168 } else
2169 ERROR("Broken PDF file?");
2170 #endif /* HAVE_ZLIB */
2171 }
2172
2173 return error;
2174 }
2175
2176 static pdf_obj *
pdf_stream_uncompress(pdf_obj * src)2177 pdf_stream_uncompress (pdf_obj *src) {
2178 pdf_obj *dst = pdf_new_stream(0);
2179
2180 TYPECHECK(src, PDF_STREAM);
2181
2182 pdf_merge_dict(pdf_stream_dict(dst), pdf_stream_dict(src));
2183 pdf_remove_dict(pdf_stream_dict(dst), "Length");
2184 pdf_concat_stream(dst, src);
2185
2186 return dst;
2187 }
2188
#if 0
/* (Disabled code.)  Replace the flags word stored in "stream". */
void
pdf_stream_set_flags (pdf_obj *stream, int flags)
{
  pdf_stream *data;

  TYPECHECK(stream, PDF_STREAM);

  data = stream->data;
  data->_flags = flags;
}

/* (Disabled code.)  Return the flags word stored in "stream". */
int
pdf_stream_get_flags (pdf_obj *stream)
{
  pdf_stream *data;

  TYPECHECK(stream, PDF_STREAM);

  data = stream->data;

  return data->_flags;
}
#endif
2213
2214 static void
pdf_write_obj(pdf_obj * object,FILE * file)2215 pdf_write_obj (pdf_obj *object, FILE *file)
2216 {
2217 if (object == NULL) {
2218 write_null(file);
2219 return;
2220 }
2221
2222 if (INVALIDOBJ(object) || PDF_OBJ_UNDEFINED(object))
2223 ERROR("pdf_write_obj: Invalid object, type = %d\n", object->type);
2224
2225 if (file == stderr)
2226 fprintf(stderr, "{%d}", object->refcount);
2227
2228 switch (object->type) {
2229 case PDF_BOOLEAN:
2230 write_boolean(object->data, file);
2231 break;
2232 case PDF_NUMBER:
2233 write_number (object->data, file);
2234 break;
2235 case PDF_STRING:
2236 write_string (object->data, file);
2237 break;
2238 case PDF_NAME:
2239 write_name(object->data, file);
2240 break;
2241 case PDF_ARRAY:
2242 write_array(object->data, file);
2243 break;
2244 case PDF_DICT:
2245 write_dict (object->data, file);
2246 break;
2247 case PDF_STREAM:
2248 write_stream(object->data, file);
2249 break;
2250 case PDF_NULL:
2251 write_null(file);
2252 break;
2253 case PDF_INDIRECT:
2254 write_indirect(object->data, file);
2255 break;
2256 }
2257 }
2258
2259 /* Write the object to the file */
2260 static void
pdf_flush_obj(pdf_obj * object,FILE * file)2261 pdf_flush_obj (pdf_obj *object, FILE *file)
2262 {
2263 long length;
2264
2265 /*
2266 * Record file position
2267 */
2268 add_xref_entry(object->label, 1,
2269 pdf_output_file_position, object->generation);
2270 length = sprintf(format_buffer, "%lu %hu obj\n", object->label, object->generation);
2271 enc_mode = doc_enc_mode && !(object->flags & OBJ_NO_ENCRYPT);
2272 pdf_enc_set_label(object->label);
2273 pdf_enc_set_generation(object->generation);
2274 pdf_out(file, format_buffer, length);
2275 pdf_write_obj(object, file);
2276 pdf_out(file, "\nendobj\n", 8);
2277 }
2278
2279 static long
pdf_add_objstm(pdf_obj * objstm,pdf_obj * object)2280 pdf_add_objstm (pdf_obj *objstm, pdf_obj *object)
2281 {
2282 long *data, pos;
2283
2284 TYPECHECK(objstm, PDF_STREAM);
2285
2286 data = get_objstm_data(objstm);
2287 pos = ++data[0];
2288
2289 data[2*pos] = object->label;
2290 data[2*pos+1] = pdf_stream_length(objstm);
2291
2292 add_xref_entry(object->label, 2, objstm->label, pos-1);
2293
2294 /* redirect output into objstm */
2295 output_stream = objstm;
2296 enc_mode = 0;
2297 pdf_write_obj(object, pdf_output_file);
2298 pdf_out_char(pdf_output_file, '\n');
2299 output_stream = NULL;
2300
2301 return pos;
2302 }
2303
2304 static void
release_objstm(pdf_obj * objstm)2305 release_objstm (pdf_obj *objstm)
2306 {
2307 long *data = get_objstm_data(objstm);
2308 long pos = data[0];
2309 pdf_obj *dict;
2310 pdf_stream *stream;
2311 unsigned char *old_buf;
2312 unsigned long old_length;
2313 stream = (pdf_stream *) objstm->data;
2314
2315 /* Precede stream data by offset table */
2316 old_buf = stream->stream;
2317 old_length = stream->stream_length;
2318 /* Reserve 22 bytes for each entry (two 10 digit numbers plus two spaces) */
2319 stream->stream = NEW(old_length + 22*pos, unsigned char);
2320 stream->stream_length = 0;
2321
2322 {
2323 long i = 2*pos, *val = data+2;
2324 while (i--) {
2325 long length = sprintf(format_buffer, "%ld ", *(val++));
2326 pdf_add_stream(objstm, format_buffer, length);
2327 }
2328 }
2329
2330 dict = pdf_stream_dict(objstm);
2331 pdf_add_dict(dict, pdf_new_name("Type"), pdf_new_name("ObjStm"));
2332 pdf_add_dict(dict, pdf_new_name("N"), pdf_new_number(pos));
2333 pdf_add_dict(dict, pdf_new_name("First"), pdf_new_number(stream->stream_length));
2334
2335 pdf_add_stream(objstm, old_buf, old_length);
2336 RELEASE(old_buf);
2337 pdf_release_obj(objstm);
2338 }
2339
2340 void
pdf_release_obj(pdf_obj * object)2341 pdf_release_obj (pdf_obj *object)
2342 {
2343 if (object == NULL)
2344 return;
2345 if (INVALIDOBJ(object) || object->refcount <= 0) {
2346 MESG("\npdf_release_obj: object=%p, type=%d, refcount=%d\n",
2347 object, object->type, object->refcount);
2348 pdf_write_obj(object, stderr);
2349 ERROR("pdf_release_obj: Called with invalid object.");
2350 }
2351 object->refcount -= 1;
2352 if (object->refcount == 0) {
2353 /*
2354 * Nothing is using this object so it's okay to remove it.
2355 * Nonzero "label" means object needs to be written before it's destroyed.
2356 */
2357 if (object->label && pdf_output_file != NULL) {
2358 if (!do_objstm || object->flags & OBJ_NO_OBJSTM
2359 || (doc_enc_mode && object->flags & OBJ_NO_ENCRYPT)
2360 || object->generation)
2361 pdf_flush_obj(object, pdf_output_file);
2362 else {
2363 if (!current_objstm) {
2364 long *data = NEW(2*OBJSTM_MAX_OBJS+2, long);
2365 data[0] = data[1] = 0;
2366 current_objstm = pdf_new_stream(STREAM_COMPRESS);
2367 set_objstm_data(current_objstm, data);
2368 pdf_label_obj(current_objstm);
2369 }
2370 if (pdf_add_objstm(current_objstm, object) == OBJSTM_MAX_OBJS) {
2371 release_objstm(current_objstm);
2372 current_objstm = NULL;
2373 }
2374 }
2375 }
2376 switch (object->type) {
2377 case PDF_BOOLEAN:
2378 release_boolean(object->data);
2379 break;
2380 case PDF_NULL:
2381 break;
2382 case PDF_NUMBER:
2383 release_number(object->data);
2384 break;
2385 case PDF_STRING:
2386 release_string(object->data);
2387 break;
2388 case PDF_NAME:
2389 release_name(object->data);
2390 break;
2391 case PDF_ARRAY:
2392 release_array(object->data);
2393 break;
2394 case PDF_DICT:
2395 release_dict(object->data);
2396 break;
2397 case PDF_STREAM:
2398 release_stream(object->data);
2399 break;
2400 case PDF_INDIRECT:
2401 release_indirect(object->data);
2402 break;
2403 }
2404 /* This might help detect freeing already freed objects */
2405 object->type = -1;
2406 object->data = NULL;
2407 RELEASE(object);
2408 }
2409 }
2410
/*
 * Move the file position backwards until a line terminator ('\n' or
 * '\r') has just been read, stepping one byte further back on every
 * round.
 *
 * Returns 1 when the last byte read was a valid character, and 0 when
 * nothing could be read (including the case where the position was
 * already at offset 0 or 1 on entry).
 *
 * Note: this should work even if \r\n is the end-of-line sequence.  It
 * could fail on a machine where \n is eol and a stray \r occurs in the
 * stream, which is highly unlikely in the last few bytes of a file
 * where this routine is used.
 */
static int
backup_line (FILE *pdf_input_file)
{
  int last = -1;

  if (tell_position(pdf_input_file) > 1) {
    for (;;) {
      seek_relative(pdf_input_file, -2);
      if (tell_position(pdf_input_file) <= 0)
        break; /* reached the start of the file */
      last = fgetc(pdf_input_file);
      if (last < 0 || last == '\n' || last == '\r')
        break; /* read error, or a line terminator was found */
    }
  }

  return (last < 0) ? 0 : 1;
}
2433
2434 static long
find_xref(FILE * pdf_input_file)2435 find_xref (FILE *pdf_input_file)
2436 {
2437 long xref_pos;
2438 int tries = 10;
2439
2440 do {
2441 long currentpos;
2442
2443 if (!backup_line(pdf_input_file)) {
2444 tries = 0;
2445 break;
2446 }
2447 currentpos = tell_position(pdf_input_file);
2448 fread(work_buffer, sizeof(char), strlen("startxref"), pdf_input_file);
2449 seek_absolute(pdf_input_file, currentpos);
2450 tries--;
2451 } while (tries > 0 &&
2452 strncmp(work_buffer, "startxref", strlen("startxref")));
2453 if (tries <= 0)
2454 return 0;
2455
2456 /* Skip rest of this line */
2457 mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
2458 /* Next line of input file should contain actual xref location */
2459 mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
2460
2461 {
2462 const char *start, *end;
2463 char *number;
2464
2465 start = work_buffer;
2466 end = start + strlen(work_buffer);
2467 skip_white(&start, end);
2468 number = parse_number(&start, end);
2469 xref_pos = (long) atof(number);
2470 RELEASE(number);
2471 }
2472
2473 return xref_pos;
2474 }
2475
2476 /*
2477 * This routine must be called with the file pointer located
2478 * at the start of the trailer.
2479 */
2480 static pdf_obj *
parse_trailer(pdf_file * pf)2481 parse_trailer (pdf_file *pf)
2482 {
2483 pdf_obj *result;
2484 /*
2485 * Fill work_buffer and hope trailer fits. This should
2486 * be made a bit more robust sometime.
2487 */
2488 if (fread(work_buffer, sizeof(char),
2489 WORK_BUFFER_SIZE, pf->file) == 0 ||
2490 strncmp(work_buffer, "trailer", strlen("trailer"))) {
2491 WARN("No trailer. Are you sure this is a PDF file?");
2492 WARN("buffer:\n->%s<-\n", work_buffer);
2493 result = NULL;
2494 } else {
2495 const char *p = work_buffer + strlen("trailer");
2496 skip_white(&p, work_buffer + WORK_BUFFER_SIZE);
2497 result = parse_pdf_dict(&p, work_buffer + WORK_BUFFER_SIZE, pf);
2498 }
2499
2500 return result;
2501 }
2502
2503 /*
2504 * This routine tries to estimate an upper bound for character position
2505 * of the end of the object, so it knows how big the buffer must be.
2506 * The parsing routines require that the entire object be read into
2507 * memory. It would be a major pain to rewrite them. The worst case
2508 * is that an object before an xref table will grab the whole table
2509 * :-(
2510 */
2511 static long
next_object_offset(pdf_file * pf,unsigned long obj_num)2512 next_object_offset (pdf_file *pf, unsigned long obj_num)
2513 {
2514 long next = pf->file_size; /* Worst case */
2515 long i, curr;
2516
2517 curr = pf->xref_table[obj_num].field2;
2518 /* Check all other type 1 objects to find next one */
2519 for (i = 0; i < pf->num_obj; i++) {
2520 if (pf->xref_table[i].type == 1 &&
2521 pf->xref_table[i].field2 > curr &&
2522 pf->xref_table[i].field2 < next)
2523 next = pf->xref_table[i].field2;
2524 }
2525
2526 return next;
2527 }
2528
/*
 * True if object number n is inside the xref table of pf (and is not
 * object 0) and its entry is either of type 1 with field3 equal to g,
 * or of type 2 with g being zero.  Arguments are evaluated more than
 * once.
 */
#define checklabel(pf, n, g) ((n) > 0 && (n) < (pf)->num_obj && \
  ((pf)->xref_table[(n)].type == 1 \
     ? (pf)->xref_table[(n)].field3 == (g) \
     : ((pf)->xref_table[(n)].type == 2 && !(g))))
2532
2533 pdf_obj *
pdf_new_indirect(pdf_file * pf,unsigned long obj_num,unsigned short obj_gen)2534 pdf_new_indirect (pdf_file *pf, unsigned long obj_num, unsigned short obj_gen)
2535 {
2536 pdf_obj *result;
2537 pdf_indirect *indirect;
2538
2539 indirect = NEW(1, pdf_indirect);
2540 indirect->pf = pf;
2541 indirect->obj = NULL;
2542 indirect->label = obj_num;
2543 indirect->generation = obj_gen;
2544
2545 result = pdf_new_obj(PDF_INDIRECT);
2546 result->data = indirect;
2547
2548 return result;
2549 }
2550
2551 static pdf_obj *
pdf_read_object(unsigned long obj_num,unsigned short obj_gen,pdf_file * pf,long offset,long limit)2552 pdf_read_object (unsigned long obj_num, unsigned short obj_gen,
2553 pdf_file *pf, long offset, long limit)
2554 {
2555 long length;
2556 char *buffer;
2557 const char *p, *endptr;
2558 pdf_obj *result;
2559
2560 length = limit - offset;
2561
2562 if (length <= 0)
2563 return NULL;
2564
2565 buffer = NEW(length + 1, char);
2566
2567 seek_absolute(pf->file, offset);
2568 fread(buffer, sizeof(char), length, pf->file);
2569
2570 p = buffer;
2571 endptr = p + length;
2572
2573 /* Check for obj_num and obj_gen */
2574 {
2575 const char *q = p; /* <== p */
2576 char *sp;
2577 unsigned long n, g;
2578
2579 skip_white(&q, endptr);
2580 sp = parse_unsigned(&q, endptr);
2581 if (!sp) {
2582 RELEASE(buffer);
2583 return NULL;
2584 }
2585 n = strtoul(sp, NULL, 10);
2586 RELEASE(sp);
2587
2588 skip_white(&q, endptr);
2589 sp = parse_unsigned(&q, endptr);
2590 if (!sp) {
2591 RELEASE(buffer);
2592 return NULL;
2593 }
2594 g = strtoul(sp, NULL, 10);
2595 RELEASE(sp);
2596
2597 if (obj_num && (n != obj_num || g != obj_gen)) {
2598 RELEASE(buffer);
2599 return NULL;
2600 }
2601
2602 p = q; /* ==> p */
2603 }
2604
2605
2606 skip_white(&p, endptr);
2607 if (memcmp(p, "obj", strlen("obj"))) {
2608 WARN("Didn't find \"obj\".");
2609 RELEASE(buffer);
2610 return NULL;
2611 }
2612 p += strlen("obj");
2613
2614 result = parse_pdf_object(&p, endptr, pf);
2615
2616 skip_white(&p, endptr);
2617 if (memcmp(p, "endobj", strlen("endobj"))) {
2618 WARN("Didn't find \"endobj\".");
2619 if (result)
2620 pdf_release_obj(result);
2621 result = NULL;
2622 }
2623 RELEASE(buffer);
2624
2625 return result;
2626 }
2627
2628 static pdf_obj *
read_objstm(pdf_file * pf,unsigned long num)2629 read_objstm (pdf_file *pf, unsigned long num)
2630 {
2631 unsigned long offset = pf->xref_table[num].field2;
2632 unsigned short gen = pf->xref_table[num].field3;
2633 long limit = next_object_offset(pf, num), n, first, *header = NULL;
2634 char *data = NULL, *q;
2635 const char *p, *endptr;
2636 int i;
2637
2638 pdf_obj *objstm, *dict, *type, *n_obj, *first_obj;
2639
2640 objstm = pdf_read_object(num, gen, pf, offset, limit);
2641
2642 if (!PDF_OBJ_STREAMTYPE(objstm))
2643 goto error;
2644
2645 {
2646 pdf_obj *tmp = pdf_stream_uncompress(objstm);
2647 if (!tmp)
2648 goto error;
2649 pdf_release_obj(objstm);
2650 objstm = tmp;
2651 }
2652
2653 dict = pdf_stream_dict(objstm);
2654
2655 type = pdf_lookup_dict(dict, "Type");
2656 if (!PDF_OBJ_NAMETYPE(type) ||
2657 strcmp(pdf_name_value(type), "ObjStm"))
2658 goto error;
2659
2660 n_obj = pdf_lookup_dict(dict, "N");
2661 if (!PDF_OBJ_NUMBERTYPE(n_obj))
2662 goto error;
2663 n = (long) pdf_number_value(n_obj);
2664
2665 first_obj = pdf_lookup_dict(dict, "First");
2666 if (!PDF_OBJ_NUMBERTYPE(first_obj))
2667 goto error;
2668 first = (long) pdf_number_value(first_obj);
2669 /* reject object streams without object data */
2670 if (first >= pdf_stream_length(objstm))
2671 goto error;
2672
2673 header = NEW(2*(n+1), long);
2674 set_objstm_data(objstm, header);
2675 *(header++) = n;
2676 *(header++) = first;
2677
2678 /* avoid parsing beyond offset table */
2679 data = NEW(first + 1, char);
2680 memcpy(data, pdf_stream_dataptr(objstm), first);
2681 data[first] = 0;
2682
2683 p = data;
2684 endptr = p + first;
2685 i = 2*n;
2686 while (i--) {
2687 *(header++) = strtoul(p, &q, 10);
2688 if (q == p)
2689 goto error;
2690 p = q;
2691 }
2692
2693 /* Any garbage after last entry? */
2694 skip_white(&p, endptr);
2695 if (p != endptr)
2696 goto error;
2697 RELEASE(data);
2698
2699 return pf->xref_table[num].direct = objstm;
2700
2701 error:
2702 WARN("Cannot parse object stream.");
2703 if (data)
2704 RELEASE(data);
2705 if (objstm)
2706 pdf_release_obj(objstm);
2707 return NULL;
2708 }
2709
2710 /* Label without corresponding object definition are replaced by the
2711 * null object, as required by the PDF spec. This is important to parse
2712 * several cross-reference sections.
2713 */
2714 static pdf_obj *
pdf_get_object(pdf_file * pf,unsigned long obj_num,unsigned short obj_gen)2715 pdf_get_object (pdf_file *pf, unsigned long obj_num, unsigned short obj_gen)
2716 {
2717 pdf_obj *result;
2718
2719 if (!checklabel(pf, obj_num, obj_gen)) {
2720 WARN("Trying to read nonexistent or deleted object: %lu %u",
2721 obj_num, obj_gen);
2722 return pdf_new_null();
2723 }
2724
2725 if ((result = pf->xref_table[obj_num].direct)) {
2726 return pdf_link_obj(result);
2727 }
2728
2729 if (pf->xref_table[obj_num].type == 1) {
2730 /* type == 1 */
2731 unsigned long offset;
2732 long limit;
2733 offset = pf->xref_table[obj_num].field2;
2734 limit = next_object_offset(pf, obj_num);
2735 result = pdf_read_object(obj_num, obj_gen, pf, offset, limit);
2736 } else {
2737 /* type == 2 */
2738 unsigned long objstm_num = pf->xref_table[obj_num].field2;
2739 unsigned short index = pf->xref_table[obj_num].field3;
2740 pdf_obj *objstm;
2741 long *data, n, first, length;
2742 const char *p, *q;
2743
2744 if (objstm_num >= pf->num_obj ||
2745 pf->xref_table[objstm_num].type != 1 ||
2746 !((objstm = pf->xref_table[objstm_num].direct) ||
2747 (objstm = read_objstm(pf, objstm_num))))
2748 goto error;
2749
2750 data = get_objstm_data(objstm);
2751 n = *(data++);
2752 first = *(data++);
2753
2754 if (index >= n || data[2*index] != obj_num)
2755 goto error;
2756
2757 length = pdf_stream_length(objstm);
2758 p = (const char *) pdf_stream_dataptr(objstm) + first + data[2*index+1];
2759 q = p + (index == n-1 ? length : first+data[2*index+3]);
2760 result = parse_pdf_object(&p, q, pf);
2761 if (!result)
2762 goto error;
2763 }
2764
2765 /* Make sure the caller doesn't free this object */
2766 pf->xref_table[obj_num].direct = pdf_link_obj(result);
2767
2768 return result;
2769
2770 error:
2771 WARN("Could not read object from object stream.");
2772 return pdf_new_null();
2773 }
2774
/*
 * Accessors for the pdf_indirect payload stored in the "data" member of
 * an object.  Each expands to the member itself, so it can be read or
 * assigned to; OBJ_OBJ is assigned to in pdf_new_ref().  The argument
 * is not checked for being of type PDF_INDIRECT.
 */
#define OBJ_FILE(o) (((pdf_indirect *)((o)->data))->pf)
#define OBJ_OBJ(o) (((pdf_indirect *)((o)->data))->obj)
#define OBJ_NUM(o) (((pdf_indirect *)((o)->data))->label)
#define OBJ_GEN(o) (((pdf_indirect *)((o)->data))->generation)
2779
2780 static pdf_obj *
pdf_new_ref(pdf_obj * object)2781 pdf_new_ref (pdf_obj *object)
2782 {
2783 pdf_obj *result;
2784
2785 if (object->label == 0) {
2786 pdf_label_obj(object);
2787 }
2788 result = pdf_new_indirect(NULL, object->label, object->generation);
2789 OBJ_OBJ(result) = object;
2790 return result;
2791 }
2792
2793 /* pdf_deref_obj always returns a link instead of the original */
2794 /* It never return the null object, but the NULL pointer instead */
2795 pdf_obj *
pdf_deref_obj(pdf_obj * obj)2796 pdf_deref_obj (pdf_obj *obj)
2797 {
2798 int count = PDF_OBJ_MAX_DEPTH;
2799
2800 if (obj)
2801 obj = pdf_link_obj(obj);
2802
2803 while (PDF_OBJ_INDIRECTTYPE(obj) && --count) {
2804 pdf_file *pf = OBJ_FILE(obj);
2805 if (pf) {
2806 unsigned long obj_num = OBJ_NUM(obj);
2807 unsigned short obj_gen = OBJ_GEN(obj);
2808 pdf_release_obj(obj);
2809 obj = pdf_get_object(pf, obj_num, obj_gen);
2810 } else {
2811 pdf_obj *next_obj = OBJ_OBJ(obj);
2812 if (!next_obj) {
2813 ERROR("Undefined object reference");
2814 }
2815 pdf_release_obj(obj);
2816 obj = pdf_link_obj(next_obj);
2817 }
2818 }
2819
2820 if (!count)
2821 ERROR("Loop in object hierarchy detected. Broken PDF file?");
2822
2823 if (PDF_OBJ_NULLTYPE(obj)) {
2824 pdf_release_obj(obj);
2825 return NULL;
2826 } else
2827 return obj;
2828 }
2829
2830 static void
extend_xref(pdf_file * pf,long new_size)2831 extend_xref (pdf_file *pf, long new_size)
2832 {
2833 unsigned long i;
2834
2835 pf->xref_table = RENEW(pf->xref_table, new_size, xref_entry);
2836 for (i = pf->num_obj; i < new_size; i++) {
2837 pf->xref_table[i].direct = NULL;
2838 pf->xref_table[i].indirect = NULL;
2839 pf->xref_table[i].type = 0;
2840 pf->xref_table[i].field3 = 0;
2841 pf->xref_table[i].field2 = 0L;
2842 }
2843 pf->num_obj = new_size;
2844 }
2845
2846 static int
parse_xref_table(pdf_file * pf,long xref_pos)2847 parse_xref_table (pdf_file *pf, long xref_pos)
2848 {
2849 FILE *pdf_input_file = pf->file;
2850 unsigned long first, size;
2851 unsigned long i, offset;
2852 unsigned int obj_gen;
2853 char flag;
2854 int r;
2855
2856 /*
2857 * This routine reads one xref segment. It may be called multiple times
2858 * on the same file. xref tables sometimes come in pieces.
2859 */
2860
2861 seek_absolute(pf->file, xref_pos);
2862
2863 mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
2864 if (memcmp(work_buffer, "xref", strlen("xref"))) {
2865 /* Might be an xref stream and not an xref table */
2866 return 0;
2867 }
2868 /* Next line in file has first item and size of table */
2869 for (;;) {
2870 unsigned long current_pos;
2871
2872 current_pos = tell_position(pdf_input_file);
2873 if (mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file) == NULL) {
2874 WARN("Premature end of PDF file while parsing xref table.");
2875 return -1;
2876 }
2877 if (!strncmp(work_buffer, "trailer", strlen ("trailer"))) {
2878 /*
2879 * Backup... This is ugly, but it seems like the safest thing to
2880 * do. It is possible the trailer dictionary starts on the same
2881 * logical line as the word trailer. In that case, the mfgets
2882 * call might have started to read the trailer dictionary and
2883 * parse_trailer would fail.
2884 */
2885 seek_absolute(pdf_input_file, current_pos);
2886 break;
2887 }
2888 sscanf(work_buffer, "%lu %lu", &first, &size);
2889 if (pf->num_obj < first + size) {
2890 extend_xref(pf, first + size);
2891 }
2892
2893 for (i = first; i < first + size; i++) {
2894 fread(work_buffer, sizeof(char), 20, pdf_input_file);
2895 /*
2896 * Don't overwrite positions that have already been set by a
2897 * modified xref table. We are working our way backwards
2898 * through the reference table, so we only set "position"
2899 * if it hasn't been set yet.
2900 */
2901 work_buffer[19] = 0;
2902 offset = 0UL; obj_gen = 0; flag = 0;
2903 r = sscanf(work_buffer, "%010lu %05u %c", &offset, &obj_gen, &flag);
2904 if ( r != 3 ||
2905 ((flag != 'n' && flag != 'f') ||
2906 (flag == 'n' &&
2907 (offset >= pf->file_size || (offset > 0 && offset < 4))))) {
2908 WARN("Invalid xref table entry [%lu]. PDF file is corrupt...", i);
2909 return -1;
2910 }
2911 if (!pf->xref_table[i].field2) {
2912 pf->xref_table[i].type = (flag == 'n');
2913 pf->xref_table[i].field2 = offset;
2914 pf->xref_table[i].field3 = obj_gen;
2915 }
2916 }
2917 }
2918
2919 return 1;
2920 }
2921
/*
 * Decode one big-endian field of "length" bytes from a cross-reference
 * stream and advance *p past it.
 *
 * A field of width zero is absent and yields the default "def" without
 * touching *p.  A negative width, which can only come from a malformed
 * /W array, is treated the same way instead of being used as a loop
 * count.
 */
static unsigned long
parse_xrefstm_field (const char **p, int length, unsigned long def)
{
  unsigned long val = 0;

  if (length <= 0)
    return def;

  while (length--) {
    val <<= 8;
    /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
    val |= (unsigned char) *((*p)++);
  }

  return val;
}
2937
2938 static int
parse_xrefstm_subsec(pdf_file * pf,const char ** p,long * length,int * W,int wsum,long first,long size)2939 parse_xrefstm_subsec (pdf_file *pf,
2940 const char **p, long *length,
2941 int *W, int wsum,
2942 long first, long size) {
2943 xref_entry *e;
2944
2945 if ((*length -= wsum*size) < 0)
2946 return -1;
2947
2948 if (pf->num_obj < first+size)
2949 extend_xref(pf, first+size); /* TODO: change! why? */
2950
2951 e = pf->xref_table + first;
2952 while (size--) {
2953 unsigned char type;
2954 unsigned long field2;
2955 unsigned short field3;
2956
2957 type = (unsigned char) parse_xrefstm_field(p, W[0], 1);
2958 if (type > 2)
2959 WARN("Unknown cross-reference stream entry type.");
2960 #if 0
2961 /* Not sure */
2962 else if (!W[1] || (type != 1 && !W[2]))
2963 return -1;
2964 #endif
2965
2966 field2 = (unsigned long) parse_xrefstm_field(p, W[1], 0);
2967 field3 = (unsigned short) parse_xrefstm_field(p, W[2], 0);
2968
2969 if (!e->field2) {
2970 e->type = type;
2971 e->field2 = field2;
2972 e->field3 = field3;
2973 }
2974 e++;
2975 }
2976
2977 return 0;
2978 }
2979
2980 static int
parse_xref_stream(pdf_file * pf,long xref_pos,pdf_obj ** trailer)2981 parse_xref_stream (pdf_file *pf, long xref_pos, pdf_obj **trailer)
2982 {
2983 pdf_obj *xrefstm, *size_obj, *W_obj, *index_obj;
2984 unsigned long size;
2985 long length;
2986 int W[3], i, wsum = 0;
2987 const char *p;
2988
2989 xrefstm = pdf_read_object(0, 0, pf, xref_pos, pf->file_size);
2990 if (!PDF_OBJ_STREAMTYPE(xrefstm))
2991 goto error;
2992
2993 {
2994 pdf_obj *tmp = pdf_stream_uncompress(xrefstm);
2995 if (!tmp)
2996 goto error;
2997 pdf_release_obj(xrefstm);
2998 xrefstm = tmp;
2999 }
3000
3001 *trailer = pdf_link_obj(pdf_stream_dict(xrefstm));
3002
3003 size_obj = pdf_lookup_dict(*trailer, "Size");
3004 if (!PDF_OBJ_NUMBERTYPE(size_obj))
3005 goto error;
3006 size = (unsigned long) pdf_number_value(size_obj);
3007
3008 length = pdf_stream_length(xrefstm);
3009
3010 W_obj = pdf_lookup_dict(*trailer, "W");
3011 if (!PDF_OBJ_ARRAYTYPE(W_obj) || pdf_array_length(W_obj) != 3)
3012 goto error;
3013
3014 for (i = 0; i < 3; i++) {
3015 pdf_obj *tmp = pdf_get_array(W_obj, i);
3016 if (!PDF_OBJ_NUMBERTYPE(tmp))
3017 goto error;
3018 wsum += (W[i] = (int) pdf_number_value(tmp));
3019 }
3020
3021 p = pdf_stream_dataptr(xrefstm);
3022
3023 index_obj = pdf_lookup_dict(*trailer, "Index");
3024 if (index_obj) {
3025 unsigned int index_len;
3026 if (!PDF_OBJ_ARRAYTYPE(index_obj) ||
3027 ((index_len = pdf_array_length(index_obj)) % 2 ))
3028 goto error;
3029
3030 i = 0;
3031 while (i < index_len) {
3032 pdf_obj *first = pdf_get_array(index_obj, i++);
3033 size_obj = pdf_get_array(index_obj, i++);
3034 if (!PDF_OBJ_NUMBERTYPE(first) ||
3035 !PDF_OBJ_NUMBERTYPE(size_obj) ||
3036 parse_xrefstm_subsec(pf, &p, &length, W, wsum,
3037 (long) pdf_number_value(first),
3038 (long) pdf_number_value(size_obj)))
3039 goto error;
3040 }
3041 } else if (parse_xrefstm_subsec(pf, &p, &length, W, wsum, 0, size))
3042 goto error;
3043
3044 if (length)
3045 WARN("Garbage in xref stream.");
3046
3047 pdf_release_obj(xrefstm);
3048
3049 return 1;
3050
3051 error:
3052 WARN("Cannot parse cross-reference stream.");
3053 if (xrefstm)
3054 pdf_release_obj(xrefstm);
3055 if (*trailer) {
3056 pdf_release_obj(*trailer);
3057 *trailer = NULL;
3058 }
3059 return 0;
3060 }
3061
3062 /* TODO: parse Version entry */
3063 static pdf_obj *
read_xref(pdf_file * pf)3064 read_xref (pdf_file *pf)
3065 {
3066 pdf_obj *trailer = NULL, *main_trailer = NULL;
3067 long xref_pos;
3068
3069 if (!(xref_pos = find_xref(pf->file)))
3070 goto error;
3071
3072 while (xref_pos) {
3073 pdf_obj *prev;
3074
3075 int res = parse_xref_table(pf, xref_pos);
3076 if (res > 0) {
3077 /* cross-reference table */
3078 pdf_obj *xrefstm;
3079
3080 if (!(trailer = parse_trailer(pf)))
3081 goto error;
3082
3083 if (!main_trailer)
3084 main_trailer = pdf_link_obj(trailer);
3085
3086 if ((xrefstm = pdf_lookup_dict(trailer, "XRefStm"))) {
3087 pdf_obj *new_trailer = NULL;
3088 if (PDF_OBJ_NUMBERTYPE(xrefstm) &&
3089 parse_xref_stream(pf, (long) pdf_number_value(xrefstm),
3090 &new_trailer))
3091 pdf_release_obj(new_trailer);
3092 else
3093 WARN("Skipping hybrid reference section.");
3094 /* Many PDF 1.5 xref streams use DecodeParms, which we cannot
3095 parse. This way we can use at least xref tables in hybrid
3096 documents. Or should we better stop parsing the file?
3097 */
3098 }
3099
3100 } else if (!res && parse_xref_stream(pf, xref_pos, &trailer)) {
3101 /* cross-reference stream */
3102 if (!main_trailer)
3103 main_trailer = pdf_link_obj(trailer);
3104 } else
3105 goto error;
3106
3107 if ((prev = pdf_lookup_dict(trailer, "Prev"))) {
3108 if (PDF_OBJ_NUMBERTYPE(prev))
3109 xref_pos = (long) pdf_number_value(prev);
3110 else
3111 goto error;
3112 } else
3113 xref_pos = 0;
3114
3115 pdf_release_obj(trailer);
3116 }
3117
3118 #if 0
3119 if (!pdf_lookup_dict(main_trailer, "Root")) {
3120 WARN("Trailer doesn't have catalog. Is this a correct PDF file?");
3121 goto error;
3122 }
3123 #endif
3124
3125 return main_trailer;
3126
3127 error:
3128 WARN("Error while parsing PDF file.");
3129 if (trailer)
3130 pdf_release_obj(trailer);
3131 if (main_trailer)
3132 pdf_release_obj(main_trailer);
3133 return NULL;
3134 }
3135
/*
 * Table of the PDF files opened so far, keyed by the identifier passed
 * to pdf_open().  Allocated by pdf_files_init() and released by
 * pdf_files_close().
 */
static struct ht_table *pdf_files = NULL;
3137
3138 static pdf_file *
pdf_file_new(FILE * file)3139 pdf_file_new (FILE *file)
3140 {
3141 pdf_file *pf;
3142 ASSERT(file);
3143 pf = NEW(1, pdf_file);
3144 pf->file = file;
3145 pf->trailer = NULL;
3146 pf->xref_table = NULL;
3147 pf->catalog = NULL;
3148 pf->num_obj = 0;
3149 pf->version = 0;
3150
3151 seek_end(file);
3152 pf->file_size = tell_position(file);
3153
3154 return pf;
3155 }
3156
3157 static void
pdf_file_free(pdf_file * pf)3158 pdf_file_free (pdf_file *pf)
3159 {
3160 unsigned long i;
3161
3162 if (!pf) {
3163 return;
3164 }
3165
3166 for (i = 0; i < pf->num_obj; i++) {
3167 if (pf->xref_table[i].direct)
3168 pdf_release_obj(pf->xref_table[i].direct);
3169 if (pf->xref_table[i].indirect)
3170 pdf_release_obj(pf->xref_table[i].indirect);
3171 }
3172
3173 RELEASE(pf->xref_table);
3174 if (pf->trailer)
3175 pdf_release_obj(pf->trailer);
3176 if (pf->catalog)
3177 pdf_release_obj(pf->catalog);
3178
3179 RELEASE(pf);
3180 }
3181
3182 void
pdf_files_init(void)3183 pdf_files_init (void)
3184 {
3185 pdf_files = NEW(1, struct ht_table);
3186 ht_init_table(pdf_files, (void (*)(void *)) pdf_file_free);
3187 }
3188
3189 int
pdf_file_get_version(pdf_file * pf)3190 pdf_file_get_version (pdf_file *pf)
3191 {
3192 ASSERT(pf);
3193 return pf->version;
3194 }
3195
3196 pdf_obj *
pdf_file_get_trailer(pdf_file * pf)3197 pdf_file_get_trailer (pdf_file *pf)
3198 {
3199 ASSERT(pf);
3200 return pdf_link_obj(pf->trailer);
3201 }
3202
3203 pdf_obj *
pdf_file_get_catalog(pdf_file * pf)3204 pdf_file_get_catalog (pdf_file *pf)
3205 {
3206 ASSERT(pf);
3207 return pf->catalog;
3208 }
3209
3210 pdf_file *
pdf_open(const char * ident,FILE * file)3211 pdf_open (const char *ident, FILE *file)
3212 {
3213 pdf_file *pf = NULL;
3214
3215 ASSERT(pdf_files);
3216
3217 if (ident)
3218 pf = (pdf_file *) ht_lookup_table(pdf_files, ident, strlen(ident));
3219
3220 if (pf) {
3221 pf->file = file;
3222 } else {
3223 pdf_obj *new_version;
3224 int version = check_for_pdf_version(file);
3225
3226 if (version < 1 || version > pdf_version) {
3227 WARN("pdf_open: Not a PDF 1.[1-%u] file.", pdf_version);
3228 return NULL;
3229 }
3230
3231 pf = pdf_file_new(file);
3232 pf->version = version;
3233
3234 if (!(pf->trailer = read_xref(pf)))
3235 goto error;
3236
3237 if (pdf_lookup_dict(pf->trailer, "Encrypt")) {
3238 WARN("PDF document is encrypted.");
3239 goto error;
3240 }
3241
3242 pf->catalog = pdf_deref_obj(pdf_lookup_dict(pf->trailer, "Root"));
3243 if (!PDF_OBJ_DICTTYPE(pf->catalog)) {
3244 WARN("Cannot read PDF document catalog. Broken PDF file?");
3245 goto error;
3246 }
3247
3248 new_version = pdf_deref_obj(pdf_lookup_dict(pf->catalog, "Version"));
3249 if (new_version) {
3250 unsigned int minor;
3251
3252 if (!PDF_OBJ_NAMETYPE(new_version) ||
3253 sscanf(pdf_name_value(new_version), "1.%u", &minor) != 1) {
3254 pdf_release_obj(new_version);
3255 WARN("Illegal Version entry in document catalog. Broken PDF file?");
3256 goto error;
3257 }
3258
3259 if (pf->version < minor)
3260 pf->version = minor;
3261
3262 pdf_release_obj(new_version);
3263 }
3264
3265 if (ident)
3266 ht_append_table(pdf_files, ident, strlen(ident), pf);
3267 }
3268
3269 return pf;
3270
3271 error:
3272 pdf_file_free(pf);
3273 return NULL;
3274 }
3275
3276 void
pdf_close(pdf_file * pf)3277 pdf_close (pdf_file *pf)
3278 {
3279 if (pf)
3280 pf->file = NULL;
3281 }
3282
3283 void
pdf_files_close(void)3284 pdf_files_close (void)
3285 {
3286 ASSERT(pdf_files);
3287 ht_clear_table(pdf_files);
3288 RELEASE(pdf_files);
3289 }
3290
/*
 * Rewind "file" and read the minor version from a "%PDF-1.x" header.
 *
 * Returns the minor version x, or -1 if the file does not start with
 * such a header or if x does not fit into an int.  The result is
 * computed as an int throughout, so the -1 sentinel does not depend on
 * converting an out-of-range unsigned value.
 */
static int
check_for_pdf_version (FILE *file)
{
  const unsigned int int_max = ((unsigned int) -1) >> 1;
  unsigned int minor;

  rewind(file);

  /* Peek at the first byte without consuming it. */
  if (ungetc(fgetc(file), file) != '%')
    return -1;
  if (fscanf(file, "%%PDF-1.%u", &minor) != 1)
    return -1;
  if (minor > int_max)
    return -1;

  return (int) minor;
}
3301
3302 int
check_for_pdf(FILE * file)3303 check_for_pdf (FILE *file)
3304 {
3305 int version = check_for_pdf_version(file);
3306
3307 if (version < 0) /* not a PDF file */
3308 return 0;
3309
3310 if (version <= pdf_version)
3311 return 1;
3312
3313 WARN("Version of PDF file (1.%d) is newer than version limit specification.",
3314 version);
3315 return 1;
3316 }
3317
3318 static int CDECL
import_dict(pdf_obj * key,pdf_obj * value,void * pdata)3319 import_dict (pdf_obj *key, pdf_obj *value, void *pdata)
3320 {
3321 pdf_obj *copy;
3322 pdf_obj *tmp;
3323
3324 copy = (pdf_obj *) pdata;
3325
3326 tmp = pdf_import_object(value);
3327 if (!tmp) {
3328 return -1;
3329 }
3330 pdf_add_dict(copy, pdf_link_obj(key), tmp);
3331
3332 return 0;
3333 }
3334
/*
 * Placeholder stored in an xref entry's "indirect" slot while the
 * object is being imported by pdf_import_indirect(); finding it there
 * again means the object refers to itself.  Only its address is
 * compared.
 */
static pdf_obj loop_marker = { PDF_OBJ_INVALID, 0, 0, 0, 0, NULL };
3336
3337 static pdf_obj *
pdf_import_indirect(pdf_obj * object)3338 pdf_import_indirect (pdf_obj *object)
3339 {
3340 pdf_file *pf = OBJ_FILE(object);
3341 unsigned long obj_num = OBJ_NUM(object);
3342 unsigned short obj_gen = OBJ_GEN(object);
3343
3344 pdf_obj *ref;
3345
3346 ASSERT(pf);
3347
3348 if (!checklabel(pf, obj_num, obj_gen)) {
3349 WARN("Can't resolve object: %lu %u", obj_num, obj_gen);
3350 return pdf_new_null();
3351 }
3352
3353 if ((ref = pf->xref_table[obj_num].indirect)) {
3354 if (ref == &loop_marker)
3355 ERROR("Loop in object hierarchy detected. Broken PDF file?");
3356 return pdf_link_obj(ref);
3357 } else {
3358 pdf_obj *obj, *tmp;
3359
3360 obj = pdf_get_object(pf, obj_num, obj_gen);
3361 if (!obj) {
3362 WARN("Could not read object: %lu %u", obj_num, obj_gen);
3363 return NULL;
3364 }
3365
3366 /* We mark the reference to be able to detect loops */
3367 pf->xref_table[obj_num].indirect = &loop_marker;
3368
3369 tmp = pdf_import_object(obj);
3370
3371 pf->xref_table[obj_num].indirect = ref = pdf_ref_obj(tmp);
3372
3373 pdf_release_obj(tmp);
3374 pdf_release_obj(obj);
3375
3376 return pdf_link_obj(ref);
3377 }
3378 }
3379
3380 /*
3381 * pdf_import_object recursively copies the object and those
3382 * referenced by it and changes the indirect references so that
3383 * they refer to the current output file. New indirect references
3384 * are remembered, which avoids duplicating objects when they
3385 * are imported several times.
3386 */
3387 pdf_obj *
pdf_import_object(pdf_obj * object)3388 pdf_import_object (pdf_obj *object)
3389 {
3390 pdf_obj *imported;
3391 pdf_obj *tmp;
3392 int i;
3393
3394 switch (pdf_obj_typeof(object)) {
3395
3396 case PDF_INDIRECT:
3397 if (OBJ_FILE(object)) {
3398 imported = pdf_import_indirect(object);
3399 } else {
3400 imported = pdf_link_obj(object);
3401 }
3402 break;
3403
3404 case PDF_STREAM:
3405 {
3406 pdf_obj *stream_dict;
3407
3408 tmp = pdf_import_object(pdf_stream_dict(object));
3409 if (!tmp)
3410 return NULL;
3411
3412 imported = pdf_new_stream(0);
3413 stream_dict = pdf_stream_dict(imported);
3414 pdf_merge_dict(stream_dict, tmp);
3415 pdf_release_obj(tmp);
3416 pdf_add_stream(imported,
3417 pdf_stream_dataptr(object),
3418 pdf_stream_length(object));
3419 }
3420 break;
3421
3422 case PDF_DICT:
3423
3424 imported = pdf_new_dict();
3425 if (pdf_foreach_dict(object, import_dict, imported) < 0) {
3426 pdf_release_obj(imported);
3427 return NULL;
3428 }
3429
3430 break;
3431
3432 case PDF_ARRAY:
3433
3434 imported = pdf_new_array();
3435 for (i = 0; i < pdf_array_length(object); i++) {
3436 tmp = pdf_import_object(pdf_get_array(object, i));
3437 if (!tmp) {
3438 pdf_release_obj(imported);
3439 return NULL;
3440 }
3441 pdf_add_array(imported, tmp);
3442 }
3443 break;
3444
3445 default:
3446 imported = pdf_link_obj(object);
3447 }
3448
3449 return imported;
3450 }
3451
3452
3453 /* returns 0 if indirect references point to the same object */
3454 int
pdf_compare_reference(pdf_obj * ref1,pdf_obj * ref2)3455 pdf_compare_reference (pdf_obj *ref1, pdf_obj *ref2)
3456 {
3457 pdf_indirect *data1, *data2;
3458
3459 ASSERT(PDF_OBJ_INDIRECTTYPE(ref1) && PDF_OBJ_INDIRECTTYPE(ref2));
3460
3461 data1 = (pdf_indirect *) ref1->data;
3462 data2 = (pdf_indirect *) ref2->data;
3463
3464 return data1->pf != data2->pf || data1->label != data2->label
3465 || data1->generation != data2->generation;
3466 }
3467