1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2 
3     Copyright (C) 2007-2014 by Jin-Hwan Cho and Shunsaku Hirata,
4     the dvipdfmx project team.
5 
6     Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
7 
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12 
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17 
18     You should have received a copy of the GNU General Public License
19     along with this program; if not, write to the Free Software
20     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21 */
22 
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26 
27 #include <ctype.h>
28 #include <string.h>
29 
30 #include "system.h"
31 #include "mem.h"
32 #include "error.h"
33 #include "mfileio.h"
34 #include "dpxutil.h"
35 #include "pdflimits.h"
36 #include "pdfencrypt.h"
37 #include "pdfparse.h"
38 
39 #ifdef HAVE_ZLIB
40 #include <zlib.h>
41 #endif /* HAVE_ZLIB */
42 
43 #include "pdfobj.h"
44 #include "pdfdev.h"
45 
/* Growth increments used when enlarging dynamically sized storage
   (the names suggest stream buffers, arrays and the indirect-object
   table respectively). */
#define STREAM_ALLOC_SIZE      4096u
#define ARRAY_ALLOC_SIZE       256
#define IND_OBJECTS_ALLOC_SIZE 512

/* Bits kept in pdf_obj.flags. */

/* The object must never be placed inside an object stream.
   Every stream object, for instance, carries this flag. */
#define OBJ_NO_OBJSTM   (1 << 0)
/* The object must never be encrypted.  With encryption turned on
   this additionally implies OBJ_NO_OBJSTM. */
#define OBJ_NO_ENCRYPT  (1 << 1)
56 
/* Generic container: every PDF object, whatever its type, is held in
   one of these; the type-specific payload hangs off "data". */
struct pdf_obj
{
  int            type;        /* object type tag */

  unsigned long  label;       /* only used for indirect objects;
                                 zero for all other objects */
  unsigned short generation;  /* only meaningful when "label" is used */
  unsigned       refcount;    /* number of links to this object */
  int            flags;       /* OBJ_NO_* bits */
  void          *data;        /* type-specific payload, may be NULL */
};
69 
/* Payload of a boolean object: zero is written as "false",
   anything else as "true". */
struct pdf_boolean
{
  char value;
};
74 
/* Payload of a number object. */
struct pdf_number
{
  double value;
};
79 
/* Payload of a string object.  "string" is NULL for an empty string;
   otherwise it holds "length" bytes followed by a terminating NUL.
   Note that "length" is only 16 bits wide. */
struct pdf_string
{
  unsigned char  *string;
  unsigned short  length;
};
85 
/* Payload of a name object: NUL-terminated text without the leading
   '/', or NULL for the empty name. */
struct pdf_name
{
  char *name;
};
90 
/* Payload of an array object: a growable vector of object pointers. */
struct pdf_array
{
  unsigned long    max;     /* allocated capacity of "values" */
  unsigned long    size;    /* number of slots in use */
  struct pdf_obj **values;  /* NULL until storage is allocated */
};
97 
/* Payload of a dictionary object: one key/value pair per node,
   chained through "next". */
struct pdf_dict
{
  struct pdf_obj  *key;
  struct pdf_obj  *value;
  struct pdf_dict *next;
};
104 
/* Payload of a stream object. */
struct pdf_stream
{
  struct pdf_obj *dict;           /* the stream dictionary */
  unsigned char  *stream;         /* buffered stream data */
  long           *objstm_data;    /* used for object streams */
  unsigned long   stream_length;  /* bytes currently stored */
  unsigned long   max_length;     /* presumably the allocated size of
                                     "stream" -- confirm in pdf_add_stream */
  unsigned char   _flags;
};
114 
115 struct pdf_indirect
116 {
117   pdf_file      *pf;
118   pdf_obj       *obj;             /* used when PF == NULL */
119   unsigned long label;
120   unsigned short generation;
121 };
122 
/* Short names for the payload types; a null object has no payload. */
typedef void                pdf_null;
typedef struct pdf_boolean  pdf_boolean;
typedef struct pdf_number   pdf_number;
typedef struct pdf_string   pdf_string;
typedef struct pdf_name     pdf_name;
typedef struct pdf_array    pdf_array;
typedef struct pdf_dict     pdf_dict;
typedef struct pdf_stream   pdf_stream;
typedef struct pdf_indirect pdf_indirect;
132 
/* Destination of the PDF being produced. */
static FILE *pdf_output_file = NULL;

/* Bytes written to pdf_output_file so far, and bytes written since the
   last newline; both are maintained by pdf_out() and pdf_out_char(). */
static long pdf_output_file_position = 0;
static long pdf_output_line_position = 0;
/* Reported as "Compression saved ... bytes" by pdf_out_flush(). */
static long compression_saved        = 0;

/* Scratch buffer shared by the small sprintf()-based writers. */
#define FORMAT_BUF_SIZE 4096
static char format_buffer[FORMAT_BUF_SIZE];
141 
142 typedef struct xref_entry
143 {
144   unsigned char  type;       /* object storage type              */
145   unsigned long  field2;     /* offset in file or object stream  */
146   unsigned short field3;     /* generation or index              */
147   pdf_obj       *direct;     /* used for imported objects        */
148   pdf_obj       *indirect;   /* used for imported objects        */
149 } xref_entry;
150 
151 static xref_entry *output_xref;
152 
153 static unsigned long pdf_max_ind_objects;
154 static unsigned long next_label;
155 
156 static unsigned long startxref;
157 
158 struct pdf_file
159 {
160   FILE       *file;
161   pdf_obj    *trailer;
162   xref_entry *xref_table;
163   pdf_obj    *catalog;
164   long        num_obj;
165   long        file_size;
166   int         version;
167 };
168 
169 static pdf_obj *output_stream;
170 
171 #define OBJSTM_MAX_OBJS  200
172 /* the limit is only 100 for linearized PDF */
173 
174 static int enc_mode;
175 static int doc_enc_mode;
176 
177 static pdf_obj *trailer_dict;
178 static pdf_obj *xref_stream;
179 
180 /* Internal static routines */
181 
182 static int check_for_pdf_version (FILE *file);
183 
184 static void pdf_flush_obj (pdf_obj *object, FILE *file);
185 static void pdf_label_obj (pdf_obj *object);
186 static void pdf_write_obj (pdf_obj *object, FILE *file);
187 
188 static void  set_objstm_data (pdf_obj *objstm, long *data);
189 static long *get_objstm_data (pdf_obj *objstm);
190 static void  release_objstm  (pdf_obj *objstm);
191 
192 static void pdf_out_char (FILE *file, char c);
193 static void pdf_out      (FILE *file, const void *buffer, long length);
194 
195 static pdf_obj *pdf_new_ref  (pdf_obj *object);
196 static void release_indirect (pdf_indirect *data);
197 static void write_indirect   (pdf_indirect *indirect, FILE *file);
198 
199 static void release_boolean (pdf_obj *data);
200 static void write_boolean   (pdf_boolean *data, FILE *file);
201 
202 static void write_null   (FILE *file);
203 
204 static void release_number (pdf_number *number);
205 static void write_number   (pdf_number *number, FILE *file);
206 
207 static void write_string   (pdf_string *str, FILE *file);
208 static void release_string (pdf_string *str);
209 
210 static void write_name   (pdf_name *name, FILE *file);
211 static void release_name (pdf_name *name);
212 
213 static void write_array   (pdf_array *array, FILE *file);
214 static void release_array (pdf_array *array);
215 
216 static void write_dict   (pdf_dict *dict, FILE *file);
217 static void release_dict (pdf_dict *dict);
218 
219 static void write_stream   (pdf_stream *stream, FILE *file);
220 static void release_stream (pdf_stream *stream);
221 
222 static int  verbose = 0;
223 static char compression_level = 9;
224 
/*
 * Select the compression level, 0 to 9.
 *
 * Without zlib support this always raises an error.  With a zlib that
 * lacks compress2() a warning is issued for any non-zero level.
 * Levels outside 0..9 raise an error.
 */
void
pdf_set_compression (int level)
{
#ifdef HAVE_ZLIB
#  ifndef HAVE_ZLIB_COMPRESS2
  if (level != 0) {
    WARN("Unable to set compression level -- your zlib doesn't have compress2().");
  }
#  endif
  if (level < 0 || level > 9) {
    ERROR("set_compression: invalid compression level: %d", level);
  } else {
    compression_level = level;
  }
#else  /* !HAVE_ZLIB */
  ERROR("You don't have compression compiled in. Possibly libz wasn't found by configure.");
#endif /* HAVE_ZLIB */
}
244 
245 static unsigned pdf_version = PDF_VERSION_DEFAULT;
246 
247 void
pdf_set_version(unsigned version)248 pdf_set_version (unsigned version)
249 {
250   /* Don't forget to update CIDFont_stdcc_def[] in cid.c too! */
251   if (version >= PDF_VERSION_MIN && version <= PDF_VERSION_MAX) {
252     pdf_version = version;
253   }
254 }
255 
256 unsigned
pdf_get_version(void)257 pdf_get_version (void)
258 {
259   return pdf_version;
260 }
261 
262 int
pdf_obj_get_verbose(void)263 pdf_obj_get_verbose(void)
264 {
265   return verbose;
266 }
267 
268 void
pdf_obj_set_verbose(void)269 pdf_obj_set_verbose(void)
270 {
271   verbose++;
272 }
273 
274 static pdf_obj *current_objstm = NULL;
275 static int do_objstm;
276 
277 static void
add_xref_entry(unsigned long label,unsigned char type,unsigned long field2,unsigned short field3)278 add_xref_entry (unsigned long label, unsigned char type, unsigned long field2, unsigned short field3)
279 {
280   if (label >= pdf_max_ind_objects) {
281     pdf_max_ind_objects = (label/IND_OBJECTS_ALLOC_SIZE+1)*IND_OBJECTS_ALLOC_SIZE;
282     output_xref = RENEW(output_xref, pdf_max_ind_objects, xref_entry);
283   }
284 
285   output_xref[label].type   = type;
286   output_xref[label].field2 = field2;
287   output_xref[label].field3 = field3;
288   output_xref[label].direct   = NULL;
289   output_xref[label].indirect = NULL;
290 }
291 
292 #define BINARY_MARKER "%\344\360\355\370\n"
293 void
pdf_out_init(const char * filename,int do_encryption)294 pdf_out_init (const char *filename, int do_encryption)
295 {
296   char v;
297 
298   output_xref = NULL;
299   pdf_max_ind_objects = 0;
300   add_xref_entry(0, 0, 0, 0xffff);
301   next_label = 1;
302 
303   if (pdf_version >= 5) {
304     xref_stream = pdf_new_stream(STREAM_COMPRESS);
305     xref_stream->flags |= OBJ_NO_ENCRYPT;
306     trailer_dict = pdf_stream_dict(xref_stream);
307     pdf_add_dict(trailer_dict, pdf_new_name("Type"), pdf_new_name("XRef"));
308     do_objstm = 1;
309   } else {
310     xref_stream = NULL;
311     trailer_dict = pdf_new_dict();
312     do_objstm = 0;
313   }
314 
315   output_stream = NULL;
316 
317   if (filename == NULL) { /* no filename: writing to stdout */
318 #ifdef WIN32
319     setmode(fileno(stdout), _O_BINARY);
320 #endif
321     pdf_output_file = stdout;
322   } else {
323     pdf_output_file = MFOPEN(filename, FOPEN_WBIN_MODE);
324     if (!pdf_output_file) {
325       if (strlen(filename) < 128)
326         ERROR("Unable to open \"%s\".", filename);
327       else
328         ERROR("Unable to open file.");
329     }
330   }
331   pdf_out(pdf_output_file, "%PDF-1.", strlen("%PDF-1."));
332   v = '0' + pdf_version;
333   pdf_out(pdf_output_file, &v, 1);
334   pdf_out(pdf_output_file, "\n", 1);
335   pdf_out(pdf_output_file, BINARY_MARKER, strlen(BINARY_MARKER));
336 
337   enc_mode = 0;
338   doc_enc_mode = do_encryption;
339 }
340 
341 static void
dump_xref_table(void)342 dump_xref_table (void)
343 {
344   long length;
345   unsigned long i;
346 
347   pdf_out(pdf_output_file, "xref\n", 5);
348 
349   length = sprintf(format_buffer, "%d %lu\n", 0, next_label);
350   pdf_out(pdf_output_file, format_buffer, length);
351 
352   /*
353    * Every space counts.  The space after the 'f' and 'n' is * *essential*.
354    * The PDF spec says the lines must be 20 characters long including the
355    * end of line character.
356    */
357   for (i = 0; i < next_label; i++) {
358     unsigned char type = output_xref[i].type;
359     if (type > 1)
360       ERROR("object type %hu not allowed in xref table", type);
361     length = sprintf(format_buffer, "%010lu %05hu %c \n",
362 		     output_xref[i].field2, output_xref[i].field3,
363 		     type ? 'n' : 'f');
364     pdf_out(pdf_output_file, format_buffer, length);
365   }
366 }
367 
368 static void
dump_trailer_dict(void)369 dump_trailer_dict (void)
370 {
371   pdf_out(pdf_output_file, "trailer\n", 8);
372   enc_mode = 0;
373   write_dict(trailer_dict->data, pdf_output_file);
374   pdf_release_obj(trailer_dict);
375   pdf_out_char(pdf_output_file, '\n');
376 }
377 
378 /*
379  * output a PDF 1.5 cross-reference stream;
380  * contributed by Matthias Franz (March 21, 2007)
381  */
382 static void
dump_xref_stream(void)383 dump_xref_stream (void)
384 {
385   unsigned long pos, i;
386   unsigned poslen;
387   unsigned char buf[7] = {0, 0, 0, 0, 0};
388 
389   pdf_obj *w;
390 
391   /* determine the necessary size of the offset field */
392   pos = startxref; /* maximal offset value */
393   poslen = 1;
394   while (pos >>= 8)
395     poslen++;
396 
397   w = pdf_new_array();
398   pdf_add_array(w, pdf_new_number(1));      /* type                */
399   pdf_add_array(w, pdf_new_number(poslen)); /* offset (big-endian) */
400   pdf_add_array(w, pdf_new_number(2));      /* generation          */
401   pdf_add_dict(trailer_dict, pdf_new_name("W"), w);
402 
403   /* We need the xref entry for the xref stream right now */
404   add_xref_entry(next_label-1, 1, startxref, 0);
405 
406   for (i = 0; i < next_label; i++) {
407     unsigned j;
408     unsigned short f3;
409     buf[0] = output_xref[i].type;
410     pos = output_xref[i].field2;
411     for (j = poslen; j--; ) {
412       buf[1+j] = (unsigned char) pos;
413       pos >>= 8;
414     }
415     f3 = output_xref[i].field3;
416     buf[poslen+1] = (unsigned char) (f3 >> 8);
417     buf[poslen+2] = (unsigned char) (f3);
418     pdf_add_stream(xref_stream, &buf, poslen+3);
419   }
420 
421   pdf_release_obj(xref_stream);
422 }
423 
424 void
pdf_out_flush(void)425 pdf_out_flush (void)
426 {
427   if (pdf_output_file) {
428     long length;
429 
430     /* Flush current object stream */
431     if (current_objstm) {
432       release_objstm(current_objstm);
433       current_objstm =NULL;
434     }
435 
436     /*
437      * Label xref stream - we need the number of correct objects
438      * for the xref stream dictionary (= trailer).
439      * Labelling it in pdf_out_init (with 1)  does not work (why?).
440      */
441     if (xref_stream)
442       pdf_label_obj(xref_stream);
443 
444     /* Record where this xref is for trailer */
445     startxref = pdf_output_file_position;
446 
447     pdf_add_dict(trailer_dict, pdf_new_name("Size"),
448 		 pdf_new_number(next_label));
449 
450     if (xref_stream)
451       dump_xref_stream();
452     else {
453       dump_xref_table();
454       dump_trailer_dict();
455     }
456 
457     /* Done with xref table */
458     RELEASE(output_xref);
459 
460     pdf_out(pdf_output_file, "startxref\n", 10);
461     length = sprintf(format_buffer, "%lu\n", startxref);
462     pdf_out(pdf_output_file, format_buffer, length);
463     pdf_out(pdf_output_file, "%%EOF\n", 6);
464 
465     MESG("\n");
466     if (verbose) {
467       if (compression_level > 0) {
468 	MESG("Compression saved %ld bytes%s\n", compression_saved,
469 	     pdf_version < 5 ? ". Try \"-V 5\" for better compression" : "");
470       }
471     }
472     MESG("%ld bytes written", pdf_output_file_position);
473 
474     MFCLOSE(pdf_output_file);
475   }
476 }
477 
478 void
pdf_error_cleanup(void)479 pdf_error_cleanup (void)
480 {
481   /*
482    * This routine is the cleanup required for an abnormal exit.
483    * For now, simply close the file.
484    */
485   if (pdf_output_file)
486     MFCLOSE(pdf_output_file);
487 }
488 
489 
490 void
pdf_set_root(pdf_obj * object)491 pdf_set_root (pdf_obj *object)
492 {
493   if (pdf_add_dict(trailer_dict, pdf_new_name("Root"), pdf_ref_obj(object))) {
494     ERROR("Root object already set!");
495   }
496   /* Adobe Readers don't like a document catalog inside an encrypted
497    * object stream, although the PDF v1.5 spec seems to allow this.
498    * Note that we don't set OBJ_NO_ENCRYPT since the name dictionary in
499    * a document catalog may contain strings, which should be encrypted.
500    */
501   if (doc_enc_mode)
502     object->flags |= OBJ_NO_OBJSTM;
503 }
504 
505 void
pdf_set_info(pdf_obj * object)506 pdf_set_info (pdf_obj *object)
507 {
508   if (pdf_add_dict(trailer_dict, pdf_new_name("Info"), pdf_ref_obj(object))) {
509     ERROR ("Info object already set!");
510   }
511 }
512 
513 void
pdf_set_id(pdf_obj * id)514 pdf_set_id (pdf_obj *id)
515 {
516   if (pdf_add_dict(trailer_dict, pdf_new_name("ID"), id)) {
517     ERROR ("ID already set!");
518   }
519 }
520 
521 void
pdf_set_encrypt(pdf_obj * encrypt)522 pdf_set_encrypt (pdf_obj *encrypt)
523 {
524   if (pdf_add_dict(trailer_dict, pdf_new_name("Encrypt"), pdf_ref_obj(encrypt))) {
525     ERROR("Encrypt object already set!");
526   }
527   encrypt->flags |= OBJ_NO_ENCRYPT;
528 }
529 
530 static
pdf_out_char(FILE * file,char c)531 void pdf_out_char (FILE *file, char c)
532 {
533   if (output_stream && file ==  pdf_output_file)
534     pdf_add_stream(output_stream, &c, 1);
535   else {
536     fputc(c, file);
537     /* Keep tallys for xref table *only* if writing a pdf file. */
538     if (file == pdf_output_file) {
539       pdf_output_file_position += 1;
540       if (c == '\n')
541         pdf_output_line_position  = 0;
542       else
543         pdf_output_line_position += 1;
544     }
545   }
546 }
547 
/* Lower-case hexadecimal digits, indexed by nibble value. */
static char xchar[] = "0123456789abcdef";

/* Write the low eight bits of c to f as two hexadecimal digits,
   high nibble first.  Note that c is evaluated twice. */
#define pdf_out_xchar(f,c) do {\
  pdf_out_char((f), xchar[((c) >> 4) & 0x0f]);\
  pdf_out_char((f), xchar[(c) & 0x0f]);\
} while (0)
554 
555 static
pdf_out(FILE * file,const void * buffer,long length)556 void pdf_out (FILE *file, const void *buffer, long length)
557 {
558   if (output_stream && file ==  pdf_output_file)
559     pdf_add_stream(output_stream, buffer, length);
560   else {
561     fwrite(buffer, 1, length, file);
562     /* Keep tallys for xref table *only* if writing a pdf file */
563     if (file == pdf_output_file) {
564       pdf_output_file_position += length;
565       pdf_output_line_position += length;
566       /* "foo\nbar\n "... */
567       if (length > 0 &&
568 	((const char *)buffer)[length-1] == '\n')
569         pdf_output_line_position = 0;
570     }
571   }
572 }
573 
574 /*  returns 1 if a white-space character is necessary to separate
575     an object of type1 followed by an object of type2              */
576 static
pdf_need_white(int type1,int type2)577 int pdf_need_white (int type1, int type2)
578 {
579   return !(type1 == PDF_STRING || type1 == PDF_ARRAY || type1 == PDF_DICT ||
580 	   type2 == PDF_STRING || type2 == PDF_NAME ||
581 	   type2 == PDF_ARRAY || type2 == PDF_DICT);
582 }
583 
584 static
pdf_out_white(FILE * file)585 void pdf_out_white (FILE *file)
586 {
587   if (file == pdf_output_file && pdf_output_line_position >= 80) {
588     pdf_out_char(file, '\n');
589   } else {
590     pdf_out_char(file, ' ');
591   }
592 }
593 
/* Raise an error unless o is non-NULL and of type t.  This expands to
   a bare "if" statement, so do not put it directly in front of an
   "else". */
#define TYPECHECK(o,t) if (!(o) || (o)->type != (t)) {\
  ERROR("typecheck: Invalid object type: %d %d (line %d)", (o) ? (o)->type : -1, (t), __LINE__);\
}

/* True when o is NULL or its type tag lies outside 1..PDF_UNDEFINED. */
#define INVALIDOBJ(o)  ((o) == NULL || (o)->type <= 0 || (o)->type > PDF_UNDEFINED)
599 
600 static pdf_obj *
pdf_new_obj(int type)601 pdf_new_obj(int type)
602 {
603   pdf_obj *result;
604 
605   if (type > PDF_UNDEFINED || type < 0)
606     ERROR("Invalid object type: %d", type);
607 
608   result = NEW(1, pdf_obj);
609   result->type  = type;
610   result->data  = NULL;
611   result->label      = 0;
612   result->generation = 0;
613   result->refcount   = 1;
614   result->flags      = 0;
615 
616   return result;
617 }
618 
619 int
pdf_obj_typeof(pdf_obj * object)620 pdf_obj_typeof (pdf_obj *object)
621 {
622   if (INVALIDOBJ(object))
623     return PDF_OBJ_INVALID;
624 
625   return object->type;
626 }
627 
628 static void
pdf_label_obj(pdf_obj * object)629 pdf_label_obj (pdf_obj *object)
630 {
631   if (INVALIDOBJ(object))
632     ERROR("pdf_label_obj(): passed invalid object.");
633 
634   /*
635    * Don't change label on an already labeled object. Ignore such calls.
636    */
637   if (object->label == 0) {
638     object->label      = next_label++;
639     object->generation = 0;
640   }
641 }
642 
643 /*
644  * Transfer the label assigned to the object src to the object dst.
645  * The object dst must not yet have been labeled.
646  */
647 void
pdf_transfer_label(pdf_obj * dst,pdf_obj * src)648 pdf_transfer_label (pdf_obj *dst, pdf_obj *src)
649 {
650   ASSERT(dst && !dst->label && src);
651 
652   dst->label      = src->label;
653   dst->generation = src->generation;
654   src->label      = 0;
655   src->generation = 0;
656 }
657 
658 /*
659  * This doesn't really copy the object, but allows it to be used without
660  * fear that somebody else will free it.
661  */
662 pdf_obj *
pdf_link_obj(pdf_obj * object)663 pdf_link_obj (pdf_obj *object)
664 {
665   if (INVALIDOBJ(object))
666     ERROR("pdf_link_obj(): passed invalid object.");
667 
668   object->refcount += 1;
669 
670   return object;
671 }
672 
673 
674 pdf_obj *
pdf_ref_obj(pdf_obj * object)675 pdf_ref_obj (pdf_obj *object)
676 {
677   if (INVALIDOBJ(object))
678     ERROR("pdf_ref_obj(): passed invalid object.");
679 
680   if (object->refcount == 0) {
681     MESG("\nTrying to refer already released object!!!\n");
682     pdf_write_obj(object, stderr);
683     ERROR("Cannot continue...");
684   }
685 
686   if (PDF_OBJ_INDIRECTTYPE(object)) {
687     return pdf_link_obj(object);
688   } else {
689     return pdf_new_ref(object);
690   }
691 }
692 
693 static void
release_indirect(pdf_indirect * data)694 release_indirect (pdf_indirect *data)
695 {
696   RELEASE(data);
697 }
698 
699 static void
write_indirect(pdf_indirect * indirect,FILE * file)700 write_indirect (pdf_indirect *indirect, FILE *file)
701 {
702   long length;
703 
704   ASSERT(!indirect->pf);
705 
706   length = sprintf(format_buffer, "%lu %hu R", indirect->label, indirect->generation);
707   pdf_out(file, format_buffer, length);
708 }
709 
710 /* The undefined object is used as a placeholder in pdfnames.c
711  * for objects which are referenced before they are defined.
712  */
713 pdf_obj *
pdf_new_undefined(void)714 pdf_new_undefined (void)
715 {
716   pdf_obj *result;
717 
718   result = pdf_new_obj(PDF_UNDEFINED);
719   result->data = NULL;
720 
721   return result;
722 }
723 
724 pdf_obj *
pdf_new_null(void)725 pdf_new_null (void)
726 {
727   pdf_obj *result;
728 
729   result = pdf_new_obj(PDF_NULL);
730   result->data = NULL;
731 
732   return result;
733 }
734 
/* Write the keyword for the null object. */
static void
write_null (FILE *file)
{
  static const char keyword[] = "null";

  pdf_out(file, keyword, 4);
}
740 
741 pdf_obj *
pdf_new_boolean(char value)742 pdf_new_boolean (char value)
743 {
744   pdf_obj     *result;
745   pdf_boolean *data;
746 
747   result = pdf_new_obj(PDF_BOOLEAN);
748   data   = NEW(1, pdf_boolean);
749   data->value  = value;
750   result->data = data;
751 
752   return result;
753 }
754 
755 static void
release_boolean(pdf_obj * data)756 release_boolean (pdf_obj *data)
757 {
758   RELEASE (data);
759 }
760 
761 static void
write_boolean(pdf_boolean * data,FILE * file)762 write_boolean (pdf_boolean *data, FILE *file)
763 {
764   if (data->value) {
765     pdf_out(file, "true", 4);
766   } else {
767     pdf_out(file, "false", 5);
768   }
769 }
770 
771 char
pdf_boolean_value(pdf_obj * object)772 pdf_boolean_value (pdf_obj *object)
773 {
774   pdf_boolean *data;
775 
776   TYPECHECK(object, PDF_BOOLEAN);
777 
778   data = object->data;
779 
780   return data->value;
781 }
782 
783 pdf_obj *
pdf_new_number(double value)784 pdf_new_number (double value)
785 {
786   pdf_obj    *result;
787   pdf_number *data;
788 
789   result = pdf_new_obj(PDF_NUMBER);
790   data   = NEW(1, pdf_number);
791   data->value  = value;
792   result->data = data;
793 
794   return result;
795 }
796 
797 static void
release_number(pdf_number * data)798 release_number (pdf_number *data)
799 {
800   RELEASE (data);
801 }
802 
803 static void
write_number(pdf_number * number,FILE * file)804 write_number (pdf_number *number, FILE *file)
805 {
806   int count;
807 
808   count = pdf_sprint_number(format_buffer, number->value);
809 
810   pdf_out(file, format_buffer, count);
811 }
812 
813 
814 void
pdf_set_number(pdf_obj * object,double value)815 pdf_set_number (pdf_obj *object, double value)
816 {
817   pdf_number *data;
818 
819   TYPECHECK(object, PDF_NUMBER);
820 
821   data = object->data;
822   data->value = value;
823 }
824 
825 double
pdf_number_value(pdf_obj * object)826 pdf_number_value (pdf_obj *object)
827 {
828   pdf_number *data;
829 
830   TYPECHECK(object, PDF_NUMBER);
831 
832   data = object->data;
833 
834   return data->value;
835 }
836 
837 pdf_obj *
pdf_new_string(const void * str,unsigned length)838 pdf_new_string (const void *str, unsigned length)
839 {
840   pdf_obj    *result;
841   pdf_string *data;
842 
843   ASSERT(str);
844 
845   result = pdf_new_obj(PDF_STRING);
846   data   = NEW(1, pdf_string);
847   result->data = data;
848   data->length = length;
849 
850   if (length) {
851     data->string = NEW(length+1, unsigned char);
852     memcpy(data->string, str, length);
853     /* Shouldn't assume NULL terminated. */
854     data->string[length] = '\0';
855   } else
856     data->string = NULL;
857 
858   return result;
859 }
860 
861 void *
pdf_string_value(pdf_obj * object)862 pdf_string_value (pdf_obj *object)
863 {
864   pdf_string *data;
865 
866   TYPECHECK(object, PDF_STRING);
867 
868   data = object->data;
869 
870   return data->string;
871 }
872 
873 unsigned
pdf_string_length(pdf_obj * object)874 pdf_string_length (pdf_obj *object)
875 {
876   pdf_string *data;
877 
878   TYPECHECK(object, PDF_STRING);
879 
880   data = object->data;
881 
882   return (unsigned) (data->length);
883 }
884 
/*
 * Escape a byte sequence for use inside a PDF literal string.
 *
 * buffer   destination; receives the escaped text, NOT NUL-terminated.
 * bufsize  size of buffer in bytes.
 * s, len   source bytes and their count.
 *
 * Bytes below 32 or above 126 become a backslash followed by exactly
 * three octal digits; '(', ')' and '\\' are preceded by a backslash;
 * everything else is copied verbatim.  Returns the number of bytes
 * stored in buffer.  An error is raised once fewer than four bytes of
 * buffer remain before a source byte is processed.
 */
int
pdfobj_escape_str (char *buffer, int bufsize, const unsigned char *s, int len)
{
  int result = 0;
  int i;

  for (i = 0; i < len; i++) {
    unsigned char ch;

    ch = s[i];
    /* The longest escape takes four bytes. */
    if (result > bufsize - 4)
      ERROR("pdfobj_escape_str: Buffer overflow");

    /*
     * We always write three octal digits. Optimization only gives few Kb
     * smaller size for most documents when zlib compressed.
     */
    if (ch < 32 || ch > 126) {
      /*
       * The digits are stored by hand rather than with sprintf():
       * sprintf() appends a NUL as a fifth byte, which lands one past
       * the end of the buffer when exactly four bytes are left.
       */
      buffer[result++] = '\\';
      buffer[result++] = (char) ('0' + ((ch >> 6) & 07));
      buffer[result++] = (char) ('0' + ((ch >> 3) & 07));
      buffer[result++] = (char) ('0' + (ch & 07));
    } else {
      switch (ch) {
      case '(':
      case ')':
      case '\\':
        buffer[result++] = '\\';
        buffer[result++] = (char) ch;
        break;
      default:
        buffer[result++] = (char) ch;
        break;
      }
    }
  }

  return result;
}
938 
939 static void
write_string(pdf_string * str,FILE * file)940 write_string (pdf_string *str, FILE *file)
941 {
942   unsigned char *s;
943   char wbuf[FORMAT_BUF_SIZE]; /* Shouldn't use format_buffer[]. */
944   int  nescc = 0, i, count;
945 
946   s = str->string;
947 
948   if (enc_mode)
949     pdf_encrypt_data(s, str->length);
950 
951   /*
952    * Count all ASCII non-printable characters.
953    */
954   for (i = 0; i < str->length; i++) {
955     if (!isprint(s[i]))
956       nescc++;
957   }
958   /*
959    * If the string contains much escaped chars, then we write it as
960    * ASCII hex string.
961    */
962   if (nescc > str->length / 3) {
963     pdf_out_char(file, '<');
964     for (i = 0; i < str->length; i++) {
965       pdf_out_xchar(file, s[i]);
966     }
967     pdf_out_char(file, '>');
968   } else {
969     pdf_out_char(file, '(');
970     /*
971      * This section of code probably isn't speed critical.  Escaping the
972      * characters in the string one at a time may seem slow, but it's
973      * safe if the formatted string length exceeds FORMAT_BUF_SIZE.
974      * Occasionally you see some long strings in PDF.  pdfobj_escape_str
975      * is also used for strings of text with no kerning.  These must be
976      * handled as quickly as possible since there are so many of them.
977      */
978     for (i = 0; i < str->length; i++) {
979       count = pdfobj_escape_str(wbuf, FORMAT_BUF_SIZE, &(s[i]), 1);
980       pdf_out(file, wbuf, count);
981     }
982     pdf_out_char(file, ')');
983   }
984 }
985 
986 static void
release_string(pdf_string * data)987 release_string (pdf_string *data)
988 {
989   if (data->string != NULL) {
990     RELEASE(data->string);
991     data->string = NULL;
992   }
993   RELEASE(data);
994 }
995 
996 void
pdf_set_string(pdf_obj * object,unsigned char * str,unsigned length)997 pdf_set_string (pdf_obj *object, unsigned char *str, unsigned length)
998 {
999   pdf_string *data;
1000 
1001   TYPECHECK(object, PDF_STRING);
1002 
1003   data = object->data;
1004   if (data->string != 0) {
1005     RELEASE(data->string);
1006   }
1007   if (length != 0) {
1008     data->length = length;
1009     data->string = NEW(length + 1, unsigned char);
1010     memcpy(data->string, str, length);
1011     data->string[length] = '\0';
1012   } else {
1013     data->length = 0;
1014     data->string = NULL;
1015   }
1016 }
1017 
1018 /* Name does *not* include the /. */
1019 pdf_obj *
pdf_new_name(const char * name)1020 pdf_new_name (const char *name)
1021 {
1022   pdf_obj  *result;
1023   unsigned  length;
1024   pdf_name *data;
1025 
1026   result = pdf_new_obj(PDF_NAME);
1027   data   = NEW (1, pdf_name);
1028   result->data = data;
1029   length = strlen(name);
1030   if (length != 0) {
1031     data->name = NEW(length+1, char);
1032     memcpy(data->name, name, length);
1033     data->name[length] = '\0';
1034   } else {
1035     data->name = NULL;
1036   }
1037 
1038   return result;
1039 }
1040 
1041 static void
write_name(pdf_name * name,FILE * file)1042 write_name (pdf_name *name, FILE *file)
1043 {
1044   char *s;
1045   int i, length;
1046 
1047   s      = name->name;
1048   length = name->name ? strlen(name->name) : 0;
1049   /*
1050    * From PDF Reference, 3rd ed., p.33:
1051    *
1052    *  Beginning with PDF 1.2, any character except null (character code 0)
1053    *  may be included in a name by writing its 2-digit hexadecimal code,
1054    *  preceded bythe number sign character (#); see implementation notes 3
1055    *  and 4 in Appendix H. This syntax is required in order to represent
1056    *  any of the delimiter or white-space characters or the number sign
1057    *  character itself; it is recommended but not required for characters
1058    *  whose codes are outside the range 33 (!) to 126 (~).
1059    */
1060 #ifndef is_delim
1061   /* Avoid '{' and '}' for PostScript compatibility? */
1062 #define is_delim(c) ((c) == '(' || (c) == '/' || \
1063                      (c) == '<' || (c) == '>' || \
1064                      (c) == '[' || (c) == ']' || \
1065                      (c) == '{' || (c) == '}' || \
1066                      (c) == '%')
1067 #endif
1068   pdf_out_char(file, '/');
1069   for (i = 0; i < length; i++) {
1070     if (s[i] < '!' || s[i] > '~' || s[i] == '#' || is_delim(s[i])) {
1071       /*     ^ "space" is here. */
1072       pdf_out_char (file, '#');
1073       pdf_out_xchar(file, s[i]);
1074     } else {
1075       pdf_out_char (file, s[i]);
1076     }
1077   }
1078 }
1079 
1080 static void
release_name(pdf_name * data)1081 release_name (pdf_name *data)
1082 {
1083   if (data->name != NULL) {
1084     RELEASE(data->name);
1085     data->name = NULL;
1086   }
1087   RELEASE(data);
1088 }
1089 
1090 char *
pdf_name_value(pdf_obj * object)1091 pdf_name_value (pdf_obj *object)
1092 {
1093   pdf_name *data;
1094 
1095   TYPECHECK(object, PDF_NAME);
1096 
1097   data = object->data;
1098 
1099   return data->name;
1100 }
1101 
1102 /*
1103  * We do not have pdf_name_length() since '\0' is not allowed
1104  * in PDF name object.
1105  */
1106 
1107 pdf_obj *
pdf_new_array(void)1108 pdf_new_array (void)
1109 {
1110   pdf_obj   *result;
1111   pdf_array *data;
1112 
1113   result = pdf_new_obj(PDF_ARRAY);
1114   data   = NEW(1, pdf_array);
1115   data->values = NULL;
1116   data->max    = 0;
1117   data->size   = 0;
1118   result->data = data;
1119 
1120   return result;
1121 }
1122 
1123 static void
write_array(pdf_array * array,FILE * file)1124 write_array (pdf_array *array, FILE *file)
1125 {
1126   pdf_out_char(file, '[');
1127   if (array->size > 0) {
1128     unsigned long i;
1129     int type1 = PDF_UNDEFINED, type2;
1130 
1131     for (i = 0; i < array->size; i++) {
1132       if (array->values[i]) {
1133 	type2 = array->values[i]->type;
1134 	if (type1 != PDF_UNDEFINED && pdf_need_white(type1, type2))
1135 	  pdf_out_white(file);
1136 	type1 = type2;
1137 	pdf_write_obj(array->values[i], file);
1138       } else
1139 	WARN("PDF array element #ld undefined.", i);
1140     }
1141   }
1142   pdf_out_char(file, ']');
1143 }
1144 
1145 pdf_obj *
pdf_get_array(pdf_obj * array,long idx)1146 pdf_get_array (pdf_obj *array, long idx)
1147 {
1148   pdf_obj   *result = NULL;
1149   pdf_array *data;
1150 
1151   TYPECHECK(array, PDF_ARRAY);
1152 
1153   data = array->data;
1154   if (idx < 0)
1155     result = data->values[idx + data->size];
1156   else if (idx < data->size) {
1157     result = data->values[idx];
1158   }
1159 
1160   return result;
1161 }
1162 
1163 unsigned int
pdf_array_length(pdf_obj * array)1164 pdf_array_length (pdf_obj *array)
1165 {
1166   pdf_array *data;
1167 
1168   TYPECHECK(array, PDF_ARRAY);
1169 
1170   data = (pdf_array *) array->data;
1171 
1172   return (unsigned int) data->size;
1173 }
1174 
1175 static void
release_array(pdf_array * data)1176 release_array (pdf_array *data)
1177 {
1178   unsigned long i;
1179 
1180   if (data->values) {
1181     for (i = 0; i < data->size; i++) {
1182       pdf_release_obj(data->values[i]);
1183       data->values[i] = NULL;
1184     }
1185     RELEASE(data->values);
1186     data->values = NULL;
1187   }
1188   RELEASE(data);
1189 }
1190 
1191 /*
1192  * The name pdf_add_array is misleading. It behaves differently than
1193  * pdf_add_dict(). This should be pdf_push_array().
1194  */
1195 void
pdf_add_array(pdf_obj * array,pdf_obj * object)1196 pdf_add_array (pdf_obj *array, pdf_obj *object)
1197 {
1198   pdf_array *data;
1199 
1200   TYPECHECK(array, PDF_ARRAY);
1201 
1202   data = array->data;
1203   if (data->size >= data->max) {
1204     data->max   += ARRAY_ALLOC_SIZE;
1205     data->values = RENEW(data->values, data->max, pdf_obj *);
1206   }
1207   data->values[data->size] = object;
1208   data->size++;
1209 
1210   return;
1211 }
1212 
1213 #if 0
/*
 * (Currently compiled out.) Store `object' at position idx of an array.
 *
 * If idx lies beyond the current end, the gap is filled with null
 * objects and the array size becomes idx + 1. Otherwise the element
 * already at idx (if any) is released and replaced.
 */
void
pdf_put_array (pdf_obj *array, unsigned idx, pdf_obj *object)
{
  pdf_array *data;
  long       i;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  /*
   * NOTE(review): the table grows by a single chunk only. If idx is
   * at or beyond max + ARRAY_ALLOC_SIZE the writes below still land
   * outside the allocation -- fix before re-enabling this code.
   */
  if (idx + 1 > data->max) {
    data->max   += ARRAY_ALLOC_SIZE;
    data->values = RENEW(data->values, data->max, pdf_obj *);
  }
  /*
   * A PostScript interpreter would raise a rangecheck error if
   * idx > data->size - 1. But pdf_new_array() doesn't set the
   * array size and pdf_add_array() grows the array dynamically,
   * so growing here as well is the less confusing choice.
   */
  if (idx + 1 > data->size) {
    for (i = data->size; i < idx; i++)
      data->values[i] = pdf_new_null(); /* release_array() won't work without this */
    data->values[idx] = object;
    data->size = idx + 1;
  } else {
    if (data->values[idx])
      pdf_release_obj(data->values[idx]);
    data->values[idx] = object;
  }
}
1244 
/*
 * (Currently compiled out.) Remove the first element of an array and
 * return it; the remaining elements move down by one slot. Returns NULL
 * for an empty array.
 *
 * Easily leaks memory: the removed element is handed back without being
 * released, so the caller has to release it.
 */
pdf_obj *
pdf_shift_array (pdf_obj *array)
{
  pdf_obj   *result = NULL;
  pdf_array *data;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (data->size > 0) {
    int i;

    result = data->values[0];
    /* Close the gap left by the removed head element. */
    for (i = 1; i < data->size; i++)
      data->values[i-1] = data->values[i];
    data->size--;
  }

  return result;
}
1266 #endif
1267 
1268 /* Prepend an object to an array */
1269 static void
pdf_unshift_array(pdf_obj * array,pdf_obj * object)1270 pdf_unshift_array (pdf_obj *array, pdf_obj *object)
1271 {
1272   pdf_array *data;
1273 
1274   TYPECHECK(array, PDF_ARRAY);
1275 
1276   data = array->data;
1277   if (data->size >= data->max) {
1278     data->max   += ARRAY_ALLOC_SIZE;
1279     data->values = RENEW(data->values, data->max, pdf_obj *);
1280   }
1281   memmove(&data->values[1], data->values, data->size * sizeof(pdf_obj *));
1282   data->values[0] = object;
1283   data->size++;
1284 }
1285 
1286 #if 0
/*
 * (Currently compiled out.) Remove the last element of an array and
 * return it, or return NULL if the array is empty. The element is not
 * released here.
 */
pdf_obj *
pdf_pop_array (pdf_obj *array)
{
  pdf_obj   *result;
  pdf_array *data;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (data->size > 0) {
    result = data->values[data->size - 1];
    data->size--;
  } else {
    result = NULL;
  }

  return result;
}
1305 #endif
1306 
1307 static void
write_dict(pdf_dict * dict,FILE * file)1308 write_dict (pdf_dict *dict, FILE *file)
1309 {
1310 #if 0
1311   pdf_out (file, "<<\n", 3); /* dropping \n saves few kb. */
1312 #else
1313   pdf_out (file, "<<", 2);
1314 #endif
1315   while (dict->key != NULL) {
1316     pdf_write_obj(dict->key, file);
1317     if (pdf_need_white(PDF_NAME, (dict->value)->type)) {
1318       pdf_out_white(file);
1319     }
1320     pdf_write_obj(dict->value, file);
1321 #if 0
1322     pdf_out_char (file, '\n'); /* removing this saves few kb. */
1323 #endif
1324     dict = dict->next;
1325   }
1326   pdf_out (file, ">>", 2);
1327 }
1328 
1329 pdf_obj *
pdf_new_dict(void)1330 pdf_new_dict (void)
1331 {
1332   pdf_obj  *result;
1333   pdf_dict *data;
1334 
1335   result = pdf_new_obj(PDF_DICT);
1336   data   = NEW(1, pdf_dict);
1337   data->key    = NULL;
1338   data->value  = NULL;
1339   data->next   = NULL;
1340   result->data = data;
1341 
1342   return result;
1343 }
1344 
1345 static void
release_dict(pdf_dict * data)1346 release_dict (pdf_dict *data)
1347 {
1348   pdf_dict *next;
1349 
1350   while (data != NULL && data->key != NULL) {
1351     pdf_release_obj(data->key);
1352     pdf_release_obj(data->value);
1353     data->key   = NULL;
1354     data->value = NULL;
1355     next = data->next;
1356     RELEASE(data);
1357     data = next;
1358   }
1359   RELEASE(data);
1360 }
1361 
1362 /* Array is ended by a node with NULL this pointer */
1363 /* pdf_add_dict returns 0 if the key is new and non-zero otherwise */
1364 int
pdf_add_dict(pdf_obj * dict,pdf_obj * key,pdf_obj * value)1365 pdf_add_dict (pdf_obj *dict, pdf_obj *key, pdf_obj *value)
1366 {
1367   pdf_dict *data, *new_node;
1368 
1369   TYPECHECK(dict, PDF_DICT);
1370   TYPECHECK(key,  PDF_NAME);
1371 
1372   /* It seems that NULL is sometimes used for null object... */
1373   if (value != NULL && INVALIDOBJ(value))
1374     ERROR("pdf_add_dict(): Passed invalid value");
1375 
1376   /* If this key already exists, simply replace the value */
1377   for (data = dict->data; data->key != NULL; data = data->next) {
1378     if (!strcmp(pdf_name_value(key), pdf_name_value(data->key))) {
1379       /* Release the old value */
1380       pdf_release_obj(data->value);
1381       /* Release the new key (we don't need it) */
1382       pdf_release_obj(key);
1383       data->value = value;
1384       return 1;
1385     }
1386   }
1387   /*
1388    * We didn't find the key. We build a new "end" node and add
1389    * the new key just before the end
1390    */
1391   new_node = NEW (1, pdf_dict);
1392   new_node->key = NULL;
1393   new_node->value = NULL;
1394   new_node->next = NULL;
1395   data->next  = new_node;
1396   data->key   = key;
1397   data->value = value;
1398   return 0;
1399 }
1400 
1401 #if 0
/*
 * (Currently compiled out.) Variant of pdf_add_dict() that takes the
 * key as a C string. An existing entry gets its value replaced (the
 * old value is released); otherwise a new entry with a newly created
 * name object is appended.
 */
void
pdf_put_dict (pdf_obj *dict, const char *key, pdf_obj *value)
{
  pdf_dict *data;

  TYPECHECK(dict, PDF_DICT);

  if (!key) {
    ERROR("pdf_put_dict(): Passed invalid key.");
  }
  /* It seems that NULL is sometimes used for null object... */
  /* NOTE(review): the message below names pdf_add_dict(), not this function. */
  if (value != NULL && INVALIDOBJ(value)) {
    ERROR("pdf_add_dict(): Passed invalid value.");
  }

  data = dict->data;

  /* Look for an existing entry; on a hit `data' is left on that entry. */
  while (data->key != NULL) {
    if (!strcmp(key, pdf_name_value(data->key))) {
      pdf_release_obj(data->value);
      data->value = value;
      break;
    }
    data = data->next;
  }

  /*
   * If we didn't find the key, build a new "end" node and add
   * the new key just before the end
   */
  if (data->key == NULL) {
    pdf_dict *new_node;

    new_node = NEW (1, pdf_dict);
    new_node->key   = NULL;
    new_node->value = NULL;
    new_node->next  = NULL;
    data->next  = new_node;
    data->key   = pdf_new_name(key);
    data->value = value;
  }
}
1444 #endif
1445 
1446 /* pdf_merge_dict makes a link for each item in dict2 before stealing it */
1447 void
pdf_merge_dict(pdf_obj * dict1,pdf_obj * dict2)1448 pdf_merge_dict (pdf_obj *dict1, pdf_obj *dict2)
1449 {
1450   pdf_dict *data;
1451 
1452   TYPECHECK(dict1, PDF_DICT);
1453   TYPECHECK(dict2, PDF_DICT);
1454 
1455   data = dict2->data;
1456   while (data->key != NULL) {
1457     pdf_add_dict(dict1, pdf_link_obj(data->key), pdf_link_obj(data->value));
1458     data = data->next;
1459   }
1460 }
1461 
1462 int
pdf_foreach_dict(pdf_obj * dict,int (* proc)(pdf_obj *,pdf_obj *,void *),void * pdata)1463 pdf_foreach_dict (pdf_obj *dict,
1464 		  int (*proc) (pdf_obj *, pdf_obj *, void *), void *pdata)
1465 {
1466   int       error = 0;
1467   pdf_dict *data;
1468 
1469   ASSERT(proc);
1470 
1471   TYPECHECK(dict, PDF_DICT);
1472 
1473   data = dict->data;
1474   while (!error &&
1475 	 data->key != NULL) {
1476     error = proc(data->key, data->value, pdata);
1477     data = data->next;
1478   }
1479 
1480   return error;
1481 }
1482 
1483 #define pdf_match_name(o,s) ((o) && (s) && !strcmp(((pdf_name *)(o)->data)->name, (s)))
1484 pdf_obj *
pdf_lookup_dict(pdf_obj * dict,const char * name)1485 pdf_lookup_dict (pdf_obj *dict, const char *name)
1486 {
1487   pdf_dict *data;
1488 
1489   ASSERT(name);
1490 
1491   TYPECHECK(dict, PDF_DICT);
1492 
1493   data = dict->data;
1494   while (data->key != NULL) {
1495     if (!strcmp(name, pdf_name_value(data->key))) {
1496       return data->value;
1497     }
1498     data = data->next;
1499   }
1500 
1501   return NULL;
1502 }
1503 
1504 /* Returns array of dictionary keys */
1505 pdf_obj *
pdf_dict_keys(pdf_obj * dict)1506 pdf_dict_keys (pdf_obj *dict)
1507 {
1508   pdf_obj  *keys;
1509   pdf_dict *data;
1510 
1511   TYPECHECK(dict, PDF_DICT);
1512 
1513   keys = pdf_new_array();
1514   for (data = dict->data; (data &&
1515 			   data->key != NULL); data = data->next) {
1516     /* We duplicate name object rather than linking keys.
1517      * If we forget to free keys, broken PDF is generated.
1518      */
1519     pdf_add_array(keys, pdf_new_name(pdf_name_value(data->key)));
1520   }
1521 
1522   return keys;
1523 }
1524 
1525 void
pdf_remove_dict(pdf_obj * dict,const char * name)1526 pdf_remove_dict (pdf_obj *dict, const char *name)
1527 {
1528   pdf_dict *data, **data_p;
1529 
1530   TYPECHECK(dict, PDF_DICT);
1531 
1532   data   = dict->data;
1533   data_p = (pdf_dict **) (void *) &(dict->data);
1534   while (data->key != NULL) {
1535     if (pdf_match_name(data->key, name)) {
1536       pdf_release_obj(data->key);
1537       pdf_release_obj(data->value);
1538       *data_p = data->next;
1539       RELEASE(data);
1540       break;
1541     }
1542     data_p = &(data->next);
1543     data   = data->next;
1544   }
1545 }
1546 
1547 pdf_obj *
pdf_new_stream(int flags)1548 pdf_new_stream (int flags)
1549 {
1550   pdf_obj    *result;
1551   pdf_stream *data;
1552 
1553   result = pdf_new_obj(PDF_STREAM);
1554   data   = NEW(1, pdf_stream);
1555   /*
1556    * Although we are using an arbitrary pdf_object here, it must have
1557    * type=PDF_DICT and cannot be an indirect reference.  This will be
1558    * checked by the output routine.
1559    */
1560   data->dict   = pdf_new_dict();
1561   data->_flags = flags;
1562   data->stream = NULL;
1563   data->stream_length = 0;
1564   data->max_length    = 0;
1565   data->objstm_data = NULL;
1566 
1567   result->data = data;
1568   result->flags |= OBJ_NO_OBJSTM;
1569 
1570   return result;
1571 }
1572 
/*
 * Write a stream object: its dictionary, the "stream" keyword, the
 * (optionally compressed and encrypted) data, and "endstream".
 *
 * The stream's own buffer is left untouched; all processing happens on
 * a private copy. The stream dictionary, however, IS modified: /Length
 * is (re)set on every call and, when compression is applied,
 * FlateDecode is added to /Filter.
 */
static void
write_stream (pdf_stream *stream, FILE *file)
{
  unsigned char *filtered;
  unsigned long  filtered_length;
  unsigned long  buffer_length;
  unsigned char *buffer;

  /*
   * Always work from a copy of the stream. All filters read from
   * "filtered" and leave their result in "filtered".
   */
#if 0
  filtered = NEW(stream->stream_length + 1, unsigned char);
#endif
  /*
   * NOTE(review): for an empty stream this is a zero-byte allocation
   * and a memcpy() from a buffer that pdf_new_stream() initialised to
   * NULL -- confirm NEW() and the C library tolerate that.
   */
  filtered = NEW(stream->stream_length, unsigned char);
  memcpy(filtered, stream->stream, stream->stream_length);
  filtered_length = stream->stream_length;

#if 0
  if (stream->stream_length < 10)
    stream->_flags &= ^STREAM_COMPRESS;
#endif

#ifdef HAVE_ZLIB
  /* Apply compression filter if requested */
  if (stream->stream_length > 0 &&
      (stream->_flags & STREAM_COMPRESS) &&
      compression_level > 0) {

    pdf_obj *filters = pdf_lookup_dict(stream->dict, "Filter");

    /* Output buffer: input size plus some head-room for incompressible
     * data (presumably zlib's documented worst case -- TODO confirm). */
    buffer_length = filtered_length + filtered_length/1000 + 14;
    buffer = NEW(buffer_length, unsigned char);
    {
      pdf_obj *filter_name = pdf_new_name("FlateDecode");

      if (filters)
        /*
         * FlateDecode is the first filter to be applied to the stream.
         * pdf_unshift_array() type-checks for PDF_ARRAY, so an existing
         * /Filter entry is assumed to be an array here.
         */
        pdf_unshift_array(filters, filter_name);
      else
        /*
         * Adding the filter as a name instead of a one-element array
         * is crucial because otherwise Adobe Reader cannot read the
         * cross-reference stream any more, cf. the PDF v1.5 Errata.
         */
        pdf_add_dict(stream->dict, pdf_new_name("Filter"), filter_name);
    }
#ifdef HAVE_ZLIB_COMPRESS2
    if (compress2(buffer, &buffer_length, filtered,
		  filtered_length, compression_level)) {
      ERROR("Zlib error");
    }
#else
    if (compress(buffer, &buffer_length, filtered,
		 filtered_length)) {
      ERROR ("Zlib error");
    }
#endif /* HAVE_ZLIB_COMPRESS2 */
    RELEASE(filtered);
    /* Net saving: bytes removed by compression minus the bytes the
     * extra filter entry adds to the dictionary. */
    compression_saved += filtered_length - buffer_length
      - (filters ? strlen("/FlateDecode "): strlen("/Filter/FlateDecode\n"));

    /* The compressed buffer now takes the place of the working copy. */
    filtered        = buffer;
    filtered_length = buffer_length;
  }
#endif /* HAVE_ZLIB */

#if 0
  /*
   * An optional end-of-line marker preceding the "endstream" is
   * not part of stream data. See, PDF Reference 4th ed., p. 38.
   */
  /* Add a '\n' if the last character wasn't one */
  if (filtered_length > 0 &&
      filtered[filtered_length-1] != '\n') {
    filtered[filtered_length] = '\n';
    filtered_length++;
  }
#endif
  /* /Length is the size of the data as it will appear in the file. */
  pdf_add_dict(stream->dict,
	       pdf_new_name("Length"), pdf_new_number(filtered_length));

  pdf_write_obj(stream->dict, file);

  pdf_out(file, "\nstream\n", 8);

  /* Encryption is applied last, to the working copy. */
  if (enc_mode)
    pdf_encrypt_data(filtered, filtered_length);

  if (filtered_length > 0) {
    pdf_out(file, filtered, filtered_length);
  }
  RELEASE(filtered);

  /*
   * This stream length "object" gets reset every time write_stream is
   * called for the stream object.
   * If this stream gets written more than once with different
   * filters, this could be a problem.
   */

  pdf_out(file, "\n", 1);
  pdf_out(file, "endstream", 9);
}
1680 
1681 static void
release_stream(pdf_stream * stream)1682 release_stream (pdf_stream *stream)
1683 {
1684   pdf_release_obj(stream->dict);
1685   stream->dict = NULL;
1686 
1687   if (stream->stream) {
1688     RELEASE(stream->stream);
1689     stream->stream = NULL;
1690   }
1691 
1692   if (stream->objstm_data) {
1693     RELEASE(stream->objstm_data);
1694     stream->objstm_data = NULL;
1695   }
1696 
1697   RELEASE(stream);
1698 }
1699 
1700 pdf_obj *
pdf_stream_dict(pdf_obj * stream)1701 pdf_stream_dict (pdf_obj *stream)
1702 {
1703   pdf_stream *data;
1704 
1705   TYPECHECK(stream, PDF_STREAM);
1706 
1707   data = stream->data;
1708 
1709   return data->dict;
1710 }
1711 
1712 const void *
pdf_stream_dataptr(pdf_obj * stream)1713 pdf_stream_dataptr (pdf_obj *stream)
1714 {
1715   pdf_stream *data;
1716 
1717   TYPECHECK(stream, PDF_STREAM);
1718 
1719   data = stream->data;
1720 
1721   return (const void *) data->stream;
1722 }
1723 
1724 long
pdf_stream_length(pdf_obj * stream)1725 pdf_stream_length (pdf_obj *stream)
1726 {
1727   pdf_stream *data;
1728 
1729   TYPECHECK(stream, PDF_STREAM);
1730 
1731   data = stream->data;
1732 
1733   return (long) data->stream_length;
1734 }
1735 
1736 static void
set_objstm_data(pdf_obj * objstm,long * data)1737 set_objstm_data (pdf_obj *objstm, long *data) {
1738   TYPECHECK(objstm, PDF_STREAM);
1739 
1740   ((pdf_stream *) objstm->data)->objstm_data = data;
1741 }
1742 
1743 static long *
get_objstm_data(pdf_obj * objstm)1744 get_objstm_data (pdf_obj *objstm) {
1745   TYPECHECK(objstm, PDF_STREAM);
1746 
1747   return ((pdf_stream *) objstm->data)->objstm_data;
1748 }
1749 
1750 void
pdf_add_stream(pdf_obj * stream,const void * stream_data,long length)1751 pdf_add_stream (pdf_obj *stream, const void *stream_data, long length)
1752 {
1753   pdf_stream *data;
1754 
1755   TYPECHECK(stream, PDF_STREAM);
1756 
1757   if (length < 1)
1758     return;
1759   data = stream->data;
1760   if (data->stream_length + length > data->max_length) {
1761     data->max_length += length + STREAM_ALLOC_SIZE;
1762     data->stream      = RENEW(data->stream, data->max_length, unsigned char);
1763   }
1764   memcpy(data->stream + data->stream_length, stream_data, length);
1765   data->stream_length += length;
1766 }
1767 
1768 #if HAVE_ZLIB
1769 #define WBUF_SIZE 4096
1770 int
pdf_add_stream_flate(pdf_obj * dst,const void * data,long len)1771 pdf_add_stream_flate (pdf_obj *dst, const void *data, long len)
1772 {
1773   z_stream z;
1774   Bytef    wbuf[WBUF_SIZE];
1775 
1776   z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL;
1777 
1778   z.next_in  = (z_const Bytef *) data; z.avail_in  = len;
1779   z.next_out = (Bytef *) wbuf; z.avail_out = WBUF_SIZE;
1780 
1781   if (inflateInit(&z) != Z_OK) {
1782     WARN("inflateInit() failed.");
1783     return -1;
1784   }
1785 
1786   for (;;) {
1787     int status;
1788     status = inflate(&z, Z_NO_FLUSH);
1789     if (status == Z_STREAM_END)
1790       break;
1791     else if (status != Z_OK) {
1792       WARN("inflate() failed. Broken PDF file?");
1793       inflateEnd(&z);
1794       return -1;
1795     }
1796 
1797     if (z.avail_out == 0) {
1798       pdf_add_stream(dst, wbuf, WBUF_SIZE);
1799       z.next_out  = wbuf;
1800       z.avail_out = WBUF_SIZE;
1801     }
1802   }
1803 
1804   if (WBUF_SIZE - z.avail_out > 0)
1805     pdf_add_stream(dst, wbuf, WBUF_SIZE - z.avail_out);
1806 
1807   return (inflateEnd(&z) == Z_OK ? 0 : -1);
1808 }
1809 
1810 
/* DecodeParms for FlateDecode
 *
 * Predictor parameters as filled in by get_decode_parms() from a
 * DecodeParms dictionary. Defaults (used when an entry is absent) are
 * given with each field.
 */
 struct decode_parms {
  int predictor;          /* /Predictor, default 1 (1: none, 2: TIFF, 10-15: PNG) */
  int colors;             /* /Colors, default 1 */
  int bits_per_component; /* /BitsPerComponent, default 8; 1, 2, 4, 8 or 16 */
  int columns;            /* /Columns, default 1 */
  /* EarlyChange unsupported */
 };
1821 
1822 static int
get_decode_parms(struct decode_parms * parms,pdf_obj * dict)1823 get_decode_parms (struct decode_parms *parms, pdf_obj *dict)
1824 {
1825   pdf_obj *tmp;
1826 
1827   ASSERT(dict && parms);
1828   ASSERT(PDF_OBJ_DICTTYPE(dict));
1829 
1830   /* Fill with default values */
1831   parms->predictor = 1;
1832   parms->colors    = 1;
1833   parms->bits_per_component = 8;
1834   parms->columns   = 1;
1835 
1836   tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Predictor"));
1837   if (tmp)
1838     parms->predictor = pdf_number_value(tmp);
1839   tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Colors"));
1840   if (tmp)
1841     parms->colors = pdf_number_value(tmp);
1842   tmp = pdf_deref_obj(pdf_lookup_dict(dict, "BitsPerComponent"));
1843   if (tmp)
1844     parms->bits_per_component = pdf_number_value(tmp);
1845   tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Columns"));
1846   if (tmp)
1847     parms->columns = pdf_number_value(tmp);
1848 
1849   if (parms->bits_per_component != 1 &&
1850       parms->bits_per_component != 2 &&
1851       parms->bits_per_component != 4 &&
1852       parms->bits_per_component != 8 &&
1853       parms->bits_per_component != 16) {
1854       WARN("Invalid BPC value in DecodeParms: %d", parms->bits_per_component);
1855       return -1;
1856   } else if (parms->predictor <= 0 || parms->colors <= 0 ||
1857              parms->columns <= 0)
1858     return -1;
1859   return 0;
1860 }
1861 
1862 /* From Xpdf version 3.04
1863  * I'm not sure if I properly ported... Untested.
1864  */
1865 #define PREDICTOR_TIFF2_MAX_COLORS 32
1866 static int
filter_row_TIFF2(unsigned char * dst,const unsigned char * src,struct decode_parms * parms)1867 filter_row_TIFF2 (unsigned char *dst, const unsigned char *src,
1868                   struct decode_parms *parms)
1869 {
1870   const unsigned char *p = src;
1871   unsigned char  col[PREDICTOR_TIFF2_MAX_COLORS];
1872   /* bits_per_component < 8 here */
1873   long mask = (1 << parms->bits_per_component) - 1;
1874   long inbuf, outbuf; /* 2 bytes buffer */
1875   int  i, ci, j, k, inbits, outbits;
1876 
1877   if (parms->colors > PREDICTOR_TIFF2_MAX_COLORS) {
1878     WARN("Sorry, Colors value > %d not supported for TIFF 2 predictor",
1879          PREDICTOR_TIFF2_MAX_COLORS);
1880     return -1;
1881   }
1882 
1883   memset(col, 0, parms->colors);
1884   inbuf = outbuf = 0; inbits = outbits = 0;
1885   j = k = 0;
1886   for (i = 0; i < parms->columns; i++) {
1887     /* expanding each color component into an 8-bits bytes array */
1888     for (ci = 0; ci < parms->colors; ci++) {
1889       if (inbits < parms->bits_per_component) {
1890          /* need more byte */
1891          inbuf   = (inbuf << 8) | p[j++];
1892          inbits += 8;
1893       }
1894       /* predict current color component */
1895       col[ci]  = (unsigned char) ((col[ci] +
1896                  (inbuf >> (inbits - parms->bits_per_component))) & mask);
1897       inbits  -= parms->bits_per_component; /* consumed bpc bits */
1898       /* append newly predicted color component value */
1899       outbuf   = (outbuf << parms->bits_per_component) | col[ci];
1900       outbits += parms->bits_per_component;
1901       if (outbits >= 8) { /* flush */
1902         dst[k++] = (unsigned char) (outbuf >> (outbits - 8));
1903         outbits -= 8;
1904       }
1905     }
1906   }
1907   if (outbits > 0) {
1908     dst[k] = (unsigned char) (outbuf << (8 - outbits));
1909   }
1910 
1911   return 0;
1912 }
1913 
1914 /* This routine is inefficient. Length is typically 4 for Xref streams.
1915  * Especially, calling pdf_add_stream() for each 4 bytes append is highly
1916  * inefficient.
1917  */
1918 static int
filter_decoded(pdf_obj * dst,const void * src,long srclen,struct decode_parms * parms)1919 filter_decoded (pdf_obj *dst, const void *src, long srclen,
1920                 struct decode_parms *parms)
1921 {
1922   const unsigned char *p = (const unsigned char *) src;
1923   const unsigned char *endptr = p + srclen;
1924   unsigned char *prev, *buf;
1925   int bits_per_pixel  = parms->colors * parms->bits_per_component;
1926   int bytes_per_pixel = (bits_per_pixel + 7) / 8;
1927   int length = (parms->columns * bits_per_pixel + 7) / 8;
1928   int i, error = 0;
1929 
1930   prev = NEW(length, unsigned char);
1931   buf  = NEW(length, unsigned char);
1932 
1933   memset(prev, 0, length);
1934   switch (parms->predictor) {
1935   case 1 : /* No prediction */
1936     pdf_add_stream(dst, src, srclen); /* Just copy */
1937     break;
1938   case 2: /* TIFF Predictor 2 */
1939     {
1940       if (parms->bits_per_component == 8) {
1941         while (p + length < endptr) {
1942           /* Same as PNG Sub */
1943           for (i = 0; i < length; i++) {
1944             int pv = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
1945             buf[i] = (unsigned char)(((int) p[i] + pv) & 0xff);
1946           }
1947           pdf_add_stream(dst, buf, length);
1948           p += length;
1949         }
1950       } else if (parms->bits_per_component == 16) {
1951         while (p + length < endptr) {
1952           for (i = 0; i < length; i += 2) {
1953             int  b  = i - bytes_per_pixel;
1954             char hi = b >= 0 ? buf[b] : 0;
1955             char lo = b >= 0 ? buf[b + 1] : 0;
1956             long pv = (hi << 8) | lo;
1957             long cv = (p[i] << 8) | p[i + 1];
1958             long c  = pv + cv;
1959             buf[i]     = (unsigned char) (c >> 8);
1960             buf[i + 1] = (unsigned char) (c & 0xff);
1961           }
1962           pdf_add_stream(dst, buf, length);
1963           p += length;
1964         }
1965       } else { /* bits per component 1, 2, 4 */
1966         while (!error && p + length < endptr) {
1967           error = filter_row_TIFF2(buf, p, parms);
1968           if (!error) {
1969             pdf_add_stream(dst, buf, length);
1970             p += length;
1971           }
1972         }
1973       }
1974     }
1975     break;
1976   /* PNG predictors: first byte of each rows is predictor type */
1977   case 10: /* PNG None */
1978   case 11: /* PNG Sub on all rows */
1979   case 12: /* PNG UP on all rows */
1980   case 13: /* PNG Average on all rows */
1981   case 14: /* PNG Paeth on all rows */
1982   case 15: /* PNG optimun: prediction algorithm can change from line to line. */
1983     {
1984       int type = parms->predictor - 10;
1985 
1986       while (!error && p + length < endptr) {
1987         if (parms->predictor == 15)
1988           type = *p;
1989         else if (*p != type) {
1990           WARN("Mismatched Predictor type in data stream.");
1991           error = -1;
1992         }
1993         p++;
1994         switch (type) {
1995         case 0: /* Do nothing just skip first byte */
1996           memcpy(buf, p, length);
1997           break;
1998         case 1:
1999           for (i = 0; i < length; i++) {
2000             int pv = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
2001             buf[i] = (unsigned char)(((int) p[i] + pv) & 0xff);
2002           }
2003           break;
2004         case 2:
2005           for (i = 0; i < length; i++) {
2006             buf[i] = (unsigned char)(((int) p[i] + (int) prev[i]) & 0xff);
2007           }
2008           break;
2009         case 3:
2010           for (i = 0; i < length; i++) {
2011             int up   = prev[i];
2012             int left = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
2013             int tmp  = floor((up + left) / 2);
2014             buf[i] = (unsigned char)((p[i] + tmp) & 0xff);
2015           }
2016           break;
2017         case 4:
2018           for (i = 0; i < length; i++) {
2019             int a = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0; /* left */
2020             int b = prev[i]; /* above */
2021             int c = i - bytes_per_pixel >= 0 ? prev[i - bytes_per_pixel] : 0; /* upper left */
2022             int q = a + b - c;
2023             int qa = q - a, qb = q - b, qc = q - c;
2024             qa = qa < 0 ? -qa : qa;
2025             qb = qb < 0 ? -qb : qb;
2026             qc = qc < 0 ? -qc : qc;
2027             if (qa <= qb && qa <= qc)
2028               buf[i] = (unsigned char) (((int) p[i] + a) & 0xff);
2029             else if (qb <= qc)
2030               buf[i] = (unsigned char) (((int) p[i] + b) & 0xff);
2031             else
2032               buf[i] = (unsigned char) (((int) p[i] + c) & 0xff);
2033           }
2034           break;
2035         default:
2036           WARN("Unknown PNG predictor type: %d", type);
2037           error = -1;
2038         }
2039         if (!error) {
2040           pdf_add_stream(dst, buf, length); /* highly inefficient */
2041           memcpy(prev, buf, length);
2042           p += length;
2043         }
2044       }
2045     }
2046     break;
2047   default:
2048     WARN("Unknown Predictor type value :%d", parms->predictor);
2049     error = -1;
2050   }
2051 
2052   RELEASE(prev);
2053   RELEASE(buf);
2054 
2055   return error;
2056 }
2057 
2058 static int
pdf_add_stream_flate_filtered(pdf_obj * dst,const void * data,long len,struct decode_parms * parms)2059 pdf_add_stream_flate_filtered (pdf_obj *dst, const void *data, long len, struct decode_parms *parms)
2060 {
2061   pdf_obj *tmp;
2062   z_stream z;
2063   Bytef    wbuf[WBUF_SIZE];
2064   int      error;
2065 
2066   z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL;
2067 
2068   z.next_in  = (z_const Bytef *) data; z.avail_in  = len;
2069   z.next_out = (Bytef *) wbuf; z.avail_out = WBUF_SIZE;
2070 
2071   if (inflateInit(&z) != Z_OK) {
2072     WARN("inflateInit() failed.");
2073     return -1;
2074   }
2075 
2076   tmp = pdf_new_stream(0);
2077   for (;;) {
2078     int status;
2079     status = inflate(&z, Z_NO_FLUSH);
2080     if (status == Z_STREAM_END)
2081       break;
2082     else if (status != Z_OK) {
2083       WARN("inflate() failed. Broken PDF file?");
2084       inflateEnd(&z);
2085       return -1;
2086     }
2087 
2088     if (z.avail_out == 0) {
2089       pdf_add_stream(tmp, wbuf, WBUF_SIZE);
2090       z.next_out  = wbuf;
2091       z.avail_out = WBUF_SIZE;
2092     }
2093   }
2094 
2095   if (WBUF_SIZE - z.avail_out > 0)
2096     pdf_add_stream(tmp, wbuf, WBUF_SIZE - z.avail_out);
2097 
2098   error = filter_decoded(dst, pdf_stream_dataptr(tmp), pdf_stream_length(tmp), parms);
2099   pdf_release_obj(tmp);
2100 
2101   return ((!error && inflateEnd(&z) == Z_OK) ? 0 : -1);
2102 }
2103 #endif
2104 
2105 int
pdf_concat_stream(pdf_obj * dst,pdf_obj * src)2106 pdf_concat_stream (pdf_obj *dst, pdf_obj *src)
2107 {
2108   const char *stream_data;
2109   long        stream_length;
2110   pdf_obj    *stream_dict;
2111   pdf_obj    *filter;
2112   int         error = 0;
2113 
2114   if (!PDF_OBJ_STREAMTYPE(dst) || !PDF_OBJ_STREAMTYPE(src))
2115     ERROR("Invalid type.");
2116 
2117   stream_data   = pdf_stream_dataptr(src);
2118   stream_length = pdf_stream_length (src);
2119   stream_dict   = pdf_stream_dict   (src);
2120 
2121   filter = pdf_lookup_dict(stream_dict, "Filter");
2122   if (!filter)
2123     pdf_add_stream(dst, stream_data, stream_length);
2124 #if HAVE_ZLIB
2125   else {
2126     struct decode_parms parms;
2127     int    have_parms = 0;
2128 
2129     if (pdf_lookup_dict(stream_dict, "DecodeParms")) {
2130       pdf_obj *tmp;
2131 
2132       /* Dictionary or array */
2133       tmp = pdf_deref_obj(pdf_lookup_dict(stream_dict, "DecodeParms"));
2134       if (PDF_OBJ_ARRAYTYPE(tmp)) {
2135         if (pdf_array_length(tmp) > 1) {
2136           WARN("Unexpected size for DecodeParms array.");
2137           return -1;
2138         }
2139         tmp = pdf_deref_obj(pdf_get_array(tmp, 0));
2140       }
2141       if (!PDF_OBJ_DICTTYPE(tmp)) {
2142         WARN("PDF dict expected for DecodeParms...");
2143         return -1;
2144       }
2145       error = get_decode_parms(&parms, tmp);
2146       if (error)
2147         ERROR("Invalid value(s) in DecodeParms dictionary.");
2148       have_parms = 1;
2149     }
2150     if (PDF_OBJ_ARRAYTYPE(filter)) {
2151       if (pdf_array_length(filter) > 1) {
2152         WARN("Multiple DecodeFilter not supported.");
2153         return -1;
2154       }
2155       filter = pdf_get_array(filter, 0);
2156     }
2157     if (PDF_OBJ_NAMETYPE(filter)) {
2158       char  *filter_name = pdf_name_value(filter);
2159       if (filter_name && !strcmp(filter_name, "FlateDecode")) {
2160         if (have_parms)
2161           error = pdf_add_stream_flate_filtered(dst, stream_data, stream_length, &parms);
2162         else
2163           error = pdf_add_stream_flate(dst, stream_data, stream_length);
2164       } else {
2165         WARN("DecodeFilter \"%s\" not supported.", filter_name);
2166         error = -1;
2167       }
2168     } else
2169       ERROR("Broken PDF file?");
2170 #endif /* HAVE_ZLIB */
2171   }
2172 
2173   return error;
2174 }
2175 
2176 static pdf_obj *
pdf_stream_uncompress(pdf_obj * src)2177 pdf_stream_uncompress (pdf_obj *src) {
2178   pdf_obj *dst = pdf_new_stream(0);
2179 
2180   TYPECHECK(src, PDF_STREAM);
2181 
2182   pdf_merge_dict(pdf_stream_dict(dst), pdf_stream_dict(src));
2183   pdf_remove_dict(pdf_stream_dict(dst), "Length");
2184   pdf_concat_stream(dst, src);
2185 
2186   return dst;
2187 }
2188 
#if 0
/* Disabled code: store "flags" in the _flags field of a stream object. */
void
pdf_stream_set_flags (pdf_obj *stream, int flags)
{
  TYPECHECK(stream, PDF_STREAM);

  ((pdf_stream *) stream->data)->_flags = flags;
}

/* Disabled code: read back the _flags field of a stream object. */
int
pdf_stream_get_flags (pdf_obj *stream)
{
  TYPECHECK(stream, PDF_STREAM);

  return ((pdf_stream *) stream->data)->_flags;
}
#endif
2213 
2214 static void
pdf_write_obj(pdf_obj * object,FILE * file)2215 pdf_write_obj (pdf_obj *object, FILE *file)
2216 {
2217   if (object == NULL) {
2218     write_null(file);
2219     return;
2220   }
2221 
2222   if (INVALIDOBJ(object) || PDF_OBJ_UNDEFINED(object))
2223     ERROR("pdf_write_obj: Invalid object, type = %d\n", object->type);
2224 
2225   if (file == stderr)
2226     fprintf(stderr, "{%d}", object->refcount);
2227 
2228   switch (object->type) {
2229   case PDF_BOOLEAN:
2230     write_boolean(object->data, file);
2231     break;
2232   case PDF_NUMBER:
2233     write_number (object->data, file);
2234     break;
2235   case PDF_STRING:
2236     write_string (object->data, file);
2237     break;
2238   case PDF_NAME:
2239     write_name(object->data, file);
2240     break;
2241   case PDF_ARRAY:
2242     write_array(object->data, file);
2243     break;
2244   case PDF_DICT:
2245     write_dict (object->data, file);
2246     break;
2247   case PDF_STREAM:
2248     write_stream(object->data, file);
2249     break;
2250   case PDF_NULL:
2251     write_null(file);
2252     break;
2253   case PDF_INDIRECT:
2254     write_indirect(object->data, file);
2255     break;
2256   }
2257 }
2258 
2259 /* Write the object to the file */
2260 static void
pdf_flush_obj(pdf_obj * object,FILE * file)2261 pdf_flush_obj (pdf_obj *object, FILE *file)
2262 {
2263   long length;
2264 
2265   /*
2266    * Record file position
2267    */
2268   add_xref_entry(object->label, 1,
2269 		 pdf_output_file_position, object->generation);
2270   length = sprintf(format_buffer, "%lu %hu obj\n", object->label, object->generation);
2271   enc_mode = doc_enc_mode && !(object->flags & OBJ_NO_ENCRYPT);
2272   pdf_enc_set_label(object->label);
2273   pdf_enc_set_generation(object->generation);
2274   pdf_out(file, format_buffer, length);
2275   pdf_write_obj(object, file);
2276   pdf_out(file, "\nendobj\n", 8);
2277 }
2278 
2279 static long
pdf_add_objstm(pdf_obj * objstm,pdf_obj * object)2280 pdf_add_objstm (pdf_obj *objstm, pdf_obj *object)
2281 {
2282   long *data, pos;
2283 
2284   TYPECHECK(objstm, PDF_STREAM);
2285 
2286   data = get_objstm_data(objstm);
2287   pos = ++data[0];
2288 
2289   data[2*pos]   = object->label;
2290   data[2*pos+1] = pdf_stream_length(objstm);
2291 
2292   add_xref_entry(object->label, 2, objstm->label, pos-1);
2293 
2294   /* redirect output into objstm */
2295   output_stream = objstm;
2296   enc_mode = 0;
2297   pdf_write_obj(object, pdf_output_file);
2298   pdf_out_char(pdf_output_file, '\n');
2299   output_stream = NULL;
2300 
2301   return pos;
2302 }
2303 
2304 static void
release_objstm(pdf_obj * objstm)2305 release_objstm (pdf_obj *objstm)
2306 {
2307   long *data = get_objstm_data(objstm);
2308   long pos = data[0];
2309   pdf_obj *dict;
2310   pdf_stream *stream;
2311   unsigned char *old_buf;
2312   unsigned long old_length;
2313   stream = (pdf_stream *) objstm->data;
2314 
2315   /* Precede stream data by offset table */
2316   old_buf = stream->stream;
2317   old_length = stream->stream_length;
2318   /* Reserve 22 bytes for each entry (two 10 digit numbers plus two spaces) */
2319   stream->stream = NEW(old_length + 22*pos, unsigned char);
2320   stream->stream_length = 0;
2321 
2322   {
2323     long i = 2*pos, *val = data+2;
2324     while (i--) {
2325       long length = sprintf(format_buffer, "%ld ", *(val++));
2326       pdf_add_stream(objstm, format_buffer, length);
2327     }
2328   }
2329 
2330   dict = pdf_stream_dict(objstm);
2331   pdf_add_dict(dict, pdf_new_name("Type"), pdf_new_name("ObjStm"));
2332   pdf_add_dict(dict, pdf_new_name("N"), pdf_new_number(pos));
2333   pdf_add_dict(dict, pdf_new_name("First"), pdf_new_number(stream->stream_length));
2334 
2335   pdf_add_stream(objstm, old_buf, old_length);
2336   RELEASE(old_buf);
2337   pdf_release_obj(objstm);
2338 }
2339 
2340 void
pdf_release_obj(pdf_obj * object)2341 pdf_release_obj (pdf_obj *object)
2342 {
2343   if (object == NULL)
2344     return;
2345   if (INVALIDOBJ(object) || object->refcount <= 0) {
2346     MESG("\npdf_release_obj: object=%p, type=%d, refcount=%d\n",
2347 	 object, object->type, object->refcount);
2348     pdf_write_obj(object, stderr);
2349     ERROR("pdf_release_obj:  Called with invalid object.");
2350   }
2351   object->refcount -= 1;
2352   if (object->refcount == 0) {
2353     /*
2354      * Nothing is using this object so it's okay to remove it.
2355      * Nonzero "label" means object needs to be written before it's destroyed.
2356      */
2357     if (object->label && pdf_output_file != NULL) {
2358       if (!do_objstm || object->flags & OBJ_NO_OBJSTM
2359 	  || (doc_enc_mode && object->flags & OBJ_NO_ENCRYPT)
2360 	  || object->generation)
2361 	pdf_flush_obj(object, pdf_output_file);
2362       else {
2363         if (!current_objstm) {
2364 	  long *data = NEW(2*OBJSTM_MAX_OBJS+2, long);
2365 	  data[0] = data[1] = 0;
2366 	  current_objstm = pdf_new_stream(STREAM_COMPRESS);
2367 	  set_objstm_data(current_objstm, data);
2368 	  pdf_label_obj(current_objstm);
2369 	}
2370 	if (pdf_add_objstm(current_objstm, object) == OBJSTM_MAX_OBJS) {
2371 	  release_objstm(current_objstm);
2372 	  current_objstm = NULL;
2373 	}
2374       }
2375     }
2376     switch (object->type) {
2377     case PDF_BOOLEAN:
2378       release_boolean(object->data);
2379       break;
2380     case PDF_NULL:
2381       break;
2382     case PDF_NUMBER:
2383       release_number(object->data);
2384       break;
2385     case PDF_STRING:
2386       release_string(object->data);
2387       break;
2388     case PDF_NAME:
2389       release_name(object->data);
2390       break;
2391     case PDF_ARRAY:
2392       release_array(object->data);
2393       break;
2394     case PDF_DICT:
2395       release_dict(object->data);
2396       break;
2397     case PDF_STREAM:
2398       release_stream(object->data);
2399       break;
2400     case PDF_INDIRECT:
2401       release_indirect(object->data);
2402       break;
2403     }
2404     /* This might help detect freeing already freed objects */
2405     object->type = -1;
2406     object->data = NULL;
2407     RELEASE(object);
2408   }
2409 }
2410 
/*
 * Step backwards through the file one character at a time until a '\n'
 * or '\r' has been read, the start of the file is reached, or reading
 * fails.  Returns 1 if at least one character was read successfully and
 * the last read did not fail, 0 otherwise.
 *
 * Note: this code should work even if \r\n is eol. It could fail on a
 * machine where \n is eol and there is a \r in the stream --- Highly
 * unlikely in the last few bytes where this is likely to be used.
 */
static int
backup_line (FILE *pdf_input_file)
{
  int ch = -1;

  if (tell_position(pdf_input_file) > 1) {
    for (;;) {
      /* Undo the previous read and move one character further back */
      seek_relative (pdf_input_file, -2);
      if (tell_position(pdf_input_file) <= 0)
        break;
      ch = fgetc(pdf_input_file);
      if (ch < 0 || ch == '\n' || ch == '\r')
        break;
    }
  }

  return (ch < 0) ? 0 : 1;
}
2433 
2434 static long
find_xref(FILE * pdf_input_file)2435 find_xref (FILE *pdf_input_file)
2436 {
2437   long xref_pos;
2438   int  tries = 10;
2439 
2440   do {
2441     long currentpos;
2442 
2443     if (!backup_line(pdf_input_file)) {
2444       tries = 0;
2445       break;
2446     }
2447     currentpos = tell_position(pdf_input_file);
2448     fread(work_buffer, sizeof(char), strlen("startxref"), pdf_input_file);
2449     seek_absolute(pdf_input_file, currentpos);
2450     tries--;
2451   } while (tries > 0 &&
2452 	   strncmp(work_buffer, "startxref", strlen("startxref")));
2453   if (tries <= 0)
2454     return 0;
2455 
2456   /* Skip rest of this line */
2457   mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
2458   /* Next line of input file should contain actual xref location */
2459   mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
2460 
2461   {
2462     const char *start, *end;
2463     char *number;
2464 
2465     start = work_buffer;
2466     end   = start + strlen(work_buffer);
2467     skip_white(&start, end);
2468     number   = parse_number(&start, end);
2469     xref_pos = (long) atof(number);
2470     RELEASE(number);
2471   }
2472 
2473   return xref_pos;
2474 }
2475 
2476 /*
2477  * This routine must be called with the file pointer located
2478  * at the start of the trailer.
2479  */
2480 static pdf_obj *
parse_trailer(pdf_file * pf)2481 parse_trailer (pdf_file *pf)
2482 {
2483   pdf_obj *result;
2484   /*
2485    * Fill work_buffer and hope trailer fits. This should
2486    * be made a bit more robust sometime.
2487    */
2488   if (fread(work_buffer, sizeof(char),
2489 	    WORK_BUFFER_SIZE, pf->file) == 0 ||
2490       strncmp(work_buffer, "trailer", strlen("trailer"))) {
2491     WARN("No trailer.  Are you sure this is a PDF file?");
2492     WARN("buffer:\n->%s<-\n", work_buffer);
2493     result = NULL;
2494   } else {
2495     const char *p = work_buffer + strlen("trailer");
2496     skip_white(&p, work_buffer + WORK_BUFFER_SIZE);
2497     result = parse_pdf_dict(&p, work_buffer + WORK_BUFFER_SIZE, pf);
2498   }
2499 
2500   return result;
2501 }
2502 
2503 /*
2504  * This routine tries to estimate an upper bound for character position
2505  * of the end of the object, so it knows how big the buffer must be.
2506  * The parsing routines require that the entire object be read into
2507  * memory. It would be a major pain to rewrite them.  The worst case
2508  * is that an object before an xref table will grab the whole table
2509  * :-(
2510  */
2511 static long
next_object_offset(pdf_file * pf,unsigned long obj_num)2512 next_object_offset (pdf_file *pf, unsigned long obj_num)
2513 {
2514   long  next = pf->file_size;  /* Worst case */
2515   long  i, curr;
2516 
2517   curr = pf->xref_table[obj_num].field2;
2518   /* Check all other type 1 objects to find next one */
2519   for (i = 0; i < pf->num_obj; i++) {
2520     if (pf->xref_table[i].type == 1 &&
2521         pf->xref_table[i].field2 > curr &&
2522         pf->xref_table[i].field2 < next)
2523       next = pf->xref_table[i].field2;
2524   }
2525 
2526   return  next;
2527 }
2528 
/*
 * True if object number "n" with generation "g" is usable in file "pf":
 * n must lie strictly between 0 and num_obj, and its xref entry must be
 * either type 1 with field3 equal to g, or type 2 with g equal to 0.
 * Note that the arguments are evaluated more than once.
 */
#define checklabel(pf, n, g)                                              \
  ((n) > 0 && (n) < (pf)->num_obj &&                                      \
   (((pf)->xref_table[(n)].type == 1 &&                                   \
     (pf)->xref_table[(n)].field3 == (g)) ||                              \
    ((pf)->xref_table[(n)].type == 2 && !(g))))
2532 
2533 pdf_obj *
pdf_new_indirect(pdf_file * pf,unsigned long obj_num,unsigned short obj_gen)2534 pdf_new_indirect (pdf_file *pf, unsigned long obj_num, unsigned short obj_gen)
2535 {
2536   pdf_obj      *result;
2537   pdf_indirect *indirect;
2538 
2539   indirect = NEW(1, pdf_indirect);
2540   indirect->pf         = pf;
2541   indirect->obj        = NULL;
2542   indirect->label      = obj_num;
2543   indirect->generation = obj_gen;
2544 
2545   result   = pdf_new_obj(PDF_INDIRECT);
2546   result->data = indirect;
2547 
2548   return result;
2549 }
2550 
2551 static pdf_obj *
pdf_read_object(unsigned long obj_num,unsigned short obj_gen,pdf_file * pf,long offset,long limit)2552 pdf_read_object (unsigned long obj_num, unsigned short obj_gen,
2553 		pdf_file *pf, long offset, long limit)
2554 {
2555   long     length;
2556   char    *buffer;
2557   const char *p, *endptr;
2558   pdf_obj *result;
2559 
2560   length = limit - offset;
2561 
2562   if (length <= 0)
2563     return NULL;
2564 
2565   buffer = NEW(length + 1, char);
2566 
2567   seek_absolute(pf->file, offset);
2568   fread(buffer, sizeof(char), length, pf->file);
2569 
2570   p      = buffer;
2571   endptr = p + length;
2572 
2573   /* Check for obj_num and obj_gen */
2574   {
2575     const char   *q = p; /* <== p */
2576     char         *sp;
2577     unsigned long n, g;
2578 
2579     skip_white(&q, endptr);
2580     sp = parse_unsigned(&q, endptr);
2581     if (!sp) {
2582       RELEASE(buffer);
2583       return NULL;
2584     }
2585     n = strtoul(sp, NULL, 10);
2586     RELEASE(sp);
2587 
2588     skip_white(&q, endptr);
2589     sp = parse_unsigned(&q, endptr);
2590     if (!sp) {
2591       RELEASE(buffer);
2592       return NULL;
2593     }
2594     g = strtoul(sp, NULL, 10);
2595     RELEASE(sp);
2596 
2597     if (obj_num && (n != obj_num || g != obj_gen)) {
2598       RELEASE(buffer);
2599       return NULL;
2600     }
2601 
2602     p = q; /* ==> p */
2603   }
2604 
2605 
2606   skip_white(&p, endptr);
2607   if (memcmp(p, "obj", strlen("obj"))) {
2608     WARN("Didn't find \"obj\".");
2609     RELEASE(buffer);
2610     return NULL;
2611   }
2612   p += strlen("obj");
2613 
2614   result = parse_pdf_object(&p, endptr, pf);
2615 
2616   skip_white(&p, endptr);
2617   if (memcmp(p, "endobj", strlen("endobj"))) {
2618     WARN("Didn't find \"endobj\".");
2619     if (result)
2620       pdf_release_obj(result);
2621     result = NULL;
2622   }
2623   RELEASE(buffer);
2624 
2625   return result;
2626 }
2627 
2628 static pdf_obj *
read_objstm(pdf_file * pf,unsigned long num)2629 read_objstm (pdf_file *pf, unsigned long num)
2630 {
2631   unsigned long offset = pf->xref_table[num].field2;
2632   unsigned short gen = pf->xref_table[num].field3;
2633   long limit = next_object_offset(pf, num), n, first, *header = NULL;
2634   char *data = NULL, *q;
2635   const char *p, *endptr;
2636   int i;
2637 
2638   pdf_obj *objstm, *dict, *type, *n_obj, *first_obj;
2639 
2640   objstm = pdf_read_object(num, gen, pf, offset, limit);
2641 
2642   if (!PDF_OBJ_STREAMTYPE(objstm))
2643     goto error;
2644 
2645   {
2646     pdf_obj *tmp = pdf_stream_uncompress(objstm);
2647     if (!tmp)
2648       goto error;
2649     pdf_release_obj(objstm);
2650     objstm = tmp;
2651   }
2652 
2653   dict = pdf_stream_dict(objstm);
2654 
2655   type = pdf_lookup_dict(dict, "Type");
2656   if (!PDF_OBJ_NAMETYPE(type) ||
2657       strcmp(pdf_name_value(type), "ObjStm"))
2658     goto error;
2659 
2660   n_obj = pdf_lookup_dict(dict, "N");
2661   if (!PDF_OBJ_NUMBERTYPE(n_obj))
2662     goto error;
2663   n = (long) pdf_number_value(n_obj);
2664 
2665   first_obj = pdf_lookup_dict(dict, "First");
2666   if (!PDF_OBJ_NUMBERTYPE(first_obj))
2667     goto error;
2668   first = (long) pdf_number_value(first_obj);
2669   /* reject object streams without object data */
2670   if (first >= pdf_stream_length(objstm))
2671     goto error;
2672 
2673   header = NEW(2*(n+1), long);
2674   set_objstm_data(objstm, header);
2675   *(header++) = n;
2676   *(header++) = first;
2677 
2678   /* avoid parsing beyond offset table */
2679   data = NEW(first + 1, char);
2680   memcpy(data, pdf_stream_dataptr(objstm), first);
2681   data[first] = 0;
2682 
2683   p      = data;
2684   endptr = p + first;
2685   i = 2*n;
2686   while (i--) {
2687     *(header++) = strtoul(p, &q, 10);
2688     if (q == p)
2689       goto error;
2690     p = q;
2691   }
2692 
2693   /* Any garbage after last entry? */
2694   skip_white(&p, endptr);
2695   if (p != endptr)
2696     goto error;
2697   RELEASE(data);
2698 
2699   return pf->xref_table[num].direct = objstm;
2700 
2701  error:
2702   WARN("Cannot parse object stream.");
2703   if (data)
2704     RELEASE(data);
2705   if (objstm)
2706     pdf_release_obj(objstm);
2707   return NULL;
2708 }
2709 
2710 /* Label without corresponding object definition are replaced by the
2711  * null object, as required by the PDF spec. This is important to parse
2712  * several cross-reference sections.
2713  */
2714 static pdf_obj *
pdf_get_object(pdf_file * pf,unsigned long obj_num,unsigned short obj_gen)2715 pdf_get_object (pdf_file *pf, unsigned long obj_num, unsigned short obj_gen)
2716 {
2717   pdf_obj *result;
2718 
2719   if (!checklabel(pf, obj_num, obj_gen)) {
2720     WARN("Trying to read nonexistent or deleted object: %lu %u",
2721          obj_num, obj_gen);
2722     return pdf_new_null();
2723   }
2724 
2725   if ((result = pf->xref_table[obj_num].direct)) {
2726     return pdf_link_obj(result);
2727   }
2728 
2729   if (pf->xref_table[obj_num].type == 1) {
2730     /* type == 1 */
2731     unsigned long offset;
2732     long limit;
2733     offset = pf->xref_table[obj_num].field2;
2734     limit  = next_object_offset(pf, obj_num);
2735     result = pdf_read_object(obj_num, obj_gen, pf, offset, limit);
2736   } else {
2737     /* type == 2 */
2738     unsigned long  objstm_num = pf->xref_table[obj_num].field2;
2739     unsigned short index = pf->xref_table[obj_num].field3;
2740     pdf_obj *objstm;
2741     long *data, n, first, length;
2742     const char *p, *q;
2743 
2744     if (objstm_num >= pf->num_obj ||
2745 	pf->xref_table[objstm_num].type != 1 ||
2746 	!((objstm = pf->xref_table[objstm_num].direct) ||
2747 	  (objstm = read_objstm(pf, objstm_num))))
2748       goto error;
2749 
2750     data = get_objstm_data(objstm);
2751     n = *(data++);
2752     first = *(data++);
2753 
2754     if (index >= n || data[2*index] != obj_num)
2755       goto error;
2756 
2757     length = pdf_stream_length(objstm);
2758     p = (const char *) pdf_stream_dataptr(objstm) + first + data[2*index+1];
2759     q = p + (index == n-1 ? length : first+data[2*index+3]);
2760     result = parse_pdf_object(&p, q, pf);
2761     if (!result)
2762       goto error;
2763   }
2764 
2765   /* Make sure the caller doesn't free this object */
2766   pf->xref_table[obj_num].direct = pdf_link_obj(result);
2767 
2768   return result;
2769 
2770  error:
2771   WARN("Could not read object from object stream.");
2772   return pdf_new_null();
2773 }
2774 
/*
 * Accessors for the pdf_indirect payload of an indirect object "o":
 * source file, attached target object, object number and generation.
 * Each one expands to the member itself and can be assigned to.
 */
#define OBJ_FILE(o) (((pdf_indirect *) (o)->data)->pf)
#define OBJ_OBJ(o)  (((pdf_indirect *) (o)->data)->obj)
#define OBJ_NUM(o)  (((pdf_indirect *) (o)->data)->label)
#define OBJ_GEN(o)  (((pdf_indirect *) (o)->data)->generation)
2779 
2780 static pdf_obj *
pdf_new_ref(pdf_obj * object)2781 pdf_new_ref (pdf_obj *object)
2782 {
2783   pdf_obj *result;
2784 
2785   if (object->label == 0) {
2786     pdf_label_obj(object);
2787   }
2788   result = pdf_new_indirect(NULL, object->label, object->generation);
2789   OBJ_OBJ(result) = object;
2790   return result;
2791 }
2792 
2793 /* pdf_deref_obj always returns a link instead of the original   */
2794 /* It never return the null object, but the NULL pointer instead */
2795 pdf_obj *
pdf_deref_obj(pdf_obj * obj)2796 pdf_deref_obj (pdf_obj *obj)
2797 {
2798   int count = PDF_OBJ_MAX_DEPTH;
2799 
2800   if (obj)
2801     obj = pdf_link_obj(obj);
2802 
2803   while (PDF_OBJ_INDIRECTTYPE(obj) && --count) {
2804     pdf_file *pf = OBJ_FILE(obj);
2805     if (pf) {
2806       unsigned long  obj_num = OBJ_NUM(obj);
2807       unsigned short obj_gen = OBJ_GEN(obj);
2808       pdf_release_obj(obj);
2809       obj = pdf_get_object(pf, obj_num, obj_gen);
2810     } else {
2811       pdf_obj *next_obj = OBJ_OBJ(obj);
2812       if (!next_obj) {
2813         ERROR("Undefined object reference");
2814       }
2815       pdf_release_obj(obj);
2816       obj = pdf_link_obj(next_obj);
2817     }
2818   }
2819 
2820   if (!count)
2821     ERROR("Loop in object hierarchy detected. Broken PDF file?");
2822 
2823   if (PDF_OBJ_NULLTYPE(obj)) {
2824     pdf_release_obj(obj);
2825     return NULL;
2826   } else
2827     return obj;
2828 }
2829 
2830 static void
extend_xref(pdf_file * pf,long new_size)2831 extend_xref (pdf_file *pf, long new_size)
2832 {
2833   unsigned long i;
2834 
2835   pf->xref_table = RENEW(pf->xref_table, new_size, xref_entry);
2836   for (i = pf->num_obj; i < new_size; i++) {
2837     pf->xref_table[i].direct   = NULL;
2838     pf->xref_table[i].indirect = NULL;
2839     pf->xref_table[i].type     = 0;
2840     pf->xref_table[i].field3 = 0;
2841     pf->xref_table[i].field2 = 0L;
2842   }
2843   pf->num_obj = new_size;
2844 }
2845 
2846 static int
parse_xref_table(pdf_file * pf,long xref_pos)2847 parse_xref_table (pdf_file *pf, long xref_pos)
2848 {
2849   FILE         *pdf_input_file = pf->file;
2850   unsigned long first, size;
2851   unsigned long i, offset;
2852   unsigned int  obj_gen;
2853   char          flag;
2854   int           r;
2855 
2856   /*
2857    * This routine reads one xref segment. It may be called multiple times
2858    * on the same file.  xref tables sometimes come in pieces.
2859    */
2860 
2861   seek_absolute(pf->file, xref_pos);
2862 
2863   mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
2864   if (memcmp(work_buffer, "xref", strlen("xref"))) {
2865     /* Might be an xref stream and not an xref table */
2866     return 0;
2867   }
2868   /* Next line in file has first item and size of table */
2869   for (;;) {
2870     unsigned long current_pos;
2871 
2872     current_pos = tell_position(pdf_input_file);
2873     if (mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file) == NULL) {
2874       WARN("Premature end of PDF file while parsing xref table.");
2875       return -1;
2876     }
2877     if (!strncmp(work_buffer, "trailer", strlen ("trailer"))) {
2878       /*
2879        * Backup... This is ugly, but it seems like the safest thing to
2880        * do.  It is possible the trailer dictionary starts on the same
2881        * logical line as the word trailer.  In that case, the mfgets
2882        * call might have started to read the trailer dictionary and
2883        * parse_trailer would fail.
2884        */
2885       seek_absolute(pdf_input_file, current_pos);
2886       break;
2887     }
2888     sscanf(work_buffer, "%lu %lu", &first, &size);
2889     if (pf->num_obj < first + size) {
2890       extend_xref(pf, first + size);
2891     }
2892 
2893     for (i = first; i < first + size; i++) {
2894       fread(work_buffer, sizeof(char), 20, pdf_input_file);
2895       /*
2896        * Don't overwrite positions that have already been set by a
2897        * modified xref table.  We are working our way backwards
2898        * through the reference table, so we only set "position"
2899        * if it hasn't been set yet.
2900        */
2901       work_buffer[19] = 0;
2902       offset = 0UL; obj_gen = 0; flag = 0;
2903       r = sscanf(work_buffer, "%010lu %05u %c", &offset, &obj_gen, &flag);
2904       if ( r != 3 ||
2905           ((flag != 'n' && flag != 'f') ||
2906            (flag == 'n' &&
2907            (offset >= pf->file_size || (offset > 0 && offset < 4))))) {
2908         WARN("Invalid xref table entry [%lu]. PDF file is corrupt...", i);
2909         return -1;
2910       }
2911       if (!pf->xref_table[i].field2) {
2912 	pf->xref_table[i].type   = (flag == 'n');
2913 	pf->xref_table[i].field2 = offset;
2914 	pf->xref_table[i].field3 = obj_gen;
2915       }
2916     }
2917   }
2918 
2919   return  1;
2920 }
2921 
/*
 * Read one big-endian field of "length" bytes from *p and advance *p
 * past it.  A field of width zero (or an invalid negative width) consumes
 * nothing and yields the default value "def".
 */
static unsigned long
parse_xrefstm_field (const char **p, int length, unsigned long def)
{
  unsigned long val = 0;

  /* A negative width would otherwise run far beyond the buffer */
  if (length <= 0)
    return def;

  while (length--) {
    val <<= 8;
    val |= (unsigned char) *((*p)++);
  }

  return val;
}
2937 
2938 static int
parse_xrefstm_subsec(pdf_file * pf,const char ** p,long * length,int * W,int wsum,long first,long size)2939 parse_xrefstm_subsec (pdf_file *pf,
2940 		      const char **p, long *length,
2941 		      int *W, int wsum,
2942 		      long first, long size) {
2943   xref_entry *e;
2944 
2945   if ((*length -= wsum*size) < 0)
2946     return -1;
2947 
2948   if (pf->num_obj < first+size)
2949     extend_xref(pf, first+size);  /* TODO: change! why? */
2950 
2951   e = pf->xref_table + first;
2952   while (size--) {
2953     unsigned char  type;
2954     unsigned long  field2;
2955     unsigned short field3;
2956 
2957     type = (unsigned char) parse_xrefstm_field(p, W[0], 1);
2958     if (type > 2)
2959       WARN("Unknown cross-reference stream entry type.");
2960 #if 0
2961     /* Not sure */
2962     else if (!W[1] || (type != 1 && !W[2]))
2963       return -1;
2964 #endif
2965 
2966     field2 = (unsigned long)  parse_xrefstm_field(p, W[1], 0);
2967     field3 = (unsigned short) parse_xrefstm_field(p, W[2], 0);
2968 
2969     if (!e->field2) {
2970       e->type   = type;
2971       e->field2 = field2;
2972       e->field3 = field3;
2973       }
2974     e++;
2975   }
2976 
2977   return 0;
2978 }
2979 
2980 static int
parse_xref_stream(pdf_file * pf,long xref_pos,pdf_obj ** trailer)2981 parse_xref_stream (pdf_file *pf, long xref_pos, pdf_obj **trailer)
2982 {
2983   pdf_obj *xrefstm, *size_obj, *W_obj, *index_obj;
2984   unsigned long size;
2985   long length;
2986   int W[3], i, wsum = 0;
2987   const char *p;
2988 
2989   xrefstm = pdf_read_object(0, 0, pf, xref_pos, pf->file_size);
2990   if (!PDF_OBJ_STREAMTYPE(xrefstm))
2991     goto error;
2992 
2993   {
2994     pdf_obj *tmp = pdf_stream_uncompress(xrefstm);
2995     if (!tmp)
2996       goto error;
2997     pdf_release_obj(xrefstm);
2998     xrefstm = tmp;
2999   }
3000 
3001   *trailer = pdf_link_obj(pdf_stream_dict(xrefstm));
3002 
3003   size_obj = pdf_lookup_dict(*trailer, "Size");
3004   if (!PDF_OBJ_NUMBERTYPE(size_obj))
3005     goto error;
3006   size = (unsigned long) pdf_number_value(size_obj);
3007 
3008   length = pdf_stream_length(xrefstm);
3009 
3010   W_obj = pdf_lookup_dict(*trailer, "W");
3011   if (!PDF_OBJ_ARRAYTYPE(W_obj) || pdf_array_length(W_obj) != 3)
3012     goto error;
3013 
3014   for (i = 0; i < 3; i++) {
3015     pdf_obj *tmp = pdf_get_array(W_obj, i);
3016     if (!PDF_OBJ_NUMBERTYPE(tmp))
3017       goto error;
3018     wsum += (W[i] = (int) pdf_number_value(tmp));
3019   }
3020 
3021   p = pdf_stream_dataptr(xrefstm);
3022 
3023   index_obj = pdf_lookup_dict(*trailer, "Index");
3024   if (index_obj) {
3025     unsigned int index_len;
3026     if (!PDF_OBJ_ARRAYTYPE(index_obj) ||
3027 	((index_len = pdf_array_length(index_obj)) % 2 ))
3028       goto error;
3029 
3030     i = 0;
3031     while (i < index_len) {
3032       pdf_obj *first = pdf_get_array(index_obj, i++);
3033       size_obj  = pdf_get_array(index_obj, i++);
3034       if (!PDF_OBJ_NUMBERTYPE(first) ||
3035 	  !PDF_OBJ_NUMBERTYPE(size_obj) ||
3036 	  parse_xrefstm_subsec(pf, &p, &length, W, wsum,
3037 			       (long) pdf_number_value(first),
3038 			       (long) pdf_number_value(size_obj)))
3039 	goto error;
3040     }
3041   } else if (parse_xrefstm_subsec(pf, &p, &length, W, wsum, 0, size))
3042       goto error;
3043 
3044   if (length)
3045     WARN("Garbage in xref stream.");
3046 
3047   pdf_release_obj(xrefstm);
3048 
3049   return 1;
3050 
3051  error:
3052   WARN("Cannot parse cross-reference stream.");
3053   if (xrefstm)
3054     pdf_release_obj(xrefstm);
3055   if (*trailer) {
3056     pdf_release_obj(*trailer);
3057     *trailer = NULL;
3058   }
3059   return 0;
3060 }
3061 
3062 /* TODO: parse Version entry */
3063 static pdf_obj *
read_xref(pdf_file * pf)3064 read_xref (pdf_file *pf)
3065 {
3066   pdf_obj *trailer = NULL, *main_trailer = NULL;
3067   long     xref_pos;
3068 
3069   if (!(xref_pos = find_xref(pf->file)))
3070     goto error;
3071 
3072   while (xref_pos) {
3073     pdf_obj *prev;
3074 
3075     int res = parse_xref_table(pf, xref_pos);
3076     if (res > 0) {
3077       /* cross-reference table */
3078       pdf_obj *xrefstm;
3079 
3080       if (!(trailer = parse_trailer(pf)))
3081 	goto error;
3082 
3083       if (!main_trailer)
3084 	main_trailer = pdf_link_obj(trailer);
3085 
3086       if ((xrefstm = pdf_lookup_dict(trailer, "XRefStm"))) {
3087 	pdf_obj *new_trailer = NULL;
3088 	if (PDF_OBJ_NUMBERTYPE(xrefstm) &&
3089 	    parse_xref_stream(pf, (long) pdf_number_value(xrefstm),
3090 			      &new_trailer))
3091 	  pdf_release_obj(new_trailer);
3092 	else
3093 	  WARN("Skipping hybrid reference section.");
3094 	/* Many PDF 1.5 xref streams use DecodeParms, which we cannot
3095 	   parse. This way we can use at least xref tables in hybrid
3096 	   documents. Or should we better stop parsing the file?
3097 	*/
3098       }
3099 
3100     } else if (!res && parse_xref_stream(pf, xref_pos, &trailer)) {
3101       /* cross-reference stream */
3102       if (!main_trailer)
3103 	main_trailer = pdf_link_obj(trailer);
3104     } else
3105       goto error;
3106 
3107     if ((prev = pdf_lookup_dict(trailer, "Prev"))) {
3108       if (PDF_OBJ_NUMBERTYPE(prev))
3109 	xref_pos = (long) pdf_number_value(prev);
3110       else
3111 	goto error;
3112     } else
3113       xref_pos = 0;
3114 
3115     pdf_release_obj(trailer);
3116   }
3117 
3118 #if 0
3119   if (!pdf_lookup_dict(main_trailer, "Root")) {
3120       WARN("Trailer doesn't have catalog. Is this a correct PDF file?");
3121       goto error;
3122     }
3123 #endif
3124 
3125   return main_trailer;
3126 
3127  error:
3128   WARN("Error while parsing PDF file.");
3129   if (trailer)
3130     pdf_release_obj(trailer);
3131   if (main_trailer)
3132     pdf_release_obj(main_trailer);
3133   return NULL;
3134 }
3135 
/* Table of pdf_file records, looked up by the "ident" string given to
 * pdf_open(); allocated by pdf_files_init(), freed by pdf_files_close(). */
static struct ht_table *pdf_files = NULL;
3137 
3138 static pdf_file *
pdf_file_new(FILE * file)3139 pdf_file_new (FILE *file)
3140 {
3141   pdf_file *pf;
3142   ASSERT(file);
3143   pf = NEW(1, pdf_file);
3144   pf->file    = file;
3145   pf->trailer = NULL;
3146   pf->xref_table = NULL;
3147   pf->catalog = NULL;
3148   pf->num_obj = 0;
3149   pf->version = 0;
3150 
3151   seek_end(file);
3152   pf->file_size = tell_position(file);
3153 
3154   return pf;
3155 }
3156 
3157 static void
pdf_file_free(pdf_file * pf)3158 pdf_file_free (pdf_file *pf)
3159 {
3160   unsigned long i;
3161 
3162   if (!pf) {
3163     return;
3164   }
3165 
3166   for (i = 0; i < pf->num_obj; i++) {
3167     if (pf->xref_table[i].direct)
3168       pdf_release_obj(pf->xref_table[i].direct);
3169     if (pf->xref_table[i].indirect)
3170       pdf_release_obj(pf->xref_table[i].indirect);
3171   }
3172 
3173   RELEASE(pf->xref_table);
3174   if (pf->trailer)
3175     pdf_release_obj(pf->trailer);
3176   if (pf->catalog)
3177     pdf_release_obj(pf->catalog);
3178 
3179   RELEASE(pf);
3180 }
3181 
3182 void
pdf_files_init(void)3183 pdf_files_init (void)
3184 {
3185   pdf_files = NEW(1, struct ht_table);
3186   ht_init_table(pdf_files, (void (*)(void *)) pdf_file_free);
3187 }
3188 
3189 int
pdf_file_get_version(pdf_file * pf)3190 pdf_file_get_version (pdf_file *pf)
3191 {
3192   ASSERT(pf);
3193   return pf->version;
3194 }
3195 
3196 pdf_obj *
pdf_file_get_trailer(pdf_file * pf)3197 pdf_file_get_trailer (pdf_file *pf)
3198 {
3199   ASSERT(pf);
3200   return pdf_link_obj(pf->trailer);
3201 }
3202 
3203 pdf_obj *
pdf_file_get_catalog(pdf_file * pf)3204 pdf_file_get_catalog (pdf_file *pf)
3205 {
3206   ASSERT(pf);
3207   return pf->catalog;
3208 }
3209 
3210 pdf_file *
pdf_open(const char * ident,FILE * file)3211 pdf_open (const char *ident, FILE *file)
3212 {
3213   pdf_file *pf = NULL;
3214 
3215   ASSERT(pdf_files);
3216 
3217   if (ident)
3218     pf = (pdf_file *) ht_lookup_table(pdf_files, ident, strlen(ident));
3219 
3220   if (pf) {
3221     pf->file = file;
3222   } else {
3223     pdf_obj *new_version;
3224     int version = check_for_pdf_version(file);
3225 
3226     if (version < 1 || version > pdf_version) {
3227       WARN("pdf_open: Not a PDF 1.[1-%u] file.", pdf_version);
3228       return NULL;
3229     }
3230 
3231     pf = pdf_file_new(file);
3232     pf->version = version;
3233 
3234     if (!(pf->trailer = read_xref(pf)))
3235       goto error;
3236 
3237     if (pdf_lookup_dict(pf->trailer, "Encrypt")) {
3238       WARN("PDF document is encrypted.");
3239       goto error;
3240     }
3241 
3242     pf->catalog = pdf_deref_obj(pdf_lookup_dict(pf->trailer, "Root"));
3243     if (!PDF_OBJ_DICTTYPE(pf->catalog)) {
3244       WARN("Cannot read PDF document catalog. Broken PDF file?");
3245       goto error;
3246     }
3247 
3248     new_version = pdf_deref_obj(pdf_lookup_dict(pf->catalog, "Version"));
3249     if (new_version) {
3250       unsigned int minor;
3251 
3252       if (!PDF_OBJ_NAMETYPE(new_version) ||
3253 	  sscanf(pdf_name_value(new_version), "1.%u", &minor) != 1) {
3254 	pdf_release_obj(new_version);
3255 	WARN("Illegal Version entry in document catalog. Broken PDF file?");
3256 	goto error;
3257       }
3258 
3259       if (pf->version < minor)
3260 	pf->version = minor;
3261 
3262       pdf_release_obj(new_version);
3263     }
3264 
3265     if (ident)
3266       ht_append_table(pdf_files, ident, strlen(ident), pf);
3267   }
3268 
3269   return pf;
3270 
3271  error:
3272   pdf_file_free(pf);
3273   return NULL;
3274 }
3275 
3276 void
pdf_close(pdf_file * pf)3277 pdf_close (pdf_file *pf)
3278 {
3279   if (pf)
3280     pf->file = NULL;
3281 }
3282 
3283 void
pdf_files_close(void)3284 pdf_files_close (void)
3285 {
3286   ASSERT(pdf_files);
3287   ht_clear_table(pdf_files);
3288   RELEASE(pdf_files);
3289 }
3290 
/*
 * Read the minor version from a "%PDF-1.x" header.
 *
 * The stream is rewound first. The first character is peeked (read and
 * pushed back) and must be '%'; the header is then parsed with fscanf().
 *
 * Returns the minor version number x, or -1 if the stream does not
 * start with a "%PDF-1.<number>" header.
 */
static int
check_for_pdf_version (FILE *file)
{
  unsigned int minor;
  int          first;

  rewind(file);

  /* Peek at the first character without consuming it. */
  first = fgetc(file);
  if (ungetc(first, file) != '%')
    return -1;

  if (fscanf(file, "%%PDF-1.%u", &minor) != 1)
    return -1;

  return minor;
}
3301 
3302 int
check_for_pdf(FILE * file)3303 check_for_pdf (FILE *file)
3304 {
3305   int version = check_for_pdf_version(file);
3306 
3307   if (version < 0)  /* not a PDF file */
3308     return 0;
3309 
3310   if (version <= pdf_version)
3311     return 1;
3312 
3313   WARN("Version of PDF file (1.%d) is newer than version limit specification.",
3314        version);
3315   return 1;
3316 }
3317 
3318 static int CDECL
import_dict(pdf_obj * key,pdf_obj * value,void * pdata)3319 import_dict (pdf_obj *key, pdf_obj *value, void *pdata)
3320 {
3321   pdf_obj *copy;
3322   pdf_obj *tmp;
3323 
3324   copy = (pdf_obj *) pdata;
3325 
3326   tmp  = pdf_import_object(value);
3327   if (!tmp) {
3328     return -1;
3329   }
3330   pdf_add_dict(copy, pdf_link_obj(key), tmp);
3331 
3332   return 0;
3333 }
3334 
/*
 * Sentinel object (type PDF_OBJ_INVALID). pdf_import_indirect() stores
 * its address in an xref entry's `indirect` slot while that object is
 * being imported, and compares against it to detect reference loops.
 */
static pdf_obj loop_marker = { PDF_OBJ_INVALID, 0, 0, 0, 0, NULL };
3336 
3337 static pdf_obj *
pdf_import_indirect(pdf_obj * object)3338 pdf_import_indirect (pdf_obj *object)
3339 {
3340   pdf_file *pf = OBJ_FILE(object);
3341   unsigned long obj_num = OBJ_NUM(object);
3342   unsigned short obj_gen = OBJ_GEN(object);
3343 
3344   pdf_obj *ref;
3345 
3346   ASSERT(pf);
3347 
3348   if (!checklabel(pf, obj_num, obj_gen)) {
3349     WARN("Can't resolve object: %lu %u", obj_num, obj_gen);
3350     return pdf_new_null();
3351   }
3352 
3353   if ((ref = pf->xref_table[obj_num].indirect)) {
3354     if (ref == &loop_marker)
3355       ERROR("Loop in object hierarchy detected. Broken PDF file?");
3356     return  pdf_link_obj(ref);
3357   } else {
3358     pdf_obj *obj, *tmp;
3359 
3360     obj = pdf_get_object(pf, obj_num, obj_gen);
3361     if (!obj) {
3362       WARN("Could not read object: %lu %u", obj_num, obj_gen);
3363       return NULL;
3364     }
3365 
3366     /* We mark the reference to be able to detect loops */
3367     pf->xref_table[obj_num].indirect = &loop_marker;
3368 
3369     tmp = pdf_import_object(obj);
3370 
3371     pf->xref_table[obj_num].indirect = ref = pdf_ref_obj(tmp);
3372 
3373     pdf_release_obj(tmp);
3374     pdf_release_obj(obj);
3375 
3376     return  pdf_link_obj(ref);
3377   }
3378 }
3379 
3380 /*
3381  * pdf_import_object recursively copies the object and those
3382  * referenced by it and changes the indirect references so that
3383  * they refer to the current output file. New indirect references
3384  * are remembered, which avoids duplicating objects when they
3385  * are imported several times.
3386  */
3387 pdf_obj *
pdf_import_object(pdf_obj * object)3388 pdf_import_object (pdf_obj *object)
3389 {
3390   pdf_obj  *imported;
3391   pdf_obj  *tmp;
3392   int       i;
3393 
3394   switch (pdf_obj_typeof(object)) {
3395 
3396   case PDF_INDIRECT:
3397     if (OBJ_FILE(object)) {
3398       imported = pdf_import_indirect(object);
3399     } else {
3400       imported = pdf_link_obj(object);
3401     }
3402     break;
3403 
3404   case PDF_STREAM:
3405     {
3406       pdf_obj *stream_dict;
3407 
3408       tmp = pdf_import_object(pdf_stream_dict(object));
3409       if (!tmp)
3410 	return NULL;
3411 
3412       imported    = pdf_new_stream(0);
3413       stream_dict = pdf_stream_dict(imported);
3414       pdf_merge_dict(stream_dict, tmp);
3415       pdf_release_obj(tmp);
3416       pdf_add_stream(imported,
3417 		     pdf_stream_dataptr(object),
3418 		     pdf_stream_length(object));
3419     }
3420     break;
3421 
3422   case PDF_DICT:
3423 
3424     imported = pdf_new_dict();
3425     if (pdf_foreach_dict(object, import_dict, imported) < 0) {
3426       pdf_release_obj(imported);
3427       return NULL;
3428     }
3429 
3430     break;
3431 
3432   case PDF_ARRAY:
3433 
3434     imported = pdf_new_array();
3435     for (i = 0; i < pdf_array_length(object); i++) {
3436       tmp = pdf_import_object(pdf_get_array(object, i));
3437       if (!tmp) {
3438 	pdf_release_obj(imported);
3439 	return NULL;
3440       }
3441       pdf_add_array(imported, tmp);
3442     }
3443     break;
3444 
3445   default:
3446     imported = pdf_link_obj(object);
3447   }
3448 
3449   return imported;
3450 }
3451 
3452 
3453 /* returns 0 if indirect references point to the same object */
3454 int
pdf_compare_reference(pdf_obj * ref1,pdf_obj * ref2)3455 pdf_compare_reference (pdf_obj *ref1, pdf_obj *ref2)
3456 {
3457   pdf_indirect *data1, *data2;
3458 
3459   ASSERT(PDF_OBJ_INDIRECTTYPE(ref1) && PDF_OBJ_INDIRECTTYPE(ref2));
3460 
3461   data1 = (pdf_indirect *) ref1->data;
3462   data2 = (pdf_indirect *) ref2->data;
3463 
3464   return data1->pf != data2->pf || data1->label != data2->label
3465     || data1->generation != data2->generation;
3466 }
3467