1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2
3 Copyright (C) 2008-2015 by Jin-Hwan Cho, Matthias Franz, and Shunsaku Hirata,
4 the dvipdfmx project team.
5
6 Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21 */
22
23 /*
24 * TODO: Many things...
25 * {begin,end}_{bead,article}, box stack, name tree (not limited to dests)...
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31
32 #include <time.h>
33
34 #include "system.h"
35 #include "mem.h"
36 #include "error.h"
37 #include "mfileio.h"
38
39 #include "numbers.h"
40
41 #include "pdfobj.h"
42 #include "pdfparse.h"
43 #include "pdfnames.h"
44
45 #include "pdfencrypt.h"
46
47 #include "dvipdfmx.h"
48
49 #include "pdfdev.h"
50 #include "pdfdraw.h"
51 #include "pdfcolor.h"
52
53 #include "pdfresource.h"
54 #include "pdffont.h"
55 #include "pdfximage.h"
56
57 #include "pdflimits.h"
58
59 #if HAVE_LIBPNG
60 #include "pngimage.h"
61 #endif
62 #include "jpegimage.h"
63
64 #include "pdfdoc.h"
65
/* Number of spare slots added whenever the page table has to grow. */
#define PDFDOC_PAGES_ALLOC_SIZE   128u
/* Allocation sizes for articles and beads (presumably the growth steps
 * used by the article code; not referenced in this part of the file). */
#define PDFDOC_ARTICLE_ALLOC_SIZE 16
#define PDFDOC_BEAD_ALLOC_SIZE    16

/* Verbosity level; incremented by pdf_doc_set_verbose(). */
static int   verbose              = 0;

/* Becomes non-zero once manual thumbnails have been enabled. */
static char  manual_thumb_enabled = 0;
/* Base name for thumbnail files; starts out unset. */
static char *thumb_basename       = NULL;
74
/*
 * Request manual thumbnail support.
 * When built with libpng the enable flag is set; otherwise the request
 * is refused with a warning and the flag is left alone.
 */
void
pdf_doc_enable_manual_thumbnails (void)
{
#if !HAVE_LIBPNG
  WARN("Manual thumbnail is not supported without the libpng library.");
#else
  manual_thumb_enabled = 1;
#endif
}
84
85 static pdf_obj *
read_thumbnail(const char * thumb_filename)86 read_thumbnail (const char *thumb_filename)
87 {
88 pdf_obj *image_ref;
89 int xobj_id;
90 FILE *fp;
91
92 fp = MFOPEN(thumb_filename, FOPEN_RBIN_MODE);
93 if (!fp) {
94 WARN("Could not open thumbnail file \"%s\"", thumb_filename);
95 return NULL;
96 }
97 if (!check_for_png(fp) && !check_for_jpeg(fp)) {
98 WARN("Thumbnail \"%s\" not a png/jpeg file!", thumb_filename);
99 MFCLOSE(fp);
100 return NULL;
101 }
102 MFCLOSE(fp);
103
104 xobj_id = pdf_ximage_findresource(thumb_filename, 0, NULL);
105 if (xobj_id < 0) {
106 WARN("Could not read thumbnail file \"%s\".", thumb_filename);
107 image_ref = NULL;
108 } else {
109 image_ref = pdf_ximage_get_reference(xobj_id);
110 }
111
112 return image_ref;
113 }
114
115 void
pdf_doc_set_verbose(void)116 pdf_doc_set_verbose (void)
117 {
118 verbose++;
119 pdf_font_set_verbose();
120 pdf_color_set_verbose();
121 pdf_ximage_set_verbose();
122 }
123
124 typedef struct pdf_form
125 {
126 char *ident;
127
128 pdf_tmatrix matrix;
129 pdf_rect cropbox;
130
131 pdf_obj *resources;
132 pdf_obj *contents;
133 } pdf_form;
134
135 struct form_list_node
136 {
137 int q_depth;
138 pdf_form form;
139
140 struct form_list_node *prev;
141 };
142
143 #define USE_MY_MEDIABOX (1 << 0)
144 typedef struct pdf_page
145 {
146 pdf_obj *page_obj;
147 pdf_obj *page_ref;
148
149 int flags;
150
151 double ref_x, ref_y;
152 pdf_rect cropbox;
153
154 pdf_obj *resources;
155
156 /* Contents */
157 pdf_obj *background;
158 pdf_obj *contents;
159
160 /* global bop, background, contents, global eop */
161 pdf_obj *content_refs[4];
162
163 pdf_obj *annots;
164 pdf_obj *beads;
165 } pdf_page;
166
167 typedef struct pdf_olitem
168 {
169 pdf_obj *dict;
170
171 int is_open;
172
173 struct pdf_olitem *first;
174 struct pdf_olitem *parent;
175
176 struct pdf_olitem *next;
177 } pdf_olitem;
178
179 typedef struct pdf_bead
180 {
181 char *id;
182 long page_no;
183 pdf_rect rect;
184 } pdf_bead;
185
186 typedef struct pdf_article
187 {
188 char *id;
189 pdf_obj *info;
190 long num_beads;
191 long max_beads;
192 pdf_bead *beads;
193 } pdf_article;
194
/* Associates a name category with the hash table holding its entries. */
struct name_dict {
  const char      *category;
  struct ht_table *data;
};
200
201
202 typedef struct pdf_doc
203 {
204 struct {
205 pdf_obj *dict;
206
207 pdf_obj *viewerpref;
208 pdf_obj *pagelabels;
209 pdf_obj *pages;
210 pdf_obj *names;
211 pdf_obj *threads;
212 } root;
213
214 pdf_obj *info;
215
216 struct {
217 pdf_rect mediabox;
218 pdf_obj *bop, *eop;
219
220 long num_entries; /* This is not actually total number of pages. */
221 long max_entries;
222 pdf_page *entries;
223 } pages;
224
225 struct {
226 pdf_olitem *first;
227 pdf_olitem *current;
228 int current_depth;
229 } outlines;
230
231 struct {
232 long num_entries;
233 long max_entries;
234 pdf_article *entries;
235 } articles;
236
237 struct name_dict *names;
238
239 int check_gotos;
240 struct ht_table gotos;
241
242 struct {
243 int outline_open_depth;
244 double annot_grow;
245 } opt;
246
247 struct form_list_node *pending_forms;
248
249 } pdf_doc;
250 static pdf_doc pdoc;
251
252 static void
pdf_doc_init_catalog(pdf_doc * p)253 pdf_doc_init_catalog (pdf_doc *p)
254 {
255 p->root.viewerpref = NULL;
256 p->root.pagelabels = NULL;
257 p->root.pages = NULL;
258 p->root.names = NULL;
259 p->root.threads = NULL;
260
261 p->root.dict = pdf_new_dict();
262 pdf_set_root(p->root.dict);
263
264 return;
265 }
266
267 static void
pdf_doc_close_catalog(pdf_doc * p)268 pdf_doc_close_catalog (pdf_doc *p)
269 {
270 pdf_obj *tmp;
271
272 if (p->root.viewerpref) {
273 tmp = pdf_lookup_dict(p->root.dict, "ViewerPreferences");
274 if (!tmp) {
275 pdf_add_dict(p->root.dict,
276 pdf_new_name("ViewerPreferences"),
277 pdf_ref_obj (p->root.viewerpref));
278 } else if (PDF_OBJ_DICTTYPE(tmp)) {
279 pdf_merge_dict(p->root.viewerpref, tmp);
280 pdf_add_dict(p->root.dict,
281 pdf_new_name("ViewerPreferences"),
282 pdf_ref_obj (p->root.viewerpref));
283 } else { /* Maybe reference */
284 /* What should I do? */
285 WARN("Could not modify ViewerPreferences.");
286 }
287 pdf_release_obj(p->root.viewerpref);
288 p->root.viewerpref = NULL;
289 }
290
291 if (p->root.pagelabels) {
292 tmp = pdf_lookup_dict(p->root.dict, "PageLabels");
293 if (!tmp) {
294 tmp = pdf_new_dict();
295 pdf_add_dict(tmp, pdf_new_name("Nums"), pdf_link_obj(p->root.pagelabels));
296 pdf_add_dict(p->root.dict,
297 pdf_new_name("PageLabels"), pdf_ref_obj(tmp));
298 pdf_release_obj(tmp);
299 } else { /* Maybe reference */
300 /* What should I do? */
301 WARN("Could not modify PageLabels.");
302 }
303 pdf_release_obj(p->root.pagelabels);
304 p->root.pagelabels = NULL;
305 }
306
307 pdf_add_dict(p->root.dict,
308 pdf_new_name("Type"), pdf_new_name("Catalog"));
309 pdf_release_obj(p->root.dict);
310 p->root.dict = NULL;
311
312 return;
313 }
314
/*
 * Pages are starting at 1.
 * The page count does not increase until the page is finished, so
 * LASTPAGE() addresses entries[num_entries]: the slot of the page that
 * is currently being built.
 *
 * The argument is parenthesized in every expansion so that any pointer
 * expression (not just a plain identifier) can be passed.
 */
#define LASTPAGE(p)  (&((p)->pages.entries[(p)->pages.num_entries]))
#define FIRSTPAGE(p) (&((p)->pages.entries[0]))
#define PAGECOUNT(p) ((p)->pages.num_entries)
#define MAXPAGES(p)  ((p)->pages.max_entries)
323
324 static void
doc_resize_page_entries(pdf_doc * p,long size)325 doc_resize_page_entries (pdf_doc *p, long size)
326 {
327 if (size > MAXPAGES(p)) {
328 long i;
329
330 p->pages.entries = RENEW(p->pages.entries, size, struct pdf_page);
331 for (i = p->pages.max_entries; i < size; i++) {
332 p->pages.entries[i].page_obj = NULL;
333 p->pages.entries[i].page_ref = NULL;
334 p->pages.entries[i].flags = 0;
335 p->pages.entries[i].resources = NULL;
336 p->pages.entries[i].background = NULL;
337 p->pages.entries[i].contents = NULL;
338 p->pages.entries[i].content_refs[0] = NULL; /* global bop */
339 p->pages.entries[i].content_refs[1] = NULL; /* background */
340 p->pages.entries[i].content_refs[2] = NULL; /* page body */
341 p->pages.entries[i].content_refs[3] = NULL; /* global eop */
342 p->pages.entries[i].annots = NULL;
343 p->pages.entries[i].beads = NULL;
344 }
345 p->pages.max_entries = size;
346 }
347
348 return;
349 }
350
351 static pdf_page *
doc_get_page_entry(pdf_doc * p,unsigned long page_no)352 doc_get_page_entry (pdf_doc *p, unsigned long page_no)
353 {
354 pdf_page *page;
355
356 if (page_no > 65535ul) {
357 ERROR("Page number %ul too large!", page_no);
358 } else if (page_no == 0) {
359 ERROR("Invalid Page number %ul.", page_no);
360 }
361
362 if (page_no > MAXPAGES(p)) {
363 doc_resize_page_entries(p, page_no + PDFDOC_PAGES_ALLOC_SIZE);
364 }
365
366 page = &(p->pages.entries[page_no - 1]);
367
368 return page;
369 }
370
371 static void pdf_doc_init_page_tree (pdf_doc *p, double media_width, double media_height);
372 static void pdf_doc_close_page_tree (pdf_doc *p);
373
374 static void pdf_doc_init_names (pdf_doc *p, int check_gotos);
375 static void pdf_doc_close_names (pdf_doc *p);
376
377 static void pdf_doc_add_goto (pdf_obj *annot_dict);
378
379 static void pdf_doc_init_docinfo (pdf_doc *p);
380 static void pdf_doc_close_docinfo (pdf_doc *p);
381
382 static void pdf_doc_init_articles (pdf_doc *p);
383 static void pdf_doc_close_articles (pdf_doc *p);
384 static void pdf_doc_init_bookmarks (pdf_doc *p, int bm_open_depth);
385 static void pdf_doc_close_bookmarks (pdf_doc *p);
386
387 void
pdf_doc_set_bop_content(const char * content,unsigned length)388 pdf_doc_set_bop_content (const char *content, unsigned length)
389 {
390 pdf_doc *p = &pdoc;
391
392 ASSERT(p);
393
394 if (p->pages.bop) {
395 pdf_release_obj(p->pages.bop);
396 p->pages.bop = NULL;
397 }
398
399 if (length > 0) {
400 p->pages.bop = pdf_new_stream(STREAM_COMPRESS);
401 pdf_add_stream(p->pages.bop, content, length);
402 } else {
403 p->pages.bop = NULL;
404 }
405
406 return;
407 }
408
409 void
pdf_doc_set_eop_content(const char * content,unsigned length)410 pdf_doc_set_eop_content (const char *content, unsigned length)
411 {
412 pdf_doc *p = &pdoc;
413
414 if (p->pages.eop) {
415 pdf_release_obj(p->pages.eop);
416 p->pages.eop = NULL;
417 }
418
419 if (length > 0) {
420 p->pages.eop = pdf_new_stream(STREAM_COMPRESS);
421 pdf_add_stream(p->pages.eop, content, length);
422 } else {
423 p->pages.eop = NULL;
424 }
425
426 return;
427 }
428
#ifndef HAVE_TM_GMTOFF
#ifndef HAVE_TIMEZONE

/*
 * Auxiliary function to compute the timezone offset on systems that
 * neither support tm_gmtoff in struct tm nor have a timezone variable,
 * such as i386-solaris.
 *
 * The current time is broken down once as local time and once as UTC;
 * the result is the difference between mktime() applied to the two.
 */
static long
compute_timezone_offset (void)
{
  const time_t now = time(NULL);
  struct tm    utc;
  struct tm    local;

  localtime_r(&now, &local);
  gmtime_r(&now, &utc);
  return (long) (mktime(&local) - mktime(&utc));
}

#endif /* HAVE_TIMEZONE */
#endif /* HAVE_TM_GMTOFF */
451
452 /*
453 * Docinfo
454 */
/*
 * Write the current local time into `date_string` as a PDF date string
 * and return the length of the result.
 *
 * The form is D:YYYYMMDDhhmmss followed by the relation to UT:
 * "+HH'mm'" or "-HH'mm'" for a non-zero offset and "Z" when local time
 * equals UT.  With a four-digit year the longest result has 23
 * characters, so `date_string` must provide at least 24 bytes.
 */
static long
asn_date (char *date_string)
{
  long        tz_offset;
  time_t      current_time;
  struct tm  *bd_time;

  time(&current_time);
  bd_time = localtime(&current_time);

#ifdef HAVE_TM_GMTOFF
  tz_offset = bd_time->tm_gmtoff;
#else
#  ifdef HAVE_TIMEZONE
  tz_offset = -timezone;
#  else
  tz_offset = compute_timezone_offset();
#  endif /* HAVE_TIMEZONE */
#endif /* HAVE_TM_GMTOFF */

  if (tz_offset == 0) {
    /* Equal to UT: neither '+' nor '-' applies. */
    sprintf(date_string, "D:%04d%02d%02d%02d%02d%02dZ",
            bd_time->tm_year + 1900, bd_time->tm_mon + 1, bd_time->tm_mday,
            bd_time->tm_hour, bd_time->tm_min, bd_time->tm_sec);
  } else {
    sprintf(date_string, "D:%04d%02d%02d%02d%02d%02d%c%02ld'%02ld'",
            bd_time->tm_year + 1900, bd_time->tm_mon + 1, bd_time->tm_mday,
            bd_time->tm_hour, bd_time->tm_min, bd_time->tm_sec,
            (tz_offset > 0) ? '+' : '-', labs(tz_offset) / 3600,
            (labs(tz_offset) / 60) % 60);
  }

  return (long) strlen(date_string);
}
483
484 static void
pdf_doc_init_docinfo(pdf_doc * p)485 pdf_doc_init_docinfo (pdf_doc *p)
486 {
487 p->info = pdf_new_dict();
488 pdf_set_info(p->info);
489
490 return;
491 }
492
493 static void
pdf_doc_close_docinfo(pdf_doc * p)494 pdf_doc_close_docinfo (pdf_doc *p)
495 {
496 pdf_obj *docinfo = p->info;
497
498 /*
499 * Excerpt from PDF Reference 4th ed., sec. 10.2.1.
500 *
501 * Any entry whose value is not known should be omitted from the dictionary,
502 * rather than included with an empty string as its value.
503 *
504 * ....
505 *
506 * Note: Although viewer applications can store custom metadata in the document
507 * information dictionary, it is inappropriate to store private content or
508 * structural information there; such information should be stored in the
509 * document catalog instead (see Section 3.6.1, Document Catalog ).
510 */
511 const char *keys[] = {
512 "Title", "Author", "Subject", "Keywords", "Creator", "Producer",
513 "CreationDate", "ModDate", /* Date */
514 NULL
515 };
516 pdf_obj *value;
517 int i;
518
519 for (i = 0; keys[i] != NULL; i++) {
520 value = pdf_lookup_dict(docinfo, keys[i]);
521 if (value) {
522 if (!PDF_OBJ_STRINGTYPE(value)) {
523 WARN("\"%s\" in DocInfo dictionary not string type.", keys[i]);
524 pdf_remove_dict(docinfo, keys[i]);
525 WARN("\"%s\" removed from DocInfo.", keys[i]);
526 } else if (pdf_string_length(value) == 0) {
527 /* The hyperref package often uses emtpy strings. */
528 pdf_remove_dict(docinfo, keys[i]);
529 }
530 }
531 }
532
533 if (!pdf_lookup_dict(docinfo, "Producer")) {
534 char *banner;
535
536 banner = NEW(strlen(my_name)+strlen(VERSION)+4, char);
537 sprintf(banner, "%s (%s)", my_name, VERSION);
538 pdf_add_dict(docinfo,
539 pdf_new_name("Producer"),
540 pdf_new_string(banner, strlen(banner)));
541 RELEASE(banner);
542 }
543
544 if (!pdf_lookup_dict(docinfo, "CreationDate")) {
545 char now[32];
546
547 asn_date(now);
548 pdf_add_dict(docinfo,
549 pdf_new_name ("CreationDate"),
550 pdf_new_string(now, strlen(now)));
551 }
552
553 pdf_release_obj(docinfo);
554 p->info = NULL;
555
556 return;
557 }
558
559 static pdf_obj *
pdf_doc_get_page_resources(pdf_doc * p,const char * category)560 pdf_doc_get_page_resources (pdf_doc *p, const char *category)
561 {
562 pdf_obj *resources;
563 pdf_page *currentpage;
564 pdf_obj *res_dict;
565
566 if (!p || !category) {
567 return NULL;
568 }
569
570 if (p->pending_forms) {
571 if (p->pending_forms->form.resources) {
572 res_dict = p->pending_forms->form.resources;
573 } else {
574 res_dict = p->pending_forms->form.resources = pdf_new_dict();
575 }
576 } else {
577 currentpage = LASTPAGE(p);
578 if (currentpage->resources) {
579 res_dict = currentpage->resources;
580 } else {
581 res_dict = currentpage->resources = pdf_new_dict();
582 }
583 }
584 resources = pdf_lookup_dict(res_dict, category);
585 if (!resources) {
586 resources = pdf_new_dict();
587 pdf_add_dict(res_dict, pdf_new_name(category), resources);
588 }
589
590 return resources;
591 }
592
593 void
pdf_doc_add_page_resource(const char * category,const char * resource_name,pdf_obj * resource_ref)594 pdf_doc_add_page_resource (const char *category,
595 const char *resource_name, pdf_obj *resource_ref)
596 {
597 pdf_doc *p = &pdoc;
598 pdf_obj *resources;
599 pdf_obj *duplicate;
600
601 if (!PDF_OBJ_INDIRECTTYPE(resource_ref)) {
602 WARN("Passed non indirect reference...");
603 resource_ref = pdf_ref_obj(resource_ref); /* leak */
604 }
605 resources = pdf_doc_get_page_resources(p, category);
606 duplicate = pdf_lookup_dict(resources, resource_name);
607 if (duplicate && pdf_compare_reference(duplicate, resource_ref)) {
608 WARN("Conflicting page resource found (page: %ld, category: %s, name: %s).",
609 pdf_doc_current_page_number(), category, resource_name);
610 WARN("Ignoring...");
611 pdf_release_obj(resource_ref);
612 } else {
613 pdf_add_dict(resources, pdf_new_name(resource_name), resource_ref);
614 }
615
616 return;
617 }
618
619 static void
doc_flush_page(pdf_doc * p,pdf_page * page,pdf_obj * parent_ref)620 doc_flush_page (pdf_doc *p, pdf_page *page, pdf_obj *parent_ref)
621 {
622 pdf_obj *contents_array;
623 int count;
624
625 pdf_add_dict(page->page_obj,
626 pdf_new_name("Type"), pdf_new_name("Page"));
627 pdf_add_dict(page->page_obj,
628 pdf_new_name("Parent"), parent_ref);
629
630 /*
631 * Clipping area specified by CropBox is affected by MediaBox which
632 * might be inherit from parent node. If MediaBox of the root node
633 * does not have enough size to cover all page's imaging area, using
634 * CropBox here gives incorrect result.
635 */
636 if (page->flags & USE_MY_MEDIABOX) {
637 pdf_obj *mediabox;
638
639 mediabox = pdf_new_array();
640 pdf_add_array(mediabox,
641 pdf_new_number(ROUND(page->cropbox.llx, 0.01)));
642 pdf_add_array(mediabox,
643 pdf_new_number(ROUND(page->cropbox.lly, 0.01)));
644 pdf_add_array(mediabox,
645 pdf_new_number(ROUND(page->cropbox.urx, 0.01)));
646 pdf_add_array(mediabox,
647 pdf_new_number(ROUND(page->cropbox.ury, 0.01)));
648 pdf_add_dict(page->page_obj, pdf_new_name("MediaBox"), mediabox);
649 }
650
651 count = 0;
652 contents_array = pdf_new_array();
653 if (page->content_refs[0]) { /* global bop */
654 pdf_add_array(contents_array, page->content_refs[0]);
655 count++;
656 } else if (p->pages.bop &&
657 pdf_stream_length(p->pages.bop) > 0) {
658 pdf_add_array(contents_array, pdf_ref_obj(p->pages.bop));
659 count++;
660 }
661 if (page->content_refs[1]) { /* background */
662 pdf_add_array(contents_array, page->content_refs[1]);
663 count++;
664 }
665 if (page->content_refs[2]) { /* page body */
666 pdf_add_array(contents_array, page->content_refs[2]);
667 count++;
668 }
669 if (page->content_refs[3]) { /* global eop */
670 pdf_add_array(contents_array, page->content_refs[3]);
671 count++;
672 } else if (p->pages.eop &&
673 pdf_stream_length(p->pages.eop) > 0) {
674 pdf_add_array(contents_array, pdf_ref_obj(p->pages.eop));
675 count++;
676 }
677
678 if (count == 0) {
679 WARN("Page with empty content found!!!");
680 }
681 page->content_refs[0] = NULL;
682 page->content_refs[1] = NULL;
683 page->content_refs[2] = NULL;
684 page->content_refs[3] = NULL;
685
686 pdf_add_dict(page->page_obj,
687 pdf_new_name("Contents"), contents_array);
688
689
690 if (page->annots) {
691 pdf_add_dict(page->page_obj,
692 pdf_new_name("Annots"), pdf_ref_obj(page->annots));
693 pdf_release_obj(page->annots);
694 }
695 if (page->beads) {
696 pdf_add_dict(page->page_obj,
697 pdf_new_name("B"), pdf_ref_obj(page->beads));
698 pdf_release_obj(page->beads);
699 }
700 pdf_release_obj(page->page_obj);
701 pdf_release_obj(page->page_ref);
702
703 page->page_obj = NULL;
704 page->page_ref = NULL;
705 page->annots = NULL;
706 page->beads = NULL;
707
708 return;
709 }
710
711 /* B-tree? */
712 #define PAGE_CLUSTER 4
713 static pdf_obj *
build_page_tree(pdf_doc * p,pdf_page * firstpage,long num_pages,pdf_obj * parent_ref)714 build_page_tree (pdf_doc *p,
715 pdf_page *firstpage, long num_pages,
716 pdf_obj *parent_ref)
717 {
718 pdf_obj *self, *self_ref, *kids;
719 long i;
720
721 self = pdf_new_dict();
722 /*
723 * This is a slight kludge which allow the subtree dictionary
724 * generated by this routine to be merged with the real
725 * page_tree dictionary, while keeping the indirect object
726 * references right.
727 */
728 self_ref = parent_ref ? pdf_ref_obj(self) : pdf_ref_obj(p->root.pages);
729
730 pdf_add_dict(self, pdf_new_name("Type"), pdf_new_name("Pages"));
731 pdf_add_dict(self, pdf_new_name("Count"), pdf_new_number((double) num_pages));
732
733 if (parent_ref != NULL)
734 pdf_add_dict(self, pdf_new_name("Parent"), parent_ref);
735
736 kids = pdf_new_array();
737 if (num_pages > 0 && num_pages <= PAGE_CLUSTER) {
738 for (i = 0; i < num_pages; i++) {
739 pdf_page *page;
740
741 page = firstpage + i;
742 if (!page->page_ref)
743 page->page_ref = pdf_ref_obj(page->page_obj);
744 pdf_add_array (kids, pdf_link_obj(page->page_ref));
745 doc_flush_page(p, page, pdf_link_obj(self_ref));
746 }
747 } else if (num_pages > 0) {
748 for (i = 0; i < PAGE_CLUSTER; i++) {
749 long start, end;
750
751 start = (i*num_pages)/PAGE_CLUSTER;
752 end = ((i+1)*num_pages)/PAGE_CLUSTER;
753 if (end - start > 1) {
754 pdf_obj *subtree;
755
756 subtree = build_page_tree(p, firstpage + start, end - start,
757 pdf_link_obj(self_ref));
758 pdf_add_array(kids, pdf_ref_obj(subtree));
759 pdf_release_obj(subtree);
760 } else {
761 pdf_page *page;
762
763 page = firstpage + start;
764 if (!page->page_ref)
765 page->page_ref = pdf_ref_obj(page->page_obj);
766 pdf_add_array (kids, pdf_link_obj(page->page_ref));
767 doc_flush_page(p, page, pdf_link_obj(self_ref));
768 }
769 }
770 }
771 pdf_add_dict(self, pdf_new_name("Kids"), kids);
772 pdf_release_obj(self_ref);
773
774 return self;
775 }
776
777 static void
pdf_doc_init_page_tree(pdf_doc * p,double media_width,double media_height)778 pdf_doc_init_page_tree (pdf_doc *p, double media_width, double media_height)
779 {
780 /*
781 * Create empty page tree.
782 * The docroot.pages is kept open until the document is closed.
783 * This allows the user to write to pages if he so choses.
784 */
785 p->root.pages = pdf_new_dict();
786
787 p->pages.num_entries = 0;
788 p->pages.max_entries = 0;
789 p->pages.entries = NULL;
790
791 p->pages.bop = NULL;
792 p->pages.eop = NULL;
793
794 p->pages.mediabox.llx = 0.0;
795 p->pages.mediabox.lly = 0.0;
796 p->pages.mediabox.urx = media_width;
797 p->pages.mediabox.ury = media_height;
798
799 return;
800 }
801
802 static void
pdf_doc_close_page_tree(pdf_doc * p)803 pdf_doc_close_page_tree (pdf_doc *p)
804 {
805 pdf_obj *page_tree_root;
806 pdf_obj *mediabox;
807 long page_no;
808
809 /*
810 * Do consistency check on forward references to pages.
811 */
812 for (page_no = PAGECOUNT(p) + 1; page_no <= MAXPAGES(p); page_no++) {
813 pdf_page *page;
814
815 page = doc_get_page_entry(p, page_no);
816 if (page->page_obj) {
817 WARN("Nonexistent page #%ld refered.", page_no);
818 pdf_release_obj(page->page_ref);
819 page->page_ref = NULL;
820 }
821 if (page->page_obj) {
822 WARN("Entry for a nonexistent page #%ld created.", page_no);
823 pdf_release_obj(page->page_obj);
824 page->page_obj = NULL;
825 }
826 if (page->annots) {
827 WARN("Annotation attached to a nonexistent page #%ld.", page_no);
828 pdf_release_obj(page->annots);
829 page->annots = NULL;
830 }
831 if (page->beads) {
832 WARN("Article beads attached to a nonexistent page #%ld.", page_no);
833 pdf_release_obj(page->beads);
834 page->beads = NULL;
835 }
836 if (page->resources) {
837 pdf_release_obj(page->resources);
838 page->resources = NULL;
839 }
840 }
841
842 /*
843 * Connect page tree to root node.
844 */
845 page_tree_root = build_page_tree(p, FIRSTPAGE(p), PAGECOUNT(p), NULL);
846 pdf_merge_dict (p->root.pages, page_tree_root);
847 pdf_release_obj(page_tree_root);
848
849 /* They must be after build_page_tree() */
850 if (p->pages.bop) {
851 pdf_add_stream (p->pages.bop, "\n", 1);
852 pdf_release_obj(p->pages.bop);
853 p->pages.bop = NULL;
854 }
855 if (p->pages.eop) {
856 pdf_add_stream (p->pages.eop, "\n", 1);
857 pdf_release_obj(p->pages.eop);
858 p->pages.eop = NULL;
859 }
860
861 /* Create media box at root node and let the other pages inherit it. */
862 mediabox = pdf_new_array();
863 pdf_add_array(mediabox, pdf_new_number(ROUND(p->pages.mediabox.llx, 0.01)));
864 pdf_add_array(mediabox, pdf_new_number(ROUND(p->pages.mediabox.lly, 0.01)));
865 pdf_add_array(mediabox, pdf_new_number(ROUND(p->pages.mediabox.urx, 0.01)));
866 pdf_add_array(mediabox, pdf_new_number(ROUND(p->pages.mediabox.ury, 0.01)));
867 pdf_add_dict(p->root.pages, pdf_new_name("MediaBox"), mediabox);
868
869 pdf_add_dict(p->root.dict,
870 pdf_new_name("Pages"),
871 pdf_ref_obj (p->root.pages));
872 pdf_release_obj(p->root.pages);
873 p->root.pages = NULL;
874
875 RELEASE(p->pages.entries);
876 p->pages.entries = NULL;
877 p->pages.num_entries = 0;
878 p->pages.max_entries = 0;
879
880 return;
881 }
882
883 /*
884 * From PDFReference15_v6.pdf (p.119 and p.834)
885 *
886 * MediaBox rectangle (Required; inheritable)
887 *
888 * The media box defines the boundaries of the physical medium on which the
889 * page is to be printed. It may include any extended area surrounding the
890 * finished page for bleed, printing marks, or other such purposes. It may
891 * also include areas close to the edges of the medium that cannot be marked
892 * because of physical limitations of the output device. Content falling
893 * outside this boundary can safely be discarded without affecting the
894 * meaning of the PDF file.
895 *
896 * CropBox rectangle (Optional; inheritable)
897 *
898 * The crop box defines the region to which the contents of the page are to be
899 * clipped (cropped) when displayed or printed. Unlike the other boxes, the
900 * crop box has no defined meaning in terms of physical page geometry or
901 * intended use; it merely imposes clipping on the page contents. However,
902 * in the absence of additional information (such as imposition instructions
903 * specified in a JDF or PJTF job ticket), the crop box will determine how
904 * the page's contents are to be positioned on the output medium. The default
905 * value is the page's media box.
906 *
907 * BleedBox rectangle (Optional; PDF 1.3)
908 *
909 * The bleed box (PDF 1.3) defines the region to which the contents of the
910 * page should be clipped when output in a production environment. This may
911 * include any extra "bleed area" needed to accommodate the physical
912 * limitations of cutting, folding, and trimming equipment. The actual printed
913 * page may include printing marks that fall outside the bleed box.
914 * The default value is the page's crop box.
915 *
916 * TrimBox rectangle (Optional; PDF 1.3)
917 *
918 * The trim box (PDF 1.3) defines the intended dimensions of the finished page
919 * after trimming. It may be smaller than the media box, to allow for
920 * production-related content such as printing instructions, cut marks, or
921 * color bars. The default value is the page's crop box.
922 *
923 * ArtBox rectangle (Optional; PDF 1.3)
924 *
925 * The art box (PDF 1.3) defines the extent of the page's meaningful content
926 * (including potential white space) as intended by the page's creator.
927 * The default value is the page's crop box.
928 *
929 * Rotate integer (Optional; inheritable)
930 *
931 * The number of degrees by which the page should be rotated clockwise when
932 * displayed or printed. The value must be a multiple of 90. Default value: 0.
933 */
934
935 pdf_obj *
pdf_doc_get_page(pdf_file * pf,long page_no,long * count_p,pdf_rect * bbox,pdf_obj ** resources_p)936 pdf_doc_get_page (pdf_file *pf, long page_no, long *count_p,
937 pdf_rect *bbox, pdf_obj **resources_p) {
938 pdf_obj *page_tree = NULL;
939 pdf_obj *resources = NULL, *box = NULL, *rotate = NULL;
940 pdf_obj *catalog;
941
942 catalog = pdf_file_get_catalog(pf);
943
944 page_tree = pdf_deref_obj(pdf_lookup_dict(catalog, "Pages"));
945
946 if (!PDF_OBJ_DICTTYPE(page_tree))
947 goto error;
948
949 {
950 long count;
951 pdf_obj *tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Count"));
952 if (!PDF_OBJ_NUMBERTYPE(tmp)) {
953 if (tmp)
954 pdf_release_obj(tmp);
955 goto error;
956 }
957 count = pdf_number_value(tmp);
958 pdf_release_obj(tmp);
959 if (count_p)
960 *count_p = count;
961 if (page_no <= 0 || page_no > count) {
962 WARN("Page %ld does not exist.", page_no);
963 goto error_silent;
964 }
965 }
966
967 /*
968 * Seek correct page. Get MediaBox, CropBox and Resources.
969 * (Note that these entries can be inherited.)
970 */
971 {
972 pdf_obj *media_box = NULL, *crop_box = NULL, *kids, *tmp;
973 int depth = PDF_OBJ_MAX_DEPTH;
974 long page_idx = page_no-1, kids_length = 1, i = 0;
975
976 while (--depth && i != kids_length) {
977 if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "MediaBox")))) {
978 if (media_box)
979 pdf_release_obj(media_box);
980 media_box = tmp;
981 }
982
983 if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "CropBox")))) {
984 if (crop_box)
985 pdf_release_obj(crop_box);
986 crop_box = tmp;
987 }
988
989 if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Rotate")))) {
990 if (rotate)
991 pdf_release_obj(rotate);
992 rotate = tmp;
993 }
994
995 if ((tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Resources")))) {
996 if (resources)
997 pdf_release_obj(resources);
998 resources = tmp;
999 }
1000
1001 kids = pdf_deref_obj(pdf_lookup_dict(page_tree, "Kids"));
1002 if (!kids)
1003 break;
1004 else if (!PDF_OBJ_ARRAYTYPE(kids)) {
1005 pdf_release_obj(kids);
1006 goto error;
1007 }
1008 kids_length = pdf_array_length(kids);
1009
1010 for (i = 0; i < kids_length; i++) {
1011 long count;
1012
1013 pdf_release_obj(page_tree);
1014 page_tree = pdf_deref_obj(pdf_get_array(kids, i));
1015 if (!PDF_OBJ_DICTTYPE(page_tree))
1016 goto error;
1017
1018 tmp = pdf_deref_obj(pdf_lookup_dict(page_tree, "Count"));
1019 if (PDF_OBJ_NUMBERTYPE(tmp)) {
1020 /* Pages object */
1021 count = pdf_number_value(tmp);
1022 pdf_release_obj(tmp);
1023 } else if (!tmp)
1024 /* Page object */
1025 count = 1;
1026 else {
1027 pdf_release_obj(tmp);
1028 goto error;
1029 }
1030
1031 if (page_idx < count)
1032 break;
1033
1034 page_idx -= count;
1035 }
1036
1037 pdf_release_obj(kids);
1038 }
1039
1040 if (!depth || kids_length == i) {
1041 if (media_box)
1042 pdf_release_obj(media_box);
1043 if (crop_box)
1044 pdf_release_obj(crop_box);
1045 goto error;
1046 }
1047
1048 if (crop_box)
1049 box = crop_box;
1050 else
1051 if (!(box = pdf_deref_obj(pdf_lookup_dict(page_tree, "ArtBox"))) &&
1052 !(box = pdf_deref_obj(pdf_lookup_dict(page_tree, "TrimBox"))) &&
1053 !(box = pdf_deref_obj(pdf_lookup_dict(page_tree, "BleedBox"))) &&
1054 media_box) {
1055 box = media_box;
1056 media_box = NULL;
1057 }
1058 if (media_box)
1059 pdf_release_obj(media_box);
1060 }
1061
1062 if (!PDF_OBJ_ARRAYTYPE(box) || pdf_array_length(box) != 4 ||
1063 !PDF_OBJ_DICTTYPE(resources))
1064 goto error;
1065
1066 if (PDF_OBJ_NUMBERTYPE(rotate)) {
1067 if (pdf_number_value(rotate))
1068 WARN("<< /Rotate %d >> found. (Not supported yet)",
1069 (int) pdf_number_value(rotate));
1070 pdf_release_obj(rotate);
1071 rotate = NULL;
1072 } else if (rotate)
1073 goto error;
1074
1075 {
1076 int i;
1077
1078 for (i = 4; i--; ) {
1079 double x;
1080 pdf_obj *tmp = pdf_deref_obj(pdf_get_array(box, i));
1081 if (!PDF_OBJ_NUMBERTYPE(tmp)) {
1082 pdf_release_obj(tmp);
1083 goto error;
1084 }
1085 x = pdf_number_value(tmp);
1086 switch (i) {
1087 case 0: bbox->llx = x; break;
1088 case 1: bbox->lly = x; break;
1089 case 2: bbox->urx = x; break;
1090 case 3: bbox->ury = x; break;
1091 }
1092 pdf_release_obj(tmp);
1093 }
1094 }
1095
1096 pdf_release_obj(box);
1097
1098 if (resources_p)
1099 *resources_p = resources;
1100 else if (resources)
1101 pdf_release_obj(resources);
1102
1103 return page_tree;
1104
1105 error:
1106 WARN("Cannot parse document. Broken PDF file?");
1107 error_silent:
1108 if (box)
1109 pdf_release_obj(box);
1110 if (rotate)
1111 pdf_release_obj(rotate);
1112 if (resources)
1113 pdf_release_obj(resources);
1114 if (page_tree)
1115 pdf_release_obj(page_tree);
1116
1117 return NULL;
1118 }
1119
1120 #ifndef BOOKMARKS_OPEN_DEFAULT
1121 #define BOOKMARKS_OPEN_DEFAULT 0
1122 #endif
1123
1124 static int clean_bookmarks (pdf_olitem *item);
1125 static int flush_bookmarks (pdf_olitem *item,
1126 pdf_obj *parent_ref,
1127 pdf_obj *parent_dict);
1128
1129 static void
pdf_doc_init_bookmarks(pdf_doc * p,int bm_open_depth)1130 pdf_doc_init_bookmarks (pdf_doc *p, int bm_open_depth)
1131 {
1132 pdf_olitem *item;
1133
1134 #define MAX_OUTLINE_DEPTH 256u
1135 p->opt.outline_open_depth =
1136 ((bm_open_depth >= 0) ?
1137 bm_open_depth : MAX_OUTLINE_DEPTH - bm_open_depth);
1138
1139 p->outlines.current_depth = 1;
1140
1141 item = NEW(1, pdf_olitem);
1142 item->dict = NULL;
1143 item->next = NULL;
1144 item->first = NULL;
1145 item->parent = NULL;
1146 item->is_open = 1;
1147
1148 p->outlines.current = item;
1149 p->outlines.first = item;
1150
1151 return;
1152 }
1153
1154 static int
clean_bookmarks(pdf_olitem * item)1155 clean_bookmarks (pdf_olitem *item)
1156 {
1157 pdf_olitem *next;
1158
1159 while (item) {
1160 next = item->next;
1161 if (item->dict)
1162 pdf_release_obj(item->dict);
1163 if (item->first)
1164 clean_bookmarks(item->first);
1165 RELEASE(item);
1166
1167 item = next;
1168 }
1169
1170 return 0;
1171 }
1172
1173 static int
flush_bookmarks(pdf_olitem * node,pdf_obj * parent_ref,pdf_obj * parent_dict)1174 flush_bookmarks (pdf_olitem *node,
1175 pdf_obj *parent_ref, pdf_obj *parent_dict)
1176 {
1177 int retval;
1178 int count;
1179 pdf_olitem *item;
1180 pdf_obj *this_ref, *prev_ref, *next_ref;
1181
1182 ASSERT(node->dict);
1183
1184 this_ref = pdf_ref_obj(node->dict);
1185 pdf_add_dict(parent_dict,
1186 pdf_new_name("First"), pdf_link_obj(this_ref));
1187
1188 retval = 0;
1189 for (item = node, prev_ref = NULL;
1190 item && item->dict; item = item->next) {
1191 if (item->first && item->first->dict) {
1192 count = flush_bookmarks(item->first, this_ref, item->dict);
1193 if (item->is_open) {
1194 pdf_add_dict(item->dict,
1195 pdf_new_name("Count"),
1196 pdf_new_number(count));
1197 retval += count;
1198 } else {
1199 pdf_add_dict(item->dict,
1200 pdf_new_name("Count"),
1201 pdf_new_number(-count));
1202 }
1203 }
1204 pdf_add_dict(item->dict,
1205 pdf_new_name("Parent"),
1206 pdf_link_obj(parent_ref));
1207 if (prev_ref) {
1208 pdf_add_dict(item->dict,
1209 pdf_new_name("Prev"),
1210 prev_ref);
1211 }
1212 if (item->next && item->next->dict) {
1213 next_ref = pdf_ref_obj(item->next->dict);
1214 pdf_add_dict(item->dict,
1215 pdf_new_name("Next"),
1216 pdf_link_obj(next_ref));
1217 } else {
1218 next_ref = NULL;
1219 }
1220
1221 pdf_release_obj(item->dict);
1222 item->dict = NULL;
1223
1224 prev_ref = this_ref;
1225 this_ref = next_ref;
1226 retval++;
1227 }
1228
1229 pdf_add_dict(parent_dict,
1230 pdf_new_name("Last"),
1231 pdf_link_obj(prev_ref));
1232
1233 pdf_release_obj(prev_ref);
1234 pdf_release_obj(node->dict);
1235 node->dict = NULL;
1236
1237 return retval;
1238 }
1239
1240 int
pdf_doc_bookmarks_up(void)1241 pdf_doc_bookmarks_up (void)
1242 {
1243 pdf_doc *p = &pdoc;
1244 pdf_olitem *parent, *item;
1245
1246 item = p->outlines.current;
1247 if (!item || !item->parent) {
1248 WARN("Can't go up above the bookmark root node!");
1249 return -1;
1250 }
1251 parent = item->parent;
1252 item = parent->next;
1253 if (!parent->next) {
1254 parent->next = item = NEW(1, pdf_olitem);
1255 item->dict = NULL;
1256 item->first = NULL;
1257 item->next = NULL;
1258 item->is_open = 0;
1259 item->parent = parent->parent;
1260 }
1261 p->outlines.current = item;
1262 p->outlines.current_depth--;
1263
1264 return 0;
1265 }
1266
1267 int
pdf_doc_bookmarks_down(void)1268 pdf_doc_bookmarks_down (void)
1269 {
1270 pdf_doc *p = &pdoc;
1271 pdf_olitem *item, *first;
1272
1273 item = p->outlines.current;
1274 if (!item->dict) {
1275 pdf_obj *tcolor, *action;
1276
1277 WARN("Empty bookmark node!");
1278 WARN("You have tried to jump more than 1 level.");
1279
1280 item->dict = pdf_new_dict();
1281
1282 #define TITLE_STRING "<No Title>"
1283 pdf_add_dict(item->dict,
1284 pdf_new_name("Title"),
1285 pdf_new_string(TITLE_STRING, strlen(TITLE_STRING)));
1286
1287 tcolor = pdf_new_array();
1288 pdf_add_array(tcolor, pdf_new_number(1.0));
1289 pdf_add_array(tcolor, pdf_new_number(0.0));
1290 pdf_add_array(tcolor, pdf_new_number(0.0));
1291 pdf_add_dict (item->dict,
1292 pdf_new_name("C"), pdf_link_obj(tcolor));
1293 pdf_release_obj(tcolor);
1294
1295 pdf_add_dict (item->dict,
1296 pdf_new_name("F"), pdf_new_number(1.0));
1297
1298 #define JS_CODE "app.alert(\"The author of this document made this bookmark item empty!\", 3, 0)"
1299 action = pdf_new_dict();
1300 pdf_add_dict(action,
1301 pdf_new_name("S"), pdf_new_name("JavaScript"));
1302 pdf_add_dict(action,
1303 pdf_new_name("JS"), pdf_new_string(JS_CODE, strlen(JS_CODE)));
1304 pdf_add_dict(item->dict,
1305 pdf_new_name("A"), pdf_link_obj(action));
1306 pdf_release_obj(action);
1307 }
1308
1309 item->first = first = NEW(1, pdf_olitem);
1310 first->dict = NULL;
1311 first->is_open = 0;
1312 first->parent = item;
1313 first->next = NULL;
1314 first->first = NULL;
1315
1316 p->outlines.current = first;
1317 p->outlines.current_depth++;
1318
1319 return 0;
1320 }
1321
1322 int
pdf_doc_bookmarks_depth(void)1323 pdf_doc_bookmarks_depth (void)
1324 {
1325 pdf_doc *p = &pdoc;
1326
1327 return p->outlines.current_depth;
1328 }
1329
1330 void
pdf_doc_bookmarks_add(pdf_obj * dict,int is_open)1331 pdf_doc_bookmarks_add (pdf_obj *dict, int is_open)
1332 {
1333 pdf_doc *p = &pdoc;
1334 pdf_olitem *item, *next;
1335
1336 ASSERT(p && dict);
1337
1338 item = p->outlines.current;
1339
1340 if (!item) {
1341 item = NEW(1, pdf_olitem);
1342 item->parent = NULL;
1343 p->outlines.first = item;
1344 } else if (item->dict) { /* go to next item */
1345 item = item->next;
1346 }
1347
1348 #define BMOPEN(b,p) (((b) < 0) ? (((p)->outlines.current_depth > (p)->opt.outline_open_depth) ? 0 : 1) : (b))
1349
1350 #if 0
1351 item->dict = pdf_link_obj(dict);
1352 #endif
1353 item->dict = dict;
1354 item->first = NULL;
1355 item->is_open = BMOPEN(is_open, p);
1356
1357 item->next = next = NEW(1, pdf_olitem);
1358 next->dict = NULL;
1359 next->parent = item->parent;
1360 next->first = NULL;
1361 next->is_open = -1;
1362 next->next = NULL;
1363
1364 p->outlines.current = item;
1365
1366 pdf_doc_add_goto(dict);
1367
1368 return;
1369 }
1370
1371 static void
pdf_doc_close_bookmarks(pdf_doc * p)1372 pdf_doc_close_bookmarks (pdf_doc *p)
1373 {
1374 pdf_obj *catalog = p->root.dict;
1375 pdf_olitem *item;
1376 int count;
1377 pdf_obj *bm_root, *bm_root_ref;
1378
1379 item = p->outlines.first;
1380 if (item->dict) {
1381 bm_root = pdf_new_dict();
1382 bm_root_ref = pdf_ref_obj(bm_root);
1383 count = flush_bookmarks(item, bm_root_ref, bm_root);
1384 pdf_add_dict(bm_root,
1385 pdf_new_name("Count"),
1386 pdf_new_number(count));
1387 pdf_add_dict(catalog,
1388 pdf_new_name("Outlines"),
1389 bm_root_ref);
1390 pdf_release_obj(bm_root);
1391 }
1392 clean_bookmarks(item);
1393
1394 p->outlines.first = NULL;
1395 p->outlines.current = NULL;
1396 p->outlines.current_depth = 0;
1397
1398 return;
1399 }
1400
1401
/* Categories for which a name tree can be kept, in lookup order. */
static const char *name_dict_categories[] = {
  "Dests",
  "AP",
  "JavaScript",
  "Pages",
  "Templates",
  "IDS",
  "URLS",
  "EmbeddedFiles",
  "AlternatePresentations",
  "Renditions"
};
/* Number of entries in name_dict_categories. */
#define NUM_NAME_CATEGORY (sizeof(name_dict_categories)/sizeof(name_dict_categories[0]))
1408
1409 static void
pdf_doc_init_names(pdf_doc * p,int check_gotos)1410 pdf_doc_init_names (pdf_doc *p, int check_gotos)
1411 {
1412 int i;
1413
1414 p->root.names = NULL;
1415
1416 p->names = NEW(NUM_NAME_CATEGORY + 1, struct name_dict);
1417 for (i = 0; i < NUM_NAME_CATEGORY; i++) {
1418 p->names[i].category = name_dict_categories[i];
1419 p->names[i].data = strcmp(name_dict_categories[i], "Dests") ?
1420 NULL : pdf_new_name_tree();
1421 /*
1422 * We need a non-null entry for PDF destinations in order to find
1423 * broken links even if no destination is defined in the DVI file.
1424 */
1425 }
1426 p->names[NUM_NAME_CATEGORY].category = NULL;
1427 p->names[NUM_NAME_CATEGORY].data = NULL;
1428
1429 p->check_gotos = check_gotos;
1430 ht_init_table(&p->gotos, (void (*) (void *)) pdf_release_obj);
1431
1432 return;
1433 }
1434
1435 int
pdf_doc_add_names(const char * category,const void * key,int keylen,pdf_obj * value)1436 pdf_doc_add_names (const char *category,
1437 const void *key, int keylen, pdf_obj *value)
1438 {
1439 pdf_doc *p = &pdoc;
1440 int i;
1441
1442 for (i = 0; p->names[i].category != NULL; i++) {
1443 if (!strcmp(p->names[i].category, category)) {
1444 break;
1445 }
1446 }
1447 if (p->names[i].category == NULL) {
1448 WARN("Unknown name dictionary category \"%s\".", category);
1449 return -1;
1450 }
1451 if (!p->names[i].data) {
1452 p->names[i].data = pdf_new_name_tree();
1453 }
1454
1455 return pdf_names_add_object(p->names[i].data, key, keylen, value);
1456 }
1457
1458 static void
pdf_doc_add_goto(pdf_obj * annot_dict)1459 pdf_doc_add_goto (pdf_obj *annot_dict)
1460 {
1461 pdf_obj *subtype = NULL, *A = NULL, *S = NULL, *D = NULL, *D_new, *dict;
1462 const char *dest, *key;
1463
1464 if (!pdoc.check_gotos)
1465 return;
1466
1467 /*
1468 * An annotation dictionary coming from an annotation special
1469 * must have a "Subtype". An annotation dictionary coming from
1470 * an outline special has none.
1471 */
1472 subtype = pdf_deref_obj(pdf_lookup_dict(annot_dict, "Subtype"));
1473 if (subtype) {
1474 if (PDF_OBJ_UNDEFINED(subtype))
1475 goto undefined;
1476 else if (!PDF_OBJ_NAMETYPE(subtype))
1477 goto error;
1478 else if (strcmp(pdf_name_value(subtype), "Link"))
1479 goto cleanup;
1480 }
1481
1482 dict = annot_dict;
1483 key = "Dest";
1484 D = pdf_deref_obj(pdf_lookup_dict(annot_dict, key));
1485 if (PDF_OBJ_UNDEFINED(D))
1486 goto undefined;
1487
1488 A = pdf_deref_obj(pdf_lookup_dict(annot_dict, "A"));
1489 if (A) {
1490 if (PDF_OBJ_UNDEFINED(A))
1491 goto undefined;
1492 else if (D || !PDF_OBJ_DICTTYPE(A))
1493 goto error;
1494 else {
1495 S = pdf_deref_obj(pdf_lookup_dict(A, "S"));
1496 if (PDF_OBJ_UNDEFINED(S))
1497 goto undefined;
1498 else if (!PDF_OBJ_NAMETYPE(S))
1499 goto error;
1500 else if (strcmp(pdf_name_value(S), "GoTo"))
1501 goto cleanup;
1502
1503 dict = A;
1504 key = "D";
1505 D = pdf_deref_obj(pdf_lookup_dict(A, key));
1506 }
1507 }
1508
1509 if (PDF_OBJ_STRINGTYPE(D))
1510 dest = (char *) pdf_string_value(D);
1511 #if 0
1512 /* Names as destinations are not supported by dvipdfmx */
1513 else if (PDF_OBJ_NAMETYPE(D))
1514 dest = pdf_name_value(D);
1515 #endif
1516 else if (PDF_OBJ_ARRAYTYPE(D))
1517 goto cleanup;
1518 else if (PDF_OBJ_UNDEFINED(D))
1519 goto undefined;
1520 else
1521 goto error;
1522
1523 D_new = ht_lookup_table(&pdoc.gotos, dest, strlen(dest));
1524 if (!D_new) {
1525 char buf[10];
1526
1527 /* We use hexadecimal notation for our numeric destinations.
1528 * Other bases (e.g., 10+26 or 10+2*26) would be more efficient.
1529 */
1530 sprintf(buf, "%lx", ht_table_size(&pdoc.gotos));
1531 D_new = pdf_new_string(buf, strlen(buf));
1532 ht_append_table(&pdoc.gotos, dest, strlen(dest), D_new);
1533 }
1534
1535 {
1536 pdf_obj *key_obj = pdf_new_name(key);
1537 if (!pdf_add_dict(dict, key_obj, pdf_link_obj(D_new)))
1538 pdf_release_obj(key_obj);
1539 }
1540
1541 cleanup:
1542 if (subtype)
1543 pdf_release_obj(subtype);
1544 if (A)
1545 pdf_release_obj(A);
1546 if (S)
1547 pdf_release_obj(S);
1548 if (D)
1549 pdf_release_obj(D);
1550
1551 return;
1552
1553 error:
1554 WARN("Unknown PDF annotation format. Output file may be broken.");
1555 goto cleanup;
1556
1557 undefined:
1558 WARN("Cannot optimize PDF annotations. Output file may be broken."
1559 " Please restart with option \"-C 0x10\"\n");
1560 goto cleanup;
1561 }
1562
1563 static void
warn_undef_dests(struct ht_table * dests,struct ht_table * gotos)1564 warn_undef_dests (struct ht_table *dests, struct ht_table *gotos)
1565 {
1566 struct ht_iter iter;
1567
1568 if (ht_set_iter(gotos, &iter) < 0)
1569 return;
1570
1571 do {
1572 int keylen;
1573 char *key = ht_iter_getkey(&iter, &keylen);
1574 if (!ht_lookup_table(dests, key, keylen)) {
1575 char *dest = NEW(keylen+1, char);
1576 memcpy(dest, key, keylen);
1577 dest[keylen] = 0;
1578 WARN("PDF destination \"%s\" not defined.", dest);
1579 RELEASE(dest);
1580 }
1581 } while (ht_iter_next(&iter) >= 0);
1582
1583 ht_clear_iter(&iter);
1584 }
1585
1586 static void
pdf_doc_close_names(pdf_doc * p)1587 pdf_doc_close_names (pdf_doc *p)
1588 {
1589 pdf_obj *tmp;
1590 int i;
1591
1592 for (i = 0; p->names[i].category != NULL; i++) {
1593 if (p->names[i].data) {
1594 struct ht_table *data = p->names[i].data;
1595 pdf_obj *name_tree;
1596 long count;
1597
1598 if (!pdoc.check_gotos || strcmp(p->names[i].category, "Dests"))
1599 name_tree = pdf_names_create_tree(data, &count, NULL);
1600 else {
1601 name_tree = pdf_names_create_tree(data, &count, &pdoc.gotos);
1602
1603 if (verbose && count < data->count)
1604 MESG("\nRemoved %ld unused PDF destinations\n", data->count-count);
1605
1606 if (count < pdoc.gotos.count)
1607 warn_undef_dests(data, &pdoc.gotos);
1608 }
1609
1610 if (name_tree) {
1611 if (!p->root.names)
1612 p->root.names = pdf_new_dict();
1613 pdf_add_dict(p->root.names,
1614 pdf_new_name(p->names[i].category),
1615 pdf_ref_obj(name_tree));
1616 pdf_release_obj(name_tree);
1617 }
1618 pdf_delete_name_tree(&p->names[i].data);
1619 }
1620 }
1621
1622 if (p->root.names) {
1623 tmp = pdf_lookup_dict(p->root.dict, "Names");
1624 if (!tmp) {
1625 pdf_add_dict(p->root.dict,
1626 pdf_new_name("Names"),
1627 pdf_ref_obj (p->root.names));
1628 } else if (PDF_OBJ_DICTTYPE(tmp)) {
1629 pdf_merge_dict(p->root.names, tmp);
1630 pdf_add_dict(p->root.dict,
1631 pdf_new_name("Names"),
1632 pdf_ref_obj (p->root.names));
1633 } else { /* Maybe reference */
1634 /* What should I do? */
1635 WARN("Could not modify Names dictionary.");
1636 }
1637 pdf_release_obj(p->root.names);
1638 p->root.names = NULL;
1639 }
1640
1641 RELEASE(p->names);
1642 p->names = NULL;
1643
1644 ht_clear_table(&p->gotos);
1645
1646 return;
1647 }
1648
1649
1650 void
pdf_doc_add_annot(unsigned page_no,const pdf_rect * rect,pdf_obj * annot_dict,int new_annot)1651 pdf_doc_add_annot (unsigned page_no, const pdf_rect *rect,
1652 pdf_obj *annot_dict, int new_annot)
1653 {
1654 pdf_doc *p = &pdoc;
1655 pdf_page *page;
1656 pdf_obj *rect_array;
1657 double annot_grow = p->opt.annot_grow;
1658 double xpos, ypos;
1659 pdf_rect annbox;
1660
1661 page = doc_get_page_entry(p, page_no);
1662 if (!page->annots)
1663 page->annots = pdf_new_array();
1664
1665 {
1666 pdf_rect mediabox;
1667
1668 pdf_doc_get_mediabox(page_no, &mediabox);
1669 pdf_dev_get_coord(&xpos, &ypos);
1670 annbox.llx = rect->llx - xpos; annbox.lly = rect->lly - ypos;
1671 annbox.urx = rect->urx - xpos; annbox.ury = rect->ury - ypos;
1672
1673 if (annbox.llx < mediabox.llx || annbox.urx > mediabox.urx ||
1674 annbox.lly < mediabox.lly || annbox.ury > mediabox.ury) {
1675 WARN("Annotation out of page boundary.");
1676 WARN("Current page's MediaBox: [%g %g %g %g]",
1677 mediabox.llx, mediabox.lly, mediabox.urx, mediabox.ury);
1678 WARN("Annotation: [%g %g %g %g]",
1679 annbox.llx, annbox.lly, annbox.urx, annbox.ury);
1680 WARN("Maybe incorrect paper size specified.");
1681 }
1682 if (annbox.llx > annbox.urx || annbox.lly > annbox.ury) {
1683 WARN("Rectangle with negative width/height: [%g %g %g %g]",
1684 annbox.llx, annbox.lly, annbox.urx, annbox.ury);
1685 }
1686 }
1687
1688 rect_array = pdf_new_array();
1689 pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.llx - annot_grow, 0.001)));
1690 pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.lly - annot_grow, 0.001)));
1691 pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.urx + annot_grow, 0.001)));
1692 pdf_add_array(rect_array, pdf_new_number(ROUND(annbox.ury + annot_grow, 0.001)));
1693 pdf_add_dict (annot_dict, pdf_new_name("Rect"), rect_array);
1694
1695 pdf_add_array(page->annots, pdf_ref_obj(annot_dict));
1696
1697 if (new_annot)
1698 pdf_doc_add_goto(annot_dict);
1699
1700 return;
1701 }
1702
1703
1704 /*
1705 * PDF Article Thread
1706 */
1707 static void
pdf_doc_init_articles(pdf_doc * p)1708 pdf_doc_init_articles (pdf_doc *p)
1709 {
1710 p->root.threads = NULL;
1711
1712 p->articles.num_entries = 0;
1713 p->articles.max_entries = 0;
1714 p->articles.entries = NULL;
1715
1716 return;
1717 }
1718
1719 void
pdf_doc_begin_article(const char * article_id,pdf_obj * article_info)1720 pdf_doc_begin_article (const char *article_id, pdf_obj *article_info)
1721 {
1722 pdf_doc *p = &pdoc;
1723 pdf_article *article;
1724
1725 if (article_id == NULL || strlen(article_id) == 0)
1726 ERROR("Article thread without internal identifier.");
1727
1728 if (p->articles.num_entries >= p->articles.max_entries) {
1729 p->articles.max_entries += PDFDOC_ARTICLE_ALLOC_SIZE;
1730 p->articles.entries = RENEW(p->articles.entries,
1731 p->articles.max_entries, struct pdf_article);
1732 }
1733 article = &(p->articles.entries[p->articles.num_entries]);
1734
1735 article->id = NEW(strlen(article_id)+1, char);
1736 strcpy(article->id, article_id);
1737 article->info = article_info;
1738 article->num_beads = 0;
1739 article->max_beads = 0;
1740 article->beads = NULL;
1741
1742 p->articles.num_entries++;
1743
1744 return;
1745 }
1746
#if 0
/* Disabled: ending an article thread needs no action. */
void
pdf_doc_end_article (const char *article_id)
{
  /* no-op */
}
#endif
1754
1755 static pdf_bead *
find_bead(pdf_article * article,const char * bead_id)1756 find_bead (pdf_article *article, const char *bead_id)
1757 {
1758 pdf_bead *bead;
1759 long i;
1760
1761 bead = NULL;
1762 for (i = 0; i < article->num_beads; i++) {
1763 if (!strcmp(article->beads[i].id, bead_id)) {
1764 bead = &(article->beads[i]);
1765 break;
1766 }
1767 }
1768
1769 return bead;
1770 }
1771
1772 void
pdf_doc_add_bead(const char * article_id,const char * bead_id,long page_no,const pdf_rect * rect)1773 pdf_doc_add_bead (const char *article_id,
1774 const char *bead_id, long page_no, const pdf_rect *rect)
1775 {
1776 pdf_doc *p = &pdoc;
1777 pdf_article *article;
1778 pdf_bead *bead;
1779 long i;
1780
1781 if (!article_id) {
1782 ERROR("No article identifier specified.");
1783 }
1784
1785 article = NULL;
1786 for (i = 0; i < p->articles.num_entries; i++) {
1787 if (!strcmp(p->articles.entries[i].id, article_id)) {
1788 article = &(p->articles.entries[i]);
1789 break;
1790 }
1791 }
1792 if (!article) {
1793 ERROR("Specified article thread that doesn't exist.");
1794 return;
1795 }
1796
1797 bead = bead_id ? find_bead(article, bead_id) : NULL;
1798 if (!bead) {
1799 if (article->num_beads >= article->max_beads) {
1800 article->max_beads += PDFDOC_BEAD_ALLOC_SIZE;
1801 article->beads = RENEW(article->beads,
1802 article->max_beads, struct pdf_bead);
1803 for (i = article->num_beads; i < article->max_beads; i++) {
1804 article->beads[i].id = NULL;
1805 article->beads[i].page_no = -1;
1806 }
1807 }
1808 bead = &(article->beads[article->num_beads]);
1809 if (bead_id) {
1810 bead->id = NEW(strlen(bead_id)+1, char);
1811 strcpy(bead->id, bead_id);
1812 } else {
1813 bead->id = NULL;
1814 }
1815 article->num_beads++;
1816 }
1817 bead->rect.llx = rect->llx;
1818 bead->rect.lly = rect->lly;
1819 bead->rect.urx = rect->urx;
1820 bead->rect.ury = rect->ury;
1821 bead->page_no = page_no;
1822
1823 return;
1824 }
1825
1826 static pdf_obj *
make_article(pdf_doc * p,pdf_article * article,const char ** bead_ids,int num_beads,pdf_obj * article_info)1827 make_article (pdf_doc *p,
1828 pdf_article *article,
1829 const char **bead_ids, int num_beads,
1830 pdf_obj *article_info)
1831 {
1832 pdf_obj *art_dict;
1833 pdf_obj *first, *prev, *last;
1834 long i, n;
1835
1836 if (!article)
1837 return NULL;
1838
1839 art_dict = pdf_new_dict();
1840 first = prev = last = NULL;
1841 /*
1842 * The bead_ids represents logical order of beads in an article thread.
1843 * If bead_ids is not given, we create an article thread in the order of
1844 * beads appeared.
1845 */
1846 n = bead_ids ? num_beads : article->num_beads;
1847 for (i = 0; i < n; i++) {
1848 pdf_bead *bead;
1849
1850 bead = bead_ids ? find_bead(article, bead_ids[i]) : &(article->beads[i]);
1851 if (!bead || bead->page_no < 0) {
1852 continue;
1853 }
1854 last = pdf_new_dict();
1855 if (prev == NULL) {
1856 first = last;
1857 pdf_add_dict(first,
1858 pdf_new_name("T"), pdf_ref_obj(art_dict));
1859 } else {
1860 pdf_add_dict(prev,
1861 pdf_new_name("N"), pdf_ref_obj(last));
1862 pdf_add_dict(last,
1863 pdf_new_name("V"), pdf_ref_obj(prev));
1864 /* We must link first to last. */
1865 if (prev != first)
1866 pdf_release_obj(prev);
1867 }
1868
1869 /* Realize bead now. */
1870 {
1871 pdf_page *page;
1872 pdf_obj *rect;
1873
1874 page = doc_get_page_entry(p, bead->page_no);
1875 if (!page->beads) {
1876 page->beads = pdf_new_array();
1877 }
1878 pdf_add_dict(last, pdf_new_name("P"), pdf_link_obj(page->page_ref));
1879 rect = pdf_new_array();
1880 pdf_add_array(rect, pdf_new_number(ROUND(bead->rect.llx, 0.01)));
1881 pdf_add_array(rect, pdf_new_number(ROUND(bead->rect.lly, 0.01)));
1882 pdf_add_array(rect, pdf_new_number(ROUND(bead->rect.urx, 0.01)));
1883 pdf_add_array(rect, pdf_new_number(ROUND(bead->rect.ury, 0.01)));
1884 pdf_add_dict (last, pdf_new_name("R"), rect);
1885 pdf_add_array(page->beads, pdf_ref_obj(last));
1886 }
1887
1888 prev = last;
1889 }
1890
1891 if (first && last) {
1892 pdf_add_dict(last,
1893 pdf_new_name("N"), pdf_ref_obj(first));
1894 pdf_add_dict(first,
1895 pdf_new_name("V"), pdf_ref_obj(last));
1896 if (first != last) {
1897 pdf_release_obj(last);
1898 }
1899 pdf_add_dict(art_dict,
1900 pdf_new_name("F"), pdf_ref_obj(first));
1901 /* If article_info is supplied, we override article->info. */
1902 if (article_info) {
1903 pdf_add_dict(art_dict,
1904 pdf_new_name("I"), article_info);
1905 } else if (article->info) {
1906 pdf_add_dict(art_dict,
1907 pdf_new_name("I"), pdf_ref_obj(article->info));
1908 pdf_release_obj(article->info);
1909 article->info = NULL; /* We do not write as object reference. */
1910 }
1911 pdf_release_obj(first);
1912 } else {
1913 pdf_release_obj(art_dict);
1914 art_dict = NULL;
1915 }
1916
1917 return art_dict;
1918 }
1919
1920 static void
clean_article(pdf_article * article)1921 clean_article (pdf_article *article)
1922 {
1923 if (!article)
1924 return;
1925
1926 if (article->beads) {
1927 long i;
1928
1929 for (i = 0; i < article->num_beads; i++) {
1930 if (article->beads[i].id)
1931 RELEASE(article->beads[i].id);
1932 }
1933 RELEASE(article->beads);
1934 article->beads = NULL;
1935 }
1936
1937 if (article->id)
1938 RELEASE(article->id);
1939 article->id = NULL;
1940 article->num_beads = 0;
1941 article->max_beads = 0;
1942
1943 return;
1944 }
1945
1946 static void
pdf_doc_close_articles(pdf_doc * p)1947 pdf_doc_close_articles (pdf_doc *p)
1948 {
1949 int i;
1950
1951 for (i = 0; i < p->articles.num_entries; i++) {
1952 pdf_article *article;
1953
1954 article = &(p->articles.entries[i]);
1955 if (article->beads) {
1956 pdf_obj *art_dict;
1957
1958 art_dict = make_article(p, article, NULL, 0, NULL);
1959 if (!p->root.threads) {
1960 p->root.threads = pdf_new_array();
1961 }
1962 pdf_add_array(p->root.threads, pdf_ref_obj(art_dict));
1963 pdf_release_obj(art_dict);
1964 }
1965 clean_article(article);
1966 }
1967 RELEASE(p->articles.entries);
1968 p->articles.entries = NULL;
1969 p->articles.num_entries = 0;
1970 p->articles.max_entries = 0;
1971
1972 if (p->root.threads) {
1973 pdf_add_dict(p->root.dict,
1974 pdf_new_name("Threads"),
1975 pdf_ref_obj (p->root.threads));
1976 pdf_release_obj(p->root.threads);
1977 p->root.threads = NULL;
1978 }
1979
1980 return;
1981 }
1982
1983 /* page_no = 0 for root page tree node. */
1984 void
pdf_doc_set_mediabox(unsigned page_no,const pdf_rect * mediabox)1985 pdf_doc_set_mediabox (unsigned page_no, const pdf_rect *mediabox)
1986 {
1987 pdf_doc *p = &pdoc;
1988 pdf_page *page;
1989
1990 if (page_no == 0) {
1991 p->pages.mediabox.llx = mediabox->llx;
1992 p->pages.mediabox.lly = mediabox->lly;
1993 p->pages.mediabox.urx = mediabox->urx;
1994 p->pages.mediabox.ury = mediabox->ury;
1995 } else {
1996 page = doc_get_page_entry(p, page_no);
1997 page->cropbox.llx = mediabox->llx;
1998 page->cropbox.lly = mediabox->lly;
1999 page->cropbox.urx = mediabox->urx;
2000 page->cropbox.ury = mediabox->ury;
2001 page->flags |= USE_MY_MEDIABOX;
2002 }
2003
2004 return;
2005 }
2006
2007 void
pdf_doc_get_mediabox(unsigned page_no,pdf_rect * mediabox)2008 pdf_doc_get_mediabox (unsigned page_no, pdf_rect *mediabox)
2009 {
2010 pdf_doc *p = &pdoc;
2011 pdf_page *page;
2012
2013 if (page_no == 0) {
2014 mediabox->llx = p->pages.mediabox.llx;
2015 mediabox->lly = p->pages.mediabox.lly;
2016 mediabox->urx = p->pages.mediabox.urx;
2017 mediabox->ury = p->pages.mediabox.ury;
2018 } else {
2019 page = doc_get_page_entry(p, page_no);
2020 if (page->flags & USE_MY_MEDIABOX) {
2021 mediabox->llx = page->cropbox.llx;
2022 mediabox->lly = page->cropbox.lly;
2023 mediabox->urx = page->cropbox.urx;
2024 mediabox->ury = page->cropbox.ury;
2025 } else {
2026 mediabox->llx = p->pages.mediabox.llx;
2027 mediabox->lly = p->pages.mediabox.lly;
2028 mediabox->urx = p->pages.mediabox.urx;
2029 mediabox->ury = p->pages.mediabox.ury;
2030 }
2031 }
2032
2033 return;
2034 }
2035
2036 pdf_obj *
pdf_doc_current_page_resources(void)2037 pdf_doc_current_page_resources (void)
2038 {
2039 pdf_obj *resources;
2040 pdf_doc *p = &pdoc;
2041 pdf_page *currentpage;
2042
2043 if (p->pending_forms) {
2044 if (p->pending_forms->form.resources) {
2045 resources = p->pending_forms->form.resources;
2046 } else {
2047 resources = p->pending_forms->form.resources = pdf_new_dict();
2048 }
2049 } else {
2050 currentpage = LASTPAGE(p);
2051 if (currentpage->resources) {
2052 resources = currentpage->resources;
2053 } else {
2054 resources = currentpage->resources = pdf_new_dict();
2055 }
2056 }
2057
2058 return resources;
2059 }
2060
2061 pdf_obj *
pdf_doc_get_dictionary(const char * category)2062 pdf_doc_get_dictionary (const char *category)
2063 {
2064 pdf_doc *p = &pdoc;
2065 pdf_obj *dict = NULL;
2066
2067 ASSERT(category);
2068
2069 if (!strcmp(category, "Names")) {
2070 if (!p->root.names)
2071 p->root.names = pdf_new_dict();
2072 dict = p->root.names;
2073 } else if (!strcmp(category, "Pages")) {
2074 if (!p->root.pages)
2075 p->root.pages = pdf_new_dict();
2076 dict = p->root.pages;
2077 } else if (!strcmp(category, "Catalog")) {
2078 if (!p->root.dict)
2079 p->root.dict = pdf_new_dict();
2080 dict = p->root.dict;
2081 } else if (!strcmp(category, "Info")) {
2082 if (!p->info)
2083 p->info = pdf_new_dict();
2084 dict = p->info;
2085 } else if (!strcmp(category, "@THISPAGE")) {
2086 /* Sorry for this... */
2087 pdf_page *currentpage;
2088
2089 currentpage = LASTPAGE(p);
2090 dict = currentpage->page_obj;
2091 }
2092
2093 if (!dict) {
2094 ERROR("Document dict. \"%s\" not exist. ", category);
2095 }
2096
2097 return dict;
2098 }
2099
2100 long
pdf_doc_current_page_number(void)2101 pdf_doc_current_page_number (void)
2102 {
2103 pdf_doc *p = &pdoc;
2104
2105 return (long) (PAGECOUNT(p) + 1);
2106 }
2107
2108 pdf_obj *
pdf_doc_ref_page(unsigned long page_no)2109 pdf_doc_ref_page (unsigned long page_no)
2110 {
2111 pdf_doc *p = &pdoc;
2112 pdf_page *page;
2113
2114 page = doc_get_page_entry(p, page_no);
2115 if (!page->page_obj) {
2116 page->page_obj = pdf_new_dict();
2117 page->page_ref = pdf_ref_obj(page->page_obj);
2118 }
2119
2120 return pdf_link_obj(page->page_ref);
2121 }
2122
2123 pdf_obj *
pdf_doc_get_reference(const char * category)2124 pdf_doc_get_reference (const char *category)
2125 {
2126 pdf_obj *ref = NULL;
2127 long page_no;
2128
2129 ASSERT(category);
2130
2131 page_no = pdf_doc_current_page_number();
2132 if (!strcmp(category, "@THISPAGE")) {
2133 ref = pdf_doc_ref_page(page_no);
2134 } else if (!strcmp(category, "@PREVPAGE")) {
2135 if (page_no <= 1) {
2136 ERROR("Reference to previous page, but no pages have been completed yet.");
2137 }
2138 ref = pdf_doc_ref_page(page_no - 1);
2139 } else if (!strcmp(category, "@NEXTPAGE")) {
2140 ref = pdf_doc_ref_page(page_no + 1);
2141 }
2142
2143 if (!ref) {
2144 ERROR("Reference to \"%s\" not exist. ", category);
2145 }
2146
2147 return ref;
2148 }
2149
2150 static void
pdf_doc_new_page(pdf_doc * p)2151 pdf_doc_new_page (pdf_doc *p)
2152 {
2153 pdf_page *currentpage;
2154
2155 if (PAGECOUNT(p) >= MAXPAGES(p)) {
2156 doc_resize_page_entries(p, MAXPAGES(p) + PDFDOC_PAGES_ALLOC_SIZE);
2157 }
2158
2159 /*
2160 * This is confusing. pdf_doc_finish_page() have increased page count!
2161 */
2162 currentpage = LASTPAGE(p);
2163 /* Was this page already instantiated by a forward reference to it? */
2164 if (!currentpage->page_ref) {
2165 currentpage->page_obj = pdf_new_dict();
2166 currentpage->page_ref = pdf_ref_obj(currentpage->page_obj);
2167 }
2168
2169 currentpage->background = NULL;
2170 currentpage->contents = pdf_new_stream(STREAM_COMPRESS);
2171 currentpage->resources = pdf_new_dict();
2172
2173 currentpage->annots = NULL;
2174 currentpage->beads = NULL;
2175
2176 return;
2177 }
2178
2179 /* This only closes contents and resources. */
2180 static void
pdf_doc_finish_page(pdf_doc * p)2181 pdf_doc_finish_page (pdf_doc *p)
2182 {
2183 pdf_page *currentpage;
2184
2185 if (p->pending_forms) {
2186 ERROR("A pending form XObject at the end of page.");
2187 }
2188
2189 currentpage = LASTPAGE(p);
2190 if (!currentpage->page_obj)
2191 currentpage->page_obj = pdf_new_dict();
2192
2193 /*
2194 * Make Contents array.
2195 */
2196
2197 /*
2198 * Global BOP content stream.
2199 * pdf_ref_obj() returns reference itself when the object is
2200 * indirect reference, not reference to the indirect reference.
2201 * We keep bop itself but not reference to it since it is
2202 * expected to be small.
2203 */
2204 if (p->pages.bop &&
2205 pdf_stream_length(p->pages.bop) > 0) {
2206 currentpage->content_refs[0] = pdf_ref_obj(p->pages.bop);
2207 } else {
2208 currentpage->content_refs[0] = NULL;
2209 }
2210 /*
2211 * Current page background content stream.
2212 */
2213 if (currentpage->background) {
2214 if (pdf_stream_length(currentpage->background) > 0) {
2215 currentpage->content_refs[1] = pdf_ref_obj(currentpage->background);
2216 pdf_add_stream (currentpage->background, "\n", 1);
2217 }
2218 pdf_release_obj(currentpage->background);
2219 currentpage->background = NULL;
2220 } else {
2221 currentpage->content_refs[1] = NULL;
2222 }
2223
2224 /* Content body of current page */
2225 currentpage->content_refs[2] = pdf_ref_obj(currentpage->contents);
2226 pdf_add_stream (currentpage->contents, "\n", 1);
2227 pdf_release_obj(currentpage->contents);
2228 currentpage->contents = NULL;
2229
2230 /*
2231 * Global EOP content stream.
2232 */
2233 if (p->pages.eop &&
2234 pdf_stream_length(p->pages.eop) > 0) {
2235 currentpage->content_refs[3] = pdf_ref_obj(p->pages.eop);
2236 } else {
2237 currentpage->content_refs[3] = NULL;
2238 }
2239
2240 /*
2241 * Page resources.
2242 */
2243 if (currentpage->resources) {
2244 pdf_obj *procset;
2245 /*
2246 * ProcSet is obsolete in PDF-1.4 but recommended for compatibility.
2247 */
2248
2249 procset = pdf_new_array ();
2250 pdf_add_array(procset, pdf_new_name("PDF"));
2251 pdf_add_array(procset, pdf_new_name("Text"));
2252 pdf_add_array(procset, pdf_new_name("ImageC"));
2253 pdf_add_array(procset, pdf_new_name("ImageB"));
2254 pdf_add_array(procset, pdf_new_name("ImageI"));
2255 pdf_add_dict(currentpage->resources, pdf_new_name("ProcSet"), procset);
2256
2257 pdf_add_dict(currentpage->page_obj,
2258 pdf_new_name("Resources"),
2259 pdf_ref_obj(currentpage->resources));
2260 pdf_release_obj(currentpage->resources);
2261 currentpage->resources = NULL;
2262 }
2263
2264 if (manual_thumb_enabled) {
2265 char *thumb_filename;
2266 pdf_obj *thumb_ref;
2267
2268 thumb_filename = NEW(strlen(thumb_basename)+7, char);
2269 sprintf(thumb_filename, "%s.%ld",
2270 thumb_basename, (p->pages.num_entries % 99999) + 1L);
2271 thumb_ref = read_thumbnail(thumb_filename);
2272 RELEASE(thumb_filename);
2273 if (thumb_ref)
2274 pdf_add_dict(currentpage->page_obj, pdf_new_name("Thumb"), thumb_ref);
2275 }
2276
2277 p->pages.num_entries++;
2278
2279 return;
2280 }
2281
2282 static pdf_color bgcolor = { 1, NULL, { 1.0 } };
2283
2284 void
pdf_doc_set_bgcolor(const pdf_color * color)2285 pdf_doc_set_bgcolor (const pdf_color *color)
2286 {
2287 if (color)
2288 pdf_color_copycolor(&bgcolor, color);
2289 else { /* as clear... */
2290 pdf_color_white(&bgcolor);
2291 }
2292 }
2293
2294 static void
doc_fill_page_background(pdf_doc * p)2295 doc_fill_page_background (pdf_doc *p)
2296 {
2297 pdf_page *currentpage;
2298 pdf_rect r;
2299 int cm;
2300 pdf_obj *saved_content;
2301
2302 cm = pdf_dev_get_param(PDF_DEV_PARAM_COLORMODE);
2303 if (!cm || pdf_color_is_white(&bgcolor)) {
2304 return;
2305 }
2306
2307 pdf_doc_get_mediabox(pdf_doc_current_page_number(), &r);
2308
2309 currentpage = LASTPAGE(p);
2310 ASSERT(currentpage);
2311
2312 if (!currentpage->background)
2313 currentpage->background = pdf_new_stream(STREAM_COMPRESS);
2314
2315 saved_content = currentpage->contents;
2316 currentpage->contents = currentpage->background;
2317
2318 pdf_dev_gsave();
2319 pdf_dev_set_nonstrokingcolor(&bgcolor);
2320 pdf_dev_rectfill(r.llx, r.lly, r.urx - r.llx, r.ury - r.lly);
2321 pdf_dev_grestore();
2322
2323 currentpage->contents = saved_content;
2324
2325 return;
2326 }
2327
2328 void
pdf_doc_begin_page(double scale,double x_origin,double y_origin)2329 pdf_doc_begin_page (double scale, double x_origin, double y_origin)
2330 {
2331 pdf_doc *p = &pdoc;
2332 pdf_tmatrix M;
2333
2334 M.a = scale; M.b = 0.0;
2335 M.c = 0.0 ; M.d = scale;
2336 M.e = x_origin;
2337 M.f = y_origin;
2338
2339 /* pdf_doc_new_page() allocates page content stream. */
2340 pdf_doc_new_page(p);
2341 pdf_dev_bop(&M);
2342
2343 return;
2344 }
2345
2346 void
pdf_doc_end_page(void)2347 pdf_doc_end_page (void)
2348 {
2349 pdf_doc *p = &pdoc;
2350
2351 pdf_dev_eop();
2352 doc_fill_page_background(p);
2353
2354 pdf_doc_finish_page(p);
2355
2356 return;
2357 }
2358
2359 void
pdf_doc_add_page_content(const char * buffer,unsigned length)2360 pdf_doc_add_page_content (const char *buffer, unsigned length)
2361 {
2362 pdf_doc *p = &pdoc;
2363 pdf_page *currentpage;
2364
2365 if (p->pending_forms) {
2366 pdf_add_stream(p->pending_forms->form.contents, buffer, length);
2367 } else {
2368 currentpage = LASTPAGE(p);
2369 pdf_add_stream(currentpage->contents, buffer, length);
2370 }
2371
2372 return;
2373 }
2374
2375 static char *doccreator = NULL; /* Ugh */
2376
2377 void
pdf_open_document(const char * filename,int do_encryption,double media_width,double media_height,double annot_grow_amount,int bookmark_open_depth,int check_gotos)2378 pdf_open_document (const char *filename,
2379 int do_encryption,
2380 double media_width, double media_height,
2381 double annot_grow_amount, int bookmark_open_depth,
2382 int check_gotos)
2383 {
2384 pdf_doc *p = &pdoc;
2385
2386 pdf_out_init(filename, do_encryption);
2387
2388 pdf_doc_init_catalog(p);
2389
2390 p->opt.annot_grow = annot_grow_amount;
2391 p->opt.outline_open_depth = bookmark_open_depth;
2392
2393 pdf_init_resources();
2394 pdf_init_colors();
2395 pdf_init_fonts();
2396 /* Thumbnail want this to be initialized... */
2397 pdf_init_images();
2398
2399 pdf_doc_init_docinfo(p);
2400 if (doccreator) {
2401 pdf_add_dict(p->info,
2402 pdf_new_name("Creator"),
2403 pdf_new_string(doccreator, strlen(doccreator)));
2404 RELEASE(doccreator); doccreator = NULL;
2405 }
2406
2407 pdf_doc_init_bookmarks(p, bookmark_open_depth);
2408 pdf_doc_init_articles (p);
2409 pdf_doc_init_names (p, check_gotos);
2410 pdf_doc_init_page_tree(p, media_width, media_height);
2411
2412 pdf_doc_set_bgcolor(NULL);
2413
2414 if (do_encryption) {
2415 pdf_obj *encrypt = pdf_encrypt_obj();
2416 pdf_set_encrypt(encrypt);
2417 pdf_release_obj(encrypt);
2418 }
2419 pdf_set_id(pdf_enc_id_array());
2420
2421 /* Create a default name for thumbnail image files */
2422 if (manual_thumb_enabled) {
2423 if (strlen(filename) > 4 &&
2424 !strncmp(".pdf", filename + strlen(filename) - 4, 4)) {
2425 thumb_basename = NEW(strlen(filename)-4+1, char);
2426 strncpy(thumb_basename, filename, strlen(filename)-4);
2427 thumb_basename[strlen(filename)-4] = 0;
2428 } else {
2429 thumb_basename = NEW(strlen(filename)+1, char);
2430 strcpy(thumb_basename, filename);
2431 }
2432 }
2433
2434 p->pending_forms = NULL;
2435
2436 return;
2437 }
2438
2439 void
pdf_doc_set_creator(const char * creator)2440 pdf_doc_set_creator (const char *creator)
2441 {
2442 if (!creator ||
2443 creator[0] == '\0')
2444 return;
2445
2446 doccreator = NEW(strlen(creator)+1, char);
2447 strcpy(doccreator, creator); /* Ugh */
2448 }
2449
2450
2451 void
pdf_close_document(void)2452 pdf_close_document (void)
2453 {
2454 pdf_doc *p = &pdoc;
2455
2456 /*
2457 * Following things were kept around so user can add dictionary items.
2458 */
2459 pdf_doc_close_articles (p);
2460 pdf_doc_close_names (p);
2461 pdf_doc_close_bookmarks(p);
2462 pdf_doc_close_page_tree(p);
2463 pdf_doc_close_docinfo (p);
2464
2465 pdf_doc_close_catalog (p);
2466
2467 pdf_close_images();
2468 pdf_close_fonts ();
2469 pdf_close_colors();
2470
2471 pdf_close_resources(); /* Should be at last. */
2472
2473 pdf_out_flush();
2474
2475 if (thumb_basename)
2476 RELEASE(thumb_basename);
2477
2478 return;
2479 }
2480
2481 /*
2482 * All this routine does is give the form a name and add a unity scaling matrix.
2483 * It fills in required fields. The caller must initialize the stream.
2484 */
2485 static void
pdf_doc_make_xform(pdf_obj * xform,pdf_rect * bbox,pdf_tmatrix * matrix,pdf_obj * resources,pdf_obj * attrib)2486 pdf_doc_make_xform (pdf_obj *xform,
2487 pdf_rect *bbox,
2488 pdf_tmatrix *matrix,
2489 pdf_obj *resources,
2490 pdf_obj *attrib)
2491 {
2492 pdf_obj *xform_dict;
2493 pdf_obj *tmp;
2494
2495 xform_dict = pdf_stream_dict(xform);
2496 pdf_add_dict(xform_dict,
2497 pdf_new_name("Type"), pdf_new_name("XObject"));
2498 pdf_add_dict(xform_dict,
2499 pdf_new_name("Subtype"), pdf_new_name("Form"));
2500 pdf_add_dict(xform_dict,
2501 pdf_new_name("FormType"), pdf_new_number(1.0));
2502
2503 if (!bbox)
2504 ERROR("No BoundingBox supplied.");
2505
2506 tmp = pdf_new_array();
2507 pdf_add_array(tmp, pdf_new_number(ROUND(bbox->llx, .001)));
2508 pdf_add_array(tmp, pdf_new_number(ROUND(bbox->lly, .001)));
2509 pdf_add_array(tmp, pdf_new_number(ROUND(bbox->urx, .001)));
2510 pdf_add_array(tmp, pdf_new_number(ROUND(bbox->ury, .001)));
2511 pdf_add_dict(xform_dict, pdf_new_name("BBox"), tmp);
2512
2513 if (matrix) {
2514 tmp = pdf_new_array();
2515 pdf_add_array(tmp, pdf_new_number(ROUND(matrix->a, .00001)));
2516 pdf_add_array(tmp, pdf_new_number(ROUND(matrix->b, .00001)));
2517 pdf_add_array(tmp, pdf_new_number(ROUND(matrix->c, .00001)));
2518 pdf_add_array(tmp, pdf_new_number(ROUND(matrix->d, .00001)));
2519 pdf_add_array(tmp, pdf_new_number(ROUND(matrix->e, .001 )));
2520 pdf_add_array(tmp, pdf_new_number(ROUND(matrix->f, .001 )));
2521 pdf_add_dict(xform_dict, pdf_new_name("Matrix"), tmp);
2522 }
2523
2524 if (attrib) {
2525 pdf_merge_dict(xform_dict, attrib);
2526 }
2527
2528 pdf_add_dict(xform_dict, pdf_new_name("Resources"), resources);
2529
2530 return;
2531 }
2532
2533 /*
2534 * begin_form_xobj creates an xobject with its "origin" at
2535 * xpos and ypos that is clipped to the specified bbox. Note
2536 * that the origin is not the lower left corner of the bbox.
2537 */
2538 int
pdf_doc_begin_grabbing(const char * ident,double ref_x,double ref_y,const pdf_rect * cropbox)2539 pdf_doc_begin_grabbing (const char *ident,
2540 double ref_x, double ref_y, const pdf_rect *cropbox)
2541 {
2542 int xobj_id = -1;
2543 pdf_doc *p = &pdoc;
2544 pdf_form *form;
2545 struct form_list_node *fnode;
2546 xform_info info;
2547
2548 pdf_dev_push_gstate();
2549
2550 fnode = NEW(1, struct form_list_node);
2551
2552 fnode->prev = p->pending_forms;
2553 fnode->q_depth = pdf_dev_current_depth();
2554 form = &fnode->form;
2555
2556 /*
2557 * The reference point of an Xobject is at the lower left corner
2558 * of the bounding box. Since we would like to have an arbitrary
2559 * reference point, we use a transformation matrix, translating
2560 * the reference point to (0,0).
2561 */
2562
2563 form->matrix.a = 1.0; form->matrix.b = 0.0;
2564 form->matrix.c = 0.0; form->matrix.d = 1.0;
2565 form->matrix.e = -ref_x;
2566 form->matrix.f = -ref_y;
2567
2568 form->cropbox.llx = ref_x + cropbox->llx;
2569 form->cropbox.lly = ref_y + cropbox->lly;
2570 form->cropbox.urx = ref_x + cropbox->urx;
2571 form->cropbox.ury = ref_y + cropbox->ury;
2572
2573 form->contents = pdf_new_stream(STREAM_COMPRESS);
2574 form->resources = pdf_new_dict();
2575
2576 pdf_ximage_init_form_info(&info);
2577
2578 info.matrix.a = 1.0; info.matrix.b = 0.0;
2579 info.matrix.c = 0.0; info.matrix.d = 1.0;
2580 info.matrix.e = -ref_x;
2581 info.matrix.f = -ref_y;
2582
2583 info.bbox.llx = cropbox->llx;
2584 info.bbox.lly = cropbox->lly;
2585 info.bbox.urx = cropbox->urx;
2586 info.bbox.ury = cropbox->ury;
2587
2588 /* Use reference since content itself isn't available yet. */
2589 xobj_id = pdf_ximage_defineresource(ident,
2590 PDF_XOBJECT_TYPE_FORM,
2591 &info, pdf_ref_obj(form->contents));
2592
2593 p->pending_forms = fnode;
2594
2595 /*
2596 * Make sure the object is self-contained by adding the
2597 * current font and color to the object stream.
2598 */
2599 pdf_dev_reset_fonts(1);
2600 pdf_dev_reset_color(1); /* force color operators to be added to stream */
2601
2602 return xobj_id;
2603 }
2604
2605 void
pdf_doc_end_grabbing(pdf_obj * attrib)2606 pdf_doc_end_grabbing (pdf_obj *attrib)
2607 {
2608 pdf_form *form;
2609 pdf_obj *procset;
2610 pdf_doc *p = &pdoc;
2611 struct form_list_node *fnode;
2612
2613 if (!p->pending_forms) {
2614 WARN("Tried to close a nonexistent form XOject.");
2615 return;
2616 }
2617
2618 fnode = p->pending_forms;
2619 form = &fnode->form;
2620
2621 pdf_dev_grestore_to(fnode->q_depth);
2622
2623 /*
2624 * ProcSet is obsolete in PDF-1.4 but recommended for compatibility.
2625 */
2626 procset = pdf_new_array();
2627 pdf_add_array(procset, pdf_new_name("PDF"));
2628 pdf_add_array(procset, pdf_new_name("Text"));
2629 pdf_add_array(procset, pdf_new_name("ImageC"));
2630 pdf_add_array(procset, pdf_new_name("ImageB"));
2631 pdf_add_array(procset, pdf_new_name("ImageI"));
2632 pdf_add_dict (form->resources, pdf_new_name("ProcSet"), procset);
2633
2634 pdf_doc_make_xform(form->contents,
2635 &form->cropbox, &form->matrix,
2636 pdf_ref_obj(form->resources), attrib);
2637 pdf_release_obj(form->resources);
2638 pdf_release_obj(form->contents);
2639 if (attrib) pdf_release_obj(attrib);
2640
2641 p->pending_forms = fnode->prev;
2642
2643 pdf_dev_pop_gstate();
2644
2645 pdf_dev_reset_fonts(1);
2646 pdf_dev_reset_color(0);
2647
2648 RELEASE(fnode);
2649
2650 return;
2651 }
2652
2653 static struct
2654 {
2655 int dirty;
2656 int broken;
2657 pdf_obj *annot_dict;
2658 pdf_rect rect;
2659 } breaking_state = {0, 0, NULL, {0.0, 0.0, 0.0, 0.0}};
2660
2661 static void
reset_box(void)2662 reset_box (void)
2663 {
2664 breaking_state.rect.llx = breaking_state.rect.lly = HUGE_VAL;
2665 breaking_state.rect.urx = breaking_state.rect.ury = -HUGE_VAL;
2666 breaking_state.dirty = 0;
2667 }
2668
2669 void
pdf_doc_begin_annot(pdf_obj * dict)2670 pdf_doc_begin_annot (pdf_obj *dict)
2671 {
2672 breaking_state.annot_dict = dict;
2673 breaking_state.broken = 0;
2674 reset_box();
2675 }
2676
2677 void
pdf_doc_end_annot(void)2678 pdf_doc_end_annot (void)
2679 {
2680 pdf_doc_break_annot();
2681 breaking_state.annot_dict = NULL;
2682 }
2683
2684 void
pdf_doc_break_annot(void)2685 pdf_doc_break_annot (void)
2686 {
2687 if (breaking_state.dirty) {
2688 pdf_obj *annot_dict;
2689
2690 /* Copy dict */
2691 annot_dict = pdf_new_dict();
2692 pdf_merge_dict(annot_dict, breaking_state.annot_dict);
2693 pdf_doc_add_annot(pdf_doc_current_page_number(), &(breaking_state.rect),
2694 annot_dict, !breaking_state.broken);
2695 pdf_release_obj(annot_dict);
2696
2697 breaking_state.broken = 1;
2698 }
2699 reset_box();
2700 }
2701
2702 void
pdf_doc_expand_box(const pdf_rect * rect)2703 pdf_doc_expand_box (const pdf_rect *rect)
2704 {
2705 breaking_state.rect.llx = MIN(breaking_state.rect.llx, rect->llx);
2706 breaking_state.rect.lly = MIN(breaking_state.rect.lly, rect->lly);
2707 breaking_state.rect.urx = MAX(breaking_state.rect.urx, rect->urx);
2708 breaking_state.rect.ury = MAX(breaking_state.rect.ury, rect->ury);
2709 breaking_state.dirty = 1;
2710 }
2711
#if 0
/*
 * Disabled code: append a page label entry (starting page index followed
 * by a label dictionary) to the catalog's page label array.
 * This should be number tree.
 */
void
pdf_doc_set_pagelabel (long  pg_start,
                       const char *type,
                       const void *prefix, int prfx_len, long start)
{
  pdf_doc *doc = &pdoc;
  pdf_obj *label;

  if (!doc->root.pagelabels) {
    doc->root.pagelabels = pdf_new_array();
  }

  label = pdf_new_dict();
  if (type && type[0] != '\0') {
    pdf_add_dict(label, pdf_new_name("S"), pdf_new_name(type));
    if (prefix && prfx_len > 0) {
      pdf_add_dict(label,
                   pdf_new_name("P"),
                   pdf_new_string(prefix, prfx_len));
    }
    if (start != 1) {
      pdf_add_dict(label,
                   pdf_new_name("St"), pdf_new_number(start));
    }
  } else {
    /* No style given: set back to default. */
    pdf_add_dict(label, pdf_new_name("S"), pdf_new_name("D"));
  }

  pdf_add_array(doc->root.pagelabels, pdf_new_number(pg_start));
  pdf_add_array(doc->root.pagelabels, label);
}
#endif
2746