1 /*
2 Copyright 1996-2014 Han The Thanh, <thanh@pdftex.org>
3
4 This file is part of pdfTeX.
5
6 pdfTeX is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 pdfTeX is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 /* Do this early in order to avoid a conflict between
21 MINGW32 <rpcndr.h> defining 'boolean' as 'unsigned char' and
22 <kpathsea/types.h> defining Pascal's boolean as 'int'.
23 */
24 #include <w2c/config.h>
25 #include <kpathsea/lib.h>
26
27 #include <stdlib.h>
28 #include <math.h>
29 #include <stddef.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33
34 #ifdef POPPLER_VERSION
35 #include <dirent.h>
36 #include <poppler-config.h>
37 #include <goo/GooString.h>
38 #include <goo/gmem.h>
39 #include <goo/gfile.h>
40 #define GString GooString
41 #else
42 #include <aconf.h>
43 #include <GString.h>
44 #include <gmem.h>
45 #include <gfile.h>
46 #endif
47 #include <assert.h>
48
49 #include "Object.h"
50 #include "Stream.h"
51 #include "Array.h"
52 #include "Dict.h"
53 #include "XRef.h"
54 #include "Catalog.h"
55 #include "Link.h"
56 #include "Page.h"
57 #include "GfxFont.h"
58 #include "PDFDoc.h"
59 #include "GlobalParams.h"
60 #include "Error.h"
61
62 // This file is mostly C and not very much C++; it's just used to interface
63 // the functions of xpdf, which are written in C++.
64
65 extern "C" {
66 #include <pdftexdir/ptexmac.h>
67 #include <pdftexdir/pdftex-common.h>
68
69 // This function from pdftex.web gets declared in pdftexcoerce.h in the
70 // usual web2c way, but we cannot include that file here because C++
71 // does not allow it.
72 extern int getpdfsuppresswarningpagegroup(void);
73 }
74
75 // The prefix "PTEX" for the PDF keys is special to pdfTeX;
76 // this has been registered with Adobe by Hans Hagen.
77
78 #define pdfkeyprefix "PTEX"
79
// PdfObject encapsulates the xpdf Object type,
// and properly frees its resources on destruction.
// Use obj-> to access members of the Object,
// and &obj to get a pointer to the object.
// It is no longer necessary to call Object::free explicitly.
85
86 class PdfObject {
87 public:
PdfObject()88 PdfObject() { // nothing
89 } ~PdfObject() {
90 iObject.free();
91 }
operator ->()92 Object *operator->() {
93 return &iObject;
94 }
operator &()95 Object *operator&() {
96 return &iObject;
97 }
98 private: // no copying or assigning
99 PdfObject(const PdfObject &);
100 void operator=(const PdfObject &);
101 public:
102 Object iObject;
103 };
104
// When copying the Resources of the selected page, all objects are copied
// recursively top-down. Indirect objects however are not fetched during
// copying, but get a new object number from pdfTeX and then will be
// appended into a linked list. Duplicates are checked and removed from the
// list of indirect objects during appending.
110
// Kind of an indirect object queued for copying.
enum InObjType {
    objFont,                    // font dictionary
    objFontDesc,                // font descriptor
    objOther                    // anything else
};
116
117 struct InObj {
118 Ref ref; // ref in original PDF
119 InObjType type; // object type
120 InObj *next; // next entry in list of indirect objects
121 int num; // new object number in output PDF
122 fd_entry *fd; // pointer to /FontDescriptor object structure
123 int enc_objnum; // Encoding for objFont
124 int written; // has it been written to output PDF?
125 };
126
127 struct UsedEncoding {
128 int enc_objnum;
129 GfxFont *font;
130 UsedEncoding *next;
131 };
132
133 static InObj *inObjList;
134 static UsedEncoding *encodingList;
135 static GBool isInit = gFalse;
136
137 // --------------------------------------------------------------------
138 // Maintain list of open embedded PDF files
139 // --------------------------------------------------------------------
140
141 struct PdfDocument {
142 char *file_name;
143 PDFDoc *doc;
144 XRef *xref;
145 InObj *inObjList;
146 int occurences; // number of references to the document; the doc can be
147 // deleted when this is negative
148 PdfDocument *next;
149 };
150
151 static PdfDocument *pdfDocuments = 0;
152
153 static XRef *xref = 0;
154
155 // Returns pointer to PdfDocument record for PDF file.
156 // Creates a new record if it doesn't exist yet.
157 // xref is made current for the document.
158
find_add_document(char * file_name)159 static PdfDocument *find_add_document(char *file_name)
160 {
161 PdfDocument *p = pdfDocuments;
162 while (p && strcmp(p->file_name, file_name) != 0)
163 p = p->next;
164 if (p) {
165 xref = p->xref;
166 (p->occurences)++;
167 return p;
168 }
169 p = new PdfDocument;
170 p->file_name = xstrdup(file_name);
171 p->xref = xref = 0;
172 p->occurences = 0;
173 GString *docName = new GString(p->file_name);
174 p->doc = new PDFDoc(docName); // takes ownership of docName
175 if (!p->doc->isOk() || !p->doc->okToPrint()) {
176 pdftex_fail("xpdf: reading PDF image failed");
177 }
178 p->inObjList = 0;
179 p->next = pdfDocuments;
180 pdfDocuments = p;
181 return p;
182 }
183
184 // Deallocate a PdfDocument with all its resources
185
delete_document(PdfDocument * pdf_doc)186 static void delete_document(PdfDocument * pdf_doc)
187 {
188 PdfDocument **p = &pdfDocuments;
189 while (*p && *p != pdf_doc)
190 p = &((*p)->next);
191 // should not happen:
192 if (!*p)
193 return;
194 // unlink from list
195 *p = pdf_doc->next;
196 // free pdf_doc's resources
197 InObj *r, *n;
198 for (r = pdf_doc->inObjList; r != 0; r = n) {
199 n = r->next;
200 delete r;
201 }
202 xref = pdf_doc->xref;
203 delete pdf_doc->doc;
204 xfree(pdf_doc->file_name);
205 delete pdf_doc;
206 }
207
208 // Replacement for
209 // Object *initDict(Dict *dict1){ initObj(objDict); dict = dict1; return this; }
210
initDictFromDict(PdfObject & obj,Dict * dict)211 static void initDictFromDict(PdfObject & obj, Dict * dict)
212 {
213 obj->initDict(xref);
214 for (int i = 0, l = dict->getLength(); i < l; i++) {
215 Object obj1;
216 obj->dictAdd(copyString(dict->getKey(i)), dict->getValNF(i, &obj1));
217 }
218 }
219
220 // --------------------------------------------------------------------
221
addEncoding(GfxFont * gfont)222 static int addEncoding(GfxFont * gfont)
223 {
224 UsedEncoding *n;
225 n = new UsedEncoding;
226 n->next = encodingList;
227 encodingList = n;
228 n->font = gfont;
229 n->enc_objnum = pdfnewobjnum();
230 return n->enc_objnum;
231 }
232
// Shorthands for addInObj(), one per object kind.

// font dictionary, with its font descriptor structure and encoding number
#define addFont(ref, fd, enc_objnum) addInObj(objFont, ref, fd, enc_objnum)

// addFontDesc is only used to avoid writing the original FontDescriptor
// from the PDF file.
#define addFontDesc(ref, fd) addInObj(objFontDesc, ref, fd, 0)

// any other indirect object
#define addOther(ref) addInObj(objOther, ref, 0, 0)
244
addInObj(InObjType type,Ref ref,fd_entry * fd,int e)245 static int addInObj(InObjType type, Ref ref, fd_entry * fd, int e)
246 {
247 InObj *p, *q, *n = new InObj;
248 if (ref.num == 0)
249 pdftex_fail("PDF inclusion: invalid reference");
250 n->ref = ref;
251 n->type = type;
252 n->next = 0;
253 n->fd = fd;
254 n->enc_objnum = e;
255 n->written = 0;
256 if (inObjList == 0)
257 inObjList = n;
258 else {
259 for (p = inObjList; p != 0; p = p->next) {
260 if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
261 delete n;
262 return p->num;
263 }
264 q = p;
265 }
266 // it is important to add new objects at the end of the list,
267 // because new objects are being added while the list is being
268 // written out.
269 q->next = n;
270 }
271 if (type == objFontDesc)
272 n->num = get_fd_objnum(fd);
273 else
274 n->num = pdfnewobjnum();
275 return n->num;
276 }
277
getNewObjectNumber(Ref ref)278 static int getNewObjectNumber(Ref ref)
279 {
280 InObj *p;
281 if (inObjList == 0) {
282 pdftex_fail("No objects copied yet");
283 } else {
284 for (p = inObjList; p != 0; p = p->next) {
285 if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
286 return p->num;
287 }
288 }
289 pdftex_fail("Object not yet copied: %i %i", ref.num, ref.gen);
290 }
291 #ifdef _MSC_VER
292 /* Never reached, but without __attribute__((noreturn)) for pdftex_fail()
293 MSVC 5.0 requires an int return value. */
294 return -60000;
295 #endif
296 }
297
298 static void copyObject(Object *);
299
copyName(char * s)300 static void copyName(char *s)
301 {
302 pdf_puts("/");
303 for (; *s != 0; s++) {
304 if (isdigit(*s) || isupper(*s) || islower(*s) || *s == '_' ||
305 *s == '.' || *s == '-' || *s == '+')
306 pdfout(*s);
307 else
308 pdf_printf("#%.2X", *s & 0xFF);
309 }
310 }
311
copyDictEntry(Object * obj,int i)312 static void copyDictEntry(Object * obj, int i)
313 {
314 PdfObject obj1;
315 copyName(obj->dictGetKey(i));
316 pdf_puts(" ");
317 obj->dictGetValNF(i, &obj1);
318 copyObject(&obj1);
319 pdf_puts("\n");
320 }
321
copyDict(Object * obj)322 static void copyDict(Object * obj)
323 {
324 int i, l;
325 if (!obj->isDict())
326 pdftex_fail("PDF inclusion: invalid dict type <%s>",
327 obj->getTypeName());
328 for (i = 0, l = obj->dictGetLength(); i < l; ++i)
329 copyDictEntry(obj, i);
330 }
331
copyFontDict(Object * obj,InObj * r)332 static void copyFontDict(Object * obj, InObj * r)
333 {
334 int i, l;
335 char *key;
336 if (!obj->isDict())
337 pdftex_fail("PDF inclusion: invalid dict type <%s>",
338 obj->getTypeName());
339 pdf_puts("<<\n");
340 assert(r->type == objFont); // FontDescriptor is in fd_tree
341 for (i = 0, l = obj->dictGetLength(); i < l; ++i) {
342 key = obj->dictGetKey(i);
343 if (strncmp("FontDescriptor", key, strlen("FontDescriptor")) == 0
344 || strncmp("BaseFont", key, strlen("BaseFont")) == 0
345 || strncmp("Encoding", key, strlen("Encoding")) == 0)
346 continue; // skip original values
347 copyDictEntry(obj, i);
348 }
349 // write new FontDescriptor, BaseFont, and Encoding
350 pdf_printf("/FontDescriptor %d 0 R\n", get_fd_objnum(r->fd));
351 pdf_printf("/BaseFont %d 0 R\n", get_fn_objnum(r->fd));
352 pdf_printf("/Encoding %d 0 R\n", r->enc_objnum);
353 pdf_puts(">>");
354 }
355
copyStream(Stream * str)356 static void copyStream(Stream * str)
357 {
358 int c, c2 = 0;
359 str->reset();
360 while ((c = str->getChar()) != EOF) {
361 pdfout(c);
362 c2 = c;
363 }
364 pdflastbyte = c2;
365 }
366
copyProcSet(Object * obj)367 static void copyProcSet(Object * obj)
368 {
369 int i, l;
370 PdfObject procset;
371 if (!obj->isArray())
372 pdftex_fail("PDF inclusion: invalid ProcSet array type <%s>",
373 obj->getTypeName());
374 pdf_puts("/ProcSet [ ");
375 for (i = 0, l = obj->arrayGetLength(); i < l; ++i) {
376 obj->arrayGetNF(i, &procset);
377 if (!procset->isName())
378 pdftex_fail("PDF inclusion: invalid ProcSet entry type <%s>",
379 procset->getTypeName());
380 copyName(procset->getName());
381 pdf_puts(" ");
382 }
383 pdf_puts("]\n");
384 }
385
386 #define REPLACE_TYPE1C true
387
copyFont(char * tag,Object * fontRef)388 static void copyFont(char *tag, Object * fontRef)
389 {
390 PdfObject fontdict, subtype, basefont, fontdescRef, fontdesc, charset,
391 fontfile, ffsubtype, stemV;
392 GfxFont *gfont;
393 fd_entry *fd;
394 fm_entry *fontmap;
395 // Check whether the font has already been embedded before analysing it.
396 InObj *p;
397 Ref ref = fontRef->getRef();
398 for (p = inObjList; p; p = p->next) {
399 if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
400 copyName(tag);
401 pdf_printf(" %d 0 R ", p->num);
402 return;
403 }
404 }
405 // Only handle included Type1 (and Type1C) fonts; anything else will be copied.
406 // Type1C fonts are replaced by Type1 fonts, if REPLACE_TYPE1C is true.
407 if (!fixedinclusioncopyfont && fontRef->fetch(xref, &fontdict)->isDict()
408 && fontdict->dictLookup((char *)"Subtype", &subtype)->isName()
409 && !strcmp(subtype->getName(), "Type1")
410 && fontdict->dictLookup((char *)"BaseFont", &basefont)->isName()
411 && fontdict->dictLookupNF((char *)"FontDescriptor", &fontdescRef)->isRef()
412 && fontdescRef->fetch(xref, &fontdesc)->isDict()
413 && (fontdesc->dictLookup((char *)"FontFile", &fontfile)->isStream()
414 || (REPLACE_TYPE1C
415 && fontdesc->dictLookup((char *)"FontFile3", &fontfile)->isStream()
416 && fontfile->streamGetDict()->lookup((char *)"Subtype",
417 &ffsubtype)->isName()
418 && !strcmp(ffsubtype->getName(), "Type1C")))
419 && (fontmap = lookup_fontmap(basefont->getName())) != NULL) {
420 // copy the value of /StemV
421 fontdesc->dictLookup((char *)"StemV", &stemV);
422 fd = epdf_create_fontdescriptor(fontmap, stemV->getInt());
423 if (fontdesc->dictLookup((char *)"CharSet", &charset) &&
424 charset->isString() && is_subsetable(fontmap))
425 epdf_mark_glyphs(fd, charset->getString()->getCString());
426 else
427 embed_whole_font(fd);
428 addFontDesc(fontdescRef->getRef(), fd);
429 copyName(tag);
430 gfont = GfxFont::makeFont(xref, tag, fontRef->getRef(),
431 fontdict->getDict());
432 pdf_printf(" %d 0 R ", addFont(fontRef->getRef(), fd,
433 addEncoding(gfont)));
434 } else {
435 copyName(tag);
436 pdf_puts(" ");
437 copyObject(fontRef);
438 }
439 }
440
copyFontResources(Object * obj)441 static void copyFontResources(Object * obj)
442 {
443 PdfObject fontRef;
444 int i, l;
445 if (!obj->isDict())
446 pdftex_fail("PDF inclusion: invalid font resources dict type <%s>",
447 obj->getTypeName());
448 pdf_puts("/Font << ");
449 for (i = 0, l = obj->dictGetLength(); i < l; ++i) {
450 obj->dictGetValNF(i, &fontRef);
451 if (fontRef->isRef())
452 copyFont(obj->dictGetKey(i), &fontRef);
453 else if (fontRef->isDict()) { // some programs generate pdf with embedded font object
454 copyName(obj->dictGetKey(i));
455 pdf_puts(" ");
456 copyObject(&fontRef);
457 }
458 else
459 pdftex_fail("PDF inclusion: invalid font in reference type <%s>",
460 fontRef->getTypeName());
461 }
462 pdf_puts(">>\n");
463 }
464
copyOtherResources(Object * obj,char * key)465 static void copyOtherResources(Object * obj, char *key)
466 {
467 // copies all other resources (write_epdf handles Fonts and ProcSets),
468
469 // if Subtype is present, it must be a name
470 if (strcmp("Subtype", key) == 0) {
471 if (!obj->isName()) {
472 pdftex_warn("PDF inclusion: Subtype in Resources dict is not a name"
473 " (key '%s', type <%s>); ignored.",
474 key, obj->getTypeName());
475 return;
476 }
477 } else if (!obj->isDict()) {
478 //FIXME: Write the message only to the log file
479 pdftex_warn("PDF inclusion: invalid other resource which is no dict"
480 " (key '%s', type <%s>); ignored.",
481 key, obj->getTypeName());
482 return;
483 }
484 copyName(key);
485 pdf_puts(" ");
486 copyObject(obj);
487 }
488
// Converts a double to a string; very small and very large numbers
// are NOT converted to scientific notation.
// n must be a number or real conforming to the implementation limits
// of PDF as specified in appendix C.1 of the PDF Ref.
// These are:
// maximum value of ints is +2^32
// maximum value of reals is +2^15
// smallest value of reals is 1/(2^16)
497
// Formats n in plain decimal notation (never scientific) with at most six
// digits after the decimal point, rounded to the nearest 10^-6; trailing
// zeros and a dangling decimal point are left out. Values whose magnitude
// is below 0.5E-6, and NaN, give "0".
// The integer part is printed from the double itself, so magnitudes above
// INT_MAX (such as 2^32) are written correctly instead of overflowing an
// int. Infinite input is not treated specially and is printed the way
// printf prints it.
// Returns a pointer to a static buffer that the next call overwrites.
static char *convertNumToPDF(double n)
{
    static const int precision = 6;
    static const int fact = (int) 1E6;  // must be 10^precision
    static const double epsilon = 0.5E-6;       // 2epsilon must be 10^-precision
    // the largest double has 309 integer digits; leave room for the sign,
    // the dot, the fraction digits and the terminator
    static char buf[336];
    int bindex = 0;
    int sindex;
    int fval;
    int written;
    double ipart;
    double frac;

    // handle very small values: return 0 (the negated test also catches NaN)
    if (!(fabs(n) >= epsilon)) {
        buf[0] = '0';
        buf[1] = '\0';
        return (char *) buf;
    }
    // handle the sign part if n is negative
    if (n < 0) {
        buf[bindex++] = '-';
        n = -n;
    }
    n += epsilon;               // for rounding

    // handle the integer part without going through an int
    ipart = floor(n);
    frac = n - ipart;
    if (!(frac >= 0 && frac < 1))
        frac = 0;               // infinite input: no usable fraction
    written = snprintf(buf + bindex, sizeof buf - (size_t) bindex,
                       "%.0f", ipart);
    if (written > 0)
        bindex += written;

    // handle the fractional part up to 'precision' digits
    fval = (int) floor(frac * fact);
    if (fval > 0) {
        buf[bindex++] = '.';
        sindex = bindex + precision;
        buf[sindex--] = '\0';
        // overwrite trailing zeros with the string terminator
        while (((fval % 10) == 0) && (sindex >= bindex)) {
            buf[sindex--] = '\0';
            fval /= 10;
        }
        // fill up the fractional part back to front
        while (sindex >= bindex) {
            buf[sindex--] = (char) ((fval % 10) + '0');
            fval /= 10;
        }
    } else {
        buf[bindex] = '\0';
    }
    return (char *) buf;
}
546
copyObject(Object * obj)547 static void copyObject(Object * obj)
548 {
549 PdfObject obj1;
550 int i, l, c;
551 Ref ref;
552 char *p;
553 GString *s;
554 if (obj->isBool()) {
555 pdf_printf("%s", obj->getBool()? "true" : "false");
556 } else if (obj->isInt()) {
557 pdf_printf("%i", obj->getInt());
558 } else if (obj->isReal()) {
559 pdf_printf("%s", convertNumToPDF(obj->getReal()));
560 } else if (obj->isNum()) {
561 pdf_printf("%s", convertNumToPDF(obj->getNum()));
562 } else if (obj->isString()) {
563 s = obj->getString();
564 p = s->getCString();
565 l = s->getLength();
566 if (strlen(p) == (unsigned int) l) {
567 pdf_puts("(");
568 for (; *p != 0; p++) {
569 c = (unsigned char) *p;
570 if (c == '(' || c == ')' || c == '\\')
571 pdf_printf("\\%c", c);
572 else if (c < 0x20 || c > 0x7F)
573 pdf_printf("\\%03o", c);
574 else
575 pdfout(c);
576 }
577 pdf_puts(")");
578 } else {
579 pdf_puts("<");
580 for (i = 0; i < l; i++) {
581 c = s->getChar(i) & 0xFF;
582 pdf_printf("%.2x", c);
583 }
584 pdf_puts(">");
585 }
586 } else if (obj->isName()) {
587 copyName(obj->getName());
588 } else if (obj->isNull()) {
589 pdf_puts("null");
590 } else if (obj->isArray()) {
591 pdf_puts("[");
592 for (i = 0, l = obj->arrayGetLength(); i < l; ++i) {
593 obj->arrayGetNF(i, &obj1);
594 if (!obj1->isName())
595 pdf_puts(" ");
596 copyObject(&obj1);
597 }
598 pdf_puts("]");
599 } else if (obj->isDict()) {
600 pdf_puts("<<\n");
601 copyDict(obj);
602 pdf_puts(">>");
603 } else if (obj->isStream()) {
604 initDictFromDict(obj1, obj->streamGetDict());
605 pdf_puts("<<\n");
606 copyDict(&obj1);
607 pdf_puts(">>\n");
608 pdf_puts("stream\n");
609 copyStream(obj->getStream()->getUndecodedStream());
610 pdf_puts("\nendstream");
611 } else if (obj->isRef()) {
612 ref = obj->getRef();
613 if (ref.num == 0) {
614 pdftex_fail
615 ("PDF inclusion: reference to invalid object"
616 " (is the included pdf broken?)");
617 } else
618 pdf_printf("%d 0 R", addOther(ref));
619 } else {
620 pdftex_fail("PDF inclusion: type <%s> cannot be copied",
621 obj->getTypeName());
622 }
623 }
624
writeRefs()625 static void writeRefs()
626 {
627 InObj *r;
628 for (r = inObjList; r != 0; r = r->next) {
629 if (!r->written) {
630 Object obj1;
631 r->written = 1;
632 xref->fetch(r->ref.num, r->ref.gen, &obj1);
633 if (r->type == objFont) {
634 assert(!obj1.isStream());
635 pdfbeginobj(r->num, 2); // \pdfobjcompresslevel = 2 is for this
636 copyFontDict(&obj1, r);
637 pdf_puts("\n");
638 pdfendobj();
639 } else if (r->type != objFontDesc) { // /FontDescriptor is written via write_fontdescriptor()
640 if (obj1.isStream())
641 pdfbeginobj(r->num, 0);
642 else
643 pdfbeginobj(r->num, 2); // \pdfobjcompresslevel = 2 is for this
644 copyObject(&obj1);
645 pdf_puts("\n");
646 pdfendobj();
647 }
648 obj1.free();
649 }
650 }
651 }
652
writeEncodings()653 static void writeEncodings()
654 {
655 UsedEncoding *r, *n;
656 char *glyphNames[256], *s;
657 int i;
658 for (r = encodingList; r != 0; r = r->next) {
659 for (i = 0; i < 256; i++) {
660 if (r->font->isCIDFont()) {
661 pdftex_fail
662 ("PDF inclusion: CID fonts are not supported"
663 " (try to disable font replacement to fix this)");
664 }
665 if ((s = ((Gfx8BitFont *) r->font)->getCharName(i)) != 0)
666 glyphNames[i] = s;
667 else
668 glyphNames[i] = notdef;
669 }
670 epdf_write_enc(glyphNames, r->enc_objnum);
671 }
672 for (r = encodingList; r != 0; r = n) {
673 n = r->next;
674 #ifdef POPPLER_VERSION
675 r->font->decRefCnt();
676 #else
677 delete r->font;
678 #endif
679 delete r;
680 }
681 }
682
683 // get the pagebox according to the pagebox_spec
get_pagebox(Page * page,int pagebox_spec)684 static PDFRectangle *get_pagebox(Page * page, int pagebox_spec)
685 {
686 if (pagebox_spec == pdfboxspecmedia)
687 return page->getMediaBox();
688 else if (pagebox_spec == pdfboxspeccrop)
689 return page->getCropBox();
690 else if (pagebox_spec == pdfboxspecbleed)
691 return page->getBleedBox();
692 else if (pagebox_spec == pdfboxspectrim)
693 return page->getTrimBox();
694 else if (pagebox_spec == pdfboxspecart)
695 return page->getArtBox();
696 else
697 pdftex_fail("PDF inclusion: unknown value of pagebox spec (%i)",
698 (int) pagebox_spec);
699 return page->getMediaBox(); // to make the compiler happy
700 }
701
702
703 // Reads various information about the PDF and sets it up for later inclusion.
704 // This will fail if the PDF version of the PDF is higher than
705 // minor_pdf_version_wanted or page_name is given and can not be found.
706 // It makes no sense to give page_name _and_ page_num.
707 // Returns the page number.
708
709 int
read_pdf_info(char * image_name,char * page_name,int page_num,int pagebox_spec,int minor_pdf_version_wanted,int pdf_inclusion_errorlevel)710 read_pdf_info(char *image_name, char *page_name, int page_num,
711 int pagebox_spec, int minor_pdf_version_wanted,
712 int pdf_inclusion_errorlevel)
713 {
714 PdfDocument *pdf_doc;
715 Page *page;
716 PDFRectangle *pagebox;
717 #ifdef POPPLER_VERSION
718 int pdf_major_version_found, pdf_minor_version_found;
719 #else
720 float pdf_version_found, pdf_version_wanted;
721 #endif
722 // initialize
723 if (!isInit) {
724 globalParams = new GlobalParams();
725 globalParams->setErrQuiet(gFalse);
726 isInit = gTrue;
727 }
728 // open PDF file
729 pdf_doc = find_add_document(image_name);
730 epdf_doc = (void *) pdf_doc;
731
732 // check PDF version
733 // this works only for PDF 1.x -- but since any versions of PDF newer
734 // than 1.x will not be backwards compatible to PDF 1.x, pdfTeX will
735 // then have to changed drastically anyway.
736 #ifdef POPPLER_VERSION
737 pdf_major_version_found = pdf_doc->doc->getPDFMajorVersion();
738 pdf_minor_version_found = pdf_doc->doc->getPDFMinorVersion();
739 if ((pdf_major_version_found > 1)
740 || (pdf_minor_version_found > minor_pdf_version_wanted)) {
741 const char *msg =
742 "PDF inclusion: found PDF version <%d.%d>, but at most version <1.%d> allowed";
743 if (pdf_inclusion_errorlevel > 0) {
744 pdftex_fail(msg, pdf_major_version_found, pdf_minor_version_found, minor_pdf_version_wanted);
745 } else if (pdf_inclusion_errorlevel < 0) {
746 ; /* do nothing */
747 } else { /* = 0, give warning */
748 pdftex_warn(msg, pdf_major_version_found, pdf_minor_version_found, minor_pdf_version_wanted);
749 }
750 }
751 #else
752 pdf_version_found = pdf_doc->doc->getPDFVersion();
753 pdf_version_wanted = 1 + (minor_pdf_version_wanted * 0.1);
754 if (pdf_version_found > pdf_version_wanted + 0.01) {
755 char msg[] =
756 "PDF inclusion: found PDF version <%.1f>, but at most version <%.1f> allowed";
757 if (pdf_inclusion_errorlevel > 0) {
758 pdftex_fail(msg, pdf_version_found, pdf_version_wanted);
759 } else if (pdf_inclusion_errorlevel < 0) {
760 ; /* do nothing */
761 } else { /* = 0, give warning */
762 pdftex_warn(msg, pdf_version_found, pdf_version_wanted);
763 }
764 }
765 #endif
766 epdf_num_pages = pdf_doc->doc->getCatalog()->getNumPages();
767 if (page_name) {
768 // get page by name
769 GString name(page_name);
770 LinkDest *link = pdf_doc->doc->findDest(&name);
771 if (link == 0 || !link->isOk())
772 pdftex_fail("PDF inclusion: invalid destination <%s>", page_name);
773 Ref ref = link->getPageRef();
774 page_num = pdf_doc->doc->getCatalog()->findPage(ref.num, ref.gen);
775 if (page_num == 0)
776 pdftex_fail("PDF inclusion: destination is not a page <%s>",
777 page_name);
778 delete link;
779 } else {
780 // get page by number
781 if (page_num <= 0 || page_num > epdf_num_pages)
782 pdftex_fail("PDF inclusion: required page does not exist <%i>",
783 epdf_num_pages);
784 }
785 // get the required page
786 page = pdf_doc->doc->getCatalog()->getPage(page_num);
787
788 // get the pagebox (media, crop...) to use.
789 pagebox = get_pagebox(page, pagebox_spec);
790 if (pagebox->x2 > pagebox->x1) {
791 epdf_orig_x = pagebox->x1;
792 epdf_width = pagebox->x2 - pagebox->x1;
793 } else {
794 epdf_orig_x = pagebox->x2;
795 epdf_width = pagebox->x1 - pagebox->x2;
796 }
797 if (pagebox->y2 > pagebox->y1) {
798 epdf_orig_y = pagebox->y1;
799 epdf_height = pagebox->y2 - pagebox->y1;
800 } else {
801 epdf_orig_y = pagebox->y2;
802 epdf_height = pagebox->y1 - pagebox->y2;
803 }
804
805 // get page rotation
806 epdf_rotate = page->getRotate() % 360;
807 if (epdf_rotate < 0)
808 epdf_rotate += 360;
809
810 // page group
811 if (page->getGroup() != NULL)
812 epdf_has_page_group = 1; // only flag that page group is present;
813 // the actual object number will be
814 // generated in pdftex.web
815 else
816 epdf_has_page_group = 0; // no page group present
817
818 pdf_doc->xref = pdf_doc->doc->getXRef();
819 return page_num;
820 }
821
// Writes the current epdf_doc.
// Here the included PDF is copied, so most errors that can happen during PDF
// inclusion will arise here.
825
write_epdf(void)826 void write_epdf(void)
827 {
828 Page *page;
829 Ref *pageRef;
830 Dict *pageDict;
831 PdfObject contents, obj1, obj2, pageObj, dictObj;
832 PdfObject groupDict;
833 bool writeSepGroup = false;
834 Object info;
835 char *key;
836 char s[256];
837 int i, l;
838 int rotate;
839 double scale[6] = { 0, 0, 0, 0, 0, 0 };
840 bool writematrix = false;
841 static const char *pageDictKeys[] = {
842 "LastModified",
843 "Metadata",
844 "PieceInfo",
845 "SeparationInfo",
846 // "Group",
847 // "Resources",
848 NULL
849 };
850
851 PdfDocument *pdf_doc = (PdfDocument *) epdf_doc;
852 (pdf_doc->occurences)--;
853 xref = pdf_doc->xref;
854 inObjList = pdf_doc->inObjList;
855 encodingList = 0;
856 page = pdf_doc->doc->getCatalog()->getPage(epdf_selected_page);
857 pageRef = pdf_doc->doc->getCatalog()->getPageRef(epdf_selected_page);
858 xref->fetch(pageRef->num, pageRef->gen, &pageObj);
859 pageDict = pageObj->getDict();
860 rotate = page->getRotate();
861 PDFRectangle *pagebox;
862 // write the Page header
863 pdf_puts("/Type /XObject\n");
864 pdf_puts("/Subtype /Form\n");
865 pdf_puts("/FormType 1\n");
866
867 // write additional information
868 pdf_printf("/%s.FileName (%s)\n", pdfkeyprefix,
869 convertStringToPDFString(pdf_doc->file_name,
870 strlen(pdf_doc->file_name)));
871 pdf_printf("/%s.PageNumber %i\n", pdfkeyprefix, epdf_selected_page);
872 pdf_doc->doc->getDocInfoNF(&info);
873 if (info.isRef()) {
874 // the info dict must be indirect (PDF Ref p. 61)
875 pdf_printf("/%s.InfoDict ", pdfkeyprefix);
876 pdf_printf("%d 0 R\n", addOther(info.getRef()));
877 }
878 // get the pagebox (media, crop...) to use.
879 pagebox = get_pagebox(page, epdf_page_box);
880
881 // handle page rotation
882 if (rotate != 0) {
883 if (rotate % 90 == 0) {
884 // this handles only the simple case: multiple of 90s but these
885 // are the only values allowed according to the reference
886 // (v1.3, p. 78).
887 // the image is rotated around its center.
888 // the /Rotate key is clockwise while the matrix is
889 // counterclockwise :-%
890 tex_printf(", page is rotated %d degrees", rotate);
891 switch (rotate) {
892 case 90:
893 scale[1] = -1;
894 scale[2] = 1;
895 scale[4] = pagebox->x1 - pagebox->y1;
896 scale[5] = pagebox->y1 + pagebox->x2;
897 writematrix = true;
898 break;
899 case 180:
900 scale[0] = scale[3] = -1;
901 scale[4] = pagebox->x1 + pagebox->x2;
902 scale[5] = pagebox->y1 + pagebox->y2;
903 writematrix = true;
904 break; // width and height are exchanged
905 case 270:
906 scale[1] = 1;
907 scale[2] = -1;
908 scale[4] = pagebox->x1 + pagebox->y2;
909 scale[5] = pagebox->y1 - pagebox->x1;
910 writematrix = true;
911 break;
912 }
913 if (writematrix) { // The matrix is only written if the image is rotated.
914 sprintf(s, "/Matrix [%.8f %.8f %.8f %.8f %.8f %.8f]\n",
915 scale[0],
916 scale[1], scale[2], scale[3], scale[4], scale[5]);
917 pdf_puts(stripzeros(s));
918 }
919 }
920 }
921
922 sprintf(s, "/BBox [%.8f %.8f %.8f %.8f]\n",
923 pagebox->x1, pagebox->y1, pagebox->x2, pagebox->y2);
924 pdf_puts(stripzeros(s));
925
926 // Metadata validity check (as a stream it must be indirect)
927 pageDict->lookupNF((char *)"Metadata", &dictObj);
928 if (!dictObj->isNull() && !dictObj->isRef())
929 pdftex_warn("PDF inclusion: /Metadata must be indirect object");
930
931 // copy selected items in Page dictionary except Resources & Group
932 for (i = 0; pageDictKeys[i] != NULL; i++) {
933 pageDict->lookupNF((char *)pageDictKeys[i], &dictObj);
934 if (!dictObj->isNull()) {
935 pdf_newline();
936 pdf_printf("/%s ", pageDictKeys[i]);
937 copyObject(&dictObj); // preserves indirection
938 }
939 }
940
941 // handle page group
942 pageDict->lookupNF((char *)"Group", &dictObj);
943 if (!dictObj->isNull()) {
944 if (pdfpagegroupval == 0) {
945 // another pdf with page group was included earlier on the
946 // same page; copy the Group entry as is. See manual for
947 // info on why this is a warning.
948 if (getpdfsuppresswarningpagegroup() == 0) {
949 pdftex_warn
950 ("PDF inclusion: multiple pdfs with page group included in a single page");
951 }
952 pdf_newline();
953 pdf_puts("/Group ");
954 copyObject(&dictObj);
955 } else {
956 // write Group dict as a separate object, since the Page dict also refers to it
957 pageDict->lookup((char *) "Group", &dictObj);
958 if (!dictObj->isDict())
959 pdftex_fail("PDF inclusion: /Group dict missing");
960 writeSepGroup = true;
961 initDictFromDict(groupDict, page->getGroup());
962 pdf_printf("/Group %d 0 R\n", pdfpagegroupval);
963 }
964 }
965
966 // write the Resources dictionary
967 if (page->getResourceDict() == NULL) {
968 // Resources can be missing (files without them have been spotted
969 // in the wild); in which case the /Resouces of the /Page will be used.
970 // "This practice is not recommended".
971 pdftex_warn
972 ("PDF inclusion: /Resources missing. 'This practice is not recommended' (PDF Ref)");
973 } else {
974 initDictFromDict(obj1, page->getResourceDict());
975 if (!obj1->isDict())
976 pdftex_fail("PDF inclusion: invalid resources dict type <%s>",
977 obj1->getTypeName());
978 pdf_newline();
979 pdf_puts("/Resources <<\n");
980 for (i = 0, l = obj1->dictGetLength(); i < l; ++i) {
981 obj1->dictGetVal(i, &obj2);
982 key = obj1->dictGetKey(i);
983 if (strcmp("Font", key) == 0)
984 copyFontResources(&obj2);
985 else if (strcmp("ProcSet", key) == 0)
986 copyProcSet(&obj2);
987 else
988 copyOtherResources(&obj2, key);
989 }
990 pdf_puts(">>\n");
991 }
992
993 // write the page contents
994 page->getContents(&contents);
995 if (contents->isStream()) {
996
997 // Variant A: get stream and recompress under control
998 // of \pdfcompresslevel
999 //
1000 // pdfbeginstream();
1001 // copyStream(contents->getStream());
1002 // pdfendstream();
1003
1004 // Variant B: copy stream without recompressing
1005 //
1006 contents->streamGetDict()->lookup((char *)"F", &obj1);
1007 if (!obj1->isNull()) {
1008 pdftex_fail("PDF inclusion: Unsupported external stream");
1009 }
1010 contents->streamGetDict()->lookup((char *)"Length", &obj1);
1011 assert(!obj1->isNull());
1012 pdf_puts("/Length ");
1013 copyObject(&obj1);
1014 pdf_puts("\n");
1015 contents->streamGetDict()->lookup((char *)"Filter", &obj1);
1016 if (!obj1->isNull()) {
1017 pdf_puts("/Filter ");
1018 copyObject(&obj1);
1019 pdf_puts("\n");
1020 contents->streamGetDict()->lookup((char *)"DecodeParms", &obj1);
1021 if (!obj1->isNull()) {
1022 pdf_puts("/DecodeParms ");
1023 copyObject(&obj1);
1024 pdf_puts("\n");
1025 }
1026 }
1027 pdf_puts(">>\nstream\n");
1028 copyStream(contents->getStream()->getUndecodedStream());
1029 pdfendstream();
1030 } else if (contents->isArray()) {
1031 pdfbeginstream();
1032 for (i = 0, l = contents->arrayGetLength(); i < l; ++i) {
1033 Object contentsobj;
1034 copyStream((contents->arrayGet(i, &contentsobj))->getStream());
1035 contentsobj.free();
1036 if (i < l - 1)
1037 pdf_newline(); // add a newline after each stream except the last
1038 }
1039 pdfendstream();
1040 } else { // the contents are optional, but we need to include an empty stream
1041 pdfbeginstream();
1042 pdfendstream();
1043 }
1044
1045 // write out all indirect objects
1046 writeRefs();
1047
1048 // write out all used encodings (and delete list)
1049 writeEncodings();
1050
1051 // write the Group dict if needed
1052 if (writeSepGroup) {
1053 pdfbeginobj(pdfpagegroupval, 2);
1054 copyObject(&groupDict);
1055 pdf_puts("\n");
1056 pdfendobj();
1057 pdfpagegroupval = 0; // only the 1st included pdf on a page gets its
1058 // Group included in the Page dict
1059 }
1060
1061 // save object list, xref
1062 pdf_doc->inObjList = inObjList;
1063 pdf_doc->xref = xref;
1064 }
1065
// Called when an image has been written and its resources in image_tab are
// freed and it's not referenced anymore.
1068
epdf_delete()1069 void epdf_delete()
1070 {
1071 PdfDocument *pdf_doc = (PdfDocument *) epdf_doc;
1072 xref = pdf_doc->xref;
1073 if (pdf_doc->occurences < 0) {
1074 delete_document(pdf_doc);
1075 }
1076 }
1077
1078 // Called when PDF embedding system is finalized.
1079 // Now deallocate all remaining PdfDocuments.
1080
epdf_check_mem()1081 void epdf_check_mem()
1082 {
1083 if (isInit) {
1084 PdfDocument *p, *n;
1085 for (p = pdfDocuments; p; p = n) {
1086 n = p->next;
1087 delete_document(p);
1088 }
1089 // see above for globalParams
1090 delete globalParams;
1091 }
1092 }
1093