1 /*
2 Copyright 1996-2014 Han The Thanh, <thanh@pdftex.org>
3 
4 This file is part of pdfTeX.
5 
6 pdfTeX is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10 
11 pdfTeX is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License along
17 with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 /* Do this early in order to avoid a conflict between
21    MINGW32 <rpcndr.h> defining 'boolean' as 'unsigned char' and
22    <kpathsea/types.h> defining Pascal's boolean as 'int'.
23 */
24 #include <w2c/config.h>
25 #include <kpathsea/lib.h>
26 
27 #include <stdlib.h>
28 #include <math.h>
29 #include <stddef.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 
34 #ifdef POPPLER_VERSION
35 #include <dirent.h>
36 #include <poppler-config.h>
37 #include <goo/GooString.h>
38 #include <goo/gmem.h>
39 #include <goo/gfile.h>
40 #define GString GooString
41 #else
42 #include <aconf.h>
43 #include <GString.h>
44 #include <gmem.h>
45 #include <gfile.h>
46 #endif
47 #include <assert.h>
48 
49 #include "Object.h"
50 #include "Stream.h"
51 #include "Array.h"
52 #include "Dict.h"
53 #include "XRef.h"
54 #include "Catalog.h"
55 #include "Link.h"
56 #include "Page.h"
57 #include "GfxFont.h"
58 #include "PDFDoc.h"
59 #include "GlobalParams.h"
60 #include "Error.h"
61 
62 // This file is mostly C and not very much C++; it's just used to interface
63 // the functions of xpdf, which are written in C++.
64 
65 extern "C" {
66 #include <pdftexdir/ptexmac.h>
67 #include <pdftexdir/pdftex-common.h>
68 
69 // This function from pdftex.web gets declared in pdftexcoerce.h in the
70 // usual web2c way, but we cannot include that file here because C++
71 // does not allow it.
72 extern int getpdfsuppresswarningpagegroup(void);
73 }
74 
// The prefix "PTEX" for the PDF keys is special to pdfTeX;
// this has been registered with Adobe by Hans Hagen.
// write_epdf() prepends it to the private keys it adds to the form
// XObject dictionary (<prefix>.FileName, <prefix>.PageNumber,
// <prefix>.InfoDict).

#define pdfkeyprefix "PTEX"
79 
80 // PdfObject encapsulates the xpdf Object type,
81 // and properly frees its resources on destruction.
82 // Use obj-> to access members of the Object,
83 // and &obj to get a pointer to the object.
// It is no longer necessary to call Object::free explicitly.
85 
86 class PdfObject {
87   public:
PdfObject()88     PdfObject() {               // nothing
89     } ~PdfObject() {
90         iObject.free();
91     }
operator ->()92     Object *operator->() {
93         return &iObject;
94     }
operator &()95     Object *operator&() {
96         return &iObject;
97     }
98   private:                     // no copying or assigning
99     PdfObject(const PdfObject &);
100     void operator=(const PdfObject &);
101   public:
102     Object iObject;
103 };
104 
105 // When copying the Resources of the selected page, all objects are copied
// recursively top-down. Indirect objects however are not fetched during
107 // copying, but get a new object number from pdfTeX and then will be
108 // appended into a linked list. Duplicates are checked and removed from the
109 // list of indirect objects during appending.
110 
// Kind of an indirect object recorded in the InObj list; writeRefs()
// and addInObj() treat the three kinds differently.
enum InObjType {
    objFont,                    // font dictionary, written via copyFontDict()
    objFontDesc,                // font descriptor, not copied by writeRefs()
    objOther                    // anything else, written via copyObject()
};
116 
// One node of the singly linked list of indirect objects referenced by
// the copied page. Nodes are appended by addInObj() and written out by
// writeRefs().
struct InObj {
    Ref ref;                    // ref in original PDF
    InObjType type;             // object type
    InObj *next;                // next entry in list of indirect objects
    int num;                    // new object number in output PDF
    fd_entry *fd;               // pointer to /FontDescriptor object structure
    int enc_objnum;             // Encoding for objFont
    int written;                // has it been written to output PDF?
};
126 
// One node of the list of font encodings that still have to be written.
// Nodes are pushed by addEncoding() and consumed (and freed) by
// writeEncodings().
struct UsedEncoding {
    int enc_objnum;             // object number reserved for the encoding
    GfxFont *font;              // font whose glyph names make up the encoding
    UsedEncoding *next;         // next entry in the list
};
132 
// Head of the indirect-object list currently in use; write_epdf() loads
// it from the PdfDocument being written.
static InObj *inObjList;
// Head of the list of encodings to write; write_epdf() resets it to 0.
static UsedEncoding *encodingList;
// Becomes gTrue once read_pdf_info() has created globalParams.
static GBool isInit = gFalse;
136 
137 // --------------------------------------------------------------------
138 // Maintain list of open embedded PDF files
139 // --------------------------------------------------------------------
140 
// Record for one embedded PDF file; the records form a singly linked
// list headed by pdfDocuments.
struct PdfDocument {
    char *file_name;            // copy of the file name (made with xstrdup)
    PDFDoc *doc;                // the opened document
    XRef *xref;                 // xref of the document, set by read_pdf_info()
    InObj *inObjList;           // indirect objects recorded for this document
    int occurences;             // number of references to the document; the
    // doc can be deleted when this is negative
    PdfDocument *next;          // next record in the list
};
150 
// Head of the list of PdfDocument records.
static PdfDocument *pdfDocuments = 0;

// xref of the document currently being worked on; set by
// find_add_document(), delete_document() and write_epdf().
static XRef *xref = 0;
154 
155 // Returns pointer to PdfDocument record for PDF file.
156 // Creates a new record if it doesn't exist yet.
157 // xref is made current for the document.
158 
find_add_document(char * file_name)159 static PdfDocument *find_add_document(char *file_name)
160 {
161     PdfDocument *p = pdfDocuments;
162     while (p && strcmp(p->file_name, file_name) != 0)
163         p = p->next;
164     if (p) {
165         xref = p->xref;
166         (p->occurences)++;
167         return p;
168     }
169     p = new PdfDocument;
170     p->file_name = xstrdup(file_name);
171     p->xref = xref = 0;
172     p->occurences = 0;
173     GString *docName = new GString(p->file_name);
174     p->doc = new PDFDoc(docName);       // takes ownership of docName
175     if (!p->doc->isOk() || !p->doc->okToPrint()) {
176         pdftex_fail("xpdf: reading PDF image failed");
177     }
178     p->inObjList = 0;
179     p->next = pdfDocuments;
180     pdfDocuments = p;
181     return p;
182 }
183 
184 // Deallocate a PdfDocument with all its resources
185 
delete_document(PdfDocument * pdf_doc)186 static void delete_document(PdfDocument * pdf_doc)
187 {
188     PdfDocument **p = &pdfDocuments;
189     while (*p && *p != pdf_doc)
190         p = &((*p)->next);
191     // should not happen:
192     if (!*p)
193         return;
194     // unlink from list
195     *p = pdf_doc->next;
196     // free pdf_doc's resources
197     InObj *r, *n;
198     for (r = pdf_doc->inObjList; r != 0; r = n) {
199         n = r->next;
200         delete r;
201     }
202     xref = pdf_doc->xref;
203     delete pdf_doc->doc;
204     xfree(pdf_doc->file_name);
205     delete pdf_doc;
206 }
207 
208 // Replacement for
209 //      Object *initDict(Dict *dict1){ initObj(objDict); dict = dict1; return this; }
210 
initDictFromDict(PdfObject & obj,Dict * dict)211 static void initDictFromDict(PdfObject & obj, Dict * dict)
212 {
213     obj->initDict(xref);
214     for (int i = 0, l = dict->getLength(); i < l; i++) {
215         Object obj1;
216         obj->dictAdd(copyString(dict->getKey(i)), dict->getValNF(i, &obj1));
217     }
218 }
219 
220 // --------------------------------------------------------------------
221 
addEncoding(GfxFont * gfont)222 static int addEncoding(GfxFont * gfont)
223 {
224     UsedEncoding *n;
225     n = new UsedEncoding;
226     n->next = encodingList;
227     encodingList = n;
228     n->font = gfont;
229     n->enc_objnum = pdfnewobjnum();
230     return n->enc_objnum;
231 }
232 
// Convenience wrappers around addInObj(); each fixes the object type and
// fills the arguments that type does not use with 0.

// addFont records a font dictionary together with its font descriptor
// structure and the object number of its encoding.
#define addFont(ref, fd, enc_objnum) \
        addInObj(objFont, ref, fd, enc_objnum)

// addFontDesc is only used to avoid writing the original FontDescriptor
// from the PDF file.

#define addFontDesc(ref, fd) \
        addInObj(objFontDesc, ref, fd, 0)

// addOther records any other indirect object.
#define addOther(ref) \
        addInObj(objOther, ref, 0, 0)
244 
addInObj(InObjType type,Ref ref,fd_entry * fd,int e)245 static int addInObj(InObjType type, Ref ref, fd_entry * fd, int e)
246 {
247     InObj *p, *q, *n = new InObj;
248     if (ref.num == 0)
249         pdftex_fail("PDF inclusion: invalid reference");
250     n->ref = ref;
251     n->type = type;
252     n->next = 0;
253     n->fd = fd;
254     n->enc_objnum = e;
255     n->written = 0;
256     if (inObjList == 0)
257         inObjList = n;
258     else {
259         for (p = inObjList; p != 0; p = p->next) {
260             if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
261                 delete n;
262                 return p->num;
263             }
264             q = p;
265         }
266         // it is important to add new objects at the end of the list,
267         // because new objects are being added while the list is being
268         // written out.
269         q->next = n;
270     }
271     if (type == objFontDesc)
272         n->num = get_fd_objnum(fd);
273     else
274         n->num = pdfnewobjnum();
275     return n->num;
276 }
277 
getNewObjectNumber(Ref ref)278 static int getNewObjectNumber(Ref ref)
279 {
280     InObj *p;
281     if (inObjList == 0) {
282         pdftex_fail("No objects copied yet");
283     } else {
284         for (p = inObjList; p != 0; p = p->next) {
285             if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
286                 return p->num;
287             }
288         }
289         pdftex_fail("Object not yet copied: %i %i", ref.num, ref.gen);
290     }
291 #ifdef _MSC_VER
292     /* Never reached, but without __attribute__((noreturn)) for pdftex_fail()
293        MSVC 5.0 requires an int return value.  */
294     return -60000;
295 #endif
296 }
297 
// Forward declaration: copyDictEntry(), copyFont() and others call
// copyObject() before its definition.
static void copyObject(Object *);
299 
copyName(char * s)300 static void copyName(char *s)
301 {
302     pdf_puts("/");
303     for (; *s != 0; s++) {
304         if (isdigit(*s) || isupper(*s) || islower(*s) || *s == '_' ||
305             *s == '.' || *s == '-' || *s == '+')
306             pdfout(*s);
307         else
308             pdf_printf("#%.2X", *s & 0xFF);
309     }
310 }
311 
copyDictEntry(Object * obj,int i)312 static void copyDictEntry(Object * obj, int i)
313 {
314     PdfObject obj1;
315     copyName(obj->dictGetKey(i));
316     pdf_puts(" ");
317     obj->dictGetValNF(i, &obj1);
318     copyObject(&obj1);
319     pdf_puts("\n");
320 }
321 
copyDict(Object * obj)322 static void copyDict(Object * obj)
323 {
324     int i, l;
325     if (!obj->isDict())
326         pdftex_fail("PDF inclusion: invalid dict type <%s>",
327                     obj->getTypeName());
328     for (i = 0, l = obj->dictGetLength(); i < l; ++i)
329         copyDictEntry(obj, i);
330 }
331 
copyFontDict(Object * obj,InObj * r)332 static void copyFontDict(Object * obj, InObj * r)
333 {
334     int i, l;
335     char *key;
336     if (!obj->isDict())
337         pdftex_fail("PDF inclusion: invalid dict type <%s>",
338                     obj->getTypeName());
339     pdf_puts("<<\n");
340     assert(r->type == objFont); // FontDescriptor is in fd_tree
341     for (i = 0, l = obj->dictGetLength(); i < l; ++i) {
342         key = obj->dictGetKey(i);
343         if (strncmp("FontDescriptor", key, strlen("FontDescriptor")) == 0
344             || strncmp("BaseFont", key, strlen("BaseFont")) == 0
345             || strncmp("Encoding", key, strlen("Encoding")) == 0)
346             continue;           // skip original values
347         copyDictEntry(obj, i);
348     }
349     // write new FontDescriptor, BaseFont, and Encoding
350     pdf_printf("/FontDescriptor %d 0 R\n", get_fd_objnum(r->fd));
351     pdf_printf("/BaseFont %d 0 R\n", get_fn_objnum(r->fd));
352     pdf_printf("/Encoding %d 0 R\n", r->enc_objnum);
353     pdf_puts(">>");
354 }
355 
copyStream(Stream * str)356 static void copyStream(Stream * str)
357 {
358     int c, c2 = 0;
359     str->reset();
360     while ((c = str->getChar()) != EOF) {
361         pdfout(c);
362         c2 = c;
363     }
364     pdflastbyte = c2;
365 }
366 
copyProcSet(Object * obj)367 static void copyProcSet(Object * obj)
368 {
369     int i, l;
370     PdfObject procset;
371     if (!obj->isArray())
372         pdftex_fail("PDF inclusion: invalid ProcSet array type <%s>",
373                     obj->getTypeName());
374     pdf_puts("/ProcSet [ ");
375     for (i = 0, l = obj->arrayGetLength(); i < l; ++i) {
376         obj->arrayGetNF(i, &procset);
377         if (!procset->isName())
378             pdftex_fail("PDF inclusion: invalid ProcSet entry type <%s>",
379                         procset->getTypeName());
380         copyName(procset->getName());
381         pdf_puts(" ");
382     }
383     pdf_puts("]\n");
384 }
385 
// When true, copyFont() also replaces fonts whose /FontFile3 stream has
// Subtype /Type1C, not only fonts with a /FontFile stream.
#define REPLACE_TYPE1C true
387 
// Writes one entry "/tag N 0 R" of the /Font resource dictionary for the
// font referenced by fontRef.
//  - If the reference is already in inObjList, the object number assigned
//    earlier is reused.
//  - If the font is an embedded Type1 (or Type1C) font that has an entry
//    in the font map and fixedinclusioncopyfont is not set, it is
//    registered for replacement: a font descriptor structure is created,
//    the used glyphs are marked (or the whole font is embedded), and the
//    font dict and its encoding get new object numbers.
//  - Otherwise the reference is copied like any other object.
static void copyFont(char *tag, Object * fontRef)
{
    PdfObject fontdict, subtype, basefont, fontdescRef, fontdesc, charset,
        fontfile, ffsubtype, stemV;
    GfxFont *gfont;
    fd_entry *fd;
    fm_entry *fontmap;
    // Check whether the font has already been embedded before analysing it.
    InObj *p;
    Ref ref = fontRef->getRef();
    for (p = inObjList; p; p = p->next) {
        if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
            copyName(tag);
            pdf_printf(" %d 0 R ", p->num);
            return;
        }
    }
    // Only handle included Type1 (and Type1C) fonts; anything else will be copied.
    // Type1C fonts are replaced by Type1 fonts, if REPLACE_TYPE1C is true.
    // The tests below are evaluated left to right and stop at the first
    // failure; each lookup fills the PdfObject that later tests read.
    if (!fixedinclusioncopyfont && fontRef->fetch(xref, &fontdict)->isDict()
        && fontdict->dictLookup((char *)"Subtype", &subtype)->isName()
        && !strcmp(subtype->getName(), "Type1")
        && fontdict->dictLookup((char *)"BaseFont", &basefont)->isName()
        && fontdict->dictLookupNF((char *)"FontDescriptor", &fontdescRef)->isRef()
        && fontdescRef->fetch(xref, &fontdesc)->isDict()
        && (fontdesc->dictLookup((char *)"FontFile", &fontfile)->isStream()
            || (REPLACE_TYPE1C
                && fontdesc->dictLookup((char *)"FontFile3", &fontfile)->isStream()
                && fontfile->streamGetDict()->lookup((char *)"Subtype",
                                                     &ffsubtype)->isName()
                && !strcmp(ffsubtype->getName(), "Type1C")))
        && (fontmap = lookup_fontmap(basefont->getName())) != NULL) {
        // copy the value of /StemV
        // NOTE(review): getInt() is called without checking the type of
        // stemV; confirm what happens when /StemV is missing or a real.
        fontdesc->dictLookup((char *)"StemV", &stemV);
        fd = epdf_create_fontdescriptor(fontmap, stemV->getInt());
        // NOTE(review): the dictLookup() result is used as a pointer
        // elsewhere in this function, so this first test is presumably
        // always true and only the isString() test matters -- confirm.
        if (fontdesc->dictLookup((char *)"CharSet", &charset) &&
            charset->isString() && is_subsetable(fontmap))
            epdf_mark_glyphs(fd, charset->getString()->getCString());
        else
            embed_whole_font(fd);
        addFontDesc(fontdescRef->getRef(), fd);
        copyName(tag);
        // NOTE(review): gfont is passed on to addEncoding() unchecked;
        // verify whether makeFont() can return a null pointer.
        gfont = GfxFont::makeFont(xref, tag, fontRef->getRef(),
                                  fontdict->getDict());
        pdf_printf(" %d 0 R ", addFont(fontRef->getRef(), fd,
                                       addEncoding(gfont)));
    } else {
        // not replaceable: copy the reference unchanged
        copyName(tag);
        pdf_puts(" ");
        copyObject(fontRef);
    }
}
440 
copyFontResources(Object * obj)441 static void copyFontResources(Object * obj)
442 {
443     PdfObject fontRef;
444     int i, l;
445     if (!obj->isDict())
446         pdftex_fail("PDF inclusion: invalid font resources dict type <%s>",
447                     obj->getTypeName());
448     pdf_puts("/Font << ");
449     for (i = 0, l = obj->dictGetLength(); i < l; ++i) {
450         obj->dictGetValNF(i, &fontRef);
451         if (fontRef->isRef())
452             copyFont(obj->dictGetKey(i), &fontRef);
453         else if (fontRef->isDict()) {   // some programs generate pdf with embedded font object
454             copyName(obj->dictGetKey(i));
455             pdf_puts(" ");
456             copyObject(&fontRef);
457         }
458         else
459             pdftex_fail("PDF inclusion: invalid font in reference type <%s>",
460                         fontRef->getTypeName());
461     }
462     pdf_puts(">>\n");
463 }
464 
copyOtherResources(Object * obj,char * key)465 static void copyOtherResources(Object * obj, char *key)
466 {
467     // copies all other resources (write_epdf handles Fonts and ProcSets),
468 
469     // if Subtype is present, it must be a name
470     if (strcmp("Subtype", key) == 0) {
471         if (!obj->isName()) {
472             pdftex_warn("PDF inclusion: Subtype in Resources dict is not a name"
473                         " (key '%s', type <%s>); ignored.",
474                         key, obj->getTypeName());
475             return;
476         }
477     } else if (!obj->isDict()) {
478         //FIXME: Write the message only to the log file
479         pdftex_warn("PDF inclusion: invalid other resource which is no dict"
480                     " (key '%s', type <%s>); ignored.",
481                     key, obj->getTypeName());
482         return;
483     }
484     copyName(key);
485     pdf_puts(" ");
486     copyObject(obj);
487 }
488 
// Function converts double to string; very small and very large numbers
// are NOT converted to scientific notation.
// n must be a number or real conforming to the implementation limits
// of PDF as specified in appendix C.1 of the PDF Ref.
// These are:
// maximum value of ints is +2^32
// maximum value of reals is +2^15
// smallest value of reals is 1/(2^16)
497 
// Formats n in plain decimal notation (never scientific) with at most
// 'precision' fractional digits, rounded half up in magnitude; trailing
// zeros and a then useless decimal point are dropped. Values smaller in
// magnitude than epsilon yield "0".
// Returns a pointer to a static buffer that is overwritten by the next
// call.
static char *convertNumToPDF(double n)
{
    static const int precision = 6;
    static const int fact = (int) 1E6;  // must be 10^precision
    static const double epsilon = 0.5E-6;       // 2epsilon must be 10^-precision
    // Large enough for every finite double written without an exponent
    // (at most 309 integer digits) plus sign, '.', fraction and NUL.
    static char buf[336];
    char *out = buf;
    double ipart, frac;
    int fval, ndigits, i;

    // handle very small values: return 0
    if (fabs(n) < epsilon) {
        buf[0] = '0';
        buf[1] = '\0';
        return buf;
    }
    // handle the sign part if n is negative
    if (n < 0) {
        *out++ = '-';
        n = -n;
    }
    n += epsilon;               // for rounding

    // The integer part stays a double: converting it to int would
    // overflow for magnitudes of 2^31 and above.
    ipart = floor(n);
    frac = n - ipart;
    out += sprintf(out, "%.0f", ipart);

    // The fractional part as an integer below 10^precision. The range
    // test also rejects the NaN that a non-finite n produces here.
    fval = (frac > 0 && frac < 1) ? (int) floor(frac * fact) : 0;
    if (fval >= fact)
        fval = fact - 1;
    if (fval != 0) {
        // drop trailing zeros, then write the remaining digits back to front
        ndigits = precision;
        while (fval % 10 == 0) {
            fval /= 10;
            ndigits--;
        }
        *out++ = '.';
        out[ndigits] = '\0';
        for (i = ndigits - 1; i >= 0; i--) {
            out[i] = (char) ('0' + fval % 10);
            fval /= 10;
        }
    }
    return buf;
}
546 
copyObject(Object * obj)547 static void copyObject(Object * obj)
548 {
549     PdfObject obj1;
550     int i, l, c;
551     Ref ref;
552     char *p;
553     GString *s;
554     if (obj->isBool()) {
555         pdf_printf("%s", obj->getBool()? "true" : "false");
556     } else if (obj->isInt()) {
557         pdf_printf("%i", obj->getInt());
558     } else if (obj->isReal()) {
559         pdf_printf("%s", convertNumToPDF(obj->getReal()));
560     } else if (obj->isNum()) {
561         pdf_printf("%s", convertNumToPDF(obj->getNum()));
562     } else if (obj->isString()) {
563         s = obj->getString();
564         p = s->getCString();
565         l = s->getLength();
566         if (strlen(p) == (unsigned int) l) {
567             pdf_puts("(");
568             for (; *p != 0; p++) {
569                 c = (unsigned char) *p;
570                 if (c == '(' || c == ')' || c == '\\')
571                     pdf_printf("\\%c", c);
572                 else if (c < 0x20 || c > 0x7F)
573                     pdf_printf("\\%03o", c);
574                 else
575                     pdfout(c);
576             }
577             pdf_puts(")");
578         } else {
579             pdf_puts("<");
580             for (i = 0; i < l; i++) {
581                 c = s->getChar(i) & 0xFF;
582                 pdf_printf("%.2x", c);
583             }
584             pdf_puts(">");
585         }
586     } else if (obj->isName()) {
587         copyName(obj->getName());
588     } else if (obj->isNull()) {
589         pdf_puts("null");
590     } else if (obj->isArray()) {
591         pdf_puts("[");
592         for (i = 0, l = obj->arrayGetLength(); i < l; ++i) {
593             obj->arrayGetNF(i, &obj1);
594             if (!obj1->isName())
595                 pdf_puts(" ");
596             copyObject(&obj1);
597         }
598         pdf_puts("]");
599     } else if (obj->isDict()) {
600         pdf_puts("<<\n");
601         copyDict(obj);
602         pdf_puts(">>");
603     } else if (obj->isStream()) {
604         initDictFromDict(obj1, obj->streamGetDict());
605         pdf_puts("<<\n");
606         copyDict(&obj1);
607         pdf_puts(">>\n");
608         pdf_puts("stream\n");
609         copyStream(obj->getStream()->getUndecodedStream());
610         pdf_puts("\nendstream");
611     } else if (obj->isRef()) {
612         ref = obj->getRef();
613         if (ref.num == 0) {
614             pdftex_fail
615                 ("PDF inclusion: reference to invalid object"
616                  " (is the included pdf broken?)");
617         } else
618             pdf_printf("%d 0 R", addOther(ref));
619     } else {
620         pdftex_fail("PDF inclusion: type <%s> cannot be copied",
621                     obj->getTypeName());
622     }
623 }
624 
writeRefs()625 static void writeRefs()
626 {
627     InObj *r;
628     for (r = inObjList; r != 0; r = r->next) {
629         if (!r->written) {
630             Object obj1;
631             r->written = 1;
632             xref->fetch(r->ref.num, r->ref.gen, &obj1);
633             if (r->type == objFont) {
634                 assert(!obj1.isStream());
635                 pdfbeginobj(r->num, 2);         // \pdfobjcompresslevel = 2 is for this
636                 copyFontDict(&obj1, r);
637                 pdf_puts("\n");
638                 pdfendobj();
639             } else if (r->type != objFontDesc) {        // /FontDescriptor is written via write_fontdescriptor()
640                 if (obj1.isStream())
641                     pdfbeginobj(r->num, 0);
642                 else
643                     pdfbeginobj(r->num, 2);     // \pdfobjcompresslevel = 2 is for this
644                 copyObject(&obj1);
645                 pdf_puts("\n");
646                 pdfendobj();
647             }
648             obj1.free();
649         }
650     }
651 }
652 
writeEncodings()653 static void writeEncodings()
654 {
655     UsedEncoding *r, *n;
656     char *glyphNames[256], *s;
657     int i;
658     for (r = encodingList; r != 0; r = r->next) {
659         for (i = 0; i < 256; i++) {
660             if (r->font->isCIDFont()) {
661                 pdftex_fail
662                     ("PDF inclusion: CID fonts are not supported"
663                      " (try to disable font replacement to fix this)");
664             }
665             if ((s = ((Gfx8BitFont *) r->font)->getCharName(i)) != 0)
666                 glyphNames[i] = s;
667             else
668                 glyphNames[i] = notdef;
669         }
670         epdf_write_enc(glyphNames, r->enc_objnum);
671     }
672     for (r = encodingList; r != 0; r = n) {
673         n = r->next;
674 #ifdef POPPLER_VERSION
675         r->font->decRefCnt();
676 #else
677         delete r->font;
678 #endif
679         delete r;
680     }
681 }
682 
683 // get the pagebox according to the pagebox_spec
get_pagebox(Page * page,int pagebox_spec)684 static PDFRectangle *get_pagebox(Page * page, int pagebox_spec)
685 {
686     if (pagebox_spec == pdfboxspecmedia)
687         return page->getMediaBox();
688     else if (pagebox_spec == pdfboxspeccrop)
689         return page->getCropBox();
690     else if (pagebox_spec == pdfboxspecbleed)
691         return page->getBleedBox();
692     else if (pagebox_spec == pdfboxspectrim)
693         return page->getTrimBox();
694     else if (pagebox_spec == pdfboxspecart)
695         return page->getArtBox();
696     else
697         pdftex_fail("PDF inclusion: unknown value of pagebox spec (%i)",
698                     (int) pagebox_spec);
699     return page->getMediaBox(); // to make the compiler happy
700 }
701 
702 
703 // Reads various information about the PDF and sets it up for later inclusion.
704 // This will fail if the PDF version of the PDF is higher than
705 // minor_pdf_version_wanted or page_name is given and can not be found.
706 // It makes no sense to give page_name _and_ page_num.
707 // Returns the page number.
708 
// Parameters:
//   image_name                file name of the PDF to include
//   page_name                 name of a destination selecting the page,
//                             or a null pointer to select by page_num
//   page_num                  page number, used when page_name is null
//   pagebox_spec              which page box to use (see get_pagebox)
//   minor_pdf_version_wanted  highest accepted minor version x of PDF 1.x
//   pdf_inclusion_errorlevel  > 0: a too new PDF version is an error,
//                             = 0: it is a warning, < 0: it is ignored
// Side effects: sets epdf_doc, epdf_num_pages, epdf_orig_x, epdf_orig_y,
// epdf_width, epdf_height, epdf_rotate and epdf_has_page_group, and
// stores the document's xref in its PdfDocument record.
int
read_pdf_info(char *image_name, char *page_name, int page_num,
              int pagebox_spec, int minor_pdf_version_wanted,
              int pdf_inclusion_errorlevel)
{
    PdfDocument *pdf_doc;
    Page *page;
    PDFRectangle *pagebox;
#ifdef POPPLER_VERSION
    int pdf_major_version_found, pdf_minor_version_found;
#else
    float pdf_version_found, pdf_version_wanted;
#endif
    // initialize (only on the first call)
    if (!isInit) {
        globalParams = new GlobalParams();
        globalParams->setErrQuiet(gFalse);
        isInit = gTrue;
    }
    // open PDF file
    pdf_doc = find_add_document(image_name);
    epdf_doc = (void *) pdf_doc;

    // check PDF version
    // this works only for PDF 1.x -- but since any versions of PDF newer
    // than 1.x will not be backwards compatible to PDF 1.x, pdfTeX will
    // then have to be changed drastically anyway.
#ifdef POPPLER_VERSION
    // poppler reports major and minor version as separate integers
    pdf_major_version_found = pdf_doc->doc->getPDFMajorVersion();
    pdf_minor_version_found = pdf_doc->doc->getPDFMinorVersion();
    if ((pdf_major_version_found > 1)
     || (pdf_minor_version_found > minor_pdf_version_wanted)) {
        const char *msg =
            "PDF inclusion: found PDF version <%d.%d>, but at most version <1.%d> allowed";
        if (pdf_inclusion_errorlevel > 0) {
            pdftex_fail(msg, pdf_major_version_found, pdf_minor_version_found, minor_pdf_version_wanted);
        } else if (pdf_inclusion_errorlevel < 0) {
            ; /* do nothing */
        } else { /* = 0, give warning */
            pdftex_warn(msg, pdf_major_version_found, pdf_minor_version_found, minor_pdf_version_wanted);
        }
    }
#else
    // xpdf reports the version as one floating point number; the 0.01
    // added below is a tolerance for the comparison
    pdf_version_found = pdf_doc->doc->getPDFVersion();
    pdf_version_wanted = 1 + (minor_pdf_version_wanted * 0.1);
    if (pdf_version_found > pdf_version_wanted + 0.01) {
        char msg[] =
            "PDF inclusion: found PDF version <%.1f>, but at most version <%.1f> allowed";
        if (pdf_inclusion_errorlevel > 0) {
            pdftex_fail(msg, pdf_version_found, pdf_version_wanted);
        } else if (pdf_inclusion_errorlevel < 0) {
            ; /* do nothing */
        } else { /* = 0, give warning */
            pdftex_warn(msg, pdf_version_found, pdf_version_wanted);
        }
    }
#endif
    epdf_num_pages = pdf_doc->doc->getCatalog()->getNumPages();
    if (page_name) {
        // get page by name
        GString name(page_name);
        LinkDest *link = pdf_doc->doc->findDest(&name);
        if (link == 0 || !link->isOk())
            pdftex_fail("PDF inclusion: invalid destination <%s>", page_name);
        Ref ref = link->getPageRef();
        page_num = pdf_doc->doc->getCatalog()->findPage(ref.num, ref.gen);
        if (page_num == 0)
            pdftex_fail("PDF inclusion: destination is not a page <%s>",
                        page_name);
        delete link;
    } else {
        // get page by number
        // NOTE(review): the message prints the number of pages in the
        // document, not the requested page_num -- confirm this is intended.
        if (page_num <= 0 || page_num > epdf_num_pages)
            pdftex_fail("PDF inclusion: required page does not exist <%i>",
                        epdf_num_pages);
    }
    // get the required page
    page = pdf_doc->doc->getCatalog()->getPage(page_num);

    // get the pagebox (media, crop...) to use.
    // The origin is the smaller coordinate and width/height are made
    // positive, whichever way round the corners of the box are given.
    pagebox = get_pagebox(page, pagebox_spec);
    if (pagebox->x2 > pagebox->x1) {
        epdf_orig_x = pagebox->x1;
        epdf_width = pagebox->x2 - pagebox->x1;
    } else {
        epdf_orig_x = pagebox->x2;
        epdf_width = pagebox->x1 - pagebox->x2;
    }
    if (pagebox->y2 > pagebox->y1) {
        epdf_orig_y = pagebox->y1;
        epdf_height = pagebox->y2 - pagebox->y1;
    } else {
        epdf_orig_y = pagebox->y2;
        epdf_height = pagebox->y1 - pagebox->y2;
    }

    // get page rotation, normalized to the range 0..359
    epdf_rotate = page->getRotate() % 360;
    if (epdf_rotate < 0)
        epdf_rotate += 360;

    // page group
    if (page->getGroup() != NULL)
        epdf_has_page_group = 1;    // only flag that page group is present;
                                    // the actual object number will be
                                    // generated in pdftex.web
    else
        epdf_has_page_group = 0;    // no page group present

    // remember the xref; find_add_document() and write_epdf() read it
    pdf_doc->xref = pdf_doc->doc->getXRef();
    return page_num;
}
821 
// writes the current epdf_doc.
823 // Here the included PDF is copied, so most errors that can happen during PDF
824 // inclusion will arise here.
825 
write_epdf(void)826 void write_epdf(void)
827 {
828     Page *page;
829     Ref *pageRef;
830     Dict *pageDict;
831     PdfObject contents, obj1, obj2, pageObj, dictObj;
832     PdfObject groupDict;
833     bool writeSepGroup = false;
834     Object info;
835     char *key;
836     char s[256];
837     int i, l;
838     int rotate;
839     double scale[6] = { 0, 0, 0, 0, 0, 0 };
840     bool writematrix = false;
841     static const char *pageDictKeys[] = {
842         "LastModified",
843         "Metadata",
844         "PieceInfo",
845         "SeparationInfo",
846 //         "Group",
847 //         "Resources",
848         NULL
849     };
850 
851     PdfDocument *pdf_doc = (PdfDocument *) epdf_doc;
852     (pdf_doc->occurences)--;
853     xref = pdf_doc->xref;
854     inObjList = pdf_doc->inObjList;
855     encodingList = 0;
856     page = pdf_doc->doc->getCatalog()->getPage(epdf_selected_page);
857     pageRef = pdf_doc->doc->getCatalog()->getPageRef(epdf_selected_page);
858     xref->fetch(pageRef->num, pageRef->gen, &pageObj);
859     pageDict = pageObj->getDict();
860     rotate = page->getRotate();
861     PDFRectangle *pagebox;
862     // write the Page header
863     pdf_puts("/Type /XObject\n");
864     pdf_puts("/Subtype /Form\n");
865     pdf_puts("/FormType 1\n");
866 
867     // write additional information
868     pdf_printf("/%s.FileName (%s)\n", pdfkeyprefix,
869                convertStringToPDFString(pdf_doc->file_name,
870                                         strlen(pdf_doc->file_name)));
871     pdf_printf("/%s.PageNumber %i\n", pdfkeyprefix, epdf_selected_page);
872     pdf_doc->doc->getDocInfoNF(&info);
873     if (info.isRef()) {
874         // the info dict must be indirect (PDF Ref p. 61)
875         pdf_printf("/%s.InfoDict ", pdfkeyprefix);
876         pdf_printf("%d 0 R\n", addOther(info.getRef()));
877     }
878     // get the pagebox (media, crop...) to use.
879     pagebox = get_pagebox(page, epdf_page_box);
880 
881     // handle page rotation
882     if (rotate != 0) {
883         if (rotate % 90 == 0) {
884             // this handles only the simple case: multiple of 90s but these
885             // are the only values allowed according to the reference
886             // (v1.3, p. 78).
887             // the image is rotated around its center.
888             // the /Rotate key is clockwise while the matrix is
889             // counterclockwise :-%
890             tex_printf(", page is rotated %d degrees", rotate);
891             switch (rotate) {
892             case 90:
893                 scale[1] = -1;
894                 scale[2] = 1;
895                 scale[4] = pagebox->x1 - pagebox->y1;
896                 scale[5] = pagebox->y1 + pagebox->x2;
897                 writematrix = true;
898                 break;
899             case 180:
900                 scale[0] = scale[3] = -1;
901                 scale[4] = pagebox->x1 + pagebox->x2;
902                 scale[5] = pagebox->y1 + pagebox->y2;
903                 writematrix = true;
904                 break;          // width and height are exchanged
905             case 270:
906                 scale[1] = 1;
907                 scale[2] = -1;
908                 scale[4] = pagebox->x1 + pagebox->y2;
909                 scale[5] = pagebox->y1 - pagebox->x1;
910                 writematrix = true;
911                 break;
912             }
913             if (writematrix) {  // The matrix is only written if the image is rotated.
914                 sprintf(s, "/Matrix [%.8f %.8f %.8f %.8f %.8f %.8f]\n",
915                         scale[0],
916                         scale[1], scale[2], scale[3], scale[4], scale[5]);
917                 pdf_puts(stripzeros(s));
918             }
919         }
920     }
921 
922     sprintf(s, "/BBox [%.8f %.8f %.8f %.8f]\n",
923             pagebox->x1, pagebox->y1, pagebox->x2, pagebox->y2);
924     pdf_puts(stripzeros(s));
925 
926     // Metadata validity check (as a stream it must be indirect)
927     pageDict->lookupNF((char *)"Metadata", &dictObj);
928     if (!dictObj->isNull() && !dictObj->isRef())
929         pdftex_warn("PDF inclusion: /Metadata must be indirect object");
930 
931     // copy selected items in Page dictionary except Resources & Group
932     for (i = 0; pageDictKeys[i] != NULL; i++) {
933         pageDict->lookupNF((char *)pageDictKeys[i], &dictObj);
934         if (!dictObj->isNull()) {
935             pdf_newline();
936             pdf_printf("/%s ", pageDictKeys[i]);
937             copyObject(&dictObj); // preserves indirection
938         }
939     }
940 
941     // handle page group
942     pageDict->lookupNF((char *)"Group", &dictObj);
943     if (!dictObj->isNull()) {
944         if (pdfpagegroupval == 0) {
945             // another pdf with page group was included earlier on the
946             // same page; copy the Group entry as is.  See manual for
947             // info on why this is a warning.
948             if (getpdfsuppresswarningpagegroup() == 0) {
949                 pdftex_warn
950     ("PDF inclusion: multiple pdfs with page group included in a single page");
951             }
952             pdf_newline();
953             pdf_puts("/Group ");
954             copyObject(&dictObj);
955         } else {
956             // write Group dict as a separate object, since the Page dict also refers to it
957             pageDict->lookup((char *) "Group", &dictObj);
958             if (!dictObj->isDict())
959                 pdftex_fail("PDF inclusion: /Group dict missing");
960             writeSepGroup = true;
961             initDictFromDict(groupDict, page->getGroup());
962             pdf_printf("/Group %d 0 R\n", pdfpagegroupval);
963         }
964     }
965 
966     // write the Resources dictionary
967     if (page->getResourceDict() == NULL) {
968         // Resources can be missing (files without them have been spotted
969         // in the wild); in which case the /Resouces of the /Page will be used.
970         // "This practice is not recommended".
971         pdftex_warn
972             ("PDF inclusion: /Resources missing. 'This practice is not recommended' (PDF Ref)");
973     } else {
974         initDictFromDict(obj1, page->getResourceDict());
975         if (!obj1->isDict())
976             pdftex_fail("PDF inclusion: invalid resources dict type <%s>",
977                         obj1->getTypeName());
978         pdf_newline();
979         pdf_puts("/Resources <<\n");
980         for (i = 0, l = obj1->dictGetLength(); i < l; ++i) {
981             obj1->dictGetVal(i, &obj2);
982             key = obj1->dictGetKey(i);
983             if (strcmp("Font", key) == 0)
984                 copyFontResources(&obj2);
985             else if (strcmp("ProcSet", key) == 0)
986                 copyProcSet(&obj2);
987             else
988                 copyOtherResources(&obj2, key);
989         }
990         pdf_puts(">>\n");
991     }
992 
993     // write the page contents
994     page->getContents(&contents);
995     if (contents->isStream()) {
996 
997         // Variant A: get stream and recompress under control
998         // of \pdfcompresslevel
999         //
1000         // pdfbeginstream();
1001         // copyStream(contents->getStream());
1002         // pdfendstream();
1003 
1004         // Variant B: copy stream without recompressing
1005         //
1006         contents->streamGetDict()->lookup((char *)"F", &obj1);
1007         if (!obj1->isNull()) {
1008             pdftex_fail("PDF inclusion: Unsupported external stream");
1009         }
1010         contents->streamGetDict()->lookup((char *)"Length", &obj1);
1011         assert(!obj1->isNull());
1012         pdf_puts("/Length ");
1013         copyObject(&obj1);
1014         pdf_puts("\n");
1015         contents->streamGetDict()->lookup((char *)"Filter", &obj1);
1016         if (!obj1->isNull()) {
1017             pdf_puts("/Filter ");
1018             copyObject(&obj1);
1019             pdf_puts("\n");
1020             contents->streamGetDict()->lookup((char *)"DecodeParms", &obj1);
1021             if (!obj1->isNull()) {
1022                 pdf_puts("/DecodeParms ");
1023                 copyObject(&obj1);
1024                 pdf_puts("\n");
1025             }
1026         }
1027         pdf_puts(">>\nstream\n");
1028         copyStream(contents->getStream()->getUndecodedStream());
1029         pdfendstream();
1030     } else if (contents->isArray()) {
1031         pdfbeginstream();
1032         for (i = 0, l = contents->arrayGetLength(); i < l; ++i) {
1033             Object contentsobj;
1034             copyStream((contents->arrayGet(i, &contentsobj))->getStream());
1035             contentsobj.free();
1036             if (i < l - 1)
1037                 pdf_newline();  // add a newline after each stream except the last
1038         }
1039         pdfendstream();
1040     } else {                    // the contents are optional, but we need to include an empty stream
1041         pdfbeginstream();
1042         pdfendstream();
1043     }
1044 
1045     // write out all indirect objects
1046     writeRefs();
1047 
1048     // write out all used encodings (and delete list)
1049     writeEncodings();
1050 
1051     // write the Group dict if needed
1052     if (writeSepGroup) {
1053         pdfbeginobj(pdfpagegroupval, 2);
1054         copyObject(&groupDict);
1055         pdf_puts("\n");
1056         pdfendobj();
1057         pdfpagegroupval = 0;    // only the 1st included pdf on a page gets its
1058                                 // Group included in the Page dict
1059     }
1060 
1061     // save object list, xref
1062     pdf_doc->inObjList = inObjList;
1063     pdf_doc->xref = xref;
1064 }
1065 
// Called when an image has been written and its resources in image_tab are
// freed and it's not referenced anymore.
1068 
epdf_delete()1069 void epdf_delete()
1070 {
1071     PdfDocument *pdf_doc = (PdfDocument *) epdf_doc;
1072     xref = pdf_doc->xref;
1073     if (pdf_doc->occurences < 0) {
1074         delete_document(pdf_doc);
1075     }
1076 }
1077 
1078 // Called when PDF embedding system is finalized.
1079 // Now deallocate all remaining PdfDocuments.
1080 
epdf_check_mem()1081 void epdf_check_mem()
1082 {
1083     if (isInit) {
1084         PdfDocument *p, *n;
1085         for (p = pdfDocuments; p; p = n) {
1086             n = p->next;
1087             delete_document(p);
1088         }
1089         // see above for globalParams
1090         delete globalParams;
1091     }
1092 }
1093