1 //========================================================================
2 //
3 // pdfinfo.cc
4 //
5 // Copyright 1998-2003 Glyph & Cog, LLC
6 // Copyright 2013 Igalia S.L.
7 //
8 //========================================================================
9 
10 //========================================================================
11 //
12 // Modified under the Poppler project - http://poppler.freedesktop.org
13 //
14 // All changes made under the Poppler project to this file are licensed
15 // under GPL version 2 or later
16 //
17 // Copyright (C) 2006 Dom Lachowicz <cinamod@hotmail.com>
18 // Copyright (C) 2007-2010, 2012, 2016-2021 Albert Astals Cid <aacid@kde.org>
19 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
20 // Copyright (C) 2011 Vittal Aithal <vittal.aithal@cognidox.com>
21 // Copyright (C) 2012, 2013, 2016-2018, 2021 Adrian Johnson <ajohnson@redneon.com>
22 // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
23 // Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
24 // Copyright (C) 2013 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
25 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
26 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
27 // Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
28 // Copyright (C) 2019 Christian Persch <chpe@src.gnome.org>
29 // Copyright (C) 2019-2021 Oliver Sander <oliver.sander@tu-dresden.de>
30 // Copyright (C) 2019 Thomas Fischer <fischer@unix-ag.uni-kl.de>
31 //
32 // To see a description of the changes please see the Changelog file that
33 // came with your tarball or type make ChangeLog if you are building from git
34 //
35 //========================================================================
36 
37 #include "config.h"
38 #include <poppler-config.h>
39 #include <cstdio>
40 #include <cstdlib>
41 #include <cstddef>
42 #include <cstring>
43 #include <ctime>
44 #include <cmath>
45 #include <map>
46 #include <set>
47 #include "parseargs.h"
48 #include "printencodings.h"
49 #include "goo/GooString.h"
50 #include "goo/gfile.h"
51 #include "goo/glibc.h"
52 #include "goo/gmem.h"
53 #include "GlobalParams.h"
54 #include "Object.h"
55 #include "Stream.h"
56 #include "Array.h"
57 #include "Dict.h"
58 #include "XRef.h"
59 #include "Catalog.h"
60 #include "Page.h"
61 #include "PDFDoc.h"
62 #include "PDFDocFactory.h"
63 #include "CharTypes.h"
64 #include "UnicodeMap.h"
65 #include "UTF.h"
66 #include "Error.h"
67 #include "DateInfo.h"
68 #include "JSInfo.h"
69 #include "StructTreeRoot.h"
70 #include "StructElement.h"
71 #include "Win32Console.h"
72 
// Command-line option storage. Each variable is filled in through the
// argDesc table that follows; the values here are the defaults.

// Page range (-f / -l).
static int firstPage = 1;
static int lastPage = 0;

// Which pieces of information to print.
static bool printBoxes = false; // -box
static bool printMetadata = false; // -meta
static bool printCustom = false; // -custom
static bool printJS = false; // -js
static bool printStructure = false; // -struct
static bool printStructureText = false; // -struct-text
static bool printDests = false; // -dests
static bool printUrls = false; // -url

// Date formatting (-isodates / -rawdates).
static bool isoDates = false;
static bool rawDates = false;

// Output text encoding name (-enc) and the encoding listing switch (-listenc).
static char textEncName[128] = "";
static bool printEnc = false;

// Passwords for encrypted files (-opw / -upw).
// NOTE(review): the leading '\001' looks like a "not supplied" sentinel;
// confirm against the code that consumes these buffers.
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";

// Version and usage switches (-v, -h / -help / --help / -?).
static bool printVersion = false;
static bool printHelp = false;
91 
92 static const ArgDesc argDesc[] = { { "-f", argInt, &firstPage, 0, "first page to convert" },
93                                    { "-l", argInt, &lastPage, 0, "last page to convert" },
94                                    { "-box", argFlag, &printBoxes, 0, "print the page bounding boxes" },
95                                    { "-meta", argFlag, &printMetadata, 0, "print the document metadata (XML)" },
96                                    { "-custom", argFlag, &printCustom, 0, "print both custom and standard metadata" },
97                                    { "-js", argFlag, &printJS, 0, "print all JavaScript in the PDF" },
98                                    { "-struct", argFlag, &printStructure, 0, "print the logical document structure (for tagged files)" },
99                                    { "-struct-text", argFlag, &printStructureText, 0, "print text contents along with document structure (for tagged files)" },
100                                    { "-isodates", argFlag, &isoDates, 0, "print the dates in ISO-8601 format" },
101                                    { "-rawdates", argFlag, &rawDates, 0, "print the undecoded date strings directly from the PDF file" },
102                                    { "-dests", argFlag, &printDests, 0, "print all named destinations in the PDF" },
103                                    { "-url", argFlag, &printUrls, 0, "print all URLs inside PDF objects (does not scan text content)" },
104                                    { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" },
105                                    { "-listenc", argFlag, &printEnc, 0, "list available encodings" },
106                                    { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
107                                    { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
108                                    { "-v", argFlag, &printVersion, 0, "print copyright and version info" },
109                                    { "-h", argFlag, &printHelp, 0, "print usage information" },
110                                    { "-help", argFlag, &printHelp, 0, "print usage information" },
111                                    { "--help", argFlag, &printHelp, 0, "print usage information" },
112                                    { "-?", argFlag, &printHelp, 0, "print usage information" },
113                                    {} };
114 
printTextString(const GooString * s,const UnicodeMap * uMap)115 static void printTextString(const GooString *s, const UnicodeMap *uMap)
116 {
117     Unicode *u;
118     char buf[8];
119     int len = TextStringToUCS4(s->toStr(), &u);
120     for (int i = 0; i < len; i++) {
121         int n = uMap->mapUnicode(u[i], buf, sizeof(buf));
122         fwrite(buf, 1, n, stdout);
123     }
124     gfree(u);
125 }
126 
printUCS4String(const Unicode * u,int len,const UnicodeMap * uMap)127 static void printUCS4String(const Unicode *u, int len, const UnicodeMap *uMap)
128 {
129     char buf[8];
130     for (int i = 0; i < len; i++) {
131         int n = uMap->mapUnicode(u[i], buf, sizeof(buf));
132         fwrite(buf, 1, n, stdout);
133     }
134 }
135 
printInfoString(Dict * infoDict,const char * key,const char * text,const UnicodeMap * uMap)136 static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
137 {
138     const GooString *s1;
139 
140     Object obj = infoDict->lookup(key);
141     if (obj.isString()) {
142         fputs(text, stdout);
143         s1 = obj.getString();
144         printTextString(s1, uMap);
145         fputc('\n', stdout);
146     }
147 }
148 
printInfoDate(Dict * infoDict,const char * key,const char * text,const UnicodeMap * uMap)149 static void printInfoDate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
150 {
151     int year, mon, day, hour, min, sec, tz_hour, tz_minute;
152     char tz;
153     struct tm tmStruct;
154     time_t time;
155     char buf[256];
156 
157     Object obj = infoDict->lookup(key);
158     if (obj.isString()) {
159         fputs(text, stdout);
160         const GooString *s = obj.getString();
161         // TODO do something with the timezone info
162         if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
163             tmStruct.tm_year = year - 1900;
164             tmStruct.tm_mon = mon - 1;
165             tmStruct.tm_mday = day;
166             tmStruct.tm_hour = hour;
167             tmStruct.tm_min = min;
168             tmStruct.tm_sec = sec;
169             tmStruct.tm_wday = -1;
170             tmStruct.tm_yday = -1;
171             tmStruct.tm_isdst = -1;
172             // compute the tm_wday and tm_yday fields
173             time = timegm(&tmStruct);
174             if (time != (time_t)-1) {
175                 int offset = (tz_hour * 60 + tz_minute) * 60;
176                 if (tz == '-')
177                     offset *= -1;
178                 time -= offset;
179                 localtime_r(&time, &tmStruct);
180                 strftime(buf, sizeof(buf), "%c %Z", &tmStruct);
181                 fputs(buf, stdout);
182             } else {
183                 printTextString(s, uMap);
184             }
185         } else {
186             printTextString(s, uMap);
187         }
188         fputc('\n', stdout);
189     }
190 }
191 
printISODate(Dict * infoDict,const char * key,const char * text,const UnicodeMap * uMap)192 static void printISODate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
193 {
194     int year, mon, day, hour, min, sec, tz_hour, tz_minute;
195     char tz;
196 
197     Object obj = infoDict->lookup(key);
198     if (obj.isString()) {
199         fputs(text, stdout);
200         const GooString *s = obj.getString();
201         if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
202             fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
203             if (tz_hour == 0 && tz_minute == 0) {
204                 fprintf(stdout, "Z");
205             } else {
206                 fprintf(stdout, "%c%02d", tz, tz_hour);
207                 if (tz_minute)
208                     fprintf(stdout, ":%02d", tz_minute);
209             }
210         } else {
211             printTextString(obj.getString(), uMap);
212         }
213         fputc('\n', stdout);
214     }
215 }
216 
printBox(const char * text,const PDFRectangle * box)217 static void printBox(const char *text, const PDFRectangle *box)
218 {
219     printf("%s%8.2f %8.2f %8.2f %8.2f\n", text, box->x1, box->y1, box->x2, box->y2);
220 }
221 
// Writes `indent` indentation levels to stdout, two spaces per level.
static void printIndent(unsigned indent)
{
    for (unsigned level = 0; level < indent; ++level) {
        fputs("  ", stdout);
    }
}
229 
printAttribute(const Attribute * attribute,unsigned indent)230 static void printAttribute(const Attribute *attribute, unsigned indent)
231 {
232     printIndent(indent);
233     printf(" /%s ", attribute->getTypeName());
234     if (attribute->getType() == Attribute::UserProperty) {
235         std::unique_ptr<GooString> name = attribute->getName();
236         printf("(%s) ", name->c_str());
237     }
238     attribute->getValue()->print(stdout);
239     if (attribute->getFormattedValue()) {
240         printf(" \"%s\"", attribute->getFormattedValue());
241     }
242     if (attribute->isHidden()) {
243         printf(" [hidden]");
244     }
245 }
246 
printStruct(const StructElement * element,unsigned indent)247 static void printStruct(const StructElement *element, unsigned indent)
248 {
249     if (element->isObjectRef()) {
250         printIndent(indent);
251         printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen);
252         return;
253     }
254 
255     if (printStructureText && element->isContent()) {
256         GooString *text = element->getText(false);
257         printIndent(indent);
258         if (text) {
259             printf("\"%s\"\n", text->c_str());
260         } else {
261             printf("(No content?)\n");
262         }
263         delete text;
264     }
265 
266     if (!element->isContent()) {
267         printIndent(indent);
268         printf("%s", element->getTypeName());
269         if (element->getID()) {
270             printf(" <%s>", element->getID()->c_str());
271         }
272         if (element->getTitle()) {
273             printf(" \"%s\"", element->getTitle()->c_str());
274         }
275         if (element->getRevision() > 0) {
276             printf(" r%u", element->getRevision());
277         }
278         if (element->isInline() || element->isBlock()) {
279             printf(" (%s)", element->isInline() ? "inline" : "block");
280         }
281         if (element->getNumAttributes()) {
282             putchar(':');
283             for (unsigned i = 0; i < element->getNumAttributes(); i++) {
284                 putchar('\n');
285                 printAttribute(element->getAttribute(i), indent + 1);
286             }
287         }
288 
289         putchar('\n');
290         for (unsigned i = 0; i < element->getNumChildren(); i++) {
291             printStruct(element->getChild(i), indent + 1);
292         }
293     }
294 }
295 
296 struct GooStringCompare
297 {
operator ()GooStringCompare298     bool operator()(GooString *lhs, GooString *rhs) const { return lhs->cmp(const_cast<GooString *>(rhs)) < 0; }
299 };
300 
printLinkDest(const std::unique_ptr<LinkDest> & dest)301 static void printLinkDest(const std::unique_ptr<LinkDest> &dest)
302 {
303     GooString s;
304 
305     switch (dest->getKind()) {
306     case destXYZ:
307         s.append("[ XYZ ");
308         if (dest->getChangeLeft()) {
309             s.appendf("{0:4.0g} ", dest->getLeft());
310         } else {
311             s.append("null ");
312         }
313         if (dest->getChangeTop()) {
314             s.appendf("{0:4.0g} ", dest->getTop());
315         } else {
316             s.append("null ");
317         }
318         if (dest->getChangeZoom()) {
319             s.appendf("{0:4.2f} ", dest->getZoom());
320         } else {
321             s.append("null ");
322         }
323         break;
324     case destFit:
325         s.append("[ Fit ");
326         break;
327     case destFitH:
328         if (dest->getChangeTop()) {
329             s.appendf("[ FitH {0:4.0g} ", dest->getTop());
330         } else {
331             s.append("[ FitH null ");
332         }
333         break;
334     case destFitV:
335         if (dest->getChangeLeft()) {
336             s.appendf("[ FitV {0:4.0g} ", dest->getLeft());
337         } else {
338             s.append("[ FitV null ");
339         }
340         break;
341     case destFitR:
342         s.appendf("[ FitR {0:4.0g} {1:4.0g} {2:4.0g} {3:4.0g} ", dest->getLeft(), dest->getBottom(), dest->getRight(), dest->getTop());
343         break;
344     case destFitB:
345         s.append("[ FitB ");
346         break;
347     case destFitBH:
348         if (dest->getChangeTop()) {
349             s.appendf("[ FitBH {0:4.0g} ", dest->getTop());
350         } else {
351             s.append("[ FitBH null ");
352         }
353         break;
354     case destFitBV:
355         if (dest->getChangeLeft()) {
356             s.appendf("[ FitBV {0:4.0g} ", dest->getLeft());
357         } else {
358             s.append("[ FitBV null ");
359         }
360         break;
361     }
362 
363     s.append("                                ");
364     s.setChar(26, ']');
365     s.setChar(27, '\0');
366     printf("%s", s.c_str());
367 }
368 
printDestinations(PDFDoc * doc,const UnicodeMap * uMap)369 static void printDestinations(PDFDoc *doc, const UnicodeMap *uMap)
370 {
371     std::map<Ref, std::map<GooString *, std::unique_ptr<LinkDest>, GooStringCompare>> map;
372 
373     int numDests = doc->getCatalog()->numDestNameTree();
374     for (int i = 0; i < numDests; i++) {
375         GooString *name = new GooString(doc->getCatalog()->getDestNameTreeName(i));
376         std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestNameTreeDest(i);
377         if (dest && dest->isPageRef()) {
378             Ref pageRef = dest->getPageRef();
379             map[pageRef].insert(std::make_pair(name, std::move(dest)));
380         } else {
381             delete name;
382         }
383     }
384 
385     numDests = doc->getCatalog()->numDests();
386     for (int i = 0; i < numDests; i++) {
387         GooString *name = new GooString(doc->getCatalog()->getDestsName(i));
388         std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestsDest(i);
389         if (dest && dest->isPageRef()) {
390             Ref pageRef = dest->getPageRef();
391             map[pageRef].insert(std::make_pair(name, std::move(dest)));
392         } else {
393             delete name;
394         }
395     }
396 
397     printf("Page  Destination                 Name\n");
398     for (int i = firstPage; i <= lastPage; i++) {
399         Ref *ref = doc->getCatalog()->getPageRef(i);
400         if (ref) {
401             auto pageDests = map.find(*ref);
402             if (pageDests != map.end()) {
403                 for (auto &it : pageDests->second) {
404                     printf("%4d ", i);
405                     printLinkDest(it.second);
406                     printf(" \"");
407                     printTextString(it.first, uMap);
408                     printf("\"\n");
409                     delete it.first;
410                 }
411             }
412         }
413     }
414 }
415 
printUrlList(PDFDoc * doc)416 static void printUrlList(PDFDoc *doc)
417 {
418     printf("Page  Type          URL\n");
419     for (int pg = firstPage; pg <= lastPage; pg++) {
420         Page *page = doc->getPage(pg);
421         if (page) {
422             std::unique_ptr<Links> links = page->getLinks();
423             for (int i = 0; i < links->getNumLinks(); i++) {
424                 AnnotLink *annot = links->getLink(i);
425                 LinkAction *action = annot->getAction();
426                 if (action->getKind() == actionURI) {
427                     LinkURI *linkUri = dynamic_cast<LinkURI *>(action);
428                     std::string uri = linkUri->getURI();
429                     printf("%4d  Annotation    %s\n", pg, uri.c_str());
430                 }
431             }
432         }
433     }
434 }
435 
printPdfSubtype(PDFDoc * doc,const UnicodeMap * uMap)436 static void printPdfSubtype(PDFDoc *doc, const UnicodeMap *uMap)
437 {
438     const Object info = doc->getDocInfo();
439     if (info.isDict()) {
440         const PDFSubtype pdftype = doc->getPDFSubtype();
441 
442         if ((pdftype == subtypeNull) | (pdftype == subtypeNone)) {
443             return;
444         }
445 
446         std::unique_ptr<GooString> part;
447         std::unique_ptr<GooString> abbr;
448         std::unique_ptr<GooString> standard;
449         std::unique_ptr<GooString> typeExp;
450         std::unique_ptr<GooString> confExp;
451 
452         // Form title from PDFSubtype
453         switch (pdftype) {
454         case subtypePDFA:
455             printInfoString(info.getDict(), "GTS_PDFA1Version", "PDF subtype:    ", uMap);
456             typeExp = std::make_unique<GooString>("ISO 19005 - Electronic document file format for long-term preservation (PDF/A)");
457             standard = std::make_unique<GooString>("ISO 19005");
458             abbr = std::make_unique<GooString>("PDF/A");
459             break;
460         case subtypePDFE:
461             printInfoString(info.getDict(), "GTS_PDFEVersion", "PDF subtype:    ", uMap);
462             typeExp = std::make_unique<GooString>("ISO 24517 - Engineering document format using PDF (PDF/E)");
463             standard = std::make_unique<GooString>("ISO 24517");
464             abbr = std::make_unique<GooString>("PDF/E");
465             break;
466         case subtypePDFUA:
467             printInfoString(info.getDict(), "GTS_PDFUAVersion", "PDF subtype:    ", uMap);
468             typeExp = std::make_unique<GooString>("ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)");
469             standard = std::make_unique<GooString>("ISO 14289");
470             abbr = std::make_unique<GooString>("PDF/UA");
471             break;
472         case subtypePDFVT:
473             printInfoString(info.getDict(), "GTS_PDFVTVersion", "PDF subtype:    ", uMap);
474             typeExp = std::make_unique<GooString>("ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)");
475             standard = std::make_unique<GooString>("ISO 16612");
476             abbr = std::make_unique<GooString>("PDF/VT");
477             break;
478         case subtypePDFX:
479             printInfoString(info.getDict(), "GTS_PDFXVersion", "PDF subtype:    ", uMap);
480             typeExp = std::make_unique<GooString>("ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)");
481             standard = std::make_unique<GooString>("ISO 15930");
482             abbr = std::make_unique<GooString>("PDF/X");
483             break;
484         case subtypeNone:
485         case subtypeNull:
486         default:
487             return;
488         }
489 
490         // Form the abbreviation from PDFSubtypePart and PDFSubtype
491         const PDFSubtypePart subpart = doc->getPDFSubtypePart();
492         switch (pdftype) {
493         case subtypePDFX:
494             switch (subpart) {
495             case subtypePart1:
496                 abbr->append("-1:2001");
497                 break;
498             case subtypePart2:
499                 abbr->append("-2");
500                 break;
501             case subtypePart3:
502                 abbr->append("-3:2002");
503                 break;
504             case subtypePart4:
505                 abbr->append("-1:2003");
506                 break;
507             case subtypePart5:
508                 abbr->append("-2");
509                 break;
510             case subtypePart6:
511                 abbr->append("-3:2003");
512                 break;
513             case subtypePart7:
514                 abbr->append("-4");
515                 break;
516             case subtypePart8:
517                 abbr->append("-5");
518                 break;
519             default:
520                 break;
521             }
522             break;
523         case subtypeNone:
524         case subtypeNull:
525             break;
526         default:
527             abbr->appendf("-{0:d}", subpart);
528             break;
529         }
530 
531         // Form standard from PDFSubtypePart
532         switch (subpart) {
533         case subtypePartNone:
534         case subtypePartNull:
535             break;
536         default:
537             standard->appendf("-{0:d}", subpart);
538             break;
539         }
540 
541         // Form the subtitle from PDFSubtypePart and PDFSubtype
542         switch (pdftype) {
543         case subtypePDFA:
544             switch (subpart) {
545             case subtypePart1:
546                 part = std::make_unique<GooString>("Use of PDF 1.4");
547                 break;
548             case subtypePart2:
549                 part = std::make_unique<GooString>("Use of ISO 32000-1");
550                 break;
551             case subtypePart3:
552                 part = std::make_unique<GooString>("Use of ISO 32000-1 with support for embedded files");
553                 break;
554             default:
555                 break;
556             }
557             break;
558         case subtypePDFE:
559             switch (subpart) {
560             case subtypePart1:
561                 part = std::make_unique<GooString>("Use of PDF 1.6");
562                 break;
563             default:
564                 break;
565             }
566             break;
567         case subtypePDFUA:
568             switch (subpart) {
569             case subtypePart1:
570                 part = std::make_unique<GooString>("Use of ISO 32000-1");
571                 break;
572             case subtypePart2:
573                 part = std::make_unique<GooString>("Use of ISO 32000-2");
574                 break;
575             case subtypePart3:
576                 part = std::make_unique<GooString>("Use of ISO 32000-1 with support for embedded files");
577                 break;
578             default:
579                 break;
580             }
581             break;
582         case subtypePDFVT:
583             switch (subpart) {
584             case subtypePart1:
585                 part = std::make_unique<GooString>("Using PPML 2.1 and PDF 1.4");
586                 break;
587             case subtypePart2:
588                 part = std::make_unique<GooString>("Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)");
589                 break;
590             case subtypePart3:
591                 part = std::make_unique<GooString>("Using PDF/X-6 (PDF/VT-3)");
592                 break;
593             default:
594                 break;
595             }
596             break;
597         case subtypePDFX:
598             switch (subpart) {
599             case subtypePart1:
600                 part = std::make_unique<GooString>("Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)");
601                 break;
602             case subtypePart3:
603                 part = std::make_unique<GooString>("Complete exchange suitable for colour-managed workflows (PDF/X-3)");
604                 break;
605             case subtypePart4:
606                 part = std::make_unique<GooString>("Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)");
607                 break;
608             case subtypePart5:
609                 part = std::make_unique<GooString>("Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]");
610                 break;
611             case subtypePart6:
612                 part = std::make_unique<GooString>("Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)");
613                 break;
614             case subtypePart7:
615                 part = std::make_unique<GooString>("Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6");
616                 break;
617             case subtypePart8:
618                 part = std::make_unique<GooString>("Partial exchange of printing data using PDF 1.6 (PDF/X-5)");
619                 break;
620             default:
621                 break;
622             }
623             break;
624         default:
625             break;
626         }
627 
628         // Form Conformance explanation from PDFSubtypeConformance
629         switch (doc->getPDFSubtypeConformance()) {
630         case subtypeConfA:
631             confExp = std::make_unique<GooString>("Level A, Accessible");
632             break;
633         case subtypeConfB:
634             confExp = std::make_unique<GooString>("Level B, Basic");
635             break;
636         case subtypeConfG:
637             confExp = std::make_unique<GooString>("Level G, External graphical content");
638             break;
639         case subtypeConfN:
640             confExp = std::make_unique<GooString>("Level N, External ICC profile");
641             break;
642         case subtypeConfP:
643             confExp = std::make_unique<GooString>("Level P, Embedded ICC profile");
644             break;
645         case subtypeConfPG:
646             confExp = std::make_unique<GooString>("Level PG, Embedded ICC profile and external graphical content");
647             break;
648         case subtypeConfU:
649             confExp = std::make_unique<GooString>("Level U, Unicode support");
650             break;
651         case subtypeConfNone:
652         case subtypeConfNull:
653         default:
654             confExp.reset();
655             break;
656         }
657 
658         printf("    Title:         %s\n", typeExp->c_str());
659         printf("    Abbreviation:  %s\n", abbr->c_str());
660         if (part.get())
661             printf("    Subtitle:      Part %d: %s\n", subpart, part->c_str());
662         else
663             printf("    Subtitle:      Part %d\n", subpart);
664         printf("    Standard:      %s-%d\n", typeExp->toStr().substr(0, 9).c_str(), subpart);
665         if (confExp.get())
666             printf("    Conformance:   %s\n", confExp->c_str());
667     }
668 }
669 
printCustomInfo(PDFDoc * doc,const UnicodeMap * uMap)670 static void printCustomInfo(PDFDoc *doc, const UnicodeMap *uMap)
671 {
672     Object info = doc->getDocInfo();
673     if (info.isDict()) {
674         Dict *dict = info.getDict();
675 
676         // Sort keys
677         std::set<std::string> keys;
678         for (int i = 0; i < dict->getLength(); i++) {
679             std::string key(dict->getKey(i));
680             if (key != "Trapped") {
681                 keys.insert(key);
682             }
683         }
684 
685         for (const std::string &key : keys) {
686             if (key == "CreationDate") {
687                 if (isoDates) {
688                     printISODate(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
689                 } else if (rawDates) {
690                     printInfoString(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
691                 } else {
692                     printInfoDate(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
693                 }
694             } else if (key == "ModDate") {
695                 if (isoDates) {
696                     printISODate(info.getDict(), "ModDate", "ModDate:         ", uMap);
697                 } else if (rawDates) {
698                     printInfoString(info.getDict(), "ModDate", "ModDate:         ", uMap);
699                 } else {
700                     printInfoDate(info.getDict(), "ModDate", "ModDate:         ", uMap);
701                 }
702             } else {
703                 Object obj = dict->lookup(key.c_str());
704                 if (obj.isString()) {
705                     // print key
706                     Unicode *u;
707                     int len = utf8ToUCS4(key.c_str(), &u);
708                     printUCS4String(u, len, uMap);
709                     fputs(":", stdout);
710                     while (len < 16) {
711                         fputs(" ", stdout);
712                         len++;
713                     }
714                     gfree(u);
715 
716                     // print value
717                     GooString val_str(obj.getString());
718                     printTextString(&val_str, uMap);
719                     fputc('\n', stdout);
720                 }
721             }
722         }
723     }
724 }
725 
printInfo(PDFDoc * doc,const UnicodeMap * uMap,long long filesize,bool multiPage)726 static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, bool multiPage)
727 {
728     Page *page;
729     char buf[256];
730     double w, h, wISO, hISO, isoThreshold;
731     int pg, i;
732     int r;
733 
734     // print doc info
735     Object info = doc->getDocInfo();
736     if (info.isDict()) {
737         printInfoString(info.getDict(), "Title", "Title:           ", uMap);
738         printInfoString(info.getDict(), "Subject", "Subject:         ", uMap);
739         printInfoString(info.getDict(), "Keywords", "Keywords:        ", uMap);
740         printInfoString(info.getDict(), "Author", "Author:          ", uMap);
741         printInfoString(info.getDict(), "Creator", "Creator:         ", uMap);
742         printInfoString(info.getDict(), "Producer", "Producer:        ", uMap);
743         if (isoDates) {
744             printISODate(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
745             printISODate(info.getDict(), "ModDate", "ModDate:         ", uMap);
746         } else if (rawDates) {
747             printInfoString(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
748             printInfoString(info.getDict(), "ModDate", "ModDate:         ", uMap);
749         } else {
750             printInfoDate(info.getDict(), "CreationDate", "CreationDate:    ", uMap);
751             printInfoDate(info.getDict(), "ModDate", "ModDate:         ", uMap);
752         }
753     }
754 
755     bool hasMetadata = false;
756     std::unique_ptr<GooString> metadata = doc->readMetadata();
757     if (metadata) {
758         hasMetadata = true;
759     }
760 
761     const std::set<std::string> docInfoStandardKeys { "Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped" };
762 
763     bool hasCustom = false;
764     if (info.isDict()) {
765         Dict *dict = info.getDict();
766         for (i = 0; i < dict->getLength(); i++) {
767             std::string key(dict->getKey(i));
768             if (docInfoStandardKeys.find(key) == docInfoStandardKeys.end()) {
769                 hasCustom = true;
770                 break;
771             }
772         }
773     }
774 
775     // print metadata info
776     printf("Custom Metadata: %s\n", hasCustom ? "yes" : "no");
777     printf("Metadata Stream: %s\n", hasMetadata ? "yes" : "no");
778 
779     // print tagging info
780     printf("Tagged:          %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked) ? "yes" : "no");
781     printf("UserProperties:  %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoUserProperties) ? "yes" : "no");
782     printf("Suspects:        %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoSuspects) ? "yes" : "no");
783 
784     // print form info
785     switch (doc->getCatalog()->getFormType()) {
786     case Catalog::NoForm:
787         printf("Form:            none\n");
788         break;
789     case Catalog::AcroForm:
790         printf("Form:            AcroForm\n");
791         break;
792     case Catalog::XfaForm:
793         printf("Form:            XFA\n");
794         break;
795     }
796 
797     // print javascript info
798     {
799         JSInfo jsInfo(doc, firstPage - 1);
800         jsInfo.scanJS(lastPage - firstPage + 1);
801         printf("JavaScript:      %s\n", jsInfo.containsJS() ? "yes" : "no");
802     }
803 
804     // print page count
805     printf("Pages:           %d\n", doc->getNumPages());
806 
807     // print encryption info
808     printf("Encrypted:       ");
809     if (doc->isEncrypted()) {
810         unsigned char *fileKey;
811         CryptAlgorithm encAlgorithm;
812         int keyLength;
813         doc->getXRef()->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
814 
815         const char *encAlgorithmName = "unknown";
816         switch (encAlgorithm) {
817         case cryptRC4:
818             encAlgorithmName = "RC4";
819             break;
820         case cryptAES:
821             encAlgorithmName = "AES";
822             break;
823         case cryptAES256:
824             encAlgorithmName = "AES-256";
825             break;
826         case cryptNone:
827             break;
828         }
829 
830         printf("yes (print:%s copy:%s change:%s addNotes:%s algorithm:%s)\n", doc->okToPrint(true) ? "yes" : "no", doc->okToCopy(true) ? "yes" : "no", doc->okToChange(true) ? "yes" : "no", doc->okToAddNotes(true) ? "yes" : "no",
831                encAlgorithmName);
832     } else {
833         printf("no\n");
834     }
835 
836     // print page size
837     for (pg = firstPage; pg <= lastPage; ++pg) {
838         w = doc->getPageCropWidth(pg);
839         h = doc->getPageCropHeight(pg);
840         if (multiPage) {
841             printf("Page %4d size:  %g x %g pts", pg, w, h);
842         } else {
843             printf("Page size:       %g x %g pts", w, h);
844         }
845         if ((fabs(w - 612) < 1 && fabs(h - 792) < 1) || (fabs(w - 792) < 1 && fabs(h - 612) < 1)) {
846             printf(" (letter)");
847         } else {
848             hISO = sqrt(sqrt(2.0)) * 7200 / 2.54;
849             wISO = hISO / sqrt(2.0);
850             isoThreshold = hISO * 0.003; ///< allow for 0.3% error when guessing conformance to ISO 216, A series
851             for (i = 0; i <= 6; ++i) {
852                 if ((fabs(w - wISO) < isoThreshold && fabs(h - hISO) < isoThreshold) || (fabs(w - hISO) < isoThreshold && fabs(h - wISO) < isoThreshold)) {
853                     printf(" (A%d)", i);
854                     break;
855                 }
856                 hISO = wISO;
857                 wISO /= sqrt(2.0);
858                 isoThreshold /= sqrt(2.0);
859             }
860         }
861         printf("\n");
862         r = doc->getPageRotate(pg);
863         if (multiPage) {
864             printf("Page %4d rot:   %d\n", pg, r);
865         } else {
866             printf("Page rot:        %d\n", r);
867         }
868     }
869 
870     // print the boxes
871     if (printBoxes) {
872         if (multiPage) {
873             for (pg = firstPage; pg <= lastPage; ++pg) {
874                 page = doc->getPage(pg);
875                 if (!page) {
876                     error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", pg);
877                     continue;
878                 }
879                 sprintf(buf, "Page %4d MediaBox:  ", pg);
880                 printBox(buf, page->getMediaBox());
881                 sprintf(buf, "Page %4d CropBox:   ", pg);
882                 printBox(buf, page->getCropBox());
883                 sprintf(buf, "Page %4d BleedBox:  ", pg);
884                 printBox(buf, page->getBleedBox());
885                 sprintf(buf, "Page %4d TrimBox:   ", pg);
886                 printBox(buf, page->getTrimBox());
887                 sprintf(buf, "Page %4d ArtBox:    ", pg);
888                 printBox(buf, page->getArtBox());
889             }
890         } else {
891             page = doc->getPage(firstPage);
892             if (!page) {
893                 error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", firstPage);
894             } else {
895                 printBox("MediaBox:        ", page->getMediaBox());
896                 printBox("CropBox:         ", page->getCropBox());
897                 printBox("BleedBox:        ", page->getBleedBox());
898                 printBox("TrimBox:         ", page->getTrimBox());
899                 printBox("ArtBox:          ", page->getArtBox());
900             }
901         }
902     }
903 
904     // print file size
905     printf("File size:       %lld bytes\n", filesize);
906 
907     // print linearization info
908     printf("Optimized:       %s\n", doc->isLinearized() ? "yes" : "no");
909 
910     // print PDF version
911     printf("PDF version:     %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
912 
913     printPdfSubtype(doc, uMap);
914 }
915 
main(int argc,char * argv[])916 int main(int argc, char *argv[])
917 {
918     std::unique_ptr<PDFDoc> doc;
919     GooString *fileName;
920     GooString *ownerPW, *userPW;
921     const UnicodeMap *uMap;
922     FILE *f;
923     bool ok;
924     int exitCode;
925     bool multiPage;
926 
927     exitCode = 99;
928 
929     // parse args
930     Win32Console win32console(&argc, &argv);
931     ok = parseArgs(argDesc, &argc, argv);
932     if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) {
933         fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION);
934         fprintf(stderr, "%s\n", popplerCopyright);
935         fprintf(stderr, "%s\n", xpdfCopyright);
936         if (!printVersion) {
937             printUsage("pdfinfo", "<PDF-file>", argDesc);
938         }
939         if (printVersion || printHelp)
940             exitCode = 0;
941         goto err0;
942     }
943 
944     if (printStructureText)
945         printStructure = true;
946 
947     // read config file
948     globalParams = std::make_unique<GlobalParams>();
949 
950     if (printEnc) {
951         printEncodings();
952         exitCode = 0;
953         goto err0;
954     }
955 
956     fileName = new GooString(argv[1]);
957 
958     if (textEncName[0]) {
959         globalParams->setTextEncoding(textEncName);
960     }
961 
962     // get mapping to output encoding
963     if (!(uMap = globalParams->getTextEncoding())) {
964         error(errCommandLine, -1, "Couldn't get text encoding");
965         delete fileName;
966         goto err1;
967     }
968 
969     // open PDF file
970     if (ownerPassword[0] != '\001') {
971         ownerPW = new GooString(ownerPassword);
972     } else {
973         ownerPW = nullptr;
974     }
975     if (userPassword[0] != '\001') {
976         userPW = new GooString(userPassword);
977     } else {
978         userPW = nullptr;
979     }
980 
981     if (fileName->cmp("-") == 0) {
982         delete fileName;
983         fileName = new GooString("fd://0");
984     }
985 
986     doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
987 
988     if (userPW) {
989         delete userPW;
990     }
991     if (ownerPW) {
992         delete ownerPW;
993     }
994     if (!doc->isOk()) {
995         exitCode = 1;
996         goto err2;
997     }
998 
999     // get page range
1000     if (firstPage < 1) {
1001         firstPage = 1;
1002     }
1003     if (lastPage == 0) {
1004         multiPage = false;
1005     } else {
1006         multiPage = true;
1007     }
1008     if (lastPage < 1 || lastPage > doc->getNumPages()) {
1009         lastPage = doc->getNumPages();
1010     }
1011     if (lastPage < firstPage) {
1012         error(errCommandLine, -1, "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", firstPage, lastPage);
1013         goto err2;
1014     }
1015 
1016     if (printMetadata) {
1017         // print the metadata
1018         const std::unique_ptr<GooString> metadata = doc->readMetadata();
1019         if (metadata) {
1020             fputs(metadata->c_str(), stdout);
1021             fputc('\n', stdout);
1022         }
1023     } else if (printCustom) {
1024         printCustomInfo(doc.get(), uMap);
1025     } else if (printJS) {
1026         // print javascript
1027         JSInfo jsInfo(doc.get(), firstPage - 1);
1028         jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
1029     } else if (printStructure || printStructureText) {
1030         // print structure
1031         const StructTreeRoot *structTree = doc->getCatalog()->getStructTreeRoot();
1032         if (structTree) {
1033             for (unsigned i = 0; i < structTree->getNumChildren(); i++) {
1034                 printStruct(structTree->getChild(i), 0);
1035             }
1036         }
1037     } else if (printDests) {
1038         printDestinations(doc.get(), uMap);
1039     } else if (printUrls) {
1040         printUrlList(doc.get());
1041     } else {
1042         // print info
1043         long long filesize = 0;
1044 
1045         f = fopen(fileName->c_str(), "rb");
1046         if (f) {
1047             Gfseek(f, 0, SEEK_END);
1048             filesize = Gftell(f);
1049             fclose(f);
1050         }
1051 
1052         if (multiPage == false)
1053             lastPage = 1;
1054 
1055         printInfo(doc.get(), uMap, filesize, multiPage);
1056     }
1057     exitCode = 0;
1058 
1059     // clean up
1060 err2:
1061     delete fileName;
1062 err1:
1063 err0:
1064 
1065     return exitCode;
1066 }
1067