1 //========================================================================
2 //
3 // pdfinfo.cc
4 //
5 // Copyright 1998-2003 Glyph & Cog, LLC
6 // Copyright 2013 Igalia S.L.
7 //
8 //========================================================================
9
10 //========================================================================
11 //
12 // Modified under the Poppler project - http://poppler.freedesktop.org
13 //
14 // All changes made under the Poppler project to this file are licensed
15 // under GPL version 2 or later
16 //
17 // Copyright (C) 2006 Dom Lachowicz <cinamod@hotmail.com>
18 // Copyright (C) 2007-2010, 2012, 2016-2021 Albert Astals Cid <aacid@kde.org>
19 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
20 // Copyright (C) 2011 Vittal Aithal <vittal.aithal@cognidox.com>
21 // Copyright (C) 2012, 2013, 2016-2018, 2021 Adrian Johnson <ajohnson@redneon.com>
22 // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
23 // Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
24 // Copyright (C) 2013 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
25 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
26 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
27 // Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
28 // Copyright (C) 2019 Christian Persch <chpe@src.gnome.org>
29 // Copyright (C) 2019-2021 Oliver Sander <oliver.sander@tu-dresden.de>
30 // Copyright (C) 2019 Thomas Fischer <fischer@unix-ag.uni-kl.de>
31 //
32 // To see a description of the changes please see the Changelog file that
33 // came with your tarball or type make ChangeLog if you are building from git
34 //
35 //========================================================================
36
37 #include "config.h"
38 #include <poppler-config.h>
39 #include <cstdio>
40 #include <cstdlib>
41 #include <cstddef>
42 #include <cstring>
43 #include <ctime>
44 #include <cmath>
45 #include <map>
46 #include <set>
47 #include "parseargs.h"
48 #include "printencodings.h"
49 #include "goo/GooString.h"
50 #include "goo/gfile.h"
51 #include "goo/glibc.h"
52 #include "goo/gmem.h"
53 #include "GlobalParams.h"
54 #include "Object.h"
55 #include "Stream.h"
56 #include "Array.h"
57 #include "Dict.h"
58 #include "XRef.h"
59 #include "Catalog.h"
60 #include "Page.h"
61 #include "PDFDoc.h"
62 #include "PDFDocFactory.h"
63 #include "CharTypes.h"
64 #include "UnicodeMap.h"
65 #include "UTF.h"
66 #include "Error.h"
67 #include "DateInfo.h"
68 #include "JSInfo.h"
69 #include "StructTreeRoot.h"
70 #include "StructElement.h"
71 #include "Win32Console.h"
72
// ---- Command-line state (filled in by parseArgs() through argDesc) ----

// Page range to examine; main() clamps these against the document.
static int firstPage = 1;
static int lastPage = 0;

// Output mode switches.
static bool printBoxes = false;
static bool printMetadata = false;
static bool printCustom = false;
static bool printJS = false;
static bool printStructure = false;
static bool printStructureText = false;
static bool printDests = false;
static bool printUrls = false;
static bool printEnc = false;
static bool printVersion = false;
static bool printHelp = false;

// Date formatting switches.
static bool isoDates = false;
static bool rawDates = false;

// Name of the output text encoding; empty means "leave the default alone".
static char textEncName[128] = "";

// Passwords; main() treats a leading '\001' as "not given on the command line".
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
91
92 static const ArgDesc argDesc[] = { { "-f", argInt, &firstPage, 0, "first page to convert" },
93 { "-l", argInt, &lastPage, 0, "last page to convert" },
94 { "-box", argFlag, &printBoxes, 0, "print the page bounding boxes" },
95 { "-meta", argFlag, &printMetadata, 0, "print the document metadata (XML)" },
96 { "-custom", argFlag, &printCustom, 0, "print both custom and standard metadata" },
97 { "-js", argFlag, &printJS, 0, "print all JavaScript in the PDF" },
98 { "-struct", argFlag, &printStructure, 0, "print the logical document structure (for tagged files)" },
99 { "-struct-text", argFlag, &printStructureText, 0, "print text contents along with document structure (for tagged files)" },
100 { "-isodates", argFlag, &isoDates, 0, "print the dates in ISO-8601 format" },
101 { "-rawdates", argFlag, &rawDates, 0, "print the undecoded date strings directly from the PDF file" },
102 { "-dests", argFlag, &printDests, 0, "print all named destinations in the PDF" },
103 { "-url", argFlag, &printUrls, 0, "print all URLs inside PDF objects (does not scan text content)" },
104 { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" },
105 { "-listenc", argFlag, &printEnc, 0, "list available encodings" },
106 { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
107 { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
108 { "-v", argFlag, &printVersion, 0, "print copyright and version info" },
109 { "-h", argFlag, &printHelp, 0, "print usage information" },
110 { "-help", argFlag, &printHelp, 0, "print usage information" },
111 { "--help", argFlag, &printHelp, 0, "print usage information" },
112 { "-?", argFlag, &printHelp, 0, "print usage information" },
113 {} };
114
printTextString(const GooString * s,const UnicodeMap * uMap)115 static void printTextString(const GooString *s, const UnicodeMap *uMap)
116 {
117 Unicode *u;
118 char buf[8];
119 int len = TextStringToUCS4(s->toStr(), &u);
120 for (int i = 0; i < len; i++) {
121 int n = uMap->mapUnicode(u[i], buf, sizeof(buf));
122 fwrite(buf, 1, n, stdout);
123 }
124 gfree(u);
125 }
126
printUCS4String(const Unicode * u,int len,const UnicodeMap * uMap)127 static void printUCS4String(const Unicode *u, int len, const UnicodeMap *uMap)
128 {
129 char buf[8];
130 for (int i = 0; i < len; i++) {
131 int n = uMap->mapUnicode(u[i], buf, sizeof(buf));
132 fwrite(buf, 1, n, stdout);
133 }
134 }
135
printInfoString(Dict * infoDict,const char * key,const char * text,const UnicodeMap * uMap)136 static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
137 {
138 const GooString *s1;
139
140 Object obj = infoDict->lookup(key);
141 if (obj.isString()) {
142 fputs(text, stdout);
143 s1 = obj.getString();
144 printTextString(s1, uMap);
145 fputc('\n', stdout);
146 }
147 }
148
printInfoDate(Dict * infoDict,const char * key,const char * text,const UnicodeMap * uMap)149 static void printInfoDate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
150 {
151 int year, mon, day, hour, min, sec, tz_hour, tz_minute;
152 char tz;
153 struct tm tmStruct;
154 time_t time;
155 char buf[256];
156
157 Object obj = infoDict->lookup(key);
158 if (obj.isString()) {
159 fputs(text, stdout);
160 const GooString *s = obj.getString();
161 // TODO do something with the timezone info
162 if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
163 tmStruct.tm_year = year - 1900;
164 tmStruct.tm_mon = mon - 1;
165 tmStruct.tm_mday = day;
166 tmStruct.tm_hour = hour;
167 tmStruct.tm_min = min;
168 tmStruct.tm_sec = sec;
169 tmStruct.tm_wday = -1;
170 tmStruct.tm_yday = -1;
171 tmStruct.tm_isdst = -1;
172 // compute the tm_wday and tm_yday fields
173 time = timegm(&tmStruct);
174 if (time != (time_t)-1) {
175 int offset = (tz_hour * 60 + tz_minute) * 60;
176 if (tz == '-')
177 offset *= -1;
178 time -= offset;
179 localtime_r(&time, &tmStruct);
180 strftime(buf, sizeof(buf), "%c %Z", &tmStruct);
181 fputs(buf, stdout);
182 } else {
183 printTextString(s, uMap);
184 }
185 } else {
186 printTextString(s, uMap);
187 }
188 fputc('\n', stdout);
189 }
190 }
191
printISODate(Dict * infoDict,const char * key,const char * text,const UnicodeMap * uMap)192 static void printISODate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
193 {
194 int year, mon, day, hour, min, sec, tz_hour, tz_minute;
195 char tz;
196
197 Object obj = infoDict->lookup(key);
198 if (obj.isString()) {
199 fputs(text, stdout);
200 const GooString *s = obj.getString();
201 if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
202 fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
203 if (tz_hour == 0 && tz_minute == 0) {
204 fprintf(stdout, "Z");
205 } else {
206 fprintf(stdout, "%c%02d", tz, tz_hour);
207 if (tz_minute)
208 fprintf(stdout, ":%02d", tz_minute);
209 }
210 } else {
211 printTextString(obj.getString(), uMap);
212 }
213 fputc('\n', stdout);
214 }
215 }
216
printBox(const char * text,const PDFRectangle * box)217 static void printBox(const char *text, const PDFRectangle *box)
218 {
219 printf("%s%8.2f %8.2f %8.2f %8.2f\n", text, box->x1, box->y1, box->x2, box->y2);
220 }
221
// Writes two spaces to stdout for every indentation level.
static void printIndent(unsigned indent)
{
    for (unsigned level = 0; level < indent; ++level) {
        fputs("  ", stdout);
    }
}
229
printAttribute(const Attribute * attribute,unsigned indent)230 static void printAttribute(const Attribute *attribute, unsigned indent)
231 {
232 printIndent(indent);
233 printf(" /%s ", attribute->getTypeName());
234 if (attribute->getType() == Attribute::UserProperty) {
235 std::unique_ptr<GooString> name = attribute->getName();
236 printf("(%s) ", name->c_str());
237 }
238 attribute->getValue()->print(stdout);
239 if (attribute->getFormattedValue()) {
240 printf(" \"%s\"", attribute->getFormattedValue());
241 }
242 if (attribute->isHidden()) {
243 printf(" [hidden]");
244 }
245 }
246
printStruct(const StructElement * element,unsigned indent)247 static void printStruct(const StructElement *element, unsigned indent)
248 {
249 if (element->isObjectRef()) {
250 printIndent(indent);
251 printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen);
252 return;
253 }
254
255 if (printStructureText && element->isContent()) {
256 GooString *text = element->getText(false);
257 printIndent(indent);
258 if (text) {
259 printf("\"%s\"\n", text->c_str());
260 } else {
261 printf("(No content?)\n");
262 }
263 delete text;
264 }
265
266 if (!element->isContent()) {
267 printIndent(indent);
268 printf("%s", element->getTypeName());
269 if (element->getID()) {
270 printf(" <%s>", element->getID()->c_str());
271 }
272 if (element->getTitle()) {
273 printf(" \"%s\"", element->getTitle()->c_str());
274 }
275 if (element->getRevision() > 0) {
276 printf(" r%u", element->getRevision());
277 }
278 if (element->isInline() || element->isBlock()) {
279 printf(" (%s)", element->isInline() ? "inline" : "block");
280 }
281 if (element->getNumAttributes()) {
282 putchar(':');
283 for (unsigned i = 0; i < element->getNumAttributes(); i++) {
284 putchar('\n');
285 printAttribute(element->getAttribute(i), indent + 1);
286 }
287 }
288
289 putchar('\n');
290 for (unsigned i = 0; i < element->getNumChildren(); i++) {
291 printStruct(element->getChild(i), indent + 1);
292 }
293 }
294 }
295
296 struct GooStringCompare
297 {
operator ()GooStringCompare298 bool operator()(GooString *lhs, GooString *rhs) const { return lhs->cmp(const_cast<GooString *>(rhs)) < 0; }
299 };
300
printLinkDest(const std::unique_ptr<LinkDest> & dest)301 static void printLinkDest(const std::unique_ptr<LinkDest> &dest)
302 {
303 GooString s;
304
305 switch (dest->getKind()) {
306 case destXYZ:
307 s.append("[ XYZ ");
308 if (dest->getChangeLeft()) {
309 s.appendf("{0:4.0g} ", dest->getLeft());
310 } else {
311 s.append("null ");
312 }
313 if (dest->getChangeTop()) {
314 s.appendf("{0:4.0g} ", dest->getTop());
315 } else {
316 s.append("null ");
317 }
318 if (dest->getChangeZoom()) {
319 s.appendf("{0:4.2f} ", dest->getZoom());
320 } else {
321 s.append("null ");
322 }
323 break;
324 case destFit:
325 s.append("[ Fit ");
326 break;
327 case destFitH:
328 if (dest->getChangeTop()) {
329 s.appendf("[ FitH {0:4.0g} ", dest->getTop());
330 } else {
331 s.append("[ FitH null ");
332 }
333 break;
334 case destFitV:
335 if (dest->getChangeLeft()) {
336 s.appendf("[ FitV {0:4.0g} ", dest->getLeft());
337 } else {
338 s.append("[ FitV null ");
339 }
340 break;
341 case destFitR:
342 s.appendf("[ FitR {0:4.0g} {1:4.0g} {2:4.0g} {3:4.0g} ", dest->getLeft(), dest->getBottom(), dest->getRight(), dest->getTop());
343 break;
344 case destFitB:
345 s.append("[ FitB ");
346 break;
347 case destFitBH:
348 if (dest->getChangeTop()) {
349 s.appendf("[ FitBH {0:4.0g} ", dest->getTop());
350 } else {
351 s.append("[ FitBH null ");
352 }
353 break;
354 case destFitBV:
355 if (dest->getChangeLeft()) {
356 s.appendf("[ FitBV {0:4.0g} ", dest->getLeft());
357 } else {
358 s.append("[ FitBV null ");
359 }
360 break;
361 }
362
363 s.append(" ");
364 s.setChar(26, ']');
365 s.setChar(27, '\0');
366 printf("%s", s.c_str());
367 }
368
printDestinations(PDFDoc * doc,const UnicodeMap * uMap)369 static void printDestinations(PDFDoc *doc, const UnicodeMap *uMap)
370 {
371 std::map<Ref, std::map<GooString *, std::unique_ptr<LinkDest>, GooStringCompare>> map;
372
373 int numDests = doc->getCatalog()->numDestNameTree();
374 for (int i = 0; i < numDests; i++) {
375 GooString *name = new GooString(doc->getCatalog()->getDestNameTreeName(i));
376 std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestNameTreeDest(i);
377 if (dest && dest->isPageRef()) {
378 Ref pageRef = dest->getPageRef();
379 map[pageRef].insert(std::make_pair(name, std::move(dest)));
380 } else {
381 delete name;
382 }
383 }
384
385 numDests = doc->getCatalog()->numDests();
386 for (int i = 0; i < numDests; i++) {
387 GooString *name = new GooString(doc->getCatalog()->getDestsName(i));
388 std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestsDest(i);
389 if (dest && dest->isPageRef()) {
390 Ref pageRef = dest->getPageRef();
391 map[pageRef].insert(std::make_pair(name, std::move(dest)));
392 } else {
393 delete name;
394 }
395 }
396
397 printf("Page Destination Name\n");
398 for (int i = firstPage; i <= lastPage; i++) {
399 Ref *ref = doc->getCatalog()->getPageRef(i);
400 if (ref) {
401 auto pageDests = map.find(*ref);
402 if (pageDests != map.end()) {
403 for (auto &it : pageDests->second) {
404 printf("%4d ", i);
405 printLinkDest(it.second);
406 printf(" \"");
407 printTextString(it.first, uMap);
408 printf("\"\n");
409 delete it.first;
410 }
411 }
412 }
413 }
414 }
415
printUrlList(PDFDoc * doc)416 static void printUrlList(PDFDoc *doc)
417 {
418 printf("Page Type URL\n");
419 for (int pg = firstPage; pg <= lastPage; pg++) {
420 Page *page = doc->getPage(pg);
421 if (page) {
422 std::unique_ptr<Links> links = page->getLinks();
423 for (int i = 0; i < links->getNumLinks(); i++) {
424 AnnotLink *annot = links->getLink(i);
425 LinkAction *action = annot->getAction();
426 if (action->getKind() == actionURI) {
427 LinkURI *linkUri = dynamic_cast<LinkURI *>(action);
428 std::string uri = linkUri->getURI();
429 printf("%4d Annotation %s\n", pg, uri.c_str());
430 }
431 }
432 }
433 }
434 }
435
printPdfSubtype(PDFDoc * doc,const UnicodeMap * uMap)436 static void printPdfSubtype(PDFDoc *doc, const UnicodeMap *uMap)
437 {
438 const Object info = doc->getDocInfo();
439 if (info.isDict()) {
440 const PDFSubtype pdftype = doc->getPDFSubtype();
441
442 if ((pdftype == subtypeNull) | (pdftype == subtypeNone)) {
443 return;
444 }
445
446 std::unique_ptr<GooString> part;
447 std::unique_ptr<GooString> abbr;
448 std::unique_ptr<GooString> standard;
449 std::unique_ptr<GooString> typeExp;
450 std::unique_ptr<GooString> confExp;
451
452 // Form title from PDFSubtype
453 switch (pdftype) {
454 case subtypePDFA:
455 printInfoString(info.getDict(), "GTS_PDFA1Version", "PDF subtype: ", uMap);
456 typeExp = std::make_unique<GooString>("ISO 19005 - Electronic document file format for long-term preservation (PDF/A)");
457 standard = std::make_unique<GooString>("ISO 19005");
458 abbr = std::make_unique<GooString>("PDF/A");
459 break;
460 case subtypePDFE:
461 printInfoString(info.getDict(), "GTS_PDFEVersion", "PDF subtype: ", uMap);
462 typeExp = std::make_unique<GooString>("ISO 24517 - Engineering document format using PDF (PDF/E)");
463 standard = std::make_unique<GooString>("ISO 24517");
464 abbr = std::make_unique<GooString>("PDF/E");
465 break;
466 case subtypePDFUA:
467 printInfoString(info.getDict(), "GTS_PDFUAVersion", "PDF subtype: ", uMap);
468 typeExp = std::make_unique<GooString>("ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)");
469 standard = std::make_unique<GooString>("ISO 14289");
470 abbr = std::make_unique<GooString>("PDF/UA");
471 break;
472 case subtypePDFVT:
473 printInfoString(info.getDict(), "GTS_PDFVTVersion", "PDF subtype: ", uMap);
474 typeExp = std::make_unique<GooString>("ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)");
475 standard = std::make_unique<GooString>("ISO 16612");
476 abbr = std::make_unique<GooString>("PDF/VT");
477 break;
478 case subtypePDFX:
479 printInfoString(info.getDict(), "GTS_PDFXVersion", "PDF subtype: ", uMap);
480 typeExp = std::make_unique<GooString>("ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)");
481 standard = std::make_unique<GooString>("ISO 15930");
482 abbr = std::make_unique<GooString>("PDF/X");
483 break;
484 case subtypeNone:
485 case subtypeNull:
486 default:
487 return;
488 }
489
490 // Form the abbreviation from PDFSubtypePart and PDFSubtype
491 const PDFSubtypePart subpart = doc->getPDFSubtypePart();
492 switch (pdftype) {
493 case subtypePDFX:
494 switch (subpart) {
495 case subtypePart1:
496 abbr->append("-1:2001");
497 break;
498 case subtypePart2:
499 abbr->append("-2");
500 break;
501 case subtypePart3:
502 abbr->append("-3:2002");
503 break;
504 case subtypePart4:
505 abbr->append("-1:2003");
506 break;
507 case subtypePart5:
508 abbr->append("-2");
509 break;
510 case subtypePart6:
511 abbr->append("-3:2003");
512 break;
513 case subtypePart7:
514 abbr->append("-4");
515 break;
516 case subtypePart8:
517 abbr->append("-5");
518 break;
519 default:
520 break;
521 }
522 break;
523 case subtypeNone:
524 case subtypeNull:
525 break;
526 default:
527 abbr->appendf("-{0:d}", subpart);
528 break;
529 }
530
531 // Form standard from PDFSubtypePart
532 switch (subpart) {
533 case subtypePartNone:
534 case subtypePartNull:
535 break;
536 default:
537 standard->appendf("-{0:d}", subpart);
538 break;
539 }
540
541 // Form the subtitle from PDFSubtypePart and PDFSubtype
542 switch (pdftype) {
543 case subtypePDFA:
544 switch (subpart) {
545 case subtypePart1:
546 part = std::make_unique<GooString>("Use of PDF 1.4");
547 break;
548 case subtypePart2:
549 part = std::make_unique<GooString>("Use of ISO 32000-1");
550 break;
551 case subtypePart3:
552 part = std::make_unique<GooString>("Use of ISO 32000-1 with support for embedded files");
553 break;
554 default:
555 break;
556 }
557 break;
558 case subtypePDFE:
559 switch (subpart) {
560 case subtypePart1:
561 part = std::make_unique<GooString>("Use of PDF 1.6");
562 break;
563 default:
564 break;
565 }
566 break;
567 case subtypePDFUA:
568 switch (subpart) {
569 case subtypePart1:
570 part = std::make_unique<GooString>("Use of ISO 32000-1");
571 break;
572 case subtypePart2:
573 part = std::make_unique<GooString>("Use of ISO 32000-2");
574 break;
575 case subtypePart3:
576 part = std::make_unique<GooString>("Use of ISO 32000-1 with support for embedded files");
577 break;
578 default:
579 break;
580 }
581 break;
582 case subtypePDFVT:
583 switch (subpart) {
584 case subtypePart1:
585 part = std::make_unique<GooString>("Using PPML 2.1 and PDF 1.4");
586 break;
587 case subtypePart2:
588 part = std::make_unique<GooString>("Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)");
589 break;
590 case subtypePart3:
591 part = std::make_unique<GooString>("Using PDF/X-6 (PDF/VT-3)");
592 break;
593 default:
594 break;
595 }
596 break;
597 case subtypePDFX:
598 switch (subpart) {
599 case subtypePart1:
600 part = std::make_unique<GooString>("Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)");
601 break;
602 case subtypePart3:
603 part = std::make_unique<GooString>("Complete exchange suitable for colour-managed workflows (PDF/X-3)");
604 break;
605 case subtypePart4:
606 part = std::make_unique<GooString>("Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)");
607 break;
608 case subtypePart5:
609 part = std::make_unique<GooString>("Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]");
610 break;
611 case subtypePart6:
612 part = std::make_unique<GooString>("Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)");
613 break;
614 case subtypePart7:
615 part = std::make_unique<GooString>("Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6");
616 break;
617 case subtypePart8:
618 part = std::make_unique<GooString>("Partial exchange of printing data using PDF 1.6 (PDF/X-5)");
619 break;
620 default:
621 break;
622 }
623 break;
624 default:
625 break;
626 }
627
628 // Form Conformance explanation from PDFSubtypeConformance
629 switch (doc->getPDFSubtypeConformance()) {
630 case subtypeConfA:
631 confExp = std::make_unique<GooString>("Level A, Accessible");
632 break;
633 case subtypeConfB:
634 confExp = std::make_unique<GooString>("Level B, Basic");
635 break;
636 case subtypeConfG:
637 confExp = std::make_unique<GooString>("Level G, External graphical content");
638 break;
639 case subtypeConfN:
640 confExp = std::make_unique<GooString>("Level N, External ICC profile");
641 break;
642 case subtypeConfP:
643 confExp = std::make_unique<GooString>("Level P, Embedded ICC profile");
644 break;
645 case subtypeConfPG:
646 confExp = std::make_unique<GooString>("Level PG, Embedded ICC profile and external graphical content");
647 break;
648 case subtypeConfU:
649 confExp = std::make_unique<GooString>("Level U, Unicode support");
650 break;
651 case subtypeConfNone:
652 case subtypeConfNull:
653 default:
654 confExp.reset();
655 break;
656 }
657
658 printf(" Title: %s\n", typeExp->c_str());
659 printf(" Abbreviation: %s\n", abbr->c_str());
660 if (part.get())
661 printf(" Subtitle: Part %d: %s\n", subpart, part->c_str());
662 else
663 printf(" Subtitle: Part %d\n", subpart);
664 printf(" Standard: %s-%d\n", typeExp->toStr().substr(0, 9).c_str(), subpart);
665 if (confExp.get())
666 printf(" Conformance: %s\n", confExp->c_str());
667 }
668 }
669
printCustomInfo(PDFDoc * doc,const UnicodeMap * uMap)670 static void printCustomInfo(PDFDoc *doc, const UnicodeMap *uMap)
671 {
672 Object info = doc->getDocInfo();
673 if (info.isDict()) {
674 Dict *dict = info.getDict();
675
676 // Sort keys
677 std::set<std::string> keys;
678 for (int i = 0; i < dict->getLength(); i++) {
679 std::string key(dict->getKey(i));
680 if (key != "Trapped") {
681 keys.insert(key);
682 }
683 }
684
685 for (const std::string &key : keys) {
686 if (key == "CreationDate") {
687 if (isoDates) {
688 printISODate(info.getDict(), "CreationDate", "CreationDate: ", uMap);
689 } else if (rawDates) {
690 printInfoString(info.getDict(), "CreationDate", "CreationDate: ", uMap);
691 } else {
692 printInfoDate(info.getDict(), "CreationDate", "CreationDate: ", uMap);
693 }
694 } else if (key == "ModDate") {
695 if (isoDates) {
696 printISODate(info.getDict(), "ModDate", "ModDate: ", uMap);
697 } else if (rawDates) {
698 printInfoString(info.getDict(), "ModDate", "ModDate: ", uMap);
699 } else {
700 printInfoDate(info.getDict(), "ModDate", "ModDate: ", uMap);
701 }
702 } else {
703 Object obj = dict->lookup(key.c_str());
704 if (obj.isString()) {
705 // print key
706 Unicode *u;
707 int len = utf8ToUCS4(key.c_str(), &u);
708 printUCS4String(u, len, uMap);
709 fputs(":", stdout);
710 while (len < 16) {
711 fputs(" ", stdout);
712 len++;
713 }
714 gfree(u);
715
716 // print value
717 GooString val_str(obj.getString());
718 printTextString(&val_str, uMap);
719 fputc('\n', stdout);
720 }
721 }
722 }
723 }
724 }
725
printInfo(PDFDoc * doc,const UnicodeMap * uMap,long long filesize,bool multiPage)726 static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, bool multiPage)
727 {
728 Page *page;
729 char buf[256];
730 double w, h, wISO, hISO, isoThreshold;
731 int pg, i;
732 int r;
733
734 // print doc info
735 Object info = doc->getDocInfo();
736 if (info.isDict()) {
737 printInfoString(info.getDict(), "Title", "Title: ", uMap);
738 printInfoString(info.getDict(), "Subject", "Subject: ", uMap);
739 printInfoString(info.getDict(), "Keywords", "Keywords: ", uMap);
740 printInfoString(info.getDict(), "Author", "Author: ", uMap);
741 printInfoString(info.getDict(), "Creator", "Creator: ", uMap);
742 printInfoString(info.getDict(), "Producer", "Producer: ", uMap);
743 if (isoDates) {
744 printISODate(info.getDict(), "CreationDate", "CreationDate: ", uMap);
745 printISODate(info.getDict(), "ModDate", "ModDate: ", uMap);
746 } else if (rawDates) {
747 printInfoString(info.getDict(), "CreationDate", "CreationDate: ", uMap);
748 printInfoString(info.getDict(), "ModDate", "ModDate: ", uMap);
749 } else {
750 printInfoDate(info.getDict(), "CreationDate", "CreationDate: ", uMap);
751 printInfoDate(info.getDict(), "ModDate", "ModDate: ", uMap);
752 }
753 }
754
755 bool hasMetadata = false;
756 std::unique_ptr<GooString> metadata = doc->readMetadata();
757 if (metadata) {
758 hasMetadata = true;
759 }
760
761 const std::set<std::string> docInfoStandardKeys { "Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped" };
762
763 bool hasCustom = false;
764 if (info.isDict()) {
765 Dict *dict = info.getDict();
766 for (i = 0; i < dict->getLength(); i++) {
767 std::string key(dict->getKey(i));
768 if (docInfoStandardKeys.find(key) == docInfoStandardKeys.end()) {
769 hasCustom = true;
770 break;
771 }
772 }
773 }
774
775 // print metadata info
776 printf("Custom Metadata: %s\n", hasCustom ? "yes" : "no");
777 printf("Metadata Stream: %s\n", hasMetadata ? "yes" : "no");
778
779 // print tagging info
780 printf("Tagged: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked) ? "yes" : "no");
781 printf("UserProperties: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoUserProperties) ? "yes" : "no");
782 printf("Suspects: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoSuspects) ? "yes" : "no");
783
784 // print form info
785 switch (doc->getCatalog()->getFormType()) {
786 case Catalog::NoForm:
787 printf("Form: none\n");
788 break;
789 case Catalog::AcroForm:
790 printf("Form: AcroForm\n");
791 break;
792 case Catalog::XfaForm:
793 printf("Form: XFA\n");
794 break;
795 }
796
797 // print javascript info
798 {
799 JSInfo jsInfo(doc, firstPage - 1);
800 jsInfo.scanJS(lastPage - firstPage + 1);
801 printf("JavaScript: %s\n", jsInfo.containsJS() ? "yes" : "no");
802 }
803
804 // print page count
805 printf("Pages: %d\n", doc->getNumPages());
806
807 // print encryption info
808 printf("Encrypted: ");
809 if (doc->isEncrypted()) {
810 unsigned char *fileKey;
811 CryptAlgorithm encAlgorithm;
812 int keyLength;
813 doc->getXRef()->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
814
815 const char *encAlgorithmName = "unknown";
816 switch (encAlgorithm) {
817 case cryptRC4:
818 encAlgorithmName = "RC4";
819 break;
820 case cryptAES:
821 encAlgorithmName = "AES";
822 break;
823 case cryptAES256:
824 encAlgorithmName = "AES-256";
825 break;
826 case cryptNone:
827 break;
828 }
829
830 printf("yes (print:%s copy:%s change:%s addNotes:%s algorithm:%s)\n", doc->okToPrint(true) ? "yes" : "no", doc->okToCopy(true) ? "yes" : "no", doc->okToChange(true) ? "yes" : "no", doc->okToAddNotes(true) ? "yes" : "no",
831 encAlgorithmName);
832 } else {
833 printf("no\n");
834 }
835
836 // print page size
837 for (pg = firstPage; pg <= lastPage; ++pg) {
838 w = doc->getPageCropWidth(pg);
839 h = doc->getPageCropHeight(pg);
840 if (multiPage) {
841 printf("Page %4d size: %g x %g pts", pg, w, h);
842 } else {
843 printf("Page size: %g x %g pts", w, h);
844 }
845 if ((fabs(w - 612) < 1 && fabs(h - 792) < 1) || (fabs(w - 792) < 1 && fabs(h - 612) < 1)) {
846 printf(" (letter)");
847 } else {
848 hISO = sqrt(sqrt(2.0)) * 7200 / 2.54;
849 wISO = hISO / sqrt(2.0);
850 isoThreshold = hISO * 0.003; ///< allow for 0.3% error when guessing conformance to ISO 216, A series
851 for (i = 0; i <= 6; ++i) {
852 if ((fabs(w - wISO) < isoThreshold && fabs(h - hISO) < isoThreshold) || (fabs(w - hISO) < isoThreshold && fabs(h - wISO) < isoThreshold)) {
853 printf(" (A%d)", i);
854 break;
855 }
856 hISO = wISO;
857 wISO /= sqrt(2.0);
858 isoThreshold /= sqrt(2.0);
859 }
860 }
861 printf("\n");
862 r = doc->getPageRotate(pg);
863 if (multiPage) {
864 printf("Page %4d rot: %d\n", pg, r);
865 } else {
866 printf("Page rot: %d\n", r);
867 }
868 }
869
870 // print the boxes
871 if (printBoxes) {
872 if (multiPage) {
873 for (pg = firstPage; pg <= lastPage; ++pg) {
874 page = doc->getPage(pg);
875 if (!page) {
876 error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", pg);
877 continue;
878 }
879 sprintf(buf, "Page %4d MediaBox: ", pg);
880 printBox(buf, page->getMediaBox());
881 sprintf(buf, "Page %4d CropBox: ", pg);
882 printBox(buf, page->getCropBox());
883 sprintf(buf, "Page %4d BleedBox: ", pg);
884 printBox(buf, page->getBleedBox());
885 sprintf(buf, "Page %4d TrimBox: ", pg);
886 printBox(buf, page->getTrimBox());
887 sprintf(buf, "Page %4d ArtBox: ", pg);
888 printBox(buf, page->getArtBox());
889 }
890 } else {
891 page = doc->getPage(firstPage);
892 if (!page) {
893 error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", firstPage);
894 } else {
895 printBox("MediaBox: ", page->getMediaBox());
896 printBox("CropBox: ", page->getCropBox());
897 printBox("BleedBox: ", page->getBleedBox());
898 printBox("TrimBox: ", page->getTrimBox());
899 printBox("ArtBox: ", page->getArtBox());
900 }
901 }
902 }
903
904 // print file size
905 printf("File size: %lld bytes\n", filesize);
906
907 // print linearization info
908 printf("Optimized: %s\n", doc->isLinearized() ? "yes" : "no");
909
910 // print PDF version
911 printf("PDF version: %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
912
913 printPdfSubtype(doc, uMap);
914 }
915
main(int argc,char * argv[])916 int main(int argc, char *argv[])
917 {
918 std::unique_ptr<PDFDoc> doc;
919 GooString *fileName;
920 GooString *ownerPW, *userPW;
921 const UnicodeMap *uMap;
922 FILE *f;
923 bool ok;
924 int exitCode;
925 bool multiPage;
926
927 exitCode = 99;
928
929 // parse args
930 Win32Console win32console(&argc, &argv);
931 ok = parseArgs(argDesc, &argc, argv);
932 if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) {
933 fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION);
934 fprintf(stderr, "%s\n", popplerCopyright);
935 fprintf(stderr, "%s\n", xpdfCopyright);
936 if (!printVersion) {
937 printUsage("pdfinfo", "<PDF-file>", argDesc);
938 }
939 if (printVersion || printHelp)
940 exitCode = 0;
941 goto err0;
942 }
943
944 if (printStructureText)
945 printStructure = true;
946
947 // read config file
948 globalParams = std::make_unique<GlobalParams>();
949
950 if (printEnc) {
951 printEncodings();
952 exitCode = 0;
953 goto err0;
954 }
955
956 fileName = new GooString(argv[1]);
957
958 if (textEncName[0]) {
959 globalParams->setTextEncoding(textEncName);
960 }
961
962 // get mapping to output encoding
963 if (!(uMap = globalParams->getTextEncoding())) {
964 error(errCommandLine, -1, "Couldn't get text encoding");
965 delete fileName;
966 goto err1;
967 }
968
969 // open PDF file
970 if (ownerPassword[0] != '\001') {
971 ownerPW = new GooString(ownerPassword);
972 } else {
973 ownerPW = nullptr;
974 }
975 if (userPassword[0] != '\001') {
976 userPW = new GooString(userPassword);
977 } else {
978 userPW = nullptr;
979 }
980
981 if (fileName->cmp("-") == 0) {
982 delete fileName;
983 fileName = new GooString("fd://0");
984 }
985
986 doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
987
988 if (userPW) {
989 delete userPW;
990 }
991 if (ownerPW) {
992 delete ownerPW;
993 }
994 if (!doc->isOk()) {
995 exitCode = 1;
996 goto err2;
997 }
998
999 // get page range
1000 if (firstPage < 1) {
1001 firstPage = 1;
1002 }
1003 if (lastPage == 0) {
1004 multiPage = false;
1005 } else {
1006 multiPage = true;
1007 }
1008 if (lastPage < 1 || lastPage > doc->getNumPages()) {
1009 lastPage = doc->getNumPages();
1010 }
1011 if (lastPage < firstPage) {
1012 error(errCommandLine, -1, "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", firstPage, lastPage);
1013 goto err2;
1014 }
1015
1016 if (printMetadata) {
1017 // print the metadata
1018 const std::unique_ptr<GooString> metadata = doc->readMetadata();
1019 if (metadata) {
1020 fputs(metadata->c_str(), stdout);
1021 fputc('\n', stdout);
1022 }
1023 } else if (printCustom) {
1024 printCustomInfo(doc.get(), uMap);
1025 } else if (printJS) {
1026 // print javascript
1027 JSInfo jsInfo(doc.get(), firstPage - 1);
1028 jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
1029 } else if (printStructure || printStructureText) {
1030 // print structure
1031 const StructTreeRoot *structTree = doc->getCatalog()->getStructTreeRoot();
1032 if (structTree) {
1033 for (unsigned i = 0; i < structTree->getNumChildren(); i++) {
1034 printStruct(structTree->getChild(i), 0);
1035 }
1036 }
1037 } else if (printDests) {
1038 printDestinations(doc.get(), uMap);
1039 } else if (printUrls) {
1040 printUrlList(doc.get());
1041 } else {
1042 // print info
1043 long long filesize = 0;
1044
1045 f = fopen(fileName->c_str(), "rb");
1046 if (f) {
1047 Gfseek(f, 0, SEEK_END);
1048 filesize = Gftell(f);
1049 fclose(f);
1050 }
1051
1052 if (multiPage == false)
1053 lastPage = 1;
1054
1055 printInfo(doc.get(), uMap, filesize, multiPage);
1056 }
1057 exitCode = 0;
1058
1059 // clean up
1060 err2:
1061 delete fileName;
1062 err1:
1063 err0:
1064
1065 return exitCode;
1066 }
1067