1 //========================================================================
2 //
3 // Catalog.cc
4 //
5 // Copyright 1996-2007 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
17 // Copyright (C) 2005-2013, 2015, 2017-2021 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2005 Jeff Muizelaar <jrmuizel@nit.ca>
19 // Copyright (C) 2005 Jonathan Blandford <jrb@redhat.com>
20 // Copyright (C) 2005 Marco Pesenti Gritti <mpg@redhat.com>
21 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
22 // Copyright (C) 2006, 2008, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
23 // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
24 // Copyright (C) 2008, 2011 Pino Toscano <pino@kde.org>
25 // Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
26 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
27 // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
28 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
29 // Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
30 // Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
31 // Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013 José Aliste <jaliste@src.gnome.org>
33 // Copyright (C) 2014 Ed Porras <ed@moto-research.com>
34 // Copyright (C) 2015 Even Rouault <even.rouault@spatialys.com>
35 // Copyright (C) 2016 Masamichi Hosoda <trueroad@trueroad.jp>
36 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
37 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
38 // Copyright (C) 2020 Oliver Sander <oliver.sander@tu-dresden.de>
39 // Copyright (C) 2020 Katarina Behrens <Katarina.Behrens@cib.de>
40 // Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de>
41 // Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by Technische Universität Dresden
42 // Copyright (C) 2021 RM <rm+git@arcsin.org>
43 //
44 // To see a description of the changes please see the Changelog file that
45 // came with your tarball or type make ChangeLog if you are building from git
46 //
47 //========================================================================
48
49 #include <config.h>
50
51 #include <cstddef>
52 #include <cstdlib>
53 #include "goo/gmem.h"
54 #include "Object.h"
55 #include "PDFDoc.h"
56 #include "XRef.h"
57 #include "Array.h"
58 #include "Dict.h"
59 #include "Page.h"
60 #include "Error.h"
61 #include "Link.h"
62 #include "PageLabelInfo.h"
63 #include "Catalog.h"
64 #include "Form.h"
65 #include "OptionalContent.h"
66 #include "ViewerPreferences.h"
67 #include "FileSpec.h"
68 #include "StructTreeRoot.h"
69
70 //------------------------------------------------------------------------
71 // Catalog
72 //------------------------------------------------------------------------
73
// Declares a local std::unique_lock named `locker` on the recursive `mutex`
// that is in scope where the macro is expanded; the lock is held until the
// end of the enclosing block.
#define catalogLocker() std::unique_lock<std::recursive_mutex> locker(mutex)
75
Catalog(PDFDoc * docA)76 Catalog::Catalog(PDFDoc *docA)
77 {
78 ok = true;
79 doc = docA;
80 xref = doc->getXRef();
81 numPages = -1;
82 baseURI = nullptr;
83 pageLabelInfo = nullptr;
84 form = nullptr;
85 optContent = nullptr;
86 pageMode = pageModeNull;
87 pageLayout = pageLayoutNull;
88 destNameTree = nullptr;
89 embeddedFileNameTree = nullptr;
90 jsNameTree = nullptr;
91 viewerPrefs = nullptr;
92 structTreeRoot = nullptr;
93
94 pagesList = nullptr;
95 pagesRefList = nullptr;
96 attrsList = nullptr;
97 kidsIdxList = nullptr;
98 markInfo = markInfoNull;
99
100 Object catDict = xref->getCatalog();
101 if (!catDict.isDict()) {
102 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
103 ok = false;
104 return;
105 }
106 // get the AcroForm dictionary
107 acroForm = catDict.dictLookup("AcroForm");
108
109 // read base URI
110 Object obj = catDict.getDict()->lookupEnsureEncryptedIfNeeded("URI");
111 if (obj.isDict()) {
112 Object obj2 = obj.getDict()->lookupEnsureEncryptedIfNeeded("Base");
113 if (obj2.isString()) {
114 baseURI = obj2.getString()->copy();
115 }
116 }
117
118 // get the Optional Content dictionary
119 Object optContentProps = catDict.dictLookup("OCProperties");
120 if (optContentProps.isDict()) {
121 optContent = new OCGs(&optContentProps, xref);
122 if (!optContent->isOk()) {
123 delete optContent;
124 optContent = nullptr;
125 }
126 }
127
128 // actions
129 additionalActions = catDict.dictLookupNF("AA").copy();
130
131 // get the ViewerPreferences dictionary
132 viewerPreferences = catDict.dictLookup("ViewerPreferences");
133
134 const Object version = catDict.dictLookup("Version");
135 if (version.isName()) {
136 const int res = sscanf(version.getName(), "%d.%d", &catalogPdfMajorVersion, &catalogPdfMinorVersion);
137 if (res != 2) {
138 catalogPdfMajorVersion = -1;
139 catalogPdfMinorVersion = -1;
140 }
141 }
142 }
143
~Catalog()144 Catalog::~Catalog()
145 {
146 delete kidsIdxList;
147 if (attrsList) {
148 std::vector<PageAttrs *>::iterator it;
149 for (it = attrsList->begin(); it != attrsList->end(); ++it) {
150 delete *it;
151 }
152 delete attrsList;
153 }
154 delete pagesRefList;
155 delete pagesList;
156 delete destNameTree;
157 delete embeddedFileNameTree;
158 delete jsNameTree;
159 if (baseURI) {
160 delete baseURI;
161 }
162 delete pageLabelInfo;
163 delete form;
164 delete optContent;
165 delete viewerPrefs;
166 delete structTreeRoot;
167 }
168
readMetadata()169 std::unique_ptr<GooString> Catalog::readMetadata()
170 {
171 catalogLocker();
172 if (metadata.isNone()) {
173 Object catDict = xref->getCatalog();
174 if (catDict.isDict()) {
175 metadata = catDict.dictLookup("Metadata");
176 } else {
177 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
178 metadata.setToNull();
179 }
180 }
181
182 if (!metadata.isStream()) {
183 return {};
184 }
185 Object obj = metadata.streamGetDict()->lookup("Subtype");
186 if (!obj.isName("XML")) {
187 error(errSyntaxWarning, -1, "Unknown Metadata type: '{0:s}'", obj.isName() ? obj.getName() : "???");
188 }
189 std::unique_ptr<GooString> s = std::make_unique<GooString>();
190 metadata.getStream()->fillGooString(s.get());
191 metadata.streamClose();
192 return s;
193 }
194
getPage(int i)195 Page *Catalog::getPage(int i)
196 {
197 if (i < 1)
198 return nullptr;
199
200 catalogLocker();
201 if (std::size_t(i) > pages.size()) {
202 bool cached = cachePageTree(i);
203 if (cached == false) {
204 return nullptr;
205 }
206 }
207 return pages[i - 1].first.get();
208 }
209
getPageRef(int i)210 Ref *Catalog::getPageRef(int i)
211 {
212 if (i < 1)
213 return nullptr;
214
215 catalogLocker();
216 if (std::size_t(i) > pages.size()) {
217 bool cached = cachePageTree(i);
218 if (cached == false) {
219 return nullptr;
220 }
221 }
222 return &pages[i - 1].second;
223 }
224
// Walks the page tree depth-first until `pages` holds at least `page`
// entries (page numbers are 1-based, see getPage()).
//
// The traversal is resumable: pagesList / pagesRefList / attrsList /
// kidsIdxList are parallel stacks describing the chain of Pages nodes that is
// currently being visited and, for each of them, the index of the next kid to
// look at.  Returns true once the requested page is cached, false when the
// tree is exhausted or a fatal syntax problem is found.
bool Catalog::cachePageTree(int page)
{
    // First call: seed the stacks with the top-level Pages node.
    if (pagesList == nullptr) {

        Ref pagesRef;

        Object catDict = xref->getCatalog();

        if (catDict.isDict()) {
            // The Pages entry must be an indirect reference whose object
            // number lies inside the xref table.
            const Object &pagesDictRef = catDict.dictLookupNF("Pages");
            if (pagesDictRef.isRef() && pagesDictRef.getRefNum() >= 0 && pagesDictRef.getRefNum() < xref->getNumObjects()) {
                pagesRef = pagesDictRef.getRef();
            } else {
                error(errSyntaxError, -1, "Catalog dictionary does not contain a valid \"Pages\" entry");
                return false;
            }
        } else {
            error(errSyntaxError, -1, "Could not find catalog dictionary");
            return false;
        }

        Object obj = catDict.dictLookup("Pages");
        // This should really be isDict("Pages"), but I've seen at least one
        // PDF file where the /Type entry is missing.
        if (!obj.isDict()) {
            error(errSyntaxError, -1, "Top-level pages object is wrong type ({0:s})", obj.getTypeName());
            return false;
        }

        // Start from an empty page cache with the root node on every stack.
        pages.clear();
        attrsList = new std::vector<PageAttrs *>();
        attrsList->push_back(new PageAttrs(nullptr, obj.getDict()));
        pagesList = new std::vector<Object>();
        pagesList->push_back(std::move(obj));
        pagesRefList = new std::vector<Ref>();
        pagesRefList->push_back(pagesRef);
        kidsIdxList = new std::vector<int>();
        kidsIdxList->push_back(0);
    }

    while (true) {

        // Done as soon as the requested page is in the cache.
        if (std::size_t(page) <= pages.size())
            return true;

        // Every node has been popped: the tree holds fewer pages than asked for.
        if (pagesList->empty())
            return false;

        Object kids = pagesList->back().dictLookup("Kids");
        if (!kids.isArray()) {
            error(errSyntaxError, -1, "Kids object (page {0:uld}) is wrong type ({1:s})", pages.size() + 1, kids.getTypeName());
            return false;
        }

        // All kids of the current node were visited: pop it and advance the
        // parent's kid index.
        int kidsIdx = kidsIdxList->back();
        if (kidsIdx >= kids.arrayGetLength()) {
            pagesList->pop_back();
            pagesRefList->pop_back();
            delete attrsList->back();
            attrsList->pop_back();
            kidsIdxList->pop_back();
            if (!kidsIdxList->empty())
                kidsIdxList->back()++;
            continue;
        }

        const Object &kidRef = kids.arrayGetNF(kidsIdx);
        if (!kidRef.isRef()) {
            error(errSyntaxError, -1, "Kid object (page {0:uld}) is not an indirect reference ({1:s})", pages.size() + 1, kidRef.getTypeName());
            return false;
        }

        // Cycle detection: a kid whose object number equals that of a node
        // currently on the stack is skipped.
        bool loop = false;
        ;
        for (const Ref &pageRef : *pagesRefList) {
            if (pageRef.num == kidRef.getRefNum()) {
                loop = true;
                break;
            }
        }
        if (loop) {
            error(errSyntaxError, -1, "Loop in Pages tree");
            kidsIdxList->back()++;
            continue;
        }

        Object kid = kids.arrayGet(kidsIdx);
        // Leaf: a dictionary typed /Page, or any dictionary without a Kids key.
        if (kid.isDict("Page") || (kid.isDict() && !kid.getDict()->hasKey("Kids"))) {
            PageAttrs *attrs = new PageAttrs(attrsList->back(), kid.getDict());
            auto p = std::make_unique<Page>(doc, pages.size() + 1, std::move(kid), kidRef.getRef(), attrs, form);
            if (!p->isOk()) {
                error(errSyntaxError, -1, "Failed to create page (page {0:uld})", pages.size() + 1);
                return false;
            }

            // Refuse to store more pages than the declared page count.
            // NOTE(review): numPages is -1 until getNumPages() has run; the
            // cast to std::size_t then yields a huge value and this check
            // never fires -- confirm callers always call getNumPages() first.
            if (pages.size() >= std::size_t(numPages)) {
                error(errSyntaxError, -1, "Page count in top-level pages object is incorrect");
                return false;
            }

            pages.emplace_back(std::move(p), kidRef.getRef());

            kidsIdxList->back()++;

            // This should really be isDict("Pages"), but I've seen at least one
            // PDF file where the /Type entry is missing.
        } else if (kid.isDict()) {
            // Intermediate node: descend into it, starting at its first kid.
            attrsList->push_back(new PageAttrs(attrsList->back(), kid.getDict()));
            pagesRefList->push_back(kidRef.getRef());
            pagesList->push_back(std::move(kid));
            kidsIdxList->push_back(0);
        } else {
            // Anything else is reported and skipped.
            error(errSyntaxError, -1, "Kid object (page {0:uld}) is wrong type ({1:s})", pages.size() + 1, kid.getTypeName());
            kidsIdxList->back()++;
        }
    }

    // Not reached: the loop above only exits through return statements.
    return false;
}
344
findPage(const Ref pageRef)345 int Catalog::findPage(const Ref pageRef)
346 {
347 int i;
348
349 for (i = 0; i < getNumPages(); ++i) {
350 Ref *ref = getPageRef(i + 1);
351 if (ref != nullptr && *ref == pageRef)
352 return i + 1;
353 }
354 return 0;
355 }
356
findDest(const GooString * name)357 std::unique_ptr<LinkDest> Catalog::findDest(const GooString *name)
358 {
359 // try named destination dictionary then name tree
360 if (getDests()->isDict()) {
361 Object obj1 = getDests()->dictLookup(name->c_str());
362 return createLinkDest(&obj1);
363 }
364
365 catalogLocker();
366 Object obj2 = getDestNameTree()->lookup(name);
367 return createLinkDest(&obj2);
368 }
369
createLinkDest(Object * obj)370 std::unique_ptr<LinkDest> Catalog::createLinkDest(Object *obj)
371 {
372 std::unique_ptr<LinkDest> dest;
373 if (obj->isArray()) {
374 dest = std::make_unique<LinkDest>(obj->getArray());
375 } else if (obj->isDict()) {
376 Object obj2 = obj->dictLookup("D");
377 if (obj2.isArray())
378 dest = std::make_unique<LinkDest>(obj2.getArray());
379 else
380 error(errSyntaxWarning, -1, "Bad named destination value");
381 } else {
382 error(errSyntaxWarning, -1, "Bad named destination value");
383 }
384 if (dest && !dest->isOk()) {
385 dest.reset();
386 }
387
388 return dest;
389 }
390
numDests()391 int Catalog::numDests()
392 {
393 Object *obj;
394
395 obj = getDests();
396 if (!obj->isDict()) {
397 return 0;
398 }
399 return obj->dictGetLength();
400 }
401
getDestsName(int i)402 const char *Catalog::getDestsName(int i)
403 {
404 Object *obj;
405
406 obj = getDests();
407 if (!obj->isDict()) {
408 return nullptr;
409 }
410 return obj->dictGetKey(i);
411 }
412
getDestsDest(int i)413 std::unique_ptr<LinkDest> Catalog::getDestsDest(int i)
414 {
415 Object *obj = getDests();
416 if (!obj->isDict()) {
417 return nullptr;
418 }
419 Object obj1 = obj->dictGetVal(i);
420 return createLinkDest(&obj1);
421 }
422
getDestNameTreeDest(int i)423 std::unique_ptr<LinkDest> Catalog::getDestNameTreeDest(int i)
424 {
425 Object obj;
426
427 catalogLocker();
428 Object *aux = getDestNameTree()->getValue(i);
429 if (aux) {
430 obj = aux->fetch(xref);
431 }
432 return createLinkDest(&obj);
433 }
434
embeddedFile(int i)435 FileSpec *Catalog::embeddedFile(int i)
436 {
437 catalogLocker();
438 Object *obj = getEmbeddedFileNameTree()->getValue(i);
439 FileSpec *embeddedFile = nullptr;
440 if (obj->isRef()) {
441 Object fsDict = obj->fetch(xref);
442 embeddedFile = new FileSpec(&fsDict);
443 } else if (obj->isDict()) {
444 embeddedFile = new FileSpec(obj);
445 } else {
446 Object null;
447 embeddedFile = new FileSpec(&null);
448 }
449 return embeddedFile;
450 }
451
hasEmbeddedFile(const std::string & fileName)452 bool Catalog::hasEmbeddedFile(const std::string &fileName)
453 {
454 NameTree *ef = getEmbeddedFileNameTree();
455 for (int i = 0; i < ef->numEntries(); ++i) {
456 if (fileName == ef->getName(i)->toStr())
457 return true;
458 }
459 return false;
460 }
461
// Adds `file` to the document as an embedded file named `fileName`.
//
// A new file specification object is registered in the xref table and a new,
// flat EmbeddedFiles name tree is written that contains every existing entry
// plus the new one, kept in lexicographic order.  An existing entry with the
// same name is replaced.  The cached name tree is dropped at the end so it is
// rebuilt on next use.
void Catalog::addEmbeddedFile(GooFile *file, const std::string &fileName)
{
    catalogLocker();

    const Ref fileSpecRef = xref->addIndirectObject(FileSpec::newFileSpecObject(xref, file, fileName));

    // NOTE(review): catDict.getDict() is used below without an isDict() check.
    Object catDict = xref->getCatalog();
    Ref namesObjRef;
    Object namesObj = catDict.getDict()->lookup("Names", &namesObjRef);
    if (!namesObj.isDict()) {
        // Need to create the names Dict
        catDict.dictSet("Names", Object(new Dict(xref)));
        namesObj = catDict.getDict()->lookup("Names");

        // Trigger getting the names dict again when needed
        names = Object();
    }

    Dict *namesDict = namesObj.getDict();

    // We create a new EmbeddedFiles nametree, this replaces the existing one (if any), but it's not a problem
    Object embeddedFilesObj = Object(new Dict(xref));
    const Ref embeddedFilesRef = xref->addIndirectObject(embeddedFilesObj);

    // Flat [name, value, name, value, ...] array for the new tree's Names entry.
    Array *embeddedFilesNamesArray = new Array(xref);

    // This flattens out the existing EmbeddedFiles nametree (if any), should not be a problem
    NameTree *ef = getEmbeddedFileNameTree();
    bool fileAlreadyAdded = false;
    for (int i = 0; i < ef->numEntries(); ++i) {
        const GooString *efNameI = ef->getName(i);

        // we need to add the file if it has not been added yet and the name is smaller or equal lexicographically
        // than the current item
        const bool sameFileName = fileName == efNameI->toStr();
        const bool addFile = !fileAlreadyAdded && (sameFileName || fileName < efNameI->toStr());
        if (addFile) {
            // If the new name is smaller lexicographically than an existing file add it in its correct position
            embeddedFilesNamesArray->add(Object(new GooString(fileName)));
            embeddedFilesNamesArray->add(Object(fileSpecRef));
            fileAlreadyAdded = true;
        }
        if (sameFileName) {
            // If the new name is the same lexicographically than an existing file then don't add the existing file (i.e. replace)
            continue;
        }
        // Carry the existing entry over unchanged.
        embeddedFilesNamesArray->add(Object(efNameI->copy()));
        embeddedFilesNamesArray->add(ef->getValue(i)->copy());
    }

    if (!fileAlreadyAdded) {
        // The new file is bigger lexicographically than the existing ones
        embeddedFilesNamesArray->add(Object(new GooString(fileName)));
        embeddedFilesNamesArray->add(Object(fileSpecRef));
    }

    embeddedFilesObj.dictSet("Names", Object(embeddedFilesNamesArray));
    namesDict->set("EmbeddedFiles", Object(embeddedFilesRef));

    // Mark the object that actually changed: the Names dictionary when it is
    // an indirect object, otherwise the catalog that holds it.
    if (namesObjRef != Ref::INVALID()) {
        xref->setModifiedObject(&namesObj, namesObjRef);
    } else {
        xref->setModifiedObject(&catDict, { xref->getRootNum(), xref->getRootGen() });
    }

    // recreate Nametree on next call that uses it
    delete embeddedFileNameTree;
    embeddedFileNameTree = nullptr;
}
531
getJS(int i)532 GooString *Catalog::getJS(int i)
533 {
534 Object obj;
535 // getJSNameTree()->getValue(i) returns a shallow copy of the object so we
536 // do not need to free it
537 catalogLocker();
538 Object *aux = getJSNameTree()->getValue(i);
539 if (aux) {
540 obj = aux->fetch(xref);
541 }
542
543 if (!obj.isDict()) {
544 return nullptr;
545 }
546 Object obj2 = obj.dictLookup("S");
547 if (!obj2.isName()) {
548 return nullptr;
549 }
550 if (strcmp(obj2.getName(), "JavaScript")) {
551 return nullptr;
552 }
553 obj2 = obj.dictLookup("JS");
554 GooString *js = nullptr;
555 if (obj2.isString()) {
556 js = new GooString(obj2.getString());
557 } else if (obj2.isStream()) {
558 Stream *stream = obj2.getStream();
559 js = new GooString();
560 stream->fillGooString(js);
561 }
562 return js;
563 }
564
getPageMode()565 Catalog::PageMode Catalog::getPageMode()
566 {
567
568 catalogLocker();
569 if (pageMode == pageModeNull) {
570
571 pageMode = pageModeNone;
572
573 Object catDict = xref->getCatalog();
574 if (!catDict.isDict()) {
575 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
576 return pageMode;
577 }
578
579 Object obj = catDict.dictLookup("PageMode");
580 if (obj.isName()) {
581 if (obj.isName("UseNone"))
582 pageMode = pageModeNone;
583 else if (obj.isName("UseOutlines"))
584 pageMode = pageModeOutlines;
585 else if (obj.isName("UseThumbs"))
586 pageMode = pageModeThumbs;
587 else if (obj.isName("FullScreen"))
588 pageMode = pageModeFullScreen;
589 else if (obj.isName("UseOC"))
590 pageMode = pageModeOC;
591 else if (obj.isName("UseAttachments"))
592 pageMode = pageModeAttach;
593 }
594 }
595 return pageMode;
596 }
597
getPageLayout()598 Catalog::PageLayout Catalog::getPageLayout()
599 {
600
601 catalogLocker();
602 if (pageLayout == pageLayoutNull) {
603
604 pageLayout = pageLayoutNone;
605
606 Object catDict = xref->getCatalog();
607 if (!catDict.isDict()) {
608 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
609 return pageLayout;
610 }
611
612 pageLayout = pageLayoutNone;
613 Object obj = catDict.dictLookup("PageLayout");
614 if (obj.isName()) {
615 if (obj.isName("SinglePage"))
616 pageLayout = pageLayoutSinglePage;
617 if (obj.isName("OneColumn"))
618 pageLayout = pageLayoutOneColumn;
619 if (obj.isName("TwoColumnLeft"))
620 pageLayout = pageLayoutTwoColumnLeft;
621 if (obj.isName("TwoColumnRight"))
622 pageLayout = pageLayoutTwoColumnRight;
623 if (obj.isName("TwoPageLeft"))
624 pageLayout = pageLayoutTwoPageLeft;
625 if (obj.isName("TwoPageRight"))
626 pageLayout = pageLayoutTwoPageRight;
627 }
628 }
629 return pageLayout;
630 }
631
NameTree()632 NameTree::NameTree()
633 {
634 size = 0;
635 length = 0;
636 entries = nullptr;
637 }
638
~NameTree()639 NameTree::~NameTree()
640 {
641 int i;
642
643 for (i = 0; i < length; i++)
644 delete entries[i];
645
646 gfree(entries);
647 }
648
Entry(Array * array,int index)649 NameTree::Entry::Entry(Array *array, int index)
650 {
651 if (!array->getString(index, &name)) {
652 Object aux = array->get(index);
653 if (aux.isString()) {
654 name.append(aux.getString());
655 } else
656 error(errSyntaxError, -1, "Invalid page tree");
657 }
658 value = array->getNF(index + 1).copy();
659 }
660
~Entry()661 NameTree::Entry::~Entry() { }
662
addEntry(Entry * entry)663 void NameTree::addEntry(Entry *entry)
664 {
665 if (length == size) {
666 if (length == 0) {
667 size = 8;
668 } else {
669 size *= 2;
670 }
671 entries = (Entry **)grealloc(entries, sizeof(Entry *) * size);
672 }
673
674 entries[length] = entry;
675 ++length;
676 }
677
cmpEntry(const void * voidEntry,const void * voidOtherEntry)678 int NameTree::Entry::cmpEntry(const void *voidEntry, const void *voidOtherEntry)
679 {
680 Entry *entry = *(NameTree::Entry **)voidEntry;
681 Entry *otherEntry = *(NameTree::Entry **)voidOtherEntry;
682
683 return entry->name.cmp(&otherEntry->name);
684 }
685
init(XRef * xrefA,Object * tree)686 void NameTree::init(XRef *xrefA, Object *tree)
687 {
688 xref = xrefA;
689 std::set<int> seen;
690 parse(tree, seen);
691 if (entries && length > 0) {
692 qsort(entries, length, sizeof(Entry *), Entry::cmpEntry);
693 }
694 }
695
parse(const Object * tree,std::set<int> & seen)696 void NameTree::parse(const Object *tree, std::set<int> &seen)
697 {
698 if (!tree->isDict())
699 return;
700
701 // leaf node
702 Object names = tree->dictLookup("Names");
703 if (names.isArray()) {
704 for (int i = 0; i < names.arrayGetLength(); i += 2) {
705 NameTree::Entry *entry;
706
707 entry = new Entry(names.getArray(), i);
708 addEntry(entry);
709 }
710 }
711
712 // root or intermediate node
713 Ref ref;
714 const Object kids = tree->getDict()->lookup("Kids", &ref);
715 if (ref != Ref::INVALID()) {
716 const int numObj = ref.num;
717 if (seen.find(numObj) != seen.end()) {
718 error(errSyntaxError, -1, "loop in NameTree (numObj: {0:d})", numObj);
719 return;
720 }
721 seen.insert(numObj);
722 }
723 if (kids.isArray()) {
724 for (int i = 0; i < kids.arrayGetLength(); ++i) {
725 const Object kid = kids.getArray()->get(i, &ref);
726 if (ref != Ref::INVALID()) {
727 const int numObj = ref.num;
728 if (seen.find(numObj) != seen.end()) {
729 error(errSyntaxError, -1, "loop in NameTree (numObj: {0:d})", numObj);
730 continue;
731 }
732 seen.insert(numObj);
733 }
734 if (kid.isDict())
735 parse(&kid, seen);
736 }
737 }
738 }
739
cmp(const void * voidKey,const void * voidEntry)740 int NameTree::Entry::cmp(const void *voidKey, const void *voidEntry)
741 {
742 GooString *key = (GooString *)voidKey;
743 Entry *entry = *(NameTree::Entry **)voidEntry;
744
745 return key->cmp(&entry->name);
746 }
747
lookup(const GooString * name)748 Object NameTree::lookup(const GooString *name)
749 {
750 Entry **entry;
751
752 entry = (Entry **)bsearch(name, entries, length, sizeof(Entry *), Entry::cmp);
753 if (entry != nullptr) {
754 return (*entry)->value.fetch(xref);
755 } else {
756 error(errSyntaxError, -1, "failed to look up ({0:s})", name->c_str());
757 return Object(objNull);
758 }
759 }
760
getValue(int index)761 Object *NameTree::getValue(int index)
762 {
763 if (index < length) {
764 return &entries[index]->value;
765 } else {
766 return nullptr;
767 }
768 }
769
getName(int index) const770 const GooString *NameTree::getName(int index) const
771 {
772 if (index < length) {
773 return &entries[index]->name;
774 } else {
775 return nullptr;
776 }
777 }
778
labelToIndex(GooString * label,int * index)779 bool Catalog::labelToIndex(GooString *label, int *index)
780 {
781 char *end;
782
783 PageLabelInfo *pli = getPageLabelInfo();
784 if (pli != nullptr) {
785 if (!pli->labelToIndex(label, index))
786 return false;
787 } else {
788 *index = strtol(label->c_str(), &end, 10) - 1;
789 if (*end != '\0')
790 return false;
791 }
792
793 if (*index < 0 || *index >= getNumPages())
794 return false;
795
796 return true;
797 }
798
indexToLabel(int index,GooString * label)799 bool Catalog::indexToLabel(int index, GooString *label)
800 {
801 char buffer[32];
802
803 if (index < 0 || index >= getNumPages())
804 return false;
805
806 PageLabelInfo *pli = getPageLabelInfo();
807 if (pli != nullptr) {
808 return pli->indexToLabel(index, label);
809 } else {
810 snprintf(buffer, sizeof(buffer), "%d", index + 1);
811 label->append(buffer);
812 return true;
813 }
814 }
815
getNumPages()816 int Catalog::getNumPages()
817 {
818 catalogLocker();
819 if (numPages == -1) {
820 Object catDict = xref->getCatalog();
821 if (!catDict.isDict()) {
822 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
823 return 0;
824 }
825 Object pagesDict = catDict.dictLookup("Pages");
826
827 // This should really be isDict("Pages"), but I've seen at least one
828 // PDF file where the /Type entry is missing.
829 if (!pagesDict.isDict()) {
830 error(errSyntaxError, -1, "Top-level pages object is wrong type ({0:s})", pagesDict.getTypeName());
831 return 0;
832 }
833
834 Object obj = pagesDict.dictLookup("Count");
835 // some PDF files actually use real numbers here ("/Count 9.0")
836 if (!obj.isNum()) {
837 if (pagesDict.dictIs("Page")) {
838 const Object &pageRootRef = catDict.dictLookupNF("Pages");
839
840 error(errSyntaxError, -1, "Pages top-level is a single Page. The document is malformed, trying to recover...");
841
842 Dict *pageDict = pagesDict.getDict();
843 if (pageRootRef.isRef()) {
844 const Ref pageRef = pageRootRef.getRef();
845 auto p = std::make_unique<Page>(doc, 1, std::move(pagesDict), pageRef, new PageAttrs(nullptr, pageDict), form);
846 if (p->isOk()) {
847 pages.emplace_back(std::move(p), pageRef);
848
849 numPages = 1;
850 } else {
851 numPages = 0;
852 }
853 } else {
854 numPages = 0;
855 }
856 } else {
857 error(errSyntaxError, -1, "Page count in top-level pages object is wrong type ({0:s})", obj.getTypeName());
858 numPages = 0;
859 }
860 } else {
861 numPages = (int)obj.getNum();
862 if (numPages <= 0) {
863 error(errSyntaxError, -1, "Invalid page count {0:d}", numPages);
864 numPages = 0;
865 } else if (numPages > xref->getNumObjects()) {
866 error(errSyntaxError, -1, "Page count ({0:d}) larger than number of objects ({1:d})", numPages, xref->getNumObjects());
867 numPages = 0;
868 }
869 }
870 }
871
872 return numPages;
873 }
874
getPageLabelInfo()875 PageLabelInfo *Catalog::getPageLabelInfo()
876 {
877 catalogLocker();
878 if (!pageLabelInfo) {
879 Object catDict = xref->getCatalog();
880 if (!catDict.isDict()) {
881 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
882 return nullptr;
883 }
884
885 Object obj = catDict.dictLookup("PageLabels");
886 if (obj.isDict()) {
887 pageLabelInfo = new PageLabelInfo(&obj, getNumPages());
888 }
889 }
890
891 return pageLabelInfo;
892 }
893
getStructTreeRoot()894 StructTreeRoot *Catalog::getStructTreeRoot()
895 {
896 catalogLocker();
897 if (!structTreeRoot) {
898 Object catalog = xref->getCatalog();
899 if (!catalog.isDict()) {
900 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catalog.getTypeName());
901 return nullptr;
902 }
903
904 Object root = catalog.dictLookup("StructTreeRoot");
905 if (root.isDict("StructTreeRoot")) {
906 structTreeRoot = new StructTreeRoot(doc, root.getDict());
907 }
908 }
909 return structTreeRoot;
910 }
911
getMarkInfo()912 unsigned int Catalog::getMarkInfo()
913 {
914 if (markInfo == markInfoNull) {
915 markInfo = 0;
916
917 catalogLocker();
918 Object catDict = xref->getCatalog();
919
920 if (catDict.isDict()) {
921 Object markInfoDict = catDict.dictLookup("MarkInfo");
922 if (markInfoDict.isDict()) {
923 Object value = markInfoDict.dictLookup("Marked");
924 if (value.isBool()) {
925 if (value.getBool()) {
926 markInfo |= markInfoMarked;
927 }
928 } else if (!value.isNull()) {
929 error(errSyntaxError, -1, "Marked object is wrong type ({0:s})", value.getTypeName());
930 }
931
932 value = markInfoDict.dictLookup("Suspects");
933 if (value.isBool() && value.getBool())
934 markInfo |= markInfoSuspects;
935 else if (!value.isNull())
936 error(errSyntaxError, -1, "Suspects object is wrong type ({0:s})", value.getTypeName());
937
938 value = markInfoDict.dictLookup("UserProperties");
939 if (value.isBool() && value.getBool())
940 markInfo |= markInfoUserProperties;
941 else if (!value.isNull())
942 error(errSyntaxError, -1, "UserProperties object is wrong type ({0:s})", value.getTypeName());
943 } else if (!markInfoDict.isNull()) {
944 error(errSyntaxError, -1, "MarkInfo object is wrong type ({0:s})", markInfoDict.getTypeName());
945 }
946 } else {
947 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
948 }
949 }
950 return markInfo;
951 }
952
getCreateOutline()953 Object *Catalog::getCreateOutline()
954 {
955
956 catalogLocker();
957 Object catDict = xref->getCatalog();
958
959 // If there is no Object in the outline variable,
960 // check if there is an Outline dict in the catalog
961 if (outline.isNone()) {
962 if (catDict.isDict()) {
963 Object outline_obj = catDict.dictLookup("Outlines");
964 if (outline_obj.isDict()) {
965 return &outline;
966 }
967 } else {
968 // catalog is not a dict, give up?
969 return &outline;
970 }
971 }
972
973 // If there is an Object in variable, make sure it's a dict
974 if (outline.isDict()) {
975 return &outline;
976 }
977
978 // setup an empty outline dict
979 outline = Object(new Dict(doc->getXRef()));
980 outline.dictSet("Type", Object(objName, "Outlines"));
981 outline.dictSet("Count", Object(0));
982
983 const Ref outlineRef = doc->getXRef()->addIndirectObject(outline);
984 catDict.dictAdd("Outlines", Object(outlineRef));
985 xref->setModifiedObject(&catDict, { xref->getRootNum(), xref->getRootGen() });
986
987 return &outline;
988 }
989
getOutline()990 Object *Catalog::getOutline()
991 {
992 catalogLocker();
993 if (outline.isNone()) {
994 Object catDict = xref->getCatalog();
995 if (catDict.isDict()) {
996 outline = catDict.dictLookup("Outlines");
997 } else {
998 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
999 outline.setToNull();
1000 }
1001 }
1002
1003 return &outline;
1004 }
1005
getDests()1006 Object *Catalog::getDests()
1007 {
1008 catalogLocker();
1009 if (dests.isNone()) {
1010 Object catDict = xref->getCatalog();
1011 if (catDict.isDict()) {
1012 dests = catDict.dictLookup("Dests");
1013 } else {
1014 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
1015 dests.setToNull();
1016 }
1017 }
1018
1019 return &dests;
1020 }
1021
getFormType()1022 Catalog::FormType Catalog::getFormType()
1023 {
1024 Object xfa;
1025 FormType res = NoForm;
1026
1027 if (acroForm.isDict()) {
1028 xfa = acroForm.dictLookup("XFA");
1029 if (xfa.isStream() || xfa.isArray()) {
1030 res = XfaForm;
1031 } else {
1032 res = AcroForm;
1033 }
1034 }
1035
1036 return res;
1037 }
1038
getForm()1039 Form *Catalog::getForm()
1040 {
1041 catalogLocker();
1042 if (!form) {
1043 if (acroForm.isDict()) {
1044 form = new Form(doc, &acroForm);
1045 // perform form-related loading after all widgets have been loaded
1046 form->postWidgetsLoad();
1047 }
1048 }
1049
1050 return form;
1051 }
1052
addFormToAcroForm(const Ref formRef)1053 void Catalog::addFormToAcroForm(const Ref formRef)
1054 {
1055 catalogLocker();
1056
1057 Object catDict = xref->getCatalog();
1058 Ref acroFormRef;
1059 acroForm = catDict.getDict()->lookup("AcroForm", &acroFormRef);
1060
1061 if (!acroForm.isDict()) {
1062 // none there yet, need to create a new fields dict
1063 Object newForm = Object(new Dict(xref));
1064 newForm.dictSet("SigFlags", Object(3));
1065
1066 Array *fieldArray = new Array(xref);
1067 fieldArray->add(Object(formRef));
1068 newForm.dictSet("Fields", Object(fieldArray));
1069
1070 Ref newRef = xref->addIndirectObject(newForm);
1071 catDict.dictSet("AcroForm", Object(newRef));
1072 acroForm = catDict.getDict()->lookup("AcroForm");
1073 } else {
1074 // append to field array
1075 Ref fieldRef;
1076 Object fieldArray = acroForm.getDict()->lookup("Fields", &fieldRef);
1077 fieldArray.getArray()->add(Object(formRef));
1078 }
1079
1080 if (acroFormRef != Ref::INVALID()) {
1081 xref->setModifiedObject(&acroForm, acroFormRef);
1082 } else {
1083 xref->setModifiedObject(&catDict, { xref->getRootNum(), xref->getRootGen() });
1084 }
1085 }
1086
removeFormFromAcroForm(const Ref formRef)1087 void Catalog::removeFormFromAcroForm(const Ref formRef)
1088 {
1089 catalogLocker();
1090
1091 Object catDict = xref->getCatalog();
1092 Ref acroFormRef;
1093 acroForm = catDict.getDict()->lookup("AcroForm", &acroFormRef);
1094
1095 if (acroForm.isDict()) {
1096 // remove from field array
1097 Ref fieldRef;
1098 Object fieldArrayO = acroForm.getDict()->lookup("Fields", &fieldRef);
1099 Array *fieldArray = fieldArrayO.getArray();
1100 for (int i = 0; i < fieldArray->getLength(); ++i) {
1101 const Object &o = fieldArray->getNF(i);
1102 if (o.isRef() && o.getRef() == formRef) {
1103 fieldArray->remove(i);
1104 break;
1105 }
1106 }
1107
1108 xref->setModifiedObject(&acroForm, acroFormRef);
1109 }
1110 }
1111
getViewerPreferences()1112 ViewerPreferences *Catalog::getViewerPreferences()
1113 {
1114 catalogLocker();
1115 if (!viewerPrefs) {
1116 if (viewerPreferences.isDict()) {
1117 viewerPrefs = new ViewerPreferences(viewerPreferences.getDict());
1118 }
1119 }
1120
1121 return viewerPrefs;
1122 }
1123
getNames()1124 Object *Catalog::getNames()
1125 {
1126 if (names.isNone()) {
1127 Object catDict = xref->getCatalog();
1128 if (catDict.isDict()) {
1129 names = catDict.dictLookup("Names");
1130 } else {
1131 error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
1132 names.setToNull();
1133 }
1134 }
1135
1136 return &names;
1137 }
1138
getDestNameTree()1139 NameTree *Catalog::getDestNameTree()
1140 {
1141 if (!destNameTree) {
1142
1143 destNameTree = new NameTree();
1144
1145 if (getNames()->isDict()) {
1146 Object obj = getNames()->dictLookup("Dests");
1147 destNameTree->init(xref, &obj);
1148 }
1149 }
1150
1151 return destNameTree;
1152 }
1153
getEmbeddedFileNameTree()1154 NameTree *Catalog::getEmbeddedFileNameTree()
1155 {
1156 if (!embeddedFileNameTree) {
1157
1158 embeddedFileNameTree = new NameTree();
1159
1160 if (getNames()->isDict()) {
1161 Object obj = getNames()->dictLookup("EmbeddedFiles");
1162 embeddedFileNameTree->init(xref, &obj);
1163 }
1164 }
1165
1166 return embeddedFileNameTree;
1167 }
1168
getJSNameTree()1169 NameTree *Catalog::getJSNameTree()
1170 {
1171 if (!jsNameTree) {
1172
1173 jsNameTree = new NameTree();
1174
1175 if (getNames()->isDict()) {
1176 Object obj = getNames()->dictLookup("JavaScript");
1177 jsNameTree->init(xref, &obj);
1178 }
1179 }
1180
1181 return jsNameTree;
1182 }
1183
getAdditionalAction(DocumentAdditionalActionsType type)1184 std::unique_ptr<LinkAction> Catalog::getAdditionalAction(DocumentAdditionalActionsType type)
1185 {
1186 Object additionalActionsObject = additionalActions.fetch(doc->getXRef());
1187 if (additionalActionsObject.isDict()) {
1188 const char *key = (type == actionCloseDocument ? "WC"
1189 : type == actionSaveDocumentStart ? "WS"
1190 : type == actionSaveDocumentFinish ? "DS"
1191 : type == actionPrintDocumentStart ? "WP"
1192 : type == actionPrintDocumentFinish ? "DP"
1193 : nullptr);
1194
1195 Object actionObject = additionalActionsObject.dictLookup(key);
1196 if (actionObject.isDict())
1197 return LinkAction::parseAction(&actionObject, doc->getCatalog()->getBaseURI());
1198 }
1199 return nullptr;
1200 }
1201