1 //========================================================================
2 //
3 // Catalog.cc
4 //
5 // Copyright 1996-2007 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
17 // Copyright (C) 2005-2013, 2015, 2017-2021 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2005 Jeff Muizelaar <jrmuizel@nit.ca>
19 // Copyright (C) 2005 Jonathan Blandford <jrb@redhat.com>
20 // Copyright (C) 2005 Marco Pesenti Gritti <mpg@redhat.com>
21 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
22 // Copyright (C) 2006, 2008, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
23 // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
24 // Copyright (C) 2008, 2011 Pino Toscano <pino@kde.org>
25 // Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
26 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
27 // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
28 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
29 // Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
30 // Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
31 // Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013 José Aliste <jaliste@src.gnome.org>
33 // Copyright (C) 2014 Ed Porras <ed@moto-research.com>
34 // Copyright (C) 2015 Even Rouault <even.rouault@spatialys.com>
35 // Copyright (C) 2016 Masamichi Hosoda <trueroad@trueroad.jp>
36 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
37 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
38 // Copyright (C) 2020 Oliver Sander <oliver.sander@tu-dresden.de>
39 // Copyright (C) 2020 Katarina Behrens <Katarina.Behrens@cib.de>
40 // Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de>
41 // Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by Technische Universität Dresden
42 // Copyright (C) 2021 RM <rm+git@arcsin.org>
43 //
44 // To see a description of the changes please see the Changelog file that
45 // came with your tarball or type make ChangeLog if you are building from git
46 //
47 //========================================================================
48 
49 #include <config.h>
50 
51 #include <cstddef>
52 #include <cstdlib>
53 #include "goo/gmem.h"
54 #include "Object.h"
55 #include "PDFDoc.h"
56 #include "XRef.h"
57 #include "Array.h"
58 #include "Dict.h"
59 #include "Page.h"
60 #include "Error.h"
61 #include "Link.h"
62 #include "PageLabelInfo.h"
63 #include "Catalog.h"
64 #include "Form.h"
65 #include "OptionalContent.h"
66 #include "ViewerPreferences.h"
67 #include "FileSpec.h"
68 #include "StructTreeRoot.h"
69 
70 //------------------------------------------------------------------------
71 // Catalog
72 //------------------------------------------------------------------------
73 
// Declares a local std::unique_lock named `locker` on `mutex`.  The lock is
// acquired when the statement runs and released when the enclosing scope
// ends.  The mutex type is recursive, so a thread that already holds the
// lock may invoke the macro again in a nested call.
#define catalogLocker() std::unique_lock<std::recursive_mutex> locker(mutex)
75 
Catalog(PDFDoc * docA)76 Catalog::Catalog(PDFDoc *docA)
77 {
78     ok = true;
79     doc = docA;
80     xref = doc->getXRef();
81     numPages = -1;
82     baseURI = nullptr;
83     pageLabelInfo = nullptr;
84     form = nullptr;
85     optContent = nullptr;
86     pageMode = pageModeNull;
87     pageLayout = pageLayoutNull;
88     destNameTree = nullptr;
89     embeddedFileNameTree = nullptr;
90     jsNameTree = nullptr;
91     viewerPrefs = nullptr;
92     structTreeRoot = nullptr;
93 
94     pagesList = nullptr;
95     pagesRefList = nullptr;
96     attrsList = nullptr;
97     kidsIdxList = nullptr;
98     markInfo = markInfoNull;
99 
100     Object catDict = xref->getCatalog();
101     if (!catDict.isDict()) {
102         error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
103         ok = false;
104         return;
105     }
106     // get the AcroForm dictionary
107     acroForm = catDict.dictLookup("AcroForm");
108 
109     // read base URI
110     Object obj = catDict.getDict()->lookupEnsureEncryptedIfNeeded("URI");
111     if (obj.isDict()) {
112         Object obj2 = obj.getDict()->lookupEnsureEncryptedIfNeeded("Base");
113         if (obj2.isString()) {
114             baseURI = obj2.getString()->copy();
115         }
116     }
117 
118     // get the Optional Content dictionary
119     Object optContentProps = catDict.dictLookup("OCProperties");
120     if (optContentProps.isDict()) {
121         optContent = new OCGs(&optContentProps, xref);
122         if (!optContent->isOk()) {
123             delete optContent;
124             optContent = nullptr;
125         }
126     }
127 
128     // actions
129     additionalActions = catDict.dictLookupNF("AA").copy();
130 
131     // get the ViewerPreferences dictionary
132     viewerPreferences = catDict.dictLookup("ViewerPreferences");
133 
134     const Object version = catDict.dictLookup("Version");
135     if (version.isName()) {
136         const int res = sscanf(version.getName(), "%d.%d", &catalogPdfMajorVersion, &catalogPdfMinorVersion);
137         if (res != 2) {
138             catalogPdfMajorVersion = -1;
139             catalogPdfMinorVersion = -1;
140         }
141     }
142 }
143 
~Catalog()144 Catalog::~Catalog()
145 {
146     delete kidsIdxList;
147     if (attrsList) {
148         std::vector<PageAttrs *>::iterator it;
149         for (it = attrsList->begin(); it != attrsList->end(); ++it) {
150             delete *it;
151         }
152         delete attrsList;
153     }
154     delete pagesRefList;
155     delete pagesList;
156     delete destNameTree;
157     delete embeddedFileNameTree;
158     delete jsNameTree;
159     if (baseURI) {
160         delete baseURI;
161     }
162     delete pageLabelInfo;
163     delete form;
164     delete optContent;
165     delete viewerPrefs;
166     delete structTreeRoot;
167 }
168 
readMetadata()169 std::unique_ptr<GooString> Catalog::readMetadata()
170 {
171     catalogLocker();
172     if (metadata.isNone()) {
173         Object catDict = xref->getCatalog();
174         if (catDict.isDict()) {
175             metadata = catDict.dictLookup("Metadata");
176         } else {
177             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
178             metadata.setToNull();
179         }
180     }
181 
182     if (!metadata.isStream()) {
183         return {};
184     }
185     Object obj = metadata.streamGetDict()->lookup("Subtype");
186     if (!obj.isName("XML")) {
187         error(errSyntaxWarning, -1, "Unknown Metadata type: '{0:s}'", obj.isName() ? obj.getName() : "???");
188     }
189     std::unique_ptr<GooString> s = std::make_unique<GooString>();
190     metadata.getStream()->fillGooString(s.get());
191     metadata.streamClose();
192     return s;
193 }
194 
getPage(int i)195 Page *Catalog::getPage(int i)
196 {
197     if (i < 1)
198         return nullptr;
199 
200     catalogLocker();
201     if (std::size_t(i) > pages.size()) {
202         bool cached = cachePageTree(i);
203         if (cached == false) {
204             return nullptr;
205         }
206     }
207     return pages[i - 1].first.get();
208 }
209 
getPageRef(int i)210 Ref *Catalog::getPageRef(int i)
211 {
212     if (i < 1)
213         return nullptr;
214 
215     catalogLocker();
216     if (std::size_t(i) > pages.size()) {
217         bool cached = cachePageTree(i);
218         if (cached == false) {
219             return nullptr;
220         }
221     }
222     return &pages[i - 1].second;
223 }
224 
cachePageTree(int page)225 bool Catalog::cachePageTree(int page)
226 {
227     if (pagesList == nullptr) {
228 
229         Ref pagesRef;
230 
231         Object catDict = xref->getCatalog();
232 
233         if (catDict.isDict()) {
234             const Object &pagesDictRef = catDict.dictLookupNF("Pages");
235             if (pagesDictRef.isRef() && pagesDictRef.getRefNum() >= 0 && pagesDictRef.getRefNum() < xref->getNumObjects()) {
236                 pagesRef = pagesDictRef.getRef();
237             } else {
238                 error(errSyntaxError, -1, "Catalog dictionary does not contain a valid \"Pages\" entry");
239                 return false;
240             }
241         } else {
242             error(errSyntaxError, -1, "Could not find catalog dictionary");
243             return false;
244         }
245 
246         Object obj = catDict.dictLookup("Pages");
247         // This should really be isDict("Pages"), but I've seen at least one
248         // PDF file where the /Type entry is missing.
249         if (!obj.isDict()) {
250             error(errSyntaxError, -1, "Top-level pages object is wrong type ({0:s})", obj.getTypeName());
251             return false;
252         }
253 
254         pages.clear();
255         attrsList = new std::vector<PageAttrs *>();
256         attrsList->push_back(new PageAttrs(nullptr, obj.getDict()));
257         pagesList = new std::vector<Object>();
258         pagesList->push_back(std::move(obj));
259         pagesRefList = new std::vector<Ref>();
260         pagesRefList->push_back(pagesRef);
261         kidsIdxList = new std::vector<int>();
262         kidsIdxList->push_back(0);
263     }
264 
265     while (true) {
266 
267         if (std::size_t(page) <= pages.size())
268             return true;
269 
270         if (pagesList->empty())
271             return false;
272 
273         Object kids = pagesList->back().dictLookup("Kids");
274         if (!kids.isArray()) {
275             error(errSyntaxError, -1, "Kids object (page {0:uld}) is wrong type ({1:s})", pages.size() + 1, kids.getTypeName());
276             return false;
277         }
278 
279         int kidsIdx = kidsIdxList->back();
280         if (kidsIdx >= kids.arrayGetLength()) {
281             pagesList->pop_back();
282             pagesRefList->pop_back();
283             delete attrsList->back();
284             attrsList->pop_back();
285             kidsIdxList->pop_back();
286             if (!kidsIdxList->empty())
287                 kidsIdxList->back()++;
288             continue;
289         }
290 
291         const Object &kidRef = kids.arrayGetNF(kidsIdx);
292         if (!kidRef.isRef()) {
293             error(errSyntaxError, -1, "Kid object (page {0:uld}) is not an indirect reference ({1:s})", pages.size() + 1, kidRef.getTypeName());
294             return false;
295         }
296 
297         bool loop = false;
298         ;
299         for (const Ref &pageRef : *pagesRefList) {
300             if (pageRef.num == kidRef.getRefNum()) {
301                 loop = true;
302                 break;
303             }
304         }
305         if (loop) {
306             error(errSyntaxError, -1, "Loop in Pages tree");
307             kidsIdxList->back()++;
308             continue;
309         }
310 
311         Object kid = kids.arrayGet(kidsIdx);
312         if (kid.isDict("Page") || (kid.isDict() && !kid.getDict()->hasKey("Kids"))) {
313             PageAttrs *attrs = new PageAttrs(attrsList->back(), kid.getDict());
314             auto p = std::make_unique<Page>(doc, pages.size() + 1, std::move(kid), kidRef.getRef(), attrs, form);
315             if (!p->isOk()) {
316                 error(errSyntaxError, -1, "Failed to create page (page {0:uld})", pages.size() + 1);
317                 return false;
318             }
319 
320             if (pages.size() >= std::size_t(numPages)) {
321                 error(errSyntaxError, -1, "Page count in top-level pages object is incorrect");
322                 return false;
323             }
324 
325             pages.emplace_back(std::move(p), kidRef.getRef());
326 
327             kidsIdxList->back()++;
328 
329             // This should really be isDict("Pages"), but I've seen at least one
330             // PDF file where the /Type entry is missing.
331         } else if (kid.isDict()) {
332             attrsList->push_back(new PageAttrs(attrsList->back(), kid.getDict()));
333             pagesRefList->push_back(kidRef.getRef());
334             pagesList->push_back(std::move(kid));
335             kidsIdxList->push_back(0);
336         } else {
337             error(errSyntaxError, -1, "Kid object (page {0:uld}) is wrong type ({1:s})", pages.size() + 1, kid.getTypeName());
338             kidsIdxList->back()++;
339         }
340     }
341 
342     return false;
343 }
344 
findPage(const Ref pageRef)345 int Catalog::findPage(const Ref pageRef)
346 {
347     int i;
348 
349     for (i = 0; i < getNumPages(); ++i) {
350         Ref *ref = getPageRef(i + 1);
351         if (ref != nullptr && *ref == pageRef)
352             return i + 1;
353     }
354     return 0;
355 }
356 
findDest(const GooString * name)357 std::unique_ptr<LinkDest> Catalog::findDest(const GooString *name)
358 {
359     // try named destination dictionary then name tree
360     if (getDests()->isDict()) {
361         Object obj1 = getDests()->dictLookup(name->c_str());
362         return createLinkDest(&obj1);
363     }
364 
365     catalogLocker();
366     Object obj2 = getDestNameTree()->lookup(name);
367     return createLinkDest(&obj2);
368 }
369 
createLinkDest(Object * obj)370 std::unique_ptr<LinkDest> Catalog::createLinkDest(Object *obj)
371 {
372     std::unique_ptr<LinkDest> dest;
373     if (obj->isArray()) {
374         dest = std::make_unique<LinkDest>(obj->getArray());
375     } else if (obj->isDict()) {
376         Object obj2 = obj->dictLookup("D");
377         if (obj2.isArray())
378             dest = std::make_unique<LinkDest>(obj2.getArray());
379         else
380             error(errSyntaxWarning, -1, "Bad named destination value");
381     } else {
382         error(errSyntaxWarning, -1, "Bad named destination value");
383     }
384     if (dest && !dest->isOk()) {
385         dest.reset();
386     }
387 
388     return dest;
389 }
390 
numDests()391 int Catalog::numDests()
392 {
393     Object *obj;
394 
395     obj = getDests();
396     if (!obj->isDict()) {
397         return 0;
398     }
399     return obj->dictGetLength();
400 }
401 
getDestsName(int i)402 const char *Catalog::getDestsName(int i)
403 {
404     Object *obj;
405 
406     obj = getDests();
407     if (!obj->isDict()) {
408         return nullptr;
409     }
410     return obj->dictGetKey(i);
411 }
412 
getDestsDest(int i)413 std::unique_ptr<LinkDest> Catalog::getDestsDest(int i)
414 {
415     Object *obj = getDests();
416     if (!obj->isDict()) {
417         return nullptr;
418     }
419     Object obj1 = obj->dictGetVal(i);
420     return createLinkDest(&obj1);
421 }
422 
getDestNameTreeDest(int i)423 std::unique_ptr<LinkDest> Catalog::getDestNameTreeDest(int i)
424 {
425     Object obj;
426 
427     catalogLocker();
428     Object *aux = getDestNameTree()->getValue(i);
429     if (aux) {
430         obj = aux->fetch(xref);
431     }
432     return createLinkDest(&obj);
433 }
434 
embeddedFile(int i)435 FileSpec *Catalog::embeddedFile(int i)
436 {
437     catalogLocker();
438     Object *obj = getEmbeddedFileNameTree()->getValue(i);
439     FileSpec *embeddedFile = nullptr;
440     if (obj->isRef()) {
441         Object fsDict = obj->fetch(xref);
442         embeddedFile = new FileSpec(&fsDict);
443     } else if (obj->isDict()) {
444         embeddedFile = new FileSpec(obj);
445     } else {
446         Object null;
447         embeddedFile = new FileSpec(&null);
448     }
449     return embeddedFile;
450 }
451 
hasEmbeddedFile(const std::string & fileName)452 bool Catalog::hasEmbeddedFile(const std::string &fileName)
453 {
454     NameTree *ef = getEmbeddedFileNameTree();
455     for (int i = 0; i < ef->numEntries(); ++i) {
456         if (fileName == ef->getName(i)->toStr())
457             return true;
458     }
459     return false;
460 }
461 
addEmbeddedFile(GooFile * file,const std::string & fileName)462 void Catalog::addEmbeddedFile(GooFile *file, const std::string &fileName)
463 {
464     catalogLocker();
465 
466     const Ref fileSpecRef = xref->addIndirectObject(FileSpec::newFileSpecObject(xref, file, fileName));
467 
468     Object catDict = xref->getCatalog();
469     Ref namesObjRef;
470     Object namesObj = catDict.getDict()->lookup("Names", &namesObjRef);
471     if (!namesObj.isDict()) {
472         // Need to create the names Dict
473         catDict.dictSet("Names", Object(new Dict(xref)));
474         namesObj = catDict.getDict()->lookup("Names");
475 
476         // Trigger getting the names dict again when needed
477         names = Object();
478     }
479 
480     Dict *namesDict = namesObj.getDict();
481 
482     // We create a new EmbeddedFiles nametree, this replaces the existing one (if any), but it's not a problem
483     Object embeddedFilesObj = Object(new Dict(xref));
484     const Ref embeddedFilesRef = xref->addIndirectObject(embeddedFilesObj);
485 
486     Array *embeddedFilesNamesArray = new Array(xref);
487 
488     // This flattens out the existing EmbeddedFiles nametree (if any), should not be a problem
489     NameTree *ef = getEmbeddedFileNameTree();
490     bool fileAlreadyAdded = false;
491     for (int i = 0; i < ef->numEntries(); ++i) {
492         const GooString *efNameI = ef->getName(i);
493 
494         // we need to add the file if it has not been added yet and the name is smaller or equal lexicographically
495         // than the current item
496         const bool sameFileName = fileName == efNameI->toStr();
497         const bool addFile = !fileAlreadyAdded && (sameFileName || fileName < efNameI->toStr());
498         if (addFile) {
499             // If the new name is smaller lexicographically than an existing file add it in its correct position
500             embeddedFilesNamesArray->add(Object(new GooString(fileName)));
501             embeddedFilesNamesArray->add(Object(fileSpecRef));
502             fileAlreadyAdded = true;
503         }
504         if (sameFileName) {
505             // If the new name is the same lexicographically than an existing file then don't add the existing file (i.e. replace)
506             continue;
507         }
508         embeddedFilesNamesArray->add(Object(efNameI->copy()));
509         embeddedFilesNamesArray->add(ef->getValue(i)->copy());
510     }
511 
512     if (!fileAlreadyAdded) {
513         // The new file is bigger lexicographically than the existing ones
514         embeddedFilesNamesArray->add(Object(new GooString(fileName)));
515         embeddedFilesNamesArray->add(Object(fileSpecRef));
516     }
517 
518     embeddedFilesObj.dictSet("Names", Object(embeddedFilesNamesArray));
519     namesDict->set("EmbeddedFiles", Object(embeddedFilesRef));
520 
521     if (namesObjRef != Ref::INVALID()) {
522         xref->setModifiedObject(&namesObj, namesObjRef);
523     } else {
524         xref->setModifiedObject(&catDict, { xref->getRootNum(), xref->getRootGen() });
525     }
526 
527     // recreate Nametree on next call that uses it
528     delete embeddedFileNameTree;
529     embeddedFileNameTree = nullptr;
530 }
531 
getJS(int i)532 GooString *Catalog::getJS(int i)
533 {
534     Object obj;
535     // getJSNameTree()->getValue(i) returns a shallow copy of the object so we
536     // do not need to free it
537     catalogLocker();
538     Object *aux = getJSNameTree()->getValue(i);
539     if (aux) {
540         obj = aux->fetch(xref);
541     }
542 
543     if (!obj.isDict()) {
544         return nullptr;
545     }
546     Object obj2 = obj.dictLookup("S");
547     if (!obj2.isName()) {
548         return nullptr;
549     }
550     if (strcmp(obj2.getName(), "JavaScript")) {
551         return nullptr;
552     }
553     obj2 = obj.dictLookup("JS");
554     GooString *js = nullptr;
555     if (obj2.isString()) {
556         js = new GooString(obj2.getString());
557     } else if (obj2.isStream()) {
558         Stream *stream = obj2.getStream();
559         js = new GooString();
560         stream->fillGooString(js);
561     }
562     return js;
563 }
564 
getPageMode()565 Catalog::PageMode Catalog::getPageMode()
566 {
567 
568     catalogLocker();
569     if (pageMode == pageModeNull) {
570 
571         pageMode = pageModeNone;
572 
573         Object catDict = xref->getCatalog();
574         if (!catDict.isDict()) {
575             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
576             return pageMode;
577         }
578 
579         Object obj = catDict.dictLookup("PageMode");
580         if (obj.isName()) {
581             if (obj.isName("UseNone"))
582                 pageMode = pageModeNone;
583             else if (obj.isName("UseOutlines"))
584                 pageMode = pageModeOutlines;
585             else if (obj.isName("UseThumbs"))
586                 pageMode = pageModeThumbs;
587             else if (obj.isName("FullScreen"))
588                 pageMode = pageModeFullScreen;
589             else if (obj.isName("UseOC"))
590                 pageMode = pageModeOC;
591             else if (obj.isName("UseAttachments"))
592                 pageMode = pageModeAttach;
593         }
594     }
595     return pageMode;
596 }
597 
getPageLayout()598 Catalog::PageLayout Catalog::getPageLayout()
599 {
600 
601     catalogLocker();
602     if (pageLayout == pageLayoutNull) {
603 
604         pageLayout = pageLayoutNone;
605 
606         Object catDict = xref->getCatalog();
607         if (!catDict.isDict()) {
608             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
609             return pageLayout;
610         }
611 
612         pageLayout = pageLayoutNone;
613         Object obj = catDict.dictLookup("PageLayout");
614         if (obj.isName()) {
615             if (obj.isName("SinglePage"))
616                 pageLayout = pageLayoutSinglePage;
617             if (obj.isName("OneColumn"))
618                 pageLayout = pageLayoutOneColumn;
619             if (obj.isName("TwoColumnLeft"))
620                 pageLayout = pageLayoutTwoColumnLeft;
621             if (obj.isName("TwoColumnRight"))
622                 pageLayout = pageLayoutTwoColumnRight;
623             if (obj.isName("TwoPageLeft"))
624                 pageLayout = pageLayoutTwoPageLeft;
625             if (obj.isName("TwoPageRight"))
626                 pageLayout = pageLayoutTwoPageRight;
627         }
628     }
629     return pageLayout;
630 }
631 
NameTree()632 NameTree::NameTree()
633 {
634     size = 0;
635     length = 0;
636     entries = nullptr;
637 }
638 
~NameTree()639 NameTree::~NameTree()
640 {
641     int i;
642 
643     for (i = 0; i < length; i++)
644         delete entries[i];
645 
646     gfree(entries);
647 }
648 
Entry(Array * array,int index)649 NameTree::Entry::Entry(Array *array, int index)
650 {
651     if (!array->getString(index, &name)) {
652         Object aux = array->get(index);
653         if (aux.isString()) {
654             name.append(aux.getString());
655         } else
656             error(errSyntaxError, -1, "Invalid page tree");
657     }
658     value = array->getNF(index + 1).copy();
659 }
660 
~Entry()661 NameTree::Entry::~Entry() { }
662 
addEntry(Entry * entry)663 void NameTree::addEntry(Entry *entry)
664 {
665     if (length == size) {
666         if (length == 0) {
667             size = 8;
668         } else {
669             size *= 2;
670         }
671         entries = (Entry **)grealloc(entries, sizeof(Entry *) * size);
672     }
673 
674     entries[length] = entry;
675     ++length;
676 }
677 
cmpEntry(const void * voidEntry,const void * voidOtherEntry)678 int NameTree::Entry::cmpEntry(const void *voidEntry, const void *voidOtherEntry)
679 {
680     Entry *entry = *(NameTree::Entry **)voidEntry;
681     Entry *otherEntry = *(NameTree::Entry **)voidOtherEntry;
682 
683     return entry->name.cmp(&otherEntry->name);
684 }
685 
init(XRef * xrefA,Object * tree)686 void NameTree::init(XRef *xrefA, Object *tree)
687 {
688     xref = xrefA;
689     std::set<int> seen;
690     parse(tree, seen);
691     if (entries && length > 0) {
692         qsort(entries, length, sizeof(Entry *), Entry::cmpEntry);
693     }
694 }
695 
parse(const Object * tree,std::set<int> & seen)696 void NameTree::parse(const Object *tree, std::set<int> &seen)
697 {
698     if (!tree->isDict())
699         return;
700 
701     // leaf node
702     Object names = tree->dictLookup("Names");
703     if (names.isArray()) {
704         for (int i = 0; i < names.arrayGetLength(); i += 2) {
705             NameTree::Entry *entry;
706 
707             entry = new Entry(names.getArray(), i);
708             addEntry(entry);
709         }
710     }
711 
712     // root or intermediate node
713     Ref ref;
714     const Object kids = tree->getDict()->lookup("Kids", &ref);
715     if (ref != Ref::INVALID()) {
716         const int numObj = ref.num;
717         if (seen.find(numObj) != seen.end()) {
718             error(errSyntaxError, -1, "loop in NameTree (numObj: {0:d})", numObj);
719             return;
720         }
721         seen.insert(numObj);
722     }
723     if (kids.isArray()) {
724         for (int i = 0; i < kids.arrayGetLength(); ++i) {
725             const Object kid = kids.getArray()->get(i, &ref);
726             if (ref != Ref::INVALID()) {
727                 const int numObj = ref.num;
728                 if (seen.find(numObj) != seen.end()) {
729                     error(errSyntaxError, -1, "loop in NameTree (numObj: {0:d})", numObj);
730                     continue;
731                 }
732                 seen.insert(numObj);
733             }
734             if (kid.isDict())
735                 parse(&kid, seen);
736         }
737     }
738 }
739 
cmp(const void * voidKey,const void * voidEntry)740 int NameTree::Entry::cmp(const void *voidKey, const void *voidEntry)
741 {
742     GooString *key = (GooString *)voidKey;
743     Entry *entry = *(NameTree::Entry **)voidEntry;
744 
745     return key->cmp(&entry->name);
746 }
747 
lookup(const GooString * name)748 Object NameTree::lookup(const GooString *name)
749 {
750     Entry **entry;
751 
752     entry = (Entry **)bsearch(name, entries, length, sizeof(Entry *), Entry::cmp);
753     if (entry != nullptr) {
754         return (*entry)->value.fetch(xref);
755     } else {
756         error(errSyntaxError, -1, "failed to look up ({0:s})", name->c_str());
757         return Object(objNull);
758     }
759 }
760 
getValue(int index)761 Object *NameTree::getValue(int index)
762 {
763     if (index < length) {
764         return &entries[index]->value;
765     } else {
766         return nullptr;
767     }
768 }
769 
getName(int index) const770 const GooString *NameTree::getName(int index) const
771 {
772     if (index < length) {
773         return &entries[index]->name;
774     } else {
775         return nullptr;
776     }
777 }
778 
labelToIndex(GooString * label,int * index)779 bool Catalog::labelToIndex(GooString *label, int *index)
780 {
781     char *end;
782 
783     PageLabelInfo *pli = getPageLabelInfo();
784     if (pli != nullptr) {
785         if (!pli->labelToIndex(label, index))
786             return false;
787     } else {
788         *index = strtol(label->c_str(), &end, 10) - 1;
789         if (*end != '\0')
790             return false;
791     }
792 
793     if (*index < 0 || *index >= getNumPages())
794         return false;
795 
796     return true;
797 }
798 
indexToLabel(int index,GooString * label)799 bool Catalog::indexToLabel(int index, GooString *label)
800 {
801     char buffer[32];
802 
803     if (index < 0 || index >= getNumPages())
804         return false;
805 
806     PageLabelInfo *pli = getPageLabelInfo();
807     if (pli != nullptr) {
808         return pli->indexToLabel(index, label);
809     } else {
810         snprintf(buffer, sizeof(buffer), "%d", index + 1);
811         label->append(buffer);
812         return true;
813     }
814 }
815 
getNumPages()816 int Catalog::getNumPages()
817 {
818     catalogLocker();
819     if (numPages == -1) {
820         Object catDict = xref->getCatalog();
821         if (!catDict.isDict()) {
822             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
823             return 0;
824         }
825         Object pagesDict = catDict.dictLookup("Pages");
826 
827         // This should really be isDict("Pages"), but I've seen at least one
828         // PDF file where the /Type entry is missing.
829         if (!pagesDict.isDict()) {
830             error(errSyntaxError, -1, "Top-level pages object is wrong type ({0:s})", pagesDict.getTypeName());
831             return 0;
832         }
833 
834         Object obj = pagesDict.dictLookup("Count");
835         // some PDF files actually use real numbers here ("/Count 9.0")
836         if (!obj.isNum()) {
837             if (pagesDict.dictIs("Page")) {
838                 const Object &pageRootRef = catDict.dictLookupNF("Pages");
839 
840                 error(errSyntaxError, -1, "Pages top-level is a single Page. The document is malformed, trying to recover...");
841 
842                 Dict *pageDict = pagesDict.getDict();
843                 if (pageRootRef.isRef()) {
844                     const Ref pageRef = pageRootRef.getRef();
845                     auto p = std::make_unique<Page>(doc, 1, std::move(pagesDict), pageRef, new PageAttrs(nullptr, pageDict), form);
846                     if (p->isOk()) {
847                         pages.emplace_back(std::move(p), pageRef);
848 
849                         numPages = 1;
850                     } else {
851                         numPages = 0;
852                     }
853                 } else {
854                     numPages = 0;
855                 }
856             } else {
857                 error(errSyntaxError, -1, "Page count in top-level pages object is wrong type ({0:s})", obj.getTypeName());
858                 numPages = 0;
859             }
860         } else {
861             numPages = (int)obj.getNum();
862             if (numPages <= 0) {
863                 error(errSyntaxError, -1, "Invalid page count {0:d}", numPages);
864                 numPages = 0;
865             } else if (numPages > xref->getNumObjects()) {
866                 error(errSyntaxError, -1, "Page count ({0:d}) larger than number of objects ({1:d})", numPages, xref->getNumObjects());
867                 numPages = 0;
868             }
869         }
870     }
871 
872     return numPages;
873 }
874 
getPageLabelInfo()875 PageLabelInfo *Catalog::getPageLabelInfo()
876 {
877     catalogLocker();
878     if (!pageLabelInfo) {
879         Object catDict = xref->getCatalog();
880         if (!catDict.isDict()) {
881             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
882             return nullptr;
883         }
884 
885         Object obj = catDict.dictLookup("PageLabels");
886         if (obj.isDict()) {
887             pageLabelInfo = new PageLabelInfo(&obj, getNumPages());
888         }
889     }
890 
891     return pageLabelInfo;
892 }
893 
getStructTreeRoot()894 StructTreeRoot *Catalog::getStructTreeRoot()
895 {
896     catalogLocker();
897     if (!structTreeRoot) {
898         Object catalog = xref->getCatalog();
899         if (!catalog.isDict()) {
900             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catalog.getTypeName());
901             return nullptr;
902         }
903 
904         Object root = catalog.dictLookup("StructTreeRoot");
905         if (root.isDict("StructTreeRoot")) {
906             structTreeRoot = new StructTreeRoot(doc, root.getDict());
907         }
908     }
909     return structTreeRoot;
910 }
911 
getMarkInfo()912 unsigned int Catalog::getMarkInfo()
913 {
914     if (markInfo == markInfoNull) {
915         markInfo = 0;
916 
917         catalogLocker();
918         Object catDict = xref->getCatalog();
919 
920         if (catDict.isDict()) {
921             Object markInfoDict = catDict.dictLookup("MarkInfo");
922             if (markInfoDict.isDict()) {
923                 Object value = markInfoDict.dictLookup("Marked");
924                 if (value.isBool()) {
925                     if (value.getBool()) {
926                         markInfo |= markInfoMarked;
927                     }
928                 } else if (!value.isNull()) {
929                     error(errSyntaxError, -1, "Marked object is wrong type ({0:s})", value.getTypeName());
930                 }
931 
932                 value = markInfoDict.dictLookup("Suspects");
933                 if (value.isBool() && value.getBool())
934                     markInfo |= markInfoSuspects;
935                 else if (!value.isNull())
936                     error(errSyntaxError, -1, "Suspects object is wrong type ({0:s})", value.getTypeName());
937 
938                 value = markInfoDict.dictLookup("UserProperties");
939                 if (value.isBool() && value.getBool())
940                     markInfo |= markInfoUserProperties;
941                 else if (!value.isNull())
942                     error(errSyntaxError, -1, "UserProperties object is wrong type ({0:s})", value.getTypeName());
943             } else if (!markInfoDict.isNull()) {
944                 error(errSyntaxError, -1, "MarkInfo object is wrong type ({0:s})", markInfoDict.getTypeName());
945             }
946         } else {
947             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
948         }
949     }
950     return markInfo;
951 }
952 
getCreateOutline()953 Object *Catalog::getCreateOutline()
954 {
955 
956     catalogLocker();
957     Object catDict = xref->getCatalog();
958 
959     // If there is no Object in the outline variable,
960     // check if there is an Outline dict in the catalog
961     if (outline.isNone()) {
962         if (catDict.isDict()) {
963             Object outline_obj = catDict.dictLookup("Outlines");
964             if (outline_obj.isDict()) {
965                 return &outline;
966             }
967         } else {
968             // catalog is not a dict, give up?
969             return &outline;
970         }
971     }
972 
973     // If there is an Object in variable, make sure it's a dict
974     if (outline.isDict()) {
975         return &outline;
976     }
977 
978     // setup an empty outline dict
979     outline = Object(new Dict(doc->getXRef()));
980     outline.dictSet("Type", Object(objName, "Outlines"));
981     outline.dictSet("Count", Object(0));
982 
983     const Ref outlineRef = doc->getXRef()->addIndirectObject(outline);
984     catDict.dictAdd("Outlines", Object(outlineRef));
985     xref->setModifiedObject(&catDict, { xref->getRootNum(), xref->getRootGen() });
986 
987     return &outline;
988 }
989 
getOutline()990 Object *Catalog::getOutline()
991 {
992     catalogLocker();
993     if (outline.isNone()) {
994         Object catDict = xref->getCatalog();
995         if (catDict.isDict()) {
996             outline = catDict.dictLookup("Outlines");
997         } else {
998             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
999             outline.setToNull();
1000         }
1001     }
1002 
1003     return &outline;
1004 }
1005 
getDests()1006 Object *Catalog::getDests()
1007 {
1008     catalogLocker();
1009     if (dests.isNone()) {
1010         Object catDict = xref->getCatalog();
1011         if (catDict.isDict()) {
1012             dests = catDict.dictLookup("Dests");
1013         } else {
1014             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
1015             dests.setToNull();
1016         }
1017     }
1018 
1019     return &dests;
1020 }
1021 
getFormType()1022 Catalog::FormType Catalog::getFormType()
1023 {
1024     Object xfa;
1025     FormType res = NoForm;
1026 
1027     if (acroForm.isDict()) {
1028         xfa = acroForm.dictLookup("XFA");
1029         if (xfa.isStream() || xfa.isArray()) {
1030             res = XfaForm;
1031         } else {
1032             res = AcroForm;
1033         }
1034     }
1035 
1036     return res;
1037 }
1038 
getForm()1039 Form *Catalog::getForm()
1040 {
1041     catalogLocker();
1042     if (!form) {
1043         if (acroForm.isDict()) {
1044             form = new Form(doc, &acroForm);
1045             // perform form-related loading after all widgets have been loaded
1046             form->postWidgetsLoad();
1047         }
1048     }
1049 
1050     return form;
1051 }
1052 
addFormToAcroForm(const Ref formRef)1053 void Catalog::addFormToAcroForm(const Ref formRef)
1054 {
1055     catalogLocker();
1056 
1057     Object catDict = xref->getCatalog();
1058     Ref acroFormRef;
1059     acroForm = catDict.getDict()->lookup("AcroForm", &acroFormRef);
1060 
1061     if (!acroForm.isDict()) {
1062         // none there yet, need to create a new fields dict
1063         Object newForm = Object(new Dict(xref));
1064         newForm.dictSet("SigFlags", Object(3));
1065 
1066         Array *fieldArray = new Array(xref);
1067         fieldArray->add(Object(formRef));
1068         newForm.dictSet("Fields", Object(fieldArray));
1069 
1070         Ref newRef = xref->addIndirectObject(newForm);
1071         catDict.dictSet("AcroForm", Object(newRef));
1072         acroForm = catDict.getDict()->lookup("AcroForm");
1073     } else {
1074         // append to field array
1075         Ref fieldRef;
1076         Object fieldArray = acroForm.getDict()->lookup("Fields", &fieldRef);
1077         fieldArray.getArray()->add(Object(formRef));
1078     }
1079 
1080     if (acroFormRef != Ref::INVALID()) {
1081         xref->setModifiedObject(&acroForm, acroFormRef);
1082     } else {
1083         xref->setModifiedObject(&catDict, { xref->getRootNum(), xref->getRootGen() });
1084     }
1085 }
1086 
removeFormFromAcroForm(const Ref formRef)1087 void Catalog::removeFormFromAcroForm(const Ref formRef)
1088 {
1089     catalogLocker();
1090 
1091     Object catDict = xref->getCatalog();
1092     Ref acroFormRef;
1093     acroForm = catDict.getDict()->lookup("AcroForm", &acroFormRef);
1094 
1095     if (acroForm.isDict()) {
1096         // remove from field array
1097         Ref fieldRef;
1098         Object fieldArrayO = acroForm.getDict()->lookup("Fields", &fieldRef);
1099         Array *fieldArray = fieldArrayO.getArray();
1100         for (int i = 0; i < fieldArray->getLength(); ++i) {
1101             const Object &o = fieldArray->getNF(i);
1102             if (o.isRef() && o.getRef() == formRef) {
1103                 fieldArray->remove(i);
1104                 break;
1105             }
1106         }
1107 
1108         xref->setModifiedObject(&acroForm, acroFormRef);
1109     }
1110 }
1111 
getViewerPreferences()1112 ViewerPreferences *Catalog::getViewerPreferences()
1113 {
1114     catalogLocker();
1115     if (!viewerPrefs) {
1116         if (viewerPreferences.isDict()) {
1117             viewerPrefs = new ViewerPreferences(viewerPreferences.getDict());
1118         }
1119     }
1120 
1121     return viewerPrefs;
1122 }
1123 
getNames()1124 Object *Catalog::getNames()
1125 {
1126     if (names.isNone()) {
1127         Object catDict = xref->getCatalog();
1128         if (catDict.isDict()) {
1129             names = catDict.dictLookup("Names");
1130         } else {
1131             error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
1132             names.setToNull();
1133         }
1134     }
1135 
1136     return &names;
1137 }
1138 
getDestNameTree()1139 NameTree *Catalog::getDestNameTree()
1140 {
1141     if (!destNameTree) {
1142 
1143         destNameTree = new NameTree();
1144 
1145         if (getNames()->isDict()) {
1146             Object obj = getNames()->dictLookup("Dests");
1147             destNameTree->init(xref, &obj);
1148         }
1149     }
1150 
1151     return destNameTree;
1152 }
1153 
getEmbeddedFileNameTree()1154 NameTree *Catalog::getEmbeddedFileNameTree()
1155 {
1156     if (!embeddedFileNameTree) {
1157 
1158         embeddedFileNameTree = new NameTree();
1159 
1160         if (getNames()->isDict()) {
1161             Object obj = getNames()->dictLookup("EmbeddedFiles");
1162             embeddedFileNameTree->init(xref, &obj);
1163         }
1164     }
1165 
1166     return embeddedFileNameTree;
1167 }
1168 
getJSNameTree()1169 NameTree *Catalog::getJSNameTree()
1170 {
1171     if (!jsNameTree) {
1172 
1173         jsNameTree = new NameTree();
1174 
1175         if (getNames()->isDict()) {
1176             Object obj = getNames()->dictLookup("JavaScript");
1177             jsNameTree->init(xref, &obj);
1178         }
1179     }
1180 
1181     return jsNameTree;
1182 }
1183 
getAdditionalAction(DocumentAdditionalActionsType type)1184 std::unique_ptr<LinkAction> Catalog::getAdditionalAction(DocumentAdditionalActionsType type)
1185 {
1186     Object additionalActionsObject = additionalActions.fetch(doc->getXRef());
1187     if (additionalActionsObject.isDict()) {
1188         const char *key = (type == actionCloseDocument                 ? "WC"
1189                                    : type == actionSaveDocumentStart   ? "WS"
1190                                    : type == actionSaveDocumentFinish  ? "DS"
1191                                    : type == actionPrintDocumentStart  ? "WP"
1192                                    : type == actionPrintDocumentFinish ? "DP"
1193                                                                        : nullptr);
1194 
1195         Object actionObject = additionalActionsObject.dictLookup(key);
1196         if (actionObject.isDict())
1197             return LinkAction::parseAction(&actionObject, doc->getCatalog()->getBaseURI());
1198     }
1199     return nullptr;
1200 }
1201