1 //========================================================================
2 //
3 // Catalog.h
4 //
5 // Copyright 1996-2007 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
17 // Copyright (C) 2005, 2007, 2009-2011, 2013, 2017-2021 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2005 Jonathan Blandford <jrb@redhat.com>
19 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
20 // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
21 // Copyright (C) 2008, 2011 Pino Toscano <pino@kde.org>
22 // Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
23 // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
24 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
25 // Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
26 // Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
27 // Copyright (C) 2013 José Aliste <jaliste@src.gnome.org>
28 // Copyright (C) 2016 Masamichi Hosoda <trueroad@trueroad.jp>
29 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
30 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
31 // Copyright (C) 2020 Oliver Sander <oliver.sander@tu-dresden.de>
32 // Copyright (C) 2020 Katarina Behrens <Katarina.Behrens@cib.de>
33 // Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by Technische Universität Dresden
34 // Copyright (C) 2021 RM <rm+git@arcsin.org>
35 //
36 // To see a description of the changes please see the Changelog file that
37 // came with your tarball or type make ChangeLog if you are building from git
38 //
39 //========================================================================
40 
41 #ifndef CATALOG_H
42 #define CATALOG_H
43 
44 #include "poppler-config.h"
45 #include "poppler_private_export.h"
46 #include "Object.h"
47 #include "Link.h"
48 
49 #include <vector>
50 #include <memory>
51 
52 class PDFDoc;
53 class XRef;
54 class Object;
55 class Page;
56 class PageAttrs;
57 struct Ref;
58 class PageLabelInfo;
59 class Form;
60 class OCGs;
61 class ViewerPreferences;
62 class FileSpec;
63 class StructTreeRoot;
64 
65 //------------------------------------------------------------------------
66 // NameTree
67 //------------------------------------------------------------------------
68 
69 class POPPLER_PRIVATE_EXPORT NameTree
70 {
71 public:
72     NameTree();
73     ~NameTree();
74 
75     NameTree(const NameTree &) = delete;
76     NameTree &operator=(const NameTree &) = delete;
77 
78     void init(XRef *xref, Object *tree);
79     Object lookup(const GooString *name);
numEntries()80     int numEntries() { return length; };
81     // iterator accessor, note it returns a pointer to the internal object, do not free nor delete it
82     Object *getValue(int i);
83     const GooString *getName(int i) const;
84 
85 private:
86     struct Entry
87     {
88         Entry(Array *array, int index);
89         ~Entry();
90         GooString name;
91         Object value;
92         static int cmpEntry(const void *voidEntry, const void *voidOtherEntry);
93         static int cmp(const void *key, const void *entry);
94     };
95 
96     void parse(const Object *tree, std::set<int> &seen);
97     void addEntry(Entry *entry);
98 
99     XRef *xref;
100     Entry **entries;
101     int size, length; // size is the number of entries in
102                       // the array of Entry*
103                       // length is the number of real Entry
104 };
105 
106 //------------------------------------------------------------------------
107 // Catalog
108 //------------------------------------------------------------------------
109 
110 class POPPLER_PRIVATE_EXPORT Catalog
111 {
112 public:
113     // Constructor.
114     explicit Catalog(PDFDoc *docA);
115 
116     // Destructor.
117     ~Catalog();
118 
119     Catalog(const Catalog &) = delete;
120     Catalog &operator=(const Catalog &) = delete;
121 
122     // Is catalog valid?
isOk()123     bool isOk() { return ok; }
124 
125     // Get number of pages.
126     int getNumPages();
127 
128     // Get a page.
129     Page *getPage(int i);
130 
131     // Get the reference for a page object.
132     Ref *getPageRef(int i);
133 
134     // Return base URI, or NULL if none.
getBaseURI()135     GooString *getBaseURI() { return baseURI; }
136 
137     // Return the contents of the metadata stream, or NULL if there is
138     // no metadata.
139     std::unique_ptr<GooString> readMetadata();
140 
141     // Return the structure tree root object.
142     StructTreeRoot *getStructTreeRoot();
143 
144     // Return values from the MarkInfo dictionary as flags in a bitfield.
145     enum MarkInfoFlags
146     {
147         markInfoNull = 1 << 0,
148         markInfoMarked = 1 << 1,
149         markInfoUserProperties = 1 << 2,
150         markInfoSuspects = 1 << 3,
151     };
152     unsigned int getMarkInfo();
153 
154     // Find a page, given its object ID.  Returns page number, or 0 if
155     // not found.
156     int findPage(const Ref pageRef);
157 
158     // Find a named destination.  Returns the link destination, or
159     // NULL if <name> is not a destination.
160     std::unique_ptr<LinkDest> findDest(const GooString *name);
161 
162     Object *getDests();
163 
164     // Get the number of named destinations in name-dict
165     int numDests();
166 
167     // Get the i'th named destination name in name-dict
168     const char *getDestsName(int i);
169 
170     // Get the i'th named destination link destination in name-dict
171     std::unique_ptr<LinkDest> getDestsDest(int i);
172 
173     // Get the number of named destinations in name-tree
numDestNameTree()174     int numDestNameTree() { return getDestNameTree()->numEntries(); }
175 
176     // Get the i'th named destination name in name-tree
getDestNameTreeName(int i)177     const GooString *getDestNameTreeName(int i) { return getDestNameTree()->getName(i); }
178 
179     // Get the i'th named destination link destination in name-tree
180     std::unique_ptr<LinkDest> getDestNameTreeDest(int i);
181 
182     // Get the number of embedded files
numEmbeddedFiles()183     int numEmbeddedFiles() { return getEmbeddedFileNameTree()->numEntries(); }
184 
185     // Get the i'th file embedded (at the Document level) in the document
186     FileSpec *embeddedFile(int i);
187 
188     // Is there an embedded file with the given name?
189     bool hasEmbeddedFile(const std::string &fileName);
190 
191     // Adds and embeddedFile
192     // If there is already an existing embedded file with the given fileName
193     // it gets replaced, if that's not what you want check hasEmbeddedFile first
194     void addEmbeddedFile(GooFile *file, const std::string &fileName);
195 
196     // Get the number of javascript scripts
numJS()197     int numJS() { return getJSNameTree()->numEntries(); }
getJSName(int i)198     const GooString *getJSName(int i) { return getJSNameTree()->getName(i); }
199 
200     // Get the i'th JavaScript script (at the Document level) in the document
201     GooString *getJS(int i);
202 
203     // Convert between page indices and page labels.
204     bool labelToIndex(GooString *label, int *index);
205     bool indexToLabel(int index, GooString *label);
206 
207     Object *getOutline();
208     // returns the existing outline or new one if it doesn't exist
209     Object *getCreateOutline();
210 
getAcroForm()211     Object *getAcroForm() { return &acroForm; }
212     void addFormToAcroForm(const Ref formRef);
213     void removeFormFromAcroForm(const Ref formRef);
214 
getOptContentConfig()215     OCGs *getOptContentConfig() { return optContent; }
216 
getPDFMajorVersion()217     int getPDFMajorVersion() const { return catalogPdfMajorVersion; }
getPDFMinorVersion()218     int getPDFMinorVersion() const { return catalogPdfMinorVersion; }
219 
220     enum FormType
221     {
222         NoForm,
223         AcroForm,
224         XfaForm
225     };
226 
227     FormType getFormType();
228     Form *getForm();
229 
230     ViewerPreferences *getViewerPreferences();
231 
232     enum PageMode
233     {
234         pageModeNone,
235         pageModeOutlines,
236         pageModeThumbs,
237         pageModeFullScreen,
238         pageModeOC,
239         pageModeAttach,
240         pageModeNull
241     };
242     enum PageLayout
243     {
244         pageLayoutNone,
245         pageLayoutSinglePage,
246         pageLayoutOneColumn,
247         pageLayoutTwoColumnLeft,
248         pageLayoutTwoColumnRight,
249         pageLayoutTwoPageLeft,
250         pageLayoutTwoPageRight,
251         pageLayoutNull
252     };
253 
254     // Returns the page mode.
255     PageMode getPageMode();
256     PageLayout getPageLayout();
257 
258     enum DocumentAdditionalActionsType
259     {
260         actionCloseDocument, ///< Performed before closing the document
261         actionSaveDocumentStart, ///< Performed before saving the document
262         actionSaveDocumentFinish, ///< Performed after saving the document
263         actionPrintDocumentStart, ///< Performed before printing the document
264         actionPrintDocumentFinish, ///< Performed after printing the document
265     };
266 
267     std::unique_ptr<LinkAction> getAdditionalAction(DocumentAdditionalActionsType type);
268 
269 private:
270     // Get page label info.
271     PageLabelInfo *getPageLabelInfo();
272 
273     PDFDoc *doc;
274     XRef *xref; // the xref table for this PDF file
275     std::vector<std::pair<std::unique_ptr<Page>, Ref>> pages;
276     std::vector<Object> *pagesList;
277     std::vector<Ref> *pagesRefList;
278     std::vector<PageAttrs *> *attrsList;
279     std::vector<int> *kidsIdxList;
280     Form *form;
281     ViewerPreferences *viewerPrefs;
282     int numPages; // number of pages
283     Object dests; // named destination dictionary
284     Object names; // named names dictionary
285     NameTree *destNameTree; // named destination name-tree
286     NameTree *embeddedFileNameTree; // embedded file name-tree
287     NameTree *jsNameTree; // Java Script name-tree
288     GooString *baseURI; // base URI for URI-type links
289     Object metadata; // metadata stream
290     StructTreeRoot *structTreeRoot; // structure tree root
291     unsigned int markInfo; // Flags from MarkInfo dictionary
292     Object outline; // outline dictionary
293     Object acroForm; // AcroForm dictionary
294     Object viewerPreferences; // ViewerPreference dictionary
295     OCGs *optContent; // Optional Content groups
296     bool ok; // true if catalog is valid
297     PageLabelInfo *pageLabelInfo; // info about page labels
298     PageMode pageMode; // page mode
299     PageLayout pageLayout; // page layout
300     Object additionalActions; // page additional actions
301 
302     bool cachePageTree(int page); // Cache first <page> pages.
303     Object *findDestInTree(Object *tree, GooString *name, Object *obj);
304 
305     Object *getNames();
306     NameTree *getDestNameTree();
307     NameTree *getEmbeddedFileNameTree();
308     NameTree *getJSNameTree();
309     std::unique_ptr<LinkDest> createLinkDest(Object *obj);
310 
311     int catalogPdfMajorVersion = -1;
312     int catalogPdfMinorVersion = -1;
313 
314     mutable std::recursive_mutex mutex;
315 };
316 
317 #endif
318