//========================================================================
//
// Catalog.h
//
// Copyright 1996-2007 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005, 2007, 2009-2011, 2013, 2017-2021 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2005 Jonathan Blandford <jrb@redhat.com>
// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2008, 2011 Pino Toscano <pino@kde.org>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
// Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013 José Aliste <jaliste@src.gnome.org>
// Copyright (C) 2016 Masamichi Hosoda <trueroad@trueroad.jp>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2020 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2020 Katarina Behrens <Katarina.Behrens@cib.de>
// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>.
Work sponsored by Technische Universität Dresden 34 // Copyright (C) 2021 RM <rm+git@arcsin.org> 35 // 36 // To see a description of the changes please see the Changelog file that 37 // came with your tarball or type make ChangeLog if you are building from git 38 // 39 //======================================================================== 40 41 #ifndef CATALOG_H 42 #define CATALOG_H 43 44 #include "poppler-config.h" 45 #include "poppler_private_export.h" 46 #include "Object.h" 47 #include "Link.h" 48 49 #include <vector> 50 #include <memory> 51 52 class PDFDoc; 53 class XRef; 54 class Object; 55 class Page; 56 class PageAttrs; 57 struct Ref; 58 class PageLabelInfo; 59 class Form; 60 class OCGs; 61 class ViewerPreferences; 62 class FileSpec; 63 class StructTreeRoot; 64 65 //------------------------------------------------------------------------ 66 // NameTree 67 //------------------------------------------------------------------------ 68 69 class POPPLER_PRIVATE_EXPORT NameTree 70 { 71 public: 72 NameTree(); 73 ~NameTree(); 74 75 NameTree(const NameTree &) = delete; 76 NameTree &operator=(const NameTree &) = delete; 77 78 void init(XRef *xref, Object *tree); 79 Object lookup(const GooString *name); numEntries()80 int numEntries() { return length; }; 81 // iterator accessor, note it returns a pointer to the internal object, do not free nor delete it 82 Object *getValue(int i); 83 const GooString *getName(int i) const; 84 85 private: 86 struct Entry 87 { 88 Entry(Array *array, int index); 89 ~Entry(); 90 GooString name; 91 Object value; 92 static int cmpEntry(const void *voidEntry, const void *voidOtherEntry); 93 static int cmp(const void *key, const void *entry); 94 }; 95 96 void parse(const Object *tree, std::set<int> &seen); 97 void addEntry(Entry *entry); 98 99 XRef *xref; 100 Entry **entries; 101 int size, length; // size is the number of entries in 102 // the array of Entry* 103 // length is the number of real Entry 104 }; 105 106 
//------------------------------------------------------------------------ 107 // Catalog 108 //------------------------------------------------------------------------ 109 110 class POPPLER_PRIVATE_EXPORT Catalog 111 { 112 public: 113 // Constructor. 114 explicit Catalog(PDFDoc *docA); 115 116 // Destructor. 117 ~Catalog(); 118 119 Catalog(const Catalog &) = delete; 120 Catalog &operator=(const Catalog &) = delete; 121 122 // Is catalog valid? isOk()123 bool isOk() { return ok; } 124 125 // Get number of pages. 126 int getNumPages(); 127 128 // Get a page. 129 Page *getPage(int i); 130 131 // Get the reference for a page object. 132 Ref *getPageRef(int i); 133 134 // Return base URI, or NULL if none. getBaseURI()135 GooString *getBaseURI() { return baseURI; } 136 137 // Return the contents of the metadata stream, or NULL if there is 138 // no metadata. 139 std::unique_ptr<GooString> readMetadata(); 140 141 // Return the structure tree root object. 142 StructTreeRoot *getStructTreeRoot(); 143 144 // Return values from the MarkInfo dictionary as flags in a bitfield. 145 enum MarkInfoFlags 146 { 147 markInfoNull = 1 << 0, 148 markInfoMarked = 1 << 1, 149 markInfoUserProperties = 1 << 2, 150 markInfoSuspects = 1 << 3, 151 }; 152 unsigned int getMarkInfo(); 153 154 // Find a page, given its object ID. Returns page number, or 0 if 155 // not found. 156 int findPage(const Ref pageRef); 157 158 // Find a named destination. Returns the link destination, or 159 // NULL if <name> is not a destination. 
160 std::unique_ptr<LinkDest> findDest(const GooString *name); 161 162 Object *getDests(); 163 164 // Get the number of named destinations in name-dict 165 int numDests(); 166 167 // Get the i'th named destination name in name-dict 168 const char *getDestsName(int i); 169 170 // Get the i'th named destination link destination in name-dict 171 std::unique_ptr<LinkDest> getDestsDest(int i); 172 173 // Get the number of named destinations in name-tree numDestNameTree()174 int numDestNameTree() { return getDestNameTree()->numEntries(); } 175 176 // Get the i'th named destination name in name-tree getDestNameTreeName(int i)177 const GooString *getDestNameTreeName(int i) { return getDestNameTree()->getName(i); } 178 179 // Get the i'th named destination link destination in name-tree 180 std::unique_ptr<LinkDest> getDestNameTreeDest(int i); 181 182 // Get the number of embedded files numEmbeddedFiles()183 int numEmbeddedFiles() { return getEmbeddedFileNameTree()->numEntries(); } 184 185 // Get the i'th file embedded (at the Document level) in the document 186 FileSpec *embeddedFile(int i); 187 188 // Is there an embedded file with the given name? 189 bool hasEmbeddedFile(const std::string &fileName); 190 191 // Adds and embeddedFile 192 // If there is already an existing embedded file with the given fileName 193 // it gets replaced, if that's not what you want check hasEmbeddedFile first 194 void addEmbeddedFile(GooFile *file, const std::string &fileName); 195 196 // Get the number of javascript scripts numJS()197 int numJS() { return getJSNameTree()->numEntries(); } getJSName(int i)198 const GooString *getJSName(int i) { return getJSNameTree()->getName(i); } 199 200 // Get the i'th JavaScript script (at the Document level) in the document 201 GooString *getJS(int i); 202 203 // Convert between page indices and page labels. 
204 bool labelToIndex(GooString *label, int *index); 205 bool indexToLabel(int index, GooString *label); 206 207 Object *getOutline(); 208 // returns the existing outline or new one if it doesn't exist 209 Object *getCreateOutline(); 210 getAcroForm()211 Object *getAcroForm() { return &acroForm; } 212 void addFormToAcroForm(const Ref formRef); 213 void removeFormFromAcroForm(const Ref formRef); 214 getOptContentConfig()215 OCGs *getOptContentConfig() { return optContent; } 216 getPDFMajorVersion()217 int getPDFMajorVersion() const { return catalogPdfMajorVersion; } getPDFMinorVersion()218 int getPDFMinorVersion() const { return catalogPdfMinorVersion; } 219 220 enum FormType 221 { 222 NoForm, 223 AcroForm, 224 XfaForm 225 }; 226 227 FormType getFormType(); 228 Form *getForm(); 229 230 ViewerPreferences *getViewerPreferences(); 231 232 enum PageMode 233 { 234 pageModeNone, 235 pageModeOutlines, 236 pageModeThumbs, 237 pageModeFullScreen, 238 pageModeOC, 239 pageModeAttach, 240 pageModeNull 241 }; 242 enum PageLayout 243 { 244 pageLayoutNone, 245 pageLayoutSinglePage, 246 pageLayoutOneColumn, 247 pageLayoutTwoColumnLeft, 248 pageLayoutTwoColumnRight, 249 pageLayoutTwoPageLeft, 250 pageLayoutTwoPageRight, 251 pageLayoutNull 252 }; 253 254 // Returns the page mode. 255 PageMode getPageMode(); 256 PageLayout getPageLayout(); 257 258 enum DocumentAdditionalActionsType 259 { 260 actionCloseDocument, ///< Performed before closing the document 261 actionSaveDocumentStart, ///< Performed before saving the document 262 actionSaveDocumentFinish, ///< Performed after saving the document 263 actionPrintDocumentStart, ///< Performed before printing the document 264 actionPrintDocumentFinish, ///< Performed after printing the document 265 }; 266 267 std::unique_ptr<LinkAction> getAdditionalAction(DocumentAdditionalActionsType type); 268 269 private: 270 // Get page label info. 
271 PageLabelInfo *getPageLabelInfo(); 272 273 PDFDoc *doc; 274 XRef *xref; // the xref table for this PDF file 275 std::vector<std::pair<std::unique_ptr<Page>, Ref>> pages; 276 std::vector<Object> *pagesList; 277 std::vector<Ref> *pagesRefList; 278 std::vector<PageAttrs *> *attrsList; 279 std::vector<int> *kidsIdxList; 280 Form *form; 281 ViewerPreferences *viewerPrefs; 282 int numPages; // number of pages 283 Object dests; // named destination dictionary 284 Object names; // named names dictionary 285 NameTree *destNameTree; // named destination name-tree 286 NameTree *embeddedFileNameTree; // embedded file name-tree 287 NameTree *jsNameTree; // Java Script name-tree 288 GooString *baseURI; // base URI for URI-type links 289 Object metadata; // metadata stream 290 StructTreeRoot *structTreeRoot; // structure tree root 291 unsigned int markInfo; // Flags from MarkInfo dictionary 292 Object outline; // outline dictionary 293 Object acroForm; // AcroForm dictionary 294 Object viewerPreferences; // ViewerPreference dictionary 295 OCGs *optContent; // Optional Content groups 296 bool ok; // true if catalog is valid 297 PageLabelInfo *pageLabelInfo; // info about page labels 298 PageMode pageMode; // page mode 299 PageLayout pageLayout; // page layout 300 Object additionalActions; // page additional actions 301 302 bool cachePageTree(int page); // Cache first <page> pages. 303 Object *findDestInTree(Object *tree, GooString *name, Object *obj); 304 305 Object *getNames(); 306 NameTree *getDestNameTree(); 307 NameTree *getEmbeddedFileNameTree(); 308 NameTree *getJSNameTree(); 309 std::unique_ptr<LinkDest> createLinkDest(Object *obj); 310 311 int catalogPdfMajorVersion = -1; 312 int catalogPdfMinorVersion = -1; 313 314 mutable std::recursive_mutex mutex; 315 }; 316 317 #endif 318