1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_ 8 #define CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_ 9 10 #include <map> 11 #include <memory> 12 #include <set> 13 14 #include "core/fxcrt/include/fx_basic.h" 15 16 class CPDF_Array; 17 class CPDF_CryptoHandler; 18 class CPDF_Dictionary; 19 class CPDF_Document; 20 class CPDF_IndirectObjectHolder; 21 class CPDF_Object; 22 class CPDF_SecurityHandler; 23 class CPDF_StreamAcc; 24 class CPDF_SyntaxParser; 25 class IFX_FileRead; 26 27 class CPDF_Parser { 28 public: 29 enum Error { 30 SUCCESS = 0, 31 FILE_ERROR, 32 FORMAT_ERROR, 33 PASSWORD_ERROR, 34 HANDLER_ERROR 35 }; 36 37 CPDF_Parser(); 38 ~CPDF_Parser(); 39 40 Error StartParse(IFX_FileRead* pFile); 41 uint32_t GetPermissions() const; 42 SetPassword(const FX_CHAR * password)43 void SetPassword(const FX_CHAR* password) { m_Password = password; } GetPassword()44 CFX_ByteString GetPassword() { return m_Password; } GetTrailer()45 CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } GetLastXRefOffset()46 FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } GetDocument()47 CPDF_Document* GetDocument() const { return m_pDocument.get(); } 48 49 uint32_t GetRootObjNum(); 50 uint32_t GetInfoObjNum(); 51 CPDF_Array* GetIDArray(); 52 GetEncryptDict()53 CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } 54 55 CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, 56 uint32_t objnum); 57 58 uint32_t GetLastObjNum() const; 59 bool IsValidObjectNumber(uint32_t objnum) const; 60 FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const; 61 uint8_t GetObjectType(uint32_t objnum) const; 62 uint16_t GetObjectGenNum(uint32_t objnum) const; IsVersionUpdated()63 bool IsVersionUpdated() const { return m_bVersionUpdated; } 64 bool IsObjectFreeOrNull(uint32_t objnum) const; 65 FX_BOOL IsFormStream(uint32_t objnum, FX_BOOL& bForm); 66 CPDF_CryptoHandler* GetCryptoHandler(); 67 IFX_FileRead* GetFileAccess() const; 68 69 FX_FILESIZE GetObjectOffset(uint32_t objnum) const; 70 FX_FILESIZE GetObjectSize(uint32_t objnum) const; 71 72 void GetIndirectBinary(uint32_t objnum, uint8_t*& pBuffer, uint32_t& size); GetFileVersion()73 int GetFileVersion() const { return m_FileVersion; } IsXRefStream()74 FX_BOOL IsXRefStream() const { return m_bXRefStream; } 75 76 CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, 77 FX_FILESIZE pos, 78 uint32_t objnum); 79 80 CPDF_Object* ParseIndirectObjectAtByStrict( 81 CPDF_IndirectObjectHolder* pObjList, 82 FX_FILESIZE pos, 83 uint32_t objnum, 84 FX_FILESIZE* pResultPos); 85 86 Error StartAsyncParse(IFX_FileRead* pFile); 87 GetFirstPageNo()88 uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } 89 90 protected: 91 struct ObjectInfo { ObjectInfoObjectInfo92 ObjectInfo() : pos(0), type(0), gennum(0) {} 93 94 FX_FILESIZE pos; 95 uint8_t type; 96 uint16_t gennum; 97 }; 98 99 void CloseParser(); 100 CPDF_Object* ParseDirect(CPDF_Object* pObj); 101 FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); 102 FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); 103 bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); 104 FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); 105 CPDF_Dictionary* LoadTrailerV4(); 106 FX_BOOL RebuildCrossRef(); 107 Error SetEncryptHandler(); 108 void ReleaseEncryptHandler(); 109 FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); 110 FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); 111 FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); 112 Error LoadLinearizedMainXRefTable(); 113 CPDF_StreamAcc* GetObjectStream(uint32_t number); 114 FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, uint32_t offset); 115 void SetEncryptDictionary(CPDF_Dictionary* pDict); 116 void ShrinkObjectMap(uint32_t size); 117 // A simple check whether the cross reference table matches with 118 // the objects. 119 bool VerifyCrossRefV4(); 120 121 std::unique_ptr<CPDF_Document> m_pDocument; 122 std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; 123 bool m_bOwnFileRead; 124 int m_FileVersion; 125 CPDF_Dictionary* m_pTrailer; 126 CPDF_Dictionary* m_pEncryptDict; 127 FX_FILESIZE m_LastXRefOffset; 128 FX_BOOL m_bXRefStream; 129 std::unique_ptr<CPDF_SecurityHandler> m_pSecurityHandler; 130 CFX_ByteString m_bsRecipient; 131 CFX_ByteString m_FilePath; 132 CFX_ByteString m_Password; 133 std::map<uint32_t, ObjectInfo> m_ObjectInfo; 134 std::set<FX_FILESIZE> m_SortedOffset; 135 CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers; 136 bool m_bVersionUpdated; 137 CPDF_Object* m_pLinearized; 138 uint32_t m_dwFirstPageNo; 139 uint32_t m_dwXrefStartObjNum; 140 141 // A map of object numbers to indirect streams. Map owns the streams. 142 std::map<uint32_t, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap; 143 144 // Mapping of object numbers to offsets. The offsets are relative to the first 145 // object in the stream. 146 using StreamObjectCache = std::map<uint32_t, uint32_t>; 147 148 // Mapping of streams to their object caches. This is valid as long as the 149 // streams in |m_ObjectStreamMap| are valid. 150 std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; 151 152 // All indirect object numbers that are being parsed. 153 std::set<uint32_t> m_ParsingObjNums; 154 155 friend class CPDF_DataAvail; 156 157 private: 158 enum class ParserState { 159 kDefault, 160 kComment, 161 kWhitespace, 162 kString, 163 kHexString, 164 kEscapedString, 165 kXref, 166 kObjNum, 167 kPostObjNum, 168 kGenNum, 169 kPostGenNum, 170 kTrailer, 171 kBeginObj, 172 kEndObj 173 }; 174 }; 175 176 #endif // CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_ 177