// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_
8 #define CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_
9 
10 #include <map>
11 #include <memory>
12 #include <set>
13 
14 #include "core/fxcrt/include/fx_basic.h"
15 
// Forward declarations. This header only names these types through raw
// pointers or std::unique_ptr members, so complete definitions are not needed
// here.
class CPDF_Array;
class CPDF_CryptoHandler;
class CPDF_Dictionary;
class CPDF_Document;
class CPDF_IndirectObjectHolder;
class CPDF_Object;
class CPDF_SecurityHandler;
class CPDF_StreamAcc;
class CPDF_SyntaxParser;
class IFX_FileRead;
26 
27 class CPDF_Parser {
28  public:
29   enum Error {
30     SUCCESS = 0,
31     FILE_ERROR,
32     FORMAT_ERROR,
33     PASSWORD_ERROR,
34     HANDLER_ERROR
35   };
36 
37   CPDF_Parser();
38   ~CPDF_Parser();
39 
40   Error StartParse(IFX_FileRead* pFile);
41   uint32_t GetPermissions() const;
42 
SetPassword(const FX_CHAR * password)43   void SetPassword(const FX_CHAR* password) { m_Password = password; }
GetPassword()44   CFX_ByteString GetPassword() { return m_Password; }
GetTrailer()45   CPDF_Dictionary* GetTrailer() const { return m_pTrailer; }
GetLastXRefOffset()46   FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; }
GetDocument()47   CPDF_Document* GetDocument() const { return m_pDocument.get(); }
48 
49   uint32_t GetRootObjNum();
50   uint32_t GetInfoObjNum();
51   CPDF_Array* GetIDArray();
52 
GetEncryptDict()53   CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; }
54 
55   CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList,
56                                    uint32_t objnum);
57 
58   uint32_t GetLastObjNum() const;
59   bool IsValidObjectNumber(uint32_t objnum) const;
60   FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const;
61   uint8_t GetObjectType(uint32_t objnum) const;
62   uint16_t GetObjectGenNum(uint32_t objnum) const;
IsVersionUpdated()63   bool IsVersionUpdated() const { return m_bVersionUpdated; }
64   bool IsObjectFreeOrNull(uint32_t objnum) const;
65   FX_BOOL IsFormStream(uint32_t objnum, FX_BOOL& bForm);
66   CPDF_CryptoHandler* GetCryptoHandler();
67   IFX_FileRead* GetFileAccess() const;
68 
69   FX_FILESIZE GetObjectOffset(uint32_t objnum) const;
70   FX_FILESIZE GetObjectSize(uint32_t objnum) const;
71 
72   void GetIndirectBinary(uint32_t objnum, uint8_t*& pBuffer, uint32_t& size);
GetFileVersion()73   int GetFileVersion() const { return m_FileVersion; }
IsXRefStream()74   FX_BOOL IsXRefStream() const { return m_bXRefStream; }
75 
76   CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList,
77                                      FX_FILESIZE pos,
78                                      uint32_t objnum);
79 
80   CPDF_Object* ParseIndirectObjectAtByStrict(
81       CPDF_IndirectObjectHolder* pObjList,
82       FX_FILESIZE pos,
83       uint32_t objnum,
84       FX_FILESIZE* pResultPos);
85 
86   Error StartAsyncParse(IFX_FileRead* pFile);
87 
GetFirstPageNo()88   uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; }
89 
90  protected:
91   struct ObjectInfo {
ObjectInfoObjectInfo92     ObjectInfo() : pos(0), type(0), gennum(0) {}
93 
94     FX_FILESIZE pos;
95     uint8_t type;
96     uint16_t gennum;
97   };
98 
99   void CloseParser();
100   CPDF_Object* ParseDirect(CPDF_Object* pObj);
101   FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos);
102   FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos);
103   bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip);
104   FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef);
105   CPDF_Dictionary* LoadTrailerV4();
106   FX_BOOL RebuildCrossRef();
107   Error SetEncryptHandler();
108   void ReleaseEncryptHandler();
109   FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount);
110   FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount);
111   FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
112   Error LoadLinearizedMainXRefTable();
113   CPDF_StreamAcc* GetObjectStream(uint32_t number);
114   FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, uint32_t offset);
115   void SetEncryptDictionary(CPDF_Dictionary* pDict);
116   void ShrinkObjectMap(uint32_t size);
117   // A simple check whether the cross reference table matches with
118   // the objects.
119   bool VerifyCrossRefV4();
120 
121   std::unique_ptr<CPDF_Document> m_pDocument;
122   std::unique_ptr<CPDF_SyntaxParser> m_pSyntax;
123   bool m_bOwnFileRead;
124   int m_FileVersion;
125   CPDF_Dictionary* m_pTrailer;
126   CPDF_Dictionary* m_pEncryptDict;
127   FX_FILESIZE m_LastXRefOffset;
128   FX_BOOL m_bXRefStream;
129   std::unique_ptr<CPDF_SecurityHandler> m_pSecurityHandler;
130   CFX_ByteString m_bsRecipient;
131   CFX_ByteString m_FilePath;
132   CFX_ByteString m_Password;
133   std::map<uint32_t, ObjectInfo> m_ObjectInfo;
134   std::set<FX_FILESIZE> m_SortedOffset;
135   CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers;
136   bool m_bVersionUpdated;
137   CPDF_Object* m_pLinearized;
138   uint32_t m_dwFirstPageNo;
139   uint32_t m_dwXrefStartObjNum;
140 
141   // A map of object numbers to indirect streams. Map owns the streams.
142   std::map<uint32_t, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap;
143 
144   // Mapping of object numbers to offsets. The offsets are relative to the first
145   // object in the stream.
146   using StreamObjectCache = std::map<uint32_t, uint32_t>;
147 
148   // Mapping of streams to their object caches. This is valid as long as the
149   // streams in |m_ObjectStreamMap| are valid.
150   std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache;
151 
152   // All indirect object numbers that are being parsed.
153   std::set<uint32_t> m_ParsingObjNums;
154 
155   friend class CPDF_DataAvail;
156 
157  private:
158   enum class ParserState {
159     kDefault,
160     kComment,
161     kWhitespace,
162     kString,
163     kHexString,
164     kEscapedString,
165     kXref,
166     kObjNum,
167     kPostObjNum,
168     kGenNum,
169     kPostGenNum,
170     kTrailer,
171     kBeginObj,
172     kEndObj
173   };
174 };
175 
176 #endif  // CORE_FPDFAPI_FPDF_PARSER_INCLUDE_CPDF_PARSER_H_
177