1 /***************************************************************************
2 * Copyright (C) 2005 by Dominik Seichter *
3 * domseichter@web.de *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Library General Public License as *
7 * published by the Free Software Foundation; either version 2 of the *
8 * License, or (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU Library General Public *
16 * License along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 * *
20 * In addition, as a special exception, the copyright holders give *
21 * permission to link the code of portions of this program with the *
22 * OpenSSL library under certain conditions as described in each *
23 * individual source file, and distribute linked combinations *
24 * including the two. *
25 * You must obey the GNU General Public License in all respects *
26 * for all of the code used other than OpenSSL. If you modify *
27 * file(s) with this exception, you may extend this exception to your *
28 * version of the file(s), but you are not obligated to do so. If you *
29 * do not wish to do so, delete this exception statement from your *
30 * version. If you delete this exception statement from all source *
31 * files in the program, then also delete it here. *
32 ***************************************************************************/
33
34 #ifndef _PDF_PARSER_OBJECT_H_
35 #define _PDF_PARSER_OBJECT_H_
36
37 #include "PdfDefines.h"
38 #include "PdfObject.h"
39 #include "PdfTokenizer.h"
40
41 namespace PoDoFo {
42
// Forward declarations.
// PdfEncrypt is only used through pointers in this header, so the full
// definition is not required here. PdfParser is not referenced by any
// declaration visible in this header; it is kept because files including
// this header may rely on the name being declared.
class PdfEncrypt;
class PdfParser;
45
46 /**
47 * A PdfParserObject constructs a PdfObject from a PDF file.
48 * Parsing starts always at the current file position.
49 */
50 class PODOFO_API PdfParserObject : public PdfObject, public PdfTokenizer {
51
52 public:
53 /** Parse the object data from the given file handle starting at
54 * the current position.
55 * \param pCreator pointer to a PdfVecObjects to resolve object references
56 * \param rDevice an open reference counted input device which is positioned in
57 * front of the object which is going to be parsed.
58 * \param rBuffer buffer to use for parsing to avoid reallocations
59 * \param lOffset the position in the device from which the object shall be read
60 * if lOffset = -1, the object will be read from the current
61 * position in the file.
62 */
63 PdfParserObject( PdfVecObjects* pCreator, const PdfRefCountedInputDevice & rDevice, const PdfRefCountedBuffer & rBuffer, pdf_long lOffset = -1 );
64
65 /** Parse the object data for an internal object.
66 * You have to call ParseDictionaryKeys as next function call.
67 *
68 * The following two parameters are used to avoid allocation of a new
69 * buffer in PdfSimpleParser.
70 *
71 * \warning This constructor is for internal usage only!
72 *
73 * \param rBuffer buffer to use for parsing to avoid reallocations
74 */
75 explicit PdfParserObject( const PdfRefCountedBuffer & rBuffer );
76
77 virtual ~PdfParserObject();
78
79 /** Parse the object data from the given file handle
80 * If delayed loading is enabled, only the object and generation number
81 * is read now and everything else is read later.
82 *
83 * \param pEncrypt an encryption dictionary which is used to decrypt
84 * strings and streams during parsing or NULL if the PDF
85 * file was not encrypted
86 * \param bIsTrailer wether this is a trailer dictionary or not.
87 * trailer dictionaries do not have a object number etc.
88 */
89 void ParseFile( PdfEncrypt* pEncrypt, bool bIsTrailer = false );
90
91 /** Returns if this object has a stream object appended.
92 * which has to be parsed.
93 * \returns true if there is a stream
94 */
95 inline bool HasStreamToParse() const;
96
97 /** \returns true if this PdfParser loads all objects at
98 * the time they are accessed for the first time.
99 * The default is to load all object immediately.
100 * In this case false is returned.
101 */
102 inline bool IsLoadOnDemand() const;
103
104 /** Sets wether this object shall be loaded on demand
105 * when it's data is accessed for the first time.
106 * \param bDelayed if true the object is loaded delayed.
107 */
108 inline void SetLoadOnDemand( bool bDelayed );
109
110 /** Set the object number of this object.
111 * It is almost never necessary to use this call.
112 * It is only included for usage in the PdfParser.
113 *
114 * \param nObjNo the new object number of this object
115 */
116 inline void SetObjectNumber( unsigned int nObjNo );
117
118 /** Tries to free all memory allocated by this
119 * PdfObject (variables and streams) and reads
120 * it from disk again if it is requested another time.
121 *
122 * This will only work if load on demand is used.
123 * If the object is dirty if will not be free'd.
124 *
125 * \param bForce if true the object will be free'd
126 * even if IsDirty() returns true.
127 * So you will loose any changes made
128 * to this object.
129 *
130 * \see IsLoadOnDemand
131 * \see IsDirty
132 */
133 void FreeObjectMemory( bool bForce = false );
134
135 /** Gets an offset in which the object beginning is stored in the file.
136 * Note the offset points just after the object identificator ("0 0 obj").
137 *
138 * \returns an offset in which the object is stored in the source device,
139 * or -1, if the object was created on demand.
140 */
141 inline pdf_int64 GetOffset( void ) const;
142
143 protected:
144 /** Load all data of the object if load object on demand is enabled.
145 * Reimplemented from PdfVariant. Do not call this directly, use
146 * DelayedLoad().
147 */
148 virtual void DelayedLoadImpl();
149
150 /** Load the stream of the object if it has one and if loading on demand is enabled.
151 * Reimplemented from PdfObject. Do not call this directly, use
152 * DelayedStreamLoad().
153 */
154 virtual void DelayedStreamLoadImpl();
155
156 /** Starts reading at the file position m_lStreamOffset and interprets all bytes
157 * as contents of the objects stream.
158 * It is assumed that the dictionary has a valid /Length key already.
159 *
160 * Called from DelayedStreamLoadImpl(). Do not call directly.
161 */
162 void ParseStream();
163
164 private:
165 /** Initialize private members in this object with their default values
166 */
167 void InitPdfParserObject();
168
169 /** Parse the object data from the given file handle
170 * \param bIsTrailer wether this is a trailer dictionary or not.
171 * trailer dictionaries do not have a object number etc.
172 */
173 void ParseFileComplete( bool bIsTrailer );
174
175 void ReadObjectNumber();
176
177 private:
178 PdfEncrypt* m_pEncrypt;
179 bool m_bIsTrailer;
180
181 // Should the object try to defer loading of its contents until needed?
182 // If false, object contents will be loaded during ParseFile(...). Note that
183 // this still uses the delayed loading infrastructure.
184 // If true, loading will be triggered the first time the information is needed by
185 // an external caller.
186 // Outside callers should not be able to tell the difference between the two modes
187 // of operation.
188 bool m_bLoadOnDemand;
189
190 pdf_long m_lOffset;
191
192 bool m_bStream;
193 pdf_long m_lStreamOffset;
194 };
195
196 // -----------------------------------------------------
197 //
198 // -----------------------------------------------------
SetObjectNumber(unsigned int nObjNo)199 void PdfParserObject::SetObjectNumber( unsigned int nObjNo )
200 {
201 m_reference.SetObjectNumber( nObjNo );
202 }
203
204 // -----------------------------------------------------
205 //
206 // -----------------------------------------------------
IsLoadOnDemand()207 bool PdfParserObject::IsLoadOnDemand() const
208 {
209 return m_bLoadOnDemand;
210 }
211
212 // -----------------------------------------------------
213 //
214 // -----------------------------------------------------
SetLoadOnDemand(bool bDelayed)215 void PdfParserObject::SetLoadOnDemand( bool bDelayed )
216 {
217 m_bLoadOnDemand = bDelayed;
218 }
219
220 // -----------------------------------------------------
221 //
222 // -----------------------------------------------------
HasStreamToParse()223 bool PdfParserObject::HasStreamToParse() const
224 {
225 return m_bStream;
226 }
227
228 // -----------------------------------------------------
229 //
230 // -----------------------------------------------------
GetOffset(void)231 pdf_int64 PdfParserObject::GetOffset( void ) const
232 {
233 return static_cast<pdf_int64>(m_lOffset);
234 }
235
236 };
237
238 #endif // _PDF_PARSER_OBJECT_H_
239