1 /***************************************************************************
2  *   Copyright (C) 2005 by Dominik Seichter                                *
3  *   domseichter@web.de                                                    *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU Library General Public License as       *
7  *   published by the Free Software Foundation; either version 2 of the    *
8  *   License, or (at your option) any later version.                       *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU Library General Public     *
16  *   License along with this program; if not, write to the                 *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  *                                                                         *
20  *   In addition, as a special exception, the copyright holders give       *
21  *   permission to link the code of portions of this program with the      *
22  *   OpenSSL library under certain conditions as described in each         *
23  *   individual source file, and distribute linked combinations            *
24  *   including the two.                                                    *
25  *   You must obey the GNU General Public License in all respects          *
26  *   for all of the code used other than OpenSSL.  If you modify           *
27  *   file(s) with this exception, you may extend this exception to your    *
28  *   version of the file(s), but you are not obligated to do so.  If you   *
29  *   do not wish to do so, delete this exception statement from your       *
30  *   version.  If you delete this exception statement from all source      *
31  *   files in the program, then also delete it here.                       *
32  ***************************************************************************/
33 
34 #ifndef _PDF_PARSER_OBJECT_H_
35 #define _PDF_PARSER_OBJECT_H_
36 
37 #include "PdfDefines.h"
38 #include "PdfObject.h"
39 #include "PdfTokenizer.h"
40 
41 namespace PoDoFo {
42 
43 class PdfEncrypt;
44 class PdfParser;
45 
46 /**
47  * A PdfParserObject constructs a PdfObject from a PDF file.
48  * Parsing starts always at the current file position.
49  */
50 class PODOFO_API PdfParserObject : public PdfObject, public PdfTokenizer {
51 
52  public:
53     /** Parse the object data from the given file handle starting at
54      *  the current position.
55      *  \param pCreator pointer to a PdfVecObjects to resolve object references
56      *  \param rDevice an open reference counted input device which is positioned in
57      *                 front of the object which is going to be parsed.
58      *  \param rBuffer buffer to use for parsing to avoid reallocations
59      *  \param lOffset the position in the device from which the object shall be read
60      *                 if lOffset = -1, the object will be read from the current
61      *                 position in the file.
62      */
63     PdfParserObject( PdfVecObjects* pCreator, const PdfRefCountedInputDevice & rDevice, const PdfRefCountedBuffer & rBuffer, pdf_long lOffset = -1 );
64 
65     /** Parse the object data for an internal object.
66      *  You have to call ParseDictionaryKeys as next function call.
67      *
68      *  The following two parameters are used to avoid allocation of a new
69      *  buffer in PdfSimpleParser.
70      *
71      *  \warning This constructor is for internal usage only!
72      *
73      *  \param rBuffer buffer to use for parsing to avoid reallocations
74      */
75     explicit PdfParserObject( const PdfRefCountedBuffer & rBuffer );
76 
77     virtual ~PdfParserObject();
78 
79     /** Parse the object data from the given file handle
80      *  If delayed loading is enabled, only the object and generation number
81      *  is read now and everything else is read later.
82      *
83      *  \param pEncrypt an encryption dictionary which is used to decrypt
84      *                  strings and streams during parsing or NULL if the PDF
85      *                  file was not encrypted
86      *  \param bIsTrailer wether this is a trailer dictionary or not.
87      *                    trailer dictionaries do not have a object number etc.
88      */
89     void ParseFile( PdfEncrypt* pEncrypt, bool bIsTrailer = false );
90 
91     /** Returns if this object has a stream object appended.
92      *  which has to be parsed.
93      *  \returns true if there is a stream
94      */
95     inline bool HasStreamToParse() const;
96 
97     /** \returns true if this PdfParser loads all objects at
98      *                the time they are accessed for the first time.
99      *                The default is to load all object immediately.
100      *                In this case false is returned.
101      */
102     inline bool IsLoadOnDemand() const;
103 
104     /** Sets wether this object shall be loaded on demand
105      *  when it's data is accessed for the first time.
106      *  \param bDelayed if true the object is loaded delayed.
107      */
108     inline void SetLoadOnDemand( bool bDelayed );
109 
110     /** Set the object number of this object.
111      *  It is almost never necessary to use this call.
112      *  It is only included for usage in the PdfParser.
113      *
114      *  \param nObjNo the new object number of this object
115      */
116     inline void SetObjectNumber( unsigned int nObjNo );
117 
118     /** Tries to free all memory allocated by this
119      *  PdfObject (variables and streams) and reads
120      *  it from disk again if it is requested another time.
121      *
122      *  This will only work if load on demand is used.
123      *  If the object is dirty if will not be free'd.
124      *
125      *  \param bForce if true the object will be free'd
126      *                even if IsDirty() returns true.
127      *                So you will loose any changes made
128      *                to this object.
129      *
130      *  \see IsLoadOnDemand
131      *  \see IsDirty
132      */
133     void FreeObjectMemory( bool bForce = false );
134 
135     /** Gets an offset in which the object beginning is stored in the file.
136      *  Note the offset points just after the object identificator ("0 0 obj").
137      *
138      * \returns an offset in which the object is stored in the source device,
139      *     or -1, if the object was created on demand.
140      */
141     inline pdf_int64 GetOffset( void ) const;
142 
143  protected:
144     /** Load all data of the object if load object on demand is enabled.
145      *  Reimplemented from PdfVariant. Do not call this directly, use
146      *  DelayedLoad().
147      */
148     virtual void DelayedLoadImpl();
149 
150     /** Load the stream of the object if it has one and if loading on demand is enabled.
151      *  Reimplemented from PdfObject. Do not call this directly, use
152      *  DelayedStreamLoad().
153      */
154     virtual void DelayedStreamLoadImpl();
155 
156     /** Starts reading at the file position m_lStreamOffset and interprets all bytes
157      *  as contents of the objects stream.
158      *  It is assumed that the dictionary has a valid /Length key already.
159      *
160      *  Called from DelayedStreamLoadImpl(). Do not call directly.
161      */
162     void ParseStream();
163 
164  private:
165     /** Initialize private members in this object with their default values
166      */
167     void InitPdfParserObject();
168 
169     /** Parse the object data from the given file handle
170      *  \param bIsTrailer wether this is a trailer dictionary or not.
171      *                    trailer dictionaries do not have a object number etc.
172      */
173     void ParseFileComplete( bool bIsTrailer );
174 
175     void ReadObjectNumber();
176 
177  private:
178     PdfEncrypt* m_pEncrypt;
179     bool        m_bIsTrailer;
180 
181     // Should the object try to defer loading of its contents until needed?
182     // If false, object contents will be loaded during ParseFile(...). Note that
183     //          this still uses the delayed loading infrastructure.
184     // If true, loading will be triggered the first time the information is needed by
185     //          an external caller.
186     // Outside callers should not be able to tell the difference between the two modes
187     // of operation.
188     bool m_bLoadOnDemand;
189 
190     pdf_long m_lOffset;
191 
192     bool m_bStream;
193     pdf_long m_lStreamOffset;
194 };
195 
196 // -----------------------------------------------------
197 //
198 // -----------------------------------------------------
SetObjectNumber(unsigned int nObjNo)199 void PdfParserObject::SetObjectNumber( unsigned int nObjNo )
200 {
201     m_reference.SetObjectNumber( nObjNo );
202 }
203 
204 // -----------------------------------------------------
205 //
206 // -----------------------------------------------------
IsLoadOnDemand()207 bool PdfParserObject::IsLoadOnDemand() const
208 {
209     return m_bLoadOnDemand;
210 }
211 
212 // -----------------------------------------------------
213 //
214 // -----------------------------------------------------
SetLoadOnDemand(bool bDelayed)215 void PdfParserObject::SetLoadOnDemand( bool bDelayed )
216 {
217     m_bLoadOnDemand = bDelayed;
218 }
219 
220 // -----------------------------------------------------
221 //
222 // -----------------------------------------------------
HasStreamToParse()223 bool PdfParserObject::HasStreamToParse() const
224 {
225     return m_bStream;
226 }
227 
228 // -----------------------------------------------------
229 //
230 // -----------------------------------------------------
GetOffset(void)231 pdf_int64 PdfParserObject::GetOffset( void ) const
232 {
233 	return static_cast<pdf_int64>(m_lOffset);
234 }
235 
236 };
237 
238 #endif // _PDF_PARSER_OBJECT_H_
239