1 /***************************************************************************
2 * Copyright (C) 2009 by Dominik Seichter *
3 * domseichter@web.de *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Library General Public License as *
7 * published by the Free Software Foundation; either version 2 of the *
8 * License, or (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU Library General Public *
16 * License along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 * *
20 * In addition, as a special exception, the copyright holders give *
21 * permission to link the code of portions of this program with the *
22 * OpenSSL library under certain conditions as described in each *
23 * individual source file, and distribute linked combinations *
24 * including the two. *
25 * You must obey the GNU General Public License in all respects *
26 * for all of the code used other than OpenSSL. If you modify *
27 * file(s) with this exception, you may extend this exception to your *
28 * version of the file(s), but you are not obligated to do so. If you *
29 * do not wish to do so, delete this exception statement from your *
30 * version. If you delete this exception statement from all source *
31 * files in the program, then also delete it here. *
32 ***************************************************************************/
33
34 #include "PdfXRefStreamParserObject.h"
35
36 #include "PdfArray.h"
37 #include "PdfDefinesPrivate.h"
38 #include "PdfDictionary.h"
39 #include "PdfStream.h"
40 #include "PdfVariant.h"
41
42 #include <limits>
43
44 namespace PoDoFo {
45
PdfXRefStreamParserObject(PdfVecObjects * pCreator,const PdfRefCountedInputDevice & rDevice,const PdfRefCountedBuffer & rBuffer,PdfParser::TVecOffsets * pOffsets)46 PdfXRefStreamParserObject::PdfXRefStreamParserObject(PdfVecObjects* pCreator, const PdfRefCountedInputDevice & rDevice,
47 const PdfRefCountedBuffer & rBuffer, PdfParser::TVecOffsets* pOffsets )
48 : PdfParserObject( pCreator, rDevice, rBuffer ), m_lNextOffset(-1L), m_pOffsets( pOffsets )
49 {
50
51 }
52
~PdfXRefStreamParserObject()53 PdfXRefStreamParserObject::~PdfXRefStreamParserObject()
54 {
55
56 }
57
Parse()58 void PdfXRefStreamParserObject::Parse()
59 {
60 // Ignore the encryption in the XREF as the XREF stream must no be encrypted (see PDF Reference 3.4.7)
61 this->ParseFile( NULL );
62
63 // Do some very basic error checking
64 if( !this->GetDictionary().HasKey( PdfName::KeyType ) )
65 {
66 PODOFO_RAISE_ERROR( ePdfError_NoXRef );
67 }
68
69 PdfObject* pObj = this->GetDictionary().GetKey( PdfName::KeyType );
70 if( !pObj->IsName() || ( pObj->GetName() != "XRef" ) )
71 {
72 PODOFO_RAISE_ERROR( ePdfError_NoXRef );
73 }
74
75 if( !this->GetDictionary().HasKey( PdfName::KeySize )
76 || !this->GetDictionary().HasKey( "W" ) )
77 {
78 PODOFO_RAISE_ERROR( ePdfError_NoXRef );
79 }
80
81 if( !this->HasStreamToParse() )
82 {
83 PODOFO_RAISE_ERROR( ePdfError_NoXRef );
84 }
85
86 if( this->GetDictionary().HasKey("Prev") )
87 {
88 m_lNextOffset = static_cast<pdf_long>(this->GetDictionary().GetKeyAsLong( "Prev", 0 ));
89 }
90 }
91
ReadXRefTable()92 void PdfXRefStreamParserObject::ReadXRefTable()
93 {
94 pdf_int64 lSize = this->GetDictionary().GetKeyAsLong( PdfName::KeySize, 0 );
95 PdfVariant vWArray = *(this->GetDictionary().GetKey( "W" ));
96
97 // The pdf reference states that W is always an array with 3 entries
98 // all of them have to be integers
99 if( !vWArray.IsArray() || vWArray.GetArray().size() != 3 )
100 {
101 PODOFO_RAISE_ERROR( ePdfError_NoXRef );
102 }
103
104
105 pdf_int64 nW[W_ARRAY_SIZE] = { 0, 0, 0 };
106 for( int i=0;i<W_ARRAY_SIZE;i++ )
107 {
108 if( !vWArray.GetArray()[i].IsNumber() )
109 {
110 PODOFO_RAISE_ERROR( ePdfError_NoXRef );
111 }
112
113 nW[i] = static_cast<pdf_int64>(vWArray.GetArray()[i].GetNumber());
114 }
115
116 std::vector<pdf_int64> vecIndeces;
117 GetIndeces( vecIndeces, static_cast<pdf_int64>(lSize) );
118
119 ParseStream( nW, vecIndeces );
120 }
121
ParseStream(const pdf_int64 nW[W_ARRAY_SIZE],const std::vector<pdf_int64> & rvecIndeces)122 void PdfXRefStreamParserObject::ParseStream( const pdf_int64 nW[W_ARRAY_SIZE], const std::vector<pdf_int64> & rvecIndeces )
123 {
124 char* pBuffer;
125 pdf_long lBufferLen;
126
127 for(pdf_int64 nLengthSum = 0, i = 0; i < W_ARRAY_SIZE; i++ )
128 {
129 if ( nW[i] < 0 )
130 {
131 PODOFO_RAISE_ERROR_INFO( ePdfError_NoXRef,
132 "Negative field length in XRef stream" );
133 }
134 if ( std::numeric_limits<pdf_int64>::max() - nLengthSum < nW[i] )
135 {
136 PODOFO_RAISE_ERROR_INFO( ePdfError_NoXRef,
137 "Invalid entry length in XRef stream" );
138 }
139 else
140 {
141 nLengthSum += nW[i];
142 }
143 }
144
145 const size_t entryLen = static_cast<size_t>(nW[0] + nW[1] + nW[2]);
146
147 this->GetStream()->GetFilteredCopy( &pBuffer, &lBufferLen );
148
149
150 std::vector<pdf_int64>::const_iterator it = rvecIndeces.begin();
151 #ifdef PODOFO_HAVE_UNIQUE_PTR
152 std::unique_ptr<char, decltype( &podofo_free )> pStart( pBuffer, &podofo_free );
153 #else // PODOFO_HAVE_UNIQUE_PTR
154 class StrAutoPtr {
155 private:
156 char *str;
157 public:
158 StrAutoPtr( char *in_str ) : str( in_str ) {}
159 ~StrAutoPtr() {
160 podofo_free( str );
161 }
162 char *get( void ) const { return str; }
163 };
164 StrAutoPtr pStart( pBuffer );
165 #endif // PODOFO_HAVE_UNIQUE_PTR
166
167 while( it != rvecIndeces.end() )
168 {
169 pdf_int64 nFirstObj = *it; ++it;
170 pdf_int64 nCount = *it; ++it;
171
172 //pdf_int64 nFirstObjOrg = nFirstObj;
173 //pdf_int64 nCountOrg = nCount;
174
175 //printf("\n");
176 //printf("nFirstObj=%i\n", static_cast<int>(nFirstObj));
177 //printf("nCount=%i\n", static_cast<int>(nCount));
178 while( nCount > 0 )
179 {
180 if( (pBuffer - pStart.get()) >= lBufferLen )
181 {
182 PODOFO_RAISE_ERROR_INFO( ePdfError_NoXRef, "Invalid count in XRef stream" );
183 }
184
185 //printf("nCount=%i ", static_cast<int>(nCount));
186 //printf("pBuffer=%li ", (long)(pBuffer - pStart));
187 //printf("pEnd=%li ", lBufferLen);
188 if ( nFirstObj >= 0 && nFirstObj < static_cast<pdf_int64>(m_pOffsets->size())
189 && ! (*m_pOffsets)[static_cast<int>(nFirstObj)].bParsed)
190 {
191 ReadXRefStreamEntry( pBuffer, lBufferLen, nW, static_cast<int>(nFirstObj) );
192 }
193
194 nFirstObj++ ;
195 pBuffer += entryLen;
196 --nCount;
197 }
198 //printf("Exp: nFirstObj=%i nFirstObjOrg + nCount=%i\n", nFirstObj - 1, nFirstObjOrg + nCountOrg - 1 );
199 //printf("===\n");
200 }
201 }
202
GetIndeces(std::vector<pdf_int64> & rvecIndeces,pdf_int64 size)203 void PdfXRefStreamParserObject::GetIndeces( std::vector<pdf_int64> & rvecIndeces, pdf_int64 size )
204 {
205 // get the first object number in this crossref stream.
206 // it is not required to have an index key though.
207 if( this->GetDictionary().HasKey( "Index" ) )
208 {
209 PdfVariant array = *(this->GetDictionary().GetKey( "Index" ));
210 if( !array.IsArray() )
211 {
212 PODOFO_RAISE_ERROR( ePdfError_NoXRef );
213 }
214
215 TCIVariantList it = array.GetArray().begin();
216 while ( it != array.GetArray().end() )
217 {
218 rvecIndeces.push_back( (*it).GetNumber() );
219 ++it;
220 }
221 }
222 else
223 {
224 // Default
225 rvecIndeces.push_back( static_cast<pdf_int64>(0) );
226 rvecIndeces.push_back( size );
227 }
228
229 // vecIndeces must be a multiple of 2
230 if( rvecIndeces.size() % 2 != 0)
231 {
232 PODOFO_RAISE_ERROR( ePdfError_NoXRef );
233 }
234 }
235
ReadXRefStreamEntry(char * pBuffer,pdf_long,const pdf_int64 lW[W_ARRAY_SIZE],int nObjNo)236 void PdfXRefStreamParserObject::ReadXRefStreamEntry( char* pBuffer, pdf_long, const pdf_int64 lW[W_ARRAY_SIZE], int nObjNo )
237 {
238 int i;
239 pdf_int64 z;
240 unsigned long nData[W_ARRAY_SIZE];
241
242 for( i=0;i<W_ARRAY_SIZE;i++ )
243 {
244 if( lW[i] > W_MAX_BYTES )
245 {
246 PdfError::LogMessage( eLogSeverity_Error,
247 "The XRef stream dictionary has an entry in /W of size %i.\nThe maximum supported value is %i.\n",
248 lW[i], W_MAX_BYTES );
249
250 PODOFO_RAISE_ERROR( ePdfError_InvalidXRefStream );
251 }
252
253 nData[i] = 0;
254 for( z=W_MAX_BYTES-lW[i];z<W_MAX_BYTES;z++ )
255 {
256 nData[i] = (nData[i] << 8) + static_cast<unsigned char>(*pBuffer);
257 ++pBuffer;
258 }
259 }
260
261
262 //printf("OBJ=%i nData = [ %i %i %i ]\n", nObjNo, static_cast<int>(nData[0]), static_cast<int>(nData[1]), static_cast<int>(nData[2]) );
263 (*m_pOffsets)[nObjNo].bParsed = true;
264 switch( lW[0] == 0 ? 1 : nData[0] ) // nData[0] contains the type information of this entry
265 {
266 case 0:
267 // a free object
268 (*m_pOffsets)[nObjNo].lOffset = nData[1];
269 (*m_pOffsets)[nObjNo].lGeneration = nData[2];
270 (*m_pOffsets)[nObjNo].cUsed = 'f';
271 break;
272 case 1:
273 // normal uncompressed object
274 (*m_pOffsets)[nObjNo].lOffset = nData[1];
275 (*m_pOffsets)[nObjNo].lGeneration = nData[2];
276 (*m_pOffsets)[nObjNo].cUsed = 'n';
277 break;
278 case 2:
279 // object that is part of an object stream
280 (*m_pOffsets)[nObjNo].lOffset = nData[2]; // index in the object stream
281 (*m_pOffsets)[nObjNo].lGeneration = nData[1]; // object number of the stream
282 (*m_pOffsets)[nObjNo].cUsed = 's'; // mark as stream
283 break;
284 default:
285 {
286 PODOFO_RAISE_ERROR( ePdfError_InvalidXRefType );
287 }
288 }
289 //printf("m_offsets = [ %i %i %c ]\n", (*m_pOffsets)[nObjNo].lOffset, (*m_pOffsets)[nObjNo].lGeneration, (*m_pOffsets)[nObjNo].cUsed );
290 }
291
292 };
293