1 /***************************************************************************
2  *   Copyright (C) 2009 by Dominik Seichter                                *
3  *   domseichter@web.de                                                    *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU Library General Public License as       *
7  *   published by the Free Software Foundation; either version 2 of the    *
8  *   License, or (at your option) any later version.                       *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU Library General Public     *
16  *   License along with this program; if not, write to the                 *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  *                                                                         *
20  *   In addition, as a special exception, the copyright holders give       *
21  *   permission to link the code of portions of this program with the      *
22  *   OpenSSL library under certain conditions as described in each         *
23  *   individual source file, and distribute linked combinations            *
24  *   including the two.                                                    *
25  *   You must obey the GNU General Public License in all respects          *
26  *   for all of the code used other than OpenSSL.  If you modify           *
27  *   file(s) with this exception, you may extend this exception to your    *
28  *   version of the file(s), but you are not obligated to do so.  If you   *
29  *   do not wish to do so, delete this exception statement from your       *
30  *   version.  If you delete this exception statement from all source      *
31  *   files in the program, then also delete it here.                       *
32  ***************************************************************************/
33 
34 #include "PdfXRefStreamParserObject.h"
35 
36 #include "PdfArray.h"
37 #include "PdfDefinesPrivate.h"
38 #include "PdfDictionary.h"
39 #include "PdfStream.h"
40 #include "PdfVariant.h"
41 
42 #include <limits>
43 
44 namespace PoDoFo {
45 
PdfXRefStreamParserObject(PdfVecObjects * pCreator,const PdfRefCountedInputDevice & rDevice,const PdfRefCountedBuffer & rBuffer,PdfParser::TVecOffsets * pOffsets)46 PdfXRefStreamParserObject::PdfXRefStreamParserObject(PdfVecObjects* pCreator, const PdfRefCountedInputDevice & rDevice,
47                                                      const PdfRefCountedBuffer & rBuffer, PdfParser::TVecOffsets* pOffsets )
48     : PdfParserObject( pCreator, rDevice, rBuffer ), m_lNextOffset(-1L), m_pOffsets( pOffsets )
49 {
50 
51 }
52 
~PdfXRefStreamParserObject()53 PdfXRefStreamParserObject::~PdfXRefStreamParserObject()
54 {
55 
56 }
57 
Parse()58 void PdfXRefStreamParserObject::Parse()
59 {
60     // Ignore the encryption in the XREF as the XREF stream must no be encrypted (see PDF Reference 3.4.7)
61     this->ParseFile( NULL );
62 
63     // Do some very basic error checking
64     if( !this->GetDictionary().HasKey( PdfName::KeyType ) )
65     {
66         PODOFO_RAISE_ERROR( ePdfError_NoXRef );
67     }
68 
69     PdfObject* pObj = this->GetDictionary().GetKey( PdfName::KeyType );
70     if( !pObj->IsName() || ( pObj->GetName() != "XRef" ) )
71     {
72         PODOFO_RAISE_ERROR( ePdfError_NoXRef );
73     }
74 
75     if( !this->GetDictionary().HasKey( PdfName::KeySize )
76         || !this->GetDictionary().HasKey( "W" ) )
77     {
78         PODOFO_RAISE_ERROR( ePdfError_NoXRef );
79     }
80 
81     if( !this->HasStreamToParse() )
82     {
83         PODOFO_RAISE_ERROR( ePdfError_NoXRef );
84     }
85 
86     if( this->GetDictionary().HasKey("Prev") )
87     {
88         m_lNextOffset = static_cast<pdf_long>(this->GetDictionary().GetKeyAsLong( "Prev", 0 ));
89     }
90 }
91 
ReadXRefTable()92 void PdfXRefStreamParserObject::ReadXRefTable()
93 {
94     pdf_int64  lSize   = this->GetDictionary().GetKeyAsLong( PdfName::KeySize, 0 );
95     PdfVariant vWArray = *(this->GetDictionary().GetKey( "W" ));
96 
97     // The pdf reference states that W is always an array with 3 entries
98     // all of them have to be integers
99     if( !vWArray.IsArray() || vWArray.GetArray().size() != 3 )
100     {
101         PODOFO_RAISE_ERROR( ePdfError_NoXRef );
102     }
103 
104 
105     pdf_int64 nW[W_ARRAY_SIZE] = { 0, 0, 0 };
106     for( int i=0;i<W_ARRAY_SIZE;i++ )
107     {
108         if( !vWArray.GetArray()[i].IsNumber() )
109         {
110             PODOFO_RAISE_ERROR( ePdfError_NoXRef );
111         }
112 
113         nW[i] = static_cast<pdf_int64>(vWArray.GetArray()[i].GetNumber());
114     }
115 
116     std::vector<pdf_int64> vecIndeces;
117     GetIndeces( vecIndeces, static_cast<pdf_int64>(lSize) );
118 
119     ParseStream( nW, vecIndeces );
120 }
121 
ParseStream(const pdf_int64 nW[W_ARRAY_SIZE],const std::vector<pdf_int64> & rvecIndeces)122 void PdfXRefStreamParserObject::ParseStream( const pdf_int64 nW[W_ARRAY_SIZE], const std::vector<pdf_int64> & rvecIndeces )
123 {
124     char*        pBuffer;
125     pdf_long     lBufferLen;
126 
127     for(pdf_int64 nLengthSum = 0, i = 0; i < W_ARRAY_SIZE; i++ )
128     {
129         if ( nW[i] < 0 )
130         {
131             PODOFO_RAISE_ERROR_INFO( ePdfError_NoXRef,
132                                     "Negative field length in XRef stream" );
133         }
134         if ( std::numeric_limits<pdf_int64>::max() - nLengthSum < nW[i] )
135         {
136             PODOFO_RAISE_ERROR_INFO( ePdfError_NoXRef,
137                                     "Invalid entry length in XRef stream" );
138         }
139         else
140         {
141             nLengthSum += nW[i];
142         }
143     }
144 
145     const size_t entryLen  = static_cast<size_t>(nW[0] + nW[1] + nW[2]);
146 
147     this->GetStream()->GetFilteredCopy( &pBuffer, &lBufferLen );
148 
149 
150     std::vector<pdf_int64>::const_iterator it = rvecIndeces.begin();
151     #ifdef PODOFO_HAVE_UNIQUE_PTR
152     std::unique_ptr<char, decltype( &podofo_free )> pStart( pBuffer, &podofo_free );
153     #else // PODOFO_HAVE_UNIQUE_PTR
154     class StrAutoPtr {
155     private:
156         char *str;
157     public:
158         StrAutoPtr( char *in_str ) : str( in_str ) {}
159         ~StrAutoPtr() {
160             podofo_free( str );
161         }
162         char *get( void ) const { return str; }
163     };
164     StrAutoPtr pStart( pBuffer );
165     #endif // PODOFO_HAVE_UNIQUE_PTR
166 
167     while( it != rvecIndeces.end() )
168     {
169         pdf_int64 nFirstObj = *it; ++it;
170         pdf_int64 nCount    = *it; ++it;
171 
172         //pdf_int64 nFirstObjOrg = nFirstObj;
173         //pdf_int64 nCountOrg = nCount;
174 
175         //printf("\n");
176         //printf("nFirstObj=%i\n", static_cast<int>(nFirstObj));
177         //printf("nCount=%i\n", static_cast<int>(nCount));
178         while( nCount > 0 )
179         {
180             if( (pBuffer - pStart.get()) >= lBufferLen )
181             {
182                 PODOFO_RAISE_ERROR_INFO( ePdfError_NoXRef, "Invalid count in XRef stream" );
183             }
184 
185             //printf("nCount=%i ", static_cast<int>(nCount));
186             //printf("pBuffer=%li ", (long)(pBuffer - pStart));
187             //printf("pEnd=%li ", lBufferLen);
188             if ( nFirstObj >= 0 && nFirstObj < static_cast<pdf_int64>(m_pOffsets->size())
189                  && ! (*m_pOffsets)[static_cast<int>(nFirstObj)].bParsed)
190             {
191 	        ReadXRefStreamEntry( pBuffer, lBufferLen, nW, static_cast<int>(nFirstObj) );
192             }
193 
194 			nFirstObj++ ;
195             pBuffer += entryLen;
196             --nCount;
197         }
198         //printf("Exp: nFirstObj=%i nFirstObjOrg + nCount=%i\n", nFirstObj - 1, nFirstObjOrg + nCountOrg - 1 );
199         //printf("===\n");
200     }
201 }
202 
GetIndeces(std::vector<pdf_int64> & rvecIndeces,pdf_int64 size)203 void PdfXRefStreamParserObject::GetIndeces( std::vector<pdf_int64> & rvecIndeces, pdf_int64 size )
204 {
205     // get the first object number in this crossref stream.
206     // it is not required to have an index key though.
207     if( this->GetDictionary().HasKey( "Index" ) )
208     {
209         PdfVariant array = *(this->GetDictionary().GetKey( "Index" ));
210         if( !array.IsArray() )
211         {
212             PODOFO_RAISE_ERROR( ePdfError_NoXRef );
213         }
214 
215         TCIVariantList it = array.GetArray().begin();
216         while ( it != array.GetArray().end() )
217         {
218             rvecIndeces.push_back( (*it).GetNumber() );
219             ++it;
220         }
221     }
222     else
223     {
224         // Default
225         rvecIndeces.push_back( static_cast<pdf_int64>(0) );
226         rvecIndeces.push_back( size );
227     }
228 
229     // vecIndeces must be a multiple of 2
230     if( rvecIndeces.size() % 2 != 0)
231     {
232         PODOFO_RAISE_ERROR( ePdfError_NoXRef );
233     }
234 }
235 
ReadXRefStreamEntry(char * pBuffer,pdf_long,const pdf_int64 lW[W_ARRAY_SIZE],int nObjNo)236 void PdfXRefStreamParserObject::ReadXRefStreamEntry( char* pBuffer, pdf_long, const pdf_int64 lW[W_ARRAY_SIZE], int nObjNo )
237 {
238     int              i;
239     pdf_int64        z;
240     unsigned long    nData[W_ARRAY_SIZE];
241 
242     for( i=0;i<W_ARRAY_SIZE;i++ )
243     {
244         if( lW[i] > W_MAX_BYTES )
245         {
246             PdfError::LogMessage( eLogSeverity_Error,
247                                   "The XRef stream dictionary has an entry in /W of size %i.\nThe maximum supported value is %i.\n",
248                                   lW[i], W_MAX_BYTES );
249 
250             PODOFO_RAISE_ERROR( ePdfError_InvalidXRefStream );
251         }
252 
253         nData[i] = 0;
254         for( z=W_MAX_BYTES-lW[i];z<W_MAX_BYTES;z++ )
255         {
256             nData[i] = (nData[i] << 8) + static_cast<unsigned char>(*pBuffer);
257             ++pBuffer;
258         }
259     }
260 
261 
262     //printf("OBJ=%i nData = [ %i %i %i ]\n", nObjNo, static_cast<int>(nData[0]), static_cast<int>(nData[1]), static_cast<int>(nData[2]) );
263     (*m_pOffsets)[nObjNo].bParsed = true;
264     switch( lW[0] == 0 ? 1 : nData[0] ) // nData[0] contains the type information of this entry
265     {
266         case 0:
267             // a free object
268             (*m_pOffsets)[nObjNo].lOffset     = nData[1];
269             (*m_pOffsets)[nObjNo].lGeneration = nData[2];
270             (*m_pOffsets)[nObjNo].cUsed       = 'f';
271             break;
272         case 1:
273             // normal uncompressed object
274             (*m_pOffsets)[nObjNo].lOffset     = nData[1];
275             (*m_pOffsets)[nObjNo].lGeneration = nData[2];
276             (*m_pOffsets)[nObjNo].cUsed       = 'n';
277             break;
278         case 2:
279             // object that is part of an object stream
280             (*m_pOffsets)[nObjNo].lOffset     = nData[2]; // index in the object stream
281             (*m_pOffsets)[nObjNo].lGeneration = nData[1]; // object number of the stream
282             (*m_pOffsets)[nObjNo].cUsed       = 's';      // mark as stream
283             break;
284         default:
285         {
286             PODOFO_RAISE_ERROR( ePdfError_InvalidXRefType );
287         }
288     }
289     //printf("m_offsets = [ %i %i %c ]\n", (*m_pOffsets)[nObjNo].lOffset, (*m_pOffsets)[nObjNo].lGeneration, (*m_pOffsets)[nObjNo].cUsed );
290 }
291 
292 };
293