1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <pdfparse.hxx>
22 
23 #include <comphelper/hash.hxx>
24 
25 #include <rtl/strbuf.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/ustrbuf.hxx>
28 #include <rtl/digest.h>
29 #include <rtl/cipher.h>
30 #include <sal/log.hxx>
31 
32 #include <zlib.h>
33 
34 #include <math.h>
35 #include <map>
36 
37 #include <string.h>
38 
39 
40 namespace pdfparse
41 {
42 
43 struct EmitImplData
44 {
45     // xref table: maps object number to a pair of (generation, buffer offset)
46     typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
47     XRefTable m_aXRefTable;
48     // container of all indirect objects (usually a PDFFile*)
49     const PDFContainer* m_pObjectContainer;
50     unsigned int m_nDecryptObject;
51     unsigned int m_nDecryptGeneration;
52 
53     // returns true if the xref table was updated
insertXrefpdfparse::EmitImplData54     bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
55     {
56         XRefTable::iterator it = m_aXRefTable.find( nObject );
57         if( it == m_aXRefTable.end() )
58         {
59             // new entry
60             m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
61             return true;
62         }
63         // update old entry, if generation number is higher
64         if( it->second.first < nGeneration )
65         {
66             it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
67             return true;
68         }
69         return false;
70     }
71 
EmitImplDatapdfparse::EmitImplData72     explicit EmitImplData( const PDFContainer* pTopContainer ) :
73         m_pObjectContainer( pTopContainer ),
74         m_nDecryptObject( 0 ),
75         m_nDecryptGeneration( 0 )
76     {}
decryptpdfparse::EmitImplData77     void decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
78                   unsigned int nObject, unsigned int nGeneration ) const
79     {
80         const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
81         pFile && pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
82     }
83 
setDecryptObjectpdfparse::EmitImplData84     void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
85     {
86         m_nDecryptObject = nObject;
87         m_nDecryptGeneration = nGeneration;
88     }
89 };
90 
91 }
92 
93 using namespace pdfparse;
94 
EmitContext(const PDFContainer * pTop)95 EmitContext::EmitContext( const PDFContainer* pTop ) :
96     m_bDeflate( false ),
97     m_bDecrypt( false )
98 {
99     if( pTop )
100         m_pImplData.reset( new EmitImplData( pTop ) );
101 }
102 
~EmitContext()103 EmitContext::~EmitContext()
104 {
105 }
106 
~PDFEntry()107 PDFEntry::~PDFEntry()
108 {
109 }
110 
getEmitData(EmitContext const & rContext)111 EmitImplData* PDFEntry::getEmitData( EmitContext const & rContext )
112 {
113     return rContext.m_pImplData.get();
114 }
115 
setEmitData(EmitContext & rContext,EmitImplData * pNewEmitData)116 void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
117 {
118     if( rContext.m_pImplData && rContext.m_pImplData.get() != pNewEmitData )
119         rContext.m_pImplData.reset();
120     rContext.m_pImplData.reset( pNewEmitData );
121 }
122 
~PDFValue()123 PDFValue::~PDFValue()
124 {
125 }
126 
~PDFComment()127 PDFComment::~PDFComment()
128 {
129 }
130 
emit(EmitContext & rWriteContext) const131 bool PDFComment::emit( EmitContext& rWriteContext ) const
132 {
133     return rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
134 }
135 
clone() const136 PDFEntry* PDFComment::clone() const
137 {
138     return new PDFComment( m_aComment );
139 }
140 
~PDFName()141 PDFName::~PDFName()
142 {
143 }
144 
emit(EmitContext & rWriteContext) const145 bool PDFName::emit( EmitContext& rWriteContext ) const
146 {
147     if( ! rWriteContext.write( " /", 2 ) )
148         return false;
149     return rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
150 }
151 
clone() const152 PDFEntry* PDFName::clone() const
153 {
154     return new PDFName( m_aName );
155 }
156 
getFilteredName() const157 OUString PDFName::getFilteredName() const
158 {
159     OStringBuffer aFilter( m_aName.getLength() );
160     const char* pStr = m_aName.getStr();
161     unsigned int nLen = m_aName.getLength();
162     for( unsigned int i = 0; i < nLen; i++ )
163     {
164         if( (i < nLen - 3) && pStr[i] == '#' )
165         {
166             char rResult = 0;
167             i++;
168             if( pStr[i] >= '0' && pStr[i] <= '9' )
169                 rResult = char( pStr[i]-'0' ) << 4;
170             else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
171                 rResult = char( pStr[i]-'a' + 10 ) << 4;
172             else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
173                 rResult = char( pStr[i]-'A' + 10 ) << 4;
174             i++;
175             if( pStr[i] >= '0' && pStr[i] <= '9' )
176                 rResult |= char( pStr[i]-'0' );
177             else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
178                 rResult |= char( pStr[i]-'a' + 10 );
179             else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
180                 rResult |= char( pStr[i]-'A' + 10 );
181             aFilter.append( rResult );
182         }
183         else
184             aFilter.append( pStr[i] );
185     }
186     return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
187 }
188 
~PDFString()189 PDFString::~PDFString()
190 {
191 }
192 
emit(EmitContext & rWriteContext) const193 bool PDFString::emit( EmitContext& rWriteContext ) const
194 {
195     if( ! rWriteContext.write( " ", 1 ) )
196         return false;
197     EmitImplData* pEData = getEmitData( rWriteContext );
198     if( rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject )
199     {
200         OString aFiltered( getFilteredString() );
201         // decrypt inplace (evil since OString is supposed to be const
202         // however in this case we know that getFilteredString returned a singular string instance
203         pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(aFiltered.getStr()), aFiltered.getLength(),
204                          reinterpret_cast<sal_uInt8 *>(const_cast<char *>(aFiltered.getStr())),
205                          pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
206         // check for string or hex string
207         const char* pStr = aFiltered.getStr();
208         if( aFiltered.getLength() > 1 &&
209            ( (static_cast<unsigned char>(pStr[0]) == 0xff && static_cast<unsigned char>(pStr[1]) == 0xfe) ||
210              (static_cast<unsigned char>(pStr[0]) == 0xfe && static_cast<unsigned char>(pStr[1]) == 0xff) ) )
211         {
212             static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
213                                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
214             if( ! rWriteContext.write( "<", 1 ) )
215                 return false;
216             for( sal_Int32 i = 0; i < aFiltered.getLength(); i++ )
217             {
218                 if( ! rWriteContext.write( pHexTab + ((sal_uInt32(pStr[i]) >> 4) & 0x0f), 1 ) )
219                     return false;
220                 if( ! rWriteContext.write( pHexTab + (sal_uInt32(pStr[i]) & 0x0f), 1 ) )
221                     return false;
222             }
223             if( ! rWriteContext.write( ">", 1 ) )
224                 return false;
225         }
226         else
227         {
228             if( ! rWriteContext.write( "(", 1 ) )
229                 return false;
230             if( ! rWriteContext.write( aFiltered.getStr(), aFiltered.getLength() ) )
231                 return false;
232             if( ! rWriteContext.write( ")", 1 ) )
233                 return false;
234         }
235         return true;
236     }
237     return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
238 }
239 
clone() const240 PDFEntry* PDFString::clone() const
241 {
242     return new PDFString( m_aString );
243 }
244 
getFilteredString() const245 OString PDFString::getFilteredString() const
246 {
247     int nLen = m_aString.getLength();
248     OStringBuffer aBuf( nLen );
249 
250     const char* pStr = m_aString.getStr();
251     if( *pStr == '(' )
252     {
253         const char* pRun = pStr+1;
254         while( pRun - pStr < nLen-1 )
255         {
256             if( *pRun == '\\' )
257             {
258                 pRun++;
259                 if( pRun - pStr < nLen )
260                 {
261                     char aEsc = 0;
262                     if( *pRun == 'n' )
263                         aEsc = '\n';
264                     else if( *pRun == 'r' )
265                         aEsc = '\r';
266                     else if( *pRun == 't' )
267                         aEsc = '\t';
268                     else if( *pRun == 'b' )
269                         aEsc = '\b';
270                     else if( *pRun == 'f' )
271                         aEsc = '\f';
272                     else if( *pRun == '(' )
273                         aEsc = '(';
274                     else if( *pRun == ')' )
275                         aEsc = ')';
276                     else if( *pRun == '\\' )
277                         aEsc = '\\';
278                     else if( *pRun == '\n' )
279                     {
280                         pRun++;
281                         continue;
282                     }
283                     else if( *pRun == '\r' )
284                     {
285                         pRun++;
286                         if( *pRun == '\n' )
287                             pRun++;
288                         continue;
289                     }
290                     else
291                     {
292                         int i = 0;
293                         while( i++ < 3 && *pRun >= '0' && *pRun <= '7' )
294                             aEsc = 8*aEsc + (*pRun++ - '0');
295                         // move pointer back to last character of octal sequence
296                         pRun--;
297                     }
298                     aBuf.append( aEsc );
299                 }
300             }
301             else
302                 aBuf.append( *pRun );
303             // move pointer to next character
304             pRun++;
305         }
306     }
307     else if( *pStr == '<' )
308     {
309         const char* pRun = pStr+1;
310         while( *pRun != '>' && pRun - pStr < nLen )
311         {
312             char rResult = 0;
313             if( *pRun >= '0' && *pRun <= '9' )
314                 rResult = char( ( *pRun-'0' ) << 4 );
315             else if( *pRun >= 'a' && *pRun <= 'f' )
316                 rResult = char( ( *pRun-'a' + 10 ) << 4 );
317             else if( *pRun >= 'A' && *pRun <= 'F' )
318                 rResult = char( ( *pRun-'A' + 10 ) << 4 );
319             pRun++;
320             if( *pRun != '>' && pRun - pStr < nLen )
321             {
322                 if( *pRun >= '0' && *pRun <= '9' )
323                     rResult |= char( *pRun-'0' );
324                 else if( *pRun >= 'a' && *pRun <= 'f' )
325                     rResult |= char( *pRun-'a' + 10 );
326                 else if( *pRun >= 'A' && *pRun <= 'F' )
327                     rResult |= char( *pRun-'A' + 10 );
328             }
329             pRun++;
330             aBuf.append( rResult );
331         }
332     }
333 
334     return aBuf.makeStringAndClear();
335 }
336 
~PDFNumber()337 PDFNumber::~PDFNumber()
338 {
339 }
340 
emit(EmitContext & rWriteContext) const341 bool PDFNumber::emit( EmitContext& rWriteContext ) const
342 {
343     OStringBuffer aBuf( 32 );
344     aBuf.append( ' ' );
345 
346     double fValue = m_fValue;
347     bool bNeg = false;
348     int nPrecision = 5;
349     if( fValue < 0.0 )
350     {
351         bNeg = true;
352         fValue=-fValue;
353     }
354 
355     sal_Int64 nInt = static_cast<sal_Int64>(fValue);
356     fValue -= static_cast<double>(nInt);
357     // optimizing hardware may lead to a value of 1.0 after the subtraction
358     if( fValue == 1.0 || log10( 1.0-fValue ) <= -nPrecision )
359     {
360         nInt++;
361         fValue = 0.0;
362     }
363     sal_Int64 nFrac = 0;
364     if( fValue )
365     {
366         fValue *= pow( 10.0, static_cast<double>(nPrecision) );
367         nFrac = static_cast<sal_Int64>(fValue);
368     }
369     if( bNeg && ( nInt || nFrac ) )
370         aBuf.append( '-' );
371     aBuf.append( nInt );
372     if( nFrac )
373     {
374         int i;
375         aBuf.append( '.' );
376         sal_Int64 nBound = static_cast<sal_Int64>(pow( 10.0, nPrecision - 1.0 )+0.5);
377         for ( i = 0; ( i < nPrecision ) && nFrac; i++ )
378         {
379             sal_Int64 nNumb = nFrac / nBound;
380             nFrac -= nNumb * nBound;
381             aBuf.append( nNumb );
382             nBound /= 10;
383         }
384     }
385 
386     return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
387 }
388 
clone() const389 PDFEntry* PDFNumber::clone() const
390 {
391     return new PDFNumber( m_fValue );
392 }
393 
394 
~PDFBool()395 PDFBool::~PDFBool()
396 {
397 }
398 
emit(EmitContext & rWriteContext) const399 bool PDFBool::emit( EmitContext& rWriteContext ) const
400 {
401     return m_bValue ? rWriteContext.write( " true", 5 ) : rWriteContext.write( " false", 6 );
402 }
403 
clone() const404 PDFEntry* PDFBool::clone() const
405 {
406     return new PDFBool( m_bValue );
407 }
408 
~PDFNull()409 PDFNull::~PDFNull()
410 {
411 }
412 
emit(EmitContext & rWriteContext) const413 bool PDFNull::emit( EmitContext& rWriteContext ) const
414 {
415     return rWriteContext.write( " null", 5 );
416 }
417 
clone() const418 PDFEntry* PDFNull::clone() const
419 {
420     return new PDFNull();
421 }
422 
423 
~PDFObjectRef()424 PDFObjectRef::~PDFObjectRef()
425 {
426 }
427 
emit(EmitContext & rWriteContext) const428 bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
429 {
430     OString aBuf =
431         " " +
432         OString::number( sal_Int32( m_nNumber ) ) +
433         " " +
434         OString::number( sal_Int32( m_nGeneration ) ) +
435         " R";
436     return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
437 }
438 
clone() const439 PDFEntry* PDFObjectRef::clone() const
440 {
441     return new PDFObjectRef( m_nNumber, m_nGeneration );
442 }
443 
~PDFContainer()444 PDFContainer::~PDFContainer()
445 {
446 }
447 
emitSubElements(EmitContext & rWriteContext) const448 bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
449 {
450     int nEle = m_aSubElements.size();
451     for( int i = 0; i < nEle; i++ )
452     {
453         if( rWriteContext.m_bDecrypt )
454         {
455             const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
456             if (pName && pName->m_aName == "Encrypt")
457             {
458                 i++;
459                 continue;
460             }
461         }
462         if( ! m_aSubElements[i]->emit( rWriteContext ) )
463             return false;
464     }
465     return true;
466 }
467 
cloneSubElements(std::vector<std::unique_ptr<PDFEntry>> & rNewSubElements) const468 void PDFContainer::cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const
469 {
470     int nEle = m_aSubElements.size();
471     for( int i = 0; i < nEle; i++ )
472         rNewSubElements.emplace_back( m_aSubElements[i]->clone() );
473 }
474 
findObject(unsigned int nNumber,unsigned int nGeneration) const475 PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
476 {
477     unsigned int nEle = m_aSubElements.size();
478     for( unsigned int i = 0; i < nEle; i++ )
479     {
480         PDFObject* pObject = dynamic_cast<PDFObject*>(m_aSubElements[i].get());
481         if( pObject &&
482             pObject->m_nNumber == nNumber &&
483             pObject->m_nGeneration == nGeneration )
484         {
485             return pObject;
486         }
487     }
488     return nullptr;
489 }
490 
~PDFArray()491 PDFArray::~PDFArray()
492 {
493 }
494 
emit(EmitContext & rWriteContext) const495 bool PDFArray::emit( EmitContext& rWriteContext ) const
496 {
497     if( ! rWriteContext.write( "[", 1 ) )
498         return false;
499     if( ! emitSubElements( rWriteContext ) )
500         return false;
501     return rWriteContext.write( "]", 1 );
502 }
503 
clone() const504 PDFEntry* PDFArray::clone() const
505 {
506     PDFArray* pNewAr = new PDFArray();
507     cloneSubElements( pNewAr->m_aSubElements );
508     return pNewAr;
509 }
510 
~PDFDict()511 PDFDict::~PDFDict()
512 {
513 }
514 
emit(EmitContext & rWriteContext) const515 bool PDFDict::emit( EmitContext& rWriteContext ) const
516 {
517     if( ! rWriteContext.write( "<<\n", 3 ) )
518         return false;
519     if( ! emitSubElements( rWriteContext ) )
520         return false;
521     return rWriteContext.write( "\n>>\n", 4 );
522 }
523 
insertValue(const OString & rName,std::unique_ptr<PDFEntry> pValue)524 void PDFDict::insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue )
525 {
526     if( ! pValue )
527         eraseValue( rName );
528 
529     PDFEntry* pValueTmp = nullptr;
530     std::unordered_map<OString,PDFEntry*>::iterator it = m_aMap.find( rName );
531     if( it == m_aMap.end() )
532     {
533         // new name/value, pair, append it
534         m_aSubElements.emplace_back(std::make_unique<PDFName>(rName));
535         m_aSubElements.emplace_back( std::move(pValue) );
536         pValueTmp = m_aSubElements.back().get();
537     }
538     else
539     {
540         unsigned int nSub = m_aSubElements.size();
541         for( unsigned int i = 0; i < nSub; i++ )
542             if( m_aSubElements[i].get() == it->second )
543             {
544                 m_aSubElements[i] = std::move(pValue);
545                 pValueTmp = m_aSubElements[i].get();
546                 break;
547             }
548     }
549     assert(pValueTmp);
550     m_aMap[ rName ] = pValueTmp;
551 }
552 
eraseValue(std::string_view rName)553 void PDFDict::eraseValue( std::string_view rName )
554 {
555     unsigned int nEle = m_aSubElements.size();
556     for( unsigned int i = 0; i < nEle; i++ )
557     {
558         PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
559         if( pName && pName->m_aName == rName )
560         {
561             for( unsigned int j = i+1; j < nEle; j++ )
562             {
563                 if( dynamic_cast<PDFComment*>(m_aSubElements[j].get()) == nullptr )
564                 {
565                     // remove and free subelements from vector
566                     m_aSubElements.erase( m_aSubElements.begin()+j );
567                     m_aSubElements.erase( m_aSubElements.begin()+i );
568                     buildMap();
569                     return;
570                 }
571             }
572         }
573     }
574 }
575 
buildMap()576 PDFEntry* PDFDict::buildMap()
577 {
578     // clear map
579     m_aMap.clear();
580     // build map
581     unsigned int nEle = m_aSubElements.size();
582     PDFName* pName = nullptr;
583     for( unsigned int i = 0; i < nEle; i++ )
584     {
585         if( dynamic_cast<PDFComment*>(m_aSubElements[i].get()) == nullptr )
586         {
587             if( pName )
588             {
589                 m_aMap[ pName->m_aName ] = m_aSubElements[i].get();
590                 pName = nullptr;
591             }
592             else if( (pName = dynamic_cast<PDFName*>(m_aSubElements[i].get())) == nullptr )
593                 return m_aSubElements[i].get();
594         }
595     }
596     return pName;
597 }
598 
clone() const599 PDFEntry* PDFDict::clone() const
600 {
601     PDFDict* pNewDict = new PDFDict();
602     cloneSubElements( pNewDict->m_aSubElements );
603     pNewDict->buildMap();
604     return pNewDict;
605 }
606 
~PDFStream()607 PDFStream::~PDFStream()
608 {
609 }
610 
emit(EmitContext & rWriteContext) const611 bool PDFStream::emit( EmitContext& rWriteContext ) const
612 {
613     return rWriteContext.copyOrigBytes( m_nBeginOffset, m_nEndOffset-m_nBeginOffset );
614 }
615 
clone() const616 PDFEntry* PDFStream::clone() const
617 {
618     return new PDFStream( m_nBeginOffset, m_nEndOffset, nullptr );
619 }
620 
getDictLength(const PDFContainer * pContainer) const621 unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
622 {
623     if( ! m_pDict )
624         return 0;
625     // find /Length entry, can either be a direct or indirect number object
626     std::unordered_map<OString,PDFEntry*>::const_iterator it =
627         m_pDict->m_aMap.find( "Length" );
628     if( it == m_pDict->m_aMap.end() )
629         return 0;
630     PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
631     if( ! pNum && pContainer )
632     {
633         PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
634         if( pRef )
635         {
636             int nEle = pContainer->m_aSubElements.size();
637             for (int i = 0; i < nEle; i++)
638             {
639                 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i].get());
640                 if( pObj &&
641                     pObj->m_nNumber == pRef->m_nNumber &&
642                     pObj->m_nGeneration == pRef->m_nGeneration )
643                 {
644                     if( pObj->m_pObject )
645                         pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
646                     break;
647                 }
648             }
649         }
650     }
651     return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
652 }
653 
~PDFObject()654 PDFObject::~PDFObject()
655 {
656 }
657 
getDeflatedStream(std::unique_ptr<char[]> & rpStream,unsigned int * pBytes,const PDFContainer * pObjectContainer,EmitContext & rContext) const658 bool PDFObject::getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
659 {
660     bool bIsDeflated = false;
661     if( m_pStream && m_pStream->m_pDict &&
662         m_pStream->m_nEndOffset > m_pStream->m_nBeginOffset+15
663         )
664     {
665         unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
666         rpStream.reset(new char[ nOuterStreamLen ]);
667         unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, rpStream.get() );
668         if( nRead != nOuterStreamLen )
669         {
670             rpStream.reset();
671             *pBytes = 0;
672             return false;
673         }
674         // is there a filter entry ?
675         std::unordered_map<OString,PDFEntry*>::const_iterator it =
676             m_pStream->m_pDict->m_aMap.find( "Filter" );
677         if( it != m_pStream->m_pDict->m_aMap.end() )
678         {
679             PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
680             if( ! pFilter )
681             {
682                 PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
683                 if( pArray && ! pArray->m_aSubElements.empty() )
684                 {
685                     pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
686                 }
687             }
688 
689             // is the (first) filter FlateDecode ?
690             if (pFilter && pFilter->m_aName == "FlateDecode")
691             {
692                 bIsDeflated = true;
693             }
694         }
695         // prepare compressed data section
696         char* pStream = rpStream.get();
697         if( pStream[0] == 's' )
698             pStream += 6; // skip "stream"
699         // skip line end after "stream"
700         while( *pStream == '\r' || *pStream == '\n' )
701             pStream++;
702         // get the compressed length
703         *pBytes = m_pStream->getDictLength( pObjectContainer );
704         if( pStream != rpStream.get() )
705             memmove( rpStream.get(), pStream, *pBytes );
706         if( rContext.m_bDecrypt )
707         {
708             EmitImplData* pEData = getEmitData( rContext );
709             pEData->decrypt( reinterpret_cast<const sal_uInt8*>(rpStream.get()),
710                              *pBytes,
711                              reinterpret_cast<sal_uInt8*>(rpStream.get()),
712                              m_nNumber,
713                              m_nGeneration
714                              ); // decrypt inplace
715         }
716     }
717     else
718     {
719         *pBytes = 0;
720     }
721     return bIsDeflated;
722 }
723 
unzipToBuffer(char * pBegin,unsigned int nLen,sal_uInt8 ** pOutBuf,sal_uInt32 * pOutLen)724 static void unzipToBuffer( char* pBegin, unsigned int nLen,
725                            sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
726 {
727     z_stream aZStr;
728     aZStr.next_in       = reinterpret_cast<Bytef *>(pBegin);
729     aZStr.avail_in      = nLen;
730     aZStr.total_out = aZStr.total_in = 0;
731     aZStr.zalloc        = nullptr;
732     aZStr.zfree         = nullptr;
733     aZStr.opaque        = nullptr;
734 
735     int err = inflateInit(&aZStr);
736 
737     const unsigned int buf_increment_size = 16384;
738 
739     if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, buf_increment_size)))
740     {
741         *pOutBuf = p;
742         aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
743         aZStr.avail_out = buf_increment_size;
744         *pOutLen = buf_increment_size;
745     }
746     else
747         err = Z_MEM_ERROR;
748     while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
749     {
750         err = inflate( &aZStr, Z_NO_FLUSH );
751         if( aZStr.avail_out == 0 )
752         {
753             if( err != Z_STREAM_END )
754             {
755                 const int nNewAlloc = *pOutLen + buf_increment_size;
756                 if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, nNewAlloc)))
757                 {
758                     *pOutBuf = p;
759                     aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
760                     aZStr.avail_out = buf_increment_size;
761                     *pOutLen = nNewAlloc;
762                 }
763                 else
764                     err = Z_MEM_ERROR;
765             }
766         }
767     }
768     if( err == Z_STREAM_END )
769     {
770         if( aZStr.avail_out > 0 )
771             *pOutLen -= aZStr.avail_out;
772     }
773     inflateEnd(&aZStr);
774     if( err < Z_OK )
775     {
776         std::free( *pOutBuf );
777         *pOutBuf = nullptr;
778         *pOutLen = 0;
779     }
780 }
781 
writeStream(EmitContext & rWriteContext,const PDFFile * pParsedFile) const782 void PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
783 {
784     if( !m_pStream )
785         return;
786 
787     std::unique_ptr<char[]> pStream;
788     unsigned int nBytes = 0;
789     if( getDeflatedStream( pStream, &nBytes, pParsedFile, rWriteContext ) && nBytes && rWriteContext.m_bDeflate )
790     {
791         sal_uInt8* pOutBytes = nullptr;
792         sal_uInt32 nOutBytes = 0;
793         unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
794         rWriteContext.write( pOutBytes, nOutBytes );
795         std::free( pOutBytes );
796     }
797     else if( pStream && nBytes )
798         rWriteContext.write( pStream.get(), nBytes );
799 }
800 
emit(EmitContext & rWriteContext) const801 bool PDFObject::emit( EmitContext& rWriteContext ) const
802 {
803     if( ! rWriteContext.write( "\n", 1 ) )
804         return false;
805 
806     EmitImplData* pEData = getEmitData( rWriteContext );
807     if( pEData )
808         pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
809 
810     OString aBuf =
811         OString::number( sal_Int32( m_nNumber ) ) +
812         " " +
813         OString::number( sal_Int32( m_nGeneration ) ) +
814         " obj\n";
815     if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
816         return false;
817 
818     if( pEData )
819         pEData->setDecryptObject( m_nNumber, m_nGeneration );
820     if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
821     {
822         std::unique_ptr<char[]> pStream;
823         unsigned int nBytes = 0;
824         bool bDeflate = getDeflatedStream( pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
825         if( pStream && nBytes )
826         {
827             // unzip the stream
828             sal_uInt8* pOutBytes = nullptr;
829             sal_uInt32 nOutBytes = 0;
830             if( bDeflate && rWriteContext.m_bDeflate )
831                 unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
832             else
833             {
834                 // nothing to deflate, but decryption has happened
835                 pOutBytes = reinterpret_cast<sal_uInt8*>(pStream.get());
836                 nOutBytes = static_cast<sal_uInt32>(nBytes);
837             }
838 
839             if( nOutBytes )
840             {
841                 // clone this object
842                 std::unique_ptr<PDFObject> pClone(static_cast<PDFObject*>(clone()));
843                 // set length in the dictionary to new stream length
844                 std::unique_ptr<PDFNumber> pNewLen(new PDFNumber( double(nOutBytes) ));
845                 pClone->m_pStream->m_pDict->insertValue( "Length", std::move(pNewLen) );
846 
847                 if( bDeflate && rWriteContext.m_bDeflate )
848                 {
849                     // delete flatedecode filter
850                     std::unordered_map<OString,PDFEntry*>::const_iterator it =
851                     pClone->m_pStream->m_pDict->m_aMap.find( "Filter" );
852                     if( it != pClone->m_pStream->m_pDict->m_aMap.end() )
853                     {
854                         PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
855                         if (pFilter && pFilter->m_aName == "FlateDecode")
856                             pClone->m_pStream->m_pDict->eraseValue( "Filter" );
857                         else
858                         {
859                             PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
860                             if( pArray && ! pArray->m_aSubElements.empty() )
861                             {
862                                 pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
863                                 if (pFilter && pFilter->m_aName == "FlateDecode")
864                                 {
865                                     pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
866                                 }
867                             }
868                         }
869                     }
870                 }
871 
872                 // write sub elements except stream
873                 bool bRet = true;
874                 unsigned int nEle = pClone->m_aSubElements.size();
875                 for( unsigned int i = 0; i < nEle && bRet; i++ )
876                 {
877                     if( pClone->m_aSubElements[i].get() != pClone->m_pStream )
878                         bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
879                 }
880                 pClone.reset();
881                 // write stream
882                 if( bRet )
883                     bRet = rWriteContext.write("stream\n", 7)
884                            && rWriteContext.write(pOutBytes, nOutBytes)
885                            && rWriteContext.write("\nendstream\nendobj\n", 18);
886                 if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
887                     std::free( pOutBytes );
888                 pEData->setDecryptObject( 0, 0 );
889                 return bRet;
890             }
891             if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
892                 std::free( pOutBytes );
893         }
894     }
895 
896     bool bRet = emitSubElements( rWriteContext ) &&
897                 rWriteContext.write( "\nendobj\n", 8 );
898     if( pEData )
899         pEData->setDecryptObject( 0, 0 );
900     return bRet;
901 }
902 
clone() const903 PDFEntry* PDFObject::clone() const
904 {
905     PDFObject* pNewOb = new PDFObject( m_nNumber, m_nGeneration );
906     cloneSubElements( pNewOb->m_aSubElements );
907     unsigned int nEle = m_aSubElements.size();
908     for( unsigned int i = 0; i < nEle; i++ )
909     {
910         if( m_aSubElements[i].get() == m_pObject )
911             pNewOb->m_pObject = pNewOb->m_aSubElements[i].get();
912         else if( m_aSubElements[i].get() == m_pStream && pNewOb->m_pObject )
913         {
914             pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i].get());
915             PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
916             if (pNewDict && pNewOb->m_pStream)
917                 pNewOb->m_pStream->m_pDict = pNewDict;
918         }
919     }
920     return pNewOb;
921 }
922 
~PDFTrailer()923 PDFTrailer::~PDFTrailer()
924 {
925 }
926 
emit(EmitContext & rWriteContext) const927 bool PDFTrailer::emit( EmitContext& rWriteContext ) const
928 {
929     // get xref offset
930     unsigned int nXRefPos = rWriteContext.getCurPos();
931     // begin xref section, object 0 is always free
932     if( ! rWriteContext.write( "xref\r\n"
933                                "0 1\r\n"
934                                "0000000000 65535 f\r\n", 31 ) )
935         return false;
936     // check if we are emitting a complete PDF file
937     EmitImplData* pEData = getEmitData( rWriteContext );
938     if( pEData )
939     {
940         // emit object xrefs
941         const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
942         EmitImplData::XRefTable::const_iterator section_begin, section_end;
943         section_begin = rXRefs.begin();
944         while( section_begin != rXRefs.end() )
945         {
946             // find end of continuous object numbers
947             section_end = section_begin;
948             unsigned int nLast = section_begin->first;
949             while( (++section_end) != rXRefs.end() &&
950                    section_end->first == nLast+1 )
951                 nLast = section_end->first;
952             // write first object number and number of following entries
953             OStringBuffer aBuf( 21 );
954             aBuf.append( sal_Int32( section_begin->first ) );
955             aBuf.append( ' ' );
956             aBuf.append( sal_Int32(nLast - section_begin->first + 1) );
957             aBuf.append( "\r\n" );
958             if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
959                 return false;
960             while( section_begin != section_end )
961             {
962                 // write 20 char entry of form
963                 // 0000offset 00gen n\r\n
964                 aBuf.setLength( 0 );
965                 OString aOffset( OString::number( section_begin->second.second ) );
966                 int nPad = 10 - aOffset.getLength();
967                 for( int i = 0; i < nPad; i++ )
968                     aBuf.append( '0' );
969                 aBuf.append( aOffset );
970                 aBuf.append( ' ' );
971                 OString aGeneration( OString::number( section_begin->second.first ) );
972                 nPad = 5 - aGeneration.getLength();
973                 for( int i = 0; i < nPad; i++ )
974                     aBuf.append( '0' );
975                 aBuf.append( aGeneration );
976                 aBuf.append( " n\r\n" );
977                 if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
978                     return false;
979                 ++section_begin;
980             }
981         }
982     }
983     if( ! rWriteContext.write( "trailer\n", 8 ) )
984         return false;
985     if( ! emitSubElements( rWriteContext ) )
986         return false;
987     if( ! rWriteContext.write( "startxref\n", 10 ) )
988         return false;
989     OString aOffset( OString::number( nXRefPos ) );
990     if( ! rWriteContext.write( aOffset.getStr(), aOffset.getLength() ) )
991         return false;
992     return rWriteContext.write( "\n%%EOF\n", 7 );
993 }
994 
clone() const995 PDFEntry* PDFTrailer::clone() const
996 {
997     PDFTrailer* pNewTr = new PDFTrailer();
998     cloneSubElements( pNewTr->m_aSubElements );
999     unsigned int nEle = m_aSubElements.size();
1000     for( unsigned int i = 0; i < nEle; i++ )
1001     {
1002         if( m_aSubElements[i].get() == m_pDict )
1003         {
1004             pNewTr->m_pDict = dynamic_cast<PDFDict*>(pNewTr->m_aSubElements[i].get());
1005             break;
1006         }
1007     }
1008     return pNewTr;
1009 }
1010 
// number of key bytes handled by the code below; used to size the key
// buffers (aKey, temporary keys, m_aDecryptionKey)
#define ENCRYPTION_KEY_LEN 16
// size in bytes of the padded password buffers and of the encrypted
// pad string buffer
#define ENCRYPTION_BUF_LEN 32
1013 
1014 namespace pdfparse {
1015 struct PDFFileImplData
1016 {
1017     bool        m_bIsEncrypted;
1018     bool        m_bStandardHandler;
1019     sal_uInt32  m_nAlgoVersion;
1020     sal_uInt32  m_nStandardRevision;
1021     sal_uInt32  m_nKeyLength;
1022     sal_uInt8   m_aOEntry[32] = {};
1023     sal_uInt8   m_aUEntry[32] = {};
1024     sal_uInt32  m_nPEntry;
1025     OString     m_aDocID;
1026     rtlCipher   m_aCipher;
1027 
1028     sal_uInt8   m_aDecryptionKey[ENCRYPTION_KEY_LEN+5] = {}; // maximum handled key length
1029 
PDFFileImplDatapdfparse::PDFFileImplData1030     PDFFileImplData() :
1031         m_bIsEncrypted( false ),
1032         m_bStandardHandler( false ),
1033         m_nAlgoVersion( 0 ),
1034         m_nStandardRevision( 0 ),
1035         m_nKeyLength( 0 ),
1036         m_nPEntry( 0 ),
1037         m_aCipher( nullptr )
1038     {
1039     }
1040 
~PDFFileImplDatapdfparse::PDFFileImplData1041     ~PDFFileImplData()
1042     {
1043         if( m_aCipher )
1044             rtl_cipher_destroyARCFOUR( m_aCipher );
1045     }
1046 };
1047 }
1048 
PDFFile()1049 PDFFile::PDFFile()
1050    : PDFContainer(), m_nMajor( 0 ), m_nMinor( 0 )
1051 {
1052 }
1053 
~PDFFile()1054 PDFFile::~PDFFile()
1055 {
1056 }
1057 
isEncrypted() const1058 bool PDFFile::isEncrypted() const
1059 {
1060     return impl_getData()->m_bIsEncrypted;
1061 }
1062 
decrypt(const sal_uInt8 * pInBuffer,sal_uInt32 nLen,sal_uInt8 * pOutBuffer,unsigned int nObject,unsigned int nGeneration) const1063 bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1064                        unsigned int nObject, unsigned int nGeneration ) const
1065 {
1066     if( ! isEncrypted() )
1067         return false;
1068 
1069     if( ! m_pData->m_aCipher )
1070         m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1071 
1072     // modify encryption key
1073     sal_uInt32 i = m_pData->m_nKeyLength;
1074     m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1075     m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1076     m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1077     m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1078     m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1079 
1080     ::std::vector<unsigned char> const aSum(::comphelper::Hash::calculateHash(
1081                 m_pData->m_aDecryptionKey, i, ::comphelper::HashType::MD5));
1082 
1083     if( i > 16 )
1084         i = 16;
1085 
1086     rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1087                                                   rtl_Cipher_DirectionDecode,
1088                                                   aSum.data(), i,
1089                                                   nullptr, 0 );
1090     if( aErr == rtl_Cipher_E_None )
1091         aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1092                                          pInBuffer, nLen,
1093                                          pOutBuffer, nLen );
1094     return aErr == rtl_Cipher_E_None;
1095 }
1096 
// Fixed 32 byte pad string: pad_or_truncate_to_32() uses its leading bytes to
// fill up passwords shorter than 32 bytes, and check_user_password() uses it
// as the data that is encrypted / hashed when verifying the user password.
const sal_uInt8 nPadString[32] =
{
    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
};
1102 
pad_or_truncate_to_32(const OString & rStr,char * pBuffer)1103 static void pad_or_truncate_to_32( const OString& rStr, char* pBuffer )
1104 {
1105     int nLen = rStr.getLength();
1106     if( nLen > 32 )
1107         nLen = 32;
1108     const char* pStr = rStr.getStr();
1109     memcpy( pBuffer, pStr, nLen );
1110     int i = 0;
1111     while( nLen < 32 )
1112         pBuffer[nLen++] = nPadString[i++];
1113 }
1114 
1115 // pass at least pData->m_nKeyLength bytes in
password_to_key(const OString & rPwd,sal_uInt8 * pOutKey,PDFFileImplData const * pData,bool bComputeO)1116 static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData const * pData, bool bComputeO )
1117 {
1118     // see PDF reference 1.4 Algorithm 3.2
1119     // encrypt pad string
1120     char aPadPwd[ENCRYPTION_BUF_LEN];
1121     pad_or_truncate_to_32( rPwd, aPadPwd );
1122     ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
1123     aDigest.update(reinterpret_cast<unsigned char const*>(aPadPwd), sizeof(aPadPwd));
1124     if( ! bComputeO )
1125     {
1126         aDigest.update(pData->m_aOEntry, 32);
1127         sal_uInt8 aPEntry[4];
1128         aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1129         aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1130         aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1131         aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1132         aDigest.update(aPEntry, sizeof(aPEntry));
1133         aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1134     }
1135     ::std::vector<unsigned char> nSum(aDigest.finalize());
1136     if( pData->m_nStandardRevision == 3 )
1137     {
1138         for( int i = 0; i < 50; i++ )
1139         {
1140             nSum = ::comphelper::Hash::calculateHash(nSum.data(), nSum.size(),
1141                     ::comphelper::HashType::MD5);
1142         }
1143     }
1144     sal_uInt32 nLen = pData->m_nKeyLength;
1145     if( nLen > RTL_DIGEST_LENGTH_MD5 )
1146         nLen = RTL_DIGEST_LENGTH_MD5;
1147     memcpy( pOutKey, nSum.data(), nLen );
1148     return nLen;
1149 }
1150 
check_user_password(const OString & rPwd,PDFFileImplData * pData)1151 static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1152 {
1153     // see PDF reference 1.4 Algorithm 3.6
1154     bool bValid = false;
1155     sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1156     sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1157     // save (at this time potential) decryption key for later use
1158     memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1159     if( pData->m_nStandardRevision == 2 )
1160     {
1161         sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN] = {};
1162         // see PDF reference 1.4 Algorithm 3.4
1163         // encrypt pad string
1164         if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1165                                     aKey, nKeyLen,
1166                                     nullptr, 0 )
1167             != rtl_Cipher_E_None)
1168         {
1169             return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1170         }
1171         rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1172                                   nEncryptedEntry, sizeof( nEncryptedEntry ) );
1173         bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1174     }
1175     else if( pData->m_nStandardRevision == 3 )
1176     {
1177         // see PDF reference 1.4 Algorithm 3.5
1178         ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
1179         aDigest.update(nPadString, sizeof(nPadString));
1180         aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1181         ::std::vector<unsigned char> nEncryptedEntry(aDigest.finalize());
1182         if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1183                                     aKey, sizeof(aKey), nullptr, 0 )
1184             != rtl_Cipher_E_None)
1185         {
1186             return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1187         }
1188         rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1189                                   nEncryptedEntry.data(), 16,
1190                                   nEncryptedEntry.data(), 16 ); // encrypt in place
1191         for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1192         {
1193             sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1194             for( size_t j = 0; j < sizeof(aTempKey); j++ )
1195                 aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1196 
1197             if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1198                                         aTempKey, sizeof(aTempKey), nullptr, 0 )
1199                 != rtl_Cipher_E_None)
1200             {
1201                 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1202             }
1203             rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1204                                       nEncryptedEntry.data(), 16,
1205                                       nEncryptedEntry.data(), 16 ); // encrypt in place
1206         }
1207         bValid = (memcmp( nEncryptedEntry.data(), pData->m_aUEntry, 16 ) == 0);
1208     }
1209     return bValid;
1210 }
1211 
usesSupportedEncryptionFormat() const1212 bool PDFFile::usesSupportedEncryptionFormat() const
1213 {
1214     return m_pData->m_bStandardHandler &&
1215         m_pData->m_nAlgoVersion >= 1 &&
1216         m_pData->m_nAlgoVersion <= 2 &&
1217         m_pData->m_nStandardRevision >= 2 &&
1218         m_pData->m_nStandardRevision <= 3;
1219 }
1220 
setupDecryptionData(const OString & rPwd) const1221 bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1222 {
1223     if( !impl_getData()->m_bIsEncrypted )
1224         return rPwd.isEmpty();
1225 
1226     // check if we can handle this encryption at all
1227     if( ! usesSupportedEncryptionFormat() )
1228         return false;
1229 
1230     if( ! m_pData->m_aCipher )
1231         m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1232 
1233     // first try user password
1234     bool bValid = check_user_password( rPwd, m_pData.get() );
1235 
1236     if( ! bValid )
1237     {
1238         // try owner password
1239         // see PDF reference 1.4 Algorithm 3.7
1240         sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1241         sal_uInt8 nPwd[ENCRYPTION_BUF_LEN] = {};
1242         sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData.get(), true );
1243         if( m_pData->m_nStandardRevision == 2 )
1244         {
1245             if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1246                                         aKey, nKeyLen, nullptr, 0 )
1247                 != rtl_Cipher_E_None)
1248             {
1249                 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1250             }
1251             rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1252                                       m_pData->m_aOEntry, 32,
1253                                       nPwd, 32 );
1254         }
1255         else if( m_pData->m_nStandardRevision == 3 )
1256         {
1257             memcpy( nPwd, m_pData->m_aOEntry, 32 );
1258             for( int i = 19; i >= 0; i-- )
1259             {
1260                 sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1261                 for( size_t j = 0; j < sizeof(nTempKey); j++ )
1262                     nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1263                 if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1264                                             nTempKey, nKeyLen, nullptr, 0 )
1265                     != rtl_Cipher_E_None)
1266                 {
1267                     return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1268                 }
1269                 rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1270                                           nPwd, 32,
1271                                           nPwd, 32 ); // decrypt inplace
1272             }
1273         }
1274         bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData.get() );
1275     }
1276 
1277     return bValid;
1278 }
1279 
// Returns the implementation data of this file, creating and filling it on
// the first call.
//
// On creation the sub elements are scanned from the last to the first for
// trailers that have a dictionary. For each such trailer the first string of
// its "ID" array (if present) is stored as document id. If the trailer has an
// "Encrypt" entry that is a dictionary (directly or via an object reference)
// containing a "Filter" entry, the file is marked as encrypted, the
// encryption parameters are read from that dictionary and the scan stops.
PDFFileImplData* PDFFile::impl_getData() const
{
    // already set up by an earlier call
    if( m_pData )
        return m_pData.get();
    m_pData.reset( new PDFFileImplData );
    // check for encryption dict in a trailer
    unsigned int nElements = m_aSubElements.size();
    while( nElements-- > 0 )
    {
        PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements].get());
        if( pTrailer && pTrailer->m_pDict )
        {
            // search doc id
            PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
            if( doc_id != pTrailer->m_pDict->m_aMap.end() )
            {
                PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
                if( pArr && !pArr->m_aSubElements.empty() )
                {
                    // only the first element of the ID array is used
                    PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0].get());
                    if( pStr )
                        m_pData->m_aDocID = pStr->getFilteredString();
#if OSL_DEBUG_LEVEL > 0
                    // log the document id as hex digits
                    OUStringBuffer aTmp;
                    for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
                        aTmp.append(static_cast<sal_Int32>(sal_uInt8(m_pData->m_aDocID[i])), 16);
                    SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << aTmp.makeStringAndClear() << ">");
#endif
                }
            }
            // search Encrypt entry
            PDFDict::Map::iterator enc =
                pTrailer->m_pDict->m_aMap.find( "Encrypt" );
            if( enc != pTrailer->m_pDict->m_aMap.end() )
            {
                // the entry is either the dictionary itself or a reference
                // to an object holding the dictionary
                PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
                if( ! pDict )
                {
                    PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
                    if( pRef )
                    {
                        PDFObject* pObj = findObject( pRef );
                        if( pObj && pObj->m_pObject )
                            pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
                    }
                }
                if( pDict )
                {
                    PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
                    PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
                    PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
                    PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
                    PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
                    PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
                    PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
                    // without a Filter entry the dictionary is ignored
                    if( filter != pDict->m_aMap.end() )
                    {
                        m_pData->m_bIsEncrypted = true;
                        // key length in bytes: 5 by default, 16 for algorithm
                        // version 3 and up, overridden by Length (bits) / 8
                        m_pData->m_nKeyLength = 5;
                        if( version != pDict->m_aMap.end() )
                        {
                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
                            if( pNum )
                                m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
                        }
                        if( m_pData->m_nAlgoVersion >= 3 )
                            m_pData->m_nKeyLength = 16;
                        if( len != pDict->m_aMap.end() )
                        {
                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
                            if( pNum )
                                m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
                        }
                        PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
                        if( pFilter && pFilter->getFilteredName() == "Standard" )
                            m_pData->m_bStandardHandler = true;
                        if( o_ent != pDict->m_aMap.end() )
                        {
                            PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
                            if( pString )
                            {
                                // the O entry is only taken over if it has
                                // exactly 32 bytes
                                OString aEnt = pString->getFilteredString();
                                if( aEnt.getLength() == 32 )
                                    memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
#if OSL_DEBUG_LEVEL > 0
                                else
                                {
                                    OUStringBuffer aTmp;
                                    for( int i = 0; i < aEnt.getLength(); i++ )
                                        aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
                                    SAL_WARN("sdext.pdfimport.pdfparse",
                                             "O entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
                                }
#endif
                            }
                        }
                        if( u_ent != pDict->m_aMap.end() )
                        {
                            PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
                            if( pString )
                            {
                                // the U entry is only taken over if it has
                                // exactly 32 bytes
                                OString aEnt = pString->getFilteredString();
                                if( aEnt.getLength() == 32 )
                                    memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
#if OSL_DEBUG_LEVEL > 0
                                else
                                {
                                    OUStringBuffer aTmp;
                                    for( int i = 0; i < aEnt.getLength(); i++ )
                                        aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
                                    SAL_WARN("sdext.pdfimport.pdfparse",
                                             "U entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
                                }
#endif
                            }
                        }
                        if( r_ent != pDict->m_aMap.end() )
                        {
                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
                            if( pNum )
                                m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
                        }
                        if( p_ent != pDict->m_aMap.end() )
                        {
                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
                            // the value goes through sal_Int32 first and is
                            // then stored as unsigned
                            if( pNum )
                                m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
                            SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
                        }

                        SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? pFilter->getFilteredName() : OUString("<unknown>")) << ", version = " << static_cast<int>(m_pData->m_nAlgoVersion) << ", revision = " << static_cast<int>(m_pData->m_nStandardRevision) << ", key length = " << m_pData->m_nKeyLength );
                        // encryption data found, no need to look at further
                        // trailers
                        break;
                    }
                }
            }
        }
    }

    return m_pData.get();
}
1420 
emit(EmitContext & rWriteContext) const1421 bool PDFFile::emit( EmitContext& rWriteContext ) const
1422 {
1423     setEmitData(  rWriteContext, new EmitImplData( this ) );
1424 
1425     OString aBuf =
1426         "%PDF-" +
1427         OString::number( sal_Int32( m_nMajor ) ) +
1428         "." +
1429         OString::number( sal_Int32( m_nMinor ) ) +
1430         "\n";
1431     if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
1432         return false;
1433     return emitSubElements( rWriteContext );
1434 }
1435 
clone() const1436 PDFEntry* PDFFile::clone() const
1437 {
1438     PDFFile* pNewFl = new PDFFile();
1439     pNewFl->m_nMajor = m_nMajor;
1440     pNewFl->m_nMinor = m_nMinor;
1441     cloneSubElements( pNewFl->m_aSubElements );
1442     return pNewFl;
1443 }
1444 
~PDFPart()1445 PDFPart::~PDFPart()
1446 {
1447 }
1448 
emit(EmitContext & rWriteContext) const1449 bool PDFPart::emit( EmitContext& rWriteContext ) const
1450 {
1451     return emitSubElements( rWriteContext );
1452 }
1453 
clone() const1454 PDFEntry* PDFPart::clone() const
1455 {
1456     PDFPart* pNewPt = new PDFPart();
1457     cloneSubElements( pNewPt->m_aSubElements );
1458     return pNewPt;
1459 }
1460 
1461 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
1462