1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <pdfparse.hxx>
22 
23 #include <comphelper/hash.hxx>
24 
25 #include <rtl/strbuf.hxx>
26 #include <rtl/ustring.hxx>
27 #include <rtl/ustrbuf.hxx>
28 #include <rtl/digest.h>
29 #include <rtl/cipher.h>
30 #include <sal/log.hxx>
31 
32 #include <zlib.h>
33 
34 #include <math.h>
35 #include <map>
36 
37 #include <string.h>
38 
39 
40 namespace pdfparse
41 {
42 
43 struct EmitImplData
44 {
45     // xref table: maps object number to a pair of (generation, buffer offset)
46     typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
47     XRefTable m_aXRefTable;
48     // container of all indirect objects (usually a PDFFile*)
49     const PDFContainer* m_pObjectContainer;
50     unsigned int m_nDecryptObject;
51     unsigned int m_nDecryptGeneration;
52 
53     // returns true if the xref table was updated
insertXrefpdfparse::EmitImplData54     bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
55     {
56         XRefTable::iterator it = m_aXRefTable.find( nObject );
57         if( it == m_aXRefTable.end() )
58         {
59             // new entry
60             m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
61             return true;
62         }
63         // update old entry, if generation number is higher
64         if( it->second.first < nGeneration )
65         {
66             it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
67             return true;
68         }
69         return false;
70     }
71 
EmitImplDatapdfparse::EmitImplData72     explicit EmitImplData( const PDFContainer* pTopContainer ) :
73         m_pObjectContainer( pTopContainer ),
74         m_nDecryptObject( 0 ),
75         m_nDecryptGeneration( 0 )
76     {}
decryptpdfparse::EmitImplData77     void decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
78                   unsigned int nObject, unsigned int nGeneration ) const
79     {
80         const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
81         pFile && pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
82     }
83 
setDecryptObjectpdfparse::EmitImplData84     void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
85     {
86         m_nDecryptObject = nObject;
87         m_nDecryptGeneration = nGeneration;
88     }
89 };
90 
91 }
92 
93 using namespace pdfparse;
94 
EmitContext(const PDFContainer * pTop)95 EmitContext::EmitContext( const PDFContainer* pTop ) :
96     m_bDeflate( false ),
97     m_bDecrypt( false )
98 {
99     if( pTop )
100         m_pImplData.reset( new EmitImplData( pTop ) );
101 }
102 
~EmitContext()103 EmitContext::~EmitContext()
104 {
105 }
106 
~PDFEntry()107 PDFEntry::~PDFEntry()
108 {
109 }
110 
getEmitData(EmitContext const & rContext)111 EmitImplData* PDFEntry::getEmitData( EmitContext const & rContext )
112 {
113     return rContext.m_pImplData.get();
114 }
115 
setEmitData(EmitContext & rContext,EmitImplData * pNewEmitData)116 void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
117 {
118     if( rContext.m_pImplData && rContext.m_pImplData.get() != pNewEmitData )
119         rContext.m_pImplData.reset();
120     rContext.m_pImplData.reset( pNewEmitData );
121 }
122 
~PDFValue()123 PDFValue::~PDFValue()
124 {
125 }
126 
~PDFComment()127 PDFComment::~PDFComment()
128 {
129 }
130 
emit(EmitContext & rWriteContext) const131 bool PDFComment::emit( EmitContext& rWriteContext ) const
132 {
133     return rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
134 }
135 
clone() const136 PDFEntry* PDFComment::clone() const
137 {
138     return new PDFComment( m_aComment );
139 }
140 
~PDFName()141 PDFName::~PDFName()
142 {
143 }
144 
emit(EmitContext & rWriteContext) const145 bool PDFName::emit( EmitContext& rWriteContext ) const
146 {
147     if( ! rWriteContext.write( " /", 2 ) )
148         return false;
149     return rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
150 }
151 
clone() const152 PDFEntry* PDFName::clone() const
153 {
154     return new PDFName( m_aName );
155 }
156 
getFilteredName() const157 OUString PDFName::getFilteredName() const
158 {
159     OStringBuffer aFilter( m_aName.getLength() );
160     const sal_Char* pStr = m_aName.getStr();
161     unsigned int nLen = m_aName.getLength();
162     for( unsigned int i = 0; i < nLen; i++ )
163     {
164         if( (i < nLen - 3) && pStr[i] == '#' )
165         {
166             sal_Char rResult = 0;
167             i++;
168             if( pStr[i] >= '0' && pStr[i] <= '9' )
169                 rResult = sal_Char( pStr[i]-'0' ) << 4;
170             else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
171                 rResult = sal_Char( pStr[i]-'a' + 10 ) << 4;
172             else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
173                 rResult = sal_Char( pStr[i]-'A' + 10 ) << 4;
174             i++;
175             if( pStr[i] >= '0' && pStr[i] <= '9' )
176                 rResult |= sal_Char( pStr[i]-'0' );
177             else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
178                 rResult |= sal_Char( pStr[i]-'a' + 10 );
179             else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
180                 rResult |= sal_Char( pStr[i]-'A' + 10 );
181             aFilter.append( rResult );
182         }
183         else
184             aFilter.append( pStr[i] );
185     }
186     return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
187 }
188 
~PDFString()189 PDFString::~PDFString()
190 {
191 }
192 
emit(EmitContext & rWriteContext) const193 bool PDFString::emit( EmitContext& rWriteContext ) const
194 {
195     if( ! rWriteContext.write( " ", 1 ) )
196         return false;
197     EmitImplData* pEData = getEmitData( rWriteContext );
198     if( rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject )
199     {
200         OString aFiltered( getFilteredString() );
201         // decrypt inplace (evil since OString is supposed to be const
202         // however in this case we know that getFilteredString returned a singular string instance
203         pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(aFiltered.getStr()), aFiltered.getLength(),
204                          reinterpret_cast<sal_uInt8 *>(const_cast<char *>(aFiltered.getStr())),
205                          pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
206         // check for string or hex string
207         const sal_Char* pStr = aFiltered.getStr();
208         if( aFiltered.getLength() > 1 &&
209            ( (static_cast<unsigned char>(pStr[0]) == 0xff && static_cast<unsigned char>(pStr[1]) == 0xfe) ||
210              (static_cast<unsigned char>(pStr[0]) == 0xfe && static_cast<unsigned char>(pStr[1]) == 0xff) ) )
211         {
212             static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
213                                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
214             if( ! rWriteContext.write( "<", 1 ) )
215                 return false;
216             for( sal_Int32 i = 0; i < aFiltered.getLength(); i++ )
217             {
218                 if( ! rWriteContext.write( pHexTab + ((sal_uInt32(pStr[i]) >> 4) & 0x0f), 1 ) )
219                     return false;
220                 if( ! rWriteContext.write( pHexTab + (sal_uInt32(pStr[i]) & 0x0f), 1 ) )
221                     return false;
222             }
223             if( ! rWriteContext.write( ">", 1 ) )
224                 return false;
225         }
226         else
227         {
228             if( ! rWriteContext.write( "(", 1 ) )
229                 return false;
230             if( ! rWriteContext.write( aFiltered.getStr(), aFiltered.getLength() ) )
231                 return false;
232             if( ! rWriteContext.write( ")", 1 ) )
233                 return false;
234         }
235         return true;
236     }
237     return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
238 }
239 
clone() const240 PDFEntry* PDFString::clone() const
241 {
242     return new PDFString( m_aString );
243 }
244 
getFilteredString() const245 OString PDFString::getFilteredString() const
246 {
247     int nLen = m_aString.getLength();
248     OStringBuffer aBuf( nLen );
249 
250     const sal_Char* pStr = m_aString.getStr();
251     if( *pStr == '(' )
252     {
253         const sal_Char* pRun = pStr+1;
254         while( pRun - pStr < nLen-1 )
255         {
256             if( *pRun == '\\' )
257             {
258                 pRun++;
259                 if( pRun - pStr < nLen )
260                 {
261                     sal_Char aEsc = 0;
262                     if( *pRun == 'n' )
263                         aEsc = '\n';
264                     else if( *pRun == 'r' )
265                         aEsc = '\r';
266                     else if( *pRun == 't' )
267                         aEsc = '\t';
268                     else if( *pRun == 'b' )
269                         aEsc = '\b';
270                     else if( *pRun == 'f' )
271                         aEsc = '\f';
272                     else if( *pRun == '(' )
273                         aEsc = '(';
274                     else if( *pRun == ')' )
275                         aEsc = ')';
276                     else if( *pRun == '\\' )
277                         aEsc = '\\';
278                     else if( *pRun == '\n' )
279                     {
280                         pRun++;
281                         continue;
282                     }
283                     else if( *pRun == '\r' )
284                     {
285                         pRun++;
286                         if( *pRun == '\n' )
287                             pRun++;
288                         continue;
289                     }
290                     else
291                     {
292                         int i = 0;
293                         while( i++ < 3 && *pRun >= '0' && *pRun <= '7' )
294                             aEsc = 8*aEsc + (*pRun++ - '0');
295                         // move pointer back to last character of octal sequence
296                         pRun--;
297                     }
298                     aBuf.append( aEsc );
299                 }
300             }
301             else
302                 aBuf.append( *pRun );
303             // move pointer to next character
304             pRun++;
305         }
306     }
307     else if( *pStr == '<' )
308     {
309         const sal_Char* pRun = pStr+1;
310         while( *pRun != '>' && pRun - pStr < nLen )
311         {
312             sal_Char rResult = 0;
313             if( *pRun >= '0' && *pRun <= '9' )
314                 rResult = sal_Char( ( *pRun-'0' ) << 4 );
315             else if( *pRun >= 'a' && *pRun <= 'f' )
316                 rResult = sal_Char( ( *pRun-'a' + 10 ) << 4 );
317             else if( *pRun >= 'A' && *pRun <= 'F' )
318                 rResult = sal_Char( ( *pRun-'A' + 10 ) << 4 );
319             pRun++;
320             if( *pRun != '>' && pRun - pStr < nLen )
321             {
322                 if( *pRun >= '0' && *pRun <= '9' )
323                     rResult |= sal_Char( *pRun-'0' );
324                 else if( *pRun >= 'a' && *pRun <= 'f' )
325                     rResult |= sal_Char( *pRun-'a' + 10 );
326                 else if( *pRun >= 'A' && *pRun <= 'F' )
327                     rResult |= sal_Char( *pRun-'A' + 10 );
328             }
329             pRun++;
330             aBuf.append( rResult );
331         }
332     }
333 
334     return aBuf.makeStringAndClear();
335 }
336 
~PDFNumber()337 PDFNumber::~PDFNumber()
338 {
339 }
340 
emit(EmitContext & rWriteContext) const341 bool PDFNumber::emit( EmitContext& rWriteContext ) const
342 {
343     OStringBuffer aBuf( 32 );
344     aBuf.append( ' ' );
345 
346     double fValue = m_fValue;
347     bool bNeg = false;
348     int nPrecision = 5;
349     if( fValue < 0.0 )
350     {
351         bNeg = true;
352         fValue=-fValue;
353     }
354 
355     sal_Int64 nInt = static_cast<sal_Int64>(fValue);
356     fValue -= static_cast<double>(nInt);
357     // optimizing hardware may lead to a value of 1.0 after the subtraction
358     if( fValue == 1.0 || log10( 1.0-fValue ) <= -nPrecision )
359     {
360         nInt++;
361         fValue = 0.0;
362     }
363     sal_Int64 nFrac = 0;
364     if( fValue )
365     {
366         fValue *= pow( 10.0, static_cast<double>(nPrecision) );
367         nFrac = static_cast<sal_Int64>(fValue);
368     }
369     if( bNeg && ( nInt || nFrac ) )
370         aBuf.append( '-' );
371     aBuf.append( nInt );
372     if( nFrac )
373     {
374         int i;
375         aBuf.append( '.' );
376         sal_Int64 nBound = static_cast<sal_Int64>(pow( 10.0, nPrecision - 1.0 )+0.5);
377         for ( i = 0; ( i < nPrecision ) && nFrac; i++ )
378         {
379             sal_Int64 nNumb = nFrac / nBound;
380             nFrac -= nNumb * nBound;
381             aBuf.append( nNumb );
382             nBound /= 10;
383         }
384     }
385 
386     return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
387 }
388 
clone() const389 PDFEntry* PDFNumber::clone() const
390 {
391     return new PDFNumber( m_fValue );
392 }
393 
394 
~PDFBool()395 PDFBool::~PDFBool()
396 {
397 }
398 
emit(EmitContext & rWriteContext) const399 bool PDFBool::emit( EmitContext& rWriteContext ) const
400 {
401     return m_bValue ? rWriteContext.write( " true", 5 ) : rWriteContext.write( " false", 6 );
402 }
403 
clone() const404 PDFEntry* PDFBool::clone() const
405 {
406     return new PDFBool( m_bValue );
407 }
408 
~PDFNull()409 PDFNull::~PDFNull()
410 {
411 }
412 
emit(EmitContext & rWriteContext) const413 bool PDFNull::emit( EmitContext& rWriteContext ) const
414 {
415     return rWriteContext.write( " null", 5 );
416 }
417 
clone() const418 PDFEntry* PDFNull::clone() const
419 {
420     return new PDFNull();
421 }
422 
423 
~PDFObjectRef()424 PDFObjectRef::~PDFObjectRef()
425 {
426 }
427 
emit(EmitContext & rWriteContext) const428 bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
429 {
430     OString aBuf =
431         " " +
432         OString::number( sal_Int32( m_nNumber ) ) +
433         " " +
434         OString::number( sal_Int32( m_nGeneration ) ) +
435         " R";
436     return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
437 }
438 
clone() const439 PDFEntry* PDFObjectRef::clone() const
440 {
441     return new PDFObjectRef( m_nNumber, m_nGeneration );
442 }
443 
~PDFContainer()444 PDFContainer::~PDFContainer()
445 {
446 }
447 
emitSubElements(EmitContext & rWriteContext) const448 bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
449 {
450     int nEle = m_aSubElements.size();
451     for( int i = 0; i < nEle; i++ )
452     {
453         if( rWriteContext.m_bDecrypt )
454         {
455             const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
456             if (pName && pName->m_aName == "Encrypt")
457             {
458                 i++;
459                 continue;
460             }
461         }
462         if( ! m_aSubElements[i]->emit( rWriteContext ) )
463             return false;
464     }
465     return true;
466 }
467 
cloneSubElements(std::vector<std::unique_ptr<PDFEntry>> & rNewSubElements) const468 void PDFContainer::cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const
469 {
470     int nEle = m_aSubElements.size();
471     for( int i = 0; i < nEle; i++ )
472         rNewSubElements.emplace_back( m_aSubElements[i]->clone() );
473 }
474 
findObject(unsigned int nNumber,unsigned int nGeneration) const475 PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
476 {
477     unsigned int nEle = m_aSubElements.size();
478     for( unsigned int i = 0; i < nEle; i++ )
479     {
480         PDFObject* pObject = dynamic_cast<PDFObject*>(m_aSubElements[i].get());
481         if( pObject &&
482             pObject->m_nNumber == nNumber &&
483             pObject->m_nGeneration == nGeneration )
484         {
485             return pObject;
486         }
487     }
488     return nullptr;
489 }
490 
~PDFArray()491 PDFArray::~PDFArray()
492 {
493 }
494 
emit(EmitContext & rWriteContext) const495 bool PDFArray::emit( EmitContext& rWriteContext ) const
496 {
497     if( ! rWriteContext.write( "[", 1 ) )
498         return false;
499     if( ! emitSubElements( rWriteContext ) )
500         return false;
501     return rWriteContext.write( "]", 1 );
502 }
503 
clone() const504 PDFEntry* PDFArray::clone() const
505 {
506     PDFArray* pNewAr = new PDFArray();
507     cloneSubElements( pNewAr->m_aSubElements );
508     return pNewAr;
509 }
510 
~PDFDict()511 PDFDict::~PDFDict()
512 {
513 }
514 
emit(EmitContext & rWriteContext) const515 bool PDFDict::emit( EmitContext& rWriteContext ) const
516 {
517     if( ! rWriteContext.write( "<<\n", 3 ) )
518         return false;
519     if( ! emitSubElements( rWriteContext ) )
520         return false;
521     return rWriteContext.write( "\n>>\n", 4 );
522 }
523 
insertValue(const OString & rName,std::unique_ptr<PDFEntry> pValue)524 void PDFDict::insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue )
525 {
526     if( ! pValue )
527         eraseValue( rName );
528 
529     PDFEntry* pValueTmp = nullptr;
530     std::unordered_map<OString,PDFEntry*>::iterator it = m_aMap.find( rName );
531     if( it == m_aMap.end() )
532     {
533         // new name/value, pair, append it
534         m_aSubElements.emplace_back(std::make_unique<PDFName>(rName));
535         m_aSubElements.emplace_back( std::move(pValue) );
536         pValueTmp = m_aSubElements.back().get();
537     }
538     else
539     {
540         unsigned int nSub = m_aSubElements.size();
541         bool bFound = false;
542         for( unsigned int i = 0; i < nSub && !bFound; i++ )
543             if( m_aSubElements[i].get() == it->second )
544             {
545                 m_aSubElements[i] = std::move(pValue);
546                 pValueTmp = m_aSubElements[i].get();
547                 bFound = true;
548                 break;
549             }
550     }
551     assert(pValueTmp);
552     m_aMap[ rName ] = pValueTmp;
553 }
554 
eraseValue(const OString & rName)555 void PDFDict::eraseValue( const OString& rName )
556 {
557     unsigned int nEle = m_aSubElements.size();
558     for( unsigned int i = 0; i < nEle; i++ )
559     {
560         PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i].get());
561         if( pName && pName->m_aName == rName )
562         {
563             for( unsigned int j = i+1; j < nEle; j++ )
564             {
565                 if( dynamic_cast<PDFComment*>(m_aSubElements[j].get()) == nullptr )
566                 {
567                     // remove and free subelements from vector
568                     m_aSubElements.erase( m_aSubElements.begin()+j );
569                     m_aSubElements.erase( m_aSubElements.begin()+i );
570                     buildMap();
571                     return;
572                 }
573             }
574         }
575     }
576 }
577 
buildMap()578 PDFEntry* PDFDict::buildMap()
579 {
580     // clear map
581     m_aMap.clear();
582     // build map
583     unsigned int nEle = m_aSubElements.size();
584     PDFName* pName = nullptr;
585     for( unsigned int i = 0; i < nEle; i++ )
586     {
587         if( dynamic_cast<PDFComment*>(m_aSubElements[i].get()) == nullptr )
588         {
589             if( pName )
590             {
591                 m_aMap[ pName->m_aName ] = m_aSubElements[i].get();
592                 pName = nullptr;
593             }
594             else if( (pName = dynamic_cast<PDFName*>(m_aSubElements[i].get())) == nullptr )
595                 return m_aSubElements[i].get();
596         }
597     }
598     return pName;
599 }
600 
clone() const601 PDFEntry* PDFDict::clone() const
602 {
603     PDFDict* pNewDict = new PDFDict();
604     cloneSubElements( pNewDict->m_aSubElements );
605     pNewDict->buildMap();
606     return pNewDict;
607 }
608 
~PDFStream()609 PDFStream::~PDFStream()
610 {
611 }
612 
emit(EmitContext & rWriteContext) const613 bool PDFStream::emit( EmitContext& rWriteContext ) const
614 {
615     return rWriteContext.copyOrigBytes( m_nBeginOffset, m_nEndOffset-m_nBeginOffset );
616 }
617 
clone() const618 PDFEntry* PDFStream::clone() const
619 {
620     return new PDFStream( m_nBeginOffset, m_nEndOffset, nullptr );
621 }
622 
getDictLength(const PDFContainer * pContainer) const623 unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
624 {
625     if( ! m_pDict )
626         return 0;
627     // find /Length entry, can either be a direct or indirect number object
628     std::unordered_map<OString,PDFEntry*>::const_iterator it =
629         m_pDict->m_aMap.find( "Length" );
630     if( it == m_pDict->m_aMap.end() )
631         return 0;
632     PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
633     if( ! pNum && pContainer )
634     {
635         PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
636         if( pRef )
637         {
638             int nEle = pContainer->m_aSubElements.size();
639             for (int i = 0; i < nEle; i++)
640             {
641                 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i].get());
642                 if( pObj &&
643                     pObj->m_nNumber == pRef->m_nNumber &&
644                     pObj->m_nGeneration == pRef->m_nGeneration )
645                 {
646                     if( pObj->m_pObject )
647                         pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
648                     break;
649                 }
650             }
651         }
652     }
653     return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
654 }
655 
~PDFObject()656 PDFObject::~PDFObject()
657 {
658 }
659 
getDeflatedStream(std::unique_ptr<char[]> & rpStream,unsigned int * pBytes,const PDFContainer * pObjectContainer,EmitContext & rContext) const660 bool PDFObject::getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
661 {
662     bool bIsDeflated = false;
663     if( m_pStream && m_pStream->m_pDict &&
664         m_pStream->m_nEndOffset > m_pStream->m_nBeginOffset+15
665         )
666     {
667         unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
668         rpStream.reset(new char[ nOuterStreamLen ]);
669         unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, rpStream.get() );
670         if( nRead != nOuterStreamLen )
671         {
672             rpStream.reset();
673             *pBytes = 0;
674             return false;
675         }
676         // is there a filter entry ?
677         std::unordered_map<OString,PDFEntry*>::const_iterator it =
678             m_pStream->m_pDict->m_aMap.find( "Filter" );
679         if( it != m_pStream->m_pDict->m_aMap.end() )
680         {
681             PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
682             if( ! pFilter )
683             {
684                 PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
685                 if( pArray && ! pArray->m_aSubElements.empty() )
686                 {
687                     pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
688                 }
689             }
690 
691             // is the (first) filter FlateDecode ?
692             if (pFilter && pFilter->m_aName == "FlateDecode")
693             {
694                 bIsDeflated = true;
695             }
696         }
697         // prepare compressed data section
698         char* pStream = rpStream.get();
699         if( pStream[0] == 's' )
700             pStream += 6; // skip "stream"
701         // skip line end after "stream"
702         while( *pStream == '\r' || *pStream == '\n' )
703             pStream++;
704         // get the compressed length
705         *pBytes = m_pStream->getDictLength( pObjectContainer );
706         if( pStream != rpStream.get() )
707             memmove( rpStream.get(), pStream, *pBytes );
708         if( rContext.m_bDecrypt )
709         {
710             EmitImplData* pEData = getEmitData( rContext );
711             pEData->decrypt( reinterpret_cast<const sal_uInt8*>(rpStream.get()),
712                              *pBytes,
713                              reinterpret_cast<sal_uInt8*>(rpStream.get()),
714                              m_nNumber,
715                              m_nGeneration
716                              ); // decrypt inplace
717         }
718     }
719     else
720     {
721         *pBytes = 0;
722     }
723     return bIsDeflated;
724 }
725 
unzipToBuffer(char * pBegin,unsigned int nLen,sal_uInt8 ** pOutBuf,sal_uInt32 * pOutLen)726 static void unzipToBuffer( char* pBegin, unsigned int nLen,
727                            sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
728 {
729     z_stream aZStr;
730     aZStr.next_in       = reinterpret_cast<Bytef *>(pBegin);
731     aZStr.avail_in      = nLen;
732     aZStr.zalloc        = nullptr;
733     aZStr.zfree         = nullptr;
734     aZStr.opaque        = nullptr;
735 
736     int err = inflateInit(&aZStr);
737 
738     const unsigned int buf_increment_size = 16384;
739 
740     if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, buf_increment_size)))
741     {
742         *pOutBuf = p;
743         aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
744         aZStr.avail_out = buf_increment_size;
745         *pOutLen = buf_increment_size;
746     }
747     else
748         err = Z_MEM_ERROR;
749     while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
750     {
751         err = inflate( &aZStr, Z_NO_FLUSH );
752         if( aZStr.avail_out == 0 )
753         {
754             if( err != Z_STREAM_END )
755             {
756                 const int nNewAlloc = *pOutLen + buf_increment_size;
757                 if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, nNewAlloc)))
758                 {
759                     *pOutBuf = p;
760                     aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
761                     aZStr.avail_out = buf_increment_size;
762                     *pOutLen = nNewAlloc;
763                 }
764                 else
765                     err = Z_MEM_ERROR;
766             }
767         }
768     }
769     if( err == Z_STREAM_END )
770     {
771         if( aZStr.avail_out > 0 )
772             *pOutLen -= aZStr.avail_out;
773     }
774     inflateEnd(&aZStr);
775     if( err < Z_OK )
776     {
777         std::free( *pOutBuf );
778         *pOutBuf = nullptr;
779         *pOutLen = 0;
780     }
781 }
782 
writeStream(EmitContext & rWriteContext,const PDFFile * pParsedFile) const783 void PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
784 {
785     if( !m_pStream )
786         return;
787 
788     std::unique_ptr<char[]> pStream;
789     unsigned int nBytes = 0;
790     if( getDeflatedStream( pStream, &nBytes, pParsedFile, rWriteContext ) && nBytes && rWriteContext.m_bDeflate )
791     {
792         sal_uInt8* pOutBytes = nullptr;
793         sal_uInt32 nOutBytes = 0;
794         unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
795         rWriteContext.write( pOutBytes, nOutBytes );
796         std::free( pOutBytes );
797     }
798     else if( pStream && nBytes )
799         rWriteContext.write( pStream.get(), nBytes );
800 }
801 
emit(EmitContext & rWriteContext) const802 bool PDFObject::emit( EmitContext& rWriteContext ) const
803 {
804     if( ! rWriteContext.write( "\n", 1 ) )
805         return false;
806 
807     EmitImplData* pEData = getEmitData( rWriteContext );
808     if( pEData )
809         pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
810 
811     OString aBuf =
812         OString::number( sal_Int32( m_nNumber ) ) +
813         " " +
814         OString::number( sal_Int32( m_nGeneration ) ) +
815         " obj\n";
816     if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
817         return false;
818 
819     if( pEData )
820         pEData->setDecryptObject( m_nNumber, m_nGeneration );
821     if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
822     {
823         std::unique_ptr<char[]> pStream;
824         unsigned int nBytes = 0;
825         bool bDeflate = getDeflatedStream( pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
826         if( pStream && nBytes )
827         {
828             // unzip the stream
829             sal_uInt8* pOutBytes = nullptr;
830             sal_uInt32 nOutBytes = 0;
831             if( bDeflate && rWriteContext.m_bDeflate )
832                 unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
833             else
834             {
835                 // nothing to deflate, but decryption has happened
836                 pOutBytes = reinterpret_cast<sal_uInt8*>(pStream.get());
837                 nOutBytes = static_cast<sal_uInt32>(nBytes);
838             }
839 
840             if( nOutBytes )
841             {
842                 // clone this object
843                 std::unique_ptr<PDFObject> pClone(static_cast<PDFObject*>(clone()));
844                 // set length in the dictionary to new stream length
845                 std::unique_ptr<PDFNumber> pNewLen(new PDFNumber( double(nOutBytes) ));
846                 pClone->m_pStream->m_pDict->insertValue( "Length", std::move(pNewLen) );
847 
848                 if( bDeflate && rWriteContext.m_bDeflate )
849                 {
850                     // delete flatedecode filter
851                     std::unordered_map<OString,PDFEntry*>::const_iterator it =
852                     pClone->m_pStream->m_pDict->m_aMap.find( "Filter" );
853                     if( it != pClone->m_pStream->m_pDict->m_aMap.end() )
854                     {
855                         PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
856                         if (pFilter && pFilter->m_aName == "FlateDecode")
857                             pClone->m_pStream->m_pDict->eraseValue( "Filter" );
858                         else
859                         {
860                             PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
861                             if( pArray && ! pArray->m_aSubElements.empty() )
862                             {
863                                 pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
864                                 if (pFilter && pFilter->m_aName == "FlateDecode")
865                                 {
866                                     pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
867                                 }
868                             }
869                         }
870                     }
871                 }
872 
873                 // write sub elements except stream
874                 bool bRet = true;
875                 unsigned int nEle = pClone->m_aSubElements.size();
876                 for( unsigned int i = 0; i < nEle && bRet; i++ )
877                 {
878                     if( pClone->m_aSubElements[i].get() != pClone->m_pStream )
879                         bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
880                 }
881                 pClone.reset();
882                 // write stream
883                 if( bRet )
884                     bRet = rWriteContext.write("stream\n", 7)
885                            && rWriteContext.write(pOutBytes, nOutBytes)
886                            && rWriteContext.write("\nendstream\nendobj\n", 18);
887                 if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
888                     std::free( pOutBytes );
889                 pEData->setDecryptObject( 0, 0 );
890                 return bRet;
891             }
892             if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
893                 std::free( pOutBytes );
894         }
895     }
896 
897     bool bRet = emitSubElements( rWriteContext ) &&
898                 rWriteContext.write( "\nendobj\n", 8 );
899     if( pEData )
900         pEData->setDecryptObject( 0, 0 );
901     return bRet;
902 }
903 
clone() const904 PDFEntry* PDFObject::clone() const
905 {
906     PDFObject* pNewOb = new PDFObject( m_nNumber, m_nGeneration );
907     cloneSubElements( pNewOb->m_aSubElements );
908     unsigned int nEle = m_aSubElements.size();
909     for( unsigned int i = 0; i < nEle; i++ )
910     {
911         if( m_aSubElements[i].get() == m_pObject )
912             pNewOb->m_pObject = pNewOb->m_aSubElements[i].get();
913         else if( m_aSubElements[i].get() == m_pStream && pNewOb->m_pObject )
914         {
915             pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i].get());
916             PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
917             if (pNewDict && pNewOb->m_pStream)
918                 pNewOb->m_pStream->m_pDict = pNewDict;
919         }
920     }
921     return pNewOb;
922 }
923 
~PDFTrailer()924 PDFTrailer::~PDFTrailer()
925 {
926 }
927 
emit(EmitContext & rWriteContext) const928 bool PDFTrailer::emit( EmitContext& rWriteContext ) const
929 {
930     // get xref offset
931     unsigned int nXRefPos = rWriteContext.getCurPos();
932     // begin xref section, object 0 is always free
933     if( ! rWriteContext.write( "xref\r\n"
934                                "0 1\r\n"
935                                "0000000000 65535 f\r\n", 31 ) )
936         return false;
937     // check if we are emitting a complete PDF file
938     EmitImplData* pEData = getEmitData( rWriteContext );
939     if( pEData )
940     {
941         // emit object xrefs
942         const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
943         EmitImplData::XRefTable::const_iterator section_begin, section_end;
944         section_begin = rXRefs.begin();
945         while( section_begin != rXRefs.end() )
946         {
947             // find end of continuous object numbers
948             section_end = section_begin;
949             unsigned int nLast = section_begin->first;
950             while( (++section_end) != rXRefs.end() &&
951                    section_end->first == nLast+1 )
952                 nLast = section_end->first;
953             // write first object number and number of following entries
954             OStringBuffer aBuf( 21 );
955             aBuf.append( sal_Int32( section_begin->first ) );
956             aBuf.append( ' ' );
957             aBuf.append( sal_Int32(nLast - section_begin->first + 1) );
958             aBuf.append( "\r\n" );
959             if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
960                 return false;
961             while( section_begin != section_end )
962             {
963                 // write 20 char entry of form
964                 // 0000offset 00gen n\r\n
965                 aBuf.setLength( 0 );
966                 OString aOffset( OString::number( section_begin->second.second ) );
967                 int nPad = 10 - aOffset.getLength();
968                 for( int i = 0; i < nPad; i++ )
969                     aBuf.append( '0' );
970                 aBuf.append( aOffset );
971                 aBuf.append( ' ' );
972                 OString aGeneration( OString::number( section_begin->second.first ) );
973                 nPad = 5 - aGeneration.getLength();
974                 for( int i = 0; i < nPad; i++ )
975                     aBuf.append( '0' );
976                 aBuf.append( aGeneration );
977                 aBuf.append( " n\r\n" );
978                 if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
979                     return false;
980                 ++section_begin;
981             }
982         }
983     }
984     if( ! rWriteContext.write( "trailer\n", 8 ) )
985         return false;
986     if( ! emitSubElements( rWriteContext ) )
987         return false;
988     if( ! rWriteContext.write( "startxref\n", 10 ) )
989         return false;
990     OString aOffset( OString::number( nXRefPos ) );
991     if( ! rWriteContext.write( aOffset.getStr(), aOffset.getLength() ) )
992         return false;
993     return rWriteContext.write( "\n%%EOF\n", 7 );
994 }
995 
clone() const996 PDFEntry* PDFTrailer::clone() const
997 {
998     PDFTrailer* pNewTr = new PDFTrailer();
999     cloneSubElements( pNewTr->m_aSubElements );
1000     unsigned int nEle = m_aSubElements.size();
1001     for( unsigned int i = 0; i < nEle; i++ )
1002     {
1003         if( m_aSubElements[i].get() == m_pDict )
1004         {
1005             pNewTr->m_pDict = dynamic_cast<PDFDict*>(pNewTr->m_aSubElements[i].get());
1006             break;
1007         }
1008     }
1009     return pNewTr;
1010 }
1011 
// Size in bytes of the local key buffers used by the password checks; the
// stored decryption key buffer is this size plus 5 extra bytes.
#define ENCRYPTION_KEY_LEN 16
// Size in bytes of the buffers that hold a padded password or an encrypted
// pad string.
#define ENCRYPTION_BUF_LEN 32
1014 
1015 namespace pdfparse {
1016 struct PDFFileImplData
1017 {
1018     bool        m_bIsEncrypted;
1019     bool        m_bStandardHandler;
1020     sal_uInt32  m_nAlgoVersion;
1021     sal_uInt32  m_nStandardRevision;
1022     sal_uInt32  m_nKeyLength;
1023     sal_uInt8   m_aOEntry[32] = {};
1024     sal_uInt8   m_aUEntry[32] = {};
1025     sal_uInt32  m_nPEntry;
1026     OString     m_aDocID;
1027     rtlCipher   m_aCipher;
1028 
1029     sal_uInt8   m_aDecryptionKey[ENCRYPTION_KEY_LEN+5] = {}; // maximum handled key length
1030 
PDFFileImplDatapdfparse::PDFFileImplData1031     PDFFileImplData() :
1032         m_bIsEncrypted( false ),
1033         m_bStandardHandler( false ),
1034         m_nAlgoVersion( 0 ),
1035         m_nStandardRevision( 0 ),
1036         m_nKeyLength( 0 ),
1037         m_nPEntry( 0 ),
1038         m_aCipher( nullptr )
1039     {
1040     }
1041 
~PDFFileImplDatapdfparse::PDFFileImplData1042     ~PDFFileImplData()
1043     {
1044         if( m_aCipher )
1045             rtl_cipher_destroyARCFOUR( m_aCipher );
1046     }
1047 };
1048 }
1049 
PDFFile()1050 PDFFile::PDFFile()
1051    : PDFContainer(), m_nMajor( 0 ), m_nMinor( 0 )
1052 {
1053 }
1054 
~PDFFile()1055 PDFFile::~PDFFile()
1056 {
1057 }
1058 
isEncrypted() const1059 bool PDFFile::isEncrypted() const
1060 {
1061     return impl_getData()->m_bIsEncrypted;
1062 }
1063 
decrypt(const sal_uInt8 * pInBuffer,sal_uInt32 nLen,sal_uInt8 * pOutBuffer,unsigned int nObject,unsigned int nGeneration) const1064 bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1065                        unsigned int nObject, unsigned int nGeneration ) const
1066 {
1067     if( ! isEncrypted() )
1068         return false;
1069 
1070     if( ! m_pData->m_aCipher )
1071         m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1072 
1073     // modify encryption key
1074     sal_uInt32 i = m_pData->m_nKeyLength;
1075     m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1076     m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1077     m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1078     m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1079     m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1080 
1081     ::std::vector<unsigned char> const aSum(::comphelper::Hash::calculateHash(
1082                 m_pData->m_aDecryptionKey, i, ::comphelper::HashType::MD5));
1083 
1084     if( i > 16 )
1085         i = 16;
1086 
1087     rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1088                                                   rtl_Cipher_DirectionDecode,
1089                                                   aSum.data(), i,
1090                                                   nullptr, 0 );
1091     if( aErr == rtl_Cipher_E_None )
1092         aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1093                                          pInBuffer, nLen,
1094                                          pOutBuffer, nLen );
1095     return aErr == rtl_Cipher_E_None;
1096 }
1097 
// 32 byte pad string: used to fill passwords up to 32 bytes and as the
// plaintext that is encrypted when checking the user password.
static const sal_uInt8 nPadString[32] =
{
    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
};
1103 
pad_or_truncate_to_32(const OString & rStr,sal_Char * pBuffer)1104 static void pad_or_truncate_to_32( const OString& rStr, sal_Char* pBuffer )
1105 {
1106     int nLen = rStr.getLength();
1107     if( nLen > 32 )
1108         nLen = 32;
1109     const sal_Char* pStr = rStr.getStr();
1110     memcpy( pBuffer, pStr, nLen );
1111     int i = 0;
1112     while( nLen < 32 )
1113         pBuffer[nLen++] = nPadString[i++];
1114 }
1115 
1116 // pass at least pData->m_nKeyLength bytes in
password_to_key(const OString & rPwd,sal_uInt8 * pOutKey,PDFFileImplData const * pData,bool bComputeO)1117 static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData const * pData, bool bComputeO )
1118 {
1119     // see PDF reference 1.4 Algorithm 3.2
1120     // encrypt pad string
1121     sal_Char aPadPwd[ENCRYPTION_BUF_LEN];
1122     pad_or_truncate_to_32( rPwd, aPadPwd );
1123     ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
1124     aDigest.update(reinterpret_cast<unsigned char const*>(aPadPwd), sizeof(aPadPwd));
1125     if( ! bComputeO )
1126     {
1127         aDigest.update(pData->m_aOEntry, 32);
1128         sal_uInt8 aPEntry[4];
1129         aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1130         aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1131         aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1132         aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1133         aDigest.update(aPEntry, sizeof(aPEntry));
1134         aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1135     }
1136     ::std::vector<unsigned char> nSum(aDigest.finalize());
1137     if( pData->m_nStandardRevision == 3 )
1138     {
1139         for( int i = 0; i < 50; i++ )
1140         {
1141             nSum = ::comphelper::Hash::calculateHash(nSum.data(), nSum.size(),
1142                     ::comphelper::HashType::MD5);
1143         }
1144     }
1145     sal_uInt32 nLen = pData->m_nKeyLength;
1146     if( nLen > RTL_DIGEST_LENGTH_MD5 )
1147         nLen = RTL_DIGEST_LENGTH_MD5;
1148     memcpy( pOutKey, nSum.data(), nLen );
1149     return nLen;
1150 }
1151 
check_user_password(const OString & rPwd,PDFFileImplData * pData)1152 static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1153 {
1154     // see PDF reference 1.4 Algorithm 3.6
1155     bool bValid = false;
1156     sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1157     sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1158     // save (at this time potential) decryption key for later use
1159     memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1160     if( pData->m_nStandardRevision == 2 )
1161     {
1162         sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN] = {};
1163         // see PDF reference 1.4 Algorithm 3.4
1164         // encrypt pad string
1165         if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1166                                     aKey, nKeyLen,
1167                                     nullptr, 0 )
1168             != rtl_Cipher_E_None)
1169         {
1170             return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1171         }
1172         rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1173                                   nEncryptedEntry, sizeof( nEncryptedEntry ) );
1174         bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1175     }
1176     else if( pData->m_nStandardRevision == 3 )
1177     {
1178         // see PDF reference 1.4 Algorithm 3.5
1179         ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
1180         aDigest.update(nPadString, sizeof(nPadString));
1181         aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
1182         ::std::vector<unsigned char> nEncryptedEntry(aDigest.finalize());
1183         if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1184                                     aKey, sizeof(aKey), nullptr, 0 )
1185             != rtl_Cipher_E_None)
1186         {
1187             return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1188         }
1189         rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1190                                   nEncryptedEntry.data(), 16,
1191                                   nEncryptedEntry.data(), 16 ); // encrypt in place
1192         for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1193         {
1194             sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1195             for( size_t j = 0; j < sizeof(aTempKey); j++ )
1196                 aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1197 
1198             if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1199                                         aTempKey, sizeof(aTempKey), nullptr, 0 )
1200                 != rtl_Cipher_E_None)
1201             {
1202                 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1203             }
1204             rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1205                                       nEncryptedEntry.data(), 16,
1206                                       nEncryptedEntry.data(), 16 ); // encrypt in place
1207         }
1208         bValid = (memcmp( nEncryptedEntry.data(), pData->m_aUEntry, 16 ) == 0);
1209     }
1210     return bValid;
1211 }
1212 
usesSupportedEncryptionFormat() const1213 bool PDFFile::usesSupportedEncryptionFormat() const
1214 {
1215     return m_pData->m_bStandardHandler &&
1216         m_pData->m_nAlgoVersion >= 1 &&
1217         m_pData->m_nAlgoVersion <= 2 &&
1218         m_pData->m_nStandardRevision >= 2 &&
1219         m_pData->m_nStandardRevision <= 3;
1220 }
1221 
setupDecryptionData(const OString & rPwd) const1222 bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1223 {
1224     if( !impl_getData()->m_bIsEncrypted )
1225         return rPwd.isEmpty();
1226 
1227     // check if we can handle this encryption at all
1228     if( ! usesSupportedEncryptionFormat() )
1229         return false;
1230 
1231     if( ! m_pData->m_aCipher )
1232         m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1233 
1234     // first try user password
1235     bool bValid = check_user_password( rPwd, m_pData.get() );
1236 
1237     if( ! bValid )
1238     {
1239         // try owner password
1240         // see PDF reference 1.4 Algorithm 3.7
1241         sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1242         sal_uInt8 nPwd[ENCRYPTION_BUF_LEN] = {};
1243         sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData.get(), true );
1244         if( m_pData->m_nStandardRevision == 2 )
1245         {
1246             if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1247                                         aKey, nKeyLen, nullptr, 0 )
1248                 != rtl_Cipher_E_None)
1249             {
1250                 return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1251             }
1252             rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1253                                       m_pData->m_aOEntry, 32,
1254                                       nPwd, 32 );
1255         }
1256         else if( m_pData->m_nStandardRevision == 3 )
1257         {
1258             memcpy( nPwd, m_pData->m_aOEntry, 32 );
1259             for( int i = 19; i >= 0; i-- )
1260             {
1261                 sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1262                 for( size_t j = 0; j < sizeof(nTempKey); j++ )
1263                     nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1264                 if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1265                                             nTempKey, nKeyLen, nullptr, 0 )
1266                     != rtl_Cipher_E_None)
1267                 {
1268                     return false; //TODO: differentiate "failed to decrypt" from "wrong password"
1269                 }
1270                 rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1271                                           nPwd, 32,
1272                                           nPwd, 32 ); // decrypt inplace
1273             }
1274         }
1275         bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData.get() );
1276     }
1277 
1278     return bValid;
1279 }
1280 
impl_getData() const1281 PDFFileImplData* PDFFile::impl_getData() const
1282 {
1283     if( m_pData )
1284         return m_pData.get();
1285     m_pData.reset( new PDFFileImplData );
1286     // check for encryption dict in a trailer
1287     unsigned int nElements = m_aSubElements.size();
1288     while( nElements-- > 0 )
1289     {
1290         PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements].get());
1291         if( pTrailer && pTrailer->m_pDict )
1292         {
1293             // search doc id
1294             PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
1295             if( doc_id != pTrailer->m_pDict->m_aMap.end() )
1296             {
1297                 PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
1298                 if( pArr && !pArr->m_aSubElements.empty() )
1299                 {
1300                     PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0].get());
1301                     if( pStr )
1302                         m_pData->m_aDocID = pStr->getFilteredString();
1303 #if OSL_DEBUG_LEVEL > 0
1304                     OUStringBuffer aTmp;
1305                     for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
1306                         aTmp.append(OUString::number(static_cast<unsigned int>(sal_uInt8(m_pData->m_aDocID[i])), 16));
1307                     SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << aTmp.makeStringAndClear() << ">");
1308 #endif
1309                 }
1310             }
1311             // search Encrypt entry
1312             PDFDict::Map::iterator enc =
1313                 pTrailer->m_pDict->m_aMap.find( "Encrypt" );
1314             if( enc != pTrailer->m_pDict->m_aMap.end() )
1315             {
1316                 PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
1317                 if( ! pDict )
1318                 {
1319                     PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
1320                     if( pRef )
1321                     {
1322                         PDFObject* pObj = findObject( pRef );
1323                         if( pObj && pObj->m_pObject )
1324                             pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
1325                     }
1326                 }
1327                 if( pDict )
1328                 {
1329                     PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
1330                     PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
1331                     PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
1332                     PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
1333                     PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
1334                     PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
1335                     PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
1336                     if( filter != pDict->m_aMap.end() )
1337                     {
1338                         m_pData->m_bIsEncrypted = true;
1339                         m_pData->m_nKeyLength = 5;
1340                         if( version != pDict->m_aMap.end() )
1341                         {
1342                             PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
1343                             if( pNum )
1344                                 m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
1345                         }
1346                         if( m_pData->m_nAlgoVersion >= 3 )
1347                             m_pData->m_nKeyLength = 16;
1348                         if( len != pDict->m_aMap.end() )
1349                         {
1350                             PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
1351                             if( pNum )
1352                                 m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
1353                         }
1354                         PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
1355                         if( pFilter && pFilter->getFilteredName() == "Standard" )
1356                             m_pData->m_bStandardHandler = true;
1357                         if( o_ent != pDict->m_aMap.end() )
1358                         {
1359                             PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
1360                             if( pString )
1361                             {
1362                                 OString aEnt = pString->getFilteredString();
1363                                 if( aEnt.getLength() == 32 )
1364                                     memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
1365 #if OSL_DEBUG_LEVEL > 0
1366                                 else
1367                                 {
1368                                     OUStringBuffer aTmp;
1369                                     for( int i = 0; i < aEnt.getLength(); i++ )
1370                                         aTmp.append(" ").append(OUString::number(static_cast<unsigned int>(sal_uInt8(aEnt[i])), 16));
1371                                     SAL_WARN("sdext.pdfimport.pdfparse",
1372                                              "O entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1373                                 }
1374 #endif
1375                             }
1376                         }
1377                         if( u_ent != pDict->m_aMap.end() )
1378                         {
1379                             PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
1380                             if( pString )
1381                             {
1382                                 OString aEnt = pString->getFilteredString();
1383                                 if( aEnt.getLength() == 32 )
1384                                     memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
1385 #if OSL_DEBUG_LEVEL > 0
1386                                 else
1387                                 {
1388                                     OUStringBuffer aTmp;
1389                                     for( int i = 0; i < aEnt.getLength(); i++ )
1390                                         aTmp.append(" ").append(OUString::number(static_cast<unsigned int>(sal_uInt8(aEnt[i])), 16));
1391                                     SAL_WARN("sdext.pdfimport.pdfparse",
1392                                              "U entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
1393                                 }
1394 #endif
1395                             }
1396                         }
1397                         if( r_ent != pDict->m_aMap.end() )
1398                         {
1399                             PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
1400                             if( pNum )
1401                                 m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
1402                         }
1403                         if( p_ent != pDict->m_aMap.end() )
1404                         {
1405                             PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
1406                             if( pNum )
1407                                 m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
1408                             SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
1409                         }
1410 
1411                         SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? pFilter->getFilteredName() : OUString("<unknown>")) << ", version = " << static_cast<int>(m_pData->m_nAlgoVersion) << ", revision = " << static_cast<int>(m_pData->m_nStandardRevision) << ", key length = " << m_pData->m_nKeyLength );
1412                         break;
1413                     }
1414                 }
1415             }
1416         }
1417     }
1418 
1419     return m_pData.get();
1420 }
1421 
emit(EmitContext & rWriteContext) const1422 bool PDFFile::emit( EmitContext& rWriteContext ) const
1423 {
1424     setEmitData(  rWriteContext, new EmitImplData( this ) );
1425 
1426     OString aBuf =
1427         "%PDF-" +
1428         OString::number( sal_Int32( m_nMajor ) ) +
1429         "." +
1430         OString::number( sal_Int32( m_nMinor ) ) +
1431         "\n";
1432     if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
1433         return false;
1434     return emitSubElements( rWriteContext );
1435 }
1436 
clone() const1437 PDFEntry* PDFFile::clone() const
1438 {
1439     PDFFile* pNewFl = new PDFFile();
1440     pNewFl->m_nMajor = m_nMajor;
1441     pNewFl->m_nMinor = m_nMinor;
1442     cloneSubElements( pNewFl->m_aSubElements );
1443     return pNewFl;
1444 }
1445 
~PDFPart()1446 PDFPart::~PDFPart()
1447 {
1448 }
1449 
emit(EmitContext & rWriteContext) const1450 bool PDFPart::emit( EmitContext& rWriteContext ) const
1451 {
1452     return emitSubElements( rWriteContext );
1453 }
1454 
clone() const1455 PDFEntry* PDFPart::clone() const
1456 {
1457     PDFPart* pNewPt = new PDFPart();
1458     cloneSubElements( pNewPt->m_aSubElements );
1459     return pNewPt;
1460 }
1461 
1462 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
1463