1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8 
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12 
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_reference.h"
20 #include "core/fpdfapi/parser/cpdf_security_handler.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
23 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
24 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
25 #include "core/fxcrt/fx_ext.h"
26 #include "core/fxcrt/fx_safe_types.h"
27 #include "third_party/base/ptr_util.h"
28 #include "third_party/base/stl_util.h"
29 
30 namespace {
31 
// Largest trailer /Size value for which the cross reference table is trimmed.
// Theoretical limits are higher, but this should be large enough in practice.
const int32_t kMaxXRefSize = 1 << 20;
35 
// Interprets the first |n| bytes at |p| as an unsigned integer, most
// significant byte first. Yields 0 when |n| is not positive; if more than four
// bytes are consumed the high-order bits are lost to unsigned wrap-around.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t accumulated = 0;
  int32_t index = 0;
  while (index < n) {
    // The low byte is empty after the shift, so OR-ing is the same as adding.
    accumulated = (accumulated << 8) | p[index];
    ++index;
  }
  return accumulated;
}
42 
GetStreamNCount(CPDF_StreamAcc * pObjStream)43 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
44   return pObjStream->GetDict()->GetIntegerFor("N");
45 }
46 
GetStreamFirst(CPDF_StreamAcc * pObjStream)47 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
48   return pObjStream->GetDict()->GetIntegerFor("First");
49 }
50 
51 }  // namespace
52 
CPDF_Parser()53 CPDF_Parser::CPDF_Parser()
54     : m_pDocument(nullptr),
55       m_bHasParsed(false),
56       m_bXRefStream(false),
57       m_bVersionUpdated(false),
58       m_FileVersion(0),
59       m_pEncryptDict(nullptr),
60       m_dwXrefStartObjNum(0) {
61   m_pSyntax = pdfium::MakeUnique<CPDF_SyntaxParser>();
62 }
63 
~CPDF_Parser()64 CPDF_Parser::~CPDF_Parser() {
65   ReleaseEncryptHandler();
66   SetEncryptDictionary(nullptr);
67 }
68 
GetLastObjNum() const69 uint32_t CPDF_Parser::GetLastObjNum() const {
70   return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
71 }
72 
IsValidObjectNumber(uint32_t objnum) const73 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
74   return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
75 }
76 
GetObjectPositionOrZero(uint32_t objnum) const77 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
78   auto it = m_ObjectInfo.find(objnum);
79   return it != m_ObjectInfo.end() ? it->second.pos : 0;
80 }
81 
GetObjectType(uint32_t objnum) const82 uint8_t CPDF_Parser::GetObjectType(uint32_t objnum) const {
83   ASSERT(IsValidObjectNumber(objnum));
84   auto it = m_ObjectInfo.find(objnum);
85   return it != m_ObjectInfo.end() ? it->second.type : 0;
86 }
87 
GetObjectGenNum(uint32_t objnum) const88 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const {
89   ASSERT(IsValidObjectNumber(objnum));
90   auto it = m_ObjectInfo.find(objnum);
91   return it != m_ObjectInfo.end() ? it->second.gennum : 0;
92 }
93 
IsObjectFreeOrNull(uint32_t objnum) const94 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
95   uint8_t type = GetObjectType(objnum);
96   return type == 0 || type == 255;
97 }
98 
SetEncryptDictionary(CPDF_Dictionary * pDict)99 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
100   m_pEncryptDict = pDict;
101 }
102 
GetCryptoHandler()103 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
104   return m_pSyntax->m_pCryptoHandler.get();
105 }
106 
GetFileAccess() const107 CFX_RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const {
108   return m_pSyntax->m_pFileAccess;
109 }
110 
ShrinkObjectMap(uint32_t objnum)111 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) {
112   if (objnum == 0) {
113     m_ObjectInfo.clear();
114     return;
115   }
116 
117   auto it = m_ObjectInfo.lower_bound(objnum);
118   while (it != m_ObjectInfo.end()) {
119     auto saved_it = it++;
120     m_ObjectInfo.erase(saved_it);
121   }
122 
123   if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
124     m_ObjectInfo[objnum - 1].pos = 0;
125 }
126 
StartParse(const CFX_RetainPtr<IFX_SeekableReadStream> & pFileAccess,CPDF_Document * pDocument)127 CPDF_Parser::Error CPDF_Parser::StartParse(
128     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
129     CPDF_Document* pDocument) {
130   ASSERT(!m_bHasParsed);
131   m_bHasParsed = true;
132   m_bXRefStream = false;
133   m_LastXRefOffset = 0;
134 
135   int32_t offset = GetHeaderOffset(pFileAccess);
136   if (offset == -1)
137     return FORMAT_ERROR;
138 
139   m_pSyntax->InitParser(pFileAccess, offset);
140 
141   uint8_t ch;
142   if (!m_pSyntax->GetCharAt(5, ch))
143     return FORMAT_ERROR;
144 
145   if (std::isdigit(ch))
146     m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
147 
148   if (!m_pSyntax->GetCharAt(7, ch))
149     return FORMAT_ERROR;
150 
151   if (std::isdigit(ch))
152     m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
153 
154   if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
155     return FORMAT_ERROR;
156 
157   m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
158   m_pDocument = pDocument;
159 
160   bool bXRefRebuilt = false;
161   if (m_pSyntax->SearchWord("startxref", true, false, 4096)) {
162     m_SortedOffset.insert(m_pSyntax->SavePos());
163     m_pSyntax->GetKeyword();
164 
165     bool bNumber;
166     CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
167     if (!bNumber)
168       return FORMAT_ERROR;
169 
170     m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str());
171     if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
172         !LoadAllCrossRefV5(m_LastXRefOffset)) {
173       if (!RebuildCrossRef())
174         return FORMAT_ERROR;
175 
176       bXRefRebuilt = true;
177       m_LastXRefOffset = 0;
178     }
179   } else {
180     if (!RebuildCrossRef())
181       return FORMAT_ERROR;
182 
183     bXRefRebuilt = true;
184   }
185   Error eRet = SetEncryptHandler();
186   if (eRet != SUCCESS)
187     return eRet;
188 
189   m_pDocument->LoadDoc();
190   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
191     if (bXRefRebuilt)
192       return FORMAT_ERROR;
193 
194     ReleaseEncryptHandler();
195     if (!RebuildCrossRef())
196       return FORMAT_ERROR;
197 
198     eRet = SetEncryptHandler();
199     if (eRet != SUCCESS)
200       return eRet;
201 
202     m_pDocument->LoadDoc();
203     if (!m_pDocument->GetRoot())
204       return FORMAT_ERROR;
205   }
206   if (GetRootObjNum() == 0) {
207     ReleaseEncryptHandler();
208     if (!RebuildCrossRef() || GetRootObjNum() == 0)
209       return FORMAT_ERROR;
210 
211     eRet = SetEncryptHandler();
212     if (eRet != SUCCESS)
213       return eRet;
214   }
215   if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
216     CPDF_Reference* pMetadata =
217         ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"));
218     if (pMetadata)
219       m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
220   }
221   return SUCCESS;
222 }
SetEncryptHandler()223 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
224   ReleaseEncryptHandler();
225   SetEncryptDictionary(nullptr);
226 
227   if (!m_pTrailer)
228     return FORMAT_ERROR;
229 
230   CPDF_Object* pEncryptObj = m_pTrailer->GetObjectFor("Encrypt");
231   if (pEncryptObj) {
232     if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
233       SetEncryptDictionary(pEncryptDict);
234     } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
235       pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
236       if (pEncryptObj)
237         SetEncryptDictionary(pEncryptObj->GetDict());
238     }
239   }
240 
241   if (m_pEncryptDict) {
242     CFX_ByteString filter = m_pEncryptDict->GetStringFor("Filter");
243     std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler;
244     Error err = HANDLER_ERROR;
245     if (filter == "Standard") {
246       pSecurityHandler = pdfium::MakeUnique<CPDF_SecurityHandler>();
247       err = PASSWORD_ERROR;
248     }
249     if (!pSecurityHandler)
250       return HANDLER_ERROR;
251 
252     if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
253       return err;
254 
255     m_pSecurityHandler = std::move(pSecurityHandler);
256     std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
257         m_pSecurityHandler->CreateCryptoHandler());
258     if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
259       return HANDLER_ERROR;
260     m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
261   }
262   return SUCCESS;
263 }
264 
ReleaseEncryptHandler()265 void CPDF_Parser::ReleaseEncryptHandler() {
266   m_pSyntax->m_pCryptoHandler.reset();
267   m_pSecurityHandler.reset();
268 }
269 
GetObjectOffset(uint32_t objnum) const270 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const {
271   if (!IsValidObjectNumber(objnum))
272     return 0;
273 
274   if (GetObjectType(objnum) == 1)
275     return GetObjectPositionOrZero(objnum);
276 
277   if (GetObjectType(objnum) == 2) {
278     FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
279     return GetObjectPositionOrZero(pos);
280   }
281   return 0;
282 }
283 
284 // Ideally, all the cross reference entries should be verified.
285 // In reality, we rarely see well-formed cross references don't match
286 // with the objects. crbug/602650 showed a case where object numbers
287 // in the cross reference table are all off by one.
VerifyCrossRefV4()288 bool CPDF_Parser::VerifyCrossRefV4() {
289   for (const auto& it : m_ObjectInfo) {
290     if (it.second.pos == 0)
291       continue;
292     // Find the first non-zero position.
293     FX_FILESIZE SavedPos = m_pSyntax->SavePos();
294     m_pSyntax->RestorePos(it.second.pos);
295     bool is_num = false;
296     CFX_ByteString num_str = m_pSyntax->GetNextWord(&is_num);
297     m_pSyntax->RestorePos(SavedPos);
298     if (!is_num || num_str.IsEmpty() ||
299         FXSYS_atoui(num_str.c_str()) != it.first) {
300       // If the object number read doesn't match the one stored,
301       // something is wrong with the cross reference table.
302       return false;
303     } else {
304       return true;
305     }
306   }
307   return true;
308 }
309 
LoadAllCrossRefV4(FX_FILESIZE xrefpos)310 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
311   if (!LoadCrossRefV4(xrefpos, 0, true))
312     return false;
313 
314   m_pTrailer = LoadTrailerV4();
315   if (!m_pTrailer)
316     return false;
317 
318   int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
319   if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
320     ShrinkObjectMap(xrefsize);
321 
322   std::vector<FX_FILESIZE> CrossRefList;
323   std::vector<FX_FILESIZE> XRefStreamList;
324   std::set<FX_FILESIZE> seen_xrefpos;
325 
326   CrossRefList.push_back(xrefpos);
327   XRefStreamList.push_back(GetDirectInteger(m_pTrailer.get(), "XRefStm"));
328   seen_xrefpos.insert(xrefpos);
329 
330   // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
331   // numerical, GetDirectInteger() returns 0. Loading will end.
332   xrefpos = GetDirectInteger(m_pTrailer.get(), "Prev");
333   while (xrefpos) {
334     // Check for circular references.
335     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
336       return false;
337 
338     seen_xrefpos.insert(xrefpos);
339 
340     // SLOW ...
341     CrossRefList.insert(CrossRefList.begin(), xrefpos);
342     LoadCrossRefV4(xrefpos, 0, true);
343 
344     std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
345     if (!pDict)
346       return false;
347 
348     xrefpos = GetDirectInteger(pDict.get(), "Prev");
349 
350     // SLOW ...
351     XRefStreamList.insert(XRefStreamList.begin(),
352                           pDict->GetIntegerFor("XRefStm"));
353     m_Trailers.push_back(std::move(pDict));
354   }
355 
356   for (size_t i = 0; i < CrossRefList.size(); ++i) {
357     if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], false))
358       return false;
359     if (i == 0 && !VerifyCrossRefV4())
360       return false;
361   }
362   return true;
363 }
364 
LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,uint32_t dwObjCount)365 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
366                                               uint32_t dwObjCount) {
367   if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
368     return false;
369 
370   m_pTrailer = LoadTrailerV4();
371   if (!m_pTrailer)
372     return false;
373 
374   int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
375   if (xrefsize == 0)
376     return false;
377 
378   std::vector<FX_FILESIZE> CrossRefList;
379   std::vector<FX_FILESIZE> XRefStreamList;
380   std::set<FX_FILESIZE> seen_xrefpos;
381 
382   CrossRefList.push_back(xrefpos);
383   XRefStreamList.push_back(GetDirectInteger(m_pTrailer.get(), "XRefStm"));
384   seen_xrefpos.insert(xrefpos);
385 
386   xrefpos = GetDirectInteger(m_pTrailer.get(), "Prev");
387   while (xrefpos) {
388     // Check for circular references.
389     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
390       return false;
391 
392     seen_xrefpos.insert(xrefpos);
393 
394     // SLOW ...
395     CrossRefList.insert(CrossRefList.begin(), xrefpos);
396     LoadCrossRefV4(xrefpos, 0, true);
397 
398     std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
399     if (!pDict)
400       return false;
401 
402     xrefpos = GetDirectInteger(pDict.get(), "Prev");
403 
404     // SLOW ...
405     XRefStreamList.insert(XRefStreamList.begin(),
406                           pDict->GetIntegerFor("XRefStm"));
407     m_Trailers.push_back(std::move(pDict));
408   }
409 
410   for (size_t i = 1; i < CrossRefList.size(); ++i) {
411     if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], false))
412       return false;
413   }
414   return true;
415 }
416 
LoadLinearizedCrossRefV4(FX_FILESIZE pos,uint32_t dwObjCount)417 bool CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
418                                            uint32_t dwObjCount) {
419   FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
420 
421   m_pSyntax->RestorePos(dwStartPos);
422   m_SortedOffset.insert(pos);
423 
424   uint32_t start_objnum = 0;
425   uint32_t count = dwObjCount;
426   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
427 
428   const int32_t recordsize = 20;
429   std::vector<char> buf(1024 * recordsize + 1);
430   buf[1024 * recordsize] = '\0';
431 
432   int32_t nBlocks = count / 1024 + 1;
433   for (int32_t block = 0; block < nBlocks; block++) {
434     int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
435     uint32_t dwReadSize = block_size * recordsize;
436     if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
437       return false;
438 
439     if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
440                               dwReadSize)) {
441       return false;
442     }
443 
444     for (int32_t i = 0; i < block_size; i++) {
445       uint32_t objnum = start_objnum + block * 1024 + i;
446       char* pEntry = &buf[i * recordsize];
447       if (pEntry[17] == 'f') {
448         m_ObjectInfo[objnum].pos = 0;
449         m_ObjectInfo[objnum].type = 0;
450       } else {
451         int32_t offset = FXSYS_atoi(pEntry);
452         if (offset == 0) {
453           for (int32_t c = 0; c < 10; c++) {
454             if (!std::isdigit(pEntry[c]))
455               return false;
456           }
457         }
458 
459         m_ObjectInfo[objnum].pos = offset;
460         int32_t version = FXSYS_atoi(pEntry + 11);
461         if (version >= 1)
462           m_bVersionUpdated = true;
463 
464         m_ObjectInfo[objnum].gennum = version;
465         if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
466           m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
467 
468         m_ObjectInfo[objnum].type = 1;
469       }
470     }
471   }
472   m_pSyntax->RestorePos(SavedPos + count * recordsize);
473   return true;
474 }
475 
LoadCrossRefV4(FX_FILESIZE pos,FX_FILESIZE streampos,bool bSkip)476 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
477                                  FX_FILESIZE streampos,
478                                  bool bSkip) {
479   m_pSyntax->RestorePos(pos);
480   if (m_pSyntax->GetKeyword() != "xref")
481     return false;
482 
483   m_SortedOffset.insert(pos);
484   if (streampos)
485     m_SortedOffset.insert(streampos);
486 
487   while (1) {
488     FX_FILESIZE SavedPos = m_pSyntax->SavePos();
489     bool bIsNumber;
490     CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
491     if (word.IsEmpty())
492       return false;
493 
494     if (!bIsNumber) {
495       m_pSyntax->RestorePos(SavedPos);
496       break;
497     }
498 
499     uint32_t start_objnum = FXSYS_atoui(word.c_str());
500     if (start_objnum >= kMaxObjectNumber)
501       return false;
502 
503     uint32_t count = m_pSyntax->GetDirectNum();
504     m_pSyntax->ToNextWord();
505     SavedPos = m_pSyntax->SavePos();
506     const int32_t recordsize = 20;
507 
508     m_dwXrefStartObjNum = start_objnum;
509     if (!bSkip) {
510       std::vector<char> buf(1024 * recordsize + 1);
511       buf[1024 * recordsize] = '\0';
512 
513       int32_t nBlocks = count / 1024 + 1;
514       for (int32_t block = 0; block < nBlocks; block++) {
515         int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
516         m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
517                              block_size * recordsize);
518 
519         for (int32_t i = 0; i < block_size; i++) {
520           uint32_t objnum = start_objnum + block * 1024 + i;
521           char* pEntry = &buf[i * recordsize];
522           if (pEntry[17] == 'f') {
523             m_ObjectInfo[objnum].pos = 0;
524             m_ObjectInfo[objnum].type = 0;
525           } else {
526             FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
527             if (offset == 0) {
528               for (int32_t c = 0; c < 10; c++) {
529                 if (!std::isdigit(pEntry[c]))
530                   return false;
531               }
532             }
533 
534             m_ObjectInfo[objnum].pos = offset;
535             int32_t version = FXSYS_atoi(pEntry + 11);
536             if (version >= 1)
537               m_bVersionUpdated = true;
538 
539             m_ObjectInfo[objnum].gennum = version;
540             if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
541               m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
542 
543             m_ObjectInfo[objnum].type = 1;
544           }
545         }
546       }
547     }
548     m_pSyntax->RestorePos(SavedPos + count * recordsize);
549   }
550   return !streampos || LoadCrossRefV5(&streampos, false);
551 }
552 
LoadAllCrossRefV5(FX_FILESIZE xrefpos)553 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
554   if (!LoadCrossRefV5(&xrefpos, true))
555     return false;
556 
557   std::set<FX_FILESIZE> seen_xrefpos;
558   while (xrefpos) {
559     seen_xrefpos.insert(xrefpos);
560     if (!LoadCrossRefV5(&xrefpos, false))
561       return false;
562 
563     // Check for circular references.
564     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
565       return false;
566   }
567   m_ObjectStreamMap.clear();
568   m_bXRefStream = true;
569   return true;
570 }
571 
RebuildCrossRef()572 bool CPDF_Parser::RebuildCrossRef() {
573   m_ObjectInfo.clear();
574   m_SortedOffset.clear();
575   m_pTrailer.reset();
576 
577   ParserState state = ParserState::kDefault;
578   int32_t inside_index = 0;
579   uint32_t objnum = 0;
580   uint32_t gennum = 0;
581   int32_t depth = 0;
582   const uint32_t kBufferSize = 4096;
583   std::vector<uint8_t> buffer(kBufferSize);
584 
585   FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
586   FX_FILESIZE start_pos = 0;
587   FX_FILESIZE start_pos1 = 0;
588   FX_FILESIZE last_obj = -1;
589   FX_FILESIZE last_xref = -1;
590   FX_FILESIZE last_trailer = -1;
591 
592   while (pos < m_pSyntax->m_FileLen) {
593     const FX_FILESIZE saved_pos = pos;
594     bool bOverFlow = false;
595     uint32_t size =
596         std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize);
597     if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
598       break;
599 
600     for (uint32_t i = 0; i < size; i++) {
601       uint8_t byte = buffer[i];
602       switch (state) {
603         case ParserState::kDefault:
604           if (PDFCharIsWhitespace(byte)) {
605             state = ParserState::kWhitespace;
606           } else if (std::isdigit(byte)) {
607             --i;
608             state = ParserState::kWhitespace;
609           } else if (byte == '%') {
610             inside_index = 0;
611             state = ParserState::kComment;
612           } else if (byte == '(') {
613             state = ParserState::kString;
614             depth = 1;
615           } else if (byte == '<') {
616             inside_index = 1;
617             state = ParserState::kHexString;
618           } else if (byte == '\\') {
619             state = ParserState::kEscapedString;
620           } else if (byte == 't') {
621             state = ParserState::kTrailer;
622             inside_index = 1;
623           }
624           break;
625 
626         case ParserState::kWhitespace:
627           if (std::isdigit(byte)) {
628             start_pos = pos + i;
629             state = ParserState::kObjNum;
630             objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
631           } else if (byte == 't') {
632             state = ParserState::kTrailer;
633             inside_index = 1;
634           } else if (byte == 'x') {
635             state = ParserState::kXref;
636             inside_index = 1;
637           } else if (!PDFCharIsWhitespace(byte)) {
638             --i;
639             state = ParserState::kDefault;
640           }
641           break;
642 
643         case ParserState::kObjNum:
644           if (std::isdigit(byte)) {
645             objnum =
646                 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
647           } else if (PDFCharIsWhitespace(byte)) {
648             state = ParserState::kPostObjNum;
649           } else {
650             --i;
651             state = ParserState::kEndObj;
652             inside_index = 0;
653           }
654           break;
655 
656         case ParserState::kPostObjNum:
657           if (std::isdigit(byte)) {
658             start_pos1 = pos + i;
659             state = ParserState::kGenNum;
660             gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
661           } else if (byte == 't') {
662             state = ParserState::kTrailer;
663             inside_index = 1;
664           } else if (!PDFCharIsWhitespace(byte)) {
665             --i;
666             state = ParserState::kDefault;
667           }
668           break;
669 
670         case ParserState::kGenNum:
671           if (std::isdigit(byte)) {
672             gennum =
673                 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
674           } else if (PDFCharIsWhitespace(byte)) {
675             state = ParserState::kPostGenNum;
676           } else {
677             --i;
678             state = ParserState::kDefault;
679           }
680           break;
681 
682         case ParserState::kPostGenNum:
683           if (byte == 'o') {
684             state = ParserState::kBeginObj;
685             inside_index = 1;
686           } else if (std::isdigit(byte)) {
687             objnum = gennum;
688             gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
689             start_pos = start_pos1;
690             start_pos1 = pos + i;
691             state = ParserState::kGenNum;
692           } else if (byte == 't') {
693             state = ParserState::kTrailer;
694             inside_index = 1;
695           } else if (!PDFCharIsWhitespace(byte)) {
696             --i;
697             state = ParserState::kDefault;
698           }
699           break;
700 
701         case ParserState::kBeginObj:
702           switch (inside_index) {
703             case 1:
704               if (byte != 'b') {
705                 --i;
706                 state = ParserState::kDefault;
707               } else {
708                 inside_index++;
709               }
710               break;
711             case 2:
712               if (byte != 'j') {
713                 --i;
714                 state = ParserState::kDefault;
715               } else {
716                 inside_index++;
717               }
718               break;
719             case 3:
720               if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
721                 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
722                 m_SortedOffset.insert(obj_pos);
723                 last_obj = start_pos;
724                 FX_FILESIZE obj_end = 0;
725                 std::unique_ptr<CPDF_Object> pObject =
726                     ParseIndirectObjectAtByStrict(m_pDocument, obj_pos, objnum,
727                                                   &obj_end);
728                 if (CPDF_Stream* pStream = ToStream(pObject.get())) {
729                   if (CPDF_Dictionary* pDict = pStream->GetDict()) {
730                     if ((pDict->KeyExist("Type")) &&
731                         (pDict->GetStringFor("Type") == "XRef" &&
732                          pDict->KeyExist("Size"))) {
733                       CPDF_Object* pRoot = pDict->GetObjectFor("Root");
734                       if (pRoot && pRoot->GetDict() &&
735                           pRoot->GetDict()->GetObjectFor("Pages")) {
736                         m_pTrailer = ToDictionary(pDict->Clone());
737                       }
738                     }
739                   }
740                 }
741 
742                 FX_FILESIZE offset = 0;
743                 m_pSyntax->RestorePos(obj_pos);
744                 offset = m_pSyntax->FindTag("obj", 0);
745                 if (offset == -1)
746                   offset = 0;
747                 else
748                   offset += 3;
749 
750                 FX_FILESIZE nLen = obj_end - obj_pos - offset;
751                 if ((uint32_t)nLen > size - i) {
752                   pos = obj_end + m_pSyntax->m_HeaderOffset;
753                   bOverFlow = true;
754                 } else {
755                   i += (uint32_t)nLen;
756                 }
757 
758                 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
759                     m_ObjectInfo[objnum].pos) {
760                   if (pObject) {
761                     uint32_t oldgen = GetObjectGenNum(objnum);
762                     m_ObjectInfo[objnum].pos = obj_pos;
763                     m_ObjectInfo[objnum].gennum = gennum;
764                     if (oldgen != gennum)
765                       m_bVersionUpdated = true;
766                   }
767                 } else {
768                   m_ObjectInfo[objnum].pos = obj_pos;
769                   m_ObjectInfo[objnum].type = 1;
770                   m_ObjectInfo[objnum].gennum = gennum;
771                 }
772               }
773               --i;
774               state = ParserState::kDefault;
775               break;
776           }
777           break;
778 
779         case ParserState::kTrailer:
780           if (inside_index == 7) {
781             if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
782               last_trailer = pos + i - 7;
783               m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
784 
785               std::unique_ptr<CPDF_Object> pObj =
786                   m_pSyntax->GetObject(m_pDocument, 0, 0, true);
787               if (pObj) {
788                 if (pObj->IsDictionary() || pObj->AsStream()) {
789                   CPDF_Stream* pStream = pObj->AsStream();
790                   if (CPDF_Dictionary* pTrailer =
791                           pStream ? pStream->GetDict() : pObj->AsDictionary()) {
792                     if (m_pTrailer) {
793                       CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
794                       CPDF_Reference* pRef = ToReference(pRoot);
795                       if (!pRoot ||
796                           (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
797                            m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
798                         auto it = pTrailer->begin();
799                         while (it != pTrailer->end()) {
800                           const CFX_ByteString& key = it->first;
801                           CPDF_Object* pElement = it->second.get();
802                           ++it;
803                           uint32_t dwObjNum =
804                               pElement ? pElement->GetObjNum() : 0;
805                           if (dwObjNum) {
806                             m_pTrailer->SetNewFor<CPDF_Reference>(
807                                 key, m_pDocument, dwObjNum);
808                           } else {
809                             m_pTrailer->SetFor(key, pElement->Clone());
810                           }
811                         }
812                       }
813                     } else {
814                       if (pObj->IsStream()) {
815                         m_pTrailer = ToDictionary(pTrailer->Clone());
816                       } else {
817                         m_pTrailer = ToDictionary(std::move(pObj));
818                       }
819 
820                       FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
821                       CFX_ByteString strWord = m_pSyntax->GetKeyword();
822                       if (!strWord.Compare("startxref")) {
823                         bool bNumber;
824                         CFX_ByteString bsOffset =
825                             m_pSyntax->GetNextWord(&bNumber);
826                         if (bNumber)
827                           m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
828                       }
829                       m_pSyntax->RestorePos(dwSavePos);
830                     }
831                   }
832                 }
833               }
834             }
835             --i;
836             state = ParserState::kDefault;
837           } else if (byte == "trailer"[inside_index]) {
838             inside_index++;
839           } else {
840             --i;
841             state = ParserState::kDefault;
842           }
843           break;
844 
845         case ParserState::kXref:
846           if (inside_index == 4) {
847             last_xref = pos + i - 4;
848             state = ParserState::kWhitespace;
849           } else if (byte == "xref"[inside_index]) {
850             inside_index++;
851           } else {
852             --i;
853             state = ParserState::kDefault;
854           }
855           break;
856 
857         case ParserState::kComment:
858           if (PDFCharIsLineEnding(byte))
859             state = ParserState::kDefault;
860           break;
861 
862         case ParserState::kString:
863           if (byte == ')') {
864             if (depth > 0)
865               depth--;
866           } else if (byte == '(') {
867             depth++;
868           }
869 
870           if (!depth)
871             state = ParserState::kDefault;
872           break;
873 
874         case ParserState::kHexString:
875           if (byte == '>' || (byte == '<' && inside_index == 1))
876             state = ParserState::kDefault;
877           inside_index = 0;
878           break;
879 
880         case ParserState::kEscapedString:
881           if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
882             --i;
883             state = ParserState::kDefault;
884           }
885           break;
886 
887         case ParserState::kEndObj:
888           if (PDFCharIsWhitespace(byte)) {
889             state = ParserState::kDefault;
890           } else if (byte == '%' || byte == '(' || byte == '<' ||
891                      byte == '\\') {
892             state = ParserState::kDefault;
893             --i;
894           } else if (inside_index == 6) {
895             state = ParserState::kDefault;
896             --i;
897           } else if (byte == "endobj"[inside_index]) {
898             inside_index++;
899           }
900           break;
901       }
902 
903       if (bOverFlow) {
904         size = 0;
905         break;
906       }
907     }
908     pos += size;
909 
910     // If the position has not changed at all or went backwards in a loop
911     // iteration, then break out to prevent infinite looping.
912     if (pos <= saved_pos)
913       break;
914   }
915 
916   if (last_xref != -1 && last_xref > last_obj)
917     last_trailer = last_xref;
918   else if (last_trailer == -1 || last_xref < last_obj)
919     last_trailer = m_pSyntax->m_FileLen;
920 
921   m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
922   return m_pTrailer && !m_ObjectInfo.empty();
923 }
924 
LoadCrossRefV5(FX_FILESIZE * pos,bool bMainXRef)925 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
926   std::unique_ptr<CPDF_Object> pObject(
927       ParseIndirectObjectAt(m_pDocument, *pos, 0));
928   if (!pObject)
929     return false;
930 
931   uint32_t objnum = pObject->m_ObjNum;
932   if (!objnum)
933     return false;
934 
935   CPDF_Object* pUnownedObject = pObject.get();
936   if (m_pDocument) {
937     CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
938     if (pRootDict && pRootDict->GetObjNum() == objnum)
939       return false;
940     if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
941             objnum, std::move(pObject))) {
942       return false;
943     }
944   }
945 
946   CPDF_Stream* pStream = pUnownedObject->AsStream();
947   if (!pStream)
948     return false;
949 
950   CPDF_Dictionary* pDict = pStream->GetDict();
951   *pos = pDict->GetIntegerFor("Prev");
952   int32_t size = pDict->GetIntegerFor("Size");
953   if (size < 0)
954     return false;
955 
956   std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
957   if (bMainXRef) {
958     m_pTrailer = std::move(pNewTrailer);
959     ShrinkObjectMap(size);
960     for (auto& it : m_ObjectInfo)
961       it.second.type = 0;
962   } else {
963     m_Trailers.push_back(std::move(pNewTrailer));
964   }
965 
966   std::vector<std::pair<int32_t, int32_t>> arrIndex;
967   CPDF_Array* pArray = pDict->GetArrayFor("Index");
968   if (pArray) {
969     for (size_t i = 0; i < pArray->GetCount() / 2; i++) {
970       CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
971       CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);
972 
973       if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
974         int nStartNum = pStartNumObj->GetInteger();
975         int nCount = pCountObj->GetInteger();
976         if (nStartNum >= 0 && nCount > 0)
977           arrIndex.push_back(std::make_pair(nStartNum, nCount));
978       }
979     }
980   }
981 
982   if (arrIndex.size() == 0)
983     arrIndex.push_back(std::make_pair(0, size));
984 
985   pArray = pDict->GetArrayFor("W");
986   if (!pArray)
987     return false;
988 
989   std::vector<uint32_t> WidthArray;
990   FX_SAFE_UINT32 dwAccWidth = 0;
991   for (size_t i = 0; i < pArray->GetCount(); ++i) {
992     WidthArray.push_back(pArray->GetIntegerAt(i));
993     dwAccWidth += WidthArray[i];
994   }
995 
996   if (!dwAccWidth.IsValid() || WidthArray.size() < 3)
997     return false;
998 
999   uint32_t totalWidth = dwAccWidth.ValueOrDie();
1000   CPDF_StreamAcc acc;
1001   acc.LoadAllData(pStream);
1002 
1003   const uint8_t* pData = acc.GetData();
1004   uint32_t dwTotalSize = acc.GetSize();
1005   uint32_t segindex = 0;
1006   for (uint32_t i = 0; i < arrIndex.size(); i++) {
1007     int32_t startnum = arrIndex[i].first;
1008     if (startnum < 0)
1009       continue;
1010 
1011     m_dwXrefStartObjNum = pdfium::base::checked_cast<uint32_t>(startnum);
1012     uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second);
1013     FX_SAFE_UINT32 dwCaculatedSize = segindex;
1014     dwCaculatedSize += count;
1015     dwCaculatedSize *= totalWidth;
1016     if (!dwCaculatedSize.IsValid() ||
1017         dwCaculatedSize.ValueOrDie() > dwTotalSize) {
1018       continue;
1019     }
1020 
1021     const uint8_t* segstart = pData + segindex * totalWidth;
1022     FX_SAFE_UINT32 dwMaxObjNum = startnum;
1023     dwMaxObjNum += count;
1024     uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
1025     if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
1026       continue;
1027 
1028     for (uint32_t j = 0; j < count; j++) {
1029       int32_t type = 1;
1030       const uint8_t* entrystart = segstart + j * totalWidth;
1031       if (WidthArray[0])
1032         type = GetVarInt(entrystart, WidthArray[0]);
1033 
1034       if (GetObjectType(startnum + j) == 255) {
1035         FX_FILESIZE offset =
1036             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1037         m_ObjectInfo[startnum + j].pos = offset;
1038         m_SortedOffset.insert(offset);
1039         continue;
1040       }
1041 
1042       if (GetObjectType(startnum + j))
1043         continue;
1044 
1045       m_ObjectInfo[startnum + j].type = type;
1046       if (type == 0) {
1047         m_ObjectInfo[startnum + j].pos = 0;
1048       } else {
1049         FX_FILESIZE offset =
1050             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1051         m_ObjectInfo[startnum + j].pos = offset;
1052         if (type == 1) {
1053           m_SortedOffset.insert(offset);
1054         } else {
1055           if (offset < 0 || !IsValidObjectNumber(offset))
1056             return false;
1057           m_ObjectInfo[offset].type = 255;
1058         }
1059       }
1060     }
1061     segindex += count;
1062   }
1063   return true;
1064 }
1065 
GetIDArray()1066 CPDF_Array* CPDF_Parser::GetIDArray() {
1067   if (!m_pTrailer)
1068     return nullptr;
1069 
1070   CPDF_Object* pID = m_pTrailer->GetObjectFor("ID");
1071   if (!pID)
1072     return nullptr;
1073 
1074   CPDF_Reference* pRef = pID->AsReference();
1075   if (!pRef)
1076     return ToArray(pID);
1077 
1078   std::unique_ptr<CPDF_Object> pNewObj =
1079       ParseIndirectObject(nullptr, pRef->GetRefObjNum());
1080   pID = pNewObj.get();
1081   m_pTrailer->SetFor("ID", std::move(pNewObj));
1082   return ToArray(pID);
1083 }
1084 
GetRootObjNum()1085 uint32_t CPDF_Parser::GetRootObjNum() {
1086   CPDF_Reference* pRef =
1087       ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Root") : nullptr);
1088   return pRef ? pRef->GetRefObjNum() : 0;
1089 }
1090 
GetInfoObjNum()1091 uint32_t CPDF_Parser::GetInfoObjNum() {
1092   CPDF_Reference* pRef =
1093       ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr);
1094   return pRef ? pRef->GetRefObjNum() : 0;
1095 }
1096 
ParseIndirectObject(CPDF_IndirectObjectHolder * pObjList,uint32_t objnum)1097 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
1098     CPDF_IndirectObjectHolder* pObjList,
1099     uint32_t objnum) {
1100   if (!IsValidObjectNumber(objnum))
1101     return nullptr;
1102 
1103   // Prevent circular parsing the same object.
1104   if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1105     return nullptr;
1106 
1107   pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
1108   if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) {
1109     FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1110     if (pos <= 0)
1111       return nullptr;
1112     return ParseIndirectObjectAt(pObjList, pos, objnum);
1113   }
1114   if (GetObjectType(objnum) != 2)
1115     return nullptr;
1116 
1117   CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1118   if (!pObjStream)
1119     return nullptr;
1120 
1121   CFX_RetainPtr<IFX_MemoryStream> file = IFX_MemoryStream::Create(
1122       (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), false);
1123   CPDF_SyntaxParser syntax;
1124   syntax.InitParser(file, 0);
1125   const int32_t offset = GetStreamFirst(pObjStream);
1126 
1127   // Read object numbers from |pObjStream| into a cache.
1128   if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1129     for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1130       uint32_t thisnum = syntax.GetDirectNum();
1131       uint32_t thisoff = syntax.GetDirectNum();
1132       m_ObjCache[pObjStream][thisnum] = thisoff;
1133     }
1134   }
1135 
1136   const auto it = m_ObjCache[pObjStream].find(objnum);
1137   if (it == m_ObjCache[pObjStream].end())
1138     return nullptr;
1139 
1140   syntax.RestorePos(offset + it->second);
1141   return syntax.GetObject(pObjList, 0, 0, true);
1142 }
1143 
GetObjectStream(uint32_t objnum)1144 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(uint32_t objnum) {
1145   auto it = m_ObjectStreamMap.find(objnum);
1146   if (it != m_ObjectStreamMap.end())
1147     return it->second.get();
1148 
1149   if (!m_pDocument)
1150     return nullptr;
1151 
1152   const CPDF_Stream* pStream =
1153       ToStream(m_pDocument->GetOrParseIndirectObject(objnum));
1154   if (!pStream)
1155     return nullptr;
1156 
1157   CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc;
1158   pStreamAcc->LoadAllData(pStream);
1159   m_ObjectStreamMap[objnum].reset(pStreamAcc);
1160   return pStreamAcc;
1161 }
1162 
GetObjectSize(uint32_t objnum) const1163 FX_FILESIZE CPDF_Parser::GetObjectSize(uint32_t objnum) const {
1164   if (!IsValidObjectNumber(objnum))
1165     return 0;
1166 
1167   if (GetObjectType(objnum) == 2)
1168     objnum = GetObjectPositionOrZero(objnum);
1169 
1170   if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255)
1171     return 0;
1172 
1173   FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
1174   if (offset == 0)
1175     return 0;
1176 
1177   auto it = m_SortedOffset.find(offset);
1178   if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end())
1179     return 0;
1180 
1181   return *it - offset;
1182 }
1183 
GetIndirectBinary(uint32_t objnum,uint8_t * & pBuffer,uint32_t & size)1184 void CPDF_Parser::GetIndirectBinary(uint32_t objnum,
1185                                     uint8_t*& pBuffer,
1186                                     uint32_t& size) {
1187   pBuffer = nullptr;
1188   size = 0;
1189   if (!IsValidObjectNumber(objnum))
1190     return;
1191 
1192   if (GetObjectType(objnum) == 2) {
1193     CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1194     if (!pObjStream)
1195       return;
1196 
1197     int32_t offset = GetStreamFirst(pObjStream);
1198     const uint8_t* pData = pObjStream->GetData();
1199     uint32_t totalsize = pObjStream->GetSize();
1200     CFX_RetainPtr<IFX_MemoryStream> file =
1201         IFX_MemoryStream::Create((uint8_t*)pData, (size_t)totalsize, false);
1202     CPDF_SyntaxParser syntax;
1203     syntax.InitParser(file, 0);
1204 
1205     for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
1206       uint32_t thisnum = syntax.GetDirectNum();
1207       uint32_t thisoff = syntax.GetDirectNum();
1208       if (thisnum != objnum)
1209         continue;
1210 
1211       if (i == 1) {
1212         size = totalsize - (thisoff + offset);
1213       } else {
1214         syntax.GetDirectNum();  // Skip nextnum.
1215         uint32_t nextoff = syntax.GetDirectNum();
1216         size = nextoff - thisoff;
1217       }
1218 
1219       pBuffer = FX_Alloc(uint8_t, size);
1220       FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
1221       return;
1222     }
1223     return;
1224   }
1225 
1226   if (GetObjectType(objnum) != 1)
1227     return;
1228 
1229   FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1230   if (pos == 0)
1231     return;
1232 
1233   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1234   m_pSyntax->RestorePos(pos);
1235 
1236   bool bIsNumber;
1237   CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1238   if (!bIsNumber) {
1239     m_pSyntax->RestorePos(SavedPos);
1240     return;
1241   }
1242 
1243   uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1244   if (parser_objnum && parser_objnum != objnum) {
1245     m_pSyntax->RestorePos(SavedPos);
1246     return;
1247   }
1248 
1249   word = m_pSyntax->GetNextWord(&bIsNumber);
1250   if (!bIsNumber) {
1251     m_pSyntax->RestorePos(SavedPos);
1252     return;
1253   }
1254 
1255   if (m_pSyntax->GetKeyword() != "obj") {
1256     m_pSyntax->RestorePos(SavedPos);
1257     return;
1258   }
1259 
1260   auto it = m_SortedOffset.find(pos);
1261   if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
1262     m_pSyntax->RestorePos(SavedPos);
1263     return;
1264   }
1265 
1266   FX_FILESIZE nextoff = *it;
1267   bool bNextOffValid = false;
1268   if (nextoff != pos) {
1269     m_pSyntax->RestorePos(nextoff);
1270     word = m_pSyntax->GetNextWord(&bIsNumber);
1271     if (word == "xref") {
1272       bNextOffValid = true;
1273     } else if (bIsNumber) {
1274       word = m_pSyntax->GetNextWord(&bIsNumber);
1275       if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
1276         bNextOffValid = true;
1277       }
1278     }
1279   }
1280 
1281   if (!bNextOffValid) {
1282     m_pSyntax->RestorePos(pos);
1283     while (1) {
1284       if (m_pSyntax->GetKeyword() == "endobj")
1285         break;
1286 
1287       if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
1288         break;
1289     }
1290     nextoff = m_pSyntax->SavePos();
1291   }
1292 
1293   size = (uint32_t)(nextoff - pos);
1294   pBuffer = FX_Alloc(uint8_t, size);
1295   m_pSyntax->RestorePos(pos);
1296   m_pSyntax->ReadBlock(pBuffer, size);
1297   m_pSyntax->RestorePos(SavedPos);
1298 }
1299 
ParseIndirectObjectAt(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum)1300 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(
1301     CPDF_IndirectObjectHolder* pObjList,
1302     FX_FILESIZE pos,
1303     uint32_t objnum) {
1304   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1305   m_pSyntax->RestorePos(pos);
1306   bool bIsNumber;
1307   CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1308   if (!bIsNumber) {
1309     m_pSyntax->RestorePos(SavedPos);
1310     return nullptr;
1311   }
1312 
1313   FX_FILESIZE objOffset = m_pSyntax->SavePos();
1314   objOffset -= word.GetLength();
1315   uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1316   if (objnum && parser_objnum != objnum) {
1317     m_pSyntax->RestorePos(SavedPos);
1318     return nullptr;
1319   }
1320 
1321   word = m_pSyntax->GetNextWord(&bIsNumber);
1322   if (!bIsNumber) {
1323     m_pSyntax->RestorePos(SavedPos);
1324     return nullptr;
1325   }
1326 
1327   uint32_t parser_gennum = FXSYS_atoui(word.c_str());
1328   if (m_pSyntax->GetKeyword() != "obj") {
1329     m_pSyntax->RestorePos(SavedPos);
1330     return nullptr;
1331   }
1332 
1333   std::unique_ptr<CPDF_Object> pObj =
1334       m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
1335   m_pSyntax->SavePos();
1336 
1337   CFX_ByteString bsWord = m_pSyntax->GetKeyword();
1338   if (bsWord == "endobj")
1339     m_pSyntax->SavePos();
1340 
1341   m_pSyntax->RestorePos(SavedPos);
1342   if (pObj) {
1343     if (!objnum)
1344       pObj->m_ObjNum = parser_objnum;
1345     pObj->m_GenNum = parser_gennum;
1346   }
1347   return pObj;
1348 }
1349 
ParseIndirectObjectAtByStrict(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum,FX_FILESIZE * pResultPos)1350 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict(
1351     CPDF_IndirectObjectHolder* pObjList,
1352     FX_FILESIZE pos,
1353     uint32_t objnum,
1354     FX_FILESIZE* pResultPos) {
1355   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1356   m_pSyntax->RestorePos(pos);
1357 
1358   bool bIsNumber;
1359   CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1360   if (!bIsNumber) {
1361     m_pSyntax->RestorePos(SavedPos);
1362     return nullptr;
1363   }
1364 
1365   uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1366   if (objnum && parser_objnum != objnum) {
1367     m_pSyntax->RestorePos(SavedPos);
1368     return nullptr;
1369   }
1370 
1371   word = m_pSyntax->GetNextWord(&bIsNumber);
1372   if (!bIsNumber) {
1373     m_pSyntax->RestorePos(SavedPos);
1374     return nullptr;
1375   }
1376 
1377   uint32_t gennum = FXSYS_atoui(word.c_str());
1378   if (m_pSyntax->GetKeyword() != "obj") {
1379     m_pSyntax->RestorePos(SavedPos);
1380     return nullptr;
1381   }
1382 
1383   std::unique_ptr<CPDF_Object> pObj =
1384       m_pSyntax->GetObjectForStrict(pObjList, objnum, gennum);
1385 
1386   if (pResultPos)
1387     *pResultPos = m_pSyntax->m_Pos;
1388 
1389   m_pSyntax->RestorePos(SavedPos);
1390   return pObj;
1391 }
1392 
GetFirstPageNo() const1393 uint32_t CPDF_Parser::GetFirstPageNo() const {
1394   return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1395 }
1396 
LoadTrailerV4()1397 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
1398   if (m_pSyntax->GetKeyword() != "trailer")
1399     return nullptr;
1400 
1401   return ToDictionary(m_pSyntax->GetObject(m_pDocument, 0, 0, true));
1402 }
1403 
GetPermissions() const1404 uint32_t CPDF_Parser::GetPermissions() const {
1405   if (!m_pSecurityHandler)
1406     return 0xFFFFFFFF;
1407 
1408   uint32_t dwPermission = m_pSecurityHandler->GetPermissions();
1409   if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") {
1410     // See PDF Reference 1.7, page 123, table 3.20.
1411     dwPermission &= 0xFFFFFFFC;
1412     dwPermission |= 0xFFFFF0C0;
1413   }
1414   return dwPermission;
1415 }
1416 
IsLinearizedFile(const CFX_RetainPtr<IFX_SeekableReadStream> & pFileAccess,uint32_t offset)1417 bool CPDF_Parser::IsLinearizedFile(
1418     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
1419     uint32_t offset) {
1420   m_pSyntax->InitParser(pFileAccess, offset);
1421   m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
1422 
1423   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1424   bool bIsNumber;
1425   CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1426   if (!bIsNumber)
1427     return false;
1428 
1429   uint32_t objnum = FXSYS_atoui(word.c_str());
1430   word = m_pSyntax->GetNextWord(&bIsNumber);
1431   if (!bIsNumber)
1432     return false;
1433 
1434   uint32_t gennum = FXSYS_atoui(word.c_str());
1435   if (m_pSyntax->GetKeyword() != "obj") {
1436     m_pSyntax->RestorePos(SavedPos);
1437     return false;
1438   }
1439 
1440   m_pLinearized = CPDF_LinearizedHeader::CreateForObject(
1441       m_pSyntax->GetObject(nullptr, objnum, gennum, true));
1442   if (!m_pLinearized)
1443     return false;
1444 
1445   m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
1446   // Move parser onto first page xref table start.
1447   m_pSyntax->GetNextWord(nullptr);
1448   return true;
1449 }
1450 
StartLinearizedParse(const CFX_RetainPtr<IFX_SeekableReadStream> & pFileAccess,CPDF_Document * pDocument)1451 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
1452     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
1453     CPDF_Document* pDocument) {
1454   ASSERT(!m_bHasParsed);
1455   m_bXRefStream = false;
1456   m_LastXRefOffset = 0;
1457 
1458   int32_t offset = GetHeaderOffset(pFileAccess);
1459   if (offset == -1)
1460     return FORMAT_ERROR;
1461 
1462   if (!IsLinearizedFile(pFileAccess, offset)) {
1463     m_pSyntax->m_pFileAccess = nullptr;
1464     return StartParse(pFileAccess, std::move(pDocument));
1465   }
1466   m_bHasParsed = true;
1467   m_pDocument = pDocument;
1468 
1469   FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
1470   bool bXRefRebuilt = false;
1471   bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, false);
1472   if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
1473     if (!RebuildCrossRef())
1474       return FORMAT_ERROR;
1475 
1476     bXRefRebuilt = true;
1477     m_LastXRefOffset = 0;
1478   }
1479 
1480   if (bLoadV4) {
1481     m_pTrailer = LoadTrailerV4();
1482     if (!m_pTrailer)
1483       return SUCCESS;
1484 
1485     int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
1486     if (xrefsize > 0)
1487       ShrinkObjectMap(xrefsize);
1488   }
1489 
1490   Error eRet = SetEncryptHandler();
1491   if (eRet != SUCCESS)
1492     return eRet;
1493 
1494   m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
1495   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
1496     if (bXRefRebuilt)
1497       return FORMAT_ERROR;
1498 
1499     ReleaseEncryptHandler();
1500     if (!RebuildCrossRef())
1501       return FORMAT_ERROR;
1502 
1503     eRet = SetEncryptHandler();
1504     if (eRet != SUCCESS)
1505       return eRet;
1506 
1507     m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
1508     if (!m_pDocument->GetRoot())
1509       return FORMAT_ERROR;
1510   }
1511 
1512   if (GetRootObjNum() == 0) {
1513     ReleaseEncryptHandler();
1514     if (!RebuildCrossRef() || GetRootObjNum() == 0)
1515       return FORMAT_ERROR;
1516 
1517     eRet = SetEncryptHandler();
1518     if (eRet != SUCCESS)
1519       return eRet;
1520   }
1521 
1522   if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1523     if (CPDF_Reference* pMetadata =
1524             ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")))
1525       m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
1526   }
1527   return SUCCESS;
1528 }
1529 
LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)1530 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1531   if (!LoadCrossRefV5(&xrefpos, false))
1532     return false;
1533 
1534   std::set<FX_FILESIZE> seen_xrefpos;
1535   while (xrefpos) {
1536     seen_xrefpos.insert(xrefpos);
1537     if (!LoadCrossRefV5(&xrefpos, false))
1538       return false;
1539 
1540     // Check for circular references.
1541     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
1542       return false;
1543   }
1544   m_ObjectStreamMap.clear();
1545   m_bXRefStream = true;
1546   return true;
1547 }
1548 
LoadLinearizedMainXRefTable()1549 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1550   uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
1551   m_pSyntax->m_MetadataObjnum = 0;
1552   m_pTrailer.reset();
1553   m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
1554 
1555   uint8_t ch = 0;
1556   uint32_t dwCount = 0;
1557   m_pSyntax->GetNextChar(ch);
1558   while (PDFCharIsWhitespace(ch)) {
1559     ++dwCount;
1560     if (m_pSyntax->m_FileLen <=
1561         (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
1562       break;
1563     }
1564     m_pSyntax->GetNextChar(ch);
1565   }
1566   m_LastXRefOffset += dwCount;
1567   m_ObjectStreamMap.clear();
1568   m_ObjCache.clear();
1569 
1570   if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
1571       !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1572     m_LastXRefOffset = 0;
1573     m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1574     return FORMAT_ERROR;
1575   }
1576 
1577   m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1578   return SUCCESS;
1579 }
1580