1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
8 
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12 
13 #include "core/fpdfapi/cpdf_modulemgr.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_boolean.h"
16 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_name.h"
19 #include "core/fpdfapi/parser/cpdf_null.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_stream.h"
23 #include "core/fpdfapi/parser/cpdf_string.h"
24 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
25 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
26 #include "core/fxcrt/fx_ext.h"
27 #include "third_party/base/numerics/safe_math.h"
28 #include "third_party/base/ptr_util.h"
29 
namespace {

// States of the literal-string scanner in CPDF_SyntaxParser::ReadString().
enum class ReadStatus {
  Normal,          // Ordinary characters, parentheses, start of an escape.
  Backslash,       // The previous character was a backslash.
  Octal,           // One octal digit of an escape has been read.
  FinishOctal,     // Two octal digits of an escape have been read.
  CarriageReturn,  // A backslash followed by '\r' has been read.
};

}  // namespace
35 
36 // static
37 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
38 
CPDF_SyntaxParser()39 CPDF_SyntaxParser::CPDF_SyntaxParser()
40     : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {}
41 
CPDF_SyntaxParser(const CFX_WeakPtr<CFX_ByteStringPool> & pPool)42 CPDF_SyntaxParser::CPDF_SyntaxParser(
43     const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
44     : m_MetadataObjnum(0),
45       m_pFileAccess(nullptr),
46       m_pFileBuf(nullptr),
47       m_BufSize(CPDF_ModuleMgr::kFileBufSize),
48       m_pPool(pPool) {}
49 
~CPDF_SyntaxParser()50 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
51   FX_Free(m_pFileBuf);
52 }
53 
GetCharAt(FX_FILESIZE pos,uint8_t & ch)54 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
55   CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
56   m_Pos = pos;
57   return GetNextChar(ch);
58 }
59 
ReadChar(FX_FILESIZE read_pos,uint32_t read_size)60 bool CPDF_SyntaxParser::ReadChar(FX_FILESIZE read_pos, uint32_t read_size) {
61   if (static_cast<FX_FILESIZE>(read_pos + read_size) > m_FileLen) {
62     if (m_FileLen < static_cast<FX_FILESIZE>(read_size)) {
63       read_pos = 0;
64       read_size = static_cast<uint32_t>(m_FileLen);
65     } else {
66       read_pos = m_FileLen - read_size;
67     }
68   }
69   if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
70     return false;
71 
72   m_BufOffset = read_pos;
73   return true;
74 }
75 
GetNextChar(uint8_t & ch)76 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
77   FX_FILESIZE pos = m_Pos + m_HeaderOffset;
78   if (pos >= m_FileLen)
79     return false;
80 
81   if (CheckPosition(pos)) {
82     FX_FILESIZE read_pos = pos;
83     uint32_t read_size = m_BufSize;
84     read_size = std::min(read_size, static_cast<uint32_t>(m_FileLen));
85     if (!ReadChar(read_pos, read_size))
86       return false;
87   }
88   ch = m_pFileBuf[pos - m_BufOffset];
89   m_Pos++;
90   return true;
91 }
92 
GetCharAtBackward(FX_FILESIZE pos,uint8_t & ch)93 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
94   pos += m_HeaderOffset;
95   if (pos >= m_FileLen)
96     return false;
97 
98   if (CheckPosition(pos)) {
99     FX_FILESIZE read_pos;
100     if (pos < static_cast<FX_FILESIZE>(m_BufSize))
101       read_pos = 0;
102     else
103       read_pos = pos - m_BufSize + 1;
104     uint32_t read_size = m_BufSize;
105     if (!ReadChar(read_pos, read_size))
106       return false;
107   }
108   ch = m_pFileBuf[pos - m_BufOffset];
109   return true;
110 }
111 
ReadBlock(uint8_t * pBuf,uint32_t size)112 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
113   if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
114     return false;
115   m_Pos += size;
116   return true;
117 }
118 
GetNextWordInternal(bool * bIsNumber)119 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
120   m_WordSize = 0;
121   if (bIsNumber)
122     *bIsNumber = true;
123 
124   uint8_t ch;
125   if (!GetNextChar(ch))
126     return;
127 
128   while (1) {
129     while (PDFCharIsWhitespace(ch)) {
130       if (!GetNextChar(ch))
131         return;
132     }
133 
134     if (ch != '%')
135       break;
136 
137     while (1) {
138       if (!GetNextChar(ch))
139         return;
140       if (PDFCharIsLineEnding(ch))
141         break;
142     }
143   }
144 
145   if (PDFCharIsDelimiter(ch)) {
146     if (bIsNumber)
147       *bIsNumber = false;
148 
149     m_WordBuffer[m_WordSize++] = ch;
150     if (ch == '/') {
151       while (1) {
152         if (!GetNextChar(ch))
153           return;
154 
155         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
156           m_Pos--;
157           return;
158         }
159 
160         if (m_WordSize < sizeof(m_WordBuffer) - 1)
161           m_WordBuffer[m_WordSize++] = ch;
162       }
163     } else if (ch == '<') {
164       if (!GetNextChar(ch))
165         return;
166 
167       if (ch == '<')
168         m_WordBuffer[m_WordSize++] = ch;
169       else
170         m_Pos--;
171     } else if (ch == '>') {
172       if (!GetNextChar(ch))
173         return;
174 
175       if (ch == '>')
176         m_WordBuffer[m_WordSize++] = ch;
177       else
178         m_Pos--;
179     }
180     return;
181   }
182 
183   while (1) {
184     if (m_WordSize < sizeof(m_WordBuffer) - 1)
185       m_WordBuffer[m_WordSize++] = ch;
186 
187     if (!PDFCharIsNumeric(ch)) {
188       if (bIsNumber)
189         *bIsNumber = false;
190     }
191 
192     if (!GetNextChar(ch))
193       return;
194 
195     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
196       m_Pos--;
197       break;
198     }
199   }
200 }
201 
ReadString()202 CFX_ByteString CPDF_SyntaxParser::ReadString() {
203   uint8_t ch;
204   if (!GetNextChar(ch))
205     return CFX_ByteString();
206 
207   CFX_ByteTextBuf buf;
208   int32_t parlevel = 0;
209   ReadStatus status = ReadStatus::Normal;
210   int32_t iEscCode = 0;
211   while (1) {
212     switch (status) {
213       case ReadStatus::Normal:
214         if (ch == ')') {
215           if (parlevel == 0)
216             return buf.MakeString();
217           parlevel--;
218         } else if (ch == '(') {
219           parlevel++;
220         }
221         if (ch == '\\')
222           status = ReadStatus::Backslash;
223         else
224           buf.AppendChar(ch);
225         break;
226       case ReadStatus::Backslash:
227         if (ch >= '0' && ch <= '7') {
228           iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
229           status = ReadStatus::Octal;
230           break;
231         }
232 
233         if (ch == 'n') {
234           buf.AppendChar('\n');
235         } else if (ch == 'r') {
236           buf.AppendChar('\r');
237         } else if (ch == 't') {
238           buf.AppendChar('\t');
239         } else if (ch == 'b') {
240           buf.AppendChar('\b');
241         } else if (ch == 'f') {
242           buf.AppendChar('\f');
243         } else if (ch == '\r') {
244           status = ReadStatus::CarriageReturn;
245           break;
246         } else if (ch != '\n') {
247           buf.AppendChar(ch);
248         }
249         status = ReadStatus::Normal;
250         break;
251       case ReadStatus::Octal:
252         if (ch >= '0' && ch <= '7') {
253           iEscCode =
254               iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
255           status = ReadStatus::FinishOctal;
256         } else {
257           buf.AppendChar(iEscCode);
258           status = ReadStatus::Normal;
259           continue;
260         }
261         break;
262       case ReadStatus::FinishOctal:
263         status = ReadStatus::Normal;
264         if (ch >= '0' && ch <= '7') {
265           iEscCode =
266               iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
267           buf.AppendChar(iEscCode);
268         } else {
269           buf.AppendChar(iEscCode);
270           continue;
271         }
272         break;
273       case ReadStatus::CarriageReturn:
274         status = ReadStatus::Normal;
275         if (ch != '\n')
276           continue;
277         break;
278     }
279 
280     if (!GetNextChar(ch))
281       break;
282   }
283 
284   GetNextChar(ch);
285   return buf.MakeString();
286 }
287 
ReadHexString()288 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
289   uint8_t ch;
290   if (!GetNextChar(ch))
291     return CFX_ByteString();
292 
293   CFX_ByteTextBuf buf;
294   bool bFirst = true;
295   uint8_t code = 0;
296   while (1) {
297     if (ch == '>')
298       break;
299 
300     if (std::isxdigit(ch)) {
301       int val = FXSYS_toHexDigit(ch);
302       if (bFirst) {
303         code = val * 16;
304       } else {
305         code += val;
306         buf.AppendByte(code);
307       }
308       bFirst = !bFirst;
309     }
310 
311     if (!GetNextChar(ch))
312       break;
313   }
314   if (!bFirst)
315     buf.AppendByte(code);
316 
317   return buf.MakeString();
318 }
319 
ToNextLine()320 void CPDF_SyntaxParser::ToNextLine() {
321   uint8_t ch;
322   while (GetNextChar(ch)) {
323     if (ch == '\n')
324       break;
325 
326     if (ch == '\r') {
327       GetNextChar(ch);
328       if (ch != '\n')
329         --m_Pos;
330       break;
331     }
332   }
333 }
334 
ToNextWord()335 void CPDF_SyntaxParser::ToNextWord() {
336   uint8_t ch;
337   if (!GetNextChar(ch))
338     return;
339 
340   while (1) {
341     while (PDFCharIsWhitespace(ch)) {
342       if (!GetNextChar(ch))
343         return;
344     }
345 
346     if (ch != '%')
347       break;
348 
349     while (1) {
350       if (!GetNextChar(ch))
351         return;
352       if (PDFCharIsLineEnding(ch))
353         break;
354     }
355   }
356   m_Pos--;
357 }
358 
GetNextWord(bool * bIsNumber)359 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
360   GetNextWordInternal(bIsNumber);
361   return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
362 }
363 
GetKeyword()364 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
365   return GetNextWord(nullptr);
366 }
367 
GetObject(CPDF_IndirectObjectHolder * pObjList,uint32_t objnum,uint32_t gennum,bool bDecrypt)368 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObject(
369     CPDF_IndirectObjectHolder* pObjList,
370     uint32_t objnum,
371     uint32_t gennum,
372     bool bDecrypt) {
373   CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
374   if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
375     return nullptr;
376 
377   FX_FILESIZE SavedObjPos = m_Pos;
378   bool bIsNumber;
379   CFX_ByteString word = GetNextWord(&bIsNumber);
380   if (word.GetLength() == 0)
381     return nullptr;
382 
383   if (bIsNumber) {
384     FX_FILESIZE SavedPos = m_Pos;
385     CFX_ByteString nextword = GetNextWord(&bIsNumber);
386     if (bIsNumber) {
387       CFX_ByteString nextword2 = GetNextWord(nullptr);
388       if (nextword2 == "R") {
389         uint32_t objnum = FXSYS_atoui(word.c_str());
390         if (objnum == CPDF_Object::kInvalidObjNum)
391           return nullptr;
392         return pdfium::MakeUnique<CPDF_Reference>(pObjList, objnum);
393       }
394     }
395     m_Pos = SavedPos;
396     return pdfium::MakeUnique<CPDF_Number>(word.AsStringC());
397   }
398 
399   if (word == "true" || word == "false")
400     return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
401 
402   if (word == "null")
403     return pdfium::MakeUnique<CPDF_Null>();
404 
405   if (word == "(") {
406     CFX_ByteString str = ReadString();
407     if (m_pCryptoHandler && bDecrypt)
408       m_pCryptoHandler->Decrypt(objnum, gennum, str);
409     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
410   }
411   if (word == "<") {
412     CFX_ByteString str = ReadHexString();
413     if (m_pCryptoHandler && bDecrypt)
414       m_pCryptoHandler->Decrypt(objnum, gennum, str);
415     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
416   }
417   if (word == "[") {
418     std::unique_ptr<CPDF_Array> pArray = pdfium::MakeUnique<CPDF_Array>();
419     while (std::unique_ptr<CPDF_Object> pObj =
420                GetObject(pObjList, objnum, gennum, true)) {
421       pArray->Add(std::move(pObj));
422     }
423     return std::move(pArray);
424   }
425   if (word[0] == '/') {
426     return pdfium::MakeUnique<CPDF_Name>(
427         m_pPool,
428         PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
429   }
430   if (word == "<<") {
431     int32_t nKeys = 0;
432     FX_FILESIZE dwSignValuePos = 0;
433     std::unique_ptr<CPDF_Dictionary> pDict =
434         pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
435     while (1) {
436       CFX_ByteString key = GetNextWord(nullptr);
437       if (key.IsEmpty())
438         return nullptr;
439 
440       FX_FILESIZE SavedPos = m_Pos - key.GetLength();
441       if (key == ">>")
442         break;
443 
444       if (key == "endobj") {
445         m_Pos = SavedPos;
446         break;
447       }
448       if (key[0] != '/')
449         continue;
450 
451       ++nKeys;
452       key = PDF_NameDecode(key);
453       if (key.IsEmpty())
454         continue;
455 
456       if (key == "/Contents")
457         dwSignValuePos = m_Pos;
458 
459       std::unique_ptr<CPDF_Object> pObj =
460           GetObject(pObjList, objnum, gennum, true);
461       if (!pObj)
462         continue;
463 
464       CFX_ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
465       pDict->SetFor(keyNoSlash, std::move(pObj));
466     }
467 
468     // Only when this is a signature dictionary and has contents, we reset the
469     // contents to the un-decrypted form.
470     if (pDict->IsSignatureDict() && dwSignValuePos) {
471       CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
472       m_Pos = dwSignValuePos;
473       pDict->SetFor("Contents", GetObject(pObjList, objnum, gennum, false));
474     }
475 
476     FX_FILESIZE SavedPos = m_Pos;
477     CFX_ByteString nextword = GetNextWord(nullptr);
478     if (nextword != "stream") {
479       m_Pos = SavedPos;
480       return std::move(pDict);
481     }
482     return ReadStream(std::move(pDict), objnum, gennum);
483   }
484   if (word == ">>")
485     m_Pos = SavedObjPos;
486 
487   return nullptr;
488 }
489 
GetObjectForStrict(CPDF_IndirectObjectHolder * pObjList,uint32_t objnum,uint32_t gennum)490 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectForStrict(
491     CPDF_IndirectObjectHolder* pObjList,
492     uint32_t objnum,
493     uint32_t gennum) {
494   CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
495   if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
496     return nullptr;
497 
498   FX_FILESIZE SavedObjPos = m_Pos;
499   bool bIsNumber;
500   CFX_ByteString word = GetNextWord(&bIsNumber);
501   if (word.GetLength() == 0)
502     return nullptr;
503 
504   if (bIsNumber) {
505     FX_FILESIZE SavedPos = m_Pos;
506     CFX_ByteString nextword = GetNextWord(&bIsNumber);
507     if (bIsNumber) {
508       CFX_ByteString nextword2 = GetNextWord(nullptr);
509       if (nextword2 == "R") {
510         uint32_t objnum = FXSYS_atoui(word.c_str());
511         if (objnum == CPDF_Object::kInvalidObjNum)
512           return nullptr;
513         return pdfium::MakeUnique<CPDF_Reference>(pObjList, objnum);
514       }
515     }
516     m_Pos = SavedPos;
517     return pdfium::MakeUnique<CPDF_Number>(word.AsStringC());
518   }
519 
520   if (word == "true" || word == "false")
521     return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
522 
523   if (word == "null")
524     return pdfium::MakeUnique<CPDF_Null>();
525 
526   if (word == "(") {
527     CFX_ByteString str = ReadString();
528     if (m_pCryptoHandler)
529       m_pCryptoHandler->Decrypt(objnum, gennum, str);
530     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
531   }
532   if (word == "<") {
533     CFX_ByteString str = ReadHexString();
534     if (m_pCryptoHandler)
535       m_pCryptoHandler->Decrypt(objnum, gennum, str);
536     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
537   }
538   if (word == "[") {
539     std::unique_ptr<CPDF_Array> pArray = pdfium::MakeUnique<CPDF_Array>();
540     while (std::unique_ptr<CPDF_Object> pObj =
541                GetObject(pObjList, objnum, gennum, true)) {
542       pArray->Add(std::move(pObj));
543     }
544     return m_WordBuffer[0] == ']' ? std::move(pArray) : nullptr;
545   }
546   if (word[0] == '/') {
547     return pdfium::MakeUnique<CPDF_Name>(
548         m_pPool,
549         PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
550   }
551   if (word == "<<") {
552     std::unique_ptr<CPDF_Dictionary> pDict =
553         pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
554     while (1) {
555       FX_FILESIZE SavedPos = m_Pos;
556       CFX_ByteString key = GetNextWord(nullptr);
557       if (key.IsEmpty())
558         return nullptr;
559 
560       if (key == ">>")
561         break;
562 
563       if (key == "endobj") {
564         m_Pos = SavedPos;
565         break;
566       }
567       if (key[0] != '/')
568         continue;
569 
570       key = PDF_NameDecode(key);
571       std::unique_ptr<CPDF_Object> obj(
572           GetObject(pObjList, objnum, gennum, true));
573       if (!obj) {
574         uint8_t ch;
575         while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
576           continue;
577         }
578         return nullptr;
579       }
580 
581       if (key.GetLength() > 1) {
582         pDict->SetFor(CFX_ByteString(key.c_str() + 1, key.GetLength() - 1),
583                       std::move(obj));
584       }
585     }
586 
587     FX_FILESIZE SavedPos = m_Pos;
588     CFX_ByteString nextword = GetNextWord(nullptr);
589     if (nextword != "stream") {
590       m_Pos = SavedPos;
591       return std::move(pDict);
592     }
593     return ReadStream(std::move(pDict), objnum, gennum);
594   }
595   if (word == ">>")
596     m_Pos = SavedObjPos;
597 
598   return nullptr;
599 }
600 
ReadEOLMarkers(FX_FILESIZE pos)601 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
602   unsigned char byte1 = 0;
603   unsigned char byte2 = 0;
604 
605   GetCharAt(pos, byte1);
606   GetCharAt(pos + 1, byte2);
607 
608   if (byte1 == '\r' && byte2 == '\n')
609     return 2;
610 
611   if (byte1 == '\r' || byte1 == '\n')
612     return 1;
613 
614   return 0;
615 }
616 
ReadStream(std::unique_ptr<CPDF_Dictionary> pDict,uint32_t objnum,uint32_t gennum)617 std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
618     std::unique_ptr<CPDF_Dictionary> pDict,
619     uint32_t objnum,
620     uint32_t gennum) {
621   CPDF_Object* pLenObj = pDict->GetObjectFor("Length");
622   FX_FILESIZE len = -1;
623   CPDF_Reference* pLenObjRef = ToReference(pLenObj);
624 
625   bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
626                                          pLenObjRef->GetRefObjNum() != objnum);
627   if (pLenObj && differingObjNum)
628     len = pLenObj->GetInteger();
629 
630   // Locate the start of stream.
631   ToNextLine();
632   FX_FILESIZE streamStartPos = m_Pos;
633 
634   const CFX_ByteStringC kEndStreamStr("endstream");
635   const CFX_ByteStringC kEndObjStr("endobj");
636 
637   CPDF_CryptoHandler* pCryptoHandler =
638       objnum == m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
639   if (!pCryptoHandler) {
640     bool bSearchForKeyword = true;
641     if (len >= 0) {
642       pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
643       pos += len;
644       if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
645         m_Pos = pos.ValueOrDie();
646 
647       m_Pos += ReadEOLMarkers(m_Pos);
648       FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
649       GetNextWordInternal(nullptr);
650       // Earlier version of PDF specification doesn't require EOL marker before
651       // 'endstream' keyword. If keyword 'endstream' follows the bytes in
652       // specified length, it signals the end of stream.
653       if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
654                        kEndStreamStr.GetLength()) == 0) {
655         bSearchForKeyword = false;
656       }
657     }
658 
659     if (bSearchForKeyword) {
660       // If len is not available, len needs to be calculated
661       // by searching the keywords "endstream" or "endobj".
662       m_Pos = streamStartPos;
663       FX_FILESIZE endStreamOffset = 0;
664       while (endStreamOffset >= 0) {
665         endStreamOffset = FindTag(kEndStreamStr, 0);
666 
667         // Can't find "endstream".
668         if (endStreamOffset < 0)
669           break;
670 
671         // Stop searching when "endstream" is found.
672         if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
673                         kEndStreamStr, true)) {
674           endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
675           break;
676         }
677       }
678 
679       m_Pos = streamStartPos;
680       FX_FILESIZE endObjOffset = 0;
681       while (endObjOffset >= 0) {
682         endObjOffset = FindTag(kEndObjStr, 0);
683 
684         // Can't find "endobj".
685         if (endObjOffset < 0)
686           break;
687 
688         // Stop searching when "endobj" is found.
689         if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
690                         true)) {
691           endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
692           break;
693         }
694       }
695 
696       // Can't find "endstream" or "endobj".
697       if (endStreamOffset < 0 && endObjOffset < 0)
698         return nullptr;
699 
700       if (endStreamOffset < 0 && endObjOffset >= 0) {
701         // Correct the position of end stream.
702         endStreamOffset = endObjOffset;
703       } else if (endStreamOffset >= 0 && endObjOffset < 0) {
704         // Correct the position of end obj.
705         endObjOffset = endStreamOffset;
706       } else if (endStreamOffset > endObjOffset) {
707         endStreamOffset = endObjOffset;
708       }
709       len = endStreamOffset;
710 
711       int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
712       if (numMarkers == 2) {
713         len -= 2;
714       } else {
715         numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
716         if (numMarkers == 1) {
717           len -= 1;
718         }
719       }
720       if (len < 0)
721         return nullptr;
722 
723       pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(len));
724     }
725     m_Pos = streamStartPos;
726   }
727   if (len < 0)
728     return nullptr;
729 
730   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
731   if (len > 0) {
732     pData.reset(FX_Alloc(uint8_t, len));
733     ReadBlock(pData.get(), len);
734     if (pCryptoHandler) {
735       CFX_BinaryBuf dest_buf;
736       dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
737 
738       void* context = pCryptoHandler->DecryptStart(objnum, gennum);
739       pCryptoHandler->DecryptStream(context, pData.get(), len, dest_buf);
740       pCryptoHandler->DecryptFinish(context, dest_buf);
741       len = dest_buf.GetSize();
742       pData = dest_buf.DetachBuffer();
743     }
744   }
745 
746   auto pStream =
747       pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict));
748   streamStartPos = m_Pos;
749   FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
750   GetNextWordInternal(nullptr);
751 
752   int numMarkers = ReadEOLMarkers(m_Pos);
753   if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
754       numMarkers != 0 &&
755       FXSYS_memcmp(m_WordBuffer, kEndObjStr.raw_str(),
756                    kEndObjStr.GetLength()) == 0) {
757     m_Pos = streamStartPos;
758   }
759   return pStream;
760 }
761 
InitParser(const CFX_RetainPtr<IFX_SeekableReadStream> & pFileAccess,uint32_t HeaderOffset)762 void CPDF_SyntaxParser::InitParser(
763     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
764     uint32_t HeaderOffset) {
765   FX_Free(m_pFileBuf);
766 
767   m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
768   m_HeaderOffset = HeaderOffset;
769   m_FileLen = pFileAccess->GetSize();
770   m_Pos = 0;
771   m_pFileAccess = pFileAccess;
772   m_BufOffset = 0;
773   pFileAccess->ReadBlock(m_pFileBuf, 0,
774                          std::min(m_BufSize, static_cast<uint32_t>(m_FileLen)));
775 }
776 
GetDirectNum()777 uint32_t CPDF_SyntaxParser::GetDirectNum() {
778   bool bIsNumber;
779   GetNextWordInternal(&bIsNumber);
780   if (!bIsNumber)
781     return 0;
782 
783   m_WordBuffer[m_WordSize] = 0;
784   return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
785 }
786 
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,const CFX_ByteStringC & tag,bool checkKeyword)787 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
788                                     FX_FILESIZE limit,
789                                     const CFX_ByteStringC& tag,
790                                     bool checkKeyword) {
791   const uint32_t taglen = tag.GetLength();
792 
793   bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
794   bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
795                      !PDFCharIsWhitespace(tag[taglen - 1]);
796 
797   uint8_t ch;
798   if (bCheckRight && startpos + (int32_t)taglen <= limit &&
799       GetCharAt(startpos + (int32_t)taglen, ch)) {
800     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
801         (checkKeyword && PDFCharIsDelimiter(ch))) {
802       return false;
803     }
804   }
805 
806   if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
807     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
808         (checkKeyword && PDFCharIsDelimiter(ch))) {
809       return false;
810     }
811   }
812   return true;
813 }
814 
815 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
816 // and drop the bool.
SearchWord(const CFX_ByteStringC & tag,bool bWholeWord,bool bForward,FX_FILESIZE limit)817 bool CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
818                                    bool bWholeWord,
819                                    bool bForward,
820                                    FX_FILESIZE limit) {
821   int32_t taglen = tag.GetLength();
822   if (taglen == 0)
823     return false;
824 
825   FX_FILESIZE pos = m_Pos;
826   int32_t offset = 0;
827   if (!bForward)
828     offset = taglen - 1;
829 
830   const uint8_t* tag_data = tag.raw_str();
831   uint8_t byte;
832   while (1) {
833     if (bForward) {
834       if (limit && pos >= m_Pos + limit)
835         return false;
836 
837       if (!GetCharAt(pos, byte))
838         return false;
839 
840     } else {
841       if (limit && pos <= m_Pos - limit)
842         return false;
843 
844       if (!GetCharAtBackward(pos, byte))
845         return false;
846     }
847 
848     if (byte == tag_data[offset]) {
849       if (bForward) {
850         offset++;
851         if (offset < taglen) {
852           pos++;
853           continue;
854         }
855       } else {
856         offset--;
857         if (offset >= 0) {
858           pos--;
859           continue;
860         }
861       }
862 
863       FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
864       if (!bWholeWord || IsWholeWord(startpos, limit, tag, false)) {
865         m_Pos = startpos;
866         return true;
867       }
868     }
869 
870     if (bForward) {
871       offset = byte == tag_data[0] ? 1 : 0;
872       pos++;
873     } else {
874       offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
875       pos--;
876     }
877 
878     if (pos < 0)
879       return false;
880   }
881 
882   return false;
883 }
884 
FindTag(const CFX_ByteStringC & tag,FX_FILESIZE limit)885 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
886                                        FX_FILESIZE limit) {
887   int32_t taglen = tag.GetLength();
888   int32_t match = 0;
889   limit += m_Pos;
890   FX_FILESIZE startpos = m_Pos;
891 
892   while (1) {
893     uint8_t ch;
894     if (!GetNextChar(ch))
895       return -1;
896 
897     if (ch == tag[match]) {
898       match++;
899       if (match == taglen)
900         return m_Pos - startpos - taglen;
901     } else {
902       match = ch == tag[0] ? 1 : 0;
903     }
904 
905     if (limit && m_Pos == limit)
906       return -1;
907   }
908   return -1;
909 }
910 
SetEncrypt(std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler)911 void CPDF_SyntaxParser::SetEncrypt(
912     std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
913   m_pCryptoHandler = std::move(pCryptoHandler);
914 }
915