1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
8 
9 #include <algorithm>
10 #include <sstream>
11 #include <utility>
12 #include <vector>
13 
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_boolean.h"
16 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_name.h"
19 #include "core/fpdfapi/parser/cpdf_null.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_read_validator.h"
22 #include "core/fpdfapi/parser/cpdf_reference.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_string.h"
25 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
26 #include "core/fxcrt/autorestorer.h"
27 #include "core/fxcrt/cfx_binarybuf.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "third_party/base/numerics/safe_math.h"
31 #include "third_party/base/ptr_util.h"
32 
33 namespace {
34 
// States of the scanner used by CPDF_SyntaxParser::ReadString() while it
// walks the bytes of a literal string.
enum class ReadStatus { Normal, Backslash, Octal, FinishOctal, CarriageReturn };
36 
37 class ReadableSubStream final : public IFX_SeekableReadStream {
38  public:
ReadableSubStream(const RetainPtr<IFX_SeekableReadStream> & pFileRead,FX_FILESIZE part_offset,FX_FILESIZE part_size)39   ReadableSubStream(const RetainPtr<IFX_SeekableReadStream>& pFileRead,
40                     FX_FILESIZE part_offset,
41                     FX_FILESIZE part_size)
42       : m_pFileRead(pFileRead),
43         m_PartOffset(part_offset),
44         m_PartSize(part_size) {}
45 
46   ~ReadableSubStream() override = default;
47 
48   // IFX_SeekableReadStream overrides:
ReadBlockAtOffset(void * buffer,FX_FILESIZE offset,size_t size)49   bool ReadBlockAtOffset(void* buffer,
50                          FX_FILESIZE offset,
51                          size_t size) override {
52     FX_SAFE_FILESIZE safe_end = offset;
53     safe_end += size;
54     // Check that requested range is valid, to prevent calling of ReadBlock
55     // of original m_pFileRead with incorrect params.
56     if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_PartSize)
57       return false;
58 
59     return m_pFileRead->ReadBlockAtOffset(buffer, m_PartOffset + offset, size);
60   }
61 
GetSize()62   FX_FILESIZE GetSize() override { return m_PartSize; }
63 
64  private:
65   RetainPtr<IFX_SeekableReadStream> m_pFileRead;
66   FX_FILESIZE m_PartOffset;
67   FX_FILESIZE m_PartSize;
68 };
69 
70 }  // namespace
71 
// static
// Nesting counter shared by all parsers. GetObjectBodyInternal() increments
// it on entry, restores it on exit, and refuses to go deeper than
// kParserMaxRecursionDepth.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
74 
75 // static
CreateForTesting(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,FX_FILESIZE HeaderOffset)76 std::unique_ptr<CPDF_SyntaxParser> CPDF_SyntaxParser::CreateForTesting(
77     const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
78     FX_FILESIZE HeaderOffset) {
79   return pdfium::MakeUnique<CPDF_SyntaxParser>(
80       pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr),
81       HeaderOffset);
82 }
83 
// Convenience constructor: wraps |pFileAccess| in a new CPDF_ReadValidator
// (second validator argument null) and delegates to the validator-based
// constructor with a header offset of 0.
CPDF_SyntaxParser::CPDF_SyntaxParser(
    const RetainPtr<IFX_SeekableReadStream>& pFileAccess)
    : CPDF_SyntaxParser(
          pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr),
          0) {}
89 
// Stores |validator| and |HeaderOffset| and caches the size reported by the
// validator as the file length. |HeaderOffset| must not exceed that length.
CPDF_SyntaxParser::CPDF_SyntaxParser(
    const RetainPtr<CPDF_ReadValidator>& validator,
    FX_FILESIZE HeaderOffset)
    : m_pFileAccess(validator),
      m_HeaderOffset(HeaderOffset),
      // Reads through the member, so it relies on m_pFileAccess having been
      // initialized first.
      m_FileLen(m_pFileAccess->GetSize()) {
  ASSERT(m_HeaderOffset <= m_FileLen);
}
98 
// Defaulted: no cleanup beyond destroying the members.
CPDF_SyntaxParser::~CPDF_SyntaxParser() = default;
100 
GetCharAt(FX_FILESIZE pos,uint8_t & ch)101 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
102   AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
103   m_Pos = pos;
104   return GetNextChar(ch);
105 }
106 
ReadBlockAt(FX_FILESIZE read_pos)107 bool CPDF_SyntaxParser::ReadBlockAt(FX_FILESIZE read_pos) {
108   if (read_pos >= m_FileLen)
109     return false;
110   size_t read_size = m_ReadBufferSize;
111   FX_SAFE_FILESIZE safe_end = read_pos;
112   safe_end += read_size;
113   if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen)
114     read_size = m_FileLen - read_pos;
115 
116   m_pFileBuf.resize(read_size);
117   if (!m_pFileAccess->ReadBlockAtOffset(m_pFileBuf.data(), read_pos,
118                                         read_size)) {
119     m_pFileBuf.clear();
120     return false;
121   }
122 
123   m_BufOffset = read_pos;
124   return true;
125 }
126 
GetNextChar(uint8_t & ch)127 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
128   FX_FILESIZE pos = m_Pos + m_HeaderOffset;
129   if (pos >= m_FileLen)
130     return false;
131 
132   if (!IsPositionRead(pos) && !ReadBlockAt(pos))
133     return false;
134 
135   ch = m_pFileBuf[pos - m_BufOffset];
136   m_Pos++;
137   return true;
138 }
139 
GetDocumentSize() const140 FX_FILESIZE CPDF_SyntaxParser::GetDocumentSize() const {
141   return m_FileLen - m_HeaderOffset;
142 }
143 
GetCharAtBackward(FX_FILESIZE pos,uint8_t * ch)144 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) {
145   pos += m_HeaderOffset;
146   if (pos >= m_FileLen)
147     return false;
148 
149   if (!IsPositionRead(pos)) {
150     FX_FILESIZE block_start = 0;
151     if (pos >= CPDF_Stream::kFileBufSize)
152       block_start = pos - CPDF_Stream::kFileBufSize + 1;
153     if (!ReadBlockAt(block_start) || !IsPositionRead(pos))
154       return false;
155   }
156   *ch = m_pFileBuf[pos - m_BufOffset];
157   return true;
158 }
159 
ReadBlock(uint8_t * pBuf,uint32_t size)160 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
161   if (!m_pFileAccess->ReadBlockAtOffset(pBuf, m_Pos + m_HeaderOffset, size))
162     return false;
163   m_Pos += size;
164   return true;
165 }
166 
GetNextWordInternal(bool * bIsNumber)167 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
168   m_WordSize = 0;
169   if (bIsNumber)
170     *bIsNumber = true;
171 
172   ToNextWord();
173   uint8_t ch;
174   if (!GetNextChar(ch))
175     return;
176 
177   if (PDFCharIsDelimiter(ch)) {
178     if (bIsNumber)
179       *bIsNumber = false;
180 
181     m_WordBuffer[m_WordSize++] = ch;
182     if (ch == '/') {
183       while (1) {
184         if (!GetNextChar(ch))
185           return;
186 
187         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
188           m_Pos--;
189           return;
190         }
191 
192         if (m_WordSize < sizeof(m_WordBuffer) - 1)
193           m_WordBuffer[m_WordSize++] = ch;
194       }
195     } else if (ch == '<') {
196       if (!GetNextChar(ch))
197         return;
198 
199       if (ch == '<')
200         m_WordBuffer[m_WordSize++] = ch;
201       else
202         m_Pos--;
203     } else if (ch == '>') {
204       if (!GetNextChar(ch))
205         return;
206 
207       if (ch == '>')
208         m_WordBuffer[m_WordSize++] = ch;
209       else
210         m_Pos--;
211     }
212     return;
213   }
214 
215   while (1) {
216     if (m_WordSize < sizeof(m_WordBuffer) - 1)
217       m_WordBuffer[m_WordSize++] = ch;
218 
219     if (!PDFCharIsNumeric(ch)) {
220       if (bIsNumber)
221         *bIsNumber = false;
222     }
223 
224     if (!GetNextChar(ch))
225       return;
226 
227     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
228       m_Pos--;
229       break;
230     }
231   }
232 }
233 
// Reads the body of a literal string and returns its decoded bytes. The
// caller has already consumed the opening '(' (see GetObjectBodyInternal()).
// Balanced nested parentheses are kept in the output; the unbalanced ')' that
// closes the string is consumed but not returned. Backslash escapes (\n, \r,
// \t, \b, \f, up to three octal digits, and backslash-newline continuations)
// are decoded. If input ends before the closing ')', whatever was collected
// so far is returned.
ByteString CPDF_SyntaxParser::ReadString() {
  uint8_t ch;
  if (!GetNextChar(ch))
    return ByteString();

  std::ostringstream buf;
  int32_t parlevel = 0;  // Depth of unescaped '(' currently open.
  ReadStatus status = ReadStatus::Normal;
  int32_t iEscCode = 0;  // Accumulates the value of an octal escape.
  while (1) {
    switch (status) {
      case ReadStatus::Normal:
        if (ch == ')') {
          // An unmatched ')' terminates the string.
          if (parlevel == 0)
            return ByteString(buf);
          parlevel--;
        } else if (ch == '(') {
          parlevel++;
        }
        if (ch == '\\')
          status = ReadStatus::Backslash;
        else
          buf << static_cast<char>(ch);
        break;
      case ReadStatus::Backslash:
        if (FXSYS_IsOctalDigit(ch)) {
          iEscCode = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
          status = ReadStatus::Octal;
          break;
        }

        if (ch == '\r') {
          // Backslash-CR: emit nothing, and let the next state swallow an
          // optional LF.
          status = ReadStatus::CarriageReturn;
          break;
        }
        if (ch == 'n') {
          buf << '\n';
        } else if (ch == 'r') {
          buf << '\r';
        } else if (ch == 't') {
          buf << '\t';
        } else if (ch == 'b') {
          buf << '\b';
        } else if (ch == 'f') {
          buf << '\f';
        } else if (ch != '\n') {
          // Any other escaped byte stands for itself; backslash-LF emits
          // nothing.
          buf << static_cast<char>(ch);
        }
        status = ReadStatus::Normal;
        break;
      case ReadStatus::Octal:
        // Second digit of an octal escape, if present.
        if (FXSYS_IsOctalDigit(ch)) {
          iEscCode =
              iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
          status = ReadStatus::FinishOctal;
        } else {
          buf << static_cast<char>(iEscCode);
          status = ReadStatus::Normal;
          // Re-dispatch the same byte in Normal state without reading a new
          // one.
          continue;
        }
        break;
      case ReadStatus::FinishOctal:
        // Third (last possible) digit of an octal escape, if present.
        status = ReadStatus::Normal;
        if (FXSYS_IsOctalDigit(ch)) {
          iEscCode =
              iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
          buf << static_cast<char>(iEscCode);
        } else {
          buf << static_cast<char>(iEscCode);
          // Re-dispatch the same byte in Normal state.
          continue;
        }
        break;
      case ReadStatus::CarriageReturn:
        status = ReadStatus::Normal;
        // Anything other than LF is ordinary content: process it again in
        // Normal state. An LF is dropped.
        if (ch != '\n')
          continue;
        break;
    }

    if (!GetNextChar(ch))
      break;
  }

  // Only reached when input ran out before the closing ')'.
  GetNextChar(ch);
  return ByteString(buf);
}
320 
ReadHexString()321 ByteString CPDF_SyntaxParser::ReadHexString() {
322   uint8_t ch;
323   if (!GetNextChar(ch))
324     return ByteString();
325 
326   std::ostringstream buf;
327   bool bFirst = true;
328   uint8_t code = 0;
329   while (1) {
330     if (ch == '>')
331       break;
332 
333     if (std::isxdigit(ch)) {
334       int val = FXSYS_HexCharToInt(ch);
335       if (bFirst) {
336         code = val * 16;
337       } else {
338         code += val;
339         buf << static_cast<char>(code);
340       }
341       bFirst = !bFirst;
342     }
343 
344     if (!GetNextChar(ch))
345       break;
346   }
347   if (!bFirst)
348     buf << static_cast<char>(code);
349 
350   return ByteString(buf);
351 }
352 
ToNextLine()353 void CPDF_SyntaxParser::ToNextLine() {
354   uint8_t ch;
355   while (GetNextChar(ch)) {
356     if (ch == '\n')
357       break;
358 
359     if (ch == '\r') {
360       GetNextChar(ch);
361       if (ch != '\n')
362         --m_Pos;
363       break;
364     }
365   }
366 }
367 
ToNextWord()368 void CPDF_SyntaxParser::ToNextWord() {
369   uint8_t ch;
370   if (!GetNextChar(ch))
371     return;
372 
373   while (1) {
374     while (PDFCharIsWhitespace(ch)) {
375       if (!GetNextChar(ch))
376         return;
377     }
378 
379     if (ch != '%')
380       break;
381 
382     while (1) {
383       if (!GetNextChar(ch))
384         return;
385       if (PDFCharIsLineEnding(ch))
386         break;
387     }
388   }
389   m_Pos--;
390 }
391 
GetNextWord(bool * bIsNumber)392 ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
393   const CPDF_ReadValidator::Session read_session(GetValidator());
394   GetNextWordInternal(bIsNumber);
395   ByteString ret;
396   if (!GetValidator()->has_read_problems())
397     ret = ByteString(m_WordBuffer, m_WordSize);
398   return ret;
399 }
400 
PeekNextWord(bool * bIsNumber)401 ByteString CPDF_SyntaxParser::PeekNextWord(bool* bIsNumber) {
402   AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
403   return GetNextWord(bIsNumber);
404 }
405 
GetKeyword()406 ByteString CPDF_SyntaxParser::GetKeyword() {
407   return GetNextWord(nullptr);
408 }
409 
SetPos(FX_FILESIZE pos)410 void CPDF_SyntaxParser::SetPos(FX_FILESIZE pos) {
411   m_Pos = std::min(pos, m_FileLen);
412 }
413 
GetObjectBody(CPDF_IndirectObjectHolder * pObjList)414 RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetObjectBody(
415     CPDF_IndirectObjectHolder* pObjList) {
416   const CPDF_ReadValidator::Session read_session(GetValidator());
417   auto result = GetObjectBodyInternal(pObjList, ParseType::kLoose);
418   if (GetValidator()->has_read_problems())
419     return nullptr;
420   return result;
421 }
422 
// Parses a single object starting at the current position and returns it, or
// null when no object can be built from the next token.
//
// |pObjList| is passed to any CPDF_Reference that is created. |parse_type|
// controls how forgiving array and dictionary parsing is: in kLoose mode
// malformed entries are skipped, otherwise they cause a null return. Nested
// values are always parsed in kLoose mode.
//
// Recursion is bounded by kParserMaxRecursionDepth via
// s_CurrentRecursionDepth.
RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetObjectBodyInternal(
    CPDF_IndirectObjectHolder* pObjList,
    ParseType parse_type) {
  // Restores the depth counter on every exit path.
  AutoRestorer<int> depth_restorer(&s_CurrentRecursionDepth);
  if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
    return nullptr;

  FX_FILESIZE SavedObjPos = m_Pos;
  bool bIsNumber;
  ByteString word = GetNextWord(&bIsNumber);
  if (word.IsEmpty())
    return nullptr;

  if (bIsNumber) {
    // A number is either a plain numeric value or the first part of an
    // indirect reference "<num> <num> R". Look ahead two tokens; unless the
    // reference form is confirmed, |pos_restorer| rewinds to just after the
    // first number.
    AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos);
    ByteString nextword = GetNextWord(&bIsNumber);
    if (!bIsNumber)
      return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());

    ByteString nextword2 = GetNextWord(nullptr);
    if (nextword2 != "R")
      return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());

    // It is a reference: keep the position after the "R".
    pos_restorer.AbandonRestoration();
    uint32_t refnum = FXSYS_atoui(word.c_str());
    if (refnum == CPDF_Object::kInvalidObjNum)
      return nullptr;

    return pdfium::MakeRetain<CPDF_Reference>(pObjList, refnum);
  }

  if (word == "true" || word == "false")
    return pdfium::MakeRetain<CPDF_Boolean>(word == "true");

  if (word == "null")
    return pdfium::MakeRetain<CPDF_Null>();

  if (word == "(") {
    ByteString str = ReadString();
    return pdfium::MakeRetain<CPDF_String>(m_pPool, str, false);
  }
  if (word == "<") {
    ByteString str = ReadHexString();
    return pdfium::MakeRetain<CPDF_String>(m_pPool, str, true);
  }
  if (word == "[") {
    auto pArray = pdfium::MakeRetain<CPDF_Array>();
    // Elements are collected until a nested parse returns null.
    while (RetainPtr<CPDF_Object> pObj =
               GetObjectBodyInternal(pObjList, ParseType::kLoose)) {
      pArray->Append(std::move(pObj));
    }
    // m_WordBuffer still holds the token that ended the loop; outside loose
    // mode the array is accepted only if that token starts with ']'.
    return (parse_type == ParseType::kLoose || m_WordBuffer[0] == ']')
               ? std::move(pArray)
               : nullptr;
  }
  if (word[0] == '/') {
    // Name: decode the token with its leading '/' stripped.
    return pdfium::MakeRetain<CPDF_Name>(
        m_pPool,
        PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)));
  }
  if (word == "<<") {
    RetainPtr<CPDF_Dictionary> pDict =
        pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
    while (1) {
      ByteString inner_word = GetNextWord(nullptr);
      if (inner_word.IsEmpty())
        return nullptr;

      // Position of the first byte of |inner_word|.
      FX_FILESIZE SavedPos = m_Pos - inner_word.GetLength();
      if (inner_word == ">>")
        break;

      if (inner_word == "endobj") {
        // Dictionary was never closed: stop here and leave "endobj" unread.
        m_Pos = SavedPos;
        break;
      }
      // Tokens that are not names cannot be keys; skip them.
      if (inner_word[0] != '/')
        continue;

      ByteString key = PDF_NameDecode(inner_word.AsStringView());
      if (key.IsEmpty() && parse_type == ParseType::kLoose)
        continue;

      RetainPtr<CPDF_Object> pObj =
          GetObjectBodyInternal(pObjList, ParseType::kLoose);
      if (!pObj) {
        if (parse_type == ParseType::kLoose)
          continue;

        ToNextLine();
        return nullptr;
      }

      if (!key.IsEmpty()) {
        // |key| still carries the leading '/'; drop it before storing.
        ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
        pDict->SetFor(keyNoSlash, std::move(pObj));
      }
    }

    // A dictionary followed by the "stream" keyword is a stream object;
    // otherwise rewind past the look-ahead and return the dictionary.
    AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos);
    if (GetNextWord(nullptr) != "stream")
      return pDict;
    pos_restorer.AbandonRestoration();
    return ReadStream(std::move(pDict));
  }
  // A stray ">>" is left unread (position rewound to before it) before
  // reporting failure.
  if (word == ">>")
    m_Pos = SavedObjPos;

  return nullptr;
}
533 
GetIndirectObject(CPDF_IndirectObjectHolder * pObjList,ParseType parse_type)534 RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetIndirectObject(
535     CPDF_IndirectObjectHolder* pObjList,
536     ParseType parse_type) {
537   const CPDF_ReadValidator::Session read_session(GetValidator());
538   const FX_FILESIZE saved_pos = GetPos();
539   bool is_number = false;
540   ByteString word = GetNextWord(&is_number);
541   if (!is_number || word.IsEmpty()) {
542     SetPos(saved_pos);
543     return nullptr;
544   }
545   const uint32_t parser_objnum = FXSYS_atoui(word.c_str());
546 
547   word = GetNextWord(&is_number);
548   if (!is_number || word.IsEmpty()) {
549     SetPos(saved_pos);
550     return nullptr;
551   }
552   const uint32_t parser_gennum = FXSYS_atoui(word.c_str());
553 
554   if (GetKeyword() != "obj") {
555     SetPos(saved_pos);
556     return nullptr;
557   }
558 
559   RetainPtr<CPDF_Object> pObj = GetObjectBodyInternal(pObjList, parse_type);
560   if (pObj) {
561     pObj->SetObjNum(parser_objnum);
562     pObj->SetGenNum(parser_gennum);
563   }
564 
565   return GetValidator()->has_read_problems() ? nullptr : std::move(pObj);
566 }
567 
ReadEOLMarkers(FX_FILESIZE pos)568 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
569   unsigned char byte1 = 0;
570   unsigned char byte2 = 0;
571 
572   GetCharAt(pos, byte1);
573   GetCharAt(pos + 1, byte2);
574 
575   if (byte1 == '\r' && byte2 == '\n')
576     return 2;
577 
578   if (byte1 == '\r' || byte1 == '\n')
579     return 1;
580 
581   return 0;
582 }
583 
FindWordPos(ByteStringView word)584 FX_FILESIZE CPDF_SyntaxParser::FindWordPos(ByteStringView word) {
585   AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos);
586   FX_FILESIZE end_offset = FindTag(word);
587   while (end_offset >= 0) {
588     // Stop searching when word is found.
589     if (IsWholeWord(GetPos() - word.GetLength(), m_FileLen, word, true))
590       return GetPos() - word.GetLength();
591 
592     end_offset = FindTag(word);
593   }
594   return -1;
595 }
596 
FindStreamEndPos()597 FX_FILESIZE CPDF_SyntaxParser::FindStreamEndPos() {
598   const ByteStringView kEndStreamStr("endstream");
599   const ByteStringView kEndObjStr("endobj");
600 
601   FX_FILESIZE endStreamWordOffset = FindWordPos(kEndStreamStr);
602   FX_FILESIZE endObjWordOffset = FindWordPos(kEndObjStr);
603 
604   // Can't find "endstream" or "endobj".
605   if (endStreamWordOffset < 0 && endObjWordOffset < 0) {
606     return -1;
607   }
608 
609   if (endStreamWordOffset < 0 && endObjWordOffset >= 0) {
610     // Correct the position of end stream.
611     endStreamWordOffset = endObjWordOffset;
612   } else if (endStreamWordOffset >= 0 && endObjWordOffset < 0) {
613     // Correct the position of end obj.
614     endObjWordOffset = endStreamWordOffset;
615   } else if (endStreamWordOffset > endObjWordOffset) {
616     endStreamWordOffset = endObjWordOffset;
617   }
618 
619   int numMarkers = ReadEOLMarkers(endStreamWordOffset - 2);
620   if (numMarkers == 2) {
621     endStreamWordOffset -= 2;
622   } else {
623     numMarkers = ReadEOLMarkers(endStreamWordOffset - 1);
624     if (numMarkers == 1) {
625       endStreamWordOffset -= 1;
626     }
627   }
628   if (endStreamWordOffset < GetPos()) {
629     return -1;
630   }
631   return endStreamWordOffset;
632 }
633 
// Reads the stream whose dictionary is |pDict|; the "stream" keyword has
// already been consumed by the caller. The data length is taken from the
// dictionary's "Length" entry when that is usable and is followed by the
// "endstream" keyword; otherwise it is recovered by searching for "endstream"
// / "endobj". Returns null if the data is not available yet, if the
// validator reported read problems, or if no end of stream can be located.
RetainPtr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
    RetainPtr<CPDF_Dictionary> pDict) {
  const CPDF_Number* pLenObj = ToNumber(pDict->GetDirectObjectFor("Length"));
  // -1 means "length unknown".
  FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1;

  // Locate the start of stream.
  ToNextLine();
  const FX_FILESIZE streamStartPos = GetPos();

  if (len > 0) {
    // Distrust a declared length that overflows or reaches the file length.
    FX_SAFE_FILESIZE pos = GetPos();
    pos += len;
    if (!pos.IsValid() || pos.ValueOrDie() >= m_FileLen)
      len = -1;
  }

  RetainPtr<IFX_SeekableReadStream> data;
  if (len > 0) {
    // Check data availability first to allow the Validator to request data
    // smoothly, without jumps.
    if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
            m_HeaderOffset + GetPos(), len)) {
      return nullptr;
    }

    // The stream data is exposed as a window onto the file rather than
    // copied here.
    data = pdfium::MakeRetain<ReadableSubStream>(
        GetValidator(), m_HeaderOffset + GetPos(), len);
    SetPos(GetPos() + len);
  }

  const ByteStringView kEndStreamStr("endstream");
  const ByteStringView kEndObjStr("endobj");

  // Note, we allow zero length streams as we need to pass them through when we
  // are importing pages into a new document.
  if (len >= 0) {
    const CPDF_ReadValidator::Session read_session(GetValidator());
    m_Pos += ReadEOLMarkers(GetPos());
    // Clear the part of the word buffer compared below so bytes left over
    // from an earlier token cannot produce a false match.
    memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
    GetNextWordInternal(nullptr);
    if (GetValidator()->has_read_problems())
      return nullptr;

    // Earlier version of PDF specification doesn't require EOL marker before
    // 'endstream' keyword. If keyword 'endstream' follows the bytes in
    // specified length, it signals the end of stream.
    if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
               kEndStreamStr.GetLength()) != 0) {
      // The declared length did not land on "endstream": discard it and fall
      // back to searching from the start of the stream data.
      data.Reset();
      len = -1;
      SetPos(streamStartPos);
    }
  }

  if (len < 0) {
    // If len is not available or incorrect, len needs to be calculated
    // by searching the keywords "endstream" or "endobj".
    const FX_FILESIZE streamEndPos = FindStreamEndPos();
    if (streamEndPos < 0)
      return nullptr;

    len = streamEndPos - streamStartPos;
    ASSERT(len >= 0);
    if (len > 0) {
      SetPos(streamStartPos);
      // Check data availability first to allow the Validator to request data
      // smoothly, without jumps.
      if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
              m_HeaderOffset + GetPos(), len)) {
        return nullptr;
      }

      data = pdfium::MakeRetain<ReadableSubStream>(
          GetValidator(), m_HeaderOffset + GetPos(), len);
      SetPos(GetPos() + len);
    }
  }

  auto pStream = pdfium::MakeRetain<CPDF_Stream>();
  if (data) {
    pStream->InitStreamFromFile(data, std::move(pDict));
  } else {
    DCHECK(!len);
    pStream->InitStream({}, std::move(pDict));  // Empty stream
  }
  const FX_FILESIZE end_stream_offset = GetPos();
  memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
  GetNextWordInternal(nullptr);

  // If the next token is "endobj" followed by an end-of-line marker, rewind
  // so that it is left unread; otherwise the token just read stays consumed.
  int numMarkers = ReadEOLMarkers(GetPos());
  if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
      numMarkers != 0 &&
      memcmp(m_WordBuffer, kEndObjStr.raw_str(), kEndObjStr.GetLength()) == 0) {
    SetPos(end_stream_offset);
  }
  return pStream;
}
731 
GetDirectNum()732 uint32_t CPDF_SyntaxParser::GetDirectNum() {
733   bool bIsNumber;
734   GetNextWordInternal(&bIsNumber);
735   if (!bIsNumber)
736     return 0;
737 
738   m_WordBuffer[m_WordSize] = 0;
739   return FXSYS_atoui(reinterpret_cast<const char*>(m_WordBuffer));
740 }
741 
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,ByteStringView tag,bool checkKeyword)742 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
743                                     FX_FILESIZE limit,
744                                     ByteStringView tag,
745                                     bool checkKeyword) {
746   const uint32_t taglen = tag.GetLength();
747 
748   bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
749   bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
750                      !PDFCharIsWhitespace(tag[taglen - 1]);
751 
752   uint8_t ch;
753   if (bCheckRight && startpos + (int32_t)taglen <= limit &&
754       GetCharAt(startpos + (int32_t)taglen, ch)) {
755     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
756         (checkKeyword && PDFCharIsDelimiter(ch))) {
757       return false;
758     }
759   }
760 
761   if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
762     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
763         (checkKeyword && PDFCharIsDelimiter(ch))) {
764       return false;
765     }
766   }
767   return true;
768 }
769 
BackwardsSearchToWord(ByteStringView word,FX_FILESIZE limit)770 bool CPDF_SyntaxParser::BackwardsSearchToWord(ByteStringView word,
771                                               FX_FILESIZE limit) {
772   int32_t taglen = word.GetLength();
773   if (taglen == 0)
774     return false;
775 
776   FX_FILESIZE pos = m_Pos;
777   int32_t offset = taglen - 1;
778   while (1) {
779     if (limit && pos <= m_Pos - limit)
780       return false;
781 
782     uint8_t byte;
783     if (!GetCharAtBackward(pos, &byte))
784       return false;
785 
786     if (byte == word[offset]) {
787       offset--;
788       if (offset >= 0) {
789         pos--;
790         continue;
791       }
792       if (IsWholeWord(pos, limit, word, false)) {
793         m_Pos = pos;
794         return true;
795       }
796     }
797     offset = byte == word[taglen - 1] ? taglen - 2 : taglen - 1;
798     pos--;
799     if (pos < 0)
800       return false;
801   }
802 }
803 
FindTag(ByteStringView tag)804 FX_FILESIZE CPDF_SyntaxParser::FindTag(ByteStringView tag) {
805   const FX_FILESIZE startpos = GetPos();
806   const int32_t taglen = tag.GetLength();
807   ASSERT(taglen > 0);
808 
809   int32_t match = 0;
810   while (1) {
811     uint8_t ch;
812     if (!GetNextChar(ch))
813       return -1;
814 
815     if (ch == tag[match]) {
816       match++;
817       if (match == taglen)
818         return GetPos() - startpos - taglen;
819     } else {
820       match = ch == tag[0] ? 1 : 0;
821     }
822   }
823   return -1;
824 }
825 
IsPositionRead(FX_FILESIZE pos) const826 bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const {
827   return m_BufOffset <= pos &&
828          pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
829 }
830