1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
8 
9 #include <limits>
10 
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_read_validator.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
19 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
20 #include "core/fxcrt/cfx_bitstream.h"
21 #include "core/fxcrt/fx_safe_types.h"
22 #include "third_party/base/numerics/safe_conversions.h"
23 #include "third_party/base/ptr_util.h"
24 #include "third_party/base/span.h"
25 
26 namespace {
27 
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_UINT32 & bits)28 bool CanReadFromBitStream(const CFX_BitStream* hStream,
29                           const FX_SAFE_UINT32& bits) {
30   return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
31 }
32 
// Validates a bit-width field taken from the page table header. The note on
// Table F.3 in the PDF 1.7 reference restricts such fields to 0 through 32;
// a width of 0 is not useful either, so only 1 through 32 is accepted.
bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
  if (bits == 0)
    return false;
  return bits <= 32;
}
39 
40 }  // namespace
41 
42 CPDF_HintTables::PageInfo::PageInfo() = default;
43 CPDF_HintTables::PageInfo::~PageInfo() = default;
44 
45 //  static
Parse(CPDF_SyntaxParser * parser,CPDF_LinearizedHeader * pLinearized)46 std::unique_ptr<CPDF_HintTables> CPDF_HintTables::Parse(
47     CPDF_SyntaxParser* parser,
48     CPDF_LinearizedHeader* pLinearized) {
49   ASSERT(parser);
50   if (!pLinearized || pLinearized->GetPageCount() <= 1 ||
51       !pLinearized->HasHintTable()) {
52     return nullptr;
53   }
54 
55   const FX_FILESIZE szHintStart = pLinearized->GetHintStart();
56   const uint32_t szHintLength = pLinearized->GetHintLength();
57 
58   if (!parser->GetValidator()->CheckDataRangeAndRequestIfUnavailable(
59           szHintStart, szHintLength)) {
60     return nullptr;
61   }
62 
63   parser->SetPos(szHintStart);
64   RetainPtr<CPDF_Stream> hints_stream = ToStream(
65       parser->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose));
66 
67   if (!hints_stream)
68     return nullptr;
69 
70   auto pHintTables = pdfium::MakeUnique<CPDF_HintTables>(
71       parser->GetValidator().Get(), pLinearized);
72   if (!pHintTables->LoadHintStream(hints_stream.Get()))
73     return nullptr;
74 
75   return pHintTables;
76 }
77 
CPDF_HintTables(CPDF_ReadValidator * pValidator,CPDF_LinearizedHeader * pLinearized)78 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator,
79                                  CPDF_LinearizedHeader* pLinearized)
80     : m_pValidator(pValidator),
81       m_pLinearized(pLinearized),
82       m_nFirstPageSharedObjs(0),
83       m_szFirstPageObjOffset(0) {
84   ASSERT(m_pLinearized);
85 }
86 
~CPDF_HintTables()87 CPDF_HintTables::~CPDF_HintTables() {}
88 
ReadPageHintTable(CFX_BitStream * hStream)89 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
90   const uint32_t nPages = m_pLinearized->GetPageCount();
91   if (nPages < 1 || nPages >= CPDF_Document::kPageMaxNum)
92     return false;
93 
94   const uint32_t nFirstPageNum = m_pLinearized->GetFirstPageNo();
95   if (nFirstPageNum >= nPages)
96     return false;
97 
98   if (!hStream || hStream->IsEOF())
99     return false;
100 
101   const uint32_t kHeaderSize = 288;
102   if (hStream->BitsRemaining() < kHeaderSize)
103     return false;
104 
105   // Item 1: The least number of objects in a page.
106   const uint32_t dwObjLeastNum = hStream->GetBits(32);
107   if (!dwObjLeastNum)
108     return false;
109 
110   // Item 2: The location of the first page's page object.
111   const FX_FILESIZE szFirstObjLoc =
112       HintsOffsetToFileOffset(hStream->GetBits(32));
113   if (!szFirstObjLoc)
114     return false;
115 
116   m_szFirstPageObjOffset = szFirstObjLoc;
117 
118   // Item 3: The number of bits needed to represent the difference
119   // between the greatest and least number of objects in a page.
120   const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
121   if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
122     return false;
123 
124   // Item 4: The least length of a page in bytes.
125   const uint32_t dwPageLeastLen = hStream->GetBits(32);
126   if (!dwPageLeastLen)
127     return false;
128 
129   // Item 5: The number of bits needed to represent the difference
130   // between the greatest and least length of a page, in bytes.
131   const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
132   if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
133     return false;
134 
135   // Skip Item 6, 7, 8, 9 total 96 bits.
136   hStream->SkipBits(96);
137 
138   // Item 10: The number of bits needed to represent the greatest
139   // number of shared object references.
140   const uint32_t dwSharedObjBits = hStream->GetBits(16);
141   if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
142     return false;
143 
144   // Item 11: The number of bits needed to represent the numerically
145   // greatest shared object identifier used by the pages.
146   const uint32_t dwSharedIdBits = hStream->GetBits(16);
147   if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
148     return false;
149 
150   // Item 12: The number of bits needed to represent the numerator of
151   // the fractional position for each shared object reference. For each
152   // shared object referenced from a page, there is an indication of
153   // where in the page's content stream the object is first referenced.
154   const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
155   if (dwSharedNumeratorBits > 32)
156     return false;
157 
158   // Item 13: Skip Item 13 which has 16 bits.
159   hStream->SkipBits(16);
160 
161   FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
162   required_bits *= nPages;
163   if (!CanReadFromBitStream(hStream, required_bits))
164     return false;
165 
166   m_PageInfos = std::vector<PageInfo>(nPages);
167   m_PageInfos[nFirstPageNum].set_start_obj_num(
168       m_pLinearized->GetFirstPageObjNum());
169   // The object number of remaining pages starts from 1.
170   uint32_t dwStartObjNum = 1;
171   for (uint32_t i = 0; i < nPages; ++i) {
172     FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
173     safeDeltaObj += dwObjLeastNum;
174     if (!safeDeltaObj.IsValid())
175       return false;
176     m_PageInfos[i].set_objects_count(safeDeltaObj.ValueOrDie());
177     if (i == nFirstPageNum)
178       continue;
179     m_PageInfos[i].set_start_obj_num(dwStartObjNum);
180     dwStartObjNum += m_PageInfos[i].objects_count();
181   }
182   hStream->ByteAlign();
183 
184   required_bits = dwDeltaPageLenBits;
185   required_bits *= nPages;
186   if (!CanReadFromBitStream(hStream, required_bits))
187     return false;
188 
189   for (uint32_t i = 0; i < nPages; ++i) {
190     FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
191     safePageLen += dwPageLeastLen;
192     if (!safePageLen.IsValid())
193       return false;
194     m_PageInfos[i].set_page_length(safePageLen.ValueOrDie());
195   }
196 
197   ASSERT(m_szFirstPageObjOffset);
198   m_PageInfos[nFirstPageNum].set_page_offset(m_szFirstPageObjOffset);
199   FX_FILESIZE prev_page_end = m_pLinearized->GetFirstPageEndOffset();
200   for (uint32_t i = 0; i < nPages; ++i) {
201     if (i == nFirstPageNum)
202       continue;
203     m_PageInfos[i].set_page_offset(prev_page_end);
204     prev_page_end += m_PageInfos[i].page_length();
205   }
206   hStream->ByteAlign();
207 
208   // Number of shared objects.
209   required_bits = dwSharedObjBits;
210   required_bits *= nPages;
211   if (!CanReadFromBitStream(hStream, required_bits))
212     return false;
213 
214   std::vector<uint32_t> dwNSharedObjsArray(nPages);
215   for (uint32_t i = 0; i < nPages; i++)
216     dwNSharedObjsArray[i] = hStream->GetBits(dwSharedObjBits);
217   hStream->ByteAlign();
218 
219   // Array of identifiers, size = nshared_objects.
220   for (uint32_t i = 0; i < nPages; i++) {
221     required_bits = dwSharedIdBits;
222     required_bits *= dwNSharedObjsArray[i];
223     if (!CanReadFromBitStream(hStream, required_bits))
224       return false;
225 
226     for (uint32_t j = 0; j < dwNSharedObjsArray[i]; j++)
227       m_PageInfos[i].AddIdentifier(hStream->GetBits(dwSharedIdBits));
228   }
229   hStream->ByteAlign();
230 
231   if (dwSharedNumeratorBits) {
232     for (uint32_t i = 0; i < nPages; i++) {
233       FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i];
234       safeSize *= dwSharedNumeratorBits;
235       if (!CanReadFromBitStream(hStream, safeSize))
236         return false;
237 
238       hStream->SkipBits(safeSize.ValueOrDie());
239     }
240     hStream->ByteAlign();
241   }
242 
243   FX_SAFE_UINT32 safeTotalPageLen = nPages;
244   safeTotalPageLen *= dwDeltaPageLenBits;
245   if (!CanReadFromBitStream(hStream, safeTotalPageLen))
246     return false;
247 
248   hStream->SkipBits(safeTotalPageLen.ValueOrDie());
249   hStream->ByteAlign();
250   return true;
251 }
252 
ReadSharedObjHintTable(CFX_BitStream * hStream,uint32_t offset)253 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
254                                              uint32_t offset) {
255   if (!hStream || hStream->IsEOF())
256     return false;
257 
258   FX_SAFE_UINT32 bit_offset = offset;
259   bit_offset *= 8;
260   if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
261     return false;
262   hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
263 
264   const uint32_t kHeaderSize = 192;
265   if (hStream->BitsRemaining() < kHeaderSize)
266     return false;
267 
268   // Item 1: The object number of the first object in the shared objects
269   // section.
270   uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
271   if (!dwFirstSharedObjNum)
272     return false;
273 
274   // Item 2: The location of the first object in the shared objects section.
275   const FX_FILESIZE szFirstSharedObjLoc =
276       HintsOffsetToFileOffset(hStream->GetBits(32));
277   if (!szFirstSharedObjLoc)
278     return false;
279 
280   // Item 3: The number of shared object entries for the first page.
281   m_nFirstPageSharedObjs = hStream->GetBits(32);
282 
283   // Item 4: The number of shared object entries for the shared objects
284   // section, including the number of shared object entries for the first page.
285   uint32_t dwSharedObjTotal = hStream->GetBits(32);
286 
287   // Item 5: The number of bits needed to represent the greatest number of
288   // objects in a shared object group.
289   uint32_t dwSharedObjNumBits = hStream->GetBits(16);
290   if (dwSharedObjNumBits > 32)
291     return false;
292 
293   // Item 6: The least length of a shared object group in bytes.
294   uint32_t dwGroupLeastLen = hStream->GetBits(32);
295 
296   // Item 7: The number of bits needed to represent the difference between the
297   // greatest and least length of a shared object group, in bytes.
298   uint32_t dwDeltaGroupLen = hStream->GetBits(16);
299 
300   // Trying to decode more than 32 bits isn't going to work when we write into
301   // a uint32_t. Decoding 0 bits also makes no sense.
302   if (!IsValidPageOffsetHintTableBitCount(dwDeltaGroupLen))
303     return false;
304 
305   if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
306       m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
307       dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
308     return false;
309   }
310 
311   FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
312   required_bits *= dwDeltaGroupLen;
313   if (!CanReadFromBitStream(hStream, required_bits))
314     return false;
315 
316   if (dwSharedObjTotal > 0) {
317     uint32_t dwLastSharedObj = dwSharedObjTotal - 1;
318     if (dwLastSharedObj > m_nFirstPageSharedObjs) {
319       FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
320       safeObjNum += dwLastSharedObj - m_nFirstPageSharedObjs;
321       if (!safeObjNum.IsValid())
322         return false;
323     }
324   }
325 
326   m_SharedObjGroupInfos.resize(dwSharedObjTotal);
327   // Table F.6 –  Shared object hint table, shared object group entries:
328   // Item 1: A number that, when added to the least shared object
329   // group length.
330   FX_SAFE_FILESIZE prev_shared_group_end_offset = m_szFirstPageObjOffset;
331   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
332     if (i == m_nFirstPageSharedObjs)
333       prev_shared_group_end_offset = szFirstSharedObjLoc;
334 
335     FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
336     safeObjLen += dwGroupLeastLen;
337     if (!safeObjLen.IsValid())
338       return false;
339 
340     m_SharedObjGroupInfos[i].m_dwLength = safeObjLen.ValueOrDie();
341     m_SharedObjGroupInfos[i].m_szOffset =
342         prev_shared_group_end_offset.ValueOrDie();
343     prev_shared_group_end_offset += m_SharedObjGroupInfos[i].m_dwLength;
344     if (!prev_shared_group_end_offset.IsValid())
345       return false;
346   }
347 
348   hStream->ByteAlign();
349   {
350     // Item 2: A flag indicating whether the shared object signature (item 3) is
351     // present.
352     uint32_t signature_count = 0;
353     for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
354       signature_count += hStream->GetBits(1);
355     }
356     hStream->ByteAlign();
357     // Item 3: (Only if item 2 is 1) The shared object signature, a 16-byte MD5
358     // hash that uniquely identifies the resource that the group of objects
359     // represents.
360     if (signature_count) {
361       required_bits = signature_count;
362       required_bits *= 128;
363       if (!CanReadFromBitStream(hStream, required_bits))
364         return false;
365 
366       hStream->SkipBits(required_bits.ValueOrDie());
367       hStream->ByteAlign();
368     }
369   }
370   // Item 4: A number equal to 1 less than the number of objects in the group.
371   FX_SAFE_UINT32 cur_obj_num = m_pLinearized->GetFirstPageObjNum();
372   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
373     if (i == m_nFirstPageSharedObjs)
374       cur_obj_num = dwFirstSharedObjNum;
375 
376     FX_SAFE_UINT32 obj_count =
377         dwSharedObjNumBits ? hStream->GetBits(dwSharedObjNumBits) : 0;
378     obj_count += 1;
379     if (!obj_count.IsValid())
380       return false;
381 
382     uint32_t obj_num = cur_obj_num.ValueOrDie();
383     cur_obj_num += obj_count.ValueOrDie();
384     if (!cur_obj_num.IsValid())
385       return false;
386 
387     m_SharedObjGroupInfos[i].m_dwStartObjNum = obj_num;
388     m_SharedObjGroupInfos[i].m_dwObjectsCount = obj_count.ValueOrDie();
389   }
390 
391   hStream->ByteAlign();
392   return true;
393 }
394 
GetPagePos(uint32_t index,FX_FILESIZE * szPageStartPos,FX_FILESIZE * szPageLength,uint32_t * dwObjNum) const395 bool CPDF_HintTables::GetPagePos(uint32_t index,
396                                  FX_FILESIZE* szPageStartPos,
397                                  FX_FILESIZE* szPageLength,
398                                  uint32_t* dwObjNum) const {
399   if (index >= m_pLinearized->GetPageCount())
400     return false;
401 
402   *szPageStartPos = m_PageInfos[index].page_offset();
403   *szPageLength = m_PageInfos[index].page_length();
404   *dwObjNum = m_PageInfos[index].start_obj_num();
405   return true;
406 }
407 
CheckPage(uint32_t index)408 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
409   if (index == m_pLinearized->GetFirstPageNo())
410     return CPDF_DataAvail::DataAvailable;
411 
412   if (index >= m_pLinearized->GetPageCount())
413     return CPDF_DataAvail::DataError;
414 
415   const uint32_t dwLength = m_PageInfos[index].page_length();
416   if (!dwLength)
417     return CPDF_DataAvail::DataError;
418 
419   if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
420           m_PageInfos[index].page_offset(), dwLength)) {
421     return CPDF_DataAvail::DataNotAvailable;
422   }
423 
424   // Download data of shared objects in the page.
425   for (const uint32_t dwIndex : m_PageInfos[index].Identifiers()) {
426     if (dwIndex >= m_SharedObjGroupInfos.size())
427       continue;
428     const SharedObjGroupInfo& shared_group_info =
429         m_SharedObjGroupInfos[dwIndex];
430 
431     if (!shared_group_info.m_szOffset || !shared_group_info.m_dwLength)
432       return CPDF_DataAvail::DataError;
433 
434     if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
435             shared_group_info.m_szOffset, shared_group_info.m_dwLength)) {
436       return CPDF_DataAvail::DataNotAvailable;
437     }
438   }
439   return CPDF_DataAvail::DataAvailable;
440 }
441 
LoadHintStream(CPDF_Stream * pHintStream)442 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
443   if (!pHintStream || !m_pLinearized->HasHintTable())
444     return false;
445 
446   CPDF_Dictionary* pDict = pHintStream->GetDict();
447   CPDF_Object* pOffset = pDict ? pDict->GetObjectFor("S") : nullptr;
448   if (!pOffset || !pOffset->IsNumber())
449     return false;
450 
451   int shared_hint_table_offset = pOffset->GetInteger();
452   if (shared_hint_table_offset <= 0)
453     return false;
454 
455   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pHintStream);
456   pAcc->LoadAllDataFiltered();
457 
458   uint32_t size = pAcc->GetSize();
459   // The header section of page offset hint table is 36 bytes.
460   // The header section of shared object hint table is 24 bytes.
461   // Hint table has at least 60 bytes.
462   const uint32_t kMinStreamLength = 60;
463   if (size < kMinStreamLength)
464     return false;
465 
466   FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
467   if (!safe_shared_hint_table_offset.IsValid() ||
468       size < safe_shared_hint_table_offset.ValueOrDie()) {
469     return false;
470   }
471 
472   CFX_BitStream bs(pAcc->GetSpan().subspan(0, size));
473   return ReadPageHintTable(&bs) &&
474          ReadSharedObjHintTable(&bs, shared_hint_table_offset);
475 }
476 
HintsOffsetToFileOffset(uint32_t hints_offset) const477 FX_FILESIZE CPDF_HintTables::HintsOffsetToFileOffset(
478     uint32_t hints_offset) const {
479   FX_SAFE_FILESIZE file_offset = hints_offset;
480   if (!file_offset.IsValid())
481     return 0;
482 
483   // The resulting positions shall be interpreted as if the primary hint stream
484   // itself were not present. That is, a position greater than the hint stream
485   // offset shall have the hint stream length added to it to determine the
486   // actual offset relative to the beginning of the file.
487   // See specification PDF 32000-1:2008 Annex F.4 (Hint tables).
488   // Note: The PDF spec does not mention this, but positions equal to the hint
489   // stream offset also need to have the hint stream length added to it. e.g.
490   // There exists linearized PDFs generated by Adobe software that have this
491   // property.
492   if (file_offset.ValueOrDie() >= m_pLinearized->GetHintStart())
493     file_offset += m_pLinearized->GetHintLength();
494 
495   return file_offset.ValueOrDefault(0);
496 }
497