1 #include <qpdf/QPDF_Stream.hh>
2
3 #include <qpdf/QUtil.hh>
4 #include <qpdf/Pipeline.hh>
5 #include <qpdf/Pl_Flate.hh>
6 #include <qpdf/Pl_Buffer.hh>
7 #include <qpdf/Pl_Count.hh>
8 #include <qpdf/ContentNormalizer.hh>
9 #include <qpdf/QTC.hh>
10 #include <qpdf/QPDF.hh>
11 #include <qpdf/QPDFExc.hh>
12 #include <qpdf/Pl_QPDFTokenizer.hh>
13 #include <qpdf/QIntC.hh>
14 #include <qpdf/SF_FlateLzwDecode.hh>
15 #include <qpdf/SF_DCTDecode.hh>
16 #include <qpdf/SF_RunLengthDecode.hh>
17 #include <qpdf/SF_ASCII85Decode.hh>
18 #include <qpdf/SF_ASCIIHexDecode.hh>
19
20 #include <stdexcept>
21
22 class SF_Crypt: public QPDFStreamFilter
23 {
24 public:
25 SF_Crypt() = default;
26 virtual ~SF_Crypt() = default;
27
setDecodeParms(QPDFObjectHandle decode_parms)28 virtual bool setDecodeParms(QPDFObjectHandle decode_parms)
29 {
30 if (decode_parms.isNull())
31 {
32 return true;
33 }
34 bool filterable = true;
35 for (auto const& key: decode_parms.getKeys())
36 {
37 if (((key == "/Type") || (key == "/Name")) &&
38 (decode_parms.getKey("/Type").isNull() ||
39 (decode_parms.getKey("/Type").isName() &&
40 (decode_parms.getKey("/Type").getName() ==
41 "/CryptFilterDecodeParms"))))
42 {
43 // we handle this in decryptStream
44 }
45 else
46 {
47 filterable = false;
48 }
49 }
50 return filterable;
51 }
52
getDecodePipeline(Pipeline *)53 virtual Pipeline* getDecodePipeline(Pipeline*)
54 {
55 // Not used -- handled by pipeStreamData
56 return nullptr;
57 }
58 };
59
60 std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
61 // The PDF specification provides these filter abbreviations for
62 // use in inline images, but according to table H.1 in the pre-ISO
63 // versions of the PDF specification, Adobe Reader also accepts
64 // them for stream filters.
65 {"/AHx", "/ASCIIHexDecode"},
66 {"/A85", "/ASCII85Decode"},
67 {"/LZW", "/LZWDecode"},
68 {"/Fl", "/FlateDecode"},
69 {"/RL", "/RunLengthDecode"},
70 {"/CCF", "/CCITTFaxDecode"},
71 {"/DCT", "/DCTDecode"},
72 };
73
74 std::map<
75 std::string,
76 std::function<std::shared_ptr<QPDFStreamFilter>()>>
77 QPDF_Stream::filter_factories = {
__anon84f501840102() 78 {"/Crypt", []() { return std::make_shared<SF_Crypt>(); }},
79 {"/FlateDecode", SF_FlateLzwDecode::flate_factory},
80 {"/LZWDecode", SF_FlateLzwDecode::lzw_factory},
81 {"/RunLengthDecode", SF_RunLengthDecode::factory},
82 {"/DCTDecode", SF_DCTDecode::factory},
83 {"/ASCII85Decode", SF_ASCII85Decode::factory},
84 {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
85 };
86
QPDF_Stream(QPDF * qpdf,int objid,int generation,QPDFObjectHandle stream_dict,qpdf_offset_t offset,size_t length)87 QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation,
88 QPDFObjectHandle stream_dict,
89 qpdf_offset_t offset, size_t length) :
90 qpdf(qpdf),
91 objid(objid),
92 generation(generation),
93 filter_on_write(true),
94 stream_dict(stream_dict),
95 offset(offset),
96 length(length)
97 {
98 if (! stream_dict.isDictionary())
99 {
100 throw std::logic_error(
101 "stream object instantiated with non-dictionary "
102 "object for dictionary");
103 }
104 setStreamDescription();
105 }
106
~QPDF_Stream()107 QPDF_Stream::~QPDF_Stream()
108 {
109 }
110
111 void
registerStreamFilter(std::string const & filter_name,std::function<std::shared_ptr<QPDFStreamFilter> ()> factory)112 QPDF_Stream::registerStreamFilter(
113 std::string const& filter_name,
114 std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
115 {
116 filter_factories[filter_name] = factory;
117 }
118
119 void
setFilterOnWrite(bool val)120 QPDF_Stream::setFilterOnWrite(bool val)
121 {
122 this->filter_on_write = val;
123 }
124
125 bool
getFilterOnWrite() const126 QPDF_Stream::getFilterOnWrite() const
127 {
128 return this->filter_on_write;
129 }
130
131 void
releaseResolved()132 QPDF_Stream::releaseResolved()
133 {
134 this->stream_provider = 0;
135 QPDFObjectHandle::ReleaseResolver::releaseResolved(this->stream_dict);
136 }
137
138 void
setObjGen(int objid,int generation)139 QPDF_Stream::setObjGen(int objid, int generation)
140 {
141 if (! ((this->objid == 0) && (this->generation == 0)))
142 {
143 throw std::logic_error(
144 "attempt to set object ID and generation of a stream"
145 " that already has them");
146 }
147 this->objid = objid;
148 this->generation = generation;
149 }
150
151 std::string
unparse()152 QPDF_Stream::unparse()
153 {
154 // Unparse stream objects as indirect references
155 return QUtil::int_to_string(this->objid) + " " +
156 QUtil::int_to_string(this->generation) + " R";
157 }
158
159 JSON
getJSON()160 QPDF_Stream::getJSON()
161 {
162 return this->stream_dict.getJSON();
163 }
164
165 QPDFObject::object_type_e
getTypeCode() const166 QPDF_Stream::getTypeCode() const
167 {
168 return QPDFObject::ot_stream;
169 }
170
171 char const*
getTypeName() const172 QPDF_Stream::getTypeName() const
173 {
174 return "stream";
175 }
176
177 void
setDescription(QPDF * qpdf,std::string const & description)178 QPDF_Stream::setDescription(QPDF* qpdf, std::string const& description)
179 {
180 this->QPDFObject::setDescription(qpdf, description);
181 setDictDescription();
182 }
183
184 void
setStreamDescription()185 QPDF_Stream::setStreamDescription()
186 {
187 setDescription(
188 this->qpdf,
189 this->qpdf->getFilename() +
190 ", stream object " + QUtil::int_to_string(this->objid) + " " +
191 QUtil::int_to_string(this->generation));
192 }
193
194 void
setDictDescription()195 QPDF_Stream::setDictDescription()
196 {
197 QPDF* qpdf = 0;
198 std::string description;
199 if ((! this->stream_dict.hasObjectDescription()) &&
200 getDescription(qpdf, description))
201 {
202 this->stream_dict.setObjectDescription(
203 qpdf, description + " -> stream dictionary");
204 }
205 }
206
207 QPDFObjectHandle
getDict() const208 QPDF_Stream::getDict() const
209 {
210 return this->stream_dict;
211 }
212
213 bool
isDataModified() const214 QPDF_Stream::isDataModified() const
215 {
216 return (! this->token_filters.empty());
217 }
218
219 qpdf_offset_t
getOffset() const220 QPDF_Stream::getOffset() const
221 {
222 return this->offset;
223 }
224
225 size_t
getLength() const226 QPDF_Stream::getLength() const
227 {
228 return this->length;
229 }
230
231 PointerHolder<Buffer>
getStreamDataBuffer() const232 QPDF_Stream::getStreamDataBuffer() const
233 {
234 return this->stream_data;
235 }
236
237 PointerHolder<QPDFObjectHandle::StreamDataProvider>
getStreamDataProvider() const238 QPDF_Stream::getStreamDataProvider() const
239 {
240 return this->stream_provider;
241 }
242
243 PointerHolder<Buffer>
getStreamData(qpdf_stream_decode_level_e decode_level)244 QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
245 {
246 Pl_Buffer buf("stream data buffer");
247 bool filtered;
248 pipeStreamData(&buf, &filtered, 0, decode_level, false, false);
249 if (! filtered)
250 {
251 throw QPDFExc(qpdf_e_unsupported, qpdf->getFilename(),
252 "", this->offset,
253 "getStreamData called on unfilterable stream");
254 }
255 QTC::TC("qpdf", "QPDF_Stream getStreamData");
256 return buf.getBuffer();
257 }
258
259 PointerHolder<Buffer>
getRawStreamData()260 QPDF_Stream::getRawStreamData()
261 {
262 Pl_Buffer buf("stream data buffer");
263 if (! pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false))
264 {
265 throw QPDFExc(qpdf_e_unsupported, qpdf->getFilename(),
266 "", this->offset,
267 "error getting raw stream data");
268 }
269 QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
270 return buf.getBuffer();
271 }
272
273 bool
filterable(std::vector<std::shared_ptr<QPDFStreamFilter>> & filters,bool & specialized_compression,bool & lossy_compression)274 QPDF_Stream::filterable(
275 std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
276 bool& specialized_compression,
277 bool& lossy_compression)
278 {
279 // Check filters
280
281 QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
282 bool filters_okay = true;
283
284 std::vector<std::string> filter_names;
285
286 if (filter_obj.isNull())
287 {
288 // No filters
289 }
290 else if (filter_obj.isName())
291 {
292 // One filter
293 filter_names.push_back(filter_obj.getName());
294 }
295 else if (filter_obj.isArray())
296 {
297 // Potentially multiple filters
298 int n = filter_obj.getArrayNItems();
299 for (int i = 0; i < n; ++i)
300 {
301 QPDFObjectHandle item = filter_obj.getArrayItem(i);
302 if (item.isName())
303 {
304 filter_names.push_back(item.getName());
305 }
306 else
307 {
308 filters_okay = false;
309 }
310 }
311 }
312 else
313 {
314 filters_okay = false;
315 }
316
317 if (! filters_okay)
318 {
319 QTC::TC("qpdf", "QPDF_Stream invalid filter");
320 warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
321 "", this->offset,
322 "stream filter type is not name or array"));
323 return false;
324 }
325
326 bool filterable = true;
327
328 for (auto& filter_name: filter_names)
329 {
330 if (filter_abbreviations.count(filter_name))
331 {
332 QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation");
333 filter_name = filter_abbreviations[filter_name];
334 }
335
336 auto ff = filter_factories.find(filter_name);
337 if (ff == filter_factories.end())
338 {
339 filterable = false;
340 }
341 else
342 {
343 filters.push_back((ff->second)());
344 }
345 }
346
347 if (! filterable)
348 {
349 return false;
350 }
351
352 // filters now contains a list of filters to be applied in order.
353 // See which ones we can support.
354
355 // See if we can support any decode parameters that are specified.
356
357 QPDFObjectHandle decode_obj = this->stream_dict.getKey("/DecodeParms");
358 std::vector<QPDFObjectHandle> decode_parms;
359 if (decode_obj.isArray() && (decode_obj.getArrayNItems() == 0))
360 {
361 decode_obj = QPDFObjectHandle::newNull();
362 }
363 if (decode_obj.isArray())
364 {
365 for (int i = 0; i < decode_obj.getArrayNItems(); ++i)
366 {
367 decode_parms.push_back(decode_obj.getArrayItem(i));
368 }
369 }
370 else
371 {
372 for (unsigned int i = 0; i < filter_names.size(); ++i)
373 {
374 decode_parms.push_back(decode_obj);
375 }
376 }
377
378 // Ignore /DecodeParms entirely if /Filters is empty. At least
379 // one case of a file whose /DecodeParms was [ << >> ] when
380 // /Filters was empty has been seen in the wild.
381 if ((filters.size() != 0) && (decode_parms.size() != filters.size()))
382 {
383 warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
384 "", this->offset,
385 "stream /DecodeParms length is"
386 " inconsistent with filters"));
387 filterable = false;
388 }
389
390 if (! filterable)
391 {
392 return false;
393 }
394
395 for (size_t i = 0; i < filters.size(); ++i)
396 {
397 auto filter = filters.at(i);
398 auto decode_item = decode_parms.at(i);
399
400 if (filter->setDecodeParms(decode_item))
401 {
402 if (filter->isSpecializedCompression())
403 {
404 specialized_compression = true;
405 }
406 if (filter->isLossyCompression())
407 {
408 specialized_compression = true;
409 lossy_compression = true;
410 }
411 }
412 else
413 {
414 filterable = false;
415 }
416 }
417
418 return filterable;
419 }
420
421 bool
pipeStreamData(Pipeline * pipeline,bool * filterp,int encode_flags,qpdf_stream_decode_level_e decode_level,bool suppress_warnings,bool will_retry)422 QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
423 int encode_flags,
424 qpdf_stream_decode_level_e decode_level,
425 bool suppress_warnings, bool will_retry)
426 {
427 std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
428 bool specialized_compression = false;
429 bool lossy_compression = false;
430 bool ignored;
431 if (filterp == nullptr)
432 {
433 filterp = &ignored;
434 }
435 bool& filter = *filterp;
436 filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none)));
437 bool success = true;
438 if (filter)
439 {
440 filter = filterable(
441 filters, specialized_compression, lossy_compression);
442 if ((decode_level < qpdf_dl_all) && lossy_compression)
443 {
444 filter = false;
445 }
446 if ((decode_level < qpdf_dl_specialized) && specialized_compression)
447 {
448 filter = false;
449 }
450 QTC::TC("qpdf", "QPDF_Stream special filters",
451 (! filter) ? 0 :
452 lossy_compression ? 1 :
453 specialized_compression ? 2 :
454 3);
455 }
456
457 if (pipeline == 0)
458 {
459 QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
460 // Return value is whether we can filter in this case.
461 return filter;
462 }
463
464 // Construct the pipeline in reverse order. Force pipelines we
465 // create to be deleted when this function finishes. Pipelines
466 // created by QPDFStreamFilter objects will be deleted by those
467 // objects.
468 std::vector<PointerHolder<Pipeline>> to_delete;
469
470 PointerHolder<ContentNormalizer> normalizer;
471 if (filter)
472 {
473 if (encode_flags & qpdf_ef_compress)
474 {
475 pipeline = new Pl_Flate("compress stream", pipeline,
476 Pl_Flate::a_deflate);
477 to_delete.push_back(pipeline);
478 }
479
480 if (encode_flags & qpdf_ef_normalize)
481 {
482 normalizer = new ContentNormalizer();
483 pipeline = new Pl_QPDFTokenizer(
484 "normalizer", normalizer.getPointer(), pipeline);
485 to_delete.push_back(pipeline);
486 }
487
488 for (std::vector<PointerHolder<
489 QPDFObjectHandle::TokenFilter> >::reverse_iterator iter =
490 this->token_filters.rbegin();
491 iter != this->token_filters.rend(); ++iter)
492 {
493 pipeline = new Pl_QPDFTokenizer(
494 "token filter", (*iter).getPointer(), pipeline);
495 to_delete.push_back(pipeline);
496 }
497
498 for (auto f_iter = filters.rbegin();
499 f_iter != filters.rend(); ++f_iter)
500 {
501 auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline);
502 if (decode_pipeline)
503 {
504 pipeline = decode_pipeline;
505 }
506 Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline);
507 if (flate != nullptr)
508 {
509 flate->setWarnCallback([this](char const* msg, int code) {
510 warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
511 "", this->offset, msg));
512 });
513 }
514 }
515 }
516
517 if (this->stream_data.getPointer())
518 {
519 QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data");
520 pipeline->write(this->stream_data->getBuffer(),
521 this->stream_data->getSize());
522 pipeline->finish();
523 }
524 else if (this->stream_provider.getPointer())
525 {
526 Pl_Count count("stream provider count", pipeline);
527 if (this->stream_provider->supportsRetry())
528 {
529 if (! this->stream_provider->provideStreamData(
530 this->objid, this->generation, &count,
531 suppress_warnings, will_retry))
532 {
533 filter = false;
534 success = false;
535 }
536 }
537 else
538 {
539 this->stream_provider->provideStreamData(
540 this->objid, this->generation, &count);
541 }
542 qpdf_offset_t actual_length = count.getCount();
543 qpdf_offset_t desired_length = 0;
544 if (success && this->stream_dict.hasKey("/Length"))
545 {
546 desired_length = this->stream_dict.getKey("/Length").getIntValue();
547 if (actual_length == desired_length)
548 {
549 QTC::TC("qpdf", "QPDF_Stream pipe use stream provider");
550 }
551 else
552 {
553 QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
554 // This would be caused by programmer error on the
555 // part of a library user, not by invalid input data.
556 throw std::runtime_error(
557 "stream data provider for " +
558 QUtil::int_to_string(this->objid) + " " +
559 QUtil::int_to_string(this->generation) +
560 " provided " +
561 QUtil::int_to_string(actual_length) +
562 " bytes instead of expected " +
563 QUtil::int_to_string(desired_length) + " bytes");
564 }
565 }
566 else if (success)
567 {
568 QTC::TC("qpdf", "QPDF_Stream provider length not provided");
569 this->stream_dict.replaceKey(
570 "/Length", QPDFObjectHandle::newInteger(actual_length));
571 }
572 }
573 else if (this->offset == 0)
574 {
575 QTC::TC("qpdf", "QPDF_Stream pipe no stream data");
576 throw std::logic_error(
577 "pipeStreamData called for stream with no data");
578 }
579 else
580 {
581 QTC::TC("qpdf", "QPDF_Stream pipe original stream data");
582 if (! QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation,
583 this->offset, this->length,
584 this->stream_dict, pipeline,
585 suppress_warnings,
586 will_retry))
587 {
588 filter = false;
589 success = false;
590 }
591 }
592
593 if (filter &&
594 (! suppress_warnings) &&
595 normalizer.getPointer() &&
596 normalizer->anyBadTokens())
597 {
598 warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
599 "", this->offset,
600 "content normalization encountered bad tokens"));
601 if (normalizer->lastTokenWasBad())
602 {
603 QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
604 warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
605 "", this->offset,
606 "normalized content ended with a bad token;"
607 " you may be able to resolve this by"
608 " coalescing content streams in combination"
609 " with normalizing content. From the command"
610 " line, specify --coalesce-contents"));
611 }
612 warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
613 "", this->offset,
614 "Resulting stream data may be corrupted but is"
615 " may still useful for manual inspection."
616 " For more information on this warning, search"
617 " for content normalization in the manual."));
618 }
619
620 return success;
621 }
622
623 void
replaceStreamData(PointerHolder<Buffer> data,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)624 QPDF_Stream::replaceStreamData(PointerHolder<Buffer> data,
625 QPDFObjectHandle const& filter,
626 QPDFObjectHandle const& decode_parms)
627 {
628 this->stream_data = data;
629 this->stream_provider = 0;
630 replaceFilterData(filter, decode_parms, data->getSize());
631 }
632
633 void
replaceStreamData(PointerHolder<QPDFObjectHandle::StreamDataProvider> provider,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)634 QPDF_Stream::replaceStreamData(
635 PointerHolder<QPDFObjectHandle::StreamDataProvider> provider,
636 QPDFObjectHandle const& filter,
637 QPDFObjectHandle const& decode_parms)
638 {
639 this->stream_provider = provider;
640 this->stream_data = 0;
641 replaceFilterData(filter, decode_parms, 0);
642 }
643
644 void
addTokenFilter(PointerHolder<QPDFObjectHandle::TokenFilter> token_filter)645 QPDF_Stream::addTokenFilter(
646 PointerHolder<QPDFObjectHandle::TokenFilter> token_filter)
647 {
648 this->token_filters.push_back(token_filter);
649 }
650
651 void
replaceFilterData(QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms,size_t length)652 QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter,
653 QPDFObjectHandle const& decode_parms,
654 size_t length)
655 {
656 this->stream_dict.replaceOrRemoveKey("/Filter", filter);
657 this->stream_dict.replaceOrRemoveKey("/DecodeParms", decode_parms);
658 if (length == 0)
659 {
660 QTC::TC("qpdf", "QPDF_Stream unknown stream length");
661 this->stream_dict.removeKey("/Length");
662 }
663 else
664 {
665 this->stream_dict.replaceKey(
666 "/Length", QPDFObjectHandle::newInteger(
667 QIntC::to_longlong(length)));
668 }
669 }
670
671 void
replaceDict(QPDFObjectHandle new_dict)672 QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
673 {
674 this->stream_dict = new_dict;
675 setDictDescription();
676 QPDFObjectHandle length_obj = new_dict.getKey("/Length");
677 if (length_obj.isInteger())
678 {
679 this->length = QIntC::to_size(length_obj.getUIntValue());
680 }
681 else
682 {
683 this->length = 0;
684 }
685 }
686
687 void
warn(QPDFExc const & e)688 QPDF_Stream::warn(QPDFExc const& e)
689 {
690 this->qpdf->warn(e);
691 }
692