1 #include <qpdf/QPDF_Stream.hh>
2 
3 #include <qpdf/QUtil.hh>
4 #include <qpdf/Pipeline.hh>
5 #include <qpdf/Pl_Flate.hh>
6 #include <qpdf/Pl_Buffer.hh>
7 #include <qpdf/Pl_Count.hh>
8 #include <qpdf/ContentNormalizer.hh>
9 #include <qpdf/QTC.hh>
10 #include <qpdf/QPDF.hh>
11 #include <qpdf/QPDFExc.hh>
12 #include <qpdf/Pl_QPDFTokenizer.hh>
13 #include <qpdf/QIntC.hh>
14 #include <qpdf/SF_FlateLzwDecode.hh>
15 #include <qpdf/SF_DCTDecode.hh>
16 #include <qpdf/SF_RunLengthDecode.hh>
17 #include <qpdf/SF_ASCII85Decode.hh>
18 #include <qpdf/SF_ASCIIHexDecode.hh>
19 
20 #include <stdexcept>
21 
22 class SF_Crypt: public QPDFStreamFilter
23 {
24   public:
25     SF_Crypt() = default;
26     virtual ~SF_Crypt() = default;
27 
setDecodeParms(QPDFObjectHandle decode_parms)28     virtual bool setDecodeParms(QPDFObjectHandle decode_parms)
29     {
30         if (decode_parms.isNull())
31         {
32             return true;
33         }
34         bool filterable = true;
35         for (auto const& key: decode_parms.getKeys())
36         {
37             if (((key == "/Type") || (key == "/Name")) &&
38                 (decode_parms.getKey("/Type").isNull() ||
39                  (decode_parms.getKey("/Type").isName() &&
40                   (decode_parms.getKey("/Type").getName() ==
41                    "/CryptFilterDecodeParms"))))
42             {
43                 // we handle this in decryptStream
44             }
45             else
46             {
47                 filterable = false;
48             }
49         }
50         return filterable;
51     }
52 
getDecodePipeline(Pipeline *)53     virtual Pipeline* getDecodePipeline(Pipeline*)
54     {
55         // Not used -- handled by pipeStreamData
56         return nullptr;
57     }
58 };
59 
60 std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
61     // The PDF specification provides these filter abbreviations for
62     // use in inline images, but according to table H.1 in the pre-ISO
63     // versions of the PDF specification, Adobe Reader also accepts
64     // them for stream filters.
65     {"/AHx", "/ASCIIHexDecode"},
66     {"/A85", "/ASCII85Decode"},
67     {"/LZW", "/LZWDecode"},
68     {"/Fl", "/FlateDecode"},
69     {"/RL", "/RunLengthDecode"},
70     {"/CCF", "/CCITTFaxDecode"},
71     {"/DCT", "/DCTDecode"},
72 };
73 
74 std::map<
75     std::string,
76     std::function<std::shared_ptr<QPDFStreamFilter>()>>
77 QPDF_Stream::filter_factories = {
__anon84f501840102() 78     {"/Crypt", []() { return std::make_shared<SF_Crypt>(); }},
79     {"/FlateDecode", SF_FlateLzwDecode::flate_factory},
80     {"/LZWDecode", SF_FlateLzwDecode::lzw_factory},
81     {"/RunLengthDecode", SF_RunLengthDecode::factory},
82     {"/DCTDecode", SF_DCTDecode::factory},
83     {"/ASCII85Decode", SF_ASCII85Decode::factory},
84     {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
85 };
86 
QPDF_Stream(QPDF * qpdf,int objid,int generation,QPDFObjectHandle stream_dict,qpdf_offset_t offset,size_t length)87 QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation,
88 			 QPDFObjectHandle stream_dict,
89 			 qpdf_offset_t offset, size_t length) :
90     qpdf(qpdf),
91     objid(objid),
92     generation(generation),
93     filter_on_write(true),
94     stream_dict(stream_dict),
95     offset(offset),
96     length(length)
97 {
98     if (! stream_dict.isDictionary())
99     {
100 	throw std::logic_error(
101 	    "stream object instantiated with non-dictionary "
102 	    "object for dictionary");
103     }
104     setStreamDescription();
105 }
106 
~QPDF_Stream()107 QPDF_Stream::~QPDF_Stream()
108 {
109 }
110 
111 void
registerStreamFilter(std::string const & filter_name,std::function<std::shared_ptr<QPDFStreamFilter> ()> factory)112 QPDF_Stream::registerStreamFilter(
113     std::string const& filter_name,
114     std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
115 {
116     filter_factories[filter_name] = factory;
117 }
118 
119 void
setFilterOnWrite(bool val)120 QPDF_Stream::setFilterOnWrite(bool val)
121 {
122     this->filter_on_write = val;
123 }
124 
125 bool
getFilterOnWrite() const126 QPDF_Stream::getFilterOnWrite() const
127 {
128     return this->filter_on_write;
129 }
130 
131 void
releaseResolved()132 QPDF_Stream::releaseResolved()
133 {
134     this->stream_provider = 0;
135     QPDFObjectHandle::ReleaseResolver::releaseResolved(this->stream_dict);
136 }
137 
138 void
setObjGen(int objid,int generation)139 QPDF_Stream::setObjGen(int objid, int generation)
140 {
141     if (! ((this->objid == 0) && (this->generation == 0)))
142     {
143 	throw std::logic_error(
144 	    "attempt to set object ID and generation of a stream"
145 	    " that already has them");
146     }
147     this->objid = objid;
148     this->generation = generation;
149 }
150 
151 std::string
unparse()152 QPDF_Stream::unparse()
153 {
154     // Unparse stream objects as indirect references
155     return QUtil::int_to_string(this->objid) + " " +
156 	QUtil::int_to_string(this->generation) + " R";
157 }
158 
159 JSON
getJSON()160 QPDF_Stream::getJSON()
161 {
162     return this->stream_dict.getJSON();
163 }
164 
165 QPDFObject::object_type_e
getTypeCode() const166 QPDF_Stream::getTypeCode() const
167 {
168     return QPDFObject::ot_stream;
169 }
170 
171 char const*
getTypeName() const172 QPDF_Stream::getTypeName() const
173 {
174     return "stream";
175 }
176 
177 void
setDescription(QPDF * qpdf,std::string const & description)178 QPDF_Stream::setDescription(QPDF* qpdf, std::string const& description)
179 {
180     this->QPDFObject::setDescription(qpdf, description);
181     setDictDescription();
182 }
183 
184 void
setStreamDescription()185 QPDF_Stream::setStreamDescription()
186 {
187     setDescription(
188         this->qpdf,
189         this->qpdf->getFilename() +
190         ", stream object " + QUtil::int_to_string(this->objid) + " " +
191         QUtil::int_to_string(this->generation));
192 }
193 
194 void
setDictDescription()195 QPDF_Stream::setDictDescription()
196 {
197     QPDF* qpdf = 0;
198     std::string description;
199     if ((! this->stream_dict.hasObjectDescription()) &&
200         getDescription(qpdf, description))
201     {
202         this->stream_dict.setObjectDescription(
203             qpdf, description + " -> stream dictionary");
204     }
205 }
206 
207 QPDFObjectHandle
getDict() const208 QPDF_Stream::getDict() const
209 {
210     return this->stream_dict;
211 }
212 
213 bool
isDataModified() const214 QPDF_Stream::isDataModified() const
215 {
216     return (! this->token_filters.empty());
217 }
218 
219 qpdf_offset_t
getOffset() const220 QPDF_Stream::getOffset() const
221 {
222     return this->offset;
223 }
224 
225 size_t
getLength() const226 QPDF_Stream::getLength() const
227 {
228     return this->length;
229 }
230 
231 PointerHolder<Buffer>
getStreamDataBuffer() const232 QPDF_Stream::getStreamDataBuffer() const
233 {
234     return this->stream_data;
235 }
236 
237 PointerHolder<QPDFObjectHandle::StreamDataProvider>
getStreamDataProvider() const238 QPDF_Stream::getStreamDataProvider() const
239 {
240     return this->stream_provider;
241 }
242 
243 PointerHolder<Buffer>
getStreamData(qpdf_stream_decode_level_e decode_level)244 QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
245 {
246     Pl_Buffer buf("stream data buffer");
247     bool filtered;
248     pipeStreamData(&buf, &filtered, 0, decode_level, false, false);
249     if (! filtered)
250     {
251 	throw QPDFExc(qpdf_e_unsupported, qpdf->getFilename(),
252                       "", this->offset,
253                       "getStreamData called on unfilterable stream");
254     }
255     QTC::TC("qpdf", "QPDF_Stream getStreamData");
256     return buf.getBuffer();
257 }
258 
259 PointerHolder<Buffer>
getRawStreamData()260 QPDF_Stream::getRawStreamData()
261 {
262     Pl_Buffer buf("stream data buffer");
263     if (! pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false))
264     {
265 	throw QPDFExc(qpdf_e_unsupported, qpdf->getFilename(),
266                       "", this->offset,
267                       "error getting raw stream data");
268     }
269     QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
270     return buf.getBuffer();
271 }
272 
273 bool
filterable(std::vector<std::shared_ptr<QPDFStreamFilter>> & filters,bool & specialized_compression,bool & lossy_compression)274 QPDF_Stream::filterable(
275     std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
276     bool& specialized_compression,
277     bool& lossy_compression)
278 {
279     // Check filters
280 
281     QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
282     bool filters_okay = true;
283 
284     std::vector<std::string> filter_names;
285 
286     if (filter_obj.isNull())
287     {
288 	// No filters
289     }
290     else if (filter_obj.isName())
291     {
292 	// One filter
293 	filter_names.push_back(filter_obj.getName());
294     }
295     else if (filter_obj.isArray())
296     {
297 	// Potentially multiple filters
298 	int n = filter_obj.getArrayNItems();
299 	for (int i = 0; i < n; ++i)
300 	{
301 	    QPDFObjectHandle item = filter_obj.getArrayItem(i);
302 	    if (item.isName())
303 	    {
304 		filter_names.push_back(item.getName());
305 	    }
306 	    else
307 	    {
308 		filters_okay = false;
309 	    }
310 	}
311     }
312     else
313     {
314 	filters_okay = false;
315     }
316 
317     if (! filters_okay)
318     {
319 	QTC::TC("qpdf", "QPDF_Stream invalid filter");
320 	warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
321                      "", this->offset,
322                      "stream filter type is not name or array"));
323         return false;
324     }
325 
326     bool filterable = true;
327 
328     for (auto& filter_name: filter_names)
329     {
330 	if (filter_abbreviations.count(filter_name))
331 	{
332 	    QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation");
333 	    filter_name = filter_abbreviations[filter_name];
334 	}
335 
336         auto ff = filter_factories.find(filter_name);
337         if (ff == filter_factories.end())
338         {
339             filterable = false;
340         }
341         else
342         {
343             filters.push_back((ff->second)());
344         }
345     }
346 
347     if (! filterable)
348     {
349         return false;
350     }
351 
352     // filters now contains a list of filters to be applied in order.
353     // See which ones we can support.
354 
355     // See if we can support any decode parameters that are specified.
356 
357     QPDFObjectHandle decode_obj = this->stream_dict.getKey("/DecodeParms");
358     std::vector<QPDFObjectHandle> decode_parms;
359     if (decode_obj.isArray() && (decode_obj.getArrayNItems() == 0))
360     {
361         decode_obj = QPDFObjectHandle::newNull();
362     }
363     if (decode_obj.isArray())
364     {
365         for (int i = 0; i < decode_obj.getArrayNItems(); ++i)
366         {
367             decode_parms.push_back(decode_obj.getArrayItem(i));
368         }
369     }
370     else
371     {
372         for (unsigned int i = 0; i < filter_names.size(); ++i)
373         {
374             decode_parms.push_back(decode_obj);
375         }
376     }
377 
378     // Ignore /DecodeParms entirely if /Filters is empty.  At least
379     // one case of a file whose /DecodeParms was [ << >> ] when
380     // /Filters was empty has been seen in the wild.
381     if ((filters.size() != 0) && (decode_parms.size() != filters.size()))
382     {
383         warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
384                      "", this->offset,
385                      "stream /DecodeParms length is"
386                      " inconsistent with filters"));
387         filterable = false;
388     }
389 
390     if (! filterable)
391     {
392         return false;
393     }
394 
395     for (size_t i = 0; i < filters.size(); ++i)
396     {
397         auto filter = filters.at(i);
398         auto decode_item = decode_parms.at(i);
399 
400         if (filter->setDecodeParms(decode_item))
401         {
402             if (filter->isSpecializedCompression())
403             {
404                 specialized_compression = true;
405             }
406             if (filter->isLossyCompression())
407             {
408                 specialized_compression = true;
409                 lossy_compression = true;
410             }
411         }
412         else
413         {
414             filterable = false;
415         }
416     }
417 
418     return filterable;
419 }
420 
421 bool
pipeStreamData(Pipeline * pipeline,bool * filterp,int encode_flags,qpdf_stream_decode_level_e decode_level,bool suppress_warnings,bool will_retry)422 QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
423                             int encode_flags,
424                             qpdf_stream_decode_level_e decode_level,
425                             bool suppress_warnings, bool will_retry)
426 {
427     std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
428     bool specialized_compression = false;
429     bool lossy_compression = false;
430     bool ignored;
431     if (filterp == nullptr)
432     {
433         filterp = &ignored;
434     }
435     bool& filter = *filterp;
436     filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none)));
437     bool success = true;
438     if (filter)
439     {
440 	filter = filterable(
441             filters, specialized_compression, lossy_compression);
442         if ((decode_level < qpdf_dl_all) && lossy_compression)
443         {
444             filter = false;
445         }
446         if ((decode_level < qpdf_dl_specialized) && specialized_compression)
447         {
448             filter = false;
449         }
450         QTC::TC("qpdf", "QPDF_Stream special filters",
451                 (! filter) ? 0 :
452                 lossy_compression ? 1 :
453                 specialized_compression ? 2 :
454                 3);
455     }
456 
457     if (pipeline == 0)
458     {
459 	QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
460         // Return value is whether we can filter in this case.
461 	return filter;
462     }
463 
464     // Construct the pipeline in reverse order. Force pipelines we
465     // create to be deleted when this function finishes. Pipelines
466     // created by QPDFStreamFilter objects will be deleted by those
467     // objects.
468     std::vector<PointerHolder<Pipeline>> to_delete;
469 
470     PointerHolder<ContentNormalizer> normalizer;
471     if (filter)
472     {
473 	if (encode_flags & qpdf_ef_compress)
474 	{
475 	    pipeline = new Pl_Flate("compress stream", pipeline,
476 				    Pl_Flate::a_deflate);
477 	    to_delete.push_back(pipeline);
478 	}
479 
480 	if (encode_flags & qpdf_ef_normalize)
481 	{
482             normalizer = new ContentNormalizer();
483 	    pipeline = new Pl_QPDFTokenizer(
484                 "normalizer", normalizer.getPointer(), pipeline);
485 	    to_delete.push_back(pipeline);
486 	}
487 
488         for (std::vector<PointerHolder<
489                  QPDFObjectHandle::TokenFilter> >::reverse_iterator iter =
490                  this->token_filters.rbegin();
491              iter != this->token_filters.rend(); ++iter)
492         {
493             pipeline = new Pl_QPDFTokenizer(
494                 "token filter", (*iter).getPointer(), pipeline);
495             to_delete.push_back(pipeline);
496         }
497 
498 	for (auto f_iter = filters.rbegin();
499              f_iter != filters.rend(); ++f_iter)
500 	{
501             auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline);
502             if (decode_pipeline)
503             {
504                 pipeline = decode_pipeline;
505             }
506             Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline);
507             if (flate != nullptr)
508             {
509                 flate->setWarnCallback([this](char const* msg, int code) {
510                     warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
511                                  "", this->offset, msg));
512                 });
513             }
514 	}
515     }
516 
517     if (this->stream_data.getPointer())
518     {
519 	QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data");
520 	pipeline->write(this->stream_data->getBuffer(),
521 			this->stream_data->getSize());
522 	pipeline->finish();
523     }
524     else if (this->stream_provider.getPointer())
525     {
526 	Pl_Count count("stream provider count", pipeline);
527         if (this->stream_provider->supportsRetry())
528         {
529             if (! this->stream_provider->provideStreamData(
530                     this->objid, this->generation, &count,
531                     suppress_warnings, will_retry))
532             {
533                 filter = false;
534                 success = false;
535             }
536         }
537         else
538         {
539             this->stream_provider->provideStreamData(
540                 this->objid, this->generation, &count);
541         }
542 	qpdf_offset_t actual_length = count.getCount();
543 	qpdf_offset_t desired_length = 0;
544         if (success && this->stream_dict.hasKey("/Length"))
545         {
546 	    desired_length = this->stream_dict.getKey("/Length").getIntValue();
547             if (actual_length == desired_length)
548             {
549                 QTC::TC("qpdf", "QPDF_Stream pipe use stream provider");
550             }
551             else
552             {
553                 QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
554                 // This would be caused by programmer error on the
555                 // part of a library user, not by invalid input data.
556                 throw std::runtime_error(
557                     "stream data provider for " +
558                     QUtil::int_to_string(this->objid) + " " +
559                     QUtil::int_to_string(this->generation) +
560                     " provided " +
561                     QUtil::int_to_string(actual_length) +
562                     " bytes instead of expected " +
563                     QUtil::int_to_string(desired_length) + " bytes");
564             }
565         }
566         else if (success)
567         {
568             QTC::TC("qpdf", "QPDF_Stream provider length not provided");
569             this->stream_dict.replaceKey(
570                 "/Length", QPDFObjectHandle::newInteger(actual_length));
571         }
572     }
573     else if (this->offset == 0)
574     {
575 	QTC::TC("qpdf", "QPDF_Stream pipe no stream data");
576 	throw std::logic_error(
577 	    "pipeStreamData called for stream with no data");
578     }
579     else
580     {
581 	QTC::TC("qpdf", "QPDF_Stream pipe original stream data");
582 	if (! QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation,
583                                          this->offset, this->length,
584                                          this->stream_dict, pipeline,
585                                          suppress_warnings,
586                                          will_retry))
587         {
588             filter = false;
589             success = false;
590         }
591     }
592 
593     if (filter &&
594         (! suppress_warnings) &&
595         normalizer.getPointer() &&
596         normalizer->anyBadTokens())
597     {
598         warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
599                      "", this->offset,
600                      "content normalization encountered bad tokens"));
601         if (normalizer->lastTokenWasBad())
602         {
603             QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
604             warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
605                          "", this->offset,
606                          "normalized content ended with a bad token;"
607                          " you may be able to resolve this by"
608                          " coalescing content streams in combination"
609                          " with normalizing content. From the command"
610                          " line, specify --coalesce-contents"));
611         }
612         warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
613                      "", this->offset,
614                      "Resulting stream data may be corrupted but is"
615                      " may still useful for manual inspection."
616                      " For more information on this warning, search"
617                      " for content normalization in the manual."));
618     }
619 
620     return success;
621 }
622 
623 void
replaceStreamData(PointerHolder<Buffer> data,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)624 QPDF_Stream::replaceStreamData(PointerHolder<Buffer> data,
625 			       QPDFObjectHandle const& filter,
626 			       QPDFObjectHandle const& decode_parms)
627 {
628     this->stream_data = data;
629     this->stream_provider = 0;
630     replaceFilterData(filter, decode_parms, data->getSize());
631 }
632 
633 void
replaceStreamData(PointerHolder<QPDFObjectHandle::StreamDataProvider> provider,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)634 QPDF_Stream::replaceStreamData(
635     PointerHolder<QPDFObjectHandle::StreamDataProvider> provider,
636     QPDFObjectHandle const& filter,
637     QPDFObjectHandle const& decode_parms)
638 {
639     this->stream_provider = provider;
640     this->stream_data = 0;
641     replaceFilterData(filter, decode_parms, 0);
642 }
643 
644 void
addTokenFilter(PointerHolder<QPDFObjectHandle::TokenFilter> token_filter)645 QPDF_Stream::addTokenFilter(
646     PointerHolder<QPDFObjectHandle::TokenFilter> token_filter)
647 {
648     this->token_filters.push_back(token_filter);
649 }
650 
651 void
replaceFilterData(QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms,size_t length)652 QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter,
653 			       QPDFObjectHandle const& decode_parms,
654 			       size_t length)
655 {
656     this->stream_dict.replaceOrRemoveKey("/Filter", filter);
657     this->stream_dict.replaceOrRemoveKey("/DecodeParms", decode_parms);
658     if (length == 0)
659     {
660         QTC::TC("qpdf", "QPDF_Stream unknown stream length");
661         this->stream_dict.removeKey("/Length");
662     }
663     else
664     {
665         this->stream_dict.replaceKey(
666             "/Length", QPDFObjectHandle::newInteger(
667                 QIntC::to_longlong(length)));
668     }
669 }
670 
671 void
replaceDict(QPDFObjectHandle new_dict)672 QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
673 {
674     this->stream_dict = new_dict;
675     setDictDescription();
676     QPDFObjectHandle length_obj = new_dict.getKey("/Length");
677     if (length_obj.isInteger())
678     {
679         this->length = QIntC::to_size(length_obj.getUIntValue());
680     }
681     else
682     {
683         this->length = 0;
684     }
685 }
686 
687 void
warn(QPDFExc const & e)688 QPDF_Stream::warn(QPDFExc const& e)
689 {
690     this->qpdf->warn(e);
691 }
692