1 #include <qpdf/QPDFObjectHandle.hh>
2 
3 #include <qpdf/QPDF.hh>
4 #include <qpdf/QPDF_Bool.hh>
5 #include <qpdf/QPDF_Null.hh>
6 #include <qpdf/QPDF_Integer.hh>
7 #include <qpdf/QPDF_Real.hh>
8 #include <qpdf/QPDF_Name.hh>
9 #include <qpdf/QPDF_String.hh>
10 #include <qpdf/QPDF_Operator.hh>
11 #include <qpdf/QPDF_InlineImage.hh>
12 #include <qpdf/QPDF_Array.hh>
13 #include <qpdf/QPDF_Dictionary.hh>
14 #include <qpdf/QPDF_Stream.hh>
15 #include <qpdf/QPDF_Reserved.hh>
16 #include <qpdf/Pl_Buffer.hh>
17 #include <qpdf/Pl_QPDFTokenizer.hh>
18 #include <qpdf/BufferInputSource.hh>
19 #include <qpdf/QPDFExc.hh>
20 #include <qpdf/QPDFPageObjectHelper.hh>
21 #include <qpdf/SparseOHArray.hh>
22 #include <qpdf/QPDFMatrix.hh>
23 
24 #include <qpdf/QTC.hh>
25 #include <qpdf/QUtil.hh>
26 #include <qpdf/QIntC.hh>
27 
28 #include <stdexcept>
29 #include <stdlib.h>
30 #include <ctype.h>
31 #include <limits.h>
32 #include <cstring>
33 #include <algorithm>
34 
// Empty tag type; an instance is thrown by
// QPDFObjectHandle::ParserCallbacks::terminateParsing().
class TerminateParsing {};
38 
StreamDataProvider(bool supports_retry)39 QPDFObjectHandle::StreamDataProvider::StreamDataProvider(
40     bool supports_retry) :
41     supports_retry(supports_retry)
42 {
43 }
44 
45 void
provideStreamData(int objid,int generation,Pipeline * pipeline)46 QPDFObjectHandle::StreamDataProvider::provideStreamData(
47     int objid, int generation, Pipeline* pipeline)
48 {
49     throw std::logic_error(
50         "you must override provideStreamData -- see QPDFObjectHandle.hh");
51 }
52 
53 bool
provideStreamData(int objid,int generation,Pipeline * pipeline,bool suppress_warnings,bool will_retry)54 QPDFObjectHandle::StreamDataProvider::provideStreamData(
55     int objid, int generation, Pipeline* pipeline,
56     bool suppress_warnings, bool will_retry)
57 {
58     throw std::logic_error(
59         "you must override provideStreamData -- see QPDFObjectHandle.hh");
60     return false;
61 }
62 
63 bool
supportsRetry()64 QPDFObjectHandle::StreamDataProvider::supportsRetry()
65 {
66     return this->supports_retry;
67 }
68 
69 class CoalesceProvider: public QPDFObjectHandle::StreamDataProvider
70 {
71   public:
CoalesceProvider(QPDFObjectHandle containing_page,QPDFObjectHandle old_contents)72     CoalesceProvider(QPDFObjectHandle containing_page,
73                      QPDFObjectHandle old_contents) :
74         containing_page(containing_page),
75         old_contents(old_contents)
76     {
77     }
~CoalesceProvider()78     virtual ~CoalesceProvider()
79     {
80     }
81     virtual void provideStreamData(int objid, int generation,
82                                    Pipeline* pipeline);
83 
84   private:
85     QPDFObjectHandle containing_page;
86     QPDFObjectHandle old_contents;
87 };
88 
89 void
provideStreamData(int,int,Pipeline * p)90 CoalesceProvider::provideStreamData(int, int, Pipeline* p)
91 {
92     QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data");
93     std::string description = "page object " +
94         QUtil::int_to_string(containing_page.getObjectID()) + " " +
95         QUtil::int_to_string(containing_page.getGeneration());
96     std::string all_description;
97     old_contents.pipeContentStreams(p, description, all_description);
98 }
99 
100 void
handleEOF()101 QPDFObjectHandle::TokenFilter::handleEOF()
102 {
103 }
104 
105 void
setPipeline(Pipeline * p)106 QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p)
107 {
108     this->pipeline = p;
109 }
110 
111 void
write(char const * data,size_t len)112 QPDFObjectHandle::TokenFilter::write(char const* data, size_t len)
113 {
114     if (! this->pipeline)
115     {
116         return;
117     }
118     if (len)
119     {
120 	this->pipeline->write(QUtil::unsigned_char_pointer(data), len);
121     }
122 }
123 
124 void
write(std::string const & str)125 QPDFObjectHandle::TokenFilter::write(std::string const& str)
126 {
127     write(str.c_str(), str.length());
128 }
129 
130 void
writeToken(QPDFTokenizer::Token const & token)131 QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
132 {
133     std::string value = token.getRawValue();
134     write(value.c_str(), value.length());
135 }
136 
137 void
handleObject(QPDFObjectHandle)138 QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
139 {
140     throw std::logic_error("You must override one of the"
141                            " handleObject methods in ParserCallbacks");
142 }
143 
144 void
handleObject(QPDFObjectHandle oh,size_t,size_t)145 QPDFObjectHandle::ParserCallbacks::handleObject(
146     QPDFObjectHandle oh, size_t, size_t)
147 {
148     // This version of handleObject was added in qpdf 9. If the
149     // developer did not override it, fall back to the older
150     // interface.
151     handleObject(oh);
152 }
153 
154 void
contentSize(size_t)155 QPDFObjectHandle::ParserCallbacks::contentSize(size_t)
156 {
157     // Ignore by default; overriding this is optional.
158 }
159 
160 void
terminateParsing()161 QPDFObjectHandle::ParserCallbacks::terminateParsing()
162 {
163     throw TerminateParsing();
164 }
165 
166 class LastChar: public Pipeline
167 {
168   public:
169     LastChar(Pipeline* next);
170     virtual ~LastChar() = default;
171     virtual void write(unsigned char* data, size_t len);
172     virtual void finish();
173     unsigned char getLastChar();
174 
175   private:
176     unsigned char last_char;
177 };
178 
LastChar(Pipeline * next)179 LastChar::LastChar(Pipeline* next) :
180     Pipeline("lastchar", next),
181     last_char(0)
182 {
183 }
184 
185 void
write(unsigned char * data,size_t len)186 LastChar::write(unsigned char* data, size_t len)
187 {
188     if (len > 0)
189     {
190         this->last_char = data[len - 1];
191     }
192     getNext()->write(data, len);
193 }
194 
195 void
finish()196 LastChar::finish()
197 {
198     getNext()->finish();
199 }
200 
201 unsigned char
getLastChar()202 LastChar::getLastChar()
203 {
204     return this->last_char;
205 }
206 
QPDFObjectHandle()207 QPDFObjectHandle::QPDFObjectHandle() :
208     initialized(false),
209     qpdf(0),
210     objid(0),
211     generation(0),
212     reserved(false)
213 {
214 }
215 
QPDFObjectHandle(QPDF * qpdf,int objid,int generation)216 QPDFObjectHandle::QPDFObjectHandle(QPDF* qpdf, int objid, int generation) :
217     initialized(true),
218     qpdf(qpdf),
219     objid(objid),
220     generation(generation),
221     reserved(false)
222 {
223 }
224 
QPDFObjectHandle(QPDFObject * data)225 QPDFObjectHandle::QPDFObjectHandle(QPDFObject* data) :
226     initialized(true),
227     qpdf(0),
228     objid(0),
229     generation(0),
230     obj(data),
231     reserved(false)
232 {
233 }
234 
235 void
releaseResolved()236 QPDFObjectHandle::releaseResolved()
237 {
238     // Recursively break any resolved references to indirect objects.
239     // Do not cross over indirect object boundaries to avoid an
240     // infinite loop.  This method may only be called during final
241     // destruction.  See comments in QPDF::~QPDF().
242     if (isIndirect())
243     {
244 	if (this->obj.getPointer())
245 	{
246 	    this->obj = 0;
247 	}
248     }
249     else
250     {
251 	QPDFObject::ObjAccessor::releaseResolved(this->obj.getPointer());
252     }
253 }
254 
255 void
setObjectDescriptionFromInput(QPDFObjectHandle object,QPDF * context,std::string const & description,PointerHolder<InputSource> input,qpdf_offset_t offset)256 QPDFObjectHandle::setObjectDescriptionFromInput(
257     QPDFObjectHandle object, QPDF* context,
258     std::string const& description, PointerHolder<InputSource> input,
259     qpdf_offset_t offset)
260 {
261     object.setObjectDescription(
262         context,
263         input->getName() + ", " + description +
264         " at offset " + QUtil::int_to_string(offset));
265 }
266 
267 bool
isInitialized() const268 QPDFObjectHandle::isInitialized() const
269 {
270     return this->initialized;
271 }
272 
273 QPDFObject::object_type_e
getTypeCode()274 QPDFObjectHandle::getTypeCode()
275 {
276     if (this->initialized)
277     {
278         dereference();
279         return this->obj->getTypeCode();
280     }
281     else
282     {
283         return QPDFObject::ot_uninitialized;
284     }
285 }
286 
287 char const*
getTypeName()288 QPDFObjectHandle::getTypeName()
289 {
290     if (this->initialized)
291     {
292         dereference();
293         return this->obj->getTypeName();
294     }
295     else
296     {
297         return "uninitialized";
298     }
299 }
300 
301 template <class T>
302 class QPDFObjectTypeAccessor
303 {
304   public:
check(QPDFObject * o)305     static bool check(QPDFObject* o)
306     {
307 	return (o && dynamic_cast<T*>(o));
308     }
check(QPDFObject const * o)309     static bool check(QPDFObject const* o)
310     {
311 	return (o && dynamic_cast<T const*>(o));
312     }
313 };
314 
315 bool
isBool()316 QPDFObjectHandle::isBool()
317 {
318     if (! this->initialized)
319     {
320         return false;
321     }
322     dereference();
323     return QPDFObjectTypeAccessor<QPDF_Bool>::check(obj.getPointer());
324 }
325 
326 bool
isDirectNull() const327 QPDFObjectHandle::isDirectNull() const
328 {
329     // Don't call dereference() -- this is a const method, and we know
330     // objid == 0, so there's nothing to resolve.
331     return (this->initialized && (this->objid == 0) &&
332             QPDFObjectTypeAccessor<QPDF_Null>::check(obj.getPointer()));
333 }
334 
335 bool
isNull()336 QPDFObjectHandle::isNull()
337 {
338     if (! this->initialized)
339     {
340         return false;
341     }
342     dereference();
343     return QPDFObjectTypeAccessor<QPDF_Null>::check(obj.getPointer());
344 }
345 
346 bool
isInteger()347 QPDFObjectHandle::isInteger()
348 {
349     if (! this->initialized)
350     {
351         return false;
352     }
353     dereference();
354     return QPDFObjectTypeAccessor<QPDF_Integer>::check(obj.getPointer());
355 }
356 
357 bool
isReal()358 QPDFObjectHandle::isReal()
359 {
360     if (! this->initialized)
361     {
362         return false;
363     }
364     dereference();
365     return QPDFObjectTypeAccessor<QPDF_Real>::check(obj.getPointer());
366 }
367 
368 bool
isNumber()369 QPDFObjectHandle::isNumber()
370 {
371     return (isInteger() || isReal());
372 }
373 
374 double
getNumericValue()375 QPDFObjectHandle::getNumericValue()
376 {
377     double result = 0.0;
378     if (isInteger())
379     {
380 	result = static_cast<double>(getIntValue());
381     }
382     else if (isReal())
383     {
384 	result = atof(getRealValue().c_str());
385     }
386     else
387     {
388         typeWarning("number", "returning 0");
389         QTC::TC("qpdf", "QPDFObjectHandle numeric non-numeric");
390     }
391     return result;
392 }
393 
394 bool
isName()395 QPDFObjectHandle::isName()
396 {
397     if (! this->initialized)
398     {
399         return false;
400     }
401     dereference();
402     return QPDFObjectTypeAccessor<QPDF_Name>::check(obj.getPointer());
403 }
404 
405 bool
isString()406 QPDFObjectHandle::isString()
407 {
408     if (! this->initialized)
409     {
410         return false;
411     }
412     dereference();
413     return QPDFObjectTypeAccessor<QPDF_String>::check(obj.getPointer());
414 }
415 
416 bool
isOperator()417 QPDFObjectHandle::isOperator()
418 {
419     if (! this->initialized)
420     {
421         return false;
422     }
423     dereference();
424     return QPDFObjectTypeAccessor<QPDF_Operator>::check(obj.getPointer());
425 }
426 
427 bool
isInlineImage()428 QPDFObjectHandle::isInlineImage()
429 {
430     if (! this->initialized)
431     {
432         return false;
433     }
434     dereference();
435     return QPDFObjectTypeAccessor<QPDF_InlineImage>::check(obj.getPointer());
436 }
437 
438 bool
isArray()439 QPDFObjectHandle::isArray()
440 {
441     if (! this->initialized)
442     {
443         return false;
444     }
445     dereference();
446     return QPDFObjectTypeAccessor<QPDF_Array>::check(obj.getPointer());
447 }
448 
449 bool
isDictionary()450 QPDFObjectHandle::isDictionary()
451 {
452     if (! this->initialized)
453     {
454         return false;
455     }
456     dereference();
457     return QPDFObjectTypeAccessor<QPDF_Dictionary>::check(obj.getPointer());
458 }
459 
460 bool
isStream()461 QPDFObjectHandle::isStream()
462 {
463     if (! this->initialized)
464     {
465         return false;
466     }
467     dereference();
468     return QPDFObjectTypeAccessor<QPDF_Stream>::check(obj.getPointer());
469 }
470 
471 bool
isReserved()472 QPDFObjectHandle::isReserved()
473 {
474     if (! this->initialized)
475     {
476         return false;
477     }
478     // dereference will clear reserved if this has been replaced
479     dereference();
480     return this->reserved;
481 }
482 
483 bool
isIndirect()484 QPDFObjectHandle::isIndirect()
485 {
486     if (! this->initialized)
487     {
488         return false;
489     }
490     return (this->objid != 0);
491 }
492 
493 bool
isScalar()494 QPDFObjectHandle::isScalar()
495 {
496     return (! (isArray() || isDictionary() || isStream() ||
497                isOperator() || isInlineImage()));
498 }
499 
500 // Bool accessors
501 
502 bool
getBoolValue()503 QPDFObjectHandle::getBoolValue()
504 {
505     if (isBool())
506     {
507         return dynamic_cast<QPDF_Bool*>(obj.getPointer())->getVal();
508     }
509     else
510     {
511         typeWarning("boolean", "returning false");
512         QTC::TC("qpdf", "QPDFObjectHandle boolean returning false");
513         return false;
514     }
515 }
516 
517 // Integer accessors
518 
519 long long
getIntValue()520 QPDFObjectHandle::getIntValue()
521 {
522     if (isInteger())
523     {
524         return dynamic_cast<QPDF_Integer*>(obj.getPointer())->getVal();
525     }
526     else
527     {
528         typeWarning("integer", "returning 0");
529         QTC::TC("qpdf", "QPDFObjectHandle integer returning 0");
530         return 0;
531     }
532 }
533 
534 int
getIntValueAsInt()535 QPDFObjectHandle::getIntValueAsInt()
536 {
537     int result = 0;
538     long long v = getIntValue();
539     if (v < INT_MIN)
540     {
541         QTC::TC("qpdf", "QPDFObjectHandle int returning INT_MIN");
542         warnIfPossible(
543             "requested value of integer is too small; returning INT_MIN",
544             false);
545         result = INT_MIN;
546     }
547     else if (v > INT_MAX)
548     {
549         QTC::TC("qpdf", "QPDFObjectHandle int returning INT_MAX");
550         warnIfPossible(
551             "requested value of integer is too big; returning INT_MAX",
552             false);
553         result = INT_MAX;
554     }
555     else
556     {
557         result = static_cast<int>(v);
558     }
559     return result;
560 }
561 
562 unsigned long long
getUIntValue()563 QPDFObjectHandle::getUIntValue()
564 {
565     unsigned long long result = 0;
566     long long v = getIntValue();
567     if (v < 0)
568     {
569         QTC::TC("qpdf", "QPDFObjectHandle uint returning 0");
570         warnIfPossible(
571             "unsigned value request for negative number; returning 0",
572             false);
573     }
574     else
575     {
576         result = static_cast<unsigned long long>(v);
577     }
578     return result;
579 }
580 
581 unsigned int
getUIntValueAsUInt()582 QPDFObjectHandle::getUIntValueAsUInt()
583 {
584     unsigned int result = 0;
585     long long v = getIntValue();
586     if (v < 0)
587     {
588         QTC::TC("qpdf", "QPDFObjectHandle uint uint returning 0");
589         warnIfPossible(
590             "unsigned integer value request for negative number; returning 0",
591             false);
592         result = 0;
593     }
594     else if (v > UINT_MAX)
595     {
596         QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX");
597         warnIfPossible(
598             "requested value of unsigned integer is too big;"
599             " returning UINT_MAX",
600             false);
601         result = UINT_MAX;
602     }
603     else
604     {
605         result = static_cast<unsigned int>(v);
606     }
607     return result;
608 }
609 
610 // Real accessors
611 
612 std::string
getRealValue()613 QPDFObjectHandle::getRealValue()
614 {
615     if (isReal())
616     {
617         return dynamic_cast<QPDF_Real*>(obj.getPointer())->getVal();
618     }
619     else
620     {
621         typeWarning("real", "returning 0.0");
622         QTC::TC("qpdf", "QPDFObjectHandle real returning 0.0");
623         return "0.0";
624     }
625 }
626 
627 // Name accessors
628 
629 std::string
getName()630 QPDFObjectHandle::getName()
631 {
632     if (isName())
633     {
634         return dynamic_cast<QPDF_Name*>(obj.getPointer())->getName();
635     }
636     else
637     {
638         typeWarning("name", "returning dummy name");
639         QTC::TC("qpdf", "QPDFObjectHandle name returning dummy name");
640         return "/QPDFFakeName";
641     }
642 }
643 
644 // String accessors
645 
646 std::string
getStringValue()647 QPDFObjectHandle::getStringValue()
648 {
649     if (isString())
650     {
651         return dynamic_cast<QPDF_String*>(obj.getPointer())->getVal();
652     }
653     else
654     {
655         typeWarning("string", "returning empty string");
656         QTC::TC("qpdf", "QPDFObjectHandle string returning empty string");
657         return "";
658     }
659 }
660 
661 std::string
getUTF8Value()662 QPDFObjectHandle::getUTF8Value()
663 {
664     if (isString())
665     {
666         return dynamic_cast<QPDF_String*>(obj.getPointer())->getUTF8Val();
667     }
668     else
669     {
670         typeWarning("string", "returning empty string");
671         QTC::TC("qpdf", "QPDFObjectHandle string returning empty utf8");
672         return "";
673     }
674 }
675 
676 // Operator and Inline Image accessors
677 
678 std::string
getOperatorValue()679 QPDFObjectHandle::getOperatorValue()
680 {
681     if (isOperator())
682     {
683         return dynamic_cast<QPDF_Operator*>(obj.getPointer())->getVal();
684     }
685     else
686     {
687         typeWarning("operator", "returning fake value");
688         QTC::TC("qpdf", "QPDFObjectHandle operator returning fake value");
689         return "QPDFFAKE";
690     }
691 }
692 
693 std::string
getInlineImageValue()694 QPDFObjectHandle::getInlineImageValue()
695 {
696     if (isInlineImage())
697     {
698         return dynamic_cast<QPDF_InlineImage*>(obj.getPointer())->getVal();
699     }
700     else
701     {
702         typeWarning("inlineimage", "returning empty data");
703         QTC::TC("qpdf", "QPDFObjectHandle inlineimage returning empty data");
704         return "";
705     }
706 }
707 
708 // Array accessors
709 
710 QPDFObjectHandle::QPDFArrayItems
aitems()711 QPDFObjectHandle::aitems()
712 {
713     return QPDFArrayItems(*this);
714 }
715 
716 int
getArrayNItems()717 QPDFObjectHandle::getArrayNItems()
718 {
719     if (isArray())
720     {
721         return dynamic_cast<QPDF_Array*>(obj.getPointer())->getNItems();
722     }
723     else
724     {
725         typeWarning("array", "treating as empty");
726         QTC::TC("qpdf", "QPDFObjectHandle array treating as empty");
727         return 0;
728     }
729 }
730 
731 QPDFObjectHandle
getArrayItem(int n)732 QPDFObjectHandle::getArrayItem(int n)
733 {
734     QPDFObjectHandle result;
735     if (isArray() && (n < getArrayNItems()) && (n >= 0))
736     {
737         result = dynamic_cast<QPDF_Array*>(obj.getPointer())->getItem(n);
738     }
739     else
740     {
741         result = newNull();
742         if (isArray())
743         {
744             objectWarning("returning null for out of bounds array access");
745             QTC::TC("qpdf", "QPDFObjectHandle array bounds");
746         }
747         else
748         {
749             typeWarning("array", "returning null");
750             QTC::TC("qpdf", "QPDFObjectHandle array null for non-array");
751         }
752         QPDF* context = 0;
753         std::string description;
754         if (this->obj->getDescription(context, description))
755         {
756             result.setObjectDescription(
757                 context,
758                 description +
759                 " -> null returned from invalid array access");
760         }
761     }
762     return result;
763 }
764 
765 bool
isRectangle()766 QPDFObjectHandle::isRectangle()
767 {
768     if (! isArray())
769     {
770         return false;
771     }
772     if (getArrayNItems() != 4)
773     {
774         return false;
775     }
776     for (int i = 0; i < 4; ++i)
777     {
778         if (! getArrayItem(i).isNumber())
779         {
780             return false;
781         }
782     }
783     return true;
784 }
785 
786 bool
isMatrix()787 QPDFObjectHandle::isMatrix()
788 {
789     if (! isArray())
790     {
791         return false;
792     }
793     if (getArrayNItems() != 6)
794     {
795         return false;
796     }
797     for (int i = 0; i < 6; ++i)
798     {
799         if (! getArrayItem(i).isNumber())
800         {
801             return false;
802         }
803     }
804     return true;
805 }
806 
807 QPDFObjectHandle::Rectangle
getArrayAsRectangle()808 QPDFObjectHandle::getArrayAsRectangle()
809 {
810     Rectangle result;
811     if (isRectangle())
812     {
813         // Rectangle coordinates are always supposed to be llx, lly,
814         // urx, ury, but files have been found in the wild where
815         // llx > urx or lly > ury.
816         double i0 = getArrayItem(0).getNumericValue();
817         double i1 = getArrayItem(1).getNumericValue();
818         double i2 = getArrayItem(2).getNumericValue();
819         double i3 = getArrayItem(3).getNumericValue();
820         result = Rectangle(std::min(i0, i2),
821                            std::min(i1, i3),
822                            std::max(i0, i2),
823                            std::max(i1, i3));
824     }
825     return result;
826 }
827 
828 QPDFObjectHandle::Matrix
getArrayAsMatrix()829 QPDFObjectHandle::getArrayAsMatrix()
830 {
831     Matrix result;
832     if (isMatrix())
833     {
834         result = Matrix(getArrayItem(0).getNumericValue(),
835                         getArrayItem(1).getNumericValue(),
836                         getArrayItem(2).getNumericValue(),
837                         getArrayItem(3).getNumericValue(),
838                         getArrayItem(4).getNumericValue(),
839                         getArrayItem(5).getNumericValue());
840     }
841     return result;
842 }
843 
844 std::vector<QPDFObjectHandle>
getArrayAsVector()845 QPDFObjectHandle::getArrayAsVector()
846 {
847     std::vector<QPDFObjectHandle> result;
848     if (isArray())
849     {
850         dynamic_cast<QPDF_Array*>(obj.getPointer())->getAsVector(result);
851     }
852     else
853     {
854         typeWarning("array", "treating as empty");
855         QTC::TC("qpdf", "QPDFObjectHandle array treating as empty vector");
856     }
857     return result;
858 }
859 
860 // Array mutators
861 
862 void
setArrayItem(int n,QPDFObjectHandle const & item)863 QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item)
864 {
865     if (isArray())
866     {
867         checkOwnership(item);
868         dynamic_cast<QPDF_Array*>(obj.getPointer())->setItem(n, item);
869     }
870     else
871     {
872         typeWarning("array", "ignoring attempt to set item");
873         QTC::TC("qpdf", "QPDFObjectHandle array ignoring set item");
874     }
875 }
876 
877 void
setArrayFromVector(std::vector<QPDFObjectHandle> const & items)878 QPDFObjectHandle::setArrayFromVector(std::vector<QPDFObjectHandle> const& items)
879 {
880     if (isArray())
881     {
882         for (auto const& item: items)
883         {
884             checkOwnership(item);
885         }
886         dynamic_cast<QPDF_Array*>(obj.getPointer())->setFromVector(items);
887     }
888     else
889     {
890         typeWarning("array", "ignoring attempt to replace items");
891         QTC::TC("qpdf", "QPDFObjectHandle array ignoring replace items");
892     }
893 }
894 
895 void
insertItem(int at,QPDFObjectHandle const & item)896 QPDFObjectHandle::insertItem(int at, QPDFObjectHandle const& item)
897 {
898     if (isArray())
899     {
900         dynamic_cast<QPDF_Array*>(obj.getPointer())->insertItem(at, item);
901     }
902     else
903     {
904         typeWarning("array", "ignoring attempt to insert item");
905         QTC::TC("qpdf", "QPDFObjectHandle array ignoring insert item");
906     }
907 }
908 
909 void
appendItem(QPDFObjectHandle const & item)910 QPDFObjectHandle::appendItem(QPDFObjectHandle const& item)
911 {
912     if (isArray())
913     {
914         checkOwnership(item);
915         dynamic_cast<QPDF_Array*>(obj.getPointer())->appendItem(item);
916     }
917     else
918     {
919         typeWarning("array", "ignoring attempt to append item");
920         QTC::TC("qpdf", "QPDFObjectHandle array ignoring append item");
921     }
922 }
923 
924 void
eraseItem(int at)925 QPDFObjectHandle::eraseItem(int at)
926 {
927     if (isArray() && (at < getArrayNItems()) && (at >= 0))
928     {
929         dynamic_cast<QPDF_Array*>(obj.getPointer())->eraseItem(at);
930     }
931     else
932     {
933         if (isArray())
934         {
935             objectWarning("ignoring attempt to erase out of bounds array item");
936             QTC::TC("qpdf", "QPDFObjectHandle erase array bounds");
937         }
938         else
939         {
940             typeWarning("array", "ignoring attempt to erase item");
941             QTC::TC("qpdf", "QPDFObjectHandle array ignoring erase item");
942         }
943     }
944 }
945 
946 // Dictionary accessors
947 
948 QPDFObjectHandle::QPDFDictItems
ditems()949 QPDFObjectHandle::ditems()
950 {
951     return QPDFDictItems(*this);
952 }
953 
954 bool
hasKey(std::string const & key)955 QPDFObjectHandle::hasKey(std::string const& key)
956 {
957     if (isDictionary())
958     {
959         return dynamic_cast<QPDF_Dictionary*>(obj.getPointer())->hasKey(key);
960     }
961     else
962     {
963         typeWarning("dictionary",
964                     "returning false for a key containment request");
965         QTC::TC("qpdf", "QPDFObjectHandle dictionary false for hasKey");
966         return false;
967     }
968 }
969 
970 QPDFObjectHandle
getKey(std::string const & key)971 QPDFObjectHandle::getKey(std::string const& key)
972 {
973     QPDFObjectHandle result;
974     if (isDictionary())
975     {
976         result = dynamic_cast<QPDF_Dictionary*>(
977             obj.getPointer())->getKey(key);
978     }
979     else
980     {
981         typeWarning(
982             "dictionary", "returning null for attempted key retrieval");
983         QTC::TC("qpdf", "QPDFObjectHandle dictionary null for getKey");
984         result = newNull();
985         QPDF* qpdf = 0;
986         std::string description;
987         if (this->obj->getDescription(qpdf, description))
988         {
989             result.setObjectDescription(
990                 qpdf,
991                 description +
992                 " -> null returned from getting key " +
993                 key + " from non-Dictionary");
994         }
995     }
996     return result;
997 }
998 
999 std::set<std::string>
getKeys()1000 QPDFObjectHandle::getKeys()
1001 {
1002     std::set<std::string> result;
1003     if (isDictionary())
1004     {
1005         result = dynamic_cast<QPDF_Dictionary*>(obj.getPointer())->getKeys();
1006     }
1007     else
1008     {
1009         typeWarning("dictionary", "treating as empty");
1010         QTC::TC("qpdf", "QPDFObjectHandle dictionary empty set for getKeys");
1011     }
1012     return result;
1013 }
1014 
1015 std::map<std::string, QPDFObjectHandle>
getDictAsMap()1016 QPDFObjectHandle::getDictAsMap()
1017 {
1018     std::map<std::string, QPDFObjectHandle> result;
1019     if (isDictionary())
1020     {
1021         result = dynamic_cast<QPDF_Dictionary*>(
1022             obj.getPointer())->getAsMap();
1023     }
1024     else
1025     {
1026         typeWarning("dictionary", "treating as empty");
1027         QTC::TC("qpdf", "QPDFObjectHandle dictionary empty map for asMap");
1028     }
1029     return result;
1030 }
1031 
1032 // Array and Name accessors
1033 bool
isOrHasName(std::string const & value)1034 QPDFObjectHandle::isOrHasName(std::string const& value)
1035 {
1036     if (isName() && (getName() == value))
1037     {
1038 	return true;
1039     }
1040     else if (isArray())
1041     {
1042 	int n = getArrayNItems();
1043 	for (int i = 0; i < n; ++i)
1044 	{
1045 	    QPDFObjectHandle item = getArrayItem(0);
1046 	    if (item.isName() && (item.getName() == value))
1047 	    {
1048 		return true;
1049 	    }
1050 	}
1051     }
1052     return false;
1053 }
1054 
1055 void
makeResourcesIndirect(QPDF & owning_qpdf)1056 QPDFObjectHandle::makeResourcesIndirect(QPDF& owning_qpdf)
1057 {
1058     if (! isDictionary())
1059     {
1060         return;
1061     }
1062     for (auto const& i1: ditems())
1063     {
1064         QPDFObjectHandle sub = i1.second;
1065         if (! sub.isDictionary())
1066         {
1067             continue;
1068         }
1069         for (auto i2: sub.ditems())
1070         {
1071             std::string const& key = i2.first;
1072             QPDFObjectHandle val = i2.second;
1073             if (! val.isIndirect())
1074             {
1075                 sub.replaceKey(key, owning_qpdf.makeIndirectObject(val));
1076             }
1077         }
1078     }
1079 }
1080 
1081 void
mergeResources(QPDFObjectHandle other)1082 QPDFObjectHandle::mergeResources(QPDFObjectHandle other)
1083 {
1084     mergeResources(other, nullptr);
1085 }
1086 
1087 void
mergeResources(QPDFObjectHandle other,std::map<std::string,std::map<std::string,std::string>> * conflicts)1088 QPDFObjectHandle::mergeResources(
1089     QPDFObjectHandle other,
1090     std::map<std::string, std::map<std::string, std::string>>* conflicts)
1091 {
1092     if (! (isDictionary() && other.isDictionary()))
1093     {
1094         QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch");
1095         return;
1096     }
1097 
1098     auto make_og_to_name = [](
1099         QPDFObjectHandle& dict,
1100         std::map<QPDFObjGen, std::string>& og_to_name)
1101     {
1102         for (auto i: dict.ditems())
1103         {
1104             if (i.second.isIndirect())
1105             {
1106                 og_to_name[i.second.getObjGen()] = i.first;
1107             }
1108         }
1109     };
1110 
1111     // This algorithm is described in comments in QPDFObjectHandle.hh
1112     // above the declaration of mergeResources.
1113     for (auto o_top: other.ditems())
1114     {
1115         std::string const& rtype = o_top.first;
1116         QPDFObjectHandle other_val = o_top.second;
1117         if (hasKey(rtype))
1118         {
1119             QPDFObjectHandle this_val = getKey(rtype);
1120             if (this_val.isDictionary() && other_val.isDictionary())
1121             {
1122                 if (this_val.isIndirect())
1123                 {
1124                     // Do this even if there are no keys. Various
1125                     // places in the code call mergeResources with
1126                     // resource dictionaries that contain empty
1127                     // subdictionaries just to get this shallow copy
1128                     // functionality.
1129                     QTC::TC("qpdf", "QPDFObjectHandle replace with copy");
1130                     this_val = this_val.shallowCopy();
1131                     replaceKey(rtype, this_val);
1132                 }
1133                 std::map<QPDFObjGen, std::string> og_to_name;
1134                 std::set<std::string> rnames;
1135                 int min_suffix = 1;
1136                 bool initialized_maps = false;
1137                 for (auto ov_iter: other_val.ditems())
1138                 {
1139                     std::string const& key = ov_iter.first;
1140                     QPDFObjectHandle rval = ov_iter.second;
1141                     if (! this_val.hasKey(key))
1142                     {
1143                         if (! rval.isIndirect())
1144                         {
1145                             QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy");
1146                             rval = rval.shallowCopy();
1147                         }
1148                         this_val.replaceKey(key, rval);
1149                     }
1150                     else if (conflicts)
1151                     {
1152                         if (! initialized_maps)
1153                         {
1154                             make_og_to_name(this_val, og_to_name);
1155                             rnames = this_val.getResourceNames();
1156                             initialized_maps = true;
1157                         }
1158                         auto rval_og = rval.getObjGen();
1159                         if (rval.isIndirect() &&
1160                             og_to_name.count(rval_og))
1161                         {
1162                             QTC::TC("qpdf", "QPDFObjectHandle merge reuse");
1163                             auto new_key = og_to_name[rval_og];
1164                             if (new_key != key)
1165                             {
1166                                 (*conflicts)[rtype][key] = new_key;
1167                             }
1168                         }
1169                         else
1170                         {
1171                             QTC::TC("qpdf", "QPDFObjectHandle merge generate");
1172                             std::string new_key = getUniqueResourceName(
1173                                 key + "_", min_suffix, &rnames);
1174                             (*conflicts)[rtype][key] = new_key;
1175                             this_val.replaceKey(new_key, rval);
1176                         }
1177                     }
1178                 }
1179             }
1180             else if (this_val.isArray() && other_val.isArray())
1181             {
1182                 std::set<std::string> scalars;
1183                 for (auto this_item: this_val.aitems())
1184                 {
1185                     if (this_item.isScalar())
1186                     {
1187                         scalars.insert(this_item.unparse());
1188                     }
1189                 }
1190                 for (auto other_item: other_val.aitems())
1191                 {
1192                     if (other_item.isScalar())
1193                     {
1194                         if (scalars.count(other_item.unparse()) == 0)
1195                         {
1196                             QTC::TC("qpdf", "QPDFObjectHandle merge array");
1197                             this_val.appendItem(other_item);
1198                         }
1199                         else
1200                         {
1201                             QTC::TC("qpdf", "QPDFObjectHandle merge array dup");
1202                         }
1203                     }
1204                 }
1205             }
1206         }
1207         else
1208         {
1209             QTC::TC("qpdf", "QPDFObjectHandle merge copy from other");
1210             replaceKey(rtype, other_val.shallowCopy());
1211         }
1212     }
1213 }
1214 
1215 std::set<std::string>
getResourceNames()1216 QPDFObjectHandle::getResourceNames()
1217 {
1218     // Return second-level dictionary keys
1219     std::set<std::string> result;
1220     if (! isDictionary())
1221     {
1222         return result;
1223     }
1224     std::set<std::string> keys = getKeys();
1225     for (std::set<std::string>::iterator iter = keys.begin();
1226          iter != keys.end(); ++iter)
1227     {
1228         std::string const& key = *iter;
1229         QPDFObjectHandle val = getKey(key);
1230         if (val.isDictionary())
1231         {
1232             std::set<std::string> val_keys = val.getKeys();
1233             for (std::set<std::string>::iterator i2 = val_keys.begin();
1234                  i2 != val_keys.end(); ++i2)
1235             {
1236                 result.insert(*i2);
1237             }
1238         }
1239     }
1240     return result;
1241 }
1242 
1243 std::string
getUniqueResourceName(std::string const & prefix,int & min_suffix)1244 QPDFObjectHandle::getUniqueResourceName(std::string const& prefix,
1245                                         int& min_suffix)
1246 {
1247     return getUniqueResourceName(prefix, min_suffix, nullptr);
1248 }
1249 
1250 std::string
getUniqueResourceName(std::string const & prefix,int & min_suffix,std::set<std::string> * namesp)1251 QPDFObjectHandle::getUniqueResourceName(std::string const& prefix,
1252                                         int& min_suffix,
1253                                         std::set<std::string>* namesp)
1254 
1255 {
1256     std::set<std::string> names = (namesp ? *namesp : getResourceNames());
1257     int max_suffix = min_suffix + QIntC::to_int(names.size());
1258     while (min_suffix <= max_suffix)
1259     {
1260         std::string candidate = prefix + QUtil::int_to_string(min_suffix);
1261         if (names.count(candidate) == 0)
1262         {
1263             return candidate;
1264         }
1265         // Increment after return; min_suffix should be the value
1266         // used, not the next value.
1267         ++min_suffix;
1268     }
1269     // This could only happen if there is a coding error.
1270     // The number of candidates we test is more than the
1271     // number of keys we're checking against.
1272     throw std::logic_error("unable to find unconflicting name in"
1273                            " QPDFObjectHandle::getUniqueResourceName");
1274 }
1275 
1276 // Indirect object accessors
1277 QPDF*
getOwningQPDF()1278 QPDFObjectHandle::getOwningQPDF()
1279 {
1280     // Will be null for direct objects
1281     return this->qpdf;
1282 }
1283 
1284 // Dictionary mutators
1285 
1286 void
replaceKey(std::string const & key,QPDFObjectHandle value)1287 QPDFObjectHandle::replaceKey(std::string const& key,
1288 			    QPDFObjectHandle value)
1289 {
1290     if (isDictionary())
1291     {
1292         checkOwnership(value);
1293         dynamic_cast<QPDF_Dictionary*>(
1294             obj.getPointer())->replaceKey(key, value);
1295     }
1296     else
1297     {
1298         typeWarning("dictionary", "ignoring key replacement request");
1299         QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring replaceKey");
1300     }
1301 }
1302 
1303 void
removeKey(std::string const & key)1304 QPDFObjectHandle::removeKey(std::string const& key)
1305 {
1306     if (isDictionary())
1307     {
1308         dynamic_cast<QPDF_Dictionary*>(obj.getPointer())->removeKey(key);
1309     }
1310     else
1311     {
1312         typeWarning("dictionary", "ignoring key removal request");
1313         QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring removeKey");
1314     }
1315 }
1316 
1317 void
replaceOrRemoveKey(std::string const & key,QPDFObjectHandle value)1318 QPDFObjectHandle::replaceOrRemoveKey(std::string const& key,
1319 				     QPDFObjectHandle value)
1320 {
1321     if (isDictionary())
1322     {
1323         checkOwnership(value);
1324         dynamic_cast<QPDF_Dictionary*>(
1325             obj.getPointer())->replaceOrRemoveKey(key, value);
1326     }
1327     else
1328     {
1329         typeWarning("dictionary", "ignoring key removal/replacement request");
1330         QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring removereplace");
1331     }
1332 }
1333 
1334 // Stream accessors
1335 QPDFObjectHandle
getDict()1336 QPDFObjectHandle::getDict()
1337 {
1338     assertStream();
1339     return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
1340 }
1341 
1342 void
setFilterOnWrite(bool val)1343 QPDFObjectHandle::setFilterOnWrite(bool val)
1344 {
1345     assertStream();
1346     dynamic_cast<QPDF_Stream*>(obj.getPointer())->setFilterOnWrite(val);
1347 }
1348 
1349 bool
getFilterOnWrite()1350 QPDFObjectHandle::getFilterOnWrite()
1351 {
1352     assertStream();
1353     return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getFilterOnWrite();
1354 }
1355 
1356 bool
isDataModified()1357 QPDFObjectHandle::isDataModified()
1358 {
1359     assertStream();
1360     return dynamic_cast<QPDF_Stream*>(obj.getPointer())->isDataModified();
1361 }
1362 
1363 void
replaceDict(QPDFObjectHandle new_dict)1364 QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
1365 {
1366     assertStream();
1367     dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceDict(new_dict);
1368 }
1369 
1370 PointerHolder<Buffer>
getStreamData(qpdf_stream_decode_level_e level)1371 QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
1372 {
1373     assertStream();
1374     return dynamic_cast<QPDF_Stream*>(
1375         obj.getPointer())->getStreamData(level);
1376 }
1377 
1378 PointerHolder<Buffer>
getRawStreamData()1379 QPDFObjectHandle::getRawStreamData()
1380 {
1381     assertStream();
1382     return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getRawStreamData();
1383 }
1384 
1385 bool
pipeStreamData(Pipeline * p,bool * filtering_attempted,int encode_flags,qpdf_stream_decode_level_e decode_level,bool suppress_warnings,bool will_retry)1386 QPDFObjectHandle::pipeStreamData(Pipeline* p, bool* filtering_attempted,
1387                                  int encode_flags,
1388                                  qpdf_stream_decode_level_e decode_level,
1389                                  bool suppress_warnings, bool will_retry)
1390 {
1391     assertStream();
1392     return dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
1393 	p, filtering_attempted, encode_flags, decode_level,
1394         suppress_warnings, will_retry);
1395 }
1396 
1397 bool
pipeStreamData(Pipeline * p,int encode_flags,qpdf_stream_decode_level_e decode_level,bool suppress_warnings,bool will_retry)1398 QPDFObjectHandle::pipeStreamData(Pipeline* p,
1399                                  int encode_flags,
1400                                  qpdf_stream_decode_level_e decode_level,
1401                                  bool suppress_warnings, bool will_retry)
1402 {
1403     assertStream();
1404     bool filtering_attempted;
1405     dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
1406 	p, &filtering_attempted, encode_flags, decode_level,
1407         suppress_warnings, will_retry);
1408     return filtering_attempted;
1409 }
1410 
1411 bool
pipeStreamData(Pipeline * p,bool filter,bool normalize,bool compress)1412 QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
1413 				 bool normalize, bool compress)
1414 {
1415     int encode_flags = 0;
1416     qpdf_stream_decode_level_e decode_level = qpdf_dl_none;
1417     if (filter)
1418     {
1419         decode_level = qpdf_dl_generalized;
1420         if (normalize)
1421         {
1422             encode_flags |= qpdf_ef_normalize;
1423         }
1424         if (compress)
1425         {
1426             encode_flags |= qpdf_ef_compress;
1427         }
1428     }
1429     return pipeStreamData(p, encode_flags, decode_level, false);
1430 }
1431 
1432 void
replaceStreamData(PointerHolder<Buffer> data,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1433 QPDFObjectHandle::replaceStreamData(PointerHolder<Buffer> data,
1434 				    QPDFObjectHandle const& filter,
1435 				    QPDFObjectHandle const& decode_parms)
1436 {
1437     assertStream();
1438     dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1439 	data, filter, decode_parms);
1440 }
1441 
1442 void
replaceStreamData(std::string const & data,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1443 QPDFObjectHandle::replaceStreamData(std::string const& data,
1444 				    QPDFObjectHandle const& filter,
1445 				    QPDFObjectHandle const& decode_parms)
1446 {
1447     assertStream();
1448     PointerHolder<Buffer> b = new Buffer(data.length());
1449     unsigned char* bp = b->getBuffer();
1450     memcpy(bp, data.c_str(), data.length());
1451     dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1452 	b, filter, decode_parms);
1453 }
1454 
1455 void
replaceStreamData(PointerHolder<StreamDataProvider> provider,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1456 QPDFObjectHandle::replaceStreamData(PointerHolder<StreamDataProvider> provider,
1457 				    QPDFObjectHandle const& filter,
1458 				    QPDFObjectHandle const& decode_parms)
1459 {
1460     assertStream();
1461     dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1462 	provider, filter, decode_parms);
1463 }
1464 
1465 class FunctionProvider: public QPDFObjectHandle::StreamDataProvider
1466 {
1467   public:
FunctionProvider(std::function<void (Pipeline *)> provider)1468     FunctionProvider(std::function<void(Pipeline*)> provider) :
1469         StreamDataProvider(false),
1470         p1(provider),
1471         p2(nullptr)
1472     {
1473     }
FunctionProvider(std::function<bool (Pipeline *,bool,bool)> provider)1474     FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) :
1475         StreamDataProvider(true),
1476         p1(nullptr),
1477         p2(provider)
1478     {
1479     }
1480 
provideStreamData(int,int,Pipeline * pipeline)1481     virtual void provideStreamData(int, int, Pipeline* pipeline) override
1482     {
1483         p1(pipeline);
1484     }
1485 
provideStreamData(int,int,Pipeline * pipeline,bool suppress_warnings,bool will_retry)1486     virtual bool provideStreamData(int, int, Pipeline* pipeline,
1487                                    bool suppress_warnings,
1488                                    bool will_retry) override
1489     {
1490         return p2(pipeline, suppress_warnings, will_retry);
1491     }
1492 
1493   private:
1494     std::function<void(Pipeline*)> p1;
1495     std::function<bool(Pipeline*, bool, bool)> p2;
1496 };
1497 
1498 void
replaceStreamData(std::function<void (Pipeline *)> provider,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1499 QPDFObjectHandle::replaceStreamData(std::function<void(Pipeline*)> provider,
1500                                     QPDFObjectHandle const& filter,
1501                                     QPDFObjectHandle const& decode_parms)
1502 {
1503     assertStream();
1504     PointerHolder<StreamDataProvider> sdp = new FunctionProvider(provider);
1505     dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1506 	sdp, filter, decode_parms);
1507 }
1508 
1509 void
replaceStreamData(std::function<bool (Pipeline *,bool,bool)> provider,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1510 QPDFObjectHandle::replaceStreamData(
1511     std::function<bool(Pipeline*, bool, bool)> provider,
1512     QPDFObjectHandle const& filter,
1513     QPDFObjectHandle const& decode_parms)
1514 {
1515     assertStream();
1516     PointerHolder<StreamDataProvider> sdp = new FunctionProvider(provider);
1517     dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1518 	sdp, filter, decode_parms);
1519 }
1520 
1521 QPDFObjGen
getObjGen() const1522 QPDFObjectHandle::getObjGen() const
1523 {
1524     return QPDFObjGen(this->objid, this->generation);
1525 }
1526 
1527 int
getObjectID() const1528 QPDFObjectHandle::getObjectID() const
1529 {
1530     return this->objid;
1531 }
1532 
1533 int
getGeneration() const1534 QPDFObjectHandle::getGeneration() const
1535 {
1536     return this->generation;
1537 }
1538 
1539 std::map<std::string, QPDFObjectHandle>
getPageImages()1540 QPDFObjectHandle::getPageImages()
1541 {
1542     return QPDFPageObjectHelper(*this).getImages();
1543 }
1544 
1545 std::vector<QPDFObjectHandle>
arrayOrStreamToStreamArray(std::string const & description,std::string & all_description)1546 QPDFObjectHandle::arrayOrStreamToStreamArray(
1547     std::string const& description, std::string& all_description)
1548 {
1549     all_description = description;
1550     std::vector<QPDFObjectHandle> result;
1551     if (isArray())
1552     {
1553 	int n_items = getArrayNItems();
1554 	for (int i = 0; i < n_items; ++i)
1555 	{
1556 	    QPDFObjectHandle item = getArrayItem(i);
1557 	    if (item.isStream())
1558             {
1559                 result.push_back(item);
1560             }
1561             else
1562 	    {
1563                 QTC::TC("qpdf", "QPDFObjectHandle non-stream in stream array");
1564                 warn(item.getOwningQPDF(),
1565                      QPDFExc(qpdf_e_damaged_pdf, description,
1566                              "item index " + QUtil::int_to_string(i) +
1567                              " (from 0)", 0,
1568                              "ignoring non-stream in an array of streams"));
1569 	    }
1570 	}
1571     }
1572     else if (isStream())
1573     {
1574 	result.push_back(*this);
1575     }
1576     else if (! isNull())
1577     {
1578         warn(getOwningQPDF(),
1579              QPDFExc(qpdf_e_damaged_pdf, "", description, 0,
1580                      " object is supposed to be a stream or an"
1581                      " array of streams but is neither"));
1582     }
1583 
1584     bool first = true;
1585     for (std::vector<QPDFObjectHandle>::iterator iter = result.begin();
1586          iter != result.end(); ++iter)
1587     {
1588         QPDFObjectHandle item = *iter;
1589         std::string og =
1590             QUtil::int_to_string(item.getObjectID()) + " " +
1591             QUtil::int_to_string(item.getGeneration());
1592         if (first)
1593         {
1594             first = false;
1595         }
1596         else
1597         {
1598             all_description += ",";
1599         }
1600         all_description += " stream " + og;
1601     }
1602 
1603     return result;
1604 }
1605 
1606 std::vector<QPDFObjectHandle>
getPageContents()1607 QPDFObjectHandle::getPageContents()
1608 {
1609     std::string description = "page object " +
1610         QUtil::int_to_string(this->objid) + " " +
1611         QUtil::int_to_string(this->generation);
1612     std::string all_description;
1613     return this->getKey("/Contents").arrayOrStreamToStreamArray(
1614         description, all_description);
1615 }
1616 
1617 void
addPageContents(QPDFObjectHandle new_contents,bool first)1618 QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
1619 {
1620     new_contents.assertStream();
1621 
1622     std::vector<QPDFObjectHandle> orig_contents = getPageContents();
1623 
1624     std::vector<QPDFObjectHandle> content_streams;
1625     if (first)
1626     {
1627 	QTC::TC("qpdf", "QPDFObjectHandle prepend page contents");
1628 	content_streams.push_back(new_contents);
1629     }
1630     for (std::vector<QPDFObjectHandle>::iterator iter = orig_contents.begin();
1631 	 iter != orig_contents.end(); ++iter)
1632     {
1633 	QTC::TC("qpdf", "QPDFObjectHandle append page contents");
1634 	content_streams.push_back(*iter);
1635     }
1636     if (! first)
1637     {
1638 	content_streams.push_back(new_contents);
1639     }
1640 
1641     QPDFObjectHandle contents = QPDFObjectHandle::newArray(content_streams);
1642     this->replaceKey("/Contents", contents);
1643 }
1644 
1645 void
rotatePage(int angle,bool relative)1646 QPDFObjectHandle::rotatePage(int angle, bool relative)
1647 {
1648     if ((angle % 90) != 0)
1649     {
1650         throw std::runtime_error(
1651             "QPDF::rotatePage called with an"
1652             " angle that is not a multiple of 90");
1653     }
1654     int new_angle = angle;
1655     if (relative)
1656     {
1657         int old_angle = 0;
1658         bool found_rotate = false;
1659         QPDFObjectHandle cur_obj = *this;
1660         bool searched_parent = false;
1661         std::set<QPDFObjGen> visited;
1662         while (! found_rotate)
1663         {
1664             if (visited.count(cur_obj.getObjGen()))
1665             {
1666                 // Don't get stuck in an infinite loop
1667                 break;
1668             }
1669             if (! visited.empty())
1670             {
1671                 searched_parent = true;
1672             }
1673             visited.insert(cur_obj.getObjGen());
1674             if (cur_obj.getKey("/Rotate").isInteger())
1675             {
1676                 found_rotate = true;
1677                 old_angle = cur_obj.getKey("/Rotate").getIntValueAsInt();
1678             }
1679             else if (cur_obj.getKey("/Parent").isDictionary())
1680             {
1681                 cur_obj = cur_obj.getKey("/Parent");
1682             }
1683             else
1684             {
1685                 break;
1686             }
1687         }
1688         QTC::TC("qpdf", "QPDFObjectHandle found old angle",
1689                 searched_parent ? 0 : 1);
1690         if ((old_angle % 90) != 0)
1691         {
1692             old_angle = 0;
1693         }
1694         new_angle += old_angle;
1695     }
1696     new_angle = (new_angle + 360) % 360;
1697     // Make this explicit even with new_angle == 0 since /Rotate can
1698     // be inherited.
1699     replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle));
1700 }
1701 
1702 void
coalesceContentStreams()1703 QPDFObjectHandle::coalesceContentStreams()
1704 {
1705     QPDFObjectHandle contents = this->getKey("/Contents");
1706     if (contents.isStream())
1707     {
1708         QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream");
1709         return;
1710     }
1711     else if (! contents.isArray())
1712     {
1713         // /Contents is optional for pages, and some very damaged
1714         // files may have pages that are invalid in other ways.
1715         return;
1716     }
1717     QPDF* qpdf = getOwningQPDF();
1718     if (qpdf == 0)
1719     {
1720         // Should not be possible for a page object to not have an
1721         // owning PDF unless it was manually constructed in some
1722         // incorrect way. However, it can happen in a PDF file whose
1723         // page structure is direct, which is against spec but still
1724         // possible to hand construct, as in fuzz issue 27393.
1725         throw std::runtime_error("coalesceContentStreams called on object"
1726                                  " with no associated PDF file");
1727     }
1728     QPDFObjectHandle new_contents = newStream(qpdf);
1729     this->replaceKey("/Contents", new_contents);
1730 
1731     PointerHolder<StreamDataProvider> provider =
1732         new CoalesceProvider(*this, contents);
1733     new_contents.replaceStreamData(provider, newNull(), newNull());
1734 }
1735 
1736 std::string
unparse()1737 QPDFObjectHandle::unparse()
1738 {
1739     std::string result;
1740     if (this->isIndirect())
1741     {
1742 	result = QUtil::int_to_string(this->objid) + " " +
1743 	    QUtil::int_to_string(this->generation) + " R";
1744     }
1745     else
1746     {
1747 	result = unparseResolved();
1748     }
1749     return result;
1750 }
1751 
1752 std::string
unparseResolved()1753 QPDFObjectHandle::unparseResolved()
1754 {
1755     dereference();
1756     if (this->reserved)
1757     {
1758         throw std::logic_error(
1759             "QPDFObjectHandle: attempting to unparse a reserved object");
1760     }
1761     return this->obj->unparse();
1762 }
1763 
1764 std::string
unparseBinary()1765 QPDFObjectHandle::unparseBinary()
1766 {
1767     if (this->isString())
1768     {
1769         return dynamic_cast<QPDF_String*>(
1770             this->obj.getPointer())->unparse(true);
1771     }
1772     else
1773     {
1774         return unparse();
1775     }
1776 }
1777 
1778 JSON
getJSON(bool dereference_indirect)1779 QPDFObjectHandle::getJSON(bool dereference_indirect)
1780 {
1781     if ((! dereference_indirect) && this->isIndirect())
1782     {
1783         return JSON::makeString(unparse());
1784     }
1785     else
1786     {
1787         dereference();
1788         if (this->reserved)
1789         {
1790             throw std::logic_error(
1791                 "QPDFObjectHandle: attempting to unparse a reserved object");
1792         }
1793         return this->obj->getJSON();
1794     }
1795 }
1796 
1797 QPDFObjectHandle
wrapInArray()1798 QPDFObjectHandle::wrapInArray()
1799 {
1800     if (isArray())
1801     {
1802         return *this;
1803     }
1804     QPDFObjectHandle result = QPDFObjectHandle::newArray();
1805     result.appendItem(*this);
1806     return result;
1807 }
1808 
1809 QPDFObjectHandle
parse(std::string const & object_str,std::string const & object_description)1810 QPDFObjectHandle::parse(std::string const& object_str,
1811                         std::string const& object_description)
1812 {
1813     return parse(nullptr, object_str, object_description);
1814 }
1815 
1816 QPDFObjectHandle
parse(QPDF * context,std::string const & object_str,std::string const & object_description)1817 QPDFObjectHandle::parse(QPDF* context,
1818                         std::string const& object_str,
1819                         std::string const& object_description)
1820 {
1821     PointerHolder<InputSource> input =
1822         new BufferInputSource("parsed object", object_str);
1823     QPDFTokenizer tokenizer;
1824     bool empty = false;
1825     QPDFObjectHandle result =
1826         parse(input, object_description, tokenizer, empty, 0, context);
1827     size_t offset = QIntC::to_size(input->tell());
1828     while (offset < object_str.length())
1829     {
1830         if (! isspace(object_str.at(offset)))
1831         {
1832             QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
1833             throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
1834                           object_description,
1835                           input->getLastOffset(),
1836                           "trailing data found parsing object from string");
1837         }
1838         ++offset;
1839     }
1840     return result;
1841 }
1842 
1843 void
pipePageContents(Pipeline * p)1844 QPDFObjectHandle::pipePageContents(Pipeline* p)
1845 {
1846     std::string description = "page object " +
1847         QUtil::int_to_string(this->objid) + " " +
1848         QUtil::int_to_string(this->generation);
1849     std::string all_description;
1850     this->getKey("/Contents").pipeContentStreams(
1851         p, description, all_description);
1852 }
1853 
1854 void
pipeContentStreams(Pipeline * p,std::string const & description,std::string & all_description)1855 QPDFObjectHandle::pipeContentStreams(
1856     Pipeline* p, std::string const& description, std::string& all_description)
1857 {
1858     std::vector<QPDFObjectHandle> streams =
1859         arrayOrStreamToStreamArray(
1860             description, all_description);
1861     bool need_newline = false;
1862     Pl_Buffer buf("concatenated content stream buffer");
1863     for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin();
1864          iter != streams.end(); ++iter)
1865     {
1866         if (need_newline)
1867         {
1868             buf.write(QUtil::unsigned_char_pointer("\n"), 1);
1869         }
1870         LastChar lc(&buf);
1871         QPDFObjectHandle stream = *iter;
1872         std::string og =
1873             QUtil::int_to_string(stream.getObjectID()) + " " +
1874             QUtil::int_to_string(stream.getGeneration());
1875         std::string w_description = "content stream object " + og;
1876         if (! stream.pipeStreamData(&lc, 0, qpdf_dl_specialized))
1877         {
1878             QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent");
1879             throw QPDFExc(qpdf_e_damaged_pdf, "content stream",
1880                           w_description, 0,
1881                           "errors while decoding content stream");
1882         }
1883         lc.finish();
1884         need_newline = (lc.getLastChar() != static_cast<unsigned char>('\n'));
1885         QTC::TC("qpdf", "QPDFObjectHandle need_newline",
1886                 need_newline ? 0 : 1);
1887     }
1888     std::unique_ptr<Buffer> b(buf.getBuffer());
1889     p->write(b->getBuffer(), b->getSize());
1890     p->finish();
1891 }
1892 
1893 void
parsePageContents(ParserCallbacks * callbacks)1894 QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
1895 {
1896     std::string description = "page object " +
1897         QUtil::int_to_string(this->objid) + " " +
1898         QUtil::int_to_string(this->generation);
1899     this->getKey("/Contents").parseContentStream_internal(
1900         description, callbacks);
1901 }
1902 
1903 void
parseAsContents(ParserCallbacks * callbacks)1904 QPDFObjectHandle::parseAsContents(ParserCallbacks* callbacks)
1905 {
1906     std::string description = "object " +
1907         QUtil::int_to_string(this->objid) + " " +
1908         QUtil::int_to_string(this->generation);
1909     this->parseContentStream_internal(description, callbacks);
1910 }
1911 
1912 void
filterPageContents(TokenFilter * filter,Pipeline * next)1913 QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
1914 {
1915     std::string description = "token filter for page object " +
1916         QUtil::int_to_string(this->objid) + " " +
1917         QUtil::int_to_string(this->generation);
1918     Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next);
1919     this->pipePageContents(&token_pipeline);
1920 }
1921 
1922 void
filterAsContents(TokenFilter * filter,Pipeline * next)1923 QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next)
1924 {
1925     std::string description = "token filter for object " +
1926         QUtil::int_to_string(this->objid) + " " +
1927         QUtil::int_to_string(this->generation);
1928     Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next);
1929     this->pipeStreamData(&token_pipeline, 0, qpdf_dl_specialized);
1930 }
1931 
1932 void
parseContentStream(QPDFObjectHandle stream_or_array,ParserCallbacks * callbacks)1933 QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
1934                                      ParserCallbacks* callbacks)
1935 {
1936     stream_or_array.parseContentStream_internal(
1937         "content stream objects", callbacks);
1938 }
1939 
1940 void
parseContentStream_internal(std::string const & description,ParserCallbacks * callbacks)1941 QPDFObjectHandle::parseContentStream_internal(
1942     std::string const& description,
1943     ParserCallbacks* callbacks)
1944 {
1945     Pl_Buffer buf("concatenated stream data buffer");
1946     std::string all_description;
1947     pipeContentStreams(&buf, description, all_description);
1948     PointerHolder<Buffer> stream_data = buf.getBuffer();
1949     callbacks->contentSize(stream_data->getSize());
1950     try
1951     {
1952         parseContentStream_data(stream_data, all_description,
1953                                 callbacks, getOwningQPDF());
1954     }
1955     catch (TerminateParsing&)
1956     {
1957         return;
1958     }
1959     callbacks->handleEOF();
1960 }
1961 
1962 void
parseContentStream_data(PointerHolder<Buffer> stream_data,std::string const & description,ParserCallbacks * callbacks,QPDF * context)1963 QPDFObjectHandle::parseContentStream_data(
1964     PointerHolder<Buffer> stream_data,
1965     std::string const& description,
1966     ParserCallbacks* callbacks,
1967     QPDF* context)
1968 {
1969     size_t stream_length = stream_data->getSize();
1970     PointerHolder<InputSource> input =
1971         new BufferInputSource(description, stream_data.getPointer());
1972     QPDFTokenizer tokenizer;
1973     tokenizer.allowEOF();
1974     bool empty = false;
1975     while (QIntC::to_size(input->tell()) < stream_length)
1976     {
1977         // Read a token and seek to the beginning. The offset we get
1978         // from this process is the beginning of the next
1979         // non-ignorable (space, comment) token. This way, the offset
1980         // and don't including ignorable content.
1981         tokenizer.readToken(input, "content", true);
1982         qpdf_offset_t offset = input->getLastOffset();
1983         input->seek(offset, SEEK_SET);
1984         QPDFObjectHandle obj =
1985             parseInternal(input, "content", tokenizer,
1986                           empty, 0, context, true);
1987         if (! obj.isInitialized())
1988         {
1989             // EOF
1990             break;
1991         }
1992         size_t length = QIntC::to_size(input->tell() - offset);
1993 
1994         callbacks->handleObject(obj, QIntC::to_size(offset), length);
1995         if (obj.isOperator() && (obj.getOperatorValue() == "ID"))
1996         {
1997             // Discard next character; it is the space after ID that
1998             // terminated the token.  Read until end of inline image.
1999             char ch;
2000             input->read(&ch, 1);
2001             tokenizer.expectInlineImage(input);
2002             QPDFTokenizer::Token t =
2003                 tokenizer.readToken(input, description, true);
2004             offset = input->getLastOffset();
2005             length = QIntC::to_size(input->tell() - offset);
2006             if (t.getType() == QPDFTokenizer::tt_bad)
2007             {
2008                 QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image");
2009                 warn(context,
2010                      QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2011                              "stream data", input->tell(),
2012                              "EOF found while reading inline image"));
2013             }
2014             else
2015             {
2016                 std::string inline_image = t.getValue();
2017                 QTC::TC("qpdf", "QPDFObjectHandle inline image token");
2018                 callbacks->handleObject(
2019                     QPDFObjectHandle::newInlineImage(inline_image),
2020                     QIntC::to_size(offset), length);
2021             }
2022         }
2023     }
2024 }
2025 
2026 void
addContentTokenFilter(PointerHolder<TokenFilter> filter)2027 QPDFObjectHandle::addContentTokenFilter(PointerHolder<TokenFilter> filter)
2028 {
2029     coalesceContentStreams();
2030     this->getKey("/Contents").addTokenFilter(filter);
2031 }
2032 
2033 void
addTokenFilter(PointerHolder<TokenFilter> filter)2034 QPDFObjectHandle::addTokenFilter(PointerHolder<TokenFilter> filter)
2035 {
2036     assertStream();
2037     return dynamic_cast<QPDF_Stream*>(
2038         obj.getPointer())->addTokenFilter(filter);
2039 }
2040 
2041 QPDFObjectHandle
parse(PointerHolder<InputSource> input,std::string const & object_description,QPDFTokenizer & tokenizer,bool & empty,StringDecrypter * decrypter,QPDF * context)2042 QPDFObjectHandle::parse(PointerHolder<InputSource> input,
2043                         std::string const& object_description,
2044                         QPDFTokenizer& tokenizer, bool& empty,
2045                         StringDecrypter* decrypter, QPDF* context)
2046 {
2047     return parseInternal(input, object_description, tokenizer, empty,
2048                          decrypter, context, false);
2049 }
2050 
2051 QPDFObjectHandle
parseInternal(PointerHolder<InputSource> input,std::string const & object_description,QPDFTokenizer & tokenizer,bool & empty,StringDecrypter * decrypter,QPDF * context,bool content_stream)2052 QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
2053                                 std::string const& object_description,
2054                                 QPDFTokenizer& tokenizer, bool& empty,
2055                                 StringDecrypter* decrypter, QPDF* context,
2056                                 bool content_stream)
2057 {
2058     // This method must take care not to resolve any objects. Don't
2059     // check the type of any object without first ensuring that it is
2060     // a direct object. Otherwise, doing so may have the side effect
2061     // of reading the object and changing the file pointer. If you do
2062     // this, it will cause a logic error to be thrown from
2063     // QPDF::inParse().
2064 
2065     QPDF::ParseGuard pg(context);
2066 
2067     empty = false;
2068 
2069     QPDFObjectHandle object;
2070     bool set_offset = false;
2071 
2072     std::vector<SparseOHArray> olist_stack;
2073     olist_stack.push_back(SparseOHArray());
2074     std::vector<parser_state_e> state_stack;
2075     state_stack.push_back(st_top);
2076     std::vector<qpdf_offset_t> offset_stack;
2077     qpdf_offset_t offset = input->tell();
2078     offset_stack.push_back(offset);
2079     bool done = false;
2080     int bad_count = 0;
2081     int good_count = 0;
2082     bool b_contents = false;
2083     std::vector<std::string> contents_string_stack;
2084     contents_string_stack.push_back("");
2085     std::vector<qpdf_offset_t> contents_offset_stack;
2086     contents_offset_stack.push_back(-1);
2087     while (! done)
2088     {
2089         bool bad = false;
2090         SparseOHArray& olist = olist_stack.back();
2091         parser_state_e state = state_stack.back();
2092         offset = offset_stack.back();
2093         std::string& contents_string = contents_string_stack.back();
2094         qpdf_offset_t& contents_offset = contents_offset_stack.back();
2095 
2096 	object = QPDFObjectHandle();
2097 	set_offset = false;
2098 
2099 	QPDFTokenizer::Token token =
2100             tokenizer.readToken(input, object_description, true);
2101         std::string const& token_error_message = token.getErrorMessage();
2102         if (! token_error_message.empty())
2103         {
2104             // Tokens other than tt_bad can still generate warnings.
2105             warn(context,
2106                  QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2107                          object_description,
2108                          input->getLastOffset(),
2109                          token_error_message));
2110         }
2111 
2112 	switch (token.getType())
2113 	{
2114           case QPDFTokenizer::tt_eof:
2115             if (! content_stream)
2116             {
2117                 QTC::TC("qpdf", "QPDFObjectHandle eof in parseInternal");
2118                 warn(context,
2119                      QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2120                              object_description,
2121                              input->getLastOffset(),
2122                              "unexpected EOF"));
2123             }
2124             bad = true;
2125             state = st_eof;
2126             break;
2127 
2128           case QPDFTokenizer::tt_bad:
2129 	    QTC::TC("qpdf", "QPDFObjectHandle bad token in parse");
2130             bad = true;
2131             object = newNull();
2132 	    break;
2133 
2134 	  case QPDFTokenizer::tt_brace_open:
2135 	  case QPDFTokenizer::tt_brace_close:
2136 	    QTC::TC("qpdf", "QPDFObjectHandle bad brace");
2137             warn(context,
2138                  QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2139                          object_description,
2140                          input->getLastOffset(),
2141                          "treating unexpected brace token as null"));
2142             bad = true;
2143             object = newNull();
2144 	    break;
2145 
2146 	  case QPDFTokenizer::tt_array_close:
2147 	    if (state == st_array)
2148 	    {
2149                 state = st_stop;
2150 	    }
2151 	    else
2152 	    {
2153 		QTC::TC("qpdf", "QPDFObjectHandle bad array close");
2154                 warn(context,
2155                      QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2156                              object_description,
2157                              input->getLastOffset(),
2158                              "treating unexpected array close token as null"));
2159                 bad = true;
2160                 object = newNull();
2161 	    }
2162 	    break;
2163 
2164 	  case QPDFTokenizer::tt_dict_close:
2165 	    if (state == st_dictionary)
2166 	    {
2167                 state = st_stop;
2168 	    }
2169 	    else
2170 	    {
2171 		QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
2172                 warn(context,
2173                      QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2174                              object_description,
2175                              input->getLastOffset(),
2176                              "unexpected dictionary close token"));
2177                 bad = true;
2178                 object = newNull();
2179 	    }
2180 	    break;
2181 
2182 	  case QPDFTokenizer::tt_array_open:
2183 	  case QPDFTokenizer::tt_dict_open:
2184             if (olist_stack.size() > 500)
2185             {
2186 		QTC::TC("qpdf", "QPDFObjectHandle too deep");
2187                 warn(context,
2188                      QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2189                              object_description,
2190                              input->getLastOffset(),
2191                              "ignoring excessively deeply nested data structure"));
2192                 bad = true;
2193                 object = newNull();
2194                 state = st_top;
2195             }
2196             else
2197             {
2198                 olist_stack.push_back(SparseOHArray());
2199                 state = st_start;
2200                 offset_stack.push_back(input->tell());
2201                 state_stack.push_back(
2202                     (token.getType() == QPDFTokenizer::tt_array_open) ?
2203                     st_array : st_dictionary);
2204                 b_contents = false;
2205                 contents_string_stack.push_back("");
2206                 contents_offset_stack.push_back(-1);
2207             }
2208 	    break;
2209 
2210 	  case QPDFTokenizer::tt_bool:
2211 	    object = newBool((token.getValue() == "true"));
2212 	    break;
2213 
2214 	  case QPDFTokenizer::tt_null:
2215 	    object = newNull();
2216 	    break;
2217 
2218 	  case QPDFTokenizer::tt_integer:
2219 	    object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
2220 	    break;
2221 
2222 	  case QPDFTokenizer::tt_real:
2223 	    object = newReal(token.getValue());
2224 	    break;
2225 
2226 	  case QPDFTokenizer::tt_name:
2227 	    {
2228 		std::string name = token.getValue();
2229 		object = newName(name);
2230 
2231 		if (name == "/Contents")
2232 		{
2233 		    b_contents = true;
2234 		}
2235 		else
2236 		{
2237 		    b_contents = false;
2238 		}
2239 	    }
2240 	    break;
2241 
2242 	  case QPDFTokenizer::tt_word:
2243 	    {
2244 		std::string const& value = token.getValue();
2245                 if (content_stream)
2246                 {
2247                     object = QPDFObjectHandle::newOperator(value);
2248                 }
2249 		else if ((value == "R") && (state != st_top) &&
2250                          (olist.size() >= 2) &&
2251                          (! olist.at(olist.size() - 1).isIndirect()) &&
2252                          (olist.at(olist.size() - 1).isInteger()) &&
2253                          (! olist.at(olist.size() - 2).isIndirect()) &&
2254                          (olist.at(olist.size() - 2).isInteger()))
2255 		{
2256                     if (context == 0)
2257                     {
2258                         QTC::TC("qpdf", "QPDFObjectHandle indirect without context");
2259                         throw std::logic_error(
2260                             "QPDFObjectHandle::parse called without context"
2261                             " on an object with indirect references");
2262                     }
2263 		    // Try to resolve indirect objects
2264 		    object = newIndirect(
2265 			context,
2266 			olist.at(olist.size() - 2).getIntValueAsInt(),
2267 			olist.at(olist.size() - 1).getIntValueAsInt());
2268 		    olist.remove_last();
2269 		    olist.remove_last();
2270 		}
2271 		else if ((value == "endobj") && (state == st_top))
2272 		{
2273 		    // We just saw endobj without having read
2274 		    // anything.  Treat this as a null and do not move
2275 		    // the input source's offset.
2276 		    object = newNull();
2277 		    input->seek(input->getLastOffset(), SEEK_SET);
2278                     empty = true;
2279 		}
2280 		else
2281 		{
2282                     QTC::TC("qpdf", "QPDFObjectHandle treat word as string");
2283                     warn(context,
2284                          QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2285                                  object_description,
2286                                  input->getLastOffset(),
2287                                  "unknown token while reading object;"
2288                                  " treating as string"));
2289                     bad = true;
2290                     object = newString(value);
2291 		}
2292 	    }
2293 	    break;
2294 
2295 	  case QPDFTokenizer::tt_string:
2296 	    {
2297 		std::string val = token.getValue();
2298                 if (decrypter)
2299                 {
2300                     if (b_contents)
2301                     {
2302                         contents_string = val;
2303                         contents_offset = input->getLastOffset();
2304                         b_contents = false;
2305                     }
2306                     decrypter->decryptString(val);
2307                 }
2308 		object = QPDFObjectHandle::newString(val);
2309 	    }
2310 
2311 	    break;
2312 
2313 	  default:
2314             warn(context,
2315                  QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2316                          object_description,
2317                          input->getLastOffset(),
2318                          "treating unknown token type as null while "
2319                          "reading object"));
2320             bad = true;
2321             object = newNull();
2322 	    break;
2323 	}
2324 
2325         if ((! object.isInitialized()) &&
2326             (! ((state == st_start) ||
2327                 (state == st_stop) ||
2328                 (state == st_eof))))
2329         {
2330             throw std::logic_error(
2331                 "QPDFObjectHandle::parseInternal: "
2332                 "unexpected uninitialized object");
2333             object = newNull();
2334         }
2335 
2336         if (bad)
2337         {
2338             ++bad_count;
2339             good_count = 0;
2340         }
2341         else
2342         {
2343             ++good_count;
2344             if (good_count > 3)
2345             {
2346                 bad_count = 0;
2347             }
2348         }
2349         if (bad_count > 5)
2350         {
2351             // We had too many consecutive errors without enough
2352             // intervening successful objects. Give up.
2353             warn(context,
2354                  QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2355                          object_description,
2356                          input->getLastOffset(),
2357                          "too many errors; giving up on reading object"));
2358             state = st_top;
2359             object = newNull();
2360         }
2361 
2362         switch (state)
2363         {
2364           case st_eof:
2365             if (state_stack.size() > 1)
2366             {
2367                 warn(context,
2368                      QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2369                              object_description,
2370                              input->getLastOffset(),
2371                              "parse error while reading object"));
2372             }
2373             done = true;
2374             // In content stream mode, leave object uninitialized to
2375             // indicate EOF
2376             if (! content_stream)
2377             {
2378                 object = newNull();
2379             }
2380             break;
2381 
2382           case st_dictionary:
2383           case st_array:
2384             setObjectDescriptionFromInput(
2385                 object, context, object_description, input,
2386                 input->getLastOffset());
2387             object.setParsedOffset(input->getLastOffset());
2388             set_offset = true;
2389             olist.append(object);
2390             break;
2391 
2392           case st_top:
2393             done = true;
2394             break;
2395 
2396           case st_start:
2397             break;
2398 
2399           case st_stop:
2400             if ((state_stack.size() < 2) || (olist_stack.size() < 2))
2401             {
2402                 throw std::logic_error(
2403                     "QPDFObjectHandle::parseInternal: st_stop encountered"
2404                     " with insufficient elements in stack");
2405             }
2406             parser_state_e old_state = state_stack.back();
2407             state_stack.pop_back();
2408             if (old_state == st_array)
2409             {
2410                 // There's no newArray(SparseOHArray) since
2411                 // SparseOHArray is not part of the public API.
2412                 object = QPDFObjectHandle(new QPDF_Array(olist));
2413                 setObjectDescriptionFromInput(
2414                     object, context, object_description, input, offset);
2415                 // The `offset` points to the next of "[". Set the
2416                 // rewind offset to point to the beginning of "[".
2417                 // This has been explicitly tested with whitespace
2418                 // surrounding the array start delimiter.
2419                 // getLastOffset points to the array end token and
2420                 // therefore can't be used here.
2421                 object.setParsedOffset(offset - 1);
2422                 set_offset = true;
2423             }
2424             else if (old_state == st_dictionary)
2425             {
2426                 // Convert list to map. Alternating elements are keys.
2427                 // Attempt to recover more or less gracefully from
2428                 // invalid dictionaries.
2429                 std::set<std::string> names;
2430                 size_t n_elements = olist.size();
2431                 for (size_t i = 0; i < n_elements; ++i)
2432                 {
2433                     QPDFObjectHandle oh = olist.at(i);
2434                     if ((! oh.isIndirect()) && oh.isName())
2435                     {
2436                         names.insert(oh.getName());
2437                     }
2438                 }
2439 
2440                 std::map<std::string, QPDFObjectHandle> dict;
2441                 int next_fake_key = 1;
2442                 for (unsigned int i = 0; i < olist.size(); ++i)
2443                 {
2444                     QPDFObjectHandle key_obj = olist.at(i);
2445                     QPDFObjectHandle val;
2446                     if (key_obj.isIndirect() || (! key_obj.isName()))
2447                     {
2448                         bool found_fake = false;
2449                         std::string candidate;
2450                         while (! found_fake)
2451                         {
2452                             candidate =
2453                                 "/QPDFFake" +
2454                                 QUtil::int_to_string(next_fake_key++);
2455                             found_fake = (names.count(candidate) == 0);
2456                             QTC::TC("qpdf", "QPDFObjectHandle found fake",
2457                                     (found_fake ? 0 : 1));
2458                         }
2459                         warn(context,
2460                              QPDFExc(
2461                                  qpdf_e_damaged_pdf,
2462                                  input->getName(), object_description, offset,
2463                                  "expected dictionary key but found"
2464                                  " non-name object; inserting key " +
2465                                  candidate));
2466                         val = key_obj;
2467                         key_obj = newName(candidate);
2468                     }
2469                     else if (i + 1 >= olist.size())
2470                     {
2471                         QTC::TC("qpdf", "QPDFObjectHandle no val for last key");
2472                         warn(context,
2473                              QPDFExc(
2474                                  qpdf_e_damaged_pdf,
2475                                  input->getName(), object_description, offset,
2476                                  "dictionary ended prematurely; "
2477                                  "using null as value for last key"));
2478                         val = newNull();
2479                         setObjectDescriptionFromInput(
2480                             val, context, object_description, input, offset);
2481                     }
2482                     else
2483                     {
2484                         val = olist.at(++i);
2485                     }
2486                     std::string key = key_obj.getName();
2487                     if (dict.count(key) > 0)
2488                     {
2489                         QTC::TC("qpdf", "QPDFObjectHandle duplicate dict key");
2490                         warn(context,
2491                              QPDFExc(
2492                                  qpdf_e_damaged_pdf,
2493                                  input->getName(), object_description, offset,
2494                                  "dictionary has duplicated key " + key +
2495                                  "; last occurrence overrides earlier ones"));
2496                     }
2497                     dict[key] = val;
2498                 }
2499 		if (!contents_string.empty() &&
2500 		    dict.count("/Type") &&
2501 		    dict["/Type"].isName() &&
2502 		    dict["/Type"].getName() == "/Sig" &&
2503 		    dict.count("/ByteRange") &&
2504 		    dict.count("/Contents") &&
2505 		    dict["/Contents"].isString())
2506 		{
2507 		    dict["/Contents"]
2508 		      = QPDFObjectHandle::newString(contents_string);
2509 		    dict["/Contents"].setParsedOffset(contents_offset);
2510 		}
2511                 object = newDictionary(dict);
2512                 setObjectDescriptionFromInput(
2513                     object, context, object_description, input, offset);
2514                 // The `offset` points to the next of "<<". Set the
2515                 // rewind offset to point to the beginning of "<<".
2516                 // This has been explicitly tested with whitespace
2517                 // surrounding the dictionary start delimiter.
2518                 // getLastOffset points to the dictionary end token
2519                 // and therefore can't be used here.
2520                 object.setParsedOffset(offset - 2);
2521                 set_offset = true;
2522             }
2523             olist_stack.pop_back();
2524             offset_stack.pop_back();
2525             if (state_stack.back() == st_top)
2526             {
2527                 done = true;
2528             }
2529             else
2530             {
2531                 olist_stack.back().append(object);
2532             }
2533             contents_string_stack.pop_back();
2534             contents_offset_stack.pop_back();
2535         }
2536     }
2537 
2538     if (! set_offset)
2539     {
2540         setObjectDescriptionFromInput(
2541             object, context, object_description, input, offset);
2542         object.setParsedOffset(offset);
2543     }
2544     return object;
2545 }
2546 
2547 qpdf_offset_t
getParsedOffset()2548 QPDFObjectHandle::getParsedOffset()
2549 {
2550     dereference();
2551     return this->obj->getParsedOffset();
2552 }
2553 
2554 void
setParsedOffset(qpdf_offset_t offset)2555 QPDFObjectHandle::setParsedOffset(qpdf_offset_t offset)
2556 {
2557     // This is called during parsing on newly created direct objects,
2558     // so we can't call dereference() here.
2559     if (this->obj.getPointer())
2560     {
2561         this->obj->setParsedOffset(offset);
2562     }
2563 }
2564 
2565 QPDFObjectHandle
newIndirect(QPDF * qpdf,int objid,int generation)2566 QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
2567 {
2568     if (objid == 0)
2569     {
2570         // Special case: QPDF uses objid 0 as a sentinel for direct
2571         // objects, and the PDF specification doesn't allow for object
2572         // 0. Treat indirect references to object 0 as null so that we
2573         // never create an indirect object with objid 0.
2574         QTC::TC("qpdf", "QPDFObjectHandle indirect with 0 objid");
2575         return newNull();
2576     }
2577 
2578     return QPDFObjectHandle(qpdf, objid, generation);
2579 }
2580 
2581 QPDFObjectHandle
newBool(bool value)2582 QPDFObjectHandle::newBool(bool value)
2583 {
2584     return QPDFObjectHandle(new QPDF_Bool(value));
2585 }
2586 
2587 QPDFObjectHandle
newNull()2588 QPDFObjectHandle::newNull()
2589 {
2590     return QPDFObjectHandle(new QPDF_Null());
2591 }
2592 
2593 QPDFObjectHandle
newInteger(long long value)2594 QPDFObjectHandle::newInteger(long long value)
2595 {
2596     return QPDFObjectHandle(new QPDF_Integer(value));
2597 }
2598 
2599 QPDFObjectHandle
newReal(std::string const & value)2600 QPDFObjectHandle::newReal(std::string const& value)
2601 {
2602     return QPDFObjectHandle(new QPDF_Real(value));
2603 }
2604 
2605 QPDFObjectHandle
newReal(double value,int decimal_places)2606 QPDFObjectHandle::newReal(double value, int decimal_places)
2607 {
2608     return QPDFObjectHandle(
2609         new QPDF_Real(value, decimal_places, true));
2610 }
2611 
2612 QPDFObjectHandle
newReal(double value,int decimal_places,bool trim_trailing_zeroes)2613 QPDFObjectHandle::newReal(double value, int decimal_places,
2614                           bool trim_trailing_zeroes)
2615 {
2616     return QPDFObjectHandle(
2617         new QPDF_Real(value, decimal_places, trim_trailing_zeroes));
2618 }
2619 
2620 QPDFObjectHandle
newName(std::string const & name)2621 QPDFObjectHandle::newName(std::string const& name)
2622 {
2623     return QPDFObjectHandle(new QPDF_Name(name));
2624 }
2625 
2626 QPDFObjectHandle
newString(std::string const & str)2627 QPDFObjectHandle::newString(std::string const& str)
2628 {
2629     return QPDFObjectHandle(new QPDF_String(str));
2630 }
2631 
2632 QPDFObjectHandle
newUnicodeString(std::string const & utf8_str)2633 QPDFObjectHandle::newUnicodeString(std::string const& utf8_str)
2634 {
2635     return QPDFObjectHandle(QPDF_String::new_utf16(utf8_str));
2636 }
2637 
2638 QPDFObjectHandle
newOperator(std::string const & value)2639 QPDFObjectHandle::newOperator(std::string const& value)
2640 {
2641     return QPDFObjectHandle(new QPDF_Operator(value));
2642 }
2643 
2644 QPDFObjectHandle
newInlineImage(std::string const & value)2645 QPDFObjectHandle::newInlineImage(std::string const& value)
2646 {
2647     return QPDFObjectHandle(new QPDF_InlineImage(value));
2648 }
2649 
2650 QPDFObjectHandle
newArray()2651 QPDFObjectHandle::newArray()
2652 {
2653     return newArray(std::vector<QPDFObjectHandle>());
2654 }
2655 
2656 QPDFObjectHandle
newArray(std::vector<QPDFObjectHandle> const & items)2657 QPDFObjectHandle::newArray(std::vector<QPDFObjectHandle> const& items)
2658 {
2659     return QPDFObjectHandle(new QPDF_Array(items));
2660 }
2661 
2662 QPDFObjectHandle
newArray(Rectangle const & rect)2663 QPDFObjectHandle::newArray(Rectangle const& rect)
2664 {
2665     std::vector<QPDFObjectHandle> items;
2666     items.push_back(newReal(rect.llx));
2667     items.push_back(newReal(rect.lly));
2668     items.push_back(newReal(rect.urx));
2669     items.push_back(newReal(rect.ury));
2670     return newArray(items);
2671 }
2672 
2673 QPDFObjectHandle
newArray(Matrix const & matrix)2674 QPDFObjectHandle::newArray(Matrix const& matrix)
2675 {
2676     std::vector<QPDFObjectHandle> items;
2677     items.push_back(newReal(matrix.a));
2678     items.push_back(newReal(matrix.b));
2679     items.push_back(newReal(matrix.c));
2680     items.push_back(newReal(matrix.d));
2681     items.push_back(newReal(matrix.e));
2682     items.push_back(newReal(matrix.f));
2683     return newArray(items);
2684 }
2685 
2686 QPDFObjectHandle
newArray(QPDFMatrix const & matrix)2687 QPDFObjectHandle::newArray(QPDFMatrix const& matrix)
2688 {
2689     std::vector<QPDFObjectHandle> items;
2690     items.push_back(newReal(matrix.a));
2691     items.push_back(newReal(matrix.b));
2692     items.push_back(newReal(matrix.c));
2693     items.push_back(newReal(matrix.d));
2694     items.push_back(newReal(matrix.e));
2695     items.push_back(newReal(matrix.f));
2696     return newArray(items);
2697 }
2698 
2699 QPDFObjectHandle
newFromRectangle(Rectangle const & rect)2700 QPDFObjectHandle::newFromRectangle(Rectangle const& rect)
2701 {
2702     return newArray(rect);
2703 }
2704 
2705 QPDFObjectHandle
newFromMatrix(Matrix const & m)2706 QPDFObjectHandle::newFromMatrix(Matrix const& m)
2707 {
2708     return newArray(m);
2709 }
2710 
2711 QPDFObjectHandle
newFromMatrix(QPDFMatrix const & m)2712 QPDFObjectHandle::newFromMatrix(QPDFMatrix const& m)
2713 {
2714     return newArray(m);
2715 }
2716 
2717 QPDFObjectHandle
newDictionary()2718 QPDFObjectHandle::newDictionary()
2719 {
2720     return newDictionary(std::map<std::string, QPDFObjectHandle>());
2721 }
2722 
2723 QPDFObjectHandle
newDictionary(std::map<std::string,QPDFObjectHandle> const & items)2724 QPDFObjectHandle::newDictionary(
2725     std::map<std::string, QPDFObjectHandle> const& items)
2726 {
2727     return QPDFObjectHandle(new QPDF_Dictionary(items));
2728 }
2729 
2730 
2731 QPDFObjectHandle
newStream(QPDF * qpdf,int objid,int generation,QPDFObjectHandle stream_dict,qpdf_offset_t offset,size_t length)2732 QPDFObjectHandle::newStream(QPDF* qpdf, int objid, int generation,
2733 			    QPDFObjectHandle stream_dict,
2734 			    qpdf_offset_t offset, size_t length)
2735 {
2736     QPDFObjectHandle result = QPDFObjectHandle(new QPDF_Stream(
2737 				qpdf, objid, generation,
2738 				stream_dict, offset, length));
2739     if (offset)
2740     {
2741         result.setParsedOffset(offset);
2742     }
2743     return result;
2744 }
2745 
2746 QPDFObjectHandle
newStream(QPDF * qpdf)2747 QPDFObjectHandle::newStream(QPDF* qpdf)
2748 {
2749     if (qpdf == 0)
2750     {
2751         throw std::runtime_error(
2752             "attempt to create stream in null qpdf object");
2753     }
2754     QTC::TC("qpdf", "QPDFObjectHandle newStream");
2755     QPDFObjectHandle stream_dict = newDictionary();
2756     QPDFObjectHandle result = qpdf->makeIndirectObject(
2757 	QPDFObjectHandle(
2758 	    new QPDF_Stream(qpdf, 0, 0, stream_dict, 0, 0)));
2759     result.dereference();
2760     QPDF_Stream* stream =
2761         dynamic_cast<QPDF_Stream*>(result.obj.getPointer());
2762     stream->setObjGen(result.getObjectID(), result.getGeneration());
2763     return result;
2764 }
2765 
2766 QPDFObjectHandle
newStream(QPDF * qpdf,PointerHolder<Buffer> data)2767 QPDFObjectHandle::newStream(QPDF* qpdf, PointerHolder<Buffer> data)
2768 {
2769     QTC::TC("qpdf", "QPDFObjectHandle newStream with data");
2770     QPDFObjectHandle result = newStream(qpdf);
2771     result.replaceStreamData(data, newNull(), newNull());
2772     return result;
2773 }
2774 
2775 QPDFObjectHandle
newStream(QPDF * qpdf,std::string const & data)2776 QPDFObjectHandle::newStream(QPDF* qpdf, std::string const& data)
2777 {
2778     QTC::TC("qpdf", "QPDFObjectHandle newStream with string");
2779     QPDFObjectHandle result = newStream(qpdf);
2780     result.replaceStreamData(data, newNull(), newNull());
2781     return result;
2782 }
2783 
2784 QPDFObjectHandle
newReserved(QPDF * qpdf)2785 QPDFObjectHandle::newReserved(QPDF* qpdf)
2786 {
2787     // Reserve a spot for this object by assigning it an object
2788     // number, but then return an unresolved handle to the object.
2789     QPDFObjectHandle reserved = qpdf->makeIndirectObject(
2790 	QPDFObjectHandle(new QPDF_Reserved()));
2791     QPDFObjectHandle result =
2792         newIndirect(qpdf, reserved.objid, reserved.generation);
2793     result.reserved = true;
2794     return result;
2795 }
2796 
2797 void
setObjectDescription(QPDF * owning_qpdf,std::string const & object_description)2798 QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf,
2799                                        std::string const& object_description)
2800 {
2801     // This is called during parsing on newly created direct objects,
2802     // so we can't call dereference() here.
2803     if (isInitialized() && this->obj.getPointer())
2804     {
2805         this->obj->setDescription(owning_qpdf, object_description);
2806     }
2807 }
2808 
2809 bool
hasObjectDescription()2810 QPDFObjectHandle::hasObjectDescription()
2811 {
2812     if (isInitialized())
2813     {
2814         dereference();
2815         if (this->obj.getPointer())
2816         {
2817             return this->obj->hasDescription();
2818         }
2819     }
2820     return false;
2821 }
2822 
2823 QPDFObjectHandle
shallowCopy()2824 QPDFObjectHandle::shallowCopy()
2825 {
2826     QPDFObjectHandle result;
2827     shallowCopyInternal(result, false);
2828     return result;
2829 }
2830 
2831 QPDFObjectHandle
unsafeShallowCopy()2832 QPDFObjectHandle::unsafeShallowCopy()
2833 {
2834     QPDFObjectHandle result;
2835     shallowCopyInternal(result, true);
2836     return result;
2837 }
2838 
2839 void
shallowCopyInternal(QPDFObjectHandle & new_obj,bool first_level_only)2840 QPDFObjectHandle::shallowCopyInternal(QPDFObjectHandle& new_obj,
2841                                       bool first_level_only)
2842 {
2843     assertInitialized();
2844 
2845     if (isStream())
2846     {
2847 	QTC::TC("qpdf", "QPDFObjectHandle ERR shallow copy stream");
2848 	throw std::runtime_error(
2849 	    "attempt to make a shallow copy of a stream");
2850     }
2851 
2852     if (isArray())
2853     {
2854 	QTC::TC("qpdf", "QPDFObjectHandle shallow copy array");
2855         // No newArray for shallow copying the sparse array
2856         QPDF_Array* arr = dynamic_cast<QPDF_Array*>(obj.getPointer());
2857         new_obj = QPDFObjectHandle(
2858             new QPDF_Array(arr->getElementsForShallowCopy()));
2859     }
2860     else if (isDictionary())
2861     {
2862 	QTC::TC("qpdf", "QPDFObjectHandle shallow copy dictionary");
2863         new_obj = newDictionary(getDictAsMap());
2864     }
2865     else
2866     {
2867 	QTC::TC("qpdf", "QPDFObjectHandle shallow copy scalar");
2868         new_obj = *this;
2869     }
2870 
2871     std::set<QPDFObjGen> visited;
2872     new_obj.copyObject(visited, false, first_level_only, false);
2873 }
2874 
// Replace the object held by this handle with a newly allocated copy
// and turn the handle into a direct one (qpdf, objid and generation
// are cleared).
//
// visited: indirect objects currently being copied further up the
//   recursion; meeting one of them again is reported as a loop.
// cross_indirect: when true, array items and dictionary values are
//   copied recursively even if they are indirect; when false, only
//   direct children are recursed into.
// first_level_only: when true, children are never recursed into;
//   the new container holds the handles returned by getArrayItem /
//   getKey as they are.
// stop_at_streams: when true, a stream is left untouched and the
//   function returns immediately; when false, a stream causes
//   std::runtime_error.
//
// Throws std::runtime_error for streams (see above) and for loops,
// and std::logic_error for reserved objects and unknown object types.
void
QPDFObjectHandle::copyObject(std::set<QPDFObjGen>& visited,
                             bool cross_indirect, bool first_level_only,
                             bool stop_at_streams)
{
    assertInitialized();

    if (isStream())
    {
	QTC::TC("qpdf", "QPDFObjectHandle copy stream",
                stop_at_streams ? 0 : 1);
        if (stop_at_streams)
        {
            // Leave the handle exactly as it is, including its
            // object ID and generation.
            return;
        }
	throw std::runtime_error(
	    "attempt to make a stream into a direct object");
    }

    // Only indirect objects (non-zero object ID) take part in loop
    // detection.
    QPDFObjGen cur_og(this->objid, this->generation);
    if (cur_og.getObj() != 0)
    {
	if (visited.count(cur_og))
	{
	    QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop");
	    throw std::runtime_error(
		"loop detected while converting object from "
		"indirect to direct");
	}
	visited.insert(cur_og);
    }

    if (isReserved())
    {
        throw std::logic_error(
            "QPDFObjectHandle: attempting to make a"
            " reserved object handle direct");
    }

    // Resolve the object first, then drop the indirect identity so
    // that this handle becomes direct.
    dereference();
    this->qpdf = 0;
    this->objid = 0;
    this->generation = 0;

    PointerHolder<QPDFObject> new_obj;

    if (isBool())
    {
	QTC::TC("qpdf", "QPDFObjectHandle clone bool");
	new_obj = new QPDF_Bool(getBoolValue());
    }
    else if (isNull())
    {
	QTC::TC("qpdf", "QPDFObjectHandle clone null");
	new_obj = new QPDF_Null();
    }
    else if (isInteger())
    {
	QTC::TC("qpdf", "QPDFObjectHandle clone integer");
	new_obj = new QPDF_Integer(getIntValue());
    }
    else if (isReal())
    {
	QTC::TC("qpdf", "QPDFObjectHandle clone real");
	new_obj = new QPDF_Real(getRealValue());
    }
    else if (isName())
    {
	QTC::TC("qpdf", "QPDFObjectHandle clone name");
	new_obj = new QPDF_Name(getName());
    }
    else if (isString())
    {
	QTC::TC("qpdf", "QPDFObjectHandle clone string");
	new_obj = new QPDF_String(getStringValue());
    }
    else if (isArray())
    {
	QTC::TC("qpdf", "QPDFObjectHandle clone array");
	std::vector<QPDFObjectHandle> items;
	int n = getArrayNItems();
	for (int i = 0; i < n; ++i)
	{
	    items.push_back(getArrayItem(i));
            // Recurse into the stored handle itself so that the new
            // array ends up holding the copied item.
            if ((! first_level_only) &&
                (cross_indirect || (! items.back().isIndirect())))
            {
                items.back().copyObject(
                    visited, cross_indirect,
                    first_level_only, stop_at_streams);
            }
	}
	new_obj = new QPDF_Array(items);
    }
    else if (isDictionary())
    {
	QTC::TC("qpdf", "QPDFObjectHandle clone dictionary");
	std::set<std::string> keys = getKeys();
	std::map<std::string, QPDFObjectHandle> items;
	for (std::set<std::string>::iterator iter = keys.begin();
	     iter != keys.end(); ++iter)
	{
	    items[*iter] = getKey(*iter);
            // Same rule as for array items: recurse into the stored
            // value in place.
            if ((! first_level_only) &&
                (cross_indirect || (! items[*iter].isIndirect())))
            {
                items[*iter].copyObject(
                    visited, cross_indirect,
                    first_level_only, stop_at_streams);
            }
	}
	new_obj = new QPDF_Dictionary(items);
    }
    else
    {
	throw std::logic_error("QPDFObjectHandle::makeDirectInternal: "
			       "unknown object type");
    }

    this->obj = new_obj;

    // This object is finished; remove it from the set so that it may
    // legitimately be reached again through a different path.
    if (cur_og.getObj())
    {
	visited.erase(cur_og);
    }
}
3001 
3002 QPDFObjectHandle
copyStream()3003 QPDFObjectHandle::copyStream()
3004 {
3005     assertStream();
3006     QPDFObjectHandle result = newStream(this->getOwningQPDF());
3007     QPDFObjectHandle dict = result.getDict();
3008     QPDFObjectHandle old_dict = getDict();
3009     for (auto& iter: QPDFDictItems(old_dict))
3010     {
3011         if (iter.second.isIndirect())
3012         {
3013             dict.replaceKey(iter.first, iter.second);
3014         }
3015         else
3016         {
3017             dict.replaceKey(iter.first, iter.second.shallowCopy());
3018         }
3019     }
3020     QPDF::StreamCopier::copyStreamData(getOwningQPDF(), result, *this);
3021     return result;
3022 }
3023 
3024 void
makeDirect()3025 QPDFObjectHandle::makeDirect()
3026 {
3027     makeDirect(false);
3028 }
3029 
3030 void
makeDirect(bool allow_streams)3031 QPDFObjectHandle::makeDirect(bool allow_streams)
3032 {
3033     std::set<QPDFObjGen> visited;
3034     copyObject(visited, true, false, allow_streams);
3035 }
3036 
3037 void
assertInitialized() const3038 QPDFObjectHandle::assertInitialized() const
3039 {
3040     if (! this->initialized)
3041     {
3042 	throw std::logic_error("operation attempted on uninitialized "
3043 			       "QPDFObjectHandle");
3044     }
3045 }
3046 
3047 void
typeWarning(char const * expected_type,std::string const & warning)3048 QPDFObjectHandle::typeWarning(char const* expected_type,
3049                               std::string const& warning)
3050 {
3051     QPDF* context = nullptr;
3052     std::string description;
3053     dereference();
3054     this->obj->getDescription(context, description);
3055     // Null context handled by warn
3056     warn(context,
3057          QPDFExc(qpdf_e_object,
3058                  "", description, 0,
3059                  std::string("operation for ") + expected_type +
3060                  " attempted on object of type " +
3061                  getTypeName() + ": " + warning));
3062 }
3063 
3064 void
warnIfPossible(std::string const & warning,bool throw_if_no_description)3065 QPDFObjectHandle::warnIfPossible(std::string const& warning,
3066                                  bool throw_if_no_description)
3067 {
3068     QPDF* context = 0;
3069     std::string description;
3070     dereference();
3071     if (this->obj->getDescription(context, description))
3072     {
3073         warn(context,
3074              QPDFExc(
3075                  qpdf_e_damaged_pdf,
3076                  "", description, 0,
3077                  warning));
3078     }
3079     else if (throw_if_no_description)
3080     {
3081         throw std::runtime_error(warning);
3082     }
3083 }
3084 
3085 void
objectWarning(std::string const & warning)3086 QPDFObjectHandle::objectWarning(std::string const& warning)
3087 {
3088     QPDF* context = nullptr;
3089     std::string description;
3090     dereference();
3091     this->obj->getDescription(context, description);
3092     // Null context handled by warn
3093     warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning));
3094 }
3095 
3096 void
assertType(char const * type_name,bool istype)3097 QPDFObjectHandle::assertType(char const* type_name, bool istype)
3098 {
3099     if (! istype)
3100     {
3101 	throw std::runtime_error(std::string("operation for ") + type_name +
3102                                  " attempted on object of type " +
3103                                  getTypeName());
3104     }
3105 }
3106 
3107 void
assertNull()3108 QPDFObjectHandle::assertNull()
3109 {
3110     assertType("null", isNull());
3111 }
3112 
3113 void
assertBool()3114 QPDFObjectHandle::assertBool()
3115 {
3116     assertType("boolean", isBool());
3117 }
3118 
3119 void
assertInteger()3120 QPDFObjectHandle::assertInteger()
3121 {
3122     assertType("integer", isInteger());
3123 }
3124 
3125 void
assertReal()3126 QPDFObjectHandle::assertReal()
3127 {
3128     assertType("real", isReal());
3129 }
3130 
3131 void
assertName()3132 QPDFObjectHandle::assertName()
3133 {
3134     assertType("name", isName());
3135 }
3136 
3137 void
assertString()3138 QPDFObjectHandle::assertString()
3139 {
3140     assertType("string", isString());
3141 }
3142 
3143 void
assertOperator()3144 QPDFObjectHandle::assertOperator()
3145 {
3146     assertType("operator", isOperator());
3147 }
3148 
3149 void
assertInlineImage()3150 QPDFObjectHandle::assertInlineImage()
3151 {
3152     assertType("inlineimage", isInlineImage());
3153 }
3154 
3155 void
assertArray()3156 QPDFObjectHandle::assertArray()
3157 {
3158     assertType("array", isArray());
3159 }
3160 
3161 void
assertDictionary()3162 QPDFObjectHandle::assertDictionary()
3163 {
3164     assertType("dictionary", isDictionary());
3165 }
3166 
3167 void
assertStream()3168 QPDFObjectHandle::assertStream()
3169 {
3170     assertType("stream", isStream());
3171 }
3172 
3173 void
assertReserved()3174 QPDFObjectHandle::assertReserved()
3175 {
3176     assertType("reserved", isReserved());
3177 }
3178 
3179 void
assertIndirect()3180 QPDFObjectHandle::assertIndirect()
3181 {
3182     if (! isIndirect())
3183     {
3184 	throw std::logic_error(
3185             "operation for indirect object attempted on direct object");
3186     }
3187 }
3188 
3189 void
assertScalar()3190 QPDFObjectHandle::assertScalar()
3191 {
3192     assertType("scalar", isScalar());
3193 }
3194 
3195 void
assertNumber()3196 QPDFObjectHandle::assertNumber()
3197 {
3198     assertType("number", isNumber());
3199 }
3200 
3201 bool
isPageObject()3202 QPDFObjectHandle::isPageObject()
3203 {
3204     // See comments in QPDFObjectHandle.hh.
3205     if (getOwningQPDF() == nullptr)
3206     {
3207         return false;
3208     }
3209     // getAllPages repairs /Type when traversing the page tree.
3210     getOwningQPDF()->getAllPages();
3211     if (! this->isDictionary())
3212     {
3213         return false;
3214     }
3215     if (this->hasKey("/Type"))
3216     {
3217         QPDFObjectHandle type = this->getKey("/Type");
3218         if (type.isName() && (type.getName() == "/Page"))
3219         {
3220             return true;
3221         }
3222         // Files have been seen in the wild that have /Type (Page)
3223         else if (type.isString() && (type.getStringValue() == "Page"))
3224         {
3225             return true;
3226         }
3227         else
3228         {
3229             return false;
3230         }
3231     }
3232     return false;
3233 }
3234 
3235 bool
isPagesObject()3236 QPDFObjectHandle::isPagesObject()
3237 {
3238     if (getOwningQPDF() == nullptr)
3239     {
3240         return false;
3241     }
3242     // getAllPages repairs /Type when traversing the page tree.
3243     getOwningQPDF()->getAllPages();
3244     return (this->isDictionary() &&
3245             this->hasKey("/Type") &&
3246             this->getKey("/Type").isName() &&
3247             this->getKey("/Type").getName() == "/Pages");
3248 }
3249 
3250 bool
isFormXObject()3251 QPDFObjectHandle::isFormXObject()
3252 {
3253     if (! this->isStream())
3254     {
3255         return false;
3256     }
3257     QPDFObjectHandle dict = this->getDict();
3258     return (dict.getKey("/Type").isName() &&
3259             ("/XObject" == dict.getKey("/Type").getName()) &&
3260             dict.getKey("/Subtype").isName() &&
3261             ("/Form" == dict.getKey("/Subtype").getName()));
3262 }
3263 
3264 bool
isImage(bool exclude_imagemask)3265 QPDFObjectHandle::isImage(bool exclude_imagemask)
3266 {
3267     if (! this->isStream())
3268     {
3269         return false;
3270     }
3271     QPDFObjectHandle dict = this->getDict();
3272     return (dict.hasKey("/Subtype") &&
3273             (dict.getKey("/Subtype").getName() == "/Image") &&
3274             ((! exclude_imagemask) ||
3275              (! (dict.getKey("/ImageMask").isBool() &&
3276                  dict.getKey("/ImageMask").getBoolValue()))));
3277 }
3278 
3279 void
checkOwnership(QPDFObjectHandle const & item) const3280 QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const
3281 {
3282     if ((this->qpdf != nullptr) &&
3283         (item.qpdf != nullptr) &&
3284         (this->qpdf != item.qpdf))
3285     {
3286         QTC::TC("qpdf", "QPDFObjectHandle check ownership");
3287         throw std::logic_error(
3288             "Attempting to add an object from a different QPDF."
3289             " Use QPDF::copyForeignObject to add objects from another file.");
3290     }
3291 }
3292 
3293 void
assertPageObject()3294 QPDFObjectHandle::assertPageObject()
3295 {
3296     if (! isPageObject())
3297     {
3298 	throw std::runtime_error("page operation called on non-Page object");
3299     }
3300 }
3301 
3302 void
dereference()3303 QPDFObjectHandle::dereference()
3304 {
3305     if (! this->initialized)
3306     {
3307         throw std::logic_error(
3308             "attempted to dereference an uninitialized QPDFObjectHandle");
3309     }
3310     if (this->obj.getPointer() && this->objid &&
3311         QPDF::Resolver::objectChanged(
3312             this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj))
3313     {
3314         this->obj = nullptr;
3315     }
3316     if (this->obj.getPointer() == 0)
3317     {
3318         PointerHolder<QPDFObject> obj = QPDF::Resolver::resolve(
3319 	    this->qpdf, this->objid, this->generation);
3320 	if (obj.getPointer() == 0)
3321 	{
3322             // QPDF::resolve never returns an uninitialized object, but
3323             // check just in case.
3324 	    this->obj = new QPDF_Null();
3325 	}
3326         else if (dynamic_cast<QPDF_Reserved*>(obj.getPointer()))
3327         {
3328             // Do not resolve
3329         }
3330         else
3331         {
3332             this->reserved = false;
3333             this->obj = obj;
3334         }
3335     }
3336 }
3337 
3338 void
warn(QPDF * qpdf,QPDFExc const & e)3339 QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e)
3340 {
3341     // If parsing on behalf of a QPDF object and want to give a
3342     // warning, we can warn through the object. If parsing for some
3343     // other reason, such as an explicit creation of an object from a
3344     // string, then just throw the exception.
3345     if (qpdf)
3346     {
3347         qpdf->warn(e);
3348     }
3349     else
3350     {
3351         throw e;
3352     }
3353 }
3354 
// Wrap oh so that its dictionary entries can be visited through
// begin()/end(); the handle is stored in the oh member.
QPDFObjectHandle::QPDFDictItems::QPDFDictItems(QPDFObjectHandle const& oh) :
    oh(oh)
{
}
3359 
3360 QPDFObjectHandle::QPDFDictItems::iterator&
operator ++()3361 QPDFObjectHandle::QPDFDictItems::iterator::operator++()
3362 {
3363     ++this->m->iter;
3364     updateIValue();
3365     return *this;
3366 }
3367 
3368 QPDFObjectHandle::QPDFDictItems::iterator&
operator --()3369 QPDFObjectHandle::QPDFDictItems::iterator::operator--()
3370 {
3371     --this->m->iter;
3372     updateIValue();
3373     return *this;
3374 }
3375 
3376 QPDFObjectHandle::QPDFDictItems::iterator::reference
operator *()3377 QPDFObjectHandle::QPDFDictItems::iterator:: operator*()
3378 {
3379     updateIValue();
3380     return this->ivalue;
3381 }
3382 
3383 QPDFObjectHandle::QPDFDictItems::iterator::pointer
operator ->()3384 QPDFObjectHandle::QPDFDictItems::iterator::operator->()
3385 {
3386     updateIValue();
3387     return &this->ivalue;
3388 }
3389 
3390 bool
operator ==(iterator const & other) const3391 QPDFObjectHandle::QPDFDictItems::iterator::operator==(
3392     iterator const& other) const
3393 {
3394     if (this->m->is_end && other.m->is_end)
3395     {
3396         return true;
3397     }
3398     if (this->m->is_end || other.m->is_end)
3399     {
3400         return false;
3401     }
3402     return (this->ivalue.first == other.ivalue.first);
3403 }
3404 
// Build an iterator over oh. for_begin is passed on to Members,
// which positions the iterator at the first key or at the end. The
// cached key/value pair is filled in right away.
QPDFObjectHandle::QPDFDictItems::iterator::iterator(
    QPDFObjectHandle& oh, bool for_begin) :
    m(new Members(oh, for_begin))
{
    updateIValue();
}
3411 
3412 void
updateIValue()3413 QPDFObjectHandle::QPDFDictItems::iterator::updateIValue()
3414 {
3415     this->m->is_end = (this->m->iter == this->m->keys.end());
3416     if (this->m->is_end)
3417     {
3418         this->ivalue.first = "";
3419         this->ivalue.second = QPDFObjectHandle();
3420     }
3421     else
3422     {
3423         this->ivalue.first = *(this->m->iter);
3424         this->ivalue.second = this->m->oh.getKey(this->ivalue.first);
3425     }
3426 }
3427 
Members(QPDFObjectHandle & oh,bool for_begin)3428 QPDFObjectHandle::QPDFDictItems::iterator::Members::Members(
3429     QPDFObjectHandle& oh, bool for_begin) :
3430     oh(oh)
3431 {
3432     this->keys = oh.getKeys();
3433     this->iter = for_begin ? this->keys.begin() : this->keys.end();
3434 }
3435 
3436 QPDFObjectHandle::QPDFDictItems::iterator
begin()3437 QPDFObjectHandle::QPDFDictItems::begin()
3438 {
3439     return iterator(oh, true);
3440 }
3441 
3442 QPDFObjectHandle::QPDFDictItems::iterator
end()3443 QPDFObjectHandle::QPDFDictItems::end()
3444 {
3445     return iterator(oh, false);
3446 }
3447 
// Wrap oh so that its array items can be visited through
// begin()/end(); the handle is stored in the oh member.
QPDFObjectHandle::QPDFArrayItems::QPDFArrayItems(QPDFObjectHandle const& oh) :
    oh(oh)
{
}
3452 
3453 QPDFObjectHandle::QPDFArrayItems::iterator&
operator ++()3454 QPDFObjectHandle::QPDFArrayItems::iterator::operator++()
3455 {
3456     if (! this->m->is_end)
3457     {
3458         ++this->m->item_number;
3459         updateIValue();
3460     }
3461     return *this;
3462 }
3463 
3464 QPDFObjectHandle::QPDFArrayItems::iterator&
operator --()3465 QPDFObjectHandle::QPDFArrayItems::iterator::operator--()
3466 {
3467     if (this->m->item_number > 0)
3468     {
3469         --this->m->item_number;
3470         updateIValue();
3471     }
3472     return *this;
3473 }
3474 
3475 QPDFObjectHandle::QPDFArrayItems::iterator::reference
operator *()3476 QPDFObjectHandle::QPDFArrayItems::iterator:: operator*()
3477 {
3478     updateIValue();
3479     return this->ivalue;
3480 }
3481 
3482 QPDFObjectHandle::QPDFArrayItems::iterator::pointer
operator ->()3483 QPDFObjectHandle::QPDFArrayItems::iterator::operator->()
3484 {
3485     updateIValue();
3486     return &this->ivalue;
3487 }
3488 
3489 bool
operator ==(iterator const & other) const3490 QPDFObjectHandle::QPDFArrayItems::iterator::operator==(
3491     iterator const& other) const
3492 {
3493     return (this->m->item_number == other.m->item_number);
3494 }
3495 
// Build an iterator over oh. for_begin is passed on to Members,
// which positions the iterator at index 0 or at the end. The cached
// item is filled in right away.
QPDFObjectHandle::QPDFArrayItems::iterator::iterator(
    QPDFObjectHandle& oh, bool for_begin) :
    m(new Members(oh, for_begin))
{
    updateIValue();
}
3502 
3503 void
updateIValue()3504 QPDFObjectHandle::QPDFArrayItems::iterator::updateIValue()
3505 {
3506     this->m->is_end = (this->m->item_number >= this->m->oh.getArrayNItems());
3507     if (this->m->is_end)
3508     {
3509         this->ivalue = QPDFObjectHandle();
3510     }
3511     else
3512     {
3513         this->ivalue = this->m->oh.getArrayItem(this->m->item_number);
3514     }
3515 }
3516 
Members(QPDFObjectHandle & oh,bool for_begin)3517 QPDFObjectHandle::QPDFArrayItems::iterator::Members::Members(
3518     QPDFObjectHandle& oh, bool for_begin) :
3519     oh(oh)
3520 {
3521     this->item_number = for_begin ? 0 : oh.getArrayNItems();
3522 }
3523 
3524 QPDFObjectHandle::QPDFArrayItems::iterator
begin()3525 QPDFObjectHandle::QPDFArrayItems::begin()
3526 {
3527     return iterator(oh, true);
3528 }
3529 
3530 QPDFObjectHandle::QPDFArrayItems::iterator
end()3531 QPDFObjectHandle::QPDFArrayItems::end()
3532 {
3533     return iterator(oh, false);
3534 }
3535