1 #include <qpdf/QPDFObjectHandle.hh>
2
3 #include <qpdf/QPDF.hh>
4 #include <qpdf/QPDF_Bool.hh>
5 #include <qpdf/QPDF_Null.hh>
6 #include <qpdf/QPDF_Integer.hh>
7 #include <qpdf/QPDF_Real.hh>
8 #include <qpdf/QPDF_Name.hh>
9 #include <qpdf/QPDF_String.hh>
10 #include <qpdf/QPDF_Operator.hh>
11 #include <qpdf/QPDF_InlineImage.hh>
12 #include <qpdf/QPDF_Array.hh>
13 #include <qpdf/QPDF_Dictionary.hh>
14 #include <qpdf/QPDF_Stream.hh>
15 #include <qpdf/QPDF_Reserved.hh>
16 #include <qpdf/Pl_Buffer.hh>
17 #include <qpdf/Pl_QPDFTokenizer.hh>
18 #include <qpdf/BufferInputSource.hh>
19 #include <qpdf/QPDFExc.hh>
20 #include <qpdf/QPDFPageObjectHelper.hh>
21 #include <qpdf/SparseOHArray.hh>
22 #include <qpdf/QPDFMatrix.hh>
23
24 #include <qpdf/QTC.hh>
25 #include <qpdf/QUtil.hh>
26 #include <qpdf/QIntC.hh>
27
28 #include <stdexcept>
29 #include <stdlib.h>
30 #include <ctype.h>
31 #include <limits.h>
32 #include <cstring>
33 #include <algorithm>
34
// Empty tag type with no members. An instance of it is thrown by
// QPDFObjectHandle::ParserCallbacks::terminateParsing().
class TerminateParsing {};
38
StreamDataProvider(bool supports_retry)39 QPDFObjectHandle::StreamDataProvider::StreamDataProvider(
40 bool supports_retry) :
41 supports_retry(supports_retry)
42 {
43 }
44
45 void
provideStreamData(int objid,int generation,Pipeline * pipeline)46 QPDFObjectHandle::StreamDataProvider::provideStreamData(
47 int objid, int generation, Pipeline* pipeline)
48 {
49 throw std::logic_error(
50 "you must override provideStreamData -- see QPDFObjectHandle.hh");
51 }
52
53 bool
provideStreamData(int objid,int generation,Pipeline * pipeline,bool suppress_warnings,bool will_retry)54 QPDFObjectHandle::StreamDataProvider::provideStreamData(
55 int objid, int generation, Pipeline* pipeline,
56 bool suppress_warnings, bool will_retry)
57 {
58 throw std::logic_error(
59 "you must override provideStreamData -- see QPDFObjectHandle.hh");
60 return false;
61 }
62
63 bool
supportsRetry()64 QPDFObjectHandle::StreamDataProvider::supportsRetry()
65 {
66 return this->supports_retry;
67 }
68
69 class CoalesceProvider: public QPDFObjectHandle::StreamDataProvider
70 {
71 public:
CoalesceProvider(QPDFObjectHandle containing_page,QPDFObjectHandle old_contents)72 CoalesceProvider(QPDFObjectHandle containing_page,
73 QPDFObjectHandle old_contents) :
74 containing_page(containing_page),
75 old_contents(old_contents)
76 {
77 }
~CoalesceProvider()78 virtual ~CoalesceProvider()
79 {
80 }
81 virtual void provideStreamData(int objid, int generation,
82 Pipeline* pipeline);
83
84 private:
85 QPDFObjectHandle containing_page;
86 QPDFObjectHandle old_contents;
87 };
88
89 void
provideStreamData(int,int,Pipeline * p)90 CoalesceProvider::provideStreamData(int, int, Pipeline* p)
91 {
92 QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data");
93 std::string description = "page object " +
94 QUtil::int_to_string(containing_page.getObjectID()) + " " +
95 QUtil::int_to_string(containing_page.getGeneration());
96 std::string all_description;
97 old_contents.pipeContentStreams(p, description, all_description);
98 }
99
100 void
handleEOF()101 QPDFObjectHandle::TokenFilter::handleEOF()
102 {
103 }
104
105 void
setPipeline(Pipeline * p)106 QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p)
107 {
108 this->pipeline = p;
109 }
110
111 void
write(char const * data,size_t len)112 QPDFObjectHandle::TokenFilter::write(char const* data, size_t len)
113 {
114 if (! this->pipeline)
115 {
116 return;
117 }
118 if (len)
119 {
120 this->pipeline->write(QUtil::unsigned_char_pointer(data), len);
121 }
122 }
123
124 void
write(std::string const & str)125 QPDFObjectHandle::TokenFilter::write(std::string const& str)
126 {
127 write(str.c_str(), str.length());
128 }
129
130 void
writeToken(QPDFTokenizer::Token const & token)131 QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
132 {
133 std::string value = token.getRawValue();
134 write(value.c_str(), value.length());
135 }
136
137 void
handleObject(QPDFObjectHandle)138 QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
139 {
140 throw std::logic_error("You must override one of the"
141 " handleObject methods in ParserCallbacks");
142 }
143
144 void
handleObject(QPDFObjectHandle oh,size_t,size_t)145 QPDFObjectHandle::ParserCallbacks::handleObject(
146 QPDFObjectHandle oh, size_t, size_t)
147 {
148 // This version of handleObject was added in qpdf 9. If the
149 // developer did not override it, fall back to the older
150 // interface.
151 handleObject(oh);
152 }
153
154 void
contentSize(size_t)155 QPDFObjectHandle::ParserCallbacks::contentSize(size_t)
156 {
157 // Ignore by default; overriding this is optional.
158 }
159
160 void
terminateParsing()161 QPDFObjectHandle::ParserCallbacks::terminateParsing()
162 {
163 throw TerminateParsing();
164 }
165
166 class LastChar: public Pipeline
167 {
168 public:
169 LastChar(Pipeline* next);
170 virtual ~LastChar() = default;
171 virtual void write(unsigned char* data, size_t len);
172 virtual void finish();
173 unsigned char getLastChar();
174
175 private:
176 unsigned char last_char;
177 };
178
LastChar(Pipeline * next)179 LastChar::LastChar(Pipeline* next) :
180 Pipeline("lastchar", next),
181 last_char(0)
182 {
183 }
184
185 void
write(unsigned char * data,size_t len)186 LastChar::write(unsigned char* data, size_t len)
187 {
188 if (len > 0)
189 {
190 this->last_char = data[len - 1];
191 }
192 getNext()->write(data, len);
193 }
194
195 void
finish()196 LastChar::finish()
197 {
198 getNext()->finish();
199 }
200
201 unsigned char
getLastChar()202 LastChar::getLastChar()
203 {
204 return this->last_char;
205 }
206
QPDFObjectHandle()207 QPDFObjectHandle::QPDFObjectHandle() :
208 initialized(false),
209 qpdf(0),
210 objid(0),
211 generation(0),
212 reserved(false)
213 {
214 }
215
QPDFObjectHandle(QPDF * qpdf,int objid,int generation)216 QPDFObjectHandle::QPDFObjectHandle(QPDF* qpdf, int objid, int generation) :
217 initialized(true),
218 qpdf(qpdf),
219 objid(objid),
220 generation(generation),
221 reserved(false)
222 {
223 }
224
QPDFObjectHandle(QPDFObject * data)225 QPDFObjectHandle::QPDFObjectHandle(QPDFObject* data) :
226 initialized(true),
227 qpdf(0),
228 objid(0),
229 generation(0),
230 obj(data),
231 reserved(false)
232 {
233 }
234
235 void
releaseResolved()236 QPDFObjectHandle::releaseResolved()
237 {
238 // Recursively break any resolved references to indirect objects.
239 // Do not cross over indirect object boundaries to avoid an
240 // infinite loop. This method may only be called during final
241 // destruction. See comments in QPDF::~QPDF().
242 if (isIndirect())
243 {
244 if (this->obj.getPointer())
245 {
246 this->obj = 0;
247 }
248 }
249 else
250 {
251 QPDFObject::ObjAccessor::releaseResolved(this->obj.getPointer());
252 }
253 }
254
255 void
setObjectDescriptionFromInput(QPDFObjectHandle object,QPDF * context,std::string const & description,PointerHolder<InputSource> input,qpdf_offset_t offset)256 QPDFObjectHandle::setObjectDescriptionFromInput(
257 QPDFObjectHandle object, QPDF* context,
258 std::string const& description, PointerHolder<InputSource> input,
259 qpdf_offset_t offset)
260 {
261 object.setObjectDescription(
262 context,
263 input->getName() + ", " + description +
264 " at offset " + QUtil::int_to_string(offset));
265 }
266
267 bool
isInitialized() const268 QPDFObjectHandle::isInitialized() const
269 {
270 return this->initialized;
271 }
272
273 QPDFObject::object_type_e
getTypeCode()274 QPDFObjectHandle::getTypeCode()
275 {
276 if (this->initialized)
277 {
278 dereference();
279 return this->obj->getTypeCode();
280 }
281 else
282 {
283 return QPDFObject::ot_uninitialized;
284 }
285 }
286
287 char const*
getTypeName()288 QPDFObjectHandle::getTypeName()
289 {
290 if (this->initialized)
291 {
292 dereference();
293 return this->obj->getTypeName();
294 }
295 else
296 {
297 return "uninitialized";
298 }
299 }
300
301 template <class T>
302 class QPDFObjectTypeAccessor
303 {
304 public:
check(QPDFObject * o)305 static bool check(QPDFObject* o)
306 {
307 return (o && dynamic_cast<T*>(o));
308 }
check(QPDFObject const * o)309 static bool check(QPDFObject const* o)
310 {
311 return (o && dynamic_cast<T const*>(o));
312 }
313 };
314
315 bool
isBool()316 QPDFObjectHandle::isBool()
317 {
318 if (! this->initialized)
319 {
320 return false;
321 }
322 dereference();
323 return QPDFObjectTypeAccessor<QPDF_Bool>::check(obj.getPointer());
324 }
325
326 bool
isDirectNull() const327 QPDFObjectHandle::isDirectNull() const
328 {
329 // Don't call dereference() -- this is a const method, and we know
330 // objid == 0, so there's nothing to resolve.
331 return (this->initialized && (this->objid == 0) &&
332 QPDFObjectTypeAccessor<QPDF_Null>::check(obj.getPointer()));
333 }
334
335 bool
isNull()336 QPDFObjectHandle::isNull()
337 {
338 if (! this->initialized)
339 {
340 return false;
341 }
342 dereference();
343 return QPDFObjectTypeAccessor<QPDF_Null>::check(obj.getPointer());
344 }
345
346 bool
isInteger()347 QPDFObjectHandle::isInteger()
348 {
349 if (! this->initialized)
350 {
351 return false;
352 }
353 dereference();
354 return QPDFObjectTypeAccessor<QPDF_Integer>::check(obj.getPointer());
355 }
356
357 bool
isReal()358 QPDFObjectHandle::isReal()
359 {
360 if (! this->initialized)
361 {
362 return false;
363 }
364 dereference();
365 return QPDFObjectTypeAccessor<QPDF_Real>::check(obj.getPointer());
366 }
367
368 bool
isNumber()369 QPDFObjectHandle::isNumber()
370 {
371 return (isInteger() || isReal());
372 }
373
374 double
getNumericValue()375 QPDFObjectHandle::getNumericValue()
376 {
377 double result = 0.0;
378 if (isInteger())
379 {
380 result = static_cast<double>(getIntValue());
381 }
382 else if (isReal())
383 {
384 result = atof(getRealValue().c_str());
385 }
386 else
387 {
388 typeWarning("number", "returning 0");
389 QTC::TC("qpdf", "QPDFObjectHandle numeric non-numeric");
390 }
391 return result;
392 }
393
394 bool
isName()395 QPDFObjectHandle::isName()
396 {
397 if (! this->initialized)
398 {
399 return false;
400 }
401 dereference();
402 return QPDFObjectTypeAccessor<QPDF_Name>::check(obj.getPointer());
403 }
404
405 bool
isString()406 QPDFObjectHandle::isString()
407 {
408 if (! this->initialized)
409 {
410 return false;
411 }
412 dereference();
413 return QPDFObjectTypeAccessor<QPDF_String>::check(obj.getPointer());
414 }
415
416 bool
isOperator()417 QPDFObjectHandle::isOperator()
418 {
419 if (! this->initialized)
420 {
421 return false;
422 }
423 dereference();
424 return QPDFObjectTypeAccessor<QPDF_Operator>::check(obj.getPointer());
425 }
426
427 bool
isInlineImage()428 QPDFObjectHandle::isInlineImage()
429 {
430 if (! this->initialized)
431 {
432 return false;
433 }
434 dereference();
435 return QPDFObjectTypeAccessor<QPDF_InlineImage>::check(obj.getPointer());
436 }
437
438 bool
isArray()439 QPDFObjectHandle::isArray()
440 {
441 if (! this->initialized)
442 {
443 return false;
444 }
445 dereference();
446 return QPDFObjectTypeAccessor<QPDF_Array>::check(obj.getPointer());
447 }
448
449 bool
isDictionary()450 QPDFObjectHandle::isDictionary()
451 {
452 if (! this->initialized)
453 {
454 return false;
455 }
456 dereference();
457 return QPDFObjectTypeAccessor<QPDF_Dictionary>::check(obj.getPointer());
458 }
459
460 bool
isStream()461 QPDFObjectHandle::isStream()
462 {
463 if (! this->initialized)
464 {
465 return false;
466 }
467 dereference();
468 return QPDFObjectTypeAccessor<QPDF_Stream>::check(obj.getPointer());
469 }
470
471 bool
isReserved()472 QPDFObjectHandle::isReserved()
473 {
474 if (! this->initialized)
475 {
476 return false;
477 }
478 // dereference will clear reserved if this has been replaced
479 dereference();
480 return this->reserved;
481 }
482
483 bool
isIndirect()484 QPDFObjectHandle::isIndirect()
485 {
486 if (! this->initialized)
487 {
488 return false;
489 }
490 return (this->objid != 0);
491 }
492
493 bool
isScalar()494 QPDFObjectHandle::isScalar()
495 {
496 return (! (isArray() || isDictionary() || isStream() ||
497 isOperator() || isInlineImage()));
498 }
499
500 // Bool accessors
501
502 bool
getBoolValue()503 QPDFObjectHandle::getBoolValue()
504 {
505 if (isBool())
506 {
507 return dynamic_cast<QPDF_Bool*>(obj.getPointer())->getVal();
508 }
509 else
510 {
511 typeWarning("boolean", "returning false");
512 QTC::TC("qpdf", "QPDFObjectHandle boolean returning false");
513 return false;
514 }
515 }
516
517 // Integer accessors
518
519 long long
getIntValue()520 QPDFObjectHandle::getIntValue()
521 {
522 if (isInteger())
523 {
524 return dynamic_cast<QPDF_Integer*>(obj.getPointer())->getVal();
525 }
526 else
527 {
528 typeWarning("integer", "returning 0");
529 QTC::TC("qpdf", "QPDFObjectHandle integer returning 0");
530 return 0;
531 }
532 }
533
534 int
getIntValueAsInt()535 QPDFObjectHandle::getIntValueAsInt()
536 {
537 int result = 0;
538 long long v = getIntValue();
539 if (v < INT_MIN)
540 {
541 QTC::TC("qpdf", "QPDFObjectHandle int returning INT_MIN");
542 warnIfPossible(
543 "requested value of integer is too small; returning INT_MIN",
544 false);
545 result = INT_MIN;
546 }
547 else if (v > INT_MAX)
548 {
549 QTC::TC("qpdf", "QPDFObjectHandle int returning INT_MAX");
550 warnIfPossible(
551 "requested value of integer is too big; returning INT_MAX",
552 false);
553 result = INT_MAX;
554 }
555 else
556 {
557 result = static_cast<int>(v);
558 }
559 return result;
560 }
561
562 unsigned long long
getUIntValue()563 QPDFObjectHandle::getUIntValue()
564 {
565 unsigned long long result = 0;
566 long long v = getIntValue();
567 if (v < 0)
568 {
569 QTC::TC("qpdf", "QPDFObjectHandle uint returning 0");
570 warnIfPossible(
571 "unsigned value request for negative number; returning 0",
572 false);
573 }
574 else
575 {
576 result = static_cast<unsigned long long>(v);
577 }
578 return result;
579 }
580
581 unsigned int
getUIntValueAsUInt()582 QPDFObjectHandle::getUIntValueAsUInt()
583 {
584 unsigned int result = 0;
585 long long v = getIntValue();
586 if (v < 0)
587 {
588 QTC::TC("qpdf", "QPDFObjectHandle uint uint returning 0");
589 warnIfPossible(
590 "unsigned integer value request for negative number; returning 0",
591 false);
592 result = 0;
593 }
594 else if (v > UINT_MAX)
595 {
596 QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX");
597 warnIfPossible(
598 "requested value of unsigned integer is too big;"
599 " returning UINT_MAX",
600 false);
601 result = UINT_MAX;
602 }
603 else
604 {
605 result = static_cast<unsigned int>(v);
606 }
607 return result;
608 }
609
610 // Real accessors
611
612 std::string
getRealValue()613 QPDFObjectHandle::getRealValue()
614 {
615 if (isReal())
616 {
617 return dynamic_cast<QPDF_Real*>(obj.getPointer())->getVal();
618 }
619 else
620 {
621 typeWarning("real", "returning 0.0");
622 QTC::TC("qpdf", "QPDFObjectHandle real returning 0.0");
623 return "0.0";
624 }
625 }
626
627 // Name accessors
628
629 std::string
getName()630 QPDFObjectHandle::getName()
631 {
632 if (isName())
633 {
634 return dynamic_cast<QPDF_Name*>(obj.getPointer())->getName();
635 }
636 else
637 {
638 typeWarning("name", "returning dummy name");
639 QTC::TC("qpdf", "QPDFObjectHandle name returning dummy name");
640 return "/QPDFFakeName";
641 }
642 }
643
644 // String accessors
645
646 std::string
getStringValue()647 QPDFObjectHandle::getStringValue()
648 {
649 if (isString())
650 {
651 return dynamic_cast<QPDF_String*>(obj.getPointer())->getVal();
652 }
653 else
654 {
655 typeWarning("string", "returning empty string");
656 QTC::TC("qpdf", "QPDFObjectHandle string returning empty string");
657 return "";
658 }
659 }
660
661 std::string
getUTF8Value()662 QPDFObjectHandle::getUTF8Value()
663 {
664 if (isString())
665 {
666 return dynamic_cast<QPDF_String*>(obj.getPointer())->getUTF8Val();
667 }
668 else
669 {
670 typeWarning("string", "returning empty string");
671 QTC::TC("qpdf", "QPDFObjectHandle string returning empty utf8");
672 return "";
673 }
674 }
675
676 // Operator and Inline Image accessors
677
678 std::string
getOperatorValue()679 QPDFObjectHandle::getOperatorValue()
680 {
681 if (isOperator())
682 {
683 return dynamic_cast<QPDF_Operator*>(obj.getPointer())->getVal();
684 }
685 else
686 {
687 typeWarning("operator", "returning fake value");
688 QTC::TC("qpdf", "QPDFObjectHandle operator returning fake value");
689 return "QPDFFAKE";
690 }
691 }
692
693 std::string
getInlineImageValue()694 QPDFObjectHandle::getInlineImageValue()
695 {
696 if (isInlineImage())
697 {
698 return dynamic_cast<QPDF_InlineImage*>(obj.getPointer())->getVal();
699 }
700 else
701 {
702 typeWarning("inlineimage", "returning empty data");
703 QTC::TC("qpdf", "QPDFObjectHandle inlineimage returning empty data");
704 return "";
705 }
706 }
707
708 // Array accessors
709
710 QPDFObjectHandle::QPDFArrayItems
aitems()711 QPDFObjectHandle::aitems()
712 {
713 return QPDFArrayItems(*this);
714 }
715
716 int
getArrayNItems()717 QPDFObjectHandle::getArrayNItems()
718 {
719 if (isArray())
720 {
721 return dynamic_cast<QPDF_Array*>(obj.getPointer())->getNItems();
722 }
723 else
724 {
725 typeWarning("array", "treating as empty");
726 QTC::TC("qpdf", "QPDFObjectHandle array treating as empty");
727 return 0;
728 }
729 }
730
731 QPDFObjectHandle
getArrayItem(int n)732 QPDFObjectHandle::getArrayItem(int n)
733 {
734 QPDFObjectHandle result;
735 if (isArray() && (n < getArrayNItems()) && (n >= 0))
736 {
737 result = dynamic_cast<QPDF_Array*>(obj.getPointer())->getItem(n);
738 }
739 else
740 {
741 result = newNull();
742 if (isArray())
743 {
744 objectWarning("returning null for out of bounds array access");
745 QTC::TC("qpdf", "QPDFObjectHandle array bounds");
746 }
747 else
748 {
749 typeWarning("array", "returning null");
750 QTC::TC("qpdf", "QPDFObjectHandle array null for non-array");
751 }
752 QPDF* context = 0;
753 std::string description;
754 if (this->obj->getDescription(context, description))
755 {
756 result.setObjectDescription(
757 context,
758 description +
759 " -> null returned from invalid array access");
760 }
761 }
762 return result;
763 }
764
765 bool
isRectangle()766 QPDFObjectHandle::isRectangle()
767 {
768 if (! isArray())
769 {
770 return false;
771 }
772 if (getArrayNItems() != 4)
773 {
774 return false;
775 }
776 for (int i = 0; i < 4; ++i)
777 {
778 if (! getArrayItem(i).isNumber())
779 {
780 return false;
781 }
782 }
783 return true;
784 }
785
786 bool
isMatrix()787 QPDFObjectHandle::isMatrix()
788 {
789 if (! isArray())
790 {
791 return false;
792 }
793 if (getArrayNItems() != 6)
794 {
795 return false;
796 }
797 for (int i = 0; i < 6; ++i)
798 {
799 if (! getArrayItem(i).isNumber())
800 {
801 return false;
802 }
803 }
804 return true;
805 }
806
807 QPDFObjectHandle::Rectangle
getArrayAsRectangle()808 QPDFObjectHandle::getArrayAsRectangle()
809 {
810 Rectangle result;
811 if (isRectangle())
812 {
813 // Rectangle coordinates are always supposed to be llx, lly,
814 // urx, ury, but files have been found in the wild where
815 // llx > urx or lly > ury.
816 double i0 = getArrayItem(0).getNumericValue();
817 double i1 = getArrayItem(1).getNumericValue();
818 double i2 = getArrayItem(2).getNumericValue();
819 double i3 = getArrayItem(3).getNumericValue();
820 result = Rectangle(std::min(i0, i2),
821 std::min(i1, i3),
822 std::max(i0, i2),
823 std::max(i1, i3));
824 }
825 return result;
826 }
827
828 QPDFObjectHandle::Matrix
getArrayAsMatrix()829 QPDFObjectHandle::getArrayAsMatrix()
830 {
831 Matrix result;
832 if (isMatrix())
833 {
834 result = Matrix(getArrayItem(0).getNumericValue(),
835 getArrayItem(1).getNumericValue(),
836 getArrayItem(2).getNumericValue(),
837 getArrayItem(3).getNumericValue(),
838 getArrayItem(4).getNumericValue(),
839 getArrayItem(5).getNumericValue());
840 }
841 return result;
842 }
843
844 std::vector<QPDFObjectHandle>
getArrayAsVector()845 QPDFObjectHandle::getArrayAsVector()
846 {
847 std::vector<QPDFObjectHandle> result;
848 if (isArray())
849 {
850 dynamic_cast<QPDF_Array*>(obj.getPointer())->getAsVector(result);
851 }
852 else
853 {
854 typeWarning("array", "treating as empty");
855 QTC::TC("qpdf", "QPDFObjectHandle array treating as empty vector");
856 }
857 return result;
858 }
859
860 // Array mutators
861
862 void
setArrayItem(int n,QPDFObjectHandle const & item)863 QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item)
864 {
865 if (isArray())
866 {
867 checkOwnership(item);
868 dynamic_cast<QPDF_Array*>(obj.getPointer())->setItem(n, item);
869 }
870 else
871 {
872 typeWarning("array", "ignoring attempt to set item");
873 QTC::TC("qpdf", "QPDFObjectHandle array ignoring set item");
874 }
875 }
876
877 void
setArrayFromVector(std::vector<QPDFObjectHandle> const & items)878 QPDFObjectHandle::setArrayFromVector(std::vector<QPDFObjectHandle> const& items)
879 {
880 if (isArray())
881 {
882 for (auto const& item: items)
883 {
884 checkOwnership(item);
885 }
886 dynamic_cast<QPDF_Array*>(obj.getPointer())->setFromVector(items);
887 }
888 else
889 {
890 typeWarning("array", "ignoring attempt to replace items");
891 QTC::TC("qpdf", "QPDFObjectHandle array ignoring replace items");
892 }
893 }
894
895 void
insertItem(int at,QPDFObjectHandle const & item)896 QPDFObjectHandle::insertItem(int at, QPDFObjectHandle const& item)
897 {
898 if (isArray())
899 {
900 dynamic_cast<QPDF_Array*>(obj.getPointer())->insertItem(at, item);
901 }
902 else
903 {
904 typeWarning("array", "ignoring attempt to insert item");
905 QTC::TC("qpdf", "QPDFObjectHandle array ignoring insert item");
906 }
907 }
908
909 void
appendItem(QPDFObjectHandle const & item)910 QPDFObjectHandle::appendItem(QPDFObjectHandle const& item)
911 {
912 if (isArray())
913 {
914 checkOwnership(item);
915 dynamic_cast<QPDF_Array*>(obj.getPointer())->appendItem(item);
916 }
917 else
918 {
919 typeWarning("array", "ignoring attempt to append item");
920 QTC::TC("qpdf", "QPDFObjectHandle array ignoring append item");
921 }
922 }
923
924 void
eraseItem(int at)925 QPDFObjectHandle::eraseItem(int at)
926 {
927 if (isArray() && (at < getArrayNItems()) && (at >= 0))
928 {
929 dynamic_cast<QPDF_Array*>(obj.getPointer())->eraseItem(at);
930 }
931 else
932 {
933 if (isArray())
934 {
935 objectWarning("ignoring attempt to erase out of bounds array item");
936 QTC::TC("qpdf", "QPDFObjectHandle erase array bounds");
937 }
938 else
939 {
940 typeWarning("array", "ignoring attempt to erase item");
941 QTC::TC("qpdf", "QPDFObjectHandle array ignoring erase item");
942 }
943 }
944 }
945
946 // Dictionary accessors
947
948 QPDFObjectHandle::QPDFDictItems
ditems()949 QPDFObjectHandle::ditems()
950 {
951 return QPDFDictItems(*this);
952 }
953
954 bool
hasKey(std::string const & key)955 QPDFObjectHandle::hasKey(std::string const& key)
956 {
957 if (isDictionary())
958 {
959 return dynamic_cast<QPDF_Dictionary*>(obj.getPointer())->hasKey(key);
960 }
961 else
962 {
963 typeWarning("dictionary",
964 "returning false for a key containment request");
965 QTC::TC("qpdf", "QPDFObjectHandle dictionary false for hasKey");
966 return false;
967 }
968 }
969
970 QPDFObjectHandle
getKey(std::string const & key)971 QPDFObjectHandle::getKey(std::string const& key)
972 {
973 QPDFObjectHandle result;
974 if (isDictionary())
975 {
976 result = dynamic_cast<QPDF_Dictionary*>(
977 obj.getPointer())->getKey(key);
978 }
979 else
980 {
981 typeWarning(
982 "dictionary", "returning null for attempted key retrieval");
983 QTC::TC("qpdf", "QPDFObjectHandle dictionary null for getKey");
984 result = newNull();
985 QPDF* qpdf = 0;
986 std::string description;
987 if (this->obj->getDescription(qpdf, description))
988 {
989 result.setObjectDescription(
990 qpdf,
991 description +
992 " -> null returned from getting key " +
993 key + " from non-Dictionary");
994 }
995 }
996 return result;
997 }
998
999 std::set<std::string>
getKeys()1000 QPDFObjectHandle::getKeys()
1001 {
1002 std::set<std::string> result;
1003 if (isDictionary())
1004 {
1005 result = dynamic_cast<QPDF_Dictionary*>(obj.getPointer())->getKeys();
1006 }
1007 else
1008 {
1009 typeWarning("dictionary", "treating as empty");
1010 QTC::TC("qpdf", "QPDFObjectHandle dictionary empty set for getKeys");
1011 }
1012 return result;
1013 }
1014
1015 std::map<std::string, QPDFObjectHandle>
getDictAsMap()1016 QPDFObjectHandle::getDictAsMap()
1017 {
1018 std::map<std::string, QPDFObjectHandle> result;
1019 if (isDictionary())
1020 {
1021 result = dynamic_cast<QPDF_Dictionary*>(
1022 obj.getPointer())->getAsMap();
1023 }
1024 else
1025 {
1026 typeWarning("dictionary", "treating as empty");
1027 QTC::TC("qpdf", "QPDFObjectHandle dictionary empty map for asMap");
1028 }
1029 return result;
1030 }
1031
1032 // Array and Name accessors
1033 bool
isOrHasName(std::string const & value)1034 QPDFObjectHandle::isOrHasName(std::string const& value)
1035 {
1036 if (isName() && (getName() == value))
1037 {
1038 return true;
1039 }
1040 else if (isArray())
1041 {
1042 int n = getArrayNItems();
1043 for (int i = 0; i < n; ++i)
1044 {
1045 QPDFObjectHandle item = getArrayItem(0);
1046 if (item.isName() && (item.getName() == value))
1047 {
1048 return true;
1049 }
1050 }
1051 }
1052 return false;
1053 }
1054
1055 void
makeResourcesIndirect(QPDF & owning_qpdf)1056 QPDFObjectHandle::makeResourcesIndirect(QPDF& owning_qpdf)
1057 {
1058 if (! isDictionary())
1059 {
1060 return;
1061 }
1062 for (auto const& i1: ditems())
1063 {
1064 QPDFObjectHandle sub = i1.second;
1065 if (! sub.isDictionary())
1066 {
1067 continue;
1068 }
1069 for (auto i2: sub.ditems())
1070 {
1071 std::string const& key = i2.first;
1072 QPDFObjectHandle val = i2.second;
1073 if (! val.isIndirect())
1074 {
1075 sub.replaceKey(key, owning_qpdf.makeIndirectObject(val));
1076 }
1077 }
1078 }
1079 }
1080
1081 void
mergeResources(QPDFObjectHandle other)1082 QPDFObjectHandle::mergeResources(QPDFObjectHandle other)
1083 {
1084 mergeResources(other, nullptr);
1085 }
1086
1087 void
mergeResources(QPDFObjectHandle other,std::map<std::string,std::map<std::string,std::string>> * conflicts)1088 QPDFObjectHandle::mergeResources(
1089 QPDFObjectHandle other,
1090 std::map<std::string, std::map<std::string, std::string>>* conflicts)
1091 {
1092 if (! (isDictionary() && other.isDictionary()))
1093 {
1094 QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch");
1095 return;
1096 }
1097
1098 auto make_og_to_name = [](
1099 QPDFObjectHandle& dict,
1100 std::map<QPDFObjGen, std::string>& og_to_name)
1101 {
1102 for (auto i: dict.ditems())
1103 {
1104 if (i.second.isIndirect())
1105 {
1106 og_to_name[i.second.getObjGen()] = i.first;
1107 }
1108 }
1109 };
1110
1111 // This algorithm is described in comments in QPDFObjectHandle.hh
1112 // above the declaration of mergeResources.
1113 for (auto o_top: other.ditems())
1114 {
1115 std::string const& rtype = o_top.first;
1116 QPDFObjectHandle other_val = o_top.second;
1117 if (hasKey(rtype))
1118 {
1119 QPDFObjectHandle this_val = getKey(rtype);
1120 if (this_val.isDictionary() && other_val.isDictionary())
1121 {
1122 if (this_val.isIndirect())
1123 {
1124 // Do this even if there are no keys. Various
1125 // places in the code call mergeResources with
1126 // resource dictionaries that contain empty
1127 // subdictionaries just to get this shallow copy
1128 // functionality.
1129 QTC::TC("qpdf", "QPDFObjectHandle replace with copy");
1130 this_val = this_val.shallowCopy();
1131 replaceKey(rtype, this_val);
1132 }
1133 std::map<QPDFObjGen, std::string> og_to_name;
1134 std::set<std::string> rnames;
1135 int min_suffix = 1;
1136 bool initialized_maps = false;
1137 for (auto ov_iter: other_val.ditems())
1138 {
1139 std::string const& key = ov_iter.first;
1140 QPDFObjectHandle rval = ov_iter.second;
1141 if (! this_val.hasKey(key))
1142 {
1143 if (! rval.isIndirect())
1144 {
1145 QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy");
1146 rval = rval.shallowCopy();
1147 }
1148 this_val.replaceKey(key, rval);
1149 }
1150 else if (conflicts)
1151 {
1152 if (! initialized_maps)
1153 {
1154 make_og_to_name(this_val, og_to_name);
1155 rnames = this_val.getResourceNames();
1156 initialized_maps = true;
1157 }
1158 auto rval_og = rval.getObjGen();
1159 if (rval.isIndirect() &&
1160 og_to_name.count(rval_og))
1161 {
1162 QTC::TC("qpdf", "QPDFObjectHandle merge reuse");
1163 auto new_key = og_to_name[rval_og];
1164 if (new_key != key)
1165 {
1166 (*conflicts)[rtype][key] = new_key;
1167 }
1168 }
1169 else
1170 {
1171 QTC::TC("qpdf", "QPDFObjectHandle merge generate");
1172 std::string new_key = getUniqueResourceName(
1173 key + "_", min_suffix, &rnames);
1174 (*conflicts)[rtype][key] = new_key;
1175 this_val.replaceKey(new_key, rval);
1176 }
1177 }
1178 }
1179 }
1180 else if (this_val.isArray() && other_val.isArray())
1181 {
1182 std::set<std::string> scalars;
1183 for (auto this_item: this_val.aitems())
1184 {
1185 if (this_item.isScalar())
1186 {
1187 scalars.insert(this_item.unparse());
1188 }
1189 }
1190 for (auto other_item: other_val.aitems())
1191 {
1192 if (other_item.isScalar())
1193 {
1194 if (scalars.count(other_item.unparse()) == 0)
1195 {
1196 QTC::TC("qpdf", "QPDFObjectHandle merge array");
1197 this_val.appendItem(other_item);
1198 }
1199 else
1200 {
1201 QTC::TC("qpdf", "QPDFObjectHandle merge array dup");
1202 }
1203 }
1204 }
1205 }
1206 }
1207 else
1208 {
1209 QTC::TC("qpdf", "QPDFObjectHandle merge copy from other");
1210 replaceKey(rtype, other_val.shallowCopy());
1211 }
1212 }
1213 }
1214
1215 std::set<std::string>
getResourceNames()1216 QPDFObjectHandle::getResourceNames()
1217 {
1218 // Return second-level dictionary keys
1219 std::set<std::string> result;
1220 if (! isDictionary())
1221 {
1222 return result;
1223 }
1224 std::set<std::string> keys = getKeys();
1225 for (std::set<std::string>::iterator iter = keys.begin();
1226 iter != keys.end(); ++iter)
1227 {
1228 std::string const& key = *iter;
1229 QPDFObjectHandle val = getKey(key);
1230 if (val.isDictionary())
1231 {
1232 std::set<std::string> val_keys = val.getKeys();
1233 for (std::set<std::string>::iterator i2 = val_keys.begin();
1234 i2 != val_keys.end(); ++i2)
1235 {
1236 result.insert(*i2);
1237 }
1238 }
1239 }
1240 return result;
1241 }
1242
1243 std::string
getUniqueResourceName(std::string const & prefix,int & min_suffix)1244 QPDFObjectHandle::getUniqueResourceName(std::string const& prefix,
1245 int& min_suffix)
1246 {
1247 return getUniqueResourceName(prefix, min_suffix, nullptr);
1248 }
1249
1250 std::string
getUniqueResourceName(std::string const & prefix,int & min_suffix,std::set<std::string> * namesp)1251 QPDFObjectHandle::getUniqueResourceName(std::string const& prefix,
1252 int& min_suffix,
1253 std::set<std::string>* namesp)
1254
1255 {
1256 std::set<std::string> names = (namesp ? *namesp : getResourceNames());
1257 int max_suffix = min_suffix + QIntC::to_int(names.size());
1258 while (min_suffix <= max_suffix)
1259 {
1260 std::string candidate = prefix + QUtil::int_to_string(min_suffix);
1261 if (names.count(candidate) == 0)
1262 {
1263 return candidate;
1264 }
1265 // Increment after return; min_suffix should be the value
1266 // used, not the next value.
1267 ++min_suffix;
1268 }
1269 // This could only happen if there is a coding error.
1270 // The number of candidates we test is more than the
1271 // number of keys we're checking against.
1272 throw std::logic_error("unable to find unconflicting name in"
1273 " QPDFObjectHandle::getUniqueResourceName");
1274 }
1275
1276 // Indirect object accessors
1277 QPDF*
getOwningQPDF()1278 QPDFObjectHandle::getOwningQPDF()
1279 {
1280 // Will be null for direct objects
1281 return this->qpdf;
1282 }
1283
1284 // Dictionary mutators
1285
1286 void
replaceKey(std::string const & key,QPDFObjectHandle value)1287 QPDFObjectHandle::replaceKey(std::string const& key,
1288 QPDFObjectHandle value)
1289 {
1290 if (isDictionary())
1291 {
1292 checkOwnership(value);
1293 dynamic_cast<QPDF_Dictionary*>(
1294 obj.getPointer())->replaceKey(key, value);
1295 }
1296 else
1297 {
1298 typeWarning("dictionary", "ignoring key replacement request");
1299 QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring replaceKey");
1300 }
1301 }
1302
1303 void
removeKey(std::string const & key)1304 QPDFObjectHandle::removeKey(std::string const& key)
1305 {
1306 if (isDictionary())
1307 {
1308 dynamic_cast<QPDF_Dictionary*>(obj.getPointer())->removeKey(key);
1309 }
1310 else
1311 {
1312 typeWarning("dictionary", "ignoring key removal request");
1313 QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring removeKey");
1314 }
1315 }
1316
1317 void
replaceOrRemoveKey(std::string const & key,QPDFObjectHandle value)1318 QPDFObjectHandle::replaceOrRemoveKey(std::string const& key,
1319 QPDFObjectHandle value)
1320 {
1321 if (isDictionary())
1322 {
1323 checkOwnership(value);
1324 dynamic_cast<QPDF_Dictionary*>(
1325 obj.getPointer())->replaceOrRemoveKey(key, value);
1326 }
1327 else
1328 {
1329 typeWarning("dictionary", "ignoring key removal/replacement request");
1330 QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring removereplace");
1331 }
1332 }
1333
1334 // Stream accessors
1335 QPDFObjectHandle
getDict()1336 QPDFObjectHandle::getDict()
1337 {
1338 assertStream();
1339 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
1340 }
1341
1342 void
setFilterOnWrite(bool val)1343 QPDFObjectHandle::setFilterOnWrite(bool val)
1344 {
1345 assertStream();
1346 dynamic_cast<QPDF_Stream*>(obj.getPointer())->setFilterOnWrite(val);
1347 }
1348
1349 bool
getFilterOnWrite()1350 QPDFObjectHandle::getFilterOnWrite()
1351 {
1352 assertStream();
1353 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getFilterOnWrite();
1354 }
1355
1356 bool
isDataModified()1357 QPDFObjectHandle::isDataModified()
1358 {
1359 assertStream();
1360 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->isDataModified();
1361 }
1362
1363 void
replaceDict(QPDFObjectHandle new_dict)1364 QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
1365 {
1366 assertStream();
1367 dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceDict(new_dict);
1368 }
1369
1370 PointerHolder<Buffer>
getStreamData(qpdf_stream_decode_level_e level)1371 QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
1372 {
1373 assertStream();
1374 return dynamic_cast<QPDF_Stream*>(
1375 obj.getPointer())->getStreamData(level);
1376 }
1377
1378 PointerHolder<Buffer>
getRawStreamData()1379 QPDFObjectHandle::getRawStreamData()
1380 {
1381 assertStream();
1382 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getRawStreamData();
1383 }
1384
1385 bool
pipeStreamData(Pipeline * p,bool * filtering_attempted,int encode_flags,qpdf_stream_decode_level_e decode_level,bool suppress_warnings,bool will_retry)1386 QPDFObjectHandle::pipeStreamData(Pipeline* p, bool* filtering_attempted,
1387 int encode_flags,
1388 qpdf_stream_decode_level_e decode_level,
1389 bool suppress_warnings, bool will_retry)
1390 {
1391 assertStream();
1392 return dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
1393 p, filtering_attempted, encode_flags, decode_level,
1394 suppress_warnings, will_retry);
1395 }
1396
1397 bool
pipeStreamData(Pipeline * p,int encode_flags,qpdf_stream_decode_level_e decode_level,bool suppress_warnings,bool will_retry)1398 QPDFObjectHandle::pipeStreamData(Pipeline* p,
1399 int encode_flags,
1400 qpdf_stream_decode_level_e decode_level,
1401 bool suppress_warnings, bool will_retry)
1402 {
1403 assertStream();
1404 bool filtering_attempted;
1405 dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
1406 p, &filtering_attempted, encode_flags, decode_level,
1407 suppress_warnings, will_retry);
1408 return filtering_attempted;
1409 }
1410
1411 bool
pipeStreamData(Pipeline * p,bool filter,bool normalize,bool compress)1412 QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
1413 bool normalize, bool compress)
1414 {
1415 int encode_flags = 0;
1416 qpdf_stream_decode_level_e decode_level = qpdf_dl_none;
1417 if (filter)
1418 {
1419 decode_level = qpdf_dl_generalized;
1420 if (normalize)
1421 {
1422 encode_flags |= qpdf_ef_normalize;
1423 }
1424 if (compress)
1425 {
1426 encode_flags |= qpdf_ef_compress;
1427 }
1428 }
1429 return pipeStreamData(p, encode_flags, decode_level, false);
1430 }
1431
1432 void
replaceStreamData(PointerHolder<Buffer> data,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1433 QPDFObjectHandle::replaceStreamData(PointerHolder<Buffer> data,
1434 QPDFObjectHandle const& filter,
1435 QPDFObjectHandle const& decode_parms)
1436 {
1437 assertStream();
1438 dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1439 data, filter, decode_parms);
1440 }
1441
1442 void
replaceStreamData(std::string const & data,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1443 QPDFObjectHandle::replaceStreamData(std::string const& data,
1444 QPDFObjectHandle const& filter,
1445 QPDFObjectHandle const& decode_parms)
1446 {
1447 assertStream();
1448 PointerHolder<Buffer> b = new Buffer(data.length());
1449 unsigned char* bp = b->getBuffer();
1450 memcpy(bp, data.c_str(), data.length());
1451 dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1452 b, filter, decode_parms);
1453 }
1454
1455 void
replaceStreamData(PointerHolder<StreamDataProvider> provider,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1456 QPDFObjectHandle::replaceStreamData(PointerHolder<StreamDataProvider> provider,
1457 QPDFObjectHandle const& filter,
1458 QPDFObjectHandle const& decode_parms)
1459 {
1460 assertStream();
1461 dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1462 provider, filter, decode_parms);
1463 }
1464
1465 class FunctionProvider: public QPDFObjectHandle::StreamDataProvider
1466 {
1467 public:
FunctionProvider(std::function<void (Pipeline *)> provider)1468 FunctionProvider(std::function<void(Pipeline*)> provider) :
1469 StreamDataProvider(false),
1470 p1(provider),
1471 p2(nullptr)
1472 {
1473 }
FunctionProvider(std::function<bool (Pipeline *,bool,bool)> provider)1474 FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) :
1475 StreamDataProvider(true),
1476 p1(nullptr),
1477 p2(provider)
1478 {
1479 }
1480
provideStreamData(int,int,Pipeline * pipeline)1481 virtual void provideStreamData(int, int, Pipeline* pipeline) override
1482 {
1483 p1(pipeline);
1484 }
1485
provideStreamData(int,int,Pipeline * pipeline,bool suppress_warnings,bool will_retry)1486 virtual bool provideStreamData(int, int, Pipeline* pipeline,
1487 bool suppress_warnings,
1488 bool will_retry) override
1489 {
1490 return p2(pipeline, suppress_warnings, will_retry);
1491 }
1492
1493 private:
1494 std::function<void(Pipeline*)> p1;
1495 std::function<bool(Pipeline*, bool, bool)> p2;
1496 };
1497
1498 void
replaceStreamData(std::function<void (Pipeline *)> provider,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1499 QPDFObjectHandle::replaceStreamData(std::function<void(Pipeline*)> provider,
1500 QPDFObjectHandle const& filter,
1501 QPDFObjectHandle const& decode_parms)
1502 {
1503 assertStream();
1504 PointerHolder<StreamDataProvider> sdp = new FunctionProvider(provider);
1505 dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1506 sdp, filter, decode_parms);
1507 }
1508
1509 void
replaceStreamData(std::function<bool (Pipeline *,bool,bool)> provider,QPDFObjectHandle const & filter,QPDFObjectHandle const & decode_parms)1510 QPDFObjectHandle::replaceStreamData(
1511 std::function<bool(Pipeline*, bool, bool)> provider,
1512 QPDFObjectHandle const& filter,
1513 QPDFObjectHandle const& decode_parms)
1514 {
1515 assertStream();
1516 PointerHolder<StreamDataProvider> sdp = new FunctionProvider(provider);
1517 dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceStreamData(
1518 sdp, filter, decode_parms);
1519 }
1520
1521 QPDFObjGen
getObjGen() const1522 QPDFObjectHandle::getObjGen() const
1523 {
1524 return QPDFObjGen(this->objid, this->generation);
1525 }
1526
1527 int
getObjectID() const1528 QPDFObjectHandle::getObjectID() const
1529 {
1530 return this->objid;
1531 }
1532
1533 int
getGeneration() const1534 QPDFObjectHandle::getGeneration() const
1535 {
1536 return this->generation;
1537 }
1538
1539 std::map<std::string, QPDFObjectHandle>
getPageImages()1540 QPDFObjectHandle::getPageImages()
1541 {
1542 return QPDFPageObjectHelper(*this).getImages();
1543 }
1544
1545 std::vector<QPDFObjectHandle>
arrayOrStreamToStreamArray(std::string const & description,std::string & all_description)1546 QPDFObjectHandle::arrayOrStreamToStreamArray(
1547 std::string const& description, std::string& all_description)
1548 {
1549 all_description = description;
1550 std::vector<QPDFObjectHandle> result;
1551 if (isArray())
1552 {
1553 int n_items = getArrayNItems();
1554 for (int i = 0; i < n_items; ++i)
1555 {
1556 QPDFObjectHandle item = getArrayItem(i);
1557 if (item.isStream())
1558 {
1559 result.push_back(item);
1560 }
1561 else
1562 {
1563 QTC::TC("qpdf", "QPDFObjectHandle non-stream in stream array");
1564 warn(item.getOwningQPDF(),
1565 QPDFExc(qpdf_e_damaged_pdf, description,
1566 "item index " + QUtil::int_to_string(i) +
1567 " (from 0)", 0,
1568 "ignoring non-stream in an array of streams"));
1569 }
1570 }
1571 }
1572 else if (isStream())
1573 {
1574 result.push_back(*this);
1575 }
1576 else if (! isNull())
1577 {
1578 warn(getOwningQPDF(),
1579 QPDFExc(qpdf_e_damaged_pdf, "", description, 0,
1580 " object is supposed to be a stream or an"
1581 " array of streams but is neither"));
1582 }
1583
1584 bool first = true;
1585 for (std::vector<QPDFObjectHandle>::iterator iter = result.begin();
1586 iter != result.end(); ++iter)
1587 {
1588 QPDFObjectHandle item = *iter;
1589 std::string og =
1590 QUtil::int_to_string(item.getObjectID()) + " " +
1591 QUtil::int_to_string(item.getGeneration());
1592 if (first)
1593 {
1594 first = false;
1595 }
1596 else
1597 {
1598 all_description += ",";
1599 }
1600 all_description += " stream " + og;
1601 }
1602
1603 return result;
1604 }
1605
1606 std::vector<QPDFObjectHandle>
getPageContents()1607 QPDFObjectHandle::getPageContents()
1608 {
1609 std::string description = "page object " +
1610 QUtil::int_to_string(this->objid) + " " +
1611 QUtil::int_to_string(this->generation);
1612 std::string all_description;
1613 return this->getKey("/Contents").arrayOrStreamToStreamArray(
1614 description, all_description);
1615 }
1616
1617 void
addPageContents(QPDFObjectHandle new_contents,bool first)1618 QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
1619 {
1620 new_contents.assertStream();
1621
1622 std::vector<QPDFObjectHandle> orig_contents = getPageContents();
1623
1624 std::vector<QPDFObjectHandle> content_streams;
1625 if (first)
1626 {
1627 QTC::TC("qpdf", "QPDFObjectHandle prepend page contents");
1628 content_streams.push_back(new_contents);
1629 }
1630 for (std::vector<QPDFObjectHandle>::iterator iter = orig_contents.begin();
1631 iter != orig_contents.end(); ++iter)
1632 {
1633 QTC::TC("qpdf", "QPDFObjectHandle append page contents");
1634 content_streams.push_back(*iter);
1635 }
1636 if (! first)
1637 {
1638 content_streams.push_back(new_contents);
1639 }
1640
1641 QPDFObjectHandle contents = QPDFObjectHandle::newArray(content_streams);
1642 this->replaceKey("/Contents", contents);
1643 }
1644
1645 void
rotatePage(int angle,bool relative)1646 QPDFObjectHandle::rotatePage(int angle, bool relative)
1647 {
1648 if ((angle % 90) != 0)
1649 {
1650 throw std::runtime_error(
1651 "QPDF::rotatePage called with an"
1652 " angle that is not a multiple of 90");
1653 }
1654 int new_angle = angle;
1655 if (relative)
1656 {
1657 int old_angle = 0;
1658 bool found_rotate = false;
1659 QPDFObjectHandle cur_obj = *this;
1660 bool searched_parent = false;
1661 std::set<QPDFObjGen> visited;
1662 while (! found_rotate)
1663 {
1664 if (visited.count(cur_obj.getObjGen()))
1665 {
1666 // Don't get stuck in an infinite loop
1667 break;
1668 }
1669 if (! visited.empty())
1670 {
1671 searched_parent = true;
1672 }
1673 visited.insert(cur_obj.getObjGen());
1674 if (cur_obj.getKey("/Rotate").isInteger())
1675 {
1676 found_rotate = true;
1677 old_angle = cur_obj.getKey("/Rotate").getIntValueAsInt();
1678 }
1679 else if (cur_obj.getKey("/Parent").isDictionary())
1680 {
1681 cur_obj = cur_obj.getKey("/Parent");
1682 }
1683 else
1684 {
1685 break;
1686 }
1687 }
1688 QTC::TC("qpdf", "QPDFObjectHandle found old angle",
1689 searched_parent ? 0 : 1);
1690 if ((old_angle % 90) != 0)
1691 {
1692 old_angle = 0;
1693 }
1694 new_angle += old_angle;
1695 }
1696 new_angle = (new_angle + 360) % 360;
1697 // Make this explicit even with new_angle == 0 since /Rotate can
1698 // be inherited.
1699 replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle));
1700 }
1701
1702 void
coalesceContentStreams()1703 QPDFObjectHandle::coalesceContentStreams()
1704 {
1705 QPDFObjectHandle contents = this->getKey("/Contents");
1706 if (contents.isStream())
1707 {
1708 QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream");
1709 return;
1710 }
1711 else if (! contents.isArray())
1712 {
1713 // /Contents is optional for pages, and some very damaged
1714 // files may have pages that are invalid in other ways.
1715 return;
1716 }
1717 QPDF* qpdf = getOwningQPDF();
1718 if (qpdf == 0)
1719 {
1720 // Should not be possible for a page object to not have an
1721 // owning PDF unless it was manually constructed in some
1722 // incorrect way. However, it can happen in a PDF file whose
1723 // page structure is direct, which is against spec but still
1724 // possible to hand construct, as in fuzz issue 27393.
1725 throw std::runtime_error("coalesceContentStreams called on object"
1726 " with no associated PDF file");
1727 }
1728 QPDFObjectHandle new_contents = newStream(qpdf);
1729 this->replaceKey("/Contents", new_contents);
1730
1731 PointerHolder<StreamDataProvider> provider =
1732 new CoalesceProvider(*this, contents);
1733 new_contents.replaceStreamData(provider, newNull(), newNull());
1734 }
1735
1736 std::string
unparse()1737 QPDFObjectHandle::unparse()
1738 {
1739 std::string result;
1740 if (this->isIndirect())
1741 {
1742 result = QUtil::int_to_string(this->objid) + " " +
1743 QUtil::int_to_string(this->generation) + " R";
1744 }
1745 else
1746 {
1747 result = unparseResolved();
1748 }
1749 return result;
1750 }
1751
1752 std::string
unparseResolved()1753 QPDFObjectHandle::unparseResolved()
1754 {
1755 dereference();
1756 if (this->reserved)
1757 {
1758 throw std::logic_error(
1759 "QPDFObjectHandle: attempting to unparse a reserved object");
1760 }
1761 return this->obj->unparse();
1762 }
1763
1764 std::string
unparseBinary()1765 QPDFObjectHandle::unparseBinary()
1766 {
1767 if (this->isString())
1768 {
1769 return dynamic_cast<QPDF_String*>(
1770 this->obj.getPointer())->unparse(true);
1771 }
1772 else
1773 {
1774 return unparse();
1775 }
1776 }
1777
1778 JSON
getJSON(bool dereference_indirect)1779 QPDFObjectHandle::getJSON(bool dereference_indirect)
1780 {
1781 if ((! dereference_indirect) && this->isIndirect())
1782 {
1783 return JSON::makeString(unparse());
1784 }
1785 else
1786 {
1787 dereference();
1788 if (this->reserved)
1789 {
1790 throw std::logic_error(
1791 "QPDFObjectHandle: attempting to unparse a reserved object");
1792 }
1793 return this->obj->getJSON();
1794 }
1795 }
1796
1797 QPDFObjectHandle
wrapInArray()1798 QPDFObjectHandle::wrapInArray()
1799 {
1800 if (isArray())
1801 {
1802 return *this;
1803 }
1804 QPDFObjectHandle result = QPDFObjectHandle::newArray();
1805 result.appendItem(*this);
1806 return result;
1807 }
1808
1809 QPDFObjectHandle
parse(std::string const & object_str,std::string const & object_description)1810 QPDFObjectHandle::parse(std::string const& object_str,
1811 std::string const& object_description)
1812 {
1813 return parse(nullptr, object_str, object_description);
1814 }
1815
1816 QPDFObjectHandle
parse(QPDF * context,std::string const & object_str,std::string const & object_description)1817 QPDFObjectHandle::parse(QPDF* context,
1818 std::string const& object_str,
1819 std::string const& object_description)
1820 {
1821 PointerHolder<InputSource> input =
1822 new BufferInputSource("parsed object", object_str);
1823 QPDFTokenizer tokenizer;
1824 bool empty = false;
1825 QPDFObjectHandle result =
1826 parse(input, object_description, tokenizer, empty, 0, context);
1827 size_t offset = QIntC::to_size(input->tell());
1828 while (offset < object_str.length())
1829 {
1830 if (! isspace(object_str.at(offset)))
1831 {
1832 QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
1833 throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
1834 object_description,
1835 input->getLastOffset(),
1836 "trailing data found parsing object from string");
1837 }
1838 ++offset;
1839 }
1840 return result;
1841 }
1842
1843 void
pipePageContents(Pipeline * p)1844 QPDFObjectHandle::pipePageContents(Pipeline* p)
1845 {
1846 std::string description = "page object " +
1847 QUtil::int_to_string(this->objid) + " " +
1848 QUtil::int_to_string(this->generation);
1849 std::string all_description;
1850 this->getKey("/Contents").pipeContentStreams(
1851 p, description, all_description);
1852 }
1853
1854 void
pipeContentStreams(Pipeline * p,std::string const & description,std::string & all_description)1855 QPDFObjectHandle::pipeContentStreams(
1856 Pipeline* p, std::string const& description, std::string& all_description)
1857 {
1858 std::vector<QPDFObjectHandle> streams =
1859 arrayOrStreamToStreamArray(
1860 description, all_description);
1861 bool need_newline = false;
1862 Pl_Buffer buf("concatenated content stream buffer");
1863 for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin();
1864 iter != streams.end(); ++iter)
1865 {
1866 if (need_newline)
1867 {
1868 buf.write(QUtil::unsigned_char_pointer("\n"), 1);
1869 }
1870 LastChar lc(&buf);
1871 QPDFObjectHandle stream = *iter;
1872 std::string og =
1873 QUtil::int_to_string(stream.getObjectID()) + " " +
1874 QUtil::int_to_string(stream.getGeneration());
1875 std::string w_description = "content stream object " + og;
1876 if (! stream.pipeStreamData(&lc, 0, qpdf_dl_specialized))
1877 {
1878 QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent");
1879 throw QPDFExc(qpdf_e_damaged_pdf, "content stream",
1880 w_description, 0,
1881 "errors while decoding content stream");
1882 }
1883 lc.finish();
1884 need_newline = (lc.getLastChar() != static_cast<unsigned char>('\n'));
1885 QTC::TC("qpdf", "QPDFObjectHandle need_newline",
1886 need_newline ? 0 : 1);
1887 }
1888 std::unique_ptr<Buffer> b(buf.getBuffer());
1889 p->write(b->getBuffer(), b->getSize());
1890 p->finish();
1891 }
1892
1893 void
parsePageContents(ParserCallbacks * callbacks)1894 QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
1895 {
1896 std::string description = "page object " +
1897 QUtil::int_to_string(this->objid) + " " +
1898 QUtil::int_to_string(this->generation);
1899 this->getKey("/Contents").parseContentStream_internal(
1900 description, callbacks);
1901 }
1902
1903 void
parseAsContents(ParserCallbacks * callbacks)1904 QPDFObjectHandle::parseAsContents(ParserCallbacks* callbacks)
1905 {
1906 std::string description = "object " +
1907 QUtil::int_to_string(this->objid) + " " +
1908 QUtil::int_to_string(this->generation);
1909 this->parseContentStream_internal(description, callbacks);
1910 }
1911
1912 void
filterPageContents(TokenFilter * filter,Pipeline * next)1913 QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
1914 {
1915 std::string description = "token filter for page object " +
1916 QUtil::int_to_string(this->objid) + " " +
1917 QUtil::int_to_string(this->generation);
1918 Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next);
1919 this->pipePageContents(&token_pipeline);
1920 }
1921
1922 void
filterAsContents(TokenFilter * filter,Pipeline * next)1923 QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next)
1924 {
1925 std::string description = "token filter for object " +
1926 QUtil::int_to_string(this->objid) + " " +
1927 QUtil::int_to_string(this->generation);
1928 Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next);
1929 this->pipeStreamData(&token_pipeline, 0, qpdf_dl_specialized);
1930 }
1931
1932 void
parseContentStream(QPDFObjectHandle stream_or_array,ParserCallbacks * callbacks)1933 QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
1934 ParserCallbacks* callbacks)
1935 {
1936 stream_or_array.parseContentStream_internal(
1937 "content stream objects", callbacks);
1938 }
1939
1940 void
parseContentStream_internal(std::string const & description,ParserCallbacks * callbacks)1941 QPDFObjectHandle::parseContentStream_internal(
1942 std::string const& description,
1943 ParserCallbacks* callbacks)
1944 {
1945 Pl_Buffer buf("concatenated stream data buffer");
1946 std::string all_description;
1947 pipeContentStreams(&buf, description, all_description);
1948 PointerHolder<Buffer> stream_data = buf.getBuffer();
1949 callbacks->contentSize(stream_data->getSize());
1950 try
1951 {
1952 parseContentStream_data(stream_data, all_description,
1953 callbacks, getOwningQPDF());
1954 }
1955 catch (TerminateParsing&)
1956 {
1957 return;
1958 }
1959 callbacks->handleEOF();
1960 }
1961
1962 void
parseContentStream_data(PointerHolder<Buffer> stream_data,std::string const & description,ParserCallbacks * callbacks,QPDF * context)1963 QPDFObjectHandle::parseContentStream_data(
1964 PointerHolder<Buffer> stream_data,
1965 std::string const& description,
1966 ParserCallbacks* callbacks,
1967 QPDF* context)
1968 {
1969 size_t stream_length = stream_data->getSize();
1970 PointerHolder<InputSource> input =
1971 new BufferInputSource(description, stream_data.getPointer());
1972 QPDFTokenizer tokenizer;
1973 tokenizer.allowEOF();
1974 bool empty = false;
1975 while (QIntC::to_size(input->tell()) < stream_length)
1976 {
1977 // Read a token and seek to the beginning. The offset we get
1978 // from this process is the beginning of the next
1979 // non-ignorable (space, comment) token. This way, the offset
1980 // and don't including ignorable content.
1981 tokenizer.readToken(input, "content", true);
1982 qpdf_offset_t offset = input->getLastOffset();
1983 input->seek(offset, SEEK_SET);
1984 QPDFObjectHandle obj =
1985 parseInternal(input, "content", tokenizer,
1986 empty, 0, context, true);
1987 if (! obj.isInitialized())
1988 {
1989 // EOF
1990 break;
1991 }
1992 size_t length = QIntC::to_size(input->tell() - offset);
1993
1994 callbacks->handleObject(obj, QIntC::to_size(offset), length);
1995 if (obj.isOperator() && (obj.getOperatorValue() == "ID"))
1996 {
1997 // Discard next character; it is the space after ID that
1998 // terminated the token. Read until end of inline image.
1999 char ch;
2000 input->read(&ch, 1);
2001 tokenizer.expectInlineImage(input);
2002 QPDFTokenizer::Token t =
2003 tokenizer.readToken(input, description, true);
2004 offset = input->getLastOffset();
2005 length = QIntC::to_size(input->tell() - offset);
2006 if (t.getType() == QPDFTokenizer::tt_bad)
2007 {
2008 QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image");
2009 warn(context,
2010 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2011 "stream data", input->tell(),
2012 "EOF found while reading inline image"));
2013 }
2014 else
2015 {
2016 std::string inline_image = t.getValue();
2017 QTC::TC("qpdf", "QPDFObjectHandle inline image token");
2018 callbacks->handleObject(
2019 QPDFObjectHandle::newInlineImage(inline_image),
2020 QIntC::to_size(offset), length);
2021 }
2022 }
2023 }
2024 }
2025
2026 void
addContentTokenFilter(PointerHolder<TokenFilter> filter)2027 QPDFObjectHandle::addContentTokenFilter(PointerHolder<TokenFilter> filter)
2028 {
2029 coalesceContentStreams();
2030 this->getKey("/Contents").addTokenFilter(filter);
2031 }
2032
2033 void
addTokenFilter(PointerHolder<TokenFilter> filter)2034 QPDFObjectHandle::addTokenFilter(PointerHolder<TokenFilter> filter)
2035 {
2036 assertStream();
2037 return dynamic_cast<QPDF_Stream*>(
2038 obj.getPointer())->addTokenFilter(filter);
2039 }
2040
2041 QPDFObjectHandle
parse(PointerHolder<InputSource> input,std::string const & object_description,QPDFTokenizer & tokenizer,bool & empty,StringDecrypter * decrypter,QPDF * context)2042 QPDFObjectHandle::parse(PointerHolder<InputSource> input,
2043 std::string const& object_description,
2044 QPDFTokenizer& tokenizer, bool& empty,
2045 StringDecrypter* decrypter, QPDF* context)
2046 {
2047 return parseInternal(input, object_description, tokenizer, empty,
2048 decrypter, context, false);
2049 }
2050
2051 QPDFObjectHandle
parseInternal(PointerHolder<InputSource> input,std::string const & object_description,QPDFTokenizer & tokenizer,bool & empty,StringDecrypter * decrypter,QPDF * context,bool content_stream)2052 QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
2053 std::string const& object_description,
2054 QPDFTokenizer& tokenizer, bool& empty,
2055 StringDecrypter* decrypter, QPDF* context,
2056 bool content_stream)
2057 {
2058 // This method must take care not to resolve any objects. Don't
2059 // check the type of any object without first ensuring that it is
2060 // a direct object. Otherwise, doing so may have the side effect
2061 // of reading the object and changing the file pointer. If you do
2062 // this, it will cause a logic error to be thrown from
2063 // QPDF::inParse().
2064
2065 QPDF::ParseGuard pg(context);
2066
2067 empty = false;
2068
2069 QPDFObjectHandle object;
2070 bool set_offset = false;
2071
2072 std::vector<SparseOHArray> olist_stack;
2073 olist_stack.push_back(SparseOHArray());
2074 std::vector<parser_state_e> state_stack;
2075 state_stack.push_back(st_top);
2076 std::vector<qpdf_offset_t> offset_stack;
2077 qpdf_offset_t offset = input->tell();
2078 offset_stack.push_back(offset);
2079 bool done = false;
2080 int bad_count = 0;
2081 int good_count = 0;
2082 bool b_contents = false;
2083 std::vector<std::string> contents_string_stack;
2084 contents_string_stack.push_back("");
2085 std::vector<qpdf_offset_t> contents_offset_stack;
2086 contents_offset_stack.push_back(-1);
2087 while (! done)
2088 {
2089 bool bad = false;
2090 SparseOHArray& olist = olist_stack.back();
2091 parser_state_e state = state_stack.back();
2092 offset = offset_stack.back();
2093 std::string& contents_string = contents_string_stack.back();
2094 qpdf_offset_t& contents_offset = contents_offset_stack.back();
2095
2096 object = QPDFObjectHandle();
2097 set_offset = false;
2098
2099 QPDFTokenizer::Token token =
2100 tokenizer.readToken(input, object_description, true);
2101 std::string const& token_error_message = token.getErrorMessage();
2102 if (! token_error_message.empty())
2103 {
2104 // Tokens other than tt_bad can still generate warnings.
2105 warn(context,
2106 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2107 object_description,
2108 input->getLastOffset(),
2109 token_error_message));
2110 }
2111
2112 switch (token.getType())
2113 {
2114 case QPDFTokenizer::tt_eof:
2115 if (! content_stream)
2116 {
2117 QTC::TC("qpdf", "QPDFObjectHandle eof in parseInternal");
2118 warn(context,
2119 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2120 object_description,
2121 input->getLastOffset(),
2122 "unexpected EOF"));
2123 }
2124 bad = true;
2125 state = st_eof;
2126 break;
2127
2128 case QPDFTokenizer::tt_bad:
2129 QTC::TC("qpdf", "QPDFObjectHandle bad token in parse");
2130 bad = true;
2131 object = newNull();
2132 break;
2133
2134 case QPDFTokenizer::tt_brace_open:
2135 case QPDFTokenizer::tt_brace_close:
2136 QTC::TC("qpdf", "QPDFObjectHandle bad brace");
2137 warn(context,
2138 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2139 object_description,
2140 input->getLastOffset(),
2141 "treating unexpected brace token as null"));
2142 bad = true;
2143 object = newNull();
2144 break;
2145
2146 case QPDFTokenizer::tt_array_close:
2147 if (state == st_array)
2148 {
2149 state = st_stop;
2150 }
2151 else
2152 {
2153 QTC::TC("qpdf", "QPDFObjectHandle bad array close");
2154 warn(context,
2155 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2156 object_description,
2157 input->getLastOffset(),
2158 "treating unexpected array close token as null"));
2159 bad = true;
2160 object = newNull();
2161 }
2162 break;
2163
2164 case QPDFTokenizer::tt_dict_close:
2165 if (state == st_dictionary)
2166 {
2167 state = st_stop;
2168 }
2169 else
2170 {
2171 QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
2172 warn(context,
2173 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2174 object_description,
2175 input->getLastOffset(),
2176 "unexpected dictionary close token"));
2177 bad = true;
2178 object = newNull();
2179 }
2180 break;
2181
2182 case QPDFTokenizer::tt_array_open:
2183 case QPDFTokenizer::tt_dict_open:
2184 if (olist_stack.size() > 500)
2185 {
2186 QTC::TC("qpdf", "QPDFObjectHandle too deep");
2187 warn(context,
2188 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2189 object_description,
2190 input->getLastOffset(),
2191 "ignoring excessively deeply nested data structure"));
2192 bad = true;
2193 object = newNull();
2194 state = st_top;
2195 }
2196 else
2197 {
2198 olist_stack.push_back(SparseOHArray());
2199 state = st_start;
2200 offset_stack.push_back(input->tell());
2201 state_stack.push_back(
2202 (token.getType() == QPDFTokenizer::tt_array_open) ?
2203 st_array : st_dictionary);
2204 b_contents = false;
2205 contents_string_stack.push_back("");
2206 contents_offset_stack.push_back(-1);
2207 }
2208 break;
2209
2210 case QPDFTokenizer::tt_bool:
2211 object = newBool((token.getValue() == "true"));
2212 break;
2213
2214 case QPDFTokenizer::tt_null:
2215 object = newNull();
2216 break;
2217
2218 case QPDFTokenizer::tt_integer:
2219 object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
2220 break;
2221
2222 case QPDFTokenizer::tt_real:
2223 object = newReal(token.getValue());
2224 break;
2225
2226 case QPDFTokenizer::tt_name:
2227 {
2228 std::string name = token.getValue();
2229 object = newName(name);
2230
2231 if (name == "/Contents")
2232 {
2233 b_contents = true;
2234 }
2235 else
2236 {
2237 b_contents = false;
2238 }
2239 }
2240 break;
2241
2242 case QPDFTokenizer::tt_word:
2243 {
2244 std::string const& value = token.getValue();
2245 if (content_stream)
2246 {
2247 object = QPDFObjectHandle::newOperator(value);
2248 }
2249 else if ((value == "R") && (state != st_top) &&
2250 (olist.size() >= 2) &&
2251 (! olist.at(olist.size() - 1).isIndirect()) &&
2252 (olist.at(olist.size() - 1).isInteger()) &&
2253 (! olist.at(olist.size() - 2).isIndirect()) &&
2254 (olist.at(olist.size() - 2).isInteger()))
2255 {
2256 if (context == 0)
2257 {
2258 QTC::TC("qpdf", "QPDFObjectHandle indirect without context");
2259 throw std::logic_error(
2260 "QPDFObjectHandle::parse called without context"
2261 " on an object with indirect references");
2262 }
2263 // Try to resolve indirect objects
2264 object = newIndirect(
2265 context,
2266 olist.at(olist.size() - 2).getIntValueAsInt(),
2267 olist.at(olist.size() - 1).getIntValueAsInt());
2268 olist.remove_last();
2269 olist.remove_last();
2270 }
2271 else if ((value == "endobj") && (state == st_top))
2272 {
2273 // We just saw endobj without having read
2274 // anything. Treat this as a null and do not move
2275 // the input source's offset.
2276 object = newNull();
2277 input->seek(input->getLastOffset(), SEEK_SET);
2278 empty = true;
2279 }
2280 else
2281 {
2282 QTC::TC("qpdf", "QPDFObjectHandle treat word as string");
2283 warn(context,
2284 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2285 object_description,
2286 input->getLastOffset(),
2287 "unknown token while reading object;"
2288 " treating as string"));
2289 bad = true;
2290 object = newString(value);
2291 }
2292 }
2293 break;
2294
2295 case QPDFTokenizer::tt_string:
2296 {
2297 std::string val = token.getValue();
2298 if (decrypter)
2299 {
2300 if (b_contents)
2301 {
2302 contents_string = val;
2303 contents_offset = input->getLastOffset();
2304 b_contents = false;
2305 }
2306 decrypter->decryptString(val);
2307 }
2308 object = QPDFObjectHandle::newString(val);
2309 }
2310
2311 break;
2312
2313 default:
2314 warn(context,
2315 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2316 object_description,
2317 input->getLastOffset(),
2318 "treating unknown token type as null while "
2319 "reading object"));
2320 bad = true;
2321 object = newNull();
2322 break;
2323 }
2324
2325 if ((! object.isInitialized()) &&
2326 (! ((state == st_start) ||
2327 (state == st_stop) ||
2328 (state == st_eof))))
2329 {
2330 throw std::logic_error(
2331 "QPDFObjectHandle::parseInternal: "
2332 "unexpected uninitialized object");
2333 object = newNull();
2334 }
2335
2336 if (bad)
2337 {
2338 ++bad_count;
2339 good_count = 0;
2340 }
2341 else
2342 {
2343 ++good_count;
2344 if (good_count > 3)
2345 {
2346 bad_count = 0;
2347 }
2348 }
2349 if (bad_count > 5)
2350 {
2351 // We had too many consecutive errors without enough
2352 // intervening successful objects. Give up.
2353 warn(context,
2354 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2355 object_description,
2356 input->getLastOffset(),
2357 "too many errors; giving up on reading object"));
2358 state = st_top;
2359 object = newNull();
2360 }
2361
2362 switch (state)
2363 {
2364 case st_eof:
2365 if (state_stack.size() > 1)
2366 {
2367 warn(context,
2368 QPDFExc(qpdf_e_damaged_pdf, input->getName(),
2369 object_description,
2370 input->getLastOffset(),
2371 "parse error while reading object"));
2372 }
2373 done = true;
2374 // In content stream mode, leave object uninitialized to
2375 // indicate EOF
2376 if (! content_stream)
2377 {
2378 object = newNull();
2379 }
2380 break;
2381
2382 case st_dictionary:
2383 case st_array:
2384 setObjectDescriptionFromInput(
2385 object, context, object_description, input,
2386 input->getLastOffset());
2387 object.setParsedOffset(input->getLastOffset());
2388 set_offset = true;
2389 olist.append(object);
2390 break;
2391
2392 case st_top:
2393 done = true;
2394 break;
2395
2396 case st_start:
2397 break;
2398
2399 case st_stop:
2400 if ((state_stack.size() < 2) || (olist_stack.size() < 2))
2401 {
2402 throw std::logic_error(
2403 "QPDFObjectHandle::parseInternal: st_stop encountered"
2404 " with insufficient elements in stack");
2405 }
2406 parser_state_e old_state = state_stack.back();
2407 state_stack.pop_back();
2408 if (old_state == st_array)
2409 {
2410 // There's no newArray(SparseOHArray) since
2411 // SparseOHArray is not part of the public API.
2412 object = QPDFObjectHandle(new QPDF_Array(olist));
2413 setObjectDescriptionFromInput(
2414 object, context, object_description, input, offset);
2415 // The `offset` points to the next of "[". Set the
2416 // rewind offset to point to the beginning of "[".
2417 // This has been explicitly tested with whitespace
2418 // surrounding the array start delimiter.
2419 // getLastOffset points to the array end token and
2420 // therefore can't be used here.
2421 object.setParsedOffset(offset - 1);
2422 set_offset = true;
2423 }
2424 else if (old_state == st_dictionary)
2425 {
2426 // Convert list to map. Alternating elements are keys.
2427 // Attempt to recover more or less gracefully from
2428 // invalid dictionaries.
2429 std::set<std::string> names;
2430 size_t n_elements = olist.size();
2431 for (size_t i = 0; i < n_elements; ++i)
2432 {
2433 QPDFObjectHandle oh = olist.at(i);
2434 if ((! oh.isIndirect()) && oh.isName())
2435 {
2436 names.insert(oh.getName());
2437 }
2438 }
2439
2440 std::map<std::string, QPDFObjectHandle> dict;
2441 int next_fake_key = 1;
2442 for (unsigned int i = 0; i < olist.size(); ++i)
2443 {
2444 QPDFObjectHandle key_obj = olist.at(i);
2445 QPDFObjectHandle val;
2446 if (key_obj.isIndirect() || (! key_obj.isName()))
2447 {
2448 bool found_fake = false;
2449 std::string candidate;
2450 while (! found_fake)
2451 {
2452 candidate =
2453 "/QPDFFake" +
2454 QUtil::int_to_string(next_fake_key++);
2455 found_fake = (names.count(candidate) == 0);
2456 QTC::TC("qpdf", "QPDFObjectHandle found fake",
2457 (found_fake ? 0 : 1));
2458 }
2459 warn(context,
2460 QPDFExc(
2461 qpdf_e_damaged_pdf,
2462 input->getName(), object_description, offset,
2463 "expected dictionary key but found"
2464 " non-name object; inserting key " +
2465 candidate));
2466 val = key_obj;
2467 key_obj = newName(candidate);
2468 }
2469 else if (i + 1 >= olist.size())
2470 {
2471 QTC::TC("qpdf", "QPDFObjectHandle no val for last key");
2472 warn(context,
2473 QPDFExc(
2474 qpdf_e_damaged_pdf,
2475 input->getName(), object_description, offset,
2476 "dictionary ended prematurely; "
2477 "using null as value for last key"));
2478 val = newNull();
2479 setObjectDescriptionFromInput(
2480 val, context, object_description, input, offset);
2481 }
2482 else
2483 {
2484 val = olist.at(++i);
2485 }
2486 std::string key = key_obj.getName();
2487 if (dict.count(key) > 0)
2488 {
2489 QTC::TC("qpdf", "QPDFObjectHandle duplicate dict key");
2490 warn(context,
2491 QPDFExc(
2492 qpdf_e_damaged_pdf,
2493 input->getName(), object_description, offset,
2494 "dictionary has duplicated key " + key +
2495 "; last occurrence overrides earlier ones"));
2496 }
2497 dict[key] = val;
2498 }
2499 if (!contents_string.empty() &&
2500 dict.count("/Type") &&
2501 dict["/Type"].isName() &&
2502 dict["/Type"].getName() == "/Sig" &&
2503 dict.count("/ByteRange") &&
2504 dict.count("/Contents") &&
2505 dict["/Contents"].isString())
2506 {
2507 dict["/Contents"]
2508 = QPDFObjectHandle::newString(contents_string);
2509 dict["/Contents"].setParsedOffset(contents_offset);
2510 }
2511 object = newDictionary(dict);
2512 setObjectDescriptionFromInput(
2513 object, context, object_description, input, offset);
2514 // The `offset` points to the next of "<<". Set the
2515 // rewind offset to point to the beginning of "<<".
2516 // This has been explicitly tested with whitespace
2517 // surrounding the dictionary start delimiter.
2518 // getLastOffset points to the dictionary end token
2519 // and therefore can't be used here.
2520 object.setParsedOffset(offset - 2);
2521 set_offset = true;
2522 }
2523 olist_stack.pop_back();
2524 offset_stack.pop_back();
2525 if (state_stack.back() == st_top)
2526 {
2527 done = true;
2528 }
2529 else
2530 {
2531 olist_stack.back().append(object);
2532 }
2533 contents_string_stack.pop_back();
2534 contents_offset_stack.pop_back();
2535 }
2536 }
2537
2538 if (! set_offset)
2539 {
2540 setObjectDescriptionFromInput(
2541 object, context, object_description, input, offset);
2542 object.setParsedOffset(offset);
2543 }
2544 return object;
2545 }
2546
2547 qpdf_offset_t
getParsedOffset()2548 QPDFObjectHandle::getParsedOffset()
2549 {
2550 dereference();
2551 return this->obj->getParsedOffset();
2552 }
2553
2554 void
setParsedOffset(qpdf_offset_t offset)2555 QPDFObjectHandle::setParsedOffset(qpdf_offset_t offset)
2556 {
2557 // This is called during parsing on newly created direct objects,
2558 // so we can't call dereference() here.
2559 if (this->obj.getPointer())
2560 {
2561 this->obj->setParsedOffset(offset);
2562 }
2563 }
2564
2565 QPDFObjectHandle
newIndirect(QPDF * qpdf,int objid,int generation)2566 QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
2567 {
2568 if (objid == 0)
2569 {
2570 // Special case: QPDF uses objid 0 as a sentinel for direct
2571 // objects, and the PDF specification doesn't allow for object
2572 // 0. Treat indirect references to object 0 as null so that we
2573 // never create an indirect object with objid 0.
2574 QTC::TC("qpdf", "QPDFObjectHandle indirect with 0 objid");
2575 return newNull();
2576 }
2577
2578 return QPDFObjectHandle(qpdf, objid, generation);
2579 }
2580
2581 QPDFObjectHandle
newBool(bool value)2582 QPDFObjectHandle::newBool(bool value)
2583 {
2584 return QPDFObjectHandle(new QPDF_Bool(value));
2585 }
2586
2587 QPDFObjectHandle
newNull()2588 QPDFObjectHandle::newNull()
2589 {
2590 return QPDFObjectHandle(new QPDF_Null());
2591 }
2592
2593 QPDFObjectHandle
newInteger(long long value)2594 QPDFObjectHandle::newInteger(long long value)
2595 {
2596 return QPDFObjectHandle(new QPDF_Integer(value));
2597 }
2598
2599 QPDFObjectHandle
newReal(std::string const & value)2600 QPDFObjectHandle::newReal(std::string const& value)
2601 {
2602 return QPDFObjectHandle(new QPDF_Real(value));
2603 }
2604
2605 QPDFObjectHandle
newReal(double value,int decimal_places)2606 QPDFObjectHandle::newReal(double value, int decimal_places)
2607 {
2608 return QPDFObjectHandle(
2609 new QPDF_Real(value, decimal_places, true));
2610 }
2611
2612 QPDFObjectHandle
newReal(double value,int decimal_places,bool trim_trailing_zeroes)2613 QPDFObjectHandle::newReal(double value, int decimal_places,
2614 bool trim_trailing_zeroes)
2615 {
2616 return QPDFObjectHandle(
2617 new QPDF_Real(value, decimal_places, trim_trailing_zeroes));
2618 }
2619
2620 QPDFObjectHandle
newName(std::string const & name)2621 QPDFObjectHandle::newName(std::string const& name)
2622 {
2623 return QPDFObjectHandle(new QPDF_Name(name));
2624 }
2625
2626 QPDFObjectHandle
newString(std::string const & str)2627 QPDFObjectHandle::newString(std::string const& str)
2628 {
2629 return QPDFObjectHandle(new QPDF_String(str));
2630 }
2631
2632 QPDFObjectHandle
newUnicodeString(std::string const & utf8_str)2633 QPDFObjectHandle::newUnicodeString(std::string const& utf8_str)
2634 {
2635 return QPDFObjectHandle(QPDF_String::new_utf16(utf8_str));
2636 }
2637
2638 QPDFObjectHandle
newOperator(std::string const & value)2639 QPDFObjectHandle::newOperator(std::string const& value)
2640 {
2641 return QPDFObjectHandle(new QPDF_Operator(value));
2642 }
2643
2644 QPDFObjectHandle
newInlineImage(std::string const & value)2645 QPDFObjectHandle::newInlineImage(std::string const& value)
2646 {
2647 return QPDFObjectHandle(new QPDF_InlineImage(value));
2648 }
2649
2650 QPDFObjectHandle
newArray()2651 QPDFObjectHandle::newArray()
2652 {
2653 return newArray(std::vector<QPDFObjectHandle>());
2654 }
2655
2656 QPDFObjectHandle
newArray(std::vector<QPDFObjectHandle> const & items)2657 QPDFObjectHandle::newArray(std::vector<QPDFObjectHandle> const& items)
2658 {
2659 return QPDFObjectHandle(new QPDF_Array(items));
2660 }
2661
2662 QPDFObjectHandle
newArray(Rectangle const & rect)2663 QPDFObjectHandle::newArray(Rectangle const& rect)
2664 {
2665 std::vector<QPDFObjectHandle> items;
2666 items.push_back(newReal(rect.llx));
2667 items.push_back(newReal(rect.lly));
2668 items.push_back(newReal(rect.urx));
2669 items.push_back(newReal(rect.ury));
2670 return newArray(items);
2671 }
2672
2673 QPDFObjectHandle
newArray(Matrix const & matrix)2674 QPDFObjectHandle::newArray(Matrix const& matrix)
2675 {
2676 std::vector<QPDFObjectHandle> items;
2677 items.push_back(newReal(matrix.a));
2678 items.push_back(newReal(matrix.b));
2679 items.push_back(newReal(matrix.c));
2680 items.push_back(newReal(matrix.d));
2681 items.push_back(newReal(matrix.e));
2682 items.push_back(newReal(matrix.f));
2683 return newArray(items);
2684 }
2685
2686 QPDFObjectHandle
newArray(QPDFMatrix const & matrix)2687 QPDFObjectHandle::newArray(QPDFMatrix const& matrix)
2688 {
2689 std::vector<QPDFObjectHandle> items;
2690 items.push_back(newReal(matrix.a));
2691 items.push_back(newReal(matrix.b));
2692 items.push_back(newReal(matrix.c));
2693 items.push_back(newReal(matrix.d));
2694 items.push_back(newReal(matrix.e));
2695 items.push_back(newReal(matrix.f));
2696 return newArray(items);
2697 }
2698
2699 QPDFObjectHandle
newFromRectangle(Rectangle const & rect)2700 QPDFObjectHandle::newFromRectangle(Rectangle const& rect)
2701 {
2702 return newArray(rect);
2703 }
2704
2705 QPDFObjectHandle
newFromMatrix(Matrix const & m)2706 QPDFObjectHandle::newFromMatrix(Matrix const& m)
2707 {
2708 return newArray(m);
2709 }
2710
2711 QPDFObjectHandle
newFromMatrix(QPDFMatrix const & m)2712 QPDFObjectHandle::newFromMatrix(QPDFMatrix const& m)
2713 {
2714 return newArray(m);
2715 }
2716
2717 QPDFObjectHandle
newDictionary()2718 QPDFObjectHandle::newDictionary()
2719 {
2720 return newDictionary(std::map<std::string, QPDFObjectHandle>());
2721 }
2722
2723 QPDFObjectHandle
newDictionary(std::map<std::string,QPDFObjectHandle> const & items)2724 QPDFObjectHandle::newDictionary(
2725 std::map<std::string, QPDFObjectHandle> const& items)
2726 {
2727 return QPDFObjectHandle(new QPDF_Dictionary(items));
2728 }
2729
2730
2731 QPDFObjectHandle
newStream(QPDF * qpdf,int objid,int generation,QPDFObjectHandle stream_dict,qpdf_offset_t offset,size_t length)2732 QPDFObjectHandle::newStream(QPDF* qpdf, int objid, int generation,
2733 QPDFObjectHandle stream_dict,
2734 qpdf_offset_t offset, size_t length)
2735 {
2736 QPDFObjectHandle result = QPDFObjectHandle(new QPDF_Stream(
2737 qpdf, objid, generation,
2738 stream_dict, offset, length));
2739 if (offset)
2740 {
2741 result.setParsedOffset(offset);
2742 }
2743 return result;
2744 }
2745
2746 QPDFObjectHandle
newStream(QPDF * qpdf)2747 QPDFObjectHandle::newStream(QPDF* qpdf)
2748 {
2749 if (qpdf == 0)
2750 {
2751 throw std::runtime_error(
2752 "attempt to create stream in null qpdf object");
2753 }
2754 QTC::TC("qpdf", "QPDFObjectHandle newStream");
2755 QPDFObjectHandle stream_dict = newDictionary();
2756 QPDFObjectHandle result = qpdf->makeIndirectObject(
2757 QPDFObjectHandle(
2758 new QPDF_Stream(qpdf, 0, 0, stream_dict, 0, 0)));
2759 result.dereference();
2760 QPDF_Stream* stream =
2761 dynamic_cast<QPDF_Stream*>(result.obj.getPointer());
2762 stream->setObjGen(result.getObjectID(), result.getGeneration());
2763 return result;
2764 }
2765
2766 QPDFObjectHandle
newStream(QPDF * qpdf,PointerHolder<Buffer> data)2767 QPDFObjectHandle::newStream(QPDF* qpdf, PointerHolder<Buffer> data)
2768 {
2769 QTC::TC("qpdf", "QPDFObjectHandle newStream with data");
2770 QPDFObjectHandle result = newStream(qpdf);
2771 result.replaceStreamData(data, newNull(), newNull());
2772 return result;
2773 }
2774
2775 QPDFObjectHandle
newStream(QPDF * qpdf,std::string const & data)2776 QPDFObjectHandle::newStream(QPDF* qpdf, std::string const& data)
2777 {
2778 QTC::TC("qpdf", "QPDFObjectHandle newStream with string");
2779 QPDFObjectHandle result = newStream(qpdf);
2780 result.replaceStreamData(data, newNull(), newNull());
2781 return result;
2782 }
2783
2784 QPDFObjectHandle
newReserved(QPDF * qpdf)2785 QPDFObjectHandle::newReserved(QPDF* qpdf)
2786 {
2787 // Reserve a spot for this object by assigning it an object
2788 // number, but then return an unresolved handle to the object.
2789 QPDFObjectHandle reserved = qpdf->makeIndirectObject(
2790 QPDFObjectHandle(new QPDF_Reserved()));
2791 QPDFObjectHandle result =
2792 newIndirect(qpdf, reserved.objid, reserved.generation);
2793 result.reserved = true;
2794 return result;
2795 }
2796
2797 void
setObjectDescription(QPDF * owning_qpdf,std::string const & object_description)2798 QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf,
2799 std::string const& object_description)
2800 {
2801 // This is called during parsing on newly created direct objects,
2802 // so we can't call dereference() here.
2803 if (isInitialized() && this->obj.getPointer())
2804 {
2805 this->obj->setDescription(owning_qpdf, object_description);
2806 }
2807 }
2808
2809 bool
hasObjectDescription()2810 QPDFObjectHandle::hasObjectDescription()
2811 {
2812 if (isInitialized())
2813 {
2814 dereference();
2815 if (this->obj.getPointer())
2816 {
2817 return this->obj->hasDescription();
2818 }
2819 }
2820 return false;
2821 }
2822
2823 QPDFObjectHandle
shallowCopy()2824 QPDFObjectHandle::shallowCopy()
2825 {
2826 QPDFObjectHandle result;
2827 shallowCopyInternal(result, false);
2828 return result;
2829 }
2830
2831 QPDFObjectHandle
unsafeShallowCopy()2832 QPDFObjectHandle::unsafeShallowCopy()
2833 {
2834 QPDFObjectHandle result;
2835 shallowCopyInternal(result, true);
2836 return result;
2837 }
2838
2839 void
shallowCopyInternal(QPDFObjectHandle & new_obj,bool first_level_only)2840 QPDFObjectHandle::shallowCopyInternal(QPDFObjectHandle& new_obj,
2841 bool first_level_only)
2842 {
2843 assertInitialized();
2844
2845 if (isStream())
2846 {
2847 QTC::TC("qpdf", "QPDFObjectHandle ERR shallow copy stream");
2848 throw std::runtime_error(
2849 "attempt to make a shallow copy of a stream");
2850 }
2851
2852 if (isArray())
2853 {
2854 QTC::TC("qpdf", "QPDFObjectHandle shallow copy array");
2855 // No newArray for shallow copying the sparse array
2856 QPDF_Array* arr = dynamic_cast<QPDF_Array*>(obj.getPointer());
2857 new_obj = QPDFObjectHandle(
2858 new QPDF_Array(arr->getElementsForShallowCopy()));
2859 }
2860 else if (isDictionary())
2861 {
2862 QTC::TC("qpdf", "QPDFObjectHandle shallow copy dictionary");
2863 new_obj = newDictionary(getDictAsMap());
2864 }
2865 else
2866 {
2867 QTC::TC("qpdf", "QPDFObjectHandle shallow copy scalar");
2868 new_obj = *this;
2869 }
2870
2871 std::set<QPDFObjGen> visited;
2872 new_obj.copyObject(visited, false, first_level_only, false);
2873 }
2874
2875 void
copyObject(std::set<QPDFObjGen> & visited,bool cross_indirect,bool first_level_only,bool stop_at_streams)2876 QPDFObjectHandle::copyObject(std::set<QPDFObjGen>& visited,
2877 bool cross_indirect, bool first_level_only,
2878 bool stop_at_streams)
2879 {
2880 assertInitialized();
2881
2882 if (isStream())
2883 {
2884 QTC::TC("qpdf", "QPDFObjectHandle copy stream",
2885 stop_at_streams ? 0 : 1);
2886 if (stop_at_streams)
2887 {
2888 return;
2889 }
2890 throw std::runtime_error(
2891 "attempt to make a stream into a direct object");
2892 }
2893
2894 QPDFObjGen cur_og(this->objid, this->generation);
2895 if (cur_og.getObj() != 0)
2896 {
2897 if (visited.count(cur_og))
2898 {
2899 QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop");
2900 throw std::runtime_error(
2901 "loop detected while converting object from "
2902 "indirect to direct");
2903 }
2904 visited.insert(cur_og);
2905 }
2906
2907 if (isReserved())
2908 {
2909 throw std::logic_error(
2910 "QPDFObjectHandle: attempting to make a"
2911 " reserved object handle direct");
2912 }
2913
2914 dereference();
2915 this->qpdf = 0;
2916 this->objid = 0;
2917 this->generation = 0;
2918
2919 PointerHolder<QPDFObject> new_obj;
2920
2921 if (isBool())
2922 {
2923 QTC::TC("qpdf", "QPDFObjectHandle clone bool");
2924 new_obj = new QPDF_Bool(getBoolValue());
2925 }
2926 else if (isNull())
2927 {
2928 QTC::TC("qpdf", "QPDFObjectHandle clone null");
2929 new_obj = new QPDF_Null();
2930 }
2931 else if (isInteger())
2932 {
2933 QTC::TC("qpdf", "QPDFObjectHandle clone integer");
2934 new_obj = new QPDF_Integer(getIntValue());
2935 }
2936 else if (isReal())
2937 {
2938 QTC::TC("qpdf", "QPDFObjectHandle clone real");
2939 new_obj = new QPDF_Real(getRealValue());
2940 }
2941 else if (isName())
2942 {
2943 QTC::TC("qpdf", "QPDFObjectHandle clone name");
2944 new_obj = new QPDF_Name(getName());
2945 }
2946 else if (isString())
2947 {
2948 QTC::TC("qpdf", "QPDFObjectHandle clone string");
2949 new_obj = new QPDF_String(getStringValue());
2950 }
2951 else if (isArray())
2952 {
2953 QTC::TC("qpdf", "QPDFObjectHandle clone array");
2954 std::vector<QPDFObjectHandle> items;
2955 int n = getArrayNItems();
2956 for (int i = 0; i < n; ++i)
2957 {
2958 items.push_back(getArrayItem(i));
2959 if ((! first_level_only) &&
2960 (cross_indirect || (! items.back().isIndirect())))
2961 {
2962 items.back().copyObject(
2963 visited, cross_indirect,
2964 first_level_only, stop_at_streams);
2965 }
2966 }
2967 new_obj = new QPDF_Array(items);
2968 }
2969 else if (isDictionary())
2970 {
2971 QTC::TC("qpdf", "QPDFObjectHandle clone dictionary");
2972 std::set<std::string> keys = getKeys();
2973 std::map<std::string, QPDFObjectHandle> items;
2974 for (std::set<std::string>::iterator iter = keys.begin();
2975 iter != keys.end(); ++iter)
2976 {
2977 items[*iter] = getKey(*iter);
2978 if ((! first_level_only) &&
2979 (cross_indirect || (! items[*iter].isIndirect())))
2980 {
2981 items[*iter].copyObject(
2982 visited, cross_indirect,
2983 first_level_only, stop_at_streams);
2984 }
2985 }
2986 new_obj = new QPDF_Dictionary(items);
2987 }
2988 else
2989 {
2990 throw std::logic_error("QPDFObjectHandle::makeDirectInternal: "
2991 "unknown object type");
2992 }
2993
2994 this->obj = new_obj;
2995
2996 if (cur_og.getObj())
2997 {
2998 visited.erase(cur_og);
2999 }
3000 }
3001
3002 QPDFObjectHandle
copyStream()3003 QPDFObjectHandle::copyStream()
3004 {
3005 assertStream();
3006 QPDFObjectHandle result = newStream(this->getOwningQPDF());
3007 QPDFObjectHandle dict = result.getDict();
3008 QPDFObjectHandle old_dict = getDict();
3009 for (auto& iter: QPDFDictItems(old_dict))
3010 {
3011 if (iter.second.isIndirect())
3012 {
3013 dict.replaceKey(iter.first, iter.second);
3014 }
3015 else
3016 {
3017 dict.replaceKey(iter.first, iter.second.shallowCopy());
3018 }
3019 }
3020 QPDF::StreamCopier::copyStreamData(getOwningQPDF(), result, *this);
3021 return result;
3022 }
3023
3024 void
makeDirect()3025 QPDFObjectHandle::makeDirect()
3026 {
3027 makeDirect(false);
3028 }
3029
3030 void
makeDirect(bool allow_streams)3031 QPDFObjectHandle::makeDirect(bool allow_streams)
3032 {
3033 std::set<QPDFObjGen> visited;
3034 copyObject(visited, true, false, allow_streams);
3035 }
3036
3037 void
assertInitialized() const3038 QPDFObjectHandle::assertInitialized() const
3039 {
3040 if (! this->initialized)
3041 {
3042 throw std::logic_error("operation attempted on uninitialized "
3043 "QPDFObjectHandle");
3044 }
3045 }
3046
3047 void
typeWarning(char const * expected_type,std::string const & warning)3048 QPDFObjectHandle::typeWarning(char const* expected_type,
3049 std::string const& warning)
3050 {
3051 QPDF* context = nullptr;
3052 std::string description;
3053 dereference();
3054 this->obj->getDescription(context, description);
3055 // Null context handled by warn
3056 warn(context,
3057 QPDFExc(qpdf_e_object,
3058 "", description, 0,
3059 std::string("operation for ") + expected_type +
3060 " attempted on object of type " +
3061 getTypeName() + ": " + warning));
3062 }
3063
3064 void
warnIfPossible(std::string const & warning,bool throw_if_no_description)3065 QPDFObjectHandle::warnIfPossible(std::string const& warning,
3066 bool throw_if_no_description)
3067 {
3068 QPDF* context = 0;
3069 std::string description;
3070 dereference();
3071 if (this->obj->getDescription(context, description))
3072 {
3073 warn(context,
3074 QPDFExc(
3075 qpdf_e_damaged_pdf,
3076 "", description, 0,
3077 warning));
3078 }
3079 else if (throw_if_no_description)
3080 {
3081 throw std::runtime_error(warning);
3082 }
3083 }
3084
3085 void
objectWarning(std::string const & warning)3086 QPDFObjectHandle::objectWarning(std::string const& warning)
3087 {
3088 QPDF* context = nullptr;
3089 std::string description;
3090 dereference();
3091 this->obj->getDescription(context, description);
3092 // Null context handled by warn
3093 warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning));
3094 }
3095
3096 void
assertType(char const * type_name,bool istype)3097 QPDFObjectHandle::assertType(char const* type_name, bool istype)
3098 {
3099 if (! istype)
3100 {
3101 throw std::runtime_error(std::string("operation for ") + type_name +
3102 " attempted on object of type " +
3103 getTypeName());
3104 }
3105 }
3106
3107 void
assertNull()3108 QPDFObjectHandle::assertNull()
3109 {
3110 assertType("null", isNull());
3111 }
3112
3113 void
assertBool()3114 QPDFObjectHandle::assertBool()
3115 {
3116 assertType("boolean", isBool());
3117 }
3118
3119 void
assertInteger()3120 QPDFObjectHandle::assertInteger()
3121 {
3122 assertType("integer", isInteger());
3123 }
3124
3125 void
assertReal()3126 QPDFObjectHandle::assertReal()
3127 {
3128 assertType("real", isReal());
3129 }
3130
3131 void
assertName()3132 QPDFObjectHandle::assertName()
3133 {
3134 assertType("name", isName());
3135 }
3136
3137 void
assertString()3138 QPDFObjectHandle::assertString()
3139 {
3140 assertType("string", isString());
3141 }
3142
3143 void
assertOperator()3144 QPDFObjectHandle::assertOperator()
3145 {
3146 assertType("operator", isOperator());
3147 }
3148
3149 void
assertInlineImage()3150 QPDFObjectHandle::assertInlineImage()
3151 {
3152 assertType("inlineimage", isInlineImage());
3153 }
3154
3155 void
assertArray()3156 QPDFObjectHandle::assertArray()
3157 {
3158 assertType("array", isArray());
3159 }
3160
3161 void
assertDictionary()3162 QPDFObjectHandle::assertDictionary()
3163 {
3164 assertType("dictionary", isDictionary());
3165 }
3166
3167 void
assertStream()3168 QPDFObjectHandle::assertStream()
3169 {
3170 assertType("stream", isStream());
3171 }
3172
3173 void
assertReserved()3174 QPDFObjectHandle::assertReserved()
3175 {
3176 assertType("reserved", isReserved());
3177 }
3178
3179 void
assertIndirect()3180 QPDFObjectHandle::assertIndirect()
3181 {
3182 if (! isIndirect())
3183 {
3184 throw std::logic_error(
3185 "operation for indirect object attempted on direct object");
3186 }
3187 }
3188
3189 void
assertScalar()3190 QPDFObjectHandle::assertScalar()
3191 {
3192 assertType("scalar", isScalar());
3193 }
3194
3195 void
assertNumber()3196 QPDFObjectHandle::assertNumber()
3197 {
3198 assertType("number", isNumber());
3199 }
3200
3201 bool
isPageObject()3202 QPDFObjectHandle::isPageObject()
3203 {
3204 // See comments in QPDFObjectHandle.hh.
3205 if (getOwningQPDF() == nullptr)
3206 {
3207 return false;
3208 }
3209 // getAllPages repairs /Type when traversing the page tree.
3210 getOwningQPDF()->getAllPages();
3211 if (! this->isDictionary())
3212 {
3213 return false;
3214 }
3215 if (this->hasKey("/Type"))
3216 {
3217 QPDFObjectHandle type = this->getKey("/Type");
3218 if (type.isName() && (type.getName() == "/Page"))
3219 {
3220 return true;
3221 }
3222 // Files have been seen in the wild that have /Type (Page)
3223 else if (type.isString() && (type.getStringValue() == "Page"))
3224 {
3225 return true;
3226 }
3227 else
3228 {
3229 return false;
3230 }
3231 }
3232 return false;
3233 }
3234
3235 bool
isPagesObject()3236 QPDFObjectHandle::isPagesObject()
3237 {
3238 if (getOwningQPDF() == nullptr)
3239 {
3240 return false;
3241 }
3242 // getAllPages repairs /Type when traversing the page tree.
3243 getOwningQPDF()->getAllPages();
3244 return (this->isDictionary() &&
3245 this->hasKey("/Type") &&
3246 this->getKey("/Type").isName() &&
3247 this->getKey("/Type").getName() == "/Pages");
3248 }
3249
3250 bool
isFormXObject()3251 QPDFObjectHandle::isFormXObject()
3252 {
3253 if (! this->isStream())
3254 {
3255 return false;
3256 }
3257 QPDFObjectHandle dict = this->getDict();
3258 return (dict.getKey("/Type").isName() &&
3259 ("/XObject" == dict.getKey("/Type").getName()) &&
3260 dict.getKey("/Subtype").isName() &&
3261 ("/Form" == dict.getKey("/Subtype").getName()));
3262 }
3263
3264 bool
isImage(bool exclude_imagemask)3265 QPDFObjectHandle::isImage(bool exclude_imagemask)
3266 {
3267 if (! this->isStream())
3268 {
3269 return false;
3270 }
3271 QPDFObjectHandle dict = this->getDict();
3272 return (dict.hasKey("/Subtype") &&
3273 (dict.getKey("/Subtype").getName() == "/Image") &&
3274 ((! exclude_imagemask) ||
3275 (! (dict.getKey("/ImageMask").isBool() &&
3276 dict.getKey("/ImageMask").getBoolValue()))));
3277 }
3278
3279 void
checkOwnership(QPDFObjectHandle const & item) const3280 QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const
3281 {
3282 if ((this->qpdf != nullptr) &&
3283 (item.qpdf != nullptr) &&
3284 (this->qpdf != item.qpdf))
3285 {
3286 QTC::TC("qpdf", "QPDFObjectHandle check ownership");
3287 throw std::logic_error(
3288 "Attempting to add an object from a different QPDF."
3289 " Use QPDF::copyForeignObject to add objects from another file.");
3290 }
3291 }
3292
3293 void
assertPageObject()3294 QPDFObjectHandle::assertPageObject()
3295 {
3296 if (! isPageObject())
3297 {
3298 throw std::runtime_error("page operation called on non-Page object");
3299 }
3300 }
3301
3302 void
dereference()3303 QPDFObjectHandle::dereference()
3304 {
3305 if (! this->initialized)
3306 {
3307 throw std::logic_error(
3308 "attempted to dereference an uninitialized QPDFObjectHandle");
3309 }
3310 if (this->obj.getPointer() && this->objid &&
3311 QPDF::Resolver::objectChanged(
3312 this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj))
3313 {
3314 this->obj = nullptr;
3315 }
3316 if (this->obj.getPointer() == 0)
3317 {
3318 PointerHolder<QPDFObject> obj = QPDF::Resolver::resolve(
3319 this->qpdf, this->objid, this->generation);
3320 if (obj.getPointer() == 0)
3321 {
3322 // QPDF::resolve never returns an uninitialized object, but
3323 // check just in case.
3324 this->obj = new QPDF_Null();
3325 }
3326 else if (dynamic_cast<QPDF_Reserved*>(obj.getPointer()))
3327 {
3328 // Do not resolve
3329 }
3330 else
3331 {
3332 this->reserved = false;
3333 this->obj = obj;
3334 }
3335 }
3336 }
3337
3338 void
warn(QPDF * qpdf,QPDFExc const & e)3339 QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e)
3340 {
3341 // If parsing on behalf of a QPDF object and want to give a
3342 // warning, we can warn through the object. If parsing for some
3343 // other reason, such as an explicit creation of an object from a
3344 // string, then just throw the exception.
3345 if (qpdf)
3346 {
3347 qpdf->warn(e);
3348 }
3349 else
3350 {
3351 throw e;
3352 }
3353 }
3354
// Construct a dictionary-items helper over the given object handle.
// The handle is stored in the oh member and later passed to the
// iterators created by begin() and end().
QPDFObjectHandle::QPDFDictItems::QPDFDictItems(QPDFObjectHandle const& oh) :
    oh(oh)
{
}
3359
3360 QPDFObjectHandle::QPDFDictItems::iterator&
operator ++()3361 QPDFObjectHandle::QPDFDictItems::iterator::operator++()
3362 {
3363 ++this->m->iter;
3364 updateIValue();
3365 return *this;
3366 }
3367
3368 QPDFObjectHandle::QPDFDictItems::iterator&
operator --()3369 QPDFObjectHandle::QPDFDictItems::iterator::operator--()
3370 {
3371 --this->m->iter;
3372 updateIValue();
3373 return *this;
3374 }
3375
3376 QPDFObjectHandle::QPDFDictItems::iterator::reference
operator *()3377 QPDFObjectHandle::QPDFDictItems::iterator:: operator*()
3378 {
3379 updateIValue();
3380 return this->ivalue;
3381 }
3382
3383 QPDFObjectHandle::QPDFDictItems::iterator::pointer
operator ->()3384 QPDFObjectHandle::QPDFDictItems::iterator::operator->()
3385 {
3386 updateIValue();
3387 return &this->ivalue;
3388 }
3389
3390 bool
operator ==(iterator const & other) const3391 QPDFObjectHandle::QPDFDictItems::iterator::operator==(
3392 iterator const& other) const
3393 {
3394 if (this->m->is_end && other.m->is_end)
3395 {
3396 return true;
3397 }
3398 if (this->m->is_end || other.m->is_end)
3399 {
3400 return false;
3401 }
3402 return (this->ivalue.first == other.ivalue.first);
3403 }
3404
// Construct an iterator over oh. The shared Members state is created
// from oh and for_begin, then the cached key/value pair is filled in
// so the iterator can be used immediately.
QPDFObjectHandle::QPDFDictItems::iterator::iterator(
    QPDFObjectHandle& oh, bool for_begin) :
    m(new Members(oh, for_begin))
{
    updateIValue();
}
3411
3412 void
updateIValue()3413 QPDFObjectHandle::QPDFDictItems::iterator::updateIValue()
3414 {
3415 this->m->is_end = (this->m->iter == this->m->keys.end());
3416 if (this->m->is_end)
3417 {
3418 this->ivalue.first = "";
3419 this->ivalue.second = QPDFObjectHandle();
3420 }
3421 else
3422 {
3423 this->ivalue.first = *(this->m->iter);
3424 this->ivalue.second = this->m->oh.getKey(this->ivalue.first);
3425 }
3426 }
3427
Members(QPDFObjectHandle & oh,bool for_begin)3428 QPDFObjectHandle::QPDFDictItems::iterator::Members::Members(
3429 QPDFObjectHandle& oh, bool for_begin) :
3430 oh(oh)
3431 {
3432 this->keys = oh.getKeys();
3433 this->iter = for_begin ? this->keys.begin() : this->keys.end();
3434 }
3435
3436 QPDFObjectHandle::QPDFDictItems::iterator
begin()3437 QPDFObjectHandle::QPDFDictItems::begin()
3438 {
3439 return iterator(oh, true);
3440 }
3441
3442 QPDFObjectHandle::QPDFDictItems::iterator
end()3443 QPDFObjectHandle::QPDFDictItems::end()
3444 {
3445 return iterator(oh, false);
3446 }
3447
// Construct an array-items helper over the given object handle. The
// handle is stored in the oh member and later passed to the
// iterators created by begin().
QPDFObjectHandle::QPDFArrayItems::QPDFArrayItems(QPDFObjectHandle const& oh) :
    oh(oh)
{
}
3452
3453 QPDFObjectHandle::QPDFArrayItems::iterator&
operator ++()3454 QPDFObjectHandle::QPDFArrayItems::iterator::operator++()
3455 {
3456 if (! this->m->is_end)
3457 {
3458 ++this->m->item_number;
3459 updateIValue();
3460 }
3461 return *this;
3462 }
3463
3464 QPDFObjectHandle::QPDFArrayItems::iterator&
operator --()3465 QPDFObjectHandle::QPDFArrayItems::iterator::operator--()
3466 {
3467 if (this->m->item_number > 0)
3468 {
3469 --this->m->item_number;
3470 updateIValue();
3471 }
3472 return *this;
3473 }
3474
3475 QPDFObjectHandle::QPDFArrayItems::iterator::reference
operator *()3476 QPDFObjectHandle::QPDFArrayItems::iterator:: operator*()
3477 {
3478 updateIValue();
3479 return this->ivalue;
3480 }
3481
3482 QPDFObjectHandle::QPDFArrayItems::iterator::pointer
operator ->()3483 QPDFObjectHandle::QPDFArrayItems::iterator::operator->()
3484 {
3485 updateIValue();
3486 return &this->ivalue;
3487 }
3488
3489 bool
operator ==(iterator const & other) const3490 QPDFObjectHandle::QPDFArrayItems::iterator::operator==(
3491 iterator const& other) const
3492 {
3493 return (this->m->item_number == other.m->item_number);
3494 }
3495
iterator(QPDFObjectHandle & oh,bool for_begin)3496 QPDFObjectHandle::QPDFArrayItems::iterator::iterator(
3497 QPDFObjectHandle& oh, bool for_begin) :
3498 m(new Members(oh, for_begin))
3499 {
3500 updateIValue();
3501 }
3502
3503 void
updateIValue()3504 QPDFObjectHandle::QPDFArrayItems::iterator::updateIValue()
3505 {
3506 this->m->is_end = (this->m->item_number >= this->m->oh.getArrayNItems());
3507 if (this->m->is_end)
3508 {
3509 this->ivalue = QPDFObjectHandle();
3510 }
3511 else
3512 {
3513 this->ivalue = this->m->oh.getArrayItem(this->m->item_number);
3514 }
3515 }
3516
Members(QPDFObjectHandle & oh,bool for_begin)3517 QPDFObjectHandle::QPDFArrayItems::iterator::Members::Members(
3518 QPDFObjectHandle& oh, bool for_begin) :
3519 oh(oh)
3520 {
3521 this->item_number = for_begin ? 0 : oh.getArrayNItems();
3522 }
3523
3524 QPDFObjectHandle::QPDFArrayItems::iterator
begin()3525 QPDFObjectHandle::QPDFArrayItems::begin()
3526 {
3527 return iterator(oh, true);
3528 }
3529
3530 QPDFObjectHandle::QPDFArrayItems::iterator
end()3531 QPDFObjectHandle::QPDFArrayItems::end()
3532 {
3533 return iterator(oh, false);
3534 }
3535