1 /*
2  * Copyright (C) 2009-2011, Pino Toscano <pino@kde.org>
3  * Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
4  * Copyright (C) 2017, Albert Astals Cid <aacid@kde.org>
5  * Copyright (C) 2018, 2020, Adam Reichold <adam.reichold@t-online.de>
6  * Copyright (C) 2019, Masamichi Hosoda <trueroad@trueroad.jp>
7  * Copyright (C) 2019, 2020, Oliver Sander <oliver.sander@tu-dresden.de>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2, or (at your option)
12  * any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
22  */
23 
24 /**
25  \file poppler-document.h
26  */
27 #include "poppler-destination.h"
28 #include "poppler-document.h"
29 #include "poppler-embedded-file.h"
30 #include "poppler-page.h"
31 #include "poppler-toc.h"
32 
33 #include "poppler-destination-private.h"
34 #include "poppler-document-private.h"
35 #include "poppler-embedded-file-private.h"
36 #include "poppler-page-private.h"
37 #include "poppler-private.h"
38 #include "poppler-toc-private.h"
39 
40 #include "Catalog.h"
41 #include "DateInfo.h"
42 #include "ErrorCodes.h"
43 #include "GlobalParams.h"
44 #include "Link.h"
45 #include "Outline.h"
46 
47 #include <algorithm>
48 #include <iterator>
49 #include <memory>
50 
51 using namespace poppler;
52 
document_private(GooString * file_path,const std::string & owner_password,const std::string & user_password)53 document_private::document_private(GooString *file_path, const std::string &owner_password, const std::string &user_password) : document_private()
54 {
55     GooString goo_owner_password(owner_password.c_str());
56     GooString goo_user_password(user_password.c_str());
57     doc = new PDFDoc(file_path, &goo_owner_password, &goo_user_password);
58 }
59 
document_private(byte_array * file_data,const std::string & owner_password,const std::string & user_password)60 document_private::document_private(byte_array *file_data, const std::string &owner_password, const std::string &user_password) : document_private()
61 {
62     file_data->swap(doc_data);
63     MemStream *memstr = new MemStream(&doc_data[0], 0, doc_data.size(), Object(objNull));
64     GooString goo_owner_password(owner_password.c_str());
65     GooString goo_user_password(user_password.c_str());
66     doc = new PDFDoc(memstr, &goo_owner_password, &goo_user_password);
67 }
68 
document_private(const char * file_data,int file_data_length,const std::string & owner_password,const std::string & user_password)69 document_private::document_private(const char *file_data, int file_data_length, const std::string &owner_password, const std::string &user_password) : document_private()
70 {
71     raw_doc_data = file_data;
72     raw_doc_data_length = file_data_length;
73     MemStream *memstr = new MemStream(const_cast<char *>(raw_doc_data), 0, raw_doc_data_length, Object(objNull));
74     GooString goo_owner_password(owner_password.c_str());
75     GooString goo_user_password(user_password.c_str());
76     doc = new PDFDoc(memstr, &goo_owner_password, &goo_user_password);
77 }
78 
document_private()79 document_private::document_private() : GlobalParamsIniter(detail::error_function), doc(nullptr), raw_doc_data(nullptr), raw_doc_data_length(0), is_locked(false) { }
80 
~document_private()81 document_private::~document_private()
82 {
83     delete_all(embedded_files);
84 
85     delete doc;
86 }
87 
check_document(document_private * doc,byte_array * file_data)88 document *document_private::check_document(document_private *doc, byte_array *file_data)
89 {
90     if (doc->doc->isOk() || doc->doc->getErrorCode() == errEncrypted) {
91         if (doc->doc->getErrorCode() == errEncrypted) {
92             doc->is_locked = true;
93         }
94         return new document(*doc);
95     } else {
96         // put back the document data where it was before
97         if (file_data) {
98             file_data->swap(doc->doc_data);
99         }
100         delete doc;
101     }
102     return nullptr;
103 }
104 
105 /**
106  \class poppler::document poppler-document.h "poppler/cpp/poppler-document.h"
107 
108  Represents a PDF %document.
109  */
110 
111 /**
112  \enum poppler::document::page_mode_enum
113 
114  The various page modes available in a PDF %document.
115 */
116 /**
117  \var poppler::document::page_mode_enum poppler::document::use_none
118 
119  The %document specifies no particular page mode.
120 */
121 /**
122  \var poppler::document::page_mode_enum poppler::document::use_outlines
123 
124  The %document specifies its TOC (table of contents) should be open.
125 */
126 /**
127  \var poppler::document::page_mode_enum poppler::document::use_thumbs
128 
 The %document specifies that a view of the thumbnails of its pages should be
 open.
131 */
132 /**
133  \var poppler::document::page_mode_enum poppler::document::fullscreen
134 
 The %document specifies that it wants to be opened in fullscreen mode.
136 */
137 /**
138  \var poppler::document::page_mode_enum poppler::document::use_oc
139 
 The %document specifies that a view of its Optional Content (also known as
 layers) should be open.
142 */
143 /**
144  \var poppler::document::page_mode_enum poppler::document::use_attach
145 
 The %document specifies that a view of its %document-level attachments should
 be open.
148  */
149 
document(document_private & dd)150 document::document(document_private &dd) : d(&dd) { }
151 
~document()152 document::~document()
153 {
154     delete d;
155 }
156 
157 /**
158  \returns whether the current %document is locked
159  */
is_locked() const160 bool document::is_locked() const
161 {
162     return d->is_locked;
163 }
164 
165 /**
166  Unlocks the current document, if locked.
167 
168  \returns the new locking status of the document
169  */
unlock(const std::string & owner_password,const std::string & user_password)170 bool document::unlock(const std::string &owner_password, const std::string &user_password)
171 {
172     if (d->is_locked) {
173         document_private *newdoc = nullptr;
174         if (d->doc_data.size() > 0) {
175             newdoc = new document_private(&d->doc_data, owner_password, user_password);
176         } else if (d->raw_doc_data) {
177             newdoc = new document_private(d->raw_doc_data, d->raw_doc_data_length, owner_password, user_password);
178         } else {
179             newdoc = new document_private(new GooString(d->doc->getFileName()), owner_password, user_password);
180         }
181         if (!newdoc->doc->isOk()) {
182             d->doc_data.swap(newdoc->doc_data);
183             delete newdoc;
184         } else {
185             delete d;
186             d = newdoc;
187             d->is_locked = false;
188         }
189     }
190     return d->is_locked;
191 }
192 
193 /**
194  \returns the eventual page mode specified by the current PDF %document
195  */
page_mode() const196 document::page_mode_enum document::page_mode() const
197 {
198     switch (d->doc->getCatalog()->getPageMode()) {
199     case Catalog::pageModeNone:
200         return use_none;
201     case Catalog::pageModeOutlines:
202         return use_outlines;
203     case Catalog::pageModeThumbs:
204         return use_thumbs;
205     case Catalog::pageModeFullScreen:
206         return fullscreen;
207     case Catalog::pageModeOC:
208         return use_oc;
209     case Catalog::pageModeAttach:
210         return use_attach;
211     default:
212         return use_none;
213     }
214 }
215 
216 /**
217  \returns the eventual page layout specified by the current PDF %document
218  */
page_layout() const219 document::page_layout_enum document::page_layout() const
220 {
221     switch (d->doc->getCatalog()->getPageLayout()) {
222     case Catalog::pageLayoutNone:
223         return no_layout;
224     case Catalog::pageLayoutSinglePage:
225         return single_page;
226     case Catalog::pageLayoutOneColumn:
227         return one_column;
228     case Catalog::pageLayoutTwoColumnLeft:
229         return two_column_left;
230     case Catalog::pageLayoutTwoColumnRight:
231         return two_column_right;
232     case Catalog::pageLayoutTwoPageLeft:
233         return two_page_left;
234     case Catalog::pageLayoutTwoPageRight:
235         return two_page_right;
236     default:
237         return no_layout;
238     }
239 }
240 
241 /**
242  Gets the version of the current PDF %document.
243 
244  Example:
245  \code
246  poppler::document *doc = ...;
247  // for example, if the document is PDF 1.6:
248  int major = 0, minor = 0;
249  doc->get_pdf_version(&major, &minor);
250  // major == 1
251  // minor == 6
252  \endcode
253 
254  \param major if not NULL, will be set to the "major" number of the version
255  \param minor if not NULL, will be set to the "minor" number of the version
256  */
get_pdf_version(int * major,int * minor) const257 void document::get_pdf_version(int *major, int *minor) const
258 {
259     if (major) {
260         *major = d->doc->getPDFMajorVersion();
261     }
262     if (minor) {
263         *minor = d->doc->getPDFMinorVersion();
264     }
265 }
266 
267 /**
268  \returns all the information keys available in the %document
269  \see info_key, info_date
270  */
info_keys() const271 std::vector<std::string> document::info_keys() const
272 {
273     if (d->is_locked) {
274         return std::vector<std::string>();
275     }
276 
277     Object info = d->doc->getDocInfo();
278     if (!info.isDict()) {
279         return std::vector<std::string>();
280     }
281 
282     Dict *info_dict = info.getDict();
283     std::vector<std::string> keys(info_dict->getLength());
284     for (int i = 0; i < info_dict->getLength(); ++i) {
285         keys[i] = std::string(info_dict->getKey(i));
286     }
287 
288     return keys;
289 }
290 
291 /**
292  Gets the value of the specified \p key of the document information.
293 
294  \returns the value for the \p key, or an empty string if not available
295  \see info_keys, info_date
296  */
info_key(const std::string & key) const297 ustring document::info_key(const std::string &key) const
298 {
299     if (d->is_locked) {
300         return ustring();
301     }
302 
303     std::unique_ptr<GooString> goo_value(d->doc->getDocInfoStringEntry(key.c_str()));
304     if (!goo_value.get()) {
305         return ustring();
306     }
307 
308     return detail::unicode_GooString_to_ustring(goo_value.get());
309 }
310 
311 /**
312  Sets the value of the specified \p key of the %document information to \p val.
313  If \p val is empty, the entry specified by \p key is removed.
314 
315  \returns true on success, false on failure
316  */
set_info_key(const std::string & key,const ustring & val)317 bool document::set_info_key(const std::string &key, const ustring &val)
318 {
319     if (d->is_locked) {
320         return false;
321     }
322 
323     GooString *goo_val;
324 
325     if (val.empty()) {
326         goo_val = nullptr;
327     } else {
328         goo_val = detail::ustring_to_unicode_GooString(val);
329     }
330 
331     d->doc->setDocInfoStringEntry(key.c_str(), goo_val);
332     return true;
333 }
334 
335 /**
336  Gets the time_t value of the specified \p key of the document
337  information.
338 
339  \returns the time_t value for the \p key
340  \see info_keys, info_date
341  */
info_date(const std::string & key) const342 time_type document::info_date(const std::string &key) const
343 {
344     if (d->is_locked) {
345         return time_type(-1);
346     }
347 
348     std::unique_ptr<GooString> goo_date(d->doc->getDocInfoStringEntry(key.c_str()));
349     if (!goo_date.get()) {
350         return time_type(-1);
351     }
352 
353     return dateStringToTime(goo_date.get());
354 }
355 
356 /**
357  Sets the time_type value of the specified \p key of the %document information
358  to \p val.
359  If \p val == time_type(-1), the entry specified by \p key is removed.
360 
361  \returns true on success, false on failure
362  */
set_info_date(const std::string & key,time_type val)363 bool document::set_info_date(const std::string &key, time_type val)
364 {
365     if (d->is_locked) {
366         return false;
367     }
368 
369     GooString *goo_date;
370 
371     if (val == time_type(-1)) {
372         goo_date = nullptr;
373     } else {
374         time_t t = static_cast<time_t>(val);
375         goo_date = timeToDateString(&t);
376     }
377 
378     d->doc->setDocInfoStringEntry(key.c_str(), goo_date);
379     return true;
380 }
381 
382 /**
383  Gets the %document's title.
384 
385  \returns the document's title, or an empty string if not available
386  \see set_title, info_key
387  */
get_title() const388 ustring document::get_title() const
389 {
390     if (d->is_locked) {
391         return ustring();
392     }
393 
394     std::unique_ptr<GooString> goo_title(d->doc->getDocInfoTitle());
395     if (!goo_title.get()) {
396         return ustring();
397     }
398 
399     return detail::unicode_GooString_to_ustring(goo_title.get());
400 }
401 
402 /**
403  Sets the %document's title to \p title.
404  If \p title is empty, the %document's title is removed.
405 
406  \returns true on success, false on failure
407  */
set_title(const ustring & title)408 bool document::set_title(const ustring &title)
409 {
410     if (d->is_locked) {
411         return false;
412     }
413 
414     GooString *goo_title;
415 
416     if (title.empty()) {
417         goo_title = nullptr;
418     } else {
419         goo_title = detail::ustring_to_unicode_GooString(title);
420     }
421 
422     d->doc->setDocInfoTitle(goo_title);
423     return true;
424 }
425 
426 /**
427  Gets the document's author.
428 
429  \returns the document's author, or an empty string if not available
430  \see set_author, info_key
431  */
get_author() const432 ustring document::get_author() const
433 {
434     if (d->is_locked) {
435         return ustring();
436     }
437 
438     std::unique_ptr<GooString> goo_author(d->doc->getDocInfoAuthor());
439     if (!goo_author.get()) {
440         return ustring();
441     }
442 
443     return detail::unicode_GooString_to_ustring(goo_author.get());
444 }
445 
446 /**
447  Sets the %document's author to \p author.
448  If \p author is empty, the %document's author is removed.
449 
450  \returns true on success, false on failure
451  */
set_author(const ustring & author)452 bool document::set_author(const ustring &author)
453 {
454     if (d->is_locked) {
455         return false;
456     }
457 
458     GooString *goo_author;
459 
460     if (author.empty()) {
461         goo_author = nullptr;
462     } else {
463         goo_author = detail::ustring_to_unicode_GooString(author);
464     }
465 
466     d->doc->setDocInfoAuthor(goo_author);
467     return true;
468 }
469 
470 /**
471  Gets the document's subject.
472 
473  \returns the document's subject, or an empty string if not available
474  \see set_subject, info_key
475  */
get_subject() const476 ustring document::get_subject() const
477 {
478     if (d->is_locked) {
479         return ustring();
480     }
481 
482     std::unique_ptr<GooString> goo_subject(d->doc->getDocInfoSubject());
483     if (!goo_subject.get()) {
484         return ustring();
485     }
486 
487     return detail::unicode_GooString_to_ustring(goo_subject.get());
488 }
489 
490 /**
491  Sets the %document's subject to \p subject.
492  If \p subject is empty, the %document's subject is removed.
493 
494  \returns true on success, false on failure
495  */
set_subject(const ustring & subject)496 bool document::set_subject(const ustring &subject)
497 {
498     if (d->is_locked) {
499         return false;
500     }
501 
502     GooString *goo_subject;
503 
504     if (subject.empty()) {
505         goo_subject = nullptr;
506     } else {
507         goo_subject = detail::ustring_to_unicode_GooString(subject);
508     }
509 
510     d->doc->setDocInfoSubject(goo_subject);
511     return true;
512 }
513 
514 /**
515  Gets the document's keywords.
516 
517  \returns the document's keywords, or an empty string if not available
518  \see set_keywords, info_key
519  */
get_keywords() const520 ustring document::get_keywords() const
521 {
522     if (d->is_locked) {
523         return ustring();
524     }
525 
526     std::unique_ptr<GooString> goo_keywords(d->doc->getDocInfoKeywords());
527     if (!goo_keywords.get()) {
528         return ustring();
529     }
530 
531     return detail::unicode_GooString_to_ustring(goo_keywords.get());
532 }
533 
534 /**
535  Sets the %document's keywords to \p keywords.
536  If \p keywords is empty, the %document's keywords are removed.
537 
538  \returns true on success, false on failure
539  */
set_keywords(const ustring & keywords)540 bool document::set_keywords(const ustring &keywords)
541 {
542     if (d->is_locked) {
543         return false;
544     }
545 
546     GooString *goo_keywords;
547 
548     if (keywords.empty()) {
549         goo_keywords = nullptr;
550     } else {
551         goo_keywords = detail::ustring_to_unicode_GooString(keywords);
552     }
553 
554     d->doc->setDocInfoKeywords(goo_keywords);
555     return true;
556 }
557 
558 /**
559  Gets the document's creator.
560 
561  \returns the document's creator, or an empty string if not available
562  \see set_creator, info_key
563  */
get_creator() const564 ustring document::get_creator() const
565 {
566     if (d->is_locked) {
567         return ustring();
568     }
569 
570     std::unique_ptr<GooString> goo_creator(d->doc->getDocInfoCreator());
571     if (!goo_creator.get()) {
572         return ustring();
573     }
574 
575     return detail::unicode_GooString_to_ustring(goo_creator.get());
576 }
577 
578 /**
579  Sets the %document's creator to \p creator.
580  If \p creator is empty, the %document's creator is removed.
581 
582  \returns true on success, false on failure
583  */
set_creator(const ustring & creator)584 bool document::set_creator(const ustring &creator)
585 {
586     if (d->is_locked) {
587         return false;
588     }
589 
590     GooString *goo_creator;
591 
592     if (creator.empty()) {
593         goo_creator = nullptr;
594     } else {
595         goo_creator = detail::ustring_to_unicode_GooString(creator);
596     }
597 
598     d->doc->setDocInfoCreator(goo_creator);
599     return true;
600 }
601 
602 /**
603  Gets the document's producer.
604 
605  \returns the document's producer, or an empty string if not available
606  \see set_producer, info_key
607  */
get_producer() const608 ustring document::get_producer() const
609 {
610     if (d->is_locked) {
611         return ustring();
612     }
613 
614     std::unique_ptr<GooString> goo_producer(d->doc->getDocInfoProducer());
615     if (!goo_producer.get()) {
616         return ustring();
617     }
618 
619     return detail::unicode_GooString_to_ustring(goo_producer.get());
620 }
621 
622 /**
623  Sets the %document's producer to \p producer.
624  If \p producer is empty, the %document's producer is removed.
625 
626  \returns true on success, false on failure
627  */
set_producer(const ustring & producer)628 bool document::set_producer(const ustring &producer)
629 {
630     if (d->is_locked) {
631         return false;
632     }
633 
634     GooString *goo_producer;
635 
636     if (producer.empty()) {
637         goo_producer = nullptr;
638     } else {
639         goo_producer = detail::ustring_to_unicode_GooString(producer);
640     }
641 
642     d->doc->setDocInfoProducer(goo_producer);
643     return true;
644 }
645 
646 /**
647  Gets the document's creation date as a time_type value.
648 
649  \returns the document's creation date as a time_type value
650  \see set_creation_date, info_date
651  */
get_creation_date() const652 time_type document::get_creation_date() const
653 {
654     if (d->is_locked) {
655         return time_type(-1);
656     }
657 
658     std::unique_ptr<GooString> goo_creation_date(d->doc->getDocInfoCreatDate());
659     if (!goo_creation_date.get()) {
660         return time_type(-1);
661     }
662 
663     return dateStringToTime(goo_creation_date.get());
664 }
665 
666 /**
667  Sets the %document's creation date to \p creation_date.
668  If \p creation_date == time_type(-1), the %document's creation date is removed.
669 
670  \returns true on success, false on failure
671  */
set_creation_date(time_type creation_date)672 bool document::set_creation_date(time_type creation_date)
673 {
674     if (d->is_locked) {
675         return false;
676     }
677 
678     GooString *goo_creation_date;
679 
680     if (creation_date == time_type(-1)) {
681         goo_creation_date = nullptr;
682     } else {
683         time_t t = static_cast<time_t>(creation_date);
684         goo_creation_date = timeToDateString(&t);
685     }
686 
687     d->doc->setDocInfoCreatDate(goo_creation_date);
688     return true;
689 }
690 
691 /**
692  Gets the document's modification date as a time_type value.
693 
694  \returns the document's modification date as a time_type value
695  \see set_modification_date, info_date
696  */
get_modification_date() const697 time_type document::get_modification_date() const
698 {
699     if (d->is_locked) {
700         return time_type(-1);
701     }
702 
703     std::unique_ptr<GooString> goo_modification_date(d->doc->getDocInfoModDate());
704     if (!goo_modification_date.get()) {
705         return time_type(-1);
706     }
707 
708     return dateStringToTime(goo_modification_date.get());
709 }
710 
711 /**
712  Sets the %document's modification date to \p mod_date.
713  If \p mod_date == time_type(-1), the %document's modification date is removed.
714 
715  \returns true on success, false on failure
716  */
set_modification_date(time_type mod_date)717 bool document::set_modification_date(time_type mod_date)
718 {
719     if (d->is_locked) {
720         return false;
721     }
722 
723     GooString *goo_mod_date;
724 
725     if (mod_date == time_type(-1)) {
726         goo_mod_date = nullptr;
727     } else {
728         time_t t = static_cast<time_t>(mod_date);
729         goo_mod_date = timeToDateString(&t);
730     }
731 
732     d->doc->setDocInfoModDate(goo_mod_date);
733     return true;
734 }
735 
736 /**
737  Removes the %document's Info dictionary.
738 
739  \returns true on success, false on failure
740  */
remove_info()741 bool document::remove_info()
742 {
743     if (d->is_locked) {
744         return false;
745     }
746 
747     d->doc->removeDocInfo();
748     return true;
749 }
750 
751 /**
752  \returns whether the document is encrypted
753  */
is_encrypted() const754 bool document::is_encrypted() const
755 {
756     return d->doc->isEncrypted();
757 }
758 
759 /**
760  \returns whether the document is linearized
761  */
is_linearized() const762 bool document::is_linearized() const
763 {
764     return d->doc->isLinearized();
765 }
766 
767 /**
768  Check for available "document permission".
769 
770  \returns whether the specified permission is allowed
771  */
has_permission(permission_enum which) const772 bool document::has_permission(permission_enum which) const
773 {
774     switch (which) {
775     case perm_print:
776         return d->doc->okToPrint();
777     case perm_change:
778         return d->doc->okToChange();
779     case perm_copy:
780         return d->doc->okToCopy();
781     case perm_add_notes:
782         return d->doc->okToAddNotes();
783     case perm_fill_forms:
784         return d->doc->okToFillForm();
785     case perm_accessibility:
786         return d->doc->okToAccessibility();
787     case perm_assemble:
788         return d->doc->okToAssemble();
789     case perm_print_high_resolution:
790         return d->doc->okToPrintHighRes();
791     }
792     return true;
793 }
794 
795 /**
796  Reads the %document metadata string.
797 
798  \return the %document metadata string
799  */
metadata() const800 ustring document::metadata() const
801 {
802     std::unique_ptr<GooString> md(d->doc->getCatalog()->readMetadata());
803     if (md.get()) {
804         return detail::unicode_GooString_to_ustring(md.get());
805     }
806     return ustring();
807 }
808 
809 /**
810  Gets the IDs of the current PDF %document, if available.
811 
812  \param permanent_id if not NULL, will be set to the permanent ID of the %document
813  \param update_id if not NULL, will be set to the update ID of the %document
814 
815  \returns whether the document has the IDs
816 
817  \since 0.16
818  */
get_pdf_id(std::string * permanent_id,std::string * update_id) const819 bool document::get_pdf_id(std::string *permanent_id, std::string *update_id) const
820 {
821     GooString goo_permanent_id;
822     GooString goo_update_id;
823 
824     if (!d->doc->getID(permanent_id ? &goo_permanent_id : nullptr, update_id ? &goo_update_id : nullptr)) {
825         return false;
826     }
827 
828     if (permanent_id) {
829         *permanent_id = goo_permanent_id.c_str();
830     }
831     if (update_id) {
832         *update_id = goo_update_id.c_str();
833     }
834 
835     return true;
836 }
837 
838 /**
839  Document page count.
840 
841  \returns the number of pages of the document
842  */
pages() const843 int document::pages() const
844 {
845     return d->doc->getNumPages();
846 }
847 
848 /**
849  Document page by label reading.
850 
851  This creates a new page representing the %document %page whose label is the
852  specified \p label. If there is no page with that \p label, NULL is returned.
853 
854  \returns a new page object or NULL
855  */
create_page(const ustring & label) const856 page *document::create_page(const ustring &label) const
857 {
858     std::unique_ptr<GooString> goolabel(detail::ustring_to_unicode_GooString(label));
859     int index = 0;
860 
861     if (!d->doc->getCatalog()->labelToIndex(goolabel.get(), &index)) {
862         return nullptr;
863     }
864     return create_page(index);
865 }
866 
867 /**
868  Document page by index reading.
869 
870  This creates a new page representing the \p index -th %page of the %document.
871  \note the page indexes are in the range [0, pages()[.
872 
873  \returns a new page object or NULL
874  */
create_page(int index) const875 page *document::create_page(int index) const
876 {
877     if (index >= 0 && index < d->doc->getNumPages()) {
878         page *p = new page(d, index);
879         if (p->d->page) {
880             return p;
881         } else {
882             delete p;
883             return nullptr;
884         }
885     } else {
886         return nullptr;
887     }
888 }
889 
890 /**
891  Reads all the font information of the %document.
892 
893  \note this can be slow for big documents; prefer the use of a font_iterator
894  to read incrementally page by page
895  \see create_font_iterator
896  */
fonts() const897 std::vector<font_info> document::fonts() const
898 {
899     std::vector<font_info> result;
900     font_iterator it(0, d);
901     while (it.has_next()) {
902         const std::vector<font_info> l = it.next();
903         std::copy(l.begin(), l.end(), std::back_inserter(result));
904     }
905     return result;
906 }
907 
908 /**
909  Creates a new font iterator.
910 
911  This creates a new font iterator for reading the font information of the
912  %document page by page, starting at the specified \p start_page (0 if not
913  specified).
914 
915  \returns a new font iterator
916  */
create_font_iterator(int start_page) const917 font_iterator *document::create_font_iterator(int start_page) const
918 {
919     return new font_iterator(start_page, d);
920 }
921 
922 /**
923  Reads the TOC (table of contents) of the %document.
924 
925  \returns a new toc object if a TOC is available, NULL otherwise
926  */
create_toc() const927 toc *document::create_toc() const
928 {
929     return toc_private::load_from_outline(d->doc->getOutline());
930 }
931 
932 /**
933  Reads whether the current document has %document-level embedded files
934  (attachments).
935 
936  This is a very fast way to know whether there are embedded files (also known
937  as "attachments") at the %document-level. Note this does not take into account
938  files embedded in other ways (e.g. to annotations).
939 
940  \returns whether the document has embedded files
941  */
has_embedded_files() const942 bool document::has_embedded_files() const
943 {
944     return d->doc->getCatalog()->numEmbeddedFiles() > 0;
945 }
946 
947 /**
948  Reads all the %document-level embedded files of the %document.
949 
950  \returns the %document-level embedded files
951  */
embedded_files() const952 std::vector<embedded_file *> document::embedded_files() const
953 {
954     if (d->is_locked) {
955         return std::vector<embedded_file *>();
956     }
957 
958     if (d->embedded_files.empty() && d->doc->getCatalog()->numEmbeddedFiles() > 0) {
959         const int num = d->doc->getCatalog()->numEmbeddedFiles();
960         d->embedded_files.resize(num);
961         for (int i = 0; i < num; ++i) {
962             FileSpec *fs = d->doc->getCatalog()->embeddedFile(i);
963             d->embedded_files[i] = embedded_file_private::create(fs);
964         }
965     }
966     return d->embedded_files;
967 }
968 
969 /**
970  Creates a map of all the named destinations in the %document.
971 
972  \note The destination names may contain \\0 and other binary values
973  so they are not printable and cannot convert to null-terminated C strings.
974 
975  \returns the map of the each name and destination
976 
977  \since 0.74
978  */
create_destination_map() const979 std::map<std::string, destination> document::create_destination_map() const
980 {
981     std::map<std::string, destination> m;
982 
983     Catalog *catalog = d->doc->getCatalog();
984     if (!catalog)
985         return m;
986 
987     // Iterate from name-dict
988     const int nDests = catalog->numDests();
989     for (int i = 0; i < nDests; ++i) {
990         std::string key(catalog->getDestsName(i));
991         std::unique_ptr<LinkDest> link_dest = catalog->getDestsDest(i);
992 
993         if (link_dest) {
994             destination dest(new destination_private(link_dest.get(), d->doc));
995 
996             m.emplace(std::move(key), std::move(dest));
997         }
998     }
999 
1000     // Iterate from name-tree
1001     const int nDestsNameTree = catalog->numDestNameTree();
1002     for (int i = 0; i < nDestsNameTree; ++i) {
1003         std::string key(catalog->getDestNameTreeName(i)->c_str(), catalog->getDestNameTreeName(i)->getLength());
1004         std::unique_ptr<LinkDest> link_dest = catalog->getDestNameTreeDest(i);
1005 
1006         if (link_dest) {
1007             destination dest(new destination_private(link_dest.get(), d->doc));
1008 
1009             m.emplace(std::move(key), std::move(dest));
1010         }
1011     }
1012 
1013     return m;
1014 }
1015 
1016 /**
1017  Saves the %document to file \p file_name.
1018 
1019  \returns true on success, false on failure
1020  */
save(const std::string & file_name) const1021 bool document::save(const std::string &file_name) const
1022 {
1023     if (d->is_locked) {
1024         return false;
1025     }
1026 
1027     GooString fname(file_name.c_str());
1028     return d->doc->saveAs(&fname) == errNone;
1029 }
1030 
1031 /**
1032  Saves the original version of the %document to file \p file_name.
1033 
1034  \returns true on success, false on failure
1035  */
save_a_copy(const std::string & file_name) const1036 bool document::save_a_copy(const std::string &file_name) const
1037 {
1038     if (d->is_locked) {
1039         return false;
1040     }
1041 
1042     GooString fname(file_name.c_str());
1043     return d->doc->saveWithoutChangesAs(&fname) == errNone;
1044 }
1045 
1046 /**
1047  Tries to load a PDF %document from the specified file.
1048 
1049  \param file_name the file to open
1050  \returns a new document if the load succeeded (even if the document is locked),
1051           NULL otherwise
1052  */
load_from_file(const std::string & file_name,const std::string & owner_password,const std::string & user_password)1053 document *document::load_from_file(const std::string &file_name, const std::string &owner_password, const std::string &user_password)
1054 {
1055     document_private *doc = new document_private(new GooString(file_name.c_str()), owner_password, user_password);
1056     return document_private::check_document(doc, nullptr);
1057 }
1058 
1059 /**
1060  Tries to load a PDF %document from the specified data.
1061 
1062  \note if the loading succeeds, the document takes ownership of the
1063        \p file_data (swap()ing it)
1064 
1065  \param file_data the data representing a document to open
1066  \returns a new document if the load succeeded (even if the document is locked),
1067           NULL otherwise
1068  */
load_from_data(byte_array * file_data,const std::string & owner_password,const std::string & user_password)1069 document *document::load_from_data(byte_array *file_data, const std::string &owner_password, const std::string &user_password)
1070 {
1071     if (!file_data || file_data->size() < 10) {
1072         return nullptr;
1073     }
1074 
1075     document_private *doc = new document_private(file_data, owner_password, user_password);
1076     return document_private::check_document(doc, file_data);
1077 }
1078 
1079 /**
1080  Tries to load a PDF %document from the specified data buffer.
1081 
1082  \note the buffer must remain valid for the whole lifetime of the returned
1083        document
1084 
1085  \param file_data the data buffer representing a document to open
1086  \param file_data_length the length of the data buffer
1087 
1088  \returns a new document if the load succeeded (even if the document is locked),
1089           NULL otherwise
1090 
1091  \since 0.16
1092  */
load_from_raw_data(const char * file_data,int file_data_length,const std::string & owner_password,const std::string & user_password)1093 document *document::load_from_raw_data(const char *file_data, int file_data_length, const std::string &owner_password, const std::string &user_password)
1094 {
1095     if (!file_data || file_data_length < 10) {
1096         return nullptr;
1097     }
1098 
1099     document_private *doc = new document_private(file_data, file_data_length, owner_password, user_password);
1100     return document_private::check_document(doc, nullptr);
1101 }
1102