1 //========================================================================
2 //
3 // PDFDoc.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17 // Copyright (C) 2005, 2007-2009, 2011-2021 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
31 // Copyright (C) 2013, 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013, 2018 Adam Reichold <adamreichold@myopera.com>
33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
35 // Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
36 // Copyright (C) 2015 André Esser <bepandre@hotmail.com>
37 // Copyright (C) 2016, 2020 Jakub Alba <jakubalba@gmail.com>
38 // Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
39 // Copyright (C) 2017 Fredrik Fornwall <fredrik@fornwall.net>
40 // Copyright (C) 2018 Ben Timby <btimby@gmail.com>
41 // Copyright (C) 2018 Evangelos Foutras <evangelos@foutrelis.com>
42 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
43 // Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
44 // Copyright (C) 2018 Philipp Knechtges <philipp-dev@knechtges.com>
45 // Copyright (C) 2019 Christian Persch <chpe@src.gnome.org>
46 // Copyright (C) 2020 Nelson Benítez León <nbenitezl@gmail.com>
47 // Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de>
48 // Copyright (C) 2020 Adam Sampson <ats@offog.org>
49 // Copyright (C) 2021 Oliver Sander <oliver.sander@tu-dresden.de>
50 // Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
51 // Copyright (C) 2021 RM <rm+git@arcsin.org>
52 // Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
53 // Copyright (C) 2021 Marek Kasik <mkasik@redhat.com>
54 //
55 // To see a description of the changes please see the Changelog file that
56 // came with your tarball or type make ChangeLog if you are building from git
57 //
58 //========================================================================
59
60 #include <config.h>
61 #include <poppler-config.h>
62
63 #include <cctype>
64 #include <clocale>
65 #include <cstdio>
66 #include <cerrno>
67 #include <climits>
68 #include <cstdlib>
69 #include <cstddef>
70 #include <cstring>
71 #include <ctime>
72 #include <regex>
73 #include <sys/stat.h>
74 #include "goo/glibc.h"
75 #include "goo/gstrtod.h"
76 #include "goo/GooString.h"
77 #include "goo/gfile.h"
78 #include "poppler-config.h"
79 #include "GlobalParams.h"
80 #include "Page.h"
81 #include "Catalog.h"
82 #include "Stream.h"
83 #include "XRef.h"
84 #include "Linearization.h"
85 #include "Link.h"
86 #include "OutputDev.h"
87 #include "Error.h"
88 #include "ErrorCodes.h"
89 #include "Lexer.h"
90 #include "Parser.h"
91 #include "SecurityHandler.h"
92 #include "Decrypt.h"
93 #include "Outline.h"
94 #include "PDFDoc.h"
95 #include "Hints.h"
96 #include "UTF.h"
97 #include "JSInfo.h"
98 #include "ImageEmbeddingUtils.h"
99
100 //------------------------------------------------------------------------
101
// Number of bytes read from the beginning of the file when looking for
// the '%PDF' header.
#define headerSearchSize \
    1024 // read this many bytes at beginning of
         // file to look for '%PDF'
// Length of a PDF Document ID (PermanentId, UpdateId) once it has been
// written out as hexadecimal text.
#define pdfIdLength 32 // PDF Document IDs (PermanentId, UpdateId) length

// Number of bytes read from the beginning of the file when looking for
// the linearization dictionary.
#define linearizationSearchSize \
    1024 // read this many bytes at beginning of
         // file to look for linearization
         // dictionary

// Number of bytes read from the end of the file when looking for
// 'startxref'.
#define xrefSearchSize \
    1024 // read this many bytes at end of file
         // to look for 'startxref'
115
116 //------------------------------------------------------------------------
117 // PDFDoc
118 //------------------------------------------------------------------------
119
// Declares a local variable named `locker` that holds a
// std::unique_lock on whatever `mutex` names at the expansion site; the
// lock is released when the enclosing scope ends.
#define pdfdocLocker() std::unique_lock<std::recursive_mutex> locker(mutex)
121
init()122 void PDFDoc::init()
123 {
124 ok = false;
125 errCode = errNone;
126 fileName = nullptr;
127 file = nullptr;
128 str = nullptr;
129 xref = nullptr;
130 linearization = nullptr;
131 catalog = nullptr;
132 hints = nullptr;
133 outline = nullptr;
134 startXRefPos = -1;
135 secHdlr = nullptr;
136 pageCache = nullptr;
137 }
138
// Default constructor: only runs init(), so the document has no stream,
// no xref and no catalog, and ok is false.
PDFDoc::PDFDoc()
{
    init();
}
143
PDFDoc(const GooString * fileNameA,const GooString * ownerPassword,const GooString * userPassword,void * guiDataA,const std::function<void ()> & xrefReconstructedCallback)144 PDFDoc::PDFDoc(const GooString *fileNameA, const GooString *ownerPassword, const GooString *userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback)
145 {
146 #ifdef _WIN32
147 int n, i;
148 #endif
149
150 init();
151
152 fileName = fileNameA;
153 guiData = guiDataA;
154 #ifdef _WIN32
155 n = fileName->getLength();
156 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
157 for (i = 0; i < n; ++i) {
158 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
159 }
160 fileNameU[n] = L'\0';
161 #endif
162
163 // try to open file
164 #ifdef _WIN32
165 wchar_t *wFileName = (wchar_t *)utf8ToUtf16(fileName->c_str());
166 file = GooFile::open(wFileName);
167 gfree(wFileName);
168 #else
169 file = GooFile::open(fileName->toStr());
170 #endif
171 if (file == nullptr) {
172 // fopen() has failed.
173 // Keep a copy of the errno returned by fopen so that it can be
174 // referred to later.
175 fopenErrno = errno;
176 error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName, strerror(errno));
177 errCode = errOpenFile;
178 return;
179 }
180
181 // create stream
182 str = new FileStream(file, 0, false, file->size(), Object(objNull));
183
184 ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
185 }
186
187 #ifdef _WIN32
PDFDoc(wchar_t * fileNameA,int fileNameLen,GooString * ownerPassword,GooString * userPassword,void * guiDataA,const std::function<void ()> & xrefReconstructedCallback)188 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword, GooString *userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback)
189 {
190 OSVERSIONINFO version;
191 int i;
192
193 init();
194
195 guiData = guiDataA;
196
197 // save both Unicode and 8-bit copies of the file name
198 GooString *fileNameG = new GooString();
199 fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
200 for (i = 0; i < fileNameLen; ++i) {
201 fileNameG->append((char)fileNameA[i]);
202 fileNameU[i] = fileNameA[i];
203 }
204 fileName = fileNameG;
205 fileNameU[fileNameLen] = L'\0';
206
207 // try to open file
208 // NB: _wfopen is only available in NT
209 version.dwOSVersionInfoSize = sizeof(version);
210 GetVersionEx(&version);
211 if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
212 file = GooFile::open(fileNameU);
213 } else {
214 file = GooFile::open(fileName->toStr());
215 }
216 if (!file) {
217 error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
218 errCode = errOpenFile;
219 return;
220 }
221
222 // create stream
223 str = new FileStream(file, 0, false, file->size(), Object(objNull));
224
225 ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
226 }
227 #endif
228
PDFDoc(BaseStream * strA,const GooString * ownerPassword,const GooString * userPassword,void * guiDataA,const std::function<void ()> & xrefReconstructedCallback)229 PDFDoc::PDFDoc(BaseStream *strA, const GooString *ownerPassword, const GooString *userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback)
230 {
231 #ifdef _WIN32
232 int n, i;
233 #endif
234
235 init();
236 guiData = guiDataA;
237 if (strA->getFileName()) {
238 fileName = strA->getFileName()->copy();
239 #ifdef _WIN32
240 n = fileName->getLength();
241 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
242 for (i = 0; i < n; ++i) {
243 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
244 }
245 fileNameU[n] = L'\0';
246 #endif
247 } else {
248 fileName = nullptr;
249 #ifdef _WIN32
250 fileNameU = NULL;
251 #endif
252 }
253 str = strA;
254 ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
255 }
256
setup(const GooString * ownerPassword,const GooString * userPassword,const std::function<void ()> & xrefReconstructedCallback)257 bool PDFDoc::setup(const GooString *ownerPassword, const GooString *userPassword, const std::function<void()> &xrefReconstructedCallback)
258 {
259 pdfdocLocker();
260
261 if (str->getLength() <= 0) {
262 error(errSyntaxError, -1, "Document stream is empty");
263 errCode = errDamaged;
264 return false;
265 }
266
267 str->setPos(0, -1);
268 if (str->getPos() < 0) {
269 error(errSyntaxError, -1, "Document base stream is not seekable");
270 errCode = errFileIO;
271 return false;
272 }
273
274 str->reset();
275
276 // check footer
277 // Adobe does not seem to enforce %%EOF, so we do the same
278 // if (!checkFooter()) return false;
279
280 // check header
281 checkHeader();
282
283 bool wasReconstructed = false;
284
285 // read xref table
286 xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed, false, xrefReconstructedCallback);
287 if (!xref->isOk()) {
288 if (wasReconstructed) {
289 delete xref;
290 startXRefPos = -1;
291 xref = new XRef(str, getStartXRef(true), getMainXRefEntriesOffset(true), &wasReconstructed, false, xrefReconstructedCallback);
292 }
293 if (!xref->isOk()) {
294 error(errSyntaxError, -1, "Couldn't read xref table");
295 errCode = xref->getErrorCode();
296 return false;
297 }
298 }
299
300 // check for encryption
301 if (!checkEncryption(ownerPassword, userPassword)) {
302 errCode = errEncrypted;
303 return false;
304 }
305
306 // read catalog
307 catalog = new Catalog(this);
308 if (catalog && !catalog->isOk()) {
309 if (!wasReconstructed) {
310 // try one more time to construct the Catalog, maybe the problem is damaged XRef
311 delete catalog;
312 delete xref;
313 xref = new XRef(str, 0, 0, nullptr, true, xrefReconstructedCallback);
314 catalog = new Catalog(this);
315 }
316
317 if (catalog && !catalog->isOk()) {
318 error(errSyntaxError, -1, "Couldn't read page catalog");
319 errCode = errBadCatalog;
320 return false;
321 }
322 }
323
324 // Extract PDF Subtype information
325 extractPDFSubtype();
326
327 // done
328 return true;
329 }
330
~PDFDoc()331 PDFDoc::~PDFDoc()
332 {
333 if (pageCache) {
334 for (int i = 0; i < getNumPages(); i++) {
335 if (pageCache[i]) {
336 delete pageCache[i];
337 }
338 }
339 gfree(pageCache);
340 }
341 delete secHdlr;
342 if (outline) {
343 delete outline;
344 }
345 if (catalog) {
346 delete catalog;
347 }
348 if (xref) {
349 delete xref;
350 }
351 if (hints) {
352 delete hints;
353 }
354 if (linearization) {
355 delete linearization;
356 }
357 if (str) {
358 delete str;
359 }
360 if (file) {
361 delete file;
362 }
363 if (fileName) {
364 delete fileName;
365 }
366 #ifdef _WIN32
367 if (fileNameU) {
368 gfree(fileNameU);
369 }
370 #endif
371 }
372
373 // Check for a %%EOF at the end of this stream
checkFooter()374 bool PDFDoc::checkFooter()
375 {
376 // we look in the last 1024 chars because Adobe does the same
377 char *eof = new char[1025];
378 Goffset pos = str->getPos();
379 str->setPos(1024, -1);
380 int i, ch;
381 for (i = 0; i < 1024; i++) {
382 ch = str->getChar();
383 if (ch == EOF)
384 break;
385 eof[i] = ch;
386 }
387 eof[i] = '\0';
388
389 bool found = false;
390 for (i = i - 5; i >= 0; i--) {
391 if (strncmp(&eof[i], "%%EOF", 5) == 0) {
392 found = true;
393 break;
394 }
395 }
396 if (!found) {
397 error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
398 errCode = errDamaged;
399 delete[] eof;
400 return false;
401 }
402 delete[] eof;
403 str->setPos(pos);
404 return true;
405 }
406
407 // Check for a PDF header on this stream. Skip past some garbage
408 // if necessary.
checkHeader()409 void PDFDoc::checkHeader()
410 {
411 char hdrBuf[headerSearchSize + 1];
412 char *p;
413 char *tokptr;
414 int i;
415 int bytesRead;
416
417 headerPdfMajorVersion = 0;
418 headerPdfMinorVersion = 0;
419
420 // read up to headerSearchSize bytes from the beginning of the document
421 for (i = 0; i < headerSearchSize; ++i) {
422 const int c = str->getChar();
423 if (c == EOF)
424 break;
425 hdrBuf[i] = c;
426 }
427 bytesRead = i;
428 hdrBuf[bytesRead] = '\0';
429
430 // find the start of the PDF header if it exists and parse the version
431 bool headerFound = false;
432 for (i = 0; i < bytesRead - 5; ++i) {
433 if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
434 headerFound = true;
435 break;
436 }
437 }
438 if (!headerFound) {
439 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
440 return;
441 }
442 str->moveStart(i);
443 if (!(p = strtok_r(&hdrBuf[i + 5], " \t\n\r", &tokptr))) {
444 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
445 return;
446 }
447 sscanf(p, "%d.%d", &headerPdfMajorVersion, &headerPdfMinorVersion);
448 // We don't do the version check. Don't add it back in.
449 }
450
checkEncryption(const GooString * ownerPassword,const GooString * userPassword)451 bool PDFDoc::checkEncryption(const GooString *ownerPassword, const GooString *userPassword)
452 {
453 bool encrypted;
454 bool ret;
455
456 Object encrypt = xref->getTrailerDict()->dictLookup("Encrypt");
457 if ((encrypted = encrypt.isDict())) {
458 if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
459 if (secHdlr->isUnencrypted()) {
460 // no encryption
461 ret = true;
462 } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
463 // authorization succeeded
464 xref->setEncryption(secHdlr->getPermissionFlags(), secHdlr->getOwnerPasswordOk(), secHdlr->getFileKey(), secHdlr->getFileKeyLength(), secHdlr->getEncVersion(), secHdlr->getEncRevision(), secHdlr->getEncAlgorithm());
465 ret = true;
466 } else {
467 // authorization failed
468 ret = false;
469 }
470 } else {
471 // couldn't find the matching security handler
472 ret = false;
473 }
474 } else {
475 // document is not encrypted
476 ret = true;
477 }
478 return ret;
479 }
480
pdfPartFromString(PDFSubtype subtype,GooString * pdfSubtypeVersion)481 static PDFSubtypePart pdfPartFromString(PDFSubtype subtype, GooString *pdfSubtypeVersion)
482 {
483 const std::regex regex("PDF/(?:A|X|VT|E|UA)-([[:digit:]])(?:[[:alpha:]]{1,2})?:?([[:digit:]]{4})?");
484 std::smatch match;
485 const std::string &pdfsubver = pdfSubtypeVersion->toStr();
486 PDFSubtypePart subtypePart = subtypePartNone;
487
488 if (std::regex_search(pdfsubver, match, regex)) {
489 int date = 0;
490 const int part = std::stoi(match.str(1));
491
492 if (match[2].matched) {
493 date = std::stoi(match.str(2));
494 }
495
496 switch (subtype) {
497 case subtypePDFX:
498 switch (part) {
499 case 1:
500 switch (date) {
501 case 2001:
502 default:
503 subtypePart = subtypePart1;
504 break;
505 case 2003:
506 subtypePart = subtypePart4;
507 break;
508 }
509 break;
510 case 2:
511 subtypePart = subtypePart5;
512 break;
513 case 3:
514 switch (date) {
515 case 2002:
516 default:
517 subtypePart = subtypePart3;
518 break;
519 case 2003:
520 subtypePart = subtypePart6;
521 break;
522 }
523 break;
524 case 4:
525 subtypePart = subtypePart7;
526 break;
527 case 5:
528 subtypePart = subtypePart8;
529 break;
530 }
531 break;
532 default:
533 subtypePart = (PDFSubtypePart)part;
534 break;
535 }
536 }
537
538 return subtypePart;
539 }
540
pdfConformanceFromString(GooString * pdfSubtypeVersion)541 static PDFSubtypeConformance pdfConformanceFromString(GooString *pdfSubtypeVersion)
542 {
543 const std::regex regex("PDF/(?:A|X|VT|E|UA)-[[:digit:]]([[:alpha:]]+)");
544 std::smatch match;
545 const std::string &pdfsubver = pdfSubtypeVersion->toStr();
546 PDFSubtypeConformance pdfConf = subtypeConfNone;
547
548 // match contains the PDF conformance (A, B, G, N, P, PG or U)
549 if (std::regex_search(pdfsubver, match, regex)) {
550 GooString *conf = new GooString(match.str(1));
551 // Convert to lowercase as the conformance may appear in both cases
552 conf->lowerCase();
553 if (conf->cmp("a") == 0) {
554 pdfConf = subtypeConfA;
555 } else if (conf->cmp("b") == 0) {
556 pdfConf = subtypeConfB;
557 } else if (conf->cmp("g") == 0) {
558 pdfConf = subtypeConfG;
559 } else if (conf->cmp("n") == 0) {
560 pdfConf = subtypeConfN;
561 } else if (conf->cmp("p") == 0) {
562 pdfConf = subtypeConfP;
563 } else if (conf->cmp("pg") == 0) {
564 pdfConf = subtypeConfPG;
565 } else if (conf->cmp("u") == 0) {
566 pdfConf = subtypeConfU;
567 } else {
568 pdfConf = subtypeConfNone;
569 }
570 delete conf;
571 }
572
573 return pdfConf;
574 }
575
extractPDFSubtype()576 void PDFDoc::extractPDFSubtype()
577 {
578 pdfSubtype = subtypeNull;
579 pdfPart = subtypePartNull;
580 pdfConformance = subtypeConfNull;
581
582 GooString *pdfSubtypeVersion = nullptr;
583 // Find PDF InfoDict subtype key if any
584 if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFA1Version"))) {
585 pdfSubtype = subtypePDFA;
586 } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFEVersion"))) {
587 pdfSubtype = subtypePDFE;
588 } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFUAVersion"))) {
589 pdfSubtype = subtypePDFUA;
590 } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFVTVersion"))) {
591 pdfSubtype = subtypePDFVT;
592 } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFXVersion"))) {
593 pdfSubtype = subtypePDFX;
594 } else {
595 pdfSubtype = subtypeNone;
596 pdfPart = subtypePartNone;
597 pdfConformance = subtypeConfNone;
598 return;
599 }
600
601 // Extract part from version string
602 pdfPart = pdfPartFromString(pdfSubtype, pdfSubtypeVersion);
603
604 // Extract conformance from version string
605 pdfConformance = pdfConformanceFromString(pdfSubtypeVersion);
606
607 delete pdfSubtypeVersion;
608 }
609
addSignatureFieldsToVector(FormField * ff,std::vector<FormFieldSignature * > & res)610 static void addSignatureFieldsToVector(FormField *ff, std::vector<FormFieldSignature *> &res)
611 {
612 if (ff->getNumChildren() == 0) {
613 if (ff->getType() == formSignature) {
614 res.push_back(static_cast<FormFieldSignature *>(ff));
615 }
616 } else {
617 for (int i = 0; i < ff->getNumChildren(); ++i) {
618 FormField *children = ff->getChildren(i);
619 addSignatureFieldsToVector(children, res);
620 }
621 }
622 }
623
getSignatureFields()624 std::vector<FormFieldSignature *> PDFDoc::getSignatureFields()
625 {
626 std::vector<FormFieldSignature *> res;
627
628 const Form *f = catalog->getForm();
629 if (!f)
630 return res;
631
632 const int nRootFields = f->getNumFields();
633 for (int i = 0; i < nRootFields; ++i) {
634 FormField *ff = f->getRootField(i);
635 addSignatureFieldsToVector(ff, res);
636 }
637 return res;
638 }
639
getNumSignatureFields()640 int PDFDoc::getNumSignatureFields()
641 {
642 const Form *f = catalog->getForm();
643
644 if (!f)
645 return 0;
646
647 return f->getNumFields();
648 }
649
displayPage(OutputDev * out,int page,double hDPI,double vDPI,int rotate,bool useMediaBox,bool crop,bool printing,bool (* abortCheckCbk)(void * data),void * abortCheckCbkData,bool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData,bool copyXRef)650 void PDFDoc::displayPage(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data), void *abortCheckCbkData,
651 bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData, bool copyXRef)
652 {
653 if (globalParams->getPrintCommands()) {
654 printf("***** page %d *****\n", page);
655 }
656
657 if (getPage(page))
658 getPage(page)->display(out, hDPI, vDPI, rotate, useMediaBox, crop, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
659 }
660
displayPages(OutputDev * out,int firstPage,int lastPage,double hDPI,double vDPI,int rotate,bool useMediaBox,bool crop,bool printing,bool (* abortCheckCbk)(void * data),void * abortCheckCbkData,bool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData)661 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data), void *abortCheckCbkData,
662 bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData)
663 {
664 int page;
665
666 for (page = firstPage; page <= lastPage; ++page) {
667 displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData);
668 }
669 }
670
displayPageSlice(OutputDev * out,int page,double hDPI,double vDPI,int rotate,bool useMediaBox,bool crop,bool printing,int sliceX,int sliceY,int sliceW,int sliceH,bool (* abortCheckCbk)(void * data),void * abortCheckCbkData,bool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData,bool copyXRef)671 void PDFDoc::displayPageSlice(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, int sliceX, int sliceY, int sliceW, int sliceH, bool (*abortCheckCbk)(void *data),
672 void *abortCheckCbkData, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData, bool copyXRef)
673 {
674 if (getPage(page))
675 getPage(page)->displaySlice(out, hDPI, vDPI, rotate, useMediaBox, crop, sliceX, sliceY, sliceW, sliceH, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
676 }
677
getLinks(int page)678 std::unique_ptr<Links> PDFDoc::getLinks(int page)
679 {
680 Page *p = getPage(page);
681 if (!p) {
682 return std::make_unique<Links>(nullptr);
683 }
684 return p->getLinks();
685 }
686
processLinks(OutputDev * out,int page)687 void PDFDoc::processLinks(OutputDev *out, int page)
688 {
689 if (getPage(page))
690 getPage(page)->processLinks(out);
691 }
692
getLinearization()693 Linearization *PDFDoc::getLinearization()
694 {
695 if (!linearization) {
696 linearization = new Linearization(str);
697 linearizationState = 0;
698 }
699 return linearization;
700 }
701
checkLinearization()702 bool PDFDoc::checkLinearization()
703 {
704 if (linearization == nullptr)
705 return false;
706 if (linearizationState == 1)
707 return true;
708 if (linearizationState == 2)
709 return false;
710 if (!hints) {
711 hints = new Hints(str, linearization, getXRef(), secHdlr);
712 }
713 if (!hints->isOk()) {
714 linearizationState = 2;
715 return false;
716 }
717 for (int page = 1; page <= linearization->getNumPages(); page++) {
718 Ref pageRef;
719
720 pageRef.num = hints->getPageObjectNum(page);
721 if (!pageRef.num) {
722 linearizationState = 2;
723 return false;
724 }
725
726 // check for bogus ref - this can happen in corrupted PDF files
727 if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
728 linearizationState = 2;
729 return false;
730 }
731
732 pageRef.gen = xref->getEntry(pageRef.num)->gen;
733 Object obj = xref->fetch(pageRef);
734 if (!obj.isDict("Page")) {
735 linearizationState = 2;
736 return false;
737 }
738 }
739 linearizationState = 1;
740 return true;
741 }
742
isLinearized(bool tryingToReconstruct)743 bool PDFDoc::isLinearized(bool tryingToReconstruct)
744 {
745 if ((str->getLength()) && (getLinearization()->getLength() == str->getLength()))
746 return true;
747 else {
748 if (tryingToReconstruct)
749 return getLinearization()->getLength() > 0;
750 else
751 return false;
752 }
753 }
754
setDocInfoStringEntry(const char * key,GooString * value)755 void PDFDoc::setDocInfoStringEntry(const char *key, GooString *value)
756 {
757 bool removeEntry = !value || value->getLength() == 0 || value->hasJustUnicodeMarker();
758 if (removeEntry) {
759 delete value;
760 }
761
762 Object infoObj = getDocInfo();
763 if (infoObj.isNull() && removeEntry) {
764 // No info dictionary, so no entry to remove.
765 return;
766 }
767
768 Ref infoObjRef;
769 infoObj = xref->createDocInfoIfNeeded(&infoObjRef);
770 if (removeEntry) {
771 infoObj.dictSet(key, Object(objNull));
772 } else {
773 infoObj.dictSet(key, Object(value));
774 }
775
776 if (infoObj.dictGetLength() == 0) {
777 // Info dictionary is empty. Remove it altogether.
778 removeDocInfo();
779 } else {
780 xref->setModifiedObject(&infoObj, infoObjRef);
781 }
782 }
783
getDocInfoStringEntry(const char * key)784 GooString *PDFDoc::getDocInfoStringEntry(const char *key)
785 {
786 Object infoObj = getDocInfo();
787 if (!infoObj.isDict()) {
788 return nullptr;
789 }
790
791 Object entryObj = infoObj.dictLookup(key);
792
793 GooString *result;
794
795 if (entryObj.isString()) {
796 result = entryObj.takeString();
797 } else {
798 result = nullptr;
799 }
800
801 return result;
802 }
803
get_id(const GooString * encodedidstring,GooString * id)804 static bool get_id(const GooString *encodedidstring, GooString *id)
805 {
806 const char *encodedid = encodedidstring->c_str();
807 char pdfid[pdfIdLength + 1];
808 int n;
809
810 if (encodedidstring->getLength() != pdfIdLength / 2)
811 return false;
812
813 n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff, encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff,
814 encodedid[7] & 0xff, encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff, encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
815 if (n != pdfIdLength)
816 return false;
817
818 id->Set(pdfid, pdfIdLength);
819 return true;
820 }
821
getID(GooString * permanent_id,GooString * update_id) const822 bool PDFDoc::getID(GooString *permanent_id, GooString *update_id) const
823 {
824 Object obj = xref->getTrailerDict()->dictLookup("ID");
825
826 if (obj.isArray() && obj.arrayGetLength() == 2) {
827 if (permanent_id) {
828 Object obj2 = obj.arrayGet(0);
829 if (obj2.isString()) {
830 if (!get_id(obj2.getString(), permanent_id)) {
831 return false;
832 }
833 } else {
834 error(errSyntaxError, -1, "Invalid permanent ID");
835 return false;
836 }
837 }
838
839 if (update_id) {
840 Object obj2 = obj.arrayGet(1);
841 if (obj2.isString()) {
842 if (!get_id(obj2.getString(), update_id)) {
843 return false;
844 }
845 } else {
846 error(errSyntaxError, -1, "Invalid update ID");
847 return false;
848 }
849 }
850
851 return true;
852 }
853
854 return false;
855 }
856
getHints()857 Hints *PDFDoc::getHints()
858 {
859 if (!hints && isLinearized()) {
860 hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
861 }
862
863 return hints;
864 }
865
savePageAs(const GooString * name,int pageNo)866 int PDFDoc::savePageAs(const GooString *name, int pageNo)
867 {
868 FILE *f;
869 OutStream *outStr;
870 XRef *yRef, *countRef;
871
872 if (file && file->modificationTimeChangedSinceOpen())
873 return errFileChangedSinceOpen;
874
875 int rootNum = getXRef()->getNumObjects() + 1;
876
877 // Make sure that special flags are set, because we are going to read
878 // all objects, including Unencrypted ones.
879 xref->scanSpecialFlags();
880
881 unsigned char *fileKey;
882 CryptAlgorithm encAlgorithm;
883 int keyLength;
884 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
885
886 if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
887 error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages());
888 return errOpenFile;
889 }
890 const PDFRectangle *cropBox = nullptr;
891 if (getCatalog()->getPage(pageNo)->isCropped()) {
892 cropBox = getCatalog()->getPage(pageNo)->getCropBox();
893 }
894 replacePageDict(pageNo, getCatalog()->getPage(pageNo)->getRotate(), getCatalog()->getPage(pageNo)->getMediaBox(), cropBox);
895 Ref *refPage = getCatalog()->getPageRef(pageNo);
896 Object page = getXRef()->fetch(*refPage);
897
898 if (!(f = openFile(name->c_str(), "wb"))) {
899 error(errIO, -1, "Couldn't open file '{0:t}'", name);
900 return errOpenFile;
901 }
902 outStr = new FileOutStream(f, 0);
903
904 yRef = new XRef(getXRef()->getTrailerDict());
905
906 if (secHdlr != nullptr && !secHdlr->isUnencrypted()) {
907 yRef->setEncryption(secHdlr->getPermissionFlags(), secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
908 }
909 countRef = new XRef();
910 Object *trailerObj = getXRef()->getTrailerDict();
911 if (trailerObj->isDict()) {
912 markPageObjects(trailerObj->getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
913 }
914 yRef->add(0, 65535, 0, false);
915 writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
916
917 // get and mark info dict
918 Object infoObj = getXRef()->getDocInfo();
919 if (infoObj.isDict()) {
920 Dict *infoDict = infoObj.getDict();
921 markPageObjects(infoDict, yRef, countRef, 0, refPage->num, rootNum + 2);
922 if (trailerObj->isDict()) {
923 Dict *trailerDict = trailerObj->getDict();
924 const Object &ref = trailerDict->lookupNF("Info");
925 if (ref.isRef()) {
926 yRef->add(ref.getRef(), 0, true);
927 if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
928 yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
929 }
930 }
931 }
932 }
933
934 // get and mark output intents etc.
935 Object catObj = getXRef()->getCatalog();
936 Dict *catDict = catObj.getDict();
937 Object pagesObj = catDict->lookup("Pages");
938 Object afObj = catDict->lookupNF("AcroForm").copy();
939 if (!afObj.isNull()) {
940 markAcroForm(&afObj, yRef, countRef, 0, refPage->num, rootNum + 2);
941 }
942 Dict *pagesDict = pagesObj.getDict();
943 Object resourcesObj = pagesDict->lookup("Resources");
944 if (resourcesObj.isDict())
945 markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
946 markPageObjects(catDict, yRef, countRef, 0, refPage->num, rootNum + 2);
947
948 Dict *pageDict = page.getDict();
949 if (resourcesObj.isNull() && !pageDict->hasKey("Resources")) {
950 Object *resourceDictObject = getCatalog()->getPage(pageNo)->getResourceDictObject();
951 if (resourceDictObject->isDict()) {
952 resourcesObj = resourceDictObject->copy();
953 markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
954 }
955 }
956 markPageObjects(pageDict, yRef, countRef, 0, refPage->num, rootNum + 2);
957 Object annotsObj = pageDict->lookupNF("Annots").copy();
958 if (!annotsObj.isNull()) {
959 markAnnotations(&annotsObj, yRef, countRef, 0, refPage->num, rootNum + 2);
960 }
961 yRef->markUnencrypted();
962 writePageObjects(outStr, yRef, 0);
963
964 yRef->add(rootNum, 0, outStr->getPos(), true);
965 outStr->printf("%d 0 obj\n", rootNum);
966 outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
967 for (int j = 0; j < catDict->getLength(); j++) {
968 const char *key = catDict->getKey(j);
969 if (strcmp(key, "Type") != 0 && strcmp(key, "Catalog") != 0 && strcmp(key, "Pages") != 0) {
970 if (j > 0)
971 outStr->printf(" ");
972 Object value = catDict->getValNF(j).copy();
973 outStr->printf("/%s ", key);
974 writeObject(&value, outStr, getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
975 }
976 }
977 outStr->printf(">>\nendobj\n");
978
979 yRef->add(rootNum + 1, 0, outStr->getPos(), true);
980 outStr->printf("%d 0 obj\n", rootNum + 1);
981 outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
982 if (resourcesObj.isDict()) {
983 outStr->printf("/Resources ");
984 writeObject(&resourcesObj, outStr, getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
985 }
986 outStr->printf(">>\n");
987 outStr->printf("endobj\n");
988
989 yRef->add(rootNum + 2, 0, outStr->getPos(), true);
990 outStr->printf("%d 0 obj\n", rootNum + 2);
991 outStr->printf("<< ");
992 for (int n = 0; n < pageDict->getLength(); n++) {
993 if (n > 0)
994 outStr->printf(" ");
995 const char *key = pageDict->getKey(n);
996 Object value = pageDict->getValNF(n).copy();
997 if (strcmp(key, "Parent") == 0) {
998 outStr->printf("/Parent %d 0 R", rootNum + 1);
999 } else {
1000 outStr->printf("/%s ", key);
1001 writeObject(&value, outStr, getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
1002 }
1003 }
1004 outStr->printf(" >>\nendobj\n");
1005
1006 Goffset uxrefOffset = outStr->getPos();
1007 Ref ref;
1008 ref.num = rootNum;
1009 ref.gen = 0;
1010 Object trailerDict = createTrailerDict(rootNum + 3, false, 0, &ref, getXRef(), name->c_str(), uxrefOffset);
1011 writeXRefTableTrailer(std::move(trailerDict), yRef, false /* do not write unnecessary entries */, uxrefOffset, outStr, getXRef());
1012
1013 outStr->close();
1014 fclose(f);
1015 delete yRef;
1016 delete countRef;
1017 delete outStr;
1018
1019 return errNone;
1020 }
1021
saveAs(const GooString * name,PDFWriteMode mode)1022 int PDFDoc::saveAs(const GooString *name, PDFWriteMode mode)
1023 {
1024 FILE *f;
1025 OutStream *outStr;
1026 int res;
1027
1028 if (!(f = openFile(name->c_str(), "wb"))) {
1029 error(errIO, -1, "Couldn't open file '{0:t}'", name);
1030 return errOpenFile;
1031 }
1032 outStr = new FileOutStream(f, 0);
1033 res = saveAs(outStr, mode);
1034 delete outStr;
1035 fclose(f);
1036 return res;
1037 }
1038
saveAs(OutStream * outStr,PDFWriteMode mode)1039 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode)
1040 {
1041 if (file && file->modificationTimeChangedSinceOpen())
1042 return errFileChangedSinceOpen;
1043
1044 if (!xref->isModified() && mode == writeStandard) {
1045 // simply copy the original file
1046 saveWithoutChangesAs(outStr);
1047 } else if (mode == writeForceRewrite) {
1048 saveCompleteRewrite(outStr);
1049 } else {
1050 saveIncrementalUpdate(outStr);
1051 }
1052
1053 return errNone;
1054 }
1055
saveWithoutChangesAs(const GooString * name)1056 int PDFDoc::saveWithoutChangesAs(const GooString *name)
1057 {
1058 FILE *f;
1059 OutStream *outStr;
1060 int res;
1061
1062 if (!(f = openFile(name->c_str(), "wb"))) {
1063 error(errIO, -1, "Couldn't open file '{0:t}'", name);
1064 return errOpenFile;
1065 }
1066
1067 outStr = new FileOutStream(f, 0);
1068 res = saveWithoutChangesAs(outStr);
1069 delete outStr;
1070
1071 fclose(f);
1072
1073 return res;
1074 }
1075
saveWithoutChangesAs(OutStream * outStr)1076 int PDFDoc::saveWithoutChangesAs(OutStream *outStr)
1077 {
1078 int c;
1079
1080 if (file && file->modificationTimeChangedSinceOpen())
1081 return errFileChangedSinceOpen;
1082
1083 BaseStream *copyStr = str->copy();
1084 copyStr->reset();
1085 while ((c = copyStr->getChar()) != EOF) {
1086 outStr->put(c);
1087 }
1088 copyStr->close();
1089 delete copyStr;
1090
1091 return errNone;
1092 }
1093
saveIncrementalUpdate(OutStream * outStr)1094 void PDFDoc::saveIncrementalUpdate(OutStream *outStr)
1095 {
1096 XRef *uxref;
1097 int c;
1098 // copy the original file
1099 BaseStream *copyStr = str->copy();
1100 copyStr->reset();
1101 while ((c = copyStr->getChar()) != EOF) {
1102 outStr->put(c);
1103 }
1104 copyStr->close();
1105 delete copyStr;
1106
1107 unsigned char *fileKey;
1108 CryptAlgorithm encAlgorithm;
1109 int keyLength;
1110 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1111
1112 uxref = new XRef();
1113 uxref->add(0, 65535, 0, false);
1114 xref->lock();
1115 for (int i = 0; i < xref->getNumObjects(); i++) {
1116 if ((xref->getEntry(i)->type == xrefEntryFree) && (xref->getEntry(i)->gen == 0)) // we skip the irrelevant free objects
1117 continue;
1118
1119 if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { // we have an updated object
1120 Ref ref;
1121 ref.num = i;
1122 ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
1123 if (xref->getEntry(i)->type != xrefEntryFree) {
1124 Object obj1 = xref->fetch(ref, 1 /* recursion */);
1125 Goffset offset = writeObjectHeader(&ref, outStr);
1126 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref);
1127 writeObjectFooter(outStr);
1128 uxref->add(ref, offset, true);
1129 } else {
1130 uxref->add(ref, 0, false);
1131 }
1132 }
1133 }
1134 xref->unlock();
1135 // because of "uxref->add(0, 65535, 0, false);" uxref->getNumObjects() will
1136 // always be >= 1; if it is 1, it means there is nothing to update
1137 if (uxref->getNumObjects() == 1) {
1138 delete uxref;
1139 return;
1140 }
1141
1142 Goffset uxrefOffset = outStr->getPos();
1143 int numobjects = xref->getNumObjects();
1144 const char *fileNameA = fileName ? fileName->c_str() : nullptr;
1145 Ref rootRef, uxrefStreamRef;
1146 rootRef.num = getXRef()->getRootNum();
1147 rootRef.gen = getXRef()->getRootGen();
1148
1149 // Output a xref stream if there is a xref stream already
1150 bool xRefStream = xref->isXRefStream();
1151
1152 if (xRefStream) {
1153 // Append an entry for the xref stream itself
1154 uxrefStreamRef.num = numobjects++;
1155 uxrefStreamRef.gen = 0;
1156 uxref->add(uxrefStreamRef, uxrefOffset, true);
1157 }
1158
1159 Object trailerDict = createTrailerDict(numobjects, true, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
1160 if (xRefStream) {
1161 writeXRefStreamTrailer(std::move(trailerDict), uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
1162 } else {
1163 writeXRefTableTrailer(std::move(trailerDict), uxref, false, uxrefOffset, outStr, getXRef());
1164 }
1165
1166 delete uxref;
1167 }
1168
saveCompleteRewrite(OutStream * outStr)1169 void PDFDoc::saveCompleteRewrite(OutStream *outStr)
1170 {
1171 // Make sure that special flags are set, because we are going to read
1172 // all objects, including Unencrypted ones.
1173 xref->scanSpecialFlags();
1174
1175 unsigned char *fileKey;
1176 CryptAlgorithm encAlgorithm;
1177 int keyLength;
1178 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1179
1180 writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
1181 XRef *uxref = new XRef();
1182 uxref->add(0, 65535, 0, false);
1183 xref->lock();
1184 for (int i = 0; i < xref->getNumObjects(); i++) {
1185 Ref ref;
1186 XRefEntryType type = xref->getEntry(i)->type;
1187 if (type == xrefEntryFree) {
1188 ref.num = i;
1189 ref.gen = xref->getEntry(i)->gen;
1190 /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
1191 and we don't want the one with num=0 because it has already been added (gen = 65535)*/
1192 if (ref.gen > 0 && ref.num > 0)
1193 uxref->add(ref, 0, false);
1194 } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
1195 // This entry must not be written, put a free entry instead (with incremented gen)
1196 ref.num = i;
1197 ref.gen = xref->getEntry(i)->gen + 1;
1198 uxref->add(ref, 0, false);
1199 } else if (type == xrefEntryUncompressed) {
1200 ref.num = i;
1201 ref.gen = xref->getEntry(i)->gen;
1202 Object obj1 = xref->fetch(ref, 1 /* recursion */);
1203 Goffset offset = writeObjectHeader(&ref, outStr);
1204 // Write unencrypted objects in unencrypted form
1205 if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) {
1206 writeObject(&obj1, outStr, nullptr, cryptRC4, 0, 0, 0);
1207 } else {
1208 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref);
1209 }
1210 writeObjectFooter(outStr);
1211 uxref->add(ref, offset, true);
1212 } else if (type == xrefEntryCompressed) {
1213 ref.num = i;
1214 ref.gen = 0; // compressed entries have gen == 0
1215 Object obj1 = xref->fetch(ref, 1 /* recursion */);
1216 Goffset offset = writeObjectHeader(&ref, outStr);
1217 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref);
1218 writeObjectFooter(outStr);
1219 uxref->add(ref, offset, true);
1220 }
1221 }
1222 xref->unlock();
1223 Goffset uxrefOffset = outStr->getPos();
1224 writeXRefTableTrailer(uxrefOffset, uxref, true /* write all entries */, uxref->getNumObjects(), outStr, false /* complete rewrite */);
1225 delete uxref;
1226 }
1227
writeDictionnary(Dict * dict,OutStream * outStr,XRef * xRef,unsigned int numOffset,unsigned char * fileKey,CryptAlgorithm encAlgorithm,int keyLength,Ref ref,std::set<Dict * > * alreadyWrittenDicts)1228 void PDFDoc::writeDictionnary(Dict *dict, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts)
1229 {
1230 bool deleteSet = false;
1231 if (!alreadyWrittenDicts) {
1232 alreadyWrittenDicts = new std::set<Dict *>;
1233 deleteSet = true;
1234 }
1235
1236 if (alreadyWrittenDicts->find(dict) != alreadyWrittenDicts->end()) {
1237 error(errSyntaxWarning, -1, "PDFDoc::writeDictionnary: Found recursive dicts");
1238 if (deleteSet)
1239 delete alreadyWrittenDicts;
1240 return;
1241 } else {
1242 alreadyWrittenDicts->insert(dict);
1243 }
1244
1245 outStr->printf("<<");
1246 for (int i = 0; i < dict->getLength(); i++) {
1247 GooString keyName(dict->getKey(i));
1248 GooString *keyNameToPrint = keyName.sanitizedName(false /* non ps mode */);
1249 outStr->printf("/%s ", keyNameToPrint->c_str());
1250 delete keyNameToPrint;
1251 Object obj1 = dict->getValNF(i).copy();
1252 writeObject(&obj1, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1253 }
1254 outStr->printf(">> ");
1255
1256 if (deleteSet) {
1257 delete alreadyWrittenDicts;
1258 }
1259 }
1260
writeStream(Stream * str,OutStream * outStr)1261 void PDFDoc::writeStream(Stream *str, OutStream *outStr)
1262 {
1263 outStr->printf("stream\r\n");
1264 str->reset();
1265 for (int c = str->getChar(); c != EOF; c = str->getChar()) {
1266 outStr->printf("%c", c);
1267 }
1268 outStr->printf("\r\nendstream\r\n");
1269 }
1270
writeRawStream(Stream * str,OutStream * outStr)1271 void PDFDoc::writeRawStream(Stream *str, OutStream *outStr)
1272 {
1273 Object obj1 = str->getDict()->lookup("Length");
1274 if (!obj1.isInt() && !obj1.isInt64()) {
1275 error(errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1276 return;
1277 }
1278
1279 Goffset length;
1280 if (obj1.isInt())
1281 length = obj1.getInt();
1282 else
1283 length = obj1.getInt64();
1284
1285 outStr->printf("stream\r\n");
1286 str->unfilteredReset();
1287 for (Goffset i = 0; i < length; i++) {
1288 int c = str->getUnfilteredChar();
1289 if (unlikely(c == EOF)) {
1290 error(errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
1291 break;
1292 }
1293 outStr->printf("%c", c);
1294 }
1295 str->reset();
1296 outStr->printf("\r\nendstream\r\n");
1297 }
1298
writeString(const GooString * s,OutStream * outStr,const unsigned char * fileKey,CryptAlgorithm encAlgorithm,int keyLength,Ref ref)1299 void PDFDoc::writeString(const GooString *s, OutStream *outStr, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref)
1300 {
1301 // Encrypt string if encryption is enabled
1302 GooString *sEnc = nullptr;
1303 if (fileKey) {
1304 EncryptStream *enc = new EncryptStream(new MemStream(s->c_str(), 0, s->getLength(), Object(objNull)), fileKey, encAlgorithm, keyLength, ref);
1305 sEnc = new GooString();
1306 int c;
1307 enc->reset();
1308 while ((c = enc->getChar()) != EOF) {
1309 sEnc->append((char)c);
1310 }
1311
1312 delete enc;
1313 s = sEnc;
1314 }
1315
1316 // Write data
1317 if (s->hasUnicodeMarker()) {
1318 // unicode string don't necessary end with \0
1319 const char *c = s->c_str();
1320 outStr->printf("(");
1321 for (int i = 0; i < s->getLength(); i++) {
1322 char unescaped = *(c + i) & 0x000000ff;
1323 // escape if needed
1324 if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
1325 outStr->printf("%c", '\\');
1326 outStr->printf("%c", unescaped);
1327 }
1328 outStr->printf(") ");
1329 } else {
1330 const char *c = s->c_str();
1331 outStr->printf("(");
1332 for (int i = 0; i < s->getLength(); i++) {
1333 char unescaped = *(c + i) & 0x000000ff;
1334 // escape if needed
1335 if (unescaped == '\r')
1336 outStr->printf("\\r");
1337 else if (unescaped == '\n')
1338 outStr->printf("\\n");
1339 else {
1340 if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
1341 outStr->printf("%c", '\\');
1342 }
1343 outStr->printf("%c", unescaped);
1344 }
1345 }
1346 outStr->printf(") ");
1347 }
1348
1349 delete sEnc;
1350 }
1351
writeObjectHeader(Ref * ref,OutStream * outStr)1352 Goffset PDFDoc::writeObjectHeader(Ref *ref, OutStream *outStr)
1353 {
1354 Goffset offset = outStr->getPos();
1355 outStr->printf("%i %i obj\r\n", ref->num, ref->gen);
1356 return offset;
1357 }
1358
writeObject(Object * obj,OutStream * outStr,XRef * xRef,unsigned int numOffset,unsigned char * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen,std::set<Dict * > * alreadyWrittenDicts)1359 void PDFDoc::writeObject(Object *obj, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict *> *alreadyWrittenDicts)
1360 {
1361 writeObject(obj, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, { objNum, objGen }, alreadyWrittenDicts);
1362 }
1363
writeObject(Object * obj,OutStream * outStr,XRef * xRef,unsigned int numOffset,unsigned char * fileKey,CryptAlgorithm encAlgorithm,int keyLength,Ref ref,std::set<Dict * > * alreadyWrittenDicts)1364 void PDFDoc::writeObject(Object *obj, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts)
1365 {
1366 Array *array;
1367
1368 switch (obj->getType()) {
1369 case objBool:
1370 outStr->printf("%s ", obj->getBool() ? "true" : "false");
1371 break;
1372 case objInt:
1373 outStr->printf("%i ", obj->getInt());
1374 break;
1375 case objInt64:
1376 outStr->printf("%lli ", obj->getInt64());
1377 break;
1378 case objReal: {
1379 GooString s;
1380 s.appendf("{0:.10g}", obj->getReal());
1381 outStr->printf("%s ", s.c_str());
1382 break;
1383 }
1384 case objString:
1385 writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, ref);
1386 break;
1387 case objHexString: {
1388 const GooString *s = obj->getHexString();
1389 outStr->printf("<");
1390 for (int i = 0; i < s->getLength(); i++) {
1391 outStr->printf("%02x", s->getChar(i) & 0xff);
1392 }
1393 outStr->printf("> ");
1394 break;
1395 }
1396 case objName: {
1397 GooString name(obj->getName());
1398 GooString *nameToPrint = name.sanitizedName(false /* non ps mode */);
1399 outStr->printf("/%s ", nameToPrint->c_str());
1400 delete nameToPrint;
1401 break;
1402 }
1403 case objNull:
1404 outStr->printf("null ");
1405 break;
1406 case objArray:
1407 array = obj->getArray();
1408 outStr->printf("[");
1409 for (int i = 0; i < array->getLength(); i++) {
1410 Object obj1 = array->getNF(i).copy();
1411 writeObject(&obj1, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref);
1412 }
1413 outStr->printf("] ");
1414 break;
1415 case objDict:
1416 writeDictionnary(obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1417 break;
1418 case objStream: {
1419 // We can't modify stream with the current implementation (no write functions in Stream API)
1420 // => the only type of streams which that have been modified are internal streams (=strWeird)
1421 Stream *stream = obj->getStream();
1422 if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
1423 // we write the stream unencoded => TODO: write stream encoder
1424
1425 // Encrypt stream
1426 EncryptStream *encStream = nullptr;
1427 bool removeFilter = true;
1428 if (stream->getKind() == strWeird && fileKey) {
1429 Object filter = stream->getDict()->lookup("Filter");
1430 if (!filter.isName("Crypt")) {
1431 if (filter.isArray()) {
1432 for (int i = 0; i < filter.arrayGetLength(); i++) {
1433 Object filterEle = filter.arrayGet(i);
1434 if (filterEle.isName("Crypt")) {
1435 removeFilter = false;
1436 break;
1437 }
1438 }
1439 if (removeFilter) {
1440 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
1441 encStream->setAutoDelete(false);
1442 stream = encStream;
1443 }
1444 } else {
1445 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
1446 encStream->setAutoDelete(false);
1447 stream = encStream;
1448 }
1449 } else {
1450 removeFilter = false;
1451 }
1452 } else if (fileKey != nullptr) { // Encrypt stream
1453 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
1454 encStream->setAutoDelete(false);
1455 stream = encStream;
1456 }
1457
1458 stream->reset();
1459 // recalculate stream length
1460 Goffset tmp = 0;
1461 for (int c = stream->getChar(); c != EOF; c = stream->getChar()) {
1462 tmp++;
1463 }
1464 stream->getDict()->set("Length", Object(tmp));
1465
1466 // Remove Stream encoding
1467 AutoFreeMemStream *internalStream = dynamic_cast<AutoFreeMemStream *>(stream);
1468 if (internalStream && internalStream->isFilterRemovalForbidden()) {
1469 removeFilter = false;
1470 }
1471 if (removeFilter) {
1472 stream->getDict()->remove("Filter");
1473 }
1474 stream->getDict()->remove("DecodeParms");
1475
1476 writeDictionnary(stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1477 writeStream(stream, outStr);
1478 delete encStream;
1479 } else if (fileKey != nullptr && stream->getKind() == strFile && static_cast<FileStream *>(stream)->getNeedsEncryptionOnSave()) {
1480 EncryptStream *encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
1481 encStream->setAutoDelete(false);
1482 writeDictionnary(encStream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1483 writeStream(encStream, outStr);
1484 delete encStream;
1485 } else {
1486 // raw stream copy
1487 FilterStream *fs = dynamic_cast<FilterStream *>(stream);
1488 if (fs) {
1489 BaseStream *bs = fs->getBaseStream();
1490 if (bs) {
1491 Goffset streamEnd;
1492 if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
1493 Goffset val = streamEnd - bs->getStart();
1494 stream->getDict()->set("Length", Object(val));
1495 }
1496 }
1497 }
1498 writeDictionnary(stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1499 writeRawStream(stream, outStr);
1500 }
1501 break;
1502 }
1503 case objRef:
1504 outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
1505 break;
1506 case objCmd:
1507 outStr->printf("%s\n", obj->getCmd());
1508 break;
1509 case objError:
1510 outStr->printf("error\r\n");
1511 break;
1512 case objEOF:
1513 outStr->printf("eof\r\n");
1514 break;
1515 case objNone:
1516 outStr->printf("none\r\n");
1517 break;
1518 default:
1519 error(errUnimplemented, -1, "Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj->getType());
1520 break;
1521 }
1522 }
1523
writeObjectFooter(OutStream * outStr)1524 void PDFDoc::writeObjectFooter(OutStream *outStr)
1525 {
1526 outStr->printf("\r\nendobj\r\n");
1527 }
1528
createTrailerDict(int uxrefSize,bool incrUpdate,Goffset startxRef,Ref * root,XRef * xRef,const char * fileName,Goffset fileSize)1529 Object PDFDoc::createTrailerDict(int uxrefSize, bool incrUpdate, Goffset startxRef, Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
1530 {
1531 Dict *trailerDict = new Dict(xRef);
1532 trailerDict->set("Size", Object(uxrefSize));
1533
1534 // build a new ID, as recommended in the reference, uses:
1535 // - current time
1536 // - file name
1537 // - file size
1538 // - values of entry in information dictionnary
1539 GooString message;
1540 char buffer[256];
1541 sprintf(buffer, "%i", (int)time(nullptr));
1542 message.append(buffer);
1543
1544 if (fileName)
1545 message.append(fileName);
1546
1547 sprintf(buffer, "%lli", (long long)fileSize);
1548 message.append(buffer);
1549
1550 // info dict -- only use text string
1551 if (!xRef->getTrailerDict()->isNone()) {
1552 Object docInfo = xRef->getDocInfo();
1553 if (docInfo.isDict()) {
1554 for (int i = 0; i < docInfo.getDict()->getLength(); i++) {
1555 Object obj2 = docInfo.getDict()->getVal(i);
1556 if (obj2.isString()) {
1557 message.append(obj2.getString());
1558 }
1559 }
1560 }
1561 }
1562
1563 bool hasEncrypt = false;
1564 if (!xRef->getTrailerDict()->isNone()) {
1565 Object obj2 = xRef->getTrailerDict()->dictLookupNF("Encrypt").copy();
1566 if (!obj2.isNull()) {
1567 trailerDict->set("Encrypt", std::move(obj2));
1568 hasEncrypt = true;
1569 }
1570 }
1571
1572 // calculate md5 digest
1573 unsigned char digest[16];
1574 md5((unsigned char *)message.c_str(), message.getLength(), digest);
1575
1576 // create ID array
1577 // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1578 if (incrUpdate || hasEncrypt) {
1579 // only update the second part of the array
1580 Object obj4 = xRef->getTrailerDict()->getDict()->lookup("ID");
1581 if (!obj4.isArray()) {
1582 error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1583 } else {
1584 Array *array = new Array(xRef);
1585 // Get the first part of the ID
1586 array->add(obj4.arrayGet(0));
1587 array->add(Object(new GooString((const char *)digest, 16)));
1588 trailerDict->set("ID", Object(array));
1589 }
1590 } else {
1591 // new file => same values for the two identifiers
1592 Array *array = new Array(xRef);
1593 array->add(Object(new GooString((const char *)digest, 16)));
1594 array->add(Object(new GooString((const char *)digest, 16)));
1595 trailerDict->set("ID", Object(array));
1596 }
1597
1598 trailerDict->set("Root", Object(*root));
1599
1600 if (incrUpdate) {
1601 trailerDict->set("Prev", Object(startxRef));
1602 }
1603
1604 if (!xRef->getTrailerDict()->isNone()) {
1605 Object obj5 = xRef->getDocInfoNF();
1606 if (!obj5.isNull()) {
1607 trailerDict->set("Info", std::move(obj5));
1608 }
1609 }
1610
1611 return Object(trailerDict);
1612 }
1613
writeXRefTableTrailer(Object && trailerDict,XRef * uxref,bool writeAllEntries,Goffset uxrefOffset,OutStream * outStr,XRef * xRef)1614 void PDFDoc::writeXRefTableTrailer(Object &&trailerDict, XRef *uxref, bool writeAllEntries, Goffset uxrefOffset, OutStream *outStr, XRef *xRef)
1615 {
1616 uxref->writeTableToFile(outStr, writeAllEntries);
1617 outStr->printf("trailer\r\n");
1618 writeDictionnary(trailerDict.getDict(), outStr, xRef, 0, nullptr, cryptRC4, 0, { 0, 0 }, nullptr);
1619 outStr->printf("\r\nstartxref\r\n");
1620 outStr->printf("%lli\r\n", uxrefOffset);
1621 outStr->printf("%%%%EOF\r\n");
1622 }
1623
writeXRefStreamTrailer(Object && trailerDict,XRef * uxref,Ref * uxrefStreamRef,Goffset uxrefOffset,OutStream * outStr,XRef * xRef)1624 void PDFDoc::writeXRefStreamTrailer(Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream *outStr, XRef *xRef)
1625 {
1626 GooString stmData;
1627
1628 // Fill stmData and some trailerDict fields
1629 uxref->writeStreamToBuffer(&stmData, trailerDict.getDict(), xRef);
1630
1631 // Create XRef stream object and write it
1632 MemStream *mStream = new MemStream(stmData.c_str(), 0, stmData.getLength(), std::move(trailerDict));
1633 writeObjectHeader(uxrefStreamRef, outStr);
1634 Object obj1(static_cast<Stream *>(mStream));
1635 writeObject(&obj1, outStr, xRef, 0, nullptr, cryptRC4, 0, 0, 0);
1636 writeObjectFooter(outStr);
1637
1638 outStr->printf("startxref\r\n");
1639 outStr->printf("%lli\r\n", uxrefOffset);
1640 outStr->printf("%%%%EOF\r\n");
1641 }
1642
writeXRefTableTrailer(Goffset uxrefOffset,XRef * uxref,bool writeAllEntries,int uxrefSize,OutStream * outStr,bool incrUpdate)1643 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, bool writeAllEntries, int uxrefSize, OutStream *outStr, bool incrUpdate)
1644 {
1645 const char *fileNameA = fileName ? fileName->c_str() : nullptr;
1646 // file size (doesn't include the trailer)
1647 unsigned int fileSize = 0;
1648 int c;
1649 str->reset();
1650 while ((c = str->getChar()) != EOF) {
1651 fileSize++;
1652 }
1653 str->close();
1654 Ref ref;
1655 ref.num = getXRef()->getRootNum();
1656 ref.gen = getXRef()->getRootGen();
1657 Object trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize);
1658 writeXRefTableTrailer(std::move(trailerDict), uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
1659 }
1660
writeHeader(OutStream * outStr,int major,int minor)1661 void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
1662 {
1663 outStr->printf("%%PDF-%d.%d\n", major, minor);
1664 outStr->printf("%%%c%c%c%c\n", 0xE2, 0xE3, 0xCF, 0xD3);
1665 }
1666
markDictionnary(Dict * dict,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldRefNum,int newRefNum,std::set<Dict * > * alreadyMarkedDicts)1667 void PDFDoc::markDictionnary(Dict *dict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
1668 {
1669 bool deleteSet = false;
1670 if (!alreadyMarkedDicts) {
1671 alreadyMarkedDicts = new std::set<Dict *>;
1672 deleteSet = true;
1673 }
1674
1675 if (alreadyMarkedDicts->find(dict) != alreadyMarkedDicts->end()) {
1676 error(errSyntaxWarning, -1, "PDFDoc::markDictionnary: Found recursive dicts");
1677 if (deleteSet)
1678 delete alreadyMarkedDicts;
1679 return;
1680 } else {
1681 alreadyMarkedDicts->insert(dict);
1682 }
1683
1684 for (int i = 0; i < dict->getLength(); i++) {
1685 const char *key = dict->getKey(i);
1686 if (strcmp(key, "Annots") != 0) {
1687 Object obj1 = dict->getValNF(i).copy();
1688 markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1689 } else {
1690 Object annotsObj = dict->getValNF(i).copy();
1691 if (!annotsObj.isNull()) {
1692 markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum, alreadyMarkedDicts);
1693 }
1694 }
1695 }
1696
1697 if (deleteSet) {
1698 delete alreadyMarkedDicts;
1699 }
1700 }
1701
markObject(Object * obj,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldRefNum,int newRefNum,std::set<Dict * > * alreadyMarkedDicts)1702 void PDFDoc::markObject(Object *obj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
1703 {
1704 Array *array;
1705
1706 switch (obj->getType()) {
1707 case objArray:
1708 array = obj->getArray();
1709 for (int i = 0; i < array->getLength(); i++) {
1710 Object obj1 = array->getNF(i).copy();
1711 markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1712 }
1713 break;
1714 case objDict:
1715 markDictionnary(obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1716 break;
1717 case objStream: {
1718 Stream *stream = obj->getStream();
1719 markDictionnary(stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1720 } break;
1721 case objRef: {
1722 if (obj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1723 if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
1724 return; // already marked as free => should be replaced
1725 }
1726 xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, true);
1727 if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
1728 xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
1729 }
1730 }
1731 if (obj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1732 countRef->add(obj->getRef().num + numOffset, 1, 0, true);
1733 } else {
1734 XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
1735 entry->gen++;
1736 if (entry->gen > 9)
1737 break;
1738 }
1739 Object obj1 = getXRef()->fetch(obj->getRef());
1740 markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
1741 } break;
1742 default:
1743 break;
1744 }
1745 }
1746
replacePageDict(int pageNo,int rotate,const PDFRectangle * mediaBox,const PDFRectangle * cropBox)1747 void PDFDoc::replacePageDict(int pageNo, int rotate, const PDFRectangle *mediaBox, const PDFRectangle *cropBox)
1748 {
1749 Ref *refPage = getCatalog()->getPageRef(pageNo);
1750 Object page = getXRef()->fetch(*refPage);
1751 Dict *pageDict = page.getDict();
1752 pageDict->remove("MediaBoxssdf");
1753 pageDict->remove("MediaBox");
1754 pageDict->remove("CropBox");
1755 pageDict->remove("ArtBox");
1756 pageDict->remove("BleedBox");
1757 pageDict->remove("TrimBox");
1758 pageDict->remove("Rotate");
1759 Array *mediaBoxArray = new Array(getXRef());
1760 mediaBoxArray->add(Object(mediaBox->x1));
1761 mediaBoxArray->add(Object(mediaBox->y1));
1762 mediaBoxArray->add(Object(mediaBox->x2));
1763 mediaBoxArray->add(Object(mediaBox->y2));
1764 Object mediaBoxObject(mediaBoxArray);
1765 Object trimBoxObject = mediaBoxObject.copy();
1766 pageDict->add("MediaBox", std::move(mediaBoxObject));
1767 if (cropBox != nullptr) {
1768 Array *cropBoxArray = new Array(getXRef());
1769 cropBoxArray->add(Object(cropBox->x1));
1770 cropBoxArray->add(Object(cropBox->y1));
1771 cropBoxArray->add(Object(cropBox->x2));
1772 cropBoxArray->add(Object(cropBox->y2));
1773 Object cropBoxObject(cropBoxArray);
1774 trimBoxObject = cropBoxObject.copy();
1775 pageDict->add("CropBox", std::move(cropBoxObject));
1776 }
1777 pageDict->add("TrimBox", std::move(trimBoxObject));
1778 pageDict->add("Rotate", Object(rotate));
1779 getXRef()->setModifiedObject(&page, *refPage);
1780 }
1781
markPageObjects(Dict * pageDict,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldRefNum,int newRefNum,std::set<Dict * > * alreadyMarkedDicts)1782 void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
1783 {
1784 pageDict->remove("OpenAction");
1785 pageDict->remove("Outlines");
1786 pageDict->remove("StructTreeRoot");
1787
1788 for (int n = 0; n < pageDict->getLength(); n++) {
1789 const char *key = pageDict->getKey(n);
1790 Object value = pageDict->getValNF(n).copy();
1791 if (strcmp(key, "Parent") != 0 && strcmp(key, "Pages") != 0 && strcmp(key, "AcroForm") != 0 && strcmp(key, "Annots") != 0 && strcmp(key, "P") != 0 && strcmp(key, "Root") != 0) {
1792 markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1793 }
1794 }
1795 }
1796
markAnnotations(Object * annotsObj,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldPageNum,int newPageNum,std::set<Dict * > * alreadyMarkedDicts)1797 bool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum, std::set<Dict *> *alreadyMarkedDicts)
1798 {
1799 bool modified = false;
1800 Object annots = annotsObj->fetch(getXRef());
1801 if (annots.isArray()) {
1802 Array *array = annots.getArray();
1803 for (int i = array->getLength() - 1; i >= 0; i--) {
1804 Object obj1 = array->get(i);
1805 if (obj1.isDict()) {
1806 Dict *dict = obj1.getDict();
1807 Object type = dict->lookup("Type");
1808 if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
1809 const Object &obj2 = dict->lookupNF("P");
1810 if (obj2.isRef()) {
1811 if (obj2.getRef().num == oldPageNum) {
1812 const Object &obj3 = array->getNF(i);
1813 if (obj3.isRef()) {
1814 Ref r;
1815 r.num = newPageNum;
1816 r.gen = 0;
1817 dict->set("P", Object(r));
1818 getXRef()->setModifiedObject(&obj1, obj3.getRef());
1819 }
1820 } else if (obj2.getRef().num == newPageNum) {
1821 continue;
1822 } else {
1823 Object page = getXRef()->fetch(obj2.getRef());
1824 if (page.isDict()) {
1825 Dict *pageDict = page.getDict();
1826 Object pagetype = pageDict->lookup("Type");
1827 if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
1828 continue;
1829 }
1830 }
1831 array->remove(i);
1832 modified = true;
1833 continue;
1834 }
1835 }
1836 }
1837 markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum, alreadyMarkedDicts);
1838 }
1839 obj1 = array->getNF(i).copy();
1840 if (obj1.isRef()) {
1841 if (obj1.getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1842 if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
1843 continue; // already marked as free => should be replaced
1844 }
1845 xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, true);
1846 if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
1847 xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
1848 }
1849 }
1850 if (obj1.getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1851 countRef->add(obj1.getRef().num + numOffset, 1, 0, true);
1852 } else {
1853 XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
1854 entry->gen++;
1855 }
1856 }
1857 }
1858 }
1859 if (annotsObj->isRef()) {
1860 if (annotsObj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1861 if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
1862 return modified; // already marked as free => should be replaced
1863 }
1864 xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, true);
1865 if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
1866 xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1867 }
1868 }
1869 if (annotsObj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1870 countRef->add(annotsObj->getRef().num + numOffset, 1, 0, true);
1871 } else {
1872 XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
1873 entry->gen++;
1874 }
1875 getXRef()->setModifiedObject(&annots, annotsObj->getRef());
1876 }
1877 return modified;
1878 }
1879
markAcroForm(Object * afObj,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldRefNum,int newRefNum)1880 void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum)
1881 {
1882 bool modified = false;
1883 Object acroform = afObj->fetch(getXRef());
1884 if (acroform.isDict()) {
1885 Dict *dict = acroform.getDict();
1886 for (int i = 0; i < dict->getLength(); i++) {
1887 if (strcmp(dict->getKey(i), "Fields") == 0) {
1888 Object fields = dict->getValNF(i).copy();
1889 modified = markAnnotations(&fields, xRef, countRef, numOffset, oldRefNum, newRefNum);
1890 } else {
1891 Object obj = dict->getValNF(i).copy();
1892 markObject(&obj, xRef, countRef, numOffset, oldRefNum, newRefNum);
1893 }
1894 }
1895 }
1896 if (afObj->isRef()) {
1897 if (afObj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1898 if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
1899 return; // already marked as free => should be replaced
1900 }
1901 xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, true);
1902 if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
1903 xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1904 }
1905 }
1906 if (afObj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1907 countRef->add(afObj->getRef().num + numOffset, 1, 0, true);
1908 } else {
1909 XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
1910 entry->gen++;
1911 }
1912 if (modified) {
1913 getXRef()->setModifiedObject(&acroform, afObj->getRef());
1914 }
1915 }
1916 return;
1917 }
1918
writePageObjects(OutStream * outStr,XRef * xRef,unsigned int numOffset,bool combine)1919 unsigned int PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, unsigned int numOffset, bool combine)
1920 {
1921 unsigned int objectsCount = 0; // count the number of objects in the XRef(s)
1922 unsigned char *fileKey;
1923 CryptAlgorithm encAlgorithm;
1924 int keyLength;
1925 xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1926
1927 for (int n = numOffset; n < xRef->getNumObjects(); n++) {
1928 if (xRef->getEntry(n)->type != xrefEntryFree) {
1929 Ref ref;
1930 ref.num = n;
1931 ref.gen = xRef->getEntry(n)->gen;
1932 objectsCount++;
1933 Object obj = getXRef()->fetch(ref.num - numOffset, ref.gen);
1934 Goffset offset = writeObjectHeader(&ref, outStr);
1935 if (combine) {
1936 writeObject(&obj, outStr, getXRef(), numOffset, nullptr, cryptRC4, 0, 0, 0);
1937 } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
1938 writeObject(&obj, outStr, nullptr, cryptRC4, 0, 0, 0);
1939 } else {
1940 writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref);
1941 }
1942 writeObjectFooter(outStr);
1943 xRef->add(ref, offset, true);
1944 }
1945 }
1946 return objectsCount;
1947 }
1948
getOutline()1949 Outline *PDFDoc::getOutline()
1950 {
1951 if (!outline) {
1952 pdfdocLocker();
1953 // read outline
1954 outline = new Outline(catalog->getOutline(), xref, this);
1955 }
1956
1957 return outline;
1958 }
1959
ErrorPDFDoc(int errorCode,const GooString * fileNameA)1960 std::unique_ptr<PDFDoc> PDFDoc::ErrorPDFDoc(int errorCode, const GooString *fileNameA)
1961 {
1962 // We cannot call std::make_unique here because the PDFDoc constructor is private
1963 PDFDoc *doc = new PDFDoc();
1964 doc->errCode = errorCode;
1965 doc->fileName = fileNameA;
1966
1967 return std::unique_ptr<PDFDoc>(doc);
1968 }
1969
strToLongLong(const char * s)1970 long long PDFDoc::strToLongLong(const char *s)
1971 {
1972 long long x, d;
1973 const char *p;
1974
1975 x = 0;
1976 for (p = s; *p && isdigit(*p & 0xff); ++p) {
1977 d = *p - '0';
1978 if (x > (LLONG_MAX - d) / 10) {
1979 break;
1980 }
1981 x = 10 * x + d;
1982 }
1983 return x;
1984 }
1985
1986 // Read the 'startxref' position.
getStartXRef(bool tryingToReconstruct)1987 Goffset PDFDoc::getStartXRef(bool tryingToReconstruct)
1988 {
1989 if (startXRefPos == -1) {
1990
1991 if (isLinearized(tryingToReconstruct)) {
1992 char buf[linearizationSearchSize + 1];
1993 int c, n, i;
1994
1995 str->setPos(0);
1996 for (n = 0; n < linearizationSearchSize; ++n) {
1997 if ((c = str->getChar()) == EOF) {
1998 break;
1999 }
2000 buf[n] = c;
2001 }
2002 buf[n] = '\0';
2003
2004 // find end of first obj (linearization dictionary)
2005 startXRefPos = 0;
2006 for (i = 0; i < n; i++) {
2007 if (!strncmp("endobj", &buf[i], 6)) {
2008 i += 6;
2009 // skip whitespace
2010 while (buf[i] && Lexer::isSpace(buf[i]))
2011 ++i;
2012 startXRefPos = i;
2013 break;
2014 }
2015 }
2016 } else {
2017 char buf[xrefSearchSize + 1];
2018 const char *p;
2019 int c, n, i;
2020
2021 // read last xrefSearchSize bytes
2022 int segnum = 0;
2023 int maxXRefSearch = 24576;
2024 if (str->getLength() < maxXRefSearch)
2025 maxXRefSearch = str->getLength();
2026 for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
2027 str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
2028 for (n = 0; n < xrefSearchSize; ++n) {
2029 if ((c = str->getChar()) == EOF) {
2030 break;
2031 }
2032 buf[n] = c;
2033 }
2034 buf[n] = '\0';
2035
2036 // find startxref
2037 for (i = n - 9; i >= 0; --i) {
2038 if (!strncmp(&buf[i], "startxref", 9)) {
2039 break;
2040 }
2041 }
2042 if (i < 0) {
2043 startXRefPos = 0;
2044 } else {
2045 for (p = &buf[i + 9]; isspace(*p); ++p)
2046 ;
2047 startXRefPos = strToLongLong(p);
2048 break;
2049 }
2050 }
2051 }
2052 }
2053
2054 return startXRefPos;
2055 }
2056
getMainXRefEntriesOffset(bool tryingToReconstruct)2057 Goffset PDFDoc::getMainXRefEntriesOffset(bool tryingToReconstruct)
2058 {
2059 unsigned int mainXRefEntriesOffset = 0;
2060
2061 if (isLinearized(tryingToReconstruct)) {
2062 mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
2063 }
2064
2065 return mainXRefEntriesOffset;
2066 }
2067
getNumPages()2068 int PDFDoc::getNumPages()
2069 {
2070 if (isLinearized()) {
2071 int n;
2072 if ((n = getLinearization()->getNumPages())) {
2073 return n;
2074 }
2075 }
2076
2077 return catalog->getNumPages();
2078 }
2079
parsePage(int page)2080 Page *PDFDoc::parsePage(int page)
2081 {
2082 Ref pageRef;
2083
2084 pageRef.num = getHints()->getPageObjectNum(page);
2085 if (!pageRef.num) {
2086 error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
2087 return nullptr;
2088 }
2089
2090 // check for bogus ref - this can happen in corrupted PDF files
2091 if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
2092 error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef.num, page);
2093 return nullptr;
2094 }
2095
2096 pageRef.gen = xref->getEntry(pageRef.num)->gen;
2097 Object obj = xref->fetch(pageRef);
2098 if (!obj.isDict("Page")) {
2099 error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
2100 return nullptr;
2101 }
2102 Dict *pageDict = obj.getDict();
2103
2104 return new Page(this, page, std::move(obj), pageRef, new PageAttrs(nullptr, pageDict), catalog->getForm());
2105 }
2106
getPage(int page)2107 Page *PDFDoc::getPage(int page)
2108 {
2109 if ((page < 1) || page > getNumPages())
2110 return nullptr;
2111
2112 if (isLinearized() && checkLinearization()) {
2113 pdfdocLocker();
2114 if (!pageCache) {
2115 pageCache = (Page **)gmallocn(getNumPages(), sizeof(Page *));
2116 for (int i = 0; i < getNumPages(); i++) {
2117 pageCache[i] = nullptr;
2118 }
2119 }
2120 if (!pageCache[page - 1]) {
2121 pageCache[page - 1] = parsePage(page);
2122 }
2123 if (pageCache[page - 1]) {
2124 return pageCache[page - 1];
2125 } else {
2126 error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
2127 }
2128 }
2129
2130 return catalog->getPage(page);
2131 }
2132
hasJavascript()2133 bool PDFDoc::hasJavascript()
2134 {
2135 JSInfo jsInfo(this);
2136 jsInfo.scanJS(getNumPages(), true);
2137 return jsInfo.containsJS();
2138 }
2139
sign(const char * saveFilename,const char * certNickname,const char * password,GooString * partialFieldName,int page,const PDFRectangle & rect,const GooString & signatureText,const GooString & signatureTextLeft,double fontSize,std::unique_ptr<AnnotColor> && fontColor,double borderWidth,std::unique_ptr<AnnotColor> && borderColor,std::unique_ptr<AnnotColor> && backgroundColor,const GooString * reason,const GooString * location,const std::string & imagePath)2140 bool PDFDoc::sign(const char *saveFilename, const char *certNickname, const char *password, GooString *partialFieldName, int page, const PDFRectangle &rect, const GooString &signatureText, const GooString &signatureTextLeft,
2141 double fontSize, std::unique_ptr<AnnotColor> &&fontColor, double borderWidth, std::unique_ptr<AnnotColor> &&borderColor, std::unique_ptr<AnnotColor> &&backgroundColor, const GooString *reason, const GooString *location,
2142 const std::string &imagePath)
2143 {
2144 ::Page *destPage = getPage(page);
2145 if (destPage == nullptr) {
2146 return false;
2147 }
2148 Ref imageResourceRef = Ref::INVALID();
2149 if (!imagePath.empty()) {
2150 imageResourceRef = ImageEmbeddingUtils::embed(xref, imagePath);
2151 if (imageResourceRef == Ref::INVALID()) {
2152 return false;
2153 }
2154 }
2155
2156 const DefaultAppearance da { { objName, "SigFont" }, fontSize, std::move(fontColor) };
2157
2158 Object annotObj = Object(new Dict(getXRef()));
2159 annotObj.dictSet("Type", Object(objName, "Annot"));
2160 annotObj.dictSet("Subtype", Object(objName, "Widget"));
2161 annotObj.dictSet("FT", Object(objName, "Sig"));
2162 annotObj.dictSet("T", Object(partialFieldName));
2163 Array *rectArray = new Array(getXRef());
2164 rectArray->add(Object(rect.x1));
2165 rectArray->add(Object(rect.y1));
2166 rectArray->add(Object(rect.x2));
2167 rectArray->add(Object(rect.y2));
2168 annotObj.dictSet("Rect", Object(rectArray));
2169
2170 GooString *daStr = da.toAppearanceString();
2171 annotObj.dictSet("DA", Object(daStr));
2172
2173 const Ref ref = getXRef()->addIndirectObject(annotObj);
2174 catalog->addFormToAcroForm(ref);
2175
2176 std::unique_ptr<::FormFieldSignature> field = std::make_unique<::FormFieldSignature>(this, Object(annotObj.getDict()), ref, nullptr, nullptr);
2177 field->setCustomAppearanceContent(signatureText);
2178 field->setCustomAppearanceLeftContent(signatureTextLeft);
2179 field->setImageResource(imageResourceRef);
2180
2181 Object refObj(ref);
2182 AnnotWidget *signatureAnnot = new AnnotWidget(this, &annotObj, &refObj, field.get());
2183 signatureAnnot->setFlags(signatureAnnot->getFlags() | Annot::flagPrint | Annot::flagLocked | Annot::flagNoRotate);
2184 Dict dummy(getXRef());
2185 auto appearCharacs = std::make_unique<AnnotAppearanceCharacs>(&dummy);
2186 appearCharacs->setBorderColor(std::move(borderColor));
2187 appearCharacs->setBackColor(std::move(backgroundColor));
2188 signatureAnnot->setAppearCharacs(std::move(appearCharacs));
2189
2190 signatureAnnot->generateFieldAppearance();
2191 signatureAnnot->updateAppearanceStream();
2192
2193 FormWidget *formWidget = field->getWidget(field->getNumWidgets() - 1);
2194 formWidget->setWidgetAnnotation(signatureAnnot);
2195
2196 destPage->addAnnot(signatureAnnot);
2197
2198 std::unique_ptr<AnnotBorder> border(new AnnotBorderArray());
2199 border->setWidth(borderWidth);
2200 signatureAnnot->setBorder(std::move(border));
2201
2202 FormWidgetSignature *fws = dynamic_cast<FormWidgetSignature *>(formWidget);
2203 if (fws) {
2204 const bool res = fws->signDocument(saveFilename, certNickname, "SHA256", password, reason, location);
2205
2206 // Now remove the signature stuff in case the user wants to continue editing stuff
2207 // So the document object is clean
2208 const Object &vRefObj = annotObj.dictLookupNF("V");
2209 if (vRefObj.isRef()) {
2210 getXRef()->removeIndirectObject(vRefObj.getRef());
2211 }
2212 destPage->removeAnnot(signatureAnnot);
2213 catalog->removeFormFromAcroForm(ref);
2214 getXRef()->removeIndirectObject(ref);
2215
2216 return res;
2217 }
2218
2219 return false;
2220 }
2221