1 //========================================================================
2 //
3 // PDFDoc.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17 // Copyright (C) 2005, 2007-2009, 2011-2021 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
31 // Copyright (C) 2013, 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013, 2018 Adam Reichold <adamreichold@myopera.com>
33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
35 // Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
36 // Copyright (C) 2015 André Esser <bepandre@hotmail.com>
37 // Copyright (C) 2016, 2020 Jakub Alba <jakubalba@gmail.com>
38 // Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
39 // Copyright (C) 2017 Fredrik Fornwall <fredrik@fornwall.net>
40 // Copyright (C) 2018 Ben Timby <btimby@gmail.com>
41 // Copyright (C) 2018 Evangelos Foutras <evangelos@foutrelis.com>
42 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
43 // Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
44 // Copyright (C) 2018 Philipp Knechtges <philipp-dev@knechtges.com>
45 // Copyright (C) 2019 Christian Persch <chpe@src.gnome.org>
46 // Copyright (C) 2020 Nelson Benítez León <nbenitezl@gmail.com>
47 // Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de>
48 // Copyright (C) 2020 Adam Sampson <ats@offog.org>
49 // Copyright (C) 2021 Oliver Sander <oliver.sander@tu-dresden.de>
50 // Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
51 // Copyright (C) 2021 RM <rm+git@arcsin.org>
52 // Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
53 // Copyright (C) 2021 Marek Kasik <mkasik@redhat.com>
54 //
55 // To see a description of the changes please see the Changelog file that
56 // came with your tarball or type make ChangeLog if you are building from git
57 //
58 //========================================================================
59 
60 #include <config.h>
61 #include <poppler-config.h>
62 
63 #include <cctype>
64 #include <clocale>
65 #include <cstdio>
66 #include <cerrno>
67 #include <climits>
68 #include <cstdlib>
69 #include <cstddef>
70 #include <cstring>
71 #include <ctime>
72 #include <regex>
73 #include <sys/stat.h>
74 #include "goo/glibc.h"
75 #include "goo/gstrtod.h"
76 #include "goo/GooString.h"
77 #include "goo/gfile.h"
78 #include "poppler-config.h"
79 #include "GlobalParams.h"
80 #include "Page.h"
81 #include "Catalog.h"
82 #include "Stream.h"
83 #include "XRef.h"
84 #include "Linearization.h"
85 #include "Link.h"
86 #include "OutputDev.h"
87 #include "Error.h"
88 #include "ErrorCodes.h"
89 #include "Lexer.h"
90 #include "Parser.h"
91 #include "SecurityHandler.h"
92 #include "Decrypt.h"
93 #include "Outline.h"
94 #include "PDFDoc.h"
95 #include "Hints.h"
96 #include "UTF.h"
97 #include "JSInfo.h"
98 #include "ImageEmbeddingUtils.h"
99 
100 //------------------------------------------------------------------------
101 
// Number of bytes read from the beginning of the file when looking
// for the '%PDF' header.
#define headerSearchSize 1024

// Length of a PDF Document ID (PermanentId, UpdateId) once it has been
// written out as hexadecimal text.
#define pdfIdLength 32

// Number of bytes read from the beginning of the file when looking
// for the linearization dictionary.
#define linearizationSearchSize 1024

// Number of bytes read from the end of the file when looking for
// 'startxref'.
#define xrefSearchSize 1024
115 
116 //------------------------------------------------------------------------
117 // PDFDoc
118 //------------------------------------------------------------------------
119 
// Declares a local std::unique_lock named `locker` that locks whatever
// `mutex` names at the point of use (a std::recursive_mutex).  The lock
// is released when `locker` goes out of scope.
#define pdfdocLocker() std::unique_lock<std::recursive_mutex> locker(mutex)
121 
init()122 void PDFDoc::init()
123 {
124     ok = false;
125     errCode = errNone;
126     fileName = nullptr;
127     file = nullptr;
128     str = nullptr;
129     xref = nullptr;
130     linearization = nullptr;
131     catalog = nullptr;
132     hints = nullptr;
133     outline = nullptr;
134     startXRefPos = -1;
135     secHdlr = nullptr;
136     pageCache = nullptr;
137 }
138 
// Construct a document with no stream attached: the members are reset
// by init() and nothing is loaded, so `ok` stays false.
PDFDoc::PDFDoc()
{
    init();
}
143 
PDFDoc(const GooString * fileNameA,const GooString * ownerPassword,const GooString * userPassword,void * guiDataA,const std::function<void ()> & xrefReconstructedCallback)144 PDFDoc::PDFDoc(const GooString *fileNameA, const GooString *ownerPassword, const GooString *userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback)
145 {
146 #ifdef _WIN32
147     int n, i;
148 #endif
149 
150     init();
151 
152     fileName = fileNameA;
153     guiData = guiDataA;
154 #ifdef _WIN32
155     n = fileName->getLength();
156     fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
157     for (i = 0; i < n; ++i) {
158         fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
159     }
160     fileNameU[n] = L'\0';
161 #endif
162 
163     // try to open file
164 #ifdef _WIN32
165     wchar_t *wFileName = (wchar_t *)utf8ToUtf16(fileName->c_str());
166     file = GooFile::open(wFileName);
167     gfree(wFileName);
168 #else
169     file = GooFile::open(fileName->toStr());
170 #endif
171     if (file == nullptr) {
172         // fopen() has failed.
173         // Keep a copy of the errno returned by fopen so that it can be
174         // referred to later.
175         fopenErrno = errno;
176         error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName, strerror(errno));
177         errCode = errOpenFile;
178         return;
179     }
180 
181     // create stream
182     str = new FileStream(file, 0, false, file->size(), Object(objNull));
183 
184     ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
185 }
186 
#ifdef _WIN32
// Windows-only: open the file named by the first fileNameLen wide
// characters of fileNameA and load the document from it.
//
// Both a wide copy (fileNameU) and a narrowed 8-bit copy (fileName) of
// the name are stored; each wide character is truncated to char for the
// latter.  If the file cannot be opened, errCode is set to errOpenFile
// and `ok` stays false.
PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword, GooString *userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback)
{
    init();

    guiData = guiDataA;

    // save both Unicode and 8-bit copies of the file name
    GooString *narrowName = new GooString();
    fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
    for (int k = 0; k < fileNameLen; ++k) {
        narrowName->append((char)fileNameA[k]);
        fileNameU[k] = fileNameA[k];
    }
    fileNameU[fileNameLen] = L'\0';
    fileName = narrowName;

    // try to open file
    // NB: _wfopen is only available in NT
    OSVERSIONINFO version;
    version.dwOSVersionInfoSize = sizeof(version);
    GetVersionEx(&version);
    if (version.dwPlatformId != VER_PLATFORM_WIN32_NT) {
        file = GooFile::open(fileName->toStr());
    } else {
        file = GooFile::open(fileNameU);
    }
    if (!file) {
        error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
        errCode = errOpenFile;
        return;
    }

    // wrap the whole file in a stream and parse the document from it
    str = new FileStream(file, 0, false, file->size(), Object(objNull));
    ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
}
#endif
228 
PDFDoc(BaseStream * strA,const GooString * ownerPassword,const GooString * userPassword,void * guiDataA,const std::function<void ()> & xrefReconstructedCallback)229 PDFDoc::PDFDoc(BaseStream *strA, const GooString *ownerPassword, const GooString *userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback)
230 {
231 #ifdef _WIN32
232     int n, i;
233 #endif
234 
235     init();
236     guiData = guiDataA;
237     if (strA->getFileName()) {
238         fileName = strA->getFileName()->copy();
239 #ifdef _WIN32
240         n = fileName->getLength();
241         fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
242         for (i = 0; i < n; ++i) {
243             fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
244         }
245         fileNameU[n] = L'\0';
246 #endif
247     } else {
248         fileName = nullptr;
249 #ifdef _WIN32
250         fileNameU = NULL;
251 #endif
252     }
253     str = strA;
254     ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
255 }
256 
setup(const GooString * ownerPassword,const GooString * userPassword,const std::function<void ()> & xrefReconstructedCallback)257 bool PDFDoc::setup(const GooString *ownerPassword, const GooString *userPassword, const std::function<void()> &xrefReconstructedCallback)
258 {
259     pdfdocLocker();
260 
261     if (str->getLength() <= 0) {
262         error(errSyntaxError, -1, "Document stream is empty");
263         errCode = errDamaged;
264         return false;
265     }
266 
267     str->setPos(0, -1);
268     if (str->getPos() < 0) {
269         error(errSyntaxError, -1, "Document base stream is not seekable");
270         errCode = errFileIO;
271         return false;
272     }
273 
274     str->reset();
275 
276     // check footer
277     // Adobe does not seem to enforce %%EOF, so we do the same
278     //  if (!checkFooter()) return false;
279 
280     // check header
281     checkHeader();
282 
283     bool wasReconstructed = false;
284 
285     // read xref table
286     xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed, false, xrefReconstructedCallback);
287     if (!xref->isOk()) {
288         if (wasReconstructed) {
289             delete xref;
290             startXRefPos = -1;
291             xref = new XRef(str, getStartXRef(true), getMainXRefEntriesOffset(true), &wasReconstructed, false, xrefReconstructedCallback);
292         }
293         if (!xref->isOk()) {
294             error(errSyntaxError, -1, "Couldn't read xref table");
295             errCode = xref->getErrorCode();
296             return false;
297         }
298     }
299 
300     // check for encryption
301     if (!checkEncryption(ownerPassword, userPassword)) {
302         errCode = errEncrypted;
303         return false;
304     }
305 
306     // read catalog
307     catalog = new Catalog(this);
308     if (catalog && !catalog->isOk()) {
309         if (!wasReconstructed) {
310             // try one more time to construct the Catalog, maybe the problem is damaged XRef
311             delete catalog;
312             delete xref;
313             xref = new XRef(str, 0, 0, nullptr, true, xrefReconstructedCallback);
314             catalog = new Catalog(this);
315         }
316 
317         if (catalog && !catalog->isOk()) {
318             error(errSyntaxError, -1, "Couldn't read page catalog");
319             errCode = errBadCatalog;
320             return false;
321         }
322     }
323 
324     // Extract PDF Subtype information
325     extractPDFSubtype();
326 
327     // done
328     return true;
329 }
330 
~PDFDoc()331 PDFDoc::~PDFDoc()
332 {
333     if (pageCache) {
334         for (int i = 0; i < getNumPages(); i++) {
335             if (pageCache[i]) {
336                 delete pageCache[i];
337             }
338         }
339         gfree(pageCache);
340     }
341     delete secHdlr;
342     if (outline) {
343         delete outline;
344     }
345     if (catalog) {
346         delete catalog;
347     }
348     if (xref) {
349         delete xref;
350     }
351     if (hints) {
352         delete hints;
353     }
354     if (linearization) {
355         delete linearization;
356     }
357     if (str) {
358         delete str;
359     }
360     if (file) {
361         delete file;
362     }
363     if (fileName) {
364         delete fileName;
365     }
366 #ifdef _WIN32
367     if (fileNameU) {
368         gfree(fileNameU);
369     }
370 #endif
371 }
372 
373 // Check for a %%EOF at the end of this stream
checkFooter()374 bool PDFDoc::checkFooter()
375 {
376     // we look in the last 1024 chars because Adobe does the same
377     char *eof = new char[1025];
378     Goffset pos = str->getPos();
379     str->setPos(1024, -1);
380     int i, ch;
381     for (i = 0; i < 1024; i++) {
382         ch = str->getChar();
383         if (ch == EOF)
384             break;
385         eof[i] = ch;
386     }
387     eof[i] = '\0';
388 
389     bool found = false;
390     for (i = i - 5; i >= 0; i--) {
391         if (strncmp(&eof[i], "%%EOF", 5) == 0) {
392             found = true;
393             break;
394         }
395     }
396     if (!found) {
397         error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
398         errCode = errDamaged;
399         delete[] eof;
400         return false;
401     }
402     delete[] eof;
403     str->setPos(pos);
404     return true;
405 }
406 
407 // Check for a PDF header on this stream.  Skip past some garbage
408 // if necessary.
checkHeader()409 void PDFDoc::checkHeader()
410 {
411     char hdrBuf[headerSearchSize + 1];
412     char *p;
413     char *tokptr;
414     int i;
415     int bytesRead;
416 
417     headerPdfMajorVersion = 0;
418     headerPdfMinorVersion = 0;
419 
420     // read up to headerSearchSize bytes from the beginning of the document
421     for (i = 0; i < headerSearchSize; ++i) {
422         const int c = str->getChar();
423         if (c == EOF)
424             break;
425         hdrBuf[i] = c;
426     }
427     bytesRead = i;
428     hdrBuf[bytesRead] = '\0';
429 
430     // find the start of the PDF header if it exists and parse the version
431     bool headerFound = false;
432     for (i = 0; i < bytesRead - 5; ++i) {
433         if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
434             headerFound = true;
435             break;
436         }
437     }
438     if (!headerFound) {
439         error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
440         return;
441     }
442     str->moveStart(i);
443     if (!(p = strtok_r(&hdrBuf[i + 5], " \t\n\r", &tokptr))) {
444         error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
445         return;
446     }
447     sscanf(p, "%d.%d", &headerPdfMajorVersion, &headerPdfMinorVersion);
448     // We don't do the version check. Don't add it back in.
449 }
450 
checkEncryption(const GooString * ownerPassword,const GooString * userPassword)451 bool PDFDoc::checkEncryption(const GooString *ownerPassword, const GooString *userPassword)
452 {
453     bool encrypted;
454     bool ret;
455 
456     Object encrypt = xref->getTrailerDict()->dictLookup("Encrypt");
457     if ((encrypted = encrypt.isDict())) {
458         if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
459             if (secHdlr->isUnencrypted()) {
460                 // no encryption
461                 ret = true;
462             } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
463                 // authorization succeeded
464                 xref->setEncryption(secHdlr->getPermissionFlags(), secHdlr->getOwnerPasswordOk(), secHdlr->getFileKey(), secHdlr->getFileKeyLength(), secHdlr->getEncVersion(), secHdlr->getEncRevision(), secHdlr->getEncAlgorithm());
465                 ret = true;
466             } else {
467                 // authorization failed
468                 ret = false;
469             }
470         } else {
471             // couldn't find the matching security handler
472             ret = false;
473         }
474     } else {
475         // document is not encrypted
476         ret = true;
477     }
478     return ret;
479 }
480 
pdfPartFromString(PDFSubtype subtype,GooString * pdfSubtypeVersion)481 static PDFSubtypePart pdfPartFromString(PDFSubtype subtype, GooString *pdfSubtypeVersion)
482 {
483     const std::regex regex("PDF/(?:A|X|VT|E|UA)-([[:digit:]])(?:[[:alpha:]]{1,2})?:?([[:digit:]]{4})?");
484     std::smatch match;
485     const std::string &pdfsubver = pdfSubtypeVersion->toStr();
486     PDFSubtypePart subtypePart = subtypePartNone;
487 
488     if (std::regex_search(pdfsubver, match, regex)) {
489         int date = 0;
490         const int part = std::stoi(match.str(1));
491 
492         if (match[2].matched) {
493             date = std::stoi(match.str(2));
494         }
495 
496         switch (subtype) {
497         case subtypePDFX:
498             switch (part) {
499             case 1:
500                 switch (date) {
501                 case 2001:
502                 default:
503                     subtypePart = subtypePart1;
504                     break;
505                 case 2003:
506                     subtypePart = subtypePart4;
507                     break;
508                 }
509                 break;
510             case 2:
511                 subtypePart = subtypePart5;
512                 break;
513             case 3:
514                 switch (date) {
515                 case 2002:
516                 default:
517                     subtypePart = subtypePart3;
518                     break;
519                 case 2003:
520                     subtypePart = subtypePart6;
521                     break;
522                 }
523                 break;
524             case 4:
525                 subtypePart = subtypePart7;
526                 break;
527             case 5:
528                 subtypePart = subtypePart8;
529                 break;
530             }
531             break;
532         default:
533             subtypePart = (PDFSubtypePart)part;
534             break;
535         }
536     }
537 
538     return subtypePart;
539 }
540 
pdfConformanceFromString(GooString * pdfSubtypeVersion)541 static PDFSubtypeConformance pdfConformanceFromString(GooString *pdfSubtypeVersion)
542 {
543     const std::regex regex("PDF/(?:A|X|VT|E|UA)-[[:digit:]]([[:alpha:]]+)");
544     std::smatch match;
545     const std::string &pdfsubver = pdfSubtypeVersion->toStr();
546     PDFSubtypeConformance pdfConf = subtypeConfNone;
547 
548     // match contains the PDF conformance (A, B, G, N, P, PG or U)
549     if (std::regex_search(pdfsubver, match, regex)) {
550         GooString *conf = new GooString(match.str(1));
551         // Convert to lowercase as the conformance may appear in both cases
552         conf->lowerCase();
553         if (conf->cmp("a") == 0) {
554             pdfConf = subtypeConfA;
555         } else if (conf->cmp("b") == 0) {
556             pdfConf = subtypeConfB;
557         } else if (conf->cmp("g") == 0) {
558             pdfConf = subtypeConfG;
559         } else if (conf->cmp("n") == 0) {
560             pdfConf = subtypeConfN;
561         } else if (conf->cmp("p") == 0) {
562             pdfConf = subtypeConfP;
563         } else if (conf->cmp("pg") == 0) {
564             pdfConf = subtypeConfPG;
565         } else if (conf->cmp("u") == 0) {
566             pdfConf = subtypeConfU;
567         } else {
568             pdfConf = subtypeConfNone;
569         }
570         delete conf;
571     }
572 
573     return pdfConf;
574 }
575 
extractPDFSubtype()576 void PDFDoc::extractPDFSubtype()
577 {
578     pdfSubtype = subtypeNull;
579     pdfPart = subtypePartNull;
580     pdfConformance = subtypeConfNull;
581 
582     GooString *pdfSubtypeVersion = nullptr;
583     // Find PDF InfoDict subtype key if any
584     if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFA1Version"))) {
585         pdfSubtype = subtypePDFA;
586     } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFEVersion"))) {
587         pdfSubtype = subtypePDFE;
588     } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFUAVersion"))) {
589         pdfSubtype = subtypePDFUA;
590     } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFVTVersion"))) {
591         pdfSubtype = subtypePDFVT;
592     } else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFXVersion"))) {
593         pdfSubtype = subtypePDFX;
594     } else {
595         pdfSubtype = subtypeNone;
596         pdfPart = subtypePartNone;
597         pdfConformance = subtypeConfNone;
598         return;
599     }
600 
601     // Extract part from version string
602     pdfPart = pdfPartFromString(pdfSubtype, pdfSubtypeVersion);
603 
604     // Extract conformance from version string
605     pdfConformance = pdfConformanceFromString(pdfSubtypeVersion);
606 
607     delete pdfSubtypeVersion;
608 }
609 
addSignatureFieldsToVector(FormField * ff,std::vector<FormFieldSignature * > & res)610 static void addSignatureFieldsToVector(FormField *ff, std::vector<FormFieldSignature *> &res)
611 {
612     if (ff->getNumChildren() == 0) {
613         if (ff->getType() == formSignature) {
614             res.push_back(static_cast<FormFieldSignature *>(ff));
615         }
616     } else {
617         for (int i = 0; i < ff->getNumChildren(); ++i) {
618             FormField *children = ff->getChildren(i);
619             addSignatureFieldsToVector(children, res);
620         }
621     }
622 }
623 
getSignatureFields()624 std::vector<FormFieldSignature *> PDFDoc::getSignatureFields()
625 {
626     std::vector<FormFieldSignature *> res;
627 
628     const Form *f = catalog->getForm();
629     if (!f)
630         return res;
631 
632     const int nRootFields = f->getNumFields();
633     for (int i = 0; i < nRootFields; ++i) {
634         FormField *ff = f->getRootField(i);
635         addSignatureFieldsToVector(ff, res);
636     }
637     return res;
638 }
639 
getNumSignatureFields()640 int PDFDoc::getNumSignatureFields()
641 {
642     const Form *f = catalog->getForm();
643 
644     if (!f)
645         return 0;
646 
647     return f->getNumFields();
648 }
649 
displayPage(OutputDev * out,int page,double hDPI,double vDPI,int rotate,bool useMediaBox,bool crop,bool printing,bool (* abortCheckCbk)(void * data),void * abortCheckCbkData,bool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData,bool copyXRef)650 void PDFDoc::displayPage(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data), void *abortCheckCbkData,
651                          bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData, bool copyXRef)
652 {
653     if (globalParams->getPrintCommands()) {
654         printf("***** page %d *****\n", page);
655     }
656 
657     if (getPage(page))
658         getPage(page)->display(out, hDPI, vDPI, rotate, useMediaBox, crop, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
659 }
660 
displayPages(OutputDev * out,int firstPage,int lastPage,double hDPI,double vDPI,int rotate,bool useMediaBox,bool crop,bool printing,bool (* abortCheckCbk)(void * data),void * abortCheckCbkData,bool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData)661 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data), void *abortCheckCbkData,
662                           bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData)
663 {
664     int page;
665 
666     for (page = firstPage; page <= lastPage; ++page) {
667         displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData);
668     }
669 }
670 
displayPageSlice(OutputDev * out,int page,double hDPI,double vDPI,int rotate,bool useMediaBox,bool crop,bool printing,int sliceX,int sliceY,int sliceW,int sliceH,bool (* abortCheckCbk)(void * data),void * abortCheckCbkData,bool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData,bool copyXRef)671 void PDFDoc::displayPageSlice(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, int sliceX, int sliceY, int sliceW, int sliceH, bool (*abortCheckCbk)(void *data),
672                               void *abortCheckCbkData, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData, bool copyXRef)
673 {
674     if (getPage(page))
675         getPage(page)->displaySlice(out, hDPI, vDPI, rotate, useMediaBox, crop, sliceX, sliceY, sliceW, sliceH, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
676 }
677 
getLinks(int page)678 std::unique_ptr<Links> PDFDoc::getLinks(int page)
679 {
680     Page *p = getPage(page);
681     if (!p) {
682         return std::make_unique<Links>(nullptr);
683     }
684     return p->getLinks();
685 }
686 
processLinks(OutputDev * out,int page)687 void PDFDoc::processLinks(OutputDev *out, int page)
688 {
689     if (getPage(page))
690         getPage(page)->processLinks(out);
691 }
692 
getLinearization()693 Linearization *PDFDoc::getLinearization()
694 {
695     if (!linearization) {
696         linearization = new Linearization(str);
697         linearizationState = 0;
698     }
699     return linearization;
700 }
701 
checkLinearization()702 bool PDFDoc::checkLinearization()
703 {
704     if (linearization == nullptr)
705         return false;
706     if (linearizationState == 1)
707         return true;
708     if (linearizationState == 2)
709         return false;
710     if (!hints) {
711         hints = new Hints(str, linearization, getXRef(), secHdlr);
712     }
713     if (!hints->isOk()) {
714         linearizationState = 2;
715         return false;
716     }
717     for (int page = 1; page <= linearization->getNumPages(); page++) {
718         Ref pageRef;
719 
720         pageRef.num = hints->getPageObjectNum(page);
721         if (!pageRef.num) {
722             linearizationState = 2;
723             return false;
724         }
725 
726         // check for bogus ref - this can happen in corrupted PDF files
727         if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
728             linearizationState = 2;
729             return false;
730         }
731 
732         pageRef.gen = xref->getEntry(pageRef.num)->gen;
733         Object obj = xref->fetch(pageRef);
734         if (!obj.isDict("Page")) {
735             linearizationState = 2;
736             return false;
737         }
738     }
739     linearizationState = 1;
740     return true;
741 }
742 
isLinearized(bool tryingToReconstruct)743 bool PDFDoc::isLinearized(bool tryingToReconstruct)
744 {
745     if ((str->getLength()) && (getLinearization()->getLength() == str->getLength()))
746         return true;
747     else {
748         if (tryingToReconstruct)
749             return getLinearization()->getLength() > 0;
750         else
751             return false;
752     }
753 }
754 
setDocInfoStringEntry(const char * key,GooString * value)755 void PDFDoc::setDocInfoStringEntry(const char *key, GooString *value)
756 {
757     bool removeEntry = !value || value->getLength() == 0 || value->hasJustUnicodeMarker();
758     if (removeEntry) {
759         delete value;
760     }
761 
762     Object infoObj = getDocInfo();
763     if (infoObj.isNull() && removeEntry) {
764         // No info dictionary, so no entry to remove.
765         return;
766     }
767 
768     Ref infoObjRef;
769     infoObj = xref->createDocInfoIfNeeded(&infoObjRef);
770     if (removeEntry) {
771         infoObj.dictSet(key, Object(objNull));
772     } else {
773         infoObj.dictSet(key, Object(value));
774     }
775 
776     if (infoObj.dictGetLength() == 0) {
777         // Info dictionary is empty. Remove it altogether.
778         removeDocInfo();
779     } else {
780         xref->setModifiedObject(&infoObj, infoObjRef);
781     }
782 }
783 
getDocInfoStringEntry(const char * key)784 GooString *PDFDoc::getDocInfoStringEntry(const char *key)
785 {
786     Object infoObj = getDocInfo();
787     if (!infoObj.isDict()) {
788         return nullptr;
789     }
790 
791     Object entryObj = infoObj.dictLookup(key);
792 
793     GooString *result;
794 
795     if (entryObj.isString()) {
796         result = entryObj.takeString();
797     } else {
798         result = nullptr;
799     }
800 
801     return result;
802 }
803 
get_id(const GooString * encodedidstring,GooString * id)804 static bool get_id(const GooString *encodedidstring, GooString *id)
805 {
806     const char *encodedid = encodedidstring->c_str();
807     char pdfid[pdfIdLength + 1];
808     int n;
809 
810     if (encodedidstring->getLength() != pdfIdLength / 2)
811         return false;
812 
813     n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff, encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff,
814                 encodedid[7] & 0xff, encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff, encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
815     if (n != pdfIdLength)
816         return false;
817 
818     id->Set(pdfid, pdfIdLength);
819     return true;
820 }
821 
getID(GooString * permanent_id,GooString * update_id) const822 bool PDFDoc::getID(GooString *permanent_id, GooString *update_id) const
823 {
824     Object obj = xref->getTrailerDict()->dictLookup("ID");
825 
826     if (obj.isArray() && obj.arrayGetLength() == 2) {
827         if (permanent_id) {
828             Object obj2 = obj.arrayGet(0);
829             if (obj2.isString()) {
830                 if (!get_id(obj2.getString(), permanent_id)) {
831                     return false;
832                 }
833             } else {
834                 error(errSyntaxError, -1, "Invalid permanent ID");
835                 return false;
836             }
837         }
838 
839         if (update_id) {
840             Object obj2 = obj.arrayGet(1);
841             if (obj2.isString()) {
842                 if (!get_id(obj2.getString(), update_id)) {
843                     return false;
844                 }
845             } else {
846                 error(errSyntaxError, -1, "Invalid update ID");
847                 return false;
848             }
849         }
850 
851         return true;
852     }
853 
854     return false;
855 }
856 
getHints()857 Hints *PDFDoc::getHints()
858 {
859     if (!hints && isLinearized()) {
860         hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
861     }
862 
863     return hints;
864 }
865 
savePageAs(const GooString * name,int pageNo)866 int PDFDoc::savePageAs(const GooString *name, int pageNo)
867 {
868     FILE *f;
869     OutStream *outStr;
870     XRef *yRef, *countRef;
871 
872     if (file && file->modificationTimeChangedSinceOpen())
873         return errFileChangedSinceOpen;
874 
875     int rootNum = getXRef()->getNumObjects() + 1;
876 
877     // Make sure that special flags are set, because we are going to read
878     // all objects, including Unencrypted ones.
879     xref->scanSpecialFlags();
880 
881     unsigned char *fileKey;
882     CryptAlgorithm encAlgorithm;
883     int keyLength;
884     xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
885 
886     if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
887         error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages());
888         return errOpenFile;
889     }
890     const PDFRectangle *cropBox = nullptr;
891     if (getCatalog()->getPage(pageNo)->isCropped()) {
892         cropBox = getCatalog()->getPage(pageNo)->getCropBox();
893     }
894     replacePageDict(pageNo, getCatalog()->getPage(pageNo)->getRotate(), getCatalog()->getPage(pageNo)->getMediaBox(), cropBox);
895     Ref *refPage = getCatalog()->getPageRef(pageNo);
896     Object page = getXRef()->fetch(*refPage);
897 
898     if (!(f = openFile(name->c_str(), "wb"))) {
899         error(errIO, -1, "Couldn't open file '{0:t}'", name);
900         return errOpenFile;
901     }
902     outStr = new FileOutStream(f, 0);
903 
904     yRef = new XRef(getXRef()->getTrailerDict());
905 
906     if (secHdlr != nullptr && !secHdlr->isUnencrypted()) {
907         yRef->setEncryption(secHdlr->getPermissionFlags(), secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
908     }
909     countRef = new XRef();
910     Object *trailerObj = getXRef()->getTrailerDict();
911     if (trailerObj->isDict()) {
912         markPageObjects(trailerObj->getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
913     }
914     yRef->add(0, 65535, 0, false);
915     writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
916 
917     // get and mark info dict
918     Object infoObj = getXRef()->getDocInfo();
919     if (infoObj.isDict()) {
920         Dict *infoDict = infoObj.getDict();
921         markPageObjects(infoDict, yRef, countRef, 0, refPage->num, rootNum + 2);
922         if (trailerObj->isDict()) {
923             Dict *trailerDict = trailerObj->getDict();
924             const Object &ref = trailerDict->lookupNF("Info");
925             if (ref.isRef()) {
926                 yRef->add(ref.getRef(), 0, true);
927                 if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
928                     yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
929                 }
930             }
931         }
932     }
933 
934     // get and mark output intents etc.
935     Object catObj = getXRef()->getCatalog();
936     Dict *catDict = catObj.getDict();
937     Object pagesObj = catDict->lookup("Pages");
938     Object afObj = catDict->lookupNF("AcroForm").copy();
939     if (!afObj.isNull()) {
940         markAcroForm(&afObj, yRef, countRef, 0, refPage->num, rootNum + 2);
941     }
942     Dict *pagesDict = pagesObj.getDict();
943     Object resourcesObj = pagesDict->lookup("Resources");
944     if (resourcesObj.isDict())
945         markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
946     markPageObjects(catDict, yRef, countRef, 0, refPage->num, rootNum + 2);
947 
948     Dict *pageDict = page.getDict();
949     if (resourcesObj.isNull() && !pageDict->hasKey("Resources")) {
950         Object *resourceDictObject = getCatalog()->getPage(pageNo)->getResourceDictObject();
951         if (resourceDictObject->isDict()) {
952             resourcesObj = resourceDictObject->copy();
953             markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
954         }
955     }
956     markPageObjects(pageDict, yRef, countRef, 0, refPage->num, rootNum + 2);
957     Object annotsObj = pageDict->lookupNF("Annots").copy();
958     if (!annotsObj.isNull()) {
959         markAnnotations(&annotsObj, yRef, countRef, 0, refPage->num, rootNum + 2);
960     }
961     yRef->markUnencrypted();
962     writePageObjects(outStr, yRef, 0);
963 
964     yRef->add(rootNum, 0, outStr->getPos(), true);
965     outStr->printf("%d 0 obj\n", rootNum);
966     outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
967     for (int j = 0; j < catDict->getLength(); j++) {
968         const char *key = catDict->getKey(j);
969         if (strcmp(key, "Type") != 0 && strcmp(key, "Catalog") != 0 && strcmp(key, "Pages") != 0) {
970             if (j > 0)
971                 outStr->printf(" ");
972             Object value = catDict->getValNF(j).copy();
973             outStr->printf("/%s ", key);
974             writeObject(&value, outStr, getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
975         }
976     }
977     outStr->printf(">>\nendobj\n");
978 
979     yRef->add(rootNum + 1, 0, outStr->getPos(), true);
980     outStr->printf("%d 0 obj\n", rootNum + 1);
981     outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
982     if (resourcesObj.isDict()) {
983         outStr->printf("/Resources ");
984         writeObject(&resourcesObj, outStr, getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
985     }
986     outStr->printf(">>\n");
987     outStr->printf("endobj\n");
988 
989     yRef->add(rootNum + 2, 0, outStr->getPos(), true);
990     outStr->printf("%d 0 obj\n", rootNum + 2);
991     outStr->printf("<< ");
992     for (int n = 0; n < pageDict->getLength(); n++) {
993         if (n > 0)
994             outStr->printf(" ");
995         const char *key = pageDict->getKey(n);
996         Object value = pageDict->getValNF(n).copy();
997         if (strcmp(key, "Parent") == 0) {
998             outStr->printf("/Parent %d 0 R", rootNum + 1);
999         } else {
1000             outStr->printf("/%s ", key);
1001             writeObject(&value, outStr, getXRef(), 0, nullptr, cryptRC4, 0, 0, 0);
1002         }
1003     }
1004     outStr->printf(" >>\nendobj\n");
1005 
1006     Goffset uxrefOffset = outStr->getPos();
1007     Ref ref;
1008     ref.num = rootNum;
1009     ref.gen = 0;
1010     Object trailerDict = createTrailerDict(rootNum + 3, false, 0, &ref, getXRef(), name->c_str(), uxrefOffset);
1011     writeXRefTableTrailer(std::move(trailerDict), yRef, false /* do not write unnecessary entries */, uxrefOffset, outStr, getXRef());
1012 
1013     outStr->close();
1014     fclose(f);
1015     delete yRef;
1016     delete countRef;
1017     delete outStr;
1018 
1019     return errNone;
1020 }
1021 
saveAs(const GooString * name,PDFWriteMode mode)1022 int PDFDoc::saveAs(const GooString *name, PDFWriteMode mode)
1023 {
1024     FILE *f;
1025     OutStream *outStr;
1026     int res;
1027 
1028     if (!(f = openFile(name->c_str(), "wb"))) {
1029         error(errIO, -1, "Couldn't open file '{0:t}'", name);
1030         return errOpenFile;
1031     }
1032     outStr = new FileOutStream(f, 0);
1033     res = saveAs(outStr, mode);
1034     delete outStr;
1035     fclose(f);
1036     return res;
1037 }
1038 
saveAs(OutStream * outStr,PDFWriteMode mode)1039 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode)
1040 {
1041     if (file && file->modificationTimeChangedSinceOpen())
1042         return errFileChangedSinceOpen;
1043 
1044     if (!xref->isModified() && mode == writeStandard) {
1045         // simply copy the original file
1046         saveWithoutChangesAs(outStr);
1047     } else if (mode == writeForceRewrite) {
1048         saveCompleteRewrite(outStr);
1049     } else {
1050         saveIncrementalUpdate(outStr);
1051     }
1052 
1053     return errNone;
1054 }
1055 
saveWithoutChangesAs(const GooString * name)1056 int PDFDoc::saveWithoutChangesAs(const GooString *name)
1057 {
1058     FILE *f;
1059     OutStream *outStr;
1060     int res;
1061 
1062     if (!(f = openFile(name->c_str(), "wb"))) {
1063         error(errIO, -1, "Couldn't open file '{0:t}'", name);
1064         return errOpenFile;
1065     }
1066 
1067     outStr = new FileOutStream(f, 0);
1068     res = saveWithoutChangesAs(outStr);
1069     delete outStr;
1070 
1071     fclose(f);
1072 
1073     return res;
1074 }
1075 
saveWithoutChangesAs(OutStream * outStr)1076 int PDFDoc::saveWithoutChangesAs(OutStream *outStr)
1077 {
1078     int c;
1079 
1080     if (file && file->modificationTimeChangedSinceOpen())
1081         return errFileChangedSinceOpen;
1082 
1083     BaseStream *copyStr = str->copy();
1084     copyStr->reset();
1085     while ((c = copyStr->getChar()) != EOF) {
1086         outStr->put(c);
1087     }
1088     copyStr->close();
1089     delete copyStr;
1090 
1091     return errNone;
1092 }
1093 
saveIncrementalUpdate(OutStream * outStr)1094 void PDFDoc::saveIncrementalUpdate(OutStream *outStr)
1095 {
1096     XRef *uxref;
1097     int c;
1098     // copy the original file
1099     BaseStream *copyStr = str->copy();
1100     copyStr->reset();
1101     while ((c = copyStr->getChar()) != EOF) {
1102         outStr->put(c);
1103     }
1104     copyStr->close();
1105     delete copyStr;
1106 
1107     unsigned char *fileKey;
1108     CryptAlgorithm encAlgorithm;
1109     int keyLength;
1110     xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1111 
1112     uxref = new XRef();
1113     uxref->add(0, 65535, 0, false);
1114     xref->lock();
1115     for (int i = 0; i < xref->getNumObjects(); i++) {
1116         if ((xref->getEntry(i)->type == xrefEntryFree) && (xref->getEntry(i)->gen == 0)) // we skip the irrelevant free objects
1117             continue;
1118 
1119         if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { // we have an updated object
1120             Ref ref;
1121             ref.num = i;
1122             ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
1123             if (xref->getEntry(i)->type != xrefEntryFree) {
1124                 Object obj1 = xref->fetch(ref, 1 /* recursion */);
1125                 Goffset offset = writeObjectHeader(&ref, outStr);
1126                 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref);
1127                 writeObjectFooter(outStr);
1128                 uxref->add(ref, offset, true);
1129             } else {
1130                 uxref->add(ref, 0, false);
1131             }
1132         }
1133     }
1134     xref->unlock();
1135     // because of "uxref->add(0, 65535, 0, false);" uxref->getNumObjects() will
1136     // always be >= 1; if it is 1, it means there is nothing to update
1137     if (uxref->getNumObjects() == 1) {
1138         delete uxref;
1139         return;
1140     }
1141 
1142     Goffset uxrefOffset = outStr->getPos();
1143     int numobjects = xref->getNumObjects();
1144     const char *fileNameA = fileName ? fileName->c_str() : nullptr;
1145     Ref rootRef, uxrefStreamRef;
1146     rootRef.num = getXRef()->getRootNum();
1147     rootRef.gen = getXRef()->getRootGen();
1148 
1149     // Output a xref stream if there is a xref stream already
1150     bool xRefStream = xref->isXRefStream();
1151 
1152     if (xRefStream) {
1153         // Append an entry for the xref stream itself
1154         uxrefStreamRef.num = numobjects++;
1155         uxrefStreamRef.gen = 0;
1156         uxref->add(uxrefStreamRef, uxrefOffset, true);
1157     }
1158 
1159     Object trailerDict = createTrailerDict(numobjects, true, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
1160     if (xRefStream) {
1161         writeXRefStreamTrailer(std::move(trailerDict), uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
1162     } else {
1163         writeXRefTableTrailer(std::move(trailerDict), uxref, false, uxrefOffset, outStr, getXRef());
1164     }
1165 
1166     delete uxref;
1167 }
1168 
saveCompleteRewrite(OutStream * outStr)1169 void PDFDoc::saveCompleteRewrite(OutStream *outStr)
1170 {
1171     // Make sure that special flags are set, because we are going to read
1172     // all objects, including Unencrypted ones.
1173     xref->scanSpecialFlags();
1174 
1175     unsigned char *fileKey;
1176     CryptAlgorithm encAlgorithm;
1177     int keyLength;
1178     xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1179 
1180     writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
1181     XRef *uxref = new XRef();
1182     uxref->add(0, 65535, 0, false);
1183     xref->lock();
1184     for (int i = 0; i < xref->getNumObjects(); i++) {
1185         Ref ref;
1186         XRefEntryType type = xref->getEntry(i)->type;
1187         if (type == xrefEntryFree) {
1188             ref.num = i;
1189             ref.gen = xref->getEntry(i)->gen;
1190             /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
1191                 and we don't want the one with num=0 because it has already been added (gen = 65535)*/
1192             if (ref.gen > 0 && ref.num > 0)
1193                 uxref->add(ref, 0, false);
1194         } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
1195             // This entry must not be written, put a free entry instead (with incremented gen)
1196             ref.num = i;
1197             ref.gen = xref->getEntry(i)->gen + 1;
1198             uxref->add(ref, 0, false);
1199         } else if (type == xrefEntryUncompressed) {
1200             ref.num = i;
1201             ref.gen = xref->getEntry(i)->gen;
1202             Object obj1 = xref->fetch(ref, 1 /* recursion */);
1203             Goffset offset = writeObjectHeader(&ref, outStr);
1204             // Write unencrypted objects in unencrypted form
1205             if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) {
1206                 writeObject(&obj1, outStr, nullptr, cryptRC4, 0, 0, 0);
1207             } else {
1208                 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref);
1209             }
1210             writeObjectFooter(outStr);
1211             uxref->add(ref, offset, true);
1212         } else if (type == xrefEntryCompressed) {
1213             ref.num = i;
1214             ref.gen = 0; // compressed entries have gen == 0
1215             Object obj1 = xref->fetch(ref, 1 /* recursion */);
1216             Goffset offset = writeObjectHeader(&ref, outStr);
1217             writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref);
1218             writeObjectFooter(outStr);
1219             uxref->add(ref, offset, true);
1220         }
1221     }
1222     xref->unlock();
1223     Goffset uxrefOffset = outStr->getPos();
1224     writeXRefTableTrailer(uxrefOffset, uxref, true /* write all entries */, uxref->getNumObjects(), outStr, false /* complete rewrite */);
1225     delete uxref;
1226 }
1227 
writeDictionnary(Dict * dict,OutStream * outStr,XRef * xRef,unsigned int numOffset,unsigned char * fileKey,CryptAlgorithm encAlgorithm,int keyLength,Ref ref,std::set<Dict * > * alreadyWrittenDicts)1228 void PDFDoc::writeDictionnary(Dict *dict, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts)
1229 {
1230     bool deleteSet = false;
1231     if (!alreadyWrittenDicts) {
1232         alreadyWrittenDicts = new std::set<Dict *>;
1233         deleteSet = true;
1234     }
1235 
1236     if (alreadyWrittenDicts->find(dict) != alreadyWrittenDicts->end()) {
1237         error(errSyntaxWarning, -1, "PDFDoc::writeDictionnary: Found recursive dicts");
1238         if (deleteSet)
1239             delete alreadyWrittenDicts;
1240         return;
1241     } else {
1242         alreadyWrittenDicts->insert(dict);
1243     }
1244 
1245     outStr->printf("<<");
1246     for (int i = 0; i < dict->getLength(); i++) {
1247         GooString keyName(dict->getKey(i));
1248         GooString *keyNameToPrint = keyName.sanitizedName(false /* non ps mode */);
1249         outStr->printf("/%s ", keyNameToPrint->c_str());
1250         delete keyNameToPrint;
1251         Object obj1 = dict->getValNF(i).copy();
1252         writeObject(&obj1, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1253     }
1254     outStr->printf(">> ");
1255 
1256     if (deleteSet) {
1257         delete alreadyWrittenDicts;
1258     }
1259 }
1260 
writeStream(Stream * str,OutStream * outStr)1261 void PDFDoc::writeStream(Stream *str, OutStream *outStr)
1262 {
1263     outStr->printf("stream\r\n");
1264     str->reset();
1265     for (int c = str->getChar(); c != EOF; c = str->getChar()) {
1266         outStr->printf("%c", c);
1267     }
1268     outStr->printf("\r\nendstream\r\n");
1269 }
1270 
writeRawStream(Stream * str,OutStream * outStr)1271 void PDFDoc::writeRawStream(Stream *str, OutStream *outStr)
1272 {
1273     Object obj1 = str->getDict()->lookup("Length");
1274     if (!obj1.isInt() && !obj1.isInt64()) {
1275         error(errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1276         return;
1277     }
1278 
1279     Goffset length;
1280     if (obj1.isInt())
1281         length = obj1.getInt();
1282     else
1283         length = obj1.getInt64();
1284 
1285     outStr->printf("stream\r\n");
1286     str->unfilteredReset();
1287     for (Goffset i = 0; i < length; i++) {
1288         int c = str->getUnfilteredChar();
1289         if (unlikely(c == EOF)) {
1290             error(errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
1291             break;
1292         }
1293         outStr->printf("%c", c);
1294     }
1295     str->reset();
1296     outStr->printf("\r\nendstream\r\n");
1297 }
1298 
writeString(const GooString * s,OutStream * outStr,const unsigned char * fileKey,CryptAlgorithm encAlgorithm,int keyLength,Ref ref)1299 void PDFDoc::writeString(const GooString *s, OutStream *outStr, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref)
1300 {
1301     // Encrypt string if encryption is enabled
1302     GooString *sEnc = nullptr;
1303     if (fileKey) {
1304         EncryptStream *enc = new EncryptStream(new MemStream(s->c_str(), 0, s->getLength(), Object(objNull)), fileKey, encAlgorithm, keyLength, ref);
1305         sEnc = new GooString();
1306         int c;
1307         enc->reset();
1308         while ((c = enc->getChar()) != EOF) {
1309             sEnc->append((char)c);
1310         }
1311 
1312         delete enc;
1313         s = sEnc;
1314     }
1315 
1316     // Write data
1317     if (s->hasUnicodeMarker()) {
1318         // unicode string don't necessary end with \0
1319         const char *c = s->c_str();
1320         outStr->printf("(");
1321         for (int i = 0; i < s->getLength(); i++) {
1322             char unescaped = *(c + i) & 0x000000ff;
1323             // escape if needed
1324             if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
1325                 outStr->printf("%c", '\\');
1326             outStr->printf("%c", unescaped);
1327         }
1328         outStr->printf(") ");
1329     } else {
1330         const char *c = s->c_str();
1331         outStr->printf("(");
1332         for (int i = 0; i < s->getLength(); i++) {
1333             char unescaped = *(c + i) & 0x000000ff;
1334             // escape if needed
1335             if (unescaped == '\r')
1336                 outStr->printf("\\r");
1337             else if (unescaped == '\n')
1338                 outStr->printf("\\n");
1339             else {
1340                 if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
1341                     outStr->printf("%c", '\\');
1342                 }
1343                 outStr->printf("%c", unescaped);
1344             }
1345         }
1346         outStr->printf(") ");
1347     }
1348 
1349     delete sEnc;
1350 }
1351 
writeObjectHeader(Ref * ref,OutStream * outStr)1352 Goffset PDFDoc::writeObjectHeader(Ref *ref, OutStream *outStr)
1353 {
1354     Goffset offset = outStr->getPos();
1355     outStr->printf("%i %i obj\r\n", ref->num, ref->gen);
1356     return offset;
1357 }
1358 
writeObject(Object * obj,OutStream * outStr,XRef * xRef,unsigned int numOffset,unsigned char * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen,std::set<Dict * > * alreadyWrittenDicts)1359 void PDFDoc::writeObject(Object *obj, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict *> *alreadyWrittenDicts)
1360 {
1361     writeObject(obj, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, { objNum, objGen }, alreadyWrittenDicts);
1362 }
1363 
writeObject(Object * obj,OutStream * outStr,XRef * xRef,unsigned int numOffset,unsigned char * fileKey,CryptAlgorithm encAlgorithm,int keyLength,Ref ref,std::set<Dict * > * alreadyWrittenDicts)1364 void PDFDoc::writeObject(Object *obj, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts)
1365 {
1366     Array *array;
1367 
1368     switch (obj->getType()) {
1369     case objBool:
1370         outStr->printf("%s ", obj->getBool() ? "true" : "false");
1371         break;
1372     case objInt:
1373         outStr->printf("%i ", obj->getInt());
1374         break;
1375     case objInt64:
1376         outStr->printf("%lli ", obj->getInt64());
1377         break;
1378     case objReal: {
1379         GooString s;
1380         s.appendf("{0:.10g}", obj->getReal());
1381         outStr->printf("%s ", s.c_str());
1382         break;
1383     }
1384     case objString:
1385         writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, ref);
1386         break;
1387     case objHexString: {
1388         const GooString *s = obj->getHexString();
1389         outStr->printf("<");
1390         for (int i = 0; i < s->getLength(); i++) {
1391             outStr->printf("%02x", s->getChar(i) & 0xff);
1392         }
1393         outStr->printf("> ");
1394         break;
1395     }
1396     case objName: {
1397         GooString name(obj->getName());
1398         GooString *nameToPrint = name.sanitizedName(false /* non ps mode */);
1399         outStr->printf("/%s ", nameToPrint->c_str());
1400         delete nameToPrint;
1401         break;
1402     }
1403     case objNull:
1404         outStr->printf("null ");
1405         break;
1406     case objArray:
1407         array = obj->getArray();
1408         outStr->printf("[");
1409         for (int i = 0; i < array->getLength(); i++) {
1410             Object obj1 = array->getNF(i).copy();
1411             writeObject(&obj1, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref);
1412         }
1413         outStr->printf("] ");
1414         break;
1415     case objDict:
1416         writeDictionnary(obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1417         break;
1418     case objStream: {
1419         // We can't modify stream with the current implementation (no write functions in Stream API)
1420         // => the only type of streams which that have been modified are internal streams (=strWeird)
1421         Stream *stream = obj->getStream();
1422         if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
1423             // we write the stream unencoded => TODO: write stream encoder
1424 
1425             // Encrypt stream
1426             EncryptStream *encStream = nullptr;
1427             bool removeFilter = true;
1428             if (stream->getKind() == strWeird && fileKey) {
1429                 Object filter = stream->getDict()->lookup("Filter");
1430                 if (!filter.isName("Crypt")) {
1431                     if (filter.isArray()) {
1432                         for (int i = 0; i < filter.arrayGetLength(); i++) {
1433                             Object filterEle = filter.arrayGet(i);
1434                             if (filterEle.isName("Crypt")) {
1435                                 removeFilter = false;
1436                                 break;
1437                             }
1438                         }
1439                         if (removeFilter) {
1440                             encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
1441                             encStream->setAutoDelete(false);
1442                             stream = encStream;
1443                         }
1444                     } else {
1445                         encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
1446                         encStream->setAutoDelete(false);
1447                         stream = encStream;
1448                     }
1449                 } else {
1450                     removeFilter = false;
1451                 }
1452             } else if (fileKey != nullptr) { // Encrypt stream
1453                 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
1454                 encStream->setAutoDelete(false);
1455                 stream = encStream;
1456             }
1457 
1458             stream->reset();
1459             // recalculate stream length
1460             Goffset tmp = 0;
1461             for (int c = stream->getChar(); c != EOF; c = stream->getChar()) {
1462                 tmp++;
1463             }
1464             stream->getDict()->set("Length", Object(tmp));
1465 
1466             // Remove Stream encoding
1467             AutoFreeMemStream *internalStream = dynamic_cast<AutoFreeMemStream *>(stream);
1468             if (internalStream && internalStream->isFilterRemovalForbidden()) {
1469                 removeFilter = false;
1470             }
1471             if (removeFilter) {
1472                 stream->getDict()->remove("Filter");
1473             }
1474             stream->getDict()->remove("DecodeParms");
1475 
1476             writeDictionnary(stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1477             writeStream(stream, outStr);
1478             delete encStream;
1479         } else if (fileKey != nullptr && stream->getKind() == strFile && static_cast<FileStream *>(stream)->getNeedsEncryptionOnSave()) {
1480             EncryptStream *encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref);
1481             encStream->setAutoDelete(false);
1482             writeDictionnary(encStream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1483             writeStream(encStream, outStr);
1484             delete encStream;
1485         } else {
1486             // raw stream copy
1487             FilterStream *fs = dynamic_cast<FilterStream *>(stream);
1488             if (fs) {
1489                 BaseStream *bs = fs->getBaseStream();
1490                 if (bs) {
1491                     Goffset streamEnd;
1492                     if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
1493                         Goffset val = streamEnd - bs->getStart();
1494                         stream->getDict()->set("Length", Object(val));
1495                     }
1496                 }
1497             }
1498             writeDictionnary(stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts);
1499             writeRawStream(stream, outStr);
1500         }
1501         break;
1502     }
1503     case objRef:
1504         outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
1505         break;
1506     case objCmd:
1507         outStr->printf("%s\n", obj->getCmd());
1508         break;
1509     case objError:
1510         outStr->printf("error\r\n");
1511         break;
1512     case objEOF:
1513         outStr->printf("eof\r\n");
1514         break;
1515     case objNone:
1516         outStr->printf("none\r\n");
1517         break;
1518     default:
1519         error(errUnimplemented, -1, "Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj->getType());
1520         break;
1521     }
1522 }
1523 
writeObjectFooter(OutStream * outStr)1524 void PDFDoc::writeObjectFooter(OutStream *outStr)
1525 {
1526     outStr->printf("\r\nendobj\r\n");
1527 }
1528 
createTrailerDict(int uxrefSize,bool incrUpdate,Goffset startxRef,Ref * root,XRef * xRef,const char * fileName,Goffset fileSize)1529 Object PDFDoc::createTrailerDict(int uxrefSize, bool incrUpdate, Goffset startxRef, Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
1530 {
1531     Dict *trailerDict = new Dict(xRef);
1532     trailerDict->set("Size", Object(uxrefSize));
1533 
1534     // build a new ID, as recommended in the reference, uses:
1535     // - current time
1536     // - file name
1537     // - file size
1538     // - values of entry in information dictionnary
1539     GooString message;
1540     char buffer[256];
1541     sprintf(buffer, "%i", (int)time(nullptr));
1542     message.append(buffer);
1543 
1544     if (fileName)
1545         message.append(fileName);
1546 
1547     sprintf(buffer, "%lli", (long long)fileSize);
1548     message.append(buffer);
1549 
1550     // info dict -- only use text string
1551     if (!xRef->getTrailerDict()->isNone()) {
1552         Object docInfo = xRef->getDocInfo();
1553         if (docInfo.isDict()) {
1554             for (int i = 0; i < docInfo.getDict()->getLength(); i++) {
1555                 Object obj2 = docInfo.getDict()->getVal(i);
1556                 if (obj2.isString()) {
1557                     message.append(obj2.getString());
1558                 }
1559             }
1560         }
1561     }
1562 
1563     bool hasEncrypt = false;
1564     if (!xRef->getTrailerDict()->isNone()) {
1565         Object obj2 = xRef->getTrailerDict()->dictLookupNF("Encrypt").copy();
1566         if (!obj2.isNull()) {
1567             trailerDict->set("Encrypt", std::move(obj2));
1568             hasEncrypt = true;
1569         }
1570     }
1571 
1572     // calculate md5 digest
1573     unsigned char digest[16];
1574     md5((unsigned char *)message.c_str(), message.getLength(), digest);
1575 
1576     // create ID array
1577     // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1578     if (incrUpdate || hasEncrypt) {
1579         // only update the second part of the array
1580         Object obj4 = xRef->getTrailerDict()->getDict()->lookup("ID");
1581         if (!obj4.isArray()) {
1582             error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1583         } else {
1584             Array *array = new Array(xRef);
1585             // Get the first part of the ID
1586             array->add(obj4.arrayGet(0));
1587             array->add(Object(new GooString((const char *)digest, 16)));
1588             trailerDict->set("ID", Object(array));
1589         }
1590     } else {
1591         // new file => same values for the two identifiers
1592         Array *array = new Array(xRef);
1593         array->add(Object(new GooString((const char *)digest, 16)));
1594         array->add(Object(new GooString((const char *)digest, 16)));
1595         trailerDict->set("ID", Object(array));
1596     }
1597 
1598     trailerDict->set("Root", Object(*root));
1599 
1600     if (incrUpdate) {
1601         trailerDict->set("Prev", Object(startxRef));
1602     }
1603 
1604     if (!xRef->getTrailerDict()->isNone()) {
1605         Object obj5 = xRef->getDocInfoNF();
1606         if (!obj5.isNull()) {
1607             trailerDict->set("Info", std::move(obj5));
1608         }
1609     }
1610 
1611     return Object(trailerDict);
1612 }
1613 
writeXRefTableTrailer(Object && trailerDict,XRef * uxref,bool writeAllEntries,Goffset uxrefOffset,OutStream * outStr,XRef * xRef)1614 void PDFDoc::writeXRefTableTrailer(Object &&trailerDict, XRef *uxref, bool writeAllEntries, Goffset uxrefOffset, OutStream *outStr, XRef *xRef)
1615 {
1616     uxref->writeTableToFile(outStr, writeAllEntries);
1617     outStr->printf("trailer\r\n");
1618     writeDictionnary(trailerDict.getDict(), outStr, xRef, 0, nullptr, cryptRC4, 0, { 0, 0 }, nullptr);
1619     outStr->printf("\r\nstartxref\r\n");
1620     outStr->printf("%lli\r\n", uxrefOffset);
1621     outStr->printf("%%%%EOF\r\n");
1622 }
1623 
writeXRefStreamTrailer(Object && trailerDict,XRef * uxref,Ref * uxrefStreamRef,Goffset uxrefOffset,OutStream * outStr,XRef * xRef)1624 void PDFDoc::writeXRefStreamTrailer(Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream *outStr, XRef *xRef)
1625 {
1626     GooString stmData;
1627 
1628     // Fill stmData and some trailerDict fields
1629     uxref->writeStreamToBuffer(&stmData, trailerDict.getDict(), xRef);
1630 
1631     // Create XRef stream object and write it
1632     MemStream *mStream = new MemStream(stmData.c_str(), 0, stmData.getLength(), std::move(trailerDict));
1633     writeObjectHeader(uxrefStreamRef, outStr);
1634     Object obj1(static_cast<Stream *>(mStream));
1635     writeObject(&obj1, outStr, xRef, 0, nullptr, cryptRC4, 0, 0, 0);
1636     writeObjectFooter(outStr);
1637 
1638     outStr->printf("startxref\r\n");
1639     outStr->printf("%lli\r\n", uxrefOffset);
1640     outStr->printf("%%%%EOF\r\n");
1641 }
1642 
writeXRefTableTrailer(Goffset uxrefOffset,XRef * uxref,bool writeAllEntries,int uxrefSize,OutStream * outStr,bool incrUpdate)1643 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, bool writeAllEntries, int uxrefSize, OutStream *outStr, bool incrUpdate)
1644 {
1645     const char *fileNameA = fileName ? fileName->c_str() : nullptr;
1646     // file size (doesn't include the trailer)
1647     unsigned int fileSize = 0;
1648     int c;
1649     str->reset();
1650     while ((c = str->getChar()) != EOF) {
1651         fileSize++;
1652     }
1653     str->close();
1654     Ref ref;
1655     ref.num = getXRef()->getRootNum();
1656     ref.gen = getXRef()->getRootGen();
1657     Object trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize);
1658     writeXRefTableTrailer(std::move(trailerDict), uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
1659 }
1660 
writeHeader(OutStream * outStr,int major,int minor)1661 void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
1662 {
1663     outStr->printf("%%PDF-%d.%d\n", major, minor);
1664     outStr->printf("%%%c%c%c%c\n", 0xE2, 0xE3, 0xCF, 0xD3);
1665 }
1666 
markDictionnary(Dict * dict,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldRefNum,int newRefNum,std::set<Dict * > * alreadyMarkedDicts)1667 void PDFDoc::markDictionnary(Dict *dict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
1668 {
1669     bool deleteSet = false;
1670     if (!alreadyMarkedDicts) {
1671         alreadyMarkedDicts = new std::set<Dict *>;
1672         deleteSet = true;
1673     }
1674 
1675     if (alreadyMarkedDicts->find(dict) != alreadyMarkedDicts->end()) {
1676         error(errSyntaxWarning, -1, "PDFDoc::markDictionnary: Found recursive dicts");
1677         if (deleteSet)
1678             delete alreadyMarkedDicts;
1679         return;
1680     } else {
1681         alreadyMarkedDicts->insert(dict);
1682     }
1683 
1684     for (int i = 0; i < dict->getLength(); i++) {
1685         const char *key = dict->getKey(i);
1686         if (strcmp(key, "Annots") != 0) {
1687             Object obj1 = dict->getValNF(i).copy();
1688             markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1689         } else {
1690             Object annotsObj = dict->getValNF(i).copy();
1691             if (!annotsObj.isNull()) {
1692                 markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum, alreadyMarkedDicts);
1693             }
1694         }
1695     }
1696 
1697     if (deleteSet) {
1698         delete alreadyMarkedDicts;
1699     }
1700 }
1701 
markObject(Object * obj,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldRefNum,int newRefNum,std::set<Dict * > * alreadyMarkedDicts)1702 void PDFDoc::markObject(Object *obj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
1703 {
1704     Array *array;
1705 
1706     switch (obj->getType()) {
1707     case objArray:
1708         array = obj->getArray();
1709         for (int i = 0; i < array->getLength(); i++) {
1710             Object obj1 = array->getNF(i).copy();
1711             markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1712         }
1713         break;
1714     case objDict:
1715         markDictionnary(obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1716         break;
1717     case objStream: {
1718         Stream *stream = obj->getStream();
1719         markDictionnary(stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1720     } break;
1721     case objRef: {
1722         if (obj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1723             if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
1724                 return; // already marked as free => should be replaced
1725             }
1726             xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, true);
1727             if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
1728                 xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
1729             }
1730         }
1731         if (obj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1732             countRef->add(obj->getRef().num + numOffset, 1, 0, true);
1733         } else {
1734             XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
1735             entry->gen++;
1736             if (entry->gen > 9)
1737                 break;
1738         }
1739         Object obj1 = getXRef()->fetch(obj->getRef());
1740         markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
1741     } break;
1742     default:
1743         break;
1744     }
1745 }
1746 
replacePageDict(int pageNo,int rotate,const PDFRectangle * mediaBox,const PDFRectangle * cropBox)1747 void PDFDoc::replacePageDict(int pageNo, int rotate, const PDFRectangle *mediaBox, const PDFRectangle *cropBox)
1748 {
1749     Ref *refPage = getCatalog()->getPageRef(pageNo);
1750     Object page = getXRef()->fetch(*refPage);
1751     Dict *pageDict = page.getDict();
1752     pageDict->remove("MediaBoxssdf");
1753     pageDict->remove("MediaBox");
1754     pageDict->remove("CropBox");
1755     pageDict->remove("ArtBox");
1756     pageDict->remove("BleedBox");
1757     pageDict->remove("TrimBox");
1758     pageDict->remove("Rotate");
1759     Array *mediaBoxArray = new Array(getXRef());
1760     mediaBoxArray->add(Object(mediaBox->x1));
1761     mediaBoxArray->add(Object(mediaBox->y1));
1762     mediaBoxArray->add(Object(mediaBox->x2));
1763     mediaBoxArray->add(Object(mediaBox->y2));
1764     Object mediaBoxObject(mediaBoxArray);
1765     Object trimBoxObject = mediaBoxObject.copy();
1766     pageDict->add("MediaBox", std::move(mediaBoxObject));
1767     if (cropBox != nullptr) {
1768         Array *cropBoxArray = new Array(getXRef());
1769         cropBoxArray->add(Object(cropBox->x1));
1770         cropBoxArray->add(Object(cropBox->y1));
1771         cropBoxArray->add(Object(cropBox->x2));
1772         cropBoxArray->add(Object(cropBox->y2));
1773         Object cropBoxObject(cropBoxArray);
1774         trimBoxObject = cropBoxObject.copy();
1775         pageDict->add("CropBox", std::move(cropBoxObject));
1776     }
1777     pageDict->add("TrimBox", std::move(trimBoxObject));
1778     pageDict->add("Rotate", Object(rotate));
1779     getXRef()->setModifiedObject(&page, *refPage);
1780 }
1781 
markPageObjects(Dict * pageDict,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldRefNum,int newRefNum,std::set<Dict * > * alreadyMarkedDicts)1782 void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts)
1783 {
1784     pageDict->remove("OpenAction");
1785     pageDict->remove("Outlines");
1786     pageDict->remove("StructTreeRoot");
1787 
1788     for (int n = 0; n < pageDict->getLength(); n++) {
1789         const char *key = pageDict->getKey(n);
1790         Object value = pageDict->getValNF(n).copy();
1791         if (strcmp(key, "Parent") != 0 && strcmp(key, "Pages") != 0 && strcmp(key, "AcroForm") != 0 && strcmp(key, "Annots") != 0 && strcmp(key, "P") != 0 && strcmp(key, "Root") != 0) {
1792             markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
1793         }
1794     }
1795 }
1796 
markAnnotations(Object * annotsObj,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldPageNum,int newPageNum,std::set<Dict * > * alreadyMarkedDicts)1797 bool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum, std::set<Dict *> *alreadyMarkedDicts)
1798 {
1799     bool modified = false;
1800     Object annots = annotsObj->fetch(getXRef());
1801     if (annots.isArray()) {
1802         Array *array = annots.getArray();
1803         for (int i = array->getLength() - 1; i >= 0; i--) {
1804             Object obj1 = array->get(i);
1805             if (obj1.isDict()) {
1806                 Dict *dict = obj1.getDict();
1807                 Object type = dict->lookup("Type");
1808                 if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
1809                     const Object &obj2 = dict->lookupNF("P");
1810                     if (obj2.isRef()) {
1811                         if (obj2.getRef().num == oldPageNum) {
1812                             const Object &obj3 = array->getNF(i);
1813                             if (obj3.isRef()) {
1814                                 Ref r;
1815                                 r.num = newPageNum;
1816                                 r.gen = 0;
1817                                 dict->set("P", Object(r));
1818                                 getXRef()->setModifiedObject(&obj1, obj3.getRef());
1819                             }
1820                         } else if (obj2.getRef().num == newPageNum) {
1821                             continue;
1822                         } else {
1823                             Object page = getXRef()->fetch(obj2.getRef());
1824                             if (page.isDict()) {
1825                                 Dict *pageDict = page.getDict();
1826                                 Object pagetype = pageDict->lookup("Type");
1827                                 if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
1828                                     continue;
1829                                 }
1830                             }
1831                             array->remove(i);
1832                             modified = true;
1833                             continue;
1834                         }
1835                     }
1836                 }
1837                 markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum, alreadyMarkedDicts);
1838             }
1839             obj1 = array->getNF(i).copy();
1840             if (obj1.isRef()) {
1841                 if (obj1.getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1842                     if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
1843                         continue; // already marked as free => should be replaced
1844                     }
1845                     xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, true);
1846                     if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
1847                         xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
1848                     }
1849                 }
1850                 if (obj1.getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1851                     countRef->add(obj1.getRef().num + numOffset, 1, 0, true);
1852                 } else {
1853                     XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
1854                     entry->gen++;
1855                 }
1856             }
1857         }
1858     }
1859     if (annotsObj->isRef()) {
1860         if (annotsObj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1861             if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
1862                 return modified; // already marked as free => should be replaced
1863             }
1864             xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, true);
1865             if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
1866                 xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1867             }
1868         }
1869         if (annotsObj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1870             countRef->add(annotsObj->getRef().num + numOffset, 1, 0, true);
1871         } else {
1872             XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
1873             entry->gen++;
1874         }
1875         getXRef()->setModifiedObject(&annots, annotsObj->getRef());
1876     }
1877     return modified;
1878 }
1879 
markAcroForm(Object * afObj,XRef * xRef,XRef * countRef,unsigned int numOffset,int oldRefNum,int newRefNum)1880 void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum)
1881 {
1882     bool modified = false;
1883     Object acroform = afObj->fetch(getXRef());
1884     if (acroform.isDict()) {
1885         Dict *dict = acroform.getDict();
1886         for (int i = 0; i < dict->getLength(); i++) {
1887             if (strcmp(dict->getKey(i), "Fields") == 0) {
1888                 Object fields = dict->getValNF(i).copy();
1889                 modified = markAnnotations(&fields, xRef, countRef, numOffset, oldRefNum, newRefNum);
1890             } else {
1891                 Object obj = dict->getValNF(i).copy();
1892                 markObject(&obj, xRef, countRef, numOffset, oldRefNum, newRefNum);
1893             }
1894         }
1895     }
1896     if (afObj->isRef()) {
1897         if (afObj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1898             if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
1899                 return; // already marked as free => should be replaced
1900             }
1901             xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, true);
1902             if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
1903                 xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1904             }
1905         }
1906         if (afObj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1907             countRef->add(afObj->getRef().num + numOffset, 1, 0, true);
1908         } else {
1909             XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
1910             entry->gen++;
1911         }
1912         if (modified) {
1913             getXRef()->setModifiedObject(&acroform, afObj->getRef());
1914         }
1915     }
1916     return;
1917 }
1918 
writePageObjects(OutStream * outStr,XRef * xRef,unsigned int numOffset,bool combine)1919 unsigned int PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, unsigned int numOffset, bool combine)
1920 {
1921     unsigned int objectsCount = 0; // count the number of objects in the XRef(s)
1922     unsigned char *fileKey;
1923     CryptAlgorithm encAlgorithm;
1924     int keyLength;
1925     xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1926 
1927     for (int n = numOffset; n < xRef->getNumObjects(); n++) {
1928         if (xRef->getEntry(n)->type != xrefEntryFree) {
1929             Ref ref;
1930             ref.num = n;
1931             ref.gen = xRef->getEntry(n)->gen;
1932             objectsCount++;
1933             Object obj = getXRef()->fetch(ref.num - numOffset, ref.gen);
1934             Goffset offset = writeObjectHeader(&ref, outStr);
1935             if (combine) {
1936                 writeObject(&obj, outStr, getXRef(), numOffset, nullptr, cryptRC4, 0, 0, 0);
1937             } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
1938                 writeObject(&obj, outStr, nullptr, cryptRC4, 0, 0, 0);
1939             } else {
1940                 writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref);
1941             }
1942             writeObjectFooter(outStr);
1943             xRef->add(ref, offset, true);
1944         }
1945     }
1946     return objectsCount;
1947 }
1948 
getOutline()1949 Outline *PDFDoc::getOutline()
1950 {
1951     if (!outline) {
1952         pdfdocLocker();
1953         // read outline
1954         outline = new Outline(catalog->getOutline(), xref, this);
1955     }
1956 
1957     return outline;
1958 }
1959 
ErrorPDFDoc(int errorCode,const GooString * fileNameA)1960 std::unique_ptr<PDFDoc> PDFDoc::ErrorPDFDoc(int errorCode, const GooString *fileNameA)
1961 {
1962     // We cannot call std::make_unique here because the PDFDoc constructor is private
1963     PDFDoc *doc = new PDFDoc();
1964     doc->errCode = errorCode;
1965     doc->fileName = fileNameA;
1966 
1967     return std::unique_ptr<PDFDoc>(doc);
1968 }
1969 
strToLongLong(const char * s)1970 long long PDFDoc::strToLongLong(const char *s)
1971 {
1972     long long x, d;
1973     const char *p;
1974 
1975     x = 0;
1976     for (p = s; *p && isdigit(*p & 0xff); ++p) {
1977         d = *p - '0';
1978         if (x > (LLONG_MAX - d) / 10) {
1979             break;
1980         }
1981         x = 10 * x + d;
1982     }
1983     return x;
1984 }
1985 
1986 // Read the 'startxref' position.
getStartXRef(bool tryingToReconstruct)1987 Goffset PDFDoc::getStartXRef(bool tryingToReconstruct)
1988 {
1989     if (startXRefPos == -1) {
1990 
1991         if (isLinearized(tryingToReconstruct)) {
1992             char buf[linearizationSearchSize + 1];
1993             int c, n, i;
1994 
1995             str->setPos(0);
1996             for (n = 0; n < linearizationSearchSize; ++n) {
1997                 if ((c = str->getChar()) == EOF) {
1998                     break;
1999                 }
2000                 buf[n] = c;
2001             }
2002             buf[n] = '\0';
2003 
2004             // find end of first obj (linearization dictionary)
2005             startXRefPos = 0;
2006             for (i = 0; i < n; i++) {
2007                 if (!strncmp("endobj", &buf[i], 6)) {
2008                     i += 6;
2009                     // skip whitespace
2010                     while (buf[i] && Lexer::isSpace(buf[i]))
2011                         ++i;
2012                     startXRefPos = i;
2013                     break;
2014                 }
2015             }
2016         } else {
2017             char buf[xrefSearchSize + 1];
2018             const char *p;
2019             int c, n, i;
2020 
2021             // read last xrefSearchSize bytes
2022             int segnum = 0;
2023             int maxXRefSearch = 24576;
2024             if (str->getLength() < maxXRefSearch)
2025                 maxXRefSearch = str->getLength();
2026             for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
2027                 str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
2028                 for (n = 0; n < xrefSearchSize; ++n) {
2029                     if ((c = str->getChar()) == EOF) {
2030                         break;
2031                     }
2032                     buf[n] = c;
2033                 }
2034                 buf[n] = '\0';
2035 
2036                 // find startxref
2037                 for (i = n - 9; i >= 0; --i) {
2038                     if (!strncmp(&buf[i], "startxref", 9)) {
2039                         break;
2040                     }
2041                 }
2042                 if (i < 0) {
2043                     startXRefPos = 0;
2044                 } else {
2045                     for (p = &buf[i + 9]; isspace(*p); ++p)
2046                         ;
2047                     startXRefPos = strToLongLong(p);
2048                     break;
2049                 }
2050             }
2051         }
2052     }
2053 
2054     return startXRefPos;
2055 }
2056 
getMainXRefEntriesOffset(bool tryingToReconstruct)2057 Goffset PDFDoc::getMainXRefEntriesOffset(bool tryingToReconstruct)
2058 {
2059     unsigned int mainXRefEntriesOffset = 0;
2060 
2061     if (isLinearized(tryingToReconstruct)) {
2062         mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
2063     }
2064 
2065     return mainXRefEntriesOffset;
2066 }
2067 
getNumPages()2068 int PDFDoc::getNumPages()
2069 {
2070     if (isLinearized()) {
2071         int n;
2072         if ((n = getLinearization()->getNumPages())) {
2073             return n;
2074         }
2075     }
2076 
2077     return catalog->getNumPages();
2078 }
2079 
parsePage(int page)2080 Page *PDFDoc::parsePage(int page)
2081 {
2082     Ref pageRef;
2083 
2084     pageRef.num = getHints()->getPageObjectNum(page);
2085     if (!pageRef.num) {
2086         error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
2087         return nullptr;
2088     }
2089 
2090     // check for bogus ref - this can happen in corrupted PDF files
2091     if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
2092         error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef.num, page);
2093         return nullptr;
2094     }
2095 
2096     pageRef.gen = xref->getEntry(pageRef.num)->gen;
2097     Object obj = xref->fetch(pageRef);
2098     if (!obj.isDict("Page")) {
2099         error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
2100         return nullptr;
2101     }
2102     Dict *pageDict = obj.getDict();
2103 
2104     return new Page(this, page, std::move(obj), pageRef, new PageAttrs(nullptr, pageDict), catalog->getForm());
2105 }
2106 
getPage(int page)2107 Page *PDFDoc::getPage(int page)
2108 {
2109     if ((page < 1) || page > getNumPages())
2110         return nullptr;
2111 
2112     if (isLinearized() && checkLinearization()) {
2113         pdfdocLocker();
2114         if (!pageCache) {
2115             pageCache = (Page **)gmallocn(getNumPages(), sizeof(Page *));
2116             for (int i = 0; i < getNumPages(); i++) {
2117                 pageCache[i] = nullptr;
2118             }
2119         }
2120         if (!pageCache[page - 1]) {
2121             pageCache[page - 1] = parsePage(page);
2122         }
2123         if (pageCache[page - 1]) {
2124             return pageCache[page - 1];
2125         } else {
2126             error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
2127         }
2128     }
2129 
2130     return catalog->getPage(page);
2131 }
2132 
hasJavascript()2133 bool PDFDoc::hasJavascript()
2134 {
2135     JSInfo jsInfo(this);
2136     jsInfo.scanJS(getNumPages(), true);
2137     return jsInfo.containsJS();
2138 }
2139 
sign(const char * saveFilename,const char * certNickname,const char * password,GooString * partialFieldName,int page,const PDFRectangle & rect,const GooString & signatureText,const GooString & signatureTextLeft,double fontSize,std::unique_ptr<AnnotColor> && fontColor,double borderWidth,std::unique_ptr<AnnotColor> && borderColor,std::unique_ptr<AnnotColor> && backgroundColor,const GooString * reason,const GooString * location,const std::string & imagePath)2140 bool PDFDoc::sign(const char *saveFilename, const char *certNickname, const char *password, GooString *partialFieldName, int page, const PDFRectangle &rect, const GooString &signatureText, const GooString &signatureTextLeft,
2141                   double fontSize, std::unique_ptr<AnnotColor> &&fontColor, double borderWidth, std::unique_ptr<AnnotColor> &&borderColor, std::unique_ptr<AnnotColor> &&backgroundColor, const GooString *reason, const GooString *location,
2142                   const std::string &imagePath)
2143 {
2144     ::Page *destPage = getPage(page);
2145     if (destPage == nullptr) {
2146         return false;
2147     }
2148     Ref imageResourceRef = Ref::INVALID();
2149     if (!imagePath.empty()) {
2150         imageResourceRef = ImageEmbeddingUtils::embed(xref, imagePath);
2151         if (imageResourceRef == Ref::INVALID()) {
2152             return false;
2153         }
2154     }
2155 
2156     const DefaultAppearance da { { objName, "SigFont" }, fontSize, std::move(fontColor) };
2157 
2158     Object annotObj = Object(new Dict(getXRef()));
2159     annotObj.dictSet("Type", Object(objName, "Annot"));
2160     annotObj.dictSet("Subtype", Object(objName, "Widget"));
2161     annotObj.dictSet("FT", Object(objName, "Sig"));
2162     annotObj.dictSet("T", Object(partialFieldName));
2163     Array *rectArray = new Array(getXRef());
2164     rectArray->add(Object(rect.x1));
2165     rectArray->add(Object(rect.y1));
2166     rectArray->add(Object(rect.x2));
2167     rectArray->add(Object(rect.y2));
2168     annotObj.dictSet("Rect", Object(rectArray));
2169 
2170     GooString *daStr = da.toAppearanceString();
2171     annotObj.dictSet("DA", Object(daStr));
2172 
2173     const Ref ref = getXRef()->addIndirectObject(annotObj);
2174     catalog->addFormToAcroForm(ref);
2175 
2176     std::unique_ptr<::FormFieldSignature> field = std::make_unique<::FormFieldSignature>(this, Object(annotObj.getDict()), ref, nullptr, nullptr);
2177     field->setCustomAppearanceContent(signatureText);
2178     field->setCustomAppearanceLeftContent(signatureTextLeft);
2179     field->setImageResource(imageResourceRef);
2180 
2181     Object refObj(ref);
2182     AnnotWidget *signatureAnnot = new AnnotWidget(this, &annotObj, &refObj, field.get());
2183     signatureAnnot->setFlags(signatureAnnot->getFlags() | Annot::flagPrint | Annot::flagLocked | Annot::flagNoRotate);
2184     Dict dummy(getXRef());
2185     auto appearCharacs = std::make_unique<AnnotAppearanceCharacs>(&dummy);
2186     appearCharacs->setBorderColor(std::move(borderColor));
2187     appearCharacs->setBackColor(std::move(backgroundColor));
2188     signatureAnnot->setAppearCharacs(std::move(appearCharacs));
2189 
2190     signatureAnnot->generateFieldAppearance();
2191     signatureAnnot->updateAppearanceStream();
2192 
2193     FormWidget *formWidget = field->getWidget(field->getNumWidgets() - 1);
2194     formWidget->setWidgetAnnotation(signatureAnnot);
2195 
2196     destPage->addAnnot(signatureAnnot);
2197 
2198     std::unique_ptr<AnnotBorder> border(new AnnotBorderArray());
2199     border->setWidth(borderWidth);
2200     signatureAnnot->setBorder(std::move(border));
2201 
2202     FormWidgetSignature *fws = dynamic_cast<FormWidgetSignature *>(formWidget);
2203     if (fws) {
2204         const bool res = fws->signDocument(saveFilename, certNickname, "SHA256", password, reason, location);
2205 
2206         // Now remove the signature stuff in case the user wants to continue editing stuff
2207         // So the document object is clean
2208         const Object &vRefObj = annotObj.dictLookupNF("V");
2209         if (vRefObj.isRef()) {
2210             getXRef()->removeIndirectObject(vRefObj.getRef());
2211         }
2212         destPage->removeAnnot(signatureAnnot);
2213         catalog->removeFormFromAcroForm(ref);
2214         getXRef()->removeIndirectObject(ref);
2215 
2216         return res;
2217     }
2218 
2219     return false;
2220 }
2221