1 //========================================================================
2 //
3 // PDFDoc.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17 // Copyright (C) 2005, 2007-2009, 2011-2015 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
35 // Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
36 // Copyright (C) 2015 André Esser <bepandre@hotmail.com>
37 //
38 // To see a description of the changes please see the Changelog file that
39 // came with your tarball or type make ChangeLog if you are building from git
40 //
41 //========================================================================
42
43 #include <config.h>
44
45 #ifdef USE_GCC_PRAGMAS
46 #pragma implementation
47 #endif
48
49 #include <ctype.h>
50 #include <locale.h>
51 #include <stdio.h>
52 #include <errno.h>
53 #include <stdlib.h>
54 #include <stddef.h>
55 #include <string.h>
56 #include <time.h>
57 #include <sys/stat.h>
58 #include "goo/gstrtod.h"
59 #include "goo/GooString.h"
60 #include "goo/gfile.h"
61 #include "poppler-config.h"
62 #include "GlobalParams.h"
63 #include "Page.h"
64 #include "Catalog.h"
65 #include "Stream.h"
66 #include "XRef.h"
67 #include "Linearization.h"
68 #include "Link.h"
69 #include "OutputDev.h"
70 #include "Error.h"
71 #include "ErrorCodes.h"
72 #include "Lexer.h"
73 #include "Parser.h"
74 #include "SecurityHandler.h"
75 #include "Decrypt.h"
76 #ifndef DISABLE_OUTLINE
77 #include "Outline.h"
78 #endif
79 #include "PDFDoc.h"
80 #include "Hints.h"
81
// pdfdocLocker(): in MULTITHREADED builds this declares a local MutexLocker
// named "locker" on the document's mutex; in single-threaded builds it
// expands to nothing.
// NOTE(review): presumably the lock is released when "locker" goes out of
// scope -- confirm against the MutexLocker definition.
#if MULTITHREADED
# define pdfdocLocker() MutexLocker locker(&mutex)
#else
# define pdfdocLocker()
#endif
87
88 //------------------------------------------------------------------------
89
#define headerSearchSize 1024 // read this many bytes at beginning of
// file to look for '%PDF'
#define pdfIdLength 32 // length, in hexadecimal characters, of the textual
// form of a PDF document ID (PermanentId, UpdateId);
// get_id() expects pdfIdLength / 2 raw bytes

#define linearizationSearchSize 1024 // read this many bytes at beginning of
// file to look for linearization
// dictionary

#define xrefSearchSize 1024 // read this many bytes at end of file
// to look for 'startxref'
99 // to look for 'startxref'
100
101 //------------------------------------------------------------------------
102 // PDFDoc
103 //------------------------------------------------------------------------
104
init()105 void PDFDoc::init()
106 {
107 #if MULTITHREADED
108 gInitMutex(&mutex);
109 #endif
110 ok = gFalse;
111 errCode = errNone;
112 fileName = NULL;
113 file = NULL;
114 str = NULL;
115 xref = NULL;
116 linearization = NULL;
117 catalog = NULL;
118 hints = NULL;
119 #ifndef DISABLE_OUTLINE
120 outline = NULL;
121 #endif
122 startXRefPos = -1;
123 secHdlr = NULL;
124 pageCache = NULL;
125 }
126
PDFDoc()127 PDFDoc::PDFDoc()
128 {
129 init();
130 }
131
PDFDoc(GooString * fileNameA,GooString * ownerPassword,GooString * userPassword,void * guiDataA)132 PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
133 GooString *userPassword, void *guiDataA) {
134 Object obj;
135 #ifdef _WIN32
136 int n, i;
137 #endif
138
139 init();
140
141 fileName = fileNameA;
142 guiData = guiDataA;
143 #ifdef _WIN32
144 n = fileName->getLength();
145 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
146 for (i = 0; i < n; ++i) {
147 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
148 }
149 fileNameU[n] = L'\0';
150 #endif
151
152 // try to open file
153 file = GooFile::open(fileName);
154 if (file == NULL) {
155 // fopen() has failed.
156 // Keep a copy of the errno returned by fopen so that it can be
157 // referred to later.
158 fopenErrno = errno;
159 error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName, strerror(errno));
160 errCode = errOpenFile;
161 return;
162 }
163
164 // create stream
165 obj.initNull();
166 str = new FileStream(file, 0, gFalse, file->size(), &obj);
167
168 ok = setup(ownerPassword, userPassword);
169 }
170
171 #ifdef _WIN32
PDFDoc(wchar_t * fileNameA,int fileNameLen,GooString * ownerPassword,GooString * userPassword,void * guiDataA)172 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
173 GooString *userPassword, void *guiDataA) {
174 OSVERSIONINFO version;
175 Object obj;
176 int i;
177
178 init();
179
180 guiData = guiDataA;
181
182 // save both Unicode and 8-bit copies of the file name
183 fileName = new GooString();
184 fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
185 for (i = 0; i < fileNameLen; ++i) {
186 fileName->append((char)fileNameA[i]);
187 fileNameU[i] = fileNameA[i];
188 }
189 fileNameU[fileNameLen] = L'\0';
190
191 // try to open file
192 // NB: _wfopen is only available in NT
193 version.dwOSVersionInfoSize = sizeof(version);
194 GetVersionEx(&version);
195 if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
196 file = GooFile::open(fileNameU);
197 } else {
198 file = GooFile::open(fileName);
199 }
200 if (!file) {
201 error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
202 errCode = errOpenFile;
203 return;
204 }
205
206 // create stream
207 obj.initNull();
208 str = new FileStream(file, 0, gFalse, file->size(), &obj);
209
210 ok = setup(ownerPassword, userPassword);
211 }
212 #endif
213
PDFDoc(BaseStream * strA,GooString * ownerPassword,GooString * userPassword,void * guiDataA)214 PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword,
215 GooString *userPassword, void *guiDataA) {
216 #ifdef _WIN32
217 int n, i;
218 #endif
219
220 init();
221 guiData = guiDataA;
222 if (strA->getFileName()) {
223 fileName = strA->getFileName()->copy();
224 #ifdef _WIN32
225 n = fileName->getLength();
226 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
227 for (i = 0; i < n; ++i) {
228 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
229 }
230 fileNameU[n] = L'\0';
231 #endif
232 } else {
233 fileName = NULL;
234 #ifdef _WIN32
235 fileNameU = NULL;
236 #endif
237 }
238 str = strA;
239 ok = setup(ownerPassword, userPassword);
240 }
241
setup(GooString * ownerPassword,GooString * userPassword)242 GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
243 pdfdocLocker();
244 str->setPos(0, -1);
245 if (str->getPos() < 0)
246 {
247 error(errSyntaxError, -1, "Document base stream is not seekable");
248 return gFalse;
249 }
250
251 str->reset();
252
253 // check footer
254 // Adobe does not seem to enforce %%EOF, so we do the same
255 // if (!checkFooter()) return gFalse;
256
257 // check header
258 checkHeader();
259
260 GBool wasReconstructed = false;
261
262 // read xref table
263 xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
264 if (!xref->isOk()) {
265 if (wasReconstructed) {
266 delete xref;
267 startXRefPos = -1;
268 xref = new XRef(str, getStartXRef(gTrue), getMainXRefEntriesOffset(gTrue), &wasReconstructed);
269 }
270 if (!xref->isOk()) {
271 error(errSyntaxError, -1, "Couldn't read xref table");
272 errCode = xref->getErrorCode();
273 return gFalse;
274 }
275 }
276
277 // check for encryption
278 if (!checkEncryption(ownerPassword, userPassword)) {
279 errCode = errEncrypted;
280 return gFalse;
281 }
282
283 // read catalog
284 catalog = new Catalog(this);
285 if (catalog && !catalog->isOk()) {
286 if (!wasReconstructed)
287 {
288 // try one more time to contruct the Catalog, maybe the problem is damaged XRef
289 delete catalog;
290 delete xref;
291 xref = new XRef(str, 0, 0, NULL, true);
292 catalog = new Catalog(this);
293 }
294
295 if (catalog && !catalog->isOk()) {
296 error(errSyntaxError, -1, "Couldn't read page catalog");
297 errCode = errBadCatalog;
298 return gFalse;
299 }
300 }
301
302 // done
303 return gTrue;
304 }
305
~PDFDoc()306 PDFDoc::~PDFDoc() {
307 if (pageCache) {
308 for (int i = 0; i < getNumPages(); i++) {
309 if (pageCache[i]) {
310 delete pageCache[i];
311 }
312 }
313 gfree(pageCache);
314 }
315 delete secHdlr;
316 #ifndef DISABLE_OUTLINE
317 if (outline) {
318 delete outline;
319 }
320 #endif
321 if (catalog) {
322 delete catalog;
323 }
324 if (xref) {
325 delete xref;
326 }
327 if (hints) {
328 delete hints;
329 }
330 if (linearization) {
331 delete linearization;
332 }
333 if (str) {
334 delete str;
335 }
336 if (file) {
337 delete file;
338 }
339 if (fileName) {
340 delete fileName;
341 }
342 #ifdef _WIN32
343 if (fileNameU) {
344 gfree(fileNameU);
345 }
346 #endif
347 #if MULTITHREADED
348 gDestroyMutex(&mutex);
349 #endif
350 }
351
352
353 // Check for a %%EOF at the end of this stream
checkFooter()354 GBool PDFDoc::checkFooter() {
355 // we look in the last 1024 chars because Adobe does the same
356 char *eof = new char[1025];
357 Goffset pos = str->getPos();
358 str->setPos(1024, -1);
359 int i, ch;
360 for (i = 0; i < 1024; i++)
361 {
362 ch = str->getChar();
363 if (ch == EOF)
364 break;
365 eof[i] = ch;
366 }
367 eof[i] = '\0';
368
369 bool found = false;
370 for (i = i - 5; i >= 0; i--) {
371 if (strncmp (&eof[i], "%%EOF", 5) == 0) {
372 found = true;
373 break;
374 }
375 }
376 if (!found)
377 {
378 error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
379 errCode = errDamaged;
380 delete[] eof;
381 return gFalse;
382 }
383 delete[] eof;
384 str->setPos(pos);
385 return gTrue;
386 }
387
388 // Check for a PDF header on this stream. Skip past some garbage
389 // if necessary.
checkHeader()390 void PDFDoc::checkHeader() {
391 char hdrBuf[headerSearchSize+1];
392 char *p;
393 char *tokptr;
394 int i;
395
396 pdfMajorVersion = 0;
397 pdfMinorVersion = 0;
398 for (i = 0; i < headerSearchSize; ++i) {
399 hdrBuf[i] = str->getChar();
400 }
401 hdrBuf[headerSearchSize] = '\0';
402 for (i = 0; i < headerSearchSize - 5; ++i) {
403 if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
404 break;
405 }
406 }
407 if (i >= headerSearchSize - 5) {
408 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
409 return;
410 }
411 str->moveStart(i);
412 if (!(p = strtok_r(&hdrBuf[i+5], " \t\n\r", &tokptr))) {
413 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
414 return;
415 }
416 sscanf(p, "%d.%d", &pdfMajorVersion, &pdfMinorVersion);
417 // We don't do the version check. Don't add it back in.
418 }
419
checkEncryption(GooString * ownerPassword,GooString * userPassword)420 GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
421 Object encrypt;
422 GBool encrypted;
423 GBool ret;
424
425 xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
426 if ((encrypted = encrypt.isDict())) {
427 if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
428 if (secHdlr->isUnencrypted()) {
429 // no encryption
430 ret = gTrue;
431 } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
432 // authorization succeeded
433 xref->setEncryption(secHdlr->getPermissionFlags(),
434 secHdlr->getOwnerPasswordOk(),
435 secHdlr->getFileKey(),
436 secHdlr->getFileKeyLength(),
437 secHdlr->getEncVersion(),
438 secHdlr->getEncRevision(),
439 secHdlr->getEncAlgorithm());
440 ret = gTrue;
441 } else {
442 // authorization failed
443 ret = gFalse;
444 }
445 } else {
446 // couldn't find the matching security handler
447 ret = gFalse;
448 }
449 } else {
450 // document is not encrypted
451 ret = gTrue;
452 }
453 encrypt.free();
454 return ret;
455 }
456
getSignatureWidgets()457 std::vector<FormWidgetSignature*> PDFDoc::getSignatureWidgets()
458 {
459 int num_pages = getNumPages();
460 FormPageWidgets *page_widgets = NULL;
461 std::vector<FormWidgetSignature*> widget_vector;
462
463 for (int i = 1; i <= num_pages; i++) {
464 Page *p = getCatalog()->getPage(i);
465 if (p) {
466 page_widgets = p->getFormWidgets();
467 for (int j = 0; page_widgets != NULL && j < page_widgets->getNumWidgets(); j++) {
468 if (page_widgets->getWidget(j)->getType() == formSignature) {
469 widget_vector.push_back(static_cast<FormWidgetSignature*>(page_widgets->getWidget(j)));
470 }
471 }
472 delete page_widgets;
473 }
474 }
475 return widget_vector;
476 }
477
displayPage(OutputDev * out,int page,double hDPI,double vDPI,int rotate,GBool useMediaBox,GBool crop,GBool printing,GBool (* abortCheckCbk)(void * data),void * abortCheckCbkData,GBool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData,GBool copyXRef)478 void PDFDoc::displayPage(OutputDev *out, int page,
479 double hDPI, double vDPI, int rotate,
480 GBool useMediaBox, GBool crop, GBool printing,
481 GBool (*abortCheckCbk)(void *data),
482 void *abortCheckCbkData,
483 GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
484 void *annotDisplayDecideCbkData, GBool copyXRef) {
485 if (globalParams->getPrintCommands()) {
486 printf("***** page %d *****\n", page);
487 }
488
489 if (getPage(page))
490 getPage(page)->display(out, hDPI, vDPI,
491 rotate, useMediaBox, crop, printing,
492 abortCheckCbk, abortCheckCbkData,
493 annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
494
495 }
496
displayPages(OutputDev * out,int firstPage,int lastPage,double hDPI,double vDPI,int rotate,GBool useMediaBox,GBool crop,GBool printing,GBool (* abortCheckCbk)(void * data),void * abortCheckCbkData,GBool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData)497 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
498 double hDPI, double vDPI, int rotate,
499 GBool useMediaBox, GBool crop, GBool printing,
500 GBool (*abortCheckCbk)(void *data),
501 void *abortCheckCbkData,
502 GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
503 void *annotDisplayDecideCbkData) {
504 int page;
505
506 for (page = firstPage; page <= lastPage; ++page) {
507 displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
508 abortCheckCbk, abortCheckCbkData,
509 annotDisplayDecideCbk, annotDisplayDecideCbkData);
510 }
511 }
512
displayPageSlice(OutputDev * out,int page,double hDPI,double vDPI,int rotate,GBool useMediaBox,GBool crop,GBool printing,int sliceX,int sliceY,int sliceW,int sliceH,GBool (* abortCheckCbk)(void * data),void * abortCheckCbkData,GBool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData,GBool copyXRef)513 void PDFDoc::displayPageSlice(OutputDev *out, int page,
514 double hDPI, double vDPI, int rotate,
515 GBool useMediaBox, GBool crop, GBool printing,
516 int sliceX, int sliceY, int sliceW, int sliceH,
517 GBool (*abortCheckCbk)(void *data),
518 void *abortCheckCbkData,
519 GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
520 void *annotDisplayDecideCbkData, GBool copyXRef) {
521 if (getPage(page))
522 getPage(page)->displaySlice(out, hDPI, vDPI,
523 rotate, useMediaBox, crop,
524 sliceX, sliceY, sliceW, sliceH,
525 printing,
526 abortCheckCbk, abortCheckCbkData,
527 annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
528 }
529
getLinks(int page)530 Links *PDFDoc::getLinks(int page) {
531 Page *p = getPage(page);
532 if (!p) {
533 return new Links (NULL);
534 }
535 return p->getLinks();
536 }
537
processLinks(OutputDev * out,int page)538 void PDFDoc::processLinks(OutputDev *out, int page) {
539 if (getPage(page))
540 getPage(page)->processLinks(out);
541 }
542
getLinearization()543 Linearization *PDFDoc::getLinearization()
544 {
545 if (!linearization) {
546 linearization = new Linearization(str);
547 linearizationState = 0;
548 }
549 return linearization;
550 }
551
checkLinearization()552 GBool PDFDoc::checkLinearization() {
553 if (linearization == NULL)
554 return gFalse;
555 if (linearizationState == 1)
556 return gTrue;
557 if (linearizationState == 2)
558 return gFalse;
559 if (!hints) {
560 hints = new Hints(str, linearization, getXRef(), secHdlr);
561 }
562 for (int page = 1; page <= linearization->getNumPages(); page++) {
563 Object obj;
564 Ref pageRef;
565
566 pageRef.num = hints->getPageObjectNum(page);
567 if (!pageRef.num) {
568 linearizationState = 2;
569 return gFalse;
570 }
571
572 // check for bogus ref - this can happen in corrupted PDF files
573 if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
574 linearizationState = 2;
575 return gFalse;
576 }
577
578 pageRef.gen = xref->getEntry(pageRef.num)->gen;
579 xref->fetch(pageRef.num, pageRef.gen, &obj);
580 if (!obj.isDict("Page")) {
581 obj.free();
582 linearizationState = 2;
583 return gFalse;
584 }
585 obj.free();
586 }
587 linearizationState = 1;
588 return gTrue;
589 }
590
isLinearized(GBool tryingToReconstruct)591 GBool PDFDoc::isLinearized(GBool tryingToReconstruct) {
592 if ((str->getLength()) &&
593 (getLinearization()->getLength() == str->getLength()))
594 return gTrue;
595 else {
596 if (tryingToReconstruct)
597 return getLinearization()->getLength() > 0;
598 else
599 return gFalse;
600 }
601 }
602
603 static GBool
get_id(GooString * encodedidstring,GooString * id)604 get_id (GooString *encodedidstring, GooString *id) {
605 const char *encodedid = encodedidstring->getCString();
606 char pdfid[pdfIdLength + 1];
607 int n;
608
609 if (encodedidstring->getLength() != pdfIdLength / 2)
610 return gFalse;
611
612 n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
613 encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff,
614 encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff, encodedid[7] & 0xff,
615 encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff,
616 encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
617 if (n != pdfIdLength)
618 return gFalse;
619
620 id->Set(pdfid, pdfIdLength);
621 return gTrue;
622 }
623
getID(GooString * permanent_id,GooString * update_id)624 GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
625 Object obj;
626 xref->getTrailerDict()->dictLookup ("ID", &obj);
627
628 if (obj.isArray() && obj.arrayGetLength() == 2) {
629 Object obj2;
630
631 if (permanent_id) {
632 if (obj.arrayGet(0, &obj2)->isString()) {
633 if (!get_id (obj2.getString(), permanent_id)) {
634 obj2.free();
635 return gFalse;
636 }
637 } else {
638 error(errSyntaxError, -1, "Invalid permanent ID");
639 obj2.free();
640 return gFalse;
641 }
642 obj2.free();
643 }
644
645 if (update_id) {
646 if (obj.arrayGet(1, &obj2)->isString()) {
647 if (!get_id (obj2.getString(), update_id)) {
648 obj2.free();
649 return gFalse;
650 }
651 } else {
652 error(errSyntaxError, -1, "Invalid update ID");
653 obj2.free();
654 return gFalse;
655 }
656 obj2.free();
657 }
658
659 obj.free();
660
661 return gTrue;
662 }
663 obj.free();
664
665 return gFalse;
666 }
667
getHints()668 Hints *PDFDoc::getHints()
669 {
670 if (!hints && isLinearized()) {
671 hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
672 }
673
674 return hints;
675 }
676
savePageAs(GooString * name,int pageNo)677 int PDFDoc::savePageAs(GooString *name, int pageNo)
678 {
679 FILE *f;
680 OutStream *outStr;
681 XRef *yRef, *countRef;
682 int rootNum = getXRef()->getNumObjects() + 1;
683
684 // Make sure that special flags are set, because we are going to read
685 // all objects, including Unencrypted ones.
686 xref->scanSpecialFlags();
687
688 Guchar *fileKey;
689 CryptAlgorithm encAlgorithm;
690 int keyLength;
691 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
692
693 if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
694 error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages() );
695 return errOpenFile;
696 }
697 PDFRectangle *cropBox = NULL;
698 if (getCatalog()->getPage(pageNo)->isCropped()) {
699 cropBox = getCatalog()->getPage(pageNo)->getCropBox();
700 }
701 replacePageDict(pageNo,
702 getCatalog()->getPage(pageNo)->getRotate(),
703 getCatalog()->getPage(pageNo)->getMediaBox(),
704 cropBox);
705 Ref *refPage = getCatalog()->getPageRef(pageNo);
706 Object page;
707 getXRef()->fetch(refPage->num, refPage->gen, &page);
708
709 if (!(f = fopen(name->getCString(), "wb"))) {
710 error(errIO, -1, "Couldn't open file '{0:t}'", name);
711 return errOpenFile;
712 }
713 outStr = new FileOutStream(f,0);
714
715 yRef = new XRef(getXRef()->getTrailerDict());
716
717 if (secHdlr != NULL && !secHdlr->isUnencrypted()) {
718 yRef->setEncryption(secHdlr->getPermissionFlags(),
719 secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
720 }
721 countRef = new XRef();
722 Object *trailerObj = getXRef()->getTrailerDict();
723 if (trailerObj->isDict()) {
724 markPageObjects(trailerObj->getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
725 }
726 yRef->add(0, 65535, 0, gFalse);
727 writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
728
729 // get and mark info dict
730 Object infoObj;
731 getXRef()->getDocInfo(&infoObj);
732 if (infoObj.isDict()) {
733 Dict *infoDict = infoObj.getDict();
734 markPageObjects(infoDict, yRef, countRef, 0, refPage->num, rootNum + 2);
735 if (trailerObj->isDict()) {
736 Dict *trailerDict = trailerObj->getDict();
737 Object ref;
738 trailerDict->lookupNF("Info", &ref);
739 if (ref.isRef()) {
740 yRef->add(ref.getRef().num, ref.getRef().gen, 0, gTrue);
741 if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
742 yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
743 }
744 }
745 ref.free();
746 }
747 }
748 infoObj.free();
749
750 // get and mark output intents etc.
751 Object catObj, pagesObj, resourcesObj, annotsObj, afObj;
752 getXRef()->getCatalog(&catObj);
753 Dict *catDict = catObj.getDict();
754 catDict->lookup("Pages", &pagesObj);
755 catDict->lookupNF("AcroForm", &afObj);
756 if (!afObj.isNull()) {
757 markAcroForm(&afObj, yRef, countRef, 0, refPage->num, rootNum + 2);
758 afObj.free();
759 }
760 Dict *pagesDict = pagesObj.getDict();
761 pagesDict->lookup("Resources", &resourcesObj);
762 if (resourcesObj.isDict())
763 markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
764 markPageObjects(catDict, yRef, countRef, 0, refPage->num, rootNum + 2);
765
766 Dict *pageDict = page.getDict();
767 if (resourcesObj.isNull() && !pageDict->hasKey("Resources")) {
768 Dict *resourceDict = getCatalog()->getPage(pageNo)->getResourceDict();
769 if (resourceDict != NULL) {
770 resourcesObj.initDict(resourceDict);
771 markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
772 }
773 }
774 markPageObjects(pageDict, yRef, countRef, 0, refPage->num, rootNum + 2);
775 pageDict->lookupNF("Annots", &annotsObj);
776 if (!annotsObj.isNull()) {
777 markAnnotations(&annotsObj, yRef, countRef, 0, refPage->num, rootNum + 2);
778 annotsObj.free();
779 }
780 yRef->markUnencrypted();
781 writePageObjects(outStr, yRef, 0);
782
783 yRef->add(rootNum,0,outStr->getPos(),gTrue);
784 outStr->printf("%d 0 obj\n", rootNum);
785 outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
786 for (int j = 0; j < catDict->getLength(); j++) {
787 const char *key = catDict->getKey(j);
788 if (strcmp(key, "Type") != 0 &&
789 strcmp(key, "Catalog") != 0 &&
790 strcmp(key, "Pages") != 0)
791 {
792 if (j > 0) outStr->printf(" ");
793 Object value; catDict->getValNF(j, &value);
794 outStr->printf("/%s ", key);
795 writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
796 value.free();
797 }
798 }
799 catObj.free();
800 pagesObj.free();
801 outStr->printf(">>\nendobj\n");
802
803 yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
804 outStr->printf("%d 0 obj\n", rootNum + 1);
805 outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
806 if (resourcesObj.isDict()) {
807 outStr->printf("/Resources ");
808 writeObject(&resourcesObj, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
809 resourcesObj.free();
810 }
811 outStr->printf(">>\n");
812 outStr->printf("endobj\n");
813
814 yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
815 outStr->printf("%d 0 obj\n", rootNum + 2);
816 outStr->printf("<< ");
817 for (int n = 0; n < pageDict->getLength(); n++) {
818 if (n > 0) outStr->printf(" ");
819 const char *key = pageDict->getKey(n);
820 Object value; pageDict->getValNF(n, &value);
821 if (strcmp(key, "Parent") == 0) {
822 outStr->printf("/Parent %d 0 R", rootNum + 1);
823 } else {
824 outStr->printf("/%s ", key);
825 writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
826 }
827 value.free();
828 }
829 outStr->printf(" >>\nendobj\n");
830 page.free();
831
832 Goffset uxrefOffset = outStr->getPos();
833 Ref ref;
834 ref.num = rootNum;
835 ref.gen = 0;
836 Dict *trailerDict = createTrailerDict(rootNum + 3, gFalse, 0, &ref, getXRef(),
837 name->getCString(), uxrefOffset);
838 writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */,
839 uxrefOffset, outStr, getXRef());
840 delete trailerDict;
841
842 outStr->close();
843 fclose(f);
844 delete yRef;
845 delete countRef;
846 delete outStr;
847
848 return errNone;
849 }
850
saveAs(GooString * name,PDFWriteMode mode)851 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
852 FILE *f;
853 OutStream *outStr;
854 int res;
855
856 if (!(f = fopen(name->getCString(), "wb"))) {
857 error(errIO, -1, "Couldn't open file '{0:t}'", name);
858 return errOpenFile;
859 }
860 outStr = new FileOutStream(f,0);
861 res = saveAs(outStr, mode);
862 delete outStr;
863 fclose(f);
864 return res;
865 }
866
saveAs(OutStream * outStr,PDFWriteMode mode)867 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
868
869 // find if we have updated objects
870 GBool updated = gFalse;
871 for(int i=0; i<xref->getNumObjects(); i++) {
872 if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) {
873 updated = gTrue;
874 break;
875 }
876 }
877
878 if (!updated && mode == writeStandard) {
879 // simply copy the original file
880 saveWithoutChangesAs (outStr);
881 } else if (mode == writeForceRewrite) {
882 saveCompleteRewrite(outStr);
883 } else {
884 saveIncrementalUpdate(outStr);
885 }
886
887 return errNone;
888 }
889
saveWithoutChangesAs(GooString * name)890 int PDFDoc::saveWithoutChangesAs(GooString *name) {
891 FILE *f;
892 OutStream *outStr;
893 int res;
894
895 if (!(f = fopen(name->getCString(), "wb"))) {
896 error(errIO, -1, "Couldn't open file '{0:t}'", name);
897 return errOpenFile;
898 }
899
900 outStr = new FileOutStream(f,0);
901 res = saveWithoutChangesAs(outStr);
902 delete outStr;
903
904 fclose(f);
905
906 return res;
907 }
908
saveWithoutChangesAs(OutStream * outStr)909 int PDFDoc::saveWithoutChangesAs(OutStream *outStr) {
910 int c;
911
912 BaseStream *copyStr = str->copy();
913 copyStr->reset();
914 while ((c = copyStr->getChar()) != EOF) {
915 outStr->put(c);
916 }
917 copyStr->close();
918 delete copyStr;
919
920 return errNone;
921 }
922
saveIncrementalUpdate(OutStream * outStr)923 void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
924 {
925 XRef *uxref;
926 int c;
927 //copy the original file
928 BaseStream *copyStr = str->copy();
929 copyStr->reset();
930 while ((c = copyStr->getChar()) != EOF) {
931 outStr->put(c);
932 }
933 copyStr->close();
934 delete copyStr;
935
936 Guchar *fileKey;
937 CryptAlgorithm encAlgorithm;
938 int keyLength;
939 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
940
941 uxref = new XRef();
942 uxref->add(0, 65535, 0, gFalse);
943 xref->lock();
944 for(int i=0; i<xref->getNumObjects(); i++) {
945 if ((xref->getEntry(i)->type == xrefEntryFree) &&
946 (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects
947 continue;
948
949 if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { //we have an updated object
950 Ref ref;
951 ref.num = i;
952 ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
953 if (xref->getEntry(i)->type != xrefEntryFree) {
954 Object obj1;
955 xref->fetch(ref.num, ref.gen, &obj1, 1);
956 Goffset offset = writeObjectHeader(&ref, outStr);
957 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
958 writeObjectFooter(outStr);
959 uxref->add(ref.num, ref.gen, offset, gTrue);
960 obj1.free();
961 } else {
962 uxref->add(ref.num, ref.gen, 0, gFalse);
963 }
964 }
965 }
966 xref->unlock();
967 if (uxref->getNumObjects() == 0) { //we have nothing to update
968 delete uxref;
969 return;
970 }
971
972 Goffset uxrefOffset = outStr->getPos();
973 int numobjects = xref->getNumObjects();
974 const char *fileNameA = fileName ? fileName->getCString() : NULL;
975 Ref rootRef, uxrefStreamRef;
976 rootRef.num = getXRef()->getRootNum();
977 rootRef.gen = getXRef()->getRootGen();
978
979 // Output a xref stream if there is a xref stream already
980 GBool xRefStream = xref->isXRefStream();
981
982 if (xRefStream) {
983 // Append an entry for the xref stream itself
984 uxrefStreamRef.num = numobjects++;
985 uxrefStreamRef.gen = 0;
986 uxref->add(uxrefStreamRef.num, uxrefStreamRef.gen, uxrefOffset, gTrue);
987 }
988
989 Dict *trailerDict = createTrailerDict(numobjects, gTrue, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
990 if (xRefStream) {
991 writeXRefStreamTrailer(trailerDict, uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
992 } else {
993 writeXRefTableTrailer(trailerDict, uxref, gFalse, uxrefOffset, outStr, getXRef());
994 }
995
996 delete trailerDict;
997 delete uxref;
998 }
999
saveCompleteRewrite(OutStream * outStr)1000 void PDFDoc::saveCompleteRewrite (OutStream* outStr)
1001 {
1002 // Make sure that special flags are set, because we are going to read
1003 // all objects, including Unencrypted ones.
1004 xref->scanSpecialFlags();
1005
1006 Guchar *fileKey;
1007 CryptAlgorithm encAlgorithm;
1008 int keyLength;
1009 xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1010
1011 outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
1012 XRef *uxref = new XRef();
1013 uxref->add(0, 65535, 0, gFalse);
1014 xref->lock();
1015 for(int i=0; i<xref->getNumObjects(); i++) {
1016 Object obj1;
1017 Ref ref;
1018 XRefEntryType type = xref->getEntry(i)->type;
1019 if (type == xrefEntryFree) {
1020 ref.num = i;
1021 ref.gen = xref->getEntry(i)->gen;
1022 /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
1023 and we don't want the one with num=0 because it has already been added (gen = 65535)*/
1024 if (ref.gen > 0 && ref.num > 0)
1025 uxref->add(ref.num, ref.gen, 0, gFalse);
1026 } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
1027 // This entry must not be written, put a free entry instead (with incremented gen)
1028 ref.num = i;
1029 ref.gen = xref->getEntry(i)->gen + 1;
1030 uxref->add(ref.num, ref.gen, 0, gFalse);
1031 } else if (type == xrefEntryUncompressed){
1032 ref.num = i;
1033 ref.gen = xref->getEntry(i)->gen;
1034 xref->fetch(ref.num, ref.gen, &obj1, 1);
1035 Goffset offset = writeObjectHeader(&ref, outStr);
1036 // Write unencrypted objects in unencrypted form
1037 if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) {
1038 writeObject(&obj1, outStr, NULL, cryptRC4, 0, 0, 0);
1039 } else {
1040 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1041 }
1042 writeObjectFooter(outStr);
1043 uxref->add(ref.num, ref.gen, offset, gTrue);
1044 obj1.free();
1045 } else if (type == xrefEntryCompressed) {
1046 ref.num = i;
1047 ref.gen = 0; //compressed entries have gen == 0
1048 xref->fetch(ref.num, ref.gen, &obj1, 1);
1049 Goffset offset = writeObjectHeader(&ref, outStr);
1050 writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1051 writeObjectFooter(outStr);
1052 uxref->add(ref.num, ref.gen, offset, gTrue);
1053 obj1.free();
1054 }
1055 }
1056 xref->unlock();
1057 Goffset uxrefOffset = outStr->getPos();
1058 writeXRefTableTrailer(uxrefOffset, uxref, gTrue /* write all entries */,
1059 uxref->getNumObjects(), outStr, gFalse /* complete rewrite */);
1060 delete uxref;
1061 }
1062
writeDictionnary(Dict * dict,OutStream * outStr,XRef * xRef,Guint numOffset,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen)1063 void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1064 CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1065 {
1066 Object obj1;
1067 outStr->printf("<<");
1068 for (int i=0; i<dict->getLength(); i++) {
1069 GooString keyName(dict->getKey(i));
1070 GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
1071 outStr->printf("/%s ", keyNameToPrint->getCString());
1072 delete keyNameToPrint;
1073 writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1074 obj1.free();
1075 }
1076 outStr->printf(">> ");
1077 }
1078
writeStream(Stream * str,OutStream * outStr)1079 void PDFDoc::writeStream (Stream* str, OutStream* outStr)
1080 {
1081 outStr->printf("stream\r\n");
1082 str->reset();
1083 for (int c=str->getChar(); c!= EOF; c=str->getChar()) {
1084 outStr->printf("%c", c);
1085 }
1086 outStr->printf("\r\nendstream\r\n");
1087 }
1088
writeRawStream(Stream * str,OutStream * outStr)1089 void PDFDoc::writeRawStream (Stream* str, OutStream* outStr)
1090 {
1091 Object obj1;
1092 str->getDict()->lookup("Length", &obj1);
1093 if (!obj1.isInt() && !obj1.isInt64()) {
1094 error (errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1095 return;
1096 }
1097
1098 Goffset length;
1099 if (obj1.isInt())
1100 length = obj1.getInt();
1101 else
1102 length = obj1.getInt64();
1103 obj1.free();
1104
1105 outStr->printf("stream\r\n");
1106 str->unfilteredReset();
1107 for (Goffset i = 0; i < length; i++) {
1108 int c = str->getUnfilteredChar();
1109 if (unlikely(c == EOF)) {
1110 error (errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
1111 break;
1112 }
1113 outStr->printf("%c", c);
1114 }
1115 str->reset();
1116 outStr->printf("\r\nendstream\r\n");
1117 }
1118
writeString(GooString * s,OutStream * outStr,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen)1119 void PDFDoc::writeString (GooString* s, OutStream* outStr, Guchar *fileKey,
1120 CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1121 {
1122 // Encrypt string if encryption is enabled
1123 GooString *sEnc = NULL;
1124 if (fileKey) {
1125 Object obj;
1126 EncryptStream *enc = new EncryptStream(new MemStream(s->getCString(), 0, s->getLength(), obj.initNull()),
1127 fileKey, encAlgorithm, keyLength, objNum, objGen);
1128 sEnc = new GooString();
1129 int c;
1130 enc->reset();
1131 while ((c = enc->getChar()) != EOF) {
1132 sEnc->append((char)c);
1133 }
1134
1135 delete enc;
1136 s = sEnc;
1137 }
1138
1139 // Write data
1140 if (s->hasUnicodeMarker()) {
1141 //unicode string don't necessary end with \0
1142 const char* c = s->getCString();
1143 outStr->printf("(");
1144 for(int i=0; i<s->getLength(); i++) {
1145 char unescaped = *(c+i)&0x000000ff;
1146 //escape if needed
1147 if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
1148 outStr->printf("%c", '\\');
1149 outStr->printf("%c", unescaped);
1150 }
1151 outStr->printf(") ");
1152 } else {
1153 const char* c = s->getCString();
1154 outStr->printf("(");
1155 for(int i=0; i<s->getLength(); i++) {
1156 char unescaped = *(c+i)&0x000000ff;
1157 //escape if needed
1158 if (unescaped == '\r')
1159 outStr->printf("\\r");
1160 else if (unescaped == '\n')
1161 outStr->printf("\\n");
1162 else {
1163 if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
1164 outStr->printf("%c", '\\');
1165 }
1166 outStr->printf("%c", unescaped);
1167 }
1168 }
1169 outStr->printf(") ");
1170 }
1171
1172 delete sEnc;
1173 }
1174
writeObjectHeader(Ref * ref,OutStream * outStr)1175 Goffset PDFDoc::writeObjectHeader (Ref *ref, OutStream* outStr)
1176 {
1177 Goffset offset = outStr->getPos();
1178 outStr->printf("%i %i obj ", ref->num, ref->gen);
1179 return offset;
1180 }
1181
writeObject(Object * obj,OutStream * outStr,XRef * xRef,Guint numOffset,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen)1182 void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1183 CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1184 {
1185 Array *array;
1186 Object obj1;
1187 Goffset tmp;
1188
1189 switch (obj->getType()) {
1190 case objBool:
1191 outStr->printf("%s ", obj->getBool()?"true":"false");
1192 break;
1193 case objInt:
1194 outStr->printf("%i ", obj->getInt());
1195 break;
1196 case objInt64:
1197 outStr->printf("%lli ", obj->getInt64());
1198 break;
1199 case objReal:
1200 {
1201 GooString s;
1202 s.appendf("{0:.10g}", obj->getReal());
1203 outStr->printf("%s ", s.getCString());
1204 break;
1205 }
1206 case objString:
1207 writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen);
1208 break;
1209 case objName:
1210 {
1211 GooString name(obj->getName());
1212 GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
1213 outStr->printf("/%s ", nameToPrint->getCString());
1214 delete nameToPrint;
1215 break;
1216 }
1217 case objNull:
1218 outStr->printf( "null ");
1219 break;
1220 case objArray:
1221 array = obj->getArray();
1222 outStr->printf("[");
1223 for (int i=0; i<array->getLength(); i++) {
1224 writeObject(array->getNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1225 obj1.free();
1226 }
1227 outStr->printf("] ");
1228 break;
1229 case objDict:
1230 writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1231 break;
1232 case objStream:
1233 {
1234 //We can't modify stream with the current implementation (no write functions in Stream API)
1235 // => the only type of streams which that have been modified are internal streams (=strWeird)
1236 Stream *stream = obj->getStream();
1237 if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
1238 //we write the stream unencoded => TODO: write stream encoder
1239
1240 // Encrypt stream
1241 EncryptStream *encStream = NULL;
1242 GBool removeFilter = gTrue;
1243 if (stream->getKind() == strWeird && fileKey) {
1244 Object filter;
1245 stream->getDict()->lookup("Filter", &filter);
1246 if (!filter.isName("Crypt")) {
1247 if (filter.isArray()) {
1248 for (int i = 0; i < filter.arrayGetLength(); i++) {
1249 Object filterEle;
1250 filter.arrayGet(i, &filterEle);
1251 if (filterEle.isName("Crypt")) {
1252 filterEle.free();
1253 removeFilter = gFalse;
1254 break;
1255 }
1256 filterEle.free();
1257 }
1258 if (removeFilter) {
1259 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1260 encStream->setAutoDelete(gFalse);
1261 stream = encStream;
1262 }
1263 } else {
1264 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1265 encStream->setAutoDelete(gFalse);
1266 stream = encStream;
1267 }
1268 } else {
1269 removeFilter = gFalse;
1270 }
1271 filter.free();
1272 } else if (fileKey != NULL) { // Encrypt stream
1273 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1274 encStream->setAutoDelete(gFalse);
1275 stream = encStream;
1276 }
1277
1278 stream->reset();
1279 //recalculate stream length
1280 tmp = 0;
1281 for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
1282 tmp++;
1283 }
1284 obj1.initInt64(tmp);
1285 stream->getDict()->set("Length", &obj1);
1286
1287 //Remove Stream encoding
1288 if (removeFilter) {
1289 stream->getDict()->remove("Filter");
1290 }
1291 stream->getDict()->remove("DecodeParms");
1292
1293 writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1294 writeStream (stream,outStr);
1295 delete encStream;
1296 obj1.free();
1297 } else {
1298 //raw stream copy
1299 FilterStream *fs = dynamic_cast<FilterStream*>(stream);
1300 if (fs) {
1301 BaseStream *bs = fs->getBaseStream();
1302 if (bs) {
1303 Goffset streamEnd;
1304 if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
1305 Object val;
1306 val.initInt64(streamEnd - bs->getStart());
1307 stream->getDict()->set("Length", &val);
1308 }
1309 }
1310 }
1311 writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1312 writeRawStream (stream, outStr);
1313 }
1314 break;
1315 }
1316 case objRef:
1317 outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
1318 break;
1319 case objCmd:
1320 outStr->printf("%s\n", obj->getCmd());
1321 break;
1322 case objError:
1323 outStr->printf("error\r\n");
1324 break;
1325 case objEOF:
1326 outStr->printf("eof\r\n");
1327 break;
1328 case objNone:
1329 outStr->printf("none\r\n");
1330 break;
1331 default:
1332 error(errUnimplemented, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj->getType());
1333 break;
1334 }
1335 }
1336
writeObjectFooter(OutStream * outStr)1337 void PDFDoc::writeObjectFooter (OutStream* outStr)
1338 {
1339 outStr->printf("endobj\r\n");
1340 }
1341
createTrailerDict(int uxrefSize,GBool incrUpdate,Goffset startxRef,Ref * root,XRef * xRef,const char * fileName,Goffset fileSize)1342 Dict *PDFDoc::createTrailerDict(int uxrefSize, GBool incrUpdate, Goffset startxRef,
1343 Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
1344 {
1345 Dict *trailerDict = new Dict(xRef);
1346 Object obj1;
1347 obj1.initInt(uxrefSize);
1348 trailerDict->set("Size", &obj1);
1349 obj1.free();
1350
1351 //build a new ID, as recommended in the reference, uses:
1352 // - current time
1353 // - file name
1354 // - file size
1355 // - values of entry in information dictionnary
1356 GooString message;
1357 char buffer[256];
1358 sprintf(buffer, "%i", (int)time(NULL));
1359 message.append(buffer);
1360
1361 if (fileName)
1362 message.append(fileName);
1363
1364 sprintf(buffer, "%lli", (long long)fileSize);
1365 message.append(buffer);
1366
1367 //info dict -- only use text string
1368 if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
1369 for(int i=0; i<obj1.getDict()->getLength(); i++) {
1370 Object obj2;
1371 obj1.getDict()->getVal(i, &obj2);
1372 if (obj2.isString()) {
1373 message.append(obj2.getString());
1374 }
1375 obj2.free();
1376 }
1377 }
1378 obj1.free();
1379
1380 GBool hasEncrypt = gFalse;
1381 if (!xRef->getTrailerDict()->isNone()) {
1382 Object obj2;
1383 xRef->getTrailerDict()->dictLookupNF("Encrypt", &obj2);
1384 if (!obj2.isNull()) {
1385 trailerDict->set("Encrypt", &obj2);
1386 hasEncrypt = gTrue;
1387 obj2.free();
1388 }
1389 }
1390
1391 //calculate md5 digest
1392 Guchar digest[16];
1393 md5((Guchar*)message.getCString(), message.getLength(), digest);
1394 obj1.initString(new GooString((const char*)digest, 16));
1395
1396 //create ID array
1397 Object obj2,obj3,obj5;
1398 obj2.initArray(xRef);
1399
1400 // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1401 if (incrUpdate || hasEncrypt) {
1402 Object obj4;
1403 //only update the second part of the array
1404 xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
1405 if (!obj4.isArray()) {
1406 error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1407 } else {
1408 //Get the first part of the ID
1409 obj4.arrayGet(0,&obj3);
1410
1411 obj2.arrayAdd(&obj3);
1412 obj2.arrayAdd(&obj1);
1413 trailerDict->set("ID", &obj2);
1414 }
1415 obj4.free();
1416 } else {
1417 //new file => same values for the two identifiers
1418 obj2.arrayAdd(&obj1);
1419 obj1.initString(new GooString((const char*)digest, 16));
1420 obj2.arrayAdd(&obj1);
1421 trailerDict->set("ID", &obj2);
1422 }
1423
1424 obj1.initRef(root->num, root->gen);
1425 trailerDict->set("Root", &obj1);
1426
1427 if (incrUpdate) {
1428 obj1.initInt64(startxRef);
1429 trailerDict->set("Prev", &obj1);
1430 }
1431
1432 if (!xRef->getTrailerDict()->isNone()) {
1433 xRef->getDocInfoNF(&obj5);
1434 if (!obj5.isNull()) {
1435 trailerDict->set("Info", &obj5);
1436 }
1437 }
1438
1439 return trailerDict;
1440 }
1441
writeXRefTableTrailer(Dict * trailerDict,XRef * uxref,GBool writeAllEntries,Goffset uxrefOffset,OutStream * outStr,XRef * xRef)1442 void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAllEntries, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1443 {
1444 uxref->writeTableToFile( outStr, writeAllEntries );
1445 outStr->printf( "trailer\r\n");
1446 writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1447 outStr->printf( "\r\nstartxref\r\n");
1448 outStr->printf( "%lli\r\n", uxrefOffset);
1449 outStr->printf( "%%%%EOF\r\n");
1450 }
1451
writeXRefStreamTrailer(Dict * trailerDict,XRef * uxref,Ref * uxrefStreamRef,Goffset uxrefOffset,OutStream * outStr,XRef * xRef)1452 void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1453 {
1454 GooString stmData;
1455
1456 // Fill stmData and some trailerDict fields
1457 uxref->writeStreamToBuffer(&stmData, trailerDict, xRef);
1458
1459 // Create XRef stream object and write it
1460 Object obj1;
1461 MemStream *mStream = new MemStream( stmData.getCString(), 0,
1462 stmData.getLength(), obj1.initDict(trailerDict) );
1463 writeObjectHeader(uxrefStreamRef, outStr);
1464 writeObject(obj1.initStream(mStream), outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1465 writeObjectFooter(outStr);
1466 obj1.free();
1467
1468 outStr->printf( "startxref\r\n");
1469 outStr->printf( "%lli\r\n", uxrefOffset);
1470 outStr->printf( "%%%%EOF\r\n");
1471 }
1472
writeXRefTableTrailer(Goffset uxrefOffset,XRef * uxref,GBool writeAllEntries,int uxrefSize,OutStream * outStr,GBool incrUpdate)1473 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, GBool writeAllEntries,
1474 int uxrefSize, OutStream* outStr, GBool incrUpdate)
1475 {
1476 const char *fileNameA = fileName ? fileName->getCString() : NULL;
1477 // file size (doesn't include the trailer)
1478 unsigned int fileSize = 0;
1479 int c;
1480 str->reset();
1481 while ((c = str->getChar()) != EOF) {
1482 fileSize++;
1483 }
1484 str->close();
1485 Ref ref;
1486 ref.num = getXRef()->getRootNum();
1487 ref.gen = getXRef()->getRootGen();
1488 Dict * trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref,
1489 getXRef(), fileNameA, fileSize);
1490 writeXRefTableTrailer(trailerDict, uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
1491 delete trailerDict;
1492 }
1493
writeHeader(OutStream * outStr,int major,int minor)1494 void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
1495 {
1496 outStr->printf("%%PDF-%d.%d\n", major, minor);
1497 outStr->printf("%%\xE2\xE3\xCF\xD3\n");
1498 }
1499
markDictionnary(Dict * dict,XRef * xRef,XRef * countRef,Guint numOffset,int oldRefNum,int newRefNum)1500 void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1501 {
1502 Object obj1;
1503 for (int i=0; i<dict->getLength(); i++) {
1504 const char *key = dict->getKey(i);
1505 if (strcmp(key, "Annots") != 0) {
1506 markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1507 } else {
1508 Object annotsObj;
1509 dict->getValNF(i, &annotsObj);
1510 if (!annotsObj.isNull()) {
1511 markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum);
1512 annotsObj.free();
1513 }
1514 }
1515 obj1.free();
1516 }
1517 }
1518
markObject(Object * obj,XRef * xRef,XRef * countRef,Guint numOffset,int oldRefNum,int newRefNum)1519 void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1520 {
1521 Array *array;
1522 Object obj1;
1523
1524 switch (obj->getType()) {
1525 case objArray:
1526 array = obj->getArray();
1527 for (int i=0; i<array->getLength(); i++) {
1528 markObject(array->getNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1529 obj1.free();
1530 }
1531 break;
1532 case objDict:
1533 markDictionnary (obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1534 break;
1535 case objStream:
1536 {
1537 Stream *stream = obj->getStream();
1538 markDictionnary (stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1539 }
1540 break;
1541 case objRef:
1542 {
1543 if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1544 if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
1545 return; // already marked as free => should be replaced
1546 }
1547 xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);
1548 if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
1549 xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
1550 }
1551 }
1552 if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1553 countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree)
1554 {
1555 countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
1556 } else {
1557 XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
1558 entry->gen++;
1559 if (entry->gen > 9)
1560 break;
1561 }
1562 Object obj1;
1563 getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
1564 markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
1565 obj1.free();
1566 }
1567 break;
1568 default:
1569 break;
1570 }
1571 }
1572
replacePageDict(int pageNo,int rotate,PDFRectangle * mediaBox,PDFRectangle * cropBox)1573 void PDFDoc::replacePageDict(int pageNo, int rotate,
1574 PDFRectangle *mediaBox,
1575 PDFRectangle *cropBox)
1576 {
1577 Ref *refPage = getCatalog()->getPageRef(pageNo);
1578 Object page;
1579 getXRef()->fetch(refPage->num, refPage->gen, &page);
1580 Dict *pageDict = page.getDict();
1581 pageDict->remove("MediaBoxssdf");
1582 pageDict->remove("MediaBox");
1583 pageDict->remove("CropBox");
1584 pageDict->remove("ArtBox");
1585 pageDict->remove("BleedBox");
1586 pageDict->remove("TrimBox");
1587 pageDict->remove("Rotate");
1588 Object mediaBoxObj;
1589 mediaBoxObj.initArray(getXRef());
1590 Object murx;
1591 murx.initReal(mediaBox->x1);
1592 Object mury;
1593 mury.initReal(mediaBox->y1);
1594 Object mllx;
1595 mllx.initReal(mediaBox->x2);
1596 Object mlly;
1597 mlly.initReal(mediaBox->y2);
1598 mediaBoxObj.arrayAdd(&murx);
1599 mediaBoxObj.arrayAdd(&mury);
1600 mediaBoxObj.arrayAdd(&mllx);
1601 mediaBoxObj.arrayAdd(&mlly);
1602 pageDict->add(copyString("MediaBox"), &mediaBoxObj);
1603 if (cropBox != NULL) {
1604 Object cropBoxObj;
1605 cropBoxObj.initArray(getXRef());
1606 Object curx;
1607 curx.initReal(cropBox->x1);
1608 Object cury;
1609 cury.initReal(cropBox->y1);
1610 Object cllx;
1611 cllx.initReal(cropBox->x2);
1612 Object clly;
1613 clly.initReal(cropBox->y2);
1614 cropBoxObj.arrayAdd(&curx);
1615 cropBoxObj.arrayAdd(&cury);
1616 cropBoxObj.arrayAdd(&cllx);
1617 cropBoxObj.arrayAdd(&clly);
1618 pageDict->add(copyString("CropBox"), &cropBoxObj);
1619 cropBoxObj.getArray()->incRef();
1620 pageDict->add(copyString("TrimBox"), &cropBoxObj);
1621 } else {
1622 mediaBoxObj.getArray()->incRef();
1623 pageDict->add(copyString("TrimBox"), &mediaBoxObj);
1624 }
1625 Object rotateObj;
1626 rotateObj.initInt(rotate);
1627 pageDict->add(copyString("Rotate"), &rotateObj);
1628 getXRef()->setModifiedObject(&page, *refPage);
1629 page.free();
1630 }
1631
markPageObjects(Dict * pageDict,XRef * xRef,XRef * countRef,Guint numOffset,int oldRefNum,int newRefNum)1632 void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1633 {
1634 pageDict->remove("OpenAction");
1635 pageDict->remove("Outlines");
1636 pageDict->remove("StructTreeRoot");
1637
1638 for (int n = 0; n < pageDict->getLength(); n++) {
1639 const char *key = pageDict->getKey(n);
1640 Object value; pageDict->getValNF(n, &value);
1641 if (strcmp(key, "Parent") != 0 &&
1642 strcmp(key, "Pages") != 0 &&
1643 strcmp(key, "AcroForm") != 0 &&
1644 strcmp(key, "Annots") != 0 &&
1645 strcmp(key, "P") != 0 &&
1646 strcmp(key, "Root") != 0) {
1647 markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum);
1648 }
1649 value.free();
1650 }
1651 }
1652
markAnnotations(Object * annotsObj,XRef * xRef,XRef * countRef,Guint numOffset,int oldPageNum,int newPageNum)1653 GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum) {
1654 Object annots;
1655 GBool modified = gFalse;
1656 annotsObj->fetch(getXRef(), &annots);
1657 if (annots.isArray()) {
1658 Array *array = annots.getArray();
1659 for (int i=array->getLength() - 1; i >= 0; i--) {
1660 Object obj1;
1661 if (array->get(i, &obj1)->isDict()) {
1662 Object type;
1663 Dict *dict = obj1.getDict();
1664 dict->lookup("Type", &type);
1665 if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
1666 Object obj2;
1667 if (dict->lookupNF("P", &obj2)->isRef()) {
1668 if (obj2.getRef().num == oldPageNum) {
1669 Object obj3;
1670 array->getNF(i, &obj3);
1671 if (obj3.isRef()) {
1672 Object *newRef = new Object();
1673 newRef->initRef(newPageNum, 0);
1674 dict->set("P", newRef);
1675 getXRef()->setModifiedObject(&obj1, obj3.getRef());
1676 }
1677 obj3.free();
1678 } else if (obj2.getRef().num == newPageNum) {
1679 obj1.free();
1680 obj2.free();
1681 type.free();
1682 continue;
1683 } else {
1684 Object page;
1685 getXRef()->fetch(obj2.getRef().num, obj2.getRef().gen, &page);
1686 if (page.isDict()) {
1687 Object pagetype;
1688 Dict *dict = page.getDict();
1689 dict->lookup("Type", &pagetype);
1690 if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
1691 obj1.free();
1692 obj2.free();
1693 type.free();
1694 page.free();
1695 pagetype.free();
1696 continue;
1697 }
1698 pagetype.free();
1699 }
1700 page.free();
1701 obj1.free();
1702 obj2.free();
1703 type.free();
1704 array->remove(i);
1705 modified = gTrue;
1706 continue;
1707 }
1708 }
1709 obj2.free();
1710 }
1711 type.free();
1712 markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum);
1713 }
1714 obj1.free();
1715 array->getNF(i, &obj1);
1716 if (obj1.isRef()) {
1717 if (obj1.getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1718 if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
1719 continue; // already marked as free => should be replaced
1720 }
1721 xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, gTrue);
1722 if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
1723 xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
1724 }
1725 }
1726 if (obj1.getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1727 countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree)
1728 {
1729 countRef->add(obj1.getRef().num + numOffset, 1, 0, gTrue);
1730 } else {
1731 XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
1732 entry->gen++;
1733 }
1734 }
1735 obj1.free();
1736 }
1737 }
1738 if (annotsObj->isRef()) {
1739 if (annotsObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1740 if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
1741 return modified; // already marked as free => should be replaced
1742 }
1743 xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, gTrue);
1744 if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
1745 xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1746 }
1747 }
1748 if (annotsObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1749 countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree)
1750 {
1751 countRef->add(annotsObj->getRef().num + numOffset, 1, 0, gTrue);
1752 } else {
1753 XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
1754 entry->gen++;
1755 }
1756 getXRef()->setModifiedObject(&annots, annotsObj->getRef());
1757 }
1758 annots.free();
1759 return modified;
1760 }
1761
markAcroForm(Object * afObj,XRef * xRef,XRef * countRef,Guint numOffset,int oldRefNum,int newRefNum)1762 void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum) {
1763 Object acroform;
1764 GBool modified = gFalse;
1765 afObj->fetch(getXRef(), &acroform);
1766 if (acroform.isDict()) {
1767 Dict *dict = acroform.getDict();
1768 for (int i=0; i < dict->getLength(); i++) {
1769 if (strcmp(dict->getKey(i), "Fields") == 0) {
1770 Object fields;
1771 modified = markAnnotations(dict->getValNF(i, &fields), xRef, countRef, numOffset, oldRefNum, newRefNum);
1772 fields.free();
1773 } else {
1774 Object obj;
1775 markObject(dict->getValNF(i, &obj), xRef, countRef, numOffset, oldRefNum, newRefNum);
1776 obj.free();
1777 }
1778 }
1779 }
1780 if (afObj->isRef()) {
1781 if (afObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1782 if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
1783 return; // already marked as free => should be replaced
1784 }
1785 xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, gTrue);
1786 if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
1787 xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1788 }
1789 }
1790 if (afObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1791 countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree)
1792 {
1793 countRef->add(afObj->getRef().num + numOffset, 1, 0, gTrue);
1794 } else {
1795 XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
1796 entry->gen++;
1797 }
1798 if (modified){
1799 getXRef()->setModifiedObject(&acroform, afObj->getRef());
1800 }
1801 }
1802 acroform.free();
1803 return;
1804 }
1805
writePageObjects(OutStream * outStr,XRef * xRef,Guint numOffset,GBool combine)1806 Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset, GBool combine)
1807 {
1808 Guint objectsCount = 0; //count the number of objects in the XRef(s)
1809 Guchar *fileKey;
1810 CryptAlgorithm encAlgorithm;
1811 int keyLength;
1812 xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1813
1814 for (int n = numOffset; n < xRef->getNumObjects(); n++) {
1815 if (xRef->getEntry(n)->type != xrefEntryFree) {
1816 Object obj;
1817 Ref ref;
1818 ref.num = n;
1819 ref.gen = xRef->getEntry(n)->gen;
1820 objectsCount++;
1821 getXRef()->fetch(ref.num - numOffset, ref.gen, &obj);
1822 Goffset offset = writeObjectHeader(&ref, outStr);
1823 if (combine) {
1824 writeObject(&obj, outStr, getXRef(), numOffset, NULL, cryptRC4, 0, 0, 0);
1825 } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
1826 writeObject(&obj, outStr, NULL, cryptRC4, 0, 0, 0);
1827 } else {
1828 writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1829 }
1830 writeObjectFooter(outStr);
1831 xRef->add(ref.num, ref.gen, offset, gTrue);
1832 obj.free();
1833 }
1834 }
1835 return objectsCount;
1836 }
1837
1838 #ifndef DISABLE_OUTLINE
getOutline()1839 Outline *PDFDoc::getOutline()
1840 {
1841 if (!outline) {
1842 pdfdocLocker();
1843 // read outline
1844 outline = new Outline(catalog->getOutline(), xref);
1845 }
1846
1847 return outline;
1848 }
1849 #endif
1850
ErrorPDFDoc(int errorCode,GooString * fileNameA)1851 PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
1852 {
1853 PDFDoc *doc = new PDFDoc();
1854 doc->errCode = errorCode;
1855 doc->fileName = fileNameA;
1856
1857 return doc;
1858 }
1859
strToLongLong(char * s)1860 long long PDFDoc::strToLongLong(char *s) {
1861 long long x, d;
1862 char *p;
1863
1864 x = 0;
1865 for (p = s; *p && isdigit(*p & 0xff); ++p) {
1866 d = *p - '0';
1867 if (x > (LLONG_MAX - d) / 10) {
1868 break;
1869 }
1870 x = 10 * x + d;
1871 }
1872 return x;
1873 }
1874
1875 // Read the 'startxref' position.
getStartXRef(GBool tryingToReconstruct)1876 Goffset PDFDoc::getStartXRef(GBool tryingToReconstruct)
1877 {
1878 if (startXRefPos == -1) {
1879
1880 if (isLinearized(tryingToReconstruct)) {
1881 char buf[linearizationSearchSize+1];
1882 int c, n, i;
1883
1884 str->setPos(0);
1885 for (n = 0; n < linearizationSearchSize; ++n) {
1886 if ((c = str->getChar()) == EOF) {
1887 break;
1888 }
1889 buf[n] = c;
1890 }
1891 buf[n] = '\0';
1892
1893 // find end of first obj (linearization dictionary)
1894 startXRefPos = 0;
1895 for (i = 0; i < n; i++) {
1896 if (!strncmp("endobj", &buf[i], 6)) {
1897 i += 6;
1898 //skip whitespace
1899 while (buf[i] && Lexer::isSpace(buf[i])) ++i;
1900 startXRefPos = i;
1901 break;
1902 }
1903 }
1904 } else {
1905 char buf[xrefSearchSize+1];
1906 char *p;
1907 int c, n, i;
1908
1909 // read last xrefSearchSize bytes
1910 int segnum = 0;
1911 int maxXRefSearch = 24576;
1912 if (str->getLength() < maxXRefSearch) maxXRefSearch = str->getLength();
1913 for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
1914 str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
1915 for (n = 0; n < xrefSearchSize; ++n) {
1916 if ((c = str->getChar()) == EOF) {
1917 break;
1918 }
1919 buf[n] = c;
1920 }
1921 buf[n] = '\0';
1922
1923 // find startxref
1924 for (i = n - 9; i >= 0; --i) {
1925 if (!strncmp(&buf[i], "startxref", 9)) {
1926 break;
1927 }
1928 }
1929 if (i < 0) {
1930 startXRefPos = 0;
1931 } else {
1932 for (p = &buf[i + 9]; isspace(*p); ++p);
1933 startXRefPos = strToLongLong(p);
1934 break;
1935 }
1936 }
1937 }
1938
1939 }
1940
1941 return startXRefPos;
1942 }
1943
getMainXRefEntriesOffset(GBool tryingToReconstruct)1944 Goffset PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct)
1945 {
1946 Guint mainXRefEntriesOffset = 0;
1947
1948 if (isLinearized(tryingToReconstruct)) {
1949 mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
1950 }
1951
1952 return mainXRefEntriesOffset;
1953 }
1954
getNumPages()1955 int PDFDoc::getNumPages()
1956 {
1957 if (isLinearized()) {
1958 int n;
1959 if ((n = getLinearization()->getNumPages())) {
1960 return n;
1961 }
1962 }
1963
1964 return catalog->getNumPages();
1965 }
1966
parsePage(int page)1967 Page *PDFDoc::parsePage(int page)
1968 {
1969 Page *p = NULL;
1970 Object obj;
1971 Ref pageRef;
1972 Dict *pageDict;
1973
1974 pageRef.num = getHints()->getPageObjectNum(page);
1975 if (!pageRef.num) {
1976 error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
1977 return NULL;
1978 }
1979
1980 // check for bogus ref - this can happen in corrupted PDF files
1981 if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
1982 error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef.num, page);
1983 return NULL;
1984 }
1985
1986 pageRef.gen = xref->getEntry(pageRef.num)->gen;
1987 xref->fetch(pageRef.num, pageRef.gen, &obj);
1988 if (!obj.isDict("Page")) {
1989 obj.free();
1990 error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
1991 return NULL;
1992 }
1993 pageDict = obj.getDict();
1994
1995 p = new Page(this, page, pageDict, pageRef,
1996 new PageAttrs(NULL, pageDict), catalog->getForm());
1997 obj.free();
1998
1999 return p;
2000 }
2001
getPage(int page)2002 Page *PDFDoc::getPage(int page)
2003 {
2004 if ((page < 1) || page > getNumPages()) return NULL;
2005
2006 if (isLinearized() && checkLinearization()) {
2007 pdfdocLocker();
2008 if (!pageCache) {
2009 pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
2010 for (int i = 0; i < getNumPages(); i++) {
2011 pageCache[i] = NULL;
2012 }
2013 }
2014 if (!pageCache[page-1]) {
2015 pageCache[page-1] = parsePage(page);
2016 }
2017 if (pageCache[page-1]) {
2018 return pageCache[page-1];
2019 } else {
2020 error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
2021 }
2022 }
2023
2024 return catalog->getPage(page);
2025 }
2026