1 //========================================================================
2 //
3 // PDFDoc.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
17 // Copyright (C) 2005, 2007-2009, 2011-2015 Albert Astals Cid <aacid@kde.org>
18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
35 // Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
36 // Copyright (C) 2015 André Esser <bepandre@hotmail.com>
37 //
38 // To see a description of the changes please see the Changelog file that
39 // came with your tarball or type make ChangeLog if you are building from git
40 //
41 //========================================================================
42 
43 #include <config.h>
44 
45 #ifdef USE_GCC_PRAGMAS
46 #pragma implementation
47 #endif
48 
49 #include <ctype.h>
50 #include <locale.h>
51 #include <stdio.h>
52 #include <errno.h>
53 #include <stdlib.h>
54 #include <stddef.h>
55 #include <string.h>
56 #include <time.h>
57 #include <sys/stat.h>
58 #include "goo/gstrtod.h"
59 #include "goo/GooString.h"
60 #include "goo/gfile.h"
61 #include "poppler-config.h"
62 #include "GlobalParams.h"
63 #include "Page.h"
64 #include "Catalog.h"
65 #include "Stream.h"
66 #include "XRef.h"
67 #include "Linearization.h"
68 #include "Link.h"
69 #include "OutputDev.h"
70 #include "Error.h"
71 #include "ErrorCodes.h"
72 #include "Lexer.h"
73 #include "Parser.h"
74 #include "SecurityHandler.h"
75 #include "Decrypt.h"
76 #ifndef DISABLE_OUTLINE
77 #include "Outline.h"
78 #endif
79 #include "PDFDoc.h"
80 #include "Hints.h"
81 
// pdfdocLocker() is placed at the top of member functions that must hold
// the document mutex.  With MULTITHREADED enabled it declares a local
// MutexLocker named `locker` constructed from &mutex; otherwise it
// expands to nothing.
#if MULTITHREADED
#  define pdfdocLocker()   MutexLocker locker(&mutex)
#else
#  define pdfdocLocker()
#endif
87 
88 //------------------------------------------------------------------------
89 
#define headerSearchSize 1024	// read this many bytes at the beginning of
				//   the file to look for the '%PDF-' marker
#define pdfIdLength 32   // length, in hex characters, of the PDF document
                         //   IDs (PermanentId, UpdateId) produced by get_id()

#define linearizationSearchSize 1024	// read this many bytes at the beginning
					// of the file to look for the
					// linearization dictionary

#define xrefSearchSize 1024	// read this many bytes at the end of the file
				//   to look for 'startxref'
100 
101 //------------------------------------------------------------------------
102 // PDFDoc
103 //------------------------------------------------------------------------
104 
init()105 void PDFDoc::init()
106 {
107 #if MULTITHREADED
108   gInitMutex(&mutex);
109 #endif
110   ok = gFalse;
111   errCode = errNone;
112   fileName = NULL;
113   file = NULL;
114   str = NULL;
115   xref = NULL;
116   linearization = NULL;
117   catalog = NULL;
118   hints = NULL;
119 #ifndef DISABLE_OUTLINE
120   outline = NULL;
121 #endif
122   startXRefPos = -1;
123   secHdlr = NULL;
124   pageCache = NULL;
125 }
126 
// Default constructor: only resets the members via init().  No stream is
// attached and `ok` stays gFalse.
PDFDoc::PDFDoc()
{
  init();
}
131 
PDFDoc(GooString * fileNameA,GooString * ownerPassword,GooString * userPassword,void * guiDataA)132 PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
133 	       GooString *userPassword, void *guiDataA) {
134   Object obj;
135 #ifdef _WIN32
136   int n, i;
137 #endif
138 
139   init();
140 
141   fileName = fileNameA;
142   guiData = guiDataA;
143 #ifdef _WIN32
144   n = fileName->getLength();
145   fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
146   for (i = 0; i < n; ++i) {
147     fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
148   }
149   fileNameU[n] = L'\0';
150 #endif
151 
152   // try to open file
153   file = GooFile::open(fileName);
154   if (file == NULL) {
155     // fopen() has failed.
156     // Keep a copy of the errno returned by fopen so that it can be
157     // referred to later.
158     fopenErrno = errno;
159     error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName, strerror(errno));
160     errCode = errOpenFile;
161     return;
162   }
163 
164   // create stream
165   obj.initNull();
166   str = new FileStream(file, 0, gFalse, file->size(), &obj);
167 
168   ok = setup(ownerPassword, userPassword);
169 }
170 
171 #ifdef _WIN32
PDFDoc(wchar_t * fileNameA,int fileNameLen,GooString * ownerPassword,GooString * userPassword,void * guiDataA)172 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
173 	       GooString *userPassword, void *guiDataA) {
174   OSVERSIONINFO version;
175   Object obj;
176   int i;
177 
178   init();
179 
180   guiData = guiDataA;
181 
182   // save both Unicode and 8-bit copies of the file name
183   fileName = new GooString();
184   fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
185   for (i = 0; i < fileNameLen; ++i) {
186     fileName->append((char)fileNameA[i]);
187     fileNameU[i] = fileNameA[i];
188   }
189   fileNameU[fileNameLen] = L'\0';
190 
191   // try to open file
192   // NB: _wfopen is only available in NT
193   version.dwOSVersionInfoSize = sizeof(version);
194   GetVersionEx(&version);
195   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
196     file = GooFile::open(fileNameU);
197   } else {
198     file = GooFile::open(fileName);
199   }
200   if (!file) {
201     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
202     errCode = errOpenFile;
203     return;
204   }
205 
206   // create stream
207   obj.initNull();
208   str = new FileStream(file, 0, gFalse, file->size(), &obj);
209 
210   ok = setup(ownerPassword, userPassword);
211 }
212 #endif
213 
PDFDoc(BaseStream * strA,GooString * ownerPassword,GooString * userPassword,void * guiDataA)214 PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword,
215 	       GooString *userPassword, void *guiDataA) {
216 #ifdef _WIN32
217   int n, i;
218 #endif
219 
220   init();
221   guiData = guiDataA;
222   if (strA->getFileName()) {
223     fileName = strA->getFileName()->copy();
224 #ifdef _WIN32
225     n = fileName->getLength();
226     fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
227     for (i = 0; i < n; ++i) {
228       fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
229     }
230     fileNameU[n] = L'\0';
231 #endif
232   } else {
233     fileName = NULL;
234 #ifdef _WIN32
235     fileNameU = NULL;
236 #endif
237   }
238   str = strA;
239   ok = setup(ownerPassword, userPassword);
240 }
241 
setup(GooString * ownerPassword,GooString * userPassword)242 GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
243   pdfdocLocker();
244   str->setPos(0, -1);
245   if (str->getPos() < 0)
246   {
247     error(errSyntaxError, -1, "Document base stream is not seekable");
248     return gFalse;
249   }
250 
251   str->reset();
252 
253   // check footer
254   // Adobe does not seem to enforce %%EOF, so we do the same
255 //  if (!checkFooter()) return gFalse;
256 
257   // check header
258   checkHeader();
259 
260   GBool wasReconstructed = false;
261 
262   // read xref table
263   xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
264   if (!xref->isOk()) {
265     if (wasReconstructed) {
266       delete xref;
267       startXRefPos = -1;
268       xref = new XRef(str, getStartXRef(gTrue), getMainXRefEntriesOffset(gTrue), &wasReconstructed);
269     }
270     if (!xref->isOk()) {
271       error(errSyntaxError, -1, "Couldn't read xref table");
272       errCode = xref->getErrorCode();
273       return gFalse;
274     }
275   }
276 
277   // check for encryption
278   if (!checkEncryption(ownerPassword, userPassword)) {
279     errCode = errEncrypted;
280     return gFalse;
281   }
282 
283   // read catalog
284   catalog = new Catalog(this);
285   if (catalog && !catalog->isOk()) {
286     if (!wasReconstructed)
287     {
288       // try one more time to contruct the Catalog, maybe the problem is damaged XRef
289       delete catalog;
290       delete xref;
291       xref = new XRef(str, 0, 0, NULL, true);
292       catalog = new Catalog(this);
293     }
294 
295     if (catalog && !catalog->isOk()) {
296       error(errSyntaxError, -1, "Couldn't read page catalog");
297       errCode = errBadCatalog;
298       return gFalse;
299     }
300   }
301 
302   // done
303   return gTrue;
304 }
305 
~PDFDoc()306 PDFDoc::~PDFDoc() {
307   if (pageCache) {
308     for (int i = 0; i < getNumPages(); i++) {
309       if (pageCache[i]) {
310         delete pageCache[i];
311       }
312     }
313     gfree(pageCache);
314   }
315   delete secHdlr;
316 #ifndef DISABLE_OUTLINE
317   if (outline) {
318     delete outline;
319   }
320 #endif
321   if (catalog) {
322     delete catalog;
323   }
324   if (xref) {
325     delete xref;
326   }
327   if (hints) {
328     delete hints;
329   }
330   if (linearization) {
331     delete linearization;
332   }
333   if (str) {
334     delete str;
335   }
336   if (file) {
337     delete file;
338   }
339   if (fileName) {
340     delete fileName;
341   }
342 #ifdef _WIN32
343   if (fileNameU) {
344     gfree(fileNameU);
345   }
346 #endif
347 #if MULTITHREADED
348   gDestroyMutex(&mutex);
349 #endif
350 }
351 
352 
353 // Check for a %%EOF at the end of this stream
checkFooter()354 GBool PDFDoc::checkFooter() {
355   // we look in the last 1024 chars because Adobe does the same
356   char *eof = new char[1025];
357   Goffset pos = str->getPos();
358   str->setPos(1024, -1);
359   int i, ch;
360   for (i = 0; i < 1024; i++)
361   {
362     ch = str->getChar();
363     if (ch == EOF)
364       break;
365     eof[i] = ch;
366   }
367   eof[i] = '\0';
368 
369   bool found = false;
370   for (i = i - 5; i >= 0; i--) {
371     if (strncmp (&eof[i], "%%EOF", 5) == 0) {
372       found = true;
373       break;
374     }
375   }
376   if (!found)
377   {
378     error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
379     errCode = errDamaged;
380     delete[] eof;
381     return gFalse;
382   }
383   delete[] eof;
384   str->setPos(pos);
385   return gTrue;
386 }
387 
388 // Check for a PDF header on this stream.  Skip past some garbage
389 // if necessary.
checkHeader()390 void PDFDoc::checkHeader() {
391   char hdrBuf[headerSearchSize+1];
392   char *p;
393   char *tokptr;
394   int i;
395 
396   pdfMajorVersion = 0;
397   pdfMinorVersion = 0;
398   for (i = 0; i < headerSearchSize; ++i) {
399     hdrBuf[i] = str->getChar();
400   }
401   hdrBuf[headerSearchSize] = '\0';
402   for (i = 0; i < headerSearchSize - 5; ++i) {
403     if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
404       break;
405     }
406   }
407   if (i >= headerSearchSize - 5) {
408     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
409     return;
410   }
411   str->moveStart(i);
412   if (!(p = strtok_r(&hdrBuf[i+5], " \t\n\r", &tokptr))) {
413     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
414     return;
415   }
416   sscanf(p, "%d.%d", &pdfMajorVersion, &pdfMinorVersion);
417   // We don't do the version check. Don't add it back in.
418 }
419 
checkEncryption(GooString * ownerPassword,GooString * userPassword)420 GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
421   Object encrypt;
422   GBool encrypted;
423   GBool ret;
424 
425   xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
426   if ((encrypted = encrypt.isDict())) {
427     if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
428       if (secHdlr->isUnencrypted()) {
429 	// no encryption
430 	ret = gTrue;
431       } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
432 	// authorization succeeded
433        	xref->setEncryption(secHdlr->getPermissionFlags(),
434 			    secHdlr->getOwnerPasswordOk(),
435 			    secHdlr->getFileKey(),
436 			    secHdlr->getFileKeyLength(),
437 			    secHdlr->getEncVersion(),
438 			    secHdlr->getEncRevision(),
439 			    secHdlr->getEncAlgorithm());
440 	ret = gTrue;
441       } else {
442 	// authorization failed
443 	ret = gFalse;
444       }
445     } else {
446       // couldn't find the matching security handler
447       ret = gFalse;
448     }
449   } else {
450     // document is not encrypted
451     ret = gTrue;
452   }
453   encrypt.free();
454   return ret;
455 }
456 
getSignatureWidgets()457 std::vector<FormWidgetSignature*> PDFDoc::getSignatureWidgets()
458 {
459   int num_pages = getNumPages();
460   FormPageWidgets *page_widgets = NULL;
461   std::vector<FormWidgetSignature*> widget_vector;
462 
463   for (int i = 1; i <= num_pages; i++) {
464     Page *p = getCatalog()->getPage(i);
465     if (p) {
466       page_widgets = p->getFormWidgets();
467       for (int j = 0; page_widgets != NULL && j < page_widgets->getNumWidgets(); j++) {
468 	if (page_widgets->getWidget(j)->getType() == formSignature) {
469 	    widget_vector.push_back(static_cast<FormWidgetSignature*>(page_widgets->getWidget(j)));
470 	}
471       }
472       delete page_widgets;
473     }
474   }
475   return widget_vector;
476 }
477 
displayPage(OutputDev * out,int page,double hDPI,double vDPI,int rotate,GBool useMediaBox,GBool crop,GBool printing,GBool (* abortCheckCbk)(void * data),void * abortCheckCbkData,GBool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData,GBool copyXRef)478 void PDFDoc::displayPage(OutputDev *out, int page,
479 			 double hDPI, double vDPI, int rotate,
480 			 GBool useMediaBox, GBool crop, GBool printing,
481 			 GBool (*abortCheckCbk)(void *data),
482 			 void *abortCheckCbkData,
483                          GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
484                          void *annotDisplayDecideCbkData, GBool copyXRef) {
485   if (globalParams->getPrintCommands()) {
486     printf("***** page %d *****\n", page);
487   }
488 
489   if (getPage(page))
490     getPage(page)->display(out, hDPI, vDPI,
491 				    rotate, useMediaBox, crop, printing,
492 				    abortCheckCbk, abortCheckCbkData,
493 				    annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
494 
495 }
496 
displayPages(OutputDev * out,int firstPage,int lastPage,double hDPI,double vDPI,int rotate,GBool useMediaBox,GBool crop,GBool printing,GBool (* abortCheckCbk)(void * data),void * abortCheckCbkData,GBool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData)497 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
498 			  double hDPI, double vDPI, int rotate,
499 			  GBool useMediaBox, GBool crop, GBool printing,
500 			  GBool (*abortCheckCbk)(void *data),
501 			  void *abortCheckCbkData,
502                           GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
503                           void *annotDisplayDecideCbkData) {
504   int page;
505 
506   for (page = firstPage; page <= lastPage; ++page) {
507     displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
508 		abortCheckCbk, abortCheckCbkData,
509                 annotDisplayDecideCbk, annotDisplayDecideCbkData);
510   }
511 }
512 
displayPageSlice(OutputDev * out,int page,double hDPI,double vDPI,int rotate,GBool useMediaBox,GBool crop,GBool printing,int sliceX,int sliceY,int sliceW,int sliceH,GBool (* abortCheckCbk)(void * data),void * abortCheckCbkData,GBool (* annotDisplayDecideCbk)(Annot * annot,void * user_data),void * annotDisplayDecideCbkData,GBool copyXRef)513 void PDFDoc::displayPageSlice(OutputDev *out, int page,
514 			      double hDPI, double vDPI, int rotate,
515 			      GBool useMediaBox, GBool crop, GBool printing,
516 			      int sliceX, int sliceY, int sliceW, int sliceH,
517 			      GBool (*abortCheckCbk)(void *data),
518 			      void *abortCheckCbkData,
519                               GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
520                               void *annotDisplayDecideCbkData, GBool copyXRef) {
521   if (getPage(page))
522     getPage(page)->displaySlice(out, hDPI, vDPI,
523 					 rotate, useMediaBox, crop,
524 					 sliceX, sliceY, sliceW, sliceH,
525 					 printing,
526 					 abortCheckCbk, abortCheckCbkData,
527 					 annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
528 }
529 
getLinks(int page)530 Links *PDFDoc::getLinks(int page) {
531   Page *p = getPage(page);
532   if (!p) {
533     return new Links (NULL);
534   }
535   return p->getLinks();
536 }
537 
processLinks(OutputDev * out,int page)538 void PDFDoc::processLinks(OutputDev *out, int page) {
539   if (getPage(page))
540     getPage(page)->processLinks(out);
541 }
542 
getLinearization()543 Linearization *PDFDoc::getLinearization()
544 {
545   if (!linearization) {
546     linearization = new Linearization(str);
547     linearizationState = 0;
548   }
549   return linearization;
550 }
551 
checkLinearization()552 GBool PDFDoc::checkLinearization() {
553   if (linearization == NULL)
554     return gFalse;
555   if (linearizationState == 1)
556     return gTrue;
557   if (linearizationState == 2)
558     return gFalse;
559   if (!hints) {
560     hints = new Hints(str, linearization, getXRef(), secHdlr);
561   }
562   for (int page = 1; page <= linearization->getNumPages(); page++) {
563     Object obj;
564     Ref pageRef;
565 
566     pageRef.num = hints->getPageObjectNum(page);
567     if (!pageRef.num) {
568       linearizationState = 2;
569       return gFalse;
570     }
571 
572     // check for bogus ref - this can happen in corrupted PDF files
573     if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
574       linearizationState = 2;
575       return gFalse;
576     }
577 
578     pageRef.gen = xref->getEntry(pageRef.num)->gen;
579     xref->fetch(pageRef.num, pageRef.gen, &obj);
580     if (!obj.isDict("Page")) {
581       obj.free();
582       linearizationState = 2;
583       return gFalse;
584     }
585     obj.free();
586   }
587   linearizationState = 1;
588   return gTrue;
589 }
590 
isLinearized(GBool tryingToReconstruct)591 GBool PDFDoc::isLinearized(GBool tryingToReconstruct) {
592   if ((str->getLength()) &&
593       (getLinearization()->getLength() == str->getLength()))
594     return gTrue;
595   else {
596     if (tryingToReconstruct)
597       return getLinearization()->getLength() > 0;
598     else
599       return gFalse;
600   }
601 }
602 
603 static GBool
get_id(GooString * encodedidstring,GooString * id)604 get_id (GooString *encodedidstring, GooString *id) {
605   const char *encodedid = encodedidstring->getCString();
606   char pdfid[pdfIdLength + 1];
607   int n;
608 
609   if (encodedidstring->getLength() != pdfIdLength / 2)
610     return gFalse;
611 
612   n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
613 	      encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff,
614 	      encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff, encodedid[7] & 0xff,
615 	      encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff,
616 	      encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
617   if (n != pdfIdLength)
618     return gFalse;
619 
620   id->Set(pdfid, pdfIdLength);
621   return gTrue;
622 }
623 
getID(GooString * permanent_id,GooString * update_id)624 GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
625   Object obj;
626   xref->getTrailerDict()->dictLookup ("ID", &obj);
627 
628   if (obj.isArray() && obj.arrayGetLength() == 2) {
629     Object obj2;
630 
631     if (permanent_id) {
632       if (obj.arrayGet(0, &obj2)->isString()) {
633         if (!get_id (obj2.getString(), permanent_id)) {
634 	  obj2.free();
635 	  return gFalse;
636 	}
637       } else {
638         error(errSyntaxError, -1, "Invalid permanent ID");
639 	obj2.free();
640 	return gFalse;
641       }
642       obj2.free();
643     }
644 
645     if (update_id) {
646       if (obj.arrayGet(1, &obj2)->isString()) {
647         if (!get_id (obj2.getString(), update_id)) {
648 	  obj2.free();
649 	  return gFalse;
650 	}
651       } else {
652         error(errSyntaxError, -1, "Invalid update ID");
653 	obj2.free();
654 	return gFalse;
655       }
656       obj2.free();
657     }
658 
659     obj.free();
660 
661     return gTrue;
662   }
663   obj.free();
664 
665   return gFalse;
666 }
667 
getHints()668 Hints *PDFDoc::getHints()
669 {
670   if (!hints && isLinearized()) {
671     hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
672   }
673 
674   return hints;
675 }
676 
savePageAs(GooString * name,int pageNo)677 int PDFDoc::savePageAs(GooString *name, int pageNo)
678 {
679   FILE *f;
680   OutStream *outStr;
681   XRef *yRef, *countRef;
682   int rootNum = getXRef()->getNumObjects() + 1;
683 
684   // Make sure that special flags are set, because we are going to read
685   // all objects, including Unencrypted ones.
686   xref->scanSpecialFlags();
687 
688   Guchar *fileKey;
689   CryptAlgorithm encAlgorithm;
690   int keyLength;
691   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
692 
693   if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
694     error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages() );
695     return errOpenFile;
696   }
697   PDFRectangle *cropBox = NULL;
698   if (getCatalog()->getPage(pageNo)->isCropped()) {
699     cropBox = getCatalog()->getPage(pageNo)->getCropBox();
700   }
701   replacePageDict(pageNo,
702     getCatalog()->getPage(pageNo)->getRotate(),
703     getCatalog()->getPage(pageNo)->getMediaBox(),
704     cropBox);
705   Ref *refPage = getCatalog()->getPageRef(pageNo);
706   Object page;
707   getXRef()->fetch(refPage->num, refPage->gen, &page);
708 
709   if (!(f = fopen(name->getCString(), "wb"))) {
710     error(errIO, -1, "Couldn't open file '{0:t}'", name);
711     return errOpenFile;
712   }
713   outStr = new FileOutStream(f,0);
714 
715   yRef = new XRef(getXRef()->getTrailerDict());
716 
717   if (secHdlr != NULL && !secHdlr->isUnencrypted()) {
718     yRef->setEncryption(secHdlr->getPermissionFlags(),
719       secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
720   }
721   countRef = new XRef();
722   Object *trailerObj = getXRef()->getTrailerDict();
723   if (trailerObj->isDict()) {
724     markPageObjects(trailerObj->getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
725   }
726   yRef->add(0, 65535, 0, gFalse);
727   writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
728 
729   // get and mark info dict
730   Object infoObj;
731   getXRef()->getDocInfo(&infoObj);
732   if (infoObj.isDict()) {
733     Dict *infoDict = infoObj.getDict();
734     markPageObjects(infoDict, yRef, countRef, 0, refPage->num, rootNum + 2);
735     if (trailerObj->isDict()) {
736       Dict *trailerDict = trailerObj->getDict();
737       Object ref;
738       trailerDict->lookupNF("Info", &ref);
739       if (ref.isRef()) {
740         yRef->add(ref.getRef().num, ref.getRef().gen, 0, gTrue);
741         if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
742           yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
743         }
744       }
745       ref.free();
746     }
747   }
748   infoObj.free();
749 
750   // get and mark output intents etc.
751   Object catObj, pagesObj, resourcesObj, annotsObj, afObj;
752   getXRef()->getCatalog(&catObj);
753   Dict *catDict = catObj.getDict();
754   catDict->lookup("Pages", &pagesObj);
755   catDict->lookupNF("AcroForm", &afObj);
756   if (!afObj.isNull()) {
757     markAcroForm(&afObj, yRef, countRef, 0, refPage->num, rootNum + 2);
758     afObj.free();
759   }
760   Dict *pagesDict = pagesObj.getDict();
761   pagesDict->lookup("Resources", &resourcesObj);
762   if (resourcesObj.isDict())
763     markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
764   markPageObjects(catDict, yRef, countRef, 0, refPage->num, rootNum + 2);
765 
766   Dict *pageDict = page.getDict();
767   if (resourcesObj.isNull() && !pageDict->hasKey("Resources")) {
768     Dict *resourceDict = getCatalog()->getPage(pageNo)->getResourceDict();
769     if (resourceDict != NULL) {
770       resourcesObj.initDict(resourceDict);
771       markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
772     }
773   }
774   markPageObjects(pageDict, yRef, countRef, 0, refPage->num, rootNum + 2);
775   pageDict->lookupNF("Annots", &annotsObj);
776   if (!annotsObj.isNull()) {
777     markAnnotations(&annotsObj, yRef, countRef, 0, refPage->num, rootNum + 2);
778     annotsObj.free();
779   }
780   yRef->markUnencrypted();
781   writePageObjects(outStr, yRef, 0);
782 
783   yRef->add(rootNum,0,outStr->getPos(),gTrue);
784   outStr->printf("%d 0 obj\n", rootNum);
785   outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
786   for (int j = 0; j < catDict->getLength(); j++) {
787     const char *key = catDict->getKey(j);
788     if (strcmp(key, "Type") != 0 &&
789       strcmp(key, "Catalog") != 0 &&
790       strcmp(key, "Pages") != 0)
791     {
792       if (j > 0) outStr->printf(" ");
793       Object value; catDict->getValNF(j, &value);
794       outStr->printf("/%s ", key);
795       writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
796       value.free();
797     }
798   }
799   catObj.free();
800   pagesObj.free();
801   outStr->printf(">>\nendobj\n");
802 
803   yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
804   outStr->printf("%d 0 obj\n", rootNum + 1);
805   outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
806   if (resourcesObj.isDict()) {
807     outStr->printf("/Resources ");
808     writeObject(&resourcesObj, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
809     resourcesObj.free();
810   }
811   outStr->printf(">>\n");
812   outStr->printf("endobj\n");
813 
814   yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
815   outStr->printf("%d 0 obj\n", rootNum + 2);
816   outStr->printf("<< ");
817   for (int n = 0; n < pageDict->getLength(); n++) {
818     if (n > 0) outStr->printf(" ");
819     const char *key = pageDict->getKey(n);
820     Object value; pageDict->getValNF(n, &value);
821     if (strcmp(key, "Parent") == 0) {
822       outStr->printf("/Parent %d 0 R", rootNum + 1);
823     } else {
824       outStr->printf("/%s ", key);
825       writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
826     }
827     value.free();
828   }
829   outStr->printf(" >>\nendobj\n");
830   page.free();
831 
832   Goffset uxrefOffset = outStr->getPos();
833   Ref ref;
834   ref.num = rootNum;
835   ref.gen = 0;
836   Dict *trailerDict = createTrailerDict(rootNum + 3, gFalse, 0, &ref, getXRef(),
837                                         name->getCString(), uxrefOffset);
838   writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */,
839                         uxrefOffset, outStr, getXRef());
840   delete trailerDict;
841 
842   outStr->close();
843   fclose(f);
844   delete yRef;
845   delete countRef;
846   delete outStr;
847 
848   return errNone;
849 }
850 
saveAs(GooString * name,PDFWriteMode mode)851 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
852   FILE *f;
853   OutStream *outStr;
854   int res;
855 
856   if (!(f = fopen(name->getCString(), "wb"))) {
857     error(errIO, -1, "Couldn't open file '{0:t}'", name);
858     return errOpenFile;
859   }
860   outStr = new FileOutStream(f,0);
861   res = saveAs(outStr, mode);
862   delete outStr;
863   fclose(f);
864   return res;
865 }
866 
saveAs(OutStream * outStr,PDFWriteMode mode)867 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
868 
869   // find if we have updated objects
870   GBool updated = gFalse;
871   for(int i=0; i<xref->getNumObjects(); i++) {
872     if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) {
873       updated = gTrue;
874       break;
875     }
876   }
877 
878   if (!updated && mode == writeStandard) {
879     // simply copy the original file
880     saveWithoutChangesAs (outStr);
881   } else if (mode == writeForceRewrite) {
882     saveCompleteRewrite(outStr);
883   } else {
884     saveIncrementalUpdate(outStr);
885   }
886 
887   return errNone;
888 }
889 
saveWithoutChangesAs(GooString * name)890 int PDFDoc::saveWithoutChangesAs(GooString *name) {
891   FILE *f;
892   OutStream *outStr;
893   int res;
894 
895   if (!(f = fopen(name->getCString(), "wb"))) {
896     error(errIO, -1, "Couldn't open file '{0:t}'", name);
897     return errOpenFile;
898   }
899 
900   outStr = new FileOutStream(f,0);
901   res = saveWithoutChangesAs(outStr);
902   delete outStr;
903 
904   fclose(f);
905 
906   return res;
907 }
908 
saveWithoutChangesAs(OutStream * outStr)909 int PDFDoc::saveWithoutChangesAs(OutStream *outStr) {
910   int c;
911 
912   BaseStream *copyStr = str->copy();
913   copyStr->reset();
914   while ((c = copyStr->getChar()) != EOF) {
915     outStr->put(c);
916   }
917   copyStr->close();
918   delete copyStr;
919 
920   return errNone;
921 }
922 
saveIncrementalUpdate(OutStream * outStr)923 void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
924 {
925   XRef *uxref;
926   int c;
927   //copy the original file
928   BaseStream *copyStr = str->copy();
929   copyStr->reset();
930   while ((c = copyStr->getChar()) != EOF) {
931     outStr->put(c);
932   }
933   copyStr->close();
934   delete copyStr;
935 
936   Guchar *fileKey;
937   CryptAlgorithm encAlgorithm;
938   int keyLength;
939   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
940 
941   uxref = new XRef();
942   uxref->add(0, 65535, 0, gFalse);
943   xref->lock();
944   for(int i=0; i<xref->getNumObjects(); i++) {
945     if ((xref->getEntry(i)->type == xrefEntryFree) &&
946         (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects
947       continue;
948 
949     if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { //we have an updated object
950       Ref ref;
951       ref.num = i;
952       ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
953       if (xref->getEntry(i)->type != xrefEntryFree) {
954         Object obj1;
955         xref->fetch(ref.num, ref.gen, &obj1, 1);
956         Goffset offset = writeObjectHeader(&ref, outStr);
957         writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
958         writeObjectFooter(outStr);
959         uxref->add(ref.num, ref.gen, offset, gTrue);
960         obj1.free();
961       } else {
962         uxref->add(ref.num, ref.gen, 0, gFalse);
963       }
964     }
965   }
966   xref->unlock();
967   if (uxref->getNumObjects() == 0) { //we have nothing to update
968     delete uxref;
969     return;
970   }
971 
972   Goffset uxrefOffset = outStr->getPos();
973   int numobjects = xref->getNumObjects();
974   const char *fileNameA = fileName ? fileName->getCString() : NULL;
975   Ref rootRef, uxrefStreamRef;
976   rootRef.num = getXRef()->getRootNum();
977   rootRef.gen = getXRef()->getRootGen();
978 
979   // Output a xref stream if there is a xref stream already
980   GBool xRefStream = xref->isXRefStream();
981 
982   if (xRefStream) {
983     // Append an entry for the xref stream itself
984     uxrefStreamRef.num = numobjects++;
985     uxrefStreamRef.gen = 0;
986     uxref->add(uxrefStreamRef.num, uxrefStreamRef.gen, uxrefOffset, gTrue);
987   }
988 
989   Dict *trailerDict = createTrailerDict(numobjects, gTrue, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
990   if (xRefStream) {
991     writeXRefStreamTrailer(trailerDict, uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
992   } else {
993     writeXRefTableTrailer(trailerDict, uxref, gFalse, uxrefOffset, outStr, getXRef());
994   }
995 
996   delete trailerDict;
997   delete uxref;
998 }
999 
saveCompleteRewrite(OutStream * outStr)1000 void PDFDoc::saveCompleteRewrite (OutStream* outStr)
1001 {
1002   // Make sure that special flags are set, because we are going to read
1003   // all objects, including Unencrypted ones.
1004   xref->scanSpecialFlags();
1005 
1006   Guchar *fileKey;
1007   CryptAlgorithm encAlgorithm;
1008   int keyLength;
1009   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1010 
1011   outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
1012   XRef *uxref = new XRef();
1013   uxref->add(0, 65535, 0, gFalse);
1014   xref->lock();
1015   for(int i=0; i<xref->getNumObjects(); i++) {
1016     Object obj1;
1017     Ref ref;
1018     XRefEntryType type = xref->getEntry(i)->type;
1019     if (type == xrefEntryFree) {
1020       ref.num = i;
1021       ref.gen = xref->getEntry(i)->gen;
1022       /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
1023           and we don't want the one with num=0 because it has already been added (gen = 65535)*/
1024       if (ref.gen > 0 && ref.num > 0)
1025         uxref->add(ref.num, ref.gen, 0, gFalse);
1026     } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
1027       // This entry must not be written, put a free entry instead (with incremented gen)
1028       ref.num = i;
1029       ref.gen = xref->getEntry(i)->gen + 1;
1030       uxref->add(ref.num, ref.gen, 0, gFalse);
1031     } else if (type == xrefEntryUncompressed){
1032       ref.num = i;
1033       ref.gen = xref->getEntry(i)->gen;
1034       xref->fetch(ref.num, ref.gen, &obj1, 1);
1035       Goffset offset = writeObjectHeader(&ref, outStr);
1036       // Write unencrypted objects in unencrypted form
1037       if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) {
1038         writeObject(&obj1, outStr, NULL, cryptRC4, 0, 0, 0);
1039       } else {
1040         writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1041       }
1042       writeObjectFooter(outStr);
1043       uxref->add(ref.num, ref.gen, offset, gTrue);
1044       obj1.free();
1045     } else if (type == xrefEntryCompressed) {
1046       ref.num = i;
1047       ref.gen = 0; //compressed entries have gen == 0
1048       xref->fetch(ref.num, ref.gen, &obj1, 1);
1049       Goffset offset = writeObjectHeader(&ref, outStr);
1050       writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1051       writeObjectFooter(outStr);
1052       uxref->add(ref.num, ref.gen, offset, gTrue);
1053       obj1.free();
1054     }
1055   }
1056   xref->unlock();
1057   Goffset uxrefOffset = outStr->getPos();
1058   writeXRefTableTrailer(uxrefOffset, uxref, gTrue /* write all entries */,
1059                         uxref->getNumObjects(), outStr, gFalse /* complete rewrite */);
1060   delete uxref;
1061 }
1062 
writeDictionnary(Dict * dict,OutStream * outStr,XRef * xRef,Guint numOffset,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen)1063 void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1064                                CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1065 {
1066   Object obj1;
1067   outStr->printf("<<");
1068   for (int i=0; i<dict->getLength(); i++) {
1069     GooString keyName(dict->getKey(i));
1070     GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
1071     outStr->printf("/%s ", keyNameToPrint->getCString());
1072     delete keyNameToPrint;
1073     writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1074     obj1.free();
1075   }
1076   outStr->printf(">> ");
1077 }
1078 
writeStream(Stream * str,OutStream * outStr)1079 void PDFDoc::writeStream (Stream* str, OutStream* outStr)
1080 {
1081   outStr->printf("stream\r\n");
1082   str->reset();
1083   for (int c=str->getChar(); c!= EOF; c=str->getChar()) {
1084     outStr->printf("%c", c);
1085   }
1086   outStr->printf("\r\nendstream\r\n");
1087 }
1088 
writeRawStream(Stream * str,OutStream * outStr)1089 void PDFDoc::writeRawStream (Stream* str, OutStream* outStr)
1090 {
1091   Object obj1;
1092   str->getDict()->lookup("Length", &obj1);
1093   if (!obj1.isInt() && !obj1.isInt64()) {
1094     error (errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1095     return;
1096   }
1097 
1098   Goffset length;
1099   if (obj1.isInt())
1100     length = obj1.getInt();
1101   else
1102     length = obj1.getInt64();
1103   obj1.free();
1104 
1105   outStr->printf("stream\r\n");
1106   str->unfilteredReset();
1107   for (Goffset i = 0; i < length; i++) {
1108     int c = str->getUnfilteredChar();
1109     if (unlikely(c == EOF)) {
1110       error (errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
1111       break;
1112     }
1113     outStr->printf("%c", c);
1114   }
1115   str->reset();
1116   outStr->printf("\r\nendstream\r\n");
1117 }
1118 
writeString(GooString * s,OutStream * outStr,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen)1119 void PDFDoc::writeString (GooString* s, OutStream* outStr, Guchar *fileKey,
1120                           CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1121 {
1122   // Encrypt string if encryption is enabled
1123   GooString *sEnc = NULL;
1124   if (fileKey) {
1125     Object obj;
1126     EncryptStream *enc = new EncryptStream(new MemStream(s->getCString(), 0, s->getLength(), obj.initNull()),
1127                                            fileKey, encAlgorithm, keyLength, objNum, objGen);
1128     sEnc = new GooString();
1129     int c;
1130     enc->reset();
1131     while ((c = enc->getChar()) != EOF) {
1132       sEnc->append((char)c);
1133     }
1134 
1135     delete enc;
1136     s = sEnc;
1137   }
1138 
1139   // Write data
1140   if (s->hasUnicodeMarker()) {
1141     //unicode string don't necessary end with \0
1142     const char* c = s->getCString();
1143     outStr->printf("(");
1144     for(int i=0; i<s->getLength(); i++) {
1145       char unescaped = *(c+i)&0x000000ff;
1146       //escape if needed
1147       if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
1148         outStr->printf("%c", '\\');
1149       outStr->printf("%c", unescaped);
1150     }
1151     outStr->printf(") ");
1152   } else {
1153     const char* c = s->getCString();
1154     outStr->printf("(");
1155     for(int i=0; i<s->getLength(); i++) {
1156       char unescaped = *(c+i)&0x000000ff;
1157       //escape if needed
1158       if (unescaped == '\r')
1159         outStr->printf("\\r");
1160       else if (unescaped == '\n')
1161         outStr->printf("\\n");
1162       else {
1163         if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
1164           outStr->printf("%c", '\\');
1165         }
1166         outStr->printf("%c", unescaped);
1167       }
1168     }
1169     outStr->printf(") ");
1170   }
1171 
1172   delete sEnc;
1173 }
1174 
writeObjectHeader(Ref * ref,OutStream * outStr)1175 Goffset PDFDoc::writeObjectHeader (Ref *ref, OutStream* outStr)
1176 {
1177   Goffset offset = outStr->getPos();
1178   outStr->printf("%i %i obj ", ref->num, ref->gen);
1179   return offset;
1180 }
1181 
// Serializes obj to outStr according to its type.
//
//  - Scalars (bool, int, int64, real, name, null) are printed followed by a
//    space; strings go through writeString() and are therefore encrypted when
//    fileKey is non-NULL.
//  - Arrays and dictionaries are written recursively, using the unresolved
//    (NF) values of their elements.
//  - Streams are either re-emitted from their decoded data (kinds strWeird and
//    strCrypt) or copied raw (all other kinds); see the comments in the
//    objStream case.
//  - References are written as "<num + numOffset> <gen> R ".
//
// fileKey/encAlgorithm/keyLength/objNum/objGen are passed on to writeString()
// and EncryptStream; xRef is used to look up the end of raw streams and is
// passed on to nested writes.
void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
                          CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
{
  Array *array;
  Object obj1;
  Goffset tmp;

  switch (obj->getType()) {
    case objBool:
      outStr->printf("%s ", obj->getBool()?"true":"false");
      break;
    case objInt:
      outStr->printf("%i ", obj->getInt());
      break;
    case objInt64:
      outStr->printf("%lli ", obj->getInt64());
      break;
    case objReal:
    {
      // Reals are formatted through GooString::appendf with "{0:.10g}".
      GooString s;
      s.appendf("{0:.10g}", obj->getReal());
      outStr->printf("%s ", s.getCString());
      break;
    }
    case objString:
      writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen);
      break;
    case objName:
    {
      // Names are sanitized (non-PS mode) before being printed.
      GooString name(obj->getName());
      GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
      outStr->printf("/%s ", nameToPrint->getCString());
      delete nameToPrint;
      break;
    }
    case objNull:
      outStr->printf( "null ");
      break;
    case objArray:
      array = obj->getArray();
      outStr->printf("[");
      for (int i=0; i<array->getLength(); i++) {
        // getNF: elements are written unresolved, so references stay references
        writeObject(array->getNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
        obj1.free();
      }
      outStr->printf("] ");
      break;
    case objDict:
      writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
      break;
    case objStream:
      {
        // Streams cannot be modified with the current implementation (there are
        // no write functions in the Stream API), so the only streams that can
        // have been modified are internal streams (= strWeird).
        Stream *stream = obj->getStream();
        if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
          //we write the stream unencoded => TODO: write stream encoder

          // Encrypt stream
          EncryptStream *encStream = NULL;
          // Whether the "Filter" entry gets removed from the stream dict below;
          // it is kept only when a "Crypt" filter is found.
          GBool removeFilter = gTrue;
          if (stream->getKind() == strWeird && fileKey) {
            Object filter;
            stream->getDict()->lookup("Filter", &filter);
            if (!filter.isName("Crypt")) {
              if (filter.isArray()) {
                // Look for a "Crypt" entry inside the filter array.
                for (int i = 0; i < filter.arrayGetLength(); i++) {
                  Object filterEle;
                  filter.arrayGet(i, &filterEle);
                  if (filterEle.isName("Crypt")) {
                    filterEle.free();
                    removeFilter = gFalse;
                    break;
                  }
                  filterEle.free();
                }
                if (removeFilter) {
                  // No Crypt filter in the array: wrap the stream so that the
                  // data read below is encrypted with the file key.
                  encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
                  encStream->setAutoDelete(gFalse);
                  stream = encStream;
                }
              } else {
                // Filter is not an array and not the name Crypt: encrypt.
                encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
                encStream->setAutoDelete(gFalse);
                stream = encStream;
              }
            } else {
              // Filter is exactly /Crypt: keep it and do not wrap the stream.
              removeFilter = gFalse;
            }
            filter.free();
          } else if (fileKey != NULL) { // Encrypt stream
            encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
            encStream->setAutoDelete(gFalse);
            stream = encStream;
          }

          stream->reset();
          //recalculate stream length by reading the stream once to EOF
          tmp = 0;
          for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
            tmp++;
          }
          obj1.initInt64(tmp);
          stream->getDict()->set("Length", &obj1);

          //Remove Stream encoding
          if (removeFilter) {
            stream->getDict()->remove("Filter");
          }
          stream->getDict()->remove("DecodeParms");

          writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
          // writeStream() resets the stream again before emitting its bytes
          writeStream (stream,outStr);
          delete encStream;
          obj1.free();
        } else {
          //raw stream copy
          FilterStream *fs = dynamic_cast<FilterStream*>(stream);
          if (fs) {
            BaseStream *bs = fs->getBaseStream();
            if (bs) {
              Goffset streamEnd;
                // When the xref knows where the stream ends, overwrite
                // "Length" with the distance from the base stream's start.
                if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
                  Object val;
                  val.initInt64(streamEnd - bs->getStart());
                  stream->getDict()->set("Length", &val);
                }
              }
          }
          writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
          writeRawStream (stream, outStr);
        }
        break;
      }
    case objRef:
      // The object number is shifted by numOffset; the generation is kept.
      outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
      break;
    case objCmd:
      outStr->printf("%s\n", obj->getCmd());
      break;
    case objError:
      outStr->printf("error\r\n");
      break;
    case objEOF:
      outStr->printf("eof\r\n");
      break;
    case objNone:
      outStr->printf("none\r\n");
      break;
    default:
      error(errUnimplemented, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj->getType());
      break;
  }
}
1336 
writeObjectFooter(OutStream * outStr)1337 void PDFDoc::writeObjectFooter (OutStream* outStr)
1338 {
1339   outStr->printf("endobj\r\n");
1340 }
1341 
createTrailerDict(int uxrefSize,GBool incrUpdate,Goffset startxRef,Ref * root,XRef * xRef,const char * fileName,Goffset fileSize)1342 Dict *PDFDoc::createTrailerDict(int uxrefSize, GBool incrUpdate, Goffset startxRef,
1343                                 Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
1344 {
1345   Dict *trailerDict = new Dict(xRef);
1346   Object obj1;
1347   obj1.initInt(uxrefSize);
1348   trailerDict->set("Size", &obj1);
1349   obj1.free();
1350 
1351   //build a new ID, as recommended in the reference, uses:
1352   // - current time
1353   // - file name
1354   // - file size
1355   // - values of entry in information dictionnary
1356   GooString message;
1357   char buffer[256];
1358   sprintf(buffer, "%i", (int)time(NULL));
1359   message.append(buffer);
1360 
1361   if (fileName)
1362     message.append(fileName);
1363 
1364   sprintf(buffer, "%lli", (long long)fileSize);
1365   message.append(buffer);
1366 
1367   //info dict -- only use text string
1368   if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
1369     for(int i=0; i<obj1.getDict()->getLength(); i++) {
1370       Object obj2;
1371       obj1.getDict()->getVal(i, &obj2);
1372       if (obj2.isString()) {
1373         message.append(obj2.getString());
1374       }
1375       obj2.free();
1376     }
1377   }
1378   obj1.free();
1379 
1380   GBool hasEncrypt = gFalse;
1381   if (!xRef->getTrailerDict()->isNone()) {
1382     Object obj2;
1383     xRef->getTrailerDict()->dictLookupNF("Encrypt", &obj2);
1384     if (!obj2.isNull()) {
1385       trailerDict->set("Encrypt", &obj2);
1386       hasEncrypt = gTrue;
1387       obj2.free();
1388     }
1389   }
1390 
1391   //calculate md5 digest
1392   Guchar digest[16];
1393   md5((Guchar*)message.getCString(), message.getLength(), digest);
1394   obj1.initString(new GooString((const char*)digest, 16));
1395 
1396   //create ID array
1397   Object obj2,obj3,obj5;
1398   obj2.initArray(xRef);
1399 
1400   // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1401   if (incrUpdate || hasEncrypt) {
1402     Object obj4;
1403     //only update the second part of the array
1404     xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
1405     if (!obj4.isArray()) {
1406       error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1407     } else {
1408       //Get the first part of the ID
1409       obj4.arrayGet(0,&obj3);
1410 
1411       obj2.arrayAdd(&obj3);
1412       obj2.arrayAdd(&obj1);
1413       trailerDict->set("ID", &obj2);
1414     }
1415     obj4.free();
1416   } else {
1417     //new file => same values for the two identifiers
1418     obj2.arrayAdd(&obj1);
1419     obj1.initString(new GooString((const char*)digest, 16));
1420     obj2.arrayAdd(&obj1);
1421     trailerDict->set("ID", &obj2);
1422   }
1423 
1424   obj1.initRef(root->num, root->gen);
1425   trailerDict->set("Root", &obj1);
1426 
1427   if (incrUpdate) {
1428     obj1.initInt64(startxRef);
1429     trailerDict->set("Prev", &obj1);
1430   }
1431 
1432   if (!xRef->getTrailerDict()->isNone()) {
1433     xRef->getDocInfoNF(&obj5);
1434     if (!obj5.isNull()) {
1435       trailerDict->set("Info", &obj5);
1436     }
1437   }
1438 
1439   return trailerDict;
1440 }
1441 
writeXRefTableTrailer(Dict * trailerDict,XRef * uxref,GBool writeAllEntries,Goffset uxrefOffset,OutStream * outStr,XRef * xRef)1442 void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAllEntries, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1443 {
1444   uxref->writeTableToFile( outStr, writeAllEntries );
1445   outStr->printf( "trailer\r\n");
1446   writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1447   outStr->printf( "\r\nstartxref\r\n");
1448   outStr->printf( "%lli\r\n", uxrefOffset);
1449   outStr->printf( "%%%%EOF\r\n");
1450 }
1451 
writeXRefStreamTrailer(Dict * trailerDict,XRef * uxref,Ref * uxrefStreamRef,Goffset uxrefOffset,OutStream * outStr,XRef * xRef)1452 void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1453 {
1454   GooString stmData;
1455 
1456   // Fill stmData and some trailerDict fields
1457   uxref->writeStreamToBuffer(&stmData, trailerDict, xRef);
1458 
1459   // Create XRef stream object and write it
1460   Object obj1;
1461   MemStream *mStream = new MemStream( stmData.getCString(), 0,
1462                                       stmData.getLength(), obj1.initDict(trailerDict) );
1463   writeObjectHeader(uxrefStreamRef, outStr);
1464   writeObject(obj1.initStream(mStream), outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1465   writeObjectFooter(outStr);
1466   obj1.free();
1467 
1468   outStr->printf( "startxref\r\n");
1469   outStr->printf( "%lli\r\n", uxrefOffset);
1470   outStr->printf( "%%%%EOF\r\n");
1471 }
1472 
writeXRefTableTrailer(Goffset uxrefOffset,XRef * uxref,GBool writeAllEntries,int uxrefSize,OutStream * outStr,GBool incrUpdate)1473 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, GBool writeAllEntries,
1474                                    int uxrefSize, OutStream* outStr, GBool incrUpdate)
1475 {
1476   const char *fileNameA = fileName ? fileName->getCString() : NULL;
1477   // file size (doesn't include the trailer)
1478   unsigned int fileSize = 0;
1479   int c;
1480   str->reset();
1481   while ((c = str->getChar()) != EOF) {
1482     fileSize++;
1483   }
1484   str->close();
1485   Ref ref;
1486   ref.num = getXRef()->getRootNum();
1487   ref.gen = getXRef()->getRootGen();
1488   Dict * trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref,
1489                                          getXRef(), fileNameA, fileSize);
1490   writeXRefTableTrailer(trailerDict, uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
1491   delete trailerDict;
1492 }
1493 
writeHeader(OutStream * outStr,int major,int minor)1494 void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
1495 {
1496    outStr->printf("%%PDF-%d.%d\n", major, minor);
1497    outStr->printf("%%\xE2\xE3\xCF\xD3\n");
1498 }
1499 
markDictionnary(Dict * dict,XRef * xRef,XRef * countRef,Guint numOffset,int oldRefNum,int newRefNum)1500 void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1501 {
1502   Object obj1;
1503   for (int i=0; i<dict->getLength(); i++) {
1504     const char *key = dict->getKey(i);
1505     if (strcmp(key, "Annots") != 0) {
1506       markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1507     } else {
1508       Object annotsObj;
1509       dict->getValNF(i, &annotsObj);
1510       if (!annotsObj.isNull()) {
1511         markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum);
1512         annotsObj.free();
1513       }
1514     }
1515     obj1.free();
1516   }
1517 }
1518 
markObject(Object * obj,XRef * xRef,XRef * countRef,Guint numOffset,int oldRefNum,int newRefNum)1519 void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1520 {
1521   Array *array;
1522   Object obj1;
1523 
1524   switch (obj->getType()) {
1525     case objArray:
1526       array = obj->getArray();
1527       for (int i=0; i<array->getLength(); i++) {
1528         markObject(array->getNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1529         obj1.free();
1530       }
1531       break;
1532     case objDict:
1533       markDictionnary (obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1534       break;
1535     case objStream:
1536       {
1537         Stream *stream = obj->getStream();
1538         markDictionnary (stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1539       }
1540       break;
1541     case objRef:
1542       {
1543         if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1544           if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
1545             return;  // already marked as free => should be replaced
1546           }
1547           xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);
1548           if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
1549             xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
1550           }
1551         }
1552         if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1553             countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree)
1554         {
1555           countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
1556         } else {
1557           XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
1558           entry->gen++;
1559           if (entry->gen > 9)
1560             break;
1561         }
1562         Object obj1;
1563         getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
1564         markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
1565         obj1.free();
1566       }
1567       break;
1568     default:
1569       break;
1570   }
1571 }
1572 
replacePageDict(int pageNo,int rotate,PDFRectangle * mediaBox,PDFRectangle * cropBox)1573 void PDFDoc::replacePageDict(int pageNo, int rotate,
1574                              PDFRectangle *mediaBox,
1575                              PDFRectangle *cropBox)
1576 {
1577   Ref *refPage = getCatalog()->getPageRef(pageNo);
1578   Object page;
1579   getXRef()->fetch(refPage->num, refPage->gen, &page);
1580   Dict *pageDict = page.getDict();
1581   pageDict->remove("MediaBoxssdf");
1582   pageDict->remove("MediaBox");
1583   pageDict->remove("CropBox");
1584   pageDict->remove("ArtBox");
1585   pageDict->remove("BleedBox");
1586   pageDict->remove("TrimBox");
1587   pageDict->remove("Rotate");
1588   Object mediaBoxObj;
1589   mediaBoxObj.initArray(getXRef());
1590   Object murx;
1591   murx.initReal(mediaBox->x1);
1592   Object mury;
1593   mury.initReal(mediaBox->y1);
1594   Object mllx;
1595   mllx.initReal(mediaBox->x2);
1596   Object mlly;
1597   mlly.initReal(mediaBox->y2);
1598   mediaBoxObj.arrayAdd(&murx);
1599   mediaBoxObj.arrayAdd(&mury);
1600   mediaBoxObj.arrayAdd(&mllx);
1601   mediaBoxObj.arrayAdd(&mlly);
1602   pageDict->add(copyString("MediaBox"), &mediaBoxObj);
1603   if (cropBox != NULL) {
1604     Object cropBoxObj;
1605     cropBoxObj.initArray(getXRef());
1606     Object curx;
1607     curx.initReal(cropBox->x1);
1608     Object cury;
1609     cury.initReal(cropBox->y1);
1610     Object cllx;
1611     cllx.initReal(cropBox->x2);
1612     Object clly;
1613     clly.initReal(cropBox->y2);
1614     cropBoxObj.arrayAdd(&curx);
1615     cropBoxObj.arrayAdd(&cury);
1616     cropBoxObj.arrayAdd(&cllx);
1617     cropBoxObj.arrayAdd(&clly);
1618     pageDict->add(copyString("CropBox"), &cropBoxObj);
1619     cropBoxObj.getArray()->incRef();
1620     pageDict->add(copyString("TrimBox"), &cropBoxObj);
1621   } else {
1622     mediaBoxObj.getArray()->incRef();
1623     pageDict->add(copyString("TrimBox"), &mediaBoxObj);
1624   }
1625   Object rotateObj;
1626   rotateObj.initInt(rotate);
1627   pageDict->add(copyString("Rotate"), &rotateObj);
1628   getXRef()->setModifiedObject(&page, *refPage);
1629   page.free();
1630 }
1631 
markPageObjects(Dict * pageDict,XRef * xRef,XRef * countRef,Guint numOffset,int oldRefNum,int newRefNum)1632 void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1633 {
1634   pageDict->remove("OpenAction");
1635   pageDict->remove("Outlines");
1636   pageDict->remove("StructTreeRoot");
1637 
1638   for (int n = 0; n < pageDict->getLength(); n++) {
1639     const char *key = pageDict->getKey(n);
1640     Object value; pageDict->getValNF(n, &value);
1641     if (strcmp(key, "Parent") != 0 &&
1642 	      strcmp(key, "Pages") != 0 &&
1643 	      strcmp(key, "AcroForm") != 0 &&
1644 	      strcmp(key, "Annots") != 0 &&
1645 	      strcmp(key, "P") != 0 &&
1646         strcmp(key, "Root") != 0) {
1647       markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum);
1648     }
1649     value.free();
1650   }
1651 }
1652 
markAnnotations(Object * annotsObj,XRef * xRef,XRef * countRef,Guint numOffset,int oldPageNum,int newPageNum)1653 GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum) {
1654   Object annots;
1655   GBool modified = gFalse;
1656   annotsObj->fetch(getXRef(), &annots);
1657   if (annots.isArray()) {
1658       Array *array = annots.getArray();
1659       for (int i=array->getLength() - 1; i >= 0; i--) {
1660         Object obj1;
1661         if (array->get(i, &obj1)->isDict()) {
1662           Object type;
1663           Dict *dict = obj1.getDict();
1664           dict->lookup("Type", &type);
1665           if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
1666             Object obj2;
1667             if (dict->lookupNF("P", &obj2)->isRef()) {
1668               if (obj2.getRef().num == oldPageNum) {
1669                 Object obj3;
1670                 array->getNF(i, &obj3);
1671                 if (obj3.isRef()) {
1672                   Object *newRef = new Object();
1673                   newRef->initRef(newPageNum, 0);
1674                   dict->set("P", newRef);
1675                   getXRef()->setModifiedObject(&obj1, obj3.getRef());
1676                 }
1677                 obj3.free();
1678               } else if (obj2.getRef().num == newPageNum) {
1679                 obj1.free();
1680                 obj2.free();
1681                 type.free();
1682                 continue;
1683               } else {
1684                 Object page;
1685                 getXRef()->fetch(obj2.getRef().num, obj2.getRef().gen, &page);
1686                 if (page.isDict()) {
1687                   Object pagetype;
1688                   Dict *dict = page.getDict();
1689                   dict->lookup("Type", &pagetype);
1690                   if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
1691                     obj1.free();
1692                     obj2.free();
1693                     type.free();
1694                     page.free();
1695                     pagetype.free();
1696                     continue;
1697                   }
1698                   pagetype.free();
1699                 }
1700                 page.free();
1701                 obj1.free();
1702                 obj2.free();
1703                 type.free();
1704                 array->remove(i);
1705                 modified = gTrue;
1706                 continue;
1707               }
1708             }
1709             obj2.free();
1710           }
1711           type.free();
1712           markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum);
1713         }
1714         obj1.free();
1715         array->getNF(i, &obj1);
1716         if (obj1.isRef()) {
1717           if (obj1.getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1718             if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
1719               continue;  // already marked as free => should be replaced
1720             }
1721             xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, gTrue);
1722             if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
1723               xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
1724             }
1725           }
1726           if (obj1.getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1727               countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree)
1728           {
1729             countRef->add(obj1.getRef().num + numOffset, 1, 0, gTrue);
1730           } else {
1731             XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
1732             entry->gen++;
1733           }
1734         }
1735         obj1.free();
1736       }
1737   }
1738   if (annotsObj->isRef()) {
1739     if (annotsObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1740       if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
1741         return modified;  // already marked as free => should be replaced
1742       }
1743       xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, gTrue);
1744       if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
1745         xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1746       }
1747     }
1748     if (annotsObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1749         countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree)
1750     {
1751       countRef->add(annotsObj->getRef().num + numOffset, 1, 0, gTrue);
1752     } else {
1753       XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
1754       entry->gen++;
1755     }
1756     getXRef()->setModifiedObject(&annots, annotsObj->getRef());
1757   }
1758   annots.free();
1759   return modified;
1760 }
1761 
markAcroForm(Object * afObj,XRef * xRef,XRef * countRef,Guint numOffset,int oldRefNum,int newRefNum)1762 void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum) {
1763   Object acroform;
1764   GBool modified = gFalse;
1765   afObj->fetch(getXRef(), &acroform);
1766   if (acroform.isDict()) {
1767       Dict *dict = acroform.getDict();
1768       for (int i=0; i < dict->getLength(); i++) {
1769         if (strcmp(dict->getKey(i), "Fields") == 0) {
1770           Object fields;
1771           modified = markAnnotations(dict->getValNF(i, &fields), xRef, countRef, numOffset, oldRefNum, newRefNum);
1772           fields.free();
1773         } else {
1774           Object obj;
1775           markObject(dict->getValNF(i, &obj), xRef, countRef, numOffset, oldRefNum, newRefNum);
1776           obj.free();
1777         }
1778       }
1779   }
1780   if (afObj->isRef()) {
1781     if (afObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1782       if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
1783         return;  // already marked as free => should be replaced
1784       }
1785       xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, gTrue);
1786       if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
1787         xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1788       }
1789     }
1790     if (afObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1791         countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree)
1792     {
1793       countRef->add(afObj->getRef().num + numOffset, 1, 0, gTrue);
1794     } else {
1795       XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
1796       entry->gen++;
1797     }
1798     if (modified){
1799       getXRef()->setModifiedObject(&acroform, afObj->getRef());
1800     }
1801   }
1802   acroform.free();
1803   return;
1804 }
1805 
writePageObjects(OutStream * outStr,XRef * xRef,Guint numOffset,GBool combine)1806 Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset, GBool combine)
1807 {
1808   Guint objectsCount = 0; //count the number of objects in the XRef(s)
1809   Guchar *fileKey;
1810   CryptAlgorithm encAlgorithm;
1811   int keyLength;
1812   xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1813 
1814   for (int n = numOffset; n < xRef->getNumObjects(); n++) {
1815     if (xRef->getEntry(n)->type != xrefEntryFree) {
1816       Object obj;
1817       Ref ref;
1818       ref.num = n;
1819       ref.gen = xRef->getEntry(n)->gen;
1820       objectsCount++;
1821       getXRef()->fetch(ref.num - numOffset, ref.gen, &obj);
1822       Goffset offset = writeObjectHeader(&ref, outStr);
1823       if (combine) {
1824         writeObject(&obj, outStr, getXRef(), numOffset, NULL, cryptRC4, 0, 0, 0);
1825       } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
1826         writeObject(&obj, outStr, NULL, cryptRC4, 0, 0, 0);
1827       } else {
1828         writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1829       }
1830       writeObjectFooter(outStr);
1831       xRef->add(ref.num, ref.gen, offset, gTrue);
1832       obj.free();
1833     }
1834   }
1835   return objectsCount;
1836 }
1837 
1838 #ifndef DISABLE_OUTLINE
getOutline()1839 Outline *PDFDoc::getOutline()
1840 {
1841   if (!outline) {
1842     pdfdocLocker();
1843     // read outline
1844     outline = new Outline(catalog->getOutline(), xref);
1845   }
1846 
1847   return outline;
1848 }
1849 #endif
1850 
ErrorPDFDoc(int errorCode,GooString * fileNameA)1851 PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
1852 {
1853   PDFDoc *doc = new PDFDoc();
1854   doc->errCode = errorCode;
1855   doc->fileName = fileNameA;
1856 
1857   return doc;
1858 }
1859 
strToLongLong(char * s)1860 long long PDFDoc::strToLongLong(char *s) {
1861   long long x, d;
1862   char *p;
1863 
1864   x = 0;
1865   for (p = s; *p && isdigit(*p & 0xff); ++p) {
1866     d = *p - '0';
1867     if (x > (LLONG_MAX - d) / 10) {
1868       break;
1869     }
1870     x = 10 * x + d;
1871   }
1872   return x;
1873 }
1874 
1875 // Read the 'startxref' position.
getStartXRef(GBool tryingToReconstruct)1876 Goffset PDFDoc::getStartXRef(GBool tryingToReconstruct)
1877 {
1878   if (startXRefPos == -1) {
1879 
1880     if (isLinearized(tryingToReconstruct)) {
1881       char buf[linearizationSearchSize+1];
1882       int c, n, i;
1883 
1884       str->setPos(0);
1885       for (n = 0; n < linearizationSearchSize; ++n) {
1886         if ((c = str->getChar()) == EOF) {
1887           break;
1888         }
1889         buf[n] = c;
1890       }
1891       buf[n] = '\0';
1892 
1893       // find end of first obj (linearization dictionary)
1894       startXRefPos = 0;
1895       for (i = 0; i < n; i++) {
1896         if (!strncmp("endobj", &buf[i], 6)) {
1897 	  i += 6;
1898 	  //skip whitespace
1899 	  while (buf[i] && Lexer::isSpace(buf[i])) ++i;
1900 	  startXRefPos = i;
1901 	  break;
1902         }
1903       }
1904     } else {
1905       char buf[xrefSearchSize+1];
1906       char *p;
1907       int c, n, i;
1908 
1909       // read last xrefSearchSize bytes
1910       int segnum = 0;
1911       int maxXRefSearch = 24576;
1912       if (str->getLength() < maxXRefSearch) maxXRefSearch = str->getLength();
1913       for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
1914         str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
1915         for (n = 0; n < xrefSearchSize; ++n) {
1916           if ((c = str->getChar()) == EOF) {
1917             break;
1918           }
1919           buf[n] = c;
1920         }
1921         buf[n] = '\0';
1922 
1923         // find startxref
1924         for (i = n - 9; i >= 0; --i) {
1925           if (!strncmp(&buf[i], "startxref", 9)) {
1926             break;
1927           }
1928         }
1929         if (i < 0) {
1930           startXRefPos = 0;
1931         } else {
1932           for (p = &buf[i + 9]; isspace(*p); ++p);
1933           startXRefPos = strToLongLong(p);
1934           break;
1935         }
1936       }
1937     }
1938 
1939   }
1940 
1941   return startXRefPos;
1942 }
1943 
getMainXRefEntriesOffset(GBool tryingToReconstruct)1944 Goffset PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct)
1945 {
1946   Guint mainXRefEntriesOffset = 0;
1947 
1948   if (isLinearized(tryingToReconstruct)) {
1949     mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
1950   }
1951 
1952   return mainXRefEntriesOffset;
1953 }
1954 
getNumPages()1955 int PDFDoc::getNumPages()
1956 {
1957   if (isLinearized()) {
1958     int n;
1959     if ((n = getLinearization()->getNumPages())) {
1960       return n;
1961     }
1962   }
1963 
1964   return catalog->getNumPages();
1965 }
1966 
parsePage(int page)1967 Page *PDFDoc::parsePage(int page)
1968 {
1969   Page *p = NULL;
1970   Object obj;
1971   Ref pageRef;
1972   Dict *pageDict;
1973 
1974   pageRef.num = getHints()->getPageObjectNum(page);
1975   if (!pageRef.num) {
1976     error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
1977     return NULL;
1978   }
1979 
1980   // check for bogus ref - this can happen in corrupted PDF files
1981   if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
1982     error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef.num, page);
1983     return NULL;
1984   }
1985 
1986   pageRef.gen = xref->getEntry(pageRef.num)->gen;
1987   xref->fetch(pageRef.num, pageRef.gen, &obj);
1988   if (!obj.isDict("Page")) {
1989     obj.free();
1990     error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
1991     return NULL;
1992   }
1993   pageDict = obj.getDict();
1994 
1995   p = new Page(this, page, pageDict, pageRef,
1996                new PageAttrs(NULL, pageDict), catalog->getForm());
1997   obj.free();
1998 
1999   return p;
2000 }
2001 
getPage(int page)2002 Page *PDFDoc::getPage(int page)
2003 {
2004   if ((page < 1) || page > getNumPages()) return NULL;
2005 
2006   if (isLinearized() && checkLinearization()) {
2007     pdfdocLocker();
2008     if (!pageCache) {
2009       pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
2010       for (int i = 0; i < getNumPages(); i++) {
2011         pageCache[i] = NULL;
2012       }
2013     }
2014     if (!pageCache[page-1]) {
2015       pageCache[page-1] = parsePage(page);
2016     }
2017     if (pageCache[page-1]) {
2018        return pageCache[page-1];
2019     } else {
2020        error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
2021     }
2022   }
2023 
2024   return catalog->getPage(page);
2025 }
2026