1 //========================================================================
2 //
3 // pdf-fullrewrite.cc
4 //
5 // Copyright 2007 Julien Rebetez
6 // Copyright 2012 Fabio D'Urso
7 //
8 //========================================================================
9 
10 #include "GlobalParams.h"
11 #include "Error.h"
12 #include "Object.h"
13 #include "PDFDoc.h"
14 #include "XRef.h"
15 #include "goo/GooString.h"
16 #include "utils/parseargs.h"
17 
18 static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc);
19 static bool compareObjects(const Object *objA, const Object *objB);
20 
21 static char ownerPassword[33] = "\001";
22 static char userPassword[33] = "\001";
23 static bool forceIncremental = false;
24 static bool checkOutput = false;
25 static bool printHelp = false;
26 
27 static const ArgDesc argDesc[] = { { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
28                                    { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
29                                    { "-i", argFlag, &forceIncremental, 0, "incremental update mode" },
30                                    { "-check", argFlag, &checkOutput, 0, "verify the generated document" },
31                                    { "-h", argFlag, &printHelp, 0, "print usage information" },
32                                    { "-help", argFlag, &printHelp, 0, "print usage information" },
33                                    { "--help", argFlag, &printHelp, 0, "print usage information" },
34                                    { "-?", argFlag, &printHelp, 0, "print usage information" },
35                                    {} };
36 
main(int argc,char * argv[])37 int main(int argc, char *argv[])
38 {
39     PDFDoc *doc = nullptr;
40     PDFDoc *docOut = nullptr;
41     GooString *inputName = nullptr;
42     GooString *outputName = nullptr;
43     GooString *ownerPW = nullptr;
44     GooString *userPW = nullptr;
45     int res = 0;
46 
47     // parse args
48     bool ok = parseArgs(argDesc, &argc, argv);
49     if (!ok || (argc < 3) || printHelp) {
50         printUsage(argv[0], "INPUT-FILE OUTPUT-FILE", argDesc);
51         if (!printHelp) {
52             res = 1;
53         }
54         goto done;
55     }
56 
57     inputName = new GooString(argv[1]);
58     outputName = new GooString(argv[2]);
59 
60     if (ownerPassword[0] != '\001') {
61         ownerPW = new GooString(ownerPassword);
62     }
63     if (userPassword[0] != '\001') {
64         userPW = new GooString(userPassword);
65     }
66 
67     // load input document
68     globalParams = std::make_unique<GlobalParams>();
69     doc = new PDFDoc(inputName, ownerPW, userPW);
70     if (!doc->isOk()) {
71         fprintf(stderr, "Error loading input document\n");
72         res = 1;
73         goto done;
74     }
75 
76     // save it back (in rewrite or incremental update mode)
77     if (doc->saveAs(outputName, forceIncremental ? writeForceIncremental : writeForceRewrite) != 0) {
78         fprintf(stderr, "Error saving document\n");
79         res = 1;
80         goto done;
81     }
82 
83     if (checkOutput) {
84         // open the generated document to verify it
85         docOut = new PDFDoc(outputName, ownerPW, userPW);
86         if (!docOut->isOk()) {
87             fprintf(stderr, "Error loading generated document\n");
88             res = 1;
89         } else if (!compareDocuments(doc, docOut)) {
90             fprintf(stderr, "Verification failed\n");
91             res = 1;
92         }
93     } else {
94         delete outputName;
95     }
96 
97 done:
98     delete docOut;
99     delete doc;
100     delete userPW;
101     delete ownerPW;
102     return res;
103 }
104 
compareDictionaries(Dict * dictA,Dict * dictB)105 static bool compareDictionaries(Dict *dictA, Dict *dictB)
106 {
107     const int length = dictA->getLength();
108     if (dictB->getLength() != length)
109         return false;
110 
111     /* Check that every key in dictA is contained in dictB.
112      * Since keys are unique and we've already checked that dictA and dictB
113      * contain the same number of entries, we don't need to check that every key
114      * in dictB is also contained in dictA */
115     for (int i = 0; i < length; ++i) {
116         const char *key = dictA->getKey(i);
117         const Object &valA = dictA->getValNF(i);
118         const Object &valB = dictB->lookupNF(key);
119         if (!compareObjects(&valA, &valB))
120             return false;
121     }
122 
123     return true;
124 }
125 
compareObjects(const Object * objA,const Object * objB)126 static bool compareObjects(const Object *objA, const Object *objB)
127 {
128     switch (objA->getType()) {
129     case objBool: {
130         if (objB->getType() != objBool) {
131             return false;
132         } else {
133             return (objA->getBool() == objB->getBool());
134         }
135     }
136     case objInt:
137     case objInt64:
138     case objReal: {
139         if (!objB->isNum()) {
140             return false;
141         } else {
142             // Fuzzy comparison
143             const double diff = objA->getNum() - objB->getNum();
144             return (-0.01 < diff) && (diff < 0.01);
145         }
146     }
147     case objString: {
148         if (objB->getType() != objString) {
149             return false;
150         } else {
151             const GooString *strA = objA->getString();
152             const GooString *strB = objB->getString();
153             return (strA->cmp(strB) == 0);
154         }
155     }
156     case objName: {
157         if (objB->getType() != objName) {
158             return false;
159         } else {
160             GooString nameA(objA->getName());
161             GooString nameB(objB->getName());
162             return (nameA.cmp(&nameB) == 0);
163         }
164     }
165     case objNull: {
166         if (objB->getType() != objNull) {
167             return false;
168         } else {
169             return true;
170         }
171     }
172     case objArray: {
173         if (objB->getType() != objArray) {
174             return false;
175         } else {
176             Array *arrayA = objA->getArray();
177             Array *arrayB = objB->getArray();
178             const int length = arrayA->getLength();
179             if (arrayB->getLength() != length) {
180                 return false;
181             } else {
182                 for (int i = 0; i < length; ++i) {
183                     const Object &elemA = arrayA->getNF(i);
184                     const Object &elemB = arrayB->getNF(i);
185                     if (!compareObjects(&elemA, &elemB)) {
186                         return false;
187                     }
188                 }
189                 return true;
190             }
191         }
192     }
193     case objDict: {
194         if (objB->getType() != objDict) {
195             return false;
196         } else {
197             Dict *dictA = objA->getDict();
198             Dict *dictB = objB->getDict();
199             return compareDictionaries(dictA, dictB);
200         }
201     }
202     case objStream: {
203         if (objB->getType() != objStream) {
204             return false;
205         } else {
206             Stream *streamA = objA->getStream();
207             Stream *streamB = objB->getStream();
208             if (!compareDictionaries(streamA->getDict(), streamB->getDict())) {
209                 return false;
210             } else {
211                 int c;
212                 streamA->reset();
213                 streamB->reset();
214                 do {
215                     c = streamA->getChar();
216                     if (c != streamB->getChar()) {
217                         return false;
218                     }
219                 } while (c != EOF);
220                 return true;
221             }
222         }
223         return true;
224     }
225     case objRef: {
226         if (objB->getType() != objRef) {
227             return false;
228         } else {
229             const Ref refA = objA->getRef();
230             const Ref refB = objB->getRef();
231             return refA == refB;
232         }
233     }
234     default: {
235         fprintf(stderr, "compareObjects failed: unexpected object type %u\n", objA->getType());
236         return false;
237     }
238     }
239 }
240 
compareDocuments(PDFDoc * origDoc,PDFDoc * newDoc)241 static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc)
242 {
243     bool result = true;
244     XRef *origXRef = origDoc->getXRef();
245     XRef *newXRef = newDoc->getXRef();
246 
247     // Make sure that special flags are set in both documents
248     origXRef->scanSpecialFlags();
249     newXRef->scanSpecialFlags();
250 
251     // Compare XRef tables' size
252     const int origNumObjects = origXRef->getNumObjects();
253     const int newNumObjects = newXRef->getNumObjects();
254     if (forceIncremental && origXRef->isXRefStream()) {
255         // In case of incremental update, expect a new entry to be appended to store the new XRef stream
256         if (origNumObjects + 1 != newNumObjects) {
257             fprintf(stderr, "XRef table: Unexpected number of entries (%d+1 != %d)\n", origNumObjects, newNumObjects);
258             result = false;
259         }
260     } else {
261         // In all other cases the number of entries must be the same
262         if (origNumObjects != newNumObjects) {
263             fprintf(stderr, "XRef table: Different number of entries (%d != %d)\n", origNumObjects, newNumObjects);
264             result = false;
265         }
266     }
267 
268     // Compare each XRef entry
269     const int numObjects = (origNumObjects < newNumObjects) ? origNumObjects : newNumObjects;
270     for (int i = 0; i < numObjects; ++i) {
271         XRefEntryType origType = origXRef->getEntry(i)->type;
272         XRefEntryType newType = newXRef->getEntry(i)->type;
273         const int origGenNum = (origType != xrefEntryCompressed) ? origXRef->getEntry(i)->gen : 0;
274         const int newGenNum = (newType != xrefEntryCompressed) ? newXRef->getEntry(i)->gen : 0;
275 
276         // Check that DontRewrite entries are freed in full rewrite mode
277         if (!forceIncremental && origXRef->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
278             if (newType != xrefEntryFree || origGenNum + 1 != newGenNum) {
279                 fprintf(stderr, "XRef entry %u: DontRewrite entry was not freed correctly\n", i);
280                 result = false;
281             }
282             continue; // There's nothing left to check for this entry
283         }
284 
285         // Compare generation numbers
286         // Object num 0 should always have gen 65535 according to specs, but some
287         // documents have it set to 0. We always write 65535 in output
288         if (i != 0) {
289             if (origGenNum != newGenNum) {
290                 fprintf(stderr, "XRef entry %u: generation numbers differ (%d != %d)\n", i, origGenNum, newGenNum);
291                 result = false;
292                 continue;
293             }
294         } else {
295             if (newGenNum != 65535) {
296                 fprintf(stderr, "XRef entry %u: generation number was expected to be 65535 (%d != 65535)\n", i, newGenNum);
297                 result = false;
298                 continue;
299             }
300         }
301 
302         // Compare object flags. A failure shows that there's some error in XRef::scanSpecialFlags()
303         if (origXRef->getEntry(i)->flags != newXRef->getEntry(i)->flags) {
304             fprintf(stderr, "XRef entry %u: flags detected by scanSpecialFlags differ (%d != %d)\n", i, origXRef->getEntry(i)->flags, newXRef->getEntry(i)->flags);
305             result = false;
306         }
307 
308         // Check that either both are free or both are in use
309         if ((origType == xrefEntryFree) != (newType == xrefEntryFree)) {
310             const char *origStatus = (origType == xrefEntryFree) ? "free" : "in use";
311             const char *newStatus = (newType == xrefEntryFree) ? "free" : "in use";
312             fprintf(stderr, "XRef entry %u: usage status differs (%s != %s)\n", i, origStatus, newStatus);
313             result = false;
314             continue;
315         }
316 
317         // Skip free entries
318         if (origType == xrefEntryFree) {
319             continue;
320         }
321 
322         // Compare contents
323         Object origObj = origXRef->fetch(i, origGenNum);
324         Object newObj = newXRef->fetch(i, newGenNum);
325         if (!compareObjects(&origObj, &newObj)) {
326             fprintf(stderr, "XRef entry %u: contents differ\n", i);
327             result = false;
328         }
329     }
330 
331     return result;
332 }
333