1 //========================================================================
2 //
3 // pdf-fullrewrite.cc
4 //
5 // Copyright 2007 Julien Rebetez
6 // Copyright 2012 Fabio D'Urso
7 //
8 //========================================================================
9
10 #include "GlobalParams.h"
11 #include "Error.h"
12 #include "Object.h"
13 #include "PDFDoc.h"
14 #include "XRef.h"
15 #include "goo/GooString.h"
16 #include "utils/parseargs.h"
17
// Forward declarations: compareDocuments() is called from main() and
// compareObjects() from compareDictionaries(), both before their definitions.
static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc);
static bool compareObjects(const Object *objA, const Object *objB);

// Storage filled in by parseArgs() through the argDesc table below.
// The password buffers start with the sentinel byte '\001': main() only
// builds a password string when the first byte is no longer '\001', i.e.
// when the corresponding option was actually given.
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";
static bool forceIncremental = false; // set by -i
static bool checkOutput = false; // set by -check
static bool printHelp = false; // set by -h, -help, --help and -?

// Command-line option table passed to parseArgs() and printUsage().
// NOTE(review): the trailing empty entry presumably terminates the table -- confirm against parseargs.h
static const ArgDesc argDesc[] = { { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
                                   { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
                                   { "-i", argFlag, &forceIncremental, 0, "incremental update mode" },
                                   { "-check", argFlag, &checkOutput, 0, "verify the generated document" },
                                   { "-h", argFlag, &printHelp, 0, "print usage information" },
                                   { "-help", argFlag, &printHelp, 0, "print usage information" },
                                   { "--help", argFlag, &printHelp, 0, "print usage information" },
                                   { "-?", argFlag, &printHelp, 0, "print usage information" },
                                   {} };
36
main(int argc,char * argv[])37 int main(int argc, char *argv[])
38 {
39 PDFDoc *doc = nullptr;
40 PDFDoc *docOut = nullptr;
41 GooString *inputName = nullptr;
42 GooString *outputName = nullptr;
43 GooString *ownerPW = nullptr;
44 GooString *userPW = nullptr;
45 int res = 0;
46
47 // parse args
48 bool ok = parseArgs(argDesc, &argc, argv);
49 if (!ok || (argc < 3) || printHelp) {
50 printUsage(argv[0], "INPUT-FILE OUTPUT-FILE", argDesc);
51 if (!printHelp) {
52 res = 1;
53 }
54 goto done;
55 }
56
57 inputName = new GooString(argv[1]);
58 outputName = new GooString(argv[2]);
59
60 if (ownerPassword[0] != '\001') {
61 ownerPW = new GooString(ownerPassword);
62 }
63 if (userPassword[0] != '\001') {
64 userPW = new GooString(userPassword);
65 }
66
67 // load input document
68 globalParams = std::make_unique<GlobalParams>();
69 doc = new PDFDoc(inputName, ownerPW, userPW);
70 if (!doc->isOk()) {
71 fprintf(stderr, "Error loading input document\n");
72 res = 1;
73 goto done;
74 }
75
76 // save it back (in rewrite or incremental update mode)
77 if (doc->saveAs(outputName, forceIncremental ? writeForceIncremental : writeForceRewrite) != 0) {
78 fprintf(stderr, "Error saving document\n");
79 res = 1;
80 goto done;
81 }
82
83 if (checkOutput) {
84 // open the generated document to verify it
85 docOut = new PDFDoc(outputName, ownerPW, userPW);
86 if (!docOut->isOk()) {
87 fprintf(stderr, "Error loading generated document\n");
88 res = 1;
89 } else if (!compareDocuments(doc, docOut)) {
90 fprintf(stderr, "Verification failed\n");
91 res = 1;
92 }
93 } else {
94 delete outputName;
95 }
96
97 done:
98 delete docOut;
99 delete doc;
100 delete userPW;
101 delete ownerPW;
102 return res;
103 }
104
compareDictionaries(Dict * dictA,Dict * dictB)105 static bool compareDictionaries(Dict *dictA, Dict *dictB)
106 {
107 const int length = dictA->getLength();
108 if (dictB->getLength() != length)
109 return false;
110
111 /* Check that every key in dictA is contained in dictB.
112 * Since keys are unique and we've already checked that dictA and dictB
113 * contain the same number of entries, we don't need to check that every key
114 * in dictB is also contained in dictA */
115 for (int i = 0; i < length; ++i) {
116 const char *key = dictA->getKey(i);
117 const Object &valA = dictA->getValNF(i);
118 const Object &valB = dictB->lookupNF(key);
119 if (!compareObjects(&valA, &valB))
120 return false;
121 }
122
123 return true;
124 }
125
compareObjects(const Object * objA,const Object * objB)126 static bool compareObjects(const Object *objA, const Object *objB)
127 {
128 switch (objA->getType()) {
129 case objBool: {
130 if (objB->getType() != objBool) {
131 return false;
132 } else {
133 return (objA->getBool() == objB->getBool());
134 }
135 }
136 case objInt:
137 case objInt64:
138 case objReal: {
139 if (!objB->isNum()) {
140 return false;
141 } else {
142 // Fuzzy comparison
143 const double diff = objA->getNum() - objB->getNum();
144 return (-0.01 < diff) && (diff < 0.01);
145 }
146 }
147 case objString: {
148 if (objB->getType() != objString) {
149 return false;
150 } else {
151 const GooString *strA = objA->getString();
152 const GooString *strB = objB->getString();
153 return (strA->cmp(strB) == 0);
154 }
155 }
156 case objName: {
157 if (objB->getType() != objName) {
158 return false;
159 } else {
160 GooString nameA(objA->getName());
161 GooString nameB(objB->getName());
162 return (nameA.cmp(&nameB) == 0);
163 }
164 }
165 case objNull: {
166 if (objB->getType() != objNull) {
167 return false;
168 } else {
169 return true;
170 }
171 }
172 case objArray: {
173 if (objB->getType() != objArray) {
174 return false;
175 } else {
176 Array *arrayA = objA->getArray();
177 Array *arrayB = objB->getArray();
178 const int length = arrayA->getLength();
179 if (arrayB->getLength() != length) {
180 return false;
181 } else {
182 for (int i = 0; i < length; ++i) {
183 const Object &elemA = arrayA->getNF(i);
184 const Object &elemB = arrayB->getNF(i);
185 if (!compareObjects(&elemA, &elemB)) {
186 return false;
187 }
188 }
189 return true;
190 }
191 }
192 }
193 case objDict: {
194 if (objB->getType() != objDict) {
195 return false;
196 } else {
197 Dict *dictA = objA->getDict();
198 Dict *dictB = objB->getDict();
199 return compareDictionaries(dictA, dictB);
200 }
201 }
202 case objStream: {
203 if (objB->getType() != objStream) {
204 return false;
205 } else {
206 Stream *streamA = objA->getStream();
207 Stream *streamB = objB->getStream();
208 if (!compareDictionaries(streamA->getDict(), streamB->getDict())) {
209 return false;
210 } else {
211 int c;
212 streamA->reset();
213 streamB->reset();
214 do {
215 c = streamA->getChar();
216 if (c != streamB->getChar()) {
217 return false;
218 }
219 } while (c != EOF);
220 return true;
221 }
222 }
223 return true;
224 }
225 case objRef: {
226 if (objB->getType() != objRef) {
227 return false;
228 } else {
229 const Ref refA = objA->getRef();
230 const Ref refB = objB->getRef();
231 return refA == refB;
232 }
233 }
234 default: {
235 fprintf(stderr, "compareObjects failed: unexpected object type %u\n", objA->getType());
236 return false;
237 }
238 }
239 }
240
compareDocuments(PDFDoc * origDoc,PDFDoc * newDoc)241 static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc)
242 {
243 bool result = true;
244 XRef *origXRef = origDoc->getXRef();
245 XRef *newXRef = newDoc->getXRef();
246
247 // Make sure that special flags are set in both documents
248 origXRef->scanSpecialFlags();
249 newXRef->scanSpecialFlags();
250
251 // Compare XRef tables' size
252 const int origNumObjects = origXRef->getNumObjects();
253 const int newNumObjects = newXRef->getNumObjects();
254 if (forceIncremental && origXRef->isXRefStream()) {
255 // In case of incremental update, expect a new entry to be appended to store the new XRef stream
256 if (origNumObjects + 1 != newNumObjects) {
257 fprintf(stderr, "XRef table: Unexpected number of entries (%d+1 != %d)\n", origNumObjects, newNumObjects);
258 result = false;
259 }
260 } else {
261 // In all other cases the number of entries must be the same
262 if (origNumObjects != newNumObjects) {
263 fprintf(stderr, "XRef table: Different number of entries (%d != %d)\n", origNumObjects, newNumObjects);
264 result = false;
265 }
266 }
267
268 // Compare each XRef entry
269 const int numObjects = (origNumObjects < newNumObjects) ? origNumObjects : newNumObjects;
270 for (int i = 0; i < numObjects; ++i) {
271 XRefEntryType origType = origXRef->getEntry(i)->type;
272 XRefEntryType newType = newXRef->getEntry(i)->type;
273 const int origGenNum = (origType != xrefEntryCompressed) ? origXRef->getEntry(i)->gen : 0;
274 const int newGenNum = (newType != xrefEntryCompressed) ? newXRef->getEntry(i)->gen : 0;
275
276 // Check that DontRewrite entries are freed in full rewrite mode
277 if (!forceIncremental && origXRef->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
278 if (newType != xrefEntryFree || origGenNum + 1 != newGenNum) {
279 fprintf(stderr, "XRef entry %u: DontRewrite entry was not freed correctly\n", i);
280 result = false;
281 }
282 continue; // There's nothing left to check for this entry
283 }
284
285 // Compare generation numbers
286 // Object num 0 should always have gen 65535 according to specs, but some
287 // documents have it set to 0. We always write 65535 in output
288 if (i != 0) {
289 if (origGenNum != newGenNum) {
290 fprintf(stderr, "XRef entry %u: generation numbers differ (%d != %d)\n", i, origGenNum, newGenNum);
291 result = false;
292 continue;
293 }
294 } else {
295 if (newGenNum != 65535) {
296 fprintf(stderr, "XRef entry %u: generation number was expected to be 65535 (%d != 65535)\n", i, newGenNum);
297 result = false;
298 continue;
299 }
300 }
301
302 // Compare object flags. A failure shows that there's some error in XRef::scanSpecialFlags()
303 if (origXRef->getEntry(i)->flags != newXRef->getEntry(i)->flags) {
304 fprintf(stderr, "XRef entry %u: flags detected by scanSpecialFlags differ (%d != %d)\n", i, origXRef->getEntry(i)->flags, newXRef->getEntry(i)->flags);
305 result = false;
306 }
307
308 // Check that either both are free or both are in use
309 if ((origType == xrefEntryFree) != (newType == xrefEntryFree)) {
310 const char *origStatus = (origType == xrefEntryFree) ? "free" : "in use";
311 const char *newStatus = (newType == xrefEntryFree) ? "free" : "in use";
312 fprintf(stderr, "XRef entry %u: usage status differs (%s != %s)\n", i, origStatus, newStatus);
313 result = false;
314 continue;
315 }
316
317 // Skip free entries
318 if (origType == xrefEntryFree) {
319 continue;
320 }
321
322 // Compare contents
323 Object origObj = origXRef->fetch(i, origGenNum);
324 Object newObj = newXRef->fetch(i, newGenNum);
325 if (!compareObjects(&origObj, &newObj)) {
326 fprintf(stderr, "XRef entry %u: contents differ\n", i);
327 result = false;
328 }
329 }
330
331 return result;
332 }
333