1 //========================================================================
2 //
3 // Parser.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2006, 2009, 201, 2010, 2013, 2014 Albert Astals Cid <aacid@kde.org>
17 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
18 // Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
19 // Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
20 // Copyright (C) 2013 Adrian Johnson <ajohnson@redneon.com>
21 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
22 //
23 // To see a description of the changes please see the Changelog file that
24 // came with your tarball or type make ChangeLog if you are building from git
25 //
26 //========================================================================
27 
28 #include <config.h>
29 
30 #ifdef USE_GCC_PRAGMAS
31 #pragma implementation
32 #endif
33 
#include <stddef.h>
#include <limits>
#include "Object.h"
#include "Array.h"
#include "Dict.h"
#include "Decrypt.h"
#include "Parser.h"
#include "XRef.h"
#include "Error.h"

42 
// Upper bound on how deeply arrays and dictionaries may nest while
// parsing.  It guards against infinite loops in the object structure,
// and also against technically valid files whose deeply nested arrays
// would otherwise make the parser exhaust the stack.
static const int recursionLimit = 500;
47 
Parser(XRef * xrefA,Lexer * lexerA,GBool allowStreamsA)48 Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
49   xref = xrefA;
50   lexer = lexerA;
51   inlineImg = 0;
52   allowStreams = allowStreamsA;
53   lexer->getObj(&buf1);
54   lexer->getObj(&buf2);
55 }
56 
~Parser()57 Parser::~Parser() {
58   buf1.free();
59   buf2.free();
60   delete lexer;
61 }
62 
getObj(Object * obj,int recursion)63 Object *Parser::getObj(Object *obj, int recursion)
64 {
65   return getObj(obj, gFalse, NULL, cryptRC4, 0, 0, 0, recursion);
66 }
67 
getObj(Object * obj,GBool simpleOnly,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen,int recursion,GBool strict)68 Object *Parser::getObj(Object *obj, GBool simpleOnly,
69            Guchar *fileKey,
70 		       CryptAlgorithm encAlgorithm, int keyLength,
71 		       int objNum, int objGen, int recursion,
72 		       GBool strict) {
73   char *key;
74   Stream *str;
75   Object obj2;
76   int num;
77   DecryptStream *decrypt;
78   GooString *s, *s2;
79   int c;
80 
81   // refill buffer after inline image data
82   if (inlineImg == 2) {
83     buf1.free();
84     buf2.free();
85     lexer->getObj(&buf1);
86     lexer->getObj(&buf2);
87     inlineImg = 0;
88   }
89 
90   // array
91   if (!simpleOnly && likely(recursion < recursionLimit) && buf1.isCmd("[")) {
92     shift();
93     obj->initArray(xref);
94     while (!buf1.isCmd("]") && !buf1.isEOF())
95       obj->arrayAdd(getObj(&obj2, gFalse, fileKey, encAlgorithm, keyLength,
96 			   objNum, objGen, recursion + 1));
97     if (buf1.isEOF()) {
98       error(errSyntaxError, getPos(), "End of file inside array");
99       if (strict) goto err;
100     }
101     shift();
102 
103   // dictionary or stream
104   } else if (!simpleOnly && likely(recursion < recursionLimit) && buf1.isCmd("<<")) {
105     shift(objNum);
106     obj->initDict(xref);
107     while (!buf1.isCmd(">>") && !buf1.isEOF()) {
108       if (!buf1.isName()) {
109 	error(errSyntaxError, getPos(), "Dictionary key must be a name object");
110 	if (strict) goto err;
111 	shift();
112       } else {
113 	// buf1 might go away in shift(), so construct the key
114 	key = copyString(buf1.getName());
115 	shift();
116 	if (buf1.isEOF() || buf1.isError()) {
117 	  gfree(key);
118 	  if (strict && buf1.isError()) goto err;
119 	  break;
120 	}
121 	obj->dictAdd(key, getObj(&obj2, gFalse, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1));
122       }
123     }
124     if (buf1.isEOF()) {
125       error(errSyntaxError, getPos(), "End of file inside dictionary");
126       if (strict) goto err;
127     }
128     // stream objects are not allowed inside content streams or
129     // object streams
130     if (buf2.isCmd("stream")) {
131       if (allowStreams && (str = makeStream(obj, fileKey, encAlgorithm, keyLength,
132                                             objNum, objGen, recursion + 1,
133                                             strict))) {
134         obj->initStream(str);
135       } else {
136         obj->free();
137         obj->initError();
138       }
139     } else {
140       shift();
141     }
142 
143   // indirect reference or integer
144   } else if (buf1.isInt()) {
145     num = buf1.getInt();
146     shift();
147     if (buf1.isInt() && buf2.isCmd("R")) {
148       obj->initRef(num, buf1.getInt());
149       shift();
150       shift();
151     } else {
152       obj->initInt(num);
153     }
154 
155   // string
156   } else if (buf1.isString() && fileKey) {
157     s = buf1.getString();
158     s2 = new GooString();
159     obj2.initNull();
160     decrypt = new DecryptStream(new MemStream(s->getCString(), 0,
161 					      s->getLength(), &obj2),
162 				fileKey, encAlgorithm, keyLength,
163 				objNum, objGen);
164     decrypt->reset();
165     while ((c = decrypt->getChar()) != EOF) {
166       s2->append((char)c);
167     }
168     delete decrypt;
169     obj->initString(s2);
170     shift();
171 
172   // simple object
173   } else {
174     // avoid re-allocating memory for complex objects like strings by
175     // shallow copy of <buf1> to <obj> and nulling <buf1> so that
176     // subsequent buf1.free() won't free this memory
177     buf1.shallowCopy(obj);
178     buf1.initNull();
179     shift();
180   }
181 
182   return obj;
183 
184 err:
185   obj->free();
186   obj->initError();
187   return obj;
188 
189 }
190 
makeStream(Object * dict,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen,int recursion,GBool strict)191 Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
192 			   CryptAlgorithm encAlgorithm, int keyLength,
193 			   int objNum, int objGen, int recursion,
194                            GBool strict) {
195   Object obj;
196   BaseStream *baseStr;
197   Stream *str;
198   Goffset length;
199   Goffset pos, endPos;
200 
201   // get stream start position
202   lexer->skipToNextLine();
203   if (!(str = lexer->getStream())) {
204     return NULL;
205   }
206   pos = str->getPos();
207 
208   // get length
209   dict->dictLookup("Length", &obj, recursion);
210   if (obj.isInt()) {
211     length = obj.getInt();
212     obj.free();
213   } else if (obj.isInt64()) {
214     length = obj.getInt64();
215     obj.free();
216   } else {
217     error(errSyntaxError, getPos(), "Bad 'Length' attribute in stream");
218     obj.free();
219     if (strict) return NULL;
220     length = 0;
221   }
222 
223   // check for length in damaged file
224   if (xref && xref->getStreamEnd(pos, &endPos)) {
225     length = endPos - pos;
226   }
227 
228   // in badly damaged PDF files, we can run off the end of the input
229   // stream immediately after the "stream" token
230   if (!lexer->getStream()) {
231     return NULL;
232   }
233   baseStr = lexer->getStream()->getBaseStream();
234 
235   // skip over stream data
236   if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) {
237       // take into account the fact that we've cached one value
238       pos = pos - 1;
239       lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
240   }
241   lexer->setPos(pos + length);
242 
243   // refill token buffers and check for 'endstream'
244   shift();  // kill '>>'
245   shift("endstream", objNum);  // kill 'stream'
246   if (buf1.isCmd("endstream")) {
247     shift();
248   } else {
249     error(errSyntaxError, getPos(), "Missing 'endstream' or incorrect stream length");
250     if (strict) return NULL;
251     if (xref && lexer->getStream()) {
252       // shift until we find the proper endstream or we change to another object or reach eof
253       length = lexer->getPos() - pos;
254       if (buf1.isCmd("endstream")) {
255         obj.initInt64(length);
256         dict->dictSet("Length", &obj);
257         obj.free();
258       }
259     } else {
260       // When building the xref we can't use it so use this
261       // kludge for broken PDF files: just add 5k to the length, and
262       // hope its enough
263       length += 5000;
264     }
265   }
266 
267   // make base stream
268   str = baseStr->makeSubStream(pos, gTrue, length, dict);
269 
270   // handle decryption
271   if (fileKey) {
272     str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
273 			    objNum, objGen);
274   }
275 
276   // get filters
277   str = str->addFilters(dict, recursion);
278 
279   return str;
280 }
281 
shift(int objNum)282 void Parser::shift(int objNum) {
283   if (inlineImg > 0) {
284     if (inlineImg < 2) {
285       ++inlineImg;
286     } else {
287       // in a damaged content stream, if 'ID' shows up in the middle
288       // of a dictionary, we need to reset
289       inlineImg = 0;
290     }
291   } else if (buf2.isCmd("ID")) {
292     lexer->skipChar();		// skip char after 'ID' command
293     inlineImg = 1;
294   }
295   buf1.free();
296   buf2.shallowCopy(&buf1);
297   if (inlineImg > 0)		// don't buffer inline image data
298     buf2.initNull();
299   else
300     lexer->getObj(&buf2, objNum);
301 }
302 
shift(const char * cmdA,int objNum)303 void Parser::shift(const char *cmdA, int objNum) {
304   if (inlineImg > 0) {
305     if (inlineImg < 2) {
306       ++inlineImg;
307     } else {
308       // in a damaged content stream, if 'ID' shows up in the middle
309       // of a dictionary, we need to reset
310       inlineImg = 0;
311     }
312   } else if (buf2.isCmd("ID")) {
313     lexer->skipChar();		// skip char after 'ID' command
314     inlineImg = 1;
315   }
316   buf1.free();
317   buf2.shallowCopy(&buf1);
318   if (inlineImg > 0) {
319     buf2.initNull();
320   } else if (buf1.isCmd(cmdA)) {
321     lexer->getObj(&buf2, objNum);
322   } else {
323     lexer->getObj(&buf2, cmdA, objNum);
324   }
325 }
326