1 //========================================================================
2 //
3 // Parser.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 #include <aconf.h>
10 
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
14 
15 #include <stddef.h>
16 #include "Object.h"
17 #include "Array.h"
18 #include "Dict.h"
19 #include "Decrypt.h"
20 #include "Parser.h"
21 #include "XRef.h"
22 #include "Error.h"
23 
24 // Max number of nested objects.  This is used to catch infinite loops
25 // in the object structure.
26 #define recursionLimit 500
27 
Parser(XRef * xrefA,Lexer * lexerA,GBool allowStreamsA)28 Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
29   xref = xrefA;
30   lexer = lexerA;
31   inlineImg = 0;
32   allowStreams = allowStreamsA;
33   lexer->getObj(&buf1);
34   lexer->getObj(&buf2);
35 }
36 
~Parser()37 Parser::~Parser() {
38   buf1.free();
39   buf2.free();
40   delete lexer;
41 }
42 
getObj(Object * obj,GBool simpleOnly,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen,int recursion)43 Object *Parser::getObj(Object *obj, GBool simpleOnly,
44 		       Guchar *fileKey,
45 		       CryptAlgorithm encAlgorithm, int keyLength,
46 		       int objNum, int objGen, int recursion) {
47   char *key;
48   Stream *str;
49   Object obj2;
50   int num;
51   DecryptStream *decrypt;
52   GString *s, *s2;
53   int c;
54 
55   // refill buffer after inline image data
56   if (inlineImg == 2) {
57     buf1.free();
58     buf2.free();
59     lexer->getObj(&buf1);
60     lexer->getObj(&buf2);
61     inlineImg = 0;
62   }
63 
64   // array
65   if (!simpleOnly && recursion < recursionLimit && buf1.isCmd("[")) {
66     shift();
67     obj->initArray(xref);
68     while (!buf1.isCmd("]") && !buf1.isEOF())
69       obj->arrayAdd(getObj(&obj2, gFalse, fileKey, encAlgorithm, keyLength,
70 			   objNum, objGen, recursion + 1));
71     if (buf1.isEOF())
72       error(errSyntaxError, getPos(), "End of file inside array");
73     shift();
74 
75   // dictionary or stream
76   } else if (!simpleOnly && recursion < recursionLimit && buf1.isCmd("<<")) {
77     shift();
78     obj->initDict(xref);
79     while (!buf1.isCmd(">>") && !buf1.isEOF()) {
80       if (!buf1.isName()) {
81 	error(errSyntaxError, getPos(),
82 	      "Dictionary key must be a name object");
83 	shift();
84       } else {
85 	key = copyString(buf1.getName());
86 	shift();
87 	if (buf1.isEOF() || buf1.isError()) {
88 	  gfree(key);
89 	  break;
90 	}
91 	obj->dictAdd(key, getObj(&obj2, gFalse,
92 				 fileKey, encAlgorithm, keyLength,
93 				 objNum, objGen, recursion + 1));
94       }
95     }
96     if (buf1.isEOF())
97       error(errSyntaxError, getPos(), "End of file inside dictionary");
98     // stream objects are not allowed inside content streams or
99     // object streams
100     if (allowStreams && buf2.isCmd("stream")) {
101       if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
102 			    objNum, objGen, recursion + 1))) {
103 	obj->initStream(str);
104       } else {
105 	obj->free();
106 	obj->initError();
107       }
108     } else {
109       shift();
110     }
111 
112   // indirect reference or integer
113   } else if (buf1.isInt()) {
114     num = buf1.getInt();
115     shift();
116     if (buf1.isInt() && buf2.isCmd("R")) {
117       obj->initRef(num, buf1.getInt());
118       shift();
119       shift();
120     } else {
121       obj->initInt(num);
122     }
123 
124   // string
125   } else if (buf1.isString() && fileKey) {
126     s = buf1.getString();
127     s2 = new GString();
128     obj2.initNull();
129     decrypt = new DecryptStream(new MemStream(s->getCString(), 0,
130 					      s->getLength(), &obj2),
131 				fileKey, encAlgorithm, keyLength,
132 				objNum, objGen);
133     decrypt->reset();
134     while ((c = decrypt->getChar()) != EOF) {
135       s2->append((char)c);
136     }
137     delete decrypt;
138     obj->initString(s2);
139     shift();
140 
141   // simple object
142   } else {
143     buf1.copy(obj);
144     shift();
145   }
146 
147   return obj;
148 }
149 
makeStream(Object * dict,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen,int recursion)150 Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
151 			   CryptAlgorithm encAlgorithm, int keyLength,
152 			   int objNum, int objGen, int recursion) {
153   Object obj;
154   BaseStream *baseStr;
155   Stream *str;
156   GFileOffset pos, endPos, length;
157 
158   // get stream start position
159   lexer->skipToNextLine();
160   if (!(str = lexer->getStream())) {
161     return NULL;
162   }
163   pos = str->getPos();
164 
165   // check for length in damaged file
166   if (xref && xref->getStreamEnd(pos, &endPos)) {
167     length = endPos - pos;
168 
169   // get length from the stream object
170   } else {
171     dict->dictLookup("Length", &obj, recursion);
172     if (obj.isInt()) {
173       length = (GFileOffset)(Guint)obj.getInt();
174       obj.free();
175     } else {
176       error(errSyntaxError, getPos(), "Bad 'Length' attribute in stream");
177       obj.free();
178       return NULL;
179     }
180   }
181 
182   // in badly damaged PDF files, we can run off the end of the input
183   // stream immediately after the "stream" token
184   if (!lexer->getStream()) {
185     return NULL;
186   }
187   baseStr = lexer->getStream()->getBaseStream();
188 
189   // skip over stream data
190   lexer->setPos(pos + length);
191 
192   // refill token buffers and check for 'endstream'
193   shift();  // kill '>>'
194   shift();  // kill 'stream'
195   if (buf1.isCmd("endstream")) {
196     shift();
197   } else {
198     error(errSyntaxError, getPos(), "Missing 'endstream'");
199     // kludge for broken PDF files: just add 5k to the length, and
200     // hope its enough
201     length += 5000;
202   }
203 
204   // make base stream
205   str = baseStr->makeSubStream(pos, gTrue, length, dict);
206 
207   // handle decryption
208   if (fileKey) {
209     str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
210 			    objNum, objGen);
211   }
212 
213   // get filters
214   str = str->addFilters(dict, recursion);
215 
216   return str;
217 }
218 
shift()219 void Parser::shift() {
220   if (inlineImg > 0) {
221     if (inlineImg < 2) {
222       ++inlineImg;
223     } else {
224       // in a damaged content stream, if 'ID' shows up in the middle
225       // of a dictionary, we need to reset
226       inlineImg = 0;
227     }
228   } else if (buf2.isCmd("ID")) {
229     lexer->skipChar();		// skip char after 'ID' command
230     inlineImg = 1;
231   }
232   buf1.free();
233   buf1 = buf2;
234   if (inlineImg > 0)		// don't buffer inline image data
235     buf2.initNull();
236   else
237     lexer->getObj(&buf2);
238 }
239