1 //========================================================================
2 //
3 // Parser.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 #include <aconf.h>
10
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
14
15 #include <stddef.h>
16 #include "Object.h"
17 #include "Array.h"
18 #include "Dict.h"
19 #include "Decrypt.h"
20 #include "Parser.h"
21 #include "XRef.h"
22 #include "Error.h"
23
24 // Max number of nested objects. This is used to catch infinite loops
25 // in the object structure.
26 #define recursionLimit 500
27
Parser(XRef * xrefA,Lexer * lexerA,GBool allowStreamsA)28 Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
29 xref = xrefA;
30 lexer = lexerA;
31 inlineImg = 0;
32 allowStreams = allowStreamsA;
33 lexer->getObj(&buf1);
34 lexer->getObj(&buf2);
35 }
36
~Parser()37 Parser::~Parser() {
38 buf1.free();
39 buf2.free();
40 delete lexer;
41 }
42
getObj(Object * obj,GBool simpleOnly,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen,int recursion)43 Object *Parser::getObj(Object *obj, GBool simpleOnly,
44 Guchar *fileKey,
45 CryptAlgorithm encAlgorithm, int keyLength,
46 int objNum, int objGen, int recursion) {
47 char *key;
48 Stream *str;
49 Object obj2;
50 int num;
51 DecryptStream *decrypt;
52 GString *s, *s2;
53 int c;
54
55 // refill buffer after inline image data
56 if (inlineImg == 2) {
57 buf1.free();
58 buf2.free();
59 lexer->getObj(&buf1);
60 lexer->getObj(&buf2);
61 inlineImg = 0;
62 }
63
64 // array
65 if (!simpleOnly && recursion < recursionLimit && buf1.isCmd("[")) {
66 shift();
67 obj->initArray(xref);
68 while (!buf1.isCmd("]") && !buf1.isEOF())
69 obj->arrayAdd(getObj(&obj2, gFalse, fileKey, encAlgorithm, keyLength,
70 objNum, objGen, recursion + 1));
71 if (buf1.isEOF())
72 error(errSyntaxError, getPos(), "End of file inside array");
73 shift();
74
75 // dictionary or stream
76 } else if (!simpleOnly && recursion < recursionLimit && buf1.isCmd("<<")) {
77 shift();
78 obj->initDict(xref);
79 while (!buf1.isCmd(">>") && !buf1.isEOF()) {
80 if (!buf1.isName()) {
81 error(errSyntaxError, getPos(),
82 "Dictionary key must be a name object");
83 shift();
84 } else {
85 key = copyString(buf1.getName());
86 shift();
87 if (buf1.isEOF() || buf1.isError()) {
88 gfree(key);
89 break;
90 }
91 obj->dictAdd(key, getObj(&obj2, gFalse,
92 fileKey, encAlgorithm, keyLength,
93 objNum, objGen, recursion + 1));
94 }
95 }
96 if (buf1.isEOF())
97 error(errSyntaxError, getPos(), "End of file inside dictionary");
98 // stream objects are not allowed inside content streams or
99 // object streams
100 if (allowStreams && buf2.isCmd("stream")) {
101 if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
102 objNum, objGen, recursion + 1))) {
103 obj->initStream(str);
104 } else {
105 obj->free();
106 obj->initError();
107 }
108 } else {
109 shift();
110 }
111
112 // indirect reference or integer
113 } else if (buf1.isInt()) {
114 num = buf1.getInt();
115 shift();
116 if (buf1.isInt() && buf2.isCmd("R")) {
117 obj->initRef(num, buf1.getInt());
118 shift();
119 shift();
120 } else {
121 obj->initInt(num);
122 }
123
124 // string
125 } else if (buf1.isString() && fileKey) {
126 s = buf1.getString();
127 s2 = new GString();
128 obj2.initNull();
129 decrypt = new DecryptStream(new MemStream(s->getCString(), 0,
130 s->getLength(), &obj2),
131 fileKey, encAlgorithm, keyLength,
132 objNum, objGen);
133 decrypt->reset();
134 while ((c = decrypt->getChar()) != EOF) {
135 s2->append((char)c);
136 }
137 delete decrypt;
138 obj->initString(s2);
139 shift();
140
141 // simple object
142 } else {
143 buf1.copy(obj);
144 shift();
145 }
146
147 return obj;
148 }
149
makeStream(Object * dict,Guchar * fileKey,CryptAlgorithm encAlgorithm,int keyLength,int objNum,int objGen,int recursion)150 Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
151 CryptAlgorithm encAlgorithm, int keyLength,
152 int objNum, int objGen, int recursion) {
153 Object obj;
154 BaseStream *baseStr;
155 Stream *str;
156 GFileOffset pos, endPos, length;
157
158 // get stream start position
159 lexer->skipToNextLine();
160 if (!(str = lexer->getStream())) {
161 return NULL;
162 }
163 pos = str->getPos();
164
165 // check for length in damaged file
166 if (xref && xref->getStreamEnd(pos, &endPos)) {
167 length = endPos - pos;
168
169 // get length from the stream object
170 } else {
171 dict->dictLookup("Length", &obj, recursion);
172 if (obj.isInt()) {
173 length = (GFileOffset)(Guint)obj.getInt();
174 obj.free();
175 } else {
176 error(errSyntaxError, getPos(), "Bad 'Length' attribute in stream");
177 obj.free();
178 return NULL;
179 }
180 }
181
182 // in badly damaged PDF files, we can run off the end of the input
183 // stream immediately after the "stream" token
184 if (!lexer->getStream()) {
185 return NULL;
186 }
187 baseStr = lexer->getStream()->getBaseStream();
188
189 // skip over stream data
190 lexer->setPos(pos + length);
191
192 // refill token buffers and check for 'endstream'
193 shift(); // kill '>>'
194 shift(); // kill 'stream'
195 if (buf1.isCmd("endstream")) {
196 shift();
197 } else {
198 error(errSyntaxError, getPos(), "Missing 'endstream'");
199 // kludge for broken PDF files: just add 5k to the length, and
200 // hope its enough
201 length += 5000;
202 }
203
204 // make base stream
205 str = baseStr->makeSubStream(pos, gTrue, length, dict);
206
207 // handle decryption
208 if (fileKey) {
209 str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
210 objNum, objGen);
211 }
212
213 // get filters
214 str = str->addFilters(dict, recursion);
215
216 return str;
217 }
218
shift()219 void Parser::shift() {
220 if (inlineImg > 0) {
221 if (inlineImg < 2) {
222 ++inlineImg;
223 } else {
224 // in a damaged content stream, if 'ID' shows up in the middle
225 // of a dictionary, we need to reset
226 inlineImg = 0;
227 }
228 } else if (buf2.isCmd("ID")) {
229 lexer->skipChar(); // skip char after 'ID' command
230 inlineImg = 1;
231 }
232 buf1.free();
233 buf1 = buf2;
234 if (inlineImg > 0) // don't buffer inline image data
235 buf2.initNull();
236 else
237 lexer->getObj(&buf2);
238 }
239