1 #include "foxparse.h"
2 #include "foxobjtab.h"
3 #include "foxdetect.h"
4 #include "foxreport.h"
5 #include "foxrecovery.h"
6 #include "foxsemantics.h"
7 #include "foxtoken.h"
8 #include "foxdecode.h"
9 
//Parser state shared by every grammar routine in this file.
//currentToken is the one-token lookahead and file is the stream being
//parsed; both are assigned in createPDFTree().
static PDFToken *currentToken = NULL;
static FILE *file = NULL;

//File offset recorded by match() just before it reads the next token.
//Needed for error recovery and token rewinds.
static long int lastposition = 0;

//Value node of the most recent dictionary entry whose name token type is
//FILTER (set in entry()); consumed and reset to NULL by stream().
//Left in for now, but this global hand-off needs to be reworked.
static PDFSyntaxNode * filterList = NULL;
20 
/**
 *
 * Syntax Rules for PDF format
 *
 * One static function per grammar rule. Each returns the syntax node
 * built for that rule, or NULL when the rule could not be parsed.
 *
 */
static PDFSyntaxNode *root();          //ROOT = HEADER CONTENT
static PDFSyntaxNode *header();        //HEADER = PDFVERS | PDFVERS BINHEAD
static PDFSyntaxNode *content();       //CONTENT = DATA | DATA CONTENT
static PDFSyntaxNode *data();          //DATA = BODY XREF TRAILER | BODY TRAILER
static PDFSyntaxNode *body();          //BODY = OBJECT | OBJECT BODY
static PDFSyntaxNode *object();        //OBJECT = INTEGER INTEGER obj TYPE endobj
static PDFSyntaxNode *arraycontent();  //ARRAYCONTENT = TYPE | TYPE ARRAYCONTENT
static PDFSyntaxNode *type();          //TYPE = BOOLEAN | NUMBER | REFERENCE | LITSTRING | HEXSTRING |
                                       //       NAME | ARRAY | NULLOBJ |
                                       //       DICTIONARY | DICTIONARY STREAM
static PDFSyntaxNode *boolean();       //BOOLEAN = true | false
static PDFSyntaxNode *litString();     //LITSTRING = ( litstring )
static PDFSyntaxNode *hexString();     //HEXSTRING = < hexstring >
static PDFSyntaxNode *name();          //NAME = item from list of names
static PDFSyntaxNode *array();         //ARRAY = [ ARRAYCONTENT ]
static PDFSyntaxNode *dictionary();    //DICTIONARY = << ENTRY >>
static PDFSyntaxNode *entry();         //ENTRY = NAME TYPE | NAME TYPE ENTRY
static PDFSyntaxNode *stream();        //STREAM = stream rawbytes endstream (the DICTIONARY is parsed by type())
static PDFSyntaxNode *nullobj();       //NULLOBJ = null
static PDFSyntaxNode *xref();          //XREF = xref SUBSECTION
static PDFSyntaxNode *subsection();    //SUBSECTION = INTEGER INTEGER=numrows numrows*XREFTABLE |
                                       //             INTEGER INTEGER=numrows numrows*XREFTABLE SUBSECTION
static PDFSyntaxNode *trailer();       //TRAILER = trailer DICTIONARY startxref INTEGER END |
                                       //          startxref INTEGER END
static PDFSyntaxNode *number();        //NUMBER = real | integer
static PDFSyntaxNode *integer();       //INTEGER = integer
static PDFSyntaxNode *xreftable();     //XREFTABLE = INTEGER INTEGER IDENT (f|n)
static PDFSyntaxNode *reference();     //REFERENCE = ref

//Releases a node together with its children, siblings and token.
static void destroyNodeTree(PDFSyntaxNode *node);
56 
57 
58 /**This function attempts to resolve all hitherto unresolved indirect objects
59  * once parsing has reached the end of the file.
60  *
61  * If all indirect references have a corresponding object, this phase
62  * should take care of them.
63  */
resolveAllIndirect()64 PDFSyntaxNode *resolveAllIndirect() {
65     PDFSyntaxNode *node, *temp;
66     ObjTableEntry *entry;
67 
68     node = getNewNode();
69 
70     temp = node;
71     while ((entry = getNextUnresolved())) {
72 
73         if (fseek(file, entry->offset, SEEK_SET) != 0) {
74             foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
75 			destroyNodeTree(node);
76 			return NULL;
77         }
78 
79         setUnresolved(0);
80 
81         currentToken = getNextToken(file);
82         if (currentToken == NULL) {
83 			destroyNodeTree(node);
84 			return NULL;
85 		}
86 
87 		temp->sibling = type();
88 
89         temp = temp->sibling;
90 
91         if (getUnresolved() == 0)
92             entry->fullyResolved = 1;
93 		destroyPDFToken(currentToken);
94     }
95 
96     return node;
97 }
98 
99 
100 /**Match function
101  *
102  * Compares current token with expected token
103  * and then calls the lexical analyzer to supply
104  * next token in the file.
105  */
match(PDFTokenType expected,int free)106 static int match(PDFTokenType expected, int free) {
107 
108 	if (currentToken == NULL) {
109 		foxLog(FATAL, "%s: NULL token.\n", __func__);
110 		return 0;
111 	}
112 
113 	if (currentToken->type == ENDOFFILE) {
114         foxLog(FATAL, "%s: Premature end of file.\n", __func__);
115         return 0;
116     }
117 
118 	if (currentToken->type == expected) {
119         foxLog(PDF_DEBUG, "%s: %s\n", __func__, PDFTokenString[expected]);
120 
121 		if ((lastposition = ftell(file)) == -1) {
122             foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
123 			return 0;
124         }
125 
126 		if (free)
127 			destroyPDFToken(currentToken);
128 
129 		currentToken = getNextToken(file);
130 
131 		if (currentToken == NULL)
132 			return 0;
133 
134 		if (currentToken->type == TOK_ERROR)
135 			return 0;
136 
137 		while(currentToken->type == COMMENT) {
138 			destroyPDFToken(currentToken);
139 			currentToken = getNextToken(file);
140 			if (currentToken == NULL)
141 				return 0;
142 			if (currentToken->type == TOK_ERROR)
143 				return 0;
144 		}
145 
146 	}
147 	else {
148 		//XXX add some recovery
149 		//Some error handling
150 		foxLog(FATAL, "%s: Expected token %s but got token %s.\n", __func__, PDFTokenString[expected], PDFTokenString[currentToken->type]);
151 		return 0;
152 	}
153 
154 	return 1;
155 }
156 
157 /**Syntax Node initialization function
158  *
159  */
getNewNode()160 PDFSyntaxNode *getNewNode() {
161 	PDFSyntaxNode *node = (PDFSyntaxNode *)calloc(1, sizeof(PDFSyntaxNode));
162 
163     if (node == NULL) {
164         foxLog(FATAL, "%s: Out of memory. Could not allocate for new node.\n", __func__);
165 		return NULL;
166 	}
167 
168 	return node;
169 }
170 
171 /**Main parsing function.
172  *
173  * Turns a stream of tokens into a syntax tree
174  * using recursive descent.
175  */
createPDFTree(FILE * l_file)176 bool createPDFTree(FILE *l_file) {
177 
178 	PDFSyntaxNode *node;
179 
180 	//Create PDF Syntax Tree from file
181     file = l_file;
182 	currentToken = getNextToken(file);
183 	if (currentToken == NULL)
184 		return false;
185 
186 	node = root();
187 	destroyPDFToken(currentToken);
188     if (node == NULL)
189 		return false;
190 
191 	//Go back and resolve any indirect objects
192 	//that were previously undefined
193     node->child[1] = resolveAllIndirect();
194 
195     destroyNodeTree(node);
196 
197     return true;
198 }
199 
200 /*
201  * ROOT = HEADER CONTENT
202  */
root()203 static PDFSyntaxNode *root() {
204 	PDFSyntaxNode *node;
205 
206     node = header();
207 	if (node == NULL) {
208 		return NULL;
209 	}
210 
211 	node->child[0] = content();
212     if (node->child[0] == NULL) {
213 		destroyNodeTree(node);
214 		return NULL;
215 	}
216 
217 	return node;
218 }
219 
220 /*
221  * HEADER = PDFVERS | PDFVERS BINHEAD
222  */
header()223 static PDFSyntaxNode *header() {
224 	PDFSyntaxNode *node = getNewNode();
225     if (node == NULL)
226 		return NULL;
227 
228     node->token = currentToken;
229 
230 	if(!match(PDFVERS, 0)) {
231 		destroyNodeTree(node);
232 		return NULL;
233 	}
234 
235 	if (currentToken->type == BINHEAD) {
236 		if (!match(BINHEAD, 1)) {
237 			destroyNodeTree(node);
238 			return NULL;
239 		}
240 	}
241 
242 	return node;
243 }
244 
245 /*
246  * CONTENT = DATA | DATA CONTENT
247  */
content()248 static PDFSyntaxNode *content() {
249 	PDFSyntaxNode *node;
250 
251     node = data();
252 	if (node == NULL) {
253 		return NULL;
254 	}
255 
256 	if (currentToken->type == INTEGER) {
257 		node->sibling = content();
258 		if (node->sibling == NULL) {
259 			destroyNodeTree(node);
260 			return NULL;
261 		}
262 	}
263 
264 	return node;
265 }
266 
267 /*
268  * DATA = BODY XREF TRAILER | BODY TRAILER
269  */
data()270 static PDFSyntaxNode *data() {
271 	PDFSyntaxNode *node = getNewNode();
272     if (node == NULL)
273 		return NULL;
274 
275     node->child[0] = body();
276 	if (node->child[0] == NULL) {
277 		destroyNodeTree(node);
278 		return NULL;
279 	}
280 
281 	if (currentToken->type == XREF) {
282 	    node->child[1] = xref();
283 		if (node->child[1] == NULL) {
284 			destroyNodeTree(node);
285 			return NULL;
286 		}
287 	}
288 
289 	node->child[2] = trailer();
290 	if (node->child[2] == NULL) {
291 		destroyNodeTree(node);
292 		return NULL;
293 	}
294 
295 	return node;
296 }
297 
298 /*
299  * BODY = OBJECT | OBJECT BODY
300  */
body()301 static PDFSyntaxNode *body() {
302     PDFSyntaxNode *node;
303 
304     node = object();
305 	if (node == NULL) {
306 		return NULL;
307 	}
308 
309 	if (currentToken->type == INTEGER) {
310 		node->sibling = body();
311 		if (node->sibling == NULL) {
312 			destroyNodeTree(node);
313 			return NULL;
314 		}
315 	}
316 
317 	return node;
318 }
319 
320 /*
321  * OBJECT = INTEGER INTEGER obj TYPE endobj
322  */
object()323 static PDFSyntaxNode *object() {
324     long int pos;
325 	PDFSyntaxNode *node = getNewNode();
326     if (node == NULL)
327 		return NULL;
328 
329 	node->child[0] = integer();
330 	if (node->child[0] == NULL) {
331 		destroyNodeTree(node);
332 		return NULL;
333 	}
334 
335 	node->child[1] = integer();
336     if (node->child[1] == NULL) {
337 		destroyNodeTree(node);
338 		return NULL;
339 	}
340 
341 	//Get current pos
342 	if ((pos = ftell(file)) == -1) {
343 		foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
344 		destroyNodeTree(node);
345 		return NULL;
346 	}
347 
348 	if (!match(OBJ, 1)) {
349 		destroyNodeTree(node);
350 		return NULL;
351 	}
352 
353 	node->child[2] = type();
354 	if (node->child[2] == NULL) {
355 		destroyNodeTree(node);
356 		return NULL;
357 	}
358 
359     //XXX If an object is a stream, then the data added should
360 	//be the content of that stream (no dictionary)
361 	//XXX Adds object to the object table so that
362 	//indirect references to it can be resolved
363     if (!object_Sem(node, lastposition-pos, pos)) {
364 		destroyNodeTree(node);
365 		return NULL;
366 	}
367 
368 	if (!match(ENDOBJ, 1)) {
369 		destroyNodeTree(node);
370 		return NULL;
371 	}
372 
373 	return node;
374 }
375 
/*
 * Detects the sequence INTEGER INTEGER REF at the front of an array's
 * sibling chain and, if present, folds it into a single reference node
 * and tries to parse the referenced object.
 *
 * node: in/out pointer to the first element of the chain. When a
 *       reference is folded, *node is updated to the REF node.
 *
 * Returns 1 when the chain is not a reference, when resolveIndirect()
 * returns 2, or when the referenced object was parsed successfully.
 * Returns 0 on failure; the caller is expected to destroy *node.
 */
int arraycontent_CheckReference(PDFSyntaxNode **node) {
    long int pos;
    uint32_t ret;
    PDFSyntaxNode *first, *second, *ref;

    //This function has external linkage, so do not trust the shape of
    //the chain: anything that is missing simply means "not a reference".
    if (node == NULL || *node == NULL)
        return 1;

    first = *node;
    if (first->token == NULL || first->token->type != INTEGER)
        return 1;

    second = first->sibling;
    if (second == NULL || second->token == NULL || second->token->type != INTEGER)
        return 1;

    ref = second->sibling;
    if (ref == NULL || ref->token == NULL || ref->token->type != REF)
        return 1;

    /* A -> B -> C -> D
     *        becomes
     *
     *     C -> D
     *    / \
     *   A   B
     *
     */
    ref->child[0] = first;
    ref->child[1] = second;
    first->sibling = NULL;
    second->sibling = NULL;

    *node = ref;

    //Remember where the current lookahead was read from so parsing can
    //resume there after the detour to the referenced object
    pos = lastposition;

    ret = resolveIndirect(file, ref);
    if (ret == 0)
        return 0;
    else if (ret == 2)
        return 1;

    destroyPDFToken(currentToken);
    currentToken = getNextToken(file);
    if (currentToken == NULL) {
        return 0;
    }

    //node->child[2] = arraycontent();
    ref->child[2] = type();
    if (ref->child[2] == NULL)
        return 0;

    if (fseek(file, pos, SEEK_SET) != 0) {
        foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
        return 0;
    }

    //Drop the lookahead from the detour and re-read the original one
    destroyPDFToken(currentToken);
    lastposition = pos;
    currentToken = getNextToken(file);
    if (currentToken == NULL) {
        return 0;
    }

    return 1;
}
445 
446 /*
447  * ARRAYCONTENT = TYPE | TYPE ARRAYCONTENT
448  */
arraycontent()449 static PDFSyntaxNode *arraycontent() {
450 	PDFSyntaxNode *node;
451 
452 	node = type();
453 	if (node == NULL) {
454 		return NULL;
455 	}
456 
457 	if (currentToken->type != CLOSEARRAY && currentToken->type != ENDOFFILE) {
458 		node->sibling = arraycontent();
459 		if (node->sibling == NULL) {
460 			destroyNodeTree(node);
461 			return NULL;
462 		}
463 
464 		if (arraycontent_CheckReference(&node) == 0) {
465 		    destroyNodeTree(node);
466 			return NULL;
467 		}
468 	}
469 
470 	return node;
471 }
472 
473 /*
474  * TYPE = BOOL | NUMBER | REFERENCE | LITSTRING | HEXSTRING | NAME | ARRAY | NULLOBJ | DICTIONARY | DICTIONARY STREAM
475  */
type()476 static PDFSyntaxNode *type() {
477 	PDFSyntaxNode *node = NULL;
478 
479     switch (currentToken->type) {
480 		case TOK_TRUE:
481 		case TOK_FALSE:
482 			node = boolean();
483 			break;
484 
485 		case REAL:
486 		case INTEGER:
487 			node = number();
488 			break;
489 
490 		case OPENPAREN:
491 			node = litString();
492 			break;
493 
494 		case OPENANGLE:
495 			node = hexString();
496 			break;
497 
498 		case NAME:
499 			node = name();
500 			break;
501 
502 		case OPENARRAY:
503 			node = array();
504 			break;
505 
506 		case OPENDICT:
507 			node = dictionary();
508 			if (currentToken->type == STREAM) {
509 				node->child[1] = stream();
510 				if (node->child[1] == NULL) {
511 					destroyNodeTree(node);
512 					return NULL;
513 				}
514 			}
515 			break;
516 
517 		case NULLOBJ:
518 			node = nullobj();
519 			break;
520 
521         case REF:
522 			node = reference();
523 			break;
524 
525 		default:
526 			foxLog(FATAL, "%s: Unknown token type %s.\n", __func__, PDFTokenString[currentToken->type]);
527 			break;
528 
529 	}
530 
531 	if (node == NULL)
532 		return NULL;
533 
534 	return node;
535 }
536 
537 /*
538  * BOOL = true | false
539  */
boolean()540 static PDFSyntaxNode *boolean() {
541 	PDFSyntaxNode *node = getNewNode();
542     if (node == NULL)
543 		return NULL;
544 
545 	node->token = currentToken;
546 	if (currentToken->type == TOK_TRUE) {
547 		if (!match(TOK_TRUE, 0)) {
548 			destroyNodeTree(node);
549 			return NULL;
550 		}
551 	}
552 	else {
553 		if (!match(TOK_FALSE, 0)) {
554 			destroyNodeTree(node);
555 			return NULL;
556 		}
557 	}
558 
559 	return node;
560 }
561 
562 /*
563  * LITSTRING = ( litstring )
564  */
litString()565 static PDFSyntaxNode *litString() {
566 	PDFSyntaxNode *node = getNewNode();
567     if (node == NULL)
568 		return NULL;
569 
570     node->token = tokenizeLitString(file);
571 	if (node->token == NULL) {
572 		destroyNodeTree(node);
573 		return NULL;
574 	}
575 
576 	if (!match(OPENPAREN, 1)) {
577 		destroyNodeTree(node);
578 		return NULL;
579 	}
580 
581 	if (!match(CLOSEPAREN, 1)) {
582 		destroyNodeTree(node);
583 		return NULL;
584 	}
585 
586 	return node;
587 }
588 
589 /*
590  * HEXSTRING = < hexstring >
591  */
hexString()592 static PDFSyntaxNode *hexString() {
593 	PDFSyntaxNode *node = getNewNode();
594     if (node == NULL)
595 		return NULL;
596 
597 	node->token = tokenizeHexString(file);
598     if (node->token == NULL) {
599 		destroyNodeTree(node);
600 		return NULL;
601 	}
602 
603 	if (!match(OPENANGLE, 1)) {
604 		destroyNodeTree(node);
605 		return NULL;
606 	}
607 
608 	//match(CLOSEANGLE);
609 
610 	return node;
611 }
612 
613 /*
614  * NAME = name
615  *
616  * If name is one of a list of recognizable name fields,
617  * then we identify it for state tracking.
618  */
name()619 static PDFSyntaxNode *name() {
620     uint32_t streamLength;
621 
622 	PDFSyntaxNode *node = getNewNode();
623     if (node == NULL)
624 		return NULL;
625 
626     node->token = currentToken;
627 
628     if (!match(NAME, 0)) {
629         destroyNodeTree(node);
630 		return NULL;
631 	}
632 
633 	checkNameKeyword(node->token);
634     if (node->token->type == TOK_ERROR) {
635 		node->token->type = NAME;
636 	}
637 
638     if(node->token->type == NAME_STRMLEN) {
639 		streamLength = (uint32_t)strtoul((char *)currentToken->content, NULL, 10);
640 		setStreamLength(streamLength);
641 	}
642 
643 	return node;
644 }
645 
646 /*
647  * ARRAY = [ ARRAYCONTENT ]
648  */
array()649 static PDFSyntaxNode *array() {
650 	PDFSyntaxNode *node = getNewNode();
651     if (node == NULL)
652 		return NULL;
653 
654 	PDFToken *token = newPDFToken();
655     if (token == NULL) {
656 		destroyNodeTree(node);
657 		return NULL;
658 	}
659 
660 	token->type = ARRAY;
661     node->token = token;
662 
663     if (!match(OPENARRAY, 1)) {
664 		destroyNodeTree(node);
665 		return NULL;
666 	}
667 
668 	if (currentToken->type != CLOSEARRAY) {
669 		node->child[0] = arraycontent();
670 
671 		if (node->child[0] == NULL) {
672 			destroyNodeTree(node);
673 			return NULL;
674 		}
675 	}
676 
677 	if (!match(CLOSEARRAY, 1)) {
678 		destroyNodeTree(node);
679 		return NULL;
680 	}
681 
682 	return node;
683 }
684 
685 /*
686  * DICTIONARY = << ENTRY >>
687  */
dictionary()688 static PDFSyntaxNode *dictionary() {
689 	PDFSyntaxNode *node = getNewNode();
690 	if (node == NULL)
691 		return NULL;
692 
693     PDFToken *token = newPDFToken();
694 	if (token == NULL) {
695 		destroyNodeTree(node);
696 		return NULL;
697 	}
698 
699 	token->type = DICTIONARY;
700 	node->token = token;
701 
702     if (!match(OPENDICT, 1)) {
703 		destroyNodeTree(node);
704 		return NULL;
705 	}
706 
707 	if (currentToken->type != CLOSEDICT) {
708 	    node->child[0] = entry();
709 		if (node->child[0] == NULL) {
710 			destroyNodeTree(node);
711 			return NULL;
712 		}
713 	}
714 
715 	if (!match(CLOSEDICT, 1)) {
716 		destroyNodeTree(node);
717 		return NULL;
718 	}
719 
720 	return node;
721 }
722 
723 
/*
 * Checks whether the value of a dictionary entry is really the start of
 * an indirect reference (INTEGER INTEGER REF). If so, the entry's
 * child[1] is replaced by a reference node whose child[0]/child[1] are
 * the two integers, and the referenced object is parsed into child[2].
 *
 * node: in/out pointer to the entry node (child[0] = name,
 *       child[1] = value already parsed by the caller).
 *
 * Returns 1 when the value is not a reference, when resolveIndirect()
 * returns 2, or when the referenced object was parsed successfully.
 * Returns 0 on failure; the caller is expected to destroy *node.
 */
int entry_CheckReference(PDFSyntaxNode **node) {

    uint32_t ret;
    long int pos;
    PDFSyntaxNode *ent, *objNum, *ref;

    //This function has external linkage: tolerate incomplete input
    if (node == NULL || *node == NULL)
        return 1;

    ent = *node;

    if (ent->child[1] == NULL || ent->child[1]->token == NULL ||
            ent->child[1]->token->type != INTEGER)
        return 1;

    if (currentToken == NULL || currentToken->type != INTEGER)
        return 1;

    //Second integer of the reference; parked in child[2] for now
    ent->child[2] = number();
    if (ent->child[2] == NULL) {
        return 0;
    }

    if (currentToken->type != REF) {
        foxLog(FATAL, "%s: Invalid Reference.\n", __func__);
        return 0;
    }

    //Only swap the reference node in once it exists, so that on failure
    //the first integer stays attached to the entry and is released with
    //it instead of being leaked.
    objNum = ent->child[1];
    ref = reference();
    if (ref == NULL) {
        return 0;
    }

    ent->child[1] = ref;
    ref->child[0] = objNum;
    ref->child[1] = ent->child[2];
    ent->child[2] = NULL;

    *node = ent;

    //Remember where the current lookahead was read from so parsing can
    //resume there after the detour to the referenced object
    pos = lastposition;

    //NOTE(review): the entry node is passed here, whereas
    //arraycontent_CheckReference() passes the REF node itself - confirm
    //that resolveIndirect() handles both.
    ret = resolveIndirect(file, ent);

    if (ret == 0)
        return 0;
    else if (ret == 2)
        return 1;

    destroyPDFToken(currentToken);
    currentToken = getNextToken(file);
    if (currentToken == NULL)
        return 0;

    ent->child[2] = type();
    if (ent->child[2] == NULL) {
        return 0;
    }

    if (fseek(file, pos, SEEK_SET) != 0) {
        foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
        return 0;
    }

    //Drop the lookahead from the detour and re-read the original one
    destroyPDFToken(currentToken);
    lastposition = pos;
    currentToken = getNextToken(file);
    if (currentToken == NULL)
        return 0;

    return 1;
}
789 
790 /*
791  * ENTRY = NAME TYPE | NAME TYPE ENTRY
792  */
entry()793 static PDFSyntaxNode *entry() {
794 	PDFToken *token;
795 	PDFStreamType strmtype = UNKNOWN;
796     DecodeParams *DParams;
797 
798 	PDFSyntaxNode *node = getNewNode();
799 	if (node == NULL)
800 		return NULL;
801 
802     token = currentToken;
803 
804 	node->child[0] = name();
805 	if (node->child[0] == NULL) {
806 		destroyNodeTree(node);
807 		return NULL;
808 	}
809 
810 	switch (token->type) {
811 		case NAME_JS:
812 			strmtype = JAVASCRIPT;
813 			foxLog(PDF_DEBUG, "%s: Javascript found!\n", __func__);
814 			break;
815 
816 		case NAME_URI:
817 			strmtype = URI;
818 			foxLog(PDF_DEBUG, "%s: URI found!\n", __func__);
819 			break;
820 
821 		case NAME_DECODEPARAMS:
822 		    strmtype = DECODEPARAMS;
823 			foxLog(PDF_DEBUG, "%s: DecodeParams found!\n", __func__);
824 			break;
825 
826 		case NAME_SUBTYPE:
827 			strmtype = SUBTYPE;
828 			break;
829 
830 		default:
831 			break;
832 	}
833 
834 
835 	node->child[1] = type();
836     if (node->child[1] == NULL) {
837 		destroyNodeTree(node);
838 		return NULL;
839 	}
840 
841 	if (node->child[0]->token->type == FILTER) {
842 		filterList = node->child[1];
843 	}
844 
845     if (entry_CheckReference(&node) == 0) {
846 		destroyNodeTree(node);
847 		return NULL;
848 	}
849 
850 	/*
851 	 *
852 	 * XXX STATE TRACKING EPILOGUE
853 	 * This is terrible. Fix it up.
854 	 *
855 	 */
856 	if (strmtype == SUBTYPE) {
857 		if (node->child[1]->token) {
858 			if (node->child[1]->token->type == NAME_TRUETYPE ||
859 					node->child[1]->token->type == NAME_OPENTYPE) {
860                 Dig(node->child[1]->token->content, node->child[1]->token->length, TRUEOPENTYPE);
861 			}
862 		}
863 	}
864 	else if (strmtype == DECODEPARAMS) {
865 		DParams = prepDecodeParams(node->child[1]);
866 		if (DParams == NULL)
867 			foxLog(NONFATAL, "%s: Could not set up decode params.\n", __func__);
868 		else {
869 		    Dig((uint8_t *)DParams, sizeof(DecodeParams), DECODEPARAMS);
870 		    free(DParams);
871 		}
872 	}
873 	else if (node->child[1]->token) {
874 		//Feed string to Dig()
875 		Dig(node->child[1]->token->content, node->child[1]->token->length, strmtype);
876 	}
877 
878 	/*
879 	 *
880 	 * END STATE TRACKING EPILOGUE
881 	 *
882 	 */
883 
884 	if (currentToken->type != CLOSEDICT) {
885 		node->sibling = entry();
886 		if (node->sibling == NULL) {
887 		    destroyNodeTree(node);
888 			return NULL;
889 		}
890 	}
891 
892 	return node;
893 }
894 
895 /*
896  * STREAM = stream rawbytes endstream
897  */
stream()898 static PDFSyntaxNode *stream() {
899 	PDFSyntaxNode *node = getNewNode();
900     if (node == NULL)
901 		return NULL;
902 
903 	long int streamstart;
904 	uint32_t streamLength;
905 
906     if ((streamstart = ftell(file)) == -1) {
907         foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
908 		destroyNodeTree(node);
909 		return NULL;
910     }
911 
912 	streamLength = getStreamLength();
913 
914 	node->token = tokenizeStream(file, streamLength);
915     if (node->token == NULL) {
916 		destroyNodeTree(node);
917 		return NULL;
918 	}
919 
920     if (!match(STREAM, 1) || currentToken->type != ENDSTREAM) {
921 
922         lastposition = streamstart;
923 
924 		streamLength = recoverStream(file, lastposition);
925 		if (streamLength == 0) {
926 			destroyNodeTree(node);
927 			return NULL;
928 		}
929 		destroyPDFToken(node->token);
930 		node->token = tokenizeStream(file, streamLength);
931 		if (node->token == NULL) {
932 			destroyNodeTree(node);
933 			return NULL;
934 		}
935 
936 		currentToken = getNextToken(file);
937 		if (currentToken == NULL) {
938 			destroyNodeTree(node);
939 			return NULL;
940 		}
941 
942 	}
943 
944 	setStreamLength(0);
945 
946     //XXX DECODE STREAMS
947     if (filterList != NULL) {
948         streamDecode(node->token, filterList);
949         filterList = NULL;
950     }
951 
952 	if (!match(ENDSTREAM, 1)) {
953 		destroyNodeTree(node);
954 		return NULL;
955 	}
956 
957 	return node;
958 }
959 
960 /*
961  * NULLOBJ = nullobj
962  */
nullobj()963 static PDFSyntaxNode *nullobj() {
964     PDFSyntaxNode *node = getNewNode();
965     if (node == NULL)
966 		return NULL;
967 
968 	node->token = currentToken;
969 	if (!match(NULLOBJ, 0)) {
970 		destroyNodeTree(node);
971         return NULL;
972 	}
973 
974 	return node;
975 }
976 
977 /*
978  * XREF = xref SUBSECTION
979  */
xref()980 static PDFSyntaxNode *xref() {
981 	PDFSyntaxNode *node, *temp;
982 
983     if (!match(XREF, 1)) {
984 		return NULL;
985 	}
986 
987 	node = subsection();
988 	if (node == NULL) {
989 		return NULL;
990 	}
991 
992 	temp = node;
993 	while (currentToken->type == INTEGER) {
994 		temp->sibling = subsection();
995 		if (temp->sibling == NULL) {
996 		    destroyNodeTree(node);
997 			return NULL;
998 		}
999 		temp = temp->sibling;
1000 	}
1001 
1002 	return node;
1003 }
1004 
1005 /*
1006  * SUBSECTION = INTEGER INTEGER=numrows numrows*XREFTABLE |
1007  *              INTEGER INTEGER=numrows numrows*XREFTABLE SUBSECTION
1008  */
subsection()1009 static PDFSyntaxNode *subsection() {
1010 	PDFSyntaxNode *temp, *node;
1011 	uint32_t numrows = 0;
1012     uint32_t i = 0;
1013 	node = getNewNode();
1014     if (node == NULL)
1015 		return NULL;
1016 
1017 	node->child[0] = integer();
1018 	if (node->child[0] == NULL) {
1019 		destroyNodeTree(node);
1020         return NULL;
1021 	}
1022 
1023 	node->child[1] = integer();
1024     if (node->child[1] == NULL) {
1025 		destroyNodeTree(node);
1026 		return NULL;
1027 	}
1028 
1029 	//extract value of node->child[1]->token->content
1030 	numrows = (uint32_t)strtoul((char *)node->child[1]->token->content, NULL, 10);
1031 
1032 	node->child[2] = getNewNode();
1033     if (node->child[2] == NULL) {
1034         destroyNodeTree(node);
1035         return NULL;
1036     }
1037 
1038 	//Call xref table with that value
1039 	temp = node->child[2];
1040 	for (i = 0; i < numrows; i++) {
1041 	    temp->sibling = xreftable();
1042 		if (temp->sibling == NULL) {
1043 		    destroyNodeTree(node);
1044 			return NULL;
1045 		}
1046 		temp = temp->sibling;
1047 	}
1048 
1049 	return node;
1050 }
1051 
1052 /*
1053  * TRAILER = trailer DICTIONARY startxref INTEGER END | startxref INTEGER END
1054  */
trailer()1055 static PDFSyntaxNode *trailer() {
1056 	PDFSyntaxNode *node = getNewNode();
1057     if (node == NULL)
1058 		return NULL;
1059 
1060 	if (currentToken->type == TRAILER) {
1061 	    if(!match(TRAILER, 1)) {
1062 		    destroyNodeTree(node);
1063             return NULL;
1064 		}
1065 
1066 	    node->child[0] = dictionary();
1067 		if (node->child[0] == NULL) {
1068 		    destroyNodeTree(node);
1069             return NULL;
1070 		}
1071 	}
1072 
1073 	if (!match(STARTXREF, 1)) {
1074 		destroyNodeTree(node);
1075         return NULL;
1076 	}
1077 
1078 	node->child[1] = integer();
1079 	if (node->child[1] == NULL) {
1080 		destroyNodeTree(node);
1081         return NULL;
1082 	}
1083 
1084 	if (!match(END, 1)) {
1085 		destroyNodeTree(node);
1086 		return NULL;
1087 	}
1088 
1089 	return node;
1090 }
1091 
1092 /*
1093  * NUMBER = real | integer
1094  */
number()1095 static PDFSyntaxNode *number() {
1096 	PDFSyntaxNode *node = getNewNode();
1097     if (node == NULL)
1098 		return NULL;
1099 
1100     node->token = currentToken;
1101 
1102 	if (currentToken->type == REAL) {
1103 		if (!match(REAL, 0)) {
1104 		    destroyNodeTree(node);
1105             return NULL;
1106 		}
1107 	}
1108 	else {
1109 		if (!match(INTEGER, 0)) {
1110 		    destroyNodeTree(node);
1111 			return NULL;
1112 		}
1113 	}
1114 
1115 	return node;
1116 }
1117 
1118 /*
1119  * INTEGER = integer
1120  */
integer()1121 static PDFSyntaxNode *integer() {
1122 	PDFSyntaxNode *node = getNewNode();
1123     if (node == NULL)
1124 		return NULL;
1125 
1126 	node->token = currentToken;
1127 
1128     if (!match(INTEGER, 0)) {
1129 		destroyNodeTree(node);
1130 		return NULL;
1131 	}
1132 
1133 	return node;
1134 }
1135 
1136 /*
1137  * XREFTABLE = INTEGER INTEGER IDENT (n|f)
1138  */
xreftable()1139 static PDFSyntaxNode *xreftable() {
1140 	PDFSyntaxNode *node = getNewNode();
1141     if (node == NULL)
1142 		return NULL;
1143 
1144     node->child[0] = integer();
1145 	if (node->child[0] == NULL) {
1146 		destroyNodeTree(node);
1147 	    return NULL;
1148 	}
1149 
1150 	node->child[1] = integer();
1151 	if (node->child[1] == NULL) {
1152 		destroyNodeTree(node);
1153 		return NULL;
1154 	}
1155 
1156 	if (!match(IDENT, 1)) {
1157 		destroyNodeTree(node);
1158 		return NULL;
1159 	}
1160 
1161 	return node;
1162 }
1163 
1164 /*
1165  * REFERENCE = ref
1166  */
reference()1167 static PDFSyntaxNode *reference() {
1168 	PDFSyntaxNode *node = getNewNode();
1169     if (node == NULL)
1170 		return NULL;
1171 
1172     node->token = currentToken;
1173 	if (!match(REF, 0)) {
1174 		destroyNodeTree(node);
1175         return NULL;
1176 	}
1177 
1178 	return node;
1179 }
1180 
/*
 * Releases a node, its three children, everything reachable through its
 * sibling pointer, and the token attached to each node.
 *
 * node: root of the (sub)tree to release; NULL is accepted.
 *
 * Siblings are walked in a loop rather than recursively, so stack depth
 * follows the nesting of children and not the length of sibling chains
 * (object lists and array contents can be arbitrarily long).
 */
static void destroyNodeTree(PDFSyntaxNode *node) {
    PDFSyntaxNode *next;

    while (node != NULL) {
        //Grab the link before the node itself is released
        next = node->sibling;

        destroyNodeTree(node->child[0]);
        destroyNodeTree(node->child[1]);
        destroyNodeTree(node->child[2]);

        if (node->token != NULL)
            destroyPDFToken(node->token);

        free(node);

        node = next;
    }
}
1198