1 #include "foxparse.h"
2 #include "foxobjtab.h"
3 #include "foxdetect.h"
4 #include "foxreport.h"
5 #include "foxrecovery.h"
6 #include "foxsemantics.h"
7 #include "foxtoken.h"
8 #include "foxdecode.h"
9
10 //Necessaries
11 static PDFToken *currentToken = NULL;
12 static FILE *file = NULL;
13
14 //Needed for error recovery and token rewinds
15 static long int lastposition = 0;
16
17 //Leaving in for now, but needs fixin'
18 //Do not like this.
19 static PDFSyntaxNode * filterList = NULL;
20
21 /**
22 *
23 * Syntax Rules for PDF format
24 *
25 */
26 static PDFSyntaxNode *root(); //ROOT = HEADER CONTENT
27 static PDFSyntaxNode *header(); //HEADER = PDFVERS | PDFVERS BINHEAD
28 static PDFSyntaxNode *content(); //CONTENT = DATA | DATA CONTENT
29 static PDFSyntaxNode *data(); //DATA = BODY XREF TRAILER | BODY TRAILER
30 static PDFSyntaxNode *body(); //BODY = OBJECT | OBJECT BODY
31 static PDFSyntaxNode *object(); //OBJECT = INTEGER INTEGER obj TYPE endobj
32 static PDFSyntaxNode *arraycontent(); //ARRAYCONTENT = TYPE | TYPE ARRAYCONTENT
33 static PDFSyntaxNode *type(); //TYPE = BOOLEAN | NUMBER | LITSRING | HEXSTRING |
34 // NAME | ARRAY | NULLOBJ |
35 // DICTIONARY | DICTIONARY STREAM
36 static PDFSyntaxNode *boolean(); //BOOLEAN = true | false
37 static PDFSyntaxNode *litString(); //LITSTRING = ( litstring )
38 static PDFSyntaxNode *hexString(); //HEXSTRING = < hexstring >
39 static PDFSyntaxNode *name(); //NAME = item from list of names
40 static PDFSyntaxNode *array(); //ARRAY = [ ARRAYCONTENT ]
41 static PDFSyntaxNode *dictionary(); //DICTIONARY = << ENTRY >>
42 static PDFSyntaxNode *entry(); //ENTRY = NAME TYPE | NAME TYPE ENTRY
43 static PDFSyntaxNode *stream(); //STREAM = DICTIONARY stream rawbytes endstream
44 static PDFSyntaxNode *nullobj(); //NULLOBJ = null
45 static PDFSyntaxNode *xref(); //XREF = xref SUBSECTION
46 static PDFSyntaxNode *subsection(); //SUBSECTION = INTEGER INTEGER=numrows numrows*XREFTABLE |
47 // INTEGER INTEGER=numrows numrows*XREFTABLE SUBSECTION
48 static PDFSyntaxNode *trailer(); //TRAILER = trailer DICTIONARY startxref INTEGER END |
49 // startxref INTEGER END
50 static PDFSyntaxNode *number(); //NUMBER = real | integer
51 static PDFSyntaxNode *integer(); //INTEGER = integer
52 static PDFSyntaxNode *xreftable(); //XREFTABLE = INTEGER INTEGER IDENT (f|n)
53 static PDFSyntaxNode *reference(); //REFERENCE = ref
54
55 static void destroyNodeTree(PDFSyntaxNode *node);
56
57
58 /**This function attempts to resolve all hitherto unresolved indirect objects
59 * once parsing has reached the end of the file.
60 *
61 * If all indirect references have a corresponding object, this phase
62 * should take care of them.
63 */
resolveAllIndirect()64 PDFSyntaxNode *resolveAllIndirect() {
65 PDFSyntaxNode *node, *temp;
66 ObjTableEntry *entry;
67
68 node = getNewNode();
69
70 temp = node;
71 while ((entry = getNextUnresolved())) {
72
73 if (fseek(file, entry->offset, SEEK_SET) != 0) {
74 foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
75 destroyNodeTree(node);
76 return NULL;
77 }
78
79 setUnresolved(0);
80
81 currentToken = getNextToken(file);
82 if (currentToken == NULL) {
83 destroyNodeTree(node);
84 return NULL;
85 }
86
87 temp->sibling = type();
88
89 temp = temp->sibling;
90
91 if (getUnresolved() == 0)
92 entry->fullyResolved = 1;
93 destroyPDFToken(currentToken);
94 }
95
96 return node;
97 }
98
99
100 /**Match function
101 *
102 * Compares current token with expected token
103 * and then calls the lexical analyzer to supply
104 * next token in the file.
105 */
match(PDFTokenType expected,int free)106 static int match(PDFTokenType expected, int free) {
107
108 if (currentToken == NULL) {
109 foxLog(FATAL, "%s: NULL token.\n", __func__);
110 return 0;
111 }
112
113 if (currentToken->type == ENDOFFILE) {
114 foxLog(FATAL, "%s: Premature end of file.\n", __func__);
115 return 0;
116 }
117
118 if (currentToken->type == expected) {
119 foxLog(PDF_DEBUG, "%s: %s\n", __func__, PDFTokenString[expected]);
120
121 if ((lastposition = ftell(file)) == -1) {
122 foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
123 return 0;
124 }
125
126 if (free)
127 destroyPDFToken(currentToken);
128
129 currentToken = getNextToken(file);
130
131 if (currentToken == NULL)
132 return 0;
133
134 if (currentToken->type == TOK_ERROR)
135 return 0;
136
137 while(currentToken->type == COMMENT) {
138 destroyPDFToken(currentToken);
139 currentToken = getNextToken(file);
140 if (currentToken == NULL)
141 return 0;
142 if (currentToken->type == TOK_ERROR)
143 return 0;
144 }
145
146 }
147 else {
148 //XXX add some recovery
149 //Some error handling
150 foxLog(FATAL, "%s: Expected token %s but got token %s.\n", __func__, PDFTokenString[expected], PDFTokenString[currentToken->type]);
151 return 0;
152 }
153
154 return 1;
155 }
156
157 /**Syntax Node initialization function
158 *
159 */
getNewNode()160 PDFSyntaxNode *getNewNode() {
161 PDFSyntaxNode *node = (PDFSyntaxNode *)calloc(1, sizeof(PDFSyntaxNode));
162
163 if (node == NULL) {
164 foxLog(FATAL, "%s: Out of memory. Could not allocate for new node.\n", __func__);
165 return NULL;
166 }
167
168 return node;
169 }
170
171 /**Main parsing function.
172 *
173 * Turns a stream of tokens into a syntax tree
174 * using recursive descent.
175 */
createPDFTree(FILE * l_file)176 bool createPDFTree(FILE *l_file) {
177
178 PDFSyntaxNode *node;
179
180 //Create PDF Syntax Tree from file
181 file = l_file;
182 currentToken = getNextToken(file);
183 if (currentToken == NULL)
184 return false;
185
186 node = root();
187 destroyPDFToken(currentToken);
188 if (node == NULL)
189 return false;
190
191 //Go back and resolve any indirect objects
192 //that were previously undefined
193 node->child[1] = resolveAllIndirect();
194
195 destroyNodeTree(node);
196
197 return true;
198 }
199
200 /*
201 * ROOT = HEADER CONTENT
202 */
root()203 static PDFSyntaxNode *root() {
204 PDFSyntaxNode *node;
205
206 node = header();
207 if (node == NULL) {
208 return NULL;
209 }
210
211 node->child[0] = content();
212 if (node->child[0] == NULL) {
213 destroyNodeTree(node);
214 return NULL;
215 }
216
217 return node;
218 }
219
220 /*
221 * HEADER = PDFVERS | PDFVERS BINHEAD
222 */
header()223 static PDFSyntaxNode *header() {
224 PDFSyntaxNode *node = getNewNode();
225 if (node == NULL)
226 return NULL;
227
228 node->token = currentToken;
229
230 if(!match(PDFVERS, 0)) {
231 destroyNodeTree(node);
232 return NULL;
233 }
234
235 if (currentToken->type == BINHEAD) {
236 if (!match(BINHEAD, 1)) {
237 destroyNodeTree(node);
238 return NULL;
239 }
240 }
241
242 return node;
243 }
244
245 /*
246 * CONTENT = DATA | DATA CONTENT
247 */
content()248 static PDFSyntaxNode *content() {
249 PDFSyntaxNode *node;
250
251 node = data();
252 if (node == NULL) {
253 return NULL;
254 }
255
256 if (currentToken->type == INTEGER) {
257 node->sibling = content();
258 if (node->sibling == NULL) {
259 destroyNodeTree(node);
260 return NULL;
261 }
262 }
263
264 return node;
265 }
266
267 /*
268 * DATA = BODY XREF TRAILER | BODY TRAILER
269 */
data()270 static PDFSyntaxNode *data() {
271 PDFSyntaxNode *node = getNewNode();
272 if (node == NULL)
273 return NULL;
274
275 node->child[0] = body();
276 if (node->child[0] == NULL) {
277 destroyNodeTree(node);
278 return NULL;
279 }
280
281 if (currentToken->type == XREF) {
282 node->child[1] = xref();
283 if (node->child[1] == NULL) {
284 destroyNodeTree(node);
285 return NULL;
286 }
287 }
288
289 node->child[2] = trailer();
290 if (node->child[2] == NULL) {
291 destroyNodeTree(node);
292 return NULL;
293 }
294
295 return node;
296 }
297
298 /*
299 * BODY = OBJECT | OBJECT BODY
300 */
body()301 static PDFSyntaxNode *body() {
302 PDFSyntaxNode *node;
303
304 node = object();
305 if (node == NULL) {
306 return NULL;
307 }
308
309 if (currentToken->type == INTEGER) {
310 node->sibling = body();
311 if (node->sibling == NULL) {
312 destroyNodeTree(node);
313 return NULL;
314 }
315 }
316
317 return node;
318 }
319
320 /*
321 * OBJECT = INTEGER INTEGER obj TYPE endobj
322 */
object()323 static PDFSyntaxNode *object() {
324 long int pos;
325 PDFSyntaxNode *node = getNewNode();
326 if (node == NULL)
327 return NULL;
328
329 node->child[0] = integer();
330 if (node->child[0] == NULL) {
331 destroyNodeTree(node);
332 return NULL;
333 }
334
335 node->child[1] = integer();
336 if (node->child[1] == NULL) {
337 destroyNodeTree(node);
338 return NULL;
339 }
340
341 //Get current pos
342 if ((pos = ftell(file)) == -1) {
343 foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
344 destroyNodeTree(node);
345 return NULL;
346 }
347
348 if (!match(OBJ, 1)) {
349 destroyNodeTree(node);
350 return NULL;
351 }
352
353 node->child[2] = type();
354 if (node->child[2] == NULL) {
355 destroyNodeTree(node);
356 return NULL;
357 }
358
359 //XXX If an object is a stream, then the data added should
360 //be the content of that stream (no dictionary)
361 //XXX Adds object to the object table so that
362 //indirect references to it can be resolved
363 if (!object_Sem(node, lastposition-pos, pos)) {
364 destroyNodeTree(node);
365 return NULL;
366 }
367
368 if (!match(ENDOBJ, 1)) {
369 destroyNodeTree(node);
370 return NULL;
371 }
372
373 return node;
374 }
375
/*
 * Folds a leading INTEGER INTEGER REF run of an array's sibling chain into
 * a single reference node and tries to parse the referenced object.
 *
 * node: in/out head of the sibling chain. When the pattern is found, *node
 *       is replaced by the REF node with the two integers as child[0] and
 *       child[1]; the REF node keeps its own sibling.
 *
 * Returns 1 when the chain does not start with a reference or the reference
 * was handled, 0 on failure. resolveIndirect() returning 0 is treated as
 * failure and 2 as "nothing more to do" (NOTE(review): exact meaning of 2
 * is defined by resolveIndirect - confirm). Any other result leads to the
 * target being parsed into child[2], after which the file is rewound to the
 * position saved in lastposition and the lookahead token is re-read.
 */
int arraycontent_CheckReference(PDFSyntaxNode **node) {
    long int resumeAt;
    uint32_t ret;
    PDFSyntaxNode *first, *refNode;

    first = *node;

    if (first->token->type != INTEGER ||
        first->sibling->token->type != INTEGER ||
        !first->sibling->sibling ||
        first->sibling->sibling->token->type != REF)
        return 1;

    /* A -> B -> C -> D
     * becomes
     *
     *     C -> D
     *    / \
     *   A   B
     *
     */
    refNode = first->sibling->sibling;
    refNode->child[0] = first;
    refNode->child[1] = first->sibling;
    refNode->child[0]->sibling = NULL;
    refNode->child[1]->sibling = NULL;

    *node = refNode;

    resumeAt = lastposition;

    ret = resolveIndirect(file, refNode);
    switch (ret) {
    case 0:
        return 0;
    case 2:
        return 1;
    default:
        break;
    }

    destroyPDFToken(currentToken);
    currentToken = getNextToken(file);
    if (currentToken == NULL)
        return 0;

    //node->child[2] = arraycontent();
    refNode->child[2] = type();
    if (refNode->child[2] == NULL)
        return 0;

    if (fseek(file, resumeAt, SEEK_SET) != 0) {
        foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
        return 0;
    }

    /* Drop the token read at the target and re-read the original lookahead. */
    destroyPDFToken(currentToken);
    lastposition = resumeAt;
    currentToken = getNextToken(file);
    if (currentToken == NULL)
        return 0;

    return 1;
}
445
446 /*
447 * ARRAYCONTENT = TYPE | TYPE ARRAYCONTENT
448 */
arraycontent()449 static PDFSyntaxNode *arraycontent() {
450 PDFSyntaxNode *node;
451
452 node = type();
453 if (node == NULL) {
454 return NULL;
455 }
456
457 if (currentToken->type != CLOSEARRAY && currentToken->type != ENDOFFILE) {
458 node->sibling = arraycontent();
459 if (node->sibling == NULL) {
460 destroyNodeTree(node);
461 return NULL;
462 }
463
464 if (arraycontent_CheckReference(&node) == 0) {
465 destroyNodeTree(node);
466 return NULL;
467 }
468 }
469
470 return node;
471 }
472
473 /*
474 * TYPE = BOOL | NUMBER | REFERENCE | LITSTRING | HEXSTRING | NAME | ARRAY | NULLOBJ | DICTIONARY | DICTIONARY STREAM
475 */
type()476 static PDFSyntaxNode *type() {
477 PDFSyntaxNode *node = NULL;
478
479 switch (currentToken->type) {
480 case TOK_TRUE:
481 case TOK_FALSE:
482 node = boolean();
483 break;
484
485 case REAL:
486 case INTEGER:
487 node = number();
488 break;
489
490 case OPENPAREN:
491 node = litString();
492 break;
493
494 case OPENANGLE:
495 node = hexString();
496 break;
497
498 case NAME:
499 node = name();
500 break;
501
502 case OPENARRAY:
503 node = array();
504 break;
505
506 case OPENDICT:
507 node = dictionary();
508 if (currentToken->type == STREAM) {
509 node->child[1] = stream();
510 if (node->child[1] == NULL) {
511 destroyNodeTree(node);
512 return NULL;
513 }
514 }
515 break;
516
517 case NULLOBJ:
518 node = nullobj();
519 break;
520
521 case REF:
522 node = reference();
523 break;
524
525 default:
526 foxLog(FATAL, "%s: Unknown token type %s.\n", __func__, PDFTokenString[currentToken->type]);
527 break;
528
529 }
530
531 if (node == NULL)
532 return NULL;
533
534 return node;
535 }
536
537 /*
538 * BOOL = true | false
539 */
boolean()540 static PDFSyntaxNode *boolean() {
541 PDFSyntaxNode *node = getNewNode();
542 if (node == NULL)
543 return NULL;
544
545 node->token = currentToken;
546 if (currentToken->type == TOK_TRUE) {
547 if (!match(TOK_TRUE, 0)) {
548 destroyNodeTree(node);
549 return NULL;
550 }
551 }
552 else {
553 if (!match(TOK_FALSE, 0)) {
554 destroyNodeTree(node);
555 return NULL;
556 }
557 }
558
559 return node;
560 }
561
562 /*
563 * LITSTRING = ( litstring )
564 */
litString()565 static PDFSyntaxNode *litString() {
566 PDFSyntaxNode *node = getNewNode();
567 if (node == NULL)
568 return NULL;
569
570 node->token = tokenizeLitString(file);
571 if (node->token == NULL) {
572 destroyNodeTree(node);
573 return NULL;
574 }
575
576 if (!match(OPENPAREN, 1)) {
577 destroyNodeTree(node);
578 return NULL;
579 }
580
581 if (!match(CLOSEPAREN, 1)) {
582 destroyNodeTree(node);
583 return NULL;
584 }
585
586 return node;
587 }
588
589 /*
590 * HEXSTRING = < hexstring >
591 */
hexString()592 static PDFSyntaxNode *hexString() {
593 PDFSyntaxNode *node = getNewNode();
594 if (node == NULL)
595 return NULL;
596
597 node->token = tokenizeHexString(file);
598 if (node->token == NULL) {
599 destroyNodeTree(node);
600 return NULL;
601 }
602
603 if (!match(OPENANGLE, 1)) {
604 destroyNodeTree(node);
605 return NULL;
606 }
607
608 //match(CLOSEANGLE);
609
610 return node;
611 }
612
613 /*
614 * NAME = name
615 *
616 * If name is one of a list of recognizable name fields,
617 * then we identify it for state tracking.
618 */
name()619 static PDFSyntaxNode *name() {
620 uint32_t streamLength;
621
622 PDFSyntaxNode *node = getNewNode();
623 if (node == NULL)
624 return NULL;
625
626 node->token = currentToken;
627
628 if (!match(NAME, 0)) {
629 destroyNodeTree(node);
630 return NULL;
631 }
632
633 checkNameKeyword(node->token);
634 if (node->token->type == TOK_ERROR) {
635 node->token->type = NAME;
636 }
637
638 if(node->token->type == NAME_STRMLEN) {
639 streamLength = (uint32_t)strtoul((char *)currentToken->content, NULL, 10);
640 setStreamLength(streamLength);
641 }
642
643 return node;
644 }
645
646 /*
647 * ARRAY = [ ARRAYCONTENT ]
648 */
array()649 static PDFSyntaxNode *array() {
650 PDFSyntaxNode *node = getNewNode();
651 if (node == NULL)
652 return NULL;
653
654 PDFToken *token = newPDFToken();
655 if (token == NULL) {
656 destroyNodeTree(node);
657 return NULL;
658 }
659
660 token->type = ARRAY;
661 node->token = token;
662
663 if (!match(OPENARRAY, 1)) {
664 destroyNodeTree(node);
665 return NULL;
666 }
667
668 if (currentToken->type != CLOSEARRAY) {
669 node->child[0] = arraycontent();
670
671 if (node->child[0] == NULL) {
672 destroyNodeTree(node);
673 return NULL;
674 }
675 }
676
677 if (!match(CLOSEARRAY, 1)) {
678 destroyNodeTree(node);
679 return NULL;
680 }
681
682 return node;
683 }
684
685 /*
686 * DICTIONARY = << ENTRY >>
687 */
dictionary()688 static PDFSyntaxNode *dictionary() {
689 PDFSyntaxNode *node = getNewNode();
690 if (node == NULL)
691 return NULL;
692
693 PDFToken *token = newPDFToken();
694 if (token == NULL) {
695 destroyNodeTree(node);
696 return NULL;
697 }
698
699 token->type = DICTIONARY;
700 node->token = token;
701
702 if (!match(OPENDICT, 1)) {
703 destroyNodeTree(node);
704 return NULL;
705 }
706
707 if (currentToken->type != CLOSEDICT) {
708 node->child[0] = entry();
709 if (node->child[0] == NULL) {
710 destroyNodeTree(node);
711 return NULL;
712 }
713 }
714
715 if (!match(CLOSEDICT, 1)) {
716 destroyNodeTree(node);
717 return NULL;
718 }
719
720 return node;
721 }
722
723
/*
 * Detects a dictionary value written as an indirect reference
 * (INTEGER INTEGER REF) and tries to parse the referenced object.
 *
 * node: in/out dictionary entry whose child[1] holds the value parsed so
 *       far. When a reference is found, child[1] becomes the REF node with
 *       the two integers as its child[0] and child[1], and the parsed
 *       target, if any, is stored in child[2] of the entry.
 *
 * Returns 1 when the value is not a reference or the reference was handled,
 * 0 on failure. resolveIndirect() returning 0 is treated as failure and 2
 * as "nothing more to do" (NOTE(review): exact meaning of 2 is defined by
 * resolveIndirect - confirm). On failure every node created here is still
 * reachable from *node, so the caller can release it with the entry.
 */
int entry_CheckReference(PDFSyntaxNode **node) {

    uint32_t ret;
    long int pos;
    PDFSyntaxNode *entryNode, *refNode;

    entryNode = *node;

    if (entryNode->child[1]->token == NULL ||
        entryNode->child[1]->token->type != INTEGER)
        return 1;

    if (currentToken->type != INTEGER)
        return 1;

    /* Park the second integer in child[2] until the REF token is seen. */
    entryNode->child[2] = number();
    if (entryNode->child[2] == NULL) {
        return 0;
    }

    if (currentToken->type != REF) {
        foxLog(FATAL, "%s: Invalid Reference.\n", __func__);
        return 0;
    }

    /* Build the REF node before touching child[1], so that a failure does
     * not orphan the first integer node. */
    refNode = reference();
    if (refNode == NULL) {
        return 0;
    }

    refNode->child[0] = entryNode->child[1];
    refNode->child[1] = entryNode->child[2];
    entryNode->child[1] = refNode;
    entryNode->child[2] = NULL;

    *node = entryNode;

    pos = lastposition;

    ret = resolveIndirect(file, entryNode);

    if (ret == 0)
        return 0;
    else if (ret == 2)
        return 1;

    destroyPDFToken(currentToken);
    currentToken = getNextToken(file);
    if (currentToken == NULL)
        return 0;

    entryNode->child[2] = type();
    if (entryNode->child[2] == NULL) {
        return 0;
    }

    if (fseek(file, pos, SEEK_SET) != 0) {
        foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
        return 0;
    }

    /* Drop the token read at the target and re-read the original lookahead. */
    destroyPDFToken(currentToken);
    lastposition = pos;
    currentToken = getNextToken(file);
    if (currentToken == NULL)
        return 0;

    return 1;
}
789
790 /*
791 * ENTRY = NAME TYPE | NAME TYPE ENTRY
792 */
entry()793 static PDFSyntaxNode *entry() {
794 PDFToken *token;
795 PDFStreamType strmtype = UNKNOWN;
796 DecodeParams *DParams;
797
798 PDFSyntaxNode *node = getNewNode();
799 if (node == NULL)
800 return NULL;
801
802 token = currentToken;
803
804 node->child[0] = name();
805 if (node->child[0] == NULL) {
806 destroyNodeTree(node);
807 return NULL;
808 }
809
810 switch (token->type) {
811 case NAME_JS:
812 strmtype = JAVASCRIPT;
813 foxLog(PDF_DEBUG, "%s: Javascript found!\n", __func__);
814 break;
815
816 case NAME_URI:
817 strmtype = URI;
818 foxLog(PDF_DEBUG, "%s: URI found!\n", __func__);
819 break;
820
821 case NAME_DECODEPARAMS:
822 strmtype = DECODEPARAMS;
823 foxLog(PDF_DEBUG, "%s: DecodeParams found!\n", __func__);
824 break;
825
826 case NAME_SUBTYPE:
827 strmtype = SUBTYPE;
828 break;
829
830 default:
831 break;
832 }
833
834
835 node->child[1] = type();
836 if (node->child[1] == NULL) {
837 destroyNodeTree(node);
838 return NULL;
839 }
840
841 if (node->child[0]->token->type == FILTER) {
842 filterList = node->child[1];
843 }
844
845 if (entry_CheckReference(&node) == 0) {
846 destroyNodeTree(node);
847 return NULL;
848 }
849
850 /*
851 *
852 * XXX STATE TRACKING EPILOGUE
853 * This is terrible. Fix it up.
854 *
855 */
856 if (strmtype == SUBTYPE) {
857 if (node->child[1]->token) {
858 if (node->child[1]->token->type == NAME_TRUETYPE ||
859 node->child[1]->token->type == NAME_OPENTYPE) {
860 Dig(node->child[1]->token->content, node->child[1]->token->length, TRUEOPENTYPE);
861 }
862 }
863 }
864 else if (strmtype == DECODEPARAMS) {
865 DParams = prepDecodeParams(node->child[1]);
866 if (DParams == NULL)
867 foxLog(NONFATAL, "%s: Could not set up decode params.\n", __func__);
868 else {
869 Dig((uint8_t *)DParams, sizeof(DecodeParams), DECODEPARAMS);
870 free(DParams);
871 }
872 }
873 else if (node->child[1]->token) {
874 //Feed string to Dig()
875 Dig(node->child[1]->token->content, node->child[1]->token->length, strmtype);
876 }
877
878 /*
879 *
880 * END STATE TRACKING EPILOGUE
881 *
882 */
883
884 if (currentToken->type != CLOSEDICT) {
885 node->sibling = entry();
886 if (node->sibling == NULL) {
887 destroyNodeTree(node);
888 return NULL;
889 }
890 }
891
892 return node;
893 }
894
895 /*
896 * STREAM = stream rawbytes endstream
897 */
stream()898 static PDFSyntaxNode *stream() {
899 PDFSyntaxNode *node = getNewNode();
900 if (node == NULL)
901 return NULL;
902
903 long int streamstart;
904 uint32_t streamLength;
905
906 if ((streamstart = ftell(file)) == -1) {
907 foxLog(FATAL, "%s: Can't figure out starting pos in file.\n", __func__);
908 destroyNodeTree(node);
909 return NULL;
910 }
911
912 streamLength = getStreamLength();
913
914 node->token = tokenizeStream(file, streamLength);
915 if (node->token == NULL) {
916 destroyNodeTree(node);
917 return NULL;
918 }
919
920 if (!match(STREAM, 1) || currentToken->type != ENDSTREAM) {
921
922 lastposition = streamstart;
923
924 streamLength = recoverStream(file, lastposition);
925 if (streamLength == 0) {
926 destroyNodeTree(node);
927 return NULL;
928 }
929 destroyPDFToken(node->token);
930 node->token = tokenizeStream(file, streamLength);
931 if (node->token == NULL) {
932 destroyNodeTree(node);
933 return NULL;
934 }
935
936 currentToken = getNextToken(file);
937 if (currentToken == NULL) {
938 destroyNodeTree(node);
939 return NULL;
940 }
941
942 }
943
944 setStreamLength(0);
945
946 //XXX DECODE STREAMS
947 if (filterList != NULL) {
948 streamDecode(node->token, filterList);
949 filterList = NULL;
950 }
951
952 if (!match(ENDSTREAM, 1)) {
953 destroyNodeTree(node);
954 return NULL;
955 }
956
957 return node;
958 }
959
960 /*
961 * NULLOBJ = nullobj
962 */
nullobj()963 static PDFSyntaxNode *nullobj() {
964 PDFSyntaxNode *node = getNewNode();
965 if (node == NULL)
966 return NULL;
967
968 node->token = currentToken;
969 if (!match(NULLOBJ, 0)) {
970 destroyNodeTree(node);
971 return NULL;
972 }
973
974 return node;
975 }
976
977 /*
978 * XREF = xref SUBSECTION
979 */
xref()980 static PDFSyntaxNode *xref() {
981 PDFSyntaxNode *node, *temp;
982
983 if (!match(XREF, 1)) {
984 return NULL;
985 }
986
987 node = subsection();
988 if (node == NULL) {
989 return NULL;
990 }
991
992 temp = node;
993 while (currentToken->type == INTEGER) {
994 temp->sibling = subsection();
995 if (temp->sibling == NULL) {
996 destroyNodeTree(node);
997 return NULL;
998 }
999 temp = temp->sibling;
1000 }
1001
1002 return node;
1003 }
1004
1005 /*
1006 * SUBSECTION = INTEGER INTEGER=numrows numrows*XREFTABLE |
1007 * INTEGER INTEGER=numrows numrows*XREFTABLE SUBSECTION
1008 */
subsection()1009 static PDFSyntaxNode *subsection() {
1010 PDFSyntaxNode *temp, *node;
1011 uint32_t numrows = 0;
1012 uint32_t i = 0;
1013 node = getNewNode();
1014 if (node == NULL)
1015 return NULL;
1016
1017 node->child[0] = integer();
1018 if (node->child[0] == NULL) {
1019 destroyNodeTree(node);
1020 return NULL;
1021 }
1022
1023 node->child[1] = integer();
1024 if (node->child[1] == NULL) {
1025 destroyNodeTree(node);
1026 return NULL;
1027 }
1028
1029 //extract value of node->child[1]->token->content
1030 numrows = (uint32_t)strtoul((char *)node->child[1]->token->content, NULL, 10);
1031
1032 node->child[2] = getNewNode();
1033 if (node->child[2] == NULL) {
1034 destroyNodeTree(node);
1035 return NULL;
1036 }
1037
1038 //Call xref table with that value
1039 temp = node->child[2];
1040 for (i = 0; i < numrows; i++) {
1041 temp->sibling = xreftable();
1042 if (temp->sibling == NULL) {
1043 destroyNodeTree(node);
1044 return NULL;
1045 }
1046 temp = temp->sibling;
1047 }
1048
1049 return node;
1050 }
1051
1052 /*
1053 * TRAILER = trailer DICTIONARY startxref INTEGER END | startxref INTEGER END
1054 */
trailer()1055 static PDFSyntaxNode *trailer() {
1056 PDFSyntaxNode *node = getNewNode();
1057 if (node == NULL)
1058 return NULL;
1059
1060 if (currentToken->type == TRAILER) {
1061 if(!match(TRAILER, 1)) {
1062 destroyNodeTree(node);
1063 return NULL;
1064 }
1065
1066 node->child[0] = dictionary();
1067 if (node->child[0] == NULL) {
1068 destroyNodeTree(node);
1069 return NULL;
1070 }
1071 }
1072
1073 if (!match(STARTXREF, 1)) {
1074 destroyNodeTree(node);
1075 return NULL;
1076 }
1077
1078 node->child[1] = integer();
1079 if (node->child[1] == NULL) {
1080 destroyNodeTree(node);
1081 return NULL;
1082 }
1083
1084 if (!match(END, 1)) {
1085 destroyNodeTree(node);
1086 return NULL;
1087 }
1088
1089 return node;
1090 }
1091
1092 /*
1093 * NUMBER = real | integer
1094 */
number()1095 static PDFSyntaxNode *number() {
1096 PDFSyntaxNode *node = getNewNode();
1097 if (node == NULL)
1098 return NULL;
1099
1100 node->token = currentToken;
1101
1102 if (currentToken->type == REAL) {
1103 if (!match(REAL, 0)) {
1104 destroyNodeTree(node);
1105 return NULL;
1106 }
1107 }
1108 else {
1109 if (!match(INTEGER, 0)) {
1110 destroyNodeTree(node);
1111 return NULL;
1112 }
1113 }
1114
1115 return node;
1116 }
1117
1118 /*
1119 * INTEGER = integer
1120 */
integer()1121 static PDFSyntaxNode *integer() {
1122 PDFSyntaxNode *node = getNewNode();
1123 if (node == NULL)
1124 return NULL;
1125
1126 node->token = currentToken;
1127
1128 if (!match(INTEGER, 0)) {
1129 destroyNodeTree(node);
1130 return NULL;
1131 }
1132
1133 return node;
1134 }
1135
1136 /*
1137 * XREFTABLE = INTEGER INTEGER IDENT (n|f)
1138 */
xreftable()1139 static PDFSyntaxNode *xreftable() {
1140 PDFSyntaxNode *node = getNewNode();
1141 if (node == NULL)
1142 return NULL;
1143
1144 node->child[0] = integer();
1145 if (node->child[0] == NULL) {
1146 destroyNodeTree(node);
1147 return NULL;
1148 }
1149
1150 node->child[1] = integer();
1151 if (node->child[1] == NULL) {
1152 destroyNodeTree(node);
1153 return NULL;
1154 }
1155
1156 if (!match(IDENT, 1)) {
1157 destroyNodeTree(node);
1158 return NULL;
1159 }
1160
1161 return node;
1162 }
1163
1164 /*
1165 * REFERENCE = ref
1166 */
reference()1167 static PDFSyntaxNode *reference() {
1168 PDFSyntaxNode *node = getNewNode();
1169 if (node == NULL)
1170 return NULL;
1171
1172 node->token = currentToken;
1173 if (!match(REF, 0)) {
1174 destroyNodeTree(node);
1175 return NULL;
1176 }
1177
1178 return node;
1179 }
1180
/*
 * Releases a node together with everything reachable from it: its three
 * children, its sibling chain and the tokens attached to those nodes.
 * A NULL node is accepted and ignored.
 *
 * Siblings are walked in a loop rather than by recursion, so the stack
 * depth depends on how deeply nodes are nested, not on how long a sibling
 * chain (for example the list of objects in a file) happens to be.
 */
static void destroyNodeTree(PDFSyntaxNode *node) {
    PDFSyntaxNode *next;

    while (node != NULL) {
        /* Read the link before the node itself is freed. */
        next = node->sibling;

        destroyNodeTree(node->child[0]);
        destroyNodeTree(node->child[1]);
        destroyNodeTree(node->child[2]);

        if (node->token != NULL)
            destroyPDFToken(node->token);

        free(node);
        node = next;
    }
}
1198