1 #ifndef PDF_FOX_TOKEN_H
2 #define PDF_FOX_TOKEN_H
3 
4 #include <ctype.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdint.h>
8 #include <stdbool.h>
9 
/**Size constant for identifier buffers.
 * NOTE(review): presumably the capacity of the scratch buffer the tokenizer
 * fills while reading an identifier; the code that uses it is not part of
 * this header -- confirm, including whether the terminator is counted.
 */
#define BUF_IDENT_SIZE 100
11 
12 
/**Defines each token's type.
 * Enumerators are numbered implicitly from 0 in declaration order, so the
 * order below is part of the interface: append new kinds at the end rather
 * than inserting them, or every later value shifts.
 * The value hints in the comments are for orientation only.
 */
typedef enum {
    TOK_ERROR = 0,          /* 0 */
    ARRAY,
    BINHEAD,
    COMMENT,
    CLOSEANGLE,
    CLOSEARRAY,
    CLOSEDICT,
    CLOSEPAREN,
    DECIMAL,
    DICTIONARY,
    END,                    /* 10 */
    ENDOBJ,
    ENDSTREAM,
    IDENT,
    MINUS,
    NAME,
    NAME_STRMLEN,
    FILTER,                 /* 17 */
    FILTER_HEXDECODE,
    FILTER_85DECODE,
    FILTER_LZWDECODE,       /* 20 */
    FILTER_FLATEDECODE,
    FILTER_RLEDECODE,
    FILTER_CCITTDECODE,
    FILTER_JBIG2DECODE,
    FILTER_DCTDECODE,
    FILTER_JPXDECODE,
    FILTER_CRYPTDECODE,     /* 27 */
    NEWLINE,
    HEXSTRING,
    INTEGER,                /* 30 */
    LITSTRING,
    REAL,
    NULLOBJ,
    OBJ,
    OPENANGLE,
    OPENARRAY,
    OPENDICT,
    OPENPAREN,
    PDFVERS,
    PLUS,                   /* 40 */
    REF,
    STARTXREF,
    STREAM,
    STREAMCONTENT,
    TRAILER,
    XREF,
    TOK_TRUE,
    TOK_FALSE,
    ENDOFFILE,              /* 49 */
    NAME_SUBTYPE,           /* 50 */
    NAME_S,
    NAME_JAVASCRIPT,
    NAME_JS,
    NAME_TRUETYPE,
    NAME_OPENTYPE,
    NAME_DECODEPARAMS,
    NAME_URI,
    NAME_DCPRMS_COLUMNS,
    NAME_DCPRMS_BPC,
    NAME_DCPRMS_COLORS,     /* 60 */
    NAME_DCPRMS_PREDICTOR   /* 61 */
} PDFTokenType;
17 
/**Used by the lexical analyzer to track state.
 * States are generally active while a token that spans several characters
 * is being tokenized. Values are assigned implicitly from 0 in the order
 * listed.
 */
typedef enum {
    START = 0,
    INCOMMENT,
    INANGLE,
    INCLOSEANGLE,
    INIDENT,
    INNAME,
    INNUMBER,
    INREAL
} PDFTokenizeState;
22 
23 /**The main definition of the PDFToken type.
24  * Contains the type of token as well as optional fields
25  * for the particular set of characters attributed to that token
26  * (for token types that are resolved from a pattern rather than
27  * a specific sequence of characters).
28  */
29 typedef struct _PDFToken {
30     PDFTokenType type;
31     uint32_t length;
32     uint8_t *content;
33 } PDFToken;
34 
35 /**Utility functions for tokenizing.
36  * Used by the Parser.
37  */
38 extern PDFToken *getNextToken(FILE *file);
39 extern PDFToken *newPDFToken();
40 extern void checkNameKeyword(PDFToken *token);
41 extern void destroyPDFToken(PDFToken *token);
42 extern PDFToken *tokenizeStream(FILE *file, uint32_t length);
43 extern PDFToken *tokenizeHexString(FILE *file);
44 extern PDFToken *tokenizeLitString(FILE *file);
45 extern bool tokenizeRef(FILE *file);
46 extern bool isWhitespace(char c);
47 
48 #endif
49