#ifndef PDF_FOX_TOKEN_H
#define PDF_FOX_TOKEN_H

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>

/* NOTE(review): presumably the capacity of the identifier buffer used by the
 * tokenizer implementation; the use site is not visible from this header. */
#define BUF_IDENT_SIZE 100


/**Defines each token's type.
 * Each token has its own integer value.
 *
 * No enumerator has an explicit initializer, so the values are assigned
 * sequentially starting at 0 (TOK_ERROR). Inserting or reordering entries
 * changes the numeric value of every enumerator that follows; append new
 * entries at the end if the numeric values are relied upon anywhere.
 */
typedef enum {
    TOK_ERROR,
    ARRAY,
    BINHEAD,
    COMMENT,
    CLOSEANGLE,
    CLOSEARRAY,
    CLOSEDICT,
    CLOSEPAREN,
    DECIMAL,
    DICTIONARY,
    END,
    ENDOBJ,
    ENDSTREAM,
    IDENT,
    MINUS,
    NAME,
    NAME_STRMLEN,
    FILTER,
    FILTER_HEXDECODE,
    FILTER_85DECODE,
    FILTER_LZWDECODE,
    FILTER_FLATEDECODE,
    FILTER_RLEDECODE,
    FILTER_CCITTDECODE,
    FILTER_JBIG2DECODE,
    FILTER_DCTDECODE,
    FILTER_JPXDECODE,
    FILTER_CRYPTDECODE,
    NEWLINE,
    HEXSTRING,
    INTEGER,
    LITSTRING,
    REAL,
    NULLOBJ,
    OBJ,
    OPENANGLE,
    OPENARRAY,
    OPENDICT,
    OPENPAREN,
    PDFVERS,
    PLUS,
    REF,
    STARTXREF,
    STREAM,
    STREAMCONTENT,
    TRAILER,
    XREF,
    TOK_TRUE,
    TOK_FALSE,
    ENDOFFILE,
    NAME_SUBTYPE,
    NAME_S,
    NAME_JAVASCRIPT,
    NAME_JS,
    NAME_TRUETYPE,
    NAME_OPENTYPE,
    NAME_DECODEPARAMS,
    NAME_URI,
    NAME_DCPRMS_COLUMNS,
    NAME_DCPRMS_BPC,
    NAME_DCPRMS_COLORS,
    NAME_DCPRMS_PREDICTOR
} PDFTokenType;

/**Used by the lexical analyzer to track state.
 * States are generally active when tokenizing multi-character strings.
 * Values are implicit and start at 0 (START).
 */
typedef enum {
    START,
    INCOMMENT,
    INANGLE,
    INCLOSEANGLE,
    INIDENT,
    INNAME,
    INNUMBER,
    INREAL
} PDFTokenizeState;

/**The main definition of the PDFToken type.
 * Contains the type of token as well as optional fields
 * for the particular set of characters attributed to that token
 * (for token types that are resolved from a pattern rather than
 * a specific sequence of characters).
 *
 * NOTE(review): the struct tag begins with an underscore followed by an
 * uppercase letter, a form the C standard reserves for the implementation.
 * It is kept unchanged so existing references to the tag keep compiling.
 */
typedef struct _PDFToken {
    PDFTokenType type;  /* kind of token */
    uint32_t length;    /* presumably the number of bytes in content -- confirm in the implementation */
    uint8_t *content;   /* optional token bytes; ownership is not visible from this header */
} PDFToken;

/**Utility functions for tokenizing.
 * Used by the Parser.
 *
 * NOTE(review): the allocation and ownership rules for returned PDFToken
 * pointers are defined in the implementation file, not here; presumably
 * tokens are released with destroyPDFToken -- confirm before relying on it.
 */
extern PDFToken *getNextToken(FILE *file);
/* Declared with (void) so the compiler sees a real prototype and rejects
 * calls that pass arguments; an empty () would leave arguments unchecked. */
extern PDFToken *newPDFToken(void);
extern void checkNameKeyword(PDFToken *token);
extern void destroyPDFToken(PDFToken *token);
extern PDFToken *tokenizeStream(FILE *file, uint32_t length);
extern PDFToken *tokenizeHexString(FILE *file);
extern PDFToken *tokenizeLitString(FILE *file);
extern bool tokenizeRef(FILE *file);
extern bool isWhitespace(char c);

#endif