1 /* esl_json : JSON data file parsing 2 */ 3 #ifndef eslJSON_INCLUDED 4 #define eslJSON_INCLUDED 5 #include "esl_config.h" 6 7 #include "easel.h" 8 #include "esl_buffer.h" 9 #include "esl_random.h" 10 #include "esl_stack.h" 11 12 /* Each token (node) in the parse tree has a type. 13 */ 14 enum esl_json_type_e { 15 eslJSON_UNKNOWN = 0, 16 eslJSON_OBJECT = 1, 17 eslJSON_ARRAY = 2, 18 eslJSON_KEY = 3, 19 eslJSON_STRING = 4, 20 eslJSON_NUMBER = 5, 21 eslJSON_BOOLEAN = 6, // either "true" or "false" 22 eslJSON_NULL = 7 23 }; 24 25 /* The parser keeps a precise state in the JSON grammar, so it can 26 * stop and start at any point in a JSON string, enabling incremental 27 * parsing. These state types correspond to states in the JSON 28 * specification's grammar. 29 */ 30 enum esl_json_state_e { 31 eslJSON_OBJ_NONE = 0, 32 eslJSON_OBJ_OPEN = 1, 33 eslJSON_OBJ_COLON = 2, 34 eslJSON_OBJ_COMMA = 3, 35 eslJSON_ARR_OPEN = 4, 36 eslJSON_ARR_COMMA = 5, 37 eslJSON_STR_OPEN = 6, 38 eslJSON_STR_CHAR = 7, 39 eslJSON_STR_BACKSLASH = 8, 40 eslJSON_STR_PROTECTED = 9, 41 eslJSON_STR_UNICODE = 10, 42 eslJSON_KEY_OPEN = 11, 43 eslJSON_KEY_CHAR = 12, 44 eslJSON_KEY_BACKSLASH = 13, 45 eslJSON_KEY_PROTECTED = 14, 46 eslJSON_KEY_UNICODE = 15, 47 eslJSON_NUM_SIGN = 16, 48 eslJSON_NUM_ZERO = 17, 49 eslJSON_NUM_NONZERO = 18, 50 eslJSON_NUM_LEADDIGIT = 19, 51 eslJSON_NUM_POINT = 20, 52 eslJSON_NUM_FRACDIGIT = 21, 53 eslJSON_NUM_EXP = 22, 54 eslJSON_NUM_EXPSIGN = 23, 55 eslJSON_NUM_EXPDIGIT = 24, 56 eslJSON_VAL_TRUE = 25, 57 eslJSON_VAL_FALSE = 26, 58 eslJSON_VAL_NULL = 27, 59 eslJSON_VAL_INOBJ = 28, 60 eslJSON_VAL_INARR = 29, 61 eslJSON_STR_ASKEY = 30 62 }; 63 64 65 /* ESL_JSON_TOK 66 * A node in the parse tree. 67 * startpos, endpos are 0..n-1 in bytes in the input JSON string (ESL_BUFFER). 68 * 69 * Objects and arrays have >= 0 child nodes. To store arbitrarily 70 * multifurcating tree without arrays of children, an obj or arr 71 * node keeps index of first and last child, and the children are a 72 * linked list thru <nextsib>. Key:value pairs for objects are 73 * stored as sequential nodes in the list. 74 * 75 * Keys and values have 0 children, and the link fields (nextsib, 76 * firstchild, lastchild) are all -1. 77 * 78 * Links are indices in the tree's <tok> array, not pointers, so reallocation 79 * of <tok> array doesn't corrupt. 80 */ 81 typedef struct { 82 enum esl_json_type_e type; 83 esl_pos_t startpos; // byte 0..n-1 in the input. Strings do not include "". 84 esl_pos_t endpos; // (... for a zero-len string or key, endpos = startpos-1) 85 int nchild; // Object, array: number of children. Key, value: 0. 86 int firstchild; // -1, or (for obj, arr:) index of first child in tree's <tok> array 87 int lastchild; // ... ditto for last child 88 int nextsib; // Children are a linked list. <nextsib> is index in tree's <tok> array. 89 90 int linenum; // for user error reporting: what line number this token is on, 1.. 91 int linepos; // ... and what char position it starts at on that line, 1.. 92 } ESL_JSON_TOK; 93 94 95 /* ESL_JSON 96 * A parse tree. Root node (0) is an eslJSON_OBJECT. 97 */ 98 typedef struct { 99 ESL_JSON_TOK *tok; 100 int ntok; 101 int nalloc; // current allocation size 102 int redline; // if nalloc > redline, _Reuse() reallocates downward 103 } ESL_JSON; 104 105 /* ESL_JSON_PARSER 106 * Maintains precise state at each byte during (possibly incremental) parsing. 107 */ 108 typedef struct { 109 enum esl_json_state_e state; 110 ESL_STACK *pda; // push down stack of open internal obj|arr nodes on the parse tree 111 int curridx; // index of open (parse-in-progress) token in tree's <tok> array 112 int codelen; // how far we're into a unicode, "true", "false", "null". 113 esl_pos_t pos; // position in input JSON string 0..n-1 114 int linenum; // solely for informative error messages: what input line we're on, 1..N 115 int linepos; // ... and what char position we're on in that line, 1..L 116 } ESL_JSON_PARSER; 117 118 119 120 /* Full and incremental JSON parsing */ 121 extern int esl_json_Parse(ESL_BUFFER *bf, ESL_JSON **ret_pi); 122 extern int esl_json_PartialParse(ESL_JSON_PARSER *parser, ESL_JSON *pi, const char *s, esl_pos_t n, esl_pos_t *ret_nused, char *errbuf); 123 124 /* ESL_JSON */ 125 extern ESL_JSON *esl_json_Create (void); 126 extern int esl_json_Grow (ESL_JSON *pi); 127 extern size_t esl_json_Sizeof (ESL_JSON *pi); 128 extern size_t esl_json_MinSizeof(ESL_JSON *pi); 129 extern int esl_json_Reuse (ESL_JSON *pi); 130 extern void esl_json_Destroy (ESL_JSON *pi); 131 132 /* ESL_JSON_PARSER */ 133 extern ESL_JSON_PARSER *esl_json_parser_Create(void); 134 extern void esl_json_parser_Destroy(ESL_JSON_PARSER *parser); 135 136 /* Accessing tokenized data */ 137 extern char *esl_json_GetMem (const ESL_JSON *pi, int idx, const ESL_BUFFER *bf); 138 extern esl_pos_t esl_json_GetLen (const ESL_JSON *pi, int idx, const ESL_BUFFER *bf); 139 extern int esl_json_ReadInt (const ESL_JSON *pi, int idx, ESL_BUFFER *bf, int *ret_i); 140 extern int esl_json_ReadFloat(const ESL_JSON *pi, int idx, ESL_BUFFER *bf, float *ret_x); 141 142 /* Debugging, development */ 143 extern int esl_json_Validate(const ESL_JSON *pi, const ESL_BUFFER *bf, char *errbuf); 144 extern char *esl_json_DecodeType(enum esl_json_type_e type); 145 extern int esl_json_Dump(FILE *fp, ESL_JSON *pi); 146 extern int esl_json_SampleDirty(ESL_RANDOMNESS *rng, char **ret_s, int *ret_n); 147 148 149 #endif /* eslJSON_INCLUDED */ 150