1 /* esl_json : JSON data file parsing
2  */
3 #ifndef eslJSON_INCLUDED
4 #define eslJSON_INCLUDED
5 #include "esl_config.h"
6 
7 #include "easel.h"
8 #include "esl_buffer.h"
9 #include "esl_random.h"
10 #include "esl_stack.h"
11 
/* esl_json_type_e
 * Type tag carried by every token (node) of the parse tree.
 * Enumerators are numbered explicitly, 0..7.
 */
enum esl_json_type_e {
  eslJSON_UNKNOWN = 0,   // placeholder: none of the JSON types below
  eslJSON_OBJECT  = 1,   // internal node; children are key:value pairs, stored as sequential nodes
  eslJSON_ARRAY   = 2,   // internal node; >= 0 children
  eslJSON_KEY     = 3,   // key of a key:value pair; no children
  eslJSON_STRING  = 4,   // value; no children
  eslJSON_NUMBER  = 5,   // value; no children
  eslJSON_BOOLEAN = 6,   // value; either "true" or "false"
  eslJSON_NULL    = 7    // value; "null"
};
24 
/* esl_json_state_e
 * Parser states. Each one corresponds to a state in the JSON
 * specification's grammar. Because the parser always knows exactly
 * which state it is in, it can stop at any byte of a JSON string and
 * resume later, which is what makes incremental parsing possible.
 *
 * States are grouped below by name prefix; values are explicit, 0..30.
 */
enum esl_json_state_e {
  /* OBJ_: object states */
  eslJSON_OBJ_NONE      = 0,
  eslJSON_OBJ_OPEN      = 1,
  eslJSON_OBJ_COLON     = 2,
  eslJSON_OBJ_COMMA     = 3,

  /* ARR_: array states */
  eslJSON_ARR_OPEN      = 4,
  eslJSON_ARR_COMMA     = 5,

  /* STR_: string states */
  eslJSON_STR_OPEN      = 6,
  eslJSON_STR_CHAR      = 7,
  eslJSON_STR_BACKSLASH = 8,
  eslJSON_STR_PROTECTED = 9,
  eslJSON_STR_UNICODE   = 10,

  /* KEY_: key states; same five sub-states as STR_ */
  eslJSON_KEY_OPEN      = 11,
  eslJSON_KEY_CHAR      = 12,
  eslJSON_KEY_BACKSLASH = 13,
  eslJSON_KEY_PROTECTED = 14,
  eslJSON_KEY_UNICODE   = 15,

  /* NUM_: number states */
  eslJSON_NUM_SIGN      = 16,
  eslJSON_NUM_ZERO      = 17,
  eslJSON_NUM_NONZERO   = 18,
  eslJSON_NUM_LEADDIGIT = 19,
  eslJSON_NUM_POINT     = 20,
  eslJSON_NUM_FRACDIGIT = 21,
  eslJSON_NUM_EXP       = 22,
  eslJSON_NUM_EXPSIGN   = 23,
  eslJSON_NUM_EXPDIGIT  = 24,

  /* VAL_: partway through one of the literals "true", "false", "null" */
  eslJSON_VAL_TRUE      = 25,
  eslJSON_VAL_FALSE     = 26,
  eslJSON_VAL_NULL      = 27,

  /* NOTE(review): the exact role of the next three states is not evident
   * from this header; confirm against the parser implementation.
   */
  eslJSON_VAL_INOBJ     = 28,
  eslJSON_VAL_INARR     = 29,
  eslJSON_STR_ASKEY     = 30
};
63 
64 
65 /* ESL_JSON_TOK
66  * A node in the parse tree.
67  *   startpos, endpos are 0..n-1 in bytes in the input JSON string (ESL_BUFFER).
68  *
69  *   Objects and arrays have >= 0 child nodes.  To store arbitrarily
70  *   multifurcating tree without arrays of children, an obj or arr
71  *   node keeps index of first and last child, and the children are a
72  *   linked list thru <nextsib>. Key:value pairs for objects are
73  *   stored as sequential nodes in the list.
74  *
75  *   Keys and values have 0 children, and the link fields (nextsib,
76  *   firstchild, lastchild) are all -1.
77  *
78  *   Links are indices in the tree's <tok> array, not pointers, so reallocation
79  *   of <tok> array doesn't corrupt.
80  */
81 typedef struct {
82   enum esl_json_type_e type;
83   esl_pos_t startpos;    // byte 0..n-1 in the input. Strings do not include "".
84   esl_pos_t endpos;      //   (... for a zero-len string or key, endpos = startpos-1)
85   int       nchild;      // Object, array: number of children. Key, value: 0.
86   int       firstchild;  // -1, or (for obj, arr:) index of first child in tree's <tok> array
87   int       lastchild;   //  ... ditto for last child
88   int       nextsib;     // Children are a linked list. <nextsib> is index in tree's <tok> array.
89 
90   int       linenum;     // for user error reporting: what line number this token is on, 1..
91   int       linepos;     //   ... and what char position it starts at on that line, 1..
92 } ESL_JSON_TOK;
93 
94 
95 /* ESL_JSON
96  * A parse tree. Root node (0) is an eslJSON_OBJECT.
97  */
98 typedef struct {
99   ESL_JSON_TOK *tok;
100   int ntok;
101   int nalloc;          // current allocation size
102   int redline;         // if nalloc > redline, _Reuse() reallocates downward
103 } ESL_JSON;
104 
105 /* ESL_JSON_PARSER
106  * Maintains precise state at each byte during (possibly incremental) parsing.
107  */
108 typedef struct {
109   enum esl_json_state_e state;
110   ESL_STACK *pda;        // push down stack of open internal obj|arr nodes on the parse tree
111   int        curridx;    // index of open (parse-in-progress) token in tree's <tok> array
112   int        codelen;    // how far we're into a unicode, "true", "false", "null".
113   esl_pos_t  pos;        // position in input JSON string 0..n-1
114   int        linenum;    // solely for informative error messages: what input line we're on, 1..N
115   int        linepos;    //  ... and what char position we're on in that line, 1..L
116 } ESL_JSON_PARSER;
117 
118 
119 
120 /* Full and incremental JSON parsing */
121 extern int esl_json_Parse(ESL_BUFFER *bf, ESL_JSON **ret_pi);
122 extern int esl_json_PartialParse(ESL_JSON_PARSER *parser, ESL_JSON *pi, const char *s, esl_pos_t n, esl_pos_t *ret_nused, char *errbuf);
123 
124 /* ESL_JSON */
125 extern ESL_JSON *esl_json_Create   (void);
126 extern int       esl_json_Grow     (ESL_JSON *pi);
127 extern size_t    esl_json_Sizeof   (ESL_JSON *pi);
128 extern size_t    esl_json_MinSizeof(ESL_JSON *pi);
129 extern int       esl_json_Reuse    (ESL_JSON *pi);
130 extern void      esl_json_Destroy  (ESL_JSON *pi);
131 
132 /* ESL_JSON_PARSER */
133 extern ESL_JSON_PARSER *esl_json_parser_Create(void);
134 extern void             esl_json_parser_Destroy(ESL_JSON_PARSER *parser);
135 
136 /* Accessing tokenized data */
137 extern char      *esl_json_GetMem   (const ESL_JSON *pi, int idx, const ESL_BUFFER *bf);
138 extern esl_pos_t  esl_json_GetLen   (const ESL_JSON *pi, int idx, const ESL_BUFFER *bf);
139 extern int        esl_json_ReadInt  (const ESL_JSON *pi, int idx,       ESL_BUFFER *bf, int   *ret_i);
140 extern int        esl_json_ReadFloat(const ESL_JSON *pi, int idx,       ESL_BUFFER *bf, float *ret_x);
141 
142 /* Debugging, development */
143 extern int   esl_json_Validate(const ESL_JSON *pi, const ESL_BUFFER *bf, char *errbuf);
144 extern char *esl_json_DecodeType(enum esl_json_type_e type);
145 extern int   esl_json_Dump(FILE *fp, ESL_JSON *pi);
146 extern int   esl_json_SampleDirty(ESL_RANDOMNESS *rng, char **ret_s, int *ret_n);
147 
148 
149 #endif /* eslJSON_INCLUDED */
150