1 /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 /** \defgroup lejp JSON parser
26  * ##JSON parsing related functions
27  * \ingroup lwsapi
28  *
29  * LEJP is an extremely lightweight JSON stream parser included in lws.
30  */
31 //@{
/*
 * Forward declaration: the callback prototype and typedef below take a
 * struct lejp_ctx * before the full definition appears later in this header.
 */
struct lejp_ctx;
33 
#if !defined(LWS_ARRAY_SIZE)
/*
 * Number of elements in a true array object (NOT a pointer or an array
 * function parameter, which has already decayed to a pointer).
 *
 * The argument is parenthesized everywhere it is expanded so that an
 * argument that is itself an expression cannot rebind against the [0]
 * subscript.  Note that _x is expanded twice, so it must not have side
 * effects.
 */
#define LWS_ARRAY_SIZE(_x) (sizeof(_x) / sizeof((_x)[0]))
#endif
/*
 * Flag bits that sit above the small state numbers in enum lejp_states.
 * NOTE(review): going by the names, LEJP_FLAG_WS_KEEP presumably marks
 * states in which whitespace is part of the token being collected, and
 * LEJP_FLAG_WS_COMMENTLINE presumably relates to comment-line handling;
 * neither is used in this header beyond the enum below -- confirm in the
 * parser implementation.
 */
#define LEJP_FLAG_WS_KEEP 64
#define LEJP_FLAG_WS_COMMENTLINE 32

/*
 * Parser state numbers.  The low bits are a plain sequence 0..15; states
 * inside a string, a number or a bare token additionally carry
 * LEJP_FLAG_WS_KEEP.
 */
enum lejp_states {
	LEJP_IDLE		= 0,
	LEJP_MEMBERS		= 1,
	LEJP_M_P		= 2,

	/* string and string-escape states: all carry LEJP_FLAG_WS_KEEP */
	LEJP_MP_STRING		= (LEJP_FLAG_WS_KEEP | 3),
	LEJP_MP_STRING_ESC	= (LEJP_FLAG_WS_KEEP | 4),
	LEJP_MP_STRING_ESC_U1	= (LEJP_FLAG_WS_KEEP | 5),
	LEJP_MP_STRING_ESC_U2	= (LEJP_FLAG_WS_KEEP | 6),
	LEJP_MP_STRING_ESC_U3	= (LEJP_FLAG_WS_KEEP | 7),
	LEJP_MP_STRING_ESC_U4	= (LEJP_FLAG_WS_KEEP | 8),

	LEJP_MP_DELIM		= 9,
	LEJP_MP_VALUE		= 10,

	/* number and bare-token states: all carry LEJP_FLAG_WS_KEEP */
	LEJP_MP_VALUE_NUM_INT	= (LEJP_FLAG_WS_KEEP | 11),
	LEJP_MP_VALUE_NUM_EXP	= (LEJP_FLAG_WS_KEEP | 12),
	LEJP_MP_VALUE_TOK	= (LEJP_FLAG_WS_KEEP | 13),

	LEJP_MP_COMMA_OR_END	= 14,
	LEJP_MP_ARRAY_END	= 15,
};
58 
/*
 * Negative status codes.  Every enumerator here is below zero; the callback
 * documentation in this header states that lejp_parse() hands back a
 * negative error code on failure and 0 or more on successful completion.
 */
enum lejp_reasons {
	/* NOTE(review): presumably "more input needed", not a failure -- confirm */
	LEJP_CONTINUE				= -1,

	/* LEJP_REJECT_*: named after the parser state / check that refused the input */
	LEJP_REJECT_IDLE_NO_BRACE		= -2,
	LEJP_REJECT_MEMBERS_NO_CLOSE		= -3,
	LEJP_REJECT_MP_NO_OPEN_QUOTE		= -4,
	LEJP_REJECT_MP_STRING_UNDERRUN		= -5,
	LEJP_REJECT_MP_ILLEGAL_CTRL		= -6,
	LEJP_REJECT_MP_STRING_ESC_ILLEGAL_ESC	= -7,
	LEJP_REJECT_ILLEGAL_HEX			= -8,
	LEJP_REJECT_MP_DELIM_MISSING_COLON	= -9,
	LEJP_REJECT_MP_DELIM_BAD_VALUE_START	= -10,
	LEJP_REJECT_MP_VAL_NUM_INT_NO_FRAC	= -11,
	LEJP_REJECT_MP_VAL_NUM_FORMAT		= -12,
	LEJP_REJECT_MP_VAL_NUM_EXP_BAD_EXP	= -13,
	LEJP_REJECT_MP_VAL_TOK_UNKNOWN		= -14,
	LEJP_REJECT_MP_C_OR_E_UNDERF		= -15,
	LEJP_REJECT_MP_C_OR_E_NOTARRAY		= -16,
	LEJP_REJECT_MP_ARRAY_END_MISSING	= -17,
	LEJP_REJECT_STACK_OVERFLOW		= -18,
	LEJP_REJECT_MP_DELIM_ISTACK		= -19,
	LEJP_REJECT_NUM_TOO_LONG		= -20,
	LEJP_REJECT_MP_C_OR_E_NEITHER		= -21,
	LEJP_REJECT_UNKNOWN			= -22,
	LEJP_REJECT_CALLBACK			= -23
};
84 
/* OR-ed into the callback reasons below that deliver a JSON value */
#define LEJP_FLAG_CB_IS_VALUE 64

/*
 * Reasons passed to the user callback.  The low bits are a plain sequence
 * 0..17; reasons that report a value also carry LEJP_FLAG_CB_IS_VALUE.
 */
enum lejp_callbacks {
	/* context lifetime */
	LEJPCB_CONSTRUCTED	= 0,
	LEJPCB_DESTRUCTED	= 1,

	/* overall parse progress */
	LEJPCB_START		= 2,
	LEJPCB_COMPLETE		= 3,
	LEJPCB_FAILED		= 4,

	/* the name half of a "name":value pair has been parsed */
	LEJPCB_PAIR_NAME	= 5,

	/* values */
	LEJPCB_VAL_TRUE		= (LEJP_FLAG_CB_IS_VALUE | 6),
	LEJPCB_VAL_FALSE	= (LEJP_FLAG_CB_IS_VALUE | 7),
	LEJPCB_VAL_NULL		= (LEJP_FLAG_CB_IS_VALUE | 8),
	LEJPCB_VAL_NUM_INT	= (LEJP_FLAG_CB_IS_VALUE | 9),
	LEJPCB_VAL_NUM_FLOAT	= (LEJP_FLAG_CB_IS_VALUE | 10),
	/*
	 * Note: string start has no data yet and does NOT carry
	 * LEJP_FLAG_CB_IS_VALUE, so it must be handled separately from the
	 * flagged value reasons.
	 */
	LEJPCB_VAL_STR_START	= 11,
	LEJPCB_VAL_STR_CHUNK	= (LEJP_FLAG_CB_IS_VALUE | 12),
	LEJPCB_VAL_STR_END	= (LEJP_FLAG_CB_IS_VALUE | 13),

	/* container boundaries */
	LEJPCB_ARRAY_START	= 14,
	LEJPCB_ARRAY_END	= 15,

	LEJPCB_OBJECT_START	= 16,
	LEJPCB_OBJECT_END	= 17,
};
112 
/**
 * _lejp_callback() - User parser actions
 * \param ctx:	LEJP context
 * \param reason:	Callback reason, one of enum lejp_callbacks
 *
 *	Your user callback is associated with the context at construction time,
 *	and receives calls as the parsing progresses.
 *
 *	All of the callbacks may be ignored and just return 0.
 *	NOTE(review): a nonzero return presumably aborts the parse with
 *	LEJP_REJECT_CALLBACK -- confirm against the parser implementation.
 *
 *	\p reason is passed as a plain char; the largest enum lejp_callbacks
 *	value is LEJP_FLAG_CB_IS_VALUE | 13 (77), so every reason fits in 7 bits.
 *
 *	The reasons it might get called, found in \p reason, are:
 *
 *  LEJPCB_CONSTRUCTED:  The context was just constructed... you might want to
 *		perform one-time allocation for the life of the context.
 *
 *  LEJPCB_DESTRUCTED:	The context is being destructed... if you made any
 *		allocations at construction-time, you can free them now
 *
 *  LEJPCB_START:	Parsing is beginning at the first byte of input
 *
 *  LEJPCB_COMPLETE:	Parsing has completed successfully.  You'll get a 0 or
 *			positive return code from lejp_parse indicating the
 *			amount of unused bytes left in the input buffer
 *
 *  LEJPCB_FAILED:	Parsing failed.  You'll get a negative error code
 *			returned from lejp_parse
 *
 *  LEJPCB_PAIR_NAME:	When a "name":"value" pair has had the name parsed,
 *			this callback occurs.  You can find the new name at
 *			the end of ctx->path[]
 *
 *  LEJPCB_VAL_TRUE:	The "true" value appeared
 *
 *  LEJPCB_VAL_FALSE:	The "false" value appeared
 *
 *  LEJPCB_VAL_NULL:	The "null" value appeared
 *
 *  LEJPCB_VAL_NUM_INT:	A string representing an integer is in ctx->buf
 *
 *  LEJPCB_VAL_NUM_FLOAT: A string representing a float is in ctx->buf
 *
 *  LEJPCB_VAL_STR_START: We are starting to parse a string, no data yet
 *
 *  LEJPCB_VAL_STR_CHUNK: We filled the string buffer in the ctx, but it's not
 *			  the end of the string.  We produce this to spill the
 *			  intermediate buffer to the user code, so we can handle
 *			  huge JSON strings using only the small buffer in the
 *			  ctx.  If the whole JSON string fits in the ctx buffer,
 *			  you won't get these callbacks.
 *
 *  LEJPCB_VAL_STR_END:	String parsing has completed, the last chunk of the
 *			string is in ctx->buf.
 *
 *  LEJPCB_ARRAY_START:	An array started
 *
 *  LEJPCB_ARRAY_END:	An array ended
 *
 *  LEJPCB_OBJECT_START: An object started
 *
 *  LEJPCB_OBJECT_END:	An object ended
 */
LWS_EXTERN signed char _lejp_callback(struct lejp_ctx *ctx, char reason);

/*
 * Function pointer type with the same signature as _lejp_callback(); this is
 * the type lejp_parser_push() takes for its lejp_cb argument.
 */
typedef signed char (*lejp_callback)(struct lejp_ctx *ctx, char reason);
177 
/*
 * Compile-time sizes for the arrays embedded in struct lejp_ctx.  Each one is
 * only defined here if it was not already defined before this header was
 * included, so a build can override any of them.
 */

/* number of entries in lejp_ctx.pst[] (the parsing stack) */
#ifndef LEJP_MAX_PARSING_STACK_DEPTH
#define LEJP_MAX_PARSING_STACK_DEPTH 5
#endif
/* number of entries in lejp_ctx.st[] (the state stack) */
#ifndef LEJP_MAX_DEPTH
#define LEJP_MAX_DEPTH 12
#endif
/* number of entries in lejp_ctx.i[] and lejp_ctx.wild[] */
#ifndef LEJP_MAX_INDEX_DEPTH
#define LEJP_MAX_INDEX_DEPTH 8
#endif
/* size in chars of lejp_ctx.path[] */
#ifndef LEJP_MAX_PATH
#define LEJP_MAX_PATH 128
#endif
/* lejp_ctx.buf[] is declared with this many chars plus one */
#ifndef LEJP_STRING_CHUNK
/* must be >= 30 to assemble floats */
#define LEJP_STRING_CHUNK 254
#endif
194 
/*
 * Single-bit flags, one per bit position 0..3, named after the parts of a
 * JSON number that have been seen.
 * NOTE(review): presumably accumulated in lejp_ctx.f while a number is being
 * assembled -- confirm in the parser implementation.
 */
enum num_flags {
	LEJP_SEEN_MINUS		= 0x01,	/* bit 0 */
	LEJP_SEEN_POINT		= 0x02,	/* bit 1 */
	LEJP_SEEN_POST_POINT	= 0x04,	/* bit 2 */
	LEJP_SEEN_EXP		= 0x08	/* bit 3 */
};
201 
/*
 * One entry of the state stack held in lejp_ctx.st[] (LEJP_MAX_DEPTH entries,
 * head tracked by lejp_ctx.sp).  All members are plain char to keep each
 * entry at four bytes.
 */
struct _lejp_stack {
	char s; /* lejp_state stack*/
	char p;	/* path length */
	char i; /* index array length */
	char b; /* user bitfield */
};
208 
/*
 * One entry of the parsing stack held in lejp_ctx.pst[]
 * (LEJP_MAX_PARSING_STACK_DEPTH entries, head tracked by lejp_ctx.pst_sp).
 * It carries the same kinds of things lejp_parser_push() takes as arguments:
 * a user pointer, a callback and a path list with its count.
 */
struct _lejp_parsing_stack {
	void *user;	/* private to the stack level */
	/* same signature as the lejp_callback typedef */
	signed char (*callback)(struct lejp_ctx *ctx, char reason);
	const char * const *paths; /* array of path strings for this level */
	uint8_t count_paths; /* number of entries in paths */
	uint8_t ppos; /* NOTE(review): presumably a position within lejp_ctx.path[] -- confirm */
	uint8_t path_match; /* NOTE(review): presumably the matched-path state for this level -- confirm */
};
217 
/*
 * Complete parser context.  All storage is embedded in the struct with
 * fixed-size arrays dimensioned by the LEJP_MAX_* / LEJP_STRING_CHUNK macros;
 * there are no heap pointers owned by the context itself visible here.
 */
struct lejp_ctx {

	/* sorted by type for most compact alignment
	 *
	 * pointers
	 */
	void *user; /* opaque user pointer; see the user argument of lejp_construct() */

	/* arrays */

	struct _lejp_parsing_stack pst[LEJP_MAX_PARSING_STACK_DEPTH]; /* parsing stack, head is pst_sp */
	struct _lejp_stack st[LEJP_MAX_DEPTH]; /* state stack, head is sp */
	uint16_t i[LEJP_MAX_INDEX_DEPTH]; /* index array */
	/*
	 * NOTE(review): the "index array" comment below looks copied from i[];
	 * presumably this holds wildcard match positions consumed by
	 * lejp_get_wildcard() -- confirm.
	 */
	uint16_t wild[LEJP_MAX_INDEX_DEPTH]; /* index array */
	char path[LEJP_MAX_PATH]; /* current path; a newly parsed name is at its end */
	char buf[LEJP_STRING_CHUNK + 1]; /* number / string chunk handed to value callbacks */

	/* size_t */

	size_t path_stride; /* 0 means default ptr size, else stride */

	/* int */

	uint32_t line;

	/* short */

	uint16_t uni;

	/* char */

	uint8_t npos;
	uint8_t dcount;
	uint8_t f;
	uint8_t sp; /* stack head */
	uint8_t ipos; /* index stack depth */
	uint8_t count_paths;
	uint8_t path_match;
	uint8_t path_match_len;
	uint8_t wildcount;
	uint8_t pst_sp; /* parsing stack head */
	uint8_t outer_array;
};
261 
/**
 * lejp_construct() - prepare a caller-provided context for parsing
 * \param ctx:		context storage supplied by the caller
 * \param callback:	user callback that receives the LEJPCB_* reasons
 * \param user:		opaque pointer for the callback's use
 * \param paths:	array of path strings
 * \param paths_count:	number of entries in \p paths
 *
 * The callback documentation in this header says the callback is associated
 * with the context at construction time and that LEJPCB_CONSTRUCTED is
 * issued when the context has just been constructed.
 * NOTE(review): \p paths is presumably the list of JSON paths the parser
 * matches against (see lejp_ctx.path_match) -- confirm.
 */
LWS_VISIBLE LWS_EXTERN void
lejp_construct(struct lejp_ctx *ctx,
	       signed char (*callback)(struct lejp_ctx *ctx, char reason),
	       void *user, const char * const *paths, unsigned char paths_count);
266 
/**
 * lejp_destruct() - finish with a context set up by lejp_construct()
 * \param ctx:	the context
 *
 * The callback documentation in this header says LEJPCB_DESTRUCTED is issued
 * when the context is being destructed, so the user callback can free
 * anything it allocated at construction time.
 */
LWS_VISIBLE LWS_EXTERN void
lejp_destruct(struct lejp_ctx *ctx);
269 
/**
 * lejp_parse() - feed a buffer of JSON to the parser
 * \param ctx:	context previously set up with lejp_construct()
 * \param json:	input bytes
 * \param len:	length of \p json (presumably in bytes -- confirm)
 *
 * Per the callback documentation in this header: on successful completion
 * the return is 0 or positive, indicating the amount of unused bytes left in
 * the input buffer; on failure it is a negative error code (see
 * enum lejp_reasons and lejp_error_to_string()).
 * NOTE(review): LEJP_CONTINUE (-1) presumably means the JSON is incomplete
 * and more input may be passed in a further call -- confirm.
 */
LWS_VISIBLE LWS_EXTERN int
lejp_parse(struct lejp_ctx *ctx, const unsigned char *json, int len);
272 
/**
 * lejp_change_callback() - switch the user callback on an existing context
 * \param ctx:		the context
 * \param callback:	the callback to use from now on
 *
 * NOTE(review): whether the old / new callbacks are sent any notification
 * (e.g. LEJPCB_DESTRUCTED / LEJPCB_CONSTRUCTED) at the switch is not visible
 * from this header -- confirm in the implementation.
 */
LWS_VISIBLE LWS_EXTERN void
lejp_change_callback(struct lejp_ctx *ctx,
		     signed char (*callback)(struct lejp_ctx *ctx, char reason));
276 
/*
 * Push the current paths / paths_count and lejp_cb onto a stack in the ctx,
 * and start using the new ones given here.
 *
 * The stack is lejp_ctx.pst[], which has LEJP_MAX_PARSING_STACK_DEPTH
 * entries.
 * NOTE(review): the int return presumably reports success / failure (e.g.
 * the stack being full) -- confirm the exact values in the implementation.
 */
LWS_VISIBLE LWS_EXTERN int
lejp_parser_push(struct lejp_ctx *ctx, void *user, const char * const *paths,
		 unsigned char paths_count, lejp_callback lejp_cb);
284 
/*
 * Pop the previously used paths / paths_count and lejp_cb, and continue
 * parsing using those as before.  This is the counterpart of
 * lejp_parser_push().
 *
 * NOTE(review): the int return presumably reports success / failure (e.g.
 * nothing left to pop) -- confirm the exact values in the implementation.
 */
LWS_VISIBLE LWS_EXTERN int
lejp_parser_pop(struct lejp_ctx *ctx);
291 
/*
 * Exported for use when reevaluating a path for use with a subcontext.
 *
 * NOTE(review): presumably compares lejp_ctx.path[] against the active path
 * list and updates lejp_ctx.path_match / path_match_len -- confirm in the
 * implementation.
 */
LWS_VISIBLE LWS_EXTERN void
lejp_check_path_match(struct lejp_ctx *ctx);
295 
/*
 * Retrieve wildcard match information from the context.
 *
 * NOTE(review): presumably copies the part of the current path matched by
 * wildcard number `wildcard` into `dest` (capacity `len`) and returns the
 * number of chars written -- none of this is visible from the header, so
 * confirm the semantics and the return value in the implementation.
 */
LWS_VISIBLE LWS_EXTERN int
lejp_get_wildcard(struct lejp_ctx *ctx, int wildcard, char *dest, int len);
298 
/*
 * Map an error code to a human-readable string.
 *
 * NOTE(review): `e` is presumably one of the negative enum lejp_reasons
 * values returned by lejp_parse(); the returned pointer is const, so the
 * caller must not modify it -- confirm ownership / lifetime and the handling
 * of out-of-range codes in the implementation.
 */
LWS_VISIBLE LWS_EXTERN const char *
lejp_error_to_string(int e);
301 //@}
302