1 /*
2 * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
3 *
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include <stdlib.h>
18 #include <limits.h>
19 #include <errno.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <assert.h>
24 #include <math.h>
25
26 #include "yajl_parse.h"
27 #include "yajl_lex.h"
28 #include "yajl_parser.h"
29 #include "yajl_encode.h"
30 #include "yajl_bytestack.h"
31
/*
 * Largest accumulated magnitude that can still be multiplied by ten
 * without exceeding LLONG_MAX.
 */
#define MAX_VALUE_TO_MULTIPLY (LLONG_MAX / 10)

/*
 * Convert the first `length` bytes of `number` (not NUL-terminated) to a
 * long long, with strtol-like range reporting.
 *
 * The text is an optional sign followed by decimal digits.  On success the
 * value is returned and errno is left untouched.  If the value does not fit
 * in a long long, or a non-digit byte is met, errno is set to ERANGE and
 * LLONG_MIN (when a '-' sign was seen) or LLONG_MAX (otherwise) is returned.
 * Because LLONG_MIN and LLONG_MAX are also valid results, callers must clear
 * errno before the call and test it afterwards.
 *
 * An empty input, or one consisting only of a sign, yields 0.
 */
long long
yajl_parse_integer(const unsigned char *number, unsigned int length)
{
    const unsigned char *pos = number;
    const unsigned char *end = number + length;
    /* the magnitude is accumulated unsigned so that |LLONG_MIN| is
     * representable and no signed arithmetic can overflow */
    unsigned long long magnitude = 0;
    unsigned long long limit = (unsigned long long) LLONG_MAX;
    int negative = 0;

    /* never look at a byte beyond `length`, even for the sign */
    if (pos < end && *pos == '-') {
        pos++;
        negative = 1;
        /* |LLONG_MIN|, computed without negating a signed value */
        limit = 0ULL - (unsigned long long) LLONG_MIN;
    }
    if (pos < end && *pos == '+') { pos++; }

    for (; pos < end; pos++) {
        unsigned int digit;

        /* validate before the byte takes part in any arithmetic */
        if (*pos < '0' || *pos > '9') goto out_of_range;
        digit = (unsigned int) (*pos - '0');

        if (magnitude > (unsigned long long) MAX_VALUE_TO_MULTIPLY) {
            goto out_of_range;
        }
        magnitude *= 10;
        if (limit - magnitude < digit) goto out_of_range;
        magnitude += digit;
    }

    if (!negative) return (long long) magnitude;
    /* magnitude == limit can only mean LLONG_MIN, which cannot be produced
     * by negating a positive long long */
    if (magnitude == limit) return LLONG_MIN;
    return -(long long) magnitude;

  out_of_range:
    errno = ERANGE;
    return negative ? LLONG_MIN : LLONG_MAX;
}
63
64 unsigned char *
yajl_render_error_string(yajl_handle hand,const unsigned char * jsonText,size_t jsonTextLen,int verbose)65 yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
66 size_t jsonTextLen, int verbose)
67 {
68 size_t offset = hand->bytesConsumed;
69 unsigned char * str;
70 const char * errorType = NULL;
71 const char * errorText = NULL;
72 char text[72];
73 const char * arrow = " (right here) ------^\n";
74
75 if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
76 errorType = "parse";
77 errorText = hand->parseError;
78 } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
79 errorType = "lexical";
80 errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
81 } else {
82 errorType = "unknown";
83 }
84
85 {
86 size_t memneeded = 0;
87 memneeded += strlen(errorType);
88 memneeded += strlen(" error");
89 if (errorText != NULL) {
90 memneeded += strlen(": ");
91 memneeded += strlen(errorText);
92 }
93 str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
94 if (!str) return NULL;
95 str[0] = 0;
96 strcat((char *) str, errorType);
97 strcat((char *) str, " error");
98 if (errorText != NULL) {
99 strcat((char *) str, ": ");
100 strcat((char *) str, errorText);
101 }
102 strcat((char *) str, "\n");
103 }
104
105 /* now we append as many spaces as needed to make sure the error
106 * falls at char 41, if verbose was specified */
107 if (verbose) {
108 size_t start, end, i;
109 size_t spacesNeeded;
110
111 spacesNeeded = (offset < 30 ? 40 - offset : 10);
112 start = (offset >= 30 ? offset - 30 : 0);
113 end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);
114
115 for (i=0;i<spacesNeeded;i++) text[i] = ' ';
116
117 for (;start < end;start++, i++) {
118 if (jsonText[start] != '\n' && jsonText[start] != '\r')
119 {
120 text[i] = jsonText[start];
121 }
122 else
123 {
124 text[i] = ' ';
125 }
126 }
127 assert(i <= 71);
128 text[i++] = '\n';
129 text[i] = 0;
130 {
131 char * newStr = (char *)
132 YA_MALLOC(&(hand->alloc), (unsigned int)(strlen((char *) str) +
133 strlen((char *) text) +
134 strlen(arrow) + 1));
135 if (newStr) {
136 newStr[0] = 0;
137 strcat((char *) newStr, (char *) str);
138 strcat((char *) newStr, text);
139 strcat((char *) newStr, arrow);
140 }
141 YA_FREE(&(hand->alloc), str);
142 str = (unsigned char *) newStr;
143 }
144 }
145 return str;
146 }
147
/*
 * Check for client cancellation.
 *
 * `x` is the return value of a client callback.  When it is zero the top of
 * the state stack is replaced by yajl_state_parse_error, the parse error
 * text is set, and the *enclosing function* returns
 * yajl_status_client_canceled.  The macro expects a variable named `hand`
 * to be in scope at the point of use.
 *
 * Wrapped in do { } while (0) so that `_CC_CHK(...);` is exactly one
 * statement and cannot capture a following `else`.
 */
#define _CC_CHK(x)                                                      \
    do {                                                                \
        if (!(x)) {                                                     \
            yajl_bs_set(hand->stateStack, yajl_state_parse_error);      \
            hand->parseError =                                          \
                "client cancelled parse via callback return value";     \
            return yajl_status_client_canceled;                         \
        }                                                               \
    } while (0)
156
157
158 yajl_status
yajl_do_finish(yajl_handle hand)159 yajl_do_finish(yajl_handle hand)
160 {
161 yajl_status stat;
162 stat = yajl_do_parse(hand,(const unsigned char *) " ",1);
163
164 if (stat != yajl_status_ok) return stat;
165
166 switch(yajl_bs_current(hand->stateStack))
167 {
168 case yajl_state_parse_error:
169 case yajl_state_lexical_error:
170 return yajl_status_error;
171 case yajl_state_got_value:
172 case yajl_state_parse_complete:
173 return yajl_status_ok;
174 default:
175 if (!(hand->flags & yajl_allow_partial_values))
176 {
177 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
178 hand->parseError = "premature EOF";
179 return yajl_status_error;
180 }
181 return yajl_status_ok;
182 }
183 }
184
/*
 * Run the parser state machine over one chunk of JSON text.
 *
 * hand->bytesConsumed is reset to zero on entry and handed to the lexer as
 * the running offset into jsonText.  The state on top of hand->stateStack
 * decides which tokens are acceptable; each token fires the matching client
 * callback (when one is registered) and/or updates the state stack, after
 * which control jumps back to around_again to dispatch on the new state.
 *
 * Naming note: in this function the "bracket" tokens open and close maps
 * and the "brace" tokens open and close arrays.
 *
 * Returns
 *   yajl_status_ok              the lexer returned yajl_tok_eof for this
 *                               chunk, or the parse is complete and either
 *                               trailing garbage is allowed or the chunk is
 *                               exhausted;
 *   yajl_status_error           the top state is a lexical or parse error;
 *   yajl_status_client_canceled a callback returned zero (see _CC_CHK).
 *
 * Leaving the outer switch means the state stack holds a value that no case
 * handles; that is treated as fatal (abort()).
 */
yajl_status
yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
              size_t jsonTextLen)
{
    yajl_tok tok;
    const unsigned char * buf;
    size_t bufLen;
    size_t * offset = &(hand->bytesConsumed);

    /* consumed bytes are counted from the start of this chunk */
    *offset = 0;

  around_again:
    switch (yajl_bs_current(hand->stateStack)) {
        case yajl_state_parse_complete:
            /* a complete top-level value has been seen */
            if (hand->flags & yajl_allow_multiple_values) {
                /* go back to accepting another top-level value */
                yajl_bs_set(hand->stateStack, yajl_state_got_value);
                goto around_again;
            }
            if (!(hand->flags & yajl_allow_trailing_garbage)) {
                /* anything the lexer still yields other than EOF is an
                 * error; re-dispatching then either reports it or, once
                 * the chunk is exhausted, reaches the return below */
                if (*offset != jsonTextLen) {
                    tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                                       offset, &buf, &bufLen);
                    if (tok != yajl_tok_eof) {
                        yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                        hand->parseError = "trailing garbage";
                    }
                    goto around_again;
                }
            }
            return yajl_status_ok;
        case yajl_state_lexical_error:
        case yajl_state_parse_error:
            /* error states are terminal for this call */
            return yajl_status_error;
        case yajl_state_start:
        case yajl_state_got_value:
        case yajl_state_map_need_val:
        case yajl_state_array_need_val:
        case yajl_state_array_start:  {
            /* for arrays and maps, we advance the state for this
             * depth, then push the state of the next depth.
             * If an error occurs during the parsing of the nesting
             * entity, the state at this level will not matter.
             * a state that needs pushing will be anything other
             * than state_start */

            yajl_state stateToPush = yajl_state_start;

            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);

            switch (tok) {
                case yajl_tok_eof:
                    /* chunk exhausted; the state is kept for the next call */
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                case yajl_tok_c_comment:
                    /* comments are reported but leave the state untouched */
                    if (hand->callbacks && hand->callbacks->yajl_c_comment) {
                        _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
                                                                buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_cpp_comment:
                    if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
                        _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
                                                                  buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_string:
                    /* no escapes: the token bytes are passed on directly */
                    if (hand->callbacks && hand->callbacks->yajl_string) {
                        _CC_CHK(hand->callbacks->yajl_string(hand->ctx,
                                                             buf, bufLen));
                    }
                    break;
                case yajl_tok_string_with_escapes:
                    /* decode into the handle's scratch buffer first */
                    if (hand->callbacks && hand->callbacks->yajl_string) {
                        yajl_buf_clear(hand->decodeBuf);
                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
                        _CC_CHK(hand->callbacks->yajl_string(
                                    hand->ctx, yajl_buf_data(hand->decodeBuf),
                                    yajl_buf_len(hand->decodeBuf)));
                    }
                    break;
                case yajl_tok_bool:
                    /* the first byte tells the two literals apart */
                    if (hand->callbacks && hand->callbacks->yajl_boolean) {
                        _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
                                                              *buf == 't'));
                    }
                    break;
                case yajl_tok_null:
                    if (hand->callbacks && hand->callbacks->yajl_null) {
                        _CC_CHK(hand->callbacks->yajl_null(hand->ctx));
                    }
                    break;
                case yajl_tok_left_bracket:
                    /* opens a map */
                    if (hand->callbacks && hand->callbacks->yajl_start_map) {
                        _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
                    }
                    stateToPush = yajl_state_map_start;
                    break;
                case yajl_tok_left_brace:
                    /* opens an array */
                    if (hand->callbacks && hand->callbacks->yajl_start_array) {
                        _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
                    }
                    stateToPush = yajl_state_array_start;
                    break;
                case yajl_tok_integer:
                    /* a yajl_number callback takes precedence and receives
                     * the raw text; only otherwise is the integer converted */
                    if (hand->callbacks) {
                        if (hand->callbacks->yajl_number) {
                            _CC_CHK(hand->callbacks->yajl_number(
                                        hand->ctx,(const char *) buf, bufLen));
                        } else if (hand->callbacks->yajl_integer) {
                            long long i = 0;
                            /* LLONG_MIN/LLONG_MAX are valid results, so
                             * errno is what identifies an overflow */
                            errno = 0;
                            i = yajl_parse_integer(buf, bufLen);
                            if ((i == LLONG_MIN || i == LLONG_MAX) &&
                                errno == ERANGE)
                            {
                                yajl_bs_set(hand->stateStack,
                                            yajl_state_parse_error);
                                hand->parseError = "integer overflow" ;
                                /* try to restore error offset */
                                if (*offset >= bufLen) *offset -= bufLen;
                                else *offset = 0;
                                goto around_again;
                            }
                            _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
                                                                  i));
                        }
                    }
                    break;
                case yajl_tok_double:
                    /* same precedence rule as for integers */
                    if (hand->callbacks) {
                        if (hand->callbacks->yajl_number) {
                            _CC_CHK(hand->callbacks->yajl_number(
                                        hand->ctx, (const char *) buf, bufLen));
                        } else if (hand->callbacks->yajl_double) {
                            double d = 0.0;
                            /* the token is copied into the scratch buffer
                             * before strtod reads it -- presumably because
                             * yajl_buf_append leaves it NUL-terminated;
                             * confirm against yajl_buf */
                            yajl_buf_clear(hand->decodeBuf);
                            yajl_buf_append(hand->decodeBuf, buf, bufLen);
                            buf = yajl_buf_data(hand->decodeBuf);
                            errno = 0;
                            d = strtod((char *) buf, NULL);
                            if ((d == HUGE_VAL || d == -HUGE_VAL) &&
                                errno == ERANGE)
                            {
                                yajl_bs_set(hand->stateStack,
                                            yajl_state_parse_error);
                                hand->parseError = "numeric (floating point) "
                                    "overflow";
                                /* try to restore error offset */
                                if (*offset >= bufLen) *offset -= bufLen;
                                else *offset = 0;
                                goto around_again;
                            }
                            _CC_CHK(hand->callbacks->yajl_double(hand->ctx,
                                                                 d));
                        }
                    }
                    break;
                case yajl_tok_right_brace: {
                    /* closing an array is only valid here when the array
                     * is still empty (state array_start) */
                    if (yajl_bs_current(hand->stateStack) ==
                        yajl_state_array_start)
                    {
                        if (hand->callbacks &&
                            hand->callbacks->yajl_end_array)
                        {
                            _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
                        }
                        yajl_bs_pop(hand->stateStack);
                        goto around_again;
                    }
                    /* intentional fall-through */
                }
                case yajl_tok_colon:
                case yajl_tok_comma:
                case yajl_tok_right_bracket:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "unallowed token at this point in JSON text";
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "invalid token, internal error";
                    goto around_again;
            }
            /* got a value.  transition depends on the state we're in. */
            {
                yajl_state s = yajl_bs_current(hand->stateStack);
                if (s == yajl_state_start || s == yajl_state_got_value) {
                    yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
                } else if (s == yajl_state_map_need_val) {
                    yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
                } else {
                    yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
                }
            }
            /* a map or array was opened: descend one level */
            if (stateToPush != yajl_state_start) {
                yajl_bs_push(hand->stateStack, stateToPush);
            }

            goto around_again;
        }
        case yajl_state_map_start:
        case yajl_state_map_need_key: {
            /* only difference between these two states is that in
             * start '}' is valid, whereas in need_key, we've parsed
             * a comma, and a string key _must_ follow */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                case yajl_tok_string_with_escapes:
                    /* decode first, then share the plain-string path by
                     * pointing buf/bufLen at the decoded bytes */
                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
                        yajl_buf_clear(hand->decodeBuf);
                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
                        buf = yajl_buf_data(hand->decodeBuf);
                        bufLen = yajl_buf_len(hand->decodeBuf);
                    }
                    /* intentional fall-through */
                case yajl_tok_string:
                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
                        _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
                                                              bufLen));
                    }
                    /* a key must be followed by a separator */
                    yajl_bs_set(hand->stateStack, yajl_state_map_sep);
                    goto around_again;
                case yajl_tok_c_comment:
                    if (hand->callbacks && hand->callbacks->yajl_c_comment) {
                        _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
                                                                buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_cpp_comment:
                    if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
                        _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
                                                                  buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_right_bracket:
                    /* closing the map is only valid while it is empty */
                    if (yajl_bs_current(hand->stateStack) ==
                        yajl_state_map_start)
                    {
                        if (hand->callbacks && hand->callbacks->yajl_end_map) {
                            _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
                        }
                        yajl_bs_pop(hand->stateStack);
                        goto around_again;
                    }
                    /* in map_need_key this falls through to the error */
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "invalid object key (must be a string)";
                    goto around_again;
            }
        }
        case yajl_state_map_sep: {
            /* a key has been read; only a colon (or a comment) may follow */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_c_comment:
                    if (hand->callbacks && hand->callbacks->yajl_c_comment) {
                        _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
                                                                buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_cpp_comment:
                    if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
                        _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
                                                                  buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_colon:
                    yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "object key and value must "
                        "be separated by a colon (':')";
                    goto around_again;
            }
        }
        case yajl_state_map_got_val: {
            /* a key/value pair is complete: expect a comma or the end of
             * the map */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_c_comment:
                    if (hand->callbacks && hand->callbacks->yajl_c_comment) {
                        _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
                                                                buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_cpp_comment:
                    if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
                        _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
                                                                  buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_right_bracket:
                    if (hand->callbacks && hand->callbacks->yajl_end_map) {
                        _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
                    }
                    yajl_bs_pop(hand->stateStack);
                    goto around_again;
                case yajl_tok_comma:
                    yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError = "after key and value, inside map, "
                                       "I expect ',' or '}'";
                    /* try to restore error offset */
                    if (*offset >= bufLen) *offset -= bufLen;
                    else *offset = 0;
                    goto around_again;
            }
        }
        case yajl_state_array_got_val: {
            /* an element is complete: expect a comma or the end of the
             * array */
            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
                               offset, &buf, &bufLen);
            switch (tok) {
                case yajl_tok_c_comment:
                    if (hand->callbacks && hand->callbacks->yajl_c_comment) {
                        _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
                                                                buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_cpp_comment:
                    if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
                        _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
                                                                  buf, bufLen));
                    }
                    goto around_again;
                case yajl_tok_right_brace:
                    if (hand->callbacks && hand->callbacks->yajl_end_array) {
                        _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
                    }
                    yajl_bs_pop(hand->stateStack);
                    goto around_again;
                case yajl_tok_comma:
                    yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
                    goto around_again;
                case yajl_tok_eof:
                    return yajl_status_ok;
                case yajl_tok_error:
                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
                    goto around_again;
                default:
                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
                    hand->parseError =
                        "after array element, I expect ',' or ']'";
                    goto around_again;
            }
        }
    }

    /* only reached when the top state matches none of the cases above */
    abort();
    return yajl_status_error;
}
558
559