1 /*
2  * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 #include <stdlib.h>
18 #include <limits.h>
19 #include <errno.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <assert.h>
24 #include <math.h>
25 
26 #include "yajl_parse.h"
27 #include "yajl_lex.h"
28 #include "yajl_parser.h"
29 #include "yajl_encode.h"
30 #include "yajl_bytestack.h"
31 
/*
 * Parse the first `length` bytes of `number` as a base-10 integer.
 *
 * An optional leading '-' and/or '+' is accepted, followed by decimal
 * digits.  The input does not need to be NUL terminated; exactly
 * `length` bytes are examined and nothing beyond them is read.
 *
 * Error reporting follows strtol: on overflow (or when a non-digit byte
 * is encountered) errno is set to ERANGE and LLONG_MAX is returned, or
 * LLONG_MIN if a leading '-' was seen.  errno is never cleared here, so
 * callers must set it to 0 before the call to detect failure.
 *
 * An empty input yields 0.
 */
longlong
yajl_parse_integer(const unsigned char *number, unsigned int length)
{
    const unsigned char *pos = number;
    const unsigned char *end = number + length;
    int negative = 0;
    /* The magnitude is accumulated as a NON-POSITIVE value.  The negative
     * range is at least as large as the positive one, so this lets
     * LLONG_MIN itself be parsed without a special case. */
    longlong ret = 0;

    if (pos < end && *pos == '-') { pos++; negative = 1; }
    if (pos < end && *pos == '+') { pos++; }

    while (pos < end) {
        int digit;

        if (*pos < '0' || *pos > '9') {
            errno = ERANGE;
            return negative ? LLONG_MIN : LLONG_MAX;
        }
        digit = *pos++ - '0';

        /* Check BEFORE multiplying/subtracting: signed overflow is
         * undefined behaviour, so it must never actually happen.  The
         * first test guarantees ret * 10 is representable, which makes
         * the second test safe to evaluate. */
        if (ret < LLONG_MIN / 10 || ret * 10 < LLONG_MIN + digit) {
            errno = ERANGE;
            return negative ? LLONG_MIN : LLONG_MAX;
        }
        ret = ret * 10 - digit;
    }

    if (negative) {
        return ret;
    }

    /* A magnitude below -LLONG_MAX has no positive counterpart. */
    if (ret < -LLONG_MAX) {
        errno = ERANGE;
        return LLONG_MAX;
    }
    return -ret;
}
63 
64 unsigned char *
yajl_render_error_string(yajl_handle hand,const unsigned char * jsonText,size_t jsonTextLen,int verbose)65 yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
66                          size_t jsonTextLen, int verbose)
67 {
68     size_t offset = hand->bytesConsumed;
69     unsigned char * str;
70     const char * errorType = NULL;
71     const char * errorText = NULL;
72     char text[72];
73     const char * arrow = "                     (right here) ------^\n";
74 
75     if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
76         errorType = "parse";
77         errorText = hand->parseError;
78     } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
79         errorType = "lexical";
80         errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
81     } else {
82         errorType = "unknown";
83     }
84 
85     {
86         size_t memneeded = 0;
87         memneeded += strlen(errorType);
88         memneeded += strlen(" error");
89         if (errorText != NULL) {
90             memneeded += strlen(": ");
91             memneeded += strlen(errorText);
92         }
93         str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
94         if (!str) return NULL;
95         str[0] = 0;
96         strcat((char *) str, errorType);
97         strcat((char *) str, " error");
98         if (errorText != NULL) {
99             strcat((char *) str, ": ");
100             strcat((char *) str, errorText);
101         }
102         strcat((char *) str, "\n");
103     }
104 
105     /* now we append as many spaces as needed to make sure the error
106      * falls at char 41, if verbose was specified */
107     if (verbose) {
108         size_t start, end, i;
109         size_t spacesNeeded;
110 
111         spacesNeeded = (offset < 30 ? 40 - offset : 10);
112         start = (offset >= 30 ? offset - 30 : 0);
113         end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);
114 
115         for (i=0;i<spacesNeeded;i++) text[i] = ' ';
116 
117         for (;start < end;start++, i++) {
118             if (jsonText[start] != '\n' && jsonText[start] != '\r')
119             {
120                 text[i] = jsonText[start];
121             }
122             else
123             {
124                 text[i] = ' ';
125             }
126         }
127         assert(i <= 71);
128         text[i++] = '\n';
129         text[i] = 0;
130         {
131             char * newStr = (char *)
132                 YA_MALLOC(&(hand->alloc), (unsigned int)(strlen((char *) str) +
133                                                          strlen((char *) text) +
134                                                          strlen(arrow) + 1));
135             if (newStr) {
136                 newStr[0] = 0;
137                 strcat((char *) newStr, (char *) str);
138                 strcat((char *) newStr, text);
139                 strcat((char *) newStr, arrow);
140             }
141             YA_FREE(&(hand->alloc), str);
142             str = (unsigned char *) newStr;
143         }
144     }
145     return str;
146 }
147 
/*
 * Check for client cancellation.
 *
 * `x` is the return value of a client callback.  When it is zero the
 * parser is put into the parse-error state, the error message is
 * recorded, and the ENCLOSING FUNCTION returns
 * yajl_status_client_canceled.  A variable named `hand` must be in scope
 * at the point of use.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and is safe in unbraced if/else bodies; always terminate the
 * invocation with a semicolon.
 */
#define _CC_CHK(x)                                                    \
    do {                                                              \
        if (!(x)) {                                                   \
            yajl_bs_set(hand->stateStack, yajl_state_parse_error);    \
            hand->parseError =                                        \
                "client cancelled parse via callback return value";   \
            return yajl_status_client_canceled;                       \
        }                                                             \
    } while (0)
156 
157 
158 yajl_status
yajl_do_finish(yajl_handle hand)159 yajl_do_finish(yajl_handle hand)
160 {
161     yajl_status stat;
162     stat = yajl_do_parse(hand,(const unsigned char *) " ",1);
163 
164     if (stat != yajl_status_ok) return stat;
165 
166     switch(yajl_bs_current(hand->stateStack))
167     {
168         case yajl_state_parse_error:
169         case yajl_state_lexical_error:
170             return yajl_status_error;
171         case yajl_state_got_value:
172         case yajl_state_parse_complete:
173             return yajl_status_ok;
174         default:
175             if (!(hand->flags & yajl_allow_partial_values))
176             {
177                 yajl_bs_set(hand->stateStack, yajl_state_parse_error);
178                 hand->parseError = "premature EOF";
179                 return yajl_status_error;
180             }
181             return yajl_status_ok;
182     }
183 }
184 
185 yajl_status
yajl_do_parse(yajl_handle hand,const unsigned char * jsonText,size_t jsonTextLen)186 yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
187               size_t jsonTextLen)
188 {
189     yajl_tok tok;
190     const unsigned char * buf;
191     size_t bufLen;
192     size_t * offset = &(hand->bytesConsumed);
193 
194     *offset = 0;
195 
196   around_again:
197     switch (yajl_bs_current(hand->stateStack)) {
198         case yajl_state_parse_complete:
199             if (hand->flags & yajl_allow_multiple_values) {
200                 yajl_bs_set(hand->stateStack, yajl_state_got_value);
201                 goto around_again;
202             }
203             if (!(hand->flags & yajl_allow_trailing_garbage)) {
204                 if (*offset != jsonTextLen) {
205                     tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
206                                        offset, &buf, &bufLen);
207                     if (tok != yajl_tok_eof) {
208                         yajl_bs_set(hand->stateStack, yajl_state_parse_error);
209                         hand->parseError = "trailing garbage";
210                     }
211                     goto around_again;
212                 }
213             }
214             return yajl_status_ok;
215         case yajl_state_lexical_error:
216         case yajl_state_parse_error:
217             return yajl_status_error;
218         case yajl_state_start:
219         case yajl_state_got_value:
220         case yajl_state_map_need_val:
221         case yajl_state_array_need_val:
222         case yajl_state_array_start:  {
223             /* for arrays and maps, we advance the state for this
224              * depth, then push the state of the next depth.
225              * If an error occurs during the parsing of the nesting
226              * enitity, the state at this level will not matter.
227              * a state that needs pushing will be anything other
228              * than state_start */
229 
230             yajl_state stateToPush = yajl_state_start;
231 
232             tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
233                                offset, &buf, &bufLen);
234 
235             switch (tok) {
236                 case yajl_tok_eof:
237                     return yajl_status_ok;
238                 case yajl_tok_error:
239                     yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
240                     goto around_again;
241                 case yajl_tok_c_comment:
242                     if (hand->callbacks && hand->callbacks->yajl_c_comment) {
243                         _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
244                                                              buf, bufLen));
245                     }
246                     goto around_again;
247                 case yajl_tok_cpp_comment:
248                     if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
249                         _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
250                                                              buf, bufLen));
251                     }
252                     goto around_again;
253                 case yajl_tok_string:
254                     if (hand->callbacks && hand->callbacks->yajl_string) {
255                         _CC_CHK(hand->callbacks->yajl_string(hand->ctx,
256                                                              buf, bufLen));
257                     }
258                     break;
259                 case yajl_tok_string_with_escapes:
260                     if (hand->callbacks && hand->callbacks->yajl_string) {
261                         yajl_buf_clear(hand->decodeBuf);
262                         yajl_string_decode(hand->decodeBuf, buf, bufLen);
263                         _CC_CHK(hand->callbacks->yajl_string(
264                                     hand->ctx, yajl_buf_data(hand->decodeBuf),
265                                     yajl_buf_len(hand->decodeBuf)));
266                     }
267                     break;
268                 case yajl_tok_bool:
269                     if (hand->callbacks && hand->callbacks->yajl_boolean) {
270                         _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
271                                                               *buf == 't'));
272                     }
273                     break;
274                 case yajl_tok_null:
275                     if (hand->callbacks && hand->callbacks->yajl_null) {
276                         _CC_CHK(hand->callbacks->yajl_null(hand->ctx));
277                     }
278                     break;
279                 case yajl_tok_left_bracket:
280                     if (hand->callbacks && hand->callbacks->yajl_start_map) {
281                         _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
282                     }
283                     stateToPush = yajl_state_map_start;
284                     break;
285                 case yajl_tok_left_brace:
286                     if (hand->callbacks && hand->callbacks->yajl_start_array) {
287                         _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
288                     }
289                     stateToPush = yajl_state_array_start;
290                     break;
291                 case yajl_tok_integer:
292                     if (hand->callbacks) {
293                         if (hand->callbacks->yajl_number) {
294                             _CC_CHK(hand->callbacks->yajl_number(
295                                         hand->ctx,(const char *) buf, bufLen));
296                         } else if (hand->callbacks->yajl_integer) {
297                             longlong i = 0;
298                             errno = 0;
299                             i = yajl_parse_integer(buf, bufLen);
300                             if ((i == LLONG_MIN || i == LLONG_MAX) &&
301                                 errno == ERANGE)
302                             {
303                                 yajl_bs_set(hand->stateStack,
304                                             yajl_state_parse_error);
305                                 hand->parseError = "integer overflow" ;
306                                 /* try to restore error offset */
307                                 if (*offset >= bufLen) *offset -= bufLen;
308                                 else *offset = 0;
309                                 goto around_again;
310                             }
311                             _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
312                                                                   i));
313                         }
314                     }
315                     break;
316                 case yajl_tok_double:
317                     if (hand->callbacks) {
318                         if (hand->callbacks->yajl_number) {
319                             _CC_CHK(hand->callbacks->yajl_number(
320                                         hand->ctx, (const char *) buf, bufLen));
321                         } else if (hand->callbacks->yajl_double) {
322                             double d = 0.0;
323                             yajl_buf_clear(hand->decodeBuf);
324                             yajl_buf_append(hand->decodeBuf, buf, bufLen);
325                             buf = yajl_buf_data(hand->decodeBuf);
326                             errno = 0;
327                             d = strtod((char *) buf, NULL);
328                             if ((d == HUGE_VAL || d == -HUGE_VAL) &&
329                                 errno == ERANGE)
330                             {
331                                 yajl_bs_set(hand->stateStack,
332                                             yajl_state_parse_error);
333                                 hand->parseError = "numeric (floating point) "
334                                     "overflow";
335                                 /* try to restore error offset */
336                                 if (*offset >= bufLen) *offset -= bufLen;
337                                 else *offset = 0;
338                                 goto around_again;
339                             }
340                             _CC_CHK(hand->callbacks->yajl_double(hand->ctx,
341                                                                  d));
342                         }
343                     }
344                     break;
345                 case yajl_tok_right_brace: {
346                     if (yajl_bs_current(hand->stateStack) ==
347                         yajl_state_array_start)
348                     {
349                         if (hand->callbacks &&
350                             hand->callbacks->yajl_end_array)
351                         {
352                             _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
353                         }
354                         yajl_bs_pop(hand->stateStack);
355                         goto around_again;
356                     }
357                     /* intentional fall-through */
358                 }
359                 case yajl_tok_colon:
360                 case yajl_tok_comma:
361                 case yajl_tok_right_bracket:
362                     yajl_bs_set(hand->stateStack, yajl_state_parse_error);
363                     hand->parseError =
364                         "unallowed token at this point in JSON text";
365                     goto around_again;
366                 default:
367                     yajl_bs_set(hand->stateStack, yajl_state_parse_error);
368                     hand->parseError = "invalid token, internal error";
369                     goto around_again;
370             }
371             /* got a value.  transition depends on the state we're in. */
372             {
373                 yajl_state s = yajl_bs_current(hand->stateStack);
374                 if (s == yajl_state_start || s == yajl_state_got_value) {
375                     yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
376                 } else if (s == yajl_state_map_need_val) {
377                     yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
378                 } else {
379                     yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
380                 }
381             }
382             if (stateToPush != yajl_state_start) {
383                 yajl_bs_push(hand->stateStack, stateToPush);
384             }
385 
386             goto around_again;
387         }
388         case yajl_state_map_start:
389         case yajl_state_map_need_key: {
390             /* only difference between these two states is that in
391              * start '}' is valid, whereas in need_key, we've parsed
392              * a comma, and a string key _must_ follow */
393             tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
394                                offset, &buf, &bufLen);
395             switch (tok) {
396                 case yajl_tok_eof:
397                     return yajl_status_ok;
398                 case yajl_tok_error:
399                     yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
400                     goto around_again;
401                 case yajl_tok_string_with_escapes:
402                     if (hand->callbacks && hand->callbacks->yajl_map_key) {
403                         yajl_buf_clear(hand->decodeBuf);
404                         yajl_string_decode(hand->decodeBuf, buf, bufLen);
405                         buf = yajl_buf_data(hand->decodeBuf);
406                         bufLen = yajl_buf_len(hand->decodeBuf);
407                     }
408                     /* intentional fall-through */
409                 case yajl_tok_string:
410                     if (hand->callbacks && hand->callbacks->yajl_map_key) {
411                         _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
412                                                               bufLen));
413                     }
414                     yajl_bs_set(hand->stateStack, yajl_state_map_sep);
415                     goto around_again;
416                 case yajl_tok_c_comment:
417                     if (hand->callbacks && hand->callbacks->yajl_c_comment) {
418                         _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
419                                                              buf, bufLen));
420                     }
421                     goto around_again;
422                 case yajl_tok_cpp_comment:
423                     if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
424                         _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
425                                                              buf, bufLen));
426                     }
427                     goto around_again;
428                 case yajl_tok_right_bracket:
429                     if (yajl_bs_current(hand->stateStack) ==
430                         yajl_state_map_start)
431                     {
432                         if (hand->callbacks && hand->callbacks->yajl_end_map) {
433                             _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
434                         }
435                         yajl_bs_pop(hand->stateStack);
436                         goto around_again;
437                     }
438                 default:
439                     yajl_bs_set(hand->stateStack, yajl_state_parse_error);
440                     hand->parseError =
441                         "invalid object key (must be a string)";
442                     goto around_again;
443             }
444         }
445         case yajl_state_map_sep: {
446             tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
447                                offset, &buf, &bufLen);
448             switch (tok) {
449                 case yajl_tok_c_comment:
450                     if (hand->callbacks && hand->callbacks->yajl_c_comment) {
451                         _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
452                                                              buf, bufLen));
453                     }
454                     goto around_again;
455                 case yajl_tok_cpp_comment:
456                     if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
457                         _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
458                                                              buf, bufLen));
459                     }
460                     goto around_again;
461                 case yajl_tok_colon:
462                     yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
463                     goto around_again;
464                 case yajl_tok_eof:
465                     return yajl_status_ok;
466                 case yajl_tok_error:
467                     yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
468                     goto around_again;
469                 default:
470                     yajl_bs_set(hand->stateStack, yajl_state_parse_error);
471                     hand->parseError = "object key and value must "
472                         "be separated by a colon (':')";
473                     goto around_again;
474             }
475         }
476         case yajl_state_map_got_val: {
477             tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
478                                offset, &buf, &bufLen);
479             switch (tok) {
480                 case yajl_tok_c_comment:
481                     if (hand->callbacks && hand->callbacks->yajl_c_comment) {
482                         _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
483                                                              buf, bufLen));
484                     }
485                     goto around_again;
486                 case yajl_tok_cpp_comment:
487                     if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
488                         _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
489                                                              buf, bufLen));
490                     }
491                     goto around_again;
492                 case yajl_tok_right_bracket:
493                     if (hand->callbacks && hand->callbacks->yajl_end_map) {
494                         _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
495                     }
496                     yajl_bs_pop(hand->stateStack);
497                     goto around_again;
498                 case yajl_tok_comma:
499                     yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
500                     goto around_again;
501                 case yajl_tok_eof:
502                     return yajl_status_ok;
503                 case yajl_tok_error:
504                     yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
505                     goto around_again;
506                 default:
507                     yajl_bs_set(hand->stateStack, yajl_state_parse_error);
508                     hand->parseError = "after key and value, inside map, "
509                                        "I expect ',' or '}'";
510                     /* try to restore error offset */
511                     if (*offset >= bufLen) *offset -= bufLen;
512                     else *offset = 0;
513                     goto around_again;
514             }
515         }
516         case yajl_state_array_got_val: {
517             tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
518                                offset, &buf, &bufLen);
519             switch (tok) {
520                 case yajl_tok_c_comment:
521                     if (hand->callbacks && hand->callbacks->yajl_c_comment) {
522                         _CC_CHK(hand->callbacks->yajl_c_comment(hand->ctx,
523                                                              buf, bufLen));
524                     }
525                     goto around_again;
526                 case yajl_tok_cpp_comment:
527                     if (hand->callbacks && hand->callbacks->yajl_cpp_comment) {
528                         _CC_CHK(hand->callbacks->yajl_cpp_comment(hand->ctx,
529                                                              buf, bufLen));
530                     }
531                     goto around_again;
532                 case yajl_tok_right_brace:
533                     if (hand->callbacks && hand->callbacks->yajl_end_array) {
534                         _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
535                     }
536                     yajl_bs_pop(hand->stateStack);
537                     goto around_again;
538                 case yajl_tok_comma:
539                     yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
540                     goto around_again;
541                 case yajl_tok_eof:
542                     return yajl_status_ok;
543                 case yajl_tok_error:
544                     yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
545                     goto around_again;
546                 default:
547                     yajl_bs_set(hand->stateStack, yajl_state_parse_error);
548                     hand->parseError =
549                         "after array element, I expect ',' or ']'";
550                     goto around_again;
551             }
552         }
553     }
554 
555     abort();
556     return yajl_status_error;
557 }
558 
559