1 /*
2  * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
/*
 * This is not strictly JSON in its current state.
 * Some differences:
 * - Double quotes around the key in an object are not enforced,
 *     i.e. you can write: { foo : "bar" } instead of { "foo" : "bar" }.
 * - Comments are allowed.
 * - The last element in an object or array can be followed by a trailing comma.
 */
33 
#include "precompiled.hpp"
#include "utilities/json.hpp"
#include "utilities/ostream.hpp"
#include <limits.h>
#include <math.h>
38 
// Like strchr(), except that when c does not occur in s the result points at
// the terminating NUL of s instead of being NULL.
const char* strchrnul_(const char *s, int c) {
  const char* found = strchr(s, c);
  if (found != NULL) {
    return found;
  }
  return s + strlen(s);
}
43 
JSON(const char * text,bool silent,outputStream * st)44 JSON::JSON(const char* text, bool silent, outputStream* st)
45 : start(text), pos(text), mark(text),
46   level(0), line(1), column(0), silent(silent), _valid(true), _st(st)
47 {
48 }
49 
parse()50 void JSON::parse() {
51   assert(start != NULL, "Need something to parse");
52   if (start == NULL) {
53     _valid = false;
54     error(INTERNAL_ERROR, "JSON parser was called with a string that was NULL.");
55   } else {
56     _valid = parse_json_value();
57   }
58 }
59 
valid()60 bool JSON::valid() {
61   return _valid;
62 }
63 
parse_json_value()64 bool JSON::parse_json_value() {
65   int c;
66 
67   c = skip_to_token();
68   if (c == -1) {
69     return false;
70   }
71 
72   // Must start with object or array
73   if (level == 0) {
74 
75     switch (c) {
76     case '{':
77       if (parse_json_object() == false) {
78         return false;
79       }
80       c = skip_to_token();
81       if (c > 0) {
82         mark_pos();
83         error(SYNTAX_ERROR, "Only one top level object/array is allowed.");
84         return false;
85       } else if (c < 0) {
86         return false;
87       }
88       return true;
89 
90     case '[':
91       if (parse_json_array() == false) {
92         return false;
93       }
94       c = skip_to_token();
95       if (c > 0) {
96         mark_pos();
97         error(SYNTAX_ERROR, "Only one top level object/array is allowed.");
98         return false;
99       } else if (c < 0) {
100         return false;
101       }
102       return true;
103 
104     case 0:
105       error(SYNTAX_ERROR, "EOS was encountered before any json declarations");
106       return false;
107 
108     default:
109       error(SYNTAX_ERROR, "Json must start with an object or an array.");
110       return false;
111     }
112   } else { // level > 0
113     switch (c) {
114     case '{':
115       return parse_json_object();
116 
117     case '[':
118       return parse_json_array();
119 
120     case '"':
121       return parse_json_string();
122 
123     case '-': case '0':
124     case '1': case '2': case '3':
125     case '4': case '5': case '6':
126     case '7': case '8': case '9':
127       return parse_json_number();
128 
129     case 't':
130       return parse_json_symbol("true", JSON_TRUE);
131 
132     case 'f':
133       return parse_json_symbol("false", JSON_FALSE);
134 
135     case 'n':
136       return parse_json_symbol("null", JSON_NULL);
137 
138     case 0:
139       error(SYNTAX_ERROR, "EOS was encountered when expecting a json value.");
140       return false;
141 
142     default:
143       error(SYNTAX_ERROR, "Could not parse as a json value (did you forget to quote your strings?).");
144       return false;
145     }
146   }
147 }
148 
149 // Should only be called when we actually have the start of an object
150 // Otherwise it is an internal error
parse_json_object()151 bool JSON::parse_json_object() {
152   NOT_PRODUCT(const char* prev_pos);
153   int c;
154 
155   mark_pos();
156   // Check that we are not called in error
157   if (expect_any("{", "object start", INTERNAL_ERROR) <= 0) {
158     return false;
159   }
160 
161   if (!callback(JSON_OBJECT_BEGIN, NULL, level++)) {
162     return false;
163   }
164 
165   for (;;) {
166     mark_pos();
167     c = skip_to_token();
168     if (c == 0) {
169       error(SYNTAX_ERROR, "EOS when expecting an object key or object end");
170       return false;
171     } else if (c < 0) {
172       return false;
173     } else if (c == '}') {
174       // We got here from either empty object "{}" or ending comma "{a:1,}"
175       next();
176       break;
177     }
178 
179     NOT_PRODUCT(prev_pos = pos);
180     if (parse_json_key() == false) {
181       return false;
182     }
183     assert(pos > prev_pos, "parsing stalled");
184 
185     skip_to_token();
186     mark_pos();
187     if (expect_any(":", "object key-value separator") <= 0) {
188       return false;
189     }
190 
191     skip_to_token();
192     mark_pos();
193     NOT_PRODUCT(prev_pos = pos);
194     if (parse_json_value() == false) {
195       return false;
196     }
197     assert(pos > prev_pos, "parsing stalled");
198 
199     c = skip_to_token();
200     mark_pos();
201     if (expect_any(",}", "value separator or object end") <= 0) {
202       return false;
203     }
204     if (c == '}') {
205       break;
206     }
207   }
208 
209   assert(c == '}', "array parsing ended without object end token ('}')");
210   return callback(JSON_OBJECT_END, NULL, --level);
211 }
212 
213 // Should only be called when we actually have the start of an array
214 // Otherwise it is an internal error
parse_json_array()215 bool JSON::parse_json_array() {
216   NOT_PRODUCT(const char* prev_pos);
217   int c;
218 
219   mark_pos();
220   // Check that we are not called in error
221   if (expect_any("[", "array start character", INTERNAL_ERROR) <= 0) {
222     return false;
223   }
224 
225   if (!callback(JSON_ARRAY_BEGIN, NULL, level++)) {
226     return false;
227   }
228 
229   for (;;) {
230     mark_pos();
231     c = skip_to_token();
232     if (c == 0) {
233       error(SYNTAX_ERROR, "EOS when expecting a json value or array end");
234       return false;
235     } else if (c < 0) {
236       return false;
237     } else if (c == ']') {
238       // We got here from either empty array "[]" or ending comma "[1,]"
239       next();
240       break;
241     }
242 
243     mark_pos();
244     NOT_PRODUCT(prev_pos = pos);
245     if (parse_json_value() == false) {
246       return false;
247     }
248     assert(pos > prev_pos, "parsing stalled");
249 
250     c = skip_to_token();
251     mark_pos();
252     if (expect_any(",]", "value separator or array end") <= 0) {
253       return false;
254     }
255     if (c == ']') {
256       break;
257     }
258   }
259 
260   assert(c == ']', "array parsing ended without array end token (']')");
261   return callback(JSON_ARRAY_END, NULL, --level);
262 }
263 
parse_json_string(bool key)264 bool JSON::parse_json_string(bool key) {
265   const char* end;
266   JSON_VAL v;
267 
268   mark_pos();
269   if (expect_any("\"", "string start character", INTERNAL_ERROR) <= 0) {
270     return false;
271   }
272 
273   end = strchr(pos, '"'); // TODO: escapes
274   if (end == NULL) {
275     error(SYNTAX_ERROR, "String started here never ended. Expected \'\"\' before EOS.");
276     return false;
277   }
278 
279   v.str.start = pos;
280   v.str.length = end - pos;
281   skip(end - pos);
282 
283   if (expect_any("\"", "string end character", INTERNAL_ERROR) <= 0) {
284     return false;
285   }
286 
287   if (key == true) {
288     return callback(JSON_KEY, &v, level);
289   } else {
290     return callback(JSON_STRING, &v, level);
291   }
292 }
293 
294 // TODO: hotspot equivalents?
// True for the ASCII letters A-Z and a-z.
static bool is_alpha(u_char c) {
  const bool upper = ('A' <= c) && (c <= 'Z');
  const bool lower = ('a' <= c) && (c <= 'z');
  return upper || lower;
}

// True for the ASCII digits 0-9.
static bool is_numeric(u_char c) {
  return !(c < '0' || c > '9');
}

// True for ASCII letters and digits.
static bool is_alnum(u_char c) {
  return is_numeric(c) || is_alpha(c);
}

// True for the characters allowed in an unquoted object key:
// ASCII letters, digits and underscore.
static bool is_word(u_char c) {
  return is_alnum(c) || c == '_';
}
307 
308 // Allow object keys to be without quotation,
309 // but then restrict to ([a-zA-Z0-9_])+
parse_json_key()310 bool JSON::parse_json_key() {
311   const char* begin;
312   JSON_VAL v;
313   u_char c;
314 
315   mark_pos();
316   c = peek();
317   if (c == '"') {
318     return parse_json_string(true);
319   }
320 
321   begin = pos;
322   c = peek();
323   if (c == 0) {
324     error(SYNTAX_ERROR, "Got EOS when expecting an object key.");
325     return false;
326   } else if (is_word(c) == false) {
327     error(SYNTAX_ERROR, "Expected an object key, which can be a double-quoted (\") string or a simple string (only alphanumeric characters and underscore, separated by whitespace) that doesn't need to be quoted.");
328     return false;
329   }
330 
331   for (;;) {
332     c = peek();
333     // Allow the key to be delimited by control characters and the object key-value separator ':'
334     if (c <= ' ' || c == ':') {
335       break;
336     } else if (is_word(c) == false) {
337       error(SYNTAX_ERROR, "Object key need to be quoted, or consist entirely of alphanumeric characters and underscores.");
338       return false;
339     }
340     next();
341   }
342 
343   v.str.start = begin;
344   v.str.length = pos - begin;
345   return callback(JSON_KEY, &v, level);
346 }
347 
parse_json_number()348 bool JSON::parse_json_number() {
349   double double_value;
350   int tokens, read;
351   JSON_VAL v;
352 
353   mark_pos();
354 
355   // Parsing number - for simplicity ints are limited to 2**53
356   // sscanf as a double and check if part is 0.
357   tokens = sscanf(pos, "%lf%n", &double_value, &read);
358   assert(tokens <= 1, "scanf implementation that counts as a token, parsing json numbers will always fail");
359   if (tokens == 1) {
360     assert(read > 0, "sanity");
361 
362     if (floor(double_value) == double_value) {
363       // No exponent - treat as an int
364       v.int_value = (int)double_value;
365       if (!callback(JSON_NUMBER_INT, &v, level)) {
366         return false;
367       }
368     } else {
369       v.double_value = double_value;
370       if (!callback(JSON_NUMBER_FLOAT, &v, level)) {
371         return false;
372       }
373     }
374     skip(read);
375     return true;
376   }
377 
378   error(SYNTAX_ERROR, "Couldn't parse json number (note that exponents are not supported).");
379   return false;
380 }
381 
parse_json_symbol(const char * name,JSON_TYPE symbol)382 bool JSON::parse_json_symbol(const char* name, JSON_TYPE symbol) {
383   if (expect_string(name, "maybe you forgot to quote your strings?") == false) {
384     mark_pos();
385     return false;
386   }
387   return callback(symbol, NULL, level);
388 }
389 
mark_pos()390 void JSON::mark_pos() {
391   assert((mark == start || *(mark - 1)) != 0, "buffer overrun");
392   assert(mark <= pos, "mark runahead");
393 
394   u_char c;
395 
396   while (mark < pos) {
397     c = *mark;
398     assert(c != 0, "pos buffer overrun?");
399     if (c != 0) {
400       mark++;
401       column++;
402     }
403     if (c == '\n') {
404       line++;
405       column = 0;
406     }
407   }
408 
409   assert(mark <= pos, "mark runahead");
410 }
411 
next()412 u_char JSON::next() {
413   assert((pos == start || *(pos - 1)) != 0, "buffer overrun");
414 
415   u_char c = *pos;
416   if (c != 0) {
417     pos++;
418   }
419   return c;
420 }
421 
peek()422 u_char JSON::peek() {
423   return *pos;
424 }
425 
426 // Peek ahead i chars (0 is same as peek())
peek(size_t i)427 u_char JSON::peek(size_t i) {
428   u_char c;
429   const char* p;
430 
431   p = pos;
432   c = *p;
433   while (i > 0 && c != 0) {
434     i--;
435     p++;
436     c = *p;
437   }
438   return c;
439 }
440 
441 /*
442  * Check that one of the expected characters is next in the stream.
443  * If not, it is an error.
444  * Returns 0 if EOS is encountered.
445  * Returns -1 if the next character was not one of the expected.
446  * Otherwise consumes and returns the expected character that was encountered.
447  */
expect_any(const char * valid_chars,const char * error_msg,JSON_ERROR e)448 int JSON::expect_any(const char* valid_chars, const char* error_msg, JSON_ERROR e) {
449   size_t len;
450   u_char c;
451 
452   len = strlen(valid_chars);
453   assert(len > 0, "need non-empty string");
454 
455   c = peek();
456   if (c == 0) {
457     error(e, "Got EOS when expecting %s (%s\'%s\').", error_msg, len > 1 ? "one of " : "", valid_chars);
458     return 0;
459   }
460   for (size_t i = 0; i < len; i++) {
461     if (c == valid_chars[i]) {
462       return next();
463     }
464   }
465   error(e, "Expected %s (%s\'%s\').", error_msg, len > 1 ? "one of " : "", valid_chars);
466   return -1;
467 }
468 
469 /*
470  * Check that the expected string is next in the stream.
471  * If not, it is an error.
472  * Consumes the expected characters if they are present.
473  * Returns true if the expected characters were present, otherwise false.
474  */
expect_string(const char * expected_string,const char * error_msg,JSON_ERROR e)475 bool JSON::expect_string(const char* expected_string, const char* error_msg, JSON_ERROR e) {
476   u_char c, expected_char;
477   size_t len;
478 
479   assert(expected_string != NULL, "need non-null string");
480   len = strlen(expected_string);
481   assert(len > 0, "need non-empty string");
482 
483   for (size_t i = 0; i < len; i++) {
484     expected_char = expected_string[i];
485     assert(expected_char > ' ', "not sane for control characters");
486     if (expected_char <= ' ') {
487       error(INTERNAL_ERROR, "expect got a control char");
488     }
489     c = pos[i];
490     if (c == 0) {
491       error(e, "EOS encountered when expecting %s (\"%s\")", error_msg, expected_string);
492       return false;
493     } else if (c != expected_char) {
494       error(e, "Expected \"%s\" (%s)", expected_string, error_msg);
495       return false;
496     }
497   }
498   skip(len);
499   return true;
500 }
501 
502 /*
503  * Skip i characters.
504  * Returns number of characters skipped.
505  */
skip(size_t i)506 size_t JSON::skip(size_t i) {
507   u_char c;
508   size_t j;
509 
510   c = peek();
511   for (j = i; c != 0 && j > 0; j--) {
512     c = next();
513   }
514   return i - j;
515 }
516 
517 /*
518  * Skip whitespace and comments.
519  * Returns the first token after whitespace/comments without consuming it
520  * Returns 0 if EOS is encountered.
521  * Returns -1 if there is an error
522  */
skip_to_token()523 int JSON::skip_to_token() {
524   for (;;) {
525     int c = peek(0);
526     if (c == '/') {
527       u_char c2 = peek(1);
528       if (c2 == '/') {
529         c = skip_line_comment();
530       } else if (c2 == '*') {
531         c = skip_block_comment();
532         if (c < 0) {
533           return -1;
534         }
535       }
536       // Fall through to keep checking if there
537       // are more whitespace / comments to skip
538     }
539     if (c == 0 || c > ' ') {
540       return c;
541     }
542     next();
543   }
544   return 0;
545 }
546 
547 /*
548  * Skip to, and return the wanted char without consuming it
549  * Returns 0 if EOS is encountered.
550  */
skip_to(u_char want)551 u_char JSON::skip_to(u_char want) {
552   // We want the bookkeeping done in next().
553   // Otherwise strchr could have been used.
554   u_char c;
555   for(;;) {
556     c = peek();
557     if (c == 0 || c == want) {
558       return c;
559     }
560     next();
561   }
562 }
563 
564 /*
565  * Should only be called when we actually have a line comment to skip.
566  * Otherwise it is an internal error.
567  *
568  * Will return the first token after the line comment without consuming it.
569  * Returns 0 if EOS is encoutered.
570  */
skip_line_comment()571 u_char JSON::skip_line_comment() {
572   u_char c;
573 
574   // Check that we are not called in error
575   expect_any("/", "line comment start", INTERNAL_ERROR);
576   expect_any("/", "line comment start", INTERNAL_ERROR);
577 
578   c = skip_to('\n');
579   if (c == 0) {
580     return 0;
581   }
582   next();
583   return next();
584 }
585 
586 /*
587  * Should only be called when we actually have a block comment to skip.
588  * Otherwise it is an internal error.
589  *
590  * Returns the first token after the block comment without consuming it.
591  * Returns -1 if EOS is encountered in the middle of a comment.
592  */
skip_block_comment()593 int JSON::skip_block_comment() {
594   const char* current;
595 
596   // Check that we are not called in error.
597   if (peek() != '/' || peek(1) != '*') {
598     // Let expect handle EOS.
599     expect_string("/*", "block comment start", INTERNAL_ERROR);
600     return 0;
601   }
602 
603   current = pos;
604   for (;;) {
605     current = strchrnul_(current, '*');
606 
607     if (current[0] == 0 || current[1] == 0) {
608       // Advance error marker to start of block comment
609       mark_pos();
610       error(SYNTAX_ERROR, "Block comment started here never ended. Expected \"*/\" before EOS.");
611       return -1;
612     }
613 
614     if (current[1] == '/') {
615       pos = current;
616       if (expect_string("*/", "block comment end", INTERNAL_ERROR) == false) {
617         return -1;
618       }
619       // Found block comment end
620       return peek();
621     }
622     current++;
623   }
624 }
625 
strerror(JSON_ERROR e)626 const char* JSON::strerror(JSON_ERROR e) {
627   switch (e) {
628   case SYNTAX_ERROR:
629     return "Syntax error";
630   case INTERNAL_ERROR:
631     return "Internal error";
632   case KEY_ERROR:
633     return "Key error";
634   case VALUE_ERROR:
635     return "Value error";
636   default:
637     ShouldNotReachHere();
638     return "Unknown error";
639   }
640 }
641 
error(JSON_ERROR e,const char * format,...)642 void JSON::error(JSON_ERROR e, const char* format, ...) {
643   _valid = false;
644 
645   if (!silent) {
646     const char* line_start;
647     const char* tmp;
648     size_t line_length;
649     va_list args;
650     u_char c;
651 
652     _st->print("%s on line %u byte %u: ", JSON::strerror(e), line, column + 1);
653     va_start(args, format);
654     _st->vprint(format, args);
655     _st->cr();
656     va_end(args);
657 
658     line_start = mark - column;
659     assert(line_start >= start, "out of bounds");
660     assert(line_start <= mark, "out of bounds");
661     assert(line_start == start || line_start[-1] == '\n', "line counting error");
662 
663     c = *pos;
664     if (c == 0) {
665       _st->print("  Got ");
666       _st->print_cr("EOS.");
667     }
668     tmp = mark;
669     c = *tmp;
670     if (c > ' ') {
671       _st->print("  At ");
672       _st->print("'");
673       while (c > ' ') {
674         _st->print("%c", c);
675         tmp++;
676         c = *tmp;
677       }
678       _st->print_cr("'.");
679     }
680 
681     // Skip to newline or EOS
682     tmp = strchrnul_(mark, '\n');
683     line_length = tmp - line_start;
684 
685     _st->print_cr("%s", line_start);
686   }
687 }
688 
689