1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/blink/renderer/platform/json/json_parser.h"
6
7 #include "base/numerics/safe_conversions.h"
8 #include "third_party/blink/renderer/platform/json/json_values.h"
9 #include "third_party/blink/renderer/platform/wtf/decimal.h"
10 #include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
11 #include "third_party/blink/renderer/platform/wtf/text/string_to_number.h"
12
13 namespace blink {
14
15 namespace {
16
17 const int kMaxStackLimit = 1000;
18
19 using Error = JSONParseErrorType;
20
FormatErrorMessage(Error error,int line,int column)21 String FormatErrorMessage(Error error, int line, int column) {
22 String text;
23 switch (error) {
24 case Error::kNoError:
25 NOTREACHED();
26 return "";
27 case Error::kUnexpectedToken:
28 text = "Unexpected token.";
29 break;
30 case Error::kSyntaxError:
31 text = "Syntax error.";
32 break;
33 case Error::kInvalidEscape:
34 text = "Invalid escape sequence.";
35 break;
36 case Error::kTooMuchNesting:
37 text = "Too much nesting.";
38 break;
39 case Error::kUnexpectedDataAfterRoot:
40 text = "Unexpected data after root element.";
41 break;
42 case Error::kUnsupportedEncoding:
43 text =
44 "Unsupported encoding. JSON and all string literals must contain "
45 "valid Unicode characters.";
46 break;
47 }
48 return "Line: " + String::Number(line) +
49 ", column: " + String::Number(column) + ", " + text;
50 }
51
52 // Note: all parsing functions take a |cursor| parameter which is
53 // where they start parsing from.
54 // If the parsing succeeds, |cursor| will point to the position
55 // right after the parsed value, "consuming" some portion of the input.
56 // If the parsing fails, |cursor| will point to the error position.
57
58 template <typename CharType>
59 struct Cursor {
60 int line;
61 const CharType* line_start;
62 const CharType* pos;
63 };
64
65 enum Token {
66 kObjectBegin,
67 kObjectEnd,
68 kArrayBegin,
69 kArrayEnd,
70 kStringLiteral,
71 kNumber,
72 kBoolTrue,
73 kBoolFalse,
74 kNullToken,
75 kListSeparator,
76 kObjectPairSeparator,
77 };
78
79 template <typename CharType>
ParseConstToken(Cursor<CharType> * cursor,const CharType * end,const char * token)80 Error ParseConstToken(Cursor<CharType>* cursor,
81 const CharType* end,
82 const char* token) {
83 const CharType* token_start = cursor->pos;
84 while (cursor->pos < end && *token != '\0' && *(cursor->pos++) == *token++) {
85 }
86 if (*token != '\0') {
87 cursor->pos = token_start;
88 return Error::kSyntaxError;
89 }
90 return Error::kNoError;
91 }
92
93 template <typename CharType>
ReadInt(Cursor<CharType> * cursor,const CharType * end,bool can_have_leading_zeros)94 Error ReadInt(Cursor<CharType>* cursor,
95 const CharType* end,
96 bool can_have_leading_zeros) {
97 if (cursor->pos == end)
98 return Error::kSyntaxError;
99 const CharType* start_ptr = cursor->pos;
100 bool have_leading_zero = '0' == *(cursor->pos);
101 int length = 0;
102 while (cursor->pos < end && '0' <= *(cursor->pos) && *(cursor->pos) <= '9') {
103 ++(cursor->pos);
104 ++length;
105 }
106 if (!length)
107 return Error::kSyntaxError;
108 if (!can_have_leading_zeros && length > 1 && have_leading_zero) {
109 cursor->pos = start_ptr + 1;
110 return Error::kSyntaxError;
111 }
112 return Error::kNoError;
113 }
114
115 template <typename CharType>
ParseNumberToken(Cursor<CharType> * cursor,const CharType * end)116 Error ParseNumberToken(Cursor<CharType>* cursor, const CharType* end) {
117 // We just grab the number here. We validate the size in DecodeNumber.
118 // According to RFC4627, a valid number is: [minus] int [frac] [exp]
119 if (cursor->pos == end)
120 return Error::kSyntaxError;
121 if (*(cursor->pos) == '-')
122 ++(cursor->pos);
123
124 Error error = ReadInt(cursor, end, false);
125 if (error != Error::kNoError)
126 return error;
127
128 if (cursor->pos == end)
129 return Error::kNoError;
130
131 // Optional fraction part
132 CharType c = *(cursor->pos);
133 if ('.' == c) {
134 ++(cursor->pos);
135 error = ReadInt(cursor, end, true);
136 if (error != Error::kNoError)
137 return error;
138 if (cursor->pos == end)
139 return Error::kNoError;
140 c = *(cursor->pos);
141 }
142
143 // Optional exponent part
144 if ('e' == c || 'E' == c) {
145 ++(cursor->pos);
146 if (cursor->pos == end)
147 return Error::kSyntaxError;
148 c = *(cursor->pos);
149 if ('-' == c || '+' == c) {
150 ++(cursor->pos);
151 if (cursor->pos == end)
152 return Error::kSyntaxError;
153 }
154 error = ReadInt(cursor, end, true);
155 if (error != Error::kNoError)
156 return error;
157 }
158
159 return Error::kNoError;
160 }
161
162 template <typename CharType>
ReadHexDigits(Cursor<CharType> * cursor,const CharType * end,int digits)163 Error ReadHexDigits(Cursor<CharType>* cursor, const CharType* end, int digits) {
164 const CharType* token_start = cursor->pos;
165 if (end - cursor->pos < digits)
166 return Error::kInvalidEscape;
167 for (int i = 0; i < digits; ++i) {
168 CharType c = *(cursor->pos)++;
169 if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
170 ('A' <= c && c <= 'F'))) {
171 cursor->pos = token_start;
172 return Error::kInvalidEscape;
173 }
174 }
175 return Error::kNoError;
176 }
177
178 template <typename CharType>
ParseStringToken(Cursor<CharType> * cursor,const CharType * end)179 Error ParseStringToken(Cursor<CharType>* cursor, const CharType* end) {
180 if (cursor->pos == end)
181 return Error::kSyntaxError;
182 if (*(cursor->pos) != '"')
183 return Error::kSyntaxError;
184 ++(cursor->pos);
185 while (cursor->pos < end) {
186 CharType c = *(cursor->pos)++;
187 if ('\\' == c) {
188 if (cursor->pos == end)
189 return Error::kInvalidEscape;
190 c = *(cursor->pos)++;
191 // Make sure the escaped char is valid.
192 switch (c) {
193 case 'x': {
194 Error error = ReadHexDigits(cursor, end, 2);
195 if (error != Error::kNoError)
196 return error;
197 break;
198 }
199 case 'u': {
200 Error error = ReadHexDigits(cursor, end, 4);
201 if (error != Error::kNoError)
202 return error;
203 break;
204 }
205 case '\\':
206 case '/':
207 case 'b':
208 case 'f':
209 case 'n':
210 case 'r':
211 case 't':
212 case 'v':
213 case '"':
214 break;
215 default:
216 return Error::kInvalidEscape;
217 }
218 } else if (c < 0x20) {
219 return Error::kSyntaxError;
220 } else if ('"' == c) {
221 return Error::kNoError;
222 }
223 }
224 return Error::kSyntaxError;
225 }
226
227 template <typename CharType>
SkipComment(Cursor<CharType> * cursor,const CharType * end)228 Error SkipComment(Cursor<CharType>* cursor, const CharType* end) {
229 const CharType* pos = cursor->pos;
230 if (pos == end)
231 return Error::kSyntaxError;
232
233 if (*pos != '/' || pos + 1 >= end)
234 return Error::kSyntaxError;
235 ++pos;
236
237 if (*pos == '/') {
238 // Single line comment, read to newline.
239 for (++pos; pos < end; ++pos) {
240 if (*pos == '\n') {
241 cursor->line++;
242 cursor->pos = pos + 1;
243 cursor->line_start = cursor->pos;
244 return Error::kNoError;
245 }
246 }
247 cursor->pos = end;
248 // Comment reaches end-of-input, which is fine.
249 return Error::kNoError;
250 }
251
252 if (*pos == '*') {
253 CharType previous = '\0';
254 // Block comment, read until end marker.
255 for (++pos; pos < end; previous = *pos++) {
256 if (*pos == '\n') {
257 cursor->line++;
258 cursor->line_start = pos + 1;
259 }
260 if (previous == '*' && *pos == '/') {
261 cursor->pos = pos + 1;
262 return Error::kNoError;
263 }
264 }
265 // Block comment must close before end-of-input.
266 return Error::kSyntaxError;
267 }
268
269 return Error::kSyntaxError;
270 }
271
272 template <typename CharType>
SkipWhitespaceAndComments(Cursor<CharType> * cursor,const CharType * end)273 Error SkipWhitespaceAndComments(Cursor<CharType>* cursor, const CharType* end) {
274 while (cursor->pos < end) {
275 CharType c = *(cursor->pos);
276 if (c == '\n') {
277 cursor->line++;
278 ++(cursor->pos);
279 cursor->line_start = cursor->pos;
280 } else if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
281 ++(cursor->pos);
282 } else if (c == '/') {
283 Error error = SkipComment(cursor, end);
284 if (error != Error::kNoError)
285 return error;
286 } else {
287 break;
288 }
289 }
290 return Error::kNoError;
291 }
292
293 template <typename CharType>
ParseToken(Cursor<CharType> * cursor,const CharType * end,Token * token,Cursor<CharType> * token_start)294 Error ParseToken(Cursor<CharType>* cursor,
295 const CharType* end,
296 Token* token,
297 Cursor<CharType>* token_start) {
298 Error error = SkipWhitespaceAndComments(cursor, end);
299 if (error != Error::kNoError)
300 return error;
301 *token_start = *cursor;
302
303 if (cursor->pos == end)
304 return Error::kSyntaxError;
305
306 switch (*(cursor->pos)) {
307 case 'n':
308 *token = kNullToken;
309 return ParseConstToken(cursor, end, kJSONNullString);
310 case 't':
311 *token = kBoolTrue;
312 return ParseConstToken(cursor, end, kJSONTrueString);
313 case 'f':
314 *token = kBoolFalse;
315 return ParseConstToken(cursor, end, kJSONFalseString);
316 case '[':
317 ++(cursor->pos);
318 *token = kArrayBegin;
319 return Error::kNoError;
320 case ']':
321 ++(cursor->pos);
322 *token = kArrayEnd;
323 return Error::kNoError;
324 case ',':
325 ++(cursor->pos);
326 *token = kListSeparator;
327 return Error::kNoError;
328 case '{':
329 ++(cursor->pos);
330 *token = kObjectBegin;
331 return Error::kNoError;
332 case '}':
333 ++(cursor->pos);
334 *token = kObjectEnd;
335 return Error::kNoError;
336 case ':':
337 ++(cursor->pos);
338 *token = kObjectPairSeparator;
339 return Error::kNoError;
340 case '0':
341 case '1':
342 case '2':
343 case '3':
344 case '4':
345 case '5':
346 case '6':
347 case '7':
348 case '8':
349 case '9':
350 case '-':
351 *token = kNumber;
352 return ParseNumberToken(cursor, end);
353 case '"':
354 *token = kStringLiteral;
355 return ParseStringToken(cursor, end);
356 }
357
358 return Error::kSyntaxError;
359 }
360
361 template <typename CharType>
HexToInt(CharType c)362 inline int HexToInt(CharType c) {
363 if ('0' <= c && c <= '9')
364 return c - '0';
365 if ('A' <= c && c <= 'F')
366 return c - 'A' + 10;
367 if ('a' <= c && c <= 'f')
368 return c - 'a' + 10;
369 NOTREACHED();
370 return 0;
371 }
372
373 template <typename CharType>
DecodeString(Cursor<CharType> * cursor,const CharType * end,String * output)374 Error DecodeString(Cursor<CharType>* cursor,
375 const CharType* end,
376 String* output) {
377 if (cursor->pos + 1 > end - 1)
378 return Error::kSyntaxError;
379 if (cursor->pos + 1 == end - 1) {
380 *output = "";
381 return Error::kNoError;
382 }
383
384 const CharType* string_start = cursor->pos;
385 StringBuilder buffer;
386 buffer.ReserveCapacity(static_cast<wtf_size_t>(end - cursor->pos - 2));
387
388 cursor->pos++;
389 while (cursor->pos < end - 1) {
390 UChar c = *(cursor->pos)++;
391 if (c == '\n') {
392 cursor->line++;
393 cursor->line_start = cursor->pos;
394 }
395 if ('\\' != c) {
396 buffer.Append(c);
397 continue;
398 }
399 if (cursor->pos == end - 1)
400 return Error::kInvalidEscape;
401 c = *(cursor->pos)++;
402
403 if (c == 'x') {
404 // \x is not supported.
405 return Error::kInvalidEscape;
406 }
407
408 switch (c) {
409 case '"':
410 case '/':
411 case '\\':
412 break;
413 case 'b':
414 c = '\b';
415 break;
416 case 'f':
417 c = '\f';
418 break;
419 case 'n':
420 c = '\n';
421 break;
422 case 'r':
423 c = '\r';
424 break;
425 case 't':
426 c = '\t';
427 break;
428 case 'v':
429 c = '\v';
430 break;
431 case 'u':
432 c = (HexToInt(*(cursor->pos)) << 12) +
433 (HexToInt(*(cursor->pos + 1)) << 8) +
434 (HexToInt(*(cursor->pos + 2)) << 4) + HexToInt(*(cursor->pos + 3));
435 cursor->pos += 4;
436 break;
437 default:
438 return Error::kInvalidEscape;
439 }
440 buffer.Append(c);
441 }
442 *output = buffer.ToString();
443
444 // Validate constructed utf16 string.
445 if (output->Utf8(kStrictUTF8Conversion).empty()) {
446 cursor->pos = string_start;
447 return Error::kUnsupportedEncoding;
448 }
449 return Error::kNoError;
450 }
451
452 template <typename CharType>
BuildValue(Cursor<CharType> * cursor,const CharType * end,int max_depth,std::unique_ptr<JSONValue> * result)453 Error BuildValue(Cursor<CharType>* cursor,
454 const CharType* end,
455 int max_depth,
456 std::unique_ptr<JSONValue>* result) {
457 if (max_depth == 0)
458 return Error::kTooMuchNesting;
459
460 Cursor<CharType> token_start;
461 Token token;
462 Error error = ParseToken(cursor, end, &token, &token_start);
463 if (error != Error::kNoError)
464 return error;
465
466 switch (token) {
467 case kNullToken:
468 *result = JSONValue::Null();
469 break;
470 case kBoolTrue:
471 *result = std::make_unique<JSONBasicValue>(true);
472 break;
473 case kBoolFalse:
474 *result = std::make_unique<JSONBasicValue>(false);
475 break;
476 case kNumber: {
477 bool ok;
478 double value = CharactersToDouble(token_start.pos,
479 cursor->pos - token_start.pos, &ok);
480 if (Decimal::FromDouble(value).IsInfinity())
481 ok = false;
482 if (!ok) {
483 *cursor = token_start;
484 return Error::kSyntaxError;
485 }
486 if (base::IsValueInRangeForNumericType<int>(value) &&
487 static_cast<int>(value) == value)
488 *result = std::make_unique<JSONBasicValue>(static_cast<int>(value));
489 else
490 *result = std::make_unique<JSONBasicValue>(value);
491 break;
492 }
493 case kStringLiteral: {
494 String value;
495 error = DecodeString(&token_start, cursor->pos, &value);
496 if (error != Error::kNoError) {
497 *cursor = token_start;
498 return error;
499 }
500 *result = std::make_unique<JSONString>(value);
501 break;
502 }
503 case kArrayBegin: {
504 auto array = std::make_unique<JSONArray>();
505 Cursor<CharType> before_token = *cursor;
506 error = ParseToken(cursor, end, &token, &token_start);
507 if (error != Error::kNoError)
508 return error;
509 while (token != kArrayEnd) {
510 *cursor = before_token;
511 std::unique_ptr<JSONValue> array_node;
512 error = BuildValue(cursor, end, max_depth - 1, &array_node);
513 if (error != Error::kNoError)
514 return error;
515 array->PushValue(std::move(array_node));
516
517 // After a list value, we expect a comma or the end of the list.
518 error = ParseToken(cursor, end, &token, &token_start);
519 if (error != Error::kNoError)
520 return error;
521 if (token == kListSeparator) {
522 before_token = *cursor;
523 error = ParseToken(cursor, end, &token, &token_start);
524 if (error != Error::kNoError)
525 return error;
526 if (token == kArrayEnd) {
527 *cursor = token_start;
528 return Error::kUnexpectedToken;
529 }
530 } else if (token != kArrayEnd) {
531 // Unexpected value after list value. Bail out.
532 *cursor = token_start;
533 return Error::kUnexpectedToken;
534 }
535 }
536 if (token != kArrayEnd) {
537 *cursor = token_start;
538 return Error::kUnexpectedToken;
539 }
540 *result = std::move(array);
541 break;
542 }
543 case kObjectBegin: {
544 auto object = std::make_unique<JSONObject>();
545 error = ParseToken(cursor, end, &token, &token_start);
546 if (error != Error::kNoError)
547 return error;
548 while (token != kObjectEnd) {
549 if (token != kStringLiteral) {
550 *cursor = token_start;
551 return Error::kUnexpectedToken;
552 }
553 String key;
554 error = DecodeString(&token_start, cursor->pos, &key);
555 if (error != Error::kNoError) {
556 *cursor = token_start;
557 return error;
558 }
559
560 error = ParseToken(cursor, end, &token, &token_start);
561 if (token != kObjectPairSeparator) {
562 *cursor = token_start;
563 return Error::kUnexpectedToken;
564 }
565
566 std::unique_ptr<JSONValue> value;
567 error = BuildValue(cursor, end, max_depth - 1, &value);
568 if (error != Error::kNoError)
569 return error;
570 object->SetValue(key, std::move(value));
571
572 // After a key/value pair, we expect a comma or the end of the
573 // object.
574 error = ParseToken(cursor, end, &token, &token_start);
575 if (error != Error::kNoError)
576 return error;
577 if (token == kListSeparator) {
578 error = ParseToken(cursor, end, &token, &token_start);
579 if (error != Error::kNoError)
580 return error;
581 if (token == kObjectEnd) {
582 *cursor = token_start;
583 return Error::kUnexpectedToken;
584 }
585 } else if (token != kObjectEnd) {
586 // Unexpected value after last object value. Bail out.
587 *cursor = token_start;
588 return Error::kUnexpectedToken;
589 }
590 }
591 if (token != kObjectEnd) {
592 *cursor = token_start;
593 return Error::kUnexpectedToken;
594 }
595 *result = std::move(object);
596 break;
597 }
598
599 default:
600 // We got a token that's not a value.
601 *cursor = token_start;
602 return Error::kUnexpectedToken;
603 }
604
605 return SkipWhitespaceAndComments(cursor, end);
606 }
607
608 template <typename CharType>
ParseJSONInternal(const CharType * start_ptr,unsigned length,int max_depth,std::unique_ptr<JSONValue> * result)609 JSONParseError ParseJSONInternal(const CharType* start_ptr,
610 unsigned length,
611 int max_depth,
612 std::unique_ptr<JSONValue>* result) {
613 Cursor<CharType> cursor;
614 cursor.pos = start_ptr;
615 cursor.line = 0;
616 cursor.line_start = start_ptr;
617 const CharType* end = start_ptr + length;
618 JSONParseError error;
619 error.type = BuildValue(&cursor, end, max_depth, result);
620 error.line = cursor.line;
621 error.column = static_cast<int>(cursor.pos - cursor.line_start);
622 if (error.type != Error::kNoError) {
623 *result = nullptr;
624 } else if (cursor.pos != end) {
625 error.type = Error::kUnexpectedDataAfterRoot;
626 *result = nullptr;
627 }
628 return error;
629 }
630
631 } // anonymous namespace
632
ParseJSON(const String & json,JSONParseError * opt_error)633 std::unique_ptr<JSONValue> ParseJSON(const String& json,
634 JSONParseError* opt_error) {
635 return ParseJSON(json, kMaxStackLimit, opt_error);
636 }
637
ParseJSON(const String & json,int max_depth,JSONParseError * opt_error)638 std::unique_ptr<JSONValue> ParseJSON(const String& json,
639 int max_depth,
640 JSONParseError* opt_error) {
641 if (max_depth < 0)
642 max_depth = 0;
643 if (max_depth > kMaxStackLimit)
644 max_depth = kMaxStackLimit;
645
646 std::unique_ptr<JSONValue> result;
647 JSONParseError error;
648
649 if (json.IsEmpty()) {
650 error.type = Error::kSyntaxError;
651 error.line = 0;
652 error.column = 0;
653 } else if (json.Is8Bit()) {
654 error = ParseJSONInternal(json.Characters8(), json.length(), max_depth,
655 &result);
656 } else {
657 error = ParseJSONInternal(json.Characters16(), json.length(), max_depth,
658 &result);
659 }
660
661 if (opt_error) {
662 error.line++;
663 error.column++;
664 if (error.type != Error::kNoError)
665 error.message = FormatErrorMessage(error.type, error.line, error.column);
666 *opt_error = error;
667 }
668 return result;
669 }
670
671 } // namespace blink
672