1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/blink/renderer/platform/json/json_parser.h"
6 
7 #include "base/numerics/safe_conversions.h"
8 #include "third_party/blink/renderer/platform/json/json_values.h"
9 #include "third_party/blink/renderer/platform/wtf/decimal.h"
10 #include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
11 #include "third_party/blink/renderer/platform/wtf/text/string_to_number.h"
12 
13 namespace blink {
14 
15 namespace {
16 
17 const int kMaxStackLimit = 1000;
18 
19 using Error = JSONParseErrorType;
20 
FormatErrorMessage(Error error,int line,int column)21 String FormatErrorMessage(Error error, int line, int column) {
22   String text;
23   switch (error) {
24     case Error::kNoError:
25       NOTREACHED();
26       return "";
27     case Error::kUnexpectedToken:
28       text = "Unexpected token.";
29       break;
30     case Error::kSyntaxError:
31       text = "Syntax error.";
32       break;
33     case Error::kInvalidEscape:
34       text = "Invalid escape sequence.";
35       break;
36     case Error::kTooMuchNesting:
37       text = "Too much nesting.";
38       break;
39     case Error::kUnexpectedDataAfterRoot:
40       text = "Unexpected data after root element.";
41       break;
42     case Error::kUnsupportedEncoding:
43       text =
44           "Unsupported encoding. JSON and all string literals must contain "
45           "valid Unicode characters.";
46       break;
47   }
48   return "Line: " + String::Number(line) +
49          ", column: " + String::Number(column) + ", " + text;
50 }
51 
52 // Note: all parsing functions take a |cursor| parameter which is
53 // where they start parsing from.
54 // If the parsing succeeds, |cursor| will point to the position
55 // right after the parsed value, "consuming" some portion of the input.
56 // If the parsing fails, |cursor| will point to the error position.
57 
58 template <typename CharType>
59 struct Cursor {
60   int line;
61   const CharType* line_start;
62   const CharType* pos;
63 };
64 
65 enum Token {
66   kObjectBegin,
67   kObjectEnd,
68   kArrayBegin,
69   kArrayEnd,
70   kStringLiteral,
71   kNumber,
72   kBoolTrue,
73   kBoolFalse,
74   kNullToken,
75   kListSeparator,
76   kObjectPairSeparator,
77 };
78 
79 template <typename CharType>
ParseConstToken(Cursor<CharType> * cursor,const CharType * end,const char * token)80 Error ParseConstToken(Cursor<CharType>* cursor,
81                       const CharType* end,
82                       const char* token) {
83   const CharType* token_start = cursor->pos;
84   while (cursor->pos < end && *token != '\0' && *(cursor->pos++) == *token++) {
85   }
86   if (*token != '\0') {
87     cursor->pos = token_start;
88     return Error::kSyntaxError;
89   }
90   return Error::kNoError;
91 }
92 
93 template <typename CharType>
ReadInt(Cursor<CharType> * cursor,const CharType * end,bool can_have_leading_zeros)94 Error ReadInt(Cursor<CharType>* cursor,
95               const CharType* end,
96               bool can_have_leading_zeros) {
97   if (cursor->pos == end)
98     return Error::kSyntaxError;
99   const CharType* start_ptr = cursor->pos;
100   bool have_leading_zero = '0' == *(cursor->pos);
101   int length = 0;
102   while (cursor->pos < end && '0' <= *(cursor->pos) && *(cursor->pos) <= '9') {
103     ++(cursor->pos);
104     ++length;
105   }
106   if (!length)
107     return Error::kSyntaxError;
108   if (!can_have_leading_zeros && length > 1 && have_leading_zero) {
109     cursor->pos = start_ptr + 1;
110     return Error::kSyntaxError;
111   }
112   return Error::kNoError;
113 }
114 
115 template <typename CharType>
ParseNumberToken(Cursor<CharType> * cursor,const CharType * end)116 Error ParseNumberToken(Cursor<CharType>* cursor, const CharType* end) {
117   // We just grab the number here. We validate the size in DecodeNumber.
118   // According to RFC4627, a valid number is: [minus] int [frac] [exp]
119   if (cursor->pos == end)
120     return Error::kSyntaxError;
121   if (*(cursor->pos) == '-')
122     ++(cursor->pos);
123 
124   Error error = ReadInt(cursor, end, false);
125   if (error != Error::kNoError)
126     return error;
127 
128   if (cursor->pos == end)
129     return Error::kNoError;
130 
131   // Optional fraction part
132   CharType c = *(cursor->pos);
133   if ('.' == c) {
134     ++(cursor->pos);
135     error = ReadInt(cursor, end, true);
136     if (error != Error::kNoError)
137       return error;
138     if (cursor->pos == end)
139       return Error::kNoError;
140     c = *(cursor->pos);
141   }
142 
143   // Optional exponent part
144   if ('e' == c || 'E' == c) {
145     ++(cursor->pos);
146     if (cursor->pos == end)
147       return Error::kSyntaxError;
148     c = *(cursor->pos);
149     if ('-' == c || '+' == c) {
150       ++(cursor->pos);
151       if (cursor->pos == end)
152         return Error::kSyntaxError;
153     }
154     error = ReadInt(cursor, end, true);
155     if (error != Error::kNoError)
156       return error;
157   }
158 
159   return Error::kNoError;
160 }
161 
162 template <typename CharType>
ReadHexDigits(Cursor<CharType> * cursor,const CharType * end,int digits)163 Error ReadHexDigits(Cursor<CharType>* cursor, const CharType* end, int digits) {
164   const CharType* token_start = cursor->pos;
165   if (end - cursor->pos < digits)
166     return Error::kInvalidEscape;
167   for (int i = 0; i < digits; ++i) {
168     CharType c = *(cursor->pos)++;
169     if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
170           ('A' <= c && c <= 'F'))) {
171       cursor->pos = token_start;
172       return Error::kInvalidEscape;
173     }
174   }
175   return Error::kNoError;
176 }
177 
178 template <typename CharType>
ParseStringToken(Cursor<CharType> * cursor,const CharType * end)179 Error ParseStringToken(Cursor<CharType>* cursor, const CharType* end) {
180   if (cursor->pos == end)
181     return Error::kSyntaxError;
182   if (*(cursor->pos) != '"')
183     return Error::kSyntaxError;
184   ++(cursor->pos);
185   while (cursor->pos < end) {
186     CharType c = *(cursor->pos)++;
187     if ('\\' == c) {
188       if (cursor->pos == end)
189         return Error::kInvalidEscape;
190       c = *(cursor->pos)++;
191       // Make sure the escaped char is valid.
192       switch (c) {
193         case 'x': {
194           Error error = ReadHexDigits(cursor, end, 2);
195           if (error != Error::kNoError)
196             return error;
197           break;
198         }
199         case 'u': {
200           Error error = ReadHexDigits(cursor, end, 4);
201           if (error != Error::kNoError)
202             return error;
203           break;
204         }
205         case '\\':
206         case '/':
207         case 'b':
208         case 'f':
209         case 'n':
210         case 'r':
211         case 't':
212         case 'v':
213         case '"':
214           break;
215         default:
216           return Error::kInvalidEscape;
217       }
218     } else if (c < 0x20) {
219       return Error::kSyntaxError;
220     } else if ('"' == c) {
221       return Error::kNoError;
222     }
223   }
224   return Error::kSyntaxError;
225 }
226 
227 template <typename CharType>
SkipComment(Cursor<CharType> * cursor,const CharType * end)228 Error SkipComment(Cursor<CharType>* cursor, const CharType* end) {
229   const CharType* pos = cursor->pos;
230   if (pos == end)
231     return Error::kSyntaxError;
232 
233   if (*pos != '/' || pos + 1 >= end)
234     return Error::kSyntaxError;
235   ++pos;
236 
237   if (*pos == '/') {
238     // Single line comment, read to newline.
239     for (++pos; pos < end; ++pos) {
240       if (*pos == '\n') {
241         cursor->line++;
242         cursor->pos = pos + 1;
243         cursor->line_start = cursor->pos;
244         return Error::kNoError;
245       }
246     }
247     cursor->pos = end;
248     // Comment reaches end-of-input, which is fine.
249     return Error::kNoError;
250   }
251 
252   if (*pos == '*') {
253     CharType previous = '\0';
254     // Block comment, read until end marker.
255     for (++pos; pos < end; previous = *pos++) {
256       if (*pos == '\n') {
257         cursor->line++;
258         cursor->line_start = pos + 1;
259       }
260       if (previous == '*' && *pos == '/') {
261         cursor->pos = pos + 1;
262         return Error::kNoError;
263       }
264     }
265     // Block comment must close before end-of-input.
266     return Error::kSyntaxError;
267   }
268 
269   return Error::kSyntaxError;
270 }
271 
272 template <typename CharType>
SkipWhitespaceAndComments(Cursor<CharType> * cursor,const CharType * end)273 Error SkipWhitespaceAndComments(Cursor<CharType>* cursor, const CharType* end) {
274   while (cursor->pos < end) {
275     CharType c = *(cursor->pos);
276     if (c == '\n') {
277       cursor->line++;
278       ++(cursor->pos);
279       cursor->line_start = cursor->pos;
280     } else if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
281       ++(cursor->pos);
282     } else if (c == '/') {
283       Error error = SkipComment(cursor, end);
284       if (error != Error::kNoError)
285         return error;
286     } else {
287       break;
288     }
289   }
290   return Error::kNoError;
291 }
292 
293 template <typename CharType>
ParseToken(Cursor<CharType> * cursor,const CharType * end,Token * token,Cursor<CharType> * token_start)294 Error ParseToken(Cursor<CharType>* cursor,
295                  const CharType* end,
296                  Token* token,
297                  Cursor<CharType>* token_start) {
298   Error error = SkipWhitespaceAndComments(cursor, end);
299   if (error != Error::kNoError)
300     return error;
301   *token_start = *cursor;
302 
303   if (cursor->pos == end)
304     return Error::kSyntaxError;
305 
306   switch (*(cursor->pos)) {
307     case 'n':
308       *token = kNullToken;
309       return ParseConstToken(cursor, end, kJSONNullString);
310     case 't':
311       *token = kBoolTrue;
312       return ParseConstToken(cursor, end, kJSONTrueString);
313     case 'f':
314       *token = kBoolFalse;
315       return ParseConstToken(cursor, end, kJSONFalseString);
316     case '[':
317       ++(cursor->pos);
318       *token = kArrayBegin;
319       return Error::kNoError;
320     case ']':
321       ++(cursor->pos);
322       *token = kArrayEnd;
323       return Error::kNoError;
324     case ',':
325       ++(cursor->pos);
326       *token = kListSeparator;
327       return Error::kNoError;
328     case '{':
329       ++(cursor->pos);
330       *token = kObjectBegin;
331       return Error::kNoError;
332     case '}':
333       ++(cursor->pos);
334       *token = kObjectEnd;
335       return Error::kNoError;
336     case ':':
337       ++(cursor->pos);
338       *token = kObjectPairSeparator;
339       return Error::kNoError;
340     case '0':
341     case '1':
342     case '2':
343     case '3':
344     case '4':
345     case '5':
346     case '6':
347     case '7':
348     case '8':
349     case '9':
350     case '-':
351       *token = kNumber;
352       return ParseNumberToken(cursor, end);
353     case '"':
354       *token = kStringLiteral;
355       return ParseStringToken(cursor, end);
356   }
357 
358   return Error::kSyntaxError;
359 }
360 
361 template <typename CharType>
HexToInt(CharType c)362 inline int HexToInt(CharType c) {
363   if ('0' <= c && c <= '9')
364     return c - '0';
365   if ('A' <= c && c <= 'F')
366     return c - 'A' + 10;
367   if ('a' <= c && c <= 'f')
368     return c - 'a' + 10;
369   NOTREACHED();
370   return 0;
371 }
372 
373 template <typename CharType>
DecodeString(Cursor<CharType> * cursor,const CharType * end,String * output)374 Error DecodeString(Cursor<CharType>* cursor,
375                    const CharType* end,
376                    String* output) {
377   if (cursor->pos + 1 > end - 1)
378     return Error::kSyntaxError;
379   if (cursor->pos + 1 == end - 1) {
380     *output = "";
381     return Error::kNoError;
382   }
383 
384   const CharType* string_start = cursor->pos;
385   StringBuilder buffer;
386   buffer.ReserveCapacity(static_cast<wtf_size_t>(end - cursor->pos - 2));
387 
388   cursor->pos++;
389   while (cursor->pos < end - 1) {
390     UChar c = *(cursor->pos)++;
391     if (c == '\n') {
392       cursor->line++;
393       cursor->line_start = cursor->pos;
394     }
395     if ('\\' != c) {
396       buffer.Append(c);
397       continue;
398     }
399     if (cursor->pos == end - 1)
400       return Error::kInvalidEscape;
401     c = *(cursor->pos)++;
402 
403     if (c == 'x') {
404       // \x is not supported.
405       return Error::kInvalidEscape;
406     }
407 
408     switch (c) {
409       case '"':
410       case '/':
411       case '\\':
412         break;
413       case 'b':
414         c = '\b';
415         break;
416       case 'f':
417         c = '\f';
418         break;
419       case 'n':
420         c = '\n';
421         break;
422       case 'r':
423         c = '\r';
424         break;
425       case 't':
426         c = '\t';
427         break;
428       case 'v':
429         c = '\v';
430         break;
431       case 'u':
432         c = (HexToInt(*(cursor->pos)) << 12) +
433             (HexToInt(*(cursor->pos + 1)) << 8) +
434             (HexToInt(*(cursor->pos + 2)) << 4) + HexToInt(*(cursor->pos + 3));
435         cursor->pos += 4;
436         break;
437       default:
438         return Error::kInvalidEscape;
439     }
440     buffer.Append(c);
441   }
442   *output = buffer.ToString();
443 
444   // Validate constructed utf16 string.
445   if (output->Utf8(kStrictUTF8Conversion).empty()) {
446     cursor->pos = string_start;
447     return Error::kUnsupportedEncoding;
448   }
449   return Error::kNoError;
450 }
451 
452 template <typename CharType>
BuildValue(Cursor<CharType> * cursor,const CharType * end,int max_depth,std::unique_ptr<JSONValue> * result)453 Error BuildValue(Cursor<CharType>* cursor,
454                  const CharType* end,
455                  int max_depth,
456                  std::unique_ptr<JSONValue>* result) {
457   if (max_depth == 0)
458     return Error::kTooMuchNesting;
459 
460   Cursor<CharType> token_start;
461   Token token;
462   Error error = ParseToken(cursor, end, &token, &token_start);
463   if (error != Error::kNoError)
464     return error;
465 
466   switch (token) {
467     case kNullToken:
468       *result = JSONValue::Null();
469       break;
470     case kBoolTrue:
471       *result = std::make_unique<JSONBasicValue>(true);
472       break;
473     case kBoolFalse:
474       *result = std::make_unique<JSONBasicValue>(false);
475       break;
476     case kNumber: {
477       bool ok;
478       double value = CharactersToDouble(token_start.pos,
479                                         cursor->pos - token_start.pos, &ok);
480       if (Decimal::FromDouble(value).IsInfinity())
481         ok = false;
482       if (!ok) {
483         *cursor = token_start;
484         return Error::kSyntaxError;
485       }
486       if (base::IsValueInRangeForNumericType<int>(value) &&
487           static_cast<int>(value) == value)
488         *result = std::make_unique<JSONBasicValue>(static_cast<int>(value));
489       else
490         *result = std::make_unique<JSONBasicValue>(value);
491       break;
492     }
493     case kStringLiteral: {
494       String value;
495       error = DecodeString(&token_start, cursor->pos, &value);
496       if (error != Error::kNoError) {
497         *cursor = token_start;
498         return error;
499       }
500       *result = std::make_unique<JSONString>(value);
501       break;
502     }
503     case kArrayBegin: {
504       auto array = std::make_unique<JSONArray>();
505       Cursor<CharType> before_token = *cursor;
506       error = ParseToken(cursor, end, &token, &token_start);
507       if (error != Error::kNoError)
508         return error;
509       while (token != kArrayEnd) {
510         *cursor = before_token;
511         std::unique_ptr<JSONValue> array_node;
512         error = BuildValue(cursor, end, max_depth - 1, &array_node);
513         if (error != Error::kNoError)
514           return error;
515         array->PushValue(std::move(array_node));
516 
517         // After a list value, we expect a comma or the end of the list.
518         error = ParseToken(cursor, end, &token, &token_start);
519         if (error != Error::kNoError)
520           return error;
521         if (token == kListSeparator) {
522           before_token = *cursor;
523           error = ParseToken(cursor, end, &token, &token_start);
524           if (error != Error::kNoError)
525             return error;
526           if (token == kArrayEnd) {
527             *cursor = token_start;
528             return Error::kUnexpectedToken;
529           }
530         } else if (token != kArrayEnd) {
531           // Unexpected value after list value. Bail out.
532           *cursor = token_start;
533           return Error::kUnexpectedToken;
534         }
535       }
536       if (token != kArrayEnd) {
537         *cursor = token_start;
538         return Error::kUnexpectedToken;
539       }
540       *result = std::move(array);
541       break;
542     }
543     case kObjectBegin: {
544       auto object = std::make_unique<JSONObject>();
545       error = ParseToken(cursor, end, &token, &token_start);
546       if (error != Error::kNoError)
547         return error;
548       while (token != kObjectEnd) {
549         if (token != kStringLiteral) {
550           *cursor = token_start;
551           return Error::kUnexpectedToken;
552         }
553         String key;
554         error = DecodeString(&token_start, cursor->pos, &key);
555         if (error != Error::kNoError) {
556           *cursor = token_start;
557           return error;
558         }
559 
560         error = ParseToken(cursor, end, &token, &token_start);
561         if (token != kObjectPairSeparator) {
562           *cursor = token_start;
563           return Error::kUnexpectedToken;
564         }
565 
566         std::unique_ptr<JSONValue> value;
567         error = BuildValue(cursor, end, max_depth - 1, &value);
568         if (error != Error::kNoError)
569           return error;
570         object->SetValue(key, std::move(value));
571 
572         // After a key/value pair, we expect a comma or the end of the
573         // object.
574         error = ParseToken(cursor, end, &token, &token_start);
575         if (error != Error::kNoError)
576           return error;
577         if (token == kListSeparator) {
578           error = ParseToken(cursor, end, &token, &token_start);
579           if (error != Error::kNoError)
580             return error;
581           if (token == kObjectEnd) {
582             *cursor = token_start;
583             return Error::kUnexpectedToken;
584           }
585         } else if (token != kObjectEnd) {
586           // Unexpected value after last object value. Bail out.
587           *cursor = token_start;
588           return Error::kUnexpectedToken;
589         }
590       }
591       if (token != kObjectEnd) {
592         *cursor = token_start;
593         return Error::kUnexpectedToken;
594       }
595       *result = std::move(object);
596       break;
597     }
598 
599     default:
600       // We got a token that's not a value.
601       *cursor = token_start;
602       return Error::kUnexpectedToken;
603   }
604 
605   return SkipWhitespaceAndComments(cursor, end);
606 }
607 
608 template <typename CharType>
ParseJSONInternal(const CharType * start_ptr,unsigned length,int max_depth,std::unique_ptr<JSONValue> * result)609 JSONParseError ParseJSONInternal(const CharType* start_ptr,
610                                  unsigned length,
611                                  int max_depth,
612                                  std::unique_ptr<JSONValue>* result) {
613   Cursor<CharType> cursor;
614   cursor.pos = start_ptr;
615   cursor.line = 0;
616   cursor.line_start = start_ptr;
617   const CharType* end = start_ptr + length;
618   JSONParseError error;
619   error.type = BuildValue(&cursor, end, max_depth, result);
620   error.line = cursor.line;
621   error.column = static_cast<int>(cursor.pos - cursor.line_start);
622   if (error.type != Error::kNoError) {
623     *result = nullptr;
624   } else if (cursor.pos != end) {
625     error.type = Error::kUnexpectedDataAfterRoot;
626     *result = nullptr;
627   }
628   return error;
629 }
630 
631 }  // anonymous namespace
632 
ParseJSON(const String & json,JSONParseError * opt_error)633 std::unique_ptr<JSONValue> ParseJSON(const String& json,
634                                      JSONParseError* opt_error) {
635   return ParseJSON(json, kMaxStackLimit, opt_error);
636 }
637 
ParseJSON(const String & json,int max_depth,JSONParseError * opt_error)638 std::unique_ptr<JSONValue> ParseJSON(const String& json,
639                                      int max_depth,
640                                      JSONParseError* opt_error) {
641   if (max_depth < 0)
642     max_depth = 0;
643   if (max_depth > kMaxStackLimit)
644     max_depth = kMaxStackLimit;
645 
646   std::unique_ptr<JSONValue> result;
647   JSONParseError error;
648 
649   if (json.IsEmpty()) {
650     error.type = Error::kSyntaxError;
651     error.line = 0;
652     error.column = 0;
653   } else if (json.Is8Bit()) {
654     error = ParseJSONInternal(json.Characters8(), json.length(), max_depth,
655                               &result);
656   } else {
657     error = ParseJSONInternal(json.Characters16(), json.length(), max_depth,
658                               &result);
659   }
660 
661   if (opt_error) {
662     error.line++;
663     error.column++;
664     if (error.type != Error::kNoError)
665       error.message = FormatErrorMessage(error.type, error.line, error.column);
666     *opt_error = error;
667   }
668   return result;
669 }
670 
671 }  // namespace blink
672