1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "vm/JSONParser.h"
8 
9 #include "mozilla/Range.h"
10 #include "mozilla/RangedPtr.h"
11 #include "mozilla/Sprintf.h"
12 #include "mozilla/TextUtils.h"
13 
14 #include "jsnum.h"
15 
16 #include "builtin/Array.h"
17 #include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
18 #include "util/StringBuffer.h"
19 #include "vm/PlainObject.h"  // js::NewPlainObjectWithProperties
20 #include "vm/Realm.h"
21 
22 #include "vm/NativeObject-inl.h"
23 
24 using namespace js;
25 
26 using mozilla::AsciiAlphanumericToNumber;
27 using mozilla::IsAsciiDigit;
28 using mozilla::IsAsciiHexDigit;
29 using mozilla::RangedPtr;
30 
~JSONParserBase()31 JSONParserBase::~JSONParserBase() {
32   for (size_t i = 0; i < stack.length(); i++) {
33     if (stack[i].state == FinishArrayElement) {
34       js_delete(&stack[i].elements());
35     } else {
36       js_delete(&stack[i].properties());
37     }
38   }
39 
40   for (size_t i = 0; i < freeElements.length(); i++) {
41     js_delete(freeElements[i]);
42   }
43 
44   for (size_t i = 0; i < freeProperties.length(); i++) {
45     js_delete(freeProperties[i]);
46   }
47 }
48 
trace(JSTracer * trc)49 void JSONParserBase::trace(JSTracer* trc) {
50   for (auto& elem : stack) {
51     if (elem.state == FinishArrayElement) {
52       elem.elements().trace(trc);
53     } else {
54       elem.properties().trace(trc);
55     }
56   }
57 }
58 
59 template <typename CharT>
getTextPosition(uint32_t * column,uint32_t * line)60 void JSONParser<CharT>::getTextPosition(uint32_t* column, uint32_t* line) {
61   CharPtr ptr = begin;
62   uint32_t col = 1;
63   uint32_t row = 1;
64   for (; ptr < current; ptr++) {
65     if (*ptr == '\n' || *ptr == '\r') {
66       ++row;
67       col = 1;
68       // \r\n is treated as a single newline.
69       if (ptr + 1 < current && *ptr == '\r' && *(ptr + 1) == '\n') {
70         ++ptr;
71       }
72     } else {
73       ++col;
74     }
75   }
76   *column = col;
77   *line = row;
78 }
79 
80 template <typename CharT>
error(const char * msg)81 void JSONParser<CharT>::error(const char* msg) {
82   if (parseType == ParseType::JSONParse) {
83     uint32_t column = 1, line = 1;
84     getTextPosition(&column, &line);
85 
86     const size_t MaxWidth = sizeof("4294967295");
87     char columnNumber[MaxWidth];
88     SprintfLiteral(columnNumber, "%" PRIu32, column);
89     char lineNumber[MaxWidth];
90     SprintfLiteral(lineNumber, "%" PRIu32, line);
91 
92     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
93                               JSMSG_JSON_BAD_PARSE, msg, lineNumber,
94                               columnNumber);
95   }
96 }
97 
errorReturn()98 bool JSONParserBase::errorReturn() {
99   return parseType == ParseType::AttemptForEval;
100 }
101 
// Scans a double-quoted string whose opening quote is at |current|.
//
// On success, returns stringToken(str) with |current| just past the closing
// quote.  When ST is PropertyName the string is produced by AtomizeChars /
// finishAtom(); otherwise by NewStringCopyN / finishString().
//
// Returns token(OOM) when creating the string or appending to the temporary
// buffer fails, and token(Error) (after calling error()) for malformed input.
// On the error paths |current| is left at the position that error() reports.
template <typename CharT>
template <JSONParserBase::StringType ST>
JSONParserBase::Token JSONParser<CharT>::readString() {
  MOZ_ASSERT(current < end);
  MOZ_ASSERT(*current == '"');

  /*
   * JSONString:
   *   /^"([^\u0000-\u001F"\\]|\\(["/\\bfnrt]|u[0-9a-fA-F]{4}))*"$/
   */

  // Step over the opening quote; nothing after it means no closing quote.
  if (++current == end) {
    error("unterminated string literal");
    return token(Error);
  }

  /*
   * Optimization: if the source contains no escaped characters, create the
   * string directly from the source text.
   */
  CharPtr start = current;
  for (; current < end; current++) {
    if (*current == '"') {
      size_t length = current - start;
      current++;
      JSLinearString* str =
          (ST == JSONParser::PropertyName)
              ? AtomizeChars(cx, start.get(), length)
              : NewStringCopyN<CanGC>(cx, start.get(), length);
      if (!str) {
        return token(OOM);
      }
      return stringToken(str);
    }

    // First escape found: leave the fast path.  |start|..|current| holds the
    // unescaped prefix that the slow path copies into its buffer.
    if (*current == '\\') {
      break;
    }

    if (*current <= 0x001F) {
      error("bad control character in string literal");
      return token(Error);
    }
  }

  /*
   * Slow case: string contains escaped characters.  Copy a maximal sequence
   * of unescaped characters into a temporary buffer, then an escaped
   * character, and repeat until the entire string is consumed.
   */
  JSStringBuilder buffer(cx);
  do {
    // Flush the pending run of unescaped characters, if any.
    if (start < current && !buffer.append(start.get(), current.get())) {
      return token(OOM);
    }

    // Ran out of input before a closing quote: report it after the loop.
    if (current >= end) {
      break;
    }

    char16_t c = *current++;
    if (c == '"') {
      JSLinearString* str = (ST == JSONParser::PropertyName)
                                ? buffer.finishAtom()
                                : buffer.finishString();
      if (!str) {
        return token(OOM);
      }
      return stringToken(str);
    }

    // The scan loops only stop at '"', '\\', a character <= 0x1F, or the end
    // of input, so anything other than a backslash here is a control
    // character.
    if (c != '\\') {
      // Step back so the reported position is the offending character.
      --current;
      error("bad character in string literal");
      return token(Error);
    }

    // A backslash as the very last character: unterminated string.
    if (current >= end) {
      break;
    }

    switch (*current++) {
      case '"':
        c = '"';
        break;
      case '/':
        c = '/';
        break;
      case '\\':
        c = '\\';
        break;
      case 'b':
        c = '\b';
        break;
      case 'f':
        c = '\f';
        break;
      case 'n':
        c = '\n';
        break;
      case 'r':
        c = '\r';
        break;
      case 't':
        c = '\t';
        break;

      case 'u':
        // \uXXXX requires exactly four hexadecimal digits.
        if (end - current < 4 ||
            !(IsAsciiHexDigit(current[0]) && IsAsciiHexDigit(current[1]) &&
              IsAsciiHexDigit(current[2]) && IsAsciiHexDigit(current[3]))) {
          // Point to the first non-hexadecimal character (which may be
          // missing).
          if (current == end || !IsAsciiHexDigit(current[0])) {
            ;  // already at correct location
          } else if (current + 1 == end || !IsAsciiHexDigit(current[1])) {
            current += 1;
          } else if (current + 2 == end || !IsAsciiHexDigit(current[2])) {
            current += 2;
          } else if (current + 3 == end || !IsAsciiHexDigit(current[3])) {
            current += 3;
          } else {
            MOZ_CRASH("logic error determining first erroneous character");
          }

          error("bad Unicode escape");
          return token(Error);
        }
        // Assemble the 16-bit code unit from the four hex digits, most
        // significant digit first.
        c = (AsciiAlphanumericToNumber(current[0]) << 12) |
            (AsciiAlphanumericToNumber(current[1]) << 8) |
            (AsciiAlphanumericToNumber(current[2]) << 4) |
            (AsciiAlphanumericToNumber(current[3]));
        current += 4;
        break;

      default:
        // Step back so the reported position is the bad escape character.
        current--;
        error("bad escaped character");
        return token(Error);
    }
    if (!buffer.append(c)) {
      return token(OOM);
    }

    // Find the next run of unescaped characters; it is appended at the top of
    // the next iteration.
    start = current;
    for (; current < end; current++) {
      if (*current == '"' || *current == '\\' || *current <= 0x001F) {
        break;
      }
    }
  } while (current < end);

  // Reached the end of input without seeing the closing quote.
  error("unterminated string");
  return token(Error);
}
257 
// Scans a JSON number starting at |current| (a digit or '-').
//
// On success, returns numberToken(value) with |current| just past the last
// character of the number.  Returns token(Error) (after calling error()) when
// the text does not match the JSONNumber grammar, and token(OOM) when
// GetFullInteger or FullStringToDouble fails.
template <typename CharT>
JSONParserBase::Token JSONParser<CharT>::readNumber() {
  MOZ_ASSERT(current < end);
  MOZ_ASSERT(IsAsciiDigit(*current) || *current == '-');

  /*
   * JSONNumber:
   *   /^-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][\+\-]?[0-9]+)?$/
   */

  bool negative = *current == '-';

  /* -? */
  if (negative && ++current == end) {
    error("no number after minus sign");
    return token(Error);
  }

  // The sign is excluded from the digits handed to the converters; it is
  // applied to the result afterwards.
  const CharPtr digitStart = current;

  /* 0|[1-9][0-9]* */
  if (!IsAsciiDigit(*current)) {
    error("unexpected non-digit");
    return token(Error);
  }
  // A leading '0' stands alone; any other leading digit may be followed by
  // further digits.
  if (*current++ != '0') {
    for (; current < end; current++) {
      if (!IsAsciiDigit(*current)) {
        break;
      }
    }
  }

  /* Fast path: no fractional or exponent part. */
  if (current == end ||
      (*current != '.' && *current != 'e' && *current != 'E')) {
    mozilla::Range<const CharT> chars(digitStart.get(), current - digitStart);
    if (chars.length() < strlen("9007199254740992")) {
      // If the decimal number is shorter than the length of 2**53, (the
      // largest number a double can represent with integral precision),
      // parse it using a decimal-only parser.  This comparison is
      // conservative but faster than a fully-precise check.
      double d = ParseDecimalNumber(chars);
      return numberToken(negative ? -d : d);
    }

    double d;
    if (!GetFullInteger(cx, digitStart.get(), current.get(), 10,
                        IntegerSeparatorHandling::None, &d)) {
      return token(OOM);
    }
    return numberToken(negative ? -d : d);
  }

  /* (\.[0-9]+)? */
  if (current < end && *current == '.') {
    if (++current == end) {
      error("missing digits after decimal point");
      return token(Error);
    }
    if (!IsAsciiDigit(*current)) {
      error("unterminated fractional number");
      return token(Error);
    }
    // The first fractional digit was checked above; consume the rest.
    while (++current < end) {
      if (!IsAsciiDigit(*current)) {
        break;
      }
    }
  }

  /* ([eE][\+\-]?[0-9]+)? */
  if (current < end && (*current == 'e' || *current == 'E')) {
    if (++current == end) {
      error("missing digits after exponent indicator");
      return token(Error);
    }
    if (*current == '+' || *current == '-') {
      if (++current == end) {
        error("missing digits after exponent sign");
        return token(Error);
      }
    }
    if (!IsAsciiDigit(*current)) {
      error("exponent part is missing a number");
      return token(Error);
    }
    // The first exponent digit was checked above; consume the rest.
    while (++current < end) {
      if (!IsAsciiDigit(*current)) {
        break;
      }
    }
  }

  // Convert the validated text (without the sign) to a double.
  double d;
  if (!FullStringToDouble(cx, digitStart.get(), current.get(), &d)) {
    return token(OOM);
  }
  return numberToken(negative ? -d : d);
}
358 
// True for the four characters this parser skips between tokens: space, tab,
// line feed and carriage return.
static inline bool IsJSONWhitespace(char16_t c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
362 
363 template <typename CharT>
advance()364 JSONParserBase::Token JSONParser<CharT>::advance() {
365   while (current < end && IsJSONWhitespace(*current)) {
366     current++;
367   }
368   if (current >= end) {
369     error("unexpected end of data");
370     return token(Error);
371   }
372 
373   switch (*current) {
374     case '"':
375       return readString<LiteralValue>();
376 
377     case '-':
378     case '0':
379     case '1':
380     case '2':
381     case '3':
382     case '4':
383     case '5':
384     case '6':
385     case '7':
386     case '8':
387     case '9':
388       return readNumber();
389 
390     case 't':
391       if (end - current < 4 || current[1] != 'r' || current[2] != 'u' ||
392           current[3] != 'e') {
393         error("unexpected keyword");
394         return token(Error);
395       }
396       current += 4;
397       return token(True);
398 
399     case 'f':
400       if (end - current < 5 || current[1] != 'a' || current[2] != 'l' ||
401           current[3] != 's' || current[4] != 'e') {
402         error("unexpected keyword");
403         return token(Error);
404       }
405       current += 5;
406       return token(False);
407 
408     case 'n':
409       if (end - current < 4 || current[1] != 'u' || current[2] != 'l' ||
410           current[3] != 'l') {
411         error("unexpected keyword");
412         return token(Error);
413       }
414       current += 4;
415       return token(Null);
416 
417     case '[':
418       current++;
419       return token(ArrayOpen);
420     case ']':
421       current++;
422       return token(ArrayClose);
423 
424     case '{':
425       current++;
426       return token(ObjectOpen);
427     case '}':
428       current++;
429       return token(ObjectClose);
430 
431     case ',':
432       current++;
433       return token(Comma);
434 
435     case ':':
436       current++;
437       return token(Colon);
438 
439     default:
440       error("unexpected character");
441       return token(Error);
442   }
443 }
444 
445 template <typename CharT>
advanceAfterObjectOpen()446 JSONParserBase::Token JSONParser<CharT>::advanceAfterObjectOpen() {
447   MOZ_ASSERT(current[-1] == '{');
448 
449   while (current < end && IsJSONWhitespace(*current)) {
450     current++;
451   }
452   if (current >= end) {
453     error("end of data while reading object contents");
454     return token(Error);
455   }
456 
457   if (*current == '"') {
458     return readString<PropertyName>();
459   }
460 
461   if (*current == '}') {
462     current++;
463     return token(ObjectClose);
464   }
465 
466   error("expected property name or '}'");
467   return token(Error);
468 }
469 
470 template <typename CharT>
AssertPastValue(const RangedPtr<const CharT> current)471 static inline void AssertPastValue(const RangedPtr<const CharT> current) {
472   /*
473    * We're past an arbitrary JSON value, so the previous character is
474    * *somewhat* constrained, even if this assertion is pretty broad.  Don't
475    * knock it till you tried it: this assertion *did* catch a bug once.
476    */
477   MOZ_ASSERT((current[-1] == 'l' && current[-2] == 'l' && current[-3] == 'u' &&
478               current[-4] == 'n') ||
479              (current[-1] == 'e' && current[-2] == 'u' && current[-3] == 'r' &&
480               current[-4] == 't') ||
481              (current[-1] == 'e' && current[-2] == 's' && current[-3] == 'l' &&
482               current[-4] == 'a' && current[-5] == 'f') ||
483              current[-1] == '}' || current[-1] == ']' || current[-1] == '"' ||
484              IsAsciiDigit(current[-1]));
485 }
486 
487 template <typename CharT>
advanceAfterArrayElement()488 JSONParserBase::Token JSONParser<CharT>::advanceAfterArrayElement() {
489   AssertPastValue(current);
490 
491   while (current < end && IsJSONWhitespace(*current)) {
492     current++;
493   }
494   if (current >= end) {
495     error("end of data when ',' or ']' was expected");
496     return token(Error);
497   }
498 
499   if (*current == ',') {
500     current++;
501     return token(Comma);
502   }
503 
504   if (*current == ']') {
505     current++;
506     return token(ArrayClose);
507   }
508 
509   error("expected ',' or ']' after array element");
510   return token(Error);
511 }
512 
513 template <typename CharT>
advancePropertyName()514 JSONParserBase::Token JSONParser<CharT>::advancePropertyName() {
515   MOZ_ASSERT(current[-1] == ',');
516 
517   while (current < end && IsJSONWhitespace(*current)) {
518     current++;
519   }
520   if (current >= end) {
521     error("end of data when property name was expected");
522     return token(Error);
523   }
524 
525   if (*current == '"') {
526     return readString<PropertyName>();
527   }
528 
529   error("expected double-quoted property name");
530   return token(Error);
531 }
532 
533 template <typename CharT>
advancePropertyColon()534 JSONParserBase::Token JSONParser<CharT>::advancePropertyColon() {
535   MOZ_ASSERT(current[-1] == '"');
536 
537   while (current < end && IsJSONWhitespace(*current)) {
538     current++;
539   }
540   if (current >= end) {
541     error("end of data after property name when ':' was expected");
542     return token(Error);
543   }
544 
545   if (*current == ':') {
546     current++;
547     return token(Colon);
548   }
549 
550   error("expected ':' after property name in object");
551   return token(Error);
552 }
553 
554 template <typename CharT>
advanceAfterProperty()555 JSONParserBase::Token JSONParser<CharT>::advanceAfterProperty() {
556   AssertPastValue(current);
557 
558   while (current < end && IsJSONWhitespace(*current)) {
559     current++;
560   }
561   if (current >= end) {
562     error("end of data after property value in object");
563     return token(Error);
564   }
565 
566   if (*current == ',') {
567     current++;
568     return token(Comma);
569   }
570 
571   if (*current == '}') {
572     current++;
573     return token(ObjectClose);
574   }
575 
576   error("expected ',' or '}' after property value in object");
577   return token(Error);
578 }
579 
finishObject(MutableHandleValue vp,PropertyVector & properties)580 inline bool JSONParserBase::finishObject(MutableHandleValue vp,
581                                          PropertyVector& properties) {
582   MOZ_ASSERT(&properties == &stack.back().properties());
583 
584   JSObject* obj = NewPlainObjectWithProperties(
585       cx, properties.begin(), properties.length(), GenericObject);
586   if (!obj) {
587     return false;
588   }
589 
590   vp.setObject(*obj);
591   if (!freeProperties.append(&properties)) {
592     return false;
593   }
594   stack.popBack();
595   return true;
596 }
597 
finishArray(MutableHandleValue vp,ElementVector & elements)598 inline bool JSONParserBase::finishArray(MutableHandleValue vp,
599                                         ElementVector& elements) {
600   MOZ_ASSERT(&elements == &stack.back().elements());
601 
602   ArrayObject* obj =
603       NewDenseCopiedArray(cx, elements.length(), elements.begin());
604   if (!obj) {
605     return false;
606   }
607 
608   vp.setObject(*obj);
609   if (!freeElements.append(&elements)) {
610     return false;
611   }
612   stack.popBack();
613   return true;
614 }
615 
// Parses the whole input as a single JSON value and stores it in |vp|.
//
// The parser is an explicit state machine rather than a recursive descent:
// each array or object being built has an entry on |stack| holding its
// element or property vector, and after a value is completed the state of the
// top entry (FinishArrayElement or FinishObjectMember) decides what is read
// next.  The labels JSONMember, JSONValue and JSONValueSwitch are jump targets
// that let one state continue directly into the next without going around
// the outer loop.
//
// Returns:
//   - true with |vp| set to the parsed value on success;
//   - false when an allocation or object/array creation fails, or a scanner
//     returns an OOM token;
//   - errorReturn() after a syntax error;
//   - true with |vp| left undefined when the parse type is AttemptForEval and
//     a "__proto__" property name is encountered.
template <typename CharT>
bool JSONParser<CharT>::parse(MutableHandleValue vp) {
  RootedValue value(cx);
  MOZ_ASSERT(stack.empty());

  vp.setUndefined();

  Token token;
  ParserState state = JSONValue;
  while (true) {
    switch (state) {
      case FinishObjectMember: {
        // |value| is the just-parsed value of the most recently added
        // property; store it, then look for ',' or '}'.
        PropertyVector& properties = stack.back().properties();
        properties.back().value = value;

        token = advanceAfterProperty();
        if (token == ObjectClose) {
          if (!finishObject(&value, properties)) {
            return false;
          }
          break;
        }
        if (token != Comma) {
          if (token == OOM) {
            return false;
          }
          if (token != Error) {
            error(
                "expected ',' or '}' after property-value pair in object "
                "literal");
          }
          return errorReturn();
        }
        token = advancePropertyName();
        /* FALL THROUGH */
      }

      // Expects |token| to be the result of scanning a property name.
      JSONMember:
        if (token == String) {
          jsid id = AtomToId(atomValue());
          if (parseType == ParseType::AttemptForEval) {
            // In |JSON.parse|, "__proto__" is a property like any other and may
            // appear multiple times. In object literal syntax, "__proto__" is
            // prototype mutation and can appear at most once. |JSONParser| only
            // supports the former semantics, so if this parse attempt is for
            // |eval|, return true (without reporting an error) to indicate the
            // JSON parse attempt was unsuccessful.
            if (id == NameToId(cx->names().proto)) {
              return true;
            }
          }
          // Add the property now; its value is filled in by the
          // FinishObjectMember state once the value has been parsed.
          PropertyVector& properties = stack.back().properties();
          if (!properties.emplaceBack(id)) {
            return false;
          }
          token = advancePropertyColon();
          if (token != Colon) {
            MOZ_ASSERT(token == Error);
            return errorReturn();
          }
          goto JSONValue;
        }
        if (token == OOM) {
          return false;
        }
        if (token != Error) {
          error("property names must be double-quoted strings");
        }
        return errorReturn();

      case FinishArrayElement: {
        // |value| is the just-parsed element; append it, then look for ','
        // or ']'.
        ElementVector& elements = stack.back().elements();
        if (!elements.append(value.get())) {
          return false;
        }
        token = advanceAfterArrayElement();
        if (token == Comma) {
          goto JSONValue;
        }
        if (token == ArrayClose) {
          if (!finishArray(&value, elements)) {
            return false;
          }
          break;
        }
        MOZ_ASSERT(token == Error);
        return errorReturn();
      }

      JSONValue:
      case JSONValue:
        token = advance();
      // Entered directly (via goto) when |token| has already been scanned.
      JSONValueSwitch:
        switch (token) {
          case String:
            value = stringValue();
            break;
          case Number:
            value = numberValue();
            break;
          case True:
            value = BooleanValue(true);
            break;
          case False:
            value = BooleanValue(false);
            break;
          case Null:
            value = NullValue();
            break;

          case ArrayOpen: {
            // Reuse a previously finished vector when one is available,
            // otherwise allocate a new one.
            ElementVector* elements;
            if (!freeElements.empty()) {
              elements = freeElements.popCopy();
              elements->clear();
            } else {
              elements = cx->new_<ElementVector>(cx);
              if (!elements) {
                return false;
              }
            }
            // The vector is no longer on the free list and not yet on the
            // stack, so it must be deleted here if the push fails.
            if (!stack.append(elements)) {
              js_delete(elements);
              return false;
            }

            token = advance();
            if (token == ArrayClose) {
              if (!finishArray(&value, *elements)) {
                return false;
              }
              break;
            }
            // Not an empty array: |token| is the first element's first token.
            goto JSONValueSwitch;
          }

          case ObjectOpen: {
            // Reuse a previously finished vector when one is available,
            // otherwise allocate a new one.
            PropertyVector* properties;
            if (!freeProperties.empty()) {
              properties = freeProperties.popCopy();
              properties->clear();
            } else {
              properties = cx->new_<PropertyVector>(cx);
              if (!properties) {
                return false;
              }
            }
            // The vector is no longer on the free list and not yet on the
            // stack, so it must be deleted here if the push fails.
            if (!stack.append(properties)) {
              js_delete(properties);
              return false;
            }

            token = advanceAfterObjectOpen();
            if (token == ObjectClose) {
              if (!finishObject(&value, *properties)) {
                return false;
              }
              break;
            }
            // Not an empty object: |token| is the first property name (or an
            // error/OOM token, which JSONMember handles).
            goto JSONMember;
          }

          case ArrayClose:
          case ObjectClose:
          case Colon:
          case Comma:
            // Move the current pointer backwards so that the position
            // reported in the error message is correct.
            --current;
            error("unexpected character");
            return errorReturn();

          case OOM:
            return false;

          case Error:
            return errorReturn();
        }
        break;
    }

    // A value has been completed.  With nothing left on the stack it is the
    // top-level value; otherwise continue in the state of the enclosing
    // array or object.
    if (stack.empty()) {
      break;
    }
    state = stack.back().state;
  }

  // Only whitespace may follow the top-level value.
  for (; current < end; current++) {
    if (!IsJSONWhitespace(*current)) {
      error("unexpected non-whitespace character after JSON data");
      return errorReturn();
    }
  }

  MOZ_ASSERT(end == current);
  MOZ_ASSERT(stack.empty());

  vp.set(value);
  return true;
}
816 
// Explicitly instantiate the parser for the two character types it is used
// with; the member templates above are defined in this file only.
template class js::JSONParser<Latin1Char>;
template class js::JSONParser<char16_t>;
819