1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "vm/JSONParser.h"
8
9 #include "mozilla/Range.h"
10 #include "mozilla/RangedPtr.h"
11 #include "mozilla/Sprintf.h"
12 #include "mozilla/TextUtils.h"
13
14 #include "jsnum.h"
15
16 #include "builtin/Array.h"
17 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
18 #include "util/StringBuffer.h"
19 #include "vm/PlainObject.h" // js::NewPlainObjectWithProperties
20 #include "vm/Realm.h"
21
22 #include "vm/NativeObject-inl.h"
23
24 using namespace js;
25
26 using mozilla::AsciiAlphanumericToNumber;
27 using mozilla::IsAsciiDigit;
28 using mozilla::IsAsciiHexDigit;
29 using mozilla::RangedPtr;
30
~JSONParserBase()31 JSONParserBase::~JSONParserBase() {
32 for (size_t i = 0; i < stack.length(); i++) {
33 if (stack[i].state == FinishArrayElement) {
34 js_delete(&stack[i].elements());
35 } else {
36 js_delete(&stack[i].properties());
37 }
38 }
39
40 for (size_t i = 0; i < freeElements.length(); i++) {
41 js_delete(freeElements[i]);
42 }
43
44 for (size_t i = 0; i < freeProperties.length(); i++) {
45 js_delete(freeProperties[i]);
46 }
47 }
48
trace(JSTracer * trc)49 void JSONParserBase::trace(JSTracer* trc) {
50 for (auto& elem : stack) {
51 if (elem.state == FinishArrayElement) {
52 elem.elements().trace(trc);
53 } else {
54 elem.properties().trace(trc);
55 }
56 }
57 }
58
59 template <typename CharT>
getTextPosition(uint32_t * column,uint32_t * line)60 void JSONParser<CharT>::getTextPosition(uint32_t* column, uint32_t* line) {
61 CharPtr ptr = begin;
62 uint32_t col = 1;
63 uint32_t row = 1;
64 for (; ptr < current; ptr++) {
65 if (*ptr == '\n' || *ptr == '\r') {
66 ++row;
67 col = 1;
68 // \r\n is treated as a single newline.
69 if (ptr + 1 < current && *ptr == '\r' && *(ptr + 1) == '\n') {
70 ++ptr;
71 }
72 } else {
73 ++col;
74 }
75 }
76 *column = col;
77 *line = row;
78 }
79
80 template <typename CharT>
error(const char * msg)81 void JSONParser<CharT>::error(const char* msg) {
82 if (parseType == ParseType::JSONParse) {
83 uint32_t column = 1, line = 1;
84 getTextPosition(&column, &line);
85
86 const size_t MaxWidth = sizeof("4294967295");
87 char columnNumber[MaxWidth];
88 SprintfLiteral(columnNumber, "%" PRIu32, column);
89 char lineNumber[MaxWidth];
90 SprintfLiteral(lineNumber, "%" PRIu32, line);
91
92 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
93 JSMSG_JSON_BAD_PARSE, msg, lineNumber,
94 columnNumber);
95 }
96 }
97
errorReturn()98 bool JSONParserBase::errorReturn() {
99 return parseType == ParseType::AttemptForEval;
100 }
101
/*
 * Reads the double-quoted string literal that begins at |current|.
 *
 * On entry |current| must point at the opening '"'. On success |current| is
 * left just past the closing '"' and the result of stringToken() is returned.
 * When ST is PropertyName the characters are atomized; otherwise a new string
 * is created.
 *
 * On a malformed literal, error() is called with |current| positioned at the
 * offending character (or at |end| if the input ran out) and token(Error) is
 * returned. token(OOM) is returned when string creation yields null or an
 * append to the temporary buffer fails.
 */
template <typename CharT>
template <JSONParserBase::StringType ST>
JSONParserBase::Token JSONParser<CharT>::readString() {
  MOZ_ASSERT(current < end);
  MOZ_ASSERT(*current == '"');

  /*
   * JSONString:
   *   /^"([^\u0000-\u001F"\\]|\\(["/\\bfnrt]|u[0-9a-fA-F]{4}))*"$/
   */

  // Step over the opening quote; the input must not end right after it.
  if (++current == end) {
    error("unterminated string literal");
    return token(Error);
  }

  /*
   * Optimization: if the source contains no escaped characters, create the
   * string directly from the source text.
   */
  CharPtr start = current;
  for (; current < end; current++) {
    if (*current == '"') {
      // Closing quote reached with no escapes seen: [start, current) is the
      // string's content. Advance past the quote before returning.
      size_t length = current - start;
      current++;
      JSLinearString* str =
          (ST == JSONParser::PropertyName)
              ? AtomizeChars(cx, start.get(), length)
              : NewStringCopyN<CanGC>(cx, start.get(), length);
      if (!str) {
        return token(OOM);
      }
      return stringToken(str);
    }

    // First escape found: leave the fast path with |current| at the '\\'.
    if (*current == '\\') {
      break;
    }

    // Characters U+0000 through U+001F may not appear unescaped.
    if (*current <= 0x001F) {
      error("bad control character in string literal");
      return token(Error);
    }
  }

  /*
   * Slow case: string contains escaped characters.  Copy a maximal sequence
   * of unescaped characters into a temporary buffer, then an escaped
   * character, and repeat until the entire string is consumed.
   */
  JSStringBuilder buffer(cx);
  do {
    // Flush the pending run of unescaped characters [start, current).
    if (start < current && !buffer.append(start.get(), current.get())) {
      return token(OOM);
    }

    // Ran out of input before a closing quote: report below the loop.
    if (current >= end) {
      break;
    }

    char16_t c = *current++;
    if (c == '"') {
      JSLinearString* str = (ST == JSONParser::PropertyName)
                                ? buffer.finishAtom()
                                : buffer.finishString();
      if (!str) {
        return token(OOM);
      }
      return stringToken(str);
    }

    if (c != '\\') {
      // Step back so the reported position is that of the bad character.
      --current;
      error("bad character in string literal");
      return token(Error);
    }

    // A backslash must be followed by at least one more character.
    if (current >= end) {
      break;
    }

    // Decode the escape sequence into |c|.
    switch (*current++) {
      case '"':
        c = '"';
        break;
      case '/':
        c = '/';
        break;
      case '\\':
        c = '\\';
        break;
      case 'b':
        c = '\b';
        break;
      case 'f':
        c = '\f';
        break;
      case 'n':
        c = '\n';
        break;
      case 'r':
        c = '\r';
        break;
      case 't':
        c = '\t';
        break;

      case 'u':
        // \u must be followed by exactly four hexadecimal digits.
        if (end - current < 4 ||
            !(IsAsciiHexDigit(current[0]) && IsAsciiHexDigit(current[1]) &&
              IsAsciiHexDigit(current[2]) && IsAsciiHexDigit(current[3]))) {
          // Point to the first non-hexadecimal character (which may be
          // missing).
          if (current == end || !IsAsciiHexDigit(current[0])) {
            ;  // already at correct location
          } else if (current + 1 == end || !IsAsciiHexDigit(current[1])) {
            current += 1;
          } else if (current + 2 == end || !IsAsciiHexDigit(current[2])) {
            current += 2;
          } else if (current + 3 == end || !IsAsciiHexDigit(current[3])) {
            current += 3;
          } else {
            MOZ_CRASH("logic error determining first erroneous character");
          }

          error("bad Unicode escape");
          return token(Error);
        }
        // Assemble the 16-bit code unit from its four hex digits, most
        // significant digit first.
        c = (AsciiAlphanumericToNumber(current[0]) << 12) |
            (AsciiAlphanumericToNumber(current[1]) << 8) |
            (AsciiAlphanumericToNumber(current[2]) << 4) |
            (AsciiAlphanumericToNumber(current[3]));
        current += 4;
        break;

      default:
        // Step back so the reported position is that of the character
        // following the backslash.
        current--;
        error("bad escaped character");
        return token(Error);
    }
    if (!buffer.append(c)) {
      return token(OOM);
    }

    // Scan the next run of ordinary characters; stop at a quote, a
    // backslash, or a control character, which the next iteration handles.
    start = current;
    for (; current < end; current++) {
      if (*current == '"' || *current == '\\' || *current <= 0x001F) {
        break;
      }
    }
  } while (current < end);

  // The input ended before the closing quote was found.
  error("unterminated string");
  return token(Error);
}
257
258 template <typename CharT>
readNumber()259 JSONParserBase::Token JSONParser<CharT>::readNumber() {
260 MOZ_ASSERT(current < end);
261 MOZ_ASSERT(IsAsciiDigit(*current) || *current == '-');
262
263 /*
264 * JSONNumber:
265 * /^-?(0|[1-9][0-9]+)(\.[0-9]+)?([eE][\+\-]?[0-9]+)?$/
266 */
267
268 bool negative = *current == '-';
269
270 /* -? */
271 if (negative && ++current == end) {
272 error("no number after minus sign");
273 return token(Error);
274 }
275
276 const CharPtr digitStart = current;
277
278 /* 0|[1-9][0-9]+ */
279 if (!IsAsciiDigit(*current)) {
280 error("unexpected non-digit");
281 return token(Error);
282 }
283 if (*current++ != '0') {
284 for (; current < end; current++) {
285 if (!IsAsciiDigit(*current)) {
286 break;
287 }
288 }
289 }
290
291 /* Fast path: no fractional or exponent part. */
292 if (current == end ||
293 (*current != '.' && *current != 'e' && *current != 'E')) {
294 mozilla::Range<const CharT> chars(digitStart.get(), current - digitStart);
295 if (chars.length() < strlen("9007199254740992")) {
296 // If the decimal number is shorter than the length of 2**53, (the
297 // largest number a double can represent with integral precision),
298 // parse it using a decimal-only parser. This comparison is
299 // conservative but faster than a fully-precise check.
300 double d = ParseDecimalNumber(chars);
301 return numberToken(negative ? -d : d);
302 }
303
304 double d;
305 if (!GetFullInteger(cx, digitStart.get(), current.get(), 10,
306 IntegerSeparatorHandling::None, &d)) {
307 return token(OOM);
308 }
309 return numberToken(negative ? -d : d);
310 }
311
312 /* (\.[0-9]+)? */
313 if (current < end && *current == '.') {
314 if (++current == end) {
315 error("missing digits after decimal point");
316 return token(Error);
317 }
318 if (!IsAsciiDigit(*current)) {
319 error("unterminated fractional number");
320 return token(Error);
321 }
322 while (++current < end) {
323 if (!IsAsciiDigit(*current)) {
324 break;
325 }
326 }
327 }
328
329 /* ([eE][\+\-]?[0-9]+)? */
330 if (current < end && (*current == 'e' || *current == 'E')) {
331 if (++current == end) {
332 error("missing digits after exponent indicator");
333 return token(Error);
334 }
335 if (*current == '+' || *current == '-') {
336 if (++current == end) {
337 error("missing digits after exponent sign");
338 return token(Error);
339 }
340 }
341 if (!IsAsciiDigit(*current)) {
342 error("exponent part is missing a number");
343 return token(Error);
344 }
345 while (++current < end) {
346 if (!IsAsciiDigit(*current)) {
347 break;
348 }
349 }
350 }
351
352 double d;
353 if (!FullStringToDouble(cx, digitStart.get(), current.get(), &d)) {
354 return token(OOM);
355 }
356 return numberToken(negative ? -d : d);
357 }
358
// Returns true exactly for the four characters this parser skips as
// whitespace: tab, line feed, carriage return and space.
static inline bool IsJSONWhitespace(char16_t c) {
  switch (c) {
    case '\t':
    case '\n':
    case '\r':
    case ' ':
      return true;
    default:
      return false;
  }
}
362
363 template <typename CharT>
advance()364 JSONParserBase::Token JSONParser<CharT>::advance() {
365 while (current < end && IsJSONWhitespace(*current)) {
366 current++;
367 }
368 if (current >= end) {
369 error("unexpected end of data");
370 return token(Error);
371 }
372
373 switch (*current) {
374 case '"':
375 return readString<LiteralValue>();
376
377 case '-':
378 case '0':
379 case '1':
380 case '2':
381 case '3':
382 case '4':
383 case '5':
384 case '6':
385 case '7':
386 case '8':
387 case '9':
388 return readNumber();
389
390 case 't':
391 if (end - current < 4 || current[1] != 'r' || current[2] != 'u' ||
392 current[3] != 'e') {
393 error("unexpected keyword");
394 return token(Error);
395 }
396 current += 4;
397 return token(True);
398
399 case 'f':
400 if (end - current < 5 || current[1] != 'a' || current[2] != 'l' ||
401 current[3] != 's' || current[4] != 'e') {
402 error("unexpected keyword");
403 return token(Error);
404 }
405 current += 5;
406 return token(False);
407
408 case 'n':
409 if (end - current < 4 || current[1] != 'u' || current[2] != 'l' ||
410 current[3] != 'l') {
411 error("unexpected keyword");
412 return token(Error);
413 }
414 current += 4;
415 return token(Null);
416
417 case '[':
418 current++;
419 return token(ArrayOpen);
420 case ']':
421 current++;
422 return token(ArrayClose);
423
424 case '{':
425 current++;
426 return token(ObjectOpen);
427 case '}':
428 current++;
429 return token(ObjectClose);
430
431 case ',':
432 current++;
433 return token(Comma);
434
435 case ':':
436 current++;
437 return token(Colon);
438
439 default:
440 error("unexpected character");
441 return token(Error);
442 }
443 }
444
445 template <typename CharT>
advanceAfterObjectOpen()446 JSONParserBase::Token JSONParser<CharT>::advanceAfterObjectOpen() {
447 MOZ_ASSERT(current[-1] == '{');
448
449 while (current < end && IsJSONWhitespace(*current)) {
450 current++;
451 }
452 if (current >= end) {
453 error("end of data while reading object contents");
454 return token(Error);
455 }
456
457 if (*current == '"') {
458 return readString<PropertyName>();
459 }
460
461 if (*current == '}') {
462 current++;
463 return token(ObjectClose);
464 }
465
466 error("expected property name or '}'");
467 return token(Error);
468 }
469
// Debug check that the characters immediately before |current| look like the
// tail of a JSON value: the keyword "null", "true" or "false", a closing
// '}' / ']' / '"', or a decimal digit. All of the checking lives inside a
// single MOZ_ASSERT.
template <typename CharT>
static inline void AssertPastValue(const RangedPtr<const CharT> current) {
  /*
   * We're past an arbitrary JSON value, so the previous character is
   * *somewhat* constrained, even if this assertion is pretty broad.  Don't
   * knock it till you tried it: this assertion *did* catch a bug once.
   */
  MOZ_ASSERT((current[-1] == 'l' && current[-2] == 'l' && current[-3] == 'u' &&
              current[-4] == 'n') ||
             (current[-1] == 'e' && current[-2] == 'u' && current[-3] == 'r' &&
              current[-4] == 't') ||
             (current[-1] == 'e' && current[-2] == 's' && current[-3] == 'l' &&
              current[-4] == 'a' && current[-5] == 'f') ||
             current[-1] == '}' || current[-1] == ']' || current[-1] == '"' ||
             IsAsciiDigit(current[-1]));
}
486
487 template <typename CharT>
advanceAfterArrayElement()488 JSONParserBase::Token JSONParser<CharT>::advanceAfterArrayElement() {
489 AssertPastValue(current);
490
491 while (current < end && IsJSONWhitespace(*current)) {
492 current++;
493 }
494 if (current >= end) {
495 error("end of data when ',' or ']' was expected");
496 return token(Error);
497 }
498
499 if (*current == ',') {
500 current++;
501 return token(Comma);
502 }
503
504 if (*current == ']') {
505 current++;
506 return token(ArrayClose);
507 }
508
509 error("expected ',' or ']' after array element");
510 return token(Error);
511 }
512
513 template <typename CharT>
advancePropertyName()514 JSONParserBase::Token JSONParser<CharT>::advancePropertyName() {
515 MOZ_ASSERT(current[-1] == ',');
516
517 while (current < end && IsJSONWhitespace(*current)) {
518 current++;
519 }
520 if (current >= end) {
521 error("end of data when property name was expected");
522 return token(Error);
523 }
524
525 if (*current == '"') {
526 return readString<PropertyName>();
527 }
528
529 error("expected double-quoted property name");
530 return token(Error);
531 }
532
533 template <typename CharT>
advancePropertyColon()534 JSONParserBase::Token JSONParser<CharT>::advancePropertyColon() {
535 MOZ_ASSERT(current[-1] == '"');
536
537 while (current < end && IsJSONWhitespace(*current)) {
538 current++;
539 }
540 if (current >= end) {
541 error("end of data after property name when ':' was expected");
542 return token(Error);
543 }
544
545 if (*current == ':') {
546 current++;
547 return token(Colon);
548 }
549
550 error("expected ':' after property name in object");
551 return token(Error);
552 }
553
554 template <typename CharT>
advanceAfterProperty()555 JSONParserBase::Token JSONParser<CharT>::advanceAfterProperty() {
556 AssertPastValue(current);
557
558 while (current < end && IsJSONWhitespace(*current)) {
559 current++;
560 }
561 if (current >= end) {
562 error("end of data after property value in object");
563 return token(Error);
564 }
565
566 if (*current == ',') {
567 current++;
568 return token(Comma);
569 }
570
571 if (*current == '}') {
572 current++;
573 return token(ObjectClose);
574 }
575
576 error("expected ',' or '}' after property value in object");
577 return token(Error);
578 }
579
finishObject(MutableHandleValue vp,PropertyVector & properties)580 inline bool JSONParserBase::finishObject(MutableHandleValue vp,
581 PropertyVector& properties) {
582 MOZ_ASSERT(&properties == &stack.back().properties());
583
584 JSObject* obj = NewPlainObjectWithProperties(
585 cx, properties.begin(), properties.length(), GenericObject);
586 if (!obj) {
587 return false;
588 }
589
590 vp.setObject(*obj);
591 if (!freeProperties.append(&properties)) {
592 return false;
593 }
594 stack.popBack();
595 return true;
596 }
597
finishArray(MutableHandleValue vp,ElementVector & elements)598 inline bool JSONParserBase::finishArray(MutableHandleValue vp,
599 ElementVector& elements) {
600 MOZ_ASSERT(&elements == &stack.back().elements());
601
602 ArrayObject* obj =
603 NewDenseCopiedArray(cx, elements.length(), elements.begin());
604 if (!obj) {
605 return false;
606 }
607
608 vp.setObject(*obj);
609 if (!freeElements.append(&elements)) {
610 return false;
611 }
612 stack.popBack();
613 return true;
614 }
615
/*
 * Parses the entire input as one JSON value, optionally surrounded by
 * whitespace, and stores the result in |vp|.
 *
 * The parser is iterative: every open array or object has an entry on
 * |stack|, and after a value is completed the state of the top entry
 * (FinishArrayElement or FinishObjectMember) selects how to continue.
 *
 * Return value:
 *  - true with |vp| set to the parsed value on success;
 *  - errorReturn() after a syntax error;
 *  - true, with |vp| left undefined, when the parse type is AttemptForEval
 *    and a "__proto__" property name is encountered;
 *  - false when a token is OOM or when an allocation or append fails.
 */
template <typename CharT>
bool JSONParser<CharT>::parse(MutableHandleValue vp) {
  // Holds the most recently completed value (primitive, array or object).
  RootedValue value(cx);
  MOZ_ASSERT(stack.empty());

  vp.setUndefined();

  Token token;
  ParserState state = JSONValue;
  while (true) {
    switch (state) {
      case FinishObjectMember: {
        // |value| is the value of the property most recently appended to the
        // current object's property vector.
        PropertyVector& properties = stack.back().properties();
        properties.back().value = value;

        token = advanceAfterProperty();
        if (token == ObjectClose) {
          if (!finishObject(&value, properties)) {
            return false;
          }
          break;
        }
        if (token != Comma) {
          if (token == OOM) {
            return false;
          }
          if (token != Error) {
            error(
                "expected ',' or '}' after property-value pair in object "
                "literal");
          }
          return errorReturn();
        }
        token = advancePropertyName();
        /* FALL THROUGH */
      }

      // Entered with |token| holding the result of reading a property name,
      // either by falling through from above or by the goto in the
      // ObjectOpen case below.
      JSONMember:
        if (token == String) {
          jsid id = AtomToId(atomValue());
          if (parseType == ParseType::AttemptForEval) {
            // In |JSON.parse|, "__proto__" is a property like any other and may
            // appear multiple times. In object literal syntax, "__proto__" is
            // prototype mutation and can appear at most once. |JSONParser| only
            // supports the former semantics, so if this parse attempt is for
            // |eval|, return true (without reporting an error) to indicate the
            // JSON parse attempt was unsuccessful.
            if (id == NameToId(cx->names().proto)) {
              return true;
            }
          }
          // Record the property id now; its value is filled in when the
          // FinishObjectMember state is next entered.
          PropertyVector& properties = stack.back().properties();
          if (!properties.emplaceBack(id)) {
            return false;
          }
          token = advancePropertyColon();
          if (token != Colon) {
            MOZ_ASSERT(token == Error);
            return errorReturn();
          }
          goto JSONValue;
        }
        if (token == OOM) {
          return false;
        }
        if (token != Error) {
          error("property names must be double-quoted strings");
        }
        return errorReturn();

      case FinishArrayElement: {
        // |value| is a completed element of the current array.
        ElementVector& elements = stack.back().elements();
        if (!elements.append(value.get())) {
          return false;
        }
        token = advanceAfterArrayElement();
        if (token == Comma) {
          goto JSONValue;
        }
        if (token == ArrayClose) {
          if (!finishArray(&value, elements)) {
            return false;
          }
          break;
        }
        MOZ_ASSERT(token == Error);
        return errorReturn();
      }

      // Read the next token and interpret it as the start of a value.
      JSONValue:
      case JSONValue:
        token = advance();
      // Entered directly (via goto) when |token| has already been read.
      JSONValueSwitch:
        switch (token) {
          case String:
            value = stringValue();
            break;
          case Number:
            value = numberValue();
            break;
          case True:
            value = BooleanValue(true);
            break;
          case False:
            value = BooleanValue(false);
            break;
          case Null:
            value = NullValue();
            break;

          case ArrayOpen: {
            // Reuse a vector from the free list when one is available;
            // otherwise allocate a new one.
            ElementVector* elements;
            if (!freeElements.empty()) {
              elements = freeElements.popCopy();
              elements->clear();
            } else {
              elements = cx->new_<ElementVector>(cx);
              if (!elements) {
                return false;
              }
            }
            if (!stack.append(elements)) {
              js_delete(elements);
              return false;
            }

            token = advance();
            if (token == ArrayClose) {
              // Empty array.
              if (!finishArray(&value, *elements)) {
                return false;
              }
              break;
            }
            // Otherwise the token just read starts the first element.
            goto JSONValueSwitch;
          }

          case ObjectOpen: {
            // Reuse a vector from the free list when one is available;
            // otherwise allocate a new one.
            PropertyVector* properties;
            if (!freeProperties.empty()) {
              properties = freeProperties.popCopy();
              properties->clear();
            } else {
              properties = cx->new_<PropertyVector>(cx);
              if (!properties) {
                return false;
              }
            }
            if (!stack.append(properties)) {
              js_delete(properties);
              return false;
            }

            token = advanceAfterObjectOpen();
            if (token == ObjectClose) {
              // Empty object.
              if (!finishObject(&value, *properties)) {
                return false;
              }
              break;
            }
            // Otherwise the token just read is handled as a property name.
            goto JSONMember;
          }

          case ArrayClose:
          case ObjectClose:
          case Colon:
          case Comma:
            // Move the current pointer backwards so that the position
            // reported in the error message is correct.
            --current;
            error("unexpected character");
            return errorReturn();

          case OOM:
            return false;

          case Error:
            return errorReturn();
        }
        break;
    }

    // A value has been completed. With nothing left on the stack it is the
    // top-level value; otherwise resume the enclosing array or object.
    if (stack.empty()) {
      break;
    }
    state = stack.back().state;
  }

  // Only whitespace may follow the top-level value.
  for (; current < end; current++) {
    if (!IsJSONWhitespace(*current)) {
      error("unexpected non-whitespace character after JSON data");
      return errorReturn();
    }
  }

  MOZ_ASSERT(end == current);
  MOZ_ASSERT(stack.empty());

  vp.set(value);
  return true;
}
816
// Explicitly instantiate the parser for the two character types, Latin1Char
// and char16_t.
template class js::JSONParser<Latin1Char>;
template class js::JSONParser<char16_t>;
819