1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
#include <istream>
#include <iterator>
#include <limits>
#include <locale>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
23 
24 #include <cstdio>
25 #if __cplusplus >= 201103L
26 
27 #if !defined(sscanf)
28 #define sscanf std::sscanf
29 #endif
30 
31 #endif //__cplusplus
32 
33 #if defined(_MSC_VER)
34 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
35 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
36 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
37 #endif //_MSC_VER
38 
39 #if defined(_MSC_VER)
40 // Disable warning about strdup being deprecated.
41 #pragma warning(disable : 4996)
42 #endif
43 
// Maximum nesting depth accepted by the deprecated Reader. Override it by
// defining JSONCPP_DEPRECATED_STACK_LIMIT to an integer at compile time.
#ifndef JSONCPP_DEPRECATED_STACK_LIMIT
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Compared against nodes_.size() in Reader::readValue().
static const size_t stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
52 
53 namespace Json {
54 
55 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
56 using CharReaderPtr = std::unique_ptr<CharReader>;
57 #else
58 using CharReaderPtr = std::auto_ptr<CharReader>;
59 #endif
60 
61 // Implementation of class Features
62 // ////////////////////////////////
63 
// Defaulted constructor: every flag keeps whatever initial value the class
// declaration gives it (that declaration is not part of this file).
Features::Features() = default;
65 
all()66 Features Features::all() { return {}; }
67 
strictMode()68 Features Features::strictMode() {
69   Features features;
70   features.allowComments_ = false;
71   features.strictRoot_ = true;
72   features.allowDroppedNullPlaceholders_ = false;
73   features.allowNumericKeys_ = false;
74   return features;
75 }
76 
77 // Implementation of class Reader
78 // ////////////////////////////////
79 
containsNewLine(Reader::Location begin,Reader::Location end)80 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
81   return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
82 }
83 
84 // Class Reader
85 // //////////////////////////////////////////////////////////////////
86 
Reader()87 Reader::Reader() : features_(Features::all()) {}
88 
Reader(const Features & features)89 Reader::Reader(const Features& features) : features_(features) {}
90 
parse(const std::string & document,Value & root,bool collectComments)91 bool Reader::parse(const std::string& document, Value& root,
92                    bool collectComments) {
93   document_.assign(document.begin(), document.end());
94   const char* begin = document_.c_str();
95   const char* end = begin + document_.length();
96   return parse(begin, end, root, collectComments);
97 }
98 
parse(std::istream & is,Value & root,bool collectComments)99 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
100   // std::istream_iterator<char> begin(is);
101   // std::istream_iterator<char> end;
102   // Those would allow streamed input from a file, if parse() were a
103   // template function.
104 
105   // Since String is reference-counted, this at least does not
106   // create an extra copy.
107   String doc;
108   std::getline(is, doc, static_cast<char> EOF);
109   return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
110 }
111 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)112 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
113                    bool collectComments) {
114   if (!features_.allowComments_) {
115     collectComments = false;
116   }
117 
118   begin_ = beginDoc;
119   end_ = endDoc;
120   collectComments_ = collectComments;
121   current_ = begin_;
122   lastValueEnd_ = nullptr;
123   lastValue_ = nullptr;
124   commentsBefore_.clear();
125   errors_.clear();
126   while (!nodes_.empty())
127     nodes_.pop();
128   nodes_.push(&root);
129 
130   bool successful = readValue();
131   Token token;
132   skipCommentTokens(token);
133   if (collectComments_ && !commentsBefore_.empty())
134     root.setComment(commentsBefore_, commentAfter);
135   if (features_.strictRoot_) {
136     if (!root.isArray() && !root.isObject()) {
137       // Set error location to start of doc, ideally should be first token found
138       // in doc
139       token.type_ = tokenError;
140       token.start_ = beginDoc;
141       token.end_ = endDoc;
142       addError(
143           "A valid JSON document must be either an array or an object value.",
144           token);
145       return false;
146     }
147   }
148   return successful;
149 }
150 
readValue()151 bool Reader::readValue() {
152   // readValue() may call itself only if it calls readObject() or ReadArray().
153   // These methods execute nodes_.push() just before and nodes_.pop)() just
154   // after calling readValue(). parse() executes one nodes_.push(), so > instead
155   // of >=.
156   if (nodes_.size() > stackLimit_g)
157     throwRuntimeError("Exceeded stackLimit in readValue().");
158 
159   Token token;
160   skipCommentTokens(token);
161   bool successful = true;
162 
163   if (collectComments_ && !commentsBefore_.empty()) {
164     currentValue().setComment(commentsBefore_, commentBefore);
165     commentsBefore_.clear();
166   }
167 
168   switch (token.type_) {
169   case tokenObjectBegin:
170     successful = readObject(token);
171     currentValue().setOffsetLimit(current_ - begin_);
172     break;
173   case tokenArrayBegin:
174     successful = readArray(token);
175     currentValue().setOffsetLimit(current_ - begin_);
176     break;
177   case tokenNumber:
178     successful = decodeNumber(token);
179     break;
180   case tokenString:
181     successful = decodeString(token);
182     break;
183   case tokenTrue: {
184     Value v(true);
185     currentValue().swapPayload(v);
186     currentValue().setOffsetStart(token.start_ - begin_);
187     currentValue().setOffsetLimit(token.end_ - begin_);
188   } break;
189   case tokenFalse: {
190     Value v(false);
191     currentValue().swapPayload(v);
192     currentValue().setOffsetStart(token.start_ - begin_);
193     currentValue().setOffsetLimit(token.end_ - begin_);
194   } break;
195   case tokenNull: {
196     Value v;
197     currentValue().swapPayload(v);
198     currentValue().setOffsetStart(token.start_ - begin_);
199     currentValue().setOffsetLimit(token.end_ - begin_);
200   } break;
201   case tokenArraySeparator:
202   case tokenObjectEnd:
203   case tokenArrayEnd:
204     if (features_.allowDroppedNullPlaceholders_) {
205       // "Un-read" the current token and mark the current value as a null
206       // token.
207       current_--;
208       Value v;
209       currentValue().swapPayload(v);
210       currentValue().setOffsetStart(current_ - begin_ - 1);
211       currentValue().setOffsetLimit(current_ - begin_);
212       break;
213     } // Else, fall through...
214   default:
215     currentValue().setOffsetStart(token.start_ - begin_);
216     currentValue().setOffsetLimit(token.end_ - begin_);
217     return addError("Syntax error: value, object or array expected.", token);
218   }
219 
220   if (collectComments_) {
221     lastValueEnd_ = current_;
222     lastValue_ = &currentValue();
223   }
224 
225   return successful;
226 }
227 
skipCommentTokens(Token & token)228 void Reader::skipCommentTokens(Token& token) {
229   if (features_.allowComments_) {
230     do {
231       readToken(token);
232     } while (token.type_ == tokenComment);
233   } else {
234     readToken(token);
235   }
236 }
237 
readToken(Token & token)238 bool Reader::readToken(Token& token) {
239   skipSpaces();
240   token.start_ = current_;
241   Char c = getNextChar();
242   bool ok = true;
243   switch (c) {
244   case '{':
245     token.type_ = tokenObjectBegin;
246     break;
247   case '}':
248     token.type_ = tokenObjectEnd;
249     break;
250   case '[':
251     token.type_ = tokenArrayBegin;
252     break;
253   case ']':
254     token.type_ = tokenArrayEnd;
255     break;
256   case '"':
257     token.type_ = tokenString;
258     ok = readString();
259     break;
260   case '/':
261     token.type_ = tokenComment;
262     ok = readComment();
263     break;
264   case '0':
265   case '1':
266   case '2':
267   case '3':
268   case '4':
269   case '5':
270   case '6':
271   case '7':
272   case '8':
273   case '9':
274   case '-':
275     token.type_ = tokenNumber;
276     readNumber();
277     break;
278   case 't':
279     token.type_ = tokenTrue;
280     ok = match("rue", 3);
281     break;
282   case 'f':
283     token.type_ = tokenFalse;
284     ok = match("alse", 4);
285     break;
286   case 'n':
287     token.type_ = tokenNull;
288     ok = match("ull", 3);
289     break;
290   case ',':
291     token.type_ = tokenArraySeparator;
292     break;
293   case ':':
294     token.type_ = tokenMemberSeparator;
295     break;
296   case 0:
297     token.type_ = tokenEndOfStream;
298     break;
299   default:
300     ok = false;
301     break;
302   }
303   if (!ok)
304     token.type_ = tokenError;
305   token.end_ = current_;
306   return ok;
307 }
308 
skipSpaces()309 void Reader::skipSpaces() {
310   while (current_ != end_) {
311     Char c = *current_;
312     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
313       ++current_;
314     else
315       break;
316   }
317 }
318 
match(const Char * pattern,int patternLength)319 bool Reader::match(const Char* pattern, int patternLength) {
320   if (end_ - current_ < patternLength)
321     return false;
322   int index = patternLength;
323   while (index--)
324     if (current_[index] != pattern[index])
325       return false;
326   current_ += patternLength;
327   return true;
328 }
329 
readComment()330 bool Reader::readComment() {
331   Location commentBegin = current_ - 1;
332   Char c = getNextChar();
333   bool successful = false;
334   if (c == '*')
335     successful = readCStyleComment();
336   else if (c == '/')
337     successful = readCppStyleComment();
338   if (!successful)
339     return false;
340 
341   if (collectComments_) {
342     CommentPlacement placement = commentBefore;
343     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
344       if (c != '*' || !containsNewLine(commentBegin, current_))
345         placement = commentAfterOnSameLine;
346     }
347 
348     addComment(commentBegin, current_, placement);
349   }
350   return true;
351 }
352 
normalizeEOL(Reader::Location begin,Reader::Location end)353 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
354   String normalized;
355   normalized.reserve(static_cast<size_t>(end - begin));
356   Reader::Location current = begin;
357   while (current != end) {
358     char c = *current++;
359     if (c == '\r') {
360       if (current != end && *current == '\n')
361         // convert dos EOL
362         ++current;
363       // convert Mac EOL
364       normalized += '\n';
365     } else {
366       normalized += c;
367     }
368   }
369   return normalized;
370 }
371 
addComment(Location begin,Location end,CommentPlacement placement)372 void Reader::addComment(Location begin, Location end,
373                         CommentPlacement placement) {
374   assert(collectComments_);
375   const String& normalized = normalizeEOL(begin, end);
376   if (placement == commentAfterOnSameLine) {
377     assert(lastValue_ != nullptr);
378     lastValue_->setComment(normalized, placement);
379   } else {
380     commentsBefore_ += normalized;
381   }
382 }
383 
readCStyleComment()384 bool Reader::readCStyleComment() {
385   while ((current_ + 1) < end_) {
386     Char c = getNextChar();
387     if (c == '*' && *current_ == '/')
388       break;
389   }
390   return getNextChar() == '/';
391 }
392 
readCppStyleComment()393 bool Reader::readCppStyleComment() {
394   while (current_ != end_) {
395     Char c = getNextChar();
396     if (c == '\n')
397       break;
398     if (c == '\r') {
399       // Consume DOS EOL. It will be normalized in addComment.
400       if (current_ != end_ && *current_ == '\n')
401         getNextChar();
402       // Break on Moc OS 9 EOL.
403       break;
404     }
405   }
406   return true;
407 }
408 
readNumber()409 void Reader::readNumber() {
410   Location p = current_;
411   char c = '0'; // stopgap for already consumed character
412   // integral part
413   while (c >= '0' && c <= '9')
414     c = (current_ = p) < end_ ? *p++ : '\0';
415   // fractional part
416   if (c == '.') {
417     c = (current_ = p) < end_ ? *p++ : '\0';
418     while (c >= '0' && c <= '9')
419       c = (current_ = p) < end_ ? *p++ : '\0';
420   }
421   // exponential part
422   if (c == 'e' || c == 'E') {
423     c = (current_ = p) < end_ ? *p++ : '\0';
424     if (c == '+' || c == '-')
425       c = (current_ = p) < end_ ? *p++ : '\0';
426     while (c >= '0' && c <= '9')
427       c = (current_ = p) < end_ ? *p++ : '\0';
428   }
429 }
430 
readString()431 bool Reader::readString() {
432   Char c = '\0';
433   while (current_ != end_) {
434     c = getNextChar();
435     if (c == '\\')
436       getNextChar();
437     else if (c == '"')
438       break;
439   }
440   return c == '"';
441 }
442 
readObject(Token & token)443 bool Reader::readObject(Token& token) {
444   Token tokenName;
445   String name;
446   Value init(objectValue);
447   currentValue().swapPayload(init);
448   currentValue().setOffsetStart(token.start_ - begin_);
449   while (readToken(tokenName)) {
450     bool initialTokenOk = true;
451     while (tokenName.type_ == tokenComment && initialTokenOk)
452       initialTokenOk = readToken(tokenName);
453     if (!initialTokenOk)
454       break;
455     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
456       return true;
457     name.clear();
458     if (tokenName.type_ == tokenString) {
459       if (!decodeString(tokenName, name))
460         return recoverFromError(tokenObjectEnd);
461     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
462       Value numberName;
463       if (!decodeNumber(tokenName, numberName))
464         return recoverFromError(tokenObjectEnd);
465       name = numberName.asString();
466     } else {
467       break;
468     }
469 
470     Token colon;
471     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
472       return addErrorAndRecover("Missing ':' after object member name", colon,
473                                 tokenObjectEnd);
474     }
475     Value& value = currentValue()[name];
476     nodes_.push(&value);
477     bool ok = readValue();
478     nodes_.pop();
479     if (!ok) // error already set
480       return recoverFromError(tokenObjectEnd);
481 
482     Token comma;
483     if (!readToken(comma) ||
484         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
485          comma.type_ != tokenComment)) {
486       return addErrorAndRecover("Missing ',' or '}' in object declaration",
487                                 comma, tokenObjectEnd);
488     }
489     bool finalizeTokenOk = true;
490     while (comma.type_ == tokenComment && finalizeTokenOk)
491       finalizeTokenOk = readToken(comma);
492     if (comma.type_ == tokenObjectEnd)
493       return true;
494   }
495   return addErrorAndRecover("Missing '}' or object member name", tokenName,
496                             tokenObjectEnd);
497 }
498 
readArray(Token & token)499 bool Reader::readArray(Token& token) {
500   Value init(arrayValue);
501   currentValue().swapPayload(init);
502   currentValue().setOffsetStart(token.start_ - begin_);
503   skipSpaces();
504   if (current_ != end_ && *current_ == ']') // empty array
505   {
506     Token endArray;
507     readToken(endArray);
508     return true;
509   }
510   int index = 0;
511   for (;;) {
512     Value& value = currentValue()[index++];
513     nodes_.push(&value);
514     bool ok = readValue();
515     nodes_.pop();
516     if (!ok) // error already set
517       return recoverFromError(tokenArrayEnd);
518 
519     Token currentToken;
520     // Accept Comment after last item in the array.
521     ok = readToken(currentToken);
522     while (currentToken.type_ == tokenComment && ok) {
523       ok = readToken(currentToken);
524     }
525     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
526                          currentToken.type_ != tokenArrayEnd);
527     if (!ok || badTokenType) {
528       return addErrorAndRecover("Missing ',' or ']' in array declaration",
529                                 currentToken, tokenArrayEnd);
530     }
531     if (currentToken.type_ == tokenArrayEnd)
532       break;
533   }
534   return true;
535 }
536 
decodeNumber(Token & token)537 bool Reader::decodeNumber(Token& token) {
538   Value decoded;
539   if (!decodeNumber(token, decoded))
540     return false;
541   currentValue().swapPayload(decoded);
542   currentValue().setOffsetStart(token.start_ - begin_);
543   currentValue().setOffsetLimit(token.end_ - begin_);
544   return true;
545 }
546 
decodeNumber(Token & token,Value & decoded)547 bool Reader::decodeNumber(Token& token, Value& decoded) {
548   // Attempts to parse the number as an integer. If the number is
549   // larger than the maximum supported value of an integer then
550   // we decode the number as a double.
551   Location current = token.start_;
552   bool isNegative = *current == '-';
553   if (isNegative)
554     ++current;
555   // TODO: Help the compiler do the div and mod at compile time or get rid of
556   // them.
557   Value::LargestUInt maxIntegerValue =
558       isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
559                  : Value::maxLargestUInt;
560   Value::LargestUInt threshold = maxIntegerValue / 10;
561   Value::LargestUInt value = 0;
562   while (current < token.end_) {
563     Char c = *current++;
564     if (c < '0' || c > '9')
565       return decodeDouble(token, decoded);
566     auto digit(static_cast<Value::UInt>(c - '0'));
567     if (value >= threshold) {
568       // We've hit or exceeded the max value divided by 10 (rounded down). If
569       // a) we've only just touched the limit, b) this is the last digit, and
570       // c) it's small enough to fit in that rounding delta, we're okay.
571       // Otherwise treat this number as a double to avoid overflow.
572       if (value > threshold || current != token.end_ ||
573           digit > maxIntegerValue % 10) {
574         return decodeDouble(token, decoded);
575       }
576     }
577     value = value * 10 + digit;
578   }
579   if (isNegative && value == maxIntegerValue)
580     decoded = Value::minLargestInt;
581   else if (isNegative)
582     decoded = -Value::LargestInt(value);
583   else if (value <= Value::LargestUInt(Value::maxInt))
584     decoded = Value::LargestInt(value);
585   else
586     decoded = value;
587   return true;
588 }
589 
decodeDouble(Token & token)590 bool Reader::decodeDouble(Token& token) {
591   Value decoded;
592   if (!decodeDouble(token, decoded))
593     return false;
594   currentValue().swapPayload(decoded);
595   currentValue().setOffsetStart(token.start_ - begin_);
596   currentValue().setOffsetLimit(token.end_ - begin_);
597   return true;
598 }
599 
decodeDouble(Token & token,Value & decoded)600 bool Reader::decodeDouble(Token& token, Value& decoded) {
601   double value = 0;
602   String buffer(token.start_, token.end_);
603   IStringStream is(buffer);
604   if (!(is >> value))
605     return addError(
606         "'" + String(token.start_, token.end_) + "' is not a number.", token);
607   decoded = value;
608   return true;
609 }
610 
decodeString(Token & token)611 bool Reader::decodeString(Token& token) {
612   String decoded_string;
613   if (!decodeString(token, decoded_string))
614     return false;
615   Value decoded(decoded_string);
616   currentValue().swapPayload(decoded);
617   currentValue().setOffsetStart(token.start_ - begin_);
618   currentValue().setOffsetLimit(token.end_ - begin_);
619   return true;
620 }
621 
decodeString(Token & token,String & decoded)622 bool Reader::decodeString(Token& token, String& decoded) {
623   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
624   Location current = token.start_ + 1; // skip '"'
625   Location end = token.end_ - 1;       // do not include '"'
626   while (current != end) {
627     Char c = *current++;
628     if (c == '"')
629       break;
630     if (c == '\\') {
631       if (current == end)
632         return addError("Empty escape sequence in string", token, current);
633       Char escape = *current++;
634       switch (escape) {
635       case '"':
636         decoded += '"';
637         break;
638       case '/':
639         decoded += '/';
640         break;
641       case '\\':
642         decoded += '\\';
643         break;
644       case 'b':
645         decoded += '\b';
646         break;
647       case 'f':
648         decoded += '\f';
649         break;
650       case 'n':
651         decoded += '\n';
652         break;
653       case 'r':
654         decoded += '\r';
655         break;
656       case 't':
657         decoded += '\t';
658         break;
659       case 'u': {
660         unsigned int unicode;
661         if (!decodeUnicodeCodePoint(token, current, end, unicode))
662           return false;
663         decoded += codePointToUTF8(unicode);
664       } break;
665       default:
666         return addError("Bad escape sequence in string", token, current);
667       }
668     } else {
669       decoded += c;
670     }
671   }
672   return true;
673 }
674 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)675 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
676                                     Location end, unsigned int& unicode) {
677 
678   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
679     return false;
680   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
681     // surrogate pairs
682     if (end - current < 6)
683       return addError(
684           "additional six characters expected to parse unicode surrogate pair.",
685           token, current);
686     if (*(current++) == '\\' && *(current++) == 'u') {
687       unsigned int surrogatePair;
688       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
689         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
690       } else
691         return false;
692     } else
693       return addError("expecting another \\u token to begin the second half of "
694                       "a unicode surrogate pair",
695                       token, current);
696   }
697   return true;
698 }
699 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)700 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
701                                          Location end,
702                                          unsigned int& ret_unicode) {
703   if (end - current < 4)
704     return addError(
705         "Bad unicode escape sequence in string: four digits expected.", token,
706         current);
707   int unicode = 0;
708   for (int index = 0; index < 4; ++index) {
709     Char c = *current++;
710     unicode *= 16;
711     if (c >= '0' && c <= '9')
712       unicode += c - '0';
713     else if (c >= 'a' && c <= 'f')
714       unicode += c - 'a' + 10;
715     else if (c >= 'A' && c <= 'F')
716       unicode += c - 'A' + 10;
717     else
718       return addError(
719           "Bad unicode escape sequence in string: hexadecimal digit expected.",
720           token, current);
721   }
722   ret_unicode = static_cast<unsigned int>(unicode);
723   return true;
724 }
725 
addError(const String & message,Token & token,Location extra)726 bool Reader::addError(const String& message, Token& token, Location extra) {
727   ErrorInfo info;
728   info.token_ = token;
729   info.message_ = message;
730   info.extra_ = extra;
731   errors_.push_back(info);
732   return false;
733 }
734 
recoverFromError(TokenType skipUntilToken)735 bool Reader::recoverFromError(TokenType skipUntilToken) {
736   size_t const errorCount = errors_.size();
737   Token skip;
738   for (;;) {
739     if (!readToken(skip))
740       errors_.resize(errorCount); // discard errors caused by recovery
741     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
742       break;
743   }
744   errors_.resize(errorCount);
745   return false;
746 }
747 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)748 bool Reader::addErrorAndRecover(const String& message, Token& token,
749                                 TokenType skipUntilToken) {
750   addError(message, token);
751   return recoverFromError(skipUntilToken);
752 }
753 
currentValue()754 Value& Reader::currentValue() { return *(nodes_.top()); }
755 
getNextChar()756 Reader::Char Reader::getNextChar() {
757   if (current_ == end_)
758     return 0;
759   return *current_++;
760 }
761 
getLocationLineAndColumn(Location location,int & line,int & column) const762 void Reader::getLocationLineAndColumn(Location location, int& line,
763                                       int& column) const {
764   Location current = begin_;
765   Location lastLineStart = current;
766   line = 0;
767   while (current < location && current != end_) {
768     Char c = *current++;
769     if (c == '\r') {
770       if (*current == '\n')
771         ++current;
772       lastLineStart = current;
773       ++line;
774     } else if (c == '\n') {
775       lastLineStart = current;
776       ++line;
777     }
778   }
779   // column & line start at 1
780   column = int(location - lastLineStart) + 1;
781   ++line;
782 }
783 
getLocationLineAndColumn(Location location) const784 String Reader::getLocationLineAndColumn(Location location) const {
785   int line, column;
786   getLocationLineAndColumn(location, line, column);
787   char buffer[18 + 16 + 16 + 1];
788   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
789   return buffer;
790 }
791 
792 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const793 String Reader::getFormatedErrorMessages() const {
794   return getFormattedErrorMessages();
795 }
796 
getFormattedErrorMessages() const797 String Reader::getFormattedErrorMessages() const {
798   String formattedMessage;
799   for (const auto& error : errors_) {
800     formattedMessage +=
801         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
802     formattedMessage += "  " + error.message_ + "\n";
803     if (error.extra_)
804       formattedMessage +=
805           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
806   }
807   return formattedMessage;
808 }
809 
getStructuredErrors() const810 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
811   std::vector<Reader::StructuredError> allErrors;
812   for (const auto& error : errors_) {
813     Reader::StructuredError structured;
814     structured.offset_start = error.token_.start_ - begin_;
815     structured.offset_limit = error.token_.end_ - begin_;
816     structured.message = error.message_;
817     allErrors.push_back(structured);
818   }
819   return allErrors;
820 }
821 
pushError(const Value & value,const String & message)822 bool Reader::pushError(const Value& value, const String& message) {
823   ptrdiff_t const length = end_ - begin_;
824   if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
825     return false;
826   Token token;
827   token.type_ = tokenError;
828   token.start_ = begin_ + value.getOffsetStart();
829   token.end_ = begin_ + value.getOffsetLimit();
830   ErrorInfo info;
831   info.token_ = token;
832   info.message_ = message;
833   info.extra_ = nullptr;
834   errors_.push_back(info);
835   return true;
836 }
837 
pushError(const Value & value,const String & message,const Value & extra)838 bool Reader::pushError(const Value& value, const String& message,
839                        const Value& extra) {
840   ptrdiff_t const length = end_ - begin_;
841   if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
842       extra.getOffsetLimit() > length)
843     return false;
844   Token token;
845   token.type_ = tokenError;
846   token.start_ = begin_ + value.getOffsetStart();
847   token.end_ = begin_ + value.getOffsetLimit();
848   ErrorInfo info;
849   info.token_ = token;
850   info.message_ = message;
851   info.extra_ = begin_ + extra.getOffsetStart();
852   errors_.push_back(info);
853   return true;
854 }
855 
good() const856 bool Reader::good() const { return errors_.empty(); }
857 
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
//
// Plain bundle of parser options held by OurReader. Every member carries a
// default initializer so that a default-constructed instance never holds
// indeterminate values; the defaults (false / 0) equal what all() returns.
class OurFeatures {
public:
  // Returns an instance with every flag false and stackLimit_ equal to 0.
  static OurFeatures all();
  bool allowComments_ = false;
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures

OurFeatures OurFeatures::all() { return {}; }
877 
// Declaration of class OurReader
// ////////////////////////////////
880 
881 // Originally copied from the Reader class (now deprecated), used internally
882 // for implementing JSON reading.
883 class OurReader {
884 public:
885   using Char = char;
886   using Location = const Char*;
887   struct StructuredError {
888     ptrdiff_t offset_start;
889     ptrdiff_t offset_limit;
890     String message;
891   };
892 
893   explicit OurReader(OurFeatures const& features);
894   bool parse(const char* beginDoc, const char* endDoc, Value& root,
895              bool collectComments = true);
896   String getFormattedErrorMessages() const;
897   std::vector<StructuredError> getStructuredErrors() const;
898 
899 private:
900   OurReader(OurReader const&);      // no impl
901   void operator=(OurReader const&); // no impl
902 
903   enum TokenType {
904     tokenEndOfStream = 0,
905     tokenObjectBegin,
906     tokenObjectEnd,
907     tokenArrayBegin,
908     tokenArrayEnd,
909     tokenString,
910     tokenNumber,
911     tokenTrue,
912     tokenFalse,
913     tokenNull,
914     tokenNaN,
915     tokenPosInf,
916     tokenNegInf,
917     tokenArraySeparator,
918     tokenMemberSeparator,
919     tokenComment,
920     tokenError
921   };
922 
923   class Token {
924   public:
925     TokenType type_;
926     Location start_;
927     Location end_;
928   };
929 
930   class ErrorInfo {
931   public:
932     Token token_;
933     String message_;
934     Location extra_;
935   };
936 
937   using Errors = std::deque<ErrorInfo>;
938 
939   bool readToken(Token& token);
940   void skipSpaces();
941   void skipBom(bool skipBom);
942   bool match(const Char* pattern, int patternLength);
943   bool readComment();
944   bool readCStyleComment(bool* containsNewLineResult);
945   bool readCppStyleComment();
946   bool readString();
947   bool readStringSingleQuote();
948   bool readNumber(bool checkInf);
949   bool readValue();
950   bool readObject(Token& token);
951   bool readArray(Token& token);
952   bool decodeNumber(Token& token);
953   bool decodeNumber(Token& token, Value& decoded);
954   bool decodeString(Token& token);
955   bool decodeString(Token& token, String& decoded);
956   bool decodeDouble(Token& token);
957   bool decodeDouble(Token& token, Value& decoded);
958   bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
959                               unsigned int& unicode);
960   bool decodeUnicodeEscapeSequence(Token& token, Location& current,
961                                    Location end, unsigned int& unicode);
962   bool addError(const String& message, Token& token, Location extra = nullptr);
963   bool recoverFromError(TokenType skipUntilToken);
964   bool addErrorAndRecover(const String& message, Token& token,
965                           TokenType skipUntilToken);
966   void skipUntilSpace();
967   Value& currentValue();
968   Char getNextChar();
969   void getLocationLineAndColumn(Location location, int& line,
970                                 int& column) const;
971   String getLocationLineAndColumn(Location location) const;
972   void addComment(Location begin, Location end, CommentPlacement placement);
973   void skipCommentTokens(Token& token);
974 
975   static String normalizeEOL(Location begin, Location end);
976   static bool containsNewLine(Location begin, Location end);
977 
978   using Nodes = std::stack<Value*>;
979 
980   Nodes nodes_{};
981   Errors errors_{};
982   String document_{};
983   Location begin_ = nullptr;
984   Location end_ = nullptr;
985   Location current_ = nullptr;
986   Location lastValueEnd_ = nullptr;
987   Value* lastValue_ = nullptr;
988   bool lastValueHasAComment_ = false;
989   String commentsBefore_{};
990 
991   OurFeatures const features_;
992   bool collectComments_ = false;
993 }; // OurReader
994 
// Complete copy of the Reader implementation, for OurReader.
996 
containsNewLine(OurReader::Location begin,OurReader::Location end)997 bool OurReader::containsNewLine(OurReader::Location begin,
998                                 OurReader::Location end) {
999   return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
1000 }
1001 
OurReader(OurFeatures const & features)1002 OurReader::OurReader(OurFeatures const& features) : features_(features) {}
1003 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1004 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1005                       bool collectComments) {
1006   if (!features_.allowComments_) {
1007     collectComments = false;
1008   }
1009 
1010   begin_ = beginDoc;
1011   end_ = endDoc;
1012   collectComments_ = collectComments;
1013   current_ = begin_;
1014   lastValueEnd_ = nullptr;
1015   lastValue_ = nullptr;
1016   commentsBefore_.clear();
1017   errors_.clear();
1018   while (!nodes_.empty())
1019     nodes_.pop();
1020   nodes_.push(&root);
1021 
1022   // skip byte order mark if it exists at the beginning of the UTF-8 text.
1023   skipBom(features_.skipBom_);
1024   bool successful = readValue();
1025   nodes_.pop();
1026   Token token;
1027   skipCommentTokens(token);
1028   if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1029     addError("Extra non-whitespace after JSON value.", token);
1030     return false;
1031   }
1032   if (collectComments_ && !commentsBefore_.empty())
1033     root.setComment(commentsBefore_, commentAfter);
1034   if (features_.strictRoot_) {
1035     if (!root.isArray() && !root.isObject()) {
1036       // Set error location to start of doc, ideally should be first token found
1037       // in doc
1038       token.type_ = tokenError;
1039       token.start_ = beginDoc;
1040       token.end_ = endDoc;
1041       addError(
1042           "A valid JSON document must be either an array or an object value.",
1043           token);
1044       return false;
1045     }
1046   }
1047   return successful;
1048 }
1049 
readValue()1050 bool OurReader::readValue() {
1051   //  To preserve the old behaviour we cast size_t to int.
1052   if (nodes_.size() > features_.stackLimit_)
1053     throwRuntimeError("Exceeded stackLimit in readValue().");
1054   Token token;
1055   skipCommentTokens(token);
1056   bool successful = true;
1057 
1058   if (collectComments_ && !commentsBefore_.empty()) {
1059     currentValue().setComment(commentsBefore_, commentBefore);
1060     commentsBefore_.clear();
1061   }
1062 
1063   switch (token.type_) {
1064   case tokenObjectBegin:
1065     successful = readObject(token);
1066     currentValue().setOffsetLimit(current_ - begin_);
1067     break;
1068   case tokenArrayBegin:
1069     successful = readArray(token);
1070     currentValue().setOffsetLimit(current_ - begin_);
1071     break;
1072   case tokenNumber:
1073     successful = decodeNumber(token);
1074     break;
1075   case tokenString:
1076     successful = decodeString(token);
1077     break;
1078   case tokenTrue: {
1079     Value v(true);
1080     currentValue().swapPayload(v);
1081     currentValue().setOffsetStart(token.start_ - begin_);
1082     currentValue().setOffsetLimit(token.end_ - begin_);
1083   } break;
1084   case tokenFalse: {
1085     Value v(false);
1086     currentValue().swapPayload(v);
1087     currentValue().setOffsetStart(token.start_ - begin_);
1088     currentValue().setOffsetLimit(token.end_ - begin_);
1089   } break;
1090   case tokenNull: {
1091     Value v;
1092     currentValue().swapPayload(v);
1093     currentValue().setOffsetStart(token.start_ - begin_);
1094     currentValue().setOffsetLimit(token.end_ - begin_);
1095   } break;
1096   case tokenNaN: {
1097     Value v(std::numeric_limits<double>::quiet_NaN());
1098     currentValue().swapPayload(v);
1099     currentValue().setOffsetStart(token.start_ - begin_);
1100     currentValue().setOffsetLimit(token.end_ - begin_);
1101   } break;
1102   case tokenPosInf: {
1103     Value v(std::numeric_limits<double>::infinity());
1104     currentValue().swapPayload(v);
1105     currentValue().setOffsetStart(token.start_ - begin_);
1106     currentValue().setOffsetLimit(token.end_ - begin_);
1107   } break;
1108   case tokenNegInf: {
1109     Value v(-std::numeric_limits<double>::infinity());
1110     currentValue().swapPayload(v);
1111     currentValue().setOffsetStart(token.start_ - begin_);
1112     currentValue().setOffsetLimit(token.end_ - begin_);
1113   } break;
1114   case tokenArraySeparator:
1115   case tokenObjectEnd:
1116   case tokenArrayEnd:
1117     if (features_.allowDroppedNullPlaceholders_) {
1118       // "Un-read" the current token and mark the current value as a null
1119       // token.
1120       current_--;
1121       Value v;
1122       currentValue().swapPayload(v);
1123       currentValue().setOffsetStart(current_ - begin_ - 1);
1124       currentValue().setOffsetLimit(current_ - begin_);
1125       break;
1126     } // else, fall through ...
1127   default:
1128     currentValue().setOffsetStart(token.start_ - begin_);
1129     currentValue().setOffsetLimit(token.end_ - begin_);
1130     return addError("Syntax error: value, object or array expected.", token);
1131   }
1132 
1133   if (collectComments_) {
1134     lastValueEnd_ = current_;
1135     lastValueHasAComment_ = false;
1136     lastValue_ = &currentValue();
1137   }
1138 
1139   return successful;
1140 }
1141 
skipCommentTokens(Token & token)1142 void OurReader::skipCommentTokens(Token& token) {
1143   if (features_.allowComments_) {
1144     do {
1145       readToken(token);
1146     } while (token.type_ == tokenComment);
1147   } else {
1148     readToken(token);
1149   }
1150 }
1151 
readToken(Token & token)1152 bool OurReader::readToken(Token& token) {
1153   skipSpaces();
1154   token.start_ = current_;
1155   Char c = getNextChar();
1156   bool ok = true;
1157   switch (c) {
1158   case '{':
1159     token.type_ = tokenObjectBegin;
1160     break;
1161   case '}':
1162     token.type_ = tokenObjectEnd;
1163     break;
1164   case '[':
1165     token.type_ = tokenArrayBegin;
1166     break;
1167   case ']':
1168     token.type_ = tokenArrayEnd;
1169     break;
1170   case '"':
1171     token.type_ = tokenString;
1172     ok = readString();
1173     break;
1174   case '\'':
1175     if (features_.allowSingleQuotes_) {
1176       token.type_ = tokenString;
1177       ok = readStringSingleQuote();
1178     } else {
1179       // If we don't allow single quotes, this is a failure case.
1180       ok = false;
1181     }
1182     break;
1183   case '/':
1184     token.type_ = tokenComment;
1185     ok = readComment();
1186     break;
1187   case '0':
1188   case '1':
1189   case '2':
1190   case '3':
1191   case '4':
1192   case '5':
1193   case '6':
1194   case '7':
1195   case '8':
1196   case '9':
1197     token.type_ = tokenNumber;
1198     readNumber(false);
1199     break;
1200   case '-':
1201     if (readNumber(true)) {
1202       token.type_ = tokenNumber;
1203     } else {
1204       token.type_ = tokenNegInf;
1205       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1206     }
1207     break;
1208   case '+':
1209     if (readNumber(true)) {
1210       token.type_ = tokenNumber;
1211     } else {
1212       token.type_ = tokenPosInf;
1213       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1214     }
1215     break;
1216   case 't':
1217     token.type_ = tokenTrue;
1218     ok = match("rue", 3);
1219     break;
1220   case 'f':
1221     token.type_ = tokenFalse;
1222     ok = match("alse", 4);
1223     break;
1224   case 'n':
1225     token.type_ = tokenNull;
1226     ok = match("ull", 3);
1227     break;
1228   case 'N':
1229     if (features_.allowSpecialFloats_) {
1230       token.type_ = tokenNaN;
1231       ok = match("aN", 2);
1232     } else {
1233       ok = false;
1234     }
1235     break;
1236   case 'I':
1237     if (features_.allowSpecialFloats_) {
1238       token.type_ = tokenPosInf;
1239       ok = match("nfinity", 7);
1240     } else {
1241       ok = false;
1242     }
1243     break;
1244   case ',':
1245     token.type_ = tokenArraySeparator;
1246     break;
1247   case ':':
1248     token.type_ = tokenMemberSeparator;
1249     break;
1250   case 0:
1251     token.type_ = tokenEndOfStream;
1252     break;
1253   default:
1254     ok = false;
1255     break;
1256   }
1257   if (!ok)
1258     token.type_ = tokenError;
1259   token.end_ = current_;
1260   return ok;
1261 }
1262 
skipSpaces()1263 void OurReader::skipSpaces() {
1264   while (current_ != end_) {
1265     Char c = *current_;
1266     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1267       ++current_;
1268     else
1269       break;
1270   }
1271 }
1272 
skipBom(bool skipBom)1273 void OurReader::skipBom(bool skipBom) {
1274   // The default behavior is to skip BOM.
1275   if (skipBom) {
1276     if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1277       begin_ += 3;
1278       current_ = begin_;
1279     }
1280   }
1281 }
1282 
match(const Char * pattern,int patternLength)1283 bool OurReader::match(const Char* pattern, int patternLength) {
1284   if (end_ - current_ < patternLength)
1285     return false;
1286   int index = patternLength;
1287   while (index--)
1288     if (current_[index] != pattern[index])
1289       return false;
1290   current_ += patternLength;
1291   return true;
1292 }
1293 
readComment()1294 bool OurReader::readComment() {
1295   const Location commentBegin = current_ - 1;
1296   const Char c = getNextChar();
1297   bool successful = false;
1298   bool cStyleWithEmbeddedNewline = false;
1299 
1300   const bool isCStyleComment = (c == '*');
1301   const bool isCppStyleComment = (c == '/');
1302   if (isCStyleComment) {
1303     successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1304   } else if (isCppStyleComment) {
1305     successful = readCppStyleComment();
1306   }
1307 
1308   if (!successful)
1309     return false;
1310 
1311   if (collectComments_) {
1312     CommentPlacement placement = commentBefore;
1313 
1314     if (!lastValueHasAComment_) {
1315       if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1316         if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1317           placement = commentAfterOnSameLine;
1318           lastValueHasAComment_ = true;
1319         }
1320       }
1321     }
1322 
1323     addComment(commentBegin, current_, placement);
1324   }
1325   return true;
1326 }
1327 
normalizeEOL(OurReader::Location begin,OurReader::Location end)1328 String OurReader::normalizeEOL(OurReader::Location begin,
1329                                OurReader::Location end) {
1330   String normalized;
1331   normalized.reserve(static_cast<size_t>(end - begin));
1332   OurReader::Location current = begin;
1333   while (current != end) {
1334     char c = *current++;
1335     if (c == '\r') {
1336       if (current != end && *current == '\n')
1337         // convert dos EOL
1338         ++current;
1339       // convert Mac EOL
1340       normalized += '\n';
1341     } else {
1342       normalized += c;
1343     }
1344   }
1345   return normalized;
1346 }
1347 
addComment(Location begin,Location end,CommentPlacement placement)1348 void OurReader::addComment(Location begin, Location end,
1349                            CommentPlacement placement) {
1350   assert(collectComments_);
1351   const String& normalized = normalizeEOL(begin, end);
1352   if (placement == commentAfterOnSameLine) {
1353     assert(lastValue_ != nullptr);
1354     lastValue_->setComment(normalized, placement);
1355   } else {
1356     commentsBefore_ += normalized;
1357   }
1358 }
1359 
readCStyleComment(bool * containsNewLineResult)1360 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1361   *containsNewLineResult = false;
1362 
1363   while ((current_ + 1) < end_) {
1364     Char c = getNextChar();
1365     if (c == '*' && *current_ == '/')
1366       break;
1367     if (c == '\n')
1368       *containsNewLineResult = true;
1369   }
1370 
1371   return getNextChar() == '/';
1372 }
1373 
readCppStyleComment()1374 bool OurReader::readCppStyleComment() {
1375   while (current_ != end_) {
1376     Char c = getNextChar();
1377     if (c == '\n')
1378       break;
1379     if (c == '\r') {
1380       // Consume DOS EOL. It will be normalized in addComment.
1381       if (current_ != end_ && *current_ == '\n')
1382         getNextChar();
1383       // Break on Moc OS 9 EOL.
1384       break;
1385     }
1386   }
1387   return true;
1388 }
1389 
readNumber(bool checkInf)1390 bool OurReader::readNumber(bool checkInf) {
1391   Location p = current_;
1392   if (checkInf && p != end_ && *p == 'I') {
1393     current_ = ++p;
1394     return false;
1395   }
1396   char c = '0'; // stopgap for already consumed character
1397   // integral part
1398   while (c >= '0' && c <= '9')
1399     c = (current_ = p) < end_ ? *p++ : '\0';
1400   // fractional part
1401   if (c == '.') {
1402     c = (current_ = p) < end_ ? *p++ : '\0';
1403     while (c >= '0' && c <= '9')
1404       c = (current_ = p) < end_ ? *p++ : '\0';
1405   }
1406   // exponential part
1407   if (c == 'e' || c == 'E') {
1408     c = (current_ = p) < end_ ? *p++ : '\0';
1409     if (c == '+' || c == '-')
1410       c = (current_ = p) < end_ ? *p++ : '\0';
1411     while (c >= '0' && c <= '9')
1412       c = (current_ = p) < end_ ? *p++ : '\0';
1413   }
1414   return true;
1415 }
readString()1416 bool OurReader::readString() {
1417   Char c = 0;
1418   while (current_ != end_) {
1419     c = getNextChar();
1420     if (c == '\\')
1421       getNextChar();
1422     else if (c == '"')
1423       break;
1424   }
1425   return c == '"';
1426 }
1427 
readStringSingleQuote()1428 bool OurReader::readStringSingleQuote() {
1429   Char c = 0;
1430   while (current_ != end_) {
1431     c = getNextChar();
1432     if (c == '\\')
1433       getNextChar();
1434     else if (c == '\'')
1435       break;
1436   }
1437   return c == '\'';
1438 }
1439 
readObject(Token & token)1440 bool OurReader::readObject(Token& token) {
1441   Token tokenName;
1442   String name;
1443   Value init(objectValue);
1444   currentValue().swapPayload(init);
1445   currentValue().setOffsetStart(token.start_ - begin_);
1446   while (readToken(tokenName)) {
1447     bool initialTokenOk = true;
1448     while (tokenName.type_ == tokenComment && initialTokenOk)
1449       initialTokenOk = readToken(tokenName);
1450     if (!initialTokenOk)
1451       break;
1452     if (tokenName.type_ == tokenObjectEnd &&
1453         (name.empty() ||
1454          features_.allowTrailingCommas_)) // empty object or trailing comma
1455       return true;
1456     name.clear();
1457     if (tokenName.type_ == tokenString) {
1458       if (!decodeString(tokenName, name))
1459         return recoverFromError(tokenObjectEnd);
1460     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1461       Value numberName;
1462       if (!decodeNumber(tokenName, numberName))
1463         return recoverFromError(tokenObjectEnd);
1464       name = numberName.asString();
1465     } else {
1466       break;
1467     }
1468     if (name.length() >= (1U << 30))
1469       throwRuntimeError("keylength >= 2^30");
1470     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1471       String msg = "Duplicate key: '" + name + "'";
1472       return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1473     }
1474 
1475     Token colon;
1476     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1477       return addErrorAndRecover("Missing ':' after object member name", colon,
1478                                 tokenObjectEnd);
1479     }
1480     Value& value = currentValue()[name];
1481     nodes_.push(&value);
1482     bool ok = readValue();
1483     nodes_.pop();
1484     if (!ok) // error already set
1485       return recoverFromError(tokenObjectEnd);
1486 
1487     Token comma;
1488     if (!readToken(comma) ||
1489         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1490          comma.type_ != tokenComment)) {
1491       return addErrorAndRecover("Missing ',' or '}' in object declaration",
1492                                 comma, tokenObjectEnd);
1493     }
1494     bool finalizeTokenOk = true;
1495     while (comma.type_ == tokenComment && finalizeTokenOk)
1496       finalizeTokenOk = readToken(comma);
1497     if (comma.type_ == tokenObjectEnd)
1498       return true;
1499   }
1500   return addErrorAndRecover("Missing '}' or object member name", tokenName,
1501                             tokenObjectEnd);
1502 }
1503 
readArray(Token & token)1504 bool OurReader::readArray(Token& token) {
1505   Value init(arrayValue);
1506   currentValue().swapPayload(init);
1507   currentValue().setOffsetStart(token.start_ - begin_);
1508   int index = 0;
1509   for (;;) {
1510     skipSpaces();
1511     if (current_ != end_ && *current_ == ']' &&
1512         (index == 0 ||
1513          (features_.allowTrailingCommas_ &&
1514           !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1515                                                       // comma
1516     {
1517       Token endArray;
1518       readToken(endArray);
1519       return true;
1520     }
1521     Value& value = currentValue()[index++];
1522     nodes_.push(&value);
1523     bool ok = readValue();
1524     nodes_.pop();
1525     if (!ok) // error already set
1526       return recoverFromError(tokenArrayEnd);
1527 
1528     Token currentToken;
1529     // Accept Comment after last item in the array.
1530     ok = readToken(currentToken);
1531     while (currentToken.type_ == tokenComment && ok) {
1532       ok = readToken(currentToken);
1533     }
1534     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1535                          currentToken.type_ != tokenArrayEnd);
1536     if (!ok || badTokenType) {
1537       return addErrorAndRecover("Missing ',' or ']' in array declaration",
1538                                 currentToken, tokenArrayEnd);
1539     }
1540     if (currentToken.type_ == tokenArrayEnd)
1541       break;
1542   }
1543   return true;
1544 }
1545 
decodeNumber(Token & token)1546 bool OurReader::decodeNumber(Token& token) {
1547   Value decoded;
1548   if (!decodeNumber(token, decoded))
1549     return false;
1550   currentValue().swapPayload(decoded);
1551   currentValue().setOffsetStart(token.start_ - begin_);
1552   currentValue().setOffsetLimit(token.end_ - begin_);
1553   return true;
1554 }
1555 
decodeNumber(Token & token,Value & decoded)1556 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1557   // Attempts to parse the number as an integer. If the number is
1558   // larger than the maximum supported value of an integer then
1559   // we decode the number as a double.
1560   Location current = token.start_;
1561   const bool isNegative = *current == '-';
1562   if (isNegative) {
1563     ++current;
1564   }
1565 
1566   // We assume we can represent the largest and smallest integer types as
1567   // unsigned integers with separate sign. This is only true if they can fit
1568   // into an unsigned integer.
1569   static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
1570                 "Int must be smaller than UInt");
1571 
1572   // We need to convert minLargestInt into a positive number. The easiest way
1573   // to do this conversion is to assume our "threshold" value of minLargestInt
1574   // divided by 10 can fit in maxLargestInt when absolute valued. This should
1575   // be a safe assumption.
1576   static_assert(Value::minLargestInt <= -Value::maxLargestInt,
1577                 "The absolute value of minLargestInt must be greater than or "
1578                 "equal to maxLargestInt");
1579   static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1580                 "The absolute value of minLargestInt must be only 1 magnitude "
1581                 "larger than maxLargest Int");
1582 
1583   static constexpr Value::LargestUInt positive_threshold =
1584       Value::maxLargestUInt / 10;
1585   static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1586 
1587   // For the negative values, we have to be more careful. Since typically
1588   // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1589   // then take the inverse. This assumes that minLargestInt is only a single
1590   // power of 10 different in magnitude, which we check above. For the last
1591   // digit, we take the modulus before negating for the same reason.
1592   static constexpr auto negative_threshold =
1593       Value::LargestUInt(-(Value::minLargestInt / 10));
1594   static constexpr auto negative_last_digit =
1595       Value::UInt(-(Value::minLargestInt % 10));
1596 
1597   const Value::LargestUInt threshold =
1598       isNegative ? negative_threshold : positive_threshold;
1599   const Value::UInt max_last_digit =
1600       isNegative ? negative_last_digit : positive_last_digit;
1601 
1602   Value::LargestUInt value = 0;
1603   while (current < token.end_) {
1604     Char c = *current++;
1605     if (c < '0' || c > '9')
1606       return decodeDouble(token, decoded);
1607 
1608     const auto digit(static_cast<Value::UInt>(c - '0'));
1609     if (value >= threshold) {
1610       // We've hit or exceeded the max value divided by 10 (rounded down). If
1611       // a) we've only just touched the limit, meaing value == threshold,
1612       // b) this is the last digit, or
1613       // c) it's small enough to fit in that rounding delta, we're okay.
1614       // Otherwise treat this number as a double to avoid overflow.
1615       if (value > threshold || current != token.end_ ||
1616           digit > max_last_digit) {
1617         return decodeDouble(token, decoded);
1618       }
1619     }
1620     value = value * 10 + digit;
1621   }
1622 
1623   if (isNegative) {
1624     // We use the same magnitude assumption here, just in case.
1625     const auto last_digit = static_cast<Value::UInt>(value % 10);
1626     decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1627   } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1628     decoded = Value::LargestInt(value);
1629   } else {
1630     decoded = value;
1631   }
1632 
1633   return true;
1634 }
1635 
decodeDouble(Token & token)1636 bool OurReader::decodeDouble(Token& token) {
1637   Value decoded;
1638   if (!decodeDouble(token, decoded))
1639     return false;
1640   currentValue().swapPayload(decoded);
1641   currentValue().setOffsetStart(token.start_ - begin_);
1642   currentValue().setOffsetLimit(token.end_ - begin_);
1643   return true;
1644 }
1645 
decodeDouble(Token & token,Value & decoded)1646 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1647   double value = 0;
1648   const String buffer(token.start_, token.end_);
1649   IStringStream is(buffer);
1650   if (!(is >> value)) {
1651     return addError(
1652         "'" + String(token.start_, token.end_) + "' is not a number.", token);
1653   }
1654   decoded = value;
1655   return true;
1656 }
1657 
decodeString(Token & token)1658 bool OurReader::decodeString(Token& token) {
1659   String decoded_string;
1660   if (!decodeString(token, decoded_string))
1661     return false;
1662   Value decoded(decoded_string);
1663   currentValue().swapPayload(decoded);
1664   currentValue().setOffsetStart(token.start_ - begin_);
1665   currentValue().setOffsetLimit(token.end_ - begin_);
1666   return true;
1667 }
1668 
decodeString(Token & token,String & decoded)1669 bool OurReader::decodeString(Token& token, String& decoded) {
1670   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1671   Location current = token.start_ + 1; // skip '"'
1672   Location end = token.end_ - 1;       // do not include '"'
1673   while (current != end) {
1674     Char c = *current++;
1675     if (c == '"')
1676       break;
1677     if (c == '\\') {
1678       if (current == end)
1679         return addError("Empty escape sequence in string", token, current);
1680       Char escape = *current++;
1681       switch (escape) {
1682       case '"':
1683         decoded += '"';
1684         break;
1685       case '/':
1686         decoded += '/';
1687         break;
1688       case '\\':
1689         decoded += '\\';
1690         break;
1691       case 'b':
1692         decoded += '\b';
1693         break;
1694       case 'f':
1695         decoded += '\f';
1696         break;
1697       case 'n':
1698         decoded += '\n';
1699         break;
1700       case 'r':
1701         decoded += '\r';
1702         break;
1703       case 't':
1704         decoded += '\t';
1705         break;
1706       case 'u': {
1707         unsigned int unicode;
1708         if (!decodeUnicodeCodePoint(token, current, end, unicode))
1709           return false;
1710         decoded += codePointToUTF8(unicode);
1711       } break;
1712       default:
1713         return addError("Bad escape sequence in string", token, current);
1714       }
1715     } else {
1716       decoded += c;
1717     }
1718   }
1719   return true;
1720 }
1721 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1722 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1723                                        Location end, unsigned int& unicode) {
1724 
1725   unicode = 0; // Convince clang-analyzer that this is initialized before use.
1726   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1727     return false;
1728   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1729     // surrogate pairs
1730     if (end - current < 6)
1731       return addError(
1732           "additional six characters expected to parse unicode surrogate pair.",
1733           token, current);
1734     if (*(current++) == '\\' && *(current++) == 'u') {
1735       unsigned int surrogatePair;
1736       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1737         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1738       } else
1739         return false;
1740     } else
1741       return addError("expecting another \\u token to begin the second half of "
1742                       "a unicode surrogate pair",
1743                       token, current);
1744   }
1745   return true;
1746 }
1747 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1748 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1749                                             Location end,
1750                                             unsigned int& ret_unicode) {
1751   if (end - current < 4)
1752     return addError(
1753         "Bad unicode escape sequence in string: four digits expected.", token,
1754         current);
1755   int unicode = 0;
1756   for (int index = 0; index < 4; ++index) {
1757     Char c = *current++;
1758     unicode *= 16;
1759     if (c >= '0' && c <= '9')
1760       unicode += c - '0';
1761     else if (c >= 'a' && c <= 'f')
1762       unicode += c - 'a' + 10;
1763     else if (c >= 'A' && c <= 'F')
1764       unicode += c - 'A' + 10;
1765     else
1766       return addError(
1767           "Bad unicode escape sequence in string: hexadecimal digit expected.",
1768           token, current);
1769   }
1770   ret_unicode = static_cast<unsigned int>(unicode);
1771   return true;
1772 }
1773 
addError(const String & message,Token & token,Location extra)1774 bool OurReader::addError(const String& message, Token& token, Location extra) {
1775   ErrorInfo info;
1776   info.token_ = token;
1777   info.message_ = message;
1778   info.extra_ = extra;
1779   errors_.push_back(info);
1780   return false;
1781 }
1782 
recoverFromError(TokenType skipUntilToken)1783 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1784   size_t errorCount = errors_.size();
1785   Token skip;
1786   for (;;) {
1787     if (!readToken(skip))
1788       errors_.resize(errorCount); // discard errors caused by recovery
1789     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1790       break;
1791   }
1792   errors_.resize(errorCount);
1793   return false;
1794 }
1795 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1796 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1797                                    TokenType skipUntilToken) {
1798   addError(message, token);
1799   return recoverFromError(skipUntilToken);
1800 }
1801 
currentValue()1802 Value& OurReader::currentValue() { return *(nodes_.top()); }
1803 
getNextChar()1804 OurReader::Char OurReader::getNextChar() {
1805   if (current_ == end_)
1806     return 0;
1807   return *current_++;
1808 }
1809 
getLocationLineAndColumn(Location location,int & line,int & column) const1810 void OurReader::getLocationLineAndColumn(Location location, int& line,
1811                                          int& column) const {
1812   Location current = begin_;
1813   Location lastLineStart = current;
1814   line = 0;
1815   while (current < location && current != end_) {
1816     Char c = *current++;
1817     if (c == '\r') {
1818       if (*current == '\n')
1819         ++current;
1820       lastLineStart = current;
1821       ++line;
1822     } else if (c == '\n') {
1823       lastLineStart = current;
1824       ++line;
1825     }
1826   }
1827   // column & line start at 1
1828   column = int(location - lastLineStart) + 1;
1829   ++line;
1830 }
1831 
getLocationLineAndColumn(Location location) const1832 String OurReader::getLocationLineAndColumn(Location location) const {
1833   int line, column;
1834   getLocationLineAndColumn(location, line, column);
1835   char buffer[18 + 16 + 16 + 1];
1836   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1837   return buffer;
1838 }
1839 
getFormattedErrorMessages() const1840 String OurReader::getFormattedErrorMessages() const {
1841   String formattedMessage;
1842   for (const auto& error : errors_) {
1843     formattedMessage +=
1844         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1845     formattedMessage += "  " + error.message_ + "\n";
1846     if (error.extra_)
1847       formattedMessage +=
1848           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1849   }
1850   return formattedMessage;
1851 }
1852 
getStructuredErrors() const1853 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1854   std::vector<OurReader::StructuredError> allErrors;
1855   for (const auto& error : errors_) {
1856     OurReader::StructuredError structured;
1857     structured.offset_start = error.token_.start_ - begin_;
1858     structured.offset_limit = error.token_.end_ - begin_;
1859     structured.message = error.message_;
1860     allErrors.push_back(structured);
1861   }
1862   return allErrors;
1863 }
1864 
1865 class OurCharReader : public CharReader {
1866   bool const collectComments_;
1867   OurReader reader_;
1868 
1869 public:
OurCharReader(bool collectComments,OurFeatures const & features)1870   OurCharReader(bool collectComments, OurFeatures const& features)
1871       : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1872   bool parse(char const* beginDoc, char const* endDoc, Value* root,
1873              String* errs) override {
1874     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1875     if (errs) {
1876       *errs = reader_.getFormattedErrorMessages();
1877     }
1878     return ok;
1879   }
1880 };
1881 
CharReaderBuilder()1882 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
1883 CharReaderBuilder::~CharReaderBuilder() = default;
newCharReader() const1884 CharReader* CharReaderBuilder::newCharReader() const {
1885   bool collectComments = settings_["collectComments"].asBool();
1886   OurFeatures features = OurFeatures::all();
1887   features.allowComments_ = settings_["allowComments"].asBool();
1888   features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1889   features.strictRoot_ = settings_["strictRoot"].asBool();
1890   features.allowDroppedNullPlaceholders_ =
1891       settings_["allowDroppedNullPlaceholders"].asBool();
1892   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1893   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1894 
1895   // Stack limit is always a size_t, so we get this as an unsigned int
1896   // regardless of it we have 64-bit integer support enabled.
1897   features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1898   features.failIfExtra_ = settings_["failIfExtra"].asBool();
1899   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1900   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1901   features.skipBom_ = settings_["skipBom"].asBool();
1902   return new OurCharReader(collectComments, features);
1903 }
1904 
validate(Json::Value * invalid) const1905 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1906   static const auto& valid_keys = *new std::set<String>{
1907       "collectComments",
1908       "allowComments",
1909       "allowTrailingCommas",
1910       "strictRoot",
1911       "allowDroppedNullPlaceholders",
1912       "allowNumericKeys",
1913       "allowSingleQuotes",
1914       "stackLimit",
1915       "failIfExtra",
1916       "rejectDupKeys",
1917       "allowSpecialFloats",
1918       "skipBom",
1919   };
1920   for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1921     auto key = si.name();
1922     if (valid_keys.count(key))
1923       continue;
1924     if (invalid)
1925       (*invalid)[std::move(key)] = *si;
1926     else
1927       return false;
1928   }
1929   return invalid ? invalid->empty() : true;
1930 }
1931 
operator [](const String & key)1932 Value& CharReaderBuilder::operator[](const String& key) {
1933   return settings_[key];
1934 }
1935 // static
strictMode(Json::Value * settings)1936 void CharReaderBuilder::strictMode(Json::Value* settings) {
1937   //! [CharReaderBuilderStrictMode]
1938   (*settings)["allowComments"] = false;
1939   (*settings)["allowTrailingCommas"] = false;
1940   (*settings)["strictRoot"] = true;
1941   (*settings)["allowDroppedNullPlaceholders"] = false;
1942   (*settings)["allowNumericKeys"] = false;
1943   (*settings)["allowSingleQuotes"] = false;
1944   (*settings)["stackLimit"] = 1000;
1945   (*settings)["failIfExtra"] = true;
1946   (*settings)["rejectDupKeys"] = true;
1947   (*settings)["allowSpecialFloats"] = false;
1948   (*settings)["skipBom"] = true;
1949   //! [CharReaderBuilderStrictMode]
1950 }
1951 // static
setDefaults(Json::Value * settings)1952 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1953   //! [CharReaderBuilderDefaults]
1954   (*settings)["collectComments"] = true;
1955   (*settings)["allowComments"] = true;
1956   (*settings)["allowTrailingCommas"] = true;
1957   (*settings)["strictRoot"] = false;
1958   (*settings)["allowDroppedNullPlaceholders"] = false;
1959   (*settings)["allowNumericKeys"] = false;
1960   (*settings)["allowSingleQuotes"] = false;
1961   (*settings)["stackLimit"] = 1000;
1962   (*settings)["failIfExtra"] = false;
1963   (*settings)["rejectDupKeys"] = false;
1964   (*settings)["allowSpecialFloats"] = false;
1965   (*settings)["skipBom"] = true;
1966   //! [CharReaderBuilderDefaults]
1967 }
1968 
1969 //////////////////////////////////
1970 // global functions
1971 
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)1972 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1973                      String* errs) {
1974   OStringStream ssin;
1975   ssin << sin.rdbuf();
1976   String doc = ssin.str();
1977   char const* begin = doc.data();
1978   char const* end = begin + doc.size();
1979   // Note that we do not actually need a null-terminator.
1980   CharReaderPtr const reader(fact.newCharReader());
1981   return reader->parse(begin, end, root, errs);
1982 }
1983 
operator >>(IStream & sin,Value & root)1984 IStream& operator>>(IStream& sin, Value& root) {
1985   CharReaderBuilder b;
1986   String errs;
1987   bool ok = parseFromStream(b, sin, &root, &errs);
1988   if (!ok) {
1989     throwRuntimeError(errs);
1990   }
1991   return sin;
1992 }
1993 
1994 } // namespace Json
1995