1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Copyright (C) 2016 Intel Corporation.
5 ** Contact: https://www.qt.io/licensing/
6 **
7 ** This file is part of the QtCore module of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** Commercial License Usage
11 ** Licensees holding valid commercial Qt licenses may use this file in
12 ** accordance with the commercial license agreement provided with the
13 ** Software or, alternatively, in accordance with the terms contained in
14 ** a written agreement between you and The Qt Company. For licensing terms
15 ** and conditions see https://www.qt.io/terms-conditions. For further
16 ** information use the contact form at https://www.qt.io/contact-us.
17 **
18 ** GNU Lesser General Public License Usage
19 ** Alternatively, this file may be used under the terms of the GNU Lesser
20 ** General Public License version 3 as published by the Free Software
21 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
22 ** packaging of this file. Please review the following information to
23 ** ensure the GNU Lesser General Public License version 3 requirements
24 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25 **
26 ** GNU General Public License Usage
27 ** Alternatively, this file may be used under the terms of the GNU
28 ** General Public License version 2.0 or (at your option) the GNU General
29 ** Public license version 3 or any later version approved by the KDE Free
30 ** Qt Foundation. The licenses are as published by the Free Software
31 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32 ** included in the packaging of this file. Please review the following
33 ** information to ensure the GNU General Public License requirements will
34 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35 ** https://www.gnu.org/licenses/gpl-3.0.html.
36 **
37 ** $QT_END_LICENSE$
38 **
39 ****************************************************************************/
40 
41 #ifndef QT_BOOTSTRAPPED
42 #include <qcoreapplication.h>
43 #endif
44 #include <qdebug.h>
45 #include "qjsonparser_p.h"
46 #include "qjson_p.h"
47 #include "private/qutfcodec_p.h"
48 #include "private/qcborvalue_p.h"
49 #include "private/qnumeric_p.h"
50 
// Uncomment the following line to trace the parser's progress through qDebug().
//#define PARSER_DEBUG
#ifdef PARSER_DEBUG
// Current depth of the trace output; each level is indented by four spaces.
static int indent = 0;
// BEGIN opens a trace scope: it prints at the current depth, then increases it.
// NOTE(review): it streams an identifier named `current`, which is not defined
// in the visible part of this file -- confirm that tracing still builds.
#define BEGIN qDebug() << QByteArray(4*indent++, ' ').constData() << "pos=" << current
// END closes the scope opened by BEGIN by decreasing the depth again.
#define END --indent
// DEBUG prints at the current depth without changing it.
#define DEBUG qDebug() << QByteArray(4*indent, ' ').constData()
#else
// Tracing disabled: anything streamed into BEGIN/DEBUG still has to compile,
// but sits in an else branch that is never taken. END is an empty statement.
#define BEGIN if (1) ; else qDebug()
#define END do {} while (0)
#define DEBUG if (1) ; else qDebug()
#endif
62 
// Maximum depth of nested arrays and objects. parseObject() and parseArray()
// fail with QJsonParseError::DeepNesting once this depth is exceeded.
static const int nestingLimit = 1024;
64 
65 QT_BEGIN_NAMESPACE
66 
// Error strings for the JSON parser, one per QJsonParseError::ParseError value.
// They are marked with QT_TRANSLATE_NOOP in the "QJsonParseError" context, the
// same context QJsonParseError::errorString() passes to
// QCoreApplication::translate() when it looks the message up.
#define JSONERR_OK          QT_TRANSLATE_NOOP("QJsonParseError", "no error occurred")
#define JSONERR_UNTERM_OBJ  QT_TRANSLATE_NOOP("QJsonParseError", "unterminated object")
#define JSONERR_MISS_NSEP   QT_TRANSLATE_NOOP("QJsonParseError", "missing name separator")
#define JSONERR_UNTERM_AR   QT_TRANSLATE_NOOP("QJsonParseError", "unterminated array")
#define JSONERR_MISS_VSEP   QT_TRANSLATE_NOOP("QJsonParseError", "missing value separator")
#define JSONERR_ILLEGAL_VAL QT_TRANSLATE_NOOP("QJsonParseError", "illegal value")
#define JSONERR_END_OF_NUM  QT_TRANSLATE_NOOP("QJsonParseError", "invalid termination by number")
#define JSONERR_ILLEGAL_NUM QT_TRANSLATE_NOOP("QJsonParseError", "illegal number")
#define JSONERR_STR_ESC_SEQ QT_TRANSLATE_NOOP("QJsonParseError", "invalid escape sequence")
#define JSONERR_STR_UTF8    QT_TRANSLATE_NOOP("QJsonParseError", "invalid UTF8 string")
#define JSONERR_UTERM_STR   QT_TRANSLATE_NOOP("QJsonParseError", "unterminated string")
#define JSONERR_MISS_OBJ    QT_TRANSLATE_NOOP("QJsonParseError", "object is missing after a comma")
#define JSONERR_DEEP_NEST   QT_TRANSLATE_NOOP("QJsonParseError", "too deeply nested document")
#define JSONERR_DOC_LARGE   QT_TRANSLATE_NOOP("QJsonParseError", "too large document")
#define JSONERR_GARBAGEEND  QT_TRANSLATE_NOOP("QJsonParseError", "garbage at the end of the document")
83 
84 /*!
85     \class QJsonParseError
86     \inmodule QtCore
87     \ingroup json
88     \ingroup shared
89     \reentrant
90     \since 5.0
91 
92     \brief The QJsonParseError class is used to report errors during JSON parsing.
93 
94     \sa {JSON Support in Qt}, {JSON Save Game Example}
95 */
96 
97 /*!
98     \enum QJsonParseError::ParseError
99 
100     This enum describes the type of error that occurred during the parsing of a JSON document.
101 
102     \value NoError                  No error occurred
103     \value UnterminatedObject       An object is not correctly terminated with a closing curly bracket
    \value MissingNameSeparator     A colon separating keys from values inside objects is missing
    \value UnterminatedArray        The array is not correctly terminated with a closing square bracket
    \value MissingValueSeparator    A comma separating different items is missing
107     \value IllegalValue             The value is illegal
108     \value TerminationByNumber      The input stream ended while parsing a number
109     \value IllegalNumber            The number is not well formed
110     \value IllegalEscapeSequence    An illegal escape sequence occurred in the input
111     \value IllegalUTF8String        An illegal UTF8 sequence occurred in the input
112     \value UnterminatedString       A string wasn't terminated with a quote
113     \value MissingObject            An object was expected but couldn't be found
114     \value DeepNesting              The JSON document is too deeply nested for the parser to parse it
115     \value DocumentTooLarge         The JSON document is too large for the parser to parse it
116     \value GarbageAtEnd             The parsed document contains additional garbage characters at the end
117 
118 */
119 
120 /*!
121     \variable QJsonParseError::error
122 
123     Contains the type of the parse error. Is equal to QJsonParseError::NoError if the document
124     was parsed correctly.
125 
126     \sa ParseError, errorString()
127 */
128 
129 
130 /*!
131     \variable QJsonParseError::offset
132 
133     Contains the offset in the input string where the parse error occurred.
134 
135     \sa error, errorString()
136 */
137 
138 /*!
139   Returns the human-readable message appropriate to the reported JSON parsing error.
140 
141   \sa error
142  */
errorString() const143 QString QJsonParseError::errorString() const
144 {
145     const char *sz = "";
146     switch (error) {
147     case NoError:
148         sz = JSONERR_OK;
149         break;
150     case UnterminatedObject:
151         sz = JSONERR_UNTERM_OBJ;
152         break;
153     case MissingNameSeparator:
154         sz = JSONERR_MISS_NSEP;
155         break;
156     case UnterminatedArray:
157         sz = JSONERR_UNTERM_AR;
158         break;
159     case MissingValueSeparator:
160         sz = JSONERR_MISS_VSEP;
161         break;
162     case IllegalValue:
163         sz = JSONERR_ILLEGAL_VAL;
164         break;
165     case TerminationByNumber:
166         sz = JSONERR_END_OF_NUM;
167         break;
168     case IllegalNumber:
169         sz = JSONERR_ILLEGAL_NUM;
170         break;
171     case IllegalEscapeSequence:
172         sz = JSONERR_STR_ESC_SEQ;
173         break;
174     case IllegalUTF8String:
175         sz = JSONERR_STR_UTF8;
176         break;
177     case UnterminatedString:
178         sz = JSONERR_UTERM_STR;
179         break;
180     case MissingObject:
181         sz = JSONERR_MISS_OBJ;
182         break;
183     case DeepNesting:
184         sz = JSONERR_DEEP_NEST;
185         break;
186     case DocumentTooLarge:
187         sz = JSONERR_DOC_LARGE;
188         break;
189     case GarbageAtEnd:
190         sz = JSONERR_GARBAGEEND;
191         break;
192     }
193 #ifndef QT_BOOTSTRAPPED
194     return QCoreApplication::translate("QJsonParseError", sz);
195 #else
196     return QLatin1String(sz);
197 #endif
198 }
199 
200 using namespace QJsonPrivate;
201 
202 class StashedContainer
203 {
204     Q_DISABLE_COPY_MOVE(StashedContainer)
205 public:
StashedContainer(QExplicitlySharedDataPointer<QCborContainerPrivate> * container,QCborValue::Type type)206     StashedContainer(QExplicitlySharedDataPointer<QCborContainerPrivate> *container,
207                      QCborValue::Type type)
208         : type(type), stashed(std::move(*container)), current(container)
209     {
210     }
211 
~StashedContainer()212     ~StashedContainer()
213     {
214         stashed->append(QCborContainerPrivate::makeValue(type, -1, current->take(),
215                                                          QCborContainerPrivate::MoveContainer));
216         *current = std::move(stashed);
217     }
218 
219 private:
220     QCborValue::Type type;
221     QExplicitlySharedDataPointer<QCborContainerPrivate> stashed;
222     QExplicitlySharedDataPointer<QCborContainerPrivate> *current;
223 };
224 
Parser(const char * json,int length)225 Parser::Parser(const char *json, int length)
226     : head(json), json(json)
227     , nestingLevel(0)
228     , lastError(QJsonParseError::NoError)
229 {
230     end = json + length;
231 }
232 
233 
234 
235 /*
236 
237 begin-array     = ws %x5B ws  ; [ left square bracket
238 
239 begin-object    = ws %x7B ws  ; { left curly bracket
240 
241 end-array       = ws %x5D ws  ; ] right square bracket
242 
243 end-object      = ws %x7D ws  ; } right curly bracket
244 
245 name-separator  = ws %x3A ws  ; : colon
246 
247 value-separator = ws %x2C ws  ; , comma
248 
249 Insignificant whitespace is allowed before or after any of the six
250 structural characters.
251 
252 ws = *(
253           %x20 /              ; Space
254           %x09 /              ; Horizontal tab
255           %x0A /              ; Line feed or New line
256           %x0D                ; Carriage return
257       )
258 
259 */
260 
// Byte values of the whitespace, structural and quote characters described by
// the grammar; the hex value from the grammar is repeated next to each literal.
enum {
    // insignificant whitespace
    Space = ' ',            // %x20
    Tab = '\t',             // %x09
    LineFeed = '\n',        // %x0A
    Return = '\r',          // %x0D
    // structural characters
    BeginArray = '[',       // %x5B
    BeginObject = '{',      // %x7B
    EndArray = ']',         // %x5D
    EndObject = '}',        // %x7D
    NameSeparator = ':',    // %x3A
    ValueSeparator = ',',   // %x2C
    // string delimiter
    Quote = '"'             // %x22
};
274 
eatBOM()275 void Parser::eatBOM()
276 {
277     // eat UTF-8 byte order mark
278     uchar utf8bom[3] = { 0xef, 0xbb, 0xbf };
279     if (end - json > 3 &&
280         (uchar)json[0] == utf8bom[0] &&
281         (uchar)json[1] == utf8bom[1] &&
282         (uchar)json[2] == utf8bom[2])
283         json += 3;
284 }
285 
eatSpace()286 bool Parser::eatSpace()
287 {
288     while (json < end) {
289         if (*json > Space)
290             break;
291         if (*json != Space &&
292             *json != Tab &&
293             *json != LineFeed &&
294             *json != Return)
295             break;
296         ++json;
297     }
298     return (json < end);
299 }
300 
nextToken()301 char Parser::nextToken()
302 {
303     if (!eatSpace())
304         return 0;
305     char token = *json++;
306     switch (token) {
307     case BeginArray:
308     case BeginObject:
309     case NameSeparator:
310     case ValueSeparator:
311     case EndArray:
312     case EndObject:
313     case Quote:
314         break;
315     default:
316         token = 0;
317         break;
318     }
319     return token;
320 }
321 
322 /*
323     JSON-text = object / array
324 */
parse(QJsonParseError * error)325 QCborValue Parser::parse(QJsonParseError *error)
326 {
327 #ifdef PARSER_DEBUG
328     indent = 0;
329     qDebug(">>>>> parser begin");
330 #endif
331     eatBOM();
332     char token = nextToken();
333 
334     QCborValue data;
335 
336     DEBUG << Qt::hex << (uint)token;
337     if (token == BeginArray) {
338         container = new QCborContainerPrivate;
339         if (!parseArray())
340             goto error;
341         data = QCborContainerPrivate::makeValue(QCborValue::Array, -1, container.take(),
342                                                 QCborContainerPrivate::MoveContainer);
343     } else if (token == BeginObject) {
344         container = new QCborContainerPrivate;
345         if (!parseObject())
346             goto error;
347         data = QCborContainerPrivate::makeValue(QCborValue::Map, -1, container.take(),
348                                                 QCborContainerPrivate::MoveContainer);
349     } else {
350         lastError = QJsonParseError::IllegalValue;
351         goto error;
352     }
353 
354     eatSpace();
355     if (json < end) {
356         lastError = QJsonParseError::GarbageAtEnd;
357         goto error;
358     }
359 
360     END;
361     {
362         if (error) {
363             error->offset = 0;
364             error->error = QJsonParseError::NoError;
365         }
366 
367         return data;
368     }
369 
370 error:
371 #ifdef PARSER_DEBUG
372     qDebug(">>>>> parser error");
373 #endif
374     container.reset();
375     if (error) {
376         error->offset = json - head;
377         error->error  = lastError;
378     }
379     return QCborValue();
380 }
381 
sortContainer(QCborContainerPrivate * container)382 static void sortContainer(QCborContainerPrivate *container)
383 {
384     using Forward = QJsonPrivate::KeyIterator;
385     using Reverse = std::reverse_iterator<Forward>;
386     using Value = Forward::value_type;
387 
388     auto compare = [container](const Value &a, const Value &b)
389     {
390         const auto &aKey = a.key();
391         const auto &bKey = b.key();
392 
393         Q_ASSERT(aKey.flags & QtCbor::Element::HasByteData);
394         Q_ASSERT(bKey.flags & QtCbor::Element::HasByteData);
395 
396         const QtCbor::ByteData *aData = container->byteData(aKey);
397         const QtCbor::ByteData *bData = container->byteData(bKey);
398 
399         if (!aData)
400             return bData ? -1 : 0;
401         if (!bData)
402             return 1;
403 
404         // US-ASCII (StringIsAscii flag) is just a special case of UTF-8
405         // string, so we can safely ignore the flag.
406 
407         if (aKey.flags & QtCbor::Element::StringIsUtf16) {
408             if (bKey.flags & QtCbor::Element::StringIsUtf16)
409                 return QtPrivate::compareStrings(aData->asStringView(), bData->asStringView());
410 
411             return -QCborContainerPrivate::compareUtf8(bData, aData->asStringView());
412         } else {
413             if (bKey.flags & QtCbor::Element::StringIsUtf16)
414                 return QCborContainerPrivate::compareUtf8(aData, bData->asStringView());
415 
416             // We're missing an explicit UTF-8 to UTF-8 comparison in Qt, but
417             // UTF-8 to UTF-8 comparison retains simple byte ordering, so we'll
418             // abuse the Latin-1 comparison function.
419             return QtPrivate::compareStrings(aData->asLatin1(), bData->asLatin1());
420         }
421     };
422 
423     std::sort(Forward(container->elements.begin()), Forward(container->elements.end()),
424               [&compare](const Value &a, const Value &b) { return compare(a, b) < 0; });
425 
426     // We need to retain the _last_ value for any duplicate keys. Therefore the reverse dance here.
427     auto it = std::unique(Reverse(container->elements.end()), Reverse(container->elements.begin()),
428                           [&compare](const Value &a, const Value &b) {
429         return compare(a, b) == 0;
430     }).base().elementsIterator();
431 
432     // The erase from beginning is expensive but hopefully rare.
433     container->elements.erase(container->elements.begin(), it);
434 }
435 
436 
437 /*
438     object = begin-object [ member *( value-separator member ) ]
439     end-object
440 */
441 
parseObject()442 bool Parser::parseObject()
443 {
444     if (++nestingLevel > nestingLimit) {
445         lastError = QJsonParseError::DeepNesting;
446         return false;
447     }
448 
449     BEGIN << "parseObject" << json;
450 
451     char token = nextToken();
452     while (token == Quote) {
453         if (!container)
454             container = new QCborContainerPrivate;
455         if (!parseMember())
456             return false;
457         token = nextToken();
458         if (token != ValueSeparator)
459             break;
460         token = nextToken();
461         if (token == EndObject) {
462             lastError = QJsonParseError::MissingObject;
463             return false;
464         }
465     }
466 
467     DEBUG << "end token=" << token;
468     if (token != EndObject) {
469         lastError = QJsonParseError::UnterminatedObject;
470         return false;
471     }
472 
473     END;
474 
475     --nestingLevel;
476 
477     if (container)
478         sortContainer(container.data());
479     return true;
480 }
481 
482 /*
483     member = string name-separator value
484 */
parseMember()485 bool Parser::parseMember()
486 {
487     BEGIN << "parseMember";
488 
489     if (!parseString())
490         return false;
491     char token = nextToken();
492     if (token != NameSeparator) {
493         lastError = QJsonParseError::MissingNameSeparator;
494         return false;
495     }
496     if (!eatSpace()) {
497         lastError = QJsonParseError::UnterminatedObject;
498         return false;
499     }
500     if (!parseValue())
501         return false;
502 
503     END;
504     return true;
505 }
506 
507 /*
508     array = begin-array [ value *( value-separator value ) ] end-array
509 */
parseArray()510 bool Parser::parseArray()
511 {
512     BEGIN << "parseArray";
513 
514     if (++nestingLevel > nestingLimit) {
515         lastError = QJsonParseError::DeepNesting;
516         return false;
517     }
518 
519     if (!eatSpace()) {
520         lastError = QJsonParseError::UnterminatedArray;
521         return false;
522     }
523     if (*json == EndArray) {
524         nextToken();
525     } else {
526         while (1) {
527             if (!eatSpace()) {
528                 lastError = QJsonParseError::UnterminatedArray;
529                 return false;
530             }
531             if (!container)
532                 container = new QCborContainerPrivate;
533             if (!parseValue())
534                 return false;
535             char token = nextToken();
536             if (token == EndArray)
537                 break;
538             else if (token != ValueSeparator) {
539                 if (!eatSpace())
540                     lastError = QJsonParseError::UnterminatedArray;
541                 else
542                     lastError = QJsonParseError::MissingValueSeparator;
543                 return false;
544             }
545         }
546     }
547 
548     DEBUG << "size =" << (container ? container->elements.length() : 0);
549     END;
550 
551     --nestingLevel;
552 
553     return true;
554 }
555 
556 /*
557 value = false / null / true / object / array / number / string
558 
559 */
560 
parseValue()561 bool Parser::parseValue()
562 {
563     BEGIN << "parse Value" << json;
564 
565     switch (*json++) {
566     case 'n':
567         if (end - json < 4) {
568             lastError = QJsonParseError::IllegalValue;
569             return false;
570         }
571         if (*json++ == 'u' &&
572             *json++ == 'l' &&
573             *json++ == 'l') {
574             container->append(QCborValue(QCborValue::Null));
575             DEBUG << "value: null";
576             END;
577             return true;
578         }
579         lastError = QJsonParseError::IllegalValue;
580         return false;
581     case 't':
582         if (end - json < 4) {
583             lastError = QJsonParseError::IllegalValue;
584             return false;
585         }
586         if (*json++ == 'r' &&
587             *json++ == 'u' &&
588             *json++ == 'e') {
589             container->append(QCborValue(true));
590             DEBUG << "value: true";
591             END;
592             return true;
593         }
594         lastError = QJsonParseError::IllegalValue;
595         return false;
596     case 'f':
597         if (end - json < 5) {
598             lastError = QJsonParseError::IllegalValue;
599             return false;
600         }
601         if (*json++ == 'a' &&
602             *json++ == 'l' &&
603             *json++ == 's' &&
604             *json++ == 'e') {
605             container->append(QCborValue(false));
606             DEBUG << "value: false";
607             END;
608             return true;
609         }
610         lastError = QJsonParseError::IllegalValue;
611         return false;
612     case Quote: {
613         if (!parseString())
614             return false;
615         DEBUG << "value: string";
616         END;
617         return true;
618     }
619     case BeginArray: {
620         StashedContainer stashedContainer(&container, QCborValue::Array);
621         if (!parseArray())
622             return false;
623         DEBUG << "value: array";
624         END;
625         return true;
626     }
627     case BeginObject: {
628         StashedContainer stashedContainer(&container, QCborValue::Map);
629         if (!parseObject())
630             return false;
631         DEBUG << "value: object";
632         END;
633         return true;
634     }
635     case ValueSeparator:
636         // Essentially missing value, but after a colon, not after a comma
637         // like the other MissingObject errors.
638         lastError = QJsonParseError::IllegalValue;
639         return false;
640     case EndObject:
641     case EndArray:
642         lastError = QJsonParseError::MissingObject;
643         return false;
644     default:
645         --json;
646         if (!parseNumber())
647             return false;
648         DEBUG << "value: number";
649         END;
650     }
651 
652     return true;
653 }
654 
655 
656 
657 
658 
659 /*
660         number = [ minus ] int [ frac ] [ exp ]
661         decimal-point = %x2E       ; .
662         digit1-9 = %x31-39         ; 1-9
663         e = %x65 / %x45            ; e E
664         exp = e [ minus / plus ] 1*DIGIT
665         frac = decimal-point 1*DIGIT
666         int = zero / ( digit1-9 *DIGIT )
667         minus = %x2D               ; -
668         plus = %x2B                ; +
669         zero = %x30                ; 0
670 
671 */
672 
parseNumber()673 bool Parser::parseNumber()
674 {
675     BEGIN << "parseNumber" << json;
676 
677     const char *start = json;
678     bool isInt = true;
679 
680     // minus
681     if (json < end && *json == '-')
682         ++json;
683 
684     // int = zero / ( digit1-9 *DIGIT )
685     if (json < end && *json == '0') {
686         ++json;
687     } else {
688         while (json < end && *json >= '0' && *json <= '9')
689             ++json;
690     }
691 
692     // frac = decimal-point 1*DIGIT
693     if (json < end && *json == '.') {
694         isInt = false;
695         ++json;
696         while (json < end && *json >= '0' && *json <= '9')
697             ++json;
698     }
699 
700     // exp = e [ minus / plus ] 1*DIGIT
701     if (json < end && (*json == 'e' || *json == 'E')) {
702         isInt = false;
703         ++json;
704         if (json < end && (*json == '-' || *json == '+'))
705             ++json;
706         while (json < end && *json >= '0' && *json <= '9')
707             ++json;
708     }
709 
710     if (json >= end) {
711         lastError = QJsonParseError::TerminationByNumber;
712         return false;
713     }
714 
715     const QByteArray number = QByteArray::fromRawData(start, json - start);
716     DEBUG << "numberstring" << number;
717 
718     if (isInt) {
719         bool ok;
720         qlonglong n = number.toLongLong(&ok);
721         if (ok) {
722             container->append(QCborValue(n));
723             END;
724             return true;
725         }
726     }
727 
728     bool ok;
729     double d = number.toDouble(&ok);
730 
731     if (!ok) {
732         lastError = QJsonParseError::IllegalNumber;
733         return false;
734     }
735 
736     qint64 n;
737     if (convertDoubleTo(d, &n))
738         container->append(QCborValue(n));
739     else
740         container->append(QCborValue(d));
741 
742     END;
743     return true;
744 }
745 
746 /*
747 
748         string = quotation-mark *char quotation-mark
749 
750         char = unescaped /
751                escape (
752                    %x22 /          ; "    quotation mark  U+0022
753                    %x5C /          ; \    reverse solidus U+005C
754                    %x2F /          ; /    solidus         U+002F
755                    %x62 /          ; b    backspace       U+0008
756                    %x66 /          ; f    form feed       U+000C
757                    %x6E /          ; n    line feed       U+000A
758                    %x72 /          ; r    carriage return U+000D
759                    %x74 /          ; t    tab             U+0009
760                    %x75 4HEXDIG )  ; uXXXX                U+XXXX
761 
762         escape = %x5C              ; \
763 
764         quotation-mark = %x22      ; "
765 
766         unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
767  */
/*
    Shifts *result left by four bits and ORs in the value of the hexadecimal
    digit. Returns false if digit is not a hexadecimal digit; *result has
    already been shifted in that case.
*/
static inline bool addHexDigit(char digit, uint *result)
{
    *result <<= 4;

    uint nibble;
    if (digit >= '0' && digit <= '9')
        nibble = digit - '0';
    else if (digit >= 'a' && digit <= 'f')
        nibble = (digit - 'a') + 10;
    else if (digit >= 'A' && digit <= 'F')
        nibble = (digit - 'A') + 10;
    else
        return false;

    *result |= nibble;
    return true;
}
781 
scanEscapeSequence(const char * & json,const char * end,uint * ch)782 static inline bool scanEscapeSequence(const char *&json, const char *end, uint *ch)
783 {
784     ++json;
785     if (json >= end)
786         return false;
787 
788     DEBUG << "scan escape" << (char)*json;
789     uint escaped = *json++;
790     switch (escaped) {
791     case '"':
792         *ch = '"'; break;
793     case '\\':
794         *ch = '\\'; break;
795     case '/':
796         *ch = '/'; break;
797     case 'b':
798         *ch = 0x8; break;
799     case 'f':
800         *ch = 0xc; break;
801     case 'n':
802         *ch = 0xa; break;
803     case 'r':
804         *ch = 0xd; break;
805     case 't':
806         *ch = 0x9; break;
807     case 'u': {
808         *ch = 0;
809         if (json > end - 4)
810             return false;
811         for (int i = 0; i < 4; ++i) {
812             if (!addHexDigit(*json, ch))
813                 return false;
814             ++json;
815         }
816         return true;
817     }
818     default:
819         // this is not as strict as one could be, but allows for more Json files
820         // to be parsed correctly.
821         *ch = escaped;
822         return true;
823     }
824     return true;
825 }
826 
scanUtf8Char(const char * & json,const char * end,uint * result)827 static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)
828 {
829     const auto *usrc = reinterpret_cast<const uchar *>(json);
830     const auto *uend = reinterpret_cast<const uchar *>(end);
831     const uchar b = *usrc++;
832     int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, result, usrc, uend);
833     if (res < 0)
834         return false;
835 
836     json = reinterpret_cast<const char *>(usrc);
837     return true;
838 }
839 
parseString()840 bool Parser::parseString()
841 {
842     const char *start = json;
843 
844     // try to parse a utf-8 string without escape sequences, and note whether it's 7bit ASCII.
845 
846     BEGIN << "parse string" << json;
847     bool isUtf8 = true;
848     bool isAscii = true;
849     while (json < end) {
850         uint ch = 0;
851         if (*json == '"')
852             break;
853         if (*json == '\\') {
854             isAscii = false;
855             // If we find escape sequences, we store UTF-16 as there are some
856             // escape sequences which are hard to represent in UTF-8.
857             // (plain "\\ud800" for example)
858             isUtf8 = false;
859             break;
860         }
861         if (!scanUtf8Char(json, end, &ch)) {
862             lastError = QJsonParseError::IllegalUTF8String;
863             return false;
864         }
865         if (ch > 0x7f)
866             isAscii = false;
867         DEBUG << "  " << ch << char(ch);
868     }
869     ++json;
870     DEBUG << "end of string";
871     if (json >= end) {
872         lastError = QJsonParseError::UnterminatedString;
873         return false;
874     }
875 
876     // no escape sequences, we are done
877     if (isUtf8) {
878         container->appendByteData(start, json - start - 1, QCborValue::String,
879                                   isAscii ? QtCbor::Element::StringIsAscii
880                                           : QtCbor::Element::ValueFlags {});
881         END;
882         return true;
883     }
884 
885     DEBUG << "has escape sequences";
886 
887     json = start;
888 
889     QString ucs4;
890     while (json < end) {
891         uint ch = 0;
892         if (*json == '"')
893             break;
894         else if (*json == '\\') {
895             if (!scanEscapeSequence(json, end, &ch)) {
896                 lastError = QJsonParseError::IllegalEscapeSequence;
897                 return false;
898             }
899         } else {
900             if (!scanUtf8Char(json, end, &ch)) {
901                 lastError = QJsonParseError::IllegalUTF8String;
902                 return false;
903             }
904         }
905         if (QChar::requiresSurrogates(ch)) {
906             ucs4.append(QChar::highSurrogate(ch));
907             ucs4.append(QChar::lowSurrogate(ch));
908         } else {
909             ucs4.append(QChar(ushort(ch)));
910         }
911     }
912     ++json;
913 
914     if (json >= end) {
915         lastError = QJsonParseError::UnterminatedString;
916         return false;
917     }
918 
919     container->appendByteData(reinterpret_cast<const char *>(ucs4.utf16()), ucs4.size() * 2,
920                               QCborValue::String, QtCbor::Element::StringIsUtf16);
921     END;
922     return true;
923 }
924 
925 QT_END_NAMESPACE
926