1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/utils/SkJSON.h"
9 
10 #include "include/core/SkStream.h"
11 #include "include/core/SkString.h"
12 #include "include/private/SkMalloc.h"
13 #include "include/utils/SkParse.h"
14 #include "src/utils/SkUTF.h"
15 
16 #include <cmath>
17 #include <tuple>
18 #include <vector>
19 
20 namespace skjson {
21 
22 // #define SK_JSON_REPORT_ERRORS
23 
// A Value is expected to be exactly one 8-byte, 8-byte-aligned record; the tagging and
// inline (short) string code in this file hard-codes that size.
static_assert( sizeof(Value) == 8, "");
static_assert(alignof(Value) == 8, "");

// Alignment requested for the externally allocated vector slabs (see MakeVector).
static constexpr size_t kRecAlign = alignof(Value);
28 
init_tagged(Tag t)29 void Value::init_tagged(Tag t) {
30     memset(fData8, 0, sizeof(fData8));
31     fData8[Value::kTagOffset] = SkTo<uint8_t>(t);
32     SkASSERT(this->getTag() == t);
33 }
34 
35 // Pointer values store a type (in the upper kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)36 void Value::init_tagged_pointer(Tag t, void* p) {
37 #if !defined(SK_CPU_LENDIAN)
38     // Check that kRecAlign is large enough to leave room for the tag
39     static_assert(sizeof(Value) > sizeof(uintptr_t) || !(kRecAlign & Value::kTagMask), "kRecAlign is not a multiple of kTagMask+1");
40 #endif
41     *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
42 
43     if (sizeof(Value) == sizeof(uintptr_t)) {
44         // For 64-bit, we rely on the pointer upper bits being unused/zero.
45         SkASSERT(!(fData8[kTagOffset] & kTagMask));
46         fData8[kTagOffset] |= SkTo<uint8_t>(t);
47     } else {
48         // For 32-bit, we need to zero-initialize the upper 32 bits
49         SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
50         this->cast<uintptr_t>()[kTagOffset >> 2] = 0;
51         fData8[kTagOffset] = SkTo<uint8_t>(t);
52     }
53 
54     SkASSERT(this->getTag()    == t);
55     SkASSERT(this->ptr<void>() == p);
56 }
57 
NullValue()58 NullValue::NullValue() {
59     this->init_tagged(Tag::kNull);
60     SkASSERT(this->getTag() == Tag::kNull);
61 }
62 
BoolValue(bool b)63 BoolValue::BoolValue(bool b) {
64     this->init_tagged(Tag::kBool);
65     *this->cast<bool>() = b;
66     SkASSERT(this->getTag() == Tag::kBool);
67 }
68 
NumberValue(int32_t i)69 NumberValue::NumberValue(int32_t i) {
70     this->init_tagged(Tag::kInt);
71     *this->cast<int32_t>() = i;
72     SkASSERT(this->getTag() == Tag::kInt);
73 }
74 
NumberValue(float f)75 NumberValue::NumberValue(float f) {
76     this->init_tagged(Tag::kFloat);
77     *this->cast<float>() = f;
78     SkASSERT(this->getTag() == Tag::kFloat);
79 }
80 
81 // Vector recs point to externally allocated slabs with the following layout:
82 //
83 //   [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
84 //
85 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
86 //
87 template <typename T, size_t extra_alloc_size = 0>
MakeVector(const void * src,size_t size,SkArenaAlloc & alloc)88 static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
89     // The Ts are already in memory, so their size should be safe.
90     const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
91     auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
92 
93     *size_ptr = size;
94     sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
95 
96     return size_ptr;
97 }
98 
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)99 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
100     this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
101     SkASSERT(this->getTag() == Tag::kArray);
102 }
103 
104 // Strings have two flavors:
105 //
106 // -- short strings (len <= 7) -> these are stored inline, in the record
107 //    (one byte reserved for null terminator/type):
108 //
109 //        [str] [\0]|[max_len - actual_len]
110 //
111 //    Storing [max_len - actual_len] allows the 'len' field to double-up as a
112 //    null terminator when size == max_len (this works 'cause kShortString == 0).
113 //
114 // -- long strings (len > 7) -> these are externally allocated vectors (VectorRec<char>).
115 //
116 // The string data plus a null-char terminator are copied over.
117 //
118 namespace {
119 
120 // An internal string builder with a fast 8 byte short string load path
121 // (for the common case where the string is not at the end of the stream).
122 class FastString final : public Value {
123 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)124     FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
125         SkASSERT(src <= eos);
126 
127         if (size > kMaxInlineStringSize) {
128             this->initLongString(src, size, alloc);
129             SkASSERT(this->getTag() == Tag::kString);
130             return;
131         }
132 
133         static_assert(static_cast<uint8_t>(Tag::kShortString) == 0, "please don't break this");
134         static_assert(sizeof(Value) == 8, "");
135 
136         // TODO: LIKELY
137         if (src && src + 7 <= eos) {
138             this->initFastShortString(src, size);
139         } else {
140             this->initShortString(src, size);
141         }
142 
143         SkASSERT(this->getTag() == Tag::kShortString);
144     }
145 
146 private:
147     static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 1;
148 
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)149     void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
150         SkASSERT(size > kMaxInlineStringSize);
151 
152         this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
153 
154         auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
155         const_cast<char*>(data)[size] = '\0';
156     }
157 
initShortString(const char * src,size_t size)158     void initShortString(const char* src, size_t size) {
159         SkASSERT(size <= kMaxInlineStringSize);
160 
161         this->init_tagged(Tag::kShortString);
162         sk_careful_memcpy(this->cast<char>(), src, size);
163         // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
164     }
165 
initFastShortString(const char * src,size_t size)166     void initFastShortString(const char* src, size_t size) {
167         SkASSERT(size <= kMaxInlineStringSize);
168 
169         // Load 8 chars and mask out the tag and \0 terminator.
170         uint64_t* s64 = this->cast<uint64_t>();
171         memcpy(s64, src, 8);
172 
173 #if defined(SK_CPU_LENDIAN)
174         *s64 &= 0x00ffffffffffffffULL >> ((kMaxInlineStringSize - size) * 8);
175 #else
176         *s64 &= 0xffffffffffffff00ULL << ((kMaxInlineStringSize - size) * 8);
177 #endif
178     }
179 };
180 
181 } // namespace
182 
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)183 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
184     new (this) FastString(src, size, src, alloc);
185 }
186 
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)187 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
188     this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
189     SkASSERT(this->getTag() == Tag::kObject);
190 }
191 
192 
193 // Boring public Value glue.
194 
// Equality-only string compare: returns 0 when the two \0-terminated strings are identical
// and 1 otherwise (unlike strcmp, the result carries no ordering information).
static int inline_strcmp(const char a[], const char b[]) {
    while (*a != 0) {
        if (*a != *b) {
            return 1;
        }
        ++a;
        ++b;
    }

    // |a| is exhausted: the strings match only if |b| is exhausted too.
    return *b != 0;
}
207 
operator [](const char * key) const208 const Value& ObjectValue::operator[](const char* key) const {
209     // Reverse search for duplicates resolution (policy: return last).
210     const auto* begin  = this->begin();
211     const auto* member = this->end();
212 
213     while (member > begin) {
214         --member;
215         if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
216             return member->fValue;
217         }
218     }
219 
220     static const Value g_null = NullValue();
221     return g_null;
222 }
223 
224 namespace {
225 
226 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
227 //
228 // [1] https://github.com/Tencent/rapidjson/
229 // [2] https://github.com/chadaustin/sajson
230 // [3] https://pastebin.com/hnhSTL3h
231 
232 
// Per-byte classification flags, indexed by the unsigned value of an input char.
//
// bit 0 (0x01) - plain ASCII string character
// bit 1 (0x02) - whitespace
// bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
// bit 3 (0x08) - 0-9
// bit 4 (0x10) - 0-9 e E .
// bit 5 (0x20) - scope terminator (} ])
//
// Rows are labeled with the high nibble and columns with the low nibble of the char code.
static constexpr uint8_t g_token_flags[256] = {
 // 0    1    2    3    4    5    6    7      8    9    A    B    C    D    E    F
    4,   4,   4,   4,   4,   4,   4,   4,     4,   6,   6,   4,   4,   6,   4,   4, // 0
    4,   4,   4,   4,   4,   4,   4,   4,     4,   4,   4,   4,   4,   4,   4,   4, // 1
    3,   1,   4,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   0x11,1, // 2
 0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,  0x19,0x19,   1,   1,   1,   1,   1,   1, // 3
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 4
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   4,0x25,   1,   1, // 5
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 6
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,0x25,   1,   1, // 7

 // 128-255
 // (no flags set: in particular these bytes are not string terminators, so the string
 //  scanner consumes them as regular string content)
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0
};
256 
is_ws(char c)257 static inline bool is_ws(char c)       { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; }
is_eostring(char c)258 static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; }
is_digit(char c)259 static inline bool is_digit(char c)    { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; }
is_numeric(char c)260 static inline bool is_numeric(char c)  { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; }
is_eoscope(char c)261 static inline bool is_eoscope(char c)  { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; }
262 
skip_ws(const char * p)263 static inline const char* skip_ws(const char* p) {
264     while (is_ws(*p)) ++p;
265     return p;
266 }
267 
pow10(int32_t exp)268 static inline float pow10(int32_t exp) {
269     static constexpr float g_pow10_table[63] =
270     {
271        1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
272        1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
273        1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
274        1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
275        1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
276        1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
277        1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
278        1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
279     };
280 
281     static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2;
282 
283     // We only support negative exponents for now.
284     SkASSERT(exp <= 0);
285 
286     return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
287                                   : std::pow(10.0f, static_cast<float>(exp));
288 }
289 
290 class DOMParser {
291 public:
DOMParser(SkArenaAlloc & alloc)292     explicit DOMParser(SkArenaAlloc& alloc)
293         : fAlloc(alloc) {
294         fValueStack.reserve(kValueStackReserve);
295         fUnescapeBuffer.reserve(kUnescapeBufferReserve);
296     }
297 
parse(const char * p,size_t size)298     const Value parse(const char* p, size_t size) {
299         if (!size) {
300             return this->error(NullValue(), p, "invalid empty input");
301         }
302 
303         const char* p_stop = p + size - 1;
304 
305         // We're only checking for end-of-stream on object/array close('}',']'),
306         // so we must trim any whitespace from the buffer tail.
307         while (p_stop > p && is_ws(*p_stop)) --p_stop;
308 
309         SkASSERT(p_stop >= p && p_stop < p + size);
310         if (!is_eoscope(*p_stop)) {
311             return this->error(NullValue(), p_stop, "invalid top-level value");
312         }
313 
314         p = skip_ws(p);
315 
316         switch (*p) {
317         case '{':
318             goto match_object;
319         case '[':
320             goto match_array;
321         default:
322             return this->error(NullValue(), p, "invalid top-level value");
323         }
324 
325     match_object:
326         SkASSERT(*p == '{');
327         p = skip_ws(p + 1);
328 
329         this->pushObjectScope();
330 
331         if (*p == '}') goto pop_object;
332 
333         // goto match_object_key;
334     match_object_key:
335         p = skip_ws(p);
336         if (*p != '"') return this->error(NullValue(), p, "expected object key");
337 
338         p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
339             this->pushObjectKey(key, size, eos);
340         });
341         if (!p) return NullValue();
342 
343         p = skip_ws(p);
344         if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");
345 
346         ++p;
347 
348         // goto match_value;
349     match_value:
350         p = skip_ws(p);
351 
352         switch (*p) {
353         case '\0':
354             return this->error(NullValue(), p, "unexpected input end");
355         case '"':
356             p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
357                 this->pushString(str, size, eos);
358             });
359             break;
360         case '[':
361             goto match_array;
362         case 'f':
363             p = this->matchFalse(p);
364             break;
365         case 'n':
366             p = this->matchNull(p);
367             break;
368         case 't':
369             p = this->matchTrue(p);
370             break;
371         case '{':
372             goto match_object;
373         default:
374             p = this->matchNumber(p);
375             break;
376         }
377 
378         if (!p) return NullValue();
379 
380         // goto match_post_value;
381     match_post_value:
382         SkASSERT(!this->inTopLevelScope());
383 
384         p = skip_ws(p);
385         switch (*p) {
386         case ',':
387             ++p;
388             if (this->inObjectScope()) {
389                 goto match_object_key;
390             } else {
391                 SkASSERT(this->inArrayScope());
392                 goto match_value;
393             }
394         case ']':
395             goto pop_array;
396         case '}':
397             goto pop_object;
398         default:
399             return this->error(NullValue(), p - 1, "unexpected value-trailing token");
400         }
401 
402         // unreachable
403         SkASSERT(false);
404 
405     pop_object:
406         SkASSERT(*p == '}');
407 
408         if (this->inArrayScope()) {
409             return this->error(NullValue(), p, "unexpected object terminator");
410         }
411 
412         this->popObjectScope();
413 
414         // goto pop_common
415     pop_common:
416         SkASSERT(is_eoscope(*p));
417 
418         if (this->inTopLevelScope()) {
419             SkASSERT(fValueStack.size() == 1);
420 
421             // Success condition: parsed the top level element and reached the stop token.
422             return p == p_stop
423                 ? fValueStack.front()
424                 : this->error(NullValue(), p + 1, "trailing root garbage");
425         }
426 
427         if (p == p_stop) {
428             return this->error(NullValue(), p, "unexpected end-of-input");
429         }
430 
431         ++p;
432 
433         goto match_post_value;
434 
435     match_array:
436         SkASSERT(*p == '[');
437         p = skip_ws(p + 1);
438 
439         this->pushArrayScope();
440 
441         if (*p != ']') goto match_value;
442 
443         // goto pop_array;
444     pop_array:
445         SkASSERT(*p == ']');
446 
447         if (this->inObjectScope()) {
448             return this->error(NullValue(), p, "unexpected array terminator");
449         }
450 
451         this->popArrayScope();
452 
453         goto pop_common;
454 
455         SkASSERT(false);
456         return NullValue();
457     }
458 
getError() const459     std::tuple<const char*, const SkString> getError() const {
460         return std::make_tuple(fErrorToken, fErrorMessage);
461     }
462 
463 private:
464     SkArenaAlloc&         fAlloc;
465 
466     // Pending values stack.
467     static constexpr size_t kValueStackReserve = 256;
468     std::vector<Value>    fValueStack;
469 
470     // String unescape buffer.
471     static constexpr size_t kUnescapeBufferReserve = 512;
472     std::vector<char>     fUnescapeBuffer;
473 
474     // Tracks the current object/array scope, as an index into fStack:
475     //
476     //   - for objects: fScopeIndex =  (index of first value in scope)
477     //   - for arrays : fScopeIndex = -(index of first value in scope)
478     //
479     // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
480     intptr_t              fScopeIndex = 0;
481 
482     // Error reporting.
483     const char*           fErrorToken = nullptr;
484     SkString              fErrorMessage;
485 
inTopLevelScope() const486     bool inTopLevelScope() const { return fScopeIndex == 0; }
inObjectScope() const487     bool inObjectScope()   const { return fScopeIndex >  0; }
inArrayScope() const488     bool inArrayScope()    const { return fScopeIndex <  0; }
489 
490     // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
491     template <typename T>
492     class RawValue final : public Value {
493     public:
RawValue(T v)494         explicit RawValue(T v) {
495             static_assert(sizeof(T) <= sizeof(Value), "");
496             *this->cast<T>() = v;
497         }
498 
operator *() const499         T operator *() const { return *this->cast<T>(); }
500     };
501 
502     template <typename VectorT>
popScopeAsVec(size_t scope_start)503     void popScopeAsVec(size_t scope_start) {
504         SkASSERT(scope_start > 0);
505         SkASSERT(scope_start <= fValueStack.size());
506 
507         using T = typename VectorT::ValueT;
508         static_assert( sizeof(T) >=  sizeof(Value), "");
509         static_assert( sizeof(T)  %  sizeof(Value) == 0, "");
510         static_assert(alignof(T) == alignof(Value), "");
511 
512         const auto scope_count = fValueStack.size() - scope_start,
513                          count = scope_count / (sizeof(T) / sizeof(Value));
514         SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
515 
516         const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
517 
518         // Restore the previous scope index from saved placeholder value,
519         // and instantiate as a vector of values in scope.
520         auto& placeholder = fValueStack[scope_start - 1];
521         fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
522         placeholder = VectorT(begin, count, fAlloc);
523 
524         // Drop the (consumed) values in scope.
525         fValueStack.resize(scope_start);
526     }
527 
pushObjectScope()528     void pushObjectScope() {
529         // Save a scope index now, and then later we'll overwrite this value as the Object itself.
530         fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
531 
532         // New object scope.
533         fScopeIndex = SkTo<intptr_t>(fValueStack.size());
534     }
535 
popObjectScope()536     void popObjectScope() {
537         SkASSERT(this->inObjectScope());
538         this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
539 
540         SkDEBUGCODE(
541             const auto& obj = fValueStack.back().as<ObjectValue>();
542             SkASSERT(obj.is<ObjectValue>());
543             for (const auto& member : obj) {
544                 SkASSERT(member.fKey.is<StringValue>());
545             }
546         )
547     }
548 
pushArrayScope()549     void pushArrayScope() {
550         // Save a scope index now, and then later we'll overwrite this value as the Array itself.
551         fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
552 
553         // New array scope.
554         fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
555     }
556 
popArrayScope()557     void popArrayScope() {
558         SkASSERT(this->inArrayScope());
559         this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
560 
561         SkDEBUGCODE(
562             const auto& arr = fValueStack.back().as<ArrayValue>();
563             SkASSERT(arr.is<ArrayValue>());
564         )
565     }
566 
pushObjectKey(const char * key,size_t size,const char * eos)567     void pushObjectKey(const char* key, size_t size, const char* eos) {
568         SkASSERT(this->inObjectScope());
569         SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
570         SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
571         this->pushString(key, size, eos);
572     }
573 
pushTrue()574     void pushTrue() {
575         fValueStack.push_back(BoolValue(true));
576     }
577 
pushFalse()578     void pushFalse() {
579         fValueStack.push_back(BoolValue(false));
580     }
581 
pushNull()582     void pushNull() {
583         fValueStack.push_back(NullValue());
584     }
585 
pushString(const char * s,size_t size,const char * eos)586     void pushString(const char* s, size_t size, const char* eos) {
587         fValueStack.push_back(FastString(s, size, eos, fAlloc));
588     }
589 
pushInt32(int32_t i)590     void pushInt32(int32_t i) {
591         fValueStack.push_back(NumberValue(i));
592     }
593 
pushFloat(float f)594     void pushFloat(float f) {
595         fValueStack.push_back(NumberValue(f));
596     }
597 
598     template <typename T>
error(T && ret_val,const char * p,const char * msg)599     T error(T&& ret_val, const char* p, const char* msg) {
600 #if defined(SK_JSON_REPORT_ERRORS)
601         fErrorToken = p;
602         fErrorMessage.set(msg);
603 #endif
604         return ret_val;
605     }
606 
matchTrue(const char * p)607     const char* matchTrue(const char* p) {
608         SkASSERT(p[0] == 't');
609 
610         if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
611             this->pushTrue();
612             return p + 4;
613         }
614 
615         return this->error(nullptr, p, "invalid token");
616     }
617 
matchFalse(const char * p)618     const char* matchFalse(const char* p) {
619         SkASSERT(p[0] == 'f');
620 
621         if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
622             this->pushFalse();
623             return p + 5;
624         }
625 
626         return this->error(nullptr, p, "invalid token");
627     }
628 
matchNull(const char * p)629     const char* matchNull(const char* p) {
630         SkASSERT(p[0] == 'n');
631 
632         if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
633             this->pushNull();
634             return p + 4;
635         }
636 
637         return this->error(nullptr, p, "invalid token");
638     }
639 
unescapeString(const char * begin,const char * end)640     const std::vector<char>* unescapeString(const char* begin, const char* end) {
641         fUnescapeBuffer.clear();
642 
643         for (const auto* p = begin; p != end; ++p) {
644             if (*p != '\\') {
645                 fUnescapeBuffer.push_back(*p);
646                 continue;
647             }
648 
649             if (++p == end) {
650                 return nullptr;
651             }
652 
653             switch (*p) {
654             case  '"': fUnescapeBuffer.push_back( '"'); break;
655             case '\\': fUnescapeBuffer.push_back('\\'); break;
656             case  '/': fUnescapeBuffer.push_back( '/'); break;
657             case  'b': fUnescapeBuffer.push_back('\b'); break;
658             case  'f': fUnescapeBuffer.push_back('\f'); break;
659             case  'n': fUnescapeBuffer.push_back('\n'); break;
660             case  'r': fUnescapeBuffer.push_back('\r'); break;
661             case  't': fUnescapeBuffer.push_back('\t'); break;
662             case  'u': {
663                 if (p + 4 >= end) {
664                     return nullptr;
665                 }
666 
667                 uint32_t hexed;
668                 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
669                 const auto* eos = SkParse::FindHex(hex_str, &hexed);
670                 if (!eos || *eos) {
671                     return nullptr;
672                 }
673 
674                 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
675                 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
676                 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
677                 p += 4;
678             } break;
679             default: return nullptr;
680             }
681         }
682 
683         return &fUnescapeBuffer;
684     }
685 
686     template <typename MatchFunc>
matchString(const char * p,const char * p_stop,MatchFunc && func)687     const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
688         SkASSERT(*p == '"');
689         const auto* s_begin = p + 1;
690         bool requires_unescape = false;
691 
692         do {
693             // Consume string chars.
694             // This is the fast path, and hopefully we only hit it once then quick-exit below.
695             for (p = p + 1; !is_eostring(*p); ++p);
696 
697             if (*p == '"') {
698                 // Valid string found.
699                 if (!requires_unescape) {
700                     func(s_begin, p - s_begin, p_stop);
701                 } else {
702                     // Slow unescape.  We could avoid this extra copy with some effort,
703                     // but in practice escaped strings should be rare.
704                     const auto* buf = this->unescapeString(s_begin, p);
705                     if (!buf) {
706                         break;
707                     }
708 
709                     SkASSERT(!buf->empty());
710                     func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
711                 }
712                 return p + 1;
713             }
714 
715             if (*p == '\\') {
716                 requires_unescape = true;
717                 ++p;
718                 continue;
719             }
720 
721             // End-of-scope chars are special: we use them to tag the end of the input.
722             // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
723             // end of the input.  To that effect, we treat them as string terminators above,
724             // then we catch them here.
725             if (is_eoscope(*p)) {
726                 continue;
727             }
728 
729             // Invalid/unexpected char.
730             break;
731         } while (p != p_stop);
732 
733         // Premature end-of-input, or illegal string char.
734         return this->error(nullptr, s_begin - 1, "invalid string");
735     }
736 
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)737     const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
738         SkASSERT(exp <= 0);
739 
740         for (;;) {
741             if (!is_digit(*p)) break;
742             f = f * 10.f + (*p++ - '0'); --exp;
743             if (!is_digit(*p)) break;
744             f = f * 10.f + (*p++ - '0'); --exp;
745         }
746 
747         const auto decimal_scale = pow10(exp);
748         if (is_numeric(*p) || !decimal_scale) {
749             SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
750             // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
751             return nullptr;
752         }
753 
754         this->pushFloat(sign * f * decimal_scale);
755 
756         return p;
757     }
758 
matchFastFloatPart(const char * p,int sign,float f)759     const char* matchFastFloatPart(const char* p, int sign, float f) {
760         for (;;) {
761             if (!is_digit(*p)) break;
762             f = f * 10.f + (*p++ - '0');
763             if (!is_digit(*p)) break;
764             f = f * 10.f + (*p++ - '0');
765         }
766 
767         if (!is_numeric(*p)) {
768             // Matched (integral) float.
769             this->pushFloat(sign * f);
770             return p;
771         }
772 
773         return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
774                            : nullptr;
775     }
776 
matchFast32OrFloat(const char * p)777     const char* matchFast32OrFloat(const char* p) {
778         int sign = 1;
779         if (*p == '-') {
780             sign = -1;
781             ++p;
782         }
783 
784         const auto* digits_start = p;
785 
786         int32_t n32 = 0;
787 
788         // This is the largest absolute int32 value we can handle before
789         // risking overflow *on the next digit* (214748363).
790         static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
791 
792         if (is_digit(*p)) {
793             n32 = (*p++ - '0');
794             for (;;) {
795                 if (!is_digit(*p) || n32 > kMaxInt32) break;
796                 n32 = n32 * 10 + (*p++ - '0');
797             }
798         }
799 
800         if (!is_numeric(*p)) {
801             // Did we actually match any digits?
802             if (p > digits_start) {
803                 this->pushInt32(sign * n32);
804                 return p;
805             }
806             return nullptr;
807         }
808 
809         if (*p == '.') {
810             const auto* decimals_start = ++p;
811 
812             int exp = 0;
813 
814             for (;;) {
815                 if (!is_digit(*p) || n32 > kMaxInt32) break;
816                 n32 = n32 * 10 + (*p++ - '0'); --exp;
817                 if (!is_digit(*p) || n32 > kMaxInt32) break;
818                 n32 = n32 * 10 + (*p++ - '0'); --exp;
819             }
820 
821             if (!is_numeric(*p)) {
822                 // Did we actually match any digits?
823                 if (p > decimals_start) {
824                     this->pushFloat(sign * n32 * pow10(exp));
825                     return p;
826                 }
827                 return nullptr;
828             }
829 
830             if (n32 > kMaxInt32) {
831                 // we ran out on n32 bits
832                 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
833             }
834         }
835 
836         return this->matchFastFloatPart(p, sign, n32);
837     }
838 
matchNumber(const char * p)839     const char* matchNumber(const char* p) {
840         if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
841 
842         // slow fallback
843         char* matched;
844         float f = strtof(p, &matched);
845         if (matched > p) {
846             this->pushFloat(f);
847             return matched;
848         }
849         return this->error(nullptr, p, "invalid numeric token");
850     }
851 };
852 
Write(const Value & v,SkWStream * stream)853 void Write(const Value& v, SkWStream* stream) {
854     switch (v.getType()) {
855     case Value::Type::kNull:
856         stream->writeText("null");
857         break;
858     case Value::Type::kBool:
859         stream->writeText(*v.as<BoolValue>() ? "true" : "false");
860         break;
861     case Value::Type::kNumber:
862         stream->writeScalarAsText(*v.as<NumberValue>());
863         break;
864     case Value::Type::kString:
865         stream->writeText("\"");
866         stream->writeText(v.as<StringValue>().begin());
867         stream->writeText("\"");
868         break;
869     case Value::Type::kArray: {
870         const auto& array = v.as<ArrayValue>();
871         stream->writeText("[");
872         bool first_value = true;
873         for (const auto& v : array) {
874             if (!first_value) stream->writeText(",");
875             Write(v, stream);
876             first_value = false;
877         }
878         stream->writeText("]");
879         break;
880     }
881     case Value::Type::kObject:
882         const auto& object = v.as<ObjectValue>();
883         stream->writeText("{");
884         bool first_member = true;
885         for (const auto& member : object) {
886             SkASSERT(member.fKey.getType() == Value::Type::kString);
887             if (!first_member) stream->writeText(",");
888             Write(member.fKey, stream);
889             stream->writeText(":");
890             Write(member.fValue, stream);
891             first_member = false;
892         }
893         stream->writeText("}");
894         break;
895     }
896 }
897 
898 } // namespace
899 
toString() const900 SkString Value::toString() const {
901     SkDynamicMemoryWStream wstream;
902     Write(*this, &wstream);
903     const auto data = wstream.detachAsData();
904     // TODO: is there a better way to pass data around without copying?
905     return SkString(static_cast<const char*>(data->data()), data->size());
906 }
907 
908 static constexpr size_t kMinChunkSize = 4096;
909 
DOM(const char * data,size_t size)910 DOM::DOM(const char* data, size_t size)
911     : fAlloc(kMinChunkSize) {
912     DOMParser parser(fAlloc);
913 
914     fRoot = parser.parse(data, size);
915 }
916 
write(SkWStream * stream) const917 void DOM::write(SkWStream* stream) const {
918     Write(fRoot, stream);
919 }
920 
921 } // namespace skjson
922