1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/utils/SkJSON.h"
9
10 #include "include/core/SkStream.h"
11 #include "include/core/SkString.h"
12 #include "include/private/SkMalloc.h"
13 #include "include/utils/SkParse.h"
14 #include "src/utils/SkUTF.h"
15
16 #include <cmath>
17 #include <tuple>
18 #include <vector>
19
20 namespace skjson {
21
22 // #define SK_JSON_REPORT_ERRORS
23
// Layout guards: the code below assumes a Value record is exactly 8 bytes wide
// and 8-byte aligned (see e.g. the 8-byte short string load in FastString).
static_assert( sizeof(Value) == 8, "");
static_assert(alignof(Value) == 8, "");

// Alignment requested for the externally allocated vector slabs (see MakeVector).
static constexpr size_t kRecAlign = alignof(Value);
28
init_tagged(Tag t)29 void Value::init_tagged(Tag t) {
30 memset(fData8, 0, sizeof(fData8));
31 fData8[Value::kTagOffset] = SkTo<uint8_t>(t);
32 SkASSERT(this->getTag() == t);
33 }
34
35 // Pointer values store a type (in the upper kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)36 void Value::init_tagged_pointer(Tag t, void* p) {
37 #if !defined(SK_CPU_LENDIAN)
38 // Check that kRecAlign is large enough to leave room for the tag
39 static_assert(sizeof(Value) > sizeof(uintptr_t) || !(kRecAlign & Value::kTagMask), "kRecAlign is not a multiple of kTagMask+1");
40 #endif
41 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
42
43 if (sizeof(Value) == sizeof(uintptr_t)) {
44 // For 64-bit, we rely on the pointer upper bits being unused/zero.
45 SkASSERT(!(fData8[kTagOffset] & kTagMask));
46 fData8[kTagOffset] |= SkTo<uint8_t>(t);
47 } else {
48 // For 32-bit, we need to zero-initialize the upper 32 bits
49 SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
50 this->cast<uintptr_t>()[kTagOffset >> 2] = 0;
51 fData8[kTagOffset] = SkTo<uint8_t>(t);
52 }
53
54 SkASSERT(this->getTag() == t);
55 SkASSERT(this->ptr<void>() == p);
56 }
57
NullValue()58 NullValue::NullValue() {
59 this->init_tagged(Tag::kNull);
60 SkASSERT(this->getTag() == Tag::kNull);
61 }
62
BoolValue(bool b)63 BoolValue::BoolValue(bool b) {
64 this->init_tagged(Tag::kBool);
65 *this->cast<bool>() = b;
66 SkASSERT(this->getTag() == Tag::kBool);
67 }
68
NumberValue(int32_t i)69 NumberValue::NumberValue(int32_t i) {
70 this->init_tagged(Tag::kInt);
71 *this->cast<int32_t>() = i;
72 SkASSERT(this->getTag() == Tag::kInt);
73 }
74
NumberValue(float f)75 NumberValue::NumberValue(float f) {
76 this->init_tagged(Tag::kFloat);
77 *this->cast<float>() = f;
78 SkASSERT(this->getTag() == Tag::kFloat);
79 }
80
81 // Vector recs point to externally allocated slabs with the following layout:
82 //
83 // [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
84 //
85 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
86 //
87 template <typename T, size_t extra_alloc_size = 0>
MakeVector(const void * src,size_t size,SkArenaAlloc & alloc)88 static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
89 // The Ts are already in memory, so their size should be safe.
90 const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
91 auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
92
93 *size_ptr = size;
94 sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
95
96 return size_ptr;
97 }
98
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)99 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
100 this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
101 SkASSERT(this->getTag() == Tag::kArray);
102 }
103
104 // Strings have two flavors:
105 //
106 // -- short strings (len <= 7) -> these are stored inline, in the record
107 // (one byte reserved for null terminator/type):
108 //
109 // [str] [\0]|[max_len - actual_len]
110 //
111 // Storing [max_len - actual_len] allows the 'len' field to double-up as a
112 // null terminator when size == max_len (this works 'cause kShortString == 0).
113 //
114 // -- long strings (len > 7) -> these are externally allocated vectors (VectorRec<char>).
115 //
116 // The string data plus a null-char terminator are copied over.
117 //
118 namespace {
119
120 // An internal string builder with a fast 8 byte short string load path
121 // (for the common case where the string is not at the end of the stream).
122 class FastString final : public Value {
123 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)124 FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
125 SkASSERT(src <= eos);
126
127 if (size > kMaxInlineStringSize) {
128 this->initLongString(src, size, alloc);
129 SkASSERT(this->getTag() == Tag::kString);
130 return;
131 }
132
133 static_assert(static_cast<uint8_t>(Tag::kShortString) == 0, "please don't break this");
134 static_assert(sizeof(Value) == 8, "");
135
136 // TODO: LIKELY
137 if (src && src + 7 <= eos) {
138 this->initFastShortString(src, size);
139 } else {
140 this->initShortString(src, size);
141 }
142
143 SkASSERT(this->getTag() == Tag::kShortString);
144 }
145
146 private:
147 static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 1;
148
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)149 void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
150 SkASSERT(size > kMaxInlineStringSize);
151
152 this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
153
154 auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
155 const_cast<char*>(data)[size] = '\0';
156 }
157
initShortString(const char * src,size_t size)158 void initShortString(const char* src, size_t size) {
159 SkASSERT(size <= kMaxInlineStringSize);
160
161 this->init_tagged(Tag::kShortString);
162 sk_careful_memcpy(this->cast<char>(), src, size);
163 // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
164 }
165
initFastShortString(const char * src,size_t size)166 void initFastShortString(const char* src, size_t size) {
167 SkASSERT(size <= kMaxInlineStringSize);
168
169 // Load 8 chars and mask out the tag and \0 terminator.
170 uint64_t* s64 = this->cast<uint64_t>();
171 memcpy(s64, src, 8);
172
173 #if defined(SK_CPU_LENDIAN)
174 *s64 &= 0x00ffffffffffffffULL >> ((kMaxInlineStringSize - size) * 8);
175 #else
176 *s64 &= 0xffffffffffffff00ULL << ((kMaxInlineStringSize - size) * 8);
177 #endif
178 }
179 };
180
181 } // namespace
182
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)183 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
184 new (this) FastString(src, size, src, alloc);
185 }
186
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)187 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
188 this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
189 SkASSERT(this->getTag() == Tag::kObject);
190 }
191
192
193 // Boring public Value glue.
194
// Equality-only string comparison: returns 0 when the two null-terminated strings
// are identical and 1 otherwise (there is no ordering information in the result).
static int inline_strcmp(const char a[], const char b[]) {
    // Advance in lockstep for as long as |a| has chars left.
    while (*a != '\0') {
        if (*a != *b) {
            return 1;
        }
        ++a;
        ++b;
    }

    // |a| is exhausted: the strings match only if |b| is exhausted too.
    return *b != '\0' ? 1 : 0;
}
207
operator [](const char * key) const208 const Value& ObjectValue::operator[](const char* key) const {
209 // Reverse search for duplicates resolution (policy: return last).
210 const auto* begin = this->begin();
211 const auto* member = this->end();
212
213 while (member > begin) {
214 --member;
215 if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
216 return member->fValue;
217 }
218 }
219
220 static const Value g_null = NullValue();
221 return g_null;
222 }
223
224 namespace {
225
226 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
227 //
228 // [1] https://github.com/Tencent/rapidjson/
229 // [2] https://github.com/chadaustin/sajson
230 // [3] https://pastebin.com/hnhSTL3h
231
232
// Per-character classification flags, indexed by the unsigned char value:
//
//   bit 0 (0x01) - plain ASCII string character
//   bit 1 (0x02) - whitespace
//   bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
//   bit 3 (0x08) - 0-9
//   bit 4 (0x10) - 0-9 e E .
//   bit 5 (0x20) - scope terminator (} ])
//
// Only the ASCII half is spelled out: entries 128-255 are implicitly zero-initialized.
static constexpr uint8_t g_token_flags[256] = {
 // 0     1     2     3     4     5     6     7       8     9     A     B     C     D     E     F
    4,    4,    4,    4,    4,    4,    4,    4,      4,    6,    6,    4,    4,    6,    4,    4,    // 0
    4,    4,    4,    4,    4,    4,    4,    4,      4,    4,    4,    4,    4,    4,    4,    4,    // 1
    3,    1,    4,    1,    1,    1,    1,    1,      1,    1,    1,    1,    1,    1,    0x11, 1,    // 2
    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,   0x19, 0x19, 1,    1,    1,    1,    1,    1,    // 3
    1,    1,    1,    1,    1,    0x11, 1,    1,      1,    1,    1,    1,    1,    1,    1,    1,    // 4
    1,    1,    1,    1,    1,    1,    1,    1,      1,    1,    1,    1,    4,    0x25, 1,    1,    // 5
    1,    1,    1,    1,    1,    0x11, 1,    1,      1,    1,    1,    1,    1,    1,    1,    1,    // 6
    1,    1,    1,    1,    1,    1,    1,    1,      1,    1,    1,    1,    1,    0x25, 1,    1,    // 7
};

// Flag set for |c| (chars are mapped to table indices through their unsigned value).
static inline uint8_t token_flags(char c) {
    return g_token_flags[static_cast<uint8_t>(c)];
}

static inline bool is_ws(char c)       { return (token_flags(c) & 0x02) != 0; }
static inline bool is_eostring(char c) { return (token_flags(c) & 0x04) != 0; }
static inline bool is_digit(char c)    { return (token_flags(c) & 0x08) != 0; }
static inline bool is_numeric(char c)  { return (token_flags(c) & 0x10) != 0; }
static inline bool is_eoscope(char c)  { return (token_flags(c) & 0x20) != 0; }

// Returns the first non-whitespace position at or after |p|.
// There is no bounds check: the scan stops at the first char not flagged as whitespace.
static inline const char* skip_ws(const char* p) {
    for (; is_ws(*p); ++p) {}
    return p;
}
267
pow10(int32_t exp)268 static inline float pow10(int32_t exp) {
269 static constexpr float g_pow10_table[63] =
270 {
271 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
272 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
273 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
274 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
275 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
276 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
277 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
278 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
279 };
280
281 static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2;
282
283 // We only support negative exponents for now.
284 SkASSERT(exp <= 0);
285
286 return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
287 : std::pow(10.0f, static_cast<float>(exp));
288 }
289
290 class DOMParser {
291 public:
DOMParser(SkArenaAlloc & alloc)292 explicit DOMParser(SkArenaAlloc& alloc)
293 : fAlloc(alloc) {
294 fValueStack.reserve(kValueStackReserve);
295 fUnescapeBuffer.reserve(kUnescapeBufferReserve);
296 }
297
// Parses |size| bytes of JSON text starting at |p| and returns the root value.
//
// Only objects and arrays are accepted at the top level.  On any failure a NullValue is
// returned (the failing token/message are recorded by error() only when
// SK_JSON_REPORT_ERRORS is defined).
//
// The body is a goto-driven state machine:
//
//   match_object / match_array -> open a new scope
//   match_object_key           -> consume a "key" and the ':' separator
//   match_value                -> consume one value (possibly opening a nested scope)
//   match_post_value           -> after a value: expect ',' or a scope terminator
//   pop_object / pop_array     -> close the current scope
//   pop_common                 -> shared tail: success check, or resume in the parent scope
const Value parse(const char* p, size_t size) {
    if (!size) {
        return this->error(NullValue(), p, "invalid empty input");
    }

    // Last char of the input (adjusted below to the last non-whitespace char).
    const char* p_stop = p + size - 1;

    // We're only checking for end-of-stream on object/array close('}',']'),
    // so we must trim any whitespace from the buffer tail.
    while (p_stop > p && is_ws(*p_stop)) --p_stop;

    SkASSERT(p_stop >= p && p_stop < p + size);
    // The stop token must be a scope terminator: the scanning helpers stop on those chars.
    if (!is_eoscope(*p_stop)) {
        return this->error(NullValue(), p_stop, "invalid top-level value");
    }

    p = skip_ws(p);

    switch (*p) {
    case '{':
        goto match_object;
    case '[':
        goto match_array;
    default:
        return this->error(NullValue(), p, "invalid top-level value");
    }

match_object:
    SkASSERT(*p == '{');
    p = skip_ws(p + 1);

    this->pushObjectScope();

    // Empty object.
    if (*p == '}') goto pop_object;

    // goto match_object_key;
match_object_key:
    p = skip_ws(p);
    if (*p != '"') return this->error(NullValue(), p, "expected object key");

    p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
        this->pushObjectKey(key, size, eos);
    });
    // matchString has already reported the error.
    if (!p) return NullValue();

    p = skip_ws(p);
    if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");

    ++p;

    // goto match_value;
match_value:
    p = skip_ws(p);

    // Dispatch on the first char of the value.
    switch (*p) {
    case '\0':
        return this->error(NullValue(), p, "unexpected input end");
    case '"':
        p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
            this->pushString(str, size, eos);
        });
        break;
    case '[':
        goto match_array;
    case 'f':
        p = this->matchFalse(p);
        break;
    case 'n':
        p = this->matchNull(p);
        break;
    case 't':
        p = this->matchTrue(p);
        break;
    case '{':
        goto match_object;
    default:
        // Anything else is attempted as a number.
        p = this->matchNumber(p);
        break;
    }

    // The match* helpers return nullptr (after reporting the error) on failure.
    if (!p) return NullValue();

    // goto match_post_value;
match_post_value:
    SkASSERT(!this->inTopLevelScope());

    p = skip_ws(p);
    switch (*p) {
    case ',':
        ++p;
        // What follows the separator depends on the kind of the enclosing scope.
        if (this->inObjectScope()) {
            goto match_object_key;
        } else {
            SkASSERT(this->inArrayScope());
            goto match_value;
        }
    case ']':
        goto pop_array;
    case '}':
        goto pop_object;
    default:
        return this->error(NullValue(), p - 1, "unexpected value-trailing token");
    }

    // unreachable
    SkASSERT(false);

pop_object:
    SkASSERT(*p == '}');

    // '}' cannot close an array scope.
    if (this->inArrayScope()) {
        return this->error(NullValue(), p, "unexpected object terminator");
    }

    this->popObjectScope();

    // goto pop_common
pop_common:
    SkASSERT(is_eoscope(*p));

    if (this->inTopLevelScope()) {
        SkASSERT(fValueStack.size() == 1);

        // Success condition: parsed the top level element and reached the stop token.
        return p == p_stop
            ? fValueStack.front()
            : this->error(NullValue(), p + 1, "trailing root garbage");
    }

    // Still inside a scope, but the input is exhausted.
    if (p == p_stop) {
        return this->error(NullValue(), p, "unexpected end-of-input");
    }

    ++p;

    // The scope just closed is itself a value of its parent scope.
    goto match_post_value;

match_array:
    SkASSERT(*p == '[');
    p = skip_ws(p + 1);

    this->pushArrayScope();

    // Non-empty array: go parse its first value.
    if (*p != ']') goto match_value;

    // goto pop_array;
pop_array:
    SkASSERT(*p == ']');

    // ']' cannot close an object scope.
    if (this->inObjectScope()) {
        return this->error(NullValue(), p, "unexpected array terminator");
    }

    this->popArrayScope();

    goto pop_common;

    SkASSERT(false);
    return NullValue();
}
458
getError() const459 std::tuple<const char*, const SkString> getError() const {
460 return std::make_tuple(fErrorToken, fErrorMessage);
461 }
462
463 private:
// Arena passed to the vector/string constructors for out-of-line storage.
SkArenaAlloc& fAlloc;

// Pending values stack.
static constexpr size_t kValueStackReserve = 256;
std::vector<Value> fValueStack;

// String unescape buffer.
static constexpr size_t kUnescapeBufferReserve = 512;
std::vector<char> fUnescapeBuffer;

// Tracks the current object/array scope, as an index into fValueStack:
//
//   - for objects: fScopeIndex =  (index of first value in scope)
//   - for arrays : fScopeIndex = -(index of first value in scope)
//
// fScopeIndex == 0 IFF we are at the top level (no current/active scope).
// (The first value in any scope has an index >= 1, as its slot is always preceded by the
// placeholder pushed by pushObjectScope()/pushArrayScope() -- hence 0 is unambiguous.)
intptr_t fScopeIndex = 0;

// Error reporting.
const char* fErrorToken = nullptr;
SkString fErrorMessage;

// Scope kind predicates, derived from the sign of fScopeIndex.
bool inTopLevelScope() const { return fScopeIndex == 0; }
bool inObjectScope() const { return fScopeIndex > 0; }
bool inArrayScope() const { return fScopeIndex < 0; }
489
490 // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
491 template <typename T>
492 class RawValue final : public Value {
493 public:
RawValue(T v)494 explicit RawValue(T v) {
495 static_assert(sizeof(T) <= sizeof(Value), "");
496 *this->cast<T>() = v;
497 }
498
operator *() const499 T operator *() const { return *this->cast<T>(); }
500 };
501
502 template <typename VectorT>
popScopeAsVec(size_t scope_start)503 void popScopeAsVec(size_t scope_start) {
504 SkASSERT(scope_start > 0);
505 SkASSERT(scope_start <= fValueStack.size());
506
507 using T = typename VectorT::ValueT;
508 static_assert( sizeof(T) >= sizeof(Value), "");
509 static_assert( sizeof(T) % sizeof(Value) == 0, "");
510 static_assert(alignof(T) == alignof(Value), "");
511
512 const auto scope_count = fValueStack.size() - scope_start,
513 count = scope_count / (sizeof(T) / sizeof(Value));
514 SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
515
516 const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
517
518 // Restore the previous scope index from saved placeholder value,
519 // and instantiate as a vector of values in scope.
520 auto& placeholder = fValueStack[scope_start - 1];
521 fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
522 placeholder = VectorT(begin, count, fAlloc);
523
524 // Drop the (consumed) values in scope.
525 fValueStack.resize(scope_start);
526 }
527
pushObjectScope()528 void pushObjectScope() {
529 // Save a scope index now, and then later we'll overwrite this value as the Object itself.
530 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
531
532 // New object scope.
533 fScopeIndex = SkTo<intptr_t>(fValueStack.size());
534 }
535
popObjectScope()536 void popObjectScope() {
537 SkASSERT(this->inObjectScope());
538 this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
539
540 SkDEBUGCODE(
541 const auto& obj = fValueStack.back().as<ObjectValue>();
542 SkASSERT(obj.is<ObjectValue>());
543 for (const auto& member : obj) {
544 SkASSERT(member.fKey.is<StringValue>());
545 }
546 )
547 }
548
pushArrayScope()549 void pushArrayScope() {
550 // Save a scope index now, and then later we'll overwrite this value as the Array itself.
551 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
552
553 // New array scope.
554 fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
555 }
556
popArrayScope()557 void popArrayScope() {
558 SkASSERT(this->inArrayScope());
559 this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
560
561 SkDEBUGCODE(
562 const auto& arr = fValueStack.back().as<ArrayValue>();
563 SkASSERT(arr.is<ArrayValue>());
564 )
565 }
566
pushObjectKey(const char * key,size_t size,const char * eos)567 void pushObjectKey(const char* key, size_t size, const char* eos) {
568 SkASSERT(this->inObjectScope());
569 SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
570 SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
571 this->pushString(key, size, eos);
572 }
573
pushTrue()574 void pushTrue() {
575 fValueStack.push_back(BoolValue(true));
576 }
577
pushFalse()578 void pushFalse() {
579 fValueStack.push_back(BoolValue(false));
580 }
581
pushNull()582 void pushNull() {
583 fValueStack.push_back(NullValue());
584 }
585
pushString(const char * s,size_t size,const char * eos)586 void pushString(const char* s, size_t size, const char* eos) {
587 fValueStack.push_back(FastString(s, size, eos, fAlloc));
588 }
589
pushInt32(int32_t i)590 void pushInt32(int32_t i) {
591 fValueStack.push_back(NumberValue(i));
592 }
593
pushFloat(float f)594 void pushFloat(float f) {
595 fValueStack.push_back(NumberValue(f));
596 }
597
// Error funnel: hands |ret_val| back to the caller, after recording the offending
// position |p| and the message |msg| -- but only in SK_JSON_REPORT_ERRORS builds.
template <typename T>
T error(T&& ret_val, const char* p, const char* msg) {
#if defined(SK_JSON_REPORT_ERRORS)
    fErrorMessage.set(msg);
    fErrorToken = p;
#endif
    return ret_val;
}
606
matchTrue(const char * p)607 const char* matchTrue(const char* p) {
608 SkASSERT(p[0] == 't');
609
610 if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
611 this->pushTrue();
612 return p + 4;
613 }
614
615 return this->error(nullptr, p, "invalid token");
616 }
617
matchFalse(const char * p)618 const char* matchFalse(const char* p) {
619 SkASSERT(p[0] == 'f');
620
621 if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
622 this->pushFalse();
623 return p + 5;
624 }
625
626 return this->error(nullptr, p, "invalid token");
627 }
628
matchNull(const char * p)629 const char* matchNull(const char* p) {
630 SkASSERT(p[0] == 'n');
631
632 if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
633 this->pushNull();
634 return p + 4;
635 }
636
637 return this->error(nullptr, p, "invalid token");
638 }
639
unescapeString(const char * begin,const char * end)640 const std::vector<char>* unescapeString(const char* begin, const char* end) {
641 fUnescapeBuffer.clear();
642
643 for (const auto* p = begin; p != end; ++p) {
644 if (*p != '\\') {
645 fUnescapeBuffer.push_back(*p);
646 continue;
647 }
648
649 if (++p == end) {
650 return nullptr;
651 }
652
653 switch (*p) {
654 case '"': fUnescapeBuffer.push_back( '"'); break;
655 case '\\': fUnescapeBuffer.push_back('\\'); break;
656 case '/': fUnescapeBuffer.push_back( '/'); break;
657 case 'b': fUnescapeBuffer.push_back('\b'); break;
658 case 'f': fUnescapeBuffer.push_back('\f'); break;
659 case 'n': fUnescapeBuffer.push_back('\n'); break;
660 case 'r': fUnescapeBuffer.push_back('\r'); break;
661 case 't': fUnescapeBuffer.push_back('\t'); break;
662 case 'u': {
663 if (p + 4 >= end) {
664 return nullptr;
665 }
666
667 uint32_t hexed;
668 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
669 const auto* eos = SkParse::FindHex(hex_str, &hexed);
670 if (!eos || *eos) {
671 return nullptr;
672 }
673
674 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
675 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
676 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
677 p += 4;
678 } break;
679 default: return nullptr;
680 }
681 }
682
683 return &fUnescapeBuffer;
684 }
685
// Matches a double-quoted string starting at |p| (which must point at the opening quote).
//
// On success, |func| is invoked with (chars, size, eos) and the position following the
// closing quote is returned.  |eos| is what the callee may treat as the last readable
// address: |p_stop| when the chars are passed straight from the input, or the last char
// of the unescape buffer when the string had to be unescaped.
//
// On failure (illegal char, bad escape, or running into |p_stop|), the error is reported
// and nullptr is returned.
template <typename MatchFunc>
const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
    SkASSERT(*p == '"');
    const auto* s_begin = p + 1;
    bool requires_unescape = false;

    do {
        // Consume string chars.
        // This is the fast path, and hopefully we only hit it once then quick-exit below.
        // (each pass starts scanning one char past the current position)
        for (p = p + 1; !is_eostring(*p); ++p);

        if (*p == '"') {
            // Valid string found.
            if (!requires_unescape) {
                func(s_begin, p - s_begin, p_stop);
            } else {
                // Slow unescape. We could avoid this extra copy with some effort,
                // but in practice escaped strings should be rare.
                const auto* buf = this->unescapeString(s_begin, p);
                if (!buf) {
                    // Malformed escape sequence: reported as an invalid string below.
                    break;
                }

                SkASSERT(!buf->empty());
                func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
            }
            return p + 1;
        }

        if (*p == '\\') {
            requires_unescape = true;
            // Step onto the escaped char: the next scanning pass starts one past it,
            // so an escaped quote or backslash is never interpreted on its own.
            ++p;
            continue;
        }

        // End-of-scope chars are special: we use them to tag the end of the input.
        // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
        // end of the input. To that effect, we treat them as string terminators above,
        // then we catch them here.
        if (is_eoscope(*p)) {
            // The loop condition below performs the actual end-of-input check.
            continue;
        }

        // Invalid/unexpected char.
        break;
    } while (p != p_stop);

    // Premature end-of-input, or illegal string char.
    return this->error(nullptr, s_begin - 1, "invalid string");
}
736
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)737 const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
738 SkASSERT(exp <= 0);
739
740 for (;;) {
741 if (!is_digit(*p)) break;
742 f = f * 10.f + (*p++ - '0'); --exp;
743 if (!is_digit(*p)) break;
744 f = f * 10.f + (*p++ - '0'); --exp;
745 }
746
747 const auto decimal_scale = pow10(exp);
748 if (is_numeric(*p) || !decimal_scale) {
749 SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
750 // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
751 return nullptr;
752 }
753
754 this->pushFloat(sign * f * decimal_scale);
755
756 return p;
757 }
758
matchFastFloatPart(const char * p,int sign,float f)759 const char* matchFastFloatPart(const char* p, int sign, float f) {
760 for (;;) {
761 if (!is_digit(*p)) break;
762 f = f * 10.f + (*p++ - '0');
763 if (!is_digit(*p)) break;
764 f = f * 10.f + (*p++ - '0');
765 }
766
767 if (!is_numeric(*p)) {
768 // Matched (integral) float.
769 this->pushFloat(sign * f);
770 return p;
771 }
772
773 return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
774 : nullptr;
775 }
776
matchFast32OrFloat(const char * p)777 const char* matchFast32OrFloat(const char* p) {
778 int sign = 1;
779 if (*p == '-') {
780 sign = -1;
781 ++p;
782 }
783
784 const auto* digits_start = p;
785
786 int32_t n32 = 0;
787
788 // This is the largest absolute int32 value we can handle before
789 // risking overflow *on the next digit* (214748363).
790 static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
791
792 if (is_digit(*p)) {
793 n32 = (*p++ - '0');
794 for (;;) {
795 if (!is_digit(*p) || n32 > kMaxInt32) break;
796 n32 = n32 * 10 + (*p++ - '0');
797 }
798 }
799
800 if (!is_numeric(*p)) {
801 // Did we actually match any digits?
802 if (p > digits_start) {
803 this->pushInt32(sign * n32);
804 return p;
805 }
806 return nullptr;
807 }
808
809 if (*p == '.') {
810 const auto* decimals_start = ++p;
811
812 int exp = 0;
813
814 for (;;) {
815 if (!is_digit(*p) || n32 > kMaxInt32) break;
816 n32 = n32 * 10 + (*p++ - '0'); --exp;
817 if (!is_digit(*p) || n32 > kMaxInt32) break;
818 n32 = n32 * 10 + (*p++ - '0'); --exp;
819 }
820
821 if (!is_numeric(*p)) {
822 // Did we actually match any digits?
823 if (p > decimals_start) {
824 this->pushFloat(sign * n32 * pow10(exp));
825 return p;
826 }
827 return nullptr;
828 }
829
830 if (n32 > kMaxInt32) {
831 // we ran out on n32 bits
832 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
833 }
834 }
835
836 return this->matchFastFloatPart(p, sign, n32);
837 }
838
matchNumber(const char * p)839 const char* matchNumber(const char* p) {
840 if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
841
842 // slow fallback
843 char* matched;
844 float f = strtof(p, &matched);
845 if (matched > p) {
846 this->pushFloat(f);
847 return matched;
848 }
849 return this->error(nullptr, p, "invalid numeric token");
850 }
851 };
852
Write(const Value & v,SkWStream * stream)853 void Write(const Value& v, SkWStream* stream) {
854 switch (v.getType()) {
855 case Value::Type::kNull:
856 stream->writeText("null");
857 break;
858 case Value::Type::kBool:
859 stream->writeText(*v.as<BoolValue>() ? "true" : "false");
860 break;
861 case Value::Type::kNumber:
862 stream->writeScalarAsText(*v.as<NumberValue>());
863 break;
864 case Value::Type::kString:
865 stream->writeText("\"");
866 stream->writeText(v.as<StringValue>().begin());
867 stream->writeText("\"");
868 break;
869 case Value::Type::kArray: {
870 const auto& array = v.as<ArrayValue>();
871 stream->writeText("[");
872 bool first_value = true;
873 for (const auto& v : array) {
874 if (!first_value) stream->writeText(",");
875 Write(v, stream);
876 first_value = false;
877 }
878 stream->writeText("]");
879 break;
880 }
881 case Value::Type::kObject:
882 const auto& object = v.as<ObjectValue>();
883 stream->writeText("{");
884 bool first_member = true;
885 for (const auto& member : object) {
886 SkASSERT(member.fKey.getType() == Value::Type::kString);
887 if (!first_member) stream->writeText(",");
888 Write(member.fKey, stream);
889 stream->writeText(":");
890 Write(member.fValue, stream);
891 first_member = false;
892 }
893 stream->writeText("}");
894 break;
895 }
896 }
897
898 } // namespace
899
toString() const900 SkString Value::toString() const {
901 SkDynamicMemoryWStream wstream;
902 Write(*this, &wstream);
903 const auto data = wstream.detachAsData();
904 // TODO: is there a better way to pass data around without copying?
905 return SkString(static_cast<const char*>(data->data()), data->size());
906 }
907
908 static constexpr size_t kMinChunkSize = 4096;
909
DOM(const char * data,size_t size)910 DOM::DOM(const char* data, size_t size)
911 : fAlloc(kMinChunkSize) {
912 DOMParser parser(fAlloc);
913
914 fRoot = parser.parse(data, size);
915 }
916
write(SkWStream * stream) const917 void DOM::write(SkWStream* stream) const {
918 Write(fRoot, stream);
919 }
920
921 } // namespace skjson
922