1 // Copyright (c) 2015 Sandstorm Development Group, Inc. and contributors
2 // Licensed under the MIT License:
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21
22 #include "json.h"
23 #include <math.h> // for HUGEVAL to check for overflow in strtod
24 #include <stdlib.h> // strtod
25 #include <errno.h> // for strtod errors
26 #include <capnp/orphan.h>
27 #include <kj/debug.h>
28 #include <kj/function.h>
29 #include <kj/vector.h>
30 #include <kj/one-of.h>
31 #include <kj/encoding.h>
32 #include <kj/map.h>
33
34 namespace capnp {
35
36 struct JsonCodec::Impl {
37 bool prettyPrint = false;
38 HasMode hasMode = HasMode::NON_NULL;
39 size_t maxNestingDepth = 64;
40
41 kj::HashMap<Type, HandlerBase*> typeHandlers;
42 kj::HashMap<StructSchema::Field, HandlerBase*> fieldHandlers;
43 kj::HashMap<Type, kj::Maybe<kj::Own<AnnotatedHandler>>> annotatedHandlers;
44 kj::HashMap<Type, kj::Own<AnnotatedEnumHandler>> annotatedEnumHandlers;
45
encodeRawcapnp::JsonCodec::Impl46 kj::StringTree encodeRaw(JsonValue::Reader value, uint indent, bool& multiline,
47 bool hasPrefix) const {
48 switch (value.which()) {
49 case JsonValue::NULL_:
50 return kj::strTree("null");
51 case JsonValue::BOOLEAN:
52 return kj::strTree(value.getBoolean());
53 case JsonValue::NUMBER:
54 return kj::strTree(value.getNumber());
55
56 case JsonValue::STRING:
57 return kj::strTree(encodeString(value.getString()));
58
59 case JsonValue::ARRAY: {
60 auto array = value.getArray();
61 uint subIndent = indent + (array.size() > 1);
62 bool childMultiline = false;
63 auto encodedElements = KJ_MAP(element, array) {
64 return encodeRaw(element, subIndent, childMultiline, false);
65 };
66
67 return kj::strTree('[', encodeList(
68 kj::mv(encodedElements), childMultiline, indent, multiline, hasPrefix), ']');
69 }
70
71 case JsonValue::OBJECT: {
72 auto object = value.getObject();
73 uint subIndent = indent + (object.size() > 1);
74 bool childMultiline = false;
75 kj::StringPtr colon = prettyPrint ? ": " : ":";
76 auto encodedElements = KJ_MAP(field, object) {
77 return kj::strTree(
78 encodeString(field.getName()), colon,
79 encodeRaw(field.getValue(), subIndent, childMultiline, true));
80 };
81
82 return kj::strTree('{', encodeList(
83 kj::mv(encodedElements), childMultiline, indent, multiline, hasPrefix), '}');
84 }
85
86 case JsonValue::CALL: {
87 auto call = value.getCall();
88 auto params = call.getParams();
89 uint subIndent = indent + (params.size() > 1);
90 bool childMultiline = false;
91 auto encodedElements = KJ_MAP(element, params) {
92 return encodeRaw(element, subIndent, childMultiline, false);
93 };
94
95 return kj::strTree(call.getFunction(), '(', encodeList(
96 kj::mv(encodedElements), childMultiline, indent, multiline, true), ')');
97 }
98 }
99
100 KJ_FAIL_ASSERT("unknown JsonValue type", static_cast<uint>(value.which()));
101 }
102
encodeStringcapnp::JsonCodec::Impl103 kj::String encodeString(kj::StringPtr chars) const {
104 static const char HEXDIGITS[] = "0123456789abcdef";
105 kj::Vector<char> escaped(chars.size() + 3);
106
107 escaped.add('"');
108 for (char c: chars) {
109 switch (c) {
110 case '\"': escaped.addAll(kj::StringPtr("\\\"")); break;
111 case '\\': escaped.addAll(kj::StringPtr("\\\\")); break;
112 case '\b': escaped.addAll(kj::StringPtr("\\b")); break;
113 case '\f': escaped.addAll(kj::StringPtr("\\f")); break;
114 case '\n': escaped.addAll(kj::StringPtr("\\n")); break;
115 case '\r': escaped.addAll(kj::StringPtr("\\r")); break;
116 case '\t': escaped.addAll(kj::StringPtr("\\t")); break;
117 default:
118 if (static_cast<uint8_t>(c) < 0x20) {
119 escaped.addAll(kj::StringPtr("\\u00"));
120 uint8_t c2 = c;
121 escaped.add(HEXDIGITS[c2 / 16]);
122 escaped.add(HEXDIGITS[c2 % 16]);
123 } else {
124 escaped.add(c);
125 }
126 break;
127 }
128 }
129 escaped.add('"');
130 escaped.add('\0');
131
132 return kj::String(escaped.releaseAsArray());
133 }
134
encodeListcapnp::JsonCodec::Impl135 kj::StringTree encodeList(kj::Array<kj::StringTree> elements,
136 bool hasMultilineElement, uint indent, bool& multiline,
137 bool hasPrefix) const {
138 size_t maxChildSize = 0;
139 for (auto& e: elements) maxChildSize = kj::max(maxChildSize, e.size());
140
141 kj::StringPtr prefix;
142 kj::StringPtr delim;
143 kj::StringPtr suffix;
144 kj::String ownPrefix;
145 kj::String ownDelim;
146 if (!prettyPrint) {
147 // No whitespace.
148 delim = ",";
149 prefix = "";
150 suffix = "";
151 } else if ((elements.size() > 1) && (hasMultilineElement || maxChildSize > 50)) {
152 // If the array contained any multi-line elements, OR it contained sufficiently long
153 // elements, then put each element on its own line.
154 auto indentSpace = kj::repeat(' ', (indent + 1) * 2);
155 delim = ownDelim = kj::str(",\n", indentSpace);
156 multiline = true;
157 if (hasPrefix) {
158 // We're producing a multi-line list, and the first line has some garbage in front of it.
159 // Therefore, move the first element to the next line.
160 prefix = ownPrefix = kj::str("\n", indentSpace);
161 } else {
162 prefix = " ";
163 }
164 suffix = " ";
165 } else {
166 // Put everything on one line, but add spacing between elements for legibility.
167 delim = ", ";
168 prefix = "";
169 suffix = "";
170 }
171
172 return kj::strTree(prefix, kj::StringTree(kj::mv(elements), delim), suffix);
173 }
174 };
175
JsonCodec()176 JsonCodec::JsonCodec()
177 : impl(kj::heap<Impl>()) {}
~JsonCodec()178 JsonCodec::~JsonCodec() noexcept(false) {}
179
setPrettyPrint(bool enabled)180 void JsonCodec::setPrettyPrint(bool enabled) { impl->prettyPrint = enabled; }
181
setMaxNestingDepth(size_t maxNestingDepth)182 void JsonCodec::setMaxNestingDepth(size_t maxNestingDepth) {
183 impl->maxNestingDepth = maxNestingDepth;
184 }
185
setHasMode(HasMode mode)186 void JsonCodec::setHasMode(HasMode mode) { impl->hasMode = mode; }
187
encode(DynamicValue::Reader value,Type type) const188 kj::String JsonCodec::encode(DynamicValue::Reader value, Type type) const {
189 MallocMessageBuilder message;
190 auto json = message.getRoot<JsonValue>();
191 encode(value, type, json);
192 return encodeRaw(json);
193 }
194
decode(kj::ArrayPtr<const char> input,DynamicStruct::Builder output) const195 void JsonCodec::decode(kj::ArrayPtr<const char> input, DynamicStruct::Builder output) const {
196 MallocMessageBuilder message;
197 auto json = message.getRoot<JsonValue>();
198 decodeRaw(input, json);
199 decode(json, output);
200 }
201
decode(kj::ArrayPtr<const char> input,Type type,Orphanage orphanage) const202 Orphan<DynamicValue> JsonCodec::decode(
203 kj::ArrayPtr<const char> input, Type type, Orphanage orphanage) const {
204 MallocMessageBuilder message;
205 auto json = message.getRoot<JsonValue>();
206 decodeRaw(input, json);
207 return decode(json, type, orphanage);
208 }
209
encodeRaw(JsonValue::Reader value) const210 kj::String JsonCodec::encodeRaw(JsonValue::Reader value) const {
211 bool multiline = false;
212 return impl->encodeRaw(value, 0, multiline, false).flatten();
213 }
214
encode(DynamicValue::Reader input,Type type,JsonValue::Builder output) const215 void JsonCodec::encode(DynamicValue::Reader input, Type type, JsonValue::Builder output) const {
216 // TODO(someday): For interfaces, check for handlers on superclasses, per documentation...
217 // TODO(someday): For branded types, should we check for handlers on the generic?
218 // TODO(someday): Allow registering handlers for "all structs", "all lists", etc?
219 KJ_IF_MAYBE(handler, impl->typeHandlers.find(type)) {
220 (*handler)->encodeBase(*this, input, output);
221 return;
222 }
223
224 switch (type.which()) {
225 case schema::Type::VOID:
226 output.setNull();
227 break;
228 case schema::Type::BOOL:
229 output.setBoolean(input.as<bool>());
230 break;
231 case schema::Type::INT8:
232 case schema::Type::INT16:
233 case schema::Type::INT32:
234 case schema::Type::UINT8:
235 case schema::Type::UINT16:
236 case schema::Type::UINT32:
237 output.setNumber(input.as<double>());
238 break;
239 case schema::Type::FLOAT32:
240 case schema::Type::FLOAT64:
241 {
242 double value = input.as<double>();
243 // Inf, -inf and NaN are not allowed in the JSON spec. Storing into string.
244 if (kj::inf() == value) {
245 output.setString("Infinity");
246 } else if (-kj::inf() == value) {
247 output.setString("-Infinity");
248 } else if (kj::isNaN(value)) {
249 output.setString("NaN");
250 } else {
251 output.setNumber(value);
252 }
253 }
254 break;
255 case schema::Type::INT64:
256 output.setString(kj::str(input.as<int64_t>()));
257 break;
258 case schema::Type::UINT64:
259 output.setString(kj::str(input.as<uint64_t>()));
260 break;
261 case schema::Type::TEXT:
262 output.setString(kj::str(input.as<Text>()));
263 break;
264 case schema::Type::DATA: {
265 // Turn into array of byte values. Yep, this is pretty ugly. People really need to override
266 // this with a handler.
267 auto bytes = input.as<Data>();
268 auto array = output.initArray(bytes.size());
269 for (auto i: kj::indices(bytes)) {
270 array[i].setNumber(bytes[i]);
271 }
272 break;
273 }
274 case schema::Type::LIST: {
275 auto list = input.as<DynamicList>();
276 auto elementType = type.asList().getElementType();
277 auto array = output.initArray(list.size());
278 for (auto i: kj::indices(list)) {
279 encode(list[i], elementType, array[i]);
280 }
281 break;
282 }
283 case schema::Type::ENUM: {
284 auto e = input.as<DynamicEnum>();
285 KJ_IF_MAYBE(symbol, e.getEnumerant()) {
286 output.setString(symbol->getProto().getName());
287 } else {
288 output.setNumber(e.getRaw());
289 }
290 break;
291 }
292 case schema::Type::STRUCT: {
293 auto structValue = input.as<capnp::DynamicStruct>();
294 auto nonUnionFields = structValue.getSchema().getNonUnionFields();
295
296 KJ_STACK_ARRAY(bool, hasField, nonUnionFields.size(), 32, 128);
297
298 uint fieldCount = 0;
299 for (auto i: kj::indices(nonUnionFields)) {
300 fieldCount += (hasField[i] = structValue.has(nonUnionFields[i], impl->hasMode));
301 }
302
303 // We try to write the union field, if any, in proper order with the rest.
304 auto which = structValue.which();
305 bool unionFieldIsNull = false;
306
307 KJ_IF_MAYBE(field, which) {
308 // Even if the union field is null, if it is not the default field of the union then we
309 // have to print it anyway.
310 unionFieldIsNull = !structValue.has(*field, impl->hasMode);
311 if (field->getProto().getDiscriminantValue() != 0 || !unionFieldIsNull) {
312 ++fieldCount;
313 } else {
314 which = nullptr;
315 }
316 }
317
318 auto object = output.initObject(fieldCount);
319
320 size_t pos = 0;
321 for (auto i: kj::indices(nonUnionFields)) {
322 auto field = nonUnionFields[i];
323 KJ_IF_MAYBE(unionField, which) {
324 if (unionField->getIndex() < field.getIndex()) {
325 auto outField = object[pos++];
326 outField.setName(unionField->getProto().getName());
327 if (unionFieldIsNull) {
328 outField.initValue().setNull();
329 } else {
330 encodeField(*unionField, structValue.get(*unionField), outField.initValue());
331 }
332 which = nullptr;
333 }
334 }
335 if (hasField[i]) {
336 auto outField = object[pos++];
337 outField.setName(field.getProto().getName());
338 encodeField(field, structValue.get(field), outField.initValue());
339 }
340 }
341 if (which != nullptr) {
342 // Union field not printed yet; must be last.
343 auto unionField = KJ_ASSERT_NONNULL(which);
344 auto outField = object[pos++];
345 outField.setName(unionField.getProto().getName());
346 if (unionFieldIsNull) {
347 outField.initValue().setNull();
348 } else {
349 encodeField(unionField, structValue.get(unionField), outField.initValue());
350 }
351 }
352 KJ_ASSERT(pos == fieldCount);
353 break;
354 }
355 case schema::Type::INTERFACE:
356 KJ_FAIL_REQUIRE("don't know how to JSON-encode capabilities; "
357 "please register a JsonCodec::Handler for this");
358 case schema::Type::ANY_POINTER:
359 KJ_FAIL_REQUIRE("don't know how to JSON-encode AnyPointer; "
360 "please register a JsonCodec::Handler for this");
361 }
362 }
363
encodeField(StructSchema::Field field,DynamicValue::Reader input,JsonValue::Builder output) const364 void JsonCodec::encodeField(StructSchema::Field field, DynamicValue::Reader input,
365 JsonValue::Builder output) const {
366 KJ_IF_MAYBE(handler, impl->fieldHandlers.find(field)) {
367 (*handler)->encodeBase(*this, input, output);
368 return;
369 }
370
371 encode(input, field.getType(), output);
372 }
373
decodeArray(List<JsonValue>::Reader input,ListSchema type,Orphanage orphanage) const374 Orphan<DynamicList> JsonCodec::decodeArray(List<JsonValue>::Reader input, ListSchema type, Orphanage orphanage) const {
375 auto orphan = orphanage.newOrphan(type, input.size());
376 auto output = orphan.get();
377 for (auto i: kj::indices(input)) {
378 output.adopt(i, decode(input[i], type.getElementType(), orphanage));
379 }
380 return orphan;
381 }
382
decodeObject(JsonValue::Reader input,StructSchema type,Orphanage orphanage,DynamicStruct::Builder output) const383 void JsonCodec::decodeObject(JsonValue::Reader input, StructSchema type, Orphanage orphanage, DynamicStruct::Builder output) const {
384 KJ_REQUIRE(input.isObject(), "Expected object value") { return; }
385 for (auto field: input.getObject()) {
386 KJ_IF_MAYBE(fieldSchema, type.findFieldByName(field.getName())) {
387 decodeField(*fieldSchema, field.getValue(), orphanage, output);
388 } else {
389 // Unknown json fields are ignored to allow schema evolution
390 }
391 }
392 }
393
decodeField(StructSchema::Field fieldSchema,JsonValue::Reader fieldValue,Orphanage orphanage,DynamicStruct::Builder output) const394 void JsonCodec::decodeField(StructSchema::Field fieldSchema, JsonValue::Reader fieldValue,
395 Orphanage orphanage, DynamicStruct::Builder output) const {
396 auto fieldType = fieldSchema.getType();
397
398 KJ_IF_MAYBE(handler, impl->fieldHandlers.find(fieldSchema)) {
399 output.adopt(fieldSchema, (*handler)->decodeBase(*this, fieldValue, fieldType, orphanage));
400 } else {
401 output.adopt(fieldSchema, decode(fieldValue, fieldType, orphanage));
402 }
403 }
404
decode(JsonValue::Reader input,DynamicStruct::Builder output) const405 void JsonCodec::decode(JsonValue::Reader input, DynamicStruct::Builder output) const {
406 auto type = output.getSchema();
407
408 KJ_IF_MAYBE(handler, impl->typeHandlers.find(type)) {
409 return (*handler)->decodeStructBase(*this, input, output);
410 }
411
412 decodeObject(input, type, Orphanage::getForMessageContaining(output), output);
413 }
414
decode(JsonValue::Reader input,Type type,Orphanage orphanage) const415 Orphan<DynamicValue> JsonCodec::decode(
416 JsonValue::Reader input, Type type, Orphanage orphanage) const {
417 KJ_IF_MAYBE(handler, impl->typeHandlers.find(type)) {
418 return (*handler)->decodeBase(*this, input, type, orphanage);
419 }
420
421 switch(type.which()) {
422 case schema::Type::VOID:
423 return capnp::VOID;
424 case schema::Type::BOOL:
425 switch (input.which()) {
426 case JsonValue::BOOLEAN:
427 return input.getBoolean();
428 default:
429 KJ_FAIL_REQUIRE("Expected boolean value");
430 }
431 case schema::Type::INT8:
432 case schema::Type::INT16:
433 case schema::Type::INT32:
434 case schema::Type::INT64:
435 // Relies on range check in DynamicValue::Reader::as<IntType>
436 switch (input.which()) {
437 case JsonValue::NUMBER:
438 return input.getNumber();
439 case JsonValue::STRING:
440 return input.getString().parseAs<int64_t>();
441 default:
442 KJ_FAIL_REQUIRE("Expected integer value");
443 }
444 case schema::Type::UINT8:
445 case schema::Type::UINT16:
446 case schema::Type::UINT32:
447 case schema::Type::UINT64:
448 // Relies on range check in DynamicValue::Reader::as<IntType>
449 switch (input.which()) {
450 case JsonValue::NUMBER:
451 return input.getNumber();
452 case JsonValue::STRING:
453 return input.getString().parseAs<uint64_t>();
454 default:
455 KJ_FAIL_REQUIRE("Expected integer value");
456 }
457 case schema::Type::FLOAT32:
458 case schema::Type::FLOAT64:
459 switch (input.which()) {
460 case JsonValue::NULL_:
461 return kj::nan();
462 case JsonValue::NUMBER:
463 return input.getNumber();
464 case JsonValue::STRING:
465 return input.getString().parseAs<double>();
466 default:
467 KJ_FAIL_REQUIRE("Expected float value");
468 }
469 case schema::Type::TEXT:
470 switch (input.which()) {
471 case JsonValue::STRING:
472 return orphanage.newOrphanCopy(input.getString());
473 default:
474 KJ_FAIL_REQUIRE("Expected text value");
475 }
476 case schema::Type::DATA:
477 switch (input.which()) {
478 case JsonValue::ARRAY: {
479 auto array = input.getArray();
480 auto orphan = orphanage.newOrphan<Data>(array.size());
481 auto data = orphan.get();
482 for (auto i: kj::indices(array)) {
483 auto x = array[i].getNumber();
484 KJ_REQUIRE(byte(x) == x, "Number in byte array is not an integer in [0, 255]");
485 data[i] = x;
486 }
487 return kj::mv(orphan);
488 }
489 default:
490 KJ_FAIL_REQUIRE("Expected data value");
491 }
492 case schema::Type::LIST:
493 switch (input.which()) {
494 case JsonValue::ARRAY:
495 return decodeArray(input.getArray(), type.asList(), orphanage);
496 default:
497 KJ_FAIL_REQUIRE("Expected list value") { break; }
498 return orphanage.newOrphan(type.asList(), 0);
499 }
500 case schema::Type::ENUM:
501 switch (input.which()) {
502 case JsonValue::STRING:
503 return DynamicEnum(type.asEnum().getEnumerantByName(input.getString()));
504 default:
505 KJ_FAIL_REQUIRE("Expected enum value") { break; }
506 return DynamicEnum(type.asEnum(), 0);
507 }
508 case schema::Type::STRUCT: {
509 auto structType = type.asStruct();
510 auto orphan = orphanage.newOrphan(structType);
511 decodeObject(input, structType, orphanage, orphan.get());
512 return kj::mv(orphan);
513 }
514 case schema::Type::INTERFACE:
515 KJ_FAIL_REQUIRE("don't know how to JSON-decode capabilities; "
516 "please register a JsonCodec::Handler for this");
517 case schema::Type::ANY_POINTER:
518 KJ_FAIL_REQUIRE("don't know how to JSON-decode AnyPointer; "
519 "please register a JsonCodec::Handler for this");
520 }
521
522 KJ_CLANG_KNOWS_THIS_IS_UNREACHABLE_BUT_GCC_DOESNT;
523 }
524
525 // -----------------------------------------------------------------------------
526
527 namespace {
528
529 class Input {
530 public:
Input(kj::ArrayPtr<const char> input)531 Input(kj::ArrayPtr<const char> input) : wrapped(input) {}
532
exhausted()533 bool exhausted() {
534 return wrapped.size() == 0 || wrapped.front() == '\0';
535 }
536
nextChar()537 char nextChar() {
538 KJ_REQUIRE(!exhausted(), "JSON message ends prematurely.");
539 return wrapped.front();
540 }
541
advance(size_t numBytes=1)542 void advance(size_t numBytes = 1) {
543 KJ_REQUIRE(numBytes <= wrapped.size(), "JSON message ends prematurely.");
544 wrapped = kj::arrayPtr(wrapped.begin() + numBytes, wrapped.end());
545 }
546
advanceTo(const char * newPos)547 void advanceTo(const char *newPos) {
548 KJ_REQUIRE(wrapped.begin() <= newPos && newPos < wrapped.end(),
549 "JSON message ends prematurely.");
550 wrapped = kj::arrayPtr(newPos, wrapped.end());
551 }
552
consume(size_t numBytes=1)553 kj::ArrayPtr<const char> consume(size_t numBytes = 1) {
554 auto originalPos = wrapped.begin();
555 advance(numBytes);
556
557 return kj::arrayPtr(originalPos, wrapped.begin());
558 }
559
consume(char expected)560 void consume(char expected) {
561 char current = nextChar();
562 KJ_REQUIRE(current == expected, "Unexpected input in JSON message.");
563
564 advance();
565 }
566
consume(kj::ArrayPtr<const char> expected)567 void consume(kj::ArrayPtr<const char> expected) {
568 KJ_REQUIRE(wrapped.size() >= expected.size());
569
570 auto prefix = wrapped.slice(0, expected.size());
571 KJ_REQUIRE(prefix == expected, "Unexpected input in JSON message.");
572
573 advance(expected.size());
574 }
575
tryConsume(char expected)576 bool tryConsume(char expected) {
577 bool found = !exhausted() && nextChar() == expected;
578 if (found) { advance(); }
579
580 return found;
581 }
582
583 template <typename Predicate>
consumeOne(Predicate && predicate)584 void consumeOne(Predicate&& predicate) {
585 char current = nextChar();
586 KJ_REQUIRE(predicate(current), "Unexpected input in JSON message.");
587
588 advance();
589 }
590
591 template <typename Predicate>
consumeWhile(Predicate && predicate)592 kj::ArrayPtr<const char> consumeWhile(Predicate&& predicate) {
593 auto originalPos = wrapped.begin();
594 while (!exhausted() && predicate(nextChar())) { advance(); }
595
596 return kj::arrayPtr(originalPos, wrapped.begin());
597 }
598
599 template <typename F> // Function<void(Input&)>
consumeCustom(F && f)600 kj::ArrayPtr<const char> consumeCustom(F&& f) {
601 // Allows consuming in a custom manner without exposing the wrapped ArrayPtr.
602 auto originalPos = wrapped.begin();
603 f(*this);
604
605 return kj::arrayPtr(originalPos, wrapped.begin());
606 }
607
consumeWhitespace()608 void consumeWhitespace() {
609 consumeWhile([](char chr) {
610 return (
611 chr == ' ' ||
612 chr == '\n' ||
613 chr == '\r' ||
614 chr == '\t'
615 );
616 });
617 }
618
619
620 private:
621 kj::ArrayPtr<const char> wrapped;
622
623 }; // class Input
624
625 class Parser {
626 public:
Parser(size_t maxNestingDepth,kj::ArrayPtr<const char> input)627 Parser(size_t maxNestingDepth, kj::ArrayPtr<const char> input) :
628 maxNestingDepth(maxNestingDepth), input(input), nestingDepth(0) {}
629
parseValue(JsonValue::Builder & output)630 void parseValue(JsonValue::Builder& output) {
631 input.consumeWhitespace();
632 KJ_DEFER(input.consumeWhitespace());
633
634 KJ_REQUIRE(!input.exhausted(), "JSON message ends prematurely.");
635
636 switch (input.nextChar()) {
637 case 'n': input.consume(kj::StringPtr("null")); output.setNull(); break;
638 case 'f': input.consume(kj::StringPtr("false")); output.setBoolean(false); break;
639 case 't': input.consume(kj::StringPtr("true")); output.setBoolean(true); break;
640 case '"': parseString(output); break;
641 case '[': parseArray(output); break;
642 case '{': parseObject(output); break;
643 case '-': case '0': case '1': case '2': case '3':
644 case '4': case '5': case '6': case '7': case '8':
645 case '9': parseNumber(output); break;
646 default: KJ_FAIL_REQUIRE("Unexpected input in JSON message.");
647 }
648 }
649
parseNumber(JsonValue::Builder & output)650 void parseNumber(JsonValue::Builder& output) {
651 output.setNumber(consumeNumber().parseAs<double>());
652 }
653
parseString(JsonValue::Builder & output)654 void parseString(JsonValue::Builder& output) {
655 output.setString(consumeQuotedString());
656 }
657
parseArray(JsonValue::Builder & output)658 void parseArray(JsonValue::Builder& output) {
659 // TODO(perf): Using orphans leaves holes in the message. It's expected
660 // that a JsonValue is used for interop, and won't be sent or written as a
661 // Cap'n Proto message. This also applies to parseObject below.
662 kj::Vector<Orphan<JsonValue>> values;
663 auto orphanage = Orphanage::getForMessageContaining(output);
664 bool expectComma = false;
665
666 input.consume('[');
667 KJ_REQUIRE(++nestingDepth <= maxNestingDepth, "JSON message nested too deeply.");
668 KJ_DEFER(--nestingDepth);
669
670 while (input.consumeWhitespace(), input.nextChar() != ']') {
671 auto orphan = orphanage.newOrphan<JsonValue>();
672 auto builder = orphan.get();
673
674 if (expectComma) {
675 input.consumeWhitespace();
676 input.consume(',');
677 input.consumeWhitespace();
678 }
679
680 parseValue(builder);
681 values.add(kj::mv(orphan));
682
683 expectComma = true;
684 }
685
686 output.initArray(values.size());
687 auto array = output.getArray();
688
689 for (auto i : kj::indices(values)) {
690 array.adoptWithCaveats(i, kj::mv(values[i]));
691 }
692
693 input.consume(']');
694 }
695
parseObject(JsonValue::Builder & output)696 void parseObject(JsonValue::Builder& output) {
697 kj::Vector<Orphan<JsonValue::Field>> fields;
698 auto orphanage = Orphanage::getForMessageContaining(output);
699 bool expectComma = false;
700
701 input.consume('{');
702 KJ_REQUIRE(++nestingDepth <= maxNestingDepth, "JSON message nested too deeply.");
703 KJ_DEFER(--nestingDepth);
704
705 while (input.consumeWhitespace(), input.nextChar() != '}') {
706 auto orphan = orphanage.newOrphan<JsonValue::Field>();
707 auto builder = orphan.get();
708
709 if (expectComma) {
710 input.consumeWhitespace();
711 input.consume(',');
712 input.consumeWhitespace();
713 }
714
715 builder.setName(consumeQuotedString());
716
717 input.consumeWhitespace();
718 input.consume(':');
719 input.consumeWhitespace();
720
721 auto valueBuilder = builder.getValue();
722 parseValue(valueBuilder);
723
724 fields.add(kj::mv(orphan));
725
726 expectComma = true;
727 }
728
729 output.initObject(fields.size());
730 auto object = output.getObject();
731
732 for (auto i : kj::indices(fields)) {
733 object.adoptWithCaveats(i, kj::mv(fields[i]));
734 }
735
736 input.consume('}');
737 }
738
inputExhausted()739 bool inputExhausted() { return input.exhausted(); }
740
741 private:
consumeQuotedString()742 kj::String consumeQuotedString() {
743 input.consume('"');
744 // TODO(perf): Avoid copy / alloc if no escapes encoutered.
745 // TODO(perf): Get statistics on string size and preallocate?
746 kj::Vector<char> decoded;
747
748 do {
749 auto stringValue = input.consumeWhile([](const char chr) {
750 return chr != '"' && chr != '\\';
751 });
752
753 decoded.addAll(stringValue);
754
755 if (input.nextChar() == '\\') { // handle escapes.
756 input.advance();
757 switch(input.nextChar()) {
758 case '"' : decoded.add('"' ); input.advance(); break;
759 case '\\': decoded.add('\\'); input.advance(); break;
760 case '/' : decoded.add('/' ); input.advance(); break;
761 case 'b' : decoded.add('\b'); input.advance(); break;
762 case 'f' : decoded.add('\f'); input.advance(); break;
763 case 'n' : decoded.add('\n'); input.advance(); break;
764 case 'r' : decoded.add('\r'); input.advance(); break;
765 case 't' : decoded.add('\t'); input.advance(); break;
766 case 'u' :
767 input.consume('u');
768 unescapeAndAppend(input.consume(size_t(4)), decoded);
769 break;
770 default: KJ_FAIL_REQUIRE("Invalid escape in JSON string."); break;
771 }
772 }
773
774 } while(input.nextChar() != '"');
775
776 input.consume('"');
777 decoded.add('\0');
778
779 // TODO(perf): This copy can be eliminated, but I can't find the kj::wayToDoIt();
780 return kj::String(decoded.releaseAsArray());
781 }
782
consumeNumber()783 kj::String consumeNumber() {
784 auto numArrayPtr = input.consumeCustom([](Input& input) {
785 input.tryConsume('-');
786 if (!input.tryConsume('0')) {
787 input.consumeOne([](char c) { return '1' <= c && c <= '9'; });
788 input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
789 }
790
791 if (input.tryConsume('.')) {
792 input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
793 }
794
795 if (input.tryConsume('e') || input.tryConsume('E')) {
796 input.tryConsume('+') || input.tryConsume('-');
797 input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
798 }
799 });
800
801 KJ_REQUIRE(numArrayPtr.size() > 0, "Expected number in JSON input.");
802
803 kj::Vector<char> number;
804 number.addAll(numArrayPtr);
805 number.add('\0');
806
807 return kj::String(number.releaseAsArray());
808 }
809
810 // TODO(someday): This "interface" is ugly, and won't work if/when surrogates are handled.
unescapeAndAppend(kj::ArrayPtr<const char> hex,kj::Vector<char> & target)811 void unescapeAndAppend(kj::ArrayPtr<const char> hex, kj::Vector<char>& target) {
812 KJ_REQUIRE(hex.size() == 4);
813 int codePoint = 0;
814
815 for (int i = 0; i < 4; ++i) {
816 char c = hex[i];
817 codePoint <<= 4;
818
819 if ('0' <= c && c <= '9') {
820 codePoint |= c - '0';
821 } else if ('a' <= c && c <= 'f') {
822 codePoint |= c - 'a';
823 } else if ('A' <= c && c <= 'F') {
824 codePoint |= c - 'A';
825 } else {
826 KJ_FAIL_REQUIRE("Invalid hex digit in unicode escape.", c);
827 }
828 }
829
830 if (codePoint < 128) {
831 target.add(0x7f & static_cast<char>(codePoint));
832 } else {
833 // TODO(perf): This is sorta malloc-heavy...
834 char16_t u = codePoint;
835 target.addAll(kj::decodeUtf16(kj::arrayPtr(&u, 1)));
836 }
837 }
838
839 const size_t maxNestingDepth;
840 Input input;
841 size_t nestingDepth;
842
843
844 }; // class Parser
845
846 } // namespace
847
848
decodeRaw(kj::ArrayPtr<const char> input,JsonValue::Builder output) const849 void JsonCodec::decodeRaw(kj::ArrayPtr<const char> input, JsonValue::Builder output) const {
850 Parser parser(impl->maxNestingDepth, input);
851 parser.parseValue(output);
852
853 KJ_REQUIRE(parser.inputExhausted(), "Input remains after parsing JSON.");
854 }
855
856 // -----------------------------------------------------------------------------
857
decodeBase(const JsonCodec & codec,JsonValue::Reader input,Type type,Orphanage orphanage) const858 Orphan<DynamicValue> JsonCodec::HandlerBase::decodeBase(
859 const JsonCodec& codec, JsonValue::Reader input, Type type, Orphanage orphanage) const {
860 KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch");
861 }
decodeStructBase(const JsonCodec & codec,JsonValue::Reader input,DynamicStruct::Builder output) const862 void JsonCodec::HandlerBase::decodeStructBase(
863 const JsonCodec& codec, JsonValue::Reader input, DynamicStruct::Builder output) const {
864 KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch");
865 }
866
addTypeHandlerImpl(Type type,HandlerBase & handler)867 void JsonCodec::addTypeHandlerImpl(Type type, HandlerBase& handler) {
868 impl->typeHandlers.upsert(type, &handler, [](HandlerBase*& existing, HandlerBase* replacement) {
869 KJ_REQUIRE(existing == replacement, "type already has a different registered handler");
870 });
871 }
872
addFieldHandlerImpl(StructSchema::Field field,Type type,HandlerBase & handler)873 void JsonCodec::addFieldHandlerImpl(StructSchema::Field field, Type type, HandlerBase& handler) {
874 KJ_REQUIRE(type == field.getType(),
875 "handler type did not match field type for addFieldHandler()");
876 impl->fieldHandlers.upsert(field, &handler, [](HandlerBase*& existing, HandlerBase* replacement) {
877 KJ_REQUIRE(existing == replacement, "field already has a different registered handler");
878 });
879 }
880
881 // =======================================================================================
882
// 64-bit IDs of the JSON-related annotations, one per annotation kind.
// NOTE(review): presumably these match the annotation IDs declared in the JSON compat schema
// file -- confirm against that schema before changing any value.
static constexpr uint64_t JSON_NAME_ANNOTATION_ID          = 0xfa5b1fd61c2e7c3dull;
static constexpr uint64_t JSON_FLATTEN_ANNOTATION_ID       = 0x82d3e852af0336bfull;
static constexpr uint64_t JSON_DISCRIMINATOR_ANNOTATION_ID = 0xcfa794e8d19a0162ull;
static constexpr uint64_t JSON_BASE64_ANNOTATION_ID        = 0xd7d879450a253e4bull;
static constexpr uint64_t JSON_HEX_ANNOTATION_ID           = 0xf061e22f0ae5c7b5ull;
888
889 class JsonCodec::Base64Handler final: public JsonCodec::Handler<capnp::Data> {
890 public:
encode(const JsonCodec & codec,capnp::Data::Reader input,JsonValue::Builder output) const891 void encode(const JsonCodec& codec, capnp::Data::Reader input, JsonValue::Builder output) const {
892 output.setString(kj::encodeBase64(input));
893 }
894
decode(const JsonCodec & codec,JsonValue::Reader input,Orphanage orphanage) const895 Orphan<capnp::Data> decode(const JsonCodec& codec, JsonValue::Reader input,
896 Orphanage orphanage) const {
897 return orphanage.newOrphanCopy(capnp::Data::Reader(kj::decodeBase64(input.getString())));
898 }
899 };
900
901 class JsonCodec::HexHandler final: public JsonCodec::Handler<capnp::Data> {
902 public:
encode(const JsonCodec & codec,capnp::Data::Reader input,JsonValue::Builder output) const903 void encode(const JsonCodec& codec, capnp::Data::Reader input, JsonValue::Builder output) const {
904 output.setString(kj::encodeHex(input));
905 }
906
decode(const JsonCodec & codec,JsonValue::Reader input,Orphanage orphanage) const907 Orphan<capnp::Data> decode(const JsonCodec& codec, JsonValue::Reader input,
908 Orphanage orphanage) const {
909 return orphanage.newOrphanCopy(capnp::Data::Reader(kj::decodeHex(input.getString())));
910 }
911 };
912
class JsonCodec::AnnotatedHandler final: public JsonCodec::Handler<DynamicStruct> {
  // Struct handler driven by the JSON annotations found on a struct schema and its fields.
  //
  // The constructor walks the schema once and precomputes, for every field, the JSON name to use,
  // whether the field is flattened into its parent, and how union tags/values are named.
  // encode() and decode() then work purely from those precomputed tables.

public:
  AnnotatedHandler(JsonCodec& codec, StructSchema schema,
                   kj::Maybe<json::DiscriminatorOptions::Reader> discriminator,
                   kj::Maybe<kj::StringPtr> unionDeclName,
                   kj::Vector<Schema>& dependencies)
      : schema(schema) {
    // Parameters:
    // - `codec`: used to register per-field handlers (base64/hex) and to load the handlers of
    //   group / flattened struct fields.
    // - `schema`: the struct type this handler is for.
    // - `discriminator`: discriminator options supplied by the caller, or null, in which case
    //   the struct type's own annotations are searched for one.
    // - `unionDeclName`: used as the union tag name when the discriminator does not name one.
    // - `dependencies`: receives the struct/enum/interface types of fields (after unwrapping
    //   lists) that have no entry in the codec's type handler table.

    auto schemaProto = schema.getProto();
    auto typeName = schemaProto.getDisplayName();

    if (discriminator == nullptr) {
      // There are two cases of unions:
      // * Named unions, which are special cases of named groups. In this case, the union may be
      //   annotated by annotating the field. In this case, we receive a non-null `discriminator`
      //   as a constructor parameter, and schemaProto.getAnnotations() must be empty because
      //   it's not possible to annotate a group's type (because the type is anonymous).
      // * Unnamed unions, of which there can only be one in any particular scope. In this case,
      //   the parent struct type itself is annotated.
      // So if we received `null` as the constructor parameter, check for annotations on the struct
      // type.
      for (auto anno: schemaProto.getAnnotations()) {
        switch (anno.getId()) {
          case JSON_DISCRIMINATOR_ANNOTATION_ID:
            discriminator = anno.getValue().getStruct().getAs<json::DiscriminatorOptions>();
            break;
        }
      }
    }

    KJ_IF_MAYBE(d, discriminator) {
      // The tag name is the discriminator's explicit name if it has one, otherwise the
      // (possibly null) name passed in by the caller.
      if (d->hasName()) {
        unionTagName = d->getName();
      } else {
        unionTagName = unionDeclName;
      }
      KJ_IF_MAYBE(u, unionTagName) {
        fieldsByName.insert(*u, FieldNameInfo {
          FieldNameInfo::UNION_TAG, 0, 0, nullptr
        });
      }

      if (d->hasValueName()) {
        fieldsByName.insert(d->getValueName(), FieldNameInfo {
          FieldNameInfo::UNION_VALUE, 0, 0, nullptr
        });
      }
    }

    discriminantOffset = schemaProto.getStruct().getDiscriminantOffset();

    // Build one FieldInfo per schema field. As a side effect this also fills `fieldsByName`,
    // `unionTagValues` and `dependencies`.
    fields = KJ_MAP(field, schema.getFields()) {
      auto fieldProto = field.getProto();
      auto type = field.getType();
      auto fieldName = fieldProto.getName();

      FieldNameInfo nameInfo;
      nameInfo.index = field.getIndex();
      nameInfo.type = FieldNameInfo::NORMAL;
      nameInfo.prefixLength = 0;

      FieldInfo info;
      info.name = fieldName;

      kj::Maybe<json::DiscriminatorOptions::Reader> subDiscriminator;
      bool flattened = false;
      for (auto anno: field.getProto().getAnnotations()) {
        switch (anno.getId()) {
          case JSON_NAME_ANNOTATION_ID:
            info.name = anno.getValue().getText();
            break;
          case JSON_FLATTEN_ANNOTATION_ID:
            KJ_REQUIRE(type.isStruct(), "only struct types can be flattened", fieldName, typeName);
            flattened = true;
            info.prefix = anno.getValue().getStruct().getAs<json::FlattenOptions>().getPrefix();
            break;
          case JSON_DISCRIMINATOR_ANNOTATION_ID:
            KJ_REQUIRE(fieldProto.isGroup(), "only unions can have discriminator");
            subDiscriminator = anno.getValue().getStruct().getAs<json::DiscriminatorOptions>();
            break;
          case JSON_BASE64_ANNOTATION_ID: {
            KJ_REQUIRE(field.getType().isData(), "only Data can be marked for base64 encoding");
            static Base64Handler handler;
            codec.addFieldHandler(field, handler);
            break;
          }
          case JSON_HEX_ANNOTATION_ID: {
            KJ_REQUIRE(field.getType().isData(), "only Data can be marked for hex encoding");
            static HexHandler handler;
            codec.addFieldHandler(field, handler);
            break;
          }
        }
      }

      if (fieldProto.isGroup()) {
        // Load group type handler now, even if not flattened, so that we can pass its
        // `subDiscriminator`.
        kj::Maybe<kj::StringPtr> subFieldName;
        if (flattened) {
          // If the group was flattened, then we allow its field name to be used as the
          // discriminator name, so that the discriminator doesn't have to explicitly specify a
          // name.
          subFieldName = fieldName;
        }
        auto& subHandler = codec.loadAnnotatedHandler(
            type.asStruct(), subDiscriminator, subFieldName, dependencies);
        if (flattened) {
          info.flattenHandler = subHandler;
        }
      } else if (type.isStruct()) {
        if (flattened) {
          info.flattenHandler = codec.loadAnnotatedHandler(
              type.asStruct(), nullptr, nullptr, dependencies);
        }
      }

      bool isUnionMember = fieldProto.getDiscriminantValue() != schema::Field::NO_DISCRIMINANT;

      KJ_IF_MAYBE(fh, info.flattenHandler) {
        // Set up fieldsByName for each of the child's fields.
        for (auto& entry: fh->fieldsByName) {
          // `flattenedName` is the map key. When a prefix applies, the key points into
          // `ownName`, which is then moved into the map entry's value.
          kj::StringPtr flattenedName;
          kj::String ownName;
          if (info.prefix.size() > 0) {
            ownName = kj::str(info.prefix, entry.key);
            flattenedName = ownName;
          } else {
            flattenedName = entry.key;
          }

          fieldsByName.upsert(flattenedName, FieldNameInfo {
            isUnionMember ? FieldNameInfo::FLATTENED_FROM_UNION : FieldNameInfo::FLATTENED,
            field.getIndex(), (uint)info.prefix.size(), kj::mv(ownName)
          }, [&](FieldNameInfo& existing, FieldNameInfo&& replacement) {
            // A name collision is only accepted when both entries come from union members.
            KJ_REQUIRE(existing.type == FieldNameInfo::FLATTENED_FROM_UNION &&
                       replacement.type == FieldNameInfo::FLATTENED_FROM_UNION,
                "flattened members have the same name and are not mutually exclusive");
          });
        }
      }

      // Capture the name before it is possibly replaced by the discriminator's value name below;
      // this is the string used as the union tag value.
      info.nameForDiscriminant = info.name;

      if (!flattened) {
        bool isUnionWithValueName = false;
        if (isUnionMember) {
          KJ_IF_MAYBE(d, discriminator) {
            if (d->hasValueName()) {
              info.name = d->getValueName();
              isUnionWithValueName = true;
            }
          }
        }

        // When a value name is in effect, the name was already registered as UNION_VALUE above,
        // so no per-field entry is added.
        if (!isUnionWithValueName) {
          fieldsByName.insert(info.name, kj::mv(nameInfo));
        }
      }

      if (isUnionMember) {
        unionTagValues.insert(info.nameForDiscriminant, field);
      }

      // Look for dependencies that we need to add.
      while (type.isList()) type = type.asList().getElementType();
      if (codec.impl->typeHandlers.find(type) == nullptr) {
        switch (type.which()) {
          case schema::Type::STRUCT:
            dependencies.add(type.asStruct());
            break;
          case schema::Type::ENUM:
            dependencies.add(type.asEnum());
            break;
          case schema::Type::INTERFACE:
            dependencies.add(type.asInterface());
            break;
          default:
            break;
        }
      }

      return info;
    };
  }

  const StructSchema schema;
  // The struct type this handler was built for.

  void encode(const JsonCodec& codec, DynamicStruct::Reader input,
              JsonValue::Builder output) const override {
    // Collects all fields to emit (including those of flattened children) into one flat list,
    // then writes them as the members of a single JSON object.
    kj::Vector<FlattenedField> flattenedFields;
    gatherForEncode(codec, input, nullptr, nullptr, flattenedFields);

    auto outs = output.initObject(flattenedFields.size());
    for (auto i: kj::indices(flattenedFields)) {
      auto& in = flattenedFields[i];
      auto out = outs[i];
      out.setName(in.name);
      KJ_SWITCH_ONEOF(in.type) {
        KJ_CASE_ONEOF(type, Type) {
          codec.encode(in.value, type, out.initValue());
        }
        KJ_CASE_ONEOF(field, StructSchema::Field) {
          codec.encodeField(field, in.value, out.initValue());
        }
      }
    }
  }

  void decode(const JsonCodec& codec, JsonValue::Reader input,
              DynamicStruct::Builder output) const override {
    // Decodes every member of the JSON object `input` into `output`. Members for which
    // decodeField() returns false are retried in further passes until a pass makes no progress.
    KJ_REQUIRE(input.isObject());
    kj::HashSet<const void*> unionsSeen;
    kj::Vector<JsonValue::Field::Reader> retries;
    for (auto field: input.getObject()) {
      if (!decodeField(codec, field.getName(), field.getValue(), output, unionsSeen)) {
        retries.add(field);
      }
    }
    while (!retries.empty()) {
      auto retriesCopy = kj::mv(retries);
      KJ_ASSERT(retries.empty());
      for (auto field: retriesCopy) {
        if (!decodeField(codec, field.getName(), field.getValue(), output, unionsSeen)) {
          retries.add(field);
        }
      }
      if (retries.size() == retriesCopy.size()) {
        // We made no progress in this iteration. Give up on the remaining fields.
        break;
      }
    }
  }

private:
  struct FieldInfo {
    kj::StringPtr name;
    // JSON name used for the field: the schema name, replaced by the name annotation if present,
    // and replaced by the discriminator's value name for non-flattened union members when set.

    kj::StringPtr nameForDiscriminant;
    // The name as it was before any value-name replacement; used as the union tag value.

    kj::Maybe<const AnnotatedHandler&> flattenHandler;
    // Non-null when the field is flattened: the handler of the field's struct/group type.

    kj::StringPtr prefix;
    // Prefix taken from the flatten annotation's options.
  };

  kj::Array<FieldInfo> fields;
  // Maps field index -> info about the field

  struct FieldNameInfo {
    enum {
      NORMAL,
      // This is a normal field with the given `index`.

      FLATTENED,
      // This is a field of a flattened inner struct or group (that is not in a union). `index`
      // is the field index of the particular struct/group field.

      UNION_TAG,
      // The parent struct is a flattened union, and this field is the discriminant tag. It is a
      // string field whose name determines the union type. `index` is not used.

      FLATTENED_FROM_UNION,
      // The parent struct is a flattened union, and some of the union's members are flattened
      // structs or groups, and this field is possibly a member of one or more of them. `index`
      // is not used, because it's possible that the same field name appears in multiple variants.
      // Instead, the parser must find the union tag, and then can descend and attempt to parse
      // the field in the context of whichever variant is selected.

      UNION_VALUE
      // This field is the value of a discriminated union that has `valueName` set.
    } type;

    uint index;
    // For `NORMAL` and `FLATTENED`, the index of the field in schema.getFields().

    uint prefixLength;
    // Number of leading characters removed from the JSON name before it is handed to the
    // flattened child's handler.

    kj::String ownName;
    // Owns the prefixed name string when one had to be built; the map key points into it.
  };

  kj::HashMap<kj::StringPtr, FieldNameInfo> fieldsByName;
  // Maps JSON names to info needed to parse them.

  kj::HashMap<kj::StringPtr, StructSchema::Field> unionTagValues;
  // If the parent struct is a flattened union, it has a tag field which is a string with one of
  // these values. The map maps to the union member to set.

  kj::Maybe<kj::StringPtr> unionTagName;
  // If the parent struct is a flattened union, the name of the "tag" field.

  uint discriminantOffset;
  // Shortcut for schema.getProto().getStruct().getDiscriminantOffset(), used in a hack to identify
  // which unions have been seen.

  struct FlattenedField {
    // One member of the JSON object produced by encode().

    kj::String ownName;
    // Holds the concatenated `prefix + name` when a prefix is present; otherwise null.

    kj::StringPtr name;
    // The JSON member name: points at `ownName` when a prefix is present, else at the caller's
    // string.

    kj::OneOf<StructSchema::Field, Type> type;
    DynamicValue::Reader value;

    FlattenedField(kj::StringPtr prefix, kj::StringPtr name,
                   kj::OneOf<StructSchema::Field, Type> type, DynamicValue::Reader value)
        : ownName(prefix.size() > 0 ? kj::str(prefix, name) : nullptr),
          name(prefix.size() > 0 ? ownName : name),
          type(type), value(value) {}
  };

  void gatherForEncode(const JsonCodec& codec, DynamicValue::Reader input,
                       kj::StringPtr prefix, kj::StringPtr morePrefix,
                       kj::Vector<FlattenedField>& flattenedFields) const {
    // Appends to `flattenedFields` one entry per JSON member to emit for the struct `input`,
    // recursing into flattened fields. The effective name prefix is `prefix` followed by
    // `morePrefix`.

    kj::String ownPrefix;
    if (morePrefix.size() > 0) {
      if (prefix.size() > 0) {
        ownPrefix = kj::str(prefix, morePrefix);
        prefix = ownPrefix;
      } else {
        prefix = morePrefix;
      }
    }

    auto reader = input.as<DynamicStruct>();
    auto schema = reader.getSchema();
    for (auto field: schema.getNonUnionFields()) {
      auto& info = fields[field.getIndex()];
      if (!reader.has(field, codec.impl->hasMode)) {
        // skip
      } else KJ_IF_MAYBE(handler, info.flattenHandler) {
        handler->gatherForEncode(codec, reader.get(field), prefix, info.prefix, flattenedFields);
      } else {
        flattenedFields.add(FlattenedField {
          prefix, info.name, field, reader.get(field) });
      }
    }

    KJ_IF_MAYBE(which, reader.which()) {
      auto& info = fields[which->getIndex()];
      KJ_IF_MAYBE(tag, unionTagName) {
        // Emit the tag member, whose value names the active union member.
        flattenedFields.add(FlattenedField {
          prefix, *tag, Type(schema::Type::TEXT), Text::Reader(info.nameForDiscriminant) });
      }

      KJ_IF_MAYBE(handler, info.flattenHandler) {
        handler->gatherForEncode(codec, reader.get(*which), prefix, info.prefix, flattenedFields);
      } else {
        auto type = which->getType();
        if (type.which() == schema::Type::VOID && unionTagName != nullptr) {
          // When we have an explicit union discriminant, we don't need to encode void fields.
        } else {
          flattenedFields.add(FlattenedField {
            prefix, info.name, *which, reader.get(*which) });
        }
      }
    }
  }

  bool decodeField(const JsonCodec& codec, kj::StringPtr name, JsonValue::Reader value,
                   DynamicStruct::Builder output, kj::HashSet<const void*>& unionsSeen) const {
    // Decodes the JSON member `name` / `value` into `output`.
    //
    // Returns false when the member cannot be decoded yet because the tag of the union it
    // belongs to has not been seen; the caller is expected to retry later. Returns true in all
    // other cases, including names that are not known (which are ignored).
    KJ_ASSERT(output.getSchema() == schema);

    KJ_IF_MAYBE(info, fieldsByName.find(name)) {
      switch (info->type) {
        case FieldNameInfo::NORMAL: {
          auto field = output.getSchema().getFields()[info->index];
          codec.decodeField(field, value, Orphanage::getForMessageContaining(output), output);
          return true;
        }
        case FieldNameInfo::FLATTENED:
          // Delegate to the flattened child's handler, with this level's prefix removed from
          // the name.
          return KJ_ASSERT_NONNULL(fields[info->index].flattenHandler)
              .decodeField(codec, name.slice(info->prefixLength), value,
                  output.get(output.getSchema().getFields()[info->index]).as<DynamicStruct>(),
                  unionsSeen);
        case FieldNameInfo::UNION_TAG: {
          KJ_REQUIRE(value.isString(), "Expected string value.");

          // Mark that we've seen a union tag for this struct.
          const void* ptr = getUnionInstanceIdentifier(output);
          KJ_IF_MAYBE(field, unionTagValues.find(value.getString())) {
            // clear() has the side-effect of activating this member of the union, without
            // allocating any objects.
            output.clear(*field);
            unionsSeen.insert(ptr);
          }
          return true;
        }
        case FieldNameInfo::FLATTENED_FROM_UNION: {
          const void* ptr = getUnionInstanceIdentifier(output);
          if (unionsSeen.contains(ptr)) {
            auto variant = KJ_ASSERT_NONNULL(output.which());
            return KJ_ASSERT_NONNULL(fields[variant.getIndex()].flattenHandler)
                .decodeField(codec, name.slice(info->prefixLength), value,
                    output.get(variant).as<DynamicStruct>(), unionsSeen);
          } else {
            // We haven't seen the union tag yet, so we can't parse this field yet. Try again later.
            return false;
          }
        }
        case FieldNameInfo::UNION_VALUE: {
          const void* ptr = getUnionInstanceIdentifier(output);
          if (unionsSeen.contains(ptr)) {
            auto variant = KJ_ASSERT_NONNULL(output.which());
            codec.decodeField(variant, value, Orphanage::getForMessageContaining(output), output);
            return true;
          } else {
            // We haven't seen the union tag yet, so we can't parse this field yet. Try again later.
            return false;
          }
        }
      }

      KJ_UNREACHABLE;
    } else {
      // Ignore undefined field.
      return true;
    }
  }

  const void* getUnionInstanceIdentifier(DynamicStruct::Builder obj) const {
    // Gets a value uniquely identifying an instance of a union.
    // HACK: We return a pointer to the union's discriminant within the underlying buffer.
    // The pointer is only used as an identity key in `unionsSeen`; this code never dereferences it.
    return reinterpret_cast<const uint16_t*>(
        AnyStruct::Reader(obj.asReader()).getDataSection().begin()) + discriminantOffset;
  }
};
1331
1332 class JsonCodec::AnnotatedEnumHandler final: public JsonCodec::Handler<DynamicEnum> {
1333 public:
AnnotatedEnumHandler(EnumSchema schema)1334 AnnotatedEnumHandler(EnumSchema schema): schema(schema) {
1335 auto enumerants = schema.getEnumerants();
1336 auto builder = kj::heapArrayBuilder<kj::StringPtr>(enumerants.size());
1337
1338 for (auto e: enumerants) {
1339 auto proto = e.getProto();
1340 kj::StringPtr name = proto.getName();
1341
1342 for (auto anno: proto.getAnnotations()) {
1343 switch (anno.getId()) {
1344 case JSON_NAME_ANNOTATION_ID:
1345 name = anno.getValue().getText();
1346 break;
1347 }
1348 }
1349
1350 builder.add(name);
1351 nameToValue.insert(name, e.getIndex());
1352 }
1353
1354 valueToName = builder.finish();
1355 }
1356
encode(const JsonCodec & codec,DynamicEnum input,JsonValue::Builder output) const1357 void encode(const JsonCodec& codec, DynamicEnum input, JsonValue::Builder output) const override {
1358 KJ_IF_MAYBE(e, input.getEnumerant()) {
1359 KJ_ASSERT(e->getIndex() < valueToName.size());
1360 output.setString(valueToName[e->getIndex()]);
1361 } else {
1362 output.setNumber(input.getRaw());
1363 }
1364 }
1365
decode(const JsonCodec & codec,JsonValue::Reader input) const1366 DynamicEnum decode(const JsonCodec& codec, JsonValue::Reader input) const override {
1367 if (input.isNumber()) {
1368 return DynamicEnum(schema, static_cast<uint16_t>(input.getNumber()));
1369 } else {
1370 uint16_t val = KJ_REQUIRE_NONNULL(nameToValue.find(input.getString()),
1371 "invalid enum value", input.getString());
1372 return DynamicEnum(schema.getEnumerants()[val]);
1373 }
1374 }
1375
1376 private:
1377 EnumSchema schema;
1378 kj::Array<kj::StringPtr> valueToName;
1379 kj::HashMap<kj::StringPtr, uint16_t> nameToValue;
1380 };
1381
1382 class JsonCodec::JsonValueHandler final: public JsonCodec::Handler<DynamicStruct> {
1383 public:
encode(const JsonCodec & codec,DynamicStruct::Reader input,JsonValue::Builder output) const1384 void encode(const JsonCodec& codec, DynamicStruct::Reader input,
1385 JsonValue::Builder output) const override {
1386 #if _MSC_VER
1387 // TODO(msvc): Hack to work around missing AnyStruct::Builder constructor on MSVC.
1388 rawCopy(input, toDynamic(output));
1389 #else
1390 rawCopy(input, kj::mv(output));
1391 #endif
1392 }
1393
decode(const JsonCodec & codec,JsonValue::Reader input,DynamicStruct::Builder output) const1394 void decode(const JsonCodec& codec, JsonValue::Reader input,
1395 DynamicStruct::Builder output) const override {
1396 rawCopy(input, kj::mv(output));
1397 }
1398
1399 private:
rawCopy(AnyStruct::Reader input,AnyStruct::Builder output) const1400 void rawCopy(AnyStruct::Reader input, AnyStruct::Builder output) const {
1401 // HACK: Manually copy using AnyStruct, so that if JsonValue's definition changes, this code
1402 // doesn't need to be updated. However, note that if JsonValue ever adds new fields that
1403 // change its size, and the input struct is a newer version than the output, we may lose
1404 // the new fields. Technically the "correct" thing to do would be to allocate the output
1405 // struct to be exactly the same size as the input, but JsonCodec's Handler interface is
1406 // not designed to allow that -- it passes in an already-allocated builder. Oops.
1407 auto dataIn = input.getDataSection();
1408 auto dataOut = output.getDataSection();
1409 memcpy(dataOut.begin(), dataIn.begin(), kj::min(dataOut.size(), dataIn.size()));
1410
1411 auto ptrIn = input.getPointerSection();
1412 auto ptrOut = output.getPointerSection();
1413 for (auto i: kj::zeroTo(kj::min(ptrIn.size(), ptrOut.size()))) {
1414 ptrOut[i].set(ptrIn[i]);
1415 }
1416 }
1417 };
1418
loadAnnotatedHandler(StructSchema schema,kj::Maybe<json::DiscriminatorOptions::Reader> discriminator,kj::Maybe<kj::StringPtr> unionDeclName,kj::Vector<Schema> & dependencies)1419 JsonCodec::AnnotatedHandler& JsonCodec::loadAnnotatedHandler(
1420 StructSchema schema, kj::Maybe<json::DiscriminatorOptions::Reader> discriminator,
1421 kj::Maybe<kj::StringPtr> unionDeclName, kj::Vector<Schema>& dependencies) {
1422 auto& entry = impl->annotatedHandlers.upsert(schema, nullptr,
1423 [&](kj::Maybe<kj::Own<AnnotatedHandler>>& existing, auto dummy) {
1424 KJ_ASSERT(existing != nullptr,
1425 "cyclic JSON flattening detected", schema.getProto().getDisplayName());
1426 });
1427
1428 KJ_IF_MAYBE(v, entry.value) {
1429 // Already exists.
1430 return **v;
1431 } else {
1432 // Not seen before.
1433 auto newHandler = kj::heap<AnnotatedHandler>(
1434 *this, schema, discriminator, unionDeclName, dependencies);
1435 auto& result = *newHandler;
1436
1437 // Map may have changed, so we have to look up again.
1438 KJ_ASSERT_NONNULL(impl->annotatedHandlers.find(schema)) = kj::mv(newHandler);
1439
1440 addTypeHandler(schema, result);
1441 return result;
1442 };
1443 }
1444
handleByAnnotation(Schema schema)1445 void JsonCodec::handleByAnnotation(Schema schema) {
1446 switch (schema.getProto().which()) {
1447 case schema::Node::STRUCT: {
1448 if (schema.getProto().getId() == capnp::typeId<JsonValue>()) {
1449 // Special handler for JsonValue.
1450 static JsonValueHandler GLOBAL_HANDLER;
1451 addTypeHandler(schema.asStruct(), GLOBAL_HANDLER);
1452 } else {
1453 kj::Vector<Schema> dependencies;
1454 loadAnnotatedHandler(schema.asStruct(), nullptr, nullptr, dependencies);
1455 for (auto dep: dependencies) {
1456 handleByAnnotation(dep);
1457 }
1458 }
1459 break;
1460 }
1461 case schema::Node::ENUM: {
1462 auto enumSchema = schema.asEnum();
1463 impl->annotatedEnumHandlers.findOrCreate(enumSchema, [&]() {
1464 auto handler = kj::heap<AnnotatedEnumHandler>(enumSchema);
1465 addTypeHandler(enumSchema, *handler);
1466 return kj::HashMap<Type, kj::Own<AnnotatedEnumHandler>>::Entry {
1467 enumSchema, kj::mv(handler) };
1468 });
1469 break;
1470 }
1471 default:
1472 break;
1473 }
1474 }
1475
1476 } // namespace capnp
1477