1 // Copyright (c) 2015 Sandstorm Development Group, Inc. and contributors
2 // Licensed under the MIT License:
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21 
22 #include "json.h"
23 #include <math.h>    // for HUGEVAL to check for overflow in strtod
24 #include <stdlib.h>  // strtod
25 #include <errno.h>   // for strtod errors
26 #include <capnp/orphan.h>
27 #include <kj/debug.h>
28 #include <kj/function.h>
29 #include <kj/vector.h>
30 #include <kj/one-of.h>
31 #include <kj/encoding.h>
32 #include <kj/map.h>
33 
34 namespace capnp {
35 
36 struct JsonCodec::Impl {
37   bool prettyPrint = false;
38   HasMode hasMode = HasMode::NON_NULL;
39   size_t maxNestingDepth = 64;
40 
41   kj::HashMap<Type, HandlerBase*> typeHandlers;
42   kj::HashMap<StructSchema::Field, HandlerBase*> fieldHandlers;
43   kj::HashMap<Type, kj::Maybe<kj::Own<AnnotatedHandler>>> annotatedHandlers;
44   kj::HashMap<Type, kj::Own<AnnotatedEnumHandler>> annotatedEnumHandlers;
45 
encodeRawcapnp::JsonCodec::Impl46   kj::StringTree encodeRaw(JsonValue::Reader value, uint indent, bool& multiline,
47                            bool hasPrefix) const {
48     switch (value.which()) {
49       case JsonValue::NULL_:
50         return kj::strTree("null");
51       case JsonValue::BOOLEAN:
52         return kj::strTree(value.getBoolean());
53       case JsonValue::NUMBER:
54         return kj::strTree(value.getNumber());
55 
56       case JsonValue::STRING:
57         return kj::strTree(encodeString(value.getString()));
58 
59       case JsonValue::ARRAY: {
60         auto array = value.getArray();
61         uint subIndent = indent + (array.size() > 1);
62         bool childMultiline = false;
63         auto encodedElements = KJ_MAP(element, array) {
64           return encodeRaw(element, subIndent, childMultiline, false);
65         };
66 
67         return kj::strTree('[', encodeList(
68             kj::mv(encodedElements), childMultiline, indent, multiline, hasPrefix), ']');
69       }
70 
71       case JsonValue::OBJECT: {
72         auto object = value.getObject();
73         uint subIndent = indent + (object.size() > 1);
74         bool childMultiline = false;
75         kj::StringPtr colon = prettyPrint ? ": " : ":";
76         auto encodedElements = KJ_MAP(field, object) {
77           return kj::strTree(
78               encodeString(field.getName()), colon,
79               encodeRaw(field.getValue(), subIndent, childMultiline, true));
80         };
81 
82         return kj::strTree('{', encodeList(
83             kj::mv(encodedElements), childMultiline, indent, multiline, hasPrefix), '}');
84       }
85 
86       case JsonValue::CALL: {
87         auto call = value.getCall();
88         auto params = call.getParams();
89         uint subIndent = indent + (params.size() > 1);
90         bool childMultiline = false;
91         auto encodedElements = KJ_MAP(element, params) {
92           return encodeRaw(element, subIndent, childMultiline, false);
93         };
94 
95         return kj::strTree(call.getFunction(), '(', encodeList(
96             kj::mv(encodedElements), childMultiline, indent, multiline, true), ')');
97       }
98     }
99 
100     KJ_FAIL_ASSERT("unknown JsonValue type", static_cast<uint>(value.which()));
101   }
102 
encodeStringcapnp::JsonCodec::Impl103   kj::String encodeString(kj::StringPtr chars) const {
104     static const char HEXDIGITS[] = "0123456789abcdef";
105     kj::Vector<char> escaped(chars.size() + 3);
106 
107     escaped.add('"');
108     for (char c: chars) {
109       switch (c) {
110         case '\"': escaped.addAll(kj::StringPtr("\\\"")); break;
111         case '\\': escaped.addAll(kj::StringPtr("\\\\")); break;
112         case '\b': escaped.addAll(kj::StringPtr("\\b")); break;
113         case '\f': escaped.addAll(kj::StringPtr("\\f")); break;
114         case '\n': escaped.addAll(kj::StringPtr("\\n")); break;
115         case '\r': escaped.addAll(kj::StringPtr("\\r")); break;
116         case '\t': escaped.addAll(kj::StringPtr("\\t")); break;
117         default:
118           if (static_cast<uint8_t>(c) < 0x20) {
119             escaped.addAll(kj::StringPtr("\\u00"));
120             uint8_t c2 = c;
121             escaped.add(HEXDIGITS[c2 / 16]);
122             escaped.add(HEXDIGITS[c2 % 16]);
123           } else {
124             escaped.add(c);
125           }
126           break;
127       }
128     }
129     escaped.add('"');
130     escaped.add('\0');
131 
132     return kj::String(escaped.releaseAsArray());
133   }
134 
encodeListcapnp::JsonCodec::Impl135   kj::StringTree encodeList(kj::Array<kj::StringTree> elements,
136                             bool hasMultilineElement, uint indent, bool& multiline,
137                             bool hasPrefix) const {
138     size_t maxChildSize = 0;
139     for (auto& e: elements) maxChildSize = kj::max(maxChildSize, e.size());
140 
141     kj::StringPtr prefix;
142     kj::StringPtr delim;
143     kj::StringPtr suffix;
144     kj::String ownPrefix;
145     kj::String ownDelim;
146     if (!prettyPrint) {
147       // No whitespace.
148       delim = ",";
149       prefix = "";
150       suffix = "";
151     } else if ((elements.size() > 1) && (hasMultilineElement || maxChildSize > 50)) {
152       // If the array contained any multi-line elements, OR it contained sufficiently long
153       // elements, then put each element on its own line.
154       auto indentSpace = kj::repeat(' ', (indent + 1) * 2);
155       delim = ownDelim = kj::str(",\n", indentSpace);
156       multiline = true;
157       if (hasPrefix) {
158         // We're producing a multi-line list, and the first line has some garbage in front of it.
159         // Therefore, move the first element to the next line.
160         prefix = ownPrefix = kj::str("\n", indentSpace);
161       } else {
162         prefix = " ";
163       }
164       suffix = " ";
165     } else {
166       // Put everything on one line, but add spacing between elements for legibility.
167       delim = ", ";
168       prefix = "";
169       suffix = "";
170     }
171 
172     return kj::strTree(prefix, kj::StringTree(kj::mv(elements), delim), suffix);
173   }
174 };
175 
JsonCodec()176 JsonCodec::JsonCodec()
177     : impl(kj::heap<Impl>()) {}
~JsonCodec()178 JsonCodec::~JsonCodec() noexcept(false) {}
179 
setPrettyPrint(bool enabled)180 void JsonCodec::setPrettyPrint(bool enabled) { impl->prettyPrint = enabled; }
181 
setMaxNestingDepth(size_t maxNestingDepth)182 void JsonCodec::setMaxNestingDepth(size_t maxNestingDepth) {
183   impl->maxNestingDepth = maxNestingDepth;
184 }
185 
setHasMode(HasMode mode)186 void JsonCodec::setHasMode(HasMode mode) { impl->hasMode = mode; }
187 
encode(DynamicValue::Reader value,Type type) const188 kj::String JsonCodec::encode(DynamicValue::Reader value, Type type) const {
189   MallocMessageBuilder message;
190   auto json = message.getRoot<JsonValue>();
191   encode(value, type, json);
192   return encodeRaw(json);
193 }
194 
decode(kj::ArrayPtr<const char> input,DynamicStruct::Builder output) const195 void JsonCodec::decode(kj::ArrayPtr<const char> input, DynamicStruct::Builder output) const {
196   MallocMessageBuilder message;
197   auto json = message.getRoot<JsonValue>();
198   decodeRaw(input, json);
199   decode(json, output);
200 }
201 
decode(kj::ArrayPtr<const char> input,Type type,Orphanage orphanage) const202 Orphan<DynamicValue> JsonCodec::decode(
203     kj::ArrayPtr<const char> input, Type type, Orphanage orphanage) const {
204   MallocMessageBuilder message;
205   auto json = message.getRoot<JsonValue>();
206   decodeRaw(input, json);
207   return decode(json, type, orphanage);
208 }
209 
encodeRaw(JsonValue::Reader value) const210 kj::String JsonCodec::encodeRaw(JsonValue::Reader value) const {
211   bool multiline = false;
212   return impl->encodeRaw(value, 0, multiline, false).flatten();
213 }
214 
encode(DynamicValue::Reader input,Type type,JsonValue::Builder output) const215 void JsonCodec::encode(DynamicValue::Reader input, Type type, JsonValue::Builder output) const {
216   // TODO(someday): For interfaces, check for handlers on superclasses, per documentation...
217   // TODO(someday): For branded types, should we check for handlers on the generic?
218   // TODO(someday): Allow registering handlers for "all structs", "all lists", etc?
219   KJ_IF_MAYBE(handler, impl->typeHandlers.find(type)) {
220     (*handler)->encodeBase(*this, input, output);
221     return;
222   }
223 
224   switch (type.which()) {
225     case schema::Type::VOID:
226       output.setNull();
227       break;
228     case schema::Type::BOOL:
229       output.setBoolean(input.as<bool>());
230       break;
231     case schema::Type::INT8:
232     case schema::Type::INT16:
233     case schema::Type::INT32:
234     case schema::Type::UINT8:
235     case schema::Type::UINT16:
236     case schema::Type::UINT32:
237       output.setNumber(input.as<double>());
238       break;
239     case schema::Type::FLOAT32:
240     case schema::Type::FLOAT64:
241       {
242         double value = input.as<double>();
243         // Inf, -inf and NaN are not allowed in the JSON spec. Storing into string.
244         if (kj::inf() == value) {
245           output.setString("Infinity");
246         } else if (-kj::inf() == value) {
247           output.setString("-Infinity");
248         } else if (kj::isNaN(value)) {
249           output.setString("NaN");
250         } else {
251           output.setNumber(value);
252         }
253       }
254       break;
255     case schema::Type::INT64:
256       output.setString(kj::str(input.as<int64_t>()));
257       break;
258     case schema::Type::UINT64:
259       output.setString(kj::str(input.as<uint64_t>()));
260       break;
261     case schema::Type::TEXT:
262       output.setString(kj::str(input.as<Text>()));
263       break;
264     case schema::Type::DATA: {
265       // Turn into array of byte values. Yep, this is pretty ugly. People really need to override
266       // this with a handler.
267       auto bytes = input.as<Data>();
268       auto array = output.initArray(bytes.size());
269       for (auto i: kj::indices(bytes)) {
270         array[i].setNumber(bytes[i]);
271       }
272       break;
273     }
274     case schema::Type::LIST: {
275       auto list = input.as<DynamicList>();
276       auto elementType = type.asList().getElementType();
277       auto array = output.initArray(list.size());
278       for (auto i: kj::indices(list)) {
279         encode(list[i], elementType, array[i]);
280       }
281       break;
282     }
283     case schema::Type::ENUM: {
284       auto e = input.as<DynamicEnum>();
285       KJ_IF_MAYBE(symbol, e.getEnumerant()) {
286         output.setString(symbol->getProto().getName());
287       } else {
288         output.setNumber(e.getRaw());
289       }
290       break;
291     }
292     case schema::Type::STRUCT: {
293       auto structValue = input.as<capnp::DynamicStruct>();
294       auto nonUnionFields = structValue.getSchema().getNonUnionFields();
295 
296       KJ_STACK_ARRAY(bool, hasField, nonUnionFields.size(), 32, 128);
297 
298       uint fieldCount = 0;
299       for (auto i: kj::indices(nonUnionFields)) {
300         fieldCount += (hasField[i] = structValue.has(nonUnionFields[i], impl->hasMode));
301       }
302 
303       // We try to write the union field, if any, in proper order with the rest.
304       auto which = structValue.which();
305       bool unionFieldIsNull = false;
306 
307       KJ_IF_MAYBE(field, which) {
308         // Even if the union field is null, if it is not the default field of the union then we
309         // have to print it anyway.
310         unionFieldIsNull = !structValue.has(*field, impl->hasMode);
311         if (field->getProto().getDiscriminantValue() != 0 || !unionFieldIsNull) {
312           ++fieldCount;
313         } else {
314           which = nullptr;
315         }
316       }
317 
318       auto object = output.initObject(fieldCount);
319 
320       size_t pos = 0;
321       for (auto i: kj::indices(nonUnionFields)) {
322         auto field = nonUnionFields[i];
323         KJ_IF_MAYBE(unionField, which) {
324           if (unionField->getIndex() < field.getIndex()) {
325             auto outField = object[pos++];
326             outField.setName(unionField->getProto().getName());
327             if (unionFieldIsNull) {
328               outField.initValue().setNull();
329             } else {
330               encodeField(*unionField, structValue.get(*unionField), outField.initValue());
331             }
332             which = nullptr;
333           }
334         }
335         if (hasField[i]) {
336           auto outField = object[pos++];
337           outField.setName(field.getProto().getName());
338           encodeField(field, structValue.get(field), outField.initValue());
339         }
340       }
341       if (which != nullptr) {
342         // Union field not printed yet; must be last.
343         auto unionField = KJ_ASSERT_NONNULL(which);
344         auto outField = object[pos++];
345         outField.setName(unionField.getProto().getName());
346         if (unionFieldIsNull) {
347           outField.initValue().setNull();
348         } else {
349           encodeField(unionField, structValue.get(unionField), outField.initValue());
350         }
351       }
352       KJ_ASSERT(pos == fieldCount);
353       break;
354     }
355     case schema::Type::INTERFACE:
356       KJ_FAIL_REQUIRE("don't know how to JSON-encode capabilities; "
357                       "please register a JsonCodec::Handler for this");
358     case schema::Type::ANY_POINTER:
359       KJ_FAIL_REQUIRE("don't know how to JSON-encode AnyPointer; "
360                       "please register a JsonCodec::Handler for this");
361   }
362 }
363 
encodeField(StructSchema::Field field,DynamicValue::Reader input,JsonValue::Builder output) const364 void JsonCodec::encodeField(StructSchema::Field field, DynamicValue::Reader input,
365                             JsonValue::Builder output) const {
366   KJ_IF_MAYBE(handler, impl->fieldHandlers.find(field)) {
367     (*handler)->encodeBase(*this, input, output);
368     return;
369   }
370 
371   encode(input, field.getType(), output);
372 }
373 
decodeArray(List<JsonValue>::Reader input,ListSchema type,Orphanage orphanage) const374 Orphan<DynamicList> JsonCodec::decodeArray(List<JsonValue>::Reader input, ListSchema type, Orphanage orphanage) const {
375   auto orphan = orphanage.newOrphan(type, input.size());
376   auto output = orphan.get();
377   for (auto i: kj::indices(input)) {
378     output.adopt(i, decode(input[i], type.getElementType(), orphanage));
379   }
380   return orphan;
381 }
382 
decodeObject(JsonValue::Reader input,StructSchema type,Orphanage orphanage,DynamicStruct::Builder output) const383 void JsonCodec::decodeObject(JsonValue::Reader input, StructSchema type, Orphanage orphanage, DynamicStruct::Builder output) const {
384   KJ_REQUIRE(input.isObject(), "Expected object value") { return; }
385   for (auto field: input.getObject()) {
386     KJ_IF_MAYBE(fieldSchema, type.findFieldByName(field.getName())) {
387       decodeField(*fieldSchema, field.getValue(), orphanage, output);
388     } else {
389       // Unknown json fields are ignored to allow schema evolution
390     }
391   }
392 }
393 
decodeField(StructSchema::Field fieldSchema,JsonValue::Reader fieldValue,Orphanage orphanage,DynamicStruct::Builder output) const394 void JsonCodec::decodeField(StructSchema::Field fieldSchema, JsonValue::Reader fieldValue,
395                             Orphanage orphanage, DynamicStruct::Builder output) const {
396   auto fieldType = fieldSchema.getType();
397 
398   KJ_IF_MAYBE(handler, impl->fieldHandlers.find(fieldSchema)) {
399     output.adopt(fieldSchema, (*handler)->decodeBase(*this, fieldValue, fieldType, orphanage));
400   } else {
401     output.adopt(fieldSchema, decode(fieldValue, fieldType, orphanage));
402   }
403 }
404 
decode(JsonValue::Reader input,DynamicStruct::Builder output) const405 void JsonCodec::decode(JsonValue::Reader input, DynamicStruct::Builder output) const {
406   auto type = output.getSchema();
407 
408   KJ_IF_MAYBE(handler, impl->typeHandlers.find(type)) {
409     return (*handler)->decodeStructBase(*this, input, output);
410   }
411 
412   decodeObject(input, type, Orphanage::getForMessageContaining(output), output);
413 }
414 
decode(JsonValue::Reader input,Type type,Orphanage orphanage) const415 Orphan<DynamicValue> JsonCodec::decode(
416     JsonValue::Reader input, Type type, Orphanage orphanage) const {
417   KJ_IF_MAYBE(handler, impl->typeHandlers.find(type)) {
418     return (*handler)->decodeBase(*this, input, type, orphanage);
419   }
420 
421   switch(type.which()) {
422     case schema::Type::VOID:
423       return capnp::VOID;
424     case schema::Type::BOOL:
425       switch (input.which()) {
426         case JsonValue::BOOLEAN:
427           return input.getBoolean();
428         default:
429           KJ_FAIL_REQUIRE("Expected boolean value");
430       }
431     case schema::Type::INT8:
432     case schema::Type::INT16:
433     case schema::Type::INT32:
434     case schema::Type::INT64:
435       // Relies on range check in DynamicValue::Reader::as<IntType>
436       switch (input.which()) {
437         case JsonValue::NUMBER:
438           return input.getNumber();
439         case JsonValue::STRING:
440           return input.getString().parseAs<int64_t>();
441         default:
442           KJ_FAIL_REQUIRE("Expected integer value");
443       }
444     case schema::Type::UINT8:
445     case schema::Type::UINT16:
446     case schema::Type::UINT32:
447     case schema::Type::UINT64:
448       // Relies on range check in DynamicValue::Reader::as<IntType>
449       switch (input.which()) {
450         case JsonValue::NUMBER:
451           return input.getNumber();
452         case JsonValue::STRING:
453           return input.getString().parseAs<uint64_t>();
454         default:
455           KJ_FAIL_REQUIRE("Expected integer value");
456       }
457     case schema::Type::FLOAT32:
458     case schema::Type::FLOAT64:
459       switch (input.which()) {
460         case JsonValue::NULL_:
461           return kj::nan();
462         case JsonValue::NUMBER:
463           return input.getNumber();
464         case JsonValue::STRING:
465           return input.getString().parseAs<double>();
466         default:
467           KJ_FAIL_REQUIRE("Expected float value");
468       }
469     case schema::Type::TEXT:
470       switch (input.which()) {
471         case JsonValue::STRING:
472           return orphanage.newOrphanCopy(input.getString());
473         default:
474           KJ_FAIL_REQUIRE("Expected text value");
475       }
476     case schema::Type::DATA:
477       switch (input.which()) {
478         case JsonValue::ARRAY: {
479           auto array = input.getArray();
480           auto orphan = orphanage.newOrphan<Data>(array.size());
481           auto data = orphan.get();
482           for (auto i: kj::indices(array)) {
483             auto x = array[i].getNumber();
484             KJ_REQUIRE(byte(x) == x, "Number in byte array is not an integer in [0, 255]");
485             data[i] = x;
486           }
487           return kj::mv(orphan);
488         }
489         default:
490           KJ_FAIL_REQUIRE("Expected data value");
491       }
492     case schema::Type::LIST:
493       switch (input.which()) {
494         case JsonValue::ARRAY:
495           return decodeArray(input.getArray(), type.asList(), orphanage);
496         default:
497           KJ_FAIL_REQUIRE("Expected list value") { break; }
498           return orphanage.newOrphan(type.asList(), 0);
499       }
500     case schema::Type::ENUM:
501       switch (input.which()) {
502         case JsonValue::STRING:
503           return DynamicEnum(type.asEnum().getEnumerantByName(input.getString()));
504         default:
505           KJ_FAIL_REQUIRE("Expected enum value") { break; }
506           return DynamicEnum(type.asEnum(), 0);
507       }
508     case schema::Type::STRUCT: {
509       auto structType = type.asStruct();
510       auto orphan = orphanage.newOrphan(structType);
511       decodeObject(input, structType, orphanage, orphan.get());
512       return kj::mv(orphan);
513     }
514     case schema::Type::INTERFACE:
515       KJ_FAIL_REQUIRE("don't know how to JSON-decode capabilities; "
516                       "please register a JsonCodec::Handler for this");
517     case schema::Type::ANY_POINTER:
518       KJ_FAIL_REQUIRE("don't know how to JSON-decode AnyPointer; "
519                       "please register a JsonCodec::Handler for this");
520   }
521 
522   KJ_CLANG_KNOWS_THIS_IS_UNREACHABLE_BUT_GCC_DOESNT;
523 }
524 
525 // -----------------------------------------------------------------------------
526 
527 namespace {
528 
529 class Input {
530 public:
Input(kj::ArrayPtr<const char> input)531   Input(kj::ArrayPtr<const char> input) : wrapped(input) {}
532 
exhausted()533   bool exhausted() {
534     return wrapped.size() == 0 || wrapped.front() == '\0';
535   }
536 
nextChar()537   char nextChar() {
538     KJ_REQUIRE(!exhausted(), "JSON message ends prematurely.");
539     return wrapped.front();
540   }
541 
advance(size_t numBytes=1)542   void advance(size_t numBytes = 1) {
543     KJ_REQUIRE(numBytes <= wrapped.size(), "JSON message ends prematurely.");
544     wrapped = kj::arrayPtr(wrapped.begin() + numBytes, wrapped.end());
545   }
546 
advanceTo(const char * newPos)547   void advanceTo(const char *newPos) {
548     KJ_REQUIRE(wrapped.begin() <= newPos && newPos < wrapped.end(),
549         "JSON message ends prematurely.");
550     wrapped = kj::arrayPtr(newPos, wrapped.end());
551   }
552 
consume(size_t numBytes=1)553   kj::ArrayPtr<const char> consume(size_t numBytes = 1) {
554     auto originalPos = wrapped.begin();
555     advance(numBytes);
556 
557     return kj::arrayPtr(originalPos, wrapped.begin());
558   }
559 
consume(char expected)560   void consume(char expected) {
561     char current = nextChar();
562     KJ_REQUIRE(current == expected, "Unexpected input in JSON message.");
563 
564     advance();
565   }
566 
consume(kj::ArrayPtr<const char> expected)567   void consume(kj::ArrayPtr<const char> expected) {
568     KJ_REQUIRE(wrapped.size() >= expected.size());
569 
570     auto prefix = wrapped.slice(0, expected.size());
571     KJ_REQUIRE(prefix == expected, "Unexpected input in JSON message.");
572 
573     advance(expected.size());
574   }
575 
tryConsume(char expected)576   bool tryConsume(char expected) {
577     bool found = !exhausted() && nextChar() == expected;
578     if (found) { advance(); }
579 
580     return found;
581   }
582 
583   template <typename Predicate>
consumeOne(Predicate && predicate)584   void consumeOne(Predicate&& predicate) {
585     char current = nextChar();
586     KJ_REQUIRE(predicate(current), "Unexpected input in JSON message.");
587 
588     advance();
589   }
590 
591   template <typename Predicate>
consumeWhile(Predicate && predicate)592   kj::ArrayPtr<const char> consumeWhile(Predicate&& predicate) {
593     auto originalPos = wrapped.begin();
594     while (!exhausted() && predicate(nextChar())) { advance(); }
595 
596     return kj::arrayPtr(originalPos, wrapped.begin());
597   }
598 
599   template <typename F>  // Function<void(Input&)>
consumeCustom(F && f)600   kj::ArrayPtr<const char> consumeCustom(F&& f) {
601     // Allows consuming in a custom manner without exposing the wrapped ArrayPtr.
602     auto originalPos = wrapped.begin();
603     f(*this);
604 
605     return kj::arrayPtr(originalPos, wrapped.begin());
606   }
607 
consumeWhitespace()608   void consumeWhitespace() {
609     consumeWhile([](char chr) {
610       return (
611         chr == ' '  ||
612         chr == '\n' ||
613         chr == '\r' ||
614         chr == '\t'
615       );
616     });
617   }
618 
619 
620 private:
621   kj::ArrayPtr<const char> wrapped;
622 
623 };  // class Input
624 
625 class Parser {
626 public:
Parser(size_t maxNestingDepth,kj::ArrayPtr<const char> input)627   Parser(size_t maxNestingDepth, kj::ArrayPtr<const char> input) :
628     maxNestingDepth(maxNestingDepth), input(input), nestingDepth(0) {}
629 
parseValue(JsonValue::Builder & output)630   void parseValue(JsonValue::Builder& output) {
631     input.consumeWhitespace();
632     KJ_DEFER(input.consumeWhitespace());
633 
634     KJ_REQUIRE(!input.exhausted(), "JSON message ends prematurely.");
635 
636     switch (input.nextChar()) {
637       case 'n': input.consume(kj::StringPtr("null"));  output.setNull();         break;
638       case 'f': input.consume(kj::StringPtr("false")); output.setBoolean(false); break;
639       case 't': input.consume(kj::StringPtr("true"));  output.setBoolean(true);  break;
640       case '"': parseString(output); break;
641       case '[': parseArray(output);  break;
642       case '{': parseObject(output); break;
643       case '-': case '0': case '1': case '2': case '3':
644       case '4': case '5': case '6': case '7': case '8':
645       case '9': parseNumber(output); break;
646       default: KJ_FAIL_REQUIRE("Unexpected input in JSON message.");
647     }
648   }
649 
parseNumber(JsonValue::Builder & output)650   void parseNumber(JsonValue::Builder& output) {
651     output.setNumber(consumeNumber().parseAs<double>());
652   }
653 
parseString(JsonValue::Builder & output)654   void parseString(JsonValue::Builder& output) {
655     output.setString(consumeQuotedString());
656   }
657 
parseArray(JsonValue::Builder & output)658   void parseArray(JsonValue::Builder& output) {
659     // TODO(perf): Using orphans leaves holes in the message. It's expected
660     // that a JsonValue is used for interop, and won't be sent or written as a
661     // Cap'n Proto message.  This also applies to parseObject below.
662     kj::Vector<Orphan<JsonValue>> values;
663     auto orphanage = Orphanage::getForMessageContaining(output);
664     bool expectComma = false;
665 
666     input.consume('[');
667     KJ_REQUIRE(++nestingDepth <= maxNestingDepth, "JSON message nested too deeply.");
668     KJ_DEFER(--nestingDepth);
669 
670     while (input.consumeWhitespace(), input.nextChar() != ']') {
671       auto orphan = orphanage.newOrphan<JsonValue>();
672       auto builder = orphan.get();
673 
674       if (expectComma) {
675         input.consumeWhitespace();
676         input.consume(',');
677         input.consumeWhitespace();
678       }
679 
680       parseValue(builder);
681       values.add(kj::mv(orphan));
682 
683       expectComma = true;
684     }
685 
686     output.initArray(values.size());
687     auto array = output.getArray();
688 
689     for (auto i : kj::indices(values)) {
690       array.adoptWithCaveats(i, kj::mv(values[i]));
691     }
692 
693     input.consume(']');
694   }
695 
parseObject(JsonValue::Builder & output)696   void parseObject(JsonValue::Builder& output) {
697     kj::Vector<Orphan<JsonValue::Field>> fields;
698     auto orphanage = Orphanage::getForMessageContaining(output);
699     bool expectComma = false;
700 
701     input.consume('{');
702     KJ_REQUIRE(++nestingDepth <= maxNestingDepth, "JSON message nested too deeply.");
703     KJ_DEFER(--nestingDepth);
704 
705     while (input.consumeWhitespace(), input.nextChar() != '}') {
706       auto orphan = orphanage.newOrphan<JsonValue::Field>();
707       auto builder = orphan.get();
708 
709       if (expectComma) {
710         input.consumeWhitespace();
711         input.consume(',');
712         input.consumeWhitespace();
713       }
714 
715       builder.setName(consumeQuotedString());
716 
717       input.consumeWhitespace();
718       input.consume(':');
719       input.consumeWhitespace();
720 
721       auto valueBuilder = builder.getValue();
722       parseValue(valueBuilder);
723 
724       fields.add(kj::mv(orphan));
725 
726       expectComma = true;
727     }
728 
729     output.initObject(fields.size());
730     auto object = output.getObject();
731 
732     for (auto i : kj::indices(fields)) {
733       object.adoptWithCaveats(i, kj::mv(fields[i]));
734     }
735 
736     input.consume('}');
737   }
738 
inputExhausted()739   bool inputExhausted() { return input.exhausted(); }
740 
741 private:
consumeQuotedString()742   kj::String consumeQuotedString() {
743     input.consume('"');
744     // TODO(perf): Avoid copy / alloc if no escapes encoutered.
745     // TODO(perf): Get statistics on string size and preallocate?
746     kj::Vector<char> decoded;
747 
748     do {
749       auto stringValue = input.consumeWhile([](const char chr) {
750           return chr != '"' && chr != '\\';
751       });
752 
753       decoded.addAll(stringValue);
754 
755       if (input.nextChar() == '\\') {  // handle escapes.
756         input.advance();
757         switch(input.nextChar()) {
758           case '"' : decoded.add('"' ); input.advance(); break;
759           case '\\': decoded.add('\\'); input.advance(); break;
760           case '/' : decoded.add('/' ); input.advance(); break;
761           case 'b' : decoded.add('\b'); input.advance(); break;
762           case 'f' : decoded.add('\f'); input.advance(); break;
763           case 'n' : decoded.add('\n'); input.advance(); break;
764           case 'r' : decoded.add('\r'); input.advance(); break;
765           case 't' : decoded.add('\t'); input.advance(); break;
766           case 'u' :
767             input.consume('u');
768             unescapeAndAppend(input.consume(size_t(4)), decoded);
769             break;
770           default: KJ_FAIL_REQUIRE("Invalid escape in JSON string."); break;
771         }
772       }
773 
774     } while(input.nextChar() != '"');
775 
776     input.consume('"');
777     decoded.add('\0');
778 
779     // TODO(perf): This copy can be eliminated, but I can't find the kj::wayToDoIt();
780     return kj::String(decoded.releaseAsArray());
781   }
782 
consumeNumber()783   kj::String consumeNumber() {
784     auto numArrayPtr = input.consumeCustom([](Input& input) {
785       input.tryConsume('-');
786       if (!input.tryConsume('0')) {
787         input.consumeOne([](char c) { return '1' <= c && c <= '9'; });
788         input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
789       }
790 
791       if (input.tryConsume('.')) {
792         input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
793       }
794 
795       if (input.tryConsume('e') || input.tryConsume('E')) {
796         input.tryConsume('+') || input.tryConsume('-');
797         input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
798       }
799     });
800 
801     KJ_REQUIRE(numArrayPtr.size() > 0, "Expected number in JSON input.");
802 
803     kj::Vector<char> number;
804     number.addAll(numArrayPtr);
805     number.add('\0');
806 
807     return kj::String(number.releaseAsArray());
808   }
809 
810   // TODO(someday): This "interface" is ugly, and won't work if/when surrogates are handled.
unescapeAndAppend(kj::ArrayPtr<const char> hex,kj::Vector<char> & target)811   void unescapeAndAppend(kj::ArrayPtr<const char> hex, kj::Vector<char>& target) {
812     KJ_REQUIRE(hex.size() == 4);
813     int codePoint = 0;
814 
815     for (int i = 0; i < 4; ++i) {
816       char c = hex[i];
817       codePoint <<= 4;
818 
819       if ('0' <= c && c <= '9') {
820         codePoint |= c - '0';
821       } else if ('a' <= c && c <= 'f') {
822         codePoint |= c - 'a';
823       } else if ('A' <= c && c <= 'F') {
824         codePoint |= c - 'A';
825       } else {
826         KJ_FAIL_REQUIRE("Invalid hex digit in unicode escape.", c);
827       }
828     }
829 
830     if (codePoint < 128) {
831       target.add(0x7f & static_cast<char>(codePoint));
832     } else {
833       // TODO(perf): This is sorta malloc-heavy...
834       char16_t u = codePoint;
835       target.addAll(kj::decodeUtf16(kj::arrayPtr(&u, 1)));
836     }
837   }
838 
  const size_t maxNestingDepth;
  // Nesting limit for this parse. NOTE(review): presumably initialized from the first
  // constructor argument (decodeRaw() passes the codec's configured `maxNestingDepth`) --
  // the constructor is outside this excerpt, confirm.

  Input input;
  // Cursor over the JSON text; the consume*() helpers read from it and advance it.

  size_t nestingDepth;
  // NOTE(review): presumably the current nesting level, compared against `maxNestingDepth`;
  // the code that updates it is outside this excerpt.
842 
843 
844 };  // class Parser
845 
846 }  // namespace
847 
848 
void JsonCodec::decodeRaw(kj::ArrayPtr<const char> input, JsonValue::Builder output) const {
  // Parses the JSON text in `input` as a single value into `output`, using the codec's
  // configured maximum nesting depth. Fails if the parser reports that input remains after
  // that value has been parsed.
  Parser parser(impl->maxNestingDepth, input);
  parser.parseValue(output);

  // NOTE(review): whether trailing whitespace counts as remaining input is decided by
  // Parser::inputExhausted(), which is defined outside this excerpt.
  KJ_REQUIRE(parser.inputExhausted(), "Input remains after parsing JSON.");
}
855 
856 // -----------------------------------------------------------------------------
857 
Orphan<DynamicValue> JsonCodec::HandlerBase::decodeBase(
    const JsonCodec& codec, JsonValue::Reader input, Type type, Orphanage orphanage) const {
  // Base-class implementation: unconditionally fails an assertion and never returns a value.
  // NOTE(review): presumably handler subclasses override whichever of decodeBase() /
  // decodeStructBase() matches their value type, so landing here means the handler was used
  // with the wrong kind of type -- confirm against the declarations in json.h.
  KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch");
}
void JsonCodec::HandlerBase::decodeStructBase(
    const JsonCodec& codec, JsonValue::Reader input, DynamicStruct::Builder output) const {
  // Base-class implementation: unconditionally fails an assertion; `output` is left untouched.
  // NOTE(review): presumably only struct handlers override this, so landing here means a
  // non-struct handler was asked to decode into a struct builder -- confirm against json.h.
  KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch");
}
866 
addTypeHandlerImpl(Type type,HandlerBase & handler)867 void JsonCodec::addTypeHandlerImpl(Type type, HandlerBase& handler) {
868   impl->typeHandlers.upsert(type, &handler, [](HandlerBase*& existing, HandlerBase* replacement) {
869     KJ_REQUIRE(existing == replacement, "type already has a different registered handler");
870   });
871 }
872 
addFieldHandlerImpl(StructSchema::Field field,Type type,HandlerBase & handler)873 void JsonCodec::addFieldHandlerImpl(StructSchema::Field field, Type type, HandlerBase& handler) {
874   KJ_REQUIRE(type == field.getType(),
875       "handler type did not match field type for addFieldHandler()");
876   impl->fieldHandlers.upsert(field, &handler, [](HandlerBase*& existing, HandlerBase* replacement) {
877     KJ_REQUIRE(existing == replacement, "field already has a different registered handler");
878   });
879 }
880 
881 // =======================================================================================
882 
// IDs of the annotations that the handlers below look for; each is compared against
// `anno.getId()` while walking a schema node's, field's or enumerant's annotation list.
// NOTE(review): presumably these must match the annotation declarations in the JSON compat
// schema file -- confirm and keep in sync.
static constexpr uint64_t JSON_NAME_ANNOTATION_ID = 0xfa5b1fd61c2e7c3dull;
static constexpr uint64_t JSON_FLATTEN_ANNOTATION_ID = 0x82d3e852af0336bfull;
static constexpr uint64_t JSON_DISCRIMINATOR_ANNOTATION_ID = 0xcfa794e8d19a0162ull;
static constexpr uint64_t JSON_BASE64_ANNOTATION_ID = 0xd7d879450a253e4bull;
static constexpr uint64_t JSON_HEX_ANNOTATION_ID = 0xf061e22f0ae5c7b5ull;
888 
889 class JsonCodec::Base64Handler final: public JsonCodec::Handler<capnp::Data> {
890 public:
encode(const JsonCodec & codec,capnp::Data::Reader input,JsonValue::Builder output) const891   void encode(const JsonCodec& codec, capnp::Data::Reader input, JsonValue::Builder output) const {
892     output.setString(kj::encodeBase64(input));
893   }
894 
decode(const JsonCodec & codec,JsonValue::Reader input,Orphanage orphanage) const895   Orphan<capnp::Data> decode(const JsonCodec& codec, JsonValue::Reader input,
896                              Orphanage orphanage) const {
897     return orphanage.newOrphanCopy(capnp::Data::Reader(kj::decodeBase64(input.getString())));
898   }
899 };
900 
901 class JsonCodec::HexHandler final: public JsonCodec::Handler<capnp::Data> {
902 public:
encode(const JsonCodec & codec,capnp::Data::Reader input,JsonValue::Builder output) const903   void encode(const JsonCodec& codec, capnp::Data::Reader input, JsonValue::Builder output) const {
904     output.setString(kj::encodeHex(input));
905   }
906 
decode(const JsonCodec & codec,JsonValue::Reader input,Orphanage orphanage) const907   Orphan<capnp::Data> decode(const JsonCodec& codec, JsonValue::Reader input,
908                              Orphanage orphanage) const {
909     return orphanage.newOrphanCopy(capnp::Data::Reader(kj::decodeHex(input.getString())));
910   }
911 };
912 
913 class JsonCodec::AnnotatedHandler final: public JsonCodec::Handler<DynamicStruct> {
914 public:
  AnnotatedHandler(JsonCodec& codec, StructSchema schema,
                   kj::Maybe<json::DiscriminatorOptions::Reader> discriminator,
                   kj::Maybe<kj::StringPtr> unionDeclName,
                   kj::Vector<Schema>& dependencies)
      : schema(schema) {
    // Builds the lookup tables (`fields`, `fieldsByName`, `unionTagValues`, `unionTagName`)
    // used to encode and decode `schema` according to its JSON annotations.
    //
    // `discriminator` / `unionDeclName` are passed down by a parent handler when this struct is
    // a group field (see the group branch below); handleByAnnotation() passes null for both.
    // Handlers for group fields and flattened struct fields are loaded through `codec`, and
    // per-field base64/hex handlers are registered on it. Struct, enum and interface types
    // referenced by fields that have no registered type handler are appended to `dependencies`.
    auto schemaProto = schema.getProto();
    auto typeName = schemaProto.getDisplayName();

    if (discriminator == nullptr) {
      // There are two cases of unions:
      // * Named unions, which are special cases of named groups. In this case, the union may be
      //   annotated by annotating the field. In this case, we receive a non-null `discriminator`
      //   as a constructor parameter, and schemaProto.getAnnotations() must be empty because
      //   it's not possible to annotate a group's type (because the type is anonymous).
      // * Unnamed unions, of which there can only be one in any particular scope. In this case,
      //   the parent struct type itself is annotated.
      // So if we received `null` as the constructor parameter, check for annotations on the struct
      // type.
      for (auto anno: schemaProto.getAnnotations()) {
        switch (anno.getId()) {
          case JSON_DISCRIMINATOR_ANNOTATION_ID:
            discriminator = anno.getValue().getStruct().getAs<json::DiscriminatorOptions>();
            break;
        }
      }
    }

    // With a discriminator, reserve the JSON names of the tag member and, if one is
    // configured, of the shared value member.
    KJ_IF_MAYBE(d, discriminator) {
      if (d->hasName()) {
        unionTagName = d->getName();
      } else {
        // No explicit tag name: fall back to the name supplied by the parent, which may be null.
        unionTagName = unionDeclName;
      }
      KJ_IF_MAYBE(u, unionTagName) {
        fieldsByName.insert(*u, FieldNameInfo {
          FieldNameInfo::UNION_TAG, 0, 0, nullptr
        });
      }

      if (d->hasValueName()) {
        fieldsByName.insert(d->getValueName(), FieldNameInfo {
          FieldNameInfo::UNION_VALUE, 0, 0, nullptr
        });
      }
    }

    discriminantOffset = schemaProto.getStruct().getDiscriminantOffset();

    // Compute one FieldInfo per schema field; the loop body also fills fieldsByName and
    // unionTagValues and collects dependencies as it goes.
    fields = KJ_MAP(field, schema.getFields()) {
      auto fieldProto = field.getProto();
      auto type = field.getType();
      auto fieldName = fieldProto.getName();

      FieldNameInfo nameInfo;
      nameInfo.index = field.getIndex();
      nameInfo.type = FieldNameInfo::NORMAL;
      nameInfo.prefixLength = 0;

      FieldInfo info;
      info.name = fieldName;

      kj::Maybe<json::DiscriminatorOptions::Reader> subDiscriminator;
      bool flattened = false;
      for (auto anno: field.getProto().getAnnotations()) {
        switch (anno.getId()) {
          case JSON_NAME_ANNOTATION_ID:
            info.name = anno.getValue().getText();
            break;
          case JSON_FLATTEN_ANNOTATION_ID:
            KJ_REQUIRE(type.isStruct(), "only struct types can be flattened", fieldName, typeName);
            flattened = true;
            info.prefix = anno.getValue().getStruct().getAs<json::FlattenOptions>().getPrefix();
            break;
          case JSON_DISCRIMINATOR_ANNOTATION_ID:
            KJ_REQUIRE(fieldProto.isGroup(), "only unions can have discriminator");
            subDiscriminator = anno.getValue().getStruct().getAs<json::DiscriminatorOptions>();
            break;
          case JSON_BASE64_ANNOTATION_ID: {
            KJ_REQUIRE(field.getType().isData(), "only Data can be marked for base64 encoding");
            // A single function-local static handler instance serves every base64 field.
            static Base64Handler handler;
            codec.addFieldHandler(field, handler);
            break;
          }
          case JSON_HEX_ANNOTATION_ID: {
            KJ_REQUIRE(field.getType().isData(), "only Data can be marked for hex encoding");
            // A single function-local static handler instance serves every hex field.
            static HexHandler handler;
            codec.addFieldHandler(field, handler);
            break;
          }
        }
      }

      if (fieldProto.isGroup()) {
        // Load group type handler now, even if not flattened, so that we can pass its
        // `subDiscriminator`.
        kj::Maybe<kj::StringPtr> subFieldName;
        if (flattened) {
          // If the group was flattened, then we allow its field name to be used as the
          // discriminator name, so that the discriminator doesn't have to explicitly specify a
          // name.
          subFieldName = fieldName;
        }
        auto& subHandler = codec.loadAnnotatedHandler(
            type.asStruct(), subDiscriminator, subFieldName, dependencies);
        if (flattened) {
          info.flattenHandler = subHandler;
        }
      } else if (type.isStruct()) {
        if (flattened) {
          info.flattenHandler = codec.loadAnnotatedHandler(
              type.asStruct(), nullptr, nullptr, dependencies);
        }
      }

      bool isUnionMember = fieldProto.getDiscriminantValue() != schema::Field::NO_DISCRIMINANT;

      KJ_IF_MAYBE(fh, info.flattenHandler) {
        // Set up fieldsByName for each of the child's fields.
        for (auto& entry: fh->fieldsByName) {
          kj::StringPtr flattenedName;
          kj::String ownName;
          if (info.prefix.size() > 0) {
            // The prefixed name has to be built, so it needs owned storage. `ownName` is moved
            // into the map value below while `flattenedName` (the key) keeps pointing at its
            // characters.
            ownName = kj::str(info.prefix, entry.key);
            flattenedName = ownName;
          } else {
            flattenedName = entry.key;
          }

          fieldsByName.upsert(flattenedName, FieldNameInfo {
            isUnionMember ? FieldNameInfo::FLATTENED_FROM_UNION : FieldNameInfo::FLATTENED,
            field.getIndex(), (uint)info.prefix.size(), kj::mv(ownName)
          }, [&](FieldNameInfo& existing, FieldNameInfo&& replacement) {
            // A name collision is tolerated only when both entries come from flattened union
            // members; the existing entry is kept as-is.
            KJ_REQUIRE(existing.type == FieldNameInfo::FLATTENED_FROM_UNION &&
                       replacement.type == FieldNameInfo::FLATTENED_FROM_UNION,
                "flattened members have the same name and are not mutually exclusive");
          });
        }
      }

      // Remember the name before it is possibly replaced by the discriminator's value name
      // below; this is the string used as the union tag value.
      info.nameForDiscriminant = info.name;

      if (!flattened) {
        bool isUnionWithValueName = false;
        if (isUnionMember) {
          KJ_IF_MAYBE(d, discriminator) {
            if (d->hasValueName()) {
              info.name = d->getValueName();
              isUnionWithValueName = true;
            }
          }
        }

        if (!isUnionWithValueName) {
          // Union members that share the discriminator's value name are not registered
          // individually; the UNION_VALUE entry inserted earlier covers them.
          fieldsByName.insert(info.name, kj::mv(nameInfo));
        }
      }

      if (isUnionMember) {
        unionTagValues.insert(info.nameForDiscriminant, field);
      }

      // Look for dependencies that we need to add.
      while (type.isList()) type = type.asList().getElementType();
      if (codec.impl->typeHandlers.find(type) == nullptr) {
        switch (type.which()) {
          case schema::Type::STRUCT:
            dependencies.add(type.asStruct());
            break;
          case schema::Type::ENUM:
            dependencies.add(type.asEnum());
            break;
          case schema::Type::INTERFACE:
            dependencies.add(type.asInterface());
            break;
          default:
            break;
        }
      }

      return info;
    };
  }
1097 
  const StructSchema schema;
  // The struct type this handler was built for. decodeField() asserts that the builder it is
  // given has exactly this schema.
1099 
encode(const JsonCodec & codec,DynamicStruct::Reader input,JsonValue::Builder output) const1100   void encode(const JsonCodec& codec, DynamicStruct::Reader input,
1101               JsonValue::Builder output) const override {
1102     kj::Vector<FlattenedField> flattenedFields;
1103     gatherForEncode(codec, input, nullptr, nullptr, flattenedFields);
1104 
1105     auto outs = output.initObject(flattenedFields.size());
1106     for (auto i: kj::indices(flattenedFields)) {
1107       auto& in = flattenedFields[i];
1108       auto out = outs[i];
1109       out.setName(in.name);
1110       KJ_SWITCH_ONEOF(in.type) {
1111         KJ_CASE_ONEOF(type, Type) {
1112           codec.encode(in.value, type, out.initValue());
1113         }
1114         KJ_CASE_ONEOF(field, StructSchema::Field) {
1115           codec.encodeField(field, in.value, out.initValue());
1116         }
1117       }
1118     }
1119   }
1120 
decode(const JsonCodec & codec,JsonValue::Reader input,DynamicStruct::Builder output) const1121   void decode(const JsonCodec& codec, JsonValue::Reader input,
1122               DynamicStruct::Builder output) const override {
1123     KJ_REQUIRE(input.isObject());
1124     kj::HashSet<const void*> unionsSeen;
1125     kj::Vector<JsonValue::Field::Reader> retries;
1126     for (auto field: input.getObject()) {
1127       if (!decodeField(codec, field.getName(), field.getValue(), output, unionsSeen)) {
1128         retries.add(field);
1129       }
1130     }
1131     while (!retries.empty()) {
1132       auto retriesCopy = kj::mv(retries);
1133       KJ_ASSERT(retries.empty());
1134       for (auto field: retriesCopy) {
1135         if (!decodeField(codec, field.getName(), field.getValue(), output, unionsSeen)) {
1136           retries.add(field);
1137         }
1138       }
1139       if (retries.size() == retriesCopy.size()) {
1140         // We made no progress in this iteration. Give up on the remaining fields.
1141         break;
1142       }
1143     }
1144   }
1145 
1146 private:
  struct FieldInfo {
    // Per-field data computed once by the constructor.

    kj::StringPtr name;
    // JSON member name used when encoding the field's value: the schema name, or the name
    // annotation's value if present, or -- for a non-flattened union member whose discriminator
    // sets a value name -- that value name.

    kj::StringPtr nameForDiscriminant;
    // The field's name as it stood before any value-name substitution. Emitted as the union
    // tag's value when encoding and used as the key in `unionTagValues`.

    kj::Maybe<const AnnotatedHandler&> flattenHandler;
    // Set only for fields annotated as flattened: the handler for the field's struct/group type.

    kj::StringPtr prefix;
    // Prefix taken from the flatten annotation's options; prepended to the names of the
    // flattened child's members. Left empty for fields without the annotation.
  };

  kj::Array<FieldInfo> fields;
  // Maps field index -> info about the field
1156 
  struct FieldNameInfo {
    // What a given JSON member name refers to; the mapped value of `fieldsByName`.

    enum {
      NORMAL,
      // This is a normal field with the given `index`.

      FLATTENED,
      // This is a field of a flattened inner struct or group (that is not in a union). `index`
      // is the field index of the particular struct/group field.

      UNION_TAG,
      // The parent struct is a flattened union, and this field is the discriminant tag. It is a
      // string field whose name determines the union type. `index` is not used.

      FLATTENED_FROM_UNION,
      // The parent struct is a flattened union, and some of the union's members are flattened
      // structs or groups, and this field is possibly a member of one or more of them. `index`
      // is not used, because it's possible that the same field name appears in multiple variants.
      // Instead, the parser must find the union tag, and then can descend and attempt to parse
      // the field in the context of whichever variant is selected.

      UNION_VALUE
      // This field is the value of a discriminated union that has `valueName` set.
    } type;

    uint index;
    // For `NORMAL` and `FLATTENED`, the index of the field in schema.getFields().

    uint prefixLength;
    // Number of leading characters that decodeField() slices off the JSON name before passing
    // it to the flattened child's handler. The constructor sets it to the flatten prefix's
    // length for flattened entries and to 0 for all others.

    kj::String ownName;
    // Owns the characters of the map key when that key had to be built by concatenating a
    // prefix and a child name; null otherwise.
  };
1187 
  kj::HashMap<kj::StringPtr, FieldNameInfo> fieldsByName;
  // Maps JSON names to info needed to parse them.
  //
  // The keys do not own their characters: the constructor inserts names obtained from the
  // schema and its annotations, or, for prefixed flattened names, a pointer to the string
  // that is then moved into the mapped FieldNameInfo's `ownName`.

  kj::HashMap<kj::StringPtr, StructSchema::Field> unionTagValues;
  // If the parent struct is a flattened union, it has a tag field which is a string with one of
  // these values. The map maps to the union member to set.

  kj::Maybe<kj::StringPtr> unionTagName;
  // If the parent struct is a flattened union, the name of the "tag" field.

  uint discriminantOffset;
  // Shortcut for schema.getProto().getStruct().getDiscriminantOffset(), used in a hack to identify
  // which unions have been seen.
1201 
  struct FlattenedField {
    // One JSON object member collected by gatherForEncode() and later written out by encode().

    kj::String ownName;
    // Holds prefix + name when a non-empty prefix was given; null otherwise. Must stay declared
    // before `name`, whose initializer reads it.

    kj::StringPtr name;
    // The member's JSON name: refers to `ownName` when a prefix was applied, otherwise to the
    // `name` argument (not owned).

    kj::OneOf<StructSchema::Field, Type> type;
    // Either the schema field the value came from or a bare type; encode() dispatches on
    // which of the two is present.

    DynamicValue::Reader value;
    // The value to encode.

    FlattenedField(kj::StringPtr prefix, kj::StringPtr name,
                   kj::OneOf<StructSchema::Field, Type> type, DynamicValue::Reader value)
        : ownName(prefix.size() > 0 ? kj::str(prefix, name) : nullptr),
          name(prefix.size() > 0 ? ownName : name),
          type(type), value(value) {}
  };
1214 
gatherForEncode(const JsonCodec & codec,DynamicValue::Reader input,kj::StringPtr prefix,kj::StringPtr morePrefix,kj::Vector<FlattenedField> & flattenedFields) const1215   void gatherForEncode(const JsonCodec& codec, DynamicValue::Reader input,
1216                        kj::StringPtr prefix, kj::StringPtr morePrefix,
1217                        kj::Vector<FlattenedField>& flattenedFields) const {
1218     kj::String ownPrefix;
1219     if (morePrefix.size() > 0) {
1220       if (prefix.size() > 0) {
1221         ownPrefix = kj::str(prefix, morePrefix);
1222         prefix = ownPrefix;
1223       } else {
1224         prefix = morePrefix;
1225       }
1226     }
1227 
1228     auto reader = input.as<DynamicStruct>();
1229     auto schema = reader.getSchema();
1230     for (auto field: schema.getNonUnionFields()) {
1231       auto& info = fields[field.getIndex()];
1232       if (!reader.has(field, codec.impl->hasMode)) {
1233         // skip
1234       } else KJ_IF_MAYBE(handler, info.flattenHandler) {
1235         handler->gatherForEncode(codec, reader.get(field), prefix, info.prefix, flattenedFields);
1236       } else {
1237         flattenedFields.add(FlattenedField {
1238             prefix, info.name, field, reader.get(field) });
1239       }
1240     }
1241 
1242     KJ_IF_MAYBE(which, reader.which()) {
1243       auto& info = fields[which->getIndex()];
1244       KJ_IF_MAYBE(tag, unionTagName) {
1245         flattenedFields.add(FlattenedField {
1246             prefix, *tag, Type(schema::Type::TEXT), Text::Reader(info.nameForDiscriminant) });
1247       }
1248 
1249       KJ_IF_MAYBE(handler, info.flattenHandler) {
1250         handler->gatherForEncode(codec, reader.get(*which), prefix, info.prefix, flattenedFields);
1251       } else {
1252         auto type = which->getType();
1253         if (type.which() == schema::Type::VOID && unionTagName != nullptr) {
1254           // When we have an explicit union discriminant, we don't need to encode void fields.
1255         } else {
1256           flattenedFields.add(FlattenedField {
1257               prefix, info.name, *which, reader.get(*which) });
1258         }
1259       }
1260     }
1261   }
1262 
decodeField(const JsonCodec & codec,kj::StringPtr name,JsonValue::Reader value,DynamicStruct::Builder output,kj::HashSet<const void * > & unionsSeen) const1263   bool decodeField(const JsonCodec& codec, kj::StringPtr name, JsonValue::Reader value,
1264                    DynamicStruct::Builder output, kj::HashSet<const void*>& unionsSeen) const {
1265     KJ_ASSERT(output.getSchema() == schema);
1266 
1267     KJ_IF_MAYBE(info, fieldsByName.find(name)) {
1268       switch (info->type) {
1269         case FieldNameInfo::NORMAL: {
1270           auto field = output.getSchema().getFields()[info->index];
1271           codec.decodeField(field, value, Orphanage::getForMessageContaining(output), output);
1272           return true;
1273         }
1274         case FieldNameInfo::FLATTENED:
1275           return KJ_ASSERT_NONNULL(fields[info->index].flattenHandler)
1276               .decodeField(codec, name.slice(info->prefixLength), value,
1277                   output.get(output.getSchema().getFields()[info->index]).as<DynamicStruct>(),
1278                   unionsSeen);
1279         case FieldNameInfo::UNION_TAG: {
1280           KJ_REQUIRE(value.isString(), "Expected string value.");
1281 
1282           // Mark that we've seen a union tag for this struct.
1283           const void* ptr = getUnionInstanceIdentifier(output);
1284           KJ_IF_MAYBE(field, unionTagValues.find(value.getString())) {
1285             // clear() has the side-effect of activating this member of the union, without
1286             // allocating any objects.
1287             output.clear(*field);
1288             unionsSeen.insert(ptr);
1289           }
1290           return true;
1291         }
1292         case FieldNameInfo::FLATTENED_FROM_UNION: {
1293           const void* ptr = getUnionInstanceIdentifier(output);
1294           if (unionsSeen.contains(ptr)) {
1295             auto variant = KJ_ASSERT_NONNULL(output.which());
1296             return KJ_ASSERT_NONNULL(fields[variant.getIndex()].flattenHandler)
1297                 .decodeField(codec, name.slice(info->prefixLength), value,
1298                     output.get(variant).as<DynamicStruct>(), unionsSeen);
1299           } else {
1300             // We haven't seen the union tag yet, so we can't parse this field yet. Try again later.
1301             return false;
1302           }
1303         }
1304         case FieldNameInfo::UNION_VALUE: {
1305           const void* ptr = getUnionInstanceIdentifier(output);
1306           if (unionsSeen.contains(ptr)) {
1307             auto variant = KJ_ASSERT_NONNULL(output.which());
1308             codec.decodeField(variant, value, Orphanage::getForMessageContaining(output), output);
1309             return true;
1310           } else {
1311             // We haven't seen the union tag yet, so we can't parse this field yet. Try again later.
1312             return false;
1313           }
1314         }
1315       }
1316 
1317       KJ_UNREACHABLE;
1318     } else {
1319       // Ignore undefined field.
1320       return true;
1321     }
1322   }
1323 
getUnionInstanceIdentifier(DynamicStruct::Builder obj) const1324   const void* getUnionInstanceIdentifier(DynamicStruct::Builder obj) const {
1325     // Gets a value uniquely identifying an instance of a union.
1326     // HACK: We return a poniter to the union's discriminant within the underlying buffer.
1327     return reinterpret_cast<const uint16_t*>(
1328         AnyStruct::Reader(obj.asReader()).getDataSection().begin()) + discriminantOffset;
1329   }
1330 };
1331 
1332 class JsonCodec::AnnotatedEnumHandler final: public JsonCodec::Handler<DynamicEnum> {
1333 public:
AnnotatedEnumHandler(EnumSchema schema)1334   AnnotatedEnumHandler(EnumSchema schema): schema(schema) {
1335     auto enumerants = schema.getEnumerants();
1336     auto builder = kj::heapArrayBuilder<kj::StringPtr>(enumerants.size());
1337 
1338     for (auto e: enumerants) {
1339       auto proto = e.getProto();
1340       kj::StringPtr name = proto.getName();
1341 
1342       for (auto anno: proto.getAnnotations()) {
1343         switch (anno.getId()) {
1344           case JSON_NAME_ANNOTATION_ID:
1345             name = anno.getValue().getText();
1346             break;
1347         }
1348       }
1349 
1350       builder.add(name);
1351       nameToValue.insert(name, e.getIndex());
1352     }
1353 
1354     valueToName = builder.finish();
1355   }
1356 
encode(const JsonCodec & codec,DynamicEnum input,JsonValue::Builder output) const1357   void encode(const JsonCodec& codec, DynamicEnum input, JsonValue::Builder output) const override {
1358     KJ_IF_MAYBE(e, input.getEnumerant()) {
1359       KJ_ASSERT(e->getIndex() < valueToName.size());
1360       output.setString(valueToName[e->getIndex()]);
1361     } else {
1362       output.setNumber(input.getRaw());
1363     }
1364   }
1365 
decode(const JsonCodec & codec,JsonValue::Reader input) const1366   DynamicEnum decode(const JsonCodec& codec, JsonValue::Reader input) const override {
1367     if (input.isNumber()) {
1368       return DynamicEnum(schema, static_cast<uint16_t>(input.getNumber()));
1369     } else {
1370       uint16_t val = KJ_REQUIRE_NONNULL(nameToValue.find(input.getString()),
1371           "invalid enum value", input.getString());
1372       return DynamicEnum(schema.getEnumerants()[val]);
1373     }
1374   }
1375 
1376 private:
1377   EnumSchema schema;
1378   kj::Array<kj::StringPtr> valueToName;
1379   kj::HashMap<kj::StringPtr, uint16_t> nameToValue;
1380 };
1381 
1382 class JsonCodec::JsonValueHandler final: public JsonCodec::Handler<DynamicStruct> {
1383 public:
encode(const JsonCodec & codec,DynamicStruct::Reader input,JsonValue::Builder output) const1384   void encode(const JsonCodec& codec, DynamicStruct::Reader input,
1385               JsonValue::Builder output) const override {
1386 #if _MSC_VER
1387     // TODO(msvc): Hack to work around missing AnyStruct::Builder constructor on MSVC.
1388     rawCopy(input, toDynamic(output));
1389 #else
1390     rawCopy(input, kj::mv(output));
1391 #endif
1392   }
1393 
decode(const JsonCodec & codec,JsonValue::Reader input,DynamicStruct::Builder output) const1394   void decode(const JsonCodec& codec, JsonValue::Reader input,
1395               DynamicStruct::Builder output) const override {
1396     rawCopy(input, kj::mv(output));
1397   }
1398 
1399 private:
rawCopy(AnyStruct::Reader input,AnyStruct::Builder output) const1400   void rawCopy(AnyStruct::Reader input, AnyStruct::Builder output) const {
1401     // HACK: Manually copy using AnyStruct, so that if JsonValue's definition changes, this code
1402     //   doesn't need to be updated. However, note that if JsonValue ever adds new fields that
1403     //   change its size, and the input struct is a newer version than the output, we may lose
1404     //   the new fields. Technically the "correct" thing to do would be to allocate the output
1405     //   struct to be exactly the same size as the input, but JsonCodec's Handler interface is
1406     //   not designed to allow that -- it passes in an already-allocated builder. Oops.
1407     auto dataIn = input.getDataSection();
1408     auto dataOut = output.getDataSection();
1409     memcpy(dataOut.begin(), dataIn.begin(), kj::min(dataOut.size(), dataIn.size()));
1410 
1411     auto ptrIn = input.getPointerSection();
1412     auto ptrOut = output.getPointerSection();
1413     for (auto i: kj::zeroTo(kj::min(ptrIn.size(), ptrOut.size()))) {
1414       ptrOut[i].set(ptrIn[i]);
1415     }
1416   }
1417 };
1418 
loadAnnotatedHandler(StructSchema schema,kj::Maybe<json::DiscriminatorOptions::Reader> discriminator,kj::Maybe<kj::StringPtr> unionDeclName,kj::Vector<Schema> & dependencies)1419 JsonCodec::AnnotatedHandler& JsonCodec::loadAnnotatedHandler(
1420       StructSchema schema, kj::Maybe<json::DiscriminatorOptions::Reader> discriminator,
1421       kj::Maybe<kj::StringPtr> unionDeclName, kj::Vector<Schema>& dependencies) {
1422   auto& entry = impl->annotatedHandlers.upsert(schema, nullptr,
1423       [&](kj::Maybe<kj::Own<AnnotatedHandler>>& existing, auto dummy) {
1424     KJ_ASSERT(existing != nullptr,
1425         "cyclic JSON flattening detected", schema.getProto().getDisplayName());
1426   });
1427 
1428   KJ_IF_MAYBE(v, entry.value) {
1429     // Already exists.
1430     return **v;
1431   } else {
1432     // Not seen before.
1433     auto newHandler = kj::heap<AnnotatedHandler>(
1434           *this, schema, discriminator, unionDeclName, dependencies);
1435     auto& result = *newHandler;
1436 
1437     // Map may have changed, so we have to look up again.
1438     KJ_ASSERT_NONNULL(impl->annotatedHandlers.find(schema)) = kj::mv(newHandler);
1439 
1440     addTypeHandler(schema, result);
1441     return result;
1442   };
1443 }
1444 
handleByAnnotation(Schema schema)1445 void JsonCodec::handleByAnnotation(Schema schema) {
1446   switch (schema.getProto().which()) {
1447     case schema::Node::STRUCT: {
1448       if (schema.getProto().getId() == capnp::typeId<JsonValue>()) {
1449         // Special handler for JsonValue.
1450         static JsonValueHandler GLOBAL_HANDLER;
1451         addTypeHandler(schema.asStruct(), GLOBAL_HANDLER);
1452       } else {
1453         kj::Vector<Schema> dependencies;
1454         loadAnnotatedHandler(schema.asStruct(), nullptr, nullptr, dependencies);
1455         for (auto dep: dependencies) {
1456           handleByAnnotation(dep);
1457         }
1458       }
1459       break;
1460     }
1461     case schema::Node::ENUM: {
1462       auto enumSchema = schema.asEnum();
1463       impl->annotatedEnumHandlers.findOrCreate(enumSchema, [&]() {
1464         auto handler = kj::heap<AnnotatedEnumHandler>(enumSchema);
1465         addTypeHandler(enumSchema, *handler);
1466         return kj::HashMap<Type, kj::Own<AnnotatedEnumHandler>>::Entry {
1467             enumSchema, kj::mv(handler) };
1468       });
1469       break;
1470     }
1471     default:
1472       break;
1473   }
1474 }
1475 
1476 } // namespace capnp
1477