1 //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file supports working with JSON data.
11 ///
12 /// It comprises:
13 ///
14 /// - classes which hold dynamically-typed parsed JSON structures
15 ///   These are value types that can be composed, inspected, and modified.
16 ///   See json::Value, and the related types json::Object and json::Array.
17 ///
18 /// - functions to parse JSON text into Values, and to serialize Values to text.
19 ///   See parse(), operator<<, and format_provider.
20 ///
21 /// - a convention and helpers for mapping between json::Value and user-defined
22 ///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
23 ///
24 /// - an output API json::OStream which can emit JSON without materializing
25 ///   all structures as json::Value.
26 ///
27 /// Typically, JSON data would be read from an external source, parsed into
28 /// a Value, and then converted into some native data structure before doing
29 /// real work on it. (And vice versa when writing).
30 ///
31 /// Other serialization mechanisms you may consider:
32 ///
33 /// - YAML is also text-based, and more human-readable than JSON. It's a more
34 ///   complex format and data model, and YAML parsers aren't ubiquitous.
35 ///   YAMLParser.h is a streaming parser suitable for parsing large documents
36 ///   (including JSON, as YAML is a superset). It can be awkward to use
37 ///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
38 ///   declarative than the toJSON/fromJSON conventions here.
39 ///
40 /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
41 ///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
42 ///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
43 ///
44 //===---------------------------------------------------------------------===//
45 
46 #ifndef LLVM_SUPPORT_JSON_H
47 #define LLVM_SUPPORT_JSON_H
48 
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <limits>
#include <map>
56 
57 namespace llvm {
58 namespace json {
59 
60 // === String encodings ===
61 //
62 // JSON strings are character sequences (not byte sequences like std::string).
63 // We need to know the encoding, and for simplicity only support UTF-8.
64 //
65 //   - When parsing, invalid UTF-8 is a syntax error like any other
66 //
67 //   - When creating Values from strings, callers must ensure they are UTF-8.
68 //        with asserts on, invalid UTF-8 will crash the program
69 //        with asserts off, we'll substitute the replacement character (U+FFFD)
70 //     Callers can use json::isUTF8() and json::fixUTF8() for validation.
71 //
72 //   - When retrieving strings from Values (e.g. asString()), the result will
73 //     always be valid UTF-8.
74 
75 /// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first error.
77 bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
78 /// Replaces invalid UTF-8 sequences in \p S with the replacement character
79 /// (U+FFFD). The returned string is valid UTF-8.
80 /// This is much slower than isUTF8, so test that first.
81 std::string fixUTF8(llvm::StringRef S);
82 
83 class Array;
84 class ObjectKey;
85 class Value;
86 template <typename T> Value toJSON(const llvm::Optional<T> &Opt);
87 
/// An Object is a JSON object, which maps strings to heterogeneous JSON values.
89 /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
90 class Object {
91   using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
92   Storage M;
93 
94 public:
95   using key_type = ObjectKey;
96   using mapped_type = Value;
97   using value_type = Storage::value_type;
98   using iterator = Storage::iterator;
99   using const_iterator = Storage::const_iterator;
100 
101   Object() = default;
102   // KV is a trivial key-value struct for list-initialization.
103   // (using std::pair forces extra copies).
104   struct KV;
105   explicit Object(std::initializer_list<KV> Properties);
106 
begin()107   iterator begin() { return M.begin(); }
begin()108   const_iterator begin() const { return M.begin(); }
end()109   iterator end() { return M.end(); }
end()110   const_iterator end() const { return M.end(); }
111 
empty()112   bool empty() const { return M.empty(); }
size()113   size_t size() const { return M.size(); }
114 
clear()115   void clear() { M.clear(); }
116   std::pair<iterator, bool> insert(KV E);
117   template <typename... Ts>
try_emplace(const ObjectKey & K,Ts &&...Args)118   std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
119     return M.try_emplace(K, std::forward<Ts>(Args)...);
120   }
121   template <typename... Ts>
try_emplace(ObjectKey && K,Ts &&...Args)122   std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
123     return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
124   }
125   bool erase(StringRef K);
erase(iterator I)126   void erase(iterator I) { M.erase(I); }
127 
find(StringRef K)128   iterator find(StringRef K) { return M.find_as(K); }
find(StringRef K)129   const_iterator find(StringRef K) const { return M.find_as(K); }
130   // operator[] acts as if Value was default-constructible as null.
131   Value &operator[](const ObjectKey &K);
132   Value &operator[](ObjectKey &&K);
133   // Look up a property, returning nullptr if it doesn't exist.
134   Value *get(StringRef K);
135   const Value *get(StringRef K) const;
136   // Typed accessors return None/nullptr if
137   //   - the property doesn't exist
138   //   - or it has the wrong type
139   llvm::Optional<std::nullptr_t> getNull(StringRef K) const;
140   llvm::Optional<bool> getBoolean(StringRef K) const;
141   llvm::Optional<double> getNumber(StringRef K) const;
142   llvm::Optional<int64_t> getInteger(StringRef K) const;
143   llvm::Optional<llvm::StringRef> getString(StringRef K) const;
144   const json::Object *getObject(StringRef K) const;
145   json::Object *getObject(StringRef K);
146   const json::Array *getArray(StringRef K) const;
147   json::Array *getArray(StringRef K);
148 };
149 bool operator==(const Object &LHS, const Object &RHS);
150 inline bool operator!=(const Object &LHS, const Object &RHS) {
151   return !(LHS == RHS);
152 }
153 
154 /// An Array is a JSON array, which contains heterogeneous JSON values.
155 /// It simulates std::vector<Value>.
156 class Array {
157   std::vector<Value> V;
158 
159 public:
160   using value_type = Value;
161   using iterator = std::vector<Value>::iterator;
162   using const_iterator = std::vector<Value>::const_iterator;
163 
164   Array() = default;
165   explicit Array(std::initializer_list<Value> Elements);
Array(const Collection & C)166   template <typename Collection> explicit Array(const Collection &C) {
167     for (const auto &V : C)
168       emplace_back(V);
169   }
170 
171   Value &operator[](size_t I) { return V[I]; }
172   const Value &operator[](size_t I) const { return V[I]; }
front()173   Value &front() { return V.front(); }
front()174   const Value &front() const { return V.front(); }
back()175   Value &back() { return V.back(); }
back()176   const Value &back() const { return V.back(); }
data()177   Value *data() { return V.data(); }
data()178   const Value *data() const { return V.data(); }
179 
begin()180   iterator begin() { return V.begin(); }
begin()181   const_iterator begin() const { return V.begin(); }
end()182   iterator end() { return V.end(); }
end()183   const_iterator end() const { return V.end(); }
184 
empty()185   bool empty() const { return V.empty(); }
size()186   size_t size() const { return V.size(); }
reserve(size_t S)187   void reserve(size_t S) { V.reserve(S); }
188 
clear()189   void clear() { V.clear(); }
push_back(const Value & E)190   void push_back(const Value &E) { V.push_back(E); }
push_back(Value && E)191   void push_back(Value &&E) { V.push_back(std::move(E)); }
emplace_back(Args &&...A)192   template <typename... Args> void emplace_back(Args &&... A) {
193     V.emplace_back(std::forward<Args>(A)...);
194   }
pop_back()195   void pop_back() { V.pop_back(); }
196   // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees.
insert(iterator P,const Value & E)197   iterator insert(iterator P, const Value &E) { return V.insert(P, E); }
insert(iterator P,Value && E)198   iterator insert(iterator P, Value &&E) {
199     return V.insert(P, std::move(E));
200   }
insert(iterator P,It A,It Z)201   template <typename It> iterator insert(iterator P, It A, It Z) {
202     return V.insert(P, A, Z);
203   }
emplace(const_iterator P,Args &&...A)204   template <typename... Args> iterator emplace(const_iterator P, Args &&... A) {
205     return V.emplace(P, std::forward<Args>(A)...);
206   }
207 
208   friend bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
209 };
210 inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
211 
/// A Value is a JSON value of unknown type.
213 /// They can be copied, but should generally be moved.
214 ///
215 /// === Composing values ===
216 ///
217 /// You can implicitly construct Values from:
218 ///   - strings: std::string, SmallString, formatv, StringRef, char*
219 ///              (char*, and StringRef are references, not copies!)
220 ///   - numbers
221 ///   - booleans
222 ///   - null: nullptr
223 ///   - arrays: {"foo", 42.0, false}
224 ///   - serializable things: types with toJSON(const T&)->Value, found by ADL
225 ///
226 /// They can also be constructed from object/array helpers:
227 ///   - json::Object is a type like map<ObjectKey, Value>
228 ///   - json::Array is a type like vector<Value>
229 /// These can be list-initialized, or used to build up collections in a loop.
/// json::Array(Collection) converts all items in a collection to Values.
231 ///
232 /// === Inspecting values ===
233 ///
234 /// Each Value is one of the JSON kinds:
235 ///   null    (nullptr_t)
236 ///   boolean (bool)
237 ///   number  (double or int64)
238 ///   string  (StringRef)
239 ///   array   (json::Array)
240 ///   object  (json::Object)
241 ///
242 /// The kind can be queried directly, or implicitly via the typed accessors:
///   if (Optional<StringRef> S = E.getAsString())
244 ///     assert(E.kind() == Value::String);
245 ///
246 /// Array and Object also have typed indexing accessors for easy traversal:
247 ///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
248 ///   if (Object* O = E->getAsObject())
249 ///     if (Object* Opts = O->getObject("options"))
250 ///       if (Optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->get("font")->kind() == Value::String);
252 ///
253 /// === Converting JSON values to C++ types ===
254 ///
255 /// The convention is to have a deserializer function findable via ADL:
256 ///     fromJSON(const json::Value&, T&, Path) -> bool
257 ///
258 /// The return value indicates overall success, and Path is used for precise
259 /// error reporting. (The Path::Root passed in at the top level fromJSON call
260 /// captures any nested error and can render it in context).
261 /// If conversion fails, fromJSON calls Path::report() and immediately returns.
262 /// This ensures that the first fatal error survives.
263 ///
264 /// Deserializers are provided for:
265 ///   - bool
266 ///   - int and int64_t
267 ///   - double
268 ///   - std::string
269 ///   - vector<T>, where T is deserializable
270 ///   - map<string, T>, where T is deserializable
271 ///   - Optional<T>, where T is deserializable
272 /// ObjectMapper can help writing fromJSON() functions for object types.
273 ///
274 /// For conversion in the other direction, the serializer function is:
275 ///    toJSON(const T&) -> json::Value
276 /// If this exists, then it also allows constructing Value from T, and can
277 /// be used to serialize vector<T>, map<string, T>, and Optional<T>.
278 ///
279 /// === Serialization ===
280 ///
281 /// Values can be serialized to JSON:
282 ///   1) raw_ostream << Value                    // Basic formatting.
283 ///   2) raw_ostream << formatv("{0}", Value)    // Basic formatting.
284 ///   3) raw_ostream << formatv("{0:2}", Value)  // Pretty-print with indent 2.
285 ///
286 /// And parsed:
287 ///   Expected<Value> E = json::parse("[1, 2, null]");
288 ///   assert(E && E->kind() == Value::Array);
289 class Value {
290 public:
291   enum Kind {
292     Null,
293     Boolean,
294     /// Number values can store both int64s and doubles at full precision,
295     /// depending on what they were constructed/parsed from.
296     Number,
297     String,
298     Array,
299     Object,
300   };
301 
302   // It would be nice to have Value() be null. But that would make {} null too.
Value(const Value & M)303   Value(const Value &M) { copyFrom(M); }
Value(Value && M)304   Value(Value &&M) { moveFrom(std::move(M)); }
305   Value(std::initializer_list<Value> Elements);
Value(json::Array && Elements)306   Value(json::Array &&Elements) : Type(T_Array) {
307     create<json::Array>(std::move(Elements));
308   }
309   template <typename Elt>
Value(const std::vector<Elt> & C)310   Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
Value(json::Object && Properties)311   Value(json::Object &&Properties) : Type(T_Object) {
312     create<json::Object>(std::move(Properties));
313   }
314   template <typename Elt>
Value(const std::map<std::string,Elt> & C)315   Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
316   // Strings: types with value semantics. Must be valid UTF-8.
Value(std::string V)317   Value(std::string V) : Type(T_String) {
318     if (LLVM_UNLIKELY(!isUTF8(V))) {
319       assert(false && "Invalid UTF-8 in value used as JSON");
320       V = fixUTF8(std::move(V));
321     }
322     create<std::string>(std::move(V));
323   }
Value(const llvm::SmallVectorImpl<char> & V)324   Value(const llvm::SmallVectorImpl<char> &V)
325       : Value(std::string(V.begin(), V.end())) {}
Value(const llvm::formatv_object_base & V)326   Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
327   // Strings: types with reference semantics. Must be valid UTF-8.
Value(StringRef V)328   Value(StringRef V) : Type(T_StringRef) {
329     create<llvm::StringRef>(V);
330     if (LLVM_UNLIKELY(!isUTF8(V))) {
331       assert(false && "Invalid UTF-8 in value used as JSON");
332       *this = Value(fixUTF8(V));
333     }
334   }
Value(const char * V)335   Value(const char *V) : Value(StringRef(V)) {}
Value(std::nullptr_t)336   Value(std::nullptr_t) : Type(T_Null) {}
337   // Boolean (disallow implicit conversions).
338   // (The last template parameter is a dummy to keep templates distinct.)
339   template <typename T,
340             typename = std::enable_if_t<std::is_same<T, bool>::value>,
341             bool = false>
Value(T B)342   Value(T B) : Type(T_Boolean) {
343     create<bool>(B);
344   }
345   // Integers (except boolean). Must be non-narrowing convertible to int64_t.
346   template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>,
347             typename = std::enable_if_t<!std::is_same<T, bool>::value>>
Value(T I)348   Value(T I) : Type(T_Integer) {
349     create<int64_t>(int64_t{I});
350   }
351   // Floating point. Must be non-narrowing convertible to double.
352   template <typename T,
353             typename = std::enable_if_t<std::is_floating_point<T>::value>,
354             double * = nullptr>
Value(T D)355   Value(T D) : Type(T_Double) {
356     create<double>(double{D});
357   }
358   // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
359   template <typename T,
360             typename = std::enable_if_t<std::is_same<
361                 Value, decltype(toJSON(*(const T *)nullptr))>::value>,
362             Value * = nullptr>
Value(const T & V)363   Value(const T &V) : Value(toJSON(V)) {}
364 
365   Value &operator=(const Value &M) {
366     destroy();
367     copyFrom(M);
368     return *this;
369   }
370   Value &operator=(Value &&M) {
371     destroy();
372     moveFrom(std::move(M));
373     return *this;
374   }
~Value()375   ~Value() { destroy(); }
376 
kind()377   Kind kind() const {
378     switch (Type) {
379     case T_Null:
380       return Null;
381     case T_Boolean:
382       return Boolean;
383     case T_Double:
384     case T_Integer:
385       return Number;
386     case T_String:
387     case T_StringRef:
388       return String;
389     case T_Object:
390       return Object;
391     case T_Array:
392       return Array;
393     }
394     llvm_unreachable("Unknown kind");
395   }
396 
397   // Typed accessors return None/nullptr if the Value is not of this type.
getAsNull()398   llvm::Optional<std::nullptr_t> getAsNull() const {
399     if (LLVM_LIKELY(Type == T_Null))
400       return nullptr;
401     return llvm::None;
402   }
getAsBoolean()403   llvm::Optional<bool> getAsBoolean() const {
404     if (LLVM_LIKELY(Type == T_Boolean))
405       return as<bool>();
406     return llvm::None;
407   }
getAsNumber()408   llvm::Optional<double> getAsNumber() const {
409     if (LLVM_LIKELY(Type == T_Double))
410       return as<double>();
411     if (LLVM_LIKELY(Type == T_Integer))
412       return as<int64_t>();
413     return llvm::None;
414   }
415   // Succeeds if the Value is a Number, and exactly representable as int64_t.
getAsInteger()416   llvm::Optional<int64_t> getAsInteger() const {
417     if (LLVM_LIKELY(Type == T_Integer))
418       return as<int64_t>();
419     if (LLVM_LIKELY(Type == T_Double)) {
420       double D = as<double>();
421       if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
422                       D >= double(std::numeric_limits<int64_t>::min()) &&
423                       D <= double(std::numeric_limits<int64_t>::max())))
424         return D;
425     }
426     return llvm::None;
427   }
getAsString()428   llvm::Optional<llvm::StringRef> getAsString() const {
429     if (Type == T_String)
430       return llvm::StringRef(as<std::string>());
431     if (LLVM_LIKELY(Type == T_StringRef))
432       return as<llvm::StringRef>();
433     return llvm::None;
434   }
getAsObject()435   const json::Object *getAsObject() const {
436     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
437   }
getAsObject()438   json::Object *getAsObject() {
439     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
440   }
getAsArray()441   const json::Array *getAsArray() const {
442     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
443   }
getAsArray()444   json::Array *getAsArray() {
445     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
446   }
447 
448 private:
449   void destroy();
450   void copyFrom(const Value &M);
451   // We allow moving from *const* Values, by marking all members as mutable!
452   // This hack is needed to support initializer-list syntax efficiently.
453   // (std::initializer_list<T> is a container of const T).
454   void moveFrom(const Value &&M);
455   friend class Array;
456   friend class Object;
457 
create(U &&...V)458   template <typename T, typename... U> void create(U &&... V) {
459     new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
460   }
as()461   template <typename T> T &as() const {
462     // Using this two-step static_cast via void * instead of reinterpret_cast
463     // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
464     void *Storage = static_cast<void *>(&Union);
465     return *static_cast<T *>(Storage);
466   }
467 
468   friend class OStream;
469 
470   enum ValueType : char {
471     T_Null,
472     T_Boolean,
473     T_Double,
474     T_Integer,
475     T_StringRef,
476     T_String,
477     T_Object,
478     T_Array,
479   };
480   // All members mutable, see moveFrom().
481   mutable ValueType Type;
482   mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
483                                       std::string, json::Array, json::Object>
484       Union;
485   friend bool operator==(const Value &, const Value &);
486 };
487 
488 bool operator==(const Value &, const Value &);
489 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
490 
/// ObjectKey is used to capture keys in Object. Like Value but:
492 ///   - only strings are allowed
493 ///   - it's optimized for the string literal case (Owned == nullptr)
494 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
495 class ObjectKey {
496 public:
ObjectKey(const char * S)497   ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
ObjectKey(std::string S)498   ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
499     if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
500       assert(false && "Invalid UTF-8 in value used as JSON");
501       *Owned = fixUTF8(std::move(*Owned));
502     }
503     Data = *Owned;
504   }
ObjectKey(llvm::StringRef S)505   ObjectKey(llvm::StringRef S) : Data(S) {
506     if (LLVM_UNLIKELY(!isUTF8(Data))) {
507       assert(false && "Invalid UTF-8 in value used as JSON");
508       *this = ObjectKey(fixUTF8(S));
509     }
510   }
ObjectKey(const llvm::SmallVectorImpl<char> & V)511   ObjectKey(const llvm::SmallVectorImpl<char> &V)
512       : ObjectKey(std::string(V.begin(), V.end())) {}
ObjectKey(const llvm::formatv_object_base & V)513   ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
514 
ObjectKey(const ObjectKey & C)515   ObjectKey(const ObjectKey &C) { *this = C; }
ObjectKey(ObjectKey && C)516   ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
517   ObjectKey &operator=(const ObjectKey &C) {
518     if (C.Owned) {
519       Owned.reset(new std::string(*C.Owned));
520       Data = *Owned;
521     } else {
522       Data = C.Data;
523     }
524     return *this;
525   }
526   ObjectKey &operator=(ObjectKey &&) = default;
527 
StringRef()528   operator llvm::StringRef() const { return Data; }
str()529   std::string str() const { return Data.str(); }
530 
531 private:
532   // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
533   // could be 2 pointers at most.
534   std::unique_ptr<std::string> Owned;
535   llvm::StringRef Data;
536 };
537 
538 inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
539   return llvm::StringRef(L) == llvm::StringRef(R);
540 }
541 inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
542   return !(L == R);
543 }
544 inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
545   return StringRef(L) < StringRef(R);
546 }
547 
/// A key/value aggregate: the element type of Object's initializer-list
/// constructor and the argument type of Object::insert().
struct Object::KV {
  ObjectKey K; // Property name.
  Value V;     // Property value.
};
552 
Object(std::initializer_list<KV> Properties)553 inline Object::Object(std::initializer_list<KV> Properties) {
554   for (const auto &P : Properties) {
555     auto R = try_emplace(P.K, nullptr);
556     if (R.second)
557       R.first->getSecond().moveFrom(std::move(P.V));
558   }
559 }
insert(KV E)560 inline std::pair<Object::iterator, bool> Object::insert(KV E) {
561   return try_emplace(std::move(E.K), std::move(E.V));
562 }
erase(StringRef K)563 inline bool Object::erase(StringRef K) {
564   return M.erase(ObjectKey(K));
565 }
566 
567 /// A "cursor" marking a position within a Value.
568 /// The Value is a tree, and this is the path from the root to the current node.
569 /// This is used to associate errors with particular subobjects.
570 class Path {
571 public:
572   class Root;
573 
574   /// Records that the value at the current path is invalid.
575   /// Message is e.g. "expected number" and becomes part of the final error.
576   /// This overwrites any previously written error message in the root.
577   void report(llvm::StringLiteral Message);
578 
579   /// The root may be treated as a Path.
Path(Root & R)580   Path(Root &R) : Parent(nullptr), Seg(&R) {}
581   /// Derives a path for an array element: this[Index]
index(unsigned Index)582   Path index(unsigned Index) const { return Path(this, Segment(Index)); }
583   /// Derives a path for an object field: this.Field
field(StringRef Field)584   Path field(StringRef Field) const { return Path(this, Segment(Field)); }
585 
586 private:
587   /// One element in a JSON path: an object field (.foo) or array index [27].
588   /// Exception: the root Path encodes a pointer to the Path::Root.
589   class Segment {
590     uintptr_t Pointer;
591     unsigned Offset;
592 
593   public:
594     Segment() = default;
Segment(Root * R)595     Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
Segment(llvm::StringRef Field)596     Segment(llvm::StringRef Field)
597         : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
598           Offset(static_cast<unsigned>(Field.size())) {}
Segment(unsigned Index)599     Segment(unsigned Index) : Pointer(0), Offset(Index) {}
600 
isField()601     bool isField() const { return Pointer != 0; }
field()602     StringRef field() const {
603       return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
604     }
index()605     unsigned index() const { return Offset; }
root()606     Root *root() const { return reinterpret_cast<Root *>(Pointer); }
607   };
608 
609   const Path *Parent;
610   Segment Seg;
611 
Path(const Path * Parent,Segment S)612   Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
613 };
614 
615 /// The root is the trivial Path to the root value.
616 /// It also stores the latest reported error and the path where it occurred.
617 class Path::Root {
618   llvm::StringRef Name;
619   llvm::StringLiteral ErrorMessage;
620   std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
621 
622   friend void Path::report(llvm::StringLiteral Message);
623 
624 public:
Name(Name)625   Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
626   // No copy/move allowed as there are incoming pointers.
627   Root(Root &&) = delete;
628   Root &operator=(Root &&) = delete;
629   Root(const Root &) = delete;
630   Root &operator=(const Root &) = delete;
631 
632   /// Returns the last error reported, or else a generic error.
633   Error getError() const;
634   /// Print the root value with the error shown inline as a comment.
635   /// Unrelated parts of the value are elided for brevity, e.g.
636   ///   {
637   ///      "id": 42,
638   ///      "name": /* expected string */ null,
639   ///      "properties": { ... }
640   ///   }
641   void printErrorContext(const Value &, llvm::raw_ostream &) const;
642 };
643 
644 // Standard deserializers are provided for primitive types.
645 // See comments on Value.
fromJSON(const Value & E,std::string & Out,Path P)646 inline bool fromJSON(const Value &E, std::string &Out, Path P) {
647   if (auto S = E.getAsString()) {
648     Out = std::string(*S);
649     return true;
650   }
651   P.report("expected string");
652   return false;
653 }
fromJSON(const Value & E,int & Out,Path P)654 inline bool fromJSON(const Value &E, int &Out, Path P) {
655   if (auto S = E.getAsInteger()) {
656     Out = *S;
657     return true;
658   }
659   P.report("expected integer");
660   return false;
661 }
fromJSON(const Value & E,int64_t & Out,Path P)662 inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
663   if (auto S = E.getAsInteger()) {
664     Out = *S;
665     return true;
666   }
667   P.report("expected integer");
668   return false;
669 }
fromJSON(const Value & E,double & Out,Path P)670 inline bool fromJSON(const Value &E, double &Out, Path P) {
671   if (auto S = E.getAsNumber()) {
672     Out = *S;
673     return true;
674   }
675   P.report("expected number");
676   return false;
677 }
fromJSON(const Value & E,bool & Out,Path P)678 inline bool fromJSON(const Value &E, bool &Out, Path P) {
679   if (auto S = E.getAsBoolean()) {
680     Out = *S;
681     return true;
682   }
683   P.report("expected boolean");
684   return false;
685 }
fromJSON(const Value & E,std::nullptr_t & Out,Path P)686 inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
687   if (auto S = E.getAsNull()) {
688     Out = *S;
689     return true;
690   }
691   P.report("expected null");
692   return false;
693 }
694 template <typename T>
fromJSON(const Value & E,llvm::Optional<T> & Out,Path P)695 bool fromJSON(const Value &E, llvm::Optional<T> &Out, Path P) {
696   if (E.getAsNull()) {
697     Out = llvm::None;
698     return true;
699   }
700   T Result;
701   if (!fromJSON(E, Result, P))
702     return false;
703   Out = std::move(Result);
704   return true;
705 }
706 template <typename T>
fromJSON(const Value & E,std::vector<T> & Out,Path P)707 bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
708   if (auto *A = E.getAsArray()) {
709     Out.clear();
710     Out.resize(A->size());
711     for (size_t I = 0; I < A->size(); ++I)
712       if (!fromJSON((*A)[I], Out[I], P.index(I)))
713         return false;
714     return true;
715   }
716   P.report("expected array");
717   return false;
718 }
719 template <typename T>
fromJSON(const Value & E,std::map<std::string,T> & Out,Path P)720 bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
721   if (auto *O = E.getAsObject()) {
722     Out.clear();
723     for (const auto &KV : *O)
724       if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))],
725                     P.field(KV.first)))
726         return false;
727     return true;
728   }
729   P.report("expected object");
730   return false;
731 }
732 
733 // Allow serialization of Optional<T> for supported T.
toJSON(const llvm::Optional<T> & Opt)734 template <typename T> Value toJSON(const llvm::Optional<T> &Opt) {
735   return Opt ? Value(*Opt) : Value(nullptr);
736 }
737 
738 /// Helper for mapping JSON objects onto protocol structs.
739 ///
740 /// Example:
741 /// \code
742 ///   bool fromJSON(const Value &E, MyStruct &R, Path P) {
743 ///     ObjectMapper O(E, P);
744 ///     // When returning false, error details were already reported.
745 ///     return O && O.map("mandatory_field", R.MandatoryField) &&
746 ///         O.mapOptional("optional_field", R.OptionalField);
747 ///   }
748 /// \endcode
749 class ObjectMapper {
750 public:
751   /// If O is not an object, this mapper is invalid and an error is reported.
ObjectMapper(const Value & E,Path P)752   ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
753     if (!O)
754       P.report("expected object");
755   }
756 
757   /// True if the expression is an object.
758   /// Must be checked before calling map().
759   operator bool() const { return O; }
760 
761   /// Maps a property to a field.
762   /// If the property is missing or invalid, reports an error.
map(StringLiteral Prop,T & Out)763   template <typename T> bool map(StringLiteral Prop, T &Out) {
764     assert(*this && "Must check this is an object before calling map()");
765     if (const Value *E = O->get(Prop))
766       return fromJSON(*E, Out, P.field(Prop));
767     P.field(Prop).report("missing value");
768     return false;
769   }
770 
771   /// Maps a property to a field, if it exists.
772   /// If the property exists and is invalid, reports an error.
773   /// (Optional requires special handling, because missing keys are OK).
map(StringLiteral Prop,llvm::Optional<T> & Out)774   template <typename T> bool map(StringLiteral Prop, llvm::Optional<T> &Out) {
775     assert(*this && "Must check this is an object before calling map()");
776     if (const Value *E = O->get(Prop))
777       return fromJSON(*E, Out, P.field(Prop));
778     Out = llvm::None;
779     return true;
780   }
781 
782   /// Maps a property to a field, if it exists.
783   /// If the property exists and is invalid, reports an error.
784   /// If the property does not exist, Out is unchanged.
mapOptional(StringLiteral Prop,T & Out)785   template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
786     assert(*this && "Must check this is an object before calling map()");
787     if (const Value *E = O->get(Prop))
788       return fromJSON(*E, Out, P.field(Prop));
789     return true;
790   }
791 
792 private:
793   const Object *O;
794   Path P;
795 };
796 
797 /// Parses the provided JSON source, or returns a ParseError.
798 /// The returned Value is self-contained and owns its strings (they do not refer
799 /// to the original source).
800 llvm::Expected<Value> parse(llvm::StringRef JSON);
801 
802 class ParseError : public llvm::ErrorInfo<ParseError> {
803   const char *Msg;
804   unsigned Line, Column, Offset;
805 
806 public:
807   static char ID;
ParseError(const char * Msg,unsigned Line,unsigned Column,unsigned Offset)808   ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
809       : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
log(llvm::raw_ostream & OS)810   void log(llvm::raw_ostream &OS) const override {
811     OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
812   }
convertToErrorCode()813   std::error_code convertToErrorCode() const override {
814     return llvm::inconvertibleErrorCode();
815   }
816 };
817 
818 /// Version of parse() that converts the parsed value to the type T.
819 /// RootName describes the root object and is used in error messages.
820 template <typename T>
821 Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
822   auto V = parse(JSON);
823   if (!V)
824     return V.takeError();
825   Path::Root R(RootName);
826   T Result;
827   if (fromJSON(*V, Result, R))
828     return std::move(Result);
829   return R.getError();
830 }
831 
832 /// json::OStream allows writing well-formed JSON without materializing
833 /// all structures as json::Value ahead of time.
834 /// It's faster, lower-level, and less safe than OS << json::Value.
835 /// It also allows emitting more constructs, such as comments.
836 ///
837 /// Only one "top-level" object can be written to a stream.
838 /// Simplest usage involves passing lambdas (Blocks) to fill in containers:
839 ///
840 ///   json::OStream J(OS);
841 ///   J.array([&]{
842 ///     for (const Event &E : Events)
843 ///       J.object([&] {
844 ///         J.attribute("timestamp", int64_t(E.Time));
845 ///         J.attributeArray("participants", [&] {
846 ///           for (const Participant &P : E.Participants)
847 ///             J.value(P.toString());
848 ///         });
849 ///       });
850 ///   });
851 ///
852 /// This would produce JSON like:
853 ///
854 ///   [
855 ///     {
856 ///       "timestamp": 19287398741,
857 ///       "participants": [
858 ///         "King Kong",
859 ///         "Miley Cyrus",
860 ///         "Cleopatra"
861 ///       ]
862 ///     },
863 ///     ...
864 ///   ]
865 ///
866 /// The lower level begin/end methods (arrayBegin()) are more flexible but
867 /// care must be taken to pair them correctly:
868 ///
869 ///   json::OStream J(OS);
///   J.arrayBegin();
871 ///   for (const Event &E : Events) {
872 ///     J.objectBegin();
873 ///     J.attribute("timestamp", int64_t(E.Time));
874 ///     J.attributeBegin("participants");
875 ///     for (const Participant &P : E.Participants)
876 ///       J.value(P.toString());
877 ///     J.attributeEnd();
878 ///     J.objectEnd();
879 ///   }
880 ///   J.arrayEnd();
881 ///
882 /// If the call sequence isn't valid JSON, asserts will fire in debug mode.
883 /// This can be mismatched begin()/end() pairs, trying to emit attributes inside
884 /// an array, and so on.
885 /// With asserts disabled, this is undefined behavior.
886 class OStream {
887  public:
888   using Block = llvm::function_ref<void()>;
889   // If IndentSize is nonzero, output is pretty-printed.
890   explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
OS(OS)891       : OS(OS), IndentSize(IndentSize) {
892     Stack.emplace_back();
893   }
~OStream()894   ~OStream() {
895     assert(Stack.size() == 1 && "Unmatched begin()/end()");
896     assert(Stack.back().Ctx == Singleton);
897     assert(Stack.back().HasValue && "Did not write top-level value");
898   }
899 
900   /// Flushes the underlying ostream. OStream does not buffer internally.
flush()901   void flush() { OS.flush(); }
902 
903   // High level functions to output a value.
904   // Valid at top-level (exactly once), in an attribute value (exactly once),
905   // or in an array (any number of times).
906 
907   /// Emit a self-contained value (number, string, vector<string> etc).
908   void value(const Value &V);
909   /// Emit an array whose elements are emitted in the provided Block.
array(Block Contents)910   void array(Block Contents) {
911     arrayBegin();
912     Contents();
913     arrayEnd();
914   }
915   /// Emit an object whose elements are emitted in the provided Block.
object(Block Contents)916   void object(Block Contents) {
917     objectBegin();
918     Contents();
919     objectEnd();
920   }
921   /// Emit an externally-serialized value.
922   /// The caller must write exactly one valid JSON value to the provided stream.
923   /// No validation or formatting of this value occurs.
rawValue(llvm::function_ref<void (raw_ostream &)> Contents)924   void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
925     rawValueBegin();
926     Contents(OS);
927     rawValueEnd();
928   }
rawValue(llvm::StringRef Contents)929   void rawValue(llvm::StringRef Contents) {
930     rawValue([&](raw_ostream &OS) { OS << Contents; });
931   }
932   /// Emit a JavaScript comment associated with the next printed value.
933   /// The string must be valid until the next attribute or value is emitted.
934   /// Comments are not part of standard JSON, and many parsers reject them!
935   void comment(llvm::StringRef);
936 
937   // High level functions to output object attributes.
938   // Valid only within an object (any number of times).
939 
940   /// Emit an attribute whose value is self-contained (number, vector<int> etc).
attribute(llvm::StringRef Key,const Value & Contents)941   void attribute(llvm::StringRef Key, const Value& Contents) {
942     attributeImpl(Key, [&] { value(Contents); });
943   }
944   /// Emit an attribute whose value is an array with elements from the Block.
attributeArray(llvm::StringRef Key,Block Contents)945   void attributeArray(llvm::StringRef Key, Block Contents) {
946     attributeImpl(Key, [&] { array(Contents); });
947   }
948   /// Emit an attribute whose value is an object with attributes from the Block.
attributeObject(llvm::StringRef Key,Block Contents)949   void attributeObject(llvm::StringRef Key, Block Contents) {
950     attributeImpl(Key, [&] { object(Contents); });
951   }
952 
953   // Low-level begin/end functions to output arrays, objects, and attributes.
954   // Must be correctly paired. Allowed contexts are as above.
955 
956   void arrayBegin();
957   void arrayEnd();
958   void objectBegin();
959   void objectEnd();
960   void attributeBegin(llvm::StringRef Key);
961   void attributeEnd();
962   raw_ostream &rawValueBegin();
963   void rawValueEnd();
964 
965 private:
attributeImpl(llvm::StringRef Key,Block Contents)966   void attributeImpl(llvm::StringRef Key, Block Contents) {
967     attributeBegin(Key);
968     Contents();
969     attributeEnd();
970   }
971 
972   void valueBegin();
973   void flushComment();
974   void newline();
975 
976   enum Context {
977     Singleton, // Top level, or object attribute.
978     Array,
979     Object,
980     RawValue, // External code writing a value to OS directly.
981   };
982   struct State {
983     Context Ctx = Singleton;
984     bool HasValue = false;
985   };
986   llvm::SmallVector<State, 16> Stack; // Never empty.
987   llvm::StringRef PendingComment;
988   llvm::raw_ostream &OS;
989   unsigned IndentSize;
990   unsigned Indent = 0;
991 };
992 
993 /// Serializes this Value to JSON, writing it to the provided stream.
994 /// The formatting is compact (no extra whitespace) and deterministic.
995 /// For pretty-printing, use the formatv() format_provider below.
996 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
997   OStream(OS).value(V);
998   return OS;
999 }
1000 } // namespace json
1001 
1002 /// Allow printing json::Value with formatv().
1003 /// The default style is basic/compact formatting, like operator<<.
1004 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
1005 template <> struct format_provider<llvm::json::Value> {
1006   static void format(const llvm::json::Value &, raw_ostream &, StringRef);
1007 };
1008 } // namespace llvm
1009 
1010 #endif
1011