1 //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file supports working with JSON data.
11 ///
12 /// It comprises:
13 ///
14 /// - classes which hold dynamically-typed parsed JSON structures
15 ///   These are value types that can be composed, inspected, and modified.
16 ///   See json::Value, and the related types json::Object and json::Array.
17 ///
18 /// - functions to parse JSON text into Values, and to serialize Values to text.
19 ///   See parse(), operator<<, and format_provider.
20 ///
21 /// - a convention and helpers for mapping between json::Value and user-defined
22 ///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
23 ///
24 /// - an output API json::OStream which can emit JSON without materializing
25 ///   all structures as json::Value.
26 ///
27 /// Typically, JSON data would be read from an external source, parsed into
28 /// a Value, and then converted into some native data structure before doing
29 /// real work on it. (And vice versa when writing).
30 ///
31 /// Other serialization mechanisms you may consider:
32 ///
33 /// - YAML is also text-based, and more human-readable than JSON. It's a more
34 ///   complex format and data model, and YAML parsers aren't ubiquitous.
35 ///   YAMLParser.h is a streaming parser suitable for parsing large documents
36 ///   (including JSON, as YAML is a superset). It can be awkward to use
37 ///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
38 ///   declarative than the toJSON/fromJSON conventions here.
39 ///
40 /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
41 ///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
42 ///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
43 ///
44 //===---------------------------------------------------------------------===//
45 
46 #ifndef LLVM_SUPPORT_JSON_H
47 #define LLVM_SUPPORT_JSON_H
48 
49 #include "llvm/ADT/DenseMap.h"
50 #include "llvm/ADT/SmallVector.h"
51 #include "llvm/ADT/StringRef.h"
52 #include "llvm/ADT/STLFunctionalExtras.h"
53 #include "llvm/Support/Error.h"
54 #include "llvm/Support/FormatVariadic.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include <cmath>
57 #include <map>
58 
59 namespace llvm {
60 namespace json {
61 
62 // === String encodings ===
63 //
64 // JSON strings are character sequences (not byte sequences like std::string).
65 // We need to know the encoding, and for simplicity only support UTF-8.
66 //
67 //   - When parsing, invalid UTF-8 is a syntax error like any other
68 //
69 //   - When creating Values from strings, callers must ensure they are UTF-8.
70 //        with asserts on, invalid UTF-8 will crash the program
71 //        with asserts off, we'll substitute the replacement character (U+FFFD)
72 //     Callers can use json::isUTF8() and json::fixUTF8() for validation.
73 //
74 //   - When retrieving strings from Values (e.g. asString()), the result will
75 //     always be valid UTF-8.
76 
/// Compile-time predicate: true exactly when \p T is an integral type that is
/// unsigned and occupies as many bytes as uint64_t.
template <typename T>
constexpr bool is_uint_64_bit_v = std::conjunction_v<
    std::is_integral<T>, std::is_unsigned<T>,
    std::bool_constant<sizeof(T) == sizeof(uint64_t)>>;
81 
82 /// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first
/// error.
84 bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
85 /// Replaces invalid UTF-8 sequences in \p S with the replacement character
86 /// (U+FFFD). The returned string is valid UTF-8.
87 /// This is much slower than isUTF8, so test that first.
88 std::string fixUTF8(llvm::StringRef S);
89 
90 class Array;
91 class ObjectKey;
92 class Value;
93 template <typename T> Value toJSON(const std::optional<T> &Opt);
94 
95 /// An Object is a JSON object, which maps strings to heterogenous JSON values.
96 /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
97 class Object {
98   using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
99   Storage M;
100 
101 public:
102   using key_type = ObjectKey;
103   using mapped_type = Value;
104   using value_type = Storage::value_type;
105   using iterator = Storage::iterator;
106   using const_iterator = Storage::const_iterator;
107 
108   Object() = default;
109   // KV is a trivial key-value struct for list-initialization.
110   // (using std::pair forces extra copies).
111   struct KV;
112   explicit Object(std::initializer_list<KV> Properties);
113 
114   iterator begin() { return M.begin(); }
115   const_iterator begin() const { return M.begin(); }
116   iterator end() { return M.end(); }
117   const_iterator end() const { return M.end(); }
118 
119   bool empty() const { return M.empty(); }
120   size_t size() const { return M.size(); }
121 
122   void clear() { M.clear(); }
123   std::pair<iterator, bool> insert(KV E);
124   template <typename... Ts>
125   std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
126     return M.try_emplace(K, std::forward<Ts>(Args)...);
127   }
128   template <typename... Ts>
129   std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
130     return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
131   }
132   bool erase(StringRef K);
133   void erase(iterator I) { M.erase(I); }
134 
135   iterator find(StringRef K) { return M.find_as(K); }
136   const_iterator find(StringRef K) const { return M.find_as(K); }
137   // operator[] acts as if Value was default-constructible as null.
138   Value &operator[](const ObjectKey &K);
139   Value &operator[](ObjectKey &&K);
140   // Look up a property, returning nullptr if it doesn't exist.
141   Value *get(StringRef K);
142   const Value *get(StringRef K) const;
143   // Typed accessors return std::nullopt/nullptr if
144   //   - the property doesn't exist
145   //   - or it has the wrong type
146   std::optional<std::nullptr_t> getNull(StringRef K) const;
147   std::optional<bool> getBoolean(StringRef K) const;
148   std::optional<double> getNumber(StringRef K) const;
149   std::optional<int64_t> getInteger(StringRef K) const;
150   std::optional<llvm::StringRef> getString(StringRef K) const;
151   const json::Object *getObject(StringRef K) const;
152   json::Object *getObject(StringRef K);
153   const json::Array *getArray(StringRef K) const;
154   json::Array *getArray(StringRef K);
155 };
156 bool operator==(const Object &LHS, const Object &RHS);
157 inline bool operator!=(const Object &LHS, const Object &RHS) {
158   return !(LHS == RHS);
159 }
160 
161 /// An Array is a JSON array, which contains heterogeneous JSON values.
162 /// It simulates std::vector<Value>.
163 class Array {
164   std::vector<Value> V;
165 
166 public:
167   using value_type = Value;
168   using iterator = std::vector<Value>::iterator;
169   using const_iterator = std::vector<Value>::const_iterator;
170 
171   Array() = default;
172   explicit Array(std::initializer_list<Value> Elements);
173   template <typename Collection> explicit Array(const Collection &C) {
174     for (const auto &V : C)
175       emplace_back(V);
176   }
177 
178   Value &operator[](size_t I);
179   const Value &operator[](size_t I) const;
180   Value &front();
181   const Value &front() const;
182   Value &back();
183   const Value &back() const;
184   Value *data();
185   const Value *data() const;
186 
187   iterator begin();
188   const_iterator begin() const;
189   iterator end();
190   const_iterator end() const;
191 
192   bool empty() const;
193   size_t size() const;
194   void reserve(size_t S);
195 
196   void clear();
197   void push_back(const Value &E);
198   void push_back(Value &&E);
199   template <typename... Args> void emplace_back(Args &&...A);
200   void pop_back();
201   iterator insert(const_iterator P, const Value &E);
202   iterator insert(const_iterator P, Value &&E);
203   template <typename It> iterator insert(const_iterator P, It A, It Z);
204   template <typename... Args> iterator emplace(const_iterator P, Args &&...A);
205 
206   friend bool operator==(const Array &L, const Array &R);
207 };
208 inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
209 
/// A Value is a JSON value of unknown type.
211 /// They can be copied, but should generally be moved.
212 ///
213 /// === Composing values ===
214 ///
215 /// You can implicitly construct Values from:
216 ///   - strings: std::string, SmallString, formatv, StringRef, char*
217 ///              (char*, and StringRef are references, not copies!)
218 ///   - numbers
219 ///   - booleans
220 ///   - null: nullptr
221 ///   - arrays: {"foo", 42.0, false}
222 ///   - serializable things: types with toJSON(const T&)->Value, found by ADL
223 ///
224 /// They can also be constructed from object/array helpers:
225 ///   - json::Object is a type like map<ObjectKey, Value>
226 ///   - json::Array is a type like vector<Value>
227 /// These can be list-initialized, or used to build up collections in a loop.
228 /// json::ary(Collection) converts all items in a collection to Values.
229 ///
230 /// === Inspecting values ===
231 ///
232 /// Each Value is one of the JSON kinds:
233 ///   null    (nullptr_t)
234 ///   boolean (bool)
235 ///   number  (double, int64 or uint64)
236 ///   string  (StringRef)
237 ///   array   (json::Array)
238 ///   object  (json::Object)
239 ///
240 /// The kind can be queried directly, or implicitly via the typed accessors:
///   if (std::optional<StringRef> S = E.getAsString())
242 ///     assert(E.kind() == Value::String);
243 ///
244 /// Array and Object also have typed indexing accessors for easy traversal:
245 ///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
246 ///   if (Object* O = E->getAsObject())
247 ///     if (Object* Opts = O->getObject("options"))
248 ///       if (std::optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->get("font")->kind() == Value::String);
250 ///
251 /// === Converting JSON values to C++ types ===
252 ///
253 /// The convention is to have a deserializer function findable via ADL:
254 ///     fromJSON(const json::Value&, T&, Path) -> bool
255 ///
256 /// The return value indicates overall success, and Path is used for precise
257 /// error reporting. (The Path::Root passed in at the top level fromJSON call
258 /// captures any nested error and can render it in context).
259 /// If conversion fails, fromJSON calls Path::report() and immediately returns.
260 /// This ensures that the first fatal error survives.
261 ///
262 /// Deserializers are provided for:
263 ///   - bool
///   - int, int64_t and uint64_t
265 ///   - double
266 ///   - std::string
267 ///   - vector<T>, where T is deserializable
268 ///   - map<string, T>, where T is deserializable
269 ///   - std::optional<T>, where T is deserializable
270 /// ObjectMapper can help writing fromJSON() functions for object types.
271 ///
272 /// For conversion in the other direction, the serializer function is:
273 ///    toJSON(const T&) -> json::Value
274 /// If this exists, then it also allows constructing Value from T, and can
275 /// be used to serialize vector<T>, map<string, T>, and std::optional<T>.
276 ///
277 /// === Serialization ===
278 ///
279 /// Values can be serialized to JSON:
280 ///   1) raw_ostream << Value                    // Basic formatting.
281 ///   2) raw_ostream << formatv("{0}", Value)    // Basic formatting.
282 ///   3) raw_ostream << formatv("{0:2}", Value)  // Pretty-print with indent 2.
283 ///
284 /// And parsed:
285 ///   Expected<Value> E = json::parse("[1, 2, null]");
286 ///   assert(E && E->kind() == Value::Array);
287 class Value {
288 public:
289   enum Kind {
290     Null,
291     Boolean,
292     /// Number values can store both int64s and doubles at full precision,
293     /// depending on what they were constructed/parsed from.
294     Number,
295     String,
296     Array,
297     Object,
298   };
299 
300   // It would be nice to have Value() be null. But that would make {} null too.
301   Value(const Value &M) { copyFrom(M); }
302   Value(Value &&M) { moveFrom(std::move(M)); }
303   Value(std::initializer_list<Value> Elements);
304   Value(json::Array &&Elements) : Type(T_Array) {
305     create<json::Array>(std::move(Elements));
306   }
307   template <typename Elt>
308   Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
309   Value(json::Object &&Properties) : Type(T_Object) {
310     create<json::Object>(std::move(Properties));
311   }
312   template <typename Elt>
313   Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
314   // Strings: types with value semantics. Must be valid UTF-8.
315   Value(std::string V) : Type(T_String) {
316     if (LLVM_UNLIKELY(!isUTF8(V))) {
317       assert(false && "Invalid UTF-8 in value used as JSON");
318       V = fixUTF8(std::move(V));
319     }
320     create<std::string>(std::move(V));
321   }
322   Value(const llvm::SmallVectorImpl<char> &V)
323       : Value(std::string(V.begin(), V.end())) {}
324   Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
325   // Strings: types with reference semantics. Must be valid UTF-8.
326   Value(StringRef V) : Type(T_StringRef) {
327     create<llvm::StringRef>(V);
328     if (LLVM_UNLIKELY(!isUTF8(V))) {
329       assert(false && "Invalid UTF-8 in value used as JSON");
330       *this = Value(fixUTF8(V));
331     }
332   }
333   Value(const char *V) : Value(StringRef(V)) {}
334   Value(std::nullptr_t) : Type(T_Null) {}
335   // Boolean (disallow implicit conversions).
336   // (The last template parameter is a dummy to keep templates distinct.)
337   template <typename T, typename = std::enable_if_t<std::is_same_v<T, bool>>,
338             bool = false>
339   Value(T B) : Type(T_Boolean) {
340     create<bool>(B);
341   }
342 
343   // Unsigned 64-bit integers.
344   template <typename T, typename = std::enable_if_t<is_uint_64_bit_v<T>>>
345   Value(T V) : Type(T_UINT64) {
346     create<uint64_t>(uint64_t{V});
347   }
348 
349   // Integers (except boolean and uint64_t).
350   // Must be non-narrowing convertible to int64_t.
351   template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>,
352             typename = std::enable_if_t<!std::is_same_v<T, bool>>,
353             typename = std::enable_if_t<!is_uint_64_bit_v<T>>>
354   Value(T I) : Type(T_Integer) {
355     create<int64_t>(int64_t{I});
356   }
357   // Floating point. Must be non-narrowing convertible to double.
358   template <typename T,
359             typename = std::enable_if_t<std::is_floating_point_v<T>>,
360             double * = nullptr>
361   Value(T D) : Type(T_Double) {
362     create<double>(double{D});
363   }
364   // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
365   template <typename T,
366             typename = std::enable_if_t<
367                 std::is_same_v<Value, decltype(toJSON(*(const T *)nullptr))>>,
368             Value * = nullptr>
369   Value(const T &V) : Value(toJSON(V)) {}
370 
371   Value &operator=(const Value &M) {
372     destroy();
373     copyFrom(M);
374     return *this;
375   }
376   Value &operator=(Value &&M) {
377     destroy();
378     moveFrom(std::move(M));
379     return *this;
380   }
381   ~Value() { destroy(); }
382 
383   Kind kind() const {
384     switch (Type) {
385     case T_Null:
386       return Null;
387     case T_Boolean:
388       return Boolean;
389     case T_Double:
390     case T_Integer:
391     case T_UINT64:
392       return Number;
393     case T_String:
394     case T_StringRef:
395       return String;
396     case T_Object:
397       return Object;
398     case T_Array:
399       return Array;
400     }
401     llvm_unreachable("Unknown kind");
402   }
403 
404   // Typed accessors return std::nullopt/nullptr if the Value is not of this
405   // type.
406   std::optional<std::nullptr_t> getAsNull() const {
407     if (LLVM_LIKELY(Type == T_Null))
408       return nullptr;
409     return std::nullopt;
410   }
411   std::optional<bool> getAsBoolean() const {
412     if (LLVM_LIKELY(Type == T_Boolean))
413       return as<bool>();
414     return std::nullopt;
415   }
416   std::optional<double> getAsNumber() const {
417     if (LLVM_LIKELY(Type == T_Double))
418       return as<double>();
419     if (LLVM_LIKELY(Type == T_Integer))
420       return as<int64_t>();
421     if (LLVM_LIKELY(Type == T_UINT64))
422       return as<uint64_t>();
423     return std::nullopt;
424   }
425   // Succeeds if the Value is a Number, and exactly representable as int64_t.
426   std::optional<int64_t> getAsInteger() const {
427     if (LLVM_LIKELY(Type == T_Integer))
428       return as<int64_t>();
429     if (LLVM_LIKELY(Type == T_UINT64)) {
430       uint64_t U = as<uint64_t>();
431       if (LLVM_LIKELY(U <= uint64_t(std::numeric_limits<int64_t>::max()))) {
432         return U;
433       }
434     }
435     if (LLVM_LIKELY(Type == T_Double)) {
436       double D = as<double>();
437       if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
438                       D >= double(std::numeric_limits<int64_t>::min()) &&
439                       D <= double(std::numeric_limits<int64_t>::max())))
440         return D;
441     }
442     return std::nullopt;
443   }
444   std::optional<uint64_t> getAsUINT64() const {
445     if (Type == T_UINT64)
446       return as<uint64_t>();
447     else if (Type == T_Integer) {
448       int64_t N = as<int64_t>();
449       if (N >= 0)
450         return as<uint64_t>();
451     }
452     return std::nullopt;
453   }
454   std::optional<llvm::StringRef> getAsString() const {
455     if (Type == T_String)
456       return llvm::StringRef(as<std::string>());
457     if (LLVM_LIKELY(Type == T_StringRef))
458       return as<llvm::StringRef>();
459     return std::nullopt;
460   }
461   const json::Object *getAsObject() const {
462     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
463   }
464   json::Object *getAsObject() {
465     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
466   }
467   const json::Array *getAsArray() const {
468     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
469   }
470   json::Array *getAsArray() {
471     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
472   }
473 
474 private:
475   void destroy();
476   void copyFrom(const Value &M);
477   // We allow moving from *const* Values, by marking all members as mutable!
478   // This hack is needed to support initializer-list syntax efficiently.
479   // (std::initializer_list<T> is a container of const T).
480   void moveFrom(const Value &&M);
481   friend class Array;
482   friend class Object;
483 
484   template <typename T, typename... U> void create(U &&... V) {
485     new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
486   }
487   template <typename T> T &as() const {
488     // Using this two-step static_cast via void * instead of reinterpret_cast
489     // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
490     void *Storage = static_cast<void *>(&Union);
491     return *static_cast<T *>(Storage);
492   }
493 
494   friend class OStream;
495 
496   enum ValueType : char16_t {
497     T_Null,
498     T_Boolean,
499     T_Double,
500     T_Integer,
501     T_UINT64,
502     T_StringRef,
503     T_String,
504     T_Object,
505     T_Array,
506   };
507   // All members mutable, see moveFrom().
508   mutable ValueType Type;
509   mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t,
510                                       llvm::StringRef, std::string, json::Array,
511                                       json::Object>
512       Union;
513   friend bool operator==(const Value &, const Value &);
514 };
515 
516 bool operator==(const Value &, const Value &);
517 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
518 
519 // Array Methods
520 inline Value &Array::operator[](size_t I) { return V[I]; }
521 inline const Value &Array::operator[](size_t I) const { return V[I]; }
522 inline Value &Array::front() { return V.front(); }
523 inline const Value &Array::front() const { return V.front(); }
524 inline Value &Array::back() { return V.back(); }
525 inline const Value &Array::back() const { return V.back(); }
526 inline Value *Array::data() { return V.data(); }
527 inline const Value *Array::data() const { return V.data(); }
528 
529 inline typename Array::iterator Array::begin() { return V.begin(); }
530 inline typename Array::const_iterator Array::begin() const { return V.begin(); }
531 inline typename Array::iterator Array::end() { return V.end(); }
532 inline typename Array::const_iterator Array::end() const { return V.end(); }
533 
534 inline bool Array::empty() const { return V.empty(); }
535 inline size_t Array::size() const { return V.size(); }
536 inline void Array::reserve(size_t S) { V.reserve(S); }
537 
538 inline void Array::clear() { V.clear(); }
539 inline void Array::push_back(const Value &E) { V.push_back(E); }
540 inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); }
541 template <typename... Args> inline void Array::emplace_back(Args &&...A) {
542   V.emplace_back(std::forward<Args>(A)...);
543 }
544 inline void Array::pop_back() { V.pop_back(); }
545 inline typename Array::iterator Array::insert(const_iterator P, const Value &E) {
546   return V.insert(P, E);
547 }
548 inline typename Array::iterator Array::insert(const_iterator P, Value &&E) {
549   return V.insert(P, std::move(E));
550 }
551 template <typename It>
552 inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) {
553   return V.insert(P, A, Z);
554 }
555 template <typename... Args>
556 inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) {
557   return V.emplace(P, std::forward<Args>(A)...);
558 }
559 inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
560 
561 /// ObjectKey is a used to capture keys in Object. Like Value but:
562 ///   - only strings are allowed
563 ///   - it's optimized for the string literal case (Owned == nullptr)
564 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
565 class ObjectKey {
566 public:
567   ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
568   ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
569     if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
570       assert(false && "Invalid UTF-8 in value used as JSON");
571       *Owned = fixUTF8(std::move(*Owned));
572     }
573     Data = *Owned;
574   }
575   ObjectKey(llvm::StringRef S) : Data(S) {
576     if (LLVM_UNLIKELY(!isUTF8(Data))) {
577       assert(false && "Invalid UTF-8 in value used as JSON");
578       *this = ObjectKey(fixUTF8(S));
579     }
580   }
581   ObjectKey(const llvm::SmallVectorImpl<char> &V)
582       : ObjectKey(std::string(V.begin(), V.end())) {}
583   ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
584 
585   ObjectKey(const ObjectKey &C) { *this = C; }
586   ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
587   ObjectKey &operator=(const ObjectKey &C) {
588     if (C.Owned) {
589       Owned.reset(new std::string(*C.Owned));
590       Data = *Owned;
591     } else {
592       Data = C.Data;
593     }
594     return *this;
595   }
596   ObjectKey &operator=(ObjectKey &&) = default;
597 
598   operator llvm::StringRef() const { return Data; }
599   std::string str() const { return Data.str(); }
600 
601 private:
602   // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
603   // could be 2 pointers at most.
604   std::unique_ptr<std::string> Owned;
605   llvm::StringRef Data;
606 };
607 
608 inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
609   return llvm::StringRef(L) == llvm::StringRef(R);
610 }
611 inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
612   return !(L == R);
613 }
614 inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
615   return StringRef(L) < StringRef(R);
616 }
617 
618 struct Object::KV {
619   ObjectKey K;
620   Value V;
621 };
622 
623 inline Object::Object(std::initializer_list<KV> Properties) {
624   for (const auto &P : Properties) {
625     auto R = try_emplace(P.K, nullptr);
626     if (R.second)
627       R.first->getSecond().moveFrom(std::move(P.V));
628   }
629 }
630 inline std::pair<Object::iterator, bool> Object::insert(KV E) {
631   return try_emplace(std::move(E.K), std::move(E.V));
632 }
633 inline bool Object::erase(StringRef K) {
634   return M.erase(ObjectKey(K));
635 }
636 
637 /// A "cursor" marking a position within a Value.
638 /// The Value is a tree, and this is the path from the root to the current node.
639 /// This is used to associate errors with particular subobjects.
640 class Path {
641 public:
642   class Root;
643 
644   /// Records that the value at the current path is invalid.
645   /// Message is e.g. "expected number" and becomes part of the final error.
646   /// This overwrites any previously written error message in the root.
647   void report(llvm::StringLiteral Message);
648 
649   /// The root may be treated as a Path.
650   Path(Root &R) : Parent(nullptr), Seg(&R) {}
651   /// Derives a path for an array element: this[Index]
652   Path index(unsigned Index) const { return Path(this, Segment(Index)); }
653   /// Derives a path for an object field: this.Field
654   Path field(StringRef Field) const { return Path(this, Segment(Field)); }
655 
656 private:
657   /// One element in a JSON path: an object field (.foo) or array index [27].
658   /// Exception: the root Path encodes a pointer to the Path::Root.
659   class Segment {
660     uintptr_t Pointer;
661     unsigned Offset;
662 
663   public:
664     Segment() = default;
665     Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
666     Segment(llvm::StringRef Field)
667         : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
668           Offset(static_cast<unsigned>(Field.size())) {}
669     Segment(unsigned Index) : Pointer(0), Offset(Index) {}
670 
671     bool isField() const { return Pointer != 0; }
672     StringRef field() const {
673       return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
674     }
675     unsigned index() const { return Offset; }
676     Root *root() const { return reinterpret_cast<Root *>(Pointer); }
677   };
678 
679   const Path *Parent;
680   Segment Seg;
681 
682   Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
683 };
684 
685 /// The root is the trivial Path to the root value.
686 /// It also stores the latest reported error and the path where it occurred.
687 class Path::Root {
688   llvm::StringRef Name;
689   llvm::StringLiteral ErrorMessage;
690   std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
691 
692   friend void Path::report(llvm::StringLiteral Message);
693 
694 public:
695   Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
696   // No copy/move allowed as there are incoming pointers.
697   Root(Root &&) = delete;
698   Root &operator=(Root &&) = delete;
699   Root(const Root &) = delete;
700   Root &operator=(const Root &) = delete;
701 
702   /// Returns the last error reported, or else a generic error.
703   Error getError() const;
704   /// Print the root value with the error shown inline as a comment.
705   /// Unrelated parts of the value are elided for brevity, e.g.
706   ///   {
707   ///      "id": 42,
708   ///      "name": /* expected string */ null,
709   ///      "properties": { ... }
710   ///   }
711   void printErrorContext(const Value &, llvm::raw_ostream &) const;
712 };
713 
714 // Standard deserializers are provided for primitive types.
715 // See comments on Value.
716 inline bool fromJSON(const Value &E, std::string &Out, Path P) {
717   if (auto S = E.getAsString()) {
718     Out = std::string(*S);
719     return true;
720   }
721   P.report("expected string");
722   return false;
723 }
724 inline bool fromJSON(const Value &E, int &Out, Path P) {
725   if (auto S = E.getAsInteger()) {
726     Out = *S;
727     return true;
728   }
729   P.report("expected integer");
730   return false;
731 }
732 inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
733   if (auto S = E.getAsInteger()) {
734     Out = *S;
735     return true;
736   }
737   P.report("expected integer");
738   return false;
739 }
740 inline bool fromJSON(const Value &E, double &Out, Path P) {
741   if (auto S = E.getAsNumber()) {
742     Out = *S;
743     return true;
744   }
745   P.report("expected number");
746   return false;
747 }
748 inline bool fromJSON(const Value &E, bool &Out, Path P) {
749   if (auto S = E.getAsBoolean()) {
750     Out = *S;
751     return true;
752   }
753   P.report("expected boolean");
754   return false;
755 }
756 inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {
757   if (auto S = E.getAsUINT64()) {
758     Out = *S;
759     return true;
760   }
761   P.report("expected uint64_t");
762   return false;
763 }
764 inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
765   if (auto S = E.getAsNull()) {
766     Out = *S;
767     return true;
768   }
769   P.report("expected null");
770   return false;
771 }
772 template <typename T>
773 bool fromJSON(const Value &E, std::optional<T> &Out, Path P) {
774   if (E.getAsNull()) {
775     Out = std::nullopt;
776     return true;
777   }
778   T Result = {};
779   if (!fromJSON(E, Result, P))
780     return false;
781   Out = std::move(Result);
782   return true;
783 }
784 template <typename T>
785 bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
786   if (auto *A = E.getAsArray()) {
787     Out.clear();
788     Out.resize(A->size());
789     for (size_t I = 0; I < A->size(); ++I)
790       if (!fromJSON((*A)[I], Out[I], P.index(I)))
791         return false;
792     return true;
793   }
794   P.report("expected array");
795   return false;
796 }
797 template <typename T>
798 bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
799   if (auto *O = E.getAsObject()) {
800     Out.clear();
801     for (const auto &KV : *O)
802       if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))],
803                     P.field(KV.first)))
804         return false;
805     return true;
806   }
807   P.report("expected object");
808   return false;
809 }
810 
811 // Allow serialization of std::optional<T> for supported T.
812 template <typename T> Value toJSON(const std::optional<T> &Opt) {
813   return Opt ? Value(*Opt) : Value(nullptr);
814 }
815 
816 /// Helper for mapping JSON objects onto protocol structs.
817 ///
818 /// Example:
819 /// \code
820 ///   bool fromJSON(const Value &E, MyStruct &R, Path P) {
821 ///     ObjectMapper O(E, P);
822 ///     // When returning false, error details were already reported.
823 ///     return O && O.map("mandatory_field", R.MandatoryField) &&
824 ///         O.mapOptional("optional_field", R.OptionalField);
825 ///   }
826 /// \endcode
827 class ObjectMapper {
828 public:
829   /// If O is not an object, this mapper is invalid and an error is reported.
830   ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
831     if (!O)
832       P.report("expected object");
833   }
834 
835   /// True if the expression is an object.
836   /// Must be checked before calling map().
837   operator bool() const { return O; }
838 
839   /// Maps a property to a field.
840   /// If the property is missing or invalid, reports an error.
841   template <typename T> bool map(StringLiteral Prop, T &Out) {
842     assert(*this && "Must check this is an object before calling map()");
843     if (const Value *E = O->get(Prop))
844       return fromJSON(*E, Out, P.field(Prop));
845     P.field(Prop).report("missing value");
846     return false;
847   }
848 
849   /// Maps a property to a field, if it exists.
850   /// If the property exists and is invalid, reports an error.
851   /// (Optional requires special handling, because missing keys are OK).
852   template <typename T> bool map(StringLiteral Prop, std::optional<T> &Out) {
853     assert(*this && "Must check this is an object before calling map()");
854     if (const Value *E = O->get(Prop))
855       return fromJSON(*E, Out, P.field(Prop));
856     Out = std::nullopt;
857     return true;
858   }
859 
860   /// Maps a property to a field, if it exists.
861   /// If the property exists and is invalid, reports an error.
862   /// If the property does not exist, Out is unchanged.
863   template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
864     assert(*this && "Must check this is an object before calling map()");
865     if (const Value *E = O->get(Prop))
866       return fromJSON(*E, Out, P.field(Prop));
867     return true;
868   }
869 
870 private:
871   const Object *O;
872   Path P;
873 };
874 
875 /// Parses the provided JSON source, or returns a ParseError.
876 /// The returned Value is self-contained and owns its strings (they do not refer
877 /// to the original source).
878 llvm::Expected<Value> parse(llvm::StringRef JSON);
879 
880 class ParseError : public llvm::ErrorInfo<ParseError> {
881   const char *Msg;
882   unsigned Line, Column, Offset;
883 
884 public:
885   static char ID;
886   ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
887       : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
888   void log(llvm::raw_ostream &OS) const override {
889     OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
890   }
891   std::error_code convertToErrorCode() const override {
892     return llvm::inconvertibleErrorCode();
893   }
894 };
895 
896 /// Version of parse() that converts the parsed value to the type T.
897 /// RootName describes the root object and is used in error messages.
898 template <typename T>
899 Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
900   auto V = parse(JSON);
901   if (!V)
902     return V.takeError();
903   Path::Root R(RootName);
904   T Result;
905   if (fromJSON(*V, Result, R))
906     return std::move(Result);
907   return R.getError();
908 }
909 
910 /// json::OStream allows writing well-formed JSON without materializing
911 /// all structures as json::Value ahead of time.
912 /// It's faster, lower-level, and less safe than OS << json::Value.
913 /// It also allows emitting more constructs, such as comments.
914 ///
915 /// Only one "top-level" object can be written to a stream.
916 /// Simplest usage involves passing lambdas (Blocks) to fill in containers:
917 ///
918 ///   json::OStream J(OS);
919 ///   J.array([&]{
920 ///     for (const Event &E : Events)
921 ///       J.object([&] {
922 ///         J.attribute("timestamp", int64_t(E.Time));
923 ///         J.attributeArray("participants", [&] {
924 ///           for (const Participant &P : E.Participants)
925 ///             J.value(P.toString());
926 ///         });
927 ///       });
928 ///   });
929 ///
930 /// This would produce JSON like:
931 ///
932 ///   [
933 ///     {
934 ///       "timestamp": 19287398741,
935 ///       "participants": [
936 ///         "King Kong",
937 ///         "Miley Cyrus",
938 ///         "Cleopatra"
939 ///       ]
940 ///     },
941 ///     ...
942 ///   ]
943 ///
944 /// The lower level begin/end methods (arrayBegin()) are more flexible but
945 /// care must be taken to pair them correctly:
946 ///
///   json::OStream J(OS);
///   J.arrayBegin();
///   for (const Event &E : Events) {
///     J.objectBegin();
///     J.attribute("timestamp", int64_t(E.Time));
///     J.attributeBegin("participants");
///     J.arrayBegin();
///     for (const Participant &P : E.Participants)
///       J.value(P.toString());
///     J.arrayEnd();
///     J.attributeEnd();
///     J.objectEnd();
///   }
///   J.arrayEnd();
959 ///
960 /// If the call sequence isn't valid JSON, asserts will fire in debug mode.
961 /// This can be mismatched begin()/end() pairs, trying to emit attributes inside
962 /// an array, and so on.
963 /// With asserts disabled, this is undefined behavior.
964 class OStream {
965  public:
966   using Block = llvm::function_ref<void()>;
967   // If IndentSize is nonzero, output is pretty-printed.
968   explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
969       : OS(OS), IndentSize(IndentSize) {
970     Stack.emplace_back();
971   }
972   ~OStream() {
973     assert(Stack.size() == 1 && "Unmatched begin()/end()");
974     assert(Stack.back().Ctx == Singleton);
975     assert(Stack.back().HasValue && "Did not write top-level value");
976   }
977 
978   /// Flushes the underlying ostream. OStream does not buffer internally.
979   void flush() { OS.flush(); }
980 
981   // High level functions to output a value.
982   // Valid at top-level (exactly once), in an attribute value (exactly once),
983   // or in an array (any number of times).
984 
985   /// Emit a self-contained value (number, string, vector<string> etc).
986   void value(const Value &V);
987   /// Emit an array whose elements are emitted in the provided Block.
988   void array(Block Contents) {
989     arrayBegin();
990     Contents();
991     arrayEnd();
992   }
993   /// Emit an object whose elements are emitted in the provided Block.
994   void object(Block Contents) {
995     objectBegin();
996     Contents();
997     objectEnd();
998   }
999   /// Emit an externally-serialized value.
1000   /// The caller must write exactly one valid JSON value to the provided stream.
1001   /// No validation or formatting of this value occurs.
1002   void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
1003     rawValueBegin();
1004     Contents(OS);
1005     rawValueEnd();
1006   }
1007   void rawValue(llvm::StringRef Contents) {
1008     rawValue([&](raw_ostream &OS) { OS << Contents; });
1009   }
1010   /// Emit a JavaScript comment associated with the next printed value.
1011   /// The string must be valid until the next attribute or value is emitted.
1012   /// Comments are not part of standard JSON, and many parsers reject them!
1013   void comment(llvm::StringRef);
1014 
1015   // High level functions to output object attributes.
1016   // Valid only within an object (any number of times).
1017 
1018   /// Emit an attribute whose value is self-contained (number, vector<int> etc).
1019   void attribute(llvm::StringRef Key, const Value& Contents) {
1020     attributeImpl(Key, [&] { value(Contents); });
1021   }
1022   /// Emit an attribute whose value is an array with elements from the Block.
1023   void attributeArray(llvm::StringRef Key, Block Contents) {
1024     attributeImpl(Key, [&] { array(Contents); });
1025   }
1026   /// Emit an attribute whose value is an object with attributes from the Block.
1027   void attributeObject(llvm::StringRef Key, Block Contents) {
1028     attributeImpl(Key, [&] { object(Contents); });
1029   }
1030 
1031   // Low-level begin/end functions to output arrays, objects, and attributes.
1032   // Must be correctly paired. Allowed contexts are as above.
1033 
1034   void arrayBegin();
1035   void arrayEnd();
1036   void objectBegin();
1037   void objectEnd();
1038   void attributeBegin(llvm::StringRef Key);
1039   void attributeEnd();
1040   raw_ostream &rawValueBegin();
1041   void rawValueEnd();
1042 
1043 private:
1044   void attributeImpl(llvm::StringRef Key, Block Contents) {
1045     attributeBegin(Key);
1046     Contents();
1047     attributeEnd();
1048   }
1049 
1050   void valueBegin();
1051   void flushComment();
1052   void newline();
1053 
1054   enum Context {
1055     Singleton, // Top level, or object attribute.
1056     Array,
1057     Object,
1058     RawValue, // External code writing a value to OS directly.
1059   };
1060   struct State {
1061     Context Ctx = Singleton;
1062     bool HasValue = false;
1063   };
1064   llvm::SmallVector<State, 16> Stack; // Never empty.
1065   llvm::StringRef PendingComment;
1066   llvm::raw_ostream &OS;
1067   unsigned IndentSize;
1068   unsigned Indent = 0;
1069 };
1070 
1071 /// Serializes this Value to JSON, writing it to the provided stream.
1072 /// The formatting is compact (no extra whitespace) and deterministic.
1073 /// For pretty-printing, use the formatv() format_provider below.
1074 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
1075   OStream(OS).value(V);
1076   return OS;
1077 }
1078 } // namespace json
1079 
1080 /// Allow printing json::Value with formatv().
1081 /// The default style is basic/compact formatting, like operator<<.
1082 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
1083 template <> struct format_provider<llvm::json::Value> {
1084   static void format(const llvm::json::Value &, raw_ostream &, StringRef);
1085 };
1086 } // namespace llvm
1087 
1088 #endif
1089