xref: /openbsd/gnu/llvm/llvm/include/llvm/Support/JSON.h (revision d415bd75)
1 //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file supports working with JSON data.
11 ///
12 /// It comprises:
13 ///
14 /// - classes which hold dynamically-typed parsed JSON structures
15 ///   These are value types that can be composed, inspected, and modified.
16 ///   See json::Value, and the related types json::Object and json::Array.
17 ///
18 /// - functions to parse JSON text into Values, and to serialize Values to text.
19 ///   See parse(), operator<<, and format_provider.
20 ///
21 /// - a convention and helpers for mapping between json::Value and user-defined
22 ///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
23 ///
24 /// - an output API json::OStream which can emit JSON without materializing
25 ///   all structures as json::Value.
26 ///
27 /// Typically, JSON data would be read from an external source, parsed into
28 /// a Value, and then converted into some native data structure before doing
29 /// real work on it. (And vice versa when writing).
30 ///
31 /// Other serialization mechanisms you may consider:
32 ///
33 /// - YAML is also text-based, and more human-readable than JSON. It's a more
34 ///   complex format and data model, and YAML parsers aren't ubiquitous.
35 ///   YAMLParser.h is a streaming parser suitable for parsing large documents
36 ///   (including JSON, as YAML is a superset). It can be awkward to use
37 ///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
38 ///   declarative than the toJSON/fromJSON conventions here.
39 ///
40 /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
41 ///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
42 ///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
43 ///
44 //===---------------------------------------------------------------------===//
45 
46 #ifndef LLVM_SUPPORT_JSON_H
47 #define LLVM_SUPPORT_JSON_H
48 
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <limits>
#include <map>
#include <memory>
58 
59 namespace llvm {
60 namespace json {
61 
62 // === String encodings ===
63 //
64 // JSON strings are character sequences (not byte sequences like std::string).
65 // We need to know the encoding, and for simplicity only support UTF-8.
66 //
67 //   - When parsing, invalid UTF-8 is a syntax error like any other
68 //
69 //   - When creating Values from strings, callers must ensure they are UTF-8.
70 //        with asserts on, invalid UTF-8 will crash the program
71 //        with asserts off, we'll substitute the replacement character (U+FFFD)
72 //     Callers can use json::isUTF8() and json::fixUTF8() for validation.
73 //
74 //   - When retrieving strings from Values (e.g. asString()), the result will
75 //     always be valid UTF-8.
76 
77 /// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first
/// error.
79 bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
80 /// Replaces invalid UTF-8 sequences in \p S with the replacement character
81 /// (U+FFFD). The returned string is valid UTF-8.
82 /// This is much slower than isUTF8, so test that first.
83 std::string fixUTF8(llvm::StringRef S);
84 
85 class Array;
86 class ObjectKey;
87 class Value;
88 template <typename T> Value toJSON(const std::optional<T> &Opt);
89 
/// An Object is a JSON object, which maps strings to heterogeneous JSON values.
91 /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
92 class Object {
93   using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
94   Storage M;
95 
96 public:
97   using key_type = ObjectKey;
98   using mapped_type = Value;
99   using value_type = Storage::value_type;
100   using iterator = Storage::iterator;
101   using const_iterator = Storage::const_iterator;
102 
103   Object() = default;
104   // KV is a trivial key-value struct for list-initialization.
105   // (using std::pair forces extra copies).
106   struct KV;
107   explicit Object(std::initializer_list<KV> Properties);
108 
begin()109   iterator begin() { return M.begin(); }
begin()110   const_iterator begin() const { return M.begin(); }
end()111   iterator end() { return M.end(); }
end()112   const_iterator end() const { return M.end(); }
113 
empty()114   bool empty() const { return M.empty(); }
size()115   size_t size() const { return M.size(); }
116 
clear()117   void clear() { M.clear(); }
118   std::pair<iterator, bool> insert(KV E);
119   template <typename... Ts>
try_emplace(const ObjectKey & K,Ts &&...Args)120   std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
121     return M.try_emplace(K, std::forward<Ts>(Args)...);
122   }
123   template <typename... Ts>
try_emplace(ObjectKey && K,Ts &&...Args)124   std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
125     return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
126   }
127   bool erase(StringRef K);
erase(iterator I)128   void erase(iterator I) { M.erase(I); }
129 
find(StringRef K)130   iterator find(StringRef K) { return M.find_as(K); }
find(StringRef K)131   const_iterator find(StringRef K) const { return M.find_as(K); }
132   // operator[] acts as if Value was default-constructible as null.
133   Value &operator[](const ObjectKey &K);
134   Value &operator[](ObjectKey &&K);
135   // Look up a property, returning nullptr if it doesn't exist.
136   Value *get(StringRef K);
137   const Value *get(StringRef K) const;
138   // Typed accessors return std::nullopt/nullptr if
139   //   - the property doesn't exist
140   //   - or it has the wrong type
141   std::optional<std::nullptr_t> getNull(StringRef K) const;
142   std::optional<bool> getBoolean(StringRef K) const;
143   std::optional<double> getNumber(StringRef K) const;
144   std::optional<int64_t> getInteger(StringRef K) const;
145   std::optional<llvm::StringRef> getString(StringRef K) const;
146   const json::Object *getObject(StringRef K) const;
147   json::Object *getObject(StringRef K);
148   const json::Array *getArray(StringRef K) const;
149   json::Array *getArray(StringRef K);
150 };
151 bool operator==(const Object &LHS, const Object &RHS);
152 inline bool operator!=(const Object &LHS, const Object &RHS) {
153   return !(LHS == RHS);
154 }
155 
156 /// An Array is a JSON array, which contains heterogeneous JSON values.
157 /// It simulates std::vector<Value>.
158 class Array {
159   std::vector<Value> V;
160 
161 public:
162   using value_type = Value;
163   using iterator = std::vector<Value>::iterator;
164   using const_iterator = std::vector<Value>::const_iterator;
165 
166   Array() = default;
167   explicit Array(std::initializer_list<Value> Elements);
Array(const Collection & C)168   template <typename Collection> explicit Array(const Collection &C) {
169     for (const auto &V : C)
170       emplace_back(V);
171   }
172 
173   Value &operator[](size_t I);
174   const Value &operator[](size_t I) const;
175   Value &front();
176   const Value &front() const;
177   Value &back();
178   const Value &back() const;
179   Value *data();
180   const Value *data() const;
181 
182   iterator begin();
183   const_iterator begin() const;
184   iterator end();
185   const_iterator end() const;
186 
187   bool empty() const;
188   size_t size() const;
189   void reserve(size_t S);
190 
191   void clear();
192   void push_back(const Value &E);
193   void push_back(Value &&E);
194   template <typename... Args> void emplace_back(Args &&...A);
195   void pop_back();
196   iterator insert(const_iterator P, const Value &E);
197   iterator insert(const_iterator P, Value &&E);
198   template <typename It> iterator insert(const_iterator P, It A, It Z);
199   template <typename... Args> iterator emplace(const_iterator P, Args &&...A);
200 
201   friend bool operator==(const Array &L, const Array &R);
202 };
203 inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
204 
/// A Value is a JSON value of unknown type.
206 /// They can be copied, but should generally be moved.
207 ///
208 /// === Composing values ===
209 ///
210 /// You can implicitly construct Values from:
211 ///   - strings: std::string, SmallString, formatv, StringRef, char*
212 ///              (char*, and StringRef are references, not copies!)
213 ///   - numbers
214 ///   - booleans
215 ///   - null: nullptr
216 ///   - arrays: {"foo", 42.0, false}
217 ///   - serializable things: types with toJSON(const T&)->Value, found by ADL
218 ///
219 /// They can also be constructed from object/array helpers:
220 ///   - json::Object is a type like map<ObjectKey, Value>
221 ///   - json::Array is a type like vector<Value>
222 /// These can be list-initialized, or used to build up collections in a loop.
/// json::Array(Collection) converts all items in a collection to Values.
224 ///
225 /// === Inspecting values ===
226 ///
227 /// Each Value is one of the JSON kinds:
228 ///   null    (nullptr_t)
229 ///   boolean (bool)
230 ///   number  (double, int64 or uint64)
231 ///   string  (StringRef)
232 ///   array   (json::Array)
233 ///   object  (json::Object)
234 ///
235 /// The kind can be queried directly, or implicitly via the typed accessors:
///   if (std::optional<StringRef> S = E.getAsString())
237 ///     assert(E.kind() == Value::String);
238 ///
239 /// Array and Object also have typed indexing accessors for easy traversal:
240 ///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
241 ///   if (Object* O = E->getAsObject())
242 ///     if (Object* Opts = O->getObject("options"))
243 ///       if (std::optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->get("font")->kind() == Value::String);
245 ///
246 /// === Converting JSON values to C++ types ===
247 ///
248 /// The convention is to have a deserializer function findable via ADL:
249 ///     fromJSON(const json::Value&, T&, Path) -> bool
250 ///
251 /// The return value indicates overall success, and Path is used for precise
252 /// error reporting. (The Path::Root passed in at the top level fromJSON call
253 /// captures any nested error and can render it in context).
254 /// If conversion fails, fromJSON calls Path::report() and immediately returns.
255 /// This ensures that the first fatal error survives.
256 ///
257 /// Deserializers are provided for:
258 ///   - bool
259 ///   - int and int64_t
260 ///   - double
261 ///   - std::string
262 ///   - vector<T>, where T is deserializable
263 ///   - map<string, T>, where T is deserializable
264 ///   - std::optional<T>, where T is deserializable
265 /// ObjectMapper can help writing fromJSON() functions for object types.
266 ///
267 /// For conversion in the other direction, the serializer function is:
268 ///    toJSON(const T&) -> json::Value
269 /// If this exists, then it also allows constructing Value from T, and can
270 /// be used to serialize vector<T>, map<string, T>, and std::optional<T>.
271 ///
272 /// === Serialization ===
273 ///
274 /// Values can be serialized to JSON:
275 ///   1) raw_ostream << Value                    // Basic formatting.
276 ///   2) raw_ostream << formatv("{0}", Value)    // Basic formatting.
277 ///   3) raw_ostream << formatv("{0:2}", Value)  // Pretty-print with indent 2.
278 ///
279 /// And parsed:
280 ///   Expected<Value> E = json::parse("[1, 2, null]");
281 ///   assert(E && E->kind() == Value::Array);
282 class Value {
283 public:
284   enum Kind {
285     Null,
286     Boolean,
287     /// Number values can store both int64s and doubles at full precision,
288     /// depending on what they were constructed/parsed from.
289     Number,
290     String,
291     Array,
292     Object,
293   };
294 
295   // It would be nice to have Value() be null. But that would make {} null too.
Value(const Value & M)296   Value(const Value &M) { copyFrom(M); }
Value(Value && M)297   Value(Value &&M) { moveFrom(std::move(M)); }
298   Value(std::initializer_list<Value> Elements);
Value(json::Array && Elements)299   Value(json::Array &&Elements) : Type(T_Array) {
300     create<json::Array>(std::move(Elements));
301   }
302   template <typename Elt>
Value(const std::vector<Elt> & C)303   Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
Value(json::Object && Properties)304   Value(json::Object &&Properties) : Type(T_Object) {
305     create<json::Object>(std::move(Properties));
306   }
307   template <typename Elt>
Value(const std::map<std::string,Elt> & C)308   Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
309   // Strings: types with value semantics. Must be valid UTF-8.
Value(std::string V)310   Value(std::string V) : Type(T_String) {
311     if (LLVM_UNLIKELY(!isUTF8(V))) {
312       assert(false && "Invalid UTF-8 in value used as JSON");
313       V = fixUTF8(std::move(V));
314     }
315     create<std::string>(std::move(V));
316   }
Value(const llvm::SmallVectorImpl<char> & V)317   Value(const llvm::SmallVectorImpl<char> &V)
318       : Value(std::string(V.begin(), V.end())) {}
Value(const llvm::formatv_object_base & V)319   Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
320   // Strings: types with reference semantics. Must be valid UTF-8.
Value(StringRef V)321   Value(StringRef V) : Type(T_StringRef) {
322     create<llvm::StringRef>(V);
323     if (LLVM_UNLIKELY(!isUTF8(V))) {
324       assert(false && "Invalid UTF-8 in value used as JSON");
325       *this = Value(fixUTF8(V));
326     }
327   }
Value(const char * V)328   Value(const char *V) : Value(StringRef(V)) {}
Value(std::nullptr_t)329   Value(std::nullptr_t) : Type(T_Null) {}
330   // Boolean (disallow implicit conversions).
331   // (The last template parameter is a dummy to keep templates distinct.)
332   template <typename T,
333             typename = std::enable_if_t<std::is_same<T, bool>::value>,
334             bool = false>
Value(T B)335   Value(T B) : Type(T_Boolean) {
336     create<bool>(B);
337   }
338 
339   // Unsigned 64-bit long integers.
340   template <typename T,
341             typename = std::enable_if_t<std::is_same<T, uint64_t>::value>,
342             bool = false, bool = false>
Value(T V)343   Value(T V) : Type(T_UINT64) {
344     create<uint64_t>(uint64_t{V});
345   }
346 
347   // Integers (except boolean and uint64_t).
348   // Must be non-narrowing convertible to int64_t.
349   template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>,
350             typename = std::enable_if_t<!std::is_same<T, bool>::value>,
351             typename = std::enable_if_t<!std::is_same<T, uint64_t>::value>>
Value(T I)352   Value(T I) : Type(T_Integer) {
353     create<int64_t>(int64_t{I});
354   }
355   // Floating point. Must be non-narrowing convertible to double.
356   template <typename T,
357             typename = std::enable_if_t<std::is_floating_point<T>::value>,
358             double * = nullptr>
Value(T D)359   Value(T D) : Type(T_Double) {
360     create<double>(double{D});
361   }
362   // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
363   template <typename T,
364             typename = std::enable_if_t<std::is_same<
365                 Value, decltype(toJSON(*(const T *)nullptr))>::value>,
366             Value * = nullptr>
Value(const T & V)367   Value(const T &V) : Value(toJSON(V)) {}
368 
369   Value &operator=(const Value &M) {
370     destroy();
371     copyFrom(M);
372     return *this;
373   }
374   Value &operator=(Value &&M) {
375     destroy();
376     moveFrom(std::move(M));
377     return *this;
378   }
~Value()379   ~Value() { destroy(); }
380 
kind()381   Kind kind() const {
382     switch (Type) {
383     case T_Null:
384       return Null;
385     case T_Boolean:
386       return Boolean;
387     case T_Double:
388     case T_Integer:
389     case T_UINT64:
390       return Number;
391     case T_String:
392     case T_StringRef:
393       return String;
394     case T_Object:
395       return Object;
396     case T_Array:
397       return Array;
398     }
399     llvm_unreachable("Unknown kind");
400   }
401 
402   // Typed accessors return std::nullopt/nullptr if the Value is not of this
403   // type.
getAsNull()404   std::optional<std::nullptr_t> getAsNull() const {
405     if (LLVM_LIKELY(Type == T_Null))
406       return nullptr;
407     return std::nullopt;
408   }
getAsBoolean()409   std::optional<bool> getAsBoolean() const {
410     if (LLVM_LIKELY(Type == T_Boolean))
411       return as<bool>();
412     return std::nullopt;
413   }
getAsNumber()414   std::optional<double> getAsNumber() const {
415     if (LLVM_LIKELY(Type == T_Double))
416       return as<double>();
417     if (LLVM_LIKELY(Type == T_Integer))
418       return as<int64_t>();
419     if (LLVM_LIKELY(Type == T_UINT64))
420       return as<uint64_t>();
421     return std::nullopt;
422   }
423   // Succeeds if the Value is a Number, and exactly representable as int64_t.
getAsInteger()424   std::optional<int64_t> getAsInteger() const {
425     if (LLVM_LIKELY(Type == T_Integer))
426       return as<int64_t>();
427     if (LLVM_LIKELY(Type == T_Double)) {
428       double D = as<double>();
429       if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
430                       D >= double(std::numeric_limits<int64_t>::min()) &&
431                       D <= double(std::numeric_limits<int64_t>::max())))
432         return D;
433     }
434     return std::nullopt;
435   }
getAsUINT64()436   std::optional<uint64_t> getAsUINT64() const {
437     if (Type == T_UINT64)
438       return as<uint64_t>();
439     else if (Type == T_Integer) {
440       int64_t N = as<int64_t>();
441       if (N >= 0)
442         return as<uint64_t>();
443     }
444     return std::nullopt;
445   }
getAsString()446   std::optional<llvm::StringRef> getAsString() const {
447     if (Type == T_String)
448       return llvm::StringRef(as<std::string>());
449     if (LLVM_LIKELY(Type == T_StringRef))
450       return as<llvm::StringRef>();
451     return std::nullopt;
452   }
getAsObject()453   const json::Object *getAsObject() const {
454     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
455   }
getAsObject()456   json::Object *getAsObject() {
457     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
458   }
getAsArray()459   const json::Array *getAsArray() const {
460     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
461   }
getAsArray()462   json::Array *getAsArray() {
463     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
464   }
465 
466 private:
467   void destroy();
468   void copyFrom(const Value &M);
469   // We allow moving from *const* Values, by marking all members as mutable!
470   // This hack is needed to support initializer-list syntax efficiently.
471   // (std::initializer_list<T> is a container of const T).
472   void moveFrom(const Value &&M);
473   friend class Array;
474   friend class Object;
475 
create(U &&...V)476   template <typename T, typename... U> void create(U &&... V) {
477     new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
478   }
as()479   template <typename T> T &as() const {
480     // Using this two-step static_cast via void * instead of reinterpret_cast
481     // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
482     void *Storage = static_cast<void *>(&Union);
483     return *static_cast<T *>(Storage);
484   }
485 
486   friend class OStream;
487 
488   enum ValueType : char16_t {
489     T_Null,
490     T_Boolean,
491     T_Double,
492     T_Integer,
493     T_UINT64,
494     T_StringRef,
495     T_String,
496     T_Object,
497     T_Array,
498   };
499   // All members mutable, see moveFrom().
500   mutable ValueType Type;
501   mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t,
502                                       llvm::StringRef, std::string, json::Array,
503                                       json::Object>
504       Union;
505   friend bool operator==(const Value &, const Value &);
506 };
507 
508 bool operator==(const Value &, const Value &);
509 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
510 
511 // Array Methods
512 inline Value &Array::operator[](size_t I) { return V[I]; }
513 inline const Value &Array::operator[](size_t I) const { return V[I]; }
front()514 inline Value &Array::front() { return V.front(); }
front()515 inline const Value &Array::front() const { return V.front(); }
back()516 inline Value &Array::back() { return V.back(); }
back()517 inline const Value &Array::back() const { return V.back(); }
data()518 inline Value *Array::data() { return V.data(); }
data()519 inline const Value *Array::data() const { return V.data(); }
520 
begin()521 inline typename Array::iterator Array::begin() { return V.begin(); }
begin()522 inline typename Array::const_iterator Array::begin() const { return V.begin(); }
end()523 inline typename Array::iterator Array::end() { return V.end(); }
end()524 inline typename Array::const_iterator Array::end() const { return V.end(); }
525 
empty()526 inline bool Array::empty() const { return V.empty(); }
size()527 inline size_t Array::size() const { return V.size(); }
reserve(size_t S)528 inline void Array::reserve(size_t S) { V.reserve(S); }
529 
clear()530 inline void Array::clear() { V.clear(); }
push_back(const Value & E)531 inline void Array::push_back(const Value &E) { V.push_back(E); }
push_back(Value && E)532 inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); }
emplace_back(Args &&...A)533 template <typename... Args> inline void Array::emplace_back(Args &&...A) {
534   V.emplace_back(std::forward<Args>(A)...);
535 }
pop_back()536 inline void Array::pop_back() { V.pop_back(); }
insert(const_iterator P,const Value & E)537 inline typename Array::iterator Array::insert(const_iterator P, const Value &E) {
538   return V.insert(P, E);
539 }
insert(const_iterator P,Value && E)540 inline typename Array::iterator Array::insert(const_iterator P, Value &&E) {
541   return V.insert(P, std::move(E));
542 }
543 template <typename It>
insert(const_iterator P,It A,It Z)544 inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) {
545   return V.insert(P, A, Z);
546 }
547 template <typename... Args>
emplace(const_iterator P,Args &&...A)548 inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) {
549   return V.emplace(P, std::forward<Args>(A)...);
550 }
551 inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
552 
/// ObjectKey is used to capture keys in Object. Like Value but:
554 ///   - only strings are allowed
555 ///   - it's optimized for the string literal case (Owned == nullptr)
556 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
557 class ObjectKey {
558 public:
ObjectKey(const char * S)559   ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
ObjectKey(std::string S)560   ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
561     if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
562       assert(false && "Invalid UTF-8 in value used as JSON");
563       *Owned = fixUTF8(std::move(*Owned));
564     }
565     Data = *Owned;
566   }
ObjectKey(llvm::StringRef S)567   ObjectKey(llvm::StringRef S) : Data(S) {
568     if (LLVM_UNLIKELY(!isUTF8(Data))) {
569       assert(false && "Invalid UTF-8 in value used as JSON");
570       *this = ObjectKey(fixUTF8(S));
571     }
572   }
ObjectKey(const llvm::SmallVectorImpl<char> & V)573   ObjectKey(const llvm::SmallVectorImpl<char> &V)
574       : ObjectKey(std::string(V.begin(), V.end())) {}
ObjectKey(const llvm::formatv_object_base & V)575   ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
576 
ObjectKey(const ObjectKey & C)577   ObjectKey(const ObjectKey &C) { *this = C; }
ObjectKey(ObjectKey && C)578   ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
579   ObjectKey &operator=(const ObjectKey &C) {
580     if (C.Owned) {
581       Owned.reset(new std::string(*C.Owned));
582       Data = *Owned;
583     } else {
584       Data = C.Data;
585     }
586     return *this;
587   }
588   ObjectKey &operator=(ObjectKey &&) = default;
589 
StringRef()590   operator llvm::StringRef() const { return Data; }
str()591   std::string str() const { return Data.str(); }
592 
593 private:
594   // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
595   // could be 2 pointers at most.
596   std::unique_ptr<std::string> Owned;
597   llvm::StringRef Data;
598 };
599 
600 inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
601   return llvm::StringRef(L) == llvm::StringRef(R);
602 }
603 inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
604   return !(L == R);
605 }
606 inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
607   return StringRef(L) < StringRef(R);
608 }
609 
610 struct Object::KV {
611   ObjectKey K;
612   Value V;
613 };
614 
Object(std::initializer_list<KV> Properties)615 inline Object::Object(std::initializer_list<KV> Properties) {
616   for (const auto &P : Properties) {
617     auto R = try_emplace(P.K, nullptr);
618     if (R.second)
619       R.first->getSecond().moveFrom(std::move(P.V));
620   }
621 }
insert(KV E)622 inline std::pair<Object::iterator, bool> Object::insert(KV E) {
623   return try_emplace(std::move(E.K), std::move(E.V));
624 }
erase(StringRef K)625 inline bool Object::erase(StringRef K) {
626   return M.erase(ObjectKey(K));
627 }
628 
629 /// A "cursor" marking a position within a Value.
630 /// The Value is a tree, and this is the path from the root to the current node.
631 /// This is used to associate errors with particular subobjects.
632 class Path {
633 public:
634   class Root;
635 
636   /// Records that the value at the current path is invalid.
637   /// Message is e.g. "expected number" and becomes part of the final error.
638   /// This overwrites any previously written error message in the root.
639   void report(llvm::StringLiteral Message);
640 
641   /// The root may be treated as a Path.
Path(Root & R)642   Path(Root &R) : Parent(nullptr), Seg(&R) {}
643   /// Derives a path for an array element: this[Index]
index(unsigned Index)644   Path index(unsigned Index) const { return Path(this, Segment(Index)); }
645   /// Derives a path for an object field: this.Field
field(StringRef Field)646   Path field(StringRef Field) const { return Path(this, Segment(Field)); }
647 
648 private:
649   /// One element in a JSON path: an object field (.foo) or array index [27].
650   /// Exception: the root Path encodes a pointer to the Path::Root.
651   class Segment {
652     uintptr_t Pointer;
653     unsigned Offset;
654 
655   public:
656     Segment() = default;
Segment(Root * R)657     Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
Segment(llvm::StringRef Field)658     Segment(llvm::StringRef Field)
659         : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
660           Offset(static_cast<unsigned>(Field.size())) {}
Segment(unsigned Index)661     Segment(unsigned Index) : Pointer(0), Offset(Index) {}
662 
isField()663     bool isField() const { return Pointer != 0; }
field()664     StringRef field() const {
665       return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
666     }
index()667     unsigned index() const { return Offset; }
root()668     Root *root() const { return reinterpret_cast<Root *>(Pointer); }
669   };
670 
671   const Path *Parent;
672   Segment Seg;
673 
Path(const Path * Parent,Segment S)674   Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
675 };
676 
677 /// The root is the trivial Path to the root value.
678 /// It also stores the latest reported error and the path where it occurred.
679 class Path::Root {
680   llvm::StringRef Name;
681   llvm::StringLiteral ErrorMessage;
682   std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
683 
684   friend void Path::report(llvm::StringLiteral Message);
685 
686 public:
Name(Name)687   Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
688   // No copy/move allowed as there are incoming pointers.
689   Root(Root &&) = delete;
690   Root &operator=(Root &&) = delete;
691   Root(const Root &) = delete;
692   Root &operator=(const Root &) = delete;
693 
694   /// Returns the last error reported, or else a generic error.
695   Error getError() const;
696   /// Print the root value with the error shown inline as a comment.
697   /// Unrelated parts of the value are elided for brevity, e.g.
698   ///   {
699   ///      "id": 42,
700   ///      "name": /* expected string */ null,
701   ///      "properties": { ... }
702   ///   }
703   void printErrorContext(const Value &, llvm::raw_ostream &) const;
704 };
705 
706 // Standard deserializers are provided for primitive types.
707 // See comments on Value.
fromJSON(const Value & E,std::string & Out,Path P)708 inline bool fromJSON(const Value &E, std::string &Out, Path P) {
709   if (auto S = E.getAsString()) {
710     Out = std::string(*S);
711     return true;
712   }
713   P.report("expected string");
714   return false;
715 }
fromJSON(const Value & E,int & Out,Path P)716 inline bool fromJSON(const Value &E, int &Out, Path P) {
717   if (auto S = E.getAsInteger()) {
718     Out = *S;
719     return true;
720   }
721   P.report("expected integer");
722   return false;
723 }
fromJSON(const Value & E,int64_t & Out,Path P)724 inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
725   if (auto S = E.getAsInteger()) {
726     Out = *S;
727     return true;
728   }
729   P.report("expected integer");
730   return false;
731 }
fromJSON(const Value & E,double & Out,Path P)732 inline bool fromJSON(const Value &E, double &Out, Path P) {
733   if (auto S = E.getAsNumber()) {
734     Out = *S;
735     return true;
736   }
737   P.report("expected number");
738   return false;
739 }
fromJSON(const Value & E,bool & Out,Path P)740 inline bool fromJSON(const Value &E, bool &Out, Path P) {
741   if (auto S = E.getAsBoolean()) {
742     Out = *S;
743     return true;
744   }
745   P.report("expected boolean");
746   return false;
747 }
fromJSON(const Value & E,uint64_t & Out,Path P)748 inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {
749   if (auto S = E.getAsUINT64()) {
750     Out = *S;
751     return true;
752   }
753   P.report("expected uint64_t");
754   return false;
755 }
fromJSON(const Value & E,std::nullptr_t & Out,Path P)756 inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
757   if (auto S = E.getAsNull()) {
758     Out = *S;
759     return true;
760   }
761   P.report("expected null");
762   return false;
763 }
764 template <typename T>
fromJSON(const Value & E,std::optional<T> & Out,Path P)765 bool fromJSON(const Value &E, std::optional<T> &Out, Path P) {
766   if (E.getAsNull()) {
767     Out = std::nullopt;
768     return true;
769   }
770   T Result;
771   if (!fromJSON(E, Result, P))
772     return false;
773   Out = std::move(Result);
774   return true;
775 }
776 template <typename T>
fromJSON(const Value & E,std::vector<T> & Out,Path P)777 bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
778   if (auto *A = E.getAsArray()) {
779     Out.clear();
780     Out.resize(A->size());
781     for (size_t I = 0; I < A->size(); ++I)
782       if (!fromJSON((*A)[I], Out[I], P.index(I)))
783         return false;
784     return true;
785   }
786   P.report("expected array");
787   return false;
788 }
789 template <typename T>
fromJSON(const Value & E,std::map<std::string,T> & Out,Path P)790 bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
791   if (auto *O = E.getAsObject()) {
792     Out.clear();
793     for (const auto &KV : *O)
794       if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))],
795                     P.field(KV.first)))
796         return false;
797     return true;
798   }
799   P.report("expected object");
800   return false;
801 }
802 
803 // Allow serialization of std::optional<T> for supported T.
toJSON(const std::optional<T> & Opt)804 template <typename T> Value toJSON(const std::optional<T> &Opt) {
805   return Opt ? Value(*Opt) : Value(nullptr);
806 }
807 
808 /// Helper for mapping JSON objects onto protocol structs.
809 ///
810 /// Example:
811 /// \code
812 ///   bool fromJSON(const Value &E, MyStruct &R, Path P) {
813 ///     ObjectMapper O(E, P);
814 ///     // When returning false, error details were already reported.
815 ///     return O && O.map("mandatory_field", R.MandatoryField) &&
816 ///         O.mapOptional("optional_field", R.OptionalField);
817 ///   }
818 /// \endcode
819 class ObjectMapper {
820 public:
821   /// If O is not an object, this mapper is invalid and an error is reported.
ObjectMapper(const Value & E,Path P)822   ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
823     if (!O)
824       P.report("expected object");
825   }
826 
827   /// True if the expression is an object.
828   /// Must be checked before calling map().
829   operator bool() const { return O; }
830 
831   /// Maps a property to a field.
832   /// If the property is missing or invalid, reports an error.
map(StringLiteral Prop,T & Out)833   template <typename T> bool map(StringLiteral Prop, T &Out) {
834     assert(*this && "Must check this is an object before calling map()");
835     if (const Value *E = O->get(Prop))
836       return fromJSON(*E, Out, P.field(Prop));
837     P.field(Prop).report("missing value");
838     return false;
839   }
840 
841   /// Maps a property to a field, if it exists.
842   /// If the property exists and is invalid, reports an error.
843   /// (Optional requires special handling, because missing keys are OK).
map(StringLiteral Prop,std::optional<T> & Out)844   template <typename T> bool map(StringLiteral Prop, std::optional<T> &Out) {
845     assert(*this && "Must check this is an object before calling map()");
846     if (const Value *E = O->get(Prop))
847       return fromJSON(*E, Out, P.field(Prop));
848     Out = std::nullopt;
849     return true;
850   }
851 
852   /// Maps a property to a field, if it exists.
853   /// If the property exists and is invalid, reports an error.
854   /// If the property does not exist, Out is unchanged.
mapOptional(StringLiteral Prop,T & Out)855   template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
856     assert(*this && "Must check this is an object before calling map()");
857     if (const Value *E = O->get(Prop))
858       return fromJSON(*E, Out, P.field(Prop));
859     return true;
860   }
861 
862 private:
863   const Object *O;
864   Path P;
865 };
866 
867 /// Parses the provided JSON source, or returns a ParseError.
868 /// The returned Value is self-contained and owns its strings (they do not refer
869 /// to the original source).
870 llvm::Expected<Value> parse(llvm::StringRef JSON);
871 
872 class ParseError : public llvm::ErrorInfo<ParseError> {
873   const char *Msg;
874   unsigned Line, Column, Offset;
875 
876 public:
877   static char ID;
ParseError(const char * Msg,unsigned Line,unsigned Column,unsigned Offset)878   ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
879       : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
log(llvm::raw_ostream & OS)880   void log(llvm::raw_ostream &OS) const override {
881     OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
882   }
convertToErrorCode()883   std::error_code convertToErrorCode() const override {
884     return llvm::inconvertibleErrorCode();
885   }
886 };
887 
888 /// Version of parse() that converts the parsed value to the type T.
889 /// RootName describes the root object and is used in error messages.
890 template <typename T>
891 Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
892   auto V = parse(JSON);
893   if (!V)
894     return V.takeError();
895   Path::Root R(RootName);
896   T Result;
897   if (fromJSON(*V, Result, R))
898     return std::move(Result);
899   return R.getError();
900 }
901 
902 /// json::OStream allows writing well-formed JSON without materializing
903 /// all structures as json::Value ahead of time.
904 /// It's faster, lower-level, and less safe than OS << json::Value.
905 /// It also allows emitting more constructs, such as comments.
906 ///
907 /// Only one "top-level" object can be written to a stream.
908 /// Simplest usage involves passing lambdas (Blocks) to fill in containers:
909 ///
910 ///   json::OStream J(OS);
911 ///   J.array([&]{
912 ///     for (const Event &E : Events)
913 ///       J.object([&] {
914 ///         J.attribute("timestamp", int64_t(E.Time));
915 ///         J.attributeArray("participants", [&] {
916 ///           for (const Participant &P : E.Participants)
917 ///             J.value(P.toString());
918 ///         });
919 ///       });
920 ///   });
921 ///
922 /// This would produce JSON like:
923 ///
924 ///   [
925 ///     {
926 ///       "timestamp": 19287398741,
927 ///       "participants": [
928 ///         "King Kong",
929 ///         "Miley Cyrus",
930 ///         "Cleopatra"
931 ///       ]
932 ///     },
933 ///     ...
934 ///   ]
935 ///
936 /// The lower level begin/end methods (arrayBegin()) are more flexible but
937 /// care must be taken to pair them correctly:
938 ///
939 ///   json::OStream J(OS);
940 ///   J.arrayBegin();
941 ///   for (const Event &E : Events) {
942 ///     J.objectBegin();
943 ///     J.attribute("timestamp", int64_t(E.Time));
944 ///     J.attributeBegin("participants");
945 ///     for (const Participant &P : E.Participants)
946 ///       J.value(P.toString());
947 ///     J.attributeEnd();
948 ///     J.objectEnd();
949 ///   }
950 ///   J.arrayEnd();
951 ///
952 /// If the call sequence isn't valid JSON, asserts will fire in debug mode.
953 /// This can be mismatched begin()/end() pairs, trying to emit attributes inside
954 /// an array, and so on.
955 /// With asserts disabled, this is undefined behavior.
956 class OStream {
957  public:
958   using Block = llvm::function_ref<void()>;
959   // If IndentSize is nonzero, output is pretty-printed.
960   explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
OS(OS)961       : OS(OS), IndentSize(IndentSize) {
962     Stack.emplace_back();
963   }
~OStream()964   ~OStream() {
965     assert(Stack.size() == 1 && "Unmatched begin()/end()");
966     assert(Stack.back().Ctx == Singleton);
967     assert(Stack.back().HasValue && "Did not write top-level value");
968   }
969 
970   /// Flushes the underlying ostream. OStream does not buffer internally.
flush()971   void flush() { OS.flush(); }
972 
973   // High level functions to output a value.
974   // Valid at top-level (exactly once), in an attribute value (exactly once),
975   // or in an array (any number of times).
976 
977   /// Emit a self-contained value (number, string, vector<string> etc).
978   void value(const Value &V);
979   /// Emit an array whose elements are emitted in the provided Block.
array(Block Contents)980   void array(Block Contents) {
981     arrayBegin();
982     Contents();
983     arrayEnd();
984   }
985   /// Emit an object whose elements are emitted in the provided Block.
object(Block Contents)986   void object(Block Contents) {
987     objectBegin();
988     Contents();
989     objectEnd();
990   }
991   /// Emit an externally-serialized value.
992   /// The caller must write exactly one valid JSON value to the provided stream.
993   /// No validation or formatting of this value occurs.
rawValue(llvm::function_ref<void (raw_ostream &)> Contents)994   void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
995     rawValueBegin();
996     Contents(OS);
997     rawValueEnd();
998   }
rawValue(llvm::StringRef Contents)999   void rawValue(llvm::StringRef Contents) {
1000     rawValue([&](raw_ostream &OS) { OS << Contents; });
1001   }
1002   /// Emit a JavaScript comment associated with the next printed value.
1003   /// The string must be valid until the next attribute or value is emitted.
1004   /// Comments are not part of standard JSON, and many parsers reject them!
1005   void comment(llvm::StringRef);
1006 
1007   // High level functions to output object attributes.
1008   // Valid only within an object (any number of times).
1009 
1010   /// Emit an attribute whose value is self-contained (number, vector<int> etc).
attribute(llvm::StringRef Key,const Value & Contents)1011   void attribute(llvm::StringRef Key, const Value& Contents) {
1012     attributeImpl(Key, [&] { value(Contents); });
1013   }
1014   /// Emit an attribute whose value is an array with elements from the Block.
attributeArray(llvm::StringRef Key,Block Contents)1015   void attributeArray(llvm::StringRef Key, Block Contents) {
1016     attributeImpl(Key, [&] { array(Contents); });
1017   }
1018   /// Emit an attribute whose value is an object with attributes from the Block.
attributeObject(llvm::StringRef Key,Block Contents)1019   void attributeObject(llvm::StringRef Key, Block Contents) {
1020     attributeImpl(Key, [&] { object(Contents); });
1021   }
1022 
1023   // Low-level begin/end functions to output arrays, objects, and attributes.
1024   // Must be correctly paired. Allowed contexts are as above.
1025 
1026   void arrayBegin();
1027   void arrayEnd();
1028   void objectBegin();
1029   void objectEnd();
1030   void attributeBegin(llvm::StringRef Key);
1031   void attributeEnd();
1032   raw_ostream &rawValueBegin();
1033   void rawValueEnd();
1034 
1035 private:
attributeImpl(llvm::StringRef Key,Block Contents)1036   void attributeImpl(llvm::StringRef Key, Block Contents) {
1037     attributeBegin(Key);
1038     Contents();
1039     attributeEnd();
1040   }
1041 
1042   void valueBegin();
1043   void flushComment();
1044   void newline();
1045 
1046   enum Context {
1047     Singleton, // Top level, or object attribute.
1048     Array,
1049     Object,
1050     RawValue, // External code writing a value to OS directly.
1051   };
1052   struct State {
1053     Context Ctx = Singleton;
1054     bool HasValue = false;
1055   };
1056   llvm::SmallVector<State, 16> Stack; // Never empty.
1057   llvm::StringRef PendingComment;
1058   llvm::raw_ostream &OS;
1059   unsigned IndentSize;
1060   unsigned Indent = 0;
1061 };
1062 
1063 /// Serializes this Value to JSON, writing it to the provided stream.
1064 /// The formatting is compact (no extra whitespace) and deterministic.
1065 /// For pretty-printing, use the formatv() format_provider below.
1066 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
1067   OStream(OS).value(V);
1068   return OS;
1069 }
1070 } // namespace json
1071 
1072 /// Allow printing json::Value with formatv().
1073 /// The default style is basic/compact formatting, like operator<<.
1074 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
1075 template <> struct format_provider<llvm::json::Value> {
1076   static void format(const llvm::json::Value &, raw_ostream &, StringRef);
1077 };
1078 } // namespace llvm
1079 
1080 #endif
1081