1 //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file supports working with JSON data.
11 ///
12 /// It comprises:
13 ///
14 /// - classes which hold dynamically-typed parsed JSON structures
15 ///   These are value types that can be composed, inspected, and modified.
16 ///   See json::Value, and the related types json::Object and json::Array.
17 ///
18 /// - functions to parse JSON text into Values, and to serialize Values to text.
19 ///   See parse(), operator<<, and format_provider.
20 ///
21 /// - a convention and helpers for mapping between json::Value and user-defined
22 ///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
23 ///
24 /// - an output API json::OStream which can emit JSON without materializing
25 ///   all structures as json::Value.
26 ///
27 /// Typically, JSON data would be read from an external source, parsed into
28 /// a Value, and then converted into some native data structure before doing
29 /// real work on it. (And vice versa when writing).
30 ///
31 /// Other serialization mechanisms you may consider:
32 ///
33 /// - YAML is also text-based, and more human-readable than JSON. It's a more
34 ///   complex format and data model, and YAML parsers aren't ubiquitous.
35 ///   YAMLParser.h is a streaming parser suitable for parsing large documents
36 ///   (including JSON, as YAML is a superset). It can be awkward to use
37 ///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
38 ///   declarative than the toJSON/fromJSON conventions here.
39 ///
40 /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
41 ///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
42 ///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
43 ///
44 //===---------------------------------------------------------------------===//
45 
46 #ifndef LLVM_SUPPORT_JSON_H
47 #define LLVM_SUPPORT_JSON_H
48 
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <limits>
#include <map>
#include <memory>
59 
60 namespace llvm {
61 namespace json {
62 
63 // === String encodings ===
64 //
65 // JSON strings are character sequences (not byte sequences like std::string).
66 // We need to know the encoding, and for simplicity only support UTF-8.
67 //
68 //   - When parsing, invalid UTF-8 is a syntax error like any other
69 //
70 //   - When creating Values from strings, callers must ensure they are UTF-8.
71 //        with asserts on, invalid UTF-8 will crash the program
72 //        with asserts off, we'll substitute the replacement character (U+FFFD)
73 //     Callers can use json::isUTF8() and json::fixUTF8() for validation.
74 //
75 //   - When retrieving strings from Values (e.g. asString()), the result will
76 //     always be valid UTF-8.
77 
/// True when \p T is an unsigned integral type with the same size as uint64_t.
/// Used to route such types to the uint64-backed Value constructor.
template <typename T>
constexpr bool is_uint_64_bit_v =
    sizeof(T) == sizeof(uint64_t) && std::is_unsigned_v<T> &&
    std::is_integral_v<T>;
82 
83 /// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first
/// error.
85 bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
86 /// Replaces invalid UTF-8 sequences in \p S with the replacement character
87 /// (U+FFFD). The returned string is valid UTF-8.
88 /// This is much slower than isUTF8, so test that first.
89 std::string fixUTF8(llvm::StringRef S);
90 
91 class Array;
92 class ObjectKey;
93 class Value;
94 template <typename T> Value toJSON(const std::optional<T> &Opt);
95 
96 /// An Object is a JSON object, which maps strings to heterogenous JSON values.
97 /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
98 class Object {
99   using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
100   Storage M;
101 
102 public:
103   using key_type = ObjectKey;
104   using mapped_type = Value;
105   using value_type = Storage::value_type;
106   using iterator = Storage::iterator;
107   using const_iterator = Storage::const_iterator;
108 
109   Object() = default;
110   // KV is a trivial key-value struct for list-initialization.
111   // (using std::pair forces extra copies).
112   struct KV;
113   explicit Object(std::initializer_list<KV> Properties);
114 
begin()115   iterator begin() { return M.begin(); }
begin()116   const_iterator begin() const { return M.begin(); }
end()117   iterator end() { return M.end(); }
end()118   const_iterator end() const { return M.end(); }
119 
empty()120   bool empty() const { return M.empty(); }
size()121   size_t size() const { return M.size(); }
122 
clear()123   void clear() { M.clear(); }
124   std::pair<iterator, bool> insert(KV E);
125   template <typename... Ts>
try_emplace(const ObjectKey & K,Ts &&...Args)126   std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
127     return M.try_emplace(K, std::forward<Ts>(Args)...);
128   }
129   template <typename... Ts>
try_emplace(ObjectKey && K,Ts &&...Args)130   std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
131     return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
132   }
133   bool erase(StringRef K);
erase(iterator I)134   void erase(iterator I) { M.erase(I); }
135 
find(StringRef K)136   iterator find(StringRef K) { return M.find_as(K); }
find(StringRef K)137   const_iterator find(StringRef K) const { return M.find_as(K); }
138   // operator[] acts as if Value was default-constructible as null.
139   Value &operator[](const ObjectKey &K);
140   Value &operator[](ObjectKey &&K);
141   // Look up a property, returning nullptr if it doesn't exist.
142   Value *get(StringRef K);
143   const Value *get(StringRef K) const;
144   // Typed accessors return std::nullopt/nullptr if
145   //   - the property doesn't exist
146   //   - or it has the wrong type
147   std::optional<std::nullptr_t> getNull(StringRef K) const;
148   std::optional<bool> getBoolean(StringRef K) const;
149   std::optional<double> getNumber(StringRef K) const;
150   std::optional<int64_t> getInteger(StringRef K) const;
151   std::optional<llvm::StringRef> getString(StringRef K) const;
152   const json::Object *getObject(StringRef K) const;
153   json::Object *getObject(StringRef K);
154   const json::Array *getArray(StringRef K) const;
155   json::Array *getArray(StringRef K);
156 };
157 bool operator==(const Object &LHS, const Object &RHS);
158 inline bool operator!=(const Object &LHS, const Object &RHS) {
159   return !(LHS == RHS);
160 }
161 
162 /// An Array is a JSON array, which contains heterogeneous JSON values.
163 /// It simulates std::vector<Value>.
164 class Array {
165   std::vector<Value> V;
166 
167 public:
168   using value_type = Value;
169   using iterator = std::vector<Value>::iterator;
170   using const_iterator = std::vector<Value>::const_iterator;
171 
172   Array() = default;
173   explicit Array(std::initializer_list<Value> Elements);
Array(const Collection & C)174   template <typename Collection> explicit Array(const Collection &C) {
175     for (const auto &V : C)
176       emplace_back(V);
177   }
178 
179   Value &operator[](size_t I);
180   const Value &operator[](size_t I) const;
181   Value &front();
182   const Value &front() const;
183   Value &back();
184   const Value &back() const;
185   Value *data();
186   const Value *data() const;
187 
188   iterator begin();
189   const_iterator begin() const;
190   iterator end();
191   const_iterator end() const;
192 
193   bool empty() const;
194   size_t size() const;
195   void reserve(size_t S);
196 
197   void clear();
198   void push_back(const Value &E);
199   void push_back(Value &&E);
200   template <typename... Args> void emplace_back(Args &&...A);
201   void pop_back();
202   iterator insert(const_iterator P, const Value &E);
203   iterator insert(const_iterator P, Value &&E);
204   template <typename It> iterator insert(const_iterator P, It A, It Z);
205   template <typename... Args> iterator emplace(const_iterator P, Args &&...A);
206 
207   friend bool operator==(const Array &L, const Array &R);
208 };
209 inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
210 
/// A Value is a JSON value of unknown type.
212 /// They can be copied, but should generally be moved.
213 ///
214 /// === Composing values ===
215 ///
216 /// You can implicitly construct Values from:
217 ///   - strings: std::string, SmallString, formatv, StringRef, char*
218 ///              (char*, and StringRef are references, not copies!)
219 ///   - numbers
220 ///   - booleans
221 ///   - null: nullptr
222 ///   - arrays: {"foo", 42.0, false}
223 ///   - serializable things: types with toJSON(const T&)->Value, found by ADL
224 ///
225 /// They can also be constructed from object/array helpers:
226 ///   - json::Object is a type like map<ObjectKey, Value>
227 ///   - json::Array is a type like vector<Value>
228 /// These can be list-initialized, or used to build up collections in a loop.
/// json::Array(Collection) converts all items in a collection to Values.
230 ///
231 /// === Inspecting values ===
232 ///
233 /// Each Value is one of the JSON kinds:
234 ///   null    (nullptr_t)
235 ///   boolean (bool)
236 ///   number  (double, int64 or uint64)
237 ///   string  (StringRef)
238 ///   array   (json::Array)
239 ///   object  (json::Object)
240 ///
241 /// The kind can be queried directly, or implicitly via the typed accessors:
///   if (std::optional<StringRef> S = E.getAsString())
243 ///     assert(E.kind() == Value::String);
244 ///
245 /// Array and Object also have typed indexing accessors for easy traversal:
246 ///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
247 ///   if (Object* O = E->getAsObject())
248 ///     if (Object* Opts = O->getObject("options"))
249 ///       if (std::optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->get("font")->kind() == Value::String);
251 ///
252 /// === Converting JSON values to C++ types ===
253 ///
254 /// The convention is to have a deserializer function findable via ADL:
255 ///     fromJSON(const json::Value&, T&, Path) -> bool
256 ///
257 /// The return value indicates overall success, and Path is used for precise
258 /// error reporting. (The Path::Root passed in at the top level fromJSON call
259 /// captures any nested error and can render it in context).
260 /// If conversion fails, fromJSON calls Path::report() and immediately returns.
261 /// This ensures that the first fatal error survives.
262 ///
263 /// Deserializers are provided for:
264 ///   - bool
///   - int, int64_t and uint64_t
266 ///   - double
267 ///   - std::string
268 ///   - vector<T>, where T is deserializable
269 ///   - map<string, T>, where T is deserializable
270 ///   - std::optional<T>, where T is deserializable
271 /// ObjectMapper can help writing fromJSON() functions for object types.
272 ///
273 /// For conversion in the other direction, the serializer function is:
274 ///    toJSON(const T&) -> json::Value
275 /// If this exists, then it also allows constructing Value from T, and can
276 /// be used to serialize vector<T>, map<string, T>, and std::optional<T>.
277 ///
278 /// === Serialization ===
279 ///
280 /// Values can be serialized to JSON:
281 ///   1) raw_ostream << Value                    // Basic formatting.
282 ///   2) raw_ostream << formatv("{0}", Value)    // Basic formatting.
283 ///   3) raw_ostream << formatv("{0:2}", Value)  // Pretty-print with indent 2.
284 ///
285 /// And parsed:
286 ///   Expected<Value> E = json::parse("[1, 2, null]");
287 ///   assert(E && E->kind() == Value::Array);
288 class Value {
289 public:
290   enum Kind {
291     Null,
292     Boolean,
293     /// Number values can store both int64s and doubles at full precision,
294     /// depending on what they were constructed/parsed from.
295     Number,
296     String,
297     Array,
298     Object,
299   };
300 
301   // It would be nice to have Value() be null. But that would make {} null too.
Value(const Value & M)302   Value(const Value &M) { copyFrom(M); }
Value(Value && M)303   Value(Value &&M) { moveFrom(std::move(M)); }
304   Value(std::initializer_list<Value> Elements);
Value(json::Array && Elements)305   Value(json::Array &&Elements) : Type(T_Array) {
306     create<json::Array>(std::move(Elements));
307   }
308   template <typename Elt>
Value(const std::vector<Elt> & C)309   Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
Value(json::Object && Properties)310   Value(json::Object &&Properties) : Type(T_Object) {
311     create<json::Object>(std::move(Properties));
312   }
313   template <typename Elt>
Value(const std::map<std::string,Elt> & C)314   Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
315   // Strings: types with value semantics. Must be valid UTF-8.
Value(std::string V)316   Value(std::string V) : Type(T_String) {
317     if (LLVM_UNLIKELY(!isUTF8(V))) {
318       assert(false && "Invalid UTF-8 in value used as JSON");
319       V = fixUTF8(std::move(V));
320     }
321     create<std::string>(std::move(V));
322   }
Value(const llvm::SmallVectorImpl<char> & V)323   Value(const llvm::SmallVectorImpl<char> &V)
324       : Value(std::string(V.begin(), V.end())) {}
Value(const llvm::formatv_object_base & V)325   Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
326   // Strings: types with reference semantics. Must be valid UTF-8.
Value(StringRef V)327   Value(StringRef V) : Type(T_StringRef) {
328     create<llvm::StringRef>(V);
329     if (LLVM_UNLIKELY(!isUTF8(V))) {
330       assert(false && "Invalid UTF-8 in value used as JSON");
331       *this = Value(fixUTF8(V));
332     }
333   }
Value(const char * V)334   Value(const char *V) : Value(StringRef(V)) {}
Value(std::nullptr_t)335   Value(std::nullptr_t) : Type(T_Null) {}
336   // Boolean (disallow implicit conversions).
337   // (The last template parameter is a dummy to keep templates distinct.)
338   template <typename T, typename = std::enable_if_t<std::is_same_v<T, bool>>,
339             bool = false>
Value(T B)340   Value(T B) : Type(T_Boolean) {
341     create<bool>(B);
342   }
343 
344   // Unsigned 64-bit integers.
345   template <typename T, typename = std::enable_if_t<is_uint_64_bit_v<T>>>
Value(T V)346   Value(T V) : Type(T_UINT64) {
347     create<uint64_t>(uint64_t{V});
348   }
349 
350   // Integers (except boolean and uint64_t).
351   // Must be non-narrowing convertible to int64_t.
352   template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>,
353             typename = std::enable_if_t<!std::is_same_v<T, bool>>,
354             typename = std::enable_if_t<!is_uint_64_bit_v<T>>>
Value(T I)355   Value(T I) : Type(T_Integer) {
356     create<int64_t>(int64_t{I});
357   }
358   // Floating point. Must be non-narrowing convertible to double.
359   template <typename T,
360             typename = std::enable_if_t<std::is_floating_point_v<T>>,
361             double * = nullptr>
Value(T D)362   Value(T D) : Type(T_Double) {
363     create<double>(double{D});
364   }
365   // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
366   template <typename T,
367             typename = std::enable_if_t<
368                 std::is_same_v<Value, decltype(toJSON(*(const T *)nullptr))>>,
369             Value * = nullptr>
Value(const T & V)370   Value(const T &V) : Value(toJSON(V)) {}
371 
372   Value &operator=(const Value &M) {
373     destroy();
374     copyFrom(M);
375     return *this;
376   }
377   Value &operator=(Value &&M) {
378     destroy();
379     moveFrom(std::move(M));
380     return *this;
381   }
~Value()382   ~Value() { destroy(); }
383 
kind()384   Kind kind() const {
385     switch (Type) {
386     case T_Null:
387       return Null;
388     case T_Boolean:
389       return Boolean;
390     case T_Double:
391     case T_Integer:
392     case T_UINT64:
393       return Number;
394     case T_String:
395     case T_StringRef:
396       return String;
397     case T_Object:
398       return Object;
399     case T_Array:
400       return Array;
401     }
402     llvm_unreachable("Unknown kind");
403   }
404 
405   // Typed accessors return std::nullopt/nullptr if the Value is not of this
406   // type.
getAsNull()407   std::optional<std::nullptr_t> getAsNull() const {
408     if (LLVM_LIKELY(Type == T_Null))
409       return nullptr;
410     return std::nullopt;
411   }
getAsBoolean()412   std::optional<bool> getAsBoolean() const {
413     if (LLVM_LIKELY(Type == T_Boolean))
414       return as<bool>();
415     return std::nullopt;
416   }
getAsNumber()417   std::optional<double> getAsNumber() const {
418     if (LLVM_LIKELY(Type == T_Double))
419       return as<double>();
420     if (LLVM_LIKELY(Type == T_Integer))
421       return as<int64_t>();
422     if (LLVM_LIKELY(Type == T_UINT64))
423       return as<uint64_t>();
424     return std::nullopt;
425   }
426   // Succeeds if the Value is a Number, and exactly representable as int64_t.
getAsInteger()427   std::optional<int64_t> getAsInteger() const {
428     if (LLVM_LIKELY(Type == T_Integer))
429       return as<int64_t>();
430     if (LLVM_LIKELY(Type == T_UINT64)) {
431       uint64_t U = as<uint64_t>();
432       if (LLVM_LIKELY(U <= uint64_t(std::numeric_limits<int64_t>::max()))) {
433         return U;
434       }
435     }
436     if (LLVM_LIKELY(Type == T_Double)) {
437       double D = as<double>();
438       if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
439                       D >= double(std::numeric_limits<int64_t>::min()) &&
440                       D <= double(std::numeric_limits<int64_t>::max())))
441         return D;
442     }
443     return std::nullopt;
444   }
getAsUINT64()445   std::optional<uint64_t> getAsUINT64() const {
446     if (Type == T_UINT64)
447       return as<uint64_t>();
448     else if (Type == T_Integer) {
449       int64_t N = as<int64_t>();
450       if (N >= 0)
451         return as<uint64_t>();
452     }
453     return std::nullopt;
454   }
getAsString()455   std::optional<llvm::StringRef> getAsString() const {
456     if (Type == T_String)
457       return llvm::StringRef(as<std::string>());
458     if (LLVM_LIKELY(Type == T_StringRef))
459       return as<llvm::StringRef>();
460     return std::nullopt;
461   }
getAsObject()462   const json::Object *getAsObject() const {
463     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
464   }
getAsObject()465   json::Object *getAsObject() {
466     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
467   }
getAsArray()468   const json::Array *getAsArray() const {
469     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
470   }
getAsArray()471   json::Array *getAsArray() {
472     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
473   }
474 
475 private:
476   void destroy();
477   void copyFrom(const Value &M);
478   // We allow moving from *const* Values, by marking all members as mutable!
479   // This hack is needed to support initializer-list syntax efficiently.
480   // (std::initializer_list<T> is a container of const T).
481   void moveFrom(const Value &&M);
482   friend class Array;
483   friend class Object;
484 
create(U &&...V)485   template <typename T, typename... U> void create(U &&... V) {
486 #if LLVM_ADDRESS_SANITIZER_BUILD
487     // Unpoisoning to prevent overwriting poisoned object (e.g., annotated short
488     // string). Objects that have had their memory poisoned may cause an ASan
489     // error if their memory is reused without calling their destructor.
490     // Unpoisoning the memory prevents this error from occurring.
491     // FIXME: This is a temporary solution to prevent buildbots from failing.
492     //  The more appropriate approach would be to call the object's destructor
493     //  to unpoison memory. This would prevent any potential memory leaks (long
494     //  strings). Read for details:
495     //  https://github.com/llvm/llvm-project/pull/79065#discussion_r1462621761
496     __asan_unpoison_memory_region(&Union, sizeof(T));
497 #endif
498     new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
499   }
as()500   template <typename T> T &as() const {
501     // Using this two-step static_cast via void * instead of reinterpret_cast
502     // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
503     void *Storage = static_cast<void *>(&Union);
504     return *static_cast<T *>(Storage);
505   }
506 
507   friend class OStream;
508 
509   enum ValueType : char16_t {
510     T_Null,
511     T_Boolean,
512     T_Double,
513     T_Integer,
514     T_UINT64,
515     T_StringRef,
516     T_String,
517     T_Object,
518     T_Array,
519   };
520   // All members mutable, see moveFrom().
521   mutable ValueType Type;
522   mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t,
523                                       llvm::StringRef, std::string, json::Array,
524                                       json::Object>
525       Union;
526   friend bool operator==(const Value &, const Value &);
527 };
528 
529 bool operator==(const Value &, const Value &);
530 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
531 
532 // Array Methods
533 inline Value &Array::operator[](size_t I) { return V[I]; }
534 inline const Value &Array::operator[](size_t I) const { return V[I]; }
front()535 inline Value &Array::front() { return V.front(); }
front()536 inline const Value &Array::front() const { return V.front(); }
back()537 inline Value &Array::back() { return V.back(); }
back()538 inline const Value &Array::back() const { return V.back(); }
data()539 inline Value *Array::data() { return V.data(); }
data()540 inline const Value *Array::data() const { return V.data(); }
541 
begin()542 inline typename Array::iterator Array::begin() { return V.begin(); }
begin()543 inline typename Array::const_iterator Array::begin() const { return V.begin(); }
end()544 inline typename Array::iterator Array::end() { return V.end(); }
end()545 inline typename Array::const_iterator Array::end() const { return V.end(); }
546 
empty()547 inline bool Array::empty() const { return V.empty(); }
size()548 inline size_t Array::size() const { return V.size(); }
reserve(size_t S)549 inline void Array::reserve(size_t S) { V.reserve(S); }
550 
clear()551 inline void Array::clear() { V.clear(); }
push_back(const Value & E)552 inline void Array::push_back(const Value &E) { V.push_back(E); }
push_back(Value && E)553 inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); }
emplace_back(Args &&...A)554 template <typename... Args> inline void Array::emplace_back(Args &&...A) {
555   V.emplace_back(std::forward<Args>(A)...);
556 }
pop_back()557 inline void Array::pop_back() { V.pop_back(); }
insert(const_iterator P,const Value & E)558 inline typename Array::iterator Array::insert(const_iterator P, const Value &E) {
559   return V.insert(P, E);
560 }
insert(const_iterator P,Value && E)561 inline typename Array::iterator Array::insert(const_iterator P, Value &&E) {
562   return V.insert(P, std::move(E));
563 }
564 template <typename It>
insert(const_iterator P,It A,It Z)565 inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) {
566   return V.insert(P, A, Z);
567 }
568 template <typename... Args>
emplace(const_iterator P,Args &&...A)569 inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) {
570   return V.emplace(P, std::forward<Args>(A)...);
571 }
572 inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
573 
574 /// ObjectKey is a used to capture keys in Object. Like Value but:
575 ///   - only strings are allowed
576 ///   - it's optimized for the string literal case (Owned == nullptr)
577 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
578 class ObjectKey {
579 public:
ObjectKey(const char * S)580   ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
ObjectKey(std::string S)581   ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
582     if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
583       assert(false && "Invalid UTF-8 in value used as JSON");
584       *Owned = fixUTF8(std::move(*Owned));
585     }
586     Data = *Owned;
587   }
ObjectKey(llvm::StringRef S)588   ObjectKey(llvm::StringRef S) : Data(S) {
589     if (LLVM_UNLIKELY(!isUTF8(Data))) {
590       assert(false && "Invalid UTF-8 in value used as JSON");
591       *this = ObjectKey(fixUTF8(S));
592     }
593   }
ObjectKey(const llvm::SmallVectorImpl<char> & V)594   ObjectKey(const llvm::SmallVectorImpl<char> &V)
595       : ObjectKey(std::string(V.begin(), V.end())) {}
ObjectKey(const llvm::formatv_object_base & V)596   ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
597 
ObjectKey(const ObjectKey & C)598   ObjectKey(const ObjectKey &C) { *this = C; }
ObjectKey(ObjectKey && C)599   ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
600   ObjectKey &operator=(const ObjectKey &C) {
601     if (C.Owned) {
602       Owned.reset(new std::string(*C.Owned));
603       Data = *Owned;
604     } else {
605       Data = C.Data;
606     }
607     return *this;
608   }
609   ObjectKey &operator=(ObjectKey &&) = default;
610 
StringRef()611   operator llvm::StringRef() const { return Data; }
str()612   std::string str() const { return Data.str(); }
613 
614 private:
615   // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
616   // could be 2 pointers at most.
617   std::unique_ptr<std::string> Owned;
618   llvm::StringRef Data;
619 };
620 
621 inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
622   return llvm::StringRef(L) == llvm::StringRef(R);
623 }
624 inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
625   return !(L == R);
626 }
627 inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
628   return StringRef(L) < StringRef(R);
629 }
630 
631 struct Object::KV {
632   ObjectKey K;
633   Value V;
634 };
635 
Object(std::initializer_list<KV> Properties)636 inline Object::Object(std::initializer_list<KV> Properties) {
637   for (const auto &P : Properties) {
638     auto R = try_emplace(P.K, nullptr);
639     if (R.second)
640       R.first->getSecond().moveFrom(std::move(P.V));
641   }
642 }
insert(KV E)643 inline std::pair<Object::iterator, bool> Object::insert(KV E) {
644   return try_emplace(std::move(E.K), std::move(E.V));
645 }
erase(StringRef K)646 inline bool Object::erase(StringRef K) {
647   return M.erase(ObjectKey(K));
648 }
649 
650 /// A "cursor" marking a position within a Value.
651 /// The Value is a tree, and this is the path from the root to the current node.
652 /// This is used to associate errors with particular subobjects.
653 class Path {
654 public:
655   class Root;
656 
657   /// Records that the value at the current path is invalid.
658   /// Message is e.g. "expected number" and becomes part of the final error.
659   /// This overwrites any previously written error message in the root.
660   void report(llvm::StringLiteral Message);
661 
662   /// The root may be treated as a Path.
Path(Root & R)663   Path(Root &R) : Parent(nullptr), Seg(&R) {}
664   /// Derives a path for an array element: this[Index]
index(unsigned Index)665   Path index(unsigned Index) const { return Path(this, Segment(Index)); }
666   /// Derives a path for an object field: this.Field
field(StringRef Field)667   Path field(StringRef Field) const { return Path(this, Segment(Field)); }
668 
669 private:
670   /// One element in a JSON path: an object field (.foo) or array index [27].
671   /// Exception: the root Path encodes a pointer to the Path::Root.
672   class Segment {
673     uintptr_t Pointer;
674     unsigned Offset;
675 
676   public:
677     Segment() = default;
Segment(Root * R)678     Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
Segment(llvm::StringRef Field)679     Segment(llvm::StringRef Field)
680         : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
681           Offset(static_cast<unsigned>(Field.size())) {}
Segment(unsigned Index)682     Segment(unsigned Index) : Pointer(0), Offset(Index) {}
683 
isField()684     bool isField() const { return Pointer != 0; }
field()685     StringRef field() const {
686       return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
687     }
index()688     unsigned index() const { return Offset; }
root()689     Root *root() const { return reinterpret_cast<Root *>(Pointer); }
690   };
691 
692   const Path *Parent;
693   Segment Seg;
694 
Path(const Path * Parent,Segment S)695   Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
696 };
697 
698 /// The root is the trivial Path to the root value.
699 /// It also stores the latest reported error and the path where it occurred.
700 class Path::Root {
701   llvm::StringRef Name;
702   llvm::StringLiteral ErrorMessage;
703   std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
704 
705   friend void Path::report(llvm::StringLiteral Message);
706 
707 public:
Name(Name)708   Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
709   // No copy/move allowed as there are incoming pointers.
710   Root(Root &&) = delete;
711   Root &operator=(Root &&) = delete;
712   Root(const Root &) = delete;
713   Root &operator=(const Root &) = delete;
714 
715   /// Returns the last error reported, or else a generic error.
716   Error getError() const;
717   /// Print the root value with the error shown inline as a comment.
718   /// Unrelated parts of the value are elided for brevity, e.g.
719   ///   {
720   ///      "id": 42,
721   ///      "name": /* expected string */ null,
722   ///      "properties": { ... }
723   ///   }
724   void printErrorContext(const Value &, llvm::raw_ostream &) const;
725 };
726 
727 // Standard deserializers are provided for primitive types.
728 // See comments on Value.
fromJSON(const Value & E,std::string & Out,Path P)729 inline bool fromJSON(const Value &E, std::string &Out, Path P) {
730   if (auto S = E.getAsString()) {
731     Out = std::string(*S);
732     return true;
733   }
734   P.report("expected string");
735   return false;
736 }
fromJSON(const Value & E,int & Out,Path P)737 inline bool fromJSON(const Value &E, int &Out, Path P) {
738   if (auto S = E.getAsInteger()) {
739     Out = *S;
740     return true;
741   }
742   P.report("expected integer");
743   return false;
744 }
fromJSON(const Value & E,int64_t & Out,Path P)745 inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
746   if (auto S = E.getAsInteger()) {
747     Out = *S;
748     return true;
749   }
750   P.report("expected integer");
751   return false;
752 }
fromJSON(const Value & E,double & Out,Path P)753 inline bool fromJSON(const Value &E, double &Out, Path P) {
754   if (auto S = E.getAsNumber()) {
755     Out = *S;
756     return true;
757   }
758   P.report("expected number");
759   return false;
760 }
fromJSON(const Value & E,bool & Out,Path P)761 inline bool fromJSON(const Value &E, bool &Out, Path P) {
762   if (auto S = E.getAsBoolean()) {
763     Out = *S;
764     return true;
765   }
766   P.report("expected boolean");
767   return false;
768 }
fromJSON(const Value & E,uint64_t & Out,Path P)769 inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {
770   if (auto S = E.getAsUINT64()) {
771     Out = *S;
772     return true;
773   }
774   P.report("expected uint64_t");
775   return false;
776 }
fromJSON(const Value & E,std::nullptr_t & Out,Path P)777 inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
778   if (auto S = E.getAsNull()) {
779     Out = *S;
780     return true;
781   }
782   P.report("expected null");
783   return false;
784 }
785 template <typename T>
fromJSON(const Value & E,std::optional<T> & Out,Path P)786 bool fromJSON(const Value &E, std::optional<T> &Out, Path P) {
787   if (E.getAsNull()) {
788     Out = std::nullopt;
789     return true;
790   }
791   T Result = {};
792   if (!fromJSON(E, Result, P))
793     return false;
794   Out = std::move(Result);
795   return true;
796 }
797 template <typename T>
fromJSON(const Value & E,std::vector<T> & Out,Path P)798 bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
799   if (auto *A = E.getAsArray()) {
800     Out.clear();
801     Out.resize(A->size());
802     for (size_t I = 0; I < A->size(); ++I)
803       if (!fromJSON((*A)[I], Out[I], P.index(I)))
804         return false;
805     return true;
806   }
807   P.report("expected array");
808   return false;
809 }
810 template <typename T>
fromJSON(const Value & E,std::map<std::string,T> & Out,Path P)811 bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
812   if (auto *O = E.getAsObject()) {
813     Out.clear();
814     for (const auto &KV : *O)
815       if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))],
816                     P.field(KV.first)))
817         return false;
818     return true;
819   }
820   P.report("expected object");
821   return false;
822 }
823 
824 // Allow serialization of std::optional<T> for supported T.
toJSON(const std::optional<T> & Opt)825 template <typename T> Value toJSON(const std::optional<T> &Opt) {
826   return Opt ? Value(*Opt) : Value(nullptr);
827 }
828 
829 /// Helper for mapping JSON objects onto protocol structs.
830 ///
831 /// Example:
832 /// \code
833 ///   bool fromJSON(const Value &E, MyStruct &R, Path P) {
834 ///     ObjectMapper O(E, P);
835 ///     // When returning false, error details were already reported.
836 ///     return O && O.map("mandatory_field", R.MandatoryField) &&
837 ///         O.mapOptional("optional_field", R.OptionalField);
838 ///   }
839 /// \endcode
840 class ObjectMapper {
841 public:
842   /// If O is not an object, this mapper is invalid and an error is reported.
ObjectMapper(const Value & E,Path P)843   ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
844     if (!O)
845       P.report("expected object");
846   }
847 
848   /// True if the expression is an object.
849   /// Must be checked before calling map().
850   operator bool() const { return O; }
851 
852   /// Maps a property to a field.
853   /// If the property is missing or invalid, reports an error.
map(StringLiteral Prop,T & Out)854   template <typename T> bool map(StringLiteral Prop, T &Out) {
855     assert(*this && "Must check this is an object before calling map()");
856     if (const Value *E = O->get(Prop))
857       return fromJSON(*E, Out, P.field(Prop));
858     P.field(Prop).report("missing value");
859     return false;
860   }
861 
862   /// Maps a property to a field, if it exists.
863   /// If the property exists and is invalid, reports an error.
864   /// (Optional requires special handling, because missing keys are OK).
map(StringLiteral Prop,std::optional<T> & Out)865   template <typename T> bool map(StringLiteral Prop, std::optional<T> &Out) {
866     assert(*this && "Must check this is an object before calling map()");
867     if (const Value *E = O->get(Prop))
868       return fromJSON(*E, Out, P.field(Prop));
869     Out = std::nullopt;
870     return true;
871   }
872 
873   /// Maps a property to a field, if it exists.
874   /// If the property exists and is invalid, reports an error.
875   /// If the property does not exist, Out is unchanged.
mapOptional(StringLiteral Prop,T & Out)876   template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
877     assert(*this && "Must check this is an object before calling map()");
878     if (const Value *E = O->get(Prop))
879       return fromJSON(*E, Out, P.field(Prop));
880     return true;
881   }
882 
883 private:
884   const Object *O;
885   Path P;
886 };
887 
888 /// Parses the provided JSON source, or returns a ParseError.
889 /// The returned Value is self-contained and owns its strings (they do not refer
890 /// to the original source).
891 llvm::Expected<Value> parse(llvm::StringRef JSON);
892 
893 class ParseError : public llvm::ErrorInfo<ParseError> {
894   const char *Msg;
895   unsigned Line, Column, Offset;
896 
897 public:
898   static char ID;
ParseError(const char * Msg,unsigned Line,unsigned Column,unsigned Offset)899   ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
900       : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
log(llvm::raw_ostream & OS)901   void log(llvm::raw_ostream &OS) const override {
902     OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
903   }
convertToErrorCode()904   std::error_code convertToErrorCode() const override {
905     return llvm::inconvertibleErrorCode();
906   }
907 };
908 
909 /// Version of parse() that converts the parsed value to the type T.
910 /// RootName describes the root object and is used in error messages.
911 template <typename T>
912 Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
913   auto V = parse(JSON);
914   if (!V)
915     return V.takeError();
916   Path::Root R(RootName);
917   T Result;
918   if (fromJSON(*V, Result, R))
919     return std::move(Result);
920   return R.getError();
921 }
922 
923 /// json::OStream allows writing well-formed JSON without materializing
924 /// all structures as json::Value ahead of time.
925 /// It's faster, lower-level, and less safe than OS << json::Value.
926 /// It also allows emitting more constructs, such as comments.
927 ///
928 /// Only one "top-level" object can be written to a stream.
929 /// Simplest usage involves passing lambdas (Blocks) to fill in containers:
930 ///
931 ///   json::OStream J(OS);
932 ///   J.array([&]{
933 ///     for (const Event &E : Events)
934 ///       J.object([&] {
935 ///         J.attribute("timestamp", int64_t(E.Time));
936 ///         J.attributeArray("participants", [&] {
937 ///           for (const Participant &P : E.Participants)
938 ///             J.value(P.toString());
939 ///         });
940 ///       });
941 ///   });
942 ///
943 /// This would produce JSON like:
944 ///
945 ///   [
946 ///     {
947 ///       "timestamp": 19287398741,
948 ///       "participants": [
949 ///         "King Kong",
950 ///         "Miley Cyrus",
951 ///         "Cleopatra"
952 ///       ]
953 ///     },
954 ///     ...
955 ///   ]
956 ///
957 /// The lower level begin/end methods (arrayBegin()) are more flexible but
958 /// care must be taken to pair them correctly:
959 ///
960 ///   json::OStream J(OS);
961 ///   J.arrayBegin();
962 ///   for (const Event &E : Events) {
963 ///     J.objectBegin();
964 ///     J.attribute("timestamp", int64_t(E.Time));
965 ///     J.attributeBegin("participants");
966 ///     for (const Participant &P : E.Participants)
967 ///       J.value(P.toString());
968 ///     J.attributeEnd();
969 ///     J.objectEnd();
970 ///   }
971 ///   J.arrayEnd();
972 ///
973 /// If the call sequence isn't valid JSON, asserts will fire in debug mode.
974 /// This can be mismatched begin()/end() pairs, trying to emit attributes inside
975 /// an array, and so on.
976 /// With asserts disabled, this is undefined behavior.
977 class OStream {
978  public:
979   using Block = llvm::function_ref<void()>;
980   // If IndentSize is nonzero, output is pretty-printed.
981   explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
OS(OS)982       : OS(OS), IndentSize(IndentSize) {
983     Stack.emplace_back();
984   }
~OStream()985   ~OStream() {
986     assert(Stack.size() == 1 && "Unmatched begin()/end()");
987     assert(Stack.back().Ctx == Singleton);
988     assert(Stack.back().HasValue && "Did not write top-level value");
989   }
990 
991   /// Flushes the underlying ostream. OStream does not buffer internally.
flush()992   void flush() { OS.flush(); }
993 
994   // High level functions to output a value.
995   // Valid at top-level (exactly once), in an attribute value (exactly once),
996   // or in an array (any number of times).
997 
998   /// Emit a self-contained value (number, string, vector<string> etc).
999   void value(const Value &V);
1000   /// Emit an array whose elements are emitted in the provided Block.
array(Block Contents)1001   void array(Block Contents) {
1002     arrayBegin();
1003     Contents();
1004     arrayEnd();
1005   }
1006   /// Emit an object whose elements are emitted in the provided Block.
object(Block Contents)1007   void object(Block Contents) {
1008     objectBegin();
1009     Contents();
1010     objectEnd();
1011   }
1012   /// Emit an externally-serialized value.
1013   /// The caller must write exactly one valid JSON value to the provided stream.
1014   /// No validation or formatting of this value occurs.
rawValue(llvm::function_ref<void (raw_ostream &)> Contents)1015   void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
1016     rawValueBegin();
1017     Contents(OS);
1018     rawValueEnd();
1019   }
rawValue(llvm::StringRef Contents)1020   void rawValue(llvm::StringRef Contents) {
1021     rawValue([&](raw_ostream &OS) { OS << Contents; });
1022   }
1023   /// Emit a JavaScript comment associated with the next printed value.
1024   /// The string must be valid until the next attribute or value is emitted.
1025   /// Comments are not part of standard JSON, and many parsers reject them!
1026   void comment(llvm::StringRef);
1027 
1028   // High level functions to output object attributes.
1029   // Valid only within an object (any number of times).
1030 
1031   /// Emit an attribute whose value is self-contained (number, vector<int> etc).
attribute(llvm::StringRef Key,const Value & Contents)1032   void attribute(llvm::StringRef Key, const Value& Contents) {
1033     attributeImpl(Key, [&] { value(Contents); });
1034   }
1035   /// Emit an attribute whose value is an array with elements from the Block.
attributeArray(llvm::StringRef Key,Block Contents)1036   void attributeArray(llvm::StringRef Key, Block Contents) {
1037     attributeImpl(Key, [&] { array(Contents); });
1038   }
1039   /// Emit an attribute whose value is an object with attributes from the Block.
attributeObject(llvm::StringRef Key,Block Contents)1040   void attributeObject(llvm::StringRef Key, Block Contents) {
1041     attributeImpl(Key, [&] { object(Contents); });
1042   }
1043 
1044   // Low-level begin/end functions to output arrays, objects, and attributes.
1045   // Must be correctly paired. Allowed contexts are as above.
1046 
1047   void arrayBegin();
1048   void arrayEnd();
1049   void objectBegin();
1050   void objectEnd();
1051   void attributeBegin(llvm::StringRef Key);
1052   void attributeEnd();
1053   raw_ostream &rawValueBegin();
1054   void rawValueEnd();
1055 
1056 private:
attributeImpl(llvm::StringRef Key,Block Contents)1057   void attributeImpl(llvm::StringRef Key, Block Contents) {
1058     attributeBegin(Key);
1059     Contents();
1060     attributeEnd();
1061   }
1062 
1063   void valueBegin();
1064   void flushComment();
1065   void newline();
1066 
1067   enum Context {
1068     Singleton, // Top level, or object attribute.
1069     Array,
1070     Object,
1071     RawValue, // External code writing a value to OS directly.
1072   };
1073   struct State {
1074     Context Ctx = Singleton;
1075     bool HasValue = false;
1076   };
1077   llvm::SmallVector<State, 16> Stack; // Never empty.
1078   llvm::StringRef PendingComment;
1079   llvm::raw_ostream &OS;
1080   unsigned IndentSize;
1081   unsigned Indent = 0;
1082 };
1083 
1084 /// Serializes this Value to JSON, writing it to the provided stream.
1085 /// The formatting is compact (no extra whitespace) and deterministic.
1086 /// For pretty-printing, use the formatv() format_provider below.
1087 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
1088   OStream(OS).value(V);
1089   return OS;
1090 }
1091 } // namespace json
1092 
1093 /// Allow printing json::Value with formatv().
1094 /// The default style is basic/compact formatting, like operator<<.
1095 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
1096 template <> struct format_provider<llvm::json::Value> {
1097   static void format(const llvm::json::Value &, raw_ostream &, StringRef);
1098 };
1099 } // namespace llvm
1100 
1101 #endif
1102