1 //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===---------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file supports working with JSON data. 11 /// 12 /// It comprises: 13 /// 14 /// - classes which hold dynamically-typed parsed JSON structures 15 /// These are value types that can be composed, inspected, and modified. 16 /// See json::Value, and the related types json::Object and json::Array. 17 /// 18 /// - functions to parse JSON text into Values, and to serialize Values to text. 19 /// See parse(), operator<<, and format_provider. 20 /// 21 /// - a convention and helpers for mapping between json::Value and user-defined 22 /// types. See fromJSON(), ObjectMapper, and the class comment on Value. 23 /// 24 /// - an output API json::OStream which can emit JSON without materializing 25 /// all structures as json::Value. 26 /// 27 /// Typically, JSON data would be read from an external source, parsed into 28 /// a Value, and then converted into some native data structure before doing 29 /// real work on it. (And vice versa when writing). 30 /// 31 /// Other serialization mechanisms you may consider: 32 /// 33 /// - YAML is also text-based, and more human-readable than JSON. It's a more 34 /// complex format and data model, and YAML parsers aren't ubiquitous. 35 /// YAMLParser.h is a streaming parser suitable for parsing large documents 36 /// (including JSON, as YAML is a superset). It can be awkward to use 37 /// directly. YAML I/O (YAMLTraits.h) provides data mapping that is more 38 /// declarative than the toJSON/fromJSON conventions here. 39 /// 40 /// - LLVM bitstream is a space- and CPU- efficient binary format. 
///    Typically it encodes LLVM IR ("bitcode"), but it can be a container for
///    other data.
///    Low-level reader/writer libraries are in Bitstream/Bitstream*.h
///
//===---------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_JSON_H
#define LLVM_SUPPORT_JSON_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <map>

namespace llvm {
namespace json {

// === String encodings ===
//
// JSON strings are character sequences (not byte sequences like std::string).
// We need to know the encoding, and for simplicity only support UTF-8.
//
//   - When parsing, invalid UTF-8 is a syntax error like any other
//
//   - When creating Values from strings, callers must ensure they are UTF-8.
//        with asserts on, invalid UTF-8 will crash the program
//        with asserts off, we'll substitute the replacement character (U+FFFD)
//     Callers can use json::isUTF8() and json::fixUTF8() for validation.
//
//   - When retrieving strings from Values (e.g. getAsString()), the result
//     will always be valid UTF-8.

/// True if T is an unsigned integral type with the same size as uint64_t.
/// Value uses this trait to route such types to its unsigned 64-bit
/// constructor instead of the generic (int64_t-backed) integer constructor.
template <typename T>
constexpr bool is_uint_64_bit_v =
    std::is_integral_v<T> && std::is_unsigned_v<T> &&
    sizeof(T) == sizeof(uint64_t);

/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first
/// error.
bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
/// Replaces invalid UTF-8 sequences in \p S with the replacement character
/// (U+FFFD). The returned string is valid UTF-8.
/// This is much slower than isUTF8, so test that first.
std::string fixUTF8(llvm::StringRef S);

// Forward declarations: Object's interface mentions these types before their
// definitions appear further down in this header.
class Array;
class ObjectKey;
class Value;
template <typename T> Value toJSON(const std::optional<T> &Opt);

/// An Object is a JSON object, which maps strings to heterogeneous JSON values.
/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
class Object {
  // Keys are stored as ObjectKey, but the map's key traits are those of
  // StringRef (DenseMapInfo<StringRef>).
  using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
  Storage M;

public:
  using key_type = ObjectKey;
  using mapped_type = Value;
  using value_type = Storage::value_type;
  using iterator = Storage::iterator;
  using const_iterator = Storage::const_iterator;

  Object() = default;
  // KV is a trivial key-value struct for list-initialization.
  // (using std::pair forces extra copies).
  struct KV;
  explicit Object(std::initializer_list<KV> Properties);

  // Iteration simply exposes the underlying map's iterators.
  iterator begin() { return M.begin(); }
  const_iterator begin() const { return M.begin(); }
  iterator end() { return M.end(); }
  const_iterator end() const { return M.end(); }

  bool empty() const { return M.empty(); }
  size_t size() const { return M.size(); }

  void clear() { M.clear(); }
  // Returns the iterator for the key and whether a new entry was created.
  std::pair<iterator, bool> insert(KV E);
  // Both try_emplace overloads forward to the underlying map; the second one
  // lets the key be moved in rather than copied.
  template <typename... Ts>
  std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
    return M.try_emplace(K, std::forward<Ts>(Args)...);
  }
  template <typename... Ts>
  std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
    return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
  }
  bool erase(StringRef K);
  void erase(iterator I) { M.erase(I); }

  // Lookup takes a plain StringRef and goes through the map's find_as().
  iterator find(StringRef K) { return M.find_as(K); }
  const_iterator find(StringRef K) const { return M.find_as(K); }
  // operator[] acts as if Value was default-constructible as null.
  Value &operator[](const ObjectKey &K);
  Value &operator[](ObjectKey &&K);
  // Look up a property, returning nullptr if it doesn't exist.
  Value *get(StringRef K);
  const Value *get(StringRef K) const;
  // Typed accessors return std::nullopt/nullptr if
  //   - the property doesn't exist
  //   - or it has the wrong type
  std::optional<std::nullptr_t> getNull(StringRef K) const;
  std::optional<bool> getBoolean(StringRef K) const;
  std::optional<double> getNumber(StringRef K) const;
  std::optional<int64_t> getInteger(StringRef K) const;
  std::optional<llvm::StringRef> getString(StringRef K) const;
  const json::Object *getObject(StringRef K) const;
  json::Object *getObject(StringRef K);
  const json::Array *getArray(StringRef K) const;
  json::Array *getArray(StringRef K);
};
bool operator==(const Object &LHS, const Object &RHS);
inline bool operator!=(const Object &LHS, const Object &RHS) {
  return !(LHS == RHS);
}

/// An Array is a JSON array, which contains heterogeneous JSON values.
/// It simulates std::vector<Value>.
164 class Array { 165 std::vector<Value> V; 166 167 public: 168 using value_type = Value; 169 using iterator = std::vector<Value>::iterator; 170 using const_iterator = std::vector<Value>::const_iterator; 171 172 Array() = default; 173 explicit Array(std::initializer_list<Value> Elements); 174 template <typename Collection> explicit Array(const Collection &C) { 175 for (const auto &V : C) 176 emplace_back(V); 177 } 178 179 Value &operator[](size_t I); 180 const Value &operator[](size_t I) const; 181 Value &front(); 182 const Value &front() const; 183 Value &back(); 184 const Value &back() const; 185 Value *data(); 186 const Value *data() const; 187 188 iterator begin(); 189 const_iterator begin() const; 190 iterator end(); 191 const_iterator end() const; 192 193 bool empty() const; 194 size_t size() const; 195 void reserve(size_t S); 196 197 void clear(); 198 void push_back(const Value &E); 199 void push_back(Value &&E); 200 template <typename... Args> void emplace_back(Args &&...A); 201 void pop_back(); 202 iterator insert(const_iterator P, const Value &E); 203 iterator insert(const_iterator P, Value &&E); 204 template <typename It> iterator insert(const_iterator P, It A, It Z); 205 template <typename... Args> iterator emplace(const_iterator P, Args &&...A); 206 207 friend bool operator==(const Array &L, const Array &R); 208 }; 209 inline bool operator!=(const Array &L, const Array &R) { return !(L == R); } 210 211 /// A Value is an JSON value of unknown type. 212 /// They can be copied, but should generally be moved. 213 /// 214 /// === Composing values === 215 /// 216 /// You can implicitly construct Values from: 217 /// - strings: std::string, SmallString, formatv, StringRef, char* 218 /// (char*, and StringRef are references, not copies!) 
219 /// - numbers 220 /// - booleans 221 /// - null: nullptr 222 /// - arrays: {"foo", 42.0, false} 223 /// - serializable things: types with toJSON(const T&)->Value, found by ADL 224 /// 225 /// They can also be constructed from object/array helpers: 226 /// - json::Object is a type like map<ObjectKey, Value> 227 /// - json::Array is a type like vector<Value> 228 /// These can be list-initialized, or used to build up collections in a loop. 229 /// json::ary(Collection) converts all items in a collection to Values. 230 /// 231 /// === Inspecting values === 232 /// 233 /// Each Value is one of the JSON kinds: 234 /// null (nullptr_t) 235 /// boolean (bool) 236 /// number (double, int64 or uint64) 237 /// string (StringRef) 238 /// array (json::Array) 239 /// object (json::Object) 240 /// 241 /// The kind can be queried directly, or implicitly via the typed accessors: 242 /// if (std::optional<StringRef> S = E.getAsString() 243 /// assert(E.kind() == Value::String); 244 /// 245 /// Array and Object also have typed indexing accessors for easy traversal: 246 /// Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )"); 247 /// if (Object* O = E->getAsObject()) 248 /// if (Object* Opts = O->getObject("options")) 249 /// if (std::optional<StringRef> Font = Opts->getString("font")) 250 /// assert(Opts->at("font").kind() == Value::String); 251 /// 252 /// === Converting JSON values to C++ types === 253 /// 254 /// The convention is to have a deserializer function findable via ADL: 255 /// fromJSON(const json::Value&, T&, Path) -> bool 256 /// 257 /// The return value indicates overall success, and Path is used for precise 258 /// error reporting. (The Path::Root passed in at the top level fromJSON call 259 /// captures any nested error and can render it in context). 260 /// If conversion fails, fromJSON calls Path::report() and immediately returns. 261 /// This ensures that the first fatal error survives. 
262 /// 263 /// Deserializers are provided for: 264 /// - bool 265 /// - int and int64_t 266 /// - double 267 /// - std::string 268 /// - vector<T>, where T is deserializable 269 /// - map<string, T>, where T is deserializable 270 /// - std::optional<T>, where T is deserializable 271 /// ObjectMapper can help writing fromJSON() functions for object types. 272 /// 273 /// For conversion in the other direction, the serializer function is: 274 /// toJSON(const T&) -> json::Value 275 /// If this exists, then it also allows constructing Value from T, and can 276 /// be used to serialize vector<T>, map<string, T>, and std::optional<T>. 277 /// 278 /// === Serialization === 279 /// 280 /// Values can be serialized to JSON: 281 /// 1) raw_ostream << Value // Basic formatting. 282 /// 2) raw_ostream << formatv("{0}", Value) // Basic formatting. 283 /// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2. 284 /// 285 /// And parsed: 286 /// Expected<Value> E = json::parse("[1, 2, null]"); 287 /// assert(E && E->kind() == Value::Array); 288 class Value { 289 public: 290 enum Kind { 291 Null, 292 Boolean, 293 /// Number values can store both int64s and doubles at full precision, 294 /// depending on what they were constructed/parsed from. 295 Number, 296 String, 297 Array, 298 Object, 299 }; 300 301 // It would be nice to have Value() be null. But that would make {} null too. 
302 Value(const Value &M) { copyFrom(M); } 303 Value(Value &&M) { moveFrom(std::move(M)); } 304 Value(std::initializer_list<Value> Elements); 305 Value(json::Array &&Elements) : Type(T_Array) { 306 create<json::Array>(std::move(Elements)); 307 } 308 template <typename Elt> 309 Value(const std::vector<Elt> &C) : Value(json::Array(C)) {} 310 Value(json::Object &&Properties) : Type(T_Object) { 311 create<json::Object>(std::move(Properties)); 312 } 313 template <typename Elt> 314 Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {} 315 // Strings: types with value semantics. Must be valid UTF-8. 316 Value(std::string V) : Type(T_String) { 317 if (LLVM_UNLIKELY(!isUTF8(V))) { 318 assert(false && "Invalid UTF-8 in value used as JSON"); 319 V = fixUTF8(std::move(V)); 320 } 321 create<std::string>(std::move(V)); 322 } 323 Value(const llvm::SmallVectorImpl<char> &V) 324 : Value(std::string(V.begin(), V.end())) {} 325 Value(const llvm::formatv_object_base &V) : Value(V.str()) {} 326 // Strings: types with reference semantics. Must be valid UTF-8. 327 Value(StringRef V) : Type(T_StringRef) { 328 create<llvm::StringRef>(V); 329 if (LLVM_UNLIKELY(!isUTF8(V))) { 330 assert(false && "Invalid UTF-8 in value used as JSON"); 331 *this = Value(fixUTF8(V)); 332 } 333 } 334 Value(const char *V) : Value(StringRef(V)) {} 335 Value(std::nullptr_t) : Type(T_Null) {} 336 // Boolean (disallow implicit conversions). 337 // (The last template parameter is a dummy to keep templates distinct.) 338 template <typename T, typename = std::enable_if_t<std::is_same_v<T, bool>>, 339 bool = false> 340 Value(T B) : Type(T_Boolean) { 341 create<bool>(B); 342 } 343 344 // Unsigned 64-bit integers. 345 template <typename T, typename = std::enable_if_t<is_uint_64_bit_v<T>>> 346 Value(T V) : Type(T_UINT64) { 347 create<uint64_t>(uint64_t{V}); 348 } 349 350 // Integers (except boolean and uint64_t). 351 // Must be non-narrowing convertible to int64_t. 
352 template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>, 353 typename = std::enable_if_t<!std::is_same_v<T, bool>>, 354 typename = std::enable_if_t<!is_uint_64_bit_v<T>>> 355 Value(T I) : Type(T_Integer) { 356 create<int64_t>(int64_t{I}); 357 } 358 // Floating point. Must be non-narrowing convertible to double. 359 template <typename T, 360 typename = std::enable_if_t<std::is_floating_point_v<T>>, 361 double * = nullptr> 362 Value(T D) : Type(T_Double) { 363 create<double>(double{D}); 364 } 365 // Serializable types: with a toJSON(const T&)->Value function, found by ADL. 366 template <typename T, 367 typename = std::enable_if_t< 368 std::is_same_v<Value, decltype(toJSON(*(const T *)nullptr))>>, 369 Value * = nullptr> 370 Value(const T &V) : Value(toJSON(V)) {} 371 372 Value &operator=(const Value &M) { 373 destroy(); 374 copyFrom(M); 375 return *this; 376 } 377 Value &operator=(Value &&M) { 378 destroy(); 379 moveFrom(std::move(M)); 380 return *this; 381 } 382 ~Value() { destroy(); } 383 384 Kind kind() const { 385 switch (Type) { 386 case T_Null: 387 return Null; 388 case T_Boolean: 389 return Boolean; 390 case T_Double: 391 case T_Integer: 392 case T_UINT64: 393 return Number; 394 case T_String: 395 case T_StringRef: 396 return String; 397 case T_Object: 398 return Object; 399 case T_Array: 400 return Array; 401 } 402 llvm_unreachable("Unknown kind"); 403 } 404 405 // Typed accessors return std::nullopt/nullptr if the Value is not of this 406 // type. 
407 std::optional<std::nullptr_t> getAsNull() const { 408 if (LLVM_LIKELY(Type == T_Null)) 409 return nullptr; 410 return std::nullopt; 411 } 412 std::optional<bool> getAsBoolean() const { 413 if (LLVM_LIKELY(Type == T_Boolean)) 414 return as<bool>(); 415 return std::nullopt; 416 } 417 std::optional<double> getAsNumber() const { 418 if (LLVM_LIKELY(Type == T_Double)) 419 return as<double>(); 420 if (LLVM_LIKELY(Type == T_Integer)) 421 return as<int64_t>(); 422 if (LLVM_LIKELY(Type == T_UINT64)) 423 return as<uint64_t>(); 424 return std::nullopt; 425 } 426 // Succeeds if the Value is a Number, and exactly representable as int64_t. 427 std::optional<int64_t> getAsInteger() const { 428 if (LLVM_LIKELY(Type == T_Integer)) 429 return as<int64_t>(); 430 if (LLVM_LIKELY(Type == T_UINT64)) { 431 uint64_t U = as<uint64_t>(); 432 if (LLVM_LIKELY(U <= uint64_t(std::numeric_limits<int64_t>::max()))) { 433 return U; 434 } 435 } 436 if (LLVM_LIKELY(Type == T_Double)) { 437 double D = as<double>(); 438 if (LLVM_LIKELY(std::modf(D, &D) == 0.0 && 439 D >= double(std::numeric_limits<int64_t>::min()) && 440 D <= double(std::numeric_limits<int64_t>::max()))) 441 return D; 442 } 443 return std::nullopt; 444 } 445 std::optional<uint64_t> getAsUINT64() const { 446 if (Type == T_UINT64) 447 return as<uint64_t>(); 448 else if (Type == T_Integer) { 449 int64_t N = as<int64_t>(); 450 if (N >= 0) 451 return as<uint64_t>(); 452 } 453 return std::nullopt; 454 } 455 std::optional<llvm::StringRef> getAsString() const { 456 if (Type == T_String) 457 return llvm::StringRef(as<std::string>()); 458 if (LLVM_LIKELY(Type == T_StringRef)) 459 return as<llvm::StringRef>(); 460 return std::nullopt; 461 } 462 const json::Object *getAsObject() const { 463 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr; 464 } 465 json::Object *getAsObject() { 466 return LLVM_LIKELY(Type == T_Object) ? 
&as<json::Object>() : nullptr; 467 } 468 const json::Array *getAsArray() const { 469 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; 470 } 471 json::Array *getAsArray() { 472 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; 473 } 474 475 private: 476 void destroy(); 477 void copyFrom(const Value &M); 478 // We allow moving from *const* Values, by marking all members as mutable! 479 // This hack is needed to support initializer-list syntax efficiently. 480 // (std::initializer_list<T> is a container of const T). 481 void moveFrom(const Value &&M); 482 friend class Array; 483 friend class Object; 484 485 template <typename T, typename... U> void create(U &&... V) { 486 #if LLVM_ADDRESS_SANITIZER_BUILD 487 // Unpoisoning to prevent overwriting poisoned object (e.g., annotated short 488 // string). Objects that have had their memory poisoned may cause an ASan 489 // error if their memory is reused without calling their destructor. 490 // Unpoisoning the memory prevents this error from occurring. 491 // FIXME: This is a temporary solution to prevent buildbots from failing. 492 // The more appropriate approach would be to call the object's destructor 493 // to unpoison memory. This would prevent any potential memory leaks (long 494 // strings). Read for details: 495 // https://github.com/llvm/llvm-project/pull/79065#discussion_r1462621761 496 __asan_unpoison_memory_region(&Union, sizeof(T)); 497 #endif 498 new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...); 499 } 500 template <typename T> T &as() const { 501 // Using this two-step static_cast via void * instead of reinterpret_cast 502 // silences a -Wstrict-aliasing false positive from GCC6 and earlier. 
503 void *Storage = static_cast<void *>(&Union); 504 return *static_cast<T *>(Storage); 505 } 506 507 friend class OStream; 508 509 enum ValueType : char16_t { 510 T_Null, 511 T_Boolean, 512 T_Double, 513 T_Integer, 514 T_UINT64, 515 T_StringRef, 516 T_String, 517 T_Object, 518 T_Array, 519 }; 520 // All members mutable, see moveFrom(). 521 mutable ValueType Type; 522 mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t, 523 llvm::StringRef, std::string, json::Array, 524 json::Object> 525 Union; 526 friend bool operator==(const Value &, const Value &); 527 }; 528 529 bool operator==(const Value &, const Value &); 530 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); } 531 532 // Array Methods 533 inline Value &Array::operator[](size_t I) { return V[I]; } 534 inline const Value &Array::operator[](size_t I) const { return V[I]; } 535 inline Value &Array::front() { return V.front(); } 536 inline const Value &Array::front() const { return V.front(); } 537 inline Value &Array::back() { return V.back(); } 538 inline const Value &Array::back() const { return V.back(); } 539 inline Value *Array::data() { return V.data(); } 540 inline const Value *Array::data() const { return V.data(); } 541 542 inline typename Array::iterator Array::begin() { return V.begin(); } 543 inline typename Array::const_iterator Array::begin() const { return V.begin(); } 544 inline typename Array::iterator Array::end() { return V.end(); } 545 inline typename Array::const_iterator Array::end() const { return V.end(); } 546 547 inline bool Array::empty() const { return V.empty(); } 548 inline size_t Array::size() const { return V.size(); } 549 inline void Array::reserve(size_t S) { V.reserve(S); } 550 551 inline void Array::clear() { V.clear(); } 552 inline void Array::push_back(const Value &E) { V.push_back(E); } 553 inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); } 554 template <typename... 
Args> inline void Array::emplace_back(Args &&...A) { 555 V.emplace_back(std::forward<Args>(A)...); 556 } 557 inline void Array::pop_back() { V.pop_back(); } 558 inline typename Array::iterator Array::insert(const_iterator P, const Value &E) { 559 return V.insert(P, E); 560 } 561 inline typename Array::iterator Array::insert(const_iterator P, Value &&E) { 562 return V.insert(P, std::move(E)); 563 } 564 template <typename It> 565 inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) { 566 return V.insert(P, A, Z); 567 } 568 template <typename... Args> 569 inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) { 570 return V.emplace(P, std::forward<Args>(A)...); 571 } 572 inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; } 573 574 /// ObjectKey is a used to capture keys in Object. Like Value but: 575 /// - only strings are allowed 576 /// - it's optimized for the string literal case (Owned == nullptr) 577 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details. 
578 class ObjectKey { 579 public: 580 ObjectKey(const char *S) : ObjectKey(StringRef(S)) {} 581 ObjectKey(std::string S) : Owned(new std::string(std::move(S))) { 582 if (LLVM_UNLIKELY(!isUTF8(*Owned))) { 583 assert(false && "Invalid UTF-8 in value used as JSON"); 584 *Owned = fixUTF8(std::move(*Owned)); 585 } 586 Data = *Owned; 587 } 588 ObjectKey(llvm::StringRef S) : Data(S) { 589 if (LLVM_UNLIKELY(!isUTF8(Data))) { 590 assert(false && "Invalid UTF-8 in value used as JSON"); 591 *this = ObjectKey(fixUTF8(S)); 592 } 593 } 594 ObjectKey(const llvm::SmallVectorImpl<char> &V) 595 : ObjectKey(std::string(V.begin(), V.end())) {} 596 ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {} 597 598 ObjectKey(const ObjectKey &C) { *this = C; } 599 ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {} 600 ObjectKey &operator=(const ObjectKey &C) { 601 if (C.Owned) { 602 Owned.reset(new std::string(*C.Owned)); 603 Data = *Owned; 604 } else { 605 Data = C.Data; 606 } 607 return *this; 608 } 609 ObjectKey &operator=(ObjectKey &&) = default; 610 611 operator llvm::StringRef() const { return Data; } 612 std::string str() const { return Data.str(); } 613 614 private: 615 // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned 616 // could be 2 pointers at most. 
617 std::unique_ptr<std::string> Owned; 618 llvm::StringRef Data; 619 }; 620 621 inline bool operator==(const ObjectKey &L, const ObjectKey &R) { 622 return llvm::StringRef(L) == llvm::StringRef(R); 623 } 624 inline bool operator!=(const ObjectKey &L, const ObjectKey &R) { 625 return !(L == R); 626 } 627 inline bool operator<(const ObjectKey &L, const ObjectKey &R) { 628 return StringRef(L) < StringRef(R); 629 } 630 631 struct Object::KV { 632 ObjectKey K; 633 Value V; 634 }; 635 636 inline Object::Object(std::initializer_list<KV> Properties) { 637 for (const auto &P : Properties) { 638 auto R = try_emplace(P.K, nullptr); 639 if (R.second) 640 R.first->getSecond().moveFrom(std::move(P.V)); 641 } 642 } 643 inline std::pair<Object::iterator, bool> Object::insert(KV E) { 644 return try_emplace(std::move(E.K), std::move(E.V)); 645 } 646 inline bool Object::erase(StringRef K) { 647 return M.erase(ObjectKey(K)); 648 } 649 650 /// A "cursor" marking a position within a Value. 651 /// The Value is a tree, and this is the path from the root to the current node. 652 /// This is used to associate errors with particular subobjects. 653 class Path { 654 public: 655 class Root; 656 657 /// Records that the value at the current path is invalid. 658 /// Message is e.g. "expected number" and becomes part of the final error. 659 /// This overwrites any previously written error message in the root. 660 void report(llvm::StringLiteral Message); 661 662 /// The root may be treated as a Path. 663 Path(Root &R) : Parent(nullptr), Seg(&R) {} 664 /// Derives a path for an array element: this[Index] 665 Path index(unsigned Index) const { return Path(this, Segment(Index)); } 666 /// Derives a path for an object field: this.Field 667 Path field(StringRef Field) const { return Path(this, Segment(Field)); } 668 669 private: 670 /// One element in a JSON path: an object field (.foo) or array index [27]. 671 /// Exception: the root Path encodes a pointer to the Path::Root. 
672 class Segment { 673 uintptr_t Pointer; 674 unsigned Offset; 675 676 public: 677 Segment() = default; 678 Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {} 679 Segment(llvm::StringRef Field) 680 : Pointer(reinterpret_cast<uintptr_t>(Field.data())), 681 Offset(static_cast<unsigned>(Field.size())) {} 682 Segment(unsigned Index) : Pointer(0), Offset(Index) {} 683 684 bool isField() const { return Pointer != 0; } 685 StringRef field() const { 686 return StringRef(reinterpret_cast<const char *>(Pointer), Offset); 687 } 688 unsigned index() const { return Offset; } 689 Root *root() const { return reinterpret_cast<Root *>(Pointer); } 690 }; 691 692 const Path *Parent; 693 Segment Seg; 694 695 Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {} 696 }; 697 698 /// The root is the trivial Path to the root value. 699 /// It also stores the latest reported error and the path where it occurred. 700 class Path::Root { 701 llvm::StringRef Name; 702 llvm::StringLiteral ErrorMessage; 703 std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed. 704 705 friend void Path::report(llvm::StringLiteral Message); 706 707 public: 708 Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {} 709 // No copy/move allowed as there are incoming pointers. 710 Root(Root &&) = delete; 711 Root &operator=(Root &&) = delete; 712 Root(const Root &) = delete; 713 Root &operator=(const Root &) = delete; 714 715 /// Returns the last error reported, or else a generic error. 716 Error getError() const; 717 /// Print the root value with the error shown inline as a comment. 718 /// Unrelated parts of the value are elided for brevity, e.g. 719 /// { 720 /// "id": 42, 721 /// "name": /* expected string */ null, 722 /// "properties": { ... } 723 /// } 724 void printErrorContext(const Value &, llvm::raw_ostream &) const; 725 }; 726 727 // Standard deserializers are provided for primitive types. 728 // See comments on Value. 
729 inline bool fromJSON(const Value &E, std::string &Out, Path P) { 730 if (auto S = E.getAsString()) { 731 Out = std::string(*S); 732 return true; 733 } 734 P.report("expected string"); 735 return false; 736 } 737 inline bool fromJSON(const Value &E, int &Out, Path P) { 738 if (auto S = E.getAsInteger()) { 739 Out = *S; 740 return true; 741 } 742 P.report("expected integer"); 743 return false; 744 } 745 inline bool fromJSON(const Value &E, int64_t &Out, Path P) { 746 if (auto S = E.getAsInteger()) { 747 Out = *S; 748 return true; 749 } 750 P.report("expected integer"); 751 return false; 752 } 753 inline bool fromJSON(const Value &E, double &Out, Path P) { 754 if (auto S = E.getAsNumber()) { 755 Out = *S; 756 return true; 757 } 758 P.report("expected number"); 759 return false; 760 } 761 inline bool fromJSON(const Value &E, bool &Out, Path P) { 762 if (auto S = E.getAsBoolean()) { 763 Out = *S; 764 return true; 765 } 766 P.report("expected boolean"); 767 return false; 768 } 769 inline bool fromJSON(const Value &E, uint64_t &Out, Path P) { 770 if (auto S = E.getAsUINT64()) { 771 Out = *S; 772 return true; 773 } 774 P.report("expected uint64_t"); 775 return false; 776 } 777 inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) { 778 if (auto S = E.getAsNull()) { 779 Out = *S; 780 return true; 781 } 782 P.report("expected null"); 783 return false; 784 } 785 template <typename T> 786 bool fromJSON(const Value &E, std::optional<T> &Out, Path P) { 787 if (E.getAsNull()) { 788 Out = std::nullopt; 789 return true; 790 } 791 T Result = {}; 792 if (!fromJSON(E, Result, P)) 793 return false; 794 Out = std::move(Result); 795 return true; 796 } 797 template <typename T> 798 bool fromJSON(const Value &E, std::vector<T> &Out, Path P) { 799 if (auto *A = E.getAsArray()) { 800 Out.clear(); 801 Out.resize(A->size()); 802 for (size_t I = 0; I < A->size(); ++I) 803 if (!fromJSON((*A)[I], Out[I], P.index(I))) 804 return false; 805 return true; 806 } 807 P.report("expected 
array"); 808 return false; 809 } 810 template <typename T> 811 bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) { 812 if (auto *O = E.getAsObject()) { 813 Out.clear(); 814 for (const auto &KV : *O) 815 if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))], 816 P.field(KV.first))) 817 return false; 818 return true; 819 } 820 P.report("expected object"); 821 return false; 822 } 823 824 // Allow serialization of std::optional<T> for supported T. 825 template <typename T> Value toJSON(const std::optional<T> &Opt) { 826 return Opt ? Value(*Opt) : Value(nullptr); 827 } 828 829 /// Helper for mapping JSON objects onto protocol structs. 830 /// 831 /// Example: 832 /// \code 833 /// bool fromJSON(const Value &E, MyStruct &R, Path P) { 834 /// ObjectMapper O(E, P); 835 /// // When returning false, error details were already reported. 836 /// return O && O.map("mandatory_field", R.MandatoryField) && 837 /// O.mapOptional("optional_field", R.OptionalField); 838 /// } 839 /// \endcode 840 class ObjectMapper { 841 public: 842 /// If O is not an object, this mapper is invalid and an error is reported. 843 ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) { 844 if (!O) 845 P.report("expected object"); 846 } 847 848 /// True if the expression is an object. 849 /// Must be checked before calling map(). 850 operator bool() const { return O; } 851 852 /// Maps a property to a field. 853 /// If the property is missing or invalid, reports an error. 854 template <typename T> bool map(StringLiteral Prop, T &Out) { 855 assert(*this && "Must check this is an object before calling map()"); 856 if (const Value *E = O->get(Prop)) 857 return fromJSON(*E, Out, P.field(Prop)); 858 P.field(Prop).report("missing value"); 859 return false; 860 } 861 862 /// Maps a property to a field, if it exists. 863 /// If the property exists and is invalid, reports an error. 864 /// (Optional requires special handling, because missing keys are OK). 
865 template <typename T> bool map(StringLiteral Prop, std::optional<T> &Out) { 866 assert(*this && "Must check this is an object before calling map()"); 867 if (const Value *E = O->get(Prop)) 868 return fromJSON(*E, Out, P.field(Prop)); 869 Out = std::nullopt; 870 return true; 871 } 872 873 /// Maps a property to a field, if it exists. 874 /// If the property exists and is invalid, reports an error. 875 /// If the property does not exist, Out is unchanged. 876 template <typename T> bool mapOptional(StringLiteral Prop, T &Out) { 877 assert(*this && "Must check this is an object before calling map()"); 878 if (const Value *E = O->get(Prop)) 879 return fromJSON(*E, Out, P.field(Prop)); 880 return true; 881 } 882 883 private: 884 const Object *O; 885 Path P; 886 }; 887 888 /// Parses the provided JSON source, or returns a ParseError. 889 /// The returned Value is self-contained and owns its strings (they do not refer 890 /// to the original source). 891 llvm::Expected<Value> parse(llvm::StringRef JSON); 892 893 class ParseError : public llvm::ErrorInfo<ParseError> { 894 const char *Msg; 895 unsigned Line, Column, Offset; 896 897 public: 898 static char ID; 899 ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset) 900 : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {} 901 void log(llvm::raw_ostream &OS) const override { 902 OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg); 903 } 904 std::error_code convertToErrorCode() const override { 905 return llvm::inconvertibleErrorCode(); 906 } 907 }; 908 909 /// Version of parse() that converts the parsed value to the type T. 910 /// RootName describes the root object and is used in error messages. 
911 template <typename T> 912 Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") { 913 auto V = parse(JSON); 914 if (!V) 915 return V.takeError(); 916 Path::Root R(RootName); 917 T Result; 918 if (fromJSON(*V, Result, R)) 919 return std::move(Result); 920 return R.getError(); 921 } 922 923 /// json::OStream allows writing well-formed JSON without materializing 924 /// all structures as json::Value ahead of time. 925 /// It's faster, lower-level, and less safe than OS << json::Value. 926 /// It also allows emitting more constructs, such as comments. 927 /// 928 /// Only one "top-level" object can be written to a stream. 929 /// Simplest usage involves passing lambdas (Blocks) to fill in containers: 930 /// 931 /// json::OStream J(OS); 932 /// J.array([&]{ 933 /// for (const Event &E : Events) 934 /// J.object([&] { 935 /// J.attribute("timestamp", int64_t(E.Time)); 936 /// J.attributeArray("participants", [&] { 937 /// for (const Participant &P : E.Participants) 938 /// J.value(P.toString()); 939 /// }); 940 /// }); 941 /// }); 942 /// 943 /// This would produce JSON like: 944 /// 945 /// [ 946 /// { 947 /// "timestamp": 19287398741, 948 /// "participants": [ 949 /// "King Kong", 950 /// "Miley Cyrus", 951 /// "Cleopatra" 952 /// ] 953 /// }, 954 /// ... 955 /// ] 956 /// 957 /// The lower level begin/end methods (arrayBegin()) are more flexible but 958 /// care must be taken to pair them correctly: 959 /// 960 /// json::OStream J(OS); 961 // J.arrayBegin(); 962 /// for (const Event &E : Events) { 963 /// J.objectBegin(); 964 /// J.attribute("timestamp", int64_t(E.Time)); 965 /// J.attributeBegin("participants"); 966 /// for (const Participant &P : E.Participants) 967 /// J.value(P.toString()); 968 /// J.attributeEnd(); 969 /// J.objectEnd(); 970 /// } 971 /// J.arrayEnd(); 972 /// 973 /// If the call sequence isn't valid JSON, asserts will fire in debug mode. 
974 /// This can be mismatched begin()/end() pairs, trying to emit attributes inside 975 /// an array, and so on. 976 /// With asserts disabled, this is undefined behavior. 977 class OStream { 978 public: 979 using Block = llvm::function_ref<void()>; 980 // If IndentSize is nonzero, output is pretty-printed. 981 explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0) 982 : OS(OS), IndentSize(IndentSize) { 983 Stack.emplace_back(); 984 } 985 ~OStream() { 986 assert(Stack.size() == 1 && "Unmatched begin()/end()"); 987 assert(Stack.back().Ctx == Singleton); 988 assert(Stack.back().HasValue && "Did not write top-level value"); 989 } 990 991 /// Flushes the underlying ostream. OStream does not buffer internally. 992 void flush() { OS.flush(); } 993 994 // High level functions to output a value. 995 // Valid at top-level (exactly once), in an attribute value (exactly once), 996 // or in an array (any number of times). 997 998 /// Emit a self-contained value (number, string, vector<string> etc). 999 void value(const Value &V); 1000 /// Emit an array whose elements are emitted in the provided Block. 1001 void array(Block Contents) { 1002 arrayBegin(); 1003 Contents(); 1004 arrayEnd(); 1005 } 1006 /// Emit an object whose elements are emitted in the provided Block. 1007 void object(Block Contents) { 1008 objectBegin(); 1009 Contents(); 1010 objectEnd(); 1011 } 1012 /// Emit an externally-serialized value. 1013 /// The caller must write exactly one valid JSON value to the provided stream. 1014 /// No validation or formatting of this value occurs. 1015 void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) { 1016 rawValueBegin(); 1017 Contents(OS); 1018 rawValueEnd(); 1019 } 1020 void rawValue(llvm::StringRef Contents) { 1021 rawValue([&](raw_ostream &OS) { OS << Contents; }); 1022 } 1023 /// Emit a JavaScript comment associated with the next printed value. 1024 /// The string must be valid until the next attribute or value is emitted. 
1025 /// Comments are not part of standard JSON, and many parsers reject them! 1026 void comment(llvm::StringRef); 1027 1028 // High level functions to output object attributes. 1029 // Valid only within an object (any number of times). 1030 1031 /// Emit an attribute whose value is self-contained (number, vector<int> etc). 1032 void attribute(llvm::StringRef Key, const Value& Contents) { 1033 attributeImpl(Key, [&] { value(Contents); }); 1034 } 1035 /// Emit an attribute whose value is an array with elements from the Block. 1036 void attributeArray(llvm::StringRef Key, Block Contents) { 1037 attributeImpl(Key, [&] { array(Contents); }); 1038 } 1039 /// Emit an attribute whose value is an object with attributes from the Block. 1040 void attributeObject(llvm::StringRef Key, Block Contents) { 1041 attributeImpl(Key, [&] { object(Contents); }); 1042 } 1043 1044 // Low-level begin/end functions to output arrays, objects, and attributes. 1045 // Must be correctly paired. Allowed contexts are as above. 1046 1047 void arrayBegin(); 1048 void arrayEnd(); 1049 void objectBegin(); 1050 void objectEnd(); 1051 void attributeBegin(llvm::StringRef Key); 1052 void attributeEnd(); 1053 raw_ostream &rawValueBegin(); 1054 void rawValueEnd(); 1055 1056 private: 1057 void attributeImpl(llvm::StringRef Key, Block Contents) { 1058 attributeBegin(Key); 1059 Contents(); 1060 attributeEnd(); 1061 } 1062 1063 void valueBegin(); 1064 void flushComment(); 1065 void newline(); 1066 1067 enum Context { 1068 Singleton, // Top level, or object attribute. 1069 Array, 1070 Object, 1071 RawValue, // External code writing a value to OS directly. 1072 }; 1073 struct State { 1074 Context Ctx = Singleton; 1075 bool HasValue = false; 1076 }; 1077 llvm::SmallVector<State, 16> Stack; // Never empty. 1078 llvm::StringRef PendingComment; 1079 llvm::raw_ostream &OS; 1080 unsigned IndentSize; 1081 unsigned Indent = 0; 1082 }; 1083 1084 /// Serializes this Value to JSON, writing it to the provided stream. 
1085 /// The formatting is compact (no extra whitespace) and deterministic. 1086 /// For pretty-printing, use the formatv() format_provider below. 1087 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) { 1088 OStream(OS).value(V); 1089 return OS; 1090 } 1091 } // namespace json 1092 1093 /// Allow printing json::Value with formatv(). 1094 /// The default style is basic/compact formatting, like operator<<. 1095 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2. 1096 template <> struct format_provider<llvm::json::Value> { 1097 static void format(const llvm::json::Value &, raw_ostream &, StringRef); 1098 }; 1099 } // namespace llvm 1100 1101 #endif 1102