//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
///
/// \file
/// This file supports working with JSON data.
///
/// It comprises:
///
/// - classes which hold dynamically-typed parsed JSON structures
///   These are value types that can be composed, inspected, and modified.
///   See json::Value, and the related types json::Object and json::Array.
///
/// - functions to parse JSON text into Values, and to serialize Values to text.
///   See parse(), operator<<, and format_provider.
///
/// - a convention and helpers for mapping between json::Value and user-defined
///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
///
/// - an output API json::OStream which can emit JSON without materializing
///   all structures as json::Value.
///
/// Typically, JSON data would be read from an external source, parsed into
/// a Value, and then converted into some native data structure before doing
/// real work on it. (And vice versa when writing).
///
/// Other serialization mechanisms you may consider:
///
/// - YAML is also text-based, and more human-readable than JSON. It's a more
///   complex format and data model, and YAML parsers aren't ubiquitous.
///   YAMLParser.h is a streaming parser suitable for parsing large documents
///   (including JSON, as YAML is a superset). It can be awkward to use
///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
///   declarative than the toJSON/fromJSON conventions here.
///
/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
///
//===---------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_JSON_H
#define LLVM_SUPPORT_JSON_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <map>

namespace llvm {
namespace json {

// === String encodings ===
//
// JSON strings are character sequences (not byte sequences like std::string).
// We need to know the encoding, and for simplicity only support UTF-8.
//
//   - When parsing, invalid UTF-8 is a syntax error like any other
//
//   - When creating Values from strings, callers must ensure they are UTF-8.
//        with asserts on, invalid UTF-8 will crash the program
//        with asserts off, we'll substitute the replacement character (U+FFFD)
//     Callers can use json::isUTF8() and json::fixUTF8() for validation.
//
//   - When retrieving strings from Values (e.g. getAsString()), the result
//     will always be valid UTF-8.

/// True iff \p T is an unsigned integral type with the same size as uint64_t.
/// Used to select the Value constructor that stores a uint64_t.
template <typename T>
constexpr bool is_uint_64_bit_v =
    std::is_integral_v<T> && std::is_unsigned_v<T> &&
    sizeof(T) == sizeof(uint64_t);

/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset (when provided; it defaults to nullptr)
/// is set to a byte offset near the first error.
bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
/// Replaces invalid UTF-8 sequences in \p S with the replacement character
/// (U+FFFD). The returned string is valid UTF-8.
/// This is much slower than isUTF8, so test that first.
std::string fixUTF8(llvm::StringRef S);

class Array;
class ObjectKey;
class Value;
template <typename T> Value toJSON(const std::optional<T> &Opt);

/// An Object is a JSON object, which maps strings to heterogeneous JSON values.
/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
class Object {
  // Keys are hashed and compared through DenseMapInfo<StringRef>, which is
  // what lets find() below look up by StringRef via find_as().
  using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
  Storage M;

public:
  using key_type = ObjectKey;
  using mapped_type = Value;
  using value_type = Storage::value_type;
  using iterator = Storage::iterator;
  using const_iterator = Storage::const_iterator;

  Object() = default;
  // KV is a trivial key-value struct for list-initialization.
  // (using std::pair forces extra copies).
  struct KV;
  explicit Object(std::initializer_list<KV> Properties);

  // Iteration and size queries forward directly to the underlying map.
  iterator begin() { return M.begin(); }
  const_iterator begin() const { return M.begin(); }
  iterator end() { return M.end(); }
  const_iterator end() const { return M.end(); }

  bool empty() const { return M.empty(); }
  size_t size() const { return M.size(); }

  void clear() { M.clear(); }
  // Insertion: both insert() and try_emplace() return the underlying map's
  // try_emplace() result (an iterator plus a bool).
  std::pair<iterator, bool> insert(KV E);
  template <typename... Ts>
  std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
    return M.try_emplace(K, std::forward<Ts>(Args)...);
  }
  template <typename... Ts>
  std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
    return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
  }
  bool erase(StringRef K);
  void erase(iterator I) { M.erase(I); }

  // Lookup by string; the key is not converted to an ObjectKey first.
  iterator find(StringRef K) { return M.find_as(K); }
  const_iterator find(StringRef K) const { return M.find_as(K); }
  // operator[] acts as if Value was default-constructible as null.
  Value &operator[](const ObjectKey &K);
  Value &operator[](ObjectKey &&K);
  // Look up a property, returning nullptr if it doesn't exist.
  Value *get(StringRef K);
  const Value *get(StringRef K) const;
  // Typed accessors return std::nullopt/nullptr if
  //   - the property doesn't exist
  //   - or it has the wrong type
  std::optional<std::nullptr_t> getNull(StringRef K) const;
  std::optional<bool> getBoolean(StringRef K) const;
  std::optional<double> getNumber(StringRef K) const;
  std::optional<int64_t> getInteger(StringRef K) const;
  std::optional<llvm::StringRef> getString(StringRef K) const;
  const json::Object *getObject(StringRef K) const;
  json::Object *getObject(StringRef K);
  const json::Array *getArray(StringRef K) const;
  json::Array *getArray(StringRef K);
};
bool operator==(const Object &LHS, const Object &RHS);
inline bool operator!=(const Object &LHS, const Object &RHS) {
  return !(LHS == RHS);
}

/// An Array is a JSON array, which contains heterogeneous JSON values.
/// It simulates std::vector<Value>.
163 class Array { 164 std::vector<Value> V; 165 166 public: 167 using value_type = Value; 168 using iterator = std::vector<Value>::iterator; 169 using const_iterator = std::vector<Value>::const_iterator; 170 171 Array() = default; 172 explicit Array(std::initializer_list<Value> Elements); 173 template <typename Collection> explicit Array(const Collection &C) { 174 for (const auto &V : C) 175 emplace_back(V); 176 } 177 178 Value &operator[](size_t I); 179 const Value &operator[](size_t I) const; 180 Value &front(); 181 const Value &front() const; 182 Value &back(); 183 const Value &back() const; 184 Value *data(); 185 const Value *data() const; 186 187 iterator begin(); 188 const_iterator begin() const; 189 iterator end(); 190 const_iterator end() const; 191 192 bool empty() const; 193 size_t size() const; 194 void reserve(size_t S); 195 196 void clear(); 197 void push_back(const Value &E); 198 void push_back(Value &&E); 199 template <typename... Args> void emplace_back(Args &&...A); 200 void pop_back(); 201 iterator insert(const_iterator P, const Value &E); 202 iterator insert(const_iterator P, Value &&E); 203 template <typename It> iterator insert(const_iterator P, It A, It Z); 204 template <typename... Args> iterator emplace(const_iterator P, Args &&...A); 205 206 friend bool operator==(const Array &L, const Array &R); 207 }; 208 inline bool operator!=(const Array &L, const Array &R) { return !(L == R); } 209 210 /// A Value is an JSON value of unknown type. 211 /// They can be copied, but should generally be moved. 212 /// 213 /// === Composing values === 214 /// 215 /// You can implicitly construct Values from: 216 /// - strings: std::string, SmallString, formatv, StringRef, char* 217 /// (char*, and StringRef are references, not copies!) 
///   - numbers
///   - booleans
///   - null: nullptr
///   - arrays: {"foo", 42.0, false}
///   - serializable things: types with toJSON(const T&)->Value, found by ADL
///
/// They can also be constructed from object/array helpers:
///   - json::Object is a type like map<ObjectKey, Value>
///   - json::Array is a type like vector<Value>
/// These can be list-initialized, or used to build up collections in a loop.
/// json::Array(Collection) converts all items in a collection to Values.
///
/// === Inspecting values ===
///
/// Each Value is one of the JSON kinds:
///   null    (nullptr_t)
///   boolean (bool)
///   number  (double, int64 or uint64)
///   string  (StringRef)
///   array   (json::Array)
///   object  (json::Object)
///
/// The kind can be queried directly, or implicitly via the typed accessors:
///   if (std::optional<StringRef> S = E.getAsString())
///     assert(E.kind() == Value::String);
///
/// Array and Object also have typed indexing accessors for easy traversal:
///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
///   if (Object* O = E->getAsObject())
///     if (Object* Opts = O->getObject("options"))
///       if (std::optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->get("font")->kind() == Value::String);
///
/// === Converting JSON values to C++ types ===
///
/// The convention is to have a deserializer function findable via ADL:
///     fromJSON(const json::Value&, T&, Path) -> bool
///
/// The return value indicates overall success, and Path is used for precise
/// error reporting. (The Path::Root passed in at the top level fromJSON call
/// captures any nested error and can render it in context).
/// If conversion fails, fromJSON calls Path::report() and immediately returns.
/// This ensures that the first fatal error survives.
261 /// 262 /// Deserializers are provided for: 263 /// - bool 264 /// - int and int64_t 265 /// - double 266 /// - std::string 267 /// - vector<T>, where T is deserializable 268 /// - map<string, T>, where T is deserializable 269 /// - std::optional<T>, where T is deserializable 270 /// ObjectMapper can help writing fromJSON() functions for object types. 271 /// 272 /// For conversion in the other direction, the serializer function is: 273 /// toJSON(const T&) -> json::Value 274 /// If this exists, then it also allows constructing Value from T, and can 275 /// be used to serialize vector<T>, map<string, T>, and std::optional<T>. 276 /// 277 /// === Serialization === 278 /// 279 /// Values can be serialized to JSON: 280 /// 1) raw_ostream << Value // Basic formatting. 281 /// 2) raw_ostream << formatv("{0}", Value) // Basic formatting. 282 /// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2. 283 /// 284 /// And parsed: 285 /// Expected<Value> E = json::parse("[1, 2, null]"); 286 /// assert(E && E->kind() == Value::Array); 287 class Value { 288 public: 289 enum Kind { 290 Null, 291 Boolean, 292 /// Number values can store both int64s and doubles at full precision, 293 /// depending on what they were constructed/parsed from. 294 Number, 295 String, 296 Array, 297 Object, 298 }; 299 300 // It would be nice to have Value() be null. But that would make {} null too. 
301 Value(const Value &M) { copyFrom(M); } 302 Value(Value &&M) { moveFrom(std::move(M)); } 303 Value(std::initializer_list<Value> Elements); 304 Value(json::Array &&Elements) : Type(T_Array) { 305 create<json::Array>(std::move(Elements)); 306 } 307 template <typename Elt> 308 Value(const std::vector<Elt> &C) : Value(json::Array(C)) {} 309 Value(json::Object &&Properties) : Type(T_Object) { 310 create<json::Object>(std::move(Properties)); 311 } 312 template <typename Elt> 313 Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {} 314 // Strings: types with value semantics. Must be valid UTF-8. 315 Value(std::string V) : Type(T_String) { 316 if (LLVM_UNLIKELY(!isUTF8(V))) { 317 assert(false && "Invalid UTF-8 in value used as JSON"); 318 V = fixUTF8(std::move(V)); 319 } 320 create<std::string>(std::move(V)); 321 } 322 Value(const llvm::SmallVectorImpl<char> &V) 323 : Value(std::string(V.begin(), V.end())) {} 324 Value(const llvm::formatv_object_base &V) : Value(V.str()) {} 325 // Strings: types with reference semantics. Must be valid UTF-8. 326 Value(StringRef V) : Type(T_StringRef) { 327 create<llvm::StringRef>(V); 328 if (LLVM_UNLIKELY(!isUTF8(V))) { 329 assert(false && "Invalid UTF-8 in value used as JSON"); 330 *this = Value(fixUTF8(V)); 331 } 332 } 333 Value(const char *V) : Value(StringRef(V)) {} 334 Value(std::nullptr_t) : Type(T_Null) {} 335 // Boolean (disallow implicit conversions). 336 // (The last template parameter is a dummy to keep templates distinct.) 337 template <typename T, typename = std::enable_if_t<std::is_same_v<T, bool>>, 338 bool = false> 339 Value(T B) : Type(T_Boolean) { 340 create<bool>(B); 341 } 342 343 // Unsigned 64-bit integers. 344 template <typename T, typename = std::enable_if_t<is_uint_64_bit_v<T>>> 345 Value(T V) : Type(T_UINT64) { 346 create<uint64_t>(uint64_t{V}); 347 } 348 349 // Integers (except boolean and uint64_t). 350 // Must be non-narrowing convertible to int64_t. 
351 template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>, 352 typename = std::enable_if_t<!std::is_same_v<T, bool>>, 353 typename = std::enable_if_t<!is_uint_64_bit_v<T>>> 354 Value(T I) : Type(T_Integer) { 355 create<int64_t>(int64_t{I}); 356 } 357 // Floating point. Must be non-narrowing convertible to double. 358 template <typename T, 359 typename = std::enable_if_t<std::is_floating_point_v<T>>, 360 double * = nullptr> 361 Value(T D) : Type(T_Double) { 362 create<double>(double{D}); 363 } 364 // Serializable types: with a toJSON(const T&)->Value function, found by ADL. 365 template <typename T, 366 typename = std::enable_if_t< 367 std::is_same_v<Value, decltype(toJSON(*(const T *)nullptr))>>, 368 Value * = nullptr> 369 Value(const T &V) : Value(toJSON(V)) {} 370 371 Value &operator=(const Value &M) { 372 destroy(); 373 copyFrom(M); 374 return *this; 375 } 376 Value &operator=(Value &&M) { 377 destroy(); 378 moveFrom(std::move(M)); 379 return *this; 380 } 381 ~Value() { destroy(); } 382 383 Kind kind() const { 384 switch (Type) { 385 case T_Null: 386 return Null; 387 case T_Boolean: 388 return Boolean; 389 case T_Double: 390 case T_Integer: 391 case T_UINT64: 392 return Number; 393 case T_String: 394 case T_StringRef: 395 return String; 396 case T_Object: 397 return Object; 398 case T_Array: 399 return Array; 400 } 401 llvm_unreachable("Unknown kind"); 402 } 403 404 // Typed accessors return std::nullopt/nullptr if the Value is not of this 405 // type. 
406 std::optional<std::nullptr_t> getAsNull() const { 407 if (LLVM_LIKELY(Type == T_Null)) 408 return nullptr; 409 return std::nullopt; 410 } 411 std::optional<bool> getAsBoolean() const { 412 if (LLVM_LIKELY(Type == T_Boolean)) 413 return as<bool>(); 414 return std::nullopt; 415 } 416 std::optional<double> getAsNumber() const { 417 if (LLVM_LIKELY(Type == T_Double)) 418 return as<double>(); 419 if (LLVM_LIKELY(Type == T_Integer)) 420 return as<int64_t>(); 421 if (LLVM_LIKELY(Type == T_UINT64)) 422 return as<uint64_t>(); 423 return std::nullopt; 424 } 425 // Succeeds if the Value is a Number, and exactly representable as int64_t. 426 std::optional<int64_t> getAsInteger() const { 427 if (LLVM_LIKELY(Type == T_Integer)) 428 return as<int64_t>(); 429 if (LLVM_LIKELY(Type == T_UINT64)) { 430 uint64_t U = as<uint64_t>(); 431 if (LLVM_LIKELY(U <= uint64_t(std::numeric_limits<int64_t>::max()))) { 432 return U; 433 } 434 } 435 if (LLVM_LIKELY(Type == T_Double)) { 436 double D = as<double>(); 437 if (LLVM_LIKELY(std::modf(D, &D) == 0.0 && 438 D >= double(std::numeric_limits<int64_t>::min()) && 439 D <= double(std::numeric_limits<int64_t>::max()))) 440 return D; 441 } 442 return std::nullopt; 443 } 444 std::optional<uint64_t> getAsUINT64() const { 445 if (Type == T_UINT64) 446 return as<uint64_t>(); 447 else if (Type == T_Integer) { 448 int64_t N = as<int64_t>(); 449 if (N >= 0) 450 return as<uint64_t>(); 451 } 452 return std::nullopt; 453 } 454 std::optional<llvm::StringRef> getAsString() const { 455 if (Type == T_String) 456 return llvm::StringRef(as<std::string>()); 457 if (LLVM_LIKELY(Type == T_StringRef)) 458 return as<llvm::StringRef>(); 459 return std::nullopt; 460 } 461 const json::Object *getAsObject() const { 462 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr; 463 } 464 json::Object *getAsObject() { 465 return LLVM_LIKELY(Type == T_Object) ? 
&as<json::Object>() : nullptr; 466 } 467 const json::Array *getAsArray() const { 468 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; 469 } 470 json::Array *getAsArray() { 471 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; 472 } 473 474 private: 475 void destroy(); 476 void copyFrom(const Value &M); 477 // We allow moving from *const* Values, by marking all members as mutable! 478 // This hack is needed to support initializer-list syntax efficiently. 479 // (std::initializer_list<T> is a container of const T). 480 void moveFrom(const Value &&M); 481 friend class Array; 482 friend class Object; 483 484 template <typename T, typename... U> void create(U &&... V) { 485 new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...); 486 } 487 template <typename T> T &as() const { 488 // Using this two-step static_cast via void * instead of reinterpret_cast 489 // silences a -Wstrict-aliasing false positive from GCC6 and earlier. 490 void *Storage = static_cast<void *>(&Union); 491 return *static_cast<T *>(Storage); 492 } 493 494 friend class OStream; 495 496 enum ValueType : char16_t { 497 T_Null, 498 T_Boolean, 499 T_Double, 500 T_Integer, 501 T_UINT64, 502 T_StringRef, 503 T_String, 504 T_Object, 505 T_Array, 506 }; 507 // All members mutable, see moveFrom(). 
508 mutable ValueType Type; 509 mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t, 510 llvm::StringRef, std::string, json::Array, 511 json::Object> 512 Union; 513 friend bool operator==(const Value &, const Value &); 514 }; 515 516 bool operator==(const Value &, const Value &); 517 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); } 518 519 // Array Methods 520 inline Value &Array::operator[](size_t I) { return V[I]; } 521 inline const Value &Array::operator[](size_t I) const { return V[I]; } 522 inline Value &Array::front() { return V.front(); } 523 inline const Value &Array::front() const { return V.front(); } 524 inline Value &Array::back() { return V.back(); } 525 inline const Value &Array::back() const { return V.back(); } 526 inline Value *Array::data() { return V.data(); } 527 inline const Value *Array::data() const { return V.data(); } 528 529 inline typename Array::iterator Array::begin() { return V.begin(); } 530 inline typename Array::const_iterator Array::begin() const { return V.begin(); } 531 inline typename Array::iterator Array::end() { return V.end(); } 532 inline typename Array::const_iterator Array::end() const { return V.end(); } 533 534 inline bool Array::empty() const { return V.empty(); } 535 inline size_t Array::size() const { return V.size(); } 536 inline void Array::reserve(size_t S) { V.reserve(S); } 537 538 inline void Array::clear() { V.clear(); } 539 inline void Array::push_back(const Value &E) { V.push_back(E); } 540 inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); } 541 template <typename... 
Args> inline void Array::emplace_back(Args &&...A) { 542 V.emplace_back(std::forward<Args>(A)...); 543 } 544 inline void Array::pop_back() { V.pop_back(); } 545 inline typename Array::iterator Array::insert(const_iterator P, const Value &E) { 546 return V.insert(P, E); 547 } 548 inline typename Array::iterator Array::insert(const_iterator P, Value &&E) { 549 return V.insert(P, std::move(E)); 550 } 551 template <typename It> 552 inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) { 553 return V.insert(P, A, Z); 554 } 555 template <typename... Args> 556 inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) { 557 return V.emplace(P, std::forward<Args>(A)...); 558 } 559 inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; } 560 561 /// ObjectKey is a used to capture keys in Object. Like Value but: 562 /// - only strings are allowed 563 /// - it's optimized for the string literal case (Owned == nullptr) 564 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details. 
565 class ObjectKey { 566 public: 567 ObjectKey(const char *S) : ObjectKey(StringRef(S)) {} 568 ObjectKey(std::string S) : Owned(new std::string(std::move(S))) { 569 if (LLVM_UNLIKELY(!isUTF8(*Owned))) { 570 assert(false && "Invalid UTF-8 in value used as JSON"); 571 *Owned = fixUTF8(std::move(*Owned)); 572 } 573 Data = *Owned; 574 } 575 ObjectKey(llvm::StringRef S) : Data(S) { 576 if (LLVM_UNLIKELY(!isUTF8(Data))) { 577 assert(false && "Invalid UTF-8 in value used as JSON"); 578 *this = ObjectKey(fixUTF8(S)); 579 } 580 } 581 ObjectKey(const llvm::SmallVectorImpl<char> &V) 582 : ObjectKey(std::string(V.begin(), V.end())) {} 583 ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {} 584 585 ObjectKey(const ObjectKey &C) { *this = C; } 586 ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {} 587 ObjectKey &operator=(const ObjectKey &C) { 588 if (C.Owned) { 589 Owned.reset(new std::string(*C.Owned)); 590 Data = *Owned; 591 } else { 592 Data = C.Data; 593 } 594 return *this; 595 } 596 ObjectKey &operator=(ObjectKey &&) = default; 597 598 operator llvm::StringRef() const { return Data; } 599 std::string str() const { return Data.str(); } 600 601 private: 602 // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned 603 // could be 2 pointers at most. 
604 std::unique_ptr<std::string> Owned; 605 llvm::StringRef Data; 606 }; 607 608 inline bool operator==(const ObjectKey &L, const ObjectKey &R) { 609 return llvm::StringRef(L) == llvm::StringRef(R); 610 } 611 inline bool operator!=(const ObjectKey &L, const ObjectKey &R) { 612 return !(L == R); 613 } 614 inline bool operator<(const ObjectKey &L, const ObjectKey &R) { 615 return StringRef(L) < StringRef(R); 616 } 617 618 struct Object::KV { 619 ObjectKey K; 620 Value V; 621 }; 622 623 inline Object::Object(std::initializer_list<KV> Properties) { 624 for (const auto &P : Properties) { 625 auto R = try_emplace(P.K, nullptr); 626 if (R.second) 627 R.first->getSecond().moveFrom(std::move(P.V)); 628 } 629 } 630 inline std::pair<Object::iterator, bool> Object::insert(KV E) { 631 return try_emplace(std::move(E.K), std::move(E.V)); 632 } 633 inline bool Object::erase(StringRef K) { 634 return M.erase(ObjectKey(K)); 635 } 636 637 /// A "cursor" marking a position within a Value. 638 /// The Value is a tree, and this is the path from the root to the current node. 639 /// This is used to associate errors with particular subobjects. 640 class Path { 641 public: 642 class Root; 643 644 /// Records that the value at the current path is invalid. 645 /// Message is e.g. "expected number" and becomes part of the final error. 646 /// This overwrites any previously written error message in the root. 647 void report(llvm::StringLiteral Message); 648 649 /// The root may be treated as a Path. 650 Path(Root &R) : Parent(nullptr), Seg(&R) {} 651 /// Derives a path for an array element: this[Index] 652 Path index(unsigned Index) const { return Path(this, Segment(Index)); } 653 /// Derives a path for an object field: this.Field 654 Path field(StringRef Field) const { return Path(this, Segment(Field)); } 655 656 private: 657 /// One element in a JSON path: an object field (.foo) or array index [27]. 658 /// Exception: the root Path encodes a pointer to the Path::Root. 
659 class Segment { 660 uintptr_t Pointer; 661 unsigned Offset; 662 663 public: 664 Segment() = default; 665 Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {} 666 Segment(llvm::StringRef Field) 667 : Pointer(reinterpret_cast<uintptr_t>(Field.data())), 668 Offset(static_cast<unsigned>(Field.size())) {} 669 Segment(unsigned Index) : Pointer(0), Offset(Index) {} 670 671 bool isField() const { return Pointer != 0; } 672 StringRef field() const { 673 return StringRef(reinterpret_cast<const char *>(Pointer), Offset); 674 } 675 unsigned index() const { return Offset; } 676 Root *root() const { return reinterpret_cast<Root *>(Pointer); } 677 }; 678 679 const Path *Parent; 680 Segment Seg; 681 682 Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {} 683 }; 684 685 /// The root is the trivial Path to the root value. 686 /// It also stores the latest reported error and the path where it occurred. 687 class Path::Root { 688 llvm::StringRef Name; 689 llvm::StringLiteral ErrorMessage; 690 std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed. 691 692 friend void Path::report(llvm::StringLiteral Message); 693 694 public: 695 Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {} 696 // No copy/move allowed as there are incoming pointers. 697 Root(Root &&) = delete; 698 Root &operator=(Root &&) = delete; 699 Root(const Root &) = delete; 700 Root &operator=(const Root &) = delete; 701 702 /// Returns the last error reported, or else a generic error. 703 Error getError() const; 704 /// Print the root value with the error shown inline as a comment. 705 /// Unrelated parts of the value are elided for brevity, e.g. 706 /// { 707 /// "id": 42, 708 /// "name": /* expected string */ null, 709 /// "properties": { ... } 710 /// } 711 void printErrorContext(const Value &, llvm::raw_ostream &) const; 712 }; 713 714 // Standard deserializers are provided for primitive types. 715 // See comments on Value. 
716 inline bool fromJSON(const Value &E, std::string &Out, Path P) { 717 if (auto S = E.getAsString()) { 718 Out = std::string(*S); 719 return true; 720 } 721 P.report("expected string"); 722 return false; 723 } 724 inline bool fromJSON(const Value &E, int &Out, Path P) { 725 if (auto S = E.getAsInteger()) { 726 Out = *S; 727 return true; 728 } 729 P.report("expected integer"); 730 return false; 731 } 732 inline bool fromJSON(const Value &E, int64_t &Out, Path P) { 733 if (auto S = E.getAsInteger()) { 734 Out = *S; 735 return true; 736 } 737 P.report("expected integer"); 738 return false; 739 } 740 inline bool fromJSON(const Value &E, double &Out, Path P) { 741 if (auto S = E.getAsNumber()) { 742 Out = *S; 743 return true; 744 } 745 P.report("expected number"); 746 return false; 747 } 748 inline bool fromJSON(const Value &E, bool &Out, Path P) { 749 if (auto S = E.getAsBoolean()) { 750 Out = *S; 751 return true; 752 } 753 P.report("expected boolean"); 754 return false; 755 } 756 inline bool fromJSON(const Value &E, uint64_t &Out, Path P) { 757 if (auto S = E.getAsUINT64()) { 758 Out = *S; 759 return true; 760 } 761 P.report("expected uint64_t"); 762 return false; 763 } 764 inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) { 765 if (auto S = E.getAsNull()) { 766 Out = *S; 767 return true; 768 } 769 P.report("expected null"); 770 return false; 771 } 772 template <typename T> 773 bool fromJSON(const Value &E, std::optional<T> &Out, Path P) { 774 if (E.getAsNull()) { 775 Out = std::nullopt; 776 return true; 777 } 778 T Result = {}; 779 if (!fromJSON(E, Result, P)) 780 return false; 781 Out = std::move(Result); 782 return true; 783 } 784 template <typename T> 785 bool fromJSON(const Value &E, std::vector<T> &Out, Path P) { 786 if (auto *A = E.getAsArray()) { 787 Out.clear(); 788 Out.resize(A->size()); 789 for (size_t I = 0; I < A->size(); ++I) 790 if (!fromJSON((*A)[I], Out[I], P.index(I))) 791 return false; 792 return true; 793 } 794 P.report("expected 
array"); 795 return false; 796 } 797 template <typename T> 798 bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) { 799 if (auto *O = E.getAsObject()) { 800 Out.clear(); 801 for (const auto &KV : *O) 802 if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))], 803 P.field(KV.first))) 804 return false; 805 return true; 806 } 807 P.report("expected object"); 808 return false; 809 } 810 811 // Allow serialization of std::optional<T> for supported T. 812 template <typename T> Value toJSON(const std::optional<T> &Opt) { 813 return Opt ? Value(*Opt) : Value(nullptr); 814 } 815 816 /// Helper for mapping JSON objects onto protocol structs. 817 /// 818 /// Example: 819 /// \code 820 /// bool fromJSON(const Value &E, MyStruct &R, Path P) { 821 /// ObjectMapper O(E, P); 822 /// // When returning false, error details were already reported. 823 /// return O && O.map("mandatory_field", R.MandatoryField) && 824 /// O.mapOptional("optional_field", R.OptionalField); 825 /// } 826 /// \endcode 827 class ObjectMapper { 828 public: 829 /// If O is not an object, this mapper is invalid and an error is reported. 830 ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) { 831 if (!O) 832 P.report("expected object"); 833 } 834 835 /// True if the expression is an object. 836 /// Must be checked before calling map(). 837 operator bool() const { return O; } 838 839 /// Maps a property to a field. 840 /// If the property is missing or invalid, reports an error. 841 template <typename T> bool map(StringLiteral Prop, T &Out) { 842 assert(*this && "Must check this is an object before calling map()"); 843 if (const Value *E = O->get(Prop)) 844 return fromJSON(*E, Out, P.field(Prop)); 845 P.field(Prop).report("missing value"); 846 return false; 847 } 848 849 /// Maps a property to a field, if it exists. 850 /// If the property exists and is invalid, reports an error. 851 /// (Optional requires special handling, because missing keys are OK). 
852 template <typename T> bool map(StringLiteral Prop, std::optional<T> &Out) { 853 assert(*this && "Must check this is an object before calling map()"); 854 if (const Value *E = O->get(Prop)) 855 return fromJSON(*E, Out, P.field(Prop)); 856 Out = std::nullopt; 857 return true; 858 } 859 860 /// Maps a property to a field, if it exists. 861 /// If the property exists and is invalid, reports an error. 862 /// If the property does not exist, Out is unchanged. 863 template <typename T> bool mapOptional(StringLiteral Prop, T &Out) { 864 assert(*this && "Must check this is an object before calling map()"); 865 if (const Value *E = O->get(Prop)) 866 return fromJSON(*E, Out, P.field(Prop)); 867 return true; 868 } 869 870 private: 871 const Object *O; 872 Path P; 873 }; 874 875 /// Parses the provided JSON source, or returns a ParseError. 876 /// The returned Value is self-contained and owns its strings (they do not refer 877 /// to the original source). 878 llvm::Expected<Value> parse(llvm::StringRef JSON); 879 880 class ParseError : public llvm::ErrorInfo<ParseError> { 881 const char *Msg; 882 unsigned Line, Column, Offset; 883 884 public: 885 static char ID; 886 ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset) 887 : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {} 888 void log(llvm::raw_ostream &OS) const override { 889 OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg); 890 } 891 std::error_code convertToErrorCode() const override { 892 return llvm::inconvertibleErrorCode(); 893 } 894 }; 895 896 /// Version of parse() that converts the parsed value to the type T. 897 /// RootName describes the root object and is used in error messages. 
898 template <typename T> 899 Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") { 900 auto V = parse(JSON); 901 if (!V) 902 return V.takeError(); 903 Path::Root R(RootName); 904 T Result; 905 if (fromJSON(*V, Result, R)) 906 return std::move(Result); 907 return R.getError(); 908 } 909 910 /// json::OStream allows writing well-formed JSON without materializing 911 /// all structures as json::Value ahead of time. 912 /// It's faster, lower-level, and less safe than OS << json::Value. 913 /// It also allows emitting more constructs, such as comments. 914 /// 915 /// Only one "top-level" object can be written to a stream. 916 /// Simplest usage involves passing lambdas (Blocks) to fill in containers: 917 /// 918 /// json::OStream J(OS); 919 /// J.array([&]{ 920 /// for (const Event &E : Events) 921 /// J.object([&] { 922 /// J.attribute("timestamp", int64_t(E.Time)); 923 /// J.attributeArray("participants", [&] { 924 /// for (const Participant &P : E.Participants) 925 /// J.value(P.toString()); 926 /// }); 927 /// }); 928 /// }); 929 /// 930 /// This would produce JSON like: 931 /// 932 /// [ 933 /// { 934 /// "timestamp": 19287398741, 935 /// "participants": [ 936 /// "King Kong", 937 /// "Miley Cyrus", 938 /// "Cleopatra" 939 /// ] 940 /// }, 941 /// ... 942 /// ] 943 /// 944 /// The lower level begin/end methods (arrayBegin()) are more flexible but 945 /// care must be taken to pair them correctly: 946 /// 947 /// json::OStream J(OS); 948 // J.arrayBegin(); 949 /// for (const Event &E : Events) { 950 /// J.objectBegin(); 951 /// J.attribute("timestamp", int64_t(E.Time)); 952 /// J.attributeBegin("participants"); 953 /// for (const Participant &P : E.Participants) 954 /// J.value(P.toString()); 955 /// J.attributeEnd(); 956 /// J.objectEnd(); 957 /// } 958 /// J.arrayEnd(); 959 /// 960 /// If the call sequence isn't valid JSON, asserts will fire in debug mode. 
961 /// This can be mismatched begin()/end() pairs, trying to emit attributes inside 962 /// an array, and so on. 963 /// With asserts disabled, this is undefined behavior. 964 class OStream { 965 public: 966 using Block = llvm::function_ref<void()>; 967 // If IndentSize is nonzero, output is pretty-printed. 968 explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0) 969 : OS(OS), IndentSize(IndentSize) { 970 Stack.emplace_back(); 971 } 972 ~OStream() { 973 assert(Stack.size() == 1 && "Unmatched begin()/end()"); 974 assert(Stack.back().Ctx == Singleton); 975 assert(Stack.back().HasValue && "Did not write top-level value"); 976 } 977 978 /// Flushes the underlying ostream. OStream does not buffer internally. 979 void flush() { OS.flush(); } 980 981 // High level functions to output a value. 982 // Valid at top-level (exactly once), in an attribute value (exactly once), 983 // or in an array (any number of times). 984 985 /// Emit a self-contained value (number, string, vector<string> etc). 986 void value(const Value &V); 987 /// Emit an array whose elements are emitted in the provided Block. 988 void array(Block Contents) { 989 arrayBegin(); 990 Contents(); 991 arrayEnd(); 992 } 993 /// Emit an object whose elements are emitted in the provided Block. 994 void object(Block Contents) { 995 objectBegin(); 996 Contents(); 997 objectEnd(); 998 } 999 /// Emit an externally-serialized value. 1000 /// The caller must write exactly one valid JSON value to the provided stream. 1001 /// No validation or formatting of this value occurs. 1002 void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) { 1003 rawValueBegin(); 1004 Contents(OS); 1005 rawValueEnd(); 1006 } 1007 void rawValue(llvm::StringRef Contents) { 1008 rawValue([&](raw_ostream &OS) { OS << Contents; }); 1009 } 1010 /// Emit a JavaScript comment associated with the next printed value. 1011 /// The string must be valid until the next attribute or value is emitted. 
1012 /// Comments are not part of standard JSON, and many parsers reject them! 1013 void comment(llvm::StringRef); 1014 1015 // High level functions to output object attributes. 1016 // Valid only within an object (any number of times). 1017 1018 /// Emit an attribute whose value is self-contained (number, vector<int> etc). 1019 void attribute(llvm::StringRef Key, const Value& Contents) { 1020 attributeImpl(Key, [&] { value(Contents); }); 1021 } 1022 /// Emit an attribute whose value is an array with elements from the Block. 1023 void attributeArray(llvm::StringRef Key, Block Contents) { 1024 attributeImpl(Key, [&] { array(Contents); }); 1025 } 1026 /// Emit an attribute whose value is an object with attributes from the Block. 1027 void attributeObject(llvm::StringRef Key, Block Contents) { 1028 attributeImpl(Key, [&] { object(Contents); }); 1029 } 1030 1031 // Low-level begin/end functions to output arrays, objects, and attributes. 1032 // Must be correctly paired. Allowed contexts are as above. 1033 1034 void arrayBegin(); 1035 void arrayEnd(); 1036 void objectBegin(); 1037 void objectEnd(); 1038 void attributeBegin(llvm::StringRef Key); 1039 void attributeEnd(); 1040 raw_ostream &rawValueBegin(); 1041 void rawValueEnd(); 1042 1043 private: 1044 void attributeImpl(llvm::StringRef Key, Block Contents) { 1045 attributeBegin(Key); 1046 Contents(); 1047 attributeEnd(); 1048 } 1049 1050 void valueBegin(); 1051 void flushComment(); 1052 void newline(); 1053 1054 enum Context { 1055 Singleton, // Top level, or object attribute. 1056 Array, 1057 Object, 1058 RawValue, // External code writing a value to OS directly. 1059 }; 1060 struct State { 1061 Context Ctx = Singleton; 1062 bool HasValue = false; 1063 }; 1064 llvm::SmallVector<State, 16> Stack; // Never empty. 1065 llvm::StringRef PendingComment; 1066 llvm::raw_ostream &OS; 1067 unsigned IndentSize; 1068 unsigned Indent = 0; 1069 }; 1070 1071 /// Serializes this Value to JSON, writing it to the provided stream. 
1072 /// The formatting is compact (no extra whitespace) and deterministic. 1073 /// For pretty-printing, use the formatv() format_provider below. 1074 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) { 1075 OStream(OS).value(V); 1076 return OS; 1077 } 1078 } // namespace json 1079 1080 /// Allow printing json::Value with formatv(). 1081 /// The default style is basic/compact formatting, like operator<<. 1082 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2. 1083 template <> struct format_provider<llvm::json::Value> { 1084 static void format(const llvm::json::Value &, raw_ostream &, StringRef); 1085 }; 1086 } // namespace llvm 1087 1088 #endif 1089