//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
///
/// \file
/// This file supports working with JSON data.
///
/// It comprises:
///
/// - classes which hold dynamically-typed parsed JSON structures
///   These are value types that can be composed, inspected, and modified.
///   See json::Value, and the related types json::Object and json::Array.
///
/// - functions to parse JSON text into Values, and to serialize Values to text.
///   See parse(), operator<<, and format_provider.
///
/// - a convention and helpers for mapping between json::Value and user-defined
///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
///
/// - an output API json::OStream which can emit JSON without materializing
///   all structures as json::Value.
///
/// Typically, JSON data would be read from an external source, parsed into
/// a Value, and then converted into some native data structure before doing
/// real work on it. (And vice versa when writing).
///
/// Other serialization mechanisms you may consider:
///
/// - YAML is also text-based, and more human-readable than JSON. It's a more
///   complex format and data model, and YAML parsers aren't ubiquitous.
///   YAMLParser.h is a streaming parser suitable for parsing large documents
///   (including JSON, as YAML is a superset). It can be awkward to use
///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
///   declarative than the toJSON/fromJSON conventions here.
///
/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
///
//===---------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_JSON_H
#define LLVM_SUPPORT_JSON_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

namespace llvm {
namespace json {

// === String encodings ===
//
// JSON strings are character sequences (not byte sequences like std::string).
// We need to know the encoding, and for simplicity only support UTF-8.
//
//   - When parsing, invalid UTF-8 is a syntax error like any other
//
//   - When creating Values from strings, callers must ensure they are UTF-8.
//        with asserts on, invalid UTF-8 will crash the program
//        with asserts off, we'll substitute the replacement character (U+FFFD)
//     Callers can use json::isUTF8() and json::fixUTF8() for validation.
//
//   - When retrieving strings from Values (e.g. getAsString()), the result
//     will always be valid UTF-8.

/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p ErrOffset is set to a byte offset near the first
/// error.
bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
/// Replaces invalid UTF-8 sequences in \p S with the replacement character
/// (U+FFFD). The returned string is valid UTF-8.
/// This is much slower than isUTF8, so test that first.
std::string fixUTF8(llvm::StringRef S);

// Forward declarations: Object and Array are declared in terms of Value and
// ObjectKey, which are only defined further down in this header.
class Array;
class ObjectKey;
class Value;
template <typename T> Value toJSON(const llvm::Optional<T> &Opt);

/// An Object is a JSON object, which maps strings to heterogeneous JSON values.
/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
class Object {
  // Keys are hashed/compared through DenseMapInfo<StringRef>, which is what
  // lets find() below look up by StringRef via find_as().
  using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
  Storage M;

public:
  using key_type = ObjectKey;
  using mapped_type = Value;
  using value_type = Storage::value_type;
  using iterator = Storage::iterator;
  using const_iterator = Storage::const_iterator;

  Object() = default;
  // KV is a trivial key-value struct for list-initialization.
  // (using std::pair forces extra copies).
  struct KV;
  explicit Object(std::initializer_list<KV> Properties);

  // Iteration simply exposes the underlying map's iterators.
  iterator begin() { return M.begin(); }
  const_iterator begin() const { return M.begin(); }
  iterator end() { return M.end(); }
  const_iterator end() const { return M.end(); }

  bool empty() const { return M.empty(); }
  size_t size() const { return M.size(); }

  void clear() { M.clear(); }
  // Inserts a key/value pair; the bool in the result is the underlying
  // try_emplace() "inserted" flag.
  std::pair<iterator, bool> insert(KV E);
  // Both try_emplace overloads forward to the underlying map's try_emplace;
  // they differ only in whether the key is copied or moved in.
  template <typename... Ts>
  std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
    return M.try_emplace(K, std::forward<Ts>(Args)...);
  }
  template <typename... Ts>
  std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
    return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
  }
  bool erase(StringRef K);
  void erase(iterator I) { M.erase(I); }

  // Lookup by string, without having to build an ObjectKey first.
  iterator find(StringRef K) { return M.find_as(K); }
  const_iterator find(StringRef K) const { return M.find_as(K); }
  // operator[] acts as if Value was default-constructible as null.
  Value &operator[](const ObjectKey &K);
  Value &operator[](ObjectKey &&K);
  // Look up a property, returning nullptr if it doesn't exist.
  Value *get(StringRef K);
  const Value *get(StringRef K) const;
  // Typed accessors return None/nullptr if
  //   - the property doesn't exist
  //   - or it has the wrong type
  llvm::Optional<std::nullptr_t> getNull(StringRef K) const;
  llvm::Optional<bool> getBoolean(StringRef K) const;
  llvm::Optional<double> getNumber(StringRef K) const;
  llvm::Optional<int64_t> getInteger(StringRef K) const;
  llvm::Optional<llvm::StringRef> getString(StringRef K) const;
  const json::Object *getObject(StringRef K) const;
  json::Object *getObject(StringRef K);
  const json::Array *getArray(StringRef K) const;
  json::Array *getArray(StringRef K);
};
bool operator==(const Object &LHS, const Object &RHS);
inline bool operator!=(const Object &LHS, const Object &RHS) {
  return !(LHS == RHS);
}

/// An Array is a JSON array, which contains heterogeneous JSON values.
/// It simulates std::vector<Value>.
class Array {
  std::vector<Value> V;

public:
  using value_type = Value;
  using iterator = std::vector<Value>::iterator;
  using const_iterator = std::vector<Value>::const_iterator;

  Array() = default;
  explicit Array(std::initializer_list<Value> Elements);
  // Builds an Array by emplacing a Value constructed from each element of C.
  // (The loop variable V below shadows the member V.)
  template <typename Collection> explicit Array(const Collection &C) {
    for (const auto &V : C)
      emplace_back(V);
  }

  // The members below are declared here and defined inline after class Value,
  // because Value is still an incomplete type at this point.
  Value &operator[](size_t I);
  const Value &operator[](size_t I) const;
  Value &front();
  const Value &front() const;
  Value &back();
  const Value &back() const;
  Value *data();
  const Value *data() const;

  iterator begin();
  const_iterator begin() const;
  iterator end();
  const_iterator end() const;

  bool empty() const;
  size_t size() const;
  void reserve(size_t S);

  void clear();
  void push_back(const Value &E);
  void push_back(Value &&E);
  template <typename... Args> void emplace_back(Args &&...A);
  void pop_back();
  // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees.
  iterator insert(iterator P, const Value &E);
  iterator insert(iterator P, Value &&E);
  template <typename It> iterator insert(iterator P, It A, It Z);
  template <typename... Args> iterator emplace(const_iterator P, Args &&...A);

  friend bool operator==(const Array &L, const Array &R);
};
inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }

/// A Value is a JSON value of unknown type.
/// They can be copied, but should generally be moved.
///
/// === Composing values ===
///
/// You can implicitly construct Values from:
///   - strings: std::string, SmallString, formatv, StringRef, char*
///              (char*, and StringRef are references, not copies!)
///   - numbers
///   - booleans
///   - null: nullptr
///   - arrays: {"foo", 42.0, false}
///   - serializable things: types with toJSON(const T&)->Value, found by ADL
///
/// They can also be constructed from object/array helpers:
///   - json::Object is a type like map<ObjectKey, Value>
///   - json::Array is a type like vector<Value>
/// These can be list-initialized, or used to build up collections in a loop.
/// json::Array(Collection) converts all items in a collection to Values.
///
/// === Inspecting values ===
///
/// Each Value is one of the JSON kinds:
///   null    (nullptr_t)
///   boolean (bool)
///   number  (double, int64 or uint64)
///   string  (StringRef)
///   array   (json::Array)
///   object  (json::Object)
///
/// The kind can be queried directly, or implicitly via the typed accessors:
///   if (Optional<StringRef> S = E.getAsString())
///     assert(E.kind() == Value::String);
///
/// Array and Object also have typed indexing accessors for easy traversal:
///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
///   if (Object* O = E->getAsObject())
///     if (Object* Opts = O->getObject("options"))
///       if (Optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->get("font")->kind() == Value::String);
///
/// === Converting JSON values to C++ types ===
///
/// The convention is to have a deserializer function findable via ADL:
///     fromJSON(const json::Value&, T&, Path) -> bool
///
/// The return value indicates overall success, and Path is used for precise
/// error reporting. (The Path::Root passed in at the top level fromJSON call
/// captures any nested error and can render it in context).
/// If conversion fails, fromJSON calls Path::report() and immediately returns.
/// This ensures that the first fatal error survives.
///
/// Deserializers are provided for:
///   - bool
///   - int, int64_t and uint64_t
///   - double
///   - std::string
///   - vector<T>, where T is deserializable
///   - map<string, T>, where T is deserializable
///   - Optional<T>, where T is deserializable
/// ObjectMapper can help writing fromJSON() functions for object types.
266 /// 267 /// For conversion in the other direction, the serializer function is: 268 /// toJSON(const T&) -> json::Value 269 /// If this exists, then it also allows constructing Value from T, and can 270 /// be used to serialize vector<T>, map<string, T>, and Optional<T>. 271 /// 272 /// === Serialization === 273 /// 274 /// Values can be serialized to JSON: 275 /// 1) raw_ostream << Value // Basic formatting. 276 /// 2) raw_ostream << formatv("{0}", Value) // Basic formatting. 277 /// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2. 278 /// 279 /// And parsed: 280 /// Expected<Value> E = json::parse("[1, 2, null]"); 281 /// assert(E && E->kind() == Value::Array); 282 class Value { 283 public: 284 enum Kind { 285 Null, 286 Boolean, 287 /// Number values can store both int64s and doubles at full precision, 288 /// depending on what they were constructed/parsed from. 289 Number, 290 String, 291 Array, 292 Object, 293 }; 294 295 // It would be nice to have Value() be null. But that would make {} null too. 296 Value(const Value &M) { copyFrom(M); } 297 Value(Value &&M) { moveFrom(std::move(M)); } 298 Value(std::initializer_list<Value> Elements); 299 Value(json::Array &&Elements) : Type(T_Array) { 300 create<json::Array>(std::move(Elements)); 301 } 302 template <typename Elt> 303 Value(const std::vector<Elt> &C) : Value(json::Array(C)) {} 304 Value(json::Object &&Properties) : Type(T_Object) { 305 create<json::Object>(std::move(Properties)); 306 } 307 template <typename Elt> 308 Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {} 309 // Strings: types with value semantics. Must be valid UTF-8. 
310 Value(std::string V) : Type(T_String) { 311 if (LLVM_UNLIKELY(!isUTF8(V))) { 312 assert(false && "Invalid UTF-8 in value used as JSON"); 313 V = fixUTF8(std::move(V)); 314 } 315 create<std::string>(std::move(V)); 316 } 317 Value(const llvm::SmallVectorImpl<char> &V) 318 : Value(std::string(V.begin(), V.end())) {} 319 Value(const llvm::formatv_object_base &V) : Value(V.str()) {} 320 // Strings: types with reference semantics. Must be valid UTF-8. 321 Value(StringRef V) : Type(T_StringRef) { 322 create<llvm::StringRef>(V); 323 if (LLVM_UNLIKELY(!isUTF8(V))) { 324 assert(false && "Invalid UTF-8 in value used as JSON"); 325 *this = Value(fixUTF8(V)); 326 } 327 } 328 Value(const char *V) : Value(StringRef(V)) {} 329 Value(std::nullptr_t) : Type(T_Null) {} 330 // Boolean (disallow implicit conversions). 331 // (The last template parameter is a dummy to keep templates distinct.) 332 template <typename T, 333 typename = std::enable_if_t<std::is_same<T, bool>::value>, 334 bool = false> 335 Value(T B) : Type(T_Boolean) { 336 create<bool>(B); 337 } 338 339 // Unsigned 64-bit long integers. 340 template <typename T, 341 typename = std::enable_if_t<std::is_same<T, uint64_t>::value>, 342 bool = false, bool = false> 343 Value(T V) : Type(T_UINT64) { 344 create<uint64_t>(uint64_t{V}); 345 } 346 347 // Integers (except boolean and uint64_t). 348 // Must be non-narrowing convertible to int64_t. 349 template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>, 350 typename = std::enable_if_t<!std::is_same<T, bool>::value>, 351 typename = std::enable_if_t<!std::is_same<T, uint64_t>::value>> 352 Value(T I) : Type(T_Integer) { 353 create<int64_t>(int64_t{I}); 354 } 355 // Floating point. Must be non-narrowing convertible to double. 
356 template <typename T, 357 typename = std::enable_if_t<std::is_floating_point<T>::value>, 358 double * = nullptr> 359 Value(T D) : Type(T_Double) { 360 create<double>(double{D}); 361 } 362 // Serializable types: with a toJSON(const T&)->Value function, found by ADL. 363 template <typename T, 364 typename = std::enable_if_t<std::is_same< 365 Value, decltype(toJSON(*(const T *)nullptr))>::value>, 366 Value * = nullptr> 367 Value(const T &V) : Value(toJSON(V)) {} 368 369 Value &operator=(const Value &M) { 370 destroy(); 371 copyFrom(M); 372 return *this; 373 } 374 Value &operator=(Value &&M) { 375 destroy(); 376 moveFrom(std::move(M)); 377 return *this; 378 } 379 ~Value() { destroy(); } 380 381 Kind kind() const { 382 switch (Type) { 383 case T_Null: 384 return Null; 385 case T_Boolean: 386 return Boolean; 387 case T_Double: 388 case T_Integer: 389 case T_UINT64: 390 return Number; 391 case T_String: 392 case T_StringRef: 393 return String; 394 case T_Object: 395 return Object; 396 case T_Array: 397 return Array; 398 } 399 llvm_unreachable("Unknown kind"); 400 } 401 402 // Typed accessors return None/nullptr if the Value is not of this type. 403 llvm::Optional<std::nullptr_t> getAsNull() const { 404 if (LLVM_LIKELY(Type == T_Null)) 405 return nullptr; 406 return llvm::None; 407 } 408 llvm::Optional<bool> getAsBoolean() const { 409 if (LLVM_LIKELY(Type == T_Boolean)) 410 return as<bool>(); 411 return llvm::None; 412 } 413 llvm::Optional<double> getAsNumber() const { 414 if (LLVM_LIKELY(Type == T_Double)) 415 return as<double>(); 416 if (LLVM_LIKELY(Type == T_Integer)) 417 return as<int64_t>(); 418 if (LLVM_LIKELY(Type == T_UINT64)) 419 return as<uint64_t>(); 420 return llvm::None; 421 } 422 // Succeeds if the Value is a Number, and exactly representable as int64_t. 
423 llvm::Optional<int64_t> getAsInteger() const { 424 if (LLVM_LIKELY(Type == T_Integer)) 425 return as<int64_t>(); 426 if (LLVM_LIKELY(Type == T_Double)) { 427 double D = as<double>(); 428 if (LLVM_LIKELY(std::modf(D, &D) == 0.0 && 429 D >= double(std::numeric_limits<int64_t>::min()) && 430 D <= double(std::numeric_limits<int64_t>::max()))) 431 return D; 432 } 433 return llvm::None; 434 } 435 llvm::Optional<uint64_t> getAsUINT64() const { 436 if (Type == T_UINT64) 437 return as<uint64_t>(); 438 else if (Type == T_Integer) { 439 int64_t N = as<int64_t>(); 440 if (N >= 0) 441 return as<uint64_t>(); 442 } 443 return llvm::None; 444 } 445 llvm::Optional<llvm::StringRef> getAsString() const { 446 if (Type == T_String) 447 return llvm::StringRef(as<std::string>()); 448 if (LLVM_LIKELY(Type == T_StringRef)) 449 return as<llvm::StringRef>(); 450 return llvm::None; 451 } 452 const json::Object *getAsObject() const { 453 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr; 454 } 455 json::Object *getAsObject() { 456 return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr; 457 } 458 const json::Array *getAsArray() const { 459 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; 460 } 461 json::Array *getAsArray() { 462 return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; 463 } 464 465 private: 466 void destroy(); 467 void copyFrom(const Value &M); 468 // We allow moving from *const* Values, by marking all members as mutable! 469 // This hack is needed to support initializer-list syntax efficiently. 470 // (std::initializer_list<T> is a container of const T). 471 void moveFrom(const Value &&M); 472 friend class Array; 473 friend class Object; 474 475 template <typename T, typename... U> void create(U &&... 
V) { 476 new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...); 477 } 478 template <typename T> T &as() const { 479 // Using this two-step static_cast via void * instead of reinterpret_cast 480 // silences a -Wstrict-aliasing false positive from GCC6 and earlier. 481 void *Storage = static_cast<void *>(&Union); 482 return *static_cast<T *>(Storage); 483 } 484 485 friend class OStream; 486 487 enum ValueType : char16_t { 488 T_Null, 489 T_Boolean, 490 T_Double, 491 T_Integer, 492 T_UINT64, 493 T_StringRef, 494 T_String, 495 T_Object, 496 T_Array, 497 }; 498 // All members mutable, see moveFrom(). 499 mutable ValueType Type; 500 mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t, 501 llvm::StringRef, std::string, json::Array, 502 json::Object> 503 Union; 504 friend bool operator==(const Value &, const Value &); 505 }; 506 507 bool operator==(const Value &, const Value &); 508 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); } 509 510 // Array Methods 511 inline Value &Array::operator[](size_t I) { return V[I]; } 512 inline const Value &Array::operator[](size_t I) const { return V[I]; } 513 inline Value &Array::front() { return V.front(); } 514 inline const Value &Array::front() const { return V.front(); } 515 inline Value &Array::back() { return V.back(); } 516 inline const Value &Array::back() const { return V.back(); } 517 inline Value *Array::data() { return V.data(); } 518 inline const Value *Array::data() const { return V.data(); } 519 520 inline typename Array::iterator Array::begin() { return V.begin(); } 521 inline typename Array::const_iterator Array::begin() const { return V.begin(); } 522 inline typename Array::iterator Array::end() { return V.end(); } 523 inline typename Array::const_iterator Array::end() const { return V.end(); } 524 525 inline bool Array::empty() const { return V.empty(); } 526 inline size_t Array::size() const { return V.size(); } 527 inline void Array::reserve(size_t S) { V.reserve(S); 
} 528 529 inline void Array::clear() { V.clear(); } 530 inline void Array::push_back(const Value &E) { V.push_back(E); } 531 inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); } 532 template <typename... Args> inline void Array::emplace_back(Args &&...A) { 533 V.emplace_back(std::forward<Args>(A)...); 534 } 535 inline void Array::pop_back() { V.pop_back(); } 536 inline typename Array::iterator Array::insert(iterator P, const Value &E) { 537 return V.insert(P, E); 538 } 539 inline typename Array::iterator Array::insert(iterator P, Value &&E) { 540 return V.insert(P, std::move(E)); 541 } 542 template <typename It> 543 inline typename Array::iterator Array::insert(iterator P, It A, It Z) { 544 return V.insert(P, A, Z); 545 } 546 template <typename... Args> 547 inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) { 548 return V.emplace(P, std::forward<Args>(A)...); 549 } 550 inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; } 551 552 /// ObjectKey is a used to capture keys in Object. Like Value but: 553 /// - only strings are allowed 554 /// - it's optimized for the string literal case (Owned == nullptr) 555 /// Like Value, strings must be UTF-8. See isUTF8 documentation for details. 
class ObjectKey {
public:
  // From a C string: treated like a StringRef, i.e. referenced, not copied
  // (unless it needs UTF-8 repair, see the StringRef constructor).
  ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
  // From an owned string: the key takes ownership and Data views the copy.
  ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
    if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
      assert(false && "Invalid UTF-8 in value used as JSON");
      *Owned = fixUTF8(std::move(*Owned));
    }
    Data = *Owned;
  }
  // From a StringRef: only references the caller's bytes (Owned stays null),
  // unless the text is invalid UTF-8, in which case a repaired owned copy is
  // made.
  ObjectKey(llvm::StringRef S) : Data(S) {
    if (LLVM_UNLIKELY(!isUTF8(Data))) {
      assert(false && "Invalid UTF-8 in value used as JSON");
      *this = ObjectKey(fixUTF8(S));
    }
  }
  ObjectKey(const llvm::SmallVectorImpl<char> &V)
      : ObjectKey(std::string(V.begin(), V.end())) {}
  ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}

  ObjectKey(const ObjectKey &C) { *this = C; }
  // Delegates with a *const* rvalue, which cannot bind to ObjectKey&& and so
  // selects the copy constructor above: move-construction performs a copy.
  ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
  // Copy-assignment: an owned source is deep-copied so that Data points at
  // our own string; an unowned source just shares the referenced bytes.
  ObjectKey &operator=(const ObjectKey &C) {
    if (C.Owned) {
      Owned.reset(new std::string(*C.Owned));
      Data = *Owned;
    } else {
      // Owned is deliberately left untouched in this branch.
      Data = C.Data;
    }
    return *this;
  }
  ObjectKey &operator=(ObjectKey &&) = default;

  operator llvm::StringRef() const { return Data; }
  std::string str() const { return Data.str(); }

private:
  // FIXME: this is unnecessarily large (3 pointers). Pointer + length + owned
  // could be 2 pointers at most.
  std::unique_ptr<std::string> Owned;
  llvm::StringRef Data;
};

// Keys compare by their string contents, regardless of ownership.
inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
  return llvm::StringRef(L) == llvm::StringRef(R);
}
inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
  return !(L == R);
}
inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
  return StringRef(L) < StringRef(R);
}

// The key-value pair used to list-initialize an Object.
struct Object::KV {
  ObjectKey K;
  Value V;
};

// Builds an Object from a braced list of properties.
// If a key appears more than once, only its first occurrence is stored.
inline Object::Object(std::initializer_list<KV> Properties) {
  for (const auto &P : Properties) {
    // Insert a null placeholder first, then move the real value in only if the
    // key was new. P is const (initializer_list elements are const), which is
    // why Value::moveFrom() accepts a const rvalue.
    auto R = try_emplace(P.K, nullptr);
    if (R.second)
      R.first->getSecond().moveFrom(std::move(P.V));
  }
}
inline std::pair<Object::iterator, bool> Object::insert(KV E) {
  return try_emplace(std::move(E.K), std::move(E.V));
}
// Removes the property named K; the result is the underlying map's erase()
// result converted to bool.
inline bool Object::erase(StringRef K) {
  return M.erase(ObjectKey(K));
}

/// A "cursor" marking a position within a Value.
/// The Value is a tree, and this is the path from the root to the current node.
/// This is used to associate errors with particular subobjects.
class Path {
public:
  class Root;

  /// Records that the value at the current path is invalid.
  /// Message is e.g. "expected number" and becomes part of the final error.
  /// This overwrites any previously written error message in the root.
  void report(llvm::StringLiteral Message);

  /// The root may be treated as a Path.
  Path(Root &R) : Parent(nullptr), Seg(&R) {}
  /// Derives a path for an array element: this[Index]
  /// The derived Path stores a pointer to this Path as its parent.
  Path index(unsigned Index) const { return Path(this, Segment(Index)); }
  /// Derives a path for an object field: this.Field
  /// The derived Path stores a pointer to this Path as its parent, and
  /// references (does not copy) the characters of Field.
  Path field(StringRef Field) const { return Path(this, Segment(Field)); }

private:
  /// One element in a JSON path: an object field (.foo) or array index [27].
  /// Exception: the root Path encodes a pointer to the Path::Root.
  class Segment {
    // Field name data pointer, or Root pointer, or 0 for an array index.
    uintptr_t Pointer;
    // Field name length, or the array index.
    unsigned Offset;

  public:
    // Leaves both members uninitialized.
    Segment() = default;
    // Root segment: only Pointer is set, Offset is left uninitialized.
    Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
    Segment(llvm::StringRef Field)
        : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
          Offset(static_cast<unsigned>(Field.size())) {}
    Segment(unsigned Index) : Pointer(0), Offset(Index) {}

    // Any non-zero Pointer counts as a field; this includes a root segment.
    bool isField() const { return Pointer != 0; }
    StringRef field() const {
      return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
    }
    unsigned index() const { return Offset; }
    Root *root() const { return reinterpret_cast<Root *>(Pointer); }
  };

  const Path *Parent;
  Segment Seg;

  Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
};

/// The root is the trivial Path to the root value.
/// It also stores the latest reported error and the path where it occurred.
class Path::Root {
  llvm::StringRef Name;
  llvm::StringLiteral ErrorMessage;
  std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.

  friend void Path::report(llvm::StringLiteral Message);

public:
  // Name is stored as a StringRef, i.e. referenced rather than copied.
  Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
  // No copy/move allowed as there are incoming pointers.
  Root(Root &&) = delete;
  Root &operator=(Root &&) = delete;
  Root(const Root &) = delete;
  Root &operator=(const Root &) = delete;

  /// Returns the last error reported, or else a generic error.
  Error getError() const;
  /// Print the root value with the error shown inline as a comment.
  /// Unrelated parts of the value are elided for brevity, e.g.
  ///   {
  ///      "id": 42,
  ///      "name": /* expected string */ null,
  ///      "properties": { ... }
  ///   }
  void printErrorContext(const Value &, llvm::raw_ostream &) const;
};

// Standard deserializers are provided for primitive types.
// See comments on Value.
707 inline bool fromJSON(const Value &E, std::string &Out, Path P) { 708 if (auto S = E.getAsString()) { 709 Out = std::string(*S); 710 return true; 711 } 712 P.report("expected string"); 713 return false; 714 } 715 inline bool fromJSON(const Value &E, int &Out, Path P) { 716 if (auto S = E.getAsInteger()) { 717 Out = *S; 718 return true; 719 } 720 P.report("expected integer"); 721 return false; 722 } 723 inline bool fromJSON(const Value &E, int64_t &Out, Path P) { 724 if (auto S = E.getAsInteger()) { 725 Out = *S; 726 return true; 727 } 728 P.report("expected integer"); 729 return false; 730 } 731 inline bool fromJSON(const Value &E, double &Out, Path P) { 732 if (auto S = E.getAsNumber()) { 733 Out = *S; 734 return true; 735 } 736 P.report("expected number"); 737 return false; 738 } 739 inline bool fromJSON(const Value &E, bool &Out, Path P) { 740 if (auto S = E.getAsBoolean()) { 741 Out = *S; 742 return true; 743 } 744 P.report("expected boolean"); 745 return false; 746 } 747 inline bool fromJSON(const Value &E, uint64_t &Out, Path P) { 748 if (auto S = E.getAsUINT64()) { 749 Out = *S; 750 return true; 751 } 752 P.report("expected uint64_t"); 753 return false; 754 } 755 inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) { 756 if (auto S = E.getAsNull()) { 757 Out = *S; 758 return true; 759 } 760 P.report("expected null"); 761 return false; 762 } 763 template <typename T> 764 bool fromJSON(const Value &E, llvm::Optional<T> &Out, Path P) { 765 if (E.getAsNull()) { 766 Out = llvm::None; 767 return true; 768 } 769 T Result; 770 if (!fromJSON(E, Result, P)) 771 return false; 772 Out = std::move(Result); 773 return true; 774 } 775 template <typename T> 776 bool fromJSON(const Value &E, std::vector<T> &Out, Path P) { 777 if (auto *A = E.getAsArray()) { 778 Out.clear(); 779 Out.resize(A->size()); 780 for (size_t I = 0; I < A->size(); ++I) 781 if (!fromJSON((*A)[I], Out[I], P.index(I))) 782 return false; 783 return true; 784 } 785 P.report("expected 
array"); 786 return false; 787 } 788 template <typename T> 789 bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) { 790 if (auto *O = E.getAsObject()) { 791 Out.clear(); 792 for (const auto &KV : *O) 793 if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))], 794 P.field(KV.first))) 795 return false; 796 return true; 797 } 798 P.report("expected object"); 799 return false; 800 } 801 802 // Allow serialization of Optional<T> for supported T. 803 template <typename T> Value toJSON(const llvm::Optional<T> &Opt) { 804 return Opt ? Value(*Opt) : Value(nullptr); 805 } 806 807 /// Helper for mapping JSON objects onto protocol structs. 808 /// 809 /// Example: 810 /// \code 811 /// bool fromJSON(const Value &E, MyStruct &R, Path P) { 812 /// ObjectMapper O(E, P); 813 /// // When returning false, error details were already reported. 814 /// return O && O.map("mandatory_field", R.MandatoryField) && 815 /// O.mapOptional("optional_field", R.OptionalField); 816 /// } 817 /// \endcode 818 class ObjectMapper { 819 public: 820 /// If O is not an object, this mapper is invalid and an error is reported. 821 ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) { 822 if (!O) 823 P.report("expected object"); 824 } 825 826 /// True if the expression is an object. 827 /// Must be checked before calling map(). 828 operator bool() const { return O; } 829 830 /// Maps a property to a field. 831 /// If the property is missing or invalid, reports an error. 832 template <typename T> bool map(StringLiteral Prop, T &Out) { 833 assert(*this && "Must check this is an object before calling map()"); 834 if (const Value *E = O->get(Prop)) 835 return fromJSON(*E, Out, P.field(Prop)); 836 P.field(Prop).report("missing value"); 837 return false; 838 } 839 840 /// Maps a property to a field, if it exists. 841 /// If the property exists and is invalid, reports an error. 842 /// (Optional requires special handling, because missing keys are OK). 
843 template <typename T> bool map(StringLiteral Prop, llvm::Optional<T> &Out) { 844 assert(*this && "Must check this is an object before calling map()"); 845 if (const Value *E = O->get(Prop)) 846 return fromJSON(*E, Out, P.field(Prop)); 847 Out = llvm::None; 848 return true; 849 } 850 851 /// Maps a property to a field, if it exists. 852 /// If the property exists and is invalid, reports an error. 853 /// If the property does not exist, Out is unchanged. 854 template <typename T> bool mapOptional(StringLiteral Prop, T &Out) { 855 assert(*this && "Must check this is an object before calling map()"); 856 if (const Value *E = O->get(Prop)) 857 return fromJSON(*E, Out, P.field(Prop)); 858 return true; 859 } 860 861 private: 862 const Object *O; 863 Path P; 864 }; 865 866 /// Parses the provided JSON source, or returns a ParseError. 867 /// The returned Value is self-contained and owns its strings (they do not refer 868 /// to the original source). 869 llvm::Expected<Value> parse(llvm::StringRef JSON); 870 871 class ParseError : public llvm::ErrorInfo<ParseError> { 872 const char *Msg; 873 unsigned Line, Column, Offset; 874 875 public: 876 static char ID; 877 ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset) 878 : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {} 879 void log(llvm::raw_ostream &OS) const override { 880 OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg); 881 } 882 std::error_code convertToErrorCode() const override { 883 return llvm::inconvertibleErrorCode(); 884 } 885 }; 886 887 /// Version of parse() that converts the parsed value to the type T. 888 /// RootName describes the root object and is used in error messages. 
889 template <typename T> 890 Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") { 891 auto V = parse(JSON); 892 if (!V) 893 return V.takeError(); 894 Path::Root R(RootName); 895 T Result; 896 if (fromJSON(*V, Result, R)) 897 return std::move(Result); 898 return R.getError(); 899 } 900 901 /// json::OStream allows writing well-formed JSON without materializing 902 /// all structures as json::Value ahead of time. 903 /// It's faster, lower-level, and less safe than OS << json::Value. 904 /// It also allows emitting more constructs, such as comments. 905 /// 906 /// Only one "top-level" object can be written to a stream. 907 /// Simplest usage involves passing lambdas (Blocks) to fill in containers: 908 /// 909 /// json::OStream J(OS); 910 /// J.array([&]{ 911 /// for (const Event &E : Events) 912 /// J.object([&] { 913 /// J.attribute("timestamp", int64_t(E.Time)); 914 /// J.attributeArray("participants", [&] { 915 /// for (const Participant &P : E.Participants) 916 /// J.value(P.toString()); 917 /// }); 918 /// }); 919 /// }); 920 /// 921 /// This would produce JSON like: 922 /// 923 /// [ 924 /// { 925 /// "timestamp": 19287398741, 926 /// "participants": [ 927 /// "King Kong", 928 /// "Miley Cyrus", 929 /// "Cleopatra" 930 /// ] 931 /// }, 932 /// ... 933 /// ] 934 /// 935 /// The lower level begin/end methods (arrayBegin()) are more flexible but 936 /// care must be taken to pair them correctly: 937 /// 938 /// json::OStream J(OS); 939 // J.arrayBegin(); 940 /// for (const Event &E : Events) { 941 /// J.objectBegin(); 942 /// J.attribute("timestamp", int64_t(E.Time)); 943 /// J.attributeBegin("participants"); 944 /// for (const Participant &P : E.Participants) 945 /// J.value(P.toString()); 946 /// J.attributeEnd(); 947 /// J.objectEnd(); 948 /// } 949 /// J.arrayEnd(); 950 /// 951 /// If the call sequence isn't valid JSON, asserts will fire in debug mode. 
952 /// This can be mismatched begin()/end() pairs, trying to emit attributes inside 953 /// an array, and so on. 954 /// With asserts disabled, this is undefined behavior. 955 class OStream { 956 public: 957 using Block = llvm::function_ref<void()>; 958 // If IndentSize is nonzero, output is pretty-printed. 959 explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0) 960 : OS(OS), IndentSize(IndentSize) { 961 Stack.emplace_back(); 962 } 963 ~OStream() { 964 assert(Stack.size() == 1 && "Unmatched begin()/end()"); 965 assert(Stack.back().Ctx == Singleton); 966 assert(Stack.back().HasValue && "Did not write top-level value"); 967 } 968 969 /// Flushes the underlying ostream. OStream does not buffer internally. 970 void flush() { OS.flush(); } 971 972 // High level functions to output a value. 973 // Valid at top-level (exactly once), in an attribute value (exactly once), 974 // or in an array (any number of times). 975 976 /// Emit a self-contained value (number, string, vector<string> etc). 977 void value(const Value &V); 978 /// Emit an array whose elements are emitted in the provided Block. 979 void array(Block Contents) { 980 arrayBegin(); 981 Contents(); 982 arrayEnd(); 983 } 984 /// Emit an object whose elements are emitted in the provided Block. 985 void object(Block Contents) { 986 objectBegin(); 987 Contents(); 988 objectEnd(); 989 } 990 /// Emit an externally-serialized value. 991 /// The caller must write exactly one valid JSON value to the provided stream. 992 /// No validation or formatting of this value occurs. 993 void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) { 994 rawValueBegin(); 995 Contents(OS); 996 rawValueEnd(); 997 } 998 void rawValue(llvm::StringRef Contents) { 999 rawValue([&](raw_ostream &OS) { OS << Contents; }); 1000 } 1001 /// Emit a JavaScript comment associated with the next printed value. 1002 /// The string must be valid until the next attribute or value is emitted. 
1003 /// Comments are not part of standard JSON, and many parsers reject them! 1004 void comment(llvm::StringRef); 1005 1006 // High level functions to output object attributes. 1007 // Valid only within an object (any number of times). 1008 1009 /// Emit an attribute whose value is self-contained (number, vector<int> etc). 1010 void attribute(llvm::StringRef Key, const Value& Contents) { 1011 attributeImpl(Key, [&] { value(Contents); }); 1012 } 1013 /// Emit an attribute whose value is an array with elements from the Block. 1014 void attributeArray(llvm::StringRef Key, Block Contents) { 1015 attributeImpl(Key, [&] { array(Contents); }); 1016 } 1017 /// Emit an attribute whose value is an object with attributes from the Block. 1018 void attributeObject(llvm::StringRef Key, Block Contents) { 1019 attributeImpl(Key, [&] { object(Contents); }); 1020 } 1021 1022 // Low-level begin/end functions to output arrays, objects, and attributes. 1023 // Must be correctly paired. Allowed contexts are as above. 1024 1025 void arrayBegin(); 1026 void arrayEnd(); 1027 void objectBegin(); 1028 void objectEnd(); 1029 void attributeBegin(llvm::StringRef Key); 1030 void attributeEnd(); 1031 raw_ostream &rawValueBegin(); 1032 void rawValueEnd(); 1033 1034 private: 1035 void attributeImpl(llvm::StringRef Key, Block Contents) { 1036 attributeBegin(Key); 1037 Contents(); 1038 attributeEnd(); 1039 } 1040 1041 void valueBegin(); 1042 void flushComment(); 1043 void newline(); 1044 1045 enum Context { 1046 Singleton, // Top level, or object attribute. 1047 Array, 1048 Object, 1049 RawValue, // External code writing a value to OS directly. 1050 }; 1051 struct State { 1052 Context Ctx = Singleton; 1053 bool HasValue = false; 1054 }; 1055 llvm::SmallVector<State, 16> Stack; // Never empty. 1056 llvm::StringRef PendingComment; 1057 llvm::raw_ostream &OS; 1058 unsigned IndentSize; 1059 unsigned Indent = 0; 1060 }; 1061 1062 /// Serializes this Value to JSON, writing it to the provided stream. 
1063 /// The formatting is compact (no extra whitespace) and deterministic. 1064 /// For pretty-printing, use the formatv() format_provider below. 1065 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) { 1066 OStream(OS).value(V); 1067 return OS; 1068 } 1069 } // namespace json 1070 1071 /// Allow printing json::Value with formatv(). 1072 /// The default style is basic/compact formatting, like operator<<. 1073 /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2. 1074 template <> struct format_provider<llvm::json::Value> { 1075 static void format(const llvm::json::Value &, raw_ostream &, StringRef); 1076 }; 1077 } // namespace llvm 1078 1079 #endif 1080