1 //===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This is a MessagePack reader. 11 /// 12 /// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full 13 /// standard. 14 /// 15 /// Typical usage: 16 /// \code 17 /// StringRef input = GetInput(); 18 /// msgpack::Reader MPReader(input); 19 /// msgpack::Object Obj; 20 /// 21 /// while (MPReader.read(Obj)) { 22 /// switch (Obj.Kind) { 23 /// case msgpack::Type::Int: 24 // // Use Obj.Int 25 /// break; 26 /// // ... 27 /// } 28 /// } 29 /// \endcode 30 /// 31 //===----------------------------------------------------------------------===// 32 33 #ifndef LLVM_BINARYFORMAT_MSGPACKREADER_H 34 #define LLVM_BINARYFORMAT_MSGPACKREADER_H 35 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cstdint> 40 41 namespace llvm { 42 namespace msgpack { 43 44 /// MessagePack types as defined in the standard, with the exception of Integer 45 /// being divided into a signed Int and unsigned UInt variant in order to map 46 /// directly to C++ types. 47 /// 48 /// The types map onto corresponding union members of the \c Object struct. 49 enum class Type : uint8_t { 50 Int, 51 UInt, 52 Nil, 53 Boolean, 54 Float, 55 String, 56 Binary, 57 Array, 58 Map, 59 Extension, 60 Empty, // Used by MsgPackDocument to represent an empty node 61 }; 62 63 /// Extension types are composed of a user-defined type ID and an uninterpreted 64 /// sequence of bytes. 65 struct ExtensionType { 66 /// User-defined extension type. 67 int8_t Type; 68 /// Raw bytes of the extension object. 69 StringRef Bytes; 70 }; 71 72 /// MessagePack object, represented as a tagged union of C++ types. 73 /// 74 /// All types except \c Type::Nil (which has only one value, and so is 75 /// completely represented by the \c Kind itself) map to a exactly one union 76 /// member. 77 struct Object { 78 Type Kind; 79 union { 80 /// Value for \c Type::Int. 81 int64_t Int; 82 /// Value for \c Type::Uint. 83 uint64_t UInt; 84 /// Value for \c Type::Boolean. 85 bool Bool; 86 /// Value for \c Type::Float. 87 double Float; 88 /// Value for \c Type::String and \c Type::Binary. 89 StringRef Raw; 90 /// Value for \c Type::Array and \c Type::Map. 91 size_t Length; 92 /// Value for \c Type::Extension. 93 ExtensionType Extension; 94 }; 95 ObjectObject96 Object() : Kind(Type::Int), Int(0) {} 97 }; 98 99 /// Reads MessagePack objects from memory, one at a time. 100 class Reader { 101 public: 102 /// Construct a reader, keeping a reference to the \p InputBuffer. 103 Reader(MemoryBufferRef InputBuffer); 104 /// Construct a reader, keeping a reference to the \p Input. 105 Reader(StringRef Input); 106 107 Reader(const Reader &) = delete; 108 Reader &operator=(const Reader &) = delete; 109 110 /// Read one object from the input buffer, advancing past it. 111 /// 112 /// The \p Obj is updated with the kind of the object read, and the 113 /// corresponding union member is updated. 114 /// 115 /// For the collection objects (Array and Map), only the length is read, and 116 /// the caller must make and additional \c N calls (in the case of Array) or 117 /// \c N*2 calls (in the case of Map) to \c Read to retrieve the collection 118 /// elements. 119 /// 120 /// \param [out] Obj filled with next object on success. 121 /// 122 /// \returns true when object successfully read, false when at end of 123 /// input (and so \p Obj was not updated), otherwise an error. 124 Expected<bool> read(Object &Obj); 125 126 private: 127 MemoryBufferRef InputBuffer; 128 StringRef::iterator Current; 129 StringRef::iterator End; 130 remainingSpace()131 size_t remainingSpace() { 132 // The rest of the code maintains the invariant that End >= Current, so 133 // that this cast is always defined behavior. 134 return static_cast<size_t>(End - Current); 135 } 136 137 template <class T> Expected<bool> readRaw(Object &Obj); 138 template <class T> Expected<bool> readInt(Object &Obj); 139 template <class T> Expected<bool> readUInt(Object &Obj); 140 template <class T> Expected<bool> readLength(Object &Obj); 141 template <class T> Expected<bool> readExt(Object &Obj); 142 Expected<bool> createRaw(Object &Obj, uint32_t Size); 143 Expected<bool> createExt(Object &Obj, uint32_t Size); 144 }; 145 146 } // end namespace msgpack 147 } // end namespace llvm 148 149 #endif // LLVM_BINARYFORMAT_MSGPACKREADER_H 150