1 //===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 ///  \file
10 ///  This is a MessagePack reader.
11 ///
12 ///  See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
13 ///  standard.
14 ///
15 ///  Typical usage:
16 ///  \code
17 ///  StringRef input = GetInput();
18 ///  msgpack::Reader MPReader(input);
19 ///  msgpack::Object Obj;
20 ///
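///  while (true) {
///    Expected<bool> ReadObj = MPReader.read(Obj);
///    if (!ReadObj)
///      return ReadObj.takeError();
///    if (!ReadObj.get())
///      break; // Reached end of input
///    switch (Obj.Kind) {
///    case msgpack::Type::Int:
///      // Use Obj.Int
///      break;
///    // ...
///    }
///  }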
29 ///  \endcode
30 ///
31 //===----------------------------------------------------------------------===//
32 
33 #ifndef LLVM_BINARYFORMAT_MSGPACKREADER_H
34 #define LLVM_BINARYFORMAT_MSGPACKREADER_H
35 
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/MemoryBufferRef.h"
38 #include <cstdint>
39 
40 namespace llvm {
41 namespace msgpack {
42 
/// The kinds of MessagePack object, following the standard except that the
/// standard's single Integer type is split into a signed \c Int and an
/// unsigned \c UInt so that each kind maps directly onto a C++ type.
///
/// Each kind corresponds to a union member of the \c Object struct.
enum class Type : uint8_t {
  Int,       ///< Signed integer.
  UInt,      ///< Unsigned integer.
  Nil,       ///< The single nil value.
  Boolean,   ///< Boolean value.
  Float,     ///< Floating-point value.
  String,    ///< String data.
  Binary,    ///< Binary data.
  Array,     ///< Array header.
  Map,       ///< Map header.
  Extension, ///< Extension object.
  Empty,     ///< Used by MsgPackDocument to represent an empty node.
};
61 
62 /// Extension types are composed of a user-defined type ID and an uninterpreted
63 /// sequence of bytes.
64 struct ExtensionType {
65   /// User-defined extension type.
66   int8_t Type;
67   /// Raw bytes of the extension object.
68   StringRef Bytes;
69 };
70 
71 /// MessagePack object, represented as a tagged union of C++ types.
72 ///
73 /// All types except \c Type::Nil (which has only one value, and so is
74 /// completely represented by the \c Kind itself) map to a exactly one union
75 /// member.
76 struct Object {
77   Type Kind;
78   union {
79     /// Value for \c Type::Int.
80     int64_t Int;
81     /// Value for \c Type::Uint.
82     uint64_t UInt;
83     /// Value for \c Type::Boolean.
84     bool Bool;
85     /// Value for \c Type::Float.
86     double Float;
87     /// Value for \c Type::String and \c Type::Binary.
88     StringRef Raw;
89     /// Value for \c Type::Array and \c Type::Map.
90     size_t Length;
91     /// Value for \c Type::Extension.
92     ExtensionType Extension;
93   };
94 
95   Object() : Kind(Type::Int), Int(0) {}
96 };
97 
98 /// Reads MessagePack objects from memory, one at a time.
99 class Reader {
100 public:
101   /// Construct a reader, keeping a reference to the \p InputBuffer.
102   Reader(MemoryBufferRef InputBuffer);
103   /// Construct a reader, keeping a reference to the \p Input.
104   Reader(StringRef Input);
105 
106   Reader(const Reader &) = delete;
107   Reader &operator=(const Reader &) = delete;
108 
109   /// Read one object from the input buffer, advancing past it.
110   ///
111   /// The \p Obj is updated with the kind of the object read, and the
112   /// corresponding union member is updated.
113   ///
114   /// For the collection objects (Array and Map), only the length is read, and
115   /// the caller must make and additional \c N calls (in the case of Array) or
116   /// \c N*2 calls (in the case of Map) to \c Read to retrieve the collection
117   /// elements.
118   ///
119   /// \param [out] Obj filled with next object on success.
120   ///
121   /// \returns true when object successfully read, false when at end of
122   /// input (and so \p Obj was not updated), otherwise an error.
123   Expected<bool> read(Object &Obj);
124 
125 private:
126   MemoryBufferRef InputBuffer;
127   StringRef::iterator Current;
128   StringRef::iterator End;
129 
130   size_t remainingSpace() {
131     // The rest of the code maintains the invariant that End >= Current, so
132     // that this cast is always defined behavior.
133     return static_cast<size_t>(End - Current);
134   }
135 
136   template <class T> Expected<bool> readRaw(Object &Obj);
137   template <class T> Expected<bool> readInt(Object &Obj);
138   template <class T> Expected<bool> readUInt(Object &Obj);
139   template <class T> Expected<bool> readLength(Object &Obj);
140   template <class T> Expected<bool> readExt(Object &Obj);
141   Expected<bool> createRaw(Object &Obj, uint32_t Size);
142   Expected<bool> createExt(Object &Obj, uint32_t Size);
143 };
144 
145 } // end namespace msgpack
146 } // end namespace llvm
147 
148 #endif // LLVM_BINARYFORMAT_MSGPACKREADER_H
149