1 //===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 ///  \file
10 ///  This is a MessagePack reader.
11 ///
12 ///  See https://github.com/msgpack/msgpack/blob/master/spec.md for the full
13 ///  standard.
14 ///
15 ///  Typical usage:
16 ///  \code
17 ///  StringRef input = GetInput();
18 ///  msgpack::Reader MPReader(input);
19 ///  msgpack::Object Obj;
20 ///
21 ///  while (true) {
///    Expected<bool> ReadObj = MPReader.read(Obj);
23 ///    if (!ReadObj)
24 ///      // Handle error...
25 ///    if (!ReadObj.get())
26 ///      break; // Reached end of input
27 ///    switch (Obj.Kind) {
28 ///    case msgpack::Type::Int:
///      // Use Obj.Int
30 ///      break;
31 ///    // ...
32 ///    }
33 ///  }
34 ///  \endcode
35 ///
36 //===----------------------------------------------------------------------===//
37 
38 #ifndef LLVM_BINARYFORMAT_MSGPACKREADER_H
39 #define LLVM_BINARYFORMAT_MSGPACKREADER_H
40 
41 #include "llvm/Support/Error.h"
42 #include "llvm/Support/MemoryBufferRef.h"
43 #include <cstdint>
44 
45 namespace llvm {
46 namespace msgpack {
47 
/// The kinds of MessagePack object. These follow the standard, except that
/// the standard's Integer is split into a signed \c Int and an unsigned
/// \c UInt variant so that the kinds map directly to C++ types.
///
/// The kinds map onto corresponding union members of the \c Object struct.
/// The underlying type is \c uint8_t; the enumerator values are written out
/// explicitly and match the order of declaration.
enum class Type : uint8_t {
  Int = 0,
  UInt = 1,
  Nil = 2,
  Boolean = 3,
  Float = 4,
  String = 5,
  Binary = 6,
  Array = 7,
  Map = 8,
  Extension = 9,
  Empty = 10, // Used by MsgPackDocument to represent an empty node
};
66 
67 /// Extension types are composed of a user-defined type ID and an uninterpreted
68 /// sequence of bytes.
69 struct ExtensionType {
70   /// User-defined extension type.
71   int8_t Type;
72   /// Raw bytes of the extension object.
73   StringRef Bytes;
74 };
75 
76 /// MessagePack object, represented as a tagged union of C++ types.
77 ///
78 /// All types except \c Type::Nil (which has only one value, and so is
79 /// completely represented by the \c Kind itself) map to a exactly one union
80 /// member.
81 struct Object {
82   Type Kind;
83   union {
84     /// Value for \c Type::Int.
85     int64_t Int;
86     /// Value for \c Type::Uint.
87     uint64_t UInt;
88     /// Value for \c Type::Boolean.
89     bool Bool;
90     /// Value for \c Type::Float.
91     double Float;
92     /// Value for \c Type::String and \c Type::Binary.
93     StringRef Raw;
94     /// Value for \c Type::Array and \c Type::Map.
95     size_t Length;
96     /// Value for \c Type::Extension.
97     ExtensionType Extension;
98   };
99 
ObjectObject100   Object() : Kind(Type::Int), Int(0) {}
101 };
102 
103 /// Reads MessagePack objects from memory, one at a time.
104 class Reader {
105 public:
106   /// Construct a reader, keeping a reference to the \p InputBuffer.
107   Reader(MemoryBufferRef InputBuffer);
108   /// Construct a reader, keeping a reference to the \p Input.
109   Reader(StringRef Input);
110 
111   Reader(const Reader &) = delete;
112   Reader &operator=(const Reader &) = delete;
113 
114   /// Read one object from the input buffer, advancing past it.
115   ///
116   /// The \p Obj is updated with the kind of the object read, and the
117   /// corresponding union member is updated.
118   ///
119   /// For the collection objects (Array and Map), only the length is read, and
120   /// the caller must make and additional \c N calls (in the case of Array) or
121   /// \c N*2 calls (in the case of Map) to \c Read to retrieve the collection
122   /// elements.
123   ///
124   /// \param [out] Obj filled with next object on success.
125   ///
126   /// \returns true when object successfully read, false when at end of
127   /// input (and so \p Obj was not updated), otherwise an error.
128   Expected<bool> read(Object &Obj);
129 
130 private:
131   MemoryBufferRef InputBuffer;
132   StringRef::iterator Current;
133   StringRef::iterator End;
134 
remainingSpace()135   size_t remainingSpace() {
136     // The rest of the code maintains the invariant that End >= Current, so
137     // that this cast is always defined behavior.
138     return static_cast<size_t>(End - Current);
139   }
140 
141   template <class T> Expected<bool> readRaw(Object &Obj);
142   template <class T> Expected<bool> readInt(Object &Obj);
143   template <class T> Expected<bool> readUInt(Object &Obj);
144   template <class T> Expected<bool> readLength(Object &Obj);
145   template <class T> Expected<bool> readExt(Object &Obj);
146   Expected<bool> createRaw(Object &Obj, uint32_t Size);
147   Expected<bool> createExt(Object &Obj, uint32_t Size);
148 };
149 
150 } // end namespace msgpack
151 } // end namespace llvm
152 
153 #endif // LLVM_BINARYFORMAT_MSGPACKREADER_H
154