1 //===-- MsgPackDocument.h - MsgPack Document --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file declares a class that exposes a simple in-memory representation
10 /// of a document of MsgPack objects, that can be read from MsgPack, written to
11 /// MsgPack, and inspected and modified in memory. This is intended to be a
12 /// lighter-weight (in terms of memory allocations) replacement for
13 /// MsgPackTypes.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
18 #define LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
19 
20 #include "llvm/BinaryFormat/MsgPackReader.h"
21 #include <map>
22 
23 namespace llvm {
24 namespace msgpack {
25 
// Forward declarations: DocNode refers to all three of these types before
// their definitions appear later in this header.
class Document;
class ArrayDocNode;
class MapDocNode;
29 
30 /// The kind of a DocNode and its owning Document.
31 struct KindAndDocument {
32   Document *Doc;
33   Type Kind;
34 };
35 
36 /// A node in a MsgPack Document. This is a simple copyable and
37 /// passable-by-value type that does not own any memory.
38 class DocNode {
39   friend Document;
40 
41 public:
42   typedef std::map<DocNode, DocNode> MapTy;
43   typedef std::vector<DocNode> ArrayTy;
44 
45 private:
46   // Using KindAndDocument allows us to squeeze Kind and a pointer to the
47   // owning Document into the same word. Having a pointer to the owning
48   // Document makes the API of DocNode more convenient, and allows its use in
49   // YAMLIO.
50   const KindAndDocument *KindAndDoc;
51 
52 protected:
53   // The union of different values.
54   union {
55     int64_t Int;
56     uint64_t UInt;
57     bool Bool;
58     double Float;
59     StringRef Raw;
60     ArrayTy *Array;
61     MapTy *Map;
62   };
63 
64 public:
65   // Default constructor gives an empty node with no associated Document. All
66   // you can do with it is "isEmpty()".
DocNode()67   DocNode() : KindAndDoc(nullptr) {}
68 
69   // Type methods
isMap()70   bool isMap() const { return getKind() == Type::Map; }
isArray()71   bool isArray() const { return getKind() == Type::Array; }
isScalar()72   bool isScalar() const { return !isMap() && !isArray(); }
isString()73   bool isString() const { return getKind() == Type::String; }
74 
75   // Accessors. isEmpty() returns true for both a default-constructed DocNode
76   // that has no associated Document, and the result of getEmptyNode(), which
77   // does have an associated document.
isEmpty()78   bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; }
getKind()79   Type getKind() const { return KindAndDoc->Kind; }
getDocument()80   Document *getDocument() const { return KindAndDoc->Doc; }
81 
getInt()82   int64_t &getInt() {
83     assert(getKind() == Type::Int);
84     return Int;
85   }
86 
getUInt()87   uint64_t &getUInt() {
88     assert(getKind() == Type::UInt);
89     return UInt;
90   }
91 
getBool()92   bool &getBool() {
93     assert(getKind() == Type::Boolean);
94     return Bool;
95   }
96 
getFloat()97   double &getFloat() {
98     assert(getKind() == Type::Float);
99     return Float;
100   }
101 
getInt()102   int64_t getInt() const {
103     assert(getKind() == Type::Int);
104     return Int;
105   }
106 
getUInt()107   uint64_t getUInt() const {
108     assert(getKind() == Type::UInt);
109     return UInt;
110   }
111 
getBool()112   bool getBool() const {
113     assert(getKind() == Type::Boolean);
114     return Bool;
115   }
116 
getFloat()117   double getFloat() const {
118     assert(getKind() == Type::Float);
119     return Float;
120   }
121 
getString()122   StringRef getString() const {
123     assert(getKind() == Type::String);
124     return Raw;
125   }
126 
127   /// Get an ArrayDocNode for an array node. If Convert, convert the node to an
128   /// array node if necessary.
129   ArrayDocNode &getArray(bool Convert = false) {
130     if (getKind() != Type::Array) {
131       assert(Convert);
132       convertToArray();
133     }
134     // This could be a static_cast, except ArrayDocNode is a forward reference.
135     return *reinterpret_cast<ArrayDocNode *>(this);
136   }
137 
138   /// Get a MapDocNode for a map node. If Convert, convert the node to a map
139   /// node if necessary.
140   MapDocNode &getMap(bool Convert = false) {
141     if (getKind() != Type::Map) {
142       assert(Convert);
143       convertToMap();
144     }
145     // This could be a static_cast, except MapDocNode is a forward reference.
146     return *reinterpret_cast<MapDocNode *>(this);
147   }
148 
149   /// Comparison operator, used for map keys.
150   friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
151     // This has to cope with one or both of the nodes being default-constructed,
152     // such that KindAndDoc is not set.
153     if (Rhs.isEmpty())
154       return false;
155     if (Lhs.KindAndDoc != Rhs.KindAndDoc) {
156       if (Lhs.isEmpty())
157         return true;
158       return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
159     }
160     switch (Lhs.getKind()) {
161     case Type::Int:
162       return Lhs.Int < Rhs.Int;
163     case Type::UInt:
164       return Lhs.UInt < Rhs.UInt;
165     case Type::Nil:
166       return false;
167     case Type::Boolean:
168       return Lhs.Bool < Rhs.Bool;
169     case Type::Float:
170       return Lhs.Float < Rhs.Float;
171     case Type::String:
172     case Type::Binary:
173       return Lhs.Raw < Rhs.Raw;
174     default:
175       llvm_unreachable("bad map key type");
176     }
177   }
178 
179   /// Equality operator
180   friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) {
181     return !(Lhs < Rhs) && !(Rhs < Lhs);
182   }
183 
184   /// Inequality operator
185   friend bool operator!=(const DocNode &Lhs, const DocNode &Rhs) {
186     return !(Lhs == Rhs);
187   }
188 
189   /// Convert this node to a string, assuming it is scalar.
190   std::string toString() const;
191 
192   /// Convert the StringRef and use it to set this DocNode (assuming scalar). If
193   /// it is a string, copy the string into the Document's strings list so we do
194   /// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
195   StringRef fromString(StringRef S, StringRef Tag = "");
196 
197   /// Convenience assignment operators. This only works if the destination
198   /// DocNode has an associated Document, i.e. it was not constructed using the
199   /// default constructor. The string one does not copy, so the string must
200   /// remain valid for the lifetime of the Document. Use fromString to avoid
201   /// that restriction.
202   DocNode &operator=(const char *Val) { return *this = StringRef(Val); }
203   DocNode &operator=(StringRef Val);
204   DocNode &operator=(bool Val);
205   DocNode &operator=(int Val);
206   DocNode &operator=(unsigned Val);
207   DocNode &operator=(int64_t Val);
208   DocNode &operator=(uint64_t Val);
209 
210 private:
211   // Private constructor setting KindAndDoc, used by methods in Document.
DocNode(const KindAndDocument * KindAndDoc)212   DocNode(const KindAndDocument *KindAndDoc) : KindAndDoc(KindAndDoc) {}
213 
214   void convertToArray();
215   void convertToMap();
216 };
217 
218 /// A DocNode that is a map.
219 class MapDocNode : public DocNode {
220 public:
MapDocNode()221   MapDocNode() {}
MapDocNode(DocNode & N)222   MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
223 
224   // Map access methods.
size()225   size_t size() const { return Map->size(); }
empty()226   bool empty() const { return !size(); }
begin()227   MapTy::iterator begin() { return Map->begin(); }
end()228   MapTy::iterator end() { return Map->end(); }
find(DocNode Key)229   MapTy::iterator find(DocNode Key) { return Map->find(Key); }
230   MapTy::iterator find(StringRef Key);
erase(MapTy::const_iterator I)231   MapTy::iterator erase(MapTy::const_iterator I) { return Map->erase(I); }
erase(DocNode Key)232   size_t erase(DocNode Key) { return Map->erase(Key); }
erase(MapTy::const_iterator First,MapTy::const_iterator Second)233   MapTy::iterator erase(MapTy::const_iterator First,
234                         MapTy::const_iterator Second) {
235     return Map->erase(First, Second);
236   }
237   /// Member access. The string data must remain valid for the lifetime of the
238   /// Document.
239   DocNode &operator[](StringRef S);
240   /// Member access, with convenience versions for an integer key.
241   DocNode &operator[](DocNode Key);
242   DocNode &operator[](int Key);
243   DocNode &operator[](unsigned Key);
244   DocNode &operator[](int64_t Key);
245   DocNode &operator[](uint64_t Key);
246 };
247 
248 /// A DocNode that is an array.
249 class ArrayDocNode : public DocNode {
250 public:
ArrayDocNode()251   ArrayDocNode() {}
ArrayDocNode(DocNode & N)252   ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
253 
254   // Array access methods.
size()255   size_t size() const { return Array->size(); }
empty()256   bool empty() const { return !size(); }
back()257   DocNode &back() const { return Array->back(); }
begin()258   ArrayTy::iterator begin() { return Array->begin(); }
end()259   ArrayTy::iterator end() { return Array->end(); }
push_back(DocNode N)260   void push_back(DocNode N) {
261     assert(N.isEmpty() || N.getDocument() == getDocument());
262     Array->push_back(N);
263   }
264 
265   /// Element access. This extends the array if necessary, with empty nodes.
266   DocNode &operator[](size_t Index);
267 };
268 
269 /// Simple in-memory representation of a document of msgpack objects with
270 /// ability to find and create array and map elements.  Does not currently cope
271 /// with any extension types.
272 class Document {
273   // Maps, arrays and strings used by nodes in the document. No attempt is made
274   // to free unused ones.
275   std::vector<std::unique_ptr<DocNode::MapTy>> Maps;
276   std::vector<std::unique_ptr<DocNode::ArrayTy>> Arrays;
277   std::vector<std::unique_ptr<char[]>> Strings;
278 
279   // The root node of the document.
280   DocNode Root;
281 
282   // The KindAndDocument structs pointed to by nodes in the document.
283   KindAndDocument KindAndDocs[size_t(Type::Empty) + 1];
284 
285   // Whether YAML output uses hex for UInt.
286   bool HexMode = false;
287 
288 public:
Document()289   Document() {
290     clear();
291     for (unsigned T = 0; T != unsigned(Type::Empty) + 1; ++T)
292       KindAndDocs[T] = {this, Type(T)};
293   }
294 
295   /// Get ref to the document's root element.
getRoot()296   DocNode &getRoot() { return Root; }
297 
298   /// Restore the Document to an empty state.
clear()299   void clear() { getRoot() = getEmptyNode(); }
300 
301   /// Create an empty node associated with this Document.
getEmptyNode()302   DocNode getEmptyNode() {
303     auto N = DocNode(&KindAndDocs[size_t(Type::Empty)]);
304     return N;
305   }
306 
307   /// Create a nil node associated with this Document.
getNode()308   DocNode getNode() {
309     auto N = DocNode(&KindAndDocs[size_t(Type::Nil)]);
310     return N;
311   }
312 
313   /// Create an Int node associated with this Document.
getNode(int64_t V)314   DocNode getNode(int64_t V) {
315     auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
316     N.Int = V;
317     return N;
318   }
319 
320   /// Create an Int node associated with this Document.
getNode(int V)321   DocNode getNode(int V) {
322     auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
323     N.Int = V;
324     return N;
325   }
326 
327   /// Create a UInt node associated with this Document.
getNode(uint64_t V)328   DocNode getNode(uint64_t V) {
329     auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
330     N.UInt = V;
331     return N;
332   }
333 
334   /// Create a UInt node associated with this Document.
getNode(unsigned V)335   DocNode getNode(unsigned V) {
336     auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
337     N.UInt = V;
338     return N;
339   }
340 
341   /// Create a Boolean node associated with this Document.
getNode(bool V)342   DocNode getNode(bool V) {
343     auto N = DocNode(&KindAndDocs[size_t(Type::Boolean)]);
344     N.Bool = V;
345     return N;
346   }
347 
348   /// Create a Float node associated with this Document.
getNode(double V)349   DocNode getNode(double V) {
350     auto N = DocNode(&KindAndDocs[size_t(Type::Float)]);
351     N.Float = V;
352     return N;
353   }
354 
355   /// Create a String node associated with this Document. If !Copy, the passed
356   /// string must remain valid for the lifetime of the Document.
357   DocNode getNode(StringRef V, bool Copy = false) {
358     if (Copy)
359       V = addString(V);
360     auto N = DocNode(&KindAndDocs[size_t(Type::String)]);
361     N.Raw = V;
362     return N;
363   }
364 
365   /// Create a String node associated with this Document. If !Copy, the passed
366   /// string must remain valid for the lifetime of the Document.
367   DocNode getNode(const char *V, bool Copy = false) {
368     return getNode(StringRef(V), Copy);
369   }
370 
371   /// Create an empty Map node associated with this Document.
getMapNode()372   MapDocNode getMapNode() {
373     auto N = DocNode(&KindAndDocs[size_t(Type::Map)]);
374     Maps.push_back(std::unique_ptr<DocNode::MapTy>(new DocNode::MapTy));
375     N.Map = Maps.back().get();
376     return N.getMap();
377   }
378 
379   /// Create an empty Array node associated with this Document.
getArrayNode()380   ArrayDocNode getArrayNode() {
381     auto N = DocNode(&KindAndDocs[size_t(Type::Array)]);
382     Arrays.push_back(std::unique_ptr<DocNode::ArrayTy>(new DocNode::ArrayTy));
383     N.Array = Arrays.back().get();
384     return N.getArray();
385   }
386 
387   /// Read a document from a binary msgpack blob, merging into anything already
388   /// in the Document. The blob data must remain valid for the lifetime of this
389   /// Document (because a string object in the document contains a StringRef
390   /// into the original blob). If Multi, then this sets root to an array and
391   /// adds top-level objects to it. If !Multi, then it only reads a single
392   /// top-level object, even if there are more, and sets root to that. Returns
393   /// false if failed due to illegal format or merge error.
394   ///
395   /// The Merger arg is a callback function that is called when the merge has a
396   /// conflict, that is, it is trying to set an item that is already set. If the
397   /// conflict cannot be resolved, the callback function returns -1. If the
398   /// conflict can be resolved, the callback returns a non-negative number and
399   /// sets *DestNode to the resolved node. The returned non-negative number is
400   /// significant only for an array node; it is then the array index to start
401   /// populating at. That allows Merger to choose whether to merge array
402   /// elements (returns 0) or append new elements (returns existing size).
403   ///
404   /// If SrcNode is an array or map, the resolution must be that *DestNode is an
405   /// array or map respectively, although it could be the array or map
406   /// (respectively) that was already there. MapKey is the key if *DestNode is a
407   /// map entry, a nil node otherwise.
408   ///
409   /// The default for Merger is to disallow any conflict.
410   bool readFromBlob(
411       StringRef Blob, bool Multi,
412       function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)>
413           Merger = [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
414             return -1;
415           });
416 
417   /// Write a MsgPack document to a binary MsgPack blob.
418   void writeToBlob(std::string &Blob);
419 
420   /// Copy a string into the Document's strings list, and return the copy that
421   /// is owned by the Document.
addString(StringRef S)422   StringRef addString(StringRef S) {
423     Strings.push_back(std::unique_ptr<char[]>(new char[S.size()]));
424     memcpy(&Strings.back()[0], S.data(), S.size());
425     return StringRef(&Strings.back()[0], S.size());
426   }
427 
428   /// Set whether YAML output uses hex for UInt. Default off.
429   void setHexMode(bool Val = true) { HexMode = Val; }
430 
431   /// Get Hexmode flag.
getHexMode()432   bool getHexMode() const { return HexMode; }
433 
434   /// Convert MsgPack Document to YAML text.
435   void toYAML(raw_ostream &OS);
436 
437   /// Read YAML text into the MsgPack document. Returns false on failure.
438   bool fromYAML(StringRef S);
439 };
440 
441 } // namespace msgpack
442 } // namespace llvm
443 
444 #endif // LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
445