1 //===-- MsgPackDocument.h - MsgPack Document --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file declares a class that exposes a simple in-memory representation
10 /// of a document of MsgPack objects, that can be read from MsgPack, written to
11 /// MsgPack, and inspected and modified in memory. This is intended to be a
12 /// lighter-weight (in terms of memory allocations) replacement for
13 /// MsgPackTypes.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
18 #define LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
19 
20 #include "llvm/BinaryFormat/MsgPackReader.h"
21 #include <map>
22 
23 namespace llvm {
24 namespace msgpack {
25 
26 class ArrayDocNode;
27 class Document;
28 class MapDocNode;
29 
30 /// The kind of a DocNode and its owning Document.
31 struct KindAndDocument {
32   Document *Doc;
33   Type Kind;
34 };
35 
36 /// A node in a MsgPack Document. This is a simple copyable and
37 /// passable-by-value type that does not own any memory.
38 class DocNode {
39   friend Document;
40 
41 public:
42   typedef std::map<DocNode, DocNode> MapTy;
43   typedef std::vector<DocNode> ArrayTy;
44 
45 private:
46   // Using KindAndDocument allows us to squeeze Kind and a pointer to the
47   // owning Document into the same word. Having a pointer to the owning
48   // Document makes the API of DocNode more convenient, and allows its use in
49   // YAMLIO.
50   const KindAndDocument *KindAndDoc;
51 
52 protected:
53   // The union of different values.
54   union {
55     int64_t Int;
56     uint64_t UInt;
57     bool Bool;
58     double Float;
59     StringRef Raw;
60     ArrayTy *Array;
61     MapTy *Map;
62   };
63 
64 public:
65   // Default constructor gives an empty node with no associated Document. All
66   // you can do with it is "isEmpty()".
DocNode()67   DocNode() : KindAndDoc(nullptr) {}
68 
69   // Type methods
isMap()70   bool isMap() const { return getKind() == Type::Map; }
isArray()71   bool isArray() const { return getKind() == Type::Array; }
isScalar()72   bool isScalar() const { return !isMap() && !isArray(); }
isString()73   bool isString() const { return getKind() == Type::String; }
74 
75   // Accessors. isEmpty() returns true for both a default-constructed DocNode
76   // that has no associated Document, and the result of getEmptyNode(), which
77   // does have an associated document.
isEmpty()78   bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; }
getKind()79   Type getKind() const { return KindAndDoc->Kind; }
getDocument()80   Document *getDocument() const { return KindAndDoc->Doc; }
81 
getInt()82   int64_t &getInt() {
83     assert(getKind() == Type::Int);
84     return Int;
85   }
86 
getUInt()87   uint64_t &getUInt() {
88     assert(getKind() == Type::UInt);
89     return UInt;
90   }
91 
getBool()92   bool &getBool() {
93     assert(getKind() == Type::Boolean);
94     return Bool;
95   }
96 
getFloat()97   double &getFloat() {
98     assert(getKind() == Type::Float);
99     return Float;
100   }
101 
getInt()102   int64_t getInt() const {
103     assert(getKind() == Type::Int);
104     return Int;
105   }
106 
getUInt()107   uint64_t getUInt() const {
108     assert(getKind() == Type::UInt);
109     return UInt;
110   }
111 
getBool()112   bool getBool() const {
113     assert(getKind() == Type::Boolean);
114     return Bool;
115   }
116 
getFloat()117   double getFloat() const {
118     assert(getKind() == Type::Float);
119     return Float;
120   }
121 
getString()122   StringRef getString() const {
123     assert(getKind() == Type::String);
124     return Raw;
125   }
126 
getBinary()127   MemoryBufferRef getBinary() const {
128     assert(getKind() == Type::Binary);
129     return MemoryBufferRef(Raw, "");
130   }
131 
132   /// Get an ArrayDocNode for an array node. If Convert, convert the node to an
133   /// array node if necessary.
134   ArrayDocNode &getArray(bool Convert = false) {
135     if (getKind() != Type::Array) {
136       assert(Convert);
137       convertToArray();
138     }
139     // This could be a static_cast, except ArrayDocNode is a forward reference.
140     return *reinterpret_cast<ArrayDocNode *>(this);
141   }
142 
143   /// Get a MapDocNode for a map node. If Convert, convert the node to a map
144   /// node if necessary.
145   MapDocNode &getMap(bool Convert = false) {
146     if (getKind() != Type::Map) {
147       assert(Convert);
148       convertToMap();
149     }
150     // This could be a static_cast, except MapDocNode is a forward reference.
151     return *reinterpret_cast<MapDocNode *>(this);
152   }
153 
154   /// Comparison operator, used for map keys.
155   friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
156     // This has to cope with one or both of the nodes being default-constructed,
157     // such that KindAndDoc is not set.
158     if (Rhs.isEmpty())
159       return false;
160     if (Lhs.KindAndDoc != Rhs.KindAndDoc) {
161       if (Lhs.isEmpty())
162         return true;
163       return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
164     }
165     switch (Lhs.getKind()) {
166     case Type::Int:
167       return Lhs.Int < Rhs.Int;
168     case Type::UInt:
169       return Lhs.UInt < Rhs.UInt;
170     case Type::Nil:
171       return false;
172     case Type::Boolean:
173       return Lhs.Bool < Rhs.Bool;
174     case Type::Float:
175       return Lhs.Float < Rhs.Float;
176     case Type::String:
177     case Type::Binary:
178       return Lhs.Raw < Rhs.Raw;
179     default:
180       llvm_unreachable("bad map key type");
181     }
182   }
183 
184   /// Equality operator
185   friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) {
186     return !(Lhs < Rhs) && !(Rhs < Lhs);
187   }
188 
189   /// Inequality operator
190   friend bool operator!=(const DocNode &Lhs, const DocNode &Rhs) {
191     return !(Lhs == Rhs);
192   }
193 
194   /// Convert this node to a string, assuming it is scalar.
195   std::string toString() const;
196 
197   /// Convert the StringRef and use it to set this DocNode (assuming scalar). If
198   /// it is a string, copy the string into the Document's strings list so we do
199   /// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
200   StringRef fromString(StringRef S, StringRef Tag = "");
201 
202   /// Convenience assignment operators. This only works if the destination
203   /// DocNode has an associated Document, i.e. it was not constructed using the
204   /// default constructor. The string one does not copy, so the string must
205   /// remain valid for the lifetime of the Document. Use fromString to avoid
206   /// that restriction.
207   DocNode &operator=(const char *Val) { return *this = StringRef(Val); }
208   DocNode &operator=(StringRef Val);
209   DocNode &operator=(MemoryBufferRef Val);
210   DocNode &operator=(bool Val);
211   DocNode &operator=(int Val);
212   DocNode &operator=(unsigned Val);
213   DocNode &operator=(int64_t Val);
214   DocNode &operator=(uint64_t Val);
215 
216 private:
217   // Private constructor setting KindAndDoc, used by methods in Document.
DocNode(const KindAndDocument * KindAndDoc)218   DocNode(const KindAndDocument *KindAndDoc) : KindAndDoc(KindAndDoc) {}
219 
220   void convertToArray();
221   void convertToMap();
222 };
223 
224 /// A DocNode that is a map.
225 class MapDocNode : public DocNode {
226 public:
227   MapDocNode() = default;
MapDocNode(DocNode & N)228   MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
229 
230   // Map access methods.
size()231   size_t size() const { return Map->size(); }
empty()232   bool empty() const { return !size(); }
begin()233   MapTy::iterator begin() { return Map->begin(); }
end()234   MapTy::iterator end() { return Map->end(); }
find(DocNode Key)235   MapTy::iterator find(DocNode Key) { return Map->find(Key); }
236   MapTy::iterator find(StringRef Key);
erase(MapTy::const_iterator I)237   MapTy::iterator erase(MapTy::const_iterator I) { return Map->erase(I); }
erase(DocNode Key)238   size_t erase(DocNode Key) { return Map->erase(Key); }
erase(MapTy::const_iterator First,MapTy::const_iterator Second)239   MapTy::iterator erase(MapTy::const_iterator First,
240                         MapTy::const_iterator Second) {
241     return Map->erase(First, Second);
242   }
243   /// Member access. The string data must remain valid for the lifetime of the
244   /// Document.
245   DocNode &operator[](StringRef S);
246   /// Member access, with convenience versions for an integer key.
247   DocNode &operator[](DocNode Key);
248   DocNode &operator[](int Key);
249   DocNode &operator[](unsigned Key);
250   DocNode &operator[](int64_t Key);
251   DocNode &operator[](uint64_t Key);
252 };
253 
254 /// A DocNode that is an array.
255 class ArrayDocNode : public DocNode {
256 public:
257   ArrayDocNode() = default;
ArrayDocNode(DocNode & N)258   ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
259 
260   // Array access methods.
size()261   size_t size() const { return Array->size(); }
empty()262   bool empty() const { return !size(); }
back()263   DocNode &back() const { return Array->back(); }
begin()264   ArrayTy::iterator begin() { return Array->begin(); }
end()265   ArrayTy::iterator end() { return Array->end(); }
push_back(DocNode N)266   void push_back(DocNode N) {
267     assert(N.isEmpty() || N.getDocument() == getDocument());
268     Array->push_back(N);
269   }
270 
271   /// Element access. This extends the array if necessary, with empty nodes.
272   DocNode &operator[](size_t Index);
273 };
274 
275 /// Simple in-memory representation of a document of msgpack objects with
276 /// ability to find and create array and map elements.  Does not currently cope
277 /// with any extension types.
278 class Document {
279   // Maps, arrays and strings used by nodes in the document. No attempt is made
280   // to free unused ones.
281   std::vector<std::unique_ptr<DocNode::MapTy>> Maps;
282   std::vector<std::unique_ptr<DocNode::ArrayTy>> Arrays;
283   std::vector<std::unique_ptr<char[]>> Strings;
284 
285   // The root node of the document.
286   DocNode Root;
287 
288   // The KindAndDocument structs pointed to by nodes in the document.
289   KindAndDocument KindAndDocs[size_t(Type::Empty) + 1];
290 
291   // Whether YAML output uses hex for UInt.
292   bool HexMode = false;
293 
294 public:
Document()295   Document() {
296     clear();
297     for (unsigned T = 0; T != unsigned(Type::Empty) + 1; ++T)
298       KindAndDocs[T] = {this, Type(T)};
299   }
300 
301   /// Get ref to the document's root element.
getRoot()302   DocNode &getRoot() { return Root; }
303 
304   /// Restore the Document to an empty state.
clear()305   void clear() { getRoot() = getEmptyNode(); }
306 
307   /// Create an empty node associated with this Document.
getEmptyNode()308   DocNode getEmptyNode() {
309     auto N = DocNode(&KindAndDocs[size_t(Type::Empty)]);
310     return N;
311   }
312 
313   /// Create a nil node associated with this Document.
getNode()314   DocNode getNode() {
315     auto N = DocNode(&KindAndDocs[size_t(Type::Nil)]);
316     return N;
317   }
318 
319   /// Create an Int node associated with this Document.
getNode(int64_t V)320   DocNode getNode(int64_t V) {
321     auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
322     N.Int = V;
323     return N;
324   }
325 
326   /// Create an Int node associated with this Document.
getNode(int V)327   DocNode getNode(int V) {
328     auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
329     N.Int = V;
330     return N;
331   }
332 
333   /// Create a UInt node associated with this Document.
getNode(uint64_t V)334   DocNode getNode(uint64_t V) {
335     auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
336     N.UInt = V;
337     return N;
338   }
339 
340   /// Create a UInt node associated with this Document.
getNode(unsigned V)341   DocNode getNode(unsigned V) {
342     auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
343     N.UInt = V;
344     return N;
345   }
346 
347   /// Create a Boolean node associated with this Document.
getNode(bool V)348   DocNode getNode(bool V) {
349     auto N = DocNode(&KindAndDocs[size_t(Type::Boolean)]);
350     N.Bool = V;
351     return N;
352   }
353 
354   /// Create a Float node associated with this Document.
getNode(double V)355   DocNode getNode(double V) {
356     auto N = DocNode(&KindAndDocs[size_t(Type::Float)]);
357     N.Float = V;
358     return N;
359   }
360 
361   /// Create a String node associated with this Document. If !Copy, the passed
362   /// string must remain valid for the lifetime of the Document.
363   DocNode getNode(StringRef V, bool Copy = false) {
364     if (Copy)
365       V = addString(V);
366     auto N = DocNode(&KindAndDocs[size_t(Type::String)]);
367     N.Raw = V;
368     return N;
369   }
370 
371   /// Create a String node associated with this Document. If !Copy, the passed
372   /// string must remain valid for the lifetime of the Document.
373   DocNode getNode(const char *V, bool Copy = false) {
374     return getNode(StringRef(V), Copy);
375   }
376 
377   /// Create a Binary node associated with this Document. If !Copy, the passed
378   /// buffer must remain valid for the lifetime of the Document.
379   DocNode getNode(MemoryBufferRef V, bool Copy = false) {
380     auto Raw = V.getBuffer();
381     if (Copy)
382       Raw = addString(Raw);
383     auto N = DocNode(&KindAndDocs[size_t(Type::Binary)]);
384     N.Raw = Raw;
385     return N;
386   }
387 
388   /// Create an empty Map node associated with this Document.
getMapNode()389   MapDocNode getMapNode() {
390     auto N = DocNode(&KindAndDocs[size_t(Type::Map)]);
391     Maps.push_back(std::unique_ptr<DocNode::MapTy>(new DocNode::MapTy));
392     N.Map = Maps.back().get();
393     return N.getMap();
394   }
395 
396   /// Create an empty Array node associated with this Document.
getArrayNode()397   ArrayDocNode getArrayNode() {
398     auto N = DocNode(&KindAndDocs[size_t(Type::Array)]);
399     Arrays.push_back(std::unique_ptr<DocNode::ArrayTy>(new DocNode::ArrayTy));
400     N.Array = Arrays.back().get();
401     return N.getArray();
402   }
403 
404   /// Read a document from a binary msgpack blob, merging into anything already
405   /// in the Document. The blob data must remain valid for the lifetime of this
406   /// Document (because a string object in the document contains a StringRef
407   /// into the original blob). If Multi, then this sets root to an array and
408   /// adds top-level objects to it. If !Multi, then it only reads a single
409   /// top-level object, even if there are more, and sets root to that. Returns
410   /// false if failed due to illegal format or merge error.
411   ///
412   /// The Merger arg is a callback function that is called when the merge has a
413   /// conflict, that is, it is trying to set an item that is already set. If the
414   /// conflict cannot be resolved, the callback function returns -1. If the
415   /// conflict can be resolved, the callback returns a non-negative number and
416   /// sets *DestNode to the resolved node. The returned non-negative number is
417   /// significant only for an array node; it is then the array index to start
418   /// populating at. That allows Merger to choose whether to merge array
419   /// elements (returns 0) or append new elements (returns existing size).
420   ///
421   /// If SrcNode is an array or map, the resolution must be that *DestNode is an
422   /// array or map respectively, although it could be the array or map
423   /// (respectively) that was already there. MapKey is the key if *DestNode is a
424   /// map entry, a nil node otherwise.
425   ///
426   /// The default for Merger is to disallow any conflict.
427   bool readFromBlob(
428       StringRef Blob, bool Multi,
429       function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)>
430           Merger = [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
431             return -1;
432           });
433 
434   /// Write a MsgPack document to a binary MsgPack blob.
435   void writeToBlob(std::string &Blob);
436 
437   /// Copy a string into the Document's strings list, and return the copy that
438   /// is owned by the Document.
addString(StringRef S)439   StringRef addString(StringRef S) {
440     Strings.push_back(std::unique_ptr<char[]>(new char[S.size()]));
441     memcpy(&Strings.back()[0], S.data(), S.size());
442     return StringRef(&Strings.back()[0], S.size());
443   }
444 
445   /// Set whether YAML output uses hex for UInt. Default off.
446   void setHexMode(bool Val = true) { HexMode = Val; }
447 
448   /// Get Hexmode flag.
getHexMode()449   bool getHexMode() const { return HexMode; }
450 
451   /// Convert MsgPack Document to YAML text.
452   void toYAML(raw_ostream &OS);
453 
454   /// Read YAML text into the MsgPack document. Returns false on failure.
455   bool fromYAML(StringRef S);
456 };
457 
458 } // namespace msgpack
459 } // namespace llvm
460 
461 #endif // LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
462