1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *     https://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #ifndef avro_GenericDatum_hh__
20 #define avro_GenericDatum_hh__
21 
22 #include <cstdint>
23 #include <map>
24 #include <string>
25 #include <vector>
26 
27 #if __cplusplus >= 201703L
28 #include <any>
29 #else
30 #include "boost/any.hpp"
31 #endif
32 
33 #include "LogicalType.hh"
34 #include "Node.hh"
35 #include "ValidSchema.hh"
36 
37 namespace avro {
38 
39 /**
40  * Generic datum which can hold any Avro type. The datum has a type
41  * and a value. The type is one of the Avro data types. The C++ type for
42  * value corresponds to the Avro type.
43  * \li An Avro <tt>null</tt> corresponds to no C++ type. It is illegal to
44  * to try to access values for <tt>null</tt>.
45  * \li Avro <tt>boolean</tt> maps to C++ <tt>bool</tt>
46  * \li Avro <tt>int</tt> maps to C++ <tt>int32_t</tt>.
47  * \li Avro <tt>long</tt> maps to C++ <tt>int64_t</tt>.
48  * \li Avro <tt>float</tt> maps to C++ <tt>float</tt>.
49  * \li Avro <tt>double</tt> maps to C++ <tt>double</tt>.
50  * \li Avro <tt>string</tt> maps to C++ <tt>std::string</tt>.
51  * \li Avro <tt>bytes</tt> maps to C++ <tt>std::vector&lt;uint_t&gt;</tt>.
52  * \li Avro <tt>fixed</tt> maps to C++ class <tt>GenericFixed</tt>.
53  * \li Avro <tt>enum</tt> maps to C++ class <tt>GenericEnum</tt>.
54  * \li Avro <tt>array</tt> maps to C++ class <tt>GenericArray</tt>.
55  * \li Avro <tt>map</tt> maps to C++ class <tt>GenericMap</tt>.
56  * \li There is no C++ type corresponding to Avro <tt>union</tt>. The
57  * object should have the C++ type corresponding to one of the constituent
58  * types of the union.
59  *
60  */
61 class AVRO_DECL GenericDatum {
62 protected:
63     Type type_;
64     LogicalType logicalType_;
65 #if __cplusplus >= 201703L
66     std::any value_;
67 #else
68     boost::any value_;
69 #endif
70 
GenericDatum(Type t)71     explicit GenericDatum(Type t)
72         : type_(t), logicalType_(LogicalType::NONE) {}
73 
GenericDatum(Type t,LogicalType logicalType)74     GenericDatum(Type t, LogicalType logicalType)
75         : type_(t), logicalType_(logicalType) {}
76 
77     template<typename T>
GenericDatum(Type t,LogicalType logicalType,const T & v)78     GenericDatum(Type t, LogicalType logicalType, const T &v)
79         : type_(t), logicalType_(logicalType), value_(v) {}
80 
81     void init(const NodePtr &schema);
82 
83 public:
84     /**
85      * The avro data type this datum holds.
86      */
87     Type type() const;
88 
89     /**
90      * The avro logical type that augments the main data type this datum holds.
91      */
92     LogicalType logicalType() const;
93 
94     /**
95      * Returns the value held by this datum.
96      * T The type for the value. This must correspond to the
97      * avro type returned by type().
98      */
99     template<typename T>
100     const T &value() const;
101 
102     /**
103      * Returns the reference to the value held by this datum, which
104      * can be used to change the contents. Please note that only
105      * value can be changed, the data type of the value held cannot
106      * be changed.
107      *
108      * T The type for the value. This must correspond to the
109      * avro type returned by type().
110      */
111     template<typename T>
112     T &value();
113 
114     /**
115      * Returns true if and only if this datum is a union.
116      */
isUnion() const117     bool isUnion() const { return type_ == AVRO_UNION; }
118 
119     /**
120      * Returns the index of the current branch, if this is a union.
121      * \sa isUnion().
122      */
123     size_t unionBranch() const;
124 
125     /**
126      * Selects a new branch in the union if this is a union.
127      * \sa isUnion().
128      */
129     void selectBranch(size_t branch);
130 
131     /// Makes a new AVRO_NULL datum.
GenericDatum()132     GenericDatum() : type_(AVRO_NULL), logicalType_(LogicalType::NONE) {}
133 
134     /// Makes a new AVRO_BOOL datum whose value is of type bool.
135     /// We don't make this explicit constructor because we want to allow automatic conversion
136     // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(bool v)137     GenericDatum(bool v)
138         : type_(AVRO_BOOL), logicalType_(LogicalType::NONE), value_(v) {}
139 
140     /// Makes a new AVRO_INT datum whose value is of type int32_t.
141     /// We don't make this explicit constructor because we want to allow automatic conversion
142     // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(int32_t v)143     GenericDatum(int32_t v)
144         : type_(AVRO_INT), logicalType_(LogicalType::NONE), value_(v) {}
145 
146     /// Makes a new AVRO_LONG datum whose value is of type int64_t.
147     /// We don't make this explicit constructor because we want to allow automatic conversion
148     // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(int64_t v)149     GenericDatum(int64_t v)
150         : type_(AVRO_LONG), logicalType_(LogicalType::NONE), value_(v) {}
151 
152     /// Makes a new AVRO_FLOAT datum whose value is of type float.
153     /// We don't make this explicit constructor because we want to allow automatic conversion
154     // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(float v)155     GenericDatum(float v)
156         : type_(AVRO_FLOAT), logicalType_(LogicalType::NONE), value_(v) {}
157 
158     /// Makes a new AVRO_DOUBLE datum whose value is of type double.
159     /// We don't make this explicit constructor because we want to allow automatic conversion
160     // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(double v)161     GenericDatum(double v)
162         : type_(AVRO_DOUBLE), logicalType_(LogicalType::NONE), value_(v) {}
163 
164     /// Makes a new AVRO_STRING datum whose value is of type std::string.
165     /// We don't make this explicit constructor because we want to allow automatic conversion
166     // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(const std::string & v)167     GenericDatum(const std::string &v)
168         : type_(AVRO_STRING), logicalType_(LogicalType::NONE), value_(v) {}
169 
170     /// Makes a new AVRO_BYTES datum whose value is of type
171     /// std::vector<uint8_t>.
172     /// We don't make this explicit constructor because we want to allow automatic conversion
173     // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(const std::vector<uint8_t> & v)174     GenericDatum(const std::vector<uint8_t> &v) : type_(AVRO_BYTES), logicalType_(LogicalType::NONE), value_(v) {}
175 
176     /**
177      * Constructs a datum corresponding to the given avro type.
178      * The value will the appropriate default corresponding to the
179      * data type.
180      * \param schema The schema that defines the avro type.
181      */
182     /// We don't make this explicit constructor because we want to allow automatic conversion
183     // NOLINTNEXTLINE(google-explicit-constructor)
184     GenericDatum(const NodePtr &schema);
185 
186     /**
187      * Constructs a datum corresponding to the given avro type and set
188      * the value.
189      * \param schema The schema that defines the avro type.
190      * \param v The value for this type.
191      */
192     template<typename T>
GenericDatum(const NodePtr & schema,const T & v)193     GenericDatum(const NodePtr &schema, const T &v) : type_(schema->type()), logicalType_(schema->logicalType()) {
194         init(schema);
195 #if __cplusplus >= 201703L
196         *std::any_cast<T>(&value_) = v;
197 #else
198         *boost::any_cast<T>(&value_) = v;
199 #endif
200     }
201 
202     /**
203      * Constructs a datum corresponding to the given avro type.
204      * The value will the appropriate default corresponding to the
205      * data type.
206      * \param schema The schema that defines the avro type.
207      */
208     explicit GenericDatum(const ValidSchema &schema);
209 };
210 
211 /**
212  * The base class for all generic type for containers.
213  */
214 class AVRO_DECL GenericContainer {
215     NodePtr schema_;
216     static void assertType(const NodePtr &schema, Type type);
217 
218 protected:
219     /**
220      * Constructs a container corresponding to the given schema.
221      */
GenericContainer(Type type,const NodePtr & s)222     GenericContainer(Type type, const NodePtr &s) : schema_(s) {
223         assertType(s, type);
224     }
225 
226 public:
227     /// Returns the schema for this object
schema() const228     const NodePtr &schema() const {
229         return schema_;
230     }
231 };
232 
233 /**
234  * Generic container for unions.
235  */
236 class AVRO_DECL GenericUnion : public GenericContainer {
237     size_t curBranch_;
238     GenericDatum datum_;
239 
240 public:
241     /**
242      * Constructs a generic union corresponding to the given schema \p schema,
243      * and the given value. The schema should be of Avro type union
244      * and the value should correspond to one of the branches of the union.
245      */
GenericUnion(const NodePtr & schema)246     explicit GenericUnion(const NodePtr &schema) : GenericContainer(AVRO_UNION, schema), curBranch_(schema->leaves()) {
247         selectBranch(0);
248     }
249 
250     /**
251      * Returns the index of the current branch.
252      */
currentBranch() const253     size_t currentBranch() const { return curBranch_; }
254 
255     /**
256      * Selects a new branch. The type for the value is changed accordingly.
257      * \param branch The index for the selected branch.
258      */
selectBranch(size_t branch)259     void selectBranch(size_t branch) {
260         if (curBranch_ != branch) {
261             datum_ = GenericDatum(schema()->leafAt(branch));
262             curBranch_ = branch;
263         }
264     }
265 
266     /**
267      * Returns the datum corresponding to the currently selected branch
268      * in this union.
269      */
datum()270     GenericDatum &datum() {
271         return datum_;
272     }
273 
274     /**
275      * Returns the datum corresponding to the currently selected branch
276      * in this union.
277      */
datum() const278     const GenericDatum &datum() const {
279         return datum_;
280     }
281 };
282 
283 /**
284  * The generic container for Avro records.
285  */
286 class AVRO_DECL GenericRecord : public GenericContainer {
287     std::vector<GenericDatum> fields_;
288 
289 public:
290     /**
291      * Constructs a generic record corresponding to the given schema \p schema,
292      * which should be of Avro type record.
293      */
294     explicit GenericRecord(const NodePtr &schema);
295 
296     /**
297      * Returns the number of fields in the current record.
298      */
fieldCount() const299     size_t fieldCount() const {
300         return fields_.size();
301     }
302 
303     /**
304      * Returns index of the field with the given name \p name
305      */
fieldIndex(const std::string & name) const306     size_t fieldIndex(const std::string &name) const {
307         size_t index = 0;
308         if (!schema()->nameIndex(name, index)) {
309             throw Exception("Invalid field name: " + name);
310         }
311         return index;
312     }
313 
314     /**
315      * Returns true if a field with the given name \p name is located in this r
316      * false otherwise
317      */
hasField(const std::string & name) const318     bool hasField(const std::string &name) const {
319         size_t index = 0;
320         return schema()->nameIndex(name, index);
321     }
322 
323     /**
324      * Returns the field with the given name \p name.
325      */
field(const std::string & name) const326     const GenericDatum &field(const std::string &name) const {
327         return fieldAt(fieldIndex(name));
328     }
329 
330     /**
331      * Returns the reference to the field with the given name \p name,
332      * which can be used to change the contents.
333      */
field(const std::string & name)334     GenericDatum &field(const std::string &name) {
335         return fieldAt(fieldIndex(name));
336     }
337 
338     /**
339      * Returns the field at the given position \p pos.
340      */
fieldAt(size_t pos) const341     const GenericDatum &fieldAt(size_t pos) const {
342         return fields_[pos];
343     }
344 
345     /**
346      * Returns the reference to the field at the given position \p pos,
347      * which can be used to change the contents.
348      */
fieldAt(size_t pos)349     GenericDatum &fieldAt(size_t pos) {
350         return fields_[pos];
351     }
352 
353     /**
354      * Replaces the field at the given position \p pos with \p v.
355      */
setFieldAt(size_t pos,const GenericDatum & v)356     void setFieldAt(size_t pos, const GenericDatum &v) {
357         // assertSameType(v, schema()->leafAt(pos));
358         fields_[pos] = v;
359     }
360 };
361 
362 /**
363  * The generic container for Avro arrays.
364  */
365 class AVRO_DECL GenericArray : public GenericContainer {
366 public:
367     /**
368      * The contents type for the array.
369      */
370     typedef std::vector<GenericDatum> Value;
371 
372     /**
373      * Constructs a generic array corresponding to the given schema \p schema,
374      * which should be of Avro type array.
375      */
GenericArray(const NodePtr & schema)376     explicit GenericArray(const NodePtr &schema) : GenericContainer(AVRO_ARRAY, schema) {
377     }
378 
379     /**
380      * Returns the contents of this array.
381      */
value() const382     const Value &value() const {
383         return value_;
384     }
385 
386     /**
387      * Returns the reference to the contents of this array.
388      */
value()389     Value &value() {
390         return value_;
391     }
392 
393 private:
394     Value value_;
395 };
396 
397 /**
398  * The generic container for Avro maps.
399  */
400 class AVRO_DECL GenericMap : public GenericContainer {
401 public:
402     /**
403      * The contents type for the map.
404      */
405     typedef std::vector<std::pair<std::string, GenericDatum>> Value;
406 
407     /**
408      * Constructs a generic map corresponding to the given schema \p schema,
409      * which should be of Avro type map.
410      */
GenericMap(const NodePtr & schema)411     explicit GenericMap(const NodePtr &schema) : GenericContainer(AVRO_MAP, schema) {
412     }
413 
414     /**
415      * Returns the contents of this map.
416      */
value() const417     const Value &value() const {
418         return value_;
419     }
420 
421     /**
422      * Returns the reference to the contents of this map.
423      */
value()424     Value &value() {
425         return value_;
426     }
427 
428 private:
429     Value value_;
430 };
431 
432 /**
433  * Generic container for Avro enum.
434  */
435 class AVRO_DECL GenericEnum : public GenericContainer {
436     size_t value_;
437 
index(const NodePtr & schema,const std::string & symbol)438     static size_t index(const NodePtr &schema, const std::string &symbol) {
439         size_t result;
440         if (schema->nameIndex(symbol, result)) {
441             return result;
442         }
443         throw Exception("No such symbol");
444     }
445 
446 public:
447     /**
448      * Constructs a generic enum corresponding to the given schema \p schema,
449      * which should be of Avro type enum.
450      */
GenericEnum(const NodePtr & schema)451     explicit GenericEnum(const NodePtr &schema) : GenericContainer(AVRO_ENUM, schema), value_(0) {
452     }
453 
GenericEnum(const NodePtr & schema,const std::string & symbol)454     GenericEnum(const NodePtr &schema, const std::string &symbol) : GenericContainer(AVRO_ENUM, schema), value_(index(schema, symbol)) {
455     }
456 
457     /**
458      * Returns the symbol corresponding to the cardinal \p n. If the
459      * value for \p n is not within the limits an exception is thrown.
460      */
symbol(size_t n)461     const std::string &symbol(size_t n) {
462         if (n < schema()->names()) {
463             return schema()->nameAt(n);
464         }
465         throw Exception("Not as many symbols");
466     }
467 
468     /**
469      * Returns the cardinal for the given symbol \c symbol. If the symbol
470      * is not defined for this enum and exception is thrown.
471      */
index(const std::string & symbol) const472     size_t index(const std::string &symbol) const {
473         return index(schema(), symbol);
474     }
475 
476     /**
477      * Set the value for this enum corresponding to the given symbol \c symbol.
478      */
set(const std::string & symbol)479     size_t set(const std::string &symbol) {
480         return value_ = index(symbol);
481     }
482 
483     /**
484      * Set the value for this enum corresponding to the given cardinal \c n.
485      */
set(size_t n)486     void set(size_t n) {
487         if (n < schema()->names()) {
488             value_ = n;
489             return;
490         }
491         throw Exception("Not as many symbols");
492     }
493 
494     /**
495      * Returns the cardinal for the current value of this enum.
496      */
value() const497     size_t value() const {
498         return value_;
499     }
500 
501     /**
502      * Returns the symbol for the current value of this enum.
503      */
symbol() const504     const std::string &symbol() const {
505         return schema()->nameAt(value_);
506     }
507 };
508 
509 /**
510  * Generic container for Avro fixed.
511  */
512 class AVRO_DECL GenericFixed : public GenericContainer {
513     std::vector<uint8_t> value_;
514 
515 public:
516     /**
517      * Constructs a generic enum corresponding to the given schema \p schema,
518      * which should be of Avro type fixed.
519      */
GenericFixed(const NodePtr & schema)520     explicit GenericFixed(const NodePtr &schema) : GenericContainer(AVRO_FIXED, schema) {
521         value_.resize(schema->fixedSize());
522     }
523 
524     GenericFixed(const NodePtr &schema, const std::vector<uint8_t> &v);
525 
526     /**
527      * Returns the contents of this fixed.
528      */
value() const529     const std::vector<uint8_t> &value() const {
530         return value_;
531     }
532 
533     /**
534      * Returns the reference to the contents of this fixed.
535      */
value()536     std::vector<uint8_t> &value() {
537         return value_;
538     }
539 };
540 
type() const541 inline Type GenericDatum::type() const {
542     return (type_ == AVRO_UNION) ?
543 #if __cplusplus >= 201703L
544                                  std::any_cast<GenericUnion>(&value_)->datum().type()
545                                  :
546 #else
547                                  boost::any_cast<GenericUnion>(&value_)->datum().type()
548                                  :
549 #endif
550                                  type_;
551 }
552 
logicalType() const553 inline LogicalType GenericDatum::logicalType() const {
554     return (type_ == AVRO_UNION) ?
555 #if __cplusplus >= 201703L
556         std::any_cast<GenericUnion>(&value_)->datum().logicalType() :
557 #else
558         boost::any_cast<GenericUnion>(&value_)->datum().logicalType() :
559 #endif
560         logicalType_;
561 }
562 
563 template<typename T>
value()564 T &GenericDatum::value() {
565     return (type_ == AVRO_UNION) ?
566 #if __cplusplus >= 201703L
567                                  std::any_cast<GenericUnion>(&value_)->datum().value<T>()
568                                  : *std::any_cast<T>(&value_);
569 #else
570                                  boost::any_cast<GenericUnion>(&value_)->datum().value<T>()
571                                  : *boost::any_cast<T>(&value_);
572 #endif
573 }
574 
575 template<typename T>
value() const576 const T &GenericDatum::value() const {
577     return (type_ == AVRO_UNION) ?
578 #if __cplusplus >= 201703L
579                                  std::any_cast<GenericUnion>(&value_)->datum().value<T>()
580                                  : *std::any_cast<T>(&value_);
581 #else
582                                  boost::any_cast<GenericUnion>(&value_)->datum().value<T>()
583                                  : *boost::any_cast<T>(&value_);
584 #endif
585 }
586 
unionBranch() const587 inline size_t GenericDatum::unionBranch() const {
588 #if __cplusplus >= 201703L
589     return std::any_cast<GenericUnion>(&value_)->currentBranch();
590 #else
591     return boost::any_cast<GenericUnion>(&value_)->currentBranch();
592 #endif
593 }
594 
selectBranch(size_t branch)595 inline void GenericDatum::selectBranch(size_t branch) {
596 #if __cplusplus >= 201703L
597     std::any_cast<GenericUnion>(&value_)->selectBranch(branch);
598 #else
599     boost::any_cast<GenericUnion>(&value_)->selectBranch(branch);
600 #endif
601 }
602 
603 } // namespace avro
604 #endif // avro_GenericDatum_hh__
605