1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 #ifndef avro_GenericDatum_hh__
20 #define avro_GenericDatum_hh__
21
22 #include <cstdint>
23 #include <map>
24 #include <string>
25 #include <vector>
26
27 #if __cplusplus >= 201703L
28 #include <any>
29 #else
30 #include "boost/any.hpp"
31 #endif
32
33 #include "LogicalType.hh"
34 #include "Node.hh"
35 #include "ValidSchema.hh"
36
37 namespace avro {
38
39 /**
40 * Generic datum which can hold any Avro type. The datum has a type
41 * and a value. The type is one of the Avro data types. The C++ type for
42 * value corresponds to the Avro type.
 * \li An Avro <tt>null</tt> corresponds to no C++ type. It is illegal
 * to try to access values for <tt>null</tt>.
45 * \li Avro <tt>boolean</tt> maps to C++ <tt>bool</tt>
46 * \li Avro <tt>int</tt> maps to C++ <tt>int32_t</tt>.
47 * \li Avro <tt>long</tt> maps to C++ <tt>int64_t</tt>.
48 * \li Avro <tt>float</tt> maps to C++ <tt>float</tt>.
49 * \li Avro <tt>double</tt> maps to C++ <tt>double</tt>.
50 * \li Avro <tt>string</tt> maps to C++ <tt>std::string</tt>.
 * \li Avro <tt>bytes</tt> maps to C++ <tt>std::vector<uint8_t></tt>.
52 * \li Avro <tt>fixed</tt> maps to C++ class <tt>GenericFixed</tt>.
53 * \li Avro <tt>enum</tt> maps to C++ class <tt>GenericEnum</tt>.
54 * \li Avro <tt>array</tt> maps to C++ class <tt>GenericArray</tt>.
55 * \li Avro <tt>map</tt> maps to C++ class <tt>GenericMap</tt>.
56 * \li There is no C++ type corresponding to Avro <tt>union</tt>. The
57 * object should have the C++ type corresponding to one of the constituent
58 * types of the union.
59 *
60 */
61 class AVRO_DECL GenericDatum {
62 protected:
63 Type type_;
64 LogicalType logicalType_;
65 #if __cplusplus >= 201703L
66 std::any value_;
67 #else
68 boost::any value_;
69 #endif
70
GenericDatum(Type t)71 explicit GenericDatum(Type t)
72 : type_(t), logicalType_(LogicalType::NONE) {}
73
GenericDatum(Type t,LogicalType logicalType)74 GenericDatum(Type t, LogicalType logicalType)
75 : type_(t), logicalType_(logicalType) {}
76
77 template<typename T>
GenericDatum(Type t,LogicalType logicalType,const T & v)78 GenericDatum(Type t, LogicalType logicalType, const T &v)
79 : type_(t), logicalType_(logicalType), value_(v) {}
80
81 void init(const NodePtr &schema);
82
83 public:
84 /**
85 * The avro data type this datum holds.
86 */
87 Type type() const;
88
89 /**
90 * The avro logical type that augments the main data type this datum holds.
91 */
92 LogicalType logicalType() const;
93
94 /**
95 * Returns the value held by this datum.
96 * T The type for the value. This must correspond to the
97 * avro type returned by type().
98 */
99 template<typename T>
100 const T &value() const;
101
102 /**
103 * Returns the reference to the value held by this datum, which
104 * can be used to change the contents. Please note that only
105 * value can be changed, the data type of the value held cannot
106 * be changed.
107 *
108 * T The type for the value. This must correspond to the
109 * avro type returned by type().
110 */
111 template<typename T>
112 T &value();
113
114 /**
115 * Returns true if and only if this datum is a union.
116 */
isUnion() const117 bool isUnion() const { return type_ == AVRO_UNION; }
118
119 /**
120 * Returns the index of the current branch, if this is a union.
121 * \sa isUnion().
122 */
123 size_t unionBranch() const;
124
125 /**
126 * Selects a new branch in the union if this is a union.
127 * \sa isUnion().
128 */
129 void selectBranch(size_t branch);
130
131 /// Makes a new AVRO_NULL datum.
GenericDatum()132 GenericDatum() : type_(AVRO_NULL), logicalType_(LogicalType::NONE) {}
133
134 /// Makes a new AVRO_BOOL datum whose value is of type bool.
135 /// We don't make this explicit constructor because we want to allow automatic conversion
136 // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(bool v)137 GenericDatum(bool v)
138 : type_(AVRO_BOOL), logicalType_(LogicalType::NONE), value_(v) {}
139
140 /// Makes a new AVRO_INT datum whose value is of type int32_t.
141 /// We don't make this explicit constructor because we want to allow automatic conversion
142 // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(int32_t v)143 GenericDatum(int32_t v)
144 : type_(AVRO_INT), logicalType_(LogicalType::NONE), value_(v) {}
145
146 /// Makes a new AVRO_LONG datum whose value is of type int64_t.
147 /// We don't make this explicit constructor because we want to allow automatic conversion
148 // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(int64_t v)149 GenericDatum(int64_t v)
150 : type_(AVRO_LONG), logicalType_(LogicalType::NONE), value_(v) {}
151
152 /// Makes a new AVRO_FLOAT datum whose value is of type float.
153 /// We don't make this explicit constructor because we want to allow automatic conversion
154 // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(float v)155 GenericDatum(float v)
156 : type_(AVRO_FLOAT), logicalType_(LogicalType::NONE), value_(v) {}
157
158 /// Makes a new AVRO_DOUBLE datum whose value is of type double.
159 /// We don't make this explicit constructor because we want to allow automatic conversion
160 // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(double v)161 GenericDatum(double v)
162 : type_(AVRO_DOUBLE), logicalType_(LogicalType::NONE), value_(v) {}
163
164 /// Makes a new AVRO_STRING datum whose value is of type std::string.
165 /// We don't make this explicit constructor because we want to allow automatic conversion
166 // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(const std::string & v)167 GenericDatum(const std::string &v)
168 : type_(AVRO_STRING), logicalType_(LogicalType::NONE), value_(v) {}
169
170 /// Makes a new AVRO_BYTES datum whose value is of type
171 /// std::vector<uint8_t>.
172 /// We don't make this explicit constructor because we want to allow automatic conversion
173 // NOLINTNEXTLINE(google-explicit-constructor)
GenericDatum(const std::vector<uint8_t> & v)174 GenericDatum(const std::vector<uint8_t> &v) : type_(AVRO_BYTES), logicalType_(LogicalType::NONE), value_(v) {}
175
176 /**
177 * Constructs a datum corresponding to the given avro type.
178 * The value will the appropriate default corresponding to the
179 * data type.
180 * \param schema The schema that defines the avro type.
181 */
182 /// We don't make this explicit constructor because we want to allow automatic conversion
183 // NOLINTNEXTLINE(google-explicit-constructor)
184 GenericDatum(const NodePtr &schema);
185
186 /**
187 * Constructs a datum corresponding to the given avro type and set
188 * the value.
189 * \param schema The schema that defines the avro type.
190 * \param v The value for this type.
191 */
192 template<typename T>
GenericDatum(const NodePtr & schema,const T & v)193 GenericDatum(const NodePtr &schema, const T &v) : type_(schema->type()), logicalType_(schema->logicalType()) {
194 init(schema);
195 #if __cplusplus >= 201703L
196 *std::any_cast<T>(&value_) = v;
197 #else
198 *boost::any_cast<T>(&value_) = v;
199 #endif
200 }
201
202 /**
203 * Constructs a datum corresponding to the given avro type.
204 * The value will the appropriate default corresponding to the
205 * data type.
206 * \param schema The schema that defines the avro type.
207 */
208 explicit GenericDatum(const ValidSchema &schema);
209 };
210
211 /**
 * The base class for all generic container types.
213 */
214 class AVRO_DECL GenericContainer {
215 NodePtr schema_;
216 static void assertType(const NodePtr &schema, Type type);
217
218 protected:
219 /**
220 * Constructs a container corresponding to the given schema.
221 */
GenericContainer(Type type,const NodePtr & s)222 GenericContainer(Type type, const NodePtr &s) : schema_(s) {
223 assertType(s, type);
224 }
225
226 public:
227 /// Returns the schema for this object
schema() const228 const NodePtr &schema() const {
229 return schema_;
230 }
231 };
232
233 /**
234 * Generic container for unions.
235 */
236 class AVRO_DECL GenericUnion : public GenericContainer {
237 size_t curBranch_;
238 GenericDatum datum_;
239
240 public:
241 /**
242 * Constructs a generic union corresponding to the given schema \p schema,
243 * and the given value. The schema should be of Avro type union
244 * and the value should correspond to one of the branches of the union.
245 */
GenericUnion(const NodePtr & schema)246 explicit GenericUnion(const NodePtr &schema) : GenericContainer(AVRO_UNION, schema), curBranch_(schema->leaves()) {
247 selectBranch(0);
248 }
249
250 /**
251 * Returns the index of the current branch.
252 */
currentBranch() const253 size_t currentBranch() const { return curBranch_; }
254
255 /**
256 * Selects a new branch. The type for the value is changed accordingly.
257 * \param branch The index for the selected branch.
258 */
selectBranch(size_t branch)259 void selectBranch(size_t branch) {
260 if (curBranch_ != branch) {
261 datum_ = GenericDatum(schema()->leafAt(branch));
262 curBranch_ = branch;
263 }
264 }
265
266 /**
267 * Returns the datum corresponding to the currently selected branch
268 * in this union.
269 */
datum()270 GenericDatum &datum() {
271 return datum_;
272 }
273
274 /**
275 * Returns the datum corresponding to the currently selected branch
276 * in this union.
277 */
datum() const278 const GenericDatum &datum() const {
279 return datum_;
280 }
281 };
282
283 /**
284 * The generic container for Avro records.
285 */
286 class AVRO_DECL GenericRecord : public GenericContainer {
287 std::vector<GenericDatum> fields_;
288
289 public:
290 /**
291 * Constructs a generic record corresponding to the given schema \p schema,
292 * which should be of Avro type record.
293 */
294 explicit GenericRecord(const NodePtr &schema);
295
296 /**
297 * Returns the number of fields in the current record.
298 */
fieldCount() const299 size_t fieldCount() const {
300 return fields_.size();
301 }
302
303 /**
304 * Returns index of the field with the given name \p name
305 */
fieldIndex(const std::string & name) const306 size_t fieldIndex(const std::string &name) const {
307 size_t index = 0;
308 if (!schema()->nameIndex(name, index)) {
309 throw Exception("Invalid field name: " + name);
310 }
311 return index;
312 }
313
314 /**
315 * Returns true if a field with the given name \p name is located in this r
316 * false otherwise
317 */
hasField(const std::string & name) const318 bool hasField(const std::string &name) const {
319 size_t index = 0;
320 return schema()->nameIndex(name, index);
321 }
322
323 /**
324 * Returns the field with the given name \p name.
325 */
field(const std::string & name) const326 const GenericDatum &field(const std::string &name) const {
327 return fieldAt(fieldIndex(name));
328 }
329
330 /**
331 * Returns the reference to the field with the given name \p name,
332 * which can be used to change the contents.
333 */
field(const std::string & name)334 GenericDatum &field(const std::string &name) {
335 return fieldAt(fieldIndex(name));
336 }
337
338 /**
339 * Returns the field at the given position \p pos.
340 */
fieldAt(size_t pos) const341 const GenericDatum &fieldAt(size_t pos) const {
342 return fields_[pos];
343 }
344
345 /**
346 * Returns the reference to the field at the given position \p pos,
347 * which can be used to change the contents.
348 */
fieldAt(size_t pos)349 GenericDatum &fieldAt(size_t pos) {
350 return fields_[pos];
351 }
352
353 /**
354 * Replaces the field at the given position \p pos with \p v.
355 */
setFieldAt(size_t pos,const GenericDatum & v)356 void setFieldAt(size_t pos, const GenericDatum &v) {
357 // assertSameType(v, schema()->leafAt(pos));
358 fields_[pos] = v;
359 }
360 };
361
362 /**
363 * The generic container for Avro arrays.
364 */
365 class AVRO_DECL GenericArray : public GenericContainer {
366 public:
367 /**
368 * The contents type for the array.
369 */
370 typedef std::vector<GenericDatum> Value;
371
372 /**
373 * Constructs a generic array corresponding to the given schema \p schema,
374 * which should be of Avro type array.
375 */
GenericArray(const NodePtr & schema)376 explicit GenericArray(const NodePtr &schema) : GenericContainer(AVRO_ARRAY, schema) {
377 }
378
379 /**
380 * Returns the contents of this array.
381 */
value() const382 const Value &value() const {
383 return value_;
384 }
385
386 /**
387 * Returns the reference to the contents of this array.
388 */
value()389 Value &value() {
390 return value_;
391 }
392
393 private:
394 Value value_;
395 };
396
397 /**
398 * The generic container for Avro maps.
399 */
400 class AVRO_DECL GenericMap : public GenericContainer {
401 public:
402 /**
403 * The contents type for the map.
404 */
405 typedef std::vector<std::pair<std::string, GenericDatum>> Value;
406
407 /**
408 * Constructs a generic map corresponding to the given schema \p schema,
409 * which should be of Avro type map.
410 */
GenericMap(const NodePtr & schema)411 explicit GenericMap(const NodePtr &schema) : GenericContainer(AVRO_MAP, schema) {
412 }
413
414 /**
415 * Returns the contents of this map.
416 */
value() const417 const Value &value() const {
418 return value_;
419 }
420
421 /**
422 * Returns the reference to the contents of this map.
423 */
value()424 Value &value() {
425 return value_;
426 }
427
428 private:
429 Value value_;
430 };
431
432 /**
433 * Generic container for Avro enum.
434 */
435 class AVRO_DECL GenericEnum : public GenericContainer {
436 size_t value_;
437
index(const NodePtr & schema,const std::string & symbol)438 static size_t index(const NodePtr &schema, const std::string &symbol) {
439 size_t result;
440 if (schema->nameIndex(symbol, result)) {
441 return result;
442 }
443 throw Exception("No such symbol");
444 }
445
446 public:
447 /**
448 * Constructs a generic enum corresponding to the given schema \p schema,
449 * which should be of Avro type enum.
450 */
GenericEnum(const NodePtr & schema)451 explicit GenericEnum(const NodePtr &schema) : GenericContainer(AVRO_ENUM, schema), value_(0) {
452 }
453
GenericEnum(const NodePtr & schema,const std::string & symbol)454 GenericEnum(const NodePtr &schema, const std::string &symbol) : GenericContainer(AVRO_ENUM, schema), value_(index(schema, symbol)) {
455 }
456
457 /**
458 * Returns the symbol corresponding to the cardinal \p n. If the
459 * value for \p n is not within the limits an exception is thrown.
460 */
symbol(size_t n)461 const std::string &symbol(size_t n) {
462 if (n < schema()->names()) {
463 return schema()->nameAt(n);
464 }
465 throw Exception("Not as many symbols");
466 }
467
468 /**
469 * Returns the cardinal for the given symbol \c symbol. If the symbol
470 * is not defined for this enum and exception is thrown.
471 */
index(const std::string & symbol) const472 size_t index(const std::string &symbol) const {
473 return index(schema(), symbol);
474 }
475
476 /**
477 * Set the value for this enum corresponding to the given symbol \c symbol.
478 */
set(const std::string & symbol)479 size_t set(const std::string &symbol) {
480 return value_ = index(symbol);
481 }
482
483 /**
484 * Set the value for this enum corresponding to the given cardinal \c n.
485 */
set(size_t n)486 void set(size_t n) {
487 if (n < schema()->names()) {
488 value_ = n;
489 return;
490 }
491 throw Exception("Not as many symbols");
492 }
493
494 /**
495 * Returns the cardinal for the current value of this enum.
496 */
value() const497 size_t value() const {
498 return value_;
499 }
500
501 /**
502 * Returns the symbol for the current value of this enum.
503 */
symbol() const504 const std::string &symbol() const {
505 return schema()->nameAt(value_);
506 }
507 };
508
509 /**
510 * Generic container for Avro fixed.
511 */
512 class AVRO_DECL GenericFixed : public GenericContainer {
513 std::vector<uint8_t> value_;
514
515 public:
516 /**
517 * Constructs a generic enum corresponding to the given schema \p schema,
518 * which should be of Avro type fixed.
519 */
GenericFixed(const NodePtr & schema)520 explicit GenericFixed(const NodePtr &schema) : GenericContainer(AVRO_FIXED, schema) {
521 value_.resize(schema->fixedSize());
522 }
523
524 GenericFixed(const NodePtr &schema, const std::vector<uint8_t> &v);
525
526 /**
527 * Returns the contents of this fixed.
528 */
value() const529 const std::vector<uint8_t> &value() const {
530 return value_;
531 }
532
533 /**
534 * Returns the reference to the contents of this fixed.
535 */
value()536 std::vector<uint8_t> &value() {
537 return value_;
538 }
539 };
540
type() const541 inline Type GenericDatum::type() const {
542 return (type_ == AVRO_UNION) ?
543 #if __cplusplus >= 201703L
544 std::any_cast<GenericUnion>(&value_)->datum().type()
545 :
546 #else
547 boost::any_cast<GenericUnion>(&value_)->datum().type()
548 :
549 #endif
550 type_;
551 }
552
logicalType() const553 inline LogicalType GenericDatum::logicalType() const {
554 return (type_ == AVRO_UNION) ?
555 #if __cplusplus >= 201703L
556 std::any_cast<GenericUnion>(&value_)->datum().logicalType() :
557 #else
558 boost::any_cast<GenericUnion>(&value_)->datum().logicalType() :
559 #endif
560 logicalType_;
561 }
562
563 template<typename T>
value()564 T &GenericDatum::value() {
565 return (type_ == AVRO_UNION) ?
566 #if __cplusplus >= 201703L
567 std::any_cast<GenericUnion>(&value_)->datum().value<T>()
568 : *std::any_cast<T>(&value_);
569 #else
570 boost::any_cast<GenericUnion>(&value_)->datum().value<T>()
571 : *boost::any_cast<T>(&value_);
572 #endif
573 }
574
575 template<typename T>
value() const576 const T &GenericDatum::value() const {
577 return (type_ == AVRO_UNION) ?
578 #if __cplusplus >= 201703L
579 std::any_cast<GenericUnion>(&value_)->datum().value<T>()
580 : *std::any_cast<T>(&value_);
581 #else
582 boost::any_cast<GenericUnion>(&value_)->datum().value<T>()
583 : *boost::any_cast<T>(&value_);
584 #endif
585 }
586
unionBranch() const587 inline size_t GenericDatum::unionBranch() const {
588 #if __cplusplus >= 201703L
589 return std::any_cast<GenericUnion>(&value_)->currentBranch();
590 #else
591 return boost::any_cast<GenericUnion>(&value_)->currentBranch();
592 #endif
593 }
594
selectBranch(size_t branch)595 inline void GenericDatum::selectBranch(size_t branch) {
596 #if __cplusplus >= 201703L
597 std::any_cast<GenericUnion>(&value_)->selectBranch(branch);
598 #else
599 boost::any_cast<GenericUnion>(&value_)->selectBranch(branch);
600 #endif
601 }
602
603 } // namespace avro
604 #endif // avro_GenericDatum_hh__
605