// GenericDatum.hh
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef avro_GenericDatum_hh__
  19. #define avro_GenericDatum_hh__
  20. #include <cstdint>
  21. #include <map>
  22. #include <string>
  23. #include <vector>
  24. #if __cplusplus >= 201703L
  25. #include <any>
  26. #else
  27. #include "boost/any.hpp"
  28. #endif
  29. #include "LogicalType.hh"
  30. #include "Node.hh"
  31. #include "ValidSchema.hh"
  32. namespace avro {
  33. /**
  34. * Generic datum which can hold any Avro type. The datum has a type
  35. * and a value. The type is one of the Avro data types. The C++ type for
  36. * value corresponds to the Avro type.
  37. * \li An Avro <tt>null</tt> corresponds to no C++ type. It is illegal to
  38. * to try to access values for <tt>null</tt>.
  39. * \li Avro <tt>boolean</tt> maps to C++ <tt>bool</tt>
  40. * \li Avro <tt>int</tt> maps to C++ <tt>int32_t</tt>.
  41. * \li Avro <tt>long</tt> maps to C++ <tt>int64_t</tt>.
  42. * \li Avro <tt>float</tt> maps to C++ <tt>float</tt>.
  43. * \li Avro <tt>double</tt> maps to C++ <tt>double</tt>.
  44. * \li Avro <tt>string</tt> maps to C++ <tt>std::string</tt>.
  45. * \li Avro <tt>bytes</tt> maps to C++ <tt>std::vector&lt;uint_t&gt;</tt>.
  46. * \li Avro <tt>fixed</tt> maps to C++ class <tt>GenericFixed</tt>.
  47. * \li Avro <tt>enum</tt> maps to C++ class <tt>GenericEnum</tt>.
  48. * \li Avro <tt>array</tt> maps to C++ class <tt>GenericArray</tt>.
  49. * \li Avro <tt>map</tt> maps to C++ class <tt>GenericMap</tt>.
  50. * \li There is no C++ type corresponding to Avro <tt>union</tt>. The
  51. * object should have the C++ type corresponding to one of the constituent
  52. * types of the union.
  53. *
  54. */
  55. class AVRO_DECL GenericDatum {
  56. protected:
  57. Type type_;
  58. LogicalType logicalType_;
  59. #if __cplusplus >= 201703L
  60. std::any value_;
  61. #else
  62. boost::any value_;
  63. #endif
  64. explicit GenericDatum(Type t)
  65. : type_(t), logicalType_(LogicalType::NONE) {}
  66. GenericDatum(Type t, LogicalType logicalType)
  67. : type_(t), logicalType_(logicalType) {}
  68. template<typename T>
  69. GenericDatum(Type t, LogicalType logicalType, const T &v)
  70. : type_(t), logicalType_(logicalType), value_(v) {}
  71. void init(const NodePtr &schema);
  72. public:
  73. /**
  74. * The avro data type this datum holds.
  75. */
  76. Type type() const;
  77. /**
  78. * The avro logical type that augments the main data type this datum holds.
  79. */
  80. LogicalType logicalType() const;
  81. /**
  82. * Returns the value held by this datum.
  83. * T The type for the value. This must correspond to the
  84. * avro type returned by type().
  85. */
  86. template<typename T>
  87. const T &value() const;
  88. /**
  89. * Returns the reference to the value held by this datum, which
  90. * can be used to change the contents. Please note that only
  91. * value can be changed, the data type of the value held cannot
  92. * be changed.
  93. *
  94. * T The type for the value. This must correspond to the
  95. * avro type returned by type().
  96. */
  97. template<typename T>
  98. T &value();
  99. /**
  100. * Returns true if and only if this datum is a union.
  101. */
  102. bool isUnion() const { return type_ == AVRO_UNION; }
  103. /**
  104. * Returns the index of the current branch, if this is a union.
  105. * \sa isUnion().
  106. */
  107. size_t unionBranch() const;
  108. /**
  109. * Selects a new branch in the union if this is a union.
  110. * \sa isUnion().
  111. */
  112. void selectBranch(size_t branch);
  113. /// Makes a new AVRO_NULL datum.
  114. GenericDatum() : type_(AVRO_NULL), logicalType_(LogicalType::NONE) {}
  115. /// Makes a new AVRO_BOOL datum whose value is of type bool.
  116. /// We don't make this explicit constructor because we want to allow automatic conversion
  117. // NOLINTNEXTLINE(google-explicit-constructor)
  118. GenericDatum(bool v)
  119. : type_(AVRO_BOOL), logicalType_(LogicalType::NONE), value_(v) {}
  120. /// Makes a new AVRO_INT datum whose value is of type int32_t.
  121. /// We don't make this explicit constructor because we want to allow automatic conversion
  122. // NOLINTNEXTLINE(google-explicit-constructor)
  123. GenericDatum(int32_t v)
  124. : type_(AVRO_INT), logicalType_(LogicalType::NONE), value_(v) {}
  125. /// Makes a new AVRO_LONG datum whose value is of type int64_t.
  126. /// We don't make this explicit constructor because we want to allow automatic conversion
  127. // NOLINTNEXTLINE(google-explicit-constructor)
  128. GenericDatum(int64_t v)
  129. : type_(AVRO_LONG), logicalType_(LogicalType::NONE), value_(v) {}
  130. /// Makes a new AVRO_FLOAT datum whose value is of type float.
  131. /// We don't make this explicit constructor because we want to allow automatic conversion
  132. // NOLINTNEXTLINE(google-explicit-constructor)
  133. GenericDatum(float v)
  134. : type_(AVRO_FLOAT), logicalType_(LogicalType::NONE), value_(v) {}
  135. /// Makes a new AVRO_DOUBLE datum whose value is of type double.
  136. /// We don't make this explicit constructor because we want to allow automatic conversion
  137. // NOLINTNEXTLINE(google-explicit-constructor)
  138. GenericDatum(double v)
  139. : type_(AVRO_DOUBLE), logicalType_(LogicalType::NONE), value_(v) {}
  140. /// Makes a new AVRO_STRING datum whose value is of type std::string.
  141. /// We don't make this explicit constructor because we want to allow automatic conversion
  142. // NOLINTNEXTLINE(google-explicit-constructor)
  143. GenericDatum(const std::string &v)
  144. : type_(AVRO_STRING), logicalType_(LogicalType::NONE), value_(v) {}
  145. /// Makes a new AVRO_BYTES datum whose value is of type
  146. /// std::vector<uint8_t>.
  147. /// We don't make this explicit constructor because we want to allow automatic conversion
  148. // NOLINTNEXTLINE(google-explicit-constructor)
  149. GenericDatum(const std::vector<uint8_t> &v) : type_(AVRO_BYTES), logicalType_(LogicalType::NONE), value_(v) {}
  150. /**
  151. * Constructs a datum corresponding to the given avro type.
  152. * The value will the appropriate default corresponding to the
  153. * data type.
  154. * \param schema The schema that defines the avro type.
  155. */
  156. /// We don't make this explicit constructor because we want to allow automatic conversion
  157. // NOLINTNEXTLINE(google-explicit-constructor)
  158. GenericDatum(const NodePtr &schema);
  159. /**
  160. * Constructs a datum corresponding to the given avro type and set
  161. * the value.
  162. * \param schema The schema that defines the avro type.
  163. * \param v The value for this type.
  164. */
  165. template<typename T>
  166. GenericDatum(const NodePtr &schema, const T &v) : type_(schema->type()), logicalType_(schema->logicalType()) {
  167. init(schema);
  168. #if __cplusplus >= 201703L
  169. *std::any_cast<T>(&value_) = v;
  170. #else
  171. *boost::any_cast<T>(&value_) = v;
  172. #endif
  173. }
  174. /**
  175. * Constructs a datum corresponding to the given avro type.
  176. * The value will the appropriate default corresponding to the
  177. * data type.
  178. * \param schema The schema that defines the avro type.
  179. */
  180. explicit GenericDatum(const ValidSchema &schema);
  181. };
  182. /**
  183. * The base class for all generic type for containers.
  184. */
  185. class AVRO_DECL GenericContainer {
  186. NodePtr schema_;
  187. static void assertType(const NodePtr &schema, Type type);
  188. protected:
  189. /**
  190. * Constructs a container corresponding to the given schema.
  191. */
  192. GenericContainer(Type type, const NodePtr &s) : schema_(s) {
  193. assertType(s, type);
  194. }
  195. public:
  196. /// Returns the schema for this object
  197. const NodePtr &schema() const {
  198. return schema_;
  199. }
  200. };
  201. /**
  202. * Generic container for unions.
  203. */
  204. class AVRO_DECL GenericUnion : public GenericContainer {
  205. size_t curBranch_;
  206. GenericDatum datum_;
  207. public:
  208. /**
  209. * Constructs a generic union corresponding to the given schema \p schema,
  210. * and the given value. The schema should be of Avro type union
  211. * and the value should correspond to one of the branches of the union.
  212. */
  213. explicit GenericUnion(const NodePtr &schema) : GenericContainer(AVRO_UNION, schema), curBranch_(schema->leaves()) {
  214. selectBranch(0);
  215. }
  216. /**
  217. * Returns the index of the current branch.
  218. */
  219. size_t currentBranch() const { return curBranch_; }
  220. /**
  221. * Selects a new branch. The type for the value is changed accordingly.
  222. * \param branch The index for the selected branch.
  223. */
  224. void selectBranch(size_t branch) {
  225. if (curBranch_ != branch) {
  226. datum_ = GenericDatum(schema()->leafAt(branch));
  227. curBranch_ = branch;
  228. }
  229. }
  230. /**
  231. * Returns the datum corresponding to the currently selected branch
  232. * in this union.
  233. */
  234. GenericDatum &datum() {
  235. return datum_;
  236. }
  237. /**
  238. * Returns the datum corresponding to the currently selected branch
  239. * in this union.
  240. */
  241. const GenericDatum &datum() const {
  242. return datum_;
  243. }
  244. };
  245. /**
  246. * The generic container for Avro records.
  247. */
  248. class AVRO_DECL GenericRecord : public GenericContainer {
  249. std::vector<GenericDatum> fields_;
  250. public:
  251. /**
  252. * Constructs a generic record corresponding to the given schema \p schema,
  253. * which should be of Avro type record.
  254. */
  255. explicit GenericRecord(const NodePtr &schema);
  256. /**
  257. * Returns the number of fields in the current record.
  258. */
  259. size_t fieldCount() const {
  260. return fields_.size();
  261. }
  262. /**
  263. * Returns index of the field with the given name \p name
  264. */
  265. size_t fieldIndex(const std::string &name) const {
  266. size_t index = 0;
  267. if (!schema()->nameIndex(name, index)) {
  268. throw Exception("Invalid field name: " + name);
  269. }
  270. return index;
  271. }
  272. /**
  273. * Returns true if a field with the given name \p name is located in this r
  274. * false otherwise
  275. */
  276. bool hasField(const std::string &name) const {
  277. size_t index = 0;
  278. return schema()->nameIndex(name, index);
  279. }
  280. /**
  281. * Returns the field with the given name \p name.
  282. */
  283. const GenericDatum &field(const std::string &name) const {
  284. return fieldAt(fieldIndex(name));
  285. }
  286. /**
  287. * Returns the reference to the field with the given name \p name,
  288. * which can be used to change the contents.
  289. */
  290. GenericDatum &field(const std::string &name) {
  291. return fieldAt(fieldIndex(name));
  292. }
  293. /**
  294. * Returns the field at the given position \p pos.
  295. */
  296. const GenericDatum &fieldAt(size_t pos) const {
  297. return fields_[pos];
  298. }
  299. /**
  300. * Returns the reference to the field at the given position \p pos,
  301. * which can be used to change the contents.
  302. */
  303. GenericDatum &fieldAt(size_t pos) {
  304. return fields_[pos];
  305. }
  306. /**
  307. * Replaces the field at the given position \p pos with \p v.
  308. */
  309. void setFieldAt(size_t pos, const GenericDatum &v) {
  310. // assertSameType(v, schema()->leafAt(pos));
  311. fields_[pos] = v;
  312. }
  313. };
  314. /**
  315. * The generic container for Avro arrays.
  316. */
  317. class AVRO_DECL GenericArray : public GenericContainer {
  318. public:
  319. /**
  320. * The contents type for the array.
  321. */
  322. typedef std::vector<GenericDatum> Value;
  323. /**
  324. * Constructs a generic array corresponding to the given schema \p schema,
  325. * which should be of Avro type array.
  326. */
  327. explicit GenericArray(const NodePtr &schema) : GenericContainer(AVRO_ARRAY, schema) {
  328. }
  329. /**
  330. * Returns the contents of this array.
  331. */
  332. const Value &value() const {
  333. return value_;
  334. }
  335. /**
  336. * Returns the reference to the contents of this array.
  337. */
  338. Value &value() {
  339. return value_;
  340. }
  341. private:
  342. Value value_;
  343. };
  344. /**
  345. * The generic container for Avro maps.
  346. */
  347. class AVRO_DECL GenericMap : public GenericContainer {
  348. public:
  349. /**
  350. * The contents type for the map.
  351. */
  352. typedef std::vector<std::pair<std::string, GenericDatum>> Value;
  353. /**
  354. * Constructs a generic map corresponding to the given schema \p schema,
  355. * which should be of Avro type map.
  356. */
  357. explicit GenericMap(const NodePtr &schema) : GenericContainer(AVRO_MAP, schema) {
  358. }
  359. /**
  360. * Returns the contents of this map.
  361. */
  362. const Value &value() const {
  363. return value_;
  364. }
  365. /**
  366. * Returns the reference to the contents of this map.
  367. */
  368. Value &value() {
  369. return value_;
  370. }
  371. private:
  372. Value value_;
  373. };
  374. /**
  375. * Generic container for Avro enum.
  376. */
  377. class AVRO_DECL GenericEnum : public GenericContainer {
  378. size_t value_;
  379. static size_t index(const NodePtr &schema, const std::string &symbol) {
  380. size_t result;
  381. if (schema->nameIndex(symbol, result)) {
  382. return result;
  383. }
  384. throw Exception("No such symbol");
  385. }
  386. public:
  387. /**
  388. * Constructs a generic enum corresponding to the given schema \p schema,
  389. * which should be of Avro type enum.
  390. */
  391. explicit GenericEnum(const NodePtr &schema) : GenericContainer(AVRO_ENUM, schema), value_(0) {
  392. }
  393. GenericEnum(const NodePtr &schema, const std::string &symbol) : GenericContainer(AVRO_ENUM, schema), value_(index(schema, symbol)) {
  394. }
  395. /**
  396. * Returns the symbol corresponding to the cardinal \p n. If the
  397. * value for \p n is not within the limits an exception is thrown.
  398. */
  399. const std::string &symbol(size_t n) {
  400. if (n < schema()->names()) {
  401. return schema()->nameAt(n);
  402. }
  403. throw Exception("Not as many symbols");
  404. }
  405. /**
  406. * Returns the cardinal for the given symbol \c symbol. If the symbol
  407. * is not defined for this enum and exception is thrown.
  408. */
  409. size_t index(const std::string &symbol) const {
  410. return index(schema(), symbol);
  411. }
  412. /**
  413. * Set the value for this enum corresponding to the given symbol \c symbol.
  414. */
  415. size_t set(const std::string &symbol) {
  416. return value_ = index(symbol);
  417. }
  418. /**
  419. * Set the value for this enum corresponding to the given cardinal \c n.
  420. */
  421. void set(size_t n) {
  422. if (n < schema()->names()) {
  423. value_ = n;
  424. return;
  425. }
  426. throw Exception("Not as many symbols");
  427. }
  428. /**
  429. * Returns the cardinal for the current value of this enum.
  430. */
  431. size_t value() const {
  432. return value_;
  433. }
  434. /**
  435. * Returns the symbol for the current value of this enum.
  436. */
  437. const std::string &symbol() const {
  438. return schema()->nameAt(value_);
  439. }
  440. };
  441. /**
  442. * Generic container for Avro fixed.
  443. */
  444. class AVRO_DECL GenericFixed : public GenericContainer {
  445. std::vector<uint8_t> value_;
  446. public:
  447. /**
  448. * Constructs a generic enum corresponding to the given schema \p schema,
  449. * which should be of Avro type fixed.
  450. */
  451. explicit GenericFixed(const NodePtr &schema) : GenericContainer(AVRO_FIXED, schema) {
  452. value_.resize(schema->fixedSize());
  453. }
  454. GenericFixed(const NodePtr &schema, const std::vector<uint8_t> &v);
  455. /**
  456. * Returns the contents of this fixed.
  457. */
  458. const std::vector<uint8_t> &value() const {
  459. return value_;
  460. }
  461. /**
  462. * Returns the reference to the contents of this fixed.
  463. */
  464. std::vector<uint8_t> &value() {
  465. return value_;
  466. }
  467. };
  468. inline Type GenericDatum::type() const {
  469. return (type_ == AVRO_UNION) ?
  470. #if __cplusplus >= 201703L
  471. std::any_cast<GenericUnion>(&value_)->datum().type()
  472. :
  473. #else
  474. boost::any_cast<GenericUnion>(&value_)->datum().type()
  475. :
  476. #endif
  477. type_;
  478. }
  479. inline LogicalType GenericDatum::logicalType() const {
  480. return (type_ == AVRO_UNION) ?
  481. #if __cplusplus >= 201703L
  482. std::any_cast<GenericUnion>(&value_)->datum().logicalType() :
  483. #else
  484. boost::any_cast<GenericUnion>(&value_)->datum().logicalType() :
  485. #endif
  486. logicalType_;
  487. }
  488. template<typename T>
  489. T &GenericDatum::value() {
  490. return (type_ == AVRO_UNION) ?
  491. #if __cplusplus >= 201703L
  492. std::any_cast<GenericUnion>(&value_)->datum().value<T>()
  493. : *std::any_cast<T>(&value_);
  494. #else
  495. boost::any_cast<GenericUnion>(&value_)->datum().value<T>()
  496. : *boost::any_cast<T>(&value_);
  497. #endif
  498. }
  499. template<typename T>
  500. const T &GenericDatum::value() const {
  501. return (type_ == AVRO_UNION) ?
  502. #if __cplusplus >= 201703L
  503. std::any_cast<GenericUnion>(&value_)->datum().value<T>()
  504. : *std::any_cast<T>(&value_);
  505. #else
  506. boost::any_cast<GenericUnion>(&value_)->datum().value<T>()
  507. : *boost::any_cast<T>(&value_);
  508. #endif
  509. }
  510. inline size_t GenericDatum::unionBranch() const {
  511. #if __cplusplus >= 201703L
  512. return std::any_cast<GenericUnion>(&value_)->currentBranch();
  513. #else
  514. return boost::any_cast<GenericUnion>(&value_)->currentBranch();
  515. #endif
  516. }
  517. inline void GenericDatum::selectBranch(size_t branch) {
  518. #if __cplusplus >= 201703L
  519. std::any_cast<GenericUnion>(&value_)->selectBranch(branch);
  520. #else
  521. boost::any_cast<GenericUnion>(&value_)->selectBranch(branch);
  522. #endif
  523. }
  524. } // namespace avro
  525. #endif // avro_GenericDatum_hh__