NodeImpl.hh 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef avro_NodeImpl_hh__
  19. #define avro_NodeImpl_hh__
  20. #include "Config.hh"
  21. #include "GenericDatum.hh"
  22. #include <iomanip>
  23. #include <iostream>
  24. #include <limits>
  25. #include <memory>
  26. #include <set>
  27. #include <sstream>
  28. #include <utility>
  29. #include "Node.hh"
  30. #include "NodeConcepts.hh"
  31. #include "CustomAttributes.hh"
  32. namespace avro {
  33. /// Implementation details for Node. NodeImpl represents all the avro types,
  34. /// whose properties are enabled and disabled by selecting concept classes.
  35. template<
  36. class NameConcept,
  37. class LeavesConcept,
  38. class LeafNamesConcept,
  39. class MultiAttributesConcept,
  40. class SizeConcept>
  41. class NodeImpl : public Node {
  42. protected:
  43. explicit NodeImpl(Type type) : Node(type),
  44. nameAttribute_(),
  45. docAttribute_(),
  46. leafAttributes_(),
  47. leafNameAttributes_(),
  48. customAttributes_(),
  49. sizeAttribute_() {}
  50. NodeImpl(Type type,
  51. const NameConcept &name,
  52. const LeavesConcept &leaves,
  53. const LeafNamesConcept &leafNames,
  54. const MultiAttributesConcept &customAttributes,
  55. const SizeConcept &size) : Node(type),
  56. nameAttribute_(name),
  57. docAttribute_(),
  58. leafAttributes_(leaves),
  59. leafNameAttributes_(leafNames),
  60. customAttributes_(customAttributes),
  61. sizeAttribute_(size) {}
  62. // Ctor with "doc"
  63. NodeImpl(Type type,
  64. const NameConcept &name,
  65. const concepts::SingleAttribute<std::string> &doc,
  66. const LeavesConcept &leaves,
  67. const LeafNamesConcept &leafNames,
  68. const MultiAttributesConcept &customAttributes,
  69. const SizeConcept &size) : Node(type),
  70. nameAttribute_(name),
  71. docAttribute_(doc),
  72. leafAttributes_(leaves),
  73. leafNameAttributes_(leafNames),
  74. customAttributes_(customAttributes),
  75. sizeAttribute_(size) {}
  76. void swap(NodeImpl &impl) {
  77. std::swap(nameAttribute_, impl.nameAttribute_);
  78. std::swap(docAttribute_, impl.docAttribute_);
  79. std::swap(leafAttributes_, impl.leafAttributes_);
  80. std::swap(leafNameAttributes_, impl.leafNameAttributes_);
  81. std::swap(sizeAttribute_, impl.sizeAttribute_);
  82. std::swap(customAttributes_, impl.customAttributes_);
  83. std::swap(nameIndex_, impl.nameIndex_);
  84. }
  85. bool hasName() const override {
  86. // e.g.: true for single and multi-attributes, false for no-attributes.
  87. return NameConcept::hasAttribute;
  88. }
  89. void doSetName(const Name &name) override {
  90. nameAttribute_.add(name);
  91. }
  92. const Name &name() const override {
  93. return nameAttribute_.get();
  94. }
  95. void doSetDoc(const std::string &doc) override {
  96. docAttribute_.add(doc);
  97. }
  98. const std::string &getDoc() const override {
  99. return docAttribute_.get();
  100. }
  101. void doAddLeaf(const NodePtr &newLeaf) final {
  102. leafAttributes_.add(newLeaf);
  103. }
  104. size_t leaves() const override {
  105. return leafAttributes_.size();
  106. }
  107. const NodePtr &leafAt(size_t index) const override {
  108. return leafAttributes_.get(index);
  109. }
  110. void doAddName(const std::string &name) override {
  111. if (!nameIndex_.add(name, leafNameAttributes_.size())) {
  112. throw Exception(boost::format("Cannot add duplicate name: %1%") % name);
  113. }
  114. leafNameAttributes_.add(name);
  115. }
  116. size_t names() const override {
  117. return leafNameAttributes_.size();
  118. }
  119. const std::string &nameAt(size_t index) const override {
  120. return leafNameAttributes_.get(index);
  121. }
  122. bool nameIndex(const std::string &name, size_t &index) const override {
  123. return nameIndex_.lookup(name, index);
  124. }
  125. void doSetFixedSize(size_t size) override {
  126. sizeAttribute_.add(size);
  127. }
  128. size_t fixedSize() const override {
  129. return sizeAttribute_.get();
  130. }
  131. bool isValid() const override = 0;
  132. void printBasicInfo(std::ostream &os) const override;
  133. void setLeafToSymbolic(size_t index, const NodePtr &node) override;
  134. void doAddCustomAttribute(const CustomAttributes &customAttributes) override {
  135. customAttributes_.add(customAttributes);
  136. }
  137. SchemaResolution furtherResolution(const Node &reader) const {
  138. SchemaResolution match = RESOLVE_NO_MATCH;
  139. if (reader.type() == AVRO_SYMBOLIC) {
  140. // resolve the symbolic type, and check again
  141. const NodePtr &node = reader.leafAt(0);
  142. match = resolve(*node);
  143. } else if (reader.type() == AVRO_UNION) {
  144. // in this case, need to see if there is an exact match for the
  145. // writer's type, or if not, the first one that can be promoted to a
  146. // match
  147. for (size_t i = 0; i < reader.leaves(); ++i) {
  148. const NodePtr &node = reader.leafAt(i);
  149. SchemaResolution thisMatch = resolve(*node);
  150. // if matched then the search is done
  151. if (thisMatch == RESOLVE_MATCH) {
  152. match = thisMatch;
  153. break;
  154. }
  155. // thisMatch is either no match, or promotable, this will set match to
  156. // promotable if it hasn't been set already
  157. if (match == RESOLVE_NO_MATCH) {
  158. match = thisMatch;
  159. }
  160. }
  161. }
  162. return match;
  163. }
  164. NameConcept nameAttribute_;
  165. // Rem: NameConcept type is HasName (= SingleAttribute<Name>), we use std::string instead
  166. concepts::SingleAttribute<std::string> docAttribute_; /** Doc used to compare schemas */
  167. LeavesConcept leafAttributes_;
  168. LeafNamesConcept leafNameAttributes_;
  169. MultiAttributesConcept customAttributes_;
  170. SizeConcept sizeAttribute_;
  171. concepts::NameIndexConcept<LeafNamesConcept> nameIndex_;
  172. };
  173. using NoName = concepts::NoAttribute<Name>;
  174. using HasName = concepts::SingleAttribute<Name>;
  175. using HasDoc = concepts::SingleAttribute<std::string>;
  176. using NoLeaves = concepts::NoAttribute<NodePtr>;
  177. using SingleLeaf = concepts::SingleAttribute<NodePtr>;
  178. using MultiLeaves = concepts::MultiAttribute<NodePtr>;
  179. using NoLeafNames = concepts::NoAttribute<std::string>;
  180. using LeafNames = concepts::MultiAttribute<std::string>;
  181. using MultiAttributes = concepts::MultiAttribute<CustomAttributes>;
  182. using NoAttributes = concepts::NoAttribute<CustomAttributes>;
  183. using NoSize = concepts::NoAttribute<int>;
  184. using HasSize = concepts::SingleAttribute<int>;
  185. using NodeImplPrimitive = NodeImpl<NoName, NoLeaves, NoLeafNames, MultiAttributes, NoSize>;
  186. using NodeImplSymbolic = NodeImpl<HasName, NoLeaves, NoLeafNames, NoAttributes, NoSize>;
  187. using NodeImplRecord = NodeImpl<HasName, MultiLeaves, LeafNames, MultiAttributes, NoSize>;
  188. using NodeImplEnum = NodeImpl<HasName, NoLeaves, LeafNames, NoAttributes, NoSize>;
  189. using NodeImplArray = NodeImpl<NoName, SingleLeaf, NoLeafNames, NoAttributes, NoSize>;
  190. using NodeImplMap = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoAttributes, NoSize>;
  191. using NodeImplUnion = NodeImpl<NoName, MultiLeaves, NoLeafNames, NoAttributes, NoSize>;
  192. using NodeImplFixed = NodeImpl<HasName, NoLeaves, NoLeafNames, NoAttributes, HasSize>;
  193. class AVRO_DECL NodePrimitive : public NodeImplPrimitive {
  194. public:
  195. explicit NodePrimitive(Type type) : NodeImplPrimitive(type) {}
  196. SchemaResolution resolve(const Node &reader) const override;
  197. void printJson(std::ostream &os, size_t depth) const override;
  198. bool isValid() const override {
  199. return true;
  200. }
  201. void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
  202. };
  203. class AVRO_DECL NodeSymbolic : public NodeImplSymbolic {
  204. using NodeWeakPtr = std::weak_ptr<Node>;
  205. public:
  206. NodeSymbolic() : NodeImplSymbolic(AVRO_SYMBOLIC) {}
  207. explicit NodeSymbolic(const HasName &name) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoAttributes(), NoSize()) {}
  208. NodeSymbolic(const HasName &name, const NodePtr &n) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoAttributes(), NoSize()), actualNode_(n) {}
  209. SchemaResolution resolve(const Node &reader) const override;
  210. void printJson(std::ostream &os, size_t depth) const override;
  211. bool isValid() const override {
  212. return (nameAttribute_.size() == 1);
  213. }
  214. void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
  215. bool isSet() const {
  216. return (actualNode_.lock() != nullptr);
  217. }
  218. NodePtr getNode() const {
  219. NodePtr node = actualNode_.lock();
  220. if (!node) {
  221. throw Exception(boost::format("Could not follow symbol %1%") % name());
  222. }
  223. return node;
  224. }
  225. void setNode(const NodePtr &node) {
  226. actualNode_ = node;
  227. }
  228. protected:
  229. NodeWeakPtr actualNode_;
  230. };
  231. class AVRO_DECL NodeRecord : public NodeImplRecord {
  232. std::vector<GenericDatum> defaultValues;
  233. public:
  234. NodeRecord() : NodeImplRecord(AVRO_RECORD) {}
  235. NodeRecord(const HasName &name, const MultiLeaves &fields,
  236. const LeafNames &fieldsNames,
  237. std::vector<GenericDatum> dv);
  238. NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
  239. const LeafNames &fieldsNames,
  240. std::vector<GenericDatum> dv) : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, MultiAttributes(), NoSize()),
  241. defaultValues(std::move(dv)) {
  242. leafNameCheck();
  243. }
  244. NodeRecord(const HasName &name, const MultiLeaves &fields,
  245. const LeafNames &fieldsNames,
  246. const std::vector<GenericDatum>& dv,
  247. const MultiAttributes &customAttributes) :
  248. NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, customAttributes, NoSize()),
  249. defaultValues(dv) {
  250. leafNameCheck();
  251. }
  252. NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
  253. const LeafNames &fieldsNames,
  254. const std::vector<GenericDatum>& dv,
  255. const MultiAttributes &customAttributes) :
  256. NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, customAttributes, NoSize()),
  257. defaultValues(dv) {
  258. leafNameCheck();
  259. }
  260. void swap(NodeRecord &r) {
  261. NodeImplRecord::swap(r);
  262. defaultValues.swap(r.defaultValues);
  263. }
  264. SchemaResolution resolve(const Node &reader) const override;
  265. void printJson(std::ostream &os, size_t depth) const override;
  266. bool isValid() const override {
  267. return ((nameAttribute_.size() == 1) &&
  268. (leafAttributes_.size() == leafNameAttributes_.size()) &&
  269. (customAttributes_.size() == 0 ||
  270. customAttributes_.size() == leafAttributes_.size()));
  271. }
  272. const GenericDatum &defaultValueAt(size_t index) override {
  273. return defaultValues[index];
  274. }
  275. void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
  276. private:
  277. // check if leaf name is valid Name and is not duplicate
  278. void leafNameCheck() {
  279. for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
  280. if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
  281. throw Exception(boost::format(
  282. "Cannot add duplicate field: %1%")
  283. % leafNameAttributes_.get(i));
  284. }
  285. }
  286. }
  287. };
  288. class AVRO_DECL NodeEnum : public NodeImplEnum {
  289. public:
  290. NodeEnum() : NodeImplEnum(AVRO_ENUM) {}
  291. NodeEnum(const HasName &name, const LeafNames &symbols) : NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoAttributes(), NoSize()) {
  292. for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
  293. if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
  294. throw Exception(boost::format("Cannot add duplicate enum: %1%") % leafNameAttributes_.get(i));
  295. }
  296. }
  297. }
  298. SchemaResolution resolve(const Node &reader) const override;
  299. void printJson(std::ostream &os, size_t depth) const override;
  300. bool isValid() const override {
  301. return (
  302. (nameAttribute_.size() == 1) && (leafNameAttributes_.size() > 0));
  303. }
  304. void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
  305. };
  306. class AVRO_DECL NodeArray : public NodeImplArray {
  307. public:
  308. NodeArray() : NodeImplArray(AVRO_ARRAY) {}
  309. explicit NodeArray(const SingleLeaf &items) : NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), NoAttributes(), NoSize()) {}
  310. SchemaResolution resolve(const Node &reader) const override;
  311. void printJson(std::ostream &os, size_t depth) const override;
  312. bool isValid() const override {
  313. return (leafAttributes_.size() == 1);
  314. }
  315. void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
  316. };
  317. class AVRO_DECL NodeMap : public NodeImplMap {
  318. public:
  319. NodeMap();
  320. explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP, NoName(), MultiLeaves(values), NoLeafNames(), NoAttributes(), NoSize()) {
  321. // need to add the key for the map too
  322. NodePtr key(new NodePrimitive(AVRO_STRING));
  323. doAddLeaf(key);
  324. // key goes before value
  325. std::swap(leafAttributes_.get(0), leafAttributes_.get(1));
  326. }
  327. SchemaResolution resolve(const Node &reader) const override;
  328. void printJson(std::ostream &os, size_t depth) const override;
  329. bool isValid() const override {
  330. return (leafAttributes_.size() == 2);
  331. }
  332. void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
  333. };
  334. class AVRO_DECL NodeUnion : public NodeImplUnion {
  335. public:
  336. NodeUnion() : NodeImplUnion(AVRO_UNION) {}
  337. explicit NodeUnion(const MultiLeaves &types) : NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoAttributes(), NoSize()) {}
  338. SchemaResolution resolve(const Node &reader) const override;
  339. void printJson(std::ostream &os, size_t depth) const override;
  340. bool isValid() const override {
  341. std::set<std::string> seen;
  342. if (leafAttributes_.size() >= 1) {
  343. for (size_t i = 0; i < leafAttributes_.size(); ++i) {
  344. std::string name;
  345. const NodePtr &n = leafAttributes_.get(i);
  346. switch (n->type()) {
  347. case AVRO_STRING:
  348. name = "string";
  349. break;
  350. case AVRO_BYTES:
  351. name = "bytes";
  352. break;
  353. case AVRO_INT:
  354. name = "int";
  355. break;
  356. case AVRO_LONG:
  357. name = "long";
  358. break;
  359. case AVRO_FLOAT:
  360. name = "float";
  361. break;
  362. case AVRO_DOUBLE:
  363. name = "double";
  364. break;
  365. case AVRO_BOOL:
  366. name = "bool";
  367. break;
  368. case AVRO_NULL:
  369. name = "null";
  370. break;
  371. case AVRO_ARRAY:
  372. name = "array";
  373. break;
  374. case AVRO_MAP:
  375. name = "map";
  376. break;
  377. case AVRO_RECORD:
  378. case AVRO_ENUM:
  379. case AVRO_UNION:
  380. case AVRO_FIXED:
  381. case AVRO_SYMBOLIC:
  382. name = n->name().fullname();
  383. break;
  384. default: return false;
  385. }
  386. if (seen.find(name) != seen.end()) {
  387. return false;
  388. }
  389. seen.insert(name);
  390. }
  391. return true;
  392. }
  393. return false;
  394. }
  395. void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
  396. };
  397. class AVRO_DECL NodeFixed : public NodeImplFixed {
  398. public:
  399. NodeFixed() : NodeImplFixed(AVRO_FIXED) {}
  400. NodeFixed(const HasName &name, const HasSize &size) : NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), NoAttributes(), size) {}
  401. SchemaResolution resolve(const Node &reader) const override;
  402. void printJson(std::ostream &os, size_t depth) const override;
  403. bool isValid() const override {
  404. return (
  405. (nameAttribute_.size() == 1) && (sizeAttribute_.size() == 1));
  406. }
  407. void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override;
  408. };
  409. template<class A, class B, class C, class D, class E>
  410. inline void
  411. NodeImpl<A, B, C, D, E>::setLeafToSymbolic(size_t index, const NodePtr &node) {
  412. if (!B::hasAttribute) {
  413. throw Exception("Cannot change leaf node for nonexistent leaf");
  414. }
  415. auto &replaceNode = const_cast<NodePtr &>(leafAttributes_.get(index));
  416. if (replaceNode->name() != node->name()) {
  417. throw Exception("Symbolic name does not match the name of the schema it references");
  418. }
  419. auto symbol = std::make_shared<NodeSymbolic>();
  420. symbol->setName(node->name());
  421. symbol->setNode(node);
  422. replaceNode = symbol;
  423. }
  424. template<class A, class B, class C, class D, class E>
  425. inline void
  426. NodeImpl<A, B, C, D, E>::printBasicInfo(std::ostream &os) const {
  427. os << type();
  428. if (hasName()) {
  429. os << ' ' << nameAttribute_.get();
  430. }
  431. if (E::hasAttribute) {
  432. os << " " << sizeAttribute_.get();
  433. }
  434. os << '\n';
  435. size_t count = leaves();
  436. count = count ? count : names();
  437. for (size_t i = 0; i < count; ++i) {
  438. if (C::hasAttribute) {
  439. os << "name " << nameAt(i) << '\n';
  440. }
  441. if (type() != AVRO_SYMBOLIC && leafAttributes_.hasAttribute) {
  442. leafAt(i)->printBasicInfo(os);
  443. }
  444. }
  445. if (isCompound(type())) {
  446. os << "end " << type() << '\n';
  447. }
  448. }
  449. inline NodePtr resolveSymbol(const NodePtr &node) {
  450. if (node->type() != AVRO_SYMBOLIC) {
  451. throw Exception("Only symbolic nodes may be resolved");
  452. }
  453. std::shared_ptr<NodeSymbolic> symNode = std::static_pointer_cast<NodeSymbolic>(node);
  454. return symNode->getNode();
  455. }
  /// Formats an integer as a "\uXXXX" escape: a backslash, 'u', and the value
  /// in lowercase hexadecimal zero-padded to four digits.
  ///
  /// The pad width is fixed at 4 rather than sizeof(T), so 16-bit and 64-bit
  /// argument types produce the same four-digit form that 32-bit types already
  /// did. Values wider than four hex digits are printed in full, not truncated.
  ///
  /// @param i  integer value to format
  /// @return   the escape sequence as a string
  template<typename T>
  inline std::string intToHex(T i) {
      std::stringstream stream;
      // Unary plus promotes character-sized integers to int so they are
      // streamed as numbers rather than as raw characters.
      stream << "\\u"
             << std::setfill('0') << std::setw(4)
             << std::hex << +i;
      return stream.str();
  }
  464. } // namespace avro
  465. #endif