NodeImpl.cc 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "NodeImpl.hh"
  19. #include <sstream>
  20. #include <utility>
  21. using std::string;
  22. namespace avro {
  23. namespace {
  24. // Escape string for serialization.
  25. string escape(const string &unescaped) {
  26. string s;
  27. s.reserve(unescaped.length());
  28. for (char c : unescaped) {
  29. switch (c) {
  30. case '\\':
  31. case '"':
  32. case '/':
  33. s += '\\';
  34. s += c;
  35. break;
  36. case '\b':
  37. s += '\\';
  38. s += 'b';
  39. break;
  40. case '\f':
  41. s += '\f';
  42. break;
  43. case '\n':
  44. s += '\\';
  45. s += 'n';
  46. break;
  47. case '\r':
  48. s += '\\';
  49. s += 'r';
  50. break;
  51. case '\t':
  52. s += '\\';
  53. s += 't';
  54. break;
  55. default:
  56. if (!std::iscntrl(c, std::locale::classic())) {
  57. s += c;
  58. continue;
  59. }
  60. s += intToHex(static_cast<unsigned int>(c));
  61. break;
  62. }
  63. }
  64. return s;
  65. }
  /// Strongly typed indentation depth, so that streaming it with operator<<
  /// selects the indentation overload instead of printing a number.
  struct indent {
      /// Number of indentation units to emit.
      size_t d;

      explicit indent(size_t depth) : d{depth} {}
  };
  71. /// ostream operator for indent
  72. std::ostream &operator<<(std::ostream &os, indent x) {
  73. static const string spaces(" ");
  74. while (x.d--) {
  75. os << spaces;
  76. }
  77. return os;
  78. }
  79. void printCustomAttributes(const CustomAttributes &customAttributes, size_t depth,
  80. std::ostream &os) {
  81. std::map<std::string, std::string>::const_iterator iter =
  82. customAttributes.attributes().begin();
  83. while (iter != customAttributes.attributes().end()) {
  84. os << ",\n"
  85. << indent(depth);
  86. customAttributes.printJson(os, iter->first);
  87. ++iter;
  88. }
  89. }
  90. } // anonymous namespace
  /// Number of characters reserved per byte when bytes/fixed default values are
  /// rendered through intToHex().
  /// NOTE(review): presumably the length of a backslash-u escape with four hex
  /// digits -- confirm against intToHex().
  const int kByteStringSize{6};
  92. SchemaResolution
  93. NodePrimitive::resolve(const Node &reader) const {
  94. if (type() == reader.type()) {
  95. return RESOLVE_MATCH;
  96. }
  97. switch (type()) {
  98. case AVRO_INT:
  99. if (reader.type() == AVRO_LONG) {
  100. return RESOLVE_PROMOTABLE_TO_LONG;
  101. }
  102. [[fallthrough]];
  103. case AVRO_LONG:
  104. if (reader.type() == AVRO_FLOAT) {
  105. return RESOLVE_PROMOTABLE_TO_FLOAT;
  106. }
  107. [[fallthrough]];
  108. case AVRO_FLOAT:
  109. if (reader.type() == AVRO_DOUBLE) {
  110. return RESOLVE_PROMOTABLE_TO_DOUBLE;
  111. }
  112. default: break;
  113. }
  114. return furtherResolution(reader);
  115. }
  116. SchemaResolution
  117. NodeRecord::resolve(const Node &reader) const {
  118. if (reader.type() == AVRO_RECORD) {
  119. if (name() == reader.name()) {
  120. return RESOLVE_MATCH;
  121. }
  122. }
  123. return furtherResolution(reader);
  124. }
  125. SchemaResolution
  126. NodeEnum::resolve(const Node &reader) const {
  127. if (reader.type() == AVRO_ENUM) {
  128. return (name() == reader.name()) ? RESOLVE_MATCH : RESOLVE_NO_MATCH;
  129. }
  130. return furtherResolution(reader);
  131. }
  132. SchemaResolution
  133. NodeArray::resolve(const Node &reader) const {
  134. if (reader.type() == AVRO_ARRAY) {
  135. const NodePtr &arrayType = leafAt(0);
  136. return arrayType->resolve(*reader.leafAt(0));
  137. }
  138. return furtherResolution(reader);
  139. }
  140. SchemaResolution
  141. NodeMap::resolve(const Node &reader) const {
  142. if (reader.type() == AVRO_MAP) {
  143. const NodePtr &mapType = leafAt(1);
  144. return mapType->resolve(*reader.leafAt(1));
  145. }
  146. return furtherResolution(reader);
  147. }
  148. SchemaResolution
  149. NodeUnion::resolve(const Node &reader) const {
  150. // If the writer is union, resolution only needs to occur when the selected
  151. // type of the writer is known, so this function is not very helpful.
  152. //
  153. // In this case, this function returns if there is a possible match given
  154. // any writer type, so just search type by type returning the best match
  155. // found.
  156. SchemaResolution match = RESOLVE_NO_MATCH;
  157. for (size_t i = 0; i < leaves(); ++i) {
  158. const NodePtr &node = leafAt(i);
  159. SchemaResolution thisMatch = node->resolve(reader);
  160. if (thisMatch == RESOLVE_MATCH) {
  161. match = thisMatch;
  162. break;
  163. }
  164. if (match == RESOLVE_NO_MATCH) {
  165. match = thisMatch;
  166. }
  167. }
  168. return match;
  169. }
  170. SchemaResolution
  171. NodeFixed::resolve(const Node &reader) const {
  172. if (reader.type() == AVRO_FIXED) {
  173. return (
  174. (reader.fixedSize() == fixedSize()) && (reader.name() == name()))
  175. ? RESOLVE_MATCH
  176. : RESOLVE_NO_MATCH;
  177. }
  178. return furtherResolution(reader);
  179. }
  180. SchemaResolution
  181. NodeSymbolic::resolve(const Node &reader) const {
  182. const NodePtr &node = leafAt(0);
  183. return node->resolve(reader);
  184. }
  185. void NodePrimitive::printJson(std::ostream &os, size_t depth) const {
  186. bool hasLogicalType = logicalType().type() != LogicalType::NONE;
  187. if (hasLogicalType) {
  188. os << "{\n"
  189. << indent(depth) << "\"type\": ";
  190. }
  191. os << '\"' << type() << '\"';
  192. if (hasLogicalType) {
  193. os << ",\n"
  194. << indent(depth);
  195. logicalType().printJson(os);
  196. os << "\n}";
  197. }
  198. if (!getDoc().empty()) {
  199. os << ",\n"
  200. << indent(depth) << R"("doc": ")"
  201. << escape(getDoc()) << "\"";
  202. }
  203. }
  204. void NodeSymbolic::printJson(std::ostream &os, size_t depth) const {
  205. os << '\"' << nameAttribute_.get() << '\"';
  206. if (!getDoc().empty()) {
  207. os << ",\n"
  208. << indent(depth) << R"("doc": ")"
  209. << escape(getDoc()) << "\"";
  210. }
  211. }
  212. static void printName(std::ostream &os, const Name &n, size_t depth) {
  213. if (!n.ns().empty()) {
  214. os << indent(depth) << R"("namespace": ")" << n.ns() << "\",\n";
  215. }
  216. os << indent(depth) << R"("name": ")" << n.simpleName() << "\",\n";
  217. }
  218. void NodeRecord::printJson(std::ostream &os, size_t depth) const {
  219. os << "{\n";
  220. os << indent(++depth) << "\"type\": \"record\",\n";
  221. const Name &name = nameAttribute_.get();
  222. printName(os, name, depth);
  223. const auto &aliases = name.aliases();
  224. if (!aliases.empty()) {
  225. os << indent(depth) << "\"aliases\": [";
  226. ++depth;
  227. for (size_t i = 0; i < aliases.size(); ++i) {
  228. if (i > 0) {
  229. os << ',';
  230. }
  231. os << '\n'
  232. << indent(depth) << "\"" << aliases[i] << "\"";
  233. }
  234. os << '\n'
  235. << indent(--depth) << "]\n";
  236. }
  237. if (!getDoc().empty()) {
  238. os << indent(depth) << R"("doc": ")"
  239. << escape(getDoc()) << "\",\n";
  240. }
  241. os << indent(depth) << "\"fields\": [";
  242. size_t fields = leafAttributes_.size();
  243. ++depth;
  244. assert(fieldsAliases_.empty() || (fieldsAliases_.size() == fields));
  245. assert(fieldsDefaultValues_.empty() || (fieldsDefaultValues_.size() == fields));
  246. assert(customAttributes_.size() == 0 || customAttributes_.size() == fields);
  247. for (size_t i = 0; i < fields; ++i) {
  248. if (i > 0) {
  249. os << ',';
  250. }
  251. os << '\n'
  252. << indent(depth) << "{\n";
  253. os << indent(++depth) << R"("name": ")" << leafNameAttributes_.get(i) << "\",\n";
  254. os << indent(depth) << "\"type\": ";
  255. leafAttributes_.get(i)->printJson(os, depth);
  256. if (!fieldsAliases_.empty() && !fieldsAliases_[i].empty()) {
  257. os << ",\n"
  258. << indent(depth) << "\"aliases\": [";
  259. ++depth;
  260. for (size_t j = 0; j < fieldsAliases_[i].size(); ++j) {
  261. if (j > 0) {
  262. os << ',';
  263. }
  264. os << '\n'
  265. << indent(depth) << "\"" << fieldsAliases_[i][j] << "\"";
  266. }
  267. os << '\n'
  268. << indent(--depth) << ']';
  269. }
  270. // Serialize "default" field:
  271. if (!fieldsDefaultValues_.empty()) {
  272. if (!fieldsDefaultValues_[i].isUnion() && fieldsDefaultValues_[i].type() == AVRO_NULL) {
  273. // No "default" field.
  274. } else {
  275. os << ",\n"
  276. << indent(depth) << "\"default\": ";
  277. leafAttributes_.get(i)->printDefaultToJson(fieldsDefaultValues_[i], os,
  278. depth);
  279. }
  280. }
  281. if (customAttributes_.size() == fields) {
  282. printCustomAttributes(customAttributes_.get(i), depth, os);
  283. }
  284. os << '\n';
  285. os << indent(--depth) << '}';
  286. }
  287. os << '\n'
  288. << indent(--depth) << "]\n";
  289. os << indent(--depth) << '}';
  290. }
  291. void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  292. size_t) const {
  293. assert(isPrimitive(g.type()));
  294. switch (g.type()) {
  295. case AVRO_NULL:
  296. os << "null";
  297. break;
  298. case AVRO_BOOL:
  299. os << (g.value<bool>() ? "true" : "false");
  300. break;
  301. case AVRO_INT:
  302. os << g.value<int32_t>();
  303. break;
  304. case AVRO_LONG:
  305. os << g.value<int64_t>();
  306. break;
  307. case AVRO_FLOAT:
  308. os << g.value<float>();
  309. break;
  310. case AVRO_DOUBLE:
  311. os << g.value<double>();
  312. break;
  313. case AVRO_STRING:
  314. os << "\"" << escape(g.value<string>()) << "\"";
  315. break;
  316. case AVRO_BYTES: {
  317. // Convert to a string:
  318. const auto &vg = g.value<std::vector<uint8_t>>();
  319. string s;
  320. s.resize(vg.size() * kByteStringSize);
  321. for (unsigned int i = 0; i < vg.size(); i++) {
  322. string hex_string = intToHex(static_cast<int>(vg[i]));
  323. s.replace(i * kByteStringSize, kByteStringSize, hex_string);
  324. }
  325. os << "\"" << s << "\"";
  326. } break;
  327. default: break;
  328. }
  329. }
  330. void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  331. size_t) const {
  332. assert(g.type() == AVRO_ENUM);
  333. os << "\"" << g.value<GenericEnum>().symbol() << "\"";
  334. }
  335. void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  336. size_t) const {
  337. assert(g.type() == AVRO_FIXED);
  338. // ex: "\uOOff"
  339. // Convert to a string
  340. const std::vector<uint8_t> &vg = g.value<GenericFixed>().value();
  341. string s;
  342. s.resize(vg.size() * kByteStringSize);
  343. for (unsigned int i = 0; i < vg.size(); i++) {
  344. string hex_string = intToHex(static_cast<int>(vg[i]));
  345. s.replace(i * kByteStringSize, kByteStringSize, hex_string);
  346. }
  347. os << "\"" << s << "\"";
  348. }
  349. void NodeUnion::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  350. size_t depth) const {
  351. leafAt(0)->printDefaultToJson(g, os, depth);
  352. }
  353. void NodeArray::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  354. size_t depth) const {
  355. assert(g.type() == AVRO_ARRAY);
  356. // ex: "default": [1]
  357. if (g.value<GenericArray>().value().empty()) {
  358. os << "[]";
  359. } else {
  360. os << "[\n";
  361. depth++;
  362. // Serialize all values of the array with recursive calls:
  363. for (unsigned int i = 0; i < g.value<GenericArray>().value().size(); i++) {
  364. if (i > 0) {
  365. os << ",\n";
  366. }
  367. os << indent(depth);
  368. leafAt(0)->printDefaultToJson(g.value<GenericArray>().value()[i], os,
  369. depth);
  370. }
  371. os << "\n"
  372. << indent(--depth) << "]";
  373. }
  374. }
  375. void NodeSymbolic::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  376. size_t depth) const {
  377. getNode()->printDefaultToJson(g, os, depth);
  378. }
  379. void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  380. size_t depth) const {
  381. assert(g.type() == AVRO_RECORD);
  382. if (g.value<GenericRecord>().fieldCount() == 0) {
  383. os << "{}";
  384. } else {
  385. os << "{\n";
  386. // Serialize all fields of the record with recursive calls:
  387. for (size_t i = 0; i < g.value<GenericRecord>().fieldCount(); i++) {
  388. if (i == 0) {
  389. ++depth;
  390. } else { // i > 0
  391. os << ",\n";
  392. }
  393. os << indent(depth) << "\"";
  394. assert(i < leaves());
  395. os << leafNameAttributes_.get(i);
  396. os << "\": ";
  397. // Recursive call on child node to be able to get the name attribute
  398. // (In case of a record we need the name of the leaves (contained in
  399. // 'this'))
  400. leafAt(i)->printDefaultToJson(g.value<GenericRecord>().fieldAt(i), os,
  401. depth);
  402. }
  403. os << "\n"
  404. << indent(--depth) << "}";
  405. }
  406. }
  407. NodeRecord::NodeRecord(const HasName &name, const MultiLeaves &fields,
  408. const LeafNames &fieldsNames, std::vector<GenericDatum> dv)
  409. : NodeRecord(name, HasDoc(), fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {}
  410. NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
  411. const LeafNames &fieldsNames, std::vector<GenericDatum> dv)
  412. : NodeRecord(name, doc, fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {}
  413. NodeRecord::NodeRecord(const HasName &name, const MultiLeaves &fields,
  414. const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
  415. std::vector<GenericDatum> dv, const MultiAttributes &customAttributes)
  416. : NodeRecord(name, HasDoc(), fields, fieldsNames, std::move(fieldsAliases), std::move(dv), customAttributes) {}
  417. NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
  418. const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
  419. std::vector<GenericDatum> dv, const MultiAttributes &customAttributes)
  420. : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, customAttributes, NoSize()),
  421. fieldsAliases_(std::move(fieldsAliases)),
  422. fieldsDefaultValues_(std::move(dv)) {
  423. for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
  424. if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
  425. throw Exception("Cannot add duplicate field: {}", leafNameAttributes_.get(i));
  426. }
  427. if (!fieldsAliases_.empty()) {
  428. for (const auto &alias : fieldsAliases_[i]) {
  429. if (!nameIndex_.add(alias, i)) {
  430. throw Exception("Cannot add duplicate field: {}", alias);
  431. }
  432. }
  433. }
  434. }
  435. }
  436. void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  437. size_t depth) const {
  438. assert(g.type() == AVRO_MAP);
  439. if (g.value<GenericMap>().value().empty()) {
  440. os << "{}";
  441. } else {
  442. os << "{\n";
  443. for (size_t i = 0; i < g.value<GenericMap>().value().size(); i++) {
  444. if (i == 0) {
  445. ++depth;
  446. } else {
  447. os << ",\n";
  448. }
  449. os << indent(depth) << "\"" << g.value<GenericMap>().value()[i].first
  450. << "\": ";
  451. leafAt(i)->printDefaultToJson(g.value<GenericMap>().value()[i].second, os,
  452. depth);
  453. }
  454. os << "\n"
  455. << indent(--depth) << "}";
  456. }
  457. }
  458. void NodeEnum::printJson(std::ostream &os, size_t depth) const {
  459. os << "{\n";
  460. os << indent(++depth) << "\"type\": \"enum\",\n";
  461. if (!getDoc().empty()) {
  462. os << indent(depth) << R"("doc": ")"
  463. << escape(getDoc()) << "\",\n";
  464. }
  465. printName(os, nameAttribute_.get(), depth);
  466. os << indent(depth) << "\"symbols\": [\n";
  467. auto names = leafNameAttributes_.size();
  468. ++depth;
  469. for (size_t i = 0; i < names; ++i) {
  470. if (i > 0) {
  471. os << ",\n";
  472. }
  473. os << indent(depth) << '\"' << leafNameAttributes_.get(i) << '\"';
  474. }
  475. os << '\n';
  476. os << indent(--depth) << "]\n";
  477. os << indent(--depth) << '}';
  478. }
  479. void NodeArray::printJson(std::ostream &os, size_t depth) const {
  480. os << "{\n";
  481. os << indent(depth + 1) << "\"type\": \"array\",\n";
  482. if (!getDoc().empty()) {
  483. os << indent(depth + 1) << R"("doc": ")"
  484. << escape(getDoc()) << "\",\n";
  485. }
  486. os << indent(depth + 1) << "\"items\": ";
  487. leafAttributes_.get()->printJson(os, depth + 1);
  488. os << '\n';
  489. os << indent(depth) << '}';
  490. }
  491. void NodeMap::printJson(std::ostream &os, size_t depth) const {
  492. os << "{\n";
  493. os << indent(depth + 1) << "\"type\": \"map\",\n";
  494. if (!getDoc().empty()) {
  495. os << indent(depth + 1) << R"("doc": ")"
  496. << escape(getDoc()) << "\",\n";
  497. }
  498. os << indent(depth + 1) << "\"values\": ";
  499. leafAttributes_.get(1)->printJson(os, depth + 1);
  500. os << '\n';
  501. os << indent(depth) << '}';
  502. }
  503. NodeMap::NodeMap() : NodeImplMap(AVRO_MAP) {
  504. NodePtr key(new NodePrimitive(AVRO_STRING));
  505. doAddLeaf(key);
  506. }
  507. void NodeUnion::printJson(std::ostream &os, size_t depth) const {
  508. os << "[\n";
  509. auto fields = leafAttributes_.size();
  510. ++depth;
  511. for (size_t i = 0; i < fields; ++i) {
  512. if (i > 0) {
  513. os << ",\n";
  514. }
  515. os << indent(depth);
  516. leafAttributes_.get(i)->printJson(os, depth);
  517. }
  518. os << '\n';
  519. os << indent(--depth) << ']';
  520. }
  521. void NodeFixed::printJson(std::ostream &os, size_t depth) const {
  522. os << "{\n";
  523. os << indent(++depth) << "\"type\": \"fixed\",\n";
  524. if (!getDoc().empty()) {
  525. os << indent(depth) << R"("doc": ")"
  526. << escape(getDoc()) << "\",\n";
  527. }
  528. printName(os, nameAttribute_.get(), depth);
  529. os << indent(depth) << "\"size\": " << sizeAttribute_.get();
  530. if (logicalType().type() != LogicalType::NONE) {
  531. os << ",\n"
  532. << indent(depth);
  533. logicalType().printJson(os);
  534. }
  535. os << "\n"
  536. << indent(--depth) << '}';
  537. }
  538. } // namespace avro