NodeImpl.cc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "NodeImpl.hh"
  19. #include <sstream>
  20. #include <utility>
  21. using std::string;
  22. namespace avro {
  23. namespace {
  24. // Escape string for serialization.
  25. string escape(const string &unescaped) {
  26. string s;
  27. s.reserve(unescaped.length());
  28. for (char c : unescaped) {
  29. switch (c) {
  30. case '\\':
  31. case '"':
  32. case '/':
  33. s += '\\';
  34. s += c;
  35. break;
  36. case '\b':
  37. s += '\\';
  38. s += 'b';
  39. break;
  40. case '\f':
  41. s += '\f';
  42. break;
  43. case '\n':
  44. s += '\\';
  45. s += 'n';
  46. break;
  47. case '\r':
  48. s += '\\';
  49. s += 'r';
  50. break;
  51. case '\t':
  52. s += '\\';
  53. s += 't';
  54. break;
  55. default:
  56. if (!std::iscntrl(c, std::locale::classic())) {
  57. s += c;
  58. continue;
  59. }
  60. s += intToHex(static_cast<unsigned int>(c));
  61. break;
  62. }
  63. }
  64. return s;
  65. }
  /// Tag type carrying an indentation depth, so that a depth can be streamed
  /// with operator<< instead of being confused with a plain integer.
  struct indent {
      /// Depth to indent by; stored as int although it is supplied as size_t.
      int d;

      explicit indent(size_t depth) : d(static_cast<int>(depth)) {}
  };
  71. /// ostream operator for indent
  72. std::ostream &operator<<(std::ostream &os, indent x) {
  73. static const string spaces(" ");
  74. while (x.d--) {
  75. os << spaces;
  76. }
  77. return os;
  78. }
  79. void printCustomAttributes(const CustomAttributes& customAttributes, int depth,
  80. std::ostream &os) {
  81. std::map<std::string, std::string>::const_iterator iter =
  82. customAttributes.attributes().begin();
  83. while (iter != customAttributes.attributes().end()) {
  84. os << ",\n" << indent(depth);
  85. customAttributes.printJson(os, iter->first);
  86. ++iter;
  87. }
  88. }
  89. } // anonymous namespace
  // Number of characters budgeted per byte when BYTES and FIXED defaults are
  // rendered as strings: both printDefaultToJson() overloads size their output
  // as <byte count> * kByteStringSize and overwrite one such slot per byte
  // with the text returned by intToHex().
  90. const int kByteStringSize = 6;
  91. SchemaResolution
  92. NodePrimitive::resolve(const Node &reader) const {
  93. if (type() == reader.type()) {
  94. return RESOLVE_MATCH;
  95. }
  96. switch (type()) {
  97. case AVRO_INT:
  98. if (reader.type() == AVRO_LONG) {
  99. return RESOLVE_PROMOTABLE_TO_LONG;
  100. }
  101. // fall-through intentional
  102. case AVRO_LONG:
  103. if (reader.type() == AVRO_FLOAT) {
  104. return RESOLVE_PROMOTABLE_TO_FLOAT;
  105. }
  106. // fall-through intentional
  107. case AVRO_FLOAT:
  108. if (reader.type() == AVRO_DOUBLE) {
  109. return RESOLVE_PROMOTABLE_TO_DOUBLE;
  110. }
  111. default: break;
  112. }
  113. return furtherResolution(reader);
  114. }
  115. SchemaResolution
  116. NodeRecord::resolve(const Node &reader) const {
  117. if (reader.type() == AVRO_RECORD) {
  118. if (name() == reader.name()) {
  119. return RESOLVE_MATCH;
  120. }
  121. }
  122. return furtherResolution(reader);
  123. }
  124. SchemaResolution
  125. NodeEnum::resolve(const Node &reader) const {
  126. if (reader.type() == AVRO_ENUM) {
  127. return (name() == reader.name()) ? RESOLVE_MATCH : RESOLVE_NO_MATCH;
  128. }
  129. return furtherResolution(reader);
  130. }
  131. SchemaResolution
  132. NodeArray::resolve(const Node &reader) const {
  133. if (reader.type() == AVRO_ARRAY) {
  134. const NodePtr &arrayType = leafAt(0);
  135. return arrayType->resolve(*reader.leafAt(0));
  136. }
  137. return furtherResolution(reader);
  138. }
  139. SchemaResolution
  140. NodeMap::resolve(const Node &reader) const {
  141. if (reader.type() == AVRO_MAP) {
  142. const NodePtr &mapType = leafAt(1);
  143. return mapType->resolve(*reader.leafAt(1));
  144. }
  145. return furtherResolution(reader);
  146. }
  147. SchemaResolution
  148. NodeUnion::resolve(const Node &reader) const {
  149. // If the writer is union, resolution only needs to occur when the selected
  150. // type of the writer is known, so this function is not very helpful.
  151. //
  152. // In this case, this function returns if there is a possible match given
  153. // any writer type, so just search type by type returning the best match
  154. // found.
  155. SchemaResolution match = RESOLVE_NO_MATCH;
  156. for (size_t i = 0; i < leaves(); ++i) {
  157. const NodePtr &node = leafAt(i);
  158. SchemaResolution thisMatch = node->resolve(reader);
  159. if (thisMatch == RESOLVE_MATCH) {
  160. match = thisMatch;
  161. break;
  162. }
  163. if (match == RESOLVE_NO_MATCH) {
  164. match = thisMatch;
  165. }
  166. }
  167. return match;
  168. }
  169. SchemaResolution
  170. NodeFixed::resolve(const Node &reader) const {
  171. if (reader.type() == AVRO_FIXED) {
  172. return (
  173. (reader.fixedSize() == fixedSize()) && (reader.name() == name()))
  174. ? RESOLVE_MATCH
  175. : RESOLVE_NO_MATCH;
  176. }
  177. return furtherResolution(reader);
  178. }
  179. SchemaResolution
  180. NodeSymbolic::resolve(const Node &reader) const {
  181. const NodePtr &node = leafAt(0);
  182. return node->resolve(reader);
  183. }
  184. void NodePrimitive::printJson(std::ostream &os, size_t depth) const {
  185. bool hasLogicalType = logicalType().type() != LogicalType::NONE;
  186. if (hasLogicalType) {
  187. os << "{\n"
  188. << indent(depth) << "\"type\": ";
  189. }
  190. os << '\"' << type() << '\"';
  191. if (hasLogicalType) {
  192. os << ",\n"
  193. << indent(depth);
  194. logicalType().printJson(os);
  195. os << "\n}";
  196. }
  197. if (!getDoc().empty()) {
  198. os << ",\n"
  199. << indent(depth) << R"("doc": ")"
  200. << escape(getDoc()) << "\"";
  201. }
  202. }
  203. void NodeSymbolic::printJson(std::ostream &os, size_t depth) const {
  204. os << '\"' << nameAttribute_.get() << '\"';
  205. if (!getDoc().empty()) {
  206. os << ",\n"
  207. << indent(depth) << R"("doc": ")"
  208. << escape(getDoc()) << "\"";
  209. }
  210. }
  211. static void printName(std::ostream &os, const Name &n, size_t depth) {
  212. if (!n.ns().empty()) {
  213. os << indent(depth) << R"("namespace": ")" << n.ns() << "\",\n";
  214. }
  215. os << indent(depth) << R"("name": ")" << n.simpleName() << "\",\n";
  216. }
  217. void NodeRecord::printJson(std::ostream &os, size_t depth) const {
  218. os << "{\n";
  219. os << indent(++depth) << "\"type\": \"record\",\n";
  220. printName(os, nameAttribute_.get(), depth);
  221. if (!getDoc().empty()) {
  222. os << indent(depth) << R"("doc": ")"
  223. << escape(getDoc()) << "\",\n";
  224. }
  225. os << indent(depth) << "\"fields\": [";
  226. size_t fields = leafAttributes_.size();
  227. ++depth;
  228. // Serialize "default" field:
  229. assert(defaultValues.empty() || (defaultValues.size() == fields));
  230. assert(customAttributes_.size() == 0 || customAttributes_.size() == fields);
  231. for (size_t i = 0; i < fields; ++i) {
  232. if (i > 0) {
  233. os << ',';
  234. }
  235. os << '\n'
  236. << indent(depth) << "{\n";
  237. os << indent(++depth) << R"("name": ")" << leafNameAttributes_.get(i) << "\",\n";
  238. os << indent(depth) << "\"type\": ";
  239. leafAttributes_.get(i)->printJson(os, depth);
  240. if (!defaultValues.empty()) {
  241. if (!defaultValues[i].isUnion() && defaultValues[i].type() == AVRO_NULL) {
  242. // No "default" field.
  243. } else {
  244. os << ",\n"
  245. << indent(depth) << "\"default\": ";
  246. leafAttributes_.get(i)->printDefaultToJson(defaultValues[i], os,
  247. depth);
  248. }
  249. }
  250. if(customAttributes_.size() == fields) {
  251. printCustomAttributes(customAttributes_.get(i), depth, os);
  252. }
  253. os << '\n';
  254. os << indent(--depth) << '}';
  255. }
  256. os << '\n'
  257. << indent(--depth) << "]\n";
  258. os << indent(--depth) << '}';
  259. }
  260. void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  261. size_t depth) const {
  262. assert(isPrimitive(g.type()));
  263. switch (g.type()) {
  264. case AVRO_NULL:
  265. os << "null";
  266. break;
  267. case AVRO_BOOL:
  268. os << (g.value<bool>() ? "true" : "false");
  269. break;
  270. case AVRO_INT:
  271. os << g.value<int32_t>();
  272. break;
  273. case AVRO_LONG:
  274. os << g.value<int64_t>();
  275. break;
  276. case AVRO_FLOAT:
  277. os << g.value<float>();
  278. break;
  279. case AVRO_DOUBLE:
  280. os << g.value<double>();
  281. break;
  282. case AVRO_STRING:
  283. os << "\"" << escape(g.value<string>()) << "\"";
  284. break;
  285. case AVRO_BYTES: {
  286. // Convert to a string:
  287. const auto &vg = g.value<std::vector<uint8_t>>();
  288. string s;
  289. s.resize(vg.size() * kByteStringSize);
  290. for (unsigned int i = 0; i < vg.size(); i++) {
  291. string hex_string = intToHex(static_cast<int>(vg[i]));
  292. s.replace(i * kByteStringSize, kByteStringSize, hex_string);
  293. }
  294. os << "\"" << s << "\"";
  295. } break;
  296. default: break;
  297. }
  298. }
  299. void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  300. size_t depth) const {
  301. assert(g.type() == AVRO_ENUM);
  302. os << "\"" << g.value<GenericEnum>().symbol() << "\"";
  303. }
  304. void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  305. size_t depth) const {
  306. assert(g.type() == AVRO_FIXED);
  307. // ex: "\uOOff"
  308. // Convert to a string
  309. const std::vector<uint8_t> &vg = g.value<GenericFixed>().value();
  310. string s;
  311. s.resize(vg.size() * kByteStringSize);
  312. for (unsigned int i = 0; i < vg.size(); i++) {
  313. string hex_string = intToHex(static_cast<int>(vg[i]));
  314. s.replace(i * kByteStringSize, kByteStringSize, hex_string);
  315. }
  316. os << "\"" << s << "\"";
  317. }
  318. void NodeUnion::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  319. size_t depth) const {
  320. leafAt(0)->printDefaultToJson(g, os, depth);
  321. }
  322. void NodeArray::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  323. size_t depth) const {
  324. assert(g.type() == AVRO_ARRAY);
  325. // ex: "default": [1]
  326. if (g.value<GenericArray>().value().empty()) {
  327. os << "[]";
  328. } else {
  329. os << "[\n";
  330. depth++;
  331. // Serialize all values of the array with recursive calls:
  332. for (unsigned int i = 0; i < g.value<GenericArray>().value().size(); i++) {
  333. if (i > 0) {
  334. os << ",\n";
  335. }
  336. os << indent(depth);
  337. leafAt(0)->printDefaultToJson(g.value<GenericArray>().value()[i], os,
  338. depth);
  339. }
  340. os << "\n"
  341. << indent(--depth) << "]";
  342. }
  343. }
  344. void NodeSymbolic::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  345. size_t depth) const {
  346. getNode()->printDefaultToJson(g, os, depth);
  347. }
  348. void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  349. size_t depth) const {
  350. assert(g.type() == AVRO_RECORD);
  351. if (g.value<GenericRecord>().fieldCount() == 0) {
  352. os << "{}";
  353. } else {
  354. os << "{\n";
  355. // Serialize all fields of the record with recursive calls:
  356. for (size_t i = 0; i < g.value<GenericRecord>().fieldCount(); i++) {
  357. if (i == 0) {
  358. ++depth;
  359. } else { // i > 0
  360. os << ",\n";
  361. }
  362. os << indent(depth) << "\"";
  363. assert(i < leaves());
  364. os << leafNameAttributes_.get(i);
  365. os << "\": ";
  366. // Recursive call on child node to be able to get the name attribute
  367. // (In case of a record we need the name of the leaves (contained in
  368. // 'this'))
  369. leafAt(i)->printDefaultToJson(g.value<GenericRecord>().fieldAt(i), os,
  370. depth);
  371. }
  372. os << "\n"
  373. << indent(--depth) << "}";
  374. }
  375. }
  376. NodeRecord::NodeRecord(const HasName &name,
  377. const MultiLeaves &fields,
  378. const LeafNames &fieldsNames,
  379. std::vector<GenericDatum> dv) : NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, MultiAttributes(), NoSize()),
  380. defaultValues(std::move(dv)) {
  381. for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
  382. if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
  383. throw Exception(boost::format(
  384. "Cannot add duplicate field: %1%")
  385. % leafNameAttributes_.get(i));
  386. }
  387. }
  388. }
  389. void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os,
  390. size_t depth) const {
  391. assert(g.type() == AVRO_MAP);
  392. if (g.value<GenericMap>().value().empty()) {
  393. os << "{}";
  394. } else {
  395. os << "{\n";
  396. for (size_t i = 0; i < g.value<GenericMap>().value().size(); i++) {
  397. if (i == 0) {
  398. ++depth;
  399. } else {
  400. os << ",\n";
  401. }
  402. os << indent(depth) << "\"" << g.value<GenericMap>().value()[i].first
  403. << "\": ";
  404. leafAt(i)->printDefaultToJson(g.value<GenericMap>().value()[i].second, os,
  405. depth);
  406. }
  407. os << "\n"
  408. << indent(--depth) << "}";
  409. }
  410. }
  411. void NodeEnum::printJson(std::ostream &os, size_t depth) const {
  412. os << "{\n";
  413. os << indent(++depth) << "\"type\": \"enum\",\n";
  414. if (!getDoc().empty()) {
  415. os << indent(depth) << R"("doc": ")"
  416. << escape(getDoc()) << "\",\n";
  417. }
  418. printName(os, nameAttribute_.get(), depth);
  419. os << indent(depth) << "\"symbols\": [\n";
  420. int names = leafNameAttributes_.size();
  421. ++depth;
  422. for (int i = 0; i < names; ++i) {
  423. if (i > 0) {
  424. os << ",\n";
  425. }
  426. os << indent(depth) << '\"' << leafNameAttributes_.get(i) << '\"';
  427. }
  428. os << '\n';
  429. os << indent(--depth) << "]\n";
  430. os << indent(--depth) << '}';
  431. }
  432. void NodeArray::printJson(std::ostream &os, size_t depth) const {
  433. os << "{\n";
  434. os << indent(depth + 1) << "\"type\": \"array\",\n";
  435. if (!getDoc().empty()) {
  436. os << indent(depth + 1) << R"("doc": ")"
  437. << escape(getDoc()) << "\",\n";
  438. }
  439. os << indent(depth + 1) << "\"items\": ";
  440. leafAttributes_.get()->printJson(os, depth + 1);
  441. os << '\n';
  442. os << indent(depth) << '}';
  443. }
  444. void NodeMap::printJson(std::ostream &os, size_t depth) const {
  445. os << "{\n";
  446. os << indent(depth + 1) << "\"type\": \"map\",\n";
  447. if (!getDoc().empty()) {
  448. os << indent(depth + 1) << R"("doc": ")"
  449. << escape(getDoc()) << "\",\n";
  450. }
  451. os << indent(depth + 1) << "\"values\": ";
  452. leafAttributes_.get(1)->printJson(os, depth + 1);
  453. os << '\n';
  454. os << indent(depth) << '}';
  455. }
  456. NodeMap::NodeMap() : NodeImplMap(AVRO_MAP) {
  457. NodePtr key(new NodePrimitive(AVRO_STRING));
  458. doAddLeaf(key);
  459. }
  460. void NodeUnion::printJson(std::ostream &os, size_t depth) const {
  461. os << "[\n";
  462. int fields = leafAttributes_.size();
  463. ++depth;
  464. for (int i = 0; i < fields; ++i) {
  465. if (i > 0) {
  466. os << ",\n";
  467. }
  468. os << indent(depth);
  469. leafAttributes_.get(i)->printJson(os, depth);
  470. }
  471. os << '\n';
  472. os << indent(--depth) << ']';
  473. }
  474. void NodeFixed::printJson(std::ostream &os, size_t depth) const {
  475. os << "{\n";
  476. os << indent(++depth) << "\"type\": \"fixed\",\n";
  477. if (!getDoc().empty()) {
  478. os << indent(depth) << R"("doc": ")"
  479. << escape(getDoc()) << "\",\n";
  480. }
  481. printName(os, nameAttribute_.get(), depth);
  482. os << indent(depth) << "\"size\": " << sizeAttribute_.get();
  483. if (logicalType().type() != LogicalType::NONE) {
  484. os << ",\n"
  485. << indent(depth);
  486. logicalType().printJson(os);
  487. }
  488. os << "\n"
  489. << indent(--depth) << '}';
  490. }
  491. } // namespace avro