123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555 |
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "NodeImpl.hh"
- #include <sstream>
- #include <utility>
- using std::string;
- namespace avro {
- namespace {
- // Escape string for serialization.
- string escape(const string &unescaped) {
- string s;
- s.reserve(unescaped.length());
- for (char c : unescaped) {
- switch (c) {
- case '\\':
- case '"':
- case '/':
- s += '\\';
- s += c;
- break;
- case '\b':
- s += '\\';
- s += 'b';
- break;
- case '\f':
- s += '\f';
- break;
- case '\n':
- s += '\\';
- s += 'n';
- break;
- case '\r':
- s += '\\';
- s += 'r';
- break;
- case '\t':
- s += '\\';
- s += 't';
- break;
- default:
- if (!std::iscntrl(c, std::locale::classic())) {
- s += c;
- continue;
- }
- s += intToHex(static_cast<unsigned int>(c));
- break;
- }
- }
- return s;
- }
/// Carries an indentation depth so that it can be written to a stream with
/// operator<<.
struct indent {
    /// Number of indentation units still to be written.
    int d;

    /// @param depth nesting level; stored as an int.
    explicit indent(size_t depth) : d(static_cast<int>(depth)) {}
};
- /// ostream operator for indent
- std::ostream &operator<<(std::ostream &os, indent x) {
- static const string spaces(" ");
- while (x.d--) {
- os << spaces;
- }
- return os;
- }
- void printCustomAttributes(const CustomAttributes& customAttributes, int depth,
- std::ostream &os) {
- std::map<std::string, std::string>::const_iterator iter =
- customAttributes.attributes().begin();
- while (iter != customAttributes.attributes().end()) {
- os << ",\n" << indent(depth);
- customAttributes.printJson(os, iter->first);
- ++iter;
- }
- }
- } // anonymous namespace
/// Number of output characters reserved per byte when a bytes or fixed
/// default value is rendered as a string (each byte is replaced by the
/// result of intToHex(), presumably a six-character escape -- confirm
/// against intToHex's definition).
constexpr int kByteStringSize = 6;
- SchemaResolution
- NodePrimitive::resolve(const Node &reader) const {
- if (type() == reader.type()) {
- return RESOLVE_MATCH;
- }
- switch (type()) {
- case AVRO_INT:
- if (reader.type() == AVRO_LONG) {
- return RESOLVE_PROMOTABLE_TO_LONG;
- }
- // fall-through intentional
- case AVRO_LONG:
- if (reader.type() == AVRO_FLOAT) {
- return RESOLVE_PROMOTABLE_TO_FLOAT;
- }
- // fall-through intentional
- case AVRO_FLOAT:
- if (reader.type() == AVRO_DOUBLE) {
- return RESOLVE_PROMOTABLE_TO_DOUBLE;
- }
- default: break;
- }
- return furtherResolution(reader);
- }
- SchemaResolution
- NodeRecord::resolve(const Node &reader) const {
- if (reader.type() == AVRO_RECORD) {
- if (name() == reader.name()) {
- return RESOLVE_MATCH;
- }
- }
- return furtherResolution(reader);
- }
- SchemaResolution
- NodeEnum::resolve(const Node &reader) const {
- if (reader.type() == AVRO_ENUM) {
- return (name() == reader.name()) ? RESOLVE_MATCH : RESOLVE_NO_MATCH;
- }
- return furtherResolution(reader);
- }
- SchemaResolution
- NodeArray::resolve(const Node &reader) const {
- if (reader.type() == AVRO_ARRAY) {
- const NodePtr &arrayType = leafAt(0);
- return arrayType->resolve(*reader.leafAt(0));
- }
- return furtherResolution(reader);
- }
- SchemaResolution
- NodeMap::resolve(const Node &reader) const {
- if (reader.type() == AVRO_MAP) {
- const NodePtr &mapType = leafAt(1);
- return mapType->resolve(*reader.leafAt(1));
- }
- return furtherResolution(reader);
- }
- SchemaResolution
- NodeUnion::resolve(const Node &reader) const {
- // If the writer is union, resolution only needs to occur when the selected
- // type of the writer is known, so this function is not very helpful.
- //
- // In this case, this function returns if there is a possible match given
- // any writer type, so just search type by type returning the best match
- // found.
- SchemaResolution match = RESOLVE_NO_MATCH;
- for (size_t i = 0; i < leaves(); ++i) {
- const NodePtr &node = leafAt(i);
- SchemaResolution thisMatch = node->resolve(reader);
- if (thisMatch == RESOLVE_MATCH) {
- match = thisMatch;
- break;
- }
- if (match == RESOLVE_NO_MATCH) {
- match = thisMatch;
- }
- }
- return match;
- }
- SchemaResolution
- NodeFixed::resolve(const Node &reader) const {
- if (reader.type() == AVRO_FIXED) {
- return (
- (reader.fixedSize() == fixedSize()) && (reader.name() == name()))
- ? RESOLVE_MATCH
- : RESOLVE_NO_MATCH;
- }
- return furtherResolution(reader);
- }
- SchemaResolution
- NodeSymbolic::resolve(const Node &reader) const {
- const NodePtr &node = leafAt(0);
- return node->resolve(reader);
- }
- void NodePrimitive::printJson(std::ostream &os, size_t depth) const {
- bool hasLogicalType = logicalType().type() != LogicalType::NONE;
- if (hasLogicalType) {
- os << "{\n"
- << indent(depth) << "\"type\": ";
- }
- os << '\"' << type() << '\"';
- if (hasLogicalType) {
- os << ",\n"
- << indent(depth);
- logicalType().printJson(os);
- os << "\n}";
- }
- if (!getDoc().empty()) {
- os << ",\n"
- << indent(depth) << R"("doc": ")"
- << escape(getDoc()) << "\"";
- }
- }
- void NodeSymbolic::printJson(std::ostream &os, size_t depth) const {
- os << '\"' << nameAttribute_.get() << '\"';
- if (!getDoc().empty()) {
- os << ",\n"
- << indent(depth) << R"("doc": ")"
- << escape(getDoc()) << "\"";
- }
- }
- static void printName(std::ostream &os, const Name &n, size_t depth) {
- if (!n.ns().empty()) {
- os << indent(depth) << R"("namespace": ")" << n.ns() << "\",\n";
- }
- os << indent(depth) << R"("name": ")" << n.simpleName() << "\",\n";
- }
- void NodeRecord::printJson(std::ostream &os, size_t depth) const {
- os << "{\n";
- os << indent(++depth) << "\"type\": \"record\",\n";
- printName(os, nameAttribute_.get(), depth);
- if (!getDoc().empty()) {
- os << indent(depth) << R"("doc": ")"
- << escape(getDoc()) << "\",\n";
- }
- os << indent(depth) << "\"fields\": [";
- size_t fields = leafAttributes_.size();
- ++depth;
- // Serialize "default" field:
- assert(defaultValues.empty() || (defaultValues.size() == fields));
- assert(customAttributes_.size() == 0 || customAttributes_.size() == fields);
- for (size_t i = 0; i < fields; ++i) {
- if (i > 0) {
- os << ',';
- }
- os << '\n'
- << indent(depth) << "{\n";
- os << indent(++depth) << R"("name": ")" << leafNameAttributes_.get(i) << "\",\n";
- os << indent(depth) << "\"type\": ";
- leafAttributes_.get(i)->printJson(os, depth);
- if (!defaultValues.empty()) {
- if (!defaultValues[i].isUnion() && defaultValues[i].type() == AVRO_NULL) {
- // No "default" field.
- } else {
- os << ",\n"
- << indent(depth) << "\"default\": ";
- leafAttributes_.get(i)->printDefaultToJson(defaultValues[i], os,
- depth);
- }
- }
- if(customAttributes_.size() == fields) {
- printCustomAttributes(customAttributes_.get(i), depth, os);
- }
- os << '\n';
- os << indent(--depth) << '}';
- }
- os << '\n'
- << indent(--depth) << "]\n";
- os << indent(--depth) << '}';
- }
- void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
- assert(isPrimitive(g.type()));
- switch (g.type()) {
- case AVRO_NULL:
- os << "null";
- break;
- case AVRO_BOOL:
- os << (g.value<bool>() ? "true" : "false");
- break;
- case AVRO_INT:
- os << g.value<int32_t>();
- break;
- case AVRO_LONG:
- os << g.value<int64_t>();
- break;
- case AVRO_FLOAT:
- os << g.value<float>();
- break;
- case AVRO_DOUBLE:
- os << g.value<double>();
- break;
- case AVRO_STRING:
- os << "\"" << escape(g.value<string>()) << "\"";
- break;
- case AVRO_BYTES: {
- // Convert to a string:
- const auto &vg = g.value<std::vector<uint8_t>>();
- string s;
- s.resize(vg.size() * kByteStringSize);
- for (unsigned int i = 0; i < vg.size(); i++) {
- string hex_string = intToHex(static_cast<int>(vg[i]));
- s.replace(i * kByteStringSize, kByteStringSize, hex_string);
- }
- os << "\"" << s << "\"";
- } break;
- default: break;
- }
- }
- void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
- assert(g.type() == AVRO_ENUM);
- os << "\"" << g.value<GenericEnum>().symbol() << "\"";
- }
- void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
- assert(g.type() == AVRO_FIXED);
- // ex: "\uOOff"
- // Convert to a string
- const std::vector<uint8_t> &vg = g.value<GenericFixed>().value();
- string s;
- s.resize(vg.size() * kByteStringSize);
- for (unsigned int i = 0; i < vg.size(); i++) {
- string hex_string = intToHex(static_cast<int>(vg[i]));
- s.replace(i * kByteStringSize, kByteStringSize, hex_string);
- }
- os << "\"" << s << "\"";
- }
- void NodeUnion::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
- leafAt(0)->printDefaultToJson(g, os, depth);
- }
- void NodeArray::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
- assert(g.type() == AVRO_ARRAY);
- // ex: "default": [1]
- if (g.value<GenericArray>().value().empty()) {
- os << "[]";
- } else {
- os << "[\n";
- depth++;
- // Serialize all values of the array with recursive calls:
- for (unsigned int i = 0; i < g.value<GenericArray>().value().size(); i++) {
- if (i > 0) {
- os << ",\n";
- }
- os << indent(depth);
- leafAt(0)->printDefaultToJson(g.value<GenericArray>().value()[i], os,
- depth);
- }
- os << "\n"
- << indent(--depth) << "]";
- }
- }
- void NodeSymbolic::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
- getNode()->printDefaultToJson(g, os, depth);
- }
- void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
- assert(g.type() == AVRO_RECORD);
- if (g.value<GenericRecord>().fieldCount() == 0) {
- os << "{}";
- } else {
- os << "{\n";
- // Serialize all fields of the record with recursive calls:
- for (size_t i = 0; i < g.value<GenericRecord>().fieldCount(); i++) {
- if (i == 0) {
- ++depth;
- } else { // i > 0
- os << ",\n";
- }
- os << indent(depth) << "\"";
- assert(i < leaves());
- os << leafNameAttributes_.get(i);
- os << "\": ";
- // Recursive call on child node to be able to get the name attribute
- // (In case of a record we need the name of the leaves (contained in
- // 'this'))
- leafAt(i)->printDefaultToJson(g.value<GenericRecord>().fieldAt(i), os,
- depth);
- }
- os << "\n"
- << indent(--depth) << "}";
- }
- }
- NodeRecord::NodeRecord(const HasName &name,
- const MultiLeaves &fields,
- const LeafNames &fieldsNames,
- std::vector<GenericDatum> dv) : NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, MultiAttributes(), NoSize()),
- defaultValues(std::move(dv)) {
- for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
- if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
- throw Exception(boost::format(
- "Cannot add duplicate field: %1%")
- % leafNameAttributes_.get(i));
- }
- }
- }
- void NodeMap::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
- assert(g.type() == AVRO_MAP);
- if (g.value<GenericMap>().value().empty()) {
- os << "{}";
- } else {
- os << "{\n";
- for (size_t i = 0; i < g.value<GenericMap>().value().size(); i++) {
- if (i == 0) {
- ++depth;
- } else {
- os << ",\n";
- }
- os << indent(depth) << "\"" << g.value<GenericMap>().value()[i].first
- << "\": ";
- leafAt(i)->printDefaultToJson(g.value<GenericMap>().value()[i].second, os,
- depth);
- }
- os << "\n"
- << indent(--depth) << "}";
- }
- }
- void NodeEnum::printJson(std::ostream &os, size_t depth) const {
- os << "{\n";
- os << indent(++depth) << "\"type\": \"enum\",\n";
- if (!getDoc().empty()) {
- os << indent(depth) << R"("doc": ")"
- << escape(getDoc()) << "\",\n";
- }
- printName(os, nameAttribute_.get(), depth);
- os << indent(depth) << "\"symbols\": [\n";
- int names = leafNameAttributes_.size();
- ++depth;
- for (int i = 0; i < names; ++i) {
- if (i > 0) {
- os << ",\n";
- }
- os << indent(depth) << '\"' << leafNameAttributes_.get(i) << '\"';
- }
- os << '\n';
- os << indent(--depth) << "]\n";
- os << indent(--depth) << '}';
- }
- void NodeArray::printJson(std::ostream &os, size_t depth) const {
- os << "{\n";
- os << indent(depth + 1) << "\"type\": \"array\",\n";
- if (!getDoc().empty()) {
- os << indent(depth + 1) << R"("doc": ")"
- << escape(getDoc()) << "\",\n";
- }
- os << indent(depth + 1) << "\"items\": ";
- leafAttributes_.get()->printJson(os, depth + 1);
- os << '\n';
- os << indent(depth) << '}';
- }
- void NodeMap::printJson(std::ostream &os, size_t depth) const {
- os << "{\n";
- os << indent(depth + 1) << "\"type\": \"map\",\n";
- if (!getDoc().empty()) {
- os << indent(depth + 1) << R"("doc": ")"
- << escape(getDoc()) << "\",\n";
- }
- os << indent(depth + 1) << "\"values\": ";
- leafAttributes_.get(1)->printJson(os, depth + 1);
- os << '\n';
- os << indent(depth) << '}';
- }
- NodeMap::NodeMap() : NodeImplMap(AVRO_MAP) {
- NodePtr key(new NodePrimitive(AVRO_STRING));
- doAddLeaf(key);
- }
- void NodeUnion::printJson(std::ostream &os, size_t depth) const {
- os << "[\n";
- int fields = leafAttributes_.size();
- ++depth;
- for (int i = 0; i < fields; ++i) {
- if (i > 0) {
- os << ",\n";
- }
- os << indent(depth);
- leafAttributes_.get(i)->printJson(os, depth);
- }
- os << '\n';
- os << indent(--depth) << ']';
- }
- void NodeFixed::printJson(std::ostream &os, size_t depth) const {
- os << "{\n";
- os << indent(++depth) << "\"type\": \"fixed\",\n";
- if (!getDoc().empty()) {
- os << indent(depth) << R"("doc": ")"
- << escape(getDoc()) << "\",\n";
- }
- printName(os, nameAttribute_.get(), depth);
- os << indent(depth) << "\"size\": " << sizeAttribute_.get();
- if (logicalType().type() != LogicalType::NONE) {
- os << ",\n"
- << indent(depth);
- logicalType().printJson(os);
- }
- os << "\n"
- << indent(--depth) << '}';
- }
- } // namespace avro
|