123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include <boost/format.hpp>
- #include <cctype>
- #include <sstream>
- #include <utility>
- #include "Node.hh"
- #include "Schema.hh"
- #include "ValidSchema.hh"
- using boost::format;
- using std::make_pair;
- using std::ostringstream;
- using std::shared_ptr;
- using std::static_pointer_cast;
- using std::string;
- namespace avro {
- using SymbolMap = std::map<Name, NodePtr>;
- static bool validate(const NodePtr &node, SymbolMap &symbolMap) {
- if (!node->isValid()) {
- throw Exception(format("Schema is invalid, due to bad node of type %1%")
- % node->type());
- }
- if (node->hasName()) {
- const Name &nm = node->name();
- // FIXME: replace "find" with "lower_bound". The author seems to have intended
- // "lower_bound" here because of (1) the check for the contents of the iterator
- // that follows and (2) use of the iterator in insert later in the code.
- auto it = symbolMap.find(nm);
- auto found = it != symbolMap.end() && nm == it->first;
- if (node->type() == AVRO_SYMBOLIC) {
- if (!found) {
- throw Exception(format("Symbolic name \"%1%\" is unknown") % node->name());
- }
- shared_ptr<NodeSymbolic> symNode =
- static_pointer_cast<NodeSymbolic>(node);
- // if the symbolic link is already resolved, we return true,
- // otherwise returning false will force it to be resolved
- return symNode->isSet();
- }
- if (found) {
- return false;
- }
- symbolMap.insert(it, make_pair(nm, node));
- }
- node->lock();
- size_t leaves = node->leaves();
- for (size_t i = 0; i < leaves; ++i) {
- const NodePtr &leaf(node->leafAt(i));
- if (!validate(leaf, symbolMap)) {
- // if validate returns false it means a node with this name already
- // existed in the map, instead of keeping this node twice in the
- // map (which could potentially create circular shared pointer
- // links that would not be freed), replace this node with a
- // symbolic link to the original one.
- node->setLeafToSymbolic(i, symbolMap.find(leaf->name())->second);
- }
- }
- return true;
- }
- static void validate(const NodePtr &p) {
- SymbolMap m;
- validate(p, m);
- }
- ValidSchema::ValidSchema(NodePtr root) : root_(std::move(root)) {
- validate(root_);
- }
- ValidSchema::ValidSchema(const Schema &schema) : root_(schema.root()) {
- validate(root_);
- }
- ValidSchema::ValidSchema() : root_(NullSchema().root()) {
- validate(root_);
- }
- void ValidSchema::setSchema(const Schema &schema) {
- root_ = schema.root();
- validate(root_);
- }
- void ValidSchema::toJson(std::ostream &os) const {
- root_->printJson(os, 0);
- os << '\n';
- }
- string
- ValidSchema::toJson(bool prettyPrint) const {
- ostringstream oss;
- toJson(oss);
- if (!prettyPrint) {
- return compactSchema(oss.str());
- }
- return oss.str();
- }
- void ValidSchema::toFlatList(std::ostream &os) const {
- root_->printBasicInfo(os);
- }
- /*
- * compactSchema compacts and returns a formatted string representation
- * of a ValidSchema object by removing the whitespaces outside of the quoted
- * field names and values. It can handle the cases where the quoted value is
- * in UTF-8 format. Note that this method is not responsible for validating
- * the schema.
- */
- string ValidSchema::compactSchema(const string &schema) {
- auto insideQuote = false;
- size_t newPos = 0;
- string data = schema;
- for (auto c : schema) {
- if (!insideQuote && std::isspace(c)) {
- // Skip the white spaces outside quotes.
- continue;
- }
- if (c == '\"') {
- // It is valid for a quote to be part of the value for some fields,
- // e.g., the "doc" field. In that case, the quote is expected to be
- // escaped inside the schema. Since the escape character '\\' could
- // be escaped itself, we need to check whether there are an even
- // number of consecutive slashes prior to the quote.
- auto leadingSlashes = 0;
- for (int i = static_cast<int>(newPos) - 1; i >= 0; i--) {
- if (data[i] == '\\') {
- leadingSlashes++;
- } else {
- break;
- }
- }
- if (leadingSlashes % 2 == 0) {
- // Found a real quote which identifies either the start or the
- // end of a field name or value.
- insideQuote = !insideQuote;
- }
- }
- data[newPos++] = c;
- }
- if (insideQuote) {
- throw Exception("Schema is not well formed with mismatched quotes");
- }
- if (newPos < schema.size()) {
- data.resize(newPos);
- }
- return data;
- }
- } // namespace avro
|