ValidSchema.cc 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include <boost/format.hpp>
  19. #include <cctype>
  20. #include <sstream>
  21. #include <utility>
  22. #include "Node.hh"
  23. #include "Schema.hh"
  24. #include "ValidSchema.hh"
  25. using boost::format;
  26. using std::make_pair;
  27. using std::ostringstream;
  28. using std::shared_ptr;
  29. using std::static_pointer_cast;
  30. using std::string;
  31. namespace avro {
  32. using SymbolMap = std::map<Name, NodePtr>;
  33. static bool validate(const NodePtr &node, SymbolMap &symbolMap) {
  34. if (!node->isValid()) {
  35. throw Exception(format("Schema is invalid, due to bad node of type %1%")
  36. % node->type());
  37. }
  38. if (node->hasName()) {
  39. const Name &nm = node->name();
  40. // FIXME: replace "find" with "lower_bound". The author seems to have intended
  41. // "lower_bound" here because of (1) the check for the contents of the iterator
  42. // that follows and (2) use of the iterator in insert later in the code.
  43. auto it = symbolMap.find(nm);
  44. auto found = it != symbolMap.end() && nm == it->first;
  45. if (node->type() == AVRO_SYMBOLIC) {
  46. if (!found) {
  47. throw Exception(format("Symbolic name \"%1%\" is unknown") % node->name());
  48. }
  49. shared_ptr<NodeSymbolic> symNode =
  50. static_pointer_cast<NodeSymbolic>(node);
  51. // if the symbolic link is already resolved, we return true,
  52. // otherwise returning false will force it to be resolved
  53. return symNode->isSet();
  54. }
  55. if (found) {
  56. return false;
  57. }
  58. symbolMap.insert(it, make_pair(nm, node));
  59. }
  60. node->lock();
  61. size_t leaves = node->leaves();
  62. for (size_t i = 0; i < leaves; ++i) {
  63. const NodePtr &leaf(node->leafAt(i));
  64. if (!validate(leaf, symbolMap)) {
  65. // if validate returns false it means a node with this name already
  66. // existed in the map, instead of keeping this node twice in the
  67. // map (which could potentially create circular shared pointer
  68. // links that would not be freed), replace this node with a
  69. // symbolic link to the original one.
  70. node->setLeafToSymbolic(i, symbolMap.find(leaf->name())->second);
  71. }
  72. }
  73. return true;
  74. }
  75. static void validate(const NodePtr &p) {
  76. SymbolMap m;
  77. validate(p, m);
  78. }
  79. ValidSchema::ValidSchema(NodePtr root) : root_(std::move(root)) {
  80. validate(root_);
  81. }
  82. ValidSchema::ValidSchema(const Schema &schema) : root_(schema.root()) {
  83. validate(root_);
  84. }
  85. ValidSchema::ValidSchema() : root_(NullSchema().root()) {
  86. validate(root_);
  87. }
  88. void ValidSchema::setSchema(const Schema &schema) {
  89. root_ = schema.root();
  90. validate(root_);
  91. }
  92. void ValidSchema::toJson(std::ostream &os) const {
  93. root_->printJson(os, 0);
  94. os << '\n';
  95. }
  96. string
  97. ValidSchema::toJson(bool prettyPrint) const {
  98. ostringstream oss;
  99. toJson(oss);
  100. if (!prettyPrint) {
  101. return compactSchema(oss.str());
  102. }
  103. return oss.str();
  104. }
  105. void ValidSchema::toFlatList(std::ostream &os) const {
  106. root_->printBasicInfo(os);
  107. }
  108. /*
  109. * compactSchema compacts and returns a formatted string representation
  110. * of a ValidSchema object by removing the whitespaces outside of the quoted
  111. * field names and values. It can handle the cases where the quoted value is
  112. * in UTF-8 format. Note that this method is not responsible for validating
  113. * the schema.
  114. */
  115. string ValidSchema::compactSchema(const string &schema) {
  116. auto insideQuote = false;
  117. size_t newPos = 0;
  118. string data = schema;
  119. for (auto c : schema) {
  120. if (!insideQuote && std::isspace(c)) {
  121. // Skip the white spaces outside quotes.
  122. continue;
  123. }
  124. if (c == '\"') {
  125. // It is valid for a quote to be part of the value for some fields,
  126. // e.g., the "doc" field. In that case, the quote is expected to be
  127. // escaped inside the schema. Since the escape character '\\' could
  128. // be escaped itself, we need to check whether there are an even
  129. // number of consecutive slashes prior to the quote.
  130. auto leadingSlashes = 0;
  131. for (int i = static_cast<int>(newPos) - 1; i >= 0; i--) {
  132. if (data[i] == '\\') {
  133. leadingSlashes++;
  134. } else {
  135. break;
  136. }
  137. }
  138. if (leadingSlashes % 2 == 0) {
  139. // Found a real quote which identifies either the start or the
  140. // end of a field name or value.
  141. insideQuote = !insideQuote;
  142. }
  143. }
  144. data[newPos++] = c;
  145. }
  146. if (insideQuote) {
  147. throw Exception("Schema is not well formed with mismatched quotes");
  148. }
  149. if (newPos < schema.size()) {
  150. data.resize(newPos);
  151. }
  152. return data;
  153. }
  154. } // namespace avro