ValidSchema.cc 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include <cctype>
  19. #include <sstream>
  20. #include <utility>
  21. #include "Node.hh"
  22. #include "Schema.hh"
  23. #include "ValidSchema.hh"
  24. using std::make_pair;
  25. using std::ostringstream;
  26. using std::shared_ptr;
  27. using std::static_pointer_cast;
  28. using std::string;
  29. namespace avro {
  30. using SymbolMap = std::map<Name, NodePtr>;
  31. static bool validate(const NodePtr &node, SymbolMap &symbolMap) {
  32. if (!node->isValid()) {
  33. throw Exception("Schema is invalid, due to bad node of type {}", node->type());
  34. }
  35. if (node->hasName()) {
  36. const Name &nm = node->name();
  37. // FIXME: replace "find" with "lower_bound". The author seems to have intended
  38. // "lower_bound" here because of (1) the check for the contents of the iterator
  39. // that follows and (2) use of the iterator in insert later in the code.
  40. auto it = symbolMap.find(nm);
  41. auto found = it != symbolMap.end() && nm == it->first;
  42. if (node->type() == AVRO_SYMBOLIC) {
  43. if (!found) {
  44. throw Exception("Symbolic name \"{}\" is unknown", node->name());
  45. }
  46. shared_ptr<NodeSymbolic> symNode =
  47. static_pointer_cast<NodeSymbolic>(node);
  48. // if the symbolic link is already resolved, we return true,
  49. // otherwise returning false will force it to be resolved
  50. return symNode->isSet();
  51. }
  52. if (found) {
  53. return false;
  54. }
  55. symbolMap.insert(it, make_pair(nm, node));
  56. }
  57. node->lock();
  58. size_t leaves = node->leaves();
  59. for (size_t i = 0; i < leaves; ++i) {
  60. const NodePtr &leaf(node->leafAt(i));
  61. if (!validate(leaf, symbolMap)) {
  62. // if validate returns false it means a node with this name already
  63. // existed in the map, instead of keeping this node twice in the
  64. // map (which could potentially create circular shared pointer
  65. // links that would not be freed), replace this node with a
  66. // symbolic link to the original one.
  67. node->setLeafToSymbolic(i, symbolMap.find(leaf->name())->second);
  68. }
  69. }
  70. return true;
  71. }
  72. static void validate(const NodePtr &p) {
  73. SymbolMap m;
  74. validate(p, m);
  75. }
  76. ValidSchema::ValidSchema(NodePtr root) : root_(std::move(root)) {
  77. validate(root_);
  78. }
  79. ValidSchema::ValidSchema(const Schema &schema) : root_(schema.root()) {
  80. validate(root_);
  81. }
  82. ValidSchema::ValidSchema() : root_(NullSchema().root()) {
  83. validate(root_);
  84. }
  85. void ValidSchema::setSchema(const Schema &schema) {
  86. root_ = schema.root();
  87. validate(root_);
  88. }
  89. void ValidSchema::toJson(std::ostream &os) const {
  90. root_->printJson(os, 0);
  91. os << '\n';
  92. }
  93. string
  94. ValidSchema::toJson(bool prettyPrint) const {
  95. ostringstream oss;
  96. toJson(oss);
  97. if (!prettyPrint) {
  98. return compactSchema(oss.str());
  99. }
  100. return oss.str();
  101. }
  102. void ValidSchema::toFlatList(std::ostream &os) const {
  103. root_->printBasicInfo(os);
  104. }
  105. /*
  106. * compactSchema compacts and returns a formatted string representation
  107. * of a ValidSchema object by removing the whitespaces outside of the quoted
  108. * field names and values. It can handle the cases where the quoted value is
  109. * in UTF-8 format. Note that this method is not responsible for validating
  110. * the schema.
  111. */
  112. string ValidSchema::compactSchema(const string &schema) {
  113. auto insideQuote = false;
  114. size_t newPos = 0;
  115. string data = schema;
  116. for (auto c : schema) {
  117. if (!insideQuote && std::isspace(c)) {
  118. // Skip the white spaces outside quotes.
  119. continue;
  120. }
  121. if (c == '\"') {
  122. // It is valid for a quote to be part of the value for some fields,
  123. // e.g., the "doc" field. In that case, the quote is expected to be
  124. // escaped inside the schema. Since the escape character '\\' could
  125. // be escaped itself, we need to check whether there are an even
  126. // number of consecutive slashes prior to the quote.
  127. auto leadingSlashes = 0;
  128. for (int i = static_cast<int>(newPos) - 1; i >= 0; i--) {
  129. if (data[i] == '\\') {
  130. leadingSlashes++;
  131. } else {
  132. break;
  133. }
  134. }
  135. if (leadingSlashes % 2 == 0) {
  136. // Found a real quote which identifies either the start or the
  137. // end of a field name or value.
  138. insideQuote = !insideQuote;
  139. }
  140. }
  141. data[newPos++] = c;
  142. }
  143. if (insideQuote) {
  144. throw Exception("Schema is not well formed with mismatched quotes");
  145. }
  146. if (newPos < schema.size()) {
  147. data.resize(newPos);
  148. }
  149. return data;
  150. }
  151. } // namespace avro