123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535 |
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include "ValidatingCodec.hh"
- #include <algorithm>
- #include <boost/any.hpp>
- #include <map>
- #include <memory>
- #include <utility>
- #include "Decoder.hh"
- #include "Encoder.hh"
- #include "NodeImpl.hh"
- #include "ValidSchema.hh"
- namespace avro {
- using std::make_shared;
- namespace parsing {
- using std::shared_ptr;
- using std::static_pointer_cast;
- using std::map;
- using std::ostringstream;
- using std::pair;
- using std::reverse;
- using std::string;
- using std::vector;
- /** Follows the design of Avro Parser in Java. */
- ProductionPtr ValidatingGrammarGenerator::generate(const NodePtr &n) {
- map<NodePtr, ProductionPtr> m;
- ProductionPtr result = doGenerate(n, m);
- fixup(result, m);
- return result;
- }
- Symbol ValidatingGrammarGenerator::generate(const ValidSchema &schema) {
- ProductionPtr r = generate(schema.root());
- return Symbol::rootSymbol(r);
- }
- ProductionPtr ValidatingGrammarGenerator::doGenerate(const NodePtr &n,
- map<NodePtr, ProductionPtr> &m) {
- switch (n->type()) {
- case AVRO_NULL:
- return make_shared<Production>(1, Symbol::nullSymbol());
- case AVRO_BOOL:
- return make_shared<Production>(1, Symbol::boolSymbol());
- case AVRO_INT:
- return make_shared<Production>(1, Symbol::intSymbol());
- case AVRO_LONG:
- return make_shared<Production>(1, Symbol::longSymbol());
- case AVRO_FLOAT:
- return make_shared<Production>(1, Symbol::floatSymbol());
- case AVRO_DOUBLE:
- return make_shared<Production>(1, Symbol::doubleSymbol());
- case AVRO_STRING:
- return make_shared<Production>(1, Symbol::stringSymbol());
- case AVRO_BYTES:
- return make_shared<Production>(1, Symbol::bytesSymbol());
- case AVRO_FIXED: {
- ProductionPtr result = make_shared<Production>();
- result->push_back(Symbol::sizeCheckSymbol(n->fixedSize()));
- result->push_back(Symbol::fixedSymbol());
- m[n] = result;
- return result;
- }
- case AVRO_RECORD: {
- ProductionPtr result = make_shared<Production>();
- m.erase(n);
- size_t c = n->leaves();
- for (size_t i = 0; i < c; ++i) {
- const NodePtr &leaf = n->leafAt(i);
- ProductionPtr v = doGenerate(leaf, m);
- copy(v->rbegin(), v->rend(), back_inserter(*result));
- }
- reverse(result->begin(), result->end());
- m[n] = result;
- return make_shared<Production>(1, Symbol::indirect(result));
- }
- case AVRO_ENUM: {
- ProductionPtr result = make_shared<Production>();
- result->push_back(Symbol::sizeCheckSymbol(n->names()));
- result->push_back(Symbol::enumSymbol());
- m[n] = result;
- return result;
- }
- case AVRO_ARRAY: {
- ProductionPtr result = make_shared<Production>();
- result->push_back(Symbol::arrayEndSymbol());
- result->push_back(Symbol::repeater(doGenerate(n->leafAt(0), m), true));
- result->push_back(Symbol::arrayStartSymbol());
- return result;
- }
- case AVRO_MAP: {
- ProductionPtr pp = doGenerate(n->leafAt(1), m);
- ProductionPtr v(new Production(*pp));
- v->push_back(Symbol::stringSymbol());
- ProductionPtr result = make_shared<Production>();
- result->push_back(Symbol::mapEndSymbol());
- result->push_back(Symbol::repeater(v, false));
- result->push_back(Symbol::mapStartSymbol());
- return result;
- }
- case AVRO_UNION: {
- vector<ProductionPtr> vv;
- size_t c = n->leaves();
- vv.reserve(c);
- for (size_t i = 0; i < c; ++i) {
- vv.push_back(doGenerate(n->leafAt(i), m));
- }
- ProductionPtr result = make_shared<Production>();
- result->push_back(Symbol::alternative(vv));
- result->push_back(Symbol::unionSymbol());
- return result;
- }
- case AVRO_SYMBOLIC: {
- shared_ptr<NodeSymbolic> ns = static_pointer_cast<NodeSymbolic>(n);
- NodePtr nn = ns->getNode();
- auto it = m.find(nn);
- if (it != m.end() && it->second) {
- return it->second;
- } else {
- m[nn] = ProductionPtr();
- return make_shared<Production>(1, Symbol::placeholder(nn));
- }
- }
- default:
- throw Exception("Unknown node type");
- }
- }
- struct DummyHandler {
- static size_t handle(const Symbol &) {
- return 0;
- }
- };
- template<typename P>
- class ValidatingDecoder : public Decoder {
- const shared_ptr<Decoder> base;
- DummyHandler handler_;
- P parser;
- void init(InputStream &is) final;
- void decodeNull() final;
- bool decodeBool() final;
- int32_t decodeInt() final;
- int64_t decodeLong() final;
- float decodeFloat() final;
- double decodeDouble() final;
- void decodeString(string &value) final;
- void skipString() final;
- void decodeBytes(vector<uint8_t> &value) final;
- void skipBytes() final;
- void decodeFixed(size_t n, vector<uint8_t> &value) final;
- void skipFixed(size_t n) final;
- size_t decodeEnum() final;
- size_t arrayStart() final;
- size_t arrayNext() final;
- size_t skipArray() final;
- size_t mapStart() final;
- size_t mapNext() final;
- size_t skipMap() final;
- size_t decodeUnionIndex() final;
- void drain() final {
- base->drain();
- }
- public:
- ValidatingDecoder(const ValidSchema &s, const shared_ptr<Decoder> &b) : base(b),
- parser(ValidatingGrammarGenerator().generate(s), NULL, handler_) {}
- };
- template<typename P>
- void ValidatingDecoder<P>::init(InputStream &is) {
- base->init(is);
- }
- template<typename P>
- void ValidatingDecoder<P>::decodeNull() {
- parser.advance(Symbol::Kind::Null);
- base->decodeNull();
- }
- template<typename P>
- bool ValidatingDecoder<P>::decodeBool() {
- parser.advance(Symbol::Kind::Bool);
- return base->decodeBool();
- }
- template<typename P>
- int32_t ValidatingDecoder<P>::decodeInt() {
- parser.advance(Symbol::Kind::Int);
- return base->decodeInt();
- }
- template<typename P>
- int64_t ValidatingDecoder<P>::decodeLong() {
- parser.advance(Symbol::Kind::Long);
- return base->decodeLong();
- }
- template<typename P>
- float ValidatingDecoder<P>::decodeFloat() {
- parser.advance(Symbol::Kind::Float);
- return base->decodeFloat();
- }
- template<typename P>
- double ValidatingDecoder<P>::decodeDouble() {
- parser.advance(Symbol::Kind::Double);
- return base->decodeDouble();
- }
- template<typename P>
- void ValidatingDecoder<P>::decodeString(string &value) {
- parser.advance(Symbol::Kind::String);
- base->decodeString(value);
- }
- template<typename P>
- void ValidatingDecoder<P>::skipString() {
- parser.advance(Symbol::Kind::String);
- base->skipString();
- }
- template<typename P>
- void ValidatingDecoder<P>::decodeBytes(vector<uint8_t> &value) {
- parser.advance(Symbol::Kind::Bytes);
- base->decodeBytes(value);
- }
- template<typename P>
- void ValidatingDecoder<P>::skipBytes() {
- parser.advance(Symbol::Kind::Bytes);
- base->skipBytes();
- }
- template<typename P>
- void ValidatingDecoder<P>::decodeFixed(size_t n, vector<uint8_t> &value) {
- parser.advance(Symbol::Kind::Fixed);
- parser.assertSize(n);
- base->decodeFixed(n, value);
- }
- template<typename P>
- void ValidatingDecoder<P>::skipFixed(size_t n) {
- parser.advance(Symbol::Kind::Fixed);
- parser.assertSize(n);
- base->skipFixed(n);
- }
- template<typename P>
- size_t ValidatingDecoder<P>::decodeEnum() {
- parser.advance(Symbol::Kind::Enum);
- size_t result = base->decodeEnum();
- parser.assertLessThanSize(result);
- return result;
- }
- template<typename P>
- size_t ValidatingDecoder<P>::arrayStart() {
- parser.advance(Symbol::Kind::ArrayStart);
- size_t result = base->arrayStart();
- parser.pushRepeatCount(result);
- if (result == 0) {
- parser.popRepeater();
- parser.advance(Symbol::Kind::ArrayEnd);
- }
- return result;
- }
- template<typename P>
- size_t ValidatingDecoder<P>::arrayNext() {
- size_t result = base->arrayNext();
- parser.nextRepeatCount(result);
- if (result == 0) {
- parser.popRepeater();
- parser.advance(Symbol::Kind::ArrayEnd);
- }
- return result;
- }
- template<typename P>
- size_t ValidatingDecoder<P>::skipArray() {
- parser.advance(Symbol::Kind::ArrayStart);
- size_t n = base->skipArray();
- if (n == 0) {
- parser.pop();
- } else {
- parser.pushRepeatCount(n);
- parser.skip(*base);
- }
- parser.advance(Symbol::Kind::ArrayEnd);
- return 0;
- }
- template<typename P>
- size_t ValidatingDecoder<P>::mapStart() {
- parser.advance(Symbol::Kind::MapStart);
- size_t result = base->mapStart();
- parser.pushRepeatCount(result);
- if (result == 0) {
- parser.popRepeater();
- parser.advance(Symbol::Kind::MapEnd);
- }
- return result;
- }
- template<typename P>
- size_t ValidatingDecoder<P>::mapNext() {
- size_t result = base->mapNext();
- parser.nextRepeatCount(result);
- if (result == 0) {
- parser.popRepeater();
- parser.advance(Symbol::Kind::MapEnd);
- }
- return result;
- }
- template<typename P>
- size_t ValidatingDecoder<P>::skipMap() {
- parser.advance(Symbol::Kind::MapStart);
- size_t n = base->skipMap();
- if (n == 0) {
- parser.pop();
- } else {
- parser.pushRepeatCount(n);
- parser.skip(*base);
- }
- parser.advance(Symbol::Kind::MapEnd);
- return 0;
- }
- template<typename P>
- size_t ValidatingDecoder<P>::decodeUnionIndex() {
- parser.advance(Symbol::Kind::Union);
- size_t result = base->decodeUnionIndex();
- parser.selectBranch(result);
- return result;
- }
- template<typename P>
- class ValidatingEncoder : public Encoder {
- DummyHandler handler_;
- P parser_;
- EncoderPtr base_;
- void init(OutputStream &os) final;
- void flush() final;
- int64_t byteCount() const final;
- void encodeNull() final;
- void encodeBool(bool b) final;
- void encodeInt(int32_t i) final;
- void encodeLong(int64_t l) final;
- void encodeFloat(float f) final;
- void encodeDouble(double d) final;
- void encodeString(const std::string &s) final;
- void encodeBytes(const uint8_t *bytes, size_t len) final;
- void encodeFixed(const uint8_t *bytes, size_t len) final;
- void encodeEnum(size_t e) final;
- void arrayStart() final;
- void arrayEnd() final;
- void mapStart() final;
- void mapEnd() final;
- void setItemCount(size_t count) final;
- void startItem() final;
- void encodeUnionIndex(size_t e) final;
- public:
- ValidatingEncoder(const ValidSchema &schema, EncoderPtr base) : parser_(ValidatingGrammarGenerator().generate(schema), NULL, handler_),
- base_(std::move(base)) {}
- };
- template<typename P>
- void ValidatingEncoder<P>::init(OutputStream &os) {
- base_->init(os);
- }
- template<typename P>
- void ValidatingEncoder<P>::flush() {
- base_->flush();
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeNull() {
- parser_.advance(Symbol::Kind::Null);
- base_->encodeNull();
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeBool(bool b) {
- parser_.advance(Symbol::Kind::Bool);
- base_->encodeBool(b);
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeInt(int32_t i) {
- parser_.advance(Symbol::Kind::Int);
- base_->encodeInt(i);
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeLong(int64_t l) {
- parser_.advance(Symbol::Kind::Long);
- base_->encodeLong(l);
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeFloat(float f) {
- parser_.advance(Symbol::Kind::Float);
- base_->encodeFloat(f);
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeDouble(double d) {
- parser_.advance(Symbol::Kind::Double);
- base_->encodeDouble(d);
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeString(const std::string &s) {
- parser_.advance(Symbol::Kind::String);
- base_->encodeString(s);
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeBytes(const uint8_t *bytes, size_t len) {
- parser_.advance(Symbol::Kind::Bytes);
- base_->encodeBytes(bytes, len);
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeFixed(const uint8_t *bytes, size_t len) {
- parser_.advance(Symbol::Kind::Fixed);
- parser_.assertSize(len);
- base_->encodeFixed(bytes, len);
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeEnum(size_t e) {
- parser_.advance(Symbol::Kind::Enum);
- parser_.assertLessThanSize(e);
- base_->encodeEnum(e);
- }
- template<typename P>
- void ValidatingEncoder<P>::arrayStart() {
- parser_.advance(Symbol::Kind::ArrayStart);
- parser_.pushRepeatCount(0);
- base_->arrayStart();
- }
- template<typename P>
- void ValidatingEncoder<P>::arrayEnd() {
- parser_.popRepeater();
- parser_.advance(Symbol::Kind::ArrayEnd);
- base_->arrayEnd();
- }
- template<typename P>
- void ValidatingEncoder<P>::mapStart() {
- parser_.advance(Symbol::Kind::MapStart);
- parser_.pushRepeatCount(0);
- base_->mapStart();
- }
- template<typename P>
- void ValidatingEncoder<P>::mapEnd() {
- parser_.popRepeater();
- parser_.advance(Symbol::Kind::MapEnd);
- base_->mapEnd();
- }
- template<typename P>
- void ValidatingEncoder<P>::setItemCount(size_t count) {
- parser_.nextRepeatCount(count);
- base_->setItemCount(count);
- }
- template<typename P>
- void ValidatingEncoder<P>::startItem() {
- parser_.processImplicitActions();
- if (parser_.top() != Symbol::Kind::Repeater) {
- throw Exception("startItem at not an item boundary");
- }
- base_->startItem();
- }
- template<typename P>
- void ValidatingEncoder<P>::encodeUnionIndex(size_t e) {
- parser_.advance(Symbol::Kind::Union);
- parser_.selectBranch(e);
- base_->encodeUnionIndex(e);
- }
- template<typename P>
- int64_t ValidatingEncoder<P>::byteCount() const {
- return base_->byteCount();
- }
- } // namespace parsing
- DecoderPtr validatingDecoder(const ValidSchema &s,
- const DecoderPtr &base) {
- return make_shared<parsing::ValidatingDecoder<parsing::SimpleParser<parsing::DummyHandler>>>(s, base);
- }
- EncoderPtr validatingEncoder(const ValidSchema &schema, const EncoderPtr &base) {
- return make_shared<parsing::ValidatingEncoder<parsing::SimpleParser<parsing::DummyHandler>>>(schema, base);
- }
- } // namespace avro
|