123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225 |
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #ifndef avro_Decoder_hh__
- #define avro_Decoder_hh__
- #include "Config.hh"
- #include <cstdint>
- #include <memory>
- #include <string>
- #include <vector>
- #include "Stream.hh"
- #include "ValidSchema.hh"
- /// \file
- ///
- /// Low level support for decoding avro values.
- /// This class has two types of functions. One type of function supports
- /// decoding of leaf values (for example, decodeLong and
- /// decodeString). These functions have analogs in Encoder.
- ///
- /// The other type of function supports decoding of maps and arrays.
- /// These functions are arrayStart, arrayNext, and skipArray
- /// (and similar functions for maps).
- namespace avro {
- /**
- * Decoder is an interface implemented by every decoder capable
- * of decoding Avro data.
- */
- class AVRO_DECL Decoder {
- public:
- virtual ~Decoder() = default;
- /// All future decoding will come from \p is, which should be valid
- /// until replaced by another call to init() or this Decoder is
- /// destructed.
- virtual void init(InputStream &is) = 0;
- /// Decodes a null from the current stream.
- virtual void decodeNull() = 0;
- /// Decodes a bool from the current stream.
- virtual bool decodeBool() = 0;
- /// Decodes a 32-bit int from the current stream.
- virtual int32_t decodeInt() = 0;
- /// Decodes a 64-bit signed int from the current stream.
- virtual int64_t decodeLong() = 0;
- /// Decodes a single-precision floating point number from the current stream.
- virtual float decodeFloat() = 0;
- /// Decodes a double-precision floating point number from the current stream.
- virtual double decodeDouble() = 0;
- /// Decodes a UTF-8 string from the current stream and returns it by value; forwards to decodeString(std::string &).
- std::string decodeString() {
- std::string result;
- decodeString(result);
- return result;
- }
- /**
- * Decodes a UTF-8 string from the stream and assigns it to value.
- */
- virtual void decodeString(std::string &value) = 0;
- /// Skips a string on the current stream.
- virtual void skipString() = 0;
- /// Decodes arbitrary binary data from the current stream and returns it by value; forwards to decodeBytes(std::vector<uint8_t> &).
- std::vector<uint8_t> decodeBytes() {
- std::vector<uint8_t> result;
- decodeBytes(result);
- return result;
- }
- /// Decodes arbitrary binary data from the current stream and puts it
- /// in value.
- virtual void decodeBytes(std::vector<uint8_t> &value) = 0;
- /// Skips bytes on the current stream.
- virtual void skipBytes() = 0;
- /**
- * Decodes fixed length binary from the current stream.
- * Convenience overload that forwards to
- * decodeFixed(size_t, std::vector<uint8_t> &) and returns the result by value.
- * \param[in] n The size (byte count) of the fixed being read.
- * \return The fixed data that has been read. The size of the returned
- * vector is guaranteed to be equal to \p n.
- */
- std::vector<uint8_t> decodeFixed(size_t n) {
- std::vector<uint8_t> result;
- decodeFixed(n, result);
- return result;
- }
- /**
- * Decodes a fixed from the current stream.
- * \param[in] n The size (byte count) of the fixed being read.
- * \param[out] value The value that receives the fixed. The vector will
- * be size-adjusted based on the fixed schema's size.
- */
- virtual void decodeFixed(size_t n, std::vector<uint8_t> &value) = 0;
- /// Skips fixed length binary on the current stream.
- virtual void skipFixed(size_t n) = 0;
- /// Decodes an enum from the current stream.
- virtual size_t decodeEnum() = 0;
- /// Start decoding an array. Returns the number of entries in the first chunk.
- virtual size_t arrayStart() = 0;
- /// Returns the number of entries in the next chunk. 0 if last.
- virtual size_t arrayNext() = 0;
- /// Tries to skip an array. If it can, it returns 0. Otherwise
- /// it returns the number of elements to be skipped. The client
- /// should skip the individual items. In such cases, skipArray
- /// is identical to arrayStart.
- virtual size_t skipArray() = 0;
- /// Start decoding a map. Returns the number of entries in the first chunk.
- virtual size_t mapStart() = 0;
- /// Returns the number of entries in the next chunk. 0 if last.
- virtual size_t mapNext() = 0;
- /// Tries to skip a map. If it can, it returns 0. Otherwise
- /// it returns the number of elements to be skipped. The client
- /// should skip the individual items. In such cases, skipMap
- /// is identical to mapStart.
- virtual size_t skipMap() = 0;
- /// Decodes a branch of a union. The actual value is to follow.
- virtual size_t decodeUnionIndex() = 0;
- /// Drains any additional data at the end of the current entry in a stream.
- /// It also returns any unused bytes back to any underlying input stream.
- /// One situation where this happens is when the reader's schema and
- /// the writer's schema are records but are different and the writer's
- /// record has more fields at the end of the record.
- /// Leaving such data unread is usually not a problem. If multiple
- /// records are stored consecutively in a stream (e.g. Avro data file)
- /// any attempt to read the next record will automatically skip
- /// those extra fields of the current record. It would still leave
- /// the extra fields at the end of the last record in the stream.
- /// This would mean that the stream is not in a good state. For example,
- /// if some non-avro information is stored at the end of the stream,
- /// the consumers of such data would see the bytes left behind
- /// by the avro decoder. A similar set of problems occurs if the Decoder
- /// consumes more than what it should.
- virtual void drain() = 0;
- };
- /**
- * Shared pointer (std::shared_ptr) to a Decoder.
- */
- using DecoderPtr = std::shared_ptr<Decoder>;
- /**
- * ResolvingDecoder is derived from \ref Decoder, with an additional
- * function to obtain the field ordering of fields within a record.
- */
- class AVRO_DECL ResolvingDecoder : public Decoder {
- public:
- /// Returns the order of fields for records.
- /// The order of fields could be different from their
- /// order in the schema because the writer's field order could
- /// be different. In order to avoid buffering and later use,
- /// we return the values in the writer's field order.
- virtual const std::vector<size_t> &fieldOrder() = 0;
- };
- /**
- * Shared pointer (std::shared_ptr) to a ResolvingDecoder.
- */
- using ResolvingDecoderPtr = std::shared_ptr<ResolvingDecoder>;
- /**
- * Returns a decoder that can decode binary Avro standard.
- */
- AVRO_DECL DecoderPtr binaryDecoder();
- /**
- * Returns a decoder that validates the sequence of calls to an underlying
- * Decoder (\p base) against the given schema.
- */
- AVRO_DECL DecoderPtr validatingDecoder(const ValidSchema &schema,
- const DecoderPtr &base);
- /**
- * Returns a decoder that can decode Avro standard for JSON.
- */
- AVRO_DECL DecoderPtr jsonDecoder(const ValidSchema &schema);
- /**
- * Returns a decoder that decodes avro data from \p base written according to
- * the \p writer schema and resolves it against the \p reader schema.
- * The client uses the decoder as if the data were written using the
- * \p reader schema.
- * // FIXME: Handle out of order fields.
- */
- AVRO_DECL ResolvingDecoderPtr resolvingDecoder(const ValidSchema &writer,
- const ValidSchema &reader, const DecoderPtr &base);
- } // namespace avro
- #endif
|