Decoder.hh 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * https://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef avro_Decoder_hh__
  19. #define avro_Decoder_hh__
  20. #include "Config.hh"
  21. #include <cstdint>
  22. #include <memory>
  23. #include <string>
  24. #include <vector>
  25. #include "Stream.hh"
  26. #include "ValidSchema.hh"
  27. /// \file
  28. ///
  29. /// Low level support for decoding avro values.
  30. /// This class has two types of functions. One type of function supports
  31. /// decoding of leaf values (for example, decodeLong and
  32. /// decodeString). These functions have analogs in Encoder.
  33. ///
  34. /// The other type of function supports decoding of maps and arrays.
  35. /// These functions are arrayStart, arrayNext, and skipArray
  36. /// (and similar functions for maps).
  37. namespace avro {
  38. /**
  39. * Decoder is an interface implemented by every decoder capable
  40. * of decoding Avro data.
  41. */
  42. class AVRO_DECL Decoder {
  43. public:
  44. virtual ~Decoder() = default;
  45. /// All future decoding will come from is, which should be valid
  46. /// until replaced by another call to init() or this Decoder is
  47. /// destructed.
  48. virtual void init(InputStream &is) = 0;
  49. /// Decodes a null from the current stream.
  50. virtual void decodeNull() = 0;
  51. /// Decodes a bool from the current stream
  52. virtual bool decodeBool() = 0;
  53. /// Decodes a 32-bit int from the current stream.
  54. virtual int32_t decodeInt() = 0;
  55. /// Decodes a 64-bit signed int from the current stream.
  56. virtual int64_t decodeLong() = 0;
  57. /// Decodes a single-precision floating point number from current stream.
  58. virtual float decodeFloat() = 0;
  59. /// Decodes a double-precision floating point number from current stream.
  60. virtual double decodeDouble() = 0;
  61. /// Decodes a UTF-8 string from the current stream.
  62. std::string decodeString() {
  63. std::string result;
  64. decodeString(result);
  65. return result;
  66. }
  67. /**
  68. * Decodes a UTF-8 string from the stream and assigns it to value.
  69. */
  70. virtual void decodeString(std::string &value) = 0;
  71. /// Skips a string on the current stream.
  72. virtual void skipString() = 0;
  73. /// Decodes arbitrary binary data from the current stream.
  74. std::vector<uint8_t> decodeBytes() {
  75. std::vector<uint8_t> result;
  76. decodeBytes(result);
  77. return result;
  78. }
  79. /// Decodes arbitrary binary data from the current stream and puts it
  80. /// in value.
  81. virtual void decodeBytes(std::vector<uint8_t> &value) = 0;
  82. /// Skips bytes on the current stream.
  83. virtual void skipBytes() = 0;
  84. /**
  85. * Decodes fixed length binary from the current stream.
  86. * \param[in] n The size (byte count) of the fixed being read.
  87. * \return The fixed data that has been read. The size of the returned
  88. * vector is guaranteed to be equal to \p n.
  89. */
  90. std::vector<uint8_t> decodeFixed(size_t n) {
  91. std::vector<uint8_t> result;
  92. decodeFixed(n, result);
  93. return result;
  94. }
  95. /**
  96. * Decodes a fixed from the current stream.
  97. * \param[in] n The size (byte count) of the fixed being read.
  98. * \param[out] value The value that receives the fixed. The vector will
  99. * be size-adjusted based on the fixed schema's size.
  100. */
  101. virtual void decodeFixed(size_t n, std::vector<uint8_t> &value) = 0;
  102. /// Skips fixed length binary on the current stream.
  103. virtual void skipFixed(size_t n) = 0;
  104. /// Decodes enum from the current stream.
  105. virtual size_t decodeEnum() = 0;
  106. /// Start decoding an array. Returns the number of entries in first chunk.
  107. virtual size_t arrayStart() = 0;
  108. /// Returns the number of entries in next chunk. 0 if last.
  109. virtual size_t arrayNext() = 0;
  110. /// Tries to skip an array. If it can, it returns 0. Otherwise
  111. /// it returns the number of elements to be skipped. The client
  112. /// should skip the individual items. In such cases, skipArray
  113. /// is identical to arrayStart.
  114. virtual size_t skipArray() = 0;
  115. /// Start decoding a map. Returns the number of entries in first chunk.
  116. virtual size_t mapStart() = 0;
  117. /// Returns the number of entries in next chunk. 0 if last.
  118. virtual size_t mapNext() = 0;
  119. /// Tries to skip a map. If it can, it returns 0. Otherwise
  120. /// it returns the number of elements to be skipped. The client
  121. /// should skip the individual items. In such cases, skipMap
  122. /// is identical to mapStart.
  123. virtual size_t skipMap() = 0;
  124. /// Decodes a branch of a union. The actual value is to follow.
  125. virtual size_t decodeUnionIndex() = 0;
  126. /// Drains any additional data at the end of the current entry in a stream.
  127. /// It also returns any unused bytes back to any underlying input stream.
  128. /// One situation this happens is when the reader's schema and
  129. /// the writer's schema are records but are different and the writer's
  130. /// record has more fields at the end of the record.
  131. /// Leaving such data unread is usually not a problem. If multiple
  132. /// records are stored consecutively in a stream (e.g. Avro data file)
  133. /// any attempt to read the next record will automatically skip
  134. /// those extra fields of the current record. It would still leave
  135. /// the extra fields at the end of the last record in the stream.
  136. /// This would mean that the stream is not in a good state. For example,
  137. /// if some non-avro information is stored at the end of the stream,
  138. /// the consumers of such data would see the bytes left behind
  139. /// by the avro decoder. Similar set of problems occur if the Decoder
  140. /// consumes more than what it should.
  141. virtual void drain() = 0;
  142. };
  143. /**
  144. * Shared pointer to Decoder.
  145. */
  146. using DecoderPtr = std::shared_ptr<Decoder>;
  147. /**
  148. * ResolvingDecoder is derived from \ref Decoder, with an additional
  149. * function to obtain the field ordering of fields within a record.
  150. */
  151. class AVRO_DECL ResolvingDecoder : public Decoder {
  152. public:
  153. /// Returns the order of fields for records.
  154. /// The order of fields could be different from the order of their
  155. /// order in the schema because the writer's field order could
  156. /// be different. In order to avoid buffering and later use,
  157. /// we return the values in the writer's field order.
  158. virtual const std::vector<size_t> &fieldOrder() = 0;
  159. };
  160. /**
  161. * Shared pointer to ResolvingDecoder.
  162. */
  163. using ResolvingDecoderPtr = std::shared_ptr<ResolvingDecoder>;
  164. /**
  165. * Returns an decoder that can decode binary Avro standard.
  166. */
  167. AVRO_DECL DecoderPtr binaryDecoder();
  168. /**
  169. * Returns an decoder that validates sequence of calls to an underlying
  170. * Decoder against the given schema.
  171. */
  172. AVRO_DECL DecoderPtr validatingDecoder(const ValidSchema &schema,
  173. const DecoderPtr &base);
  174. /**
  175. * Returns an decoder that can decode Avro standard for JSON.
  176. */
  177. AVRO_DECL DecoderPtr jsonDecoder(const ValidSchema &schema);
  178. /**
  179. * Returns a decoder that decodes avro data from base written according to
  180. * writerSchema and resolves against readerSchema.
  181. * The client uses the decoder as if the data were written using readerSchema.
  182. * // FIXME: Handle out of order fields.
  183. */
  184. AVRO_DECL ResolvingDecoderPtr resolvingDecoder(const ValidSchema &writer,
  185. const ValidSchema &reader, const DecoderPtr &base);
  186. } // namespace avro
  187. #endif