StripeStream.hh 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef ORC_STRIPE_STREAM_HH
  19. #define ORC_STRIPE_STREAM_HH
  20. #include "orc/Int128.hh"
  21. #include "orc/OrcFile.hh"
  22. #include "orc/Reader.hh"
  23. #include "ColumnReader.hh"
  24. #include "Timezone.hh"
  25. #include "TypeImpl.hh"
  26. namespace orc {
  27. class RowReaderImpl;
  28. /**
  29. * StripeStream Implementation
  30. */
  31. class StripeStreamsImpl : public StripeStreams {
  32. private:
  33. const RowReaderImpl& reader;
  34. const proto::StripeInformation& stripeInfo;
  35. const proto::StripeFooter& footer;
  36. const uint64_t stripeIndex;
  37. const uint64_t stripeStart;
  38. InputStream& input;
  39. const Timezone& writerTimezone;
  40. const Timezone& readerTimezone;
  41. public:
  42. StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index,
  43. const proto::StripeInformation& stripeInfo, const proto::StripeFooter& footer,
  44. uint64_t stripeStart, InputStream& input, const Timezone& writerTimezone,
  45. const Timezone& readerTimezone);
  46. virtual ~StripeStreamsImpl() override;
  47. virtual const std::vector<bool> getSelectedColumns() const override;
  48. virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const override;
  49. virtual std::unique_ptr<SeekableInputStream> getStream(uint64_t columnId,
  50. proto::Stream_Kind kind,
  51. bool shouldStream) const override;
  52. MemoryPool& getMemoryPool() const override;
  53. ReaderMetrics* getReaderMetrics() const override;
  54. const Timezone& getWriterTimezone() const override;
  55. const Timezone& getReaderTimezone() const override;
  56. std::ostream* getErrorStream() const override;
  57. bool getThrowOnHive11DecimalOverflow() const override;
  58. bool isDecimalAsLong() const override;
  59. int32_t getForcedScaleOnHive11Decimal() const override;
  60. const SchemaEvolution* getSchemaEvolution() const override;
  61. };
  62. /**
  63. * StreamInformation Implementation
  64. */
  65. class StreamInformationImpl : public StreamInformation {
  66. private:
  67. StreamKind kind;
  68. uint64_t column;
  69. uint64_t offset;
  70. uint64_t length;
  71. public:
  72. StreamInformationImpl(uint64_t _offset, const proto::Stream& stream)
  73. : kind(static_cast<StreamKind>(stream.kind())),
  74. column(stream.column()),
  75. offset(_offset),
  76. length(stream.length()) {
  77. // PASS
  78. }
  79. ~StreamInformationImpl() override;
  80. StreamKind getKind() const override {
  81. return kind;
  82. }
  83. uint64_t getColumnId() const override {
  84. return column;
  85. }
  86. uint64_t getOffset() const override {
  87. return offset;
  88. }
  89. uint64_t getLength() const override {
  90. return length;
  91. }
  92. };
  93. /**
  94. * StripeInformation Implementation
  95. */
  96. class StripeInformationImpl : public StripeInformation {
  97. uint64_t offset;
  98. uint64_t indexLength;
  99. uint64_t dataLength;
  100. uint64_t footerLength;
  101. uint64_t numRows;
  102. InputStream* stream;
  103. MemoryPool& memory;
  104. CompressionKind compression;
  105. uint64_t blockSize;
  106. mutable std::unique_ptr<proto::StripeFooter> stripeFooter;
  107. ReaderMetrics* metrics;
  108. void ensureStripeFooterLoaded() const;
  109. public:
  110. StripeInformationImpl(uint64_t _offset, uint64_t _indexLength, uint64_t _dataLength,
  111. uint64_t _footerLength, uint64_t _numRows, InputStream* _stream,
  112. MemoryPool& _memory, CompressionKind _compression, uint64_t _blockSize,
  113. ReaderMetrics* _metrics)
  114. : offset(_offset),
  115. indexLength(_indexLength),
  116. dataLength(_dataLength),
  117. footerLength(_footerLength),
  118. numRows(_numRows),
  119. stream(_stream),
  120. memory(_memory),
  121. compression(_compression),
  122. blockSize(_blockSize),
  123. metrics(_metrics) {
  124. // PASS
  125. }
  126. virtual ~StripeInformationImpl() override {
  127. // PASS
  128. }
  129. uint64_t getOffset() const override {
  130. return offset;
  131. }
  132. uint64_t getLength() const override {
  133. return indexLength + dataLength + footerLength;
  134. }
  135. uint64_t getIndexLength() const override {
  136. return indexLength;
  137. }
  138. uint64_t getDataLength() const override {
  139. return dataLength;
  140. }
  141. uint64_t getFooterLength() const override {
  142. return footerLength;
  143. }
  144. uint64_t getNumberOfRows() const override {
  145. return numRows;
  146. }
  147. uint64_t getNumberOfStreams() const override {
  148. ensureStripeFooterLoaded();
  149. return static_cast<uint64_t>(stripeFooter->streams_size());
  150. }
  151. std::unique_ptr<StreamInformation> getStreamInformation(uint64_t streamId) const override;
  152. ColumnEncodingKind getColumnEncoding(uint64_t colId) const override {
  153. ensureStripeFooterLoaded();
  154. return static_cast<ColumnEncodingKind>(stripeFooter->columns(static_cast<int>(colId)).kind());
  155. }
  156. uint64_t getDictionarySize(uint64_t colId) const override {
  157. ensureStripeFooterLoaded();
  158. return static_cast<ColumnEncodingKind>(
  159. stripeFooter->columns(static_cast<int>(colId)).dictionary_size());
  160. }
  161. const std::string& getWriterTimezone() const override {
  162. ensureStripeFooterLoaded();
  163. return stripeFooter->writer_timezone();
  164. }
  165. };
  166. } // namespace orc
  167. #endif