12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757 |
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #ifndef ORC_STATISTICS_IMPL_HH
- #define ORC_STATISTICS_IMPL_HH
- #include "orc/Common.hh"
- #include "orc/Int128.hh"
- #include "orc/OrcFile.hh"
- #include "orc/Reader.hh"
- #include "Timezone.hh"
- #include "TypeImpl.hh"
- namespace orc {
- /**
- * StatContext contains fields required to compute statistics
- */
- struct StatContext {
- const bool correctStats;
- const Timezone* const writerTimezone;
- StatContext() : correctStats(false), writerTimezone(nullptr) {}
- StatContext(bool cStat, const Timezone* const timezone = nullptr)
- : correctStats(cStat), writerTimezone(timezone) {}
- };
/**
 * Internal Statistics Implementation
 *
 * Value holder shared by the typed column-statistics classes. It stores a
 * value count, a has-null flag and four optional quantities (minimum,
 * maximum, sum, total length), each guarded by its own "has" flag.
 *
 * All data members are value-initialised, so reading a getter before the
 * matching setter yields T() / 0 instead of an indeterminate value.
 *
 * Ordering of T is delegated to the free function compare(a, b), which is
 * not declared in this class; it has to be visible wherever updateMinMax()
 * or merge() is instantiated.
 */
template <typename T>
class InternalStatisticsImpl {
 private:
  bool _hasNull = false;
  bool _hasMinimum = false;
  bool _hasMaximum = false;
  bool _hasSum = false;
  bool _hasTotalLength = false;
  uint64_t _totalLength = 0;
  uint64_t _valueCount = 0;
  T _minimum{};
  T _maximum{};
  T _sum{};

 public:
  InternalStatisticsImpl() = default;

  ~InternalStatisticsImpl() = default;

  // GET / SET _totalLength
  bool hasTotalLength() const {
    return _hasTotalLength;
  }

  void setHasTotalLength(bool hasTotalLength) {
    _hasTotalLength = hasTotalLength;
  }

  uint64_t getTotalLength() const {
    return _totalLength;
  }

  void setTotalLength(uint64_t totalLength) {
    _totalLength = totalLength;
  }

  // GET / SET _sum
  bool hasSum() const {
    return _hasSum;
  }

  void setHasSum(bool hasSum) {
    _hasSum = hasSum;
  }

  T getSum() const {
    return _sum;
  }

  void setSum(T sum) {
    _sum = sum;
  }

  // GET / SET _maximum
  bool hasMaximum() const {
    return _hasMaximum;
  }

  const T& getMaximum() const {
    return _maximum;
  }

  void setHasMaximum(bool hasMax) {
    _hasMaximum = hasMax;
  }

  void setMaximum(T max) {
    _maximum = max;
  }

  // GET / SET _minimum
  bool hasMinimum() const {
    return _hasMinimum;
  }

  void setHasMinimum(bool hasMin) {
    _hasMinimum = hasMin;
  }

  const T& getMinimum() const {
    return _minimum;
  }

  void setMinimum(T min) {
    _minimum = min;
  }

  // GET / SET _valueCount
  uint64_t getNumberOfValues() const {
    return _valueCount;
  }

  void setNumberOfValues(uint64_t numValues) {
    _valueCount = numValues;
  }

  // GET / SET _hasNullValue
  bool hasNull() const {
    return _hasNull;
  }

  void setHasNull(bool hasNull) {
    _hasNull = hasNull;
  }

  /**
   * Clears every "has" flag and zeroes the counters. The stored minimum,
   * maximum and sum values are left as they are; they are unreachable
   * through the flag-checking wrappers until set again.
   */
  void reset() {
    _hasNull = false;
    _hasMinimum = false;
    _hasMaximum = false;
    _hasSum = false;
    _hasTotalLength = false;
    _totalLength = 0;
    _valueCount = 0;
  }

  /**
   * Folds one value into the running minimum and maximum.
   *
   * The two bounds are tracked independently, so a bound that has not been
   * set yet is seeded from the value rather than compared against an unset
   * member. When both flags agree the outcome is the same as treating them
   * as a pair.
   */
  void updateMinMax(T value) {
    if (!_hasMinimum || compare(value, _minimum)) {
      _hasMinimum = true;
      _minimum = value;
    }
    if (!_hasMaximum || compare(_maximum, value)) {
      _hasMaximum = true;
      _maximum = value;
    }
  }

  /**
   * Merges another accumulator into this one: has-null is OR-ed, counts and
   * total lengths are added, total length stays defined only if both sides
   * define it, and each bound is widened by the other side's bound when the
   * other side has one.
   *
   * The sum is not merged here as callers need to check overflow.
   */
  void merge(const InternalStatisticsImpl& other) {
    _hasNull = _hasNull || other._hasNull;
    _valueCount += other._valueCount;

    // Only bounds the other side actually defines are taken into account.
    if (other._hasMinimum && (!_hasMinimum || compare(other._minimum, _minimum))) {
      _hasMinimum = true;
      _minimum = other._minimum;
    }
    if (other._hasMaximum && (!_hasMaximum || compare(_maximum, other._maximum))) {
      _hasMaximum = true;
      _maximum = other._maximum;
    }

    _hasTotalLength = _hasTotalLength && other._hasTotalLength;
    _totalLength += other._totalLength;
  }
};
- typedef InternalStatisticsImpl<char> InternalCharStatistics;
- typedef InternalStatisticsImpl<char> InternalBooleanStatistics;
- typedef InternalStatisticsImpl<int64_t> InternalIntegerStatistics;
- typedef InternalStatisticsImpl<int32_t> InternalDateStatistics;
- typedef InternalStatisticsImpl<double> InternalDoubleStatistics;
- typedef InternalStatisticsImpl<Decimal> InternalDecimalStatistics;
- typedef InternalStatisticsImpl<std::string> InternalStringStatistics;
- typedef InternalStatisticsImpl<uint64_t> InternalCollectionStatistics;
- /**
- * Mutable column statistics for use by the writer.
- */
- class MutableColumnStatistics {
- public:
- virtual ~MutableColumnStatistics();
- virtual void increase(uint64_t count) = 0;
- virtual void setNumberOfValues(uint64_t value) = 0;
- virtual void setHasNull(bool hasNull) = 0;
- virtual void merge(const MutableColumnStatistics& other) = 0;
- virtual void reset() = 0;
- virtual void toProtoBuf(proto::ColumnStatistics& pbStats) const = 0;
- };
- /**
- * ColumnStatistics Implementation
- */
- class ColumnStatisticsImpl : public ColumnStatistics, public MutableColumnStatistics {
- private:
- InternalCharStatistics _stats;
- public:
- ColumnStatisticsImpl() {
- reset();
- }
- ColumnStatisticsImpl(const proto::ColumnStatistics& stats);
- virtual ~ColumnStatisticsImpl() override;
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- void merge(const MutableColumnStatistics& other) override {
- _stats.merge(dynamic_cast<const ColumnStatisticsImpl&>(other)._stats);
- }
- void reset() override {
- _stats.reset();
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Column has " << getNumberOfValues() << " values"
- << " and has null value: " << (hasNull() ? "yes" : "no") << std::endl;
- return buffer.str();
- }
- };
- class BinaryColumnStatisticsImpl : public BinaryColumnStatistics, public MutableColumnStatistics {
- private:
- InternalCharStatistics _stats;
- public:
- BinaryColumnStatisticsImpl() {
- reset();
- }
- BinaryColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~BinaryColumnStatisticsImpl() override;
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- bool hasTotalLength() const override {
- return _stats.hasTotalLength();
- }
- uint64_t getTotalLength() const override {
- if (hasTotalLength()) {
- return _stats.getTotalLength();
- } else {
- throw ParseError("Total length is not defined.");
- }
- }
- void setTotalLength(uint64_t length) {
- _stats.setHasTotalLength(true);
- _stats.setTotalLength(length);
- }
- void update(size_t length) {
- _stats.setTotalLength(_stats.getTotalLength() + length);
- }
- void merge(const MutableColumnStatistics& other) override {
- const BinaryColumnStatisticsImpl& binStats =
- dynamic_cast<const BinaryColumnStatisticsImpl&>(other);
- _stats.merge(binStats._stats);
- }
- void reset() override {
- _stats.reset();
- setTotalLength(0);
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::BinaryStatistics* binStats = pbStats.mutable_binary_statistics();
- binStats->set_sum(static_cast<int64_t>(_stats.getTotalLength()));
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Binary" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasTotalLength()) {
- buffer << "Total length: " << getTotalLength() << std::endl;
- } else {
- buffer << "Total length: not defined" << std::endl;
- }
- return buffer.str();
- }
- };
- class BooleanColumnStatisticsImpl : public BooleanColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalBooleanStatistics _stats;
- bool _hasCount;
- uint64_t _trueCount;
- public:
- BooleanColumnStatisticsImpl() {
- reset();
- }
- BooleanColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~BooleanColumnStatisticsImpl() override;
- bool hasCount() const override {
- return _hasCount;
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- _hasCount = true;
- }
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- uint64_t getFalseCount() const override {
- if (hasCount()) {
- return getNumberOfValues() - _trueCount;
- } else {
- throw ParseError("False count is not defined.");
- }
- }
- uint64_t getTrueCount() const override {
- if (hasCount()) {
- return _trueCount;
- } else {
- throw ParseError("True count is not defined.");
- }
- }
- void setTrueCount(uint64_t trueCount) {
- _hasCount = true;
- _trueCount = trueCount;
- }
- void update(bool value, size_t repetitions) {
- if (value) {
- _trueCount += repetitions;
- }
- }
- void merge(const MutableColumnStatistics& other) override {
- const BooleanColumnStatisticsImpl& boolStats =
- dynamic_cast<const BooleanColumnStatisticsImpl&>(other);
- _stats.merge(boolStats._stats);
- _hasCount = _hasCount && boolStats._hasCount;
- _trueCount += boolStats._trueCount;
- }
- void reset() override {
- _stats.reset();
- setTrueCount(0);
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::BucketStatistics* bucketStats = pbStats.mutable_bucket_statistics();
- if (_hasCount) {
- bucketStats->add_count(_trueCount);
- } else {
- bucketStats->clear_count();
- }
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Boolean" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasCount()) {
- buffer << "(true: " << getTrueCount() << "; false: " << getFalseCount() << ")" << std::endl;
- } else {
- buffer << "(true: not defined; false: not defined)" << std::endl;
- buffer << "True and false counts are not defined" << std::endl;
- }
- return buffer.str();
- }
- };
- class DateColumnStatisticsImpl : public DateColumnStatistics, public MutableColumnStatistics {
- private:
- InternalDateStatistics _stats;
- public:
- DateColumnStatisticsImpl() {
- reset();
- }
- DateColumnStatisticsImpl(const proto::ColumnStatistics& stats, const StatContext& statContext);
- virtual ~DateColumnStatisticsImpl() override;
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- int32_t getMinimum() const override {
- if (hasMinimum()) {
- return _stats.getMinimum();
- } else {
- throw ParseError("Minimum is not defined.");
- }
- }
- int32_t getMaximum() const override {
- if (hasMaximum()) {
- return _stats.getMaximum();
- } else {
- throw ParseError("Maximum is not defined.");
- }
- }
- void setMinimum(int32_t minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
- void setMaximum(int32_t maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
- void update(int32_t value) {
- _stats.updateMinMax(value);
- }
- void merge(const MutableColumnStatistics& other) override {
- const DateColumnStatisticsImpl& dateStats =
- dynamic_cast<const DateColumnStatisticsImpl&>(other);
- _stats.merge(dateStats._stats);
- }
- void reset() override {
- _stats.reset();
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::DateStatistics* dateStatistics = pbStats.mutable_date_statistics();
- if (_stats.hasMinimum()) {
- dateStatistics->set_maximum(_stats.getMaximum());
- dateStatistics->set_minimum(_stats.getMinimum());
- } else {
- dateStatistics->clear_minimum();
- dateStatistics->clear_maximum();
- }
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Date" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasMinimum()) {
- buffer << "Minimum: " << getMinimum() << std::endl;
- } else {
- buffer << "Minimum: not defined" << std::endl;
- }
- if (hasMaximum()) {
- buffer << "Maximum: " << getMaximum() << std::endl;
- } else {
- buffer << "Maximum: not defined" << std::endl;
- }
- return buffer.str();
- }
- };
- class DecimalColumnStatisticsImpl : public DecimalColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalDecimalStatistics _stats;
- public:
- DecimalColumnStatisticsImpl() {
- reset();
- }
- DecimalColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~DecimalColumnStatisticsImpl() override;
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
- bool hasSum() const override {
- return _stats.hasSum();
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- Decimal getMinimum() const override {
- if (hasMinimum()) {
- return _stats.getMinimum();
- } else {
- throw ParseError("Minimum is not defined.");
- }
- }
- Decimal getMaximum() const override {
- if (hasMaximum()) {
- return _stats.getMaximum();
- } else {
- throw ParseError("Maximum is not defined.");
- }
- }
- void setMinimum(Decimal minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
- void setMaximum(Decimal maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
- Decimal getSum() const override {
- if (hasSum()) {
- return _stats.getSum();
- } else {
- throw ParseError("Sum is not defined.");
- }
- }
- void setSum(Decimal sum) {
- _stats.setHasSum(true);
- _stats.setSum(sum);
- }
- void update(const Decimal& value) {
- _stats.updateMinMax(value);
- if (_stats.hasSum()) {
- updateSum(value);
- }
- }
- void merge(const MutableColumnStatistics& other) override {
- const DecimalColumnStatisticsImpl& decStats =
- dynamic_cast<const DecimalColumnStatisticsImpl&>(other);
- _stats.merge(decStats._stats);
- _stats.setHasSum(_stats.hasSum() && decStats.hasSum());
- if (_stats.hasSum()) {
- updateSum(decStats.getSum());
- }
- }
- void reset() override {
- _stats.reset();
- setSum(Decimal());
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::DecimalStatistics* decStats = pbStats.mutable_decimal_statistics();
- if (_stats.hasMinimum()) {
- decStats->set_minimum(TString(_stats.getMinimum().toString(true)));
- decStats->set_maximum(TString(_stats.getMaximum().toString(true)));
- } else {
- decStats->clear_minimum();
- decStats->clear_maximum();
- }
- if (_stats.hasSum()) {
- decStats->set_sum(TString(_stats.getSum().toString(true)));
- } else {
- decStats->clear_sum();
- }
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Decimal" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasMinimum()) {
- buffer << "Minimum: " << getMinimum().toString() << std::endl;
- } else {
- buffer << "Minimum: not defined" << std::endl;
- }
- if (hasMaximum()) {
- buffer << "Maximum: " << getMaximum().toString() << std::endl;
- } else {
- buffer << "Maximum: not defined" << std::endl;
- }
- if (hasSum()) {
- buffer << "Sum: " << getSum().toString() << std::endl;
- } else {
- buffer << "Sum: not defined" << std::endl;
- }
- return buffer.str();
- }
- private:
- void updateSum(Decimal value) {
- if (_stats.hasSum()) {
- bool overflow = false;
- Decimal sum = _stats.getSum();
- if (sum.scale > value.scale) {
- value.value = scaleUpInt128ByPowerOfTen(value.value, sum.scale - value.scale, overflow);
- } else if (sum.scale < value.scale) {
- sum.value = scaleUpInt128ByPowerOfTen(sum.value, value.scale - sum.scale, overflow);
- sum.scale = value.scale;
- }
- if (!overflow) {
- bool wasPositive = sum.value >= 0;
- sum.value += value.value;
- if ((value.value >= 0) == wasPositive) {
- _stats.setHasSum((sum.value >= 0) == wasPositive);
- }
- } else {
- _stats.setHasSum(false);
- }
- if (_stats.hasSum()) {
- _stats.setSum(sum);
- }
- }
- }
- };
- class DoubleColumnStatisticsImpl : public DoubleColumnStatistics, public MutableColumnStatistics {
- private:
- InternalDoubleStatistics _stats;
- public:
- DoubleColumnStatisticsImpl() {
- reset();
- }
- DoubleColumnStatisticsImpl(const proto::ColumnStatistics& stats);
- virtual ~DoubleColumnStatisticsImpl() override;
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
- bool hasSum() const override {
- return _stats.hasSum();
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- double getMinimum() const override {
- if (hasMinimum()) {
- return _stats.getMinimum();
- } else {
- throw ParseError("Minimum is not defined.");
- }
- }
- double getMaximum() const override {
- if (hasMaximum()) {
- return _stats.getMaximum();
- } else {
- throw ParseError("Maximum is not defined.");
- }
- }
- void setMinimum(double minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
- void setMaximum(double maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
- double getSum() const override {
- if (hasSum()) {
- return _stats.getSum();
- } else {
- throw ParseError("Sum is not defined.");
- }
- }
- void setSum(double sum) {
- _stats.setHasSum(true);
- _stats.setSum(sum);
- }
- void update(double value) {
- _stats.updateMinMax(value);
- _stats.setSum(_stats.getSum() + value);
- }
- void merge(const MutableColumnStatistics& other) override {
- const DoubleColumnStatisticsImpl& doubleStats =
- dynamic_cast<const DoubleColumnStatisticsImpl&>(other);
- _stats.merge(doubleStats._stats);
- _stats.setHasSum(_stats.hasSum() && doubleStats.hasSum());
- if (_stats.hasSum()) {
- _stats.setSum(_stats.getSum() + doubleStats.getSum());
- }
- }
- void reset() override {
- _stats.reset();
- setSum(0.0);
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::DoubleStatistics* doubleStats = pbStats.mutable_double_statistics();
- if (_stats.hasMinimum()) {
- doubleStats->set_minimum(_stats.getMinimum());
- doubleStats->set_maximum(_stats.getMaximum());
- } else {
- doubleStats->clear_minimum();
- doubleStats->clear_maximum();
- }
- if (_stats.hasSum()) {
- doubleStats->set_sum(_stats.getSum());
- } else {
- doubleStats->clear_sum();
- }
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Double" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasMinimum()) {
- buffer << "Minimum: " << getMinimum() << std::endl;
- } else {
- buffer << "Minimum: not defined" << std::endl;
- }
- if (hasMaximum()) {
- buffer << "Maximum: " << getMaximum() << std::endl;
- } else {
- buffer << "Maximum: not defined" << std::endl;
- }
- if (hasSum()) {
- buffer << "Sum: " << getSum() << std::endl;
- } else {
- buffer << "Sum: not defined" << std::endl;
- }
- return buffer.str();
- }
- };
- class IntegerColumnStatisticsImpl : public IntegerColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalIntegerStatistics _stats;
- public:
- IntegerColumnStatisticsImpl() {
- reset();
- }
- IntegerColumnStatisticsImpl(const proto::ColumnStatistics& stats);
- virtual ~IntegerColumnStatisticsImpl() override;
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
- bool hasSum() const override {
- return _stats.hasSum();
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- int64_t getMinimum() const override {
- if (hasMinimum()) {
- return _stats.getMinimum();
- } else {
- throw ParseError("Minimum is not defined.");
- }
- }
- int64_t getMaximum() const override {
- if (hasMaximum()) {
- return _stats.getMaximum();
- } else {
- throw ParseError("Maximum is not defined.");
- }
- }
- void setMinimum(int64_t minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
- void setMaximum(int64_t maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
- int64_t getSum() const override {
- if (hasSum()) {
- return _stats.getSum();
- } else {
- throw ParseError("Sum is not defined.");
- }
- }
- void setSum(int64_t sum) {
- _stats.setHasSum(true);
- _stats.setSum(sum);
- }
- void update(int64_t value, int repetitions) {
- _stats.updateMinMax(value);
- if (_stats.hasSum()) {
- if (repetitions > 1) {
- _stats.setHasSum(multiplyExact(value, repetitions, &value));
- }
- if (_stats.hasSum()) {
- _stats.setHasSum(addExact(_stats.getSum(), value, &value));
- if (_stats.hasSum()) {
- _stats.setSum(value);
- }
- }
- }
- }
- void merge(const MutableColumnStatistics& other) override {
- const IntegerColumnStatisticsImpl& intStats =
- dynamic_cast<const IntegerColumnStatisticsImpl&>(other);
- _stats.merge(intStats._stats);
- // update sum and check overflow
- _stats.setHasSum(_stats.hasSum() && intStats.hasSum());
- if (_stats.hasSum()) {
- int64_t value;
- _stats.setHasSum(addExact(_stats.getSum(), intStats.getSum(), &value));
- if (_stats.hasSum()) {
- _stats.setSum(value);
- }
- }
- }
- void reset() override {
- _stats.reset();
- setSum(0);
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::IntegerStatistics* intStats = pbStats.mutable_int_statistics();
- if (_stats.hasMinimum()) {
- intStats->set_minimum(_stats.getMinimum());
- intStats->set_maximum(_stats.getMaximum());
- } else {
- intStats->clear_minimum();
- intStats->clear_maximum();
- }
- if (_stats.hasSum()) {
- intStats->set_sum(_stats.getSum());
- } else {
- intStats->clear_sum();
- }
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Integer" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasMinimum()) {
- buffer << "Minimum: " << getMinimum() << std::endl;
- } else {
- buffer << "Minimum: not defined" << std::endl;
- }
- if (hasMaximum()) {
- buffer << "Maximum: " << getMaximum() << std::endl;
- } else {
- buffer << "Maximum: not defined" << std::endl;
- }
- if (hasSum()) {
- buffer << "Sum: " << getSum() << std::endl;
- } else {
- buffer << "Sum: not defined" << std::endl;
- }
- return buffer.str();
- }
- };
- class StringColumnStatisticsImpl : public StringColumnStatistics, public MutableColumnStatistics {
- private:
- InternalStringStatistics _stats;
- public:
- StringColumnStatisticsImpl() {
- reset();
- }
- StringColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~StringColumnStatisticsImpl() override;
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
- bool hasTotalLength() const override {
- return _stats.hasTotalLength();
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- const std::string& getMinimum() const override {
- if (hasMinimum()) {
- return _stats.getMinimum();
- } else {
- throw ParseError("Minimum is not defined.");
- }
- }
- const std::string& getMaximum() const override {
- if (hasMaximum()) {
- return _stats.getMaximum();
- } else {
- throw ParseError("Maximum is not defined.");
- }
- }
- void setMinimum(std::string minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
- void setMaximum(std::string maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
- uint64_t getTotalLength() const override {
- if (hasTotalLength()) {
- return _stats.getTotalLength();
- } else {
- throw ParseError("Total length is not defined.");
- }
- }
- void setTotalLength(uint64_t length) {
- _stats.setHasTotalLength(true);
- _stats.setTotalLength(length);
- }
- void update(const char* value, size_t length) {
- if (value != nullptr) {
- if (!_stats.hasMinimum()) {
- std::string tempStr(value, value + length);
- setMinimum(tempStr);
- setMaximum(tempStr);
- } else {
- // update min
- int minCmp = strncmp(_stats.getMinimum().c_str(), value,
- std::min(_stats.getMinimum().length(), length));
- if (minCmp > 0 || (minCmp == 0 && length < _stats.getMinimum().length())) {
- setMinimum(std::string(value, value + length));
- }
- // update max
- int maxCmp = strncmp(_stats.getMaximum().c_str(), value,
- std::min(_stats.getMaximum().length(), length));
- if (maxCmp < 0 || (maxCmp == 0 && length > _stats.getMaximum().length())) {
- setMaximum(std::string(value, value + length));
- }
- }
- }
- _stats.setTotalLength(_stats.getTotalLength() + length);
- }
- void update(std::string value) {
- update(value.c_str(), value.length());
- }
- void merge(const MutableColumnStatistics& other) override {
- const StringColumnStatisticsImpl& strStats =
- dynamic_cast<const StringColumnStatisticsImpl&>(other);
- _stats.merge(strStats._stats);
- }
- void reset() override {
- _stats.reset();
- setTotalLength(0);
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::StringStatistics* strStats = pbStats.mutable_string_statistics();
- if (_stats.hasMinimum()) {
- strStats->set_minimum(TString(_stats.getMinimum()));
- strStats->set_maximum(TString(_stats.getMaximum()));
- } else {
- strStats->clear_minimum();
- strStats->clear_maximum();
- }
- if (_stats.hasTotalLength()) {
- strStats->set_sum(static_cast<int64_t>(_stats.getTotalLength()));
- } else {
- strStats->clear_sum();
- }
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: String" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasMinimum()) {
- buffer << "Minimum: " << getMinimum() << std::endl;
- } else {
- buffer << "Minimum is not defined" << std::endl;
- }
- if (hasMaximum()) {
- buffer << "Maximum: " << getMaximum() << std::endl;
- } else {
- buffer << "Maximum is not defined" << std::endl;
- }
- if (hasTotalLength()) {
- buffer << "Total length: " << getTotalLength() << std::endl;
- } else {
- buffer << "Total length is not defined" << std::endl;
- }
- return buffer.str();
- }
- };
- class TimestampColumnStatisticsImpl : public TimestampColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalIntegerStatistics _stats;
- bool _hasLowerBound;
- bool _hasUpperBound;
- int64_t _lowerBound;
- int64_t _upperBound;
- int32_t _minimumNanos; // last 6 digits of nanosecond of minimum timestamp
- int32_t _maximumNanos; // last 6 digits of nanosecond of maximum timestamp
- static constexpr int32_t DEFAULT_MIN_NANOS = 0;
- static constexpr int32_t DEFAULT_MAX_NANOS = 999999;
- public:
- TimestampColumnStatisticsImpl() {
- reset();
- }
- TimestampColumnStatisticsImpl(const proto::ColumnStatistics& stats,
- const StatContext& statContext);
- virtual ~TimestampColumnStatisticsImpl() override;
- bool hasMinimum() const override {
- return _stats.hasMinimum();
- }
- bool hasMaximum() const override {
- return _stats.hasMaximum();
- }
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- int64_t getMinimum() const override {
- if (hasMinimum()) {
- return _stats.getMinimum();
- } else {
- throw ParseError("Minimum is not defined.");
- }
- }
- int64_t getMaximum() const override {
- if (hasMaximum()) {
- return _stats.getMaximum();
- } else {
- throw ParseError("Maximum is not defined.");
- }
- }
- void setMinimum(int64_t minimum) {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
- void setMaximum(int64_t maximum) {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
- void update(int64_t value) {
- _stats.updateMinMax(value);
- }
- void update(int64_t milli, int32_t nano) {
- if (!_stats.hasMinimum()) {
- _stats.setHasMinimum(true);
- _stats.setHasMaximum(true);
- _stats.setMinimum(milli);
- _stats.setMaximum(milli);
- _maximumNanos = _minimumNanos = nano;
- } else {
- if (milli <= _stats.getMinimum()) {
- if (milli < _stats.getMinimum() || nano < _minimumNanos) {
- _minimumNanos = nano;
- }
- _stats.setMinimum(milli);
- }
- if (milli >= _stats.getMaximum()) {
- if (milli > _stats.getMaximum() || nano > _maximumNanos) {
- _maximumNanos = nano;
- }
- _stats.setMaximum(milli);
- }
- }
- }
- void merge(const MutableColumnStatistics& other) override {
- const TimestampColumnStatisticsImpl& tsStats =
- dynamic_cast<const TimestampColumnStatisticsImpl&>(other);
- _stats.setHasNull(_stats.hasNull() || tsStats.hasNull());
- _stats.setNumberOfValues(_stats.getNumberOfValues() + tsStats.getNumberOfValues());
- if (tsStats.hasMinimum()) {
- if (!_stats.hasMinimum()) {
- _stats.setHasMinimum(true);
- _stats.setHasMaximum(true);
- _stats.setMinimum(tsStats.getMinimum());
- _stats.setMaximum(tsStats.getMaximum());
- _minimumNanos = tsStats.getMinimumNanos();
- _maximumNanos = tsStats.getMaximumNanos();
- } else {
- if (tsStats.getMaximum() >= _stats.getMaximum()) {
- if (tsStats.getMaximum() > _stats.getMaximum() ||
- tsStats.getMaximumNanos() > _maximumNanos) {
- _maximumNanos = tsStats.getMaximumNanos();
- }
- _stats.setMaximum(tsStats.getMaximum());
- }
- if (tsStats.getMinimum() <= _stats.getMinimum()) {
- if (tsStats.getMinimum() < _stats.getMinimum() ||
- tsStats.getMinimumNanos() < _minimumNanos) {
- _minimumNanos = tsStats.getMinimumNanos();
- }
- _stats.setMinimum(tsStats.getMinimum());
- }
- }
- }
- }
- void reset() override {
- _stats.reset();
- _minimumNanos = DEFAULT_MIN_NANOS;
- _maximumNanos = DEFAULT_MAX_NANOS;
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::TimestampStatistics* tsStats = pbStats.mutable_timestamp_statistics();
- if (_stats.hasMinimum()) {
- tsStats->set_minimum_utc(_stats.getMinimum());
- tsStats->set_maximum_utc(_stats.getMaximum());
- if (_minimumNanos != DEFAULT_MIN_NANOS) {
- tsStats->set_minimum_nanos(_minimumNanos + 1);
- }
- if (_maximumNanos != DEFAULT_MAX_NANOS) {
- tsStats->set_maximum_nanos(_maximumNanos + 1);
- }
- } else {
- tsStats->clear_minimum_utc();
- tsStats->clear_maximum_utc();
- tsStats->clear_minimum_nanos();
- tsStats->clear_maximum_nanos();
- }
- }
- std::string toString() const override {
- std::ostringstream buffer;
- struct tm tmValue;
- char timeBuffer[20];
- time_t secs = 0;
- buffer << "Data type: Timestamp" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasMinimum()) {
- secs = static_cast<time_t>(getMinimum() / 1000);
- gmtime_r(&secs, &tmValue);
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
- buffer << "Minimum: " << timeBuffer << "." << (getMinimum() % 1000) << std::endl;
- } else {
- buffer << "Minimum is not defined" << std::endl;
- }
- if (hasLowerBound()) {
- secs = static_cast<time_t>(getLowerBound() / 1000);
- gmtime_r(&secs, &tmValue);
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
- buffer << "LowerBound: " << timeBuffer << "." << (getLowerBound() % 1000) << std::endl;
- } else {
- buffer << "LowerBound is not defined" << std::endl;
- }
- if (hasMaximum()) {
- secs = static_cast<time_t>(getMaximum() / 1000);
- gmtime_r(&secs, &tmValue);
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
- buffer << "Maximum: " << timeBuffer << "." << (getMaximum() % 1000) << std::endl;
- } else {
- buffer << "Maximum is not defined" << std::endl;
- }
- if (hasUpperBound()) {
- secs = static_cast<time_t>(getUpperBound() / 1000);
- gmtime_r(&secs, &tmValue);
- strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue);
- buffer << "UpperBound: " << timeBuffer << "." << (getUpperBound() % 1000) << std::endl;
- } else {
- buffer << "UpperBound is not defined" << std::endl;
- }
- return buffer.str();
- }
- bool hasLowerBound() const override {
- return _hasLowerBound;
- }
- bool hasUpperBound() const override {
- return _hasUpperBound;
- }
- int64_t getLowerBound() const override {
- if (hasLowerBound()) {
- return _lowerBound;
- } else {
- throw ParseError("LowerBound is not defined.");
- }
- }
- int64_t getUpperBound() const override {
- if (hasUpperBound()) {
- return _upperBound;
- } else {
- throw ParseError("UpperBound is not defined.");
- }
- }
- int32_t getMinimumNanos() const override {
- if (hasMinimum()) {
- return _minimumNanos;
- } else {
- throw ParseError("Minimum is not defined.");
- }
- }
- int32_t getMaximumNanos() const override {
- if (hasMaximum()) {
- return _maximumNanos;
- } else {
- throw ParseError("Maximum is not defined.");
- }
- }
- };
- class CollectionColumnStatisticsImpl : public CollectionColumnStatistics,
- public MutableColumnStatistics {
- private:
- InternalCollectionStatistics _stats;
- public:
- CollectionColumnStatisticsImpl() {
- reset();
- }
- CollectionColumnStatisticsImpl(const proto::ColumnStatistics& stats);
- virtual ~CollectionColumnStatisticsImpl() override;
- bool hasMinimumChildren() const override {
- return _stats.hasMinimum();
- }
- bool hasMaximumChildren() const override {
- return _stats.hasMaximum();
- }
- bool hasTotalChildren() const override {
- return _stats.hasSum();
- }
- void increase(uint64_t count) override {
- _stats.setNumberOfValues(_stats.getNumberOfValues() + count);
- }
- uint64_t getNumberOfValues() const override {
- return _stats.getNumberOfValues();
- }
- void setNumberOfValues(uint64_t value) override {
- _stats.setNumberOfValues(value);
- }
- bool hasNull() const override {
- return _stats.hasNull();
- }
- void setHasNull(bool hasNull) override {
- _stats.setHasNull(hasNull);
- }
- uint64_t getMinimumChildren() const override {
- if (hasMinimumChildren()) {
- return _stats.getMinimum();
- } else {
- throw ParseError("MinimumChildren is not defined.");
- }
- }
- uint64_t getMaximumChildren() const override {
- if (hasMaximumChildren()) {
- return _stats.getMaximum();
- } else {
- throw ParseError("MaximumChildren is not defined.");
- }
- }
- uint64_t getTotalChildren() const override {
- if (hasTotalChildren()) {
- return _stats.getSum();
- } else {
- throw ParseError("TotalChildren is not defined.");
- }
- }
- void setMinimumChildren(uint64_t minimum) override {
- _stats.setHasMinimum(true);
- _stats.setMinimum(minimum);
- }
- void setMaximumChildren(uint64_t maximum) override {
- _stats.setHasMaximum(true);
- _stats.setMaximum(maximum);
- }
- void setTotalChildren(uint64_t sum) override {
- _stats.setHasSum(true);
- _stats.setSum(sum);
- }
- void setHasTotalChildren(bool hasSum) override {
- _stats.setHasSum(hasSum);
- }
- void merge(const MutableColumnStatistics& other) override {
- const CollectionColumnStatisticsImpl& collectionStats =
- dynamic_cast<const CollectionColumnStatisticsImpl&>(other);
- _stats.merge(collectionStats._stats);
- // hasSumValue here means no overflow
- _stats.setHasSum(_stats.hasSum() && collectionStats.hasTotalChildren());
- if (_stats.hasSum()) {
- uint64_t oldSum = _stats.getSum();
- _stats.setSum(_stats.getSum() + collectionStats.getTotalChildren());
- if (oldSum > _stats.getSum()) {
- _stats.setHasSum(false);
- }
- }
- }
- void reset() override {
- _stats.reset();
- setTotalChildren(0);
- }
- void update(uint64_t value) {
- _stats.updateMinMax(value);
- if (_stats.hasSum()) {
- uint64_t oldSum = _stats.getSum();
- _stats.setSum(_stats.getSum() + value);
- if (oldSum > _stats.getSum()) {
- _stats.setHasSum(false);
- }
- }
- }
- void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
- pbStats.set_has_null(_stats.hasNull());
- pbStats.set_number_of_values(_stats.getNumberOfValues());
- proto::CollectionStatistics* collectionStats = pbStats.mutable_collection_statistics();
- if (_stats.hasMinimum()) {
- collectionStats->set_min_children(_stats.getMinimum());
- collectionStats->set_max_children(_stats.getMaximum());
- } else {
- collectionStats->clear_min_children();
- collectionStats->clear_max_children();
- }
- if (_stats.hasSum()) {
- collectionStats->set_total_children(_stats.getSum());
- } else {
- collectionStats->clear_total_children();
- }
- }
- std::string toString() const override {
- std::ostringstream buffer;
- buffer << "Data type: Collection(LIST|MAP)" << std::endl
- << "Values: " << getNumberOfValues() << std::endl
- << "Has null: " << (hasNull() ? "yes" : "no") << std::endl;
- if (hasMinimumChildren()) {
- buffer << "MinChildren: " << getMinimumChildren() << std::endl;
- } else {
- buffer << "MinChildren is not defined" << std::endl;
- }
- if (hasMaximumChildren()) {
- buffer << "MaxChildren: " << getMaximumChildren() << std::endl;
- } else {
- buffer << "MaxChildren is not defined" << std::endl;
- }
- if (hasTotalChildren()) {
- buffer << "TotalChildren: " << getTotalChildren() << std::endl;
- } else {
- buffer << "TotalChildren is not defined" << std::endl;
- }
- return buffer.str();
- }
- };
- ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
- const StatContext& statContext);
- class StatisticsImpl : public Statistics {
- private:
- std::vector<ColumnStatistics*> colStats;
- // DELIBERATELY NOT IMPLEMENTED
- StatisticsImpl(const StatisticsImpl&);
- StatisticsImpl& operator=(const StatisticsImpl&);
- public:
- StatisticsImpl(const proto::StripeStatistics& stripeStats, const StatContext& statContext);
- StatisticsImpl(const proto::Footer& footer, const StatContext& statContext);
- virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId) const override {
- return colStats[columnId];
- }
- virtual ~StatisticsImpl() override;
- uint32_t getNumberOfColumns() const override {
- return static_cast<uint32_t>(colStats.size());
- }
- };
- class StripeStatisticsImpl : public StripeStatistics {
- private:
- std::unique_ptr<StatisticsImpl> columnStats;
- std::vector<std::vector<std::shared_ptr<const ColumnStatistics> > > rowIndexStats;
- // DELIBERATELY NOT IMPLEMENTED
- StripeStatisticsImpl(const StripeStatisticsImpl&);
- StripeStatisticsImpl& operator=(const StripeStatisticsImpl&);
- public:
- StripeStatisticsImpl(const proto::StripeStatistics& stripeStats,
- std::vector<std::vector<proto::ColumnStatistics> >& indexStats,
- const StatContext& statContext);
- virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId) const override {
- return columnStats->getColumnStatistics(columnId);
- }
- uint32_t getNumberOfColumns() const override {
- return columnStats->getNumberOfColumns();
- }
- virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId,
- uint32_t rowIndex) const override {
- // check id indices are valid
- return rowIndexStats[columnId][rowIndex].get();
- }
- virtual ~StripeStatisticsImpl() override;
- uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override {
- return static_cast<uint32_t>(rowIndexStats[columnId].size());
- }
- };
- /**
- * Create ColumnStatistics for writers
- * @param type of column
- * @return MutableColumnStatistics instances
- */
- std::unique_ptr<MutableColumnStatistics> createColumnStatistics(const Type& type);
- } // namespace orc
- #endif
|