#pragma once #include #include #include #include #include #include #include namespace NYql { enum EStatisticsType : ui32 { BaseTable, FilteredFactTable, ManyManyJoin }; enum EStorageType : ui32 { NA, RowStorage, ColumnStorage }; // Providers may subclass this struct to associate specific statistics, useful to // derive stats for higher-level operators in the plan. struct IProviderStatistics { virtual ~IProviderStatistics() {} }; struct TColumnStatistics { std::optional NumUniqueVals; std::optional HyperLogLog; std::shared_ptr CountMinSketch; TString Type; TColumnStatistics() {} }; /** * Optimizer Statistics struct records per-table and per-column statistics * for the current operator in the plan. Currently, only Nrows and Ncols are * recorded. * Cost is also included in statistics, as its updated concurrently with statistics * all of the time. */ struct TOptimizerStatistics { struct TKeyColumns : public TSimpleRefCount { TVector Data; TKeyColumns(const TVector& vec) : Data(vec) {} }; struct TSortColumns : public TSimpleRefCount { TVector Columns; TVector Aliases; TSortColumns(const TVector& cols, const TVector& aliases) : Columns(cols) , Aliases(aliases) {} }; struct TColumnStatMap : public TSimpleRefCount { THashMap Data; TColumnStatMap() {} TColumnStatMap(const THashMap& map) : Data(map) {} }; EStatisticsType Type = BaseTable; double Nrows = 0; int Ncols = 0; double ByteSize = 0; double Cost = 0; double Selectivity = 1.0; TIntrusivePtr KeyColumns; TIntrusivePtr ColumnStatistics; EStorageType StorageType = EStorageType::NA; TIntrusivePtr SortColumns; std::shared_ptr Specific; std::shared_ptr> Labels = {}; TOptimizerStatistics(TOptimizerStatistics&&) = default; TOptimizerStatistics& operator=(TOptimizerStatistics&&) = default; TOptimizerStatistics(const TOptimizerStatistics&) = default; TOptimizerStatistics& operator=(const TOptimizerStatistics&) = default; TOptimizerStatistics() = default; TOptimizerStatistics( EStatisticsType type, double nrows = 0.0, int ncols = 0, double byteSize = 0.0, double cost = 0.0, TIntrusivePtr keyColumns = {}, TIntrusivePtr columnMap = {}, EStorageType storageType = EStorageType::NA, std::shared_ptr specific = nullptr); TOptimizerStatistics& operator+=(const TOptimizerStatistics& other); bool Empty() const; friend std::ostream& operator<<(std::ostream& os, const TOptimizerStatistics& s); TString ToString() const; }; std::shared_ptr OverrideStatistics(const TOptimizerStatistics& s, const TStringBuf& tablePath, const std::shared_ptr& stats); }