// NOTE(review): this copy of the file appears to have lost every angle-bracketed
// span (the #include targets, template parameter lists and explicit template
// arguments such as the ones that normally follow Get, static_cast, THolder,
// TBoxedResource, TResource and SimpleSignature). The code below is reproduced
// token-for-token as found; restore the missing spans from the canonical file
// before attempting to compile.
#include #include #include #include #include #include

using namespace NKikimr;
using namespace NUdf;
using namespace NKiwiAggr;

namespace {

// Expands to "T<name>," - used to list the generated method classes inside the
// module registration.
#define REGISTER_METHOD_UDF(name) \
T##name,

// X-macro table: methods that are invoked with one extra argument (args[1])
// by the classes generated from DECLARE_ONE_DOUBLE_ARG_METHOD_UDF.
#define HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(XX) \
XX(GetSumAboveBound) \
XX(GetSumBelowBound) \
XX(CalcUpperBound) \
XX(CalcLowerBound) \
XX(CalcUpperBoundSafe) \
XX(CalcLowerBoundSafe)

// X-macro table: methods that are invoked with two extra arguments
// (args[1], args[2]).
#define HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(XX) \
XX(GetSumInRange)

// X-macro table of the histogram algorithm names handled generically
// (Linear and Logarithmic are added separately below).
#define HISTOGRAM_ALGORITHMS_MAP(XX) \
XX(AdaptiveDistance) \
XX(AdaptiveWeight) \
XX(AdaptiveWard) \
XX(BlockWeight) \
XX(BlockWard)

// X-macro table of the per-algorithm function families; each entry names a
// THistogram_<function> class template defined in this file.
#define HISTOGRAM_FUNCTION_MAP(XX, arg) \
XX(Create, arg) \
XX(AddValue, arg) \
XX(GetResult, arg) \
XX(Serialize, arg) \
XX(Deserialize, arg) \
XX(Merge, arg)

// Defines <name>HistogramResourceName as the string "Histogram.<name>".
// The "Histogram." prefix is exactly 10 characters long, which is what the
// substr(10) / Skip(10) calls in the Name() and GetResult code rely on.
#define DECLARE_HISTOGRAM_RESOURCE_NAME(name) extern const char name##HistogramResourceName[] = "Histogram." #name;
HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME)
DECLARE_HISTOGRAM_RESOURCE_NAME(Linear)
DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic)

// Histogram that quantizes every added value before delegating to
// TAdaptiveWardHistogram: values below Begin become Begin, values above End
// become End, everything else is rounded to the nearest multiple of Step.
class TLinearHistogram: public TAdaptiveWardHistogram {
public:
    // The base histogram is constructed with 1ULL << 24 as its argument
    // (presumably the bin/interval limit - TODO confirm against the base class).
    TLinearHistogram(double step, double begin, double end)
        : TAdaptiveWardHistogram(1ULL << 24)
        , Step(step)
        , Begin(begin)
        , End(end)
    {
    }

    // Clamps `value` to [Begin, End] or snaps it to a multiple of Step
    // (floor(x + 0.5) rounds halves upwards), then forwards to the base Add.
    // NOTE(review): a NaN `value` fails both comparisons, falls into the
    // rounding branch and is forwarded as NaN; unlike
    // TLogarithmicHistogram::Add there is no isnan guard here. Step == 0 is
    // not checked either.
    void Add(double value, double weight) override {
        if (value < Begin) {
            value = Begin;
        } else if (value > End) {
            value = End;
        } else {
            value = std::floor(value / Step + 0.5) * Step;
        }
        TAdaptiveWardHistogram::Add(value, weight);
    }

    // Adding a pre-built record is deliberately unsupported and aborts.
    void Add(const THistoRec&) override {
        Y_ABORT("Not implemented");
    }

protected:
    double Step;
    double Begin;
    double End;
};

// Variant of TLinearHistogram that snaps values to integer powers of Step
// instead of multiples of Step.
class TLogarithmicHistogram: public TLinearHistogram {
public:
    TLogarithmicHistogram(double step, double begin, double end)
        : TLinearHistogram(step, begin, end)
    {
    }

    // Computes log base Step of `value`, takes the two neighbouring integer
    // powers of Step (prev/next) and replaces `value` with whichever is
    // closer (ties go to `next`). The result is then clamped to [Begin, End].
    // A value that ends up NaN is silently dropped rather than added.
    void Add(double value, double weight) override {
        double base = std::log(value) / std::log(Step);
        double prev = std::pow(Step, std::floor(base));
        double next = std::pow(Step, std::ceil(base));
        if (std::abs(value - next) > std::abs(value - prev)) {
            value = prev;
        } else {
            value = next;
        }

        if (value < Begin) {
            value = Begin;
        } else if (value > End) {
            value = End;
        }

        if (!std::isnan(value)) {
            TAdaptiveWardHistogram::Add(value, weight);
        }
    }

    // Adding a pre-built record is deliberately unsupported and aborts.
    void Add(const THistoRec&) override {
        Y_ABORT("Not implemented");
    }
};

// UDF "<Algorithm>Histogram_Create": builds a new histogram resource from
// args[2] and adds a first point (args[0], args[1]) to it.
// NOTE(review): the template parameter list is missing in this copy; the body
// uses the name ResourceName, so it is presumably one of the parameters.
template class THistogram_Create: public TBoxedValue {
public:
    THistogram_Create(TSourcePosition pos)
        : Pos_(pos)
    {}

    typedef TBoxedResource THistogramResource;

    // Function name: ResourceName without its 10-character "Histogram."
    // prefix, followed by "Histogram_Create". Both statics are initialized
    // once; nameRef refers to the storage of `name`.
    static const TStringRef& Name() {
        static auto name = TString(ResourceName).substr(10) + "Histogram_Create";
        static auto nameRef = TStringRef(name);
        return nameRef;
    }

private:
    // Any std::exception is turned into UdfTerminate with the source position
    // prepended to the message.
    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        try {
            Y_UNUSED(valueBuilder);
            THolder histogram(new THistogramResource(args[2].Get()));
            histogram->Get()->Add(args[0].Get(), args[1].Get());
            // Ownership moves from the holder into the returned value.
            return TUnboxedValuePod(histogram.Release());
        } catch (const std::exception& e) {
            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
        }
    }

public:
    // Registers the signature (double, double, ui32) when `name` matches
    // Name(); the implementation object is only created when !typesOnly.
    // Returns true iff the name matched.
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            builder.SimpleSignature(double, double, ui32)>();
            if (!typesOnly) {
                builder.Implementation(new THistogram_Create(builder.GetSourcePosition()));
            }
            return true;
        } else {
            return false;
        }
    }

private:
    TSourcePosition Pos_;
};

// UDF "<Algorithm>Histogram_AddValue": adds the point (args[1], args[2]) to
// the histogram resource in args[0] and returns that same resource.
template class THistogram_AddValue: public TBoxedValue {
public:
    THistogram_AddValue(TSourcePosition pos)
        : Pos_(pos)
    {}

    typedef TBoxedResource THistogramResource;

    // ResourceName minus the "Histogram." prefix, plus "Histogram_AddValue".
    static const TStringRef& Name() {
        static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue";
        static auto nameRef = TStringRef(name);
        return nameRef;
    }

private:
    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        try {
            Y_UNUSED(valueBuilder);
            // The incoming resource is modified in place, not copied.
            THistogramResource* resource = static_cast(args[0].AsBoxed().Get());
            resource->Get()->Add(args[1].Get(), args[2].Get());
            return TUnboxedValuePod(args[0]);
        } catch (const std::exception& e) {
            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
        }
    }

public:
    // Registers the signature (resource, double, double) when `name` matches.
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            builder.SimpleSignature(TResource, double, double)>();
            if (!typesOnly) {
                builder.Implementation(new THistogram_AddValue(builder.GetSourcePosition()));
            }
            return true;
        } else {
            return false;
        }
    }

private:
    TSourcePosition Pos_;
};

// UDF "<Algorithm>Histogram_Serialize": converts the histogram resource in
// args[0] to its THistogram protobuf form and returns the serialized bytes
// as a string value.
template class THistogram_Serialize: public TBoxedValue {
public:
    THistogram_Serialize(TSourcePosition pos)
        : Pos_(pos)
    {}

    typedef TBoxedResource THistogramResource;

    // ResourceName minus the "Histogram." prefix, plus "Histogram_Serialize".
    static const TStringRef& Name() {
        static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize";
        static auto nameRef = TStringRef(name);
        return nameRef;
    }

private:
    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        try {
            THistogram proto;
            TString result;
            static_cast(args[0].AsBoxed().Get())->Get()->ToProto(proto);
            // The boolean result of SerializeToString is intentionally
            // discarded (nodiscard warning suppressed).
            Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result);
            return valueBuilder->NewString(result);
        } catch (const std::exception& e) {
            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
        }
    }

public:
    // Registers the signature when `name` matches Name().
    // NOTE(review): the signature's type list is missing from this copy.
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            builder.SimpleSignature)>();
            if (!typesOnly) {
                builder.Implementation(new THistogram_Serialize(builder.GetSourcePosition()));
            }
            return true;
        } else {
            return false;
        }
    }

private:
    TSourcePosition Pos_;
};

// UDF "<Algorithm>Histogram_Deserialize": parses a serialized THistogram from
// the string in args[0], creates a new resource from args[1] and loads the
// parsed protobuf into it.
template class THistogram_Deserialize: public TBoxedValue {
public:
    THistogram_Deserialize(TSourcePosition pos)
        : Pos_(pos)
    {}

    typedef TBoxedResource THistogramResource;

    // ResourceName minus the "Histogram." prefix, plus "Histogram_Deserialize".
    static const TStringRef& Name() {
        static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize";
        static auto nameRef = TStringRef(name);
        return nameRef;
    }

private:
    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        try {
            Y_UNUSED(valueBuilder);
            THistogram proto;
            // NOTE(review): the result of ParseFromString is discarded, so a
            // malformed input string is not reported here; whatever ended up
            // in `proto` is handed to FromProto. Confirm this is intended.
            Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef()));
            THolder histogram(new THistogramResource(args[1].Get()));
            histogram->Get()->FromProto(proto);
            return TUnboxedValuePod(histogram.Release());
        } catch (const std::exception& e) {
            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
        }
    }

public:
    // Registers the signature (char*, ui32) when `name` matches Name().
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            builder.SimpleSignature(char*, ui32)>();
            if (!typesOnly) {
                builder.Implementation(new THistogram_Deserialize(builder.GetSourcePosition()));
            }
            return true;
        } else {
            return false;
        }
    }

private:
    TSourcePosition Pos_;
};

// UDF "<Algorithm>Histogram_Merge": merges the histogram in args[0] into the
// histogram in args[1] and returns args[1].
template class THistogram_Merge: public TBoxedValue {
public:
    THistogram_Merge(TSourcePosition pos)
        : Pos_(pos)
    {}

    typedef TBoxedResource THistogramResource;

    // ResourceName minus the "Histogram." prefix, plus "Histogram_Merge".
    static const TStringRef& Name() {
        static auto name = TString(ResourceName).substr(10) + "Histogram_Merge";
        static auto nameRef = TStringRef(name);
        return nameRef;
    }

private:
    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        try {
            Y_UNUSED(valueBuilder);
            THistogram proto;
            // The first histogram is exported to protobuf form and then merged
            // into the second one, with 1.0 passed as Merge's second argument.
            // The second resource is modified in place.
            static_cast(args[0].AsBoxed().Get())->Get()->ToProto(proto);
            static_cast(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0);
            return TUnboxedValuePod(args[1]);
        } catch (const std::exception& e) {
            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
        }
    }

public:
    // Registers the signature (resource, resource) when `name` matches Name().
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            builder.SimpleSignature(TResource, TResource)>();
            if (!typesOnly) {
                builder.Implementation(new THistogram_Merge(builder.GetSourcePosition()));
            }
            return true;
        } else {
            return false;
        }
    }

private:
    TSourcePosition Pos_;
};
// NOTE(review): this copy of the file appears to have lost every angle-bracketed
// span (template parameter lists and explicit template arguments such as the
// ones that normally follow Get, GetOrDefault, static_cast, THolder,
// std::vector, Item, Add, Returns and SimpleSignature). The code below is
// reproduced token-for-token as found; restore the missing spans from the
// canonical file before attempting to compile.

// Describes the struct type returned by GetResult and consumed by the
// struct-based functions (Print, Normalize, ...): builds the type through the
// supplied builder and records the field index assigned to every member.
//   result struct: Kind, Min, Max, WeightsSum, Bins (list of bin structs)
//   bin struct:    Position, Frequency
struct THistogramIndexes {
    static constexpr ui32 BinFieldsCount = 2U;
    static constexpr ui32 ResultFieldsCount = 5U;

    // Each AddField call receives the address of the member that is to hold
    // that field's index.
    THistogramIndexes(IFunctionTypeInfoBuilder& builder) {
        const auto binStructType = builder.Struct(BinFieldsCount)->AddField("Position", &Position).AddField("Frequency", &Frequency).Build();
        const auto binsList = builder.List()->Item(binStructType).Build();
        ResultStructType = builder.Struct(ResultFieldsCount)->AddField("Kind", &Kind).AddField("Min", &Min).AddField("Max", &Max).AddField("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build();
    }

    ui32 Kind;
    ui32 Min;
    ui32 Max;
    ui32 WeightsSum;
    ui32 Bins;
    ui32 Position;
    ui32 Frequency;
    TType* ResultStructType;
};

// UDF "<Algorithm>Histogram_GetResult": converts the histogram resource in
// args[0] into the result struct described by THistogramIndexes.
template class THistogram_GetResult: public TBoxedValue {
public:
    typedef TBoxedResource THistogramResource;

    THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos)
        : HistogramIndexes(histogramIndexes)
        , Pos_(pos)
    {
    }

    // ResourceName minus its 10-character "Histogram." prefix, plus
    // "Histogram_GetResult".
    static const TStringRef& Name() {
        static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult";
        static auto nameRef = TStringRef(name);
        return nameRef;
    }

private:
    // NOTE(review): unlike the Run methods of the other THistogram_* classes
    // in this file, this one has no try/catch around its body, and Pos_ is
    // not referenced by it.
    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        THistogram proto;
        auto histogram = static_cast(args[0].AsBoxed().Get())->Get();
        histogram->ToProto(proto);

        auto size = proto.FreqSize();
        TUnboxedValue* fields = nullptr;
        auto result = valueBuilder->NewArray(HistogramIndexes.ResultFieldsCount, fields);
        // Kind is the resource name with the "Histogram." prefix skipped.
        fields[HistogramIndexes.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10));
        if (size) {
            TUnboxedValue* items = nullptr;
            fields[HistogramIndexes.Bins] = valueBuilder->NewArray(size, items);
            fields[HistogramIndexes.Min] = TUnboxedValuePod(static_cast(histogram->GetMinValue()));
            fields[HistogramIndexes.Max] = TUnboxedValuePod(static_cast(histogram->GetMaxValue()));
            fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(static_cast(histogram->GetSum()));
            // One bin struct per (frequency, position) pair of the protobuf.
            for (ui64 i = 0; i < size; ++i) {
                TUnboxedValue* binFields = nullptr;
                *items++ = valueBuilder->NewArray(HistogramIndexes.BinFieldsCount, binFields);
                binFields[HistogramIndexes.Frequency] = TUnboxedValuePod(static_cast(proto.GetFreq(i)));
                binFields[HistogramIndexes.Position] = TUnboxedValuePod(static_cast(proto.GetPosition(i)));
            }
        } else {
            // Empty histogram: empty bin list and zeroed summary fields.
            fields[HistogramIndexes.Bins] = valueBuilder->NewEmptyList();
            fields[HistogramIndexes.Min] = TUnboxedValuePod(0.0);
            fields[HistogramIndexes.Max] = TUnboxedValuePod(0.0);
            fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(0.0);
        }

        return result;
    }

public:
    // Declares "resource -> result struct" when `name` matches Name(); the
    // implementation object is only created when !typesOnly.
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName)));

            THistogramIndexes histogramIndexes(builder);

            builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType);

            if (!typesOnly) {
                builder.Implementation(new THistogram_GetResult(histogramIndexes, builder.GetSourcePosition()));
            }
            return true;
        } else {
            return false;
        }
    }

private:
    const THistogramIndexes HistogramIndexes;
    TSourcePosition Pos_;
};

// Explicit specializations of THistogram_Create / THistogram_Deserialize whose
// histogram is constructed from three values (args[1], args[2], args[3])
// instead of a single one.
// NOTE(review): the same four specializations appear twice below with
// identical visible text; presumably one set is for TLinearHistogram and the
// other for TLogarithmicHistogram (both have a (step, begin, end)
// constructor), with the distinguishing template arguments lost in this
// copy - TODO confirm.

// Create: builds the resource from args[1..3] and adds args[0] with a fixed
// weight of 1.0.
template <>
TUnboxedValue THistogram_Create::Run(
    const IValueBuilder* valueBuilder,
    const TUnboxedValuePod* args) const {
    using THistogramResource = THistogram_Create::THistogramResource;
    try {
        Y_UNUSED(valueBuilder);
        THolder histogram(new THistogramResource(
            args[1].Get(), args[2].Get(), args[3].Get()));
        histogram->Get()->Add(args[0].Get(), 1.0);
        return TUnboxedValuePod(histogram.Release());
    } catch (const std::exception& e) {
        UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
    }
}

// Create: signature with four double arguments.
template <>
bool THistogram_Create::DeclareSignature(
    const TStringRef& name,
    TType* userType,
    IFunctionTypeInfoBuilder& builder,
    bool typesOnly) {
    Y_UNUSED(userType);
    if (Name() == name) {
        builder.SimpleSignature(double, double, double, double)>();
        if (!typesOnly) {
            builder.Implementation(new THistogram_Create(builder.GetSourcePosition()));
        }
        return true;
    } else {
        return false;
    }
}

// Deserialize: parses args[0] into a protobuf (parse result discarded), builds
// the resource from args[1..3] and loads the protobuf into it.
template <>
TUnboxedValue THistogram_Deserialize::Run(
    const IValueBuilder* valueBuilder,
    const TUnboxedValuePod* args) const {
    using THistogramResource = THistogram_Deserialize::THistogramResource;
    try {
        Y_UNUSED(valueBuilder);
        THistogram proto;
        Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef()));
        THolder histogram(
            new THistogramResource(args[1].Get(), args[2].Get(), args[3].Get()));
        histogram->Get()->FromProto(proto);
        return TUnboxedValuePod(histogram.Release());
    } catch (const std::exception& e) {
        UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
    }
}

// Deserialize: signature (char*, double, double, double).
template <>
bool THistogram_Deserialize::DeclareSignature(
    const TStringRef& name,
    TType* userType,
    IFunctionTypeInfoBuilder& builder,
    bool typesOnly) {
    Y_UNUSED(userType);
    if (Name() == name) {
        builder.SimpleSignature(char*, double, double, double)>();
        if (!typesOnly) {
            builder.Implementation(new THistogram_Deserialize(builder.GetSourcePosition()));
        }
        return true;
    } else {
        return false;
    }
}

// Second set of specializations (see the note above the first set).
template <>
TUnboxedValue THistogram_Create::Run(
    const IValueBuilder* valueBuilder,
    const TUnboxedValuePod* args) const {
    using THistogramResource = THistogram_Create::THistogramResource;
    try {
        Y_UNUSED(valueBuilder);
        THolder histogram(new THistogramResource(
            args[1].Get(), args[2].Get(), args[3].Get()));
        histogram->Get()->Add(args[0].Get(), 1.0);
        return TUnboxedValuePod(histogram.Release());
    } catch (const std::exception& e) {
        UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
    }
}

template <>
bool THistogram_Create::DeclareSignature(
    const TStringRef& name,
    TType* userType,
    IFunctionTypeInfoBuilder& builder,
    bool typesOnly) {
    Y_UNUSED(userType);
    if (Name() == name) {
        builder.SimpleSignature(double, double, double, double)>();
        if (!typesOnly) {
            builder.Implementation(new THistogram_Create(builder.GetSourcePosition()));
        }
        return true;
    } else {
        return false;
    }
}

template <>
TUnboxedValue THistogram_Deserialize::Run(
    const IValueBuilder* valueBuilder,
    const TUnboxedValuePod* args) const {
    using THistogramResource = THistogram_Deserialize::THistogramResource;
    try {
        Y_UNUSED(valueBuilder);
        THistogram proto;
        Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef()));
        THolder histogram(
            new THistogramResource(args[1].Get(), args[2].Get(), args[3].Get()));
        histogram->Get()->FromProto(proto);
        return TUnboxedValuePod(histogram.Release());
    } catch (const std::exception& e) {
        UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
    }
}

template <>
bool THistogram_Deserialize::DeclareSignature(
    const TStringRef& name,
    TType* userType,
    IFunctionTypeInfoBuilder& builder,
    bool typesOnly) {
    Y_UNUSED(userType);
    if (Name() == name) {
        builder.SimpleSignature(char*, double, double, double)>();
        if (!typesOnly) {
            builder.Implementation(new THistogram_Deserialize(builder.GetSourcePosition()));
        }
        return true;
    } else {
        return false;
    }
}

// UDF "Print": renders a result struct (args[0]) as text - a summary line
// followed by one line per bin, optionally prefixed with a bar chart whose
// width is given by the optional args[1] (default 25; 0 disables the bars).
class THistogramPrint: public TBoxedValue {
public:
    THistogramPrint(const THistogramIndexes& histogramIndexes)
        : HistogramIndexes(histogramIndexes)
    {
    }

    static const TStringRef& Name() {
        static auto name = TStringRef::Of("Print");
        return name;
    }

    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        auto kind = args[0].GetElement(HistogramIndexes.Kind);
        auto bins = args[0].GetElement(HistogramIndexes.Bins);
        double min = args[0].GetElement(HistogramIndexes.Min).Get();
        double max = args[0].GetElement(HistogramIndexes.Max).Get();
        double weightsSum = args[0].GetElement(HistogramIndexes.WeightsSum).Get();
        auto binsIterator = bins.GetListIterator();

        TStringBuilder result;
        result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' ';
        result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f",
                          bins.GetListLength(), weightsSum, min, max);
        double maxFrequency = 0.0;
        size_t maxPositionLength = 0;
        size_t maxFrequencyLength = 0;

        const ui8 bars = args[1].GetOrDefault(25);

        // First pass: find the largest frequency (only needed for the bars)
        // and the widest formatted position/frequency for column alignment.
        for (TUnboxedValue current; binsIterator.Next(current);) {
            if (bars) {
                double frequency = current.GetElement(HistogramIndexes.Frequency).Get();
                if (frequency > maxFrequency) {
                    maxFrequency = frequency;
                }
            }
            size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes.Position).Get()).length();
            size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes.Frequency).Get()).length();

            if (positionLength > maxPositionLength) {
                maxPositionLength = positionLength;
            }
            if (frequencyLength > maxFrequencyLength) {
                maxFrequencyLength = frequencyLength;
            }
        }

        // Second pass: emit one line per bin. The bar is scaled so that the
        // bin with the largest frequency fills all `bars` cells; bars are
        // skipped when maxFrequency is not positive.
        binsIterator = bins.GetListIterator();
        for (TUnboxedValue current; binsIterator.Next(current);) {
            double position = current.GetElement(HistogramIndexes.Position).Get();
            double frequency = current.GetElement(HistogramIndexes.Frequency).Get();
            result << "\n";
            if (bars && maxFrequency > 0) {
                ui8 filledBars = static_cast(bars * frequency / maxFrequency);
                for (ui8 i = 0; i < bars; ++i) {
                    if (i < filledBars) {
                        result << "█";
                    } else {
                        result << "░";
                    }
                }
            }
            result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength);
            result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength);
        }

        return valueBuilder->NewString(result);
    }

    // Declares (result struct [AutoMap], optional bars) with one optional
    // argument; marks the function strict.
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            THistogramIndexes histogramIndexes(builder);
            auto optionalUi8 = builder.Optional()->Item().Build();

            builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns();

            if (!typesOnly) {
                builder.Implementation(new THistogramPrint(histogramIndexes));
            }
            builder.IsStrict();
            return true;
        } else {
            return false;
        }
    }

private:
    const THistogramIndexes HistogramIndexes;
};

// UDF "ToCumulativeDistributionFunction": returns a result struct in which
// every bin's Frequency is the running total of the input frequencies up to
// and including that bin; "Cdf" is appended to Kind, Min/Max are copied.
class THistogramToCumulativeDistributionFunction: public TBoxedValue {
public:
    THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes)
        : HistogramIndexes(histogramIndexes)
    {
    }

    static const TStringRef& Name() {
        static auto name = TStringRef::Of("ToCumulativeDistributionFunction");
        return name;
    }

    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        TUnboxedValue* fields = nullptr;
        auto result = valueBuilder->NewArray(HistogramIndexes.ResultFieldsCount, fields);
        auto bins = args[0].GetElement(HistogramIndexes.Bins);
        double minValue = args[0].GetElement(HistogramIndexes.Min).Get();
        double maxValue = args[0].GetElement(HistogramIndexes.Max).Get();
        double sum = 0.0;
        double weightsSum = 0.0;
        std::vector resultBins;
        if (bins.HasFastListLength())
            resultBins.reserve(bins.GetListLength());
        const auto binsIterator = bins.GetListIterator();
        for (TUnboxedValue current; binsIterator.Next(current);) {
            TUnboxedValue* binFields = nullptr;
            auto resultCurrent = valueBuilder->NewArray(HistogramIndexes.BinFieldsCount, binFields);
            const auto frequency = current.GetElement(HistogramIndexes.Frequency).Get();
            // `sum` is the cumulative frequency; `weightsSum` adds up the
            // cumulative values, i.e. the total of the output frequencies.
            sum += frequency;
            weightsSum += sum;
            binFields[HistogramIndexes.Frequency] = TUnboxedValuePod(sum);
            binFields[HistogramIndexes.Position] = current.GetElement(HistogramIndexes.Position);
            resultBins.emplace_back(std::move(resultCurrent));
        }

        auto kind = args[0].GetElement(HistogramIndexes.Kind);
        fields[HistogramIndexes.Kind] = valueBuilder->AppendString(kind, "Cdf");
        fields[HistogramIndexes.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size());
        fields[HistogramIndexes.Max] = TUnboxedValuePod(maxValue);
        fields[HistogramIndexes.Min] = TUnboxedValuePod(minValue);
        fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(weightsSum);
        return result;
    }

    // Declares "result struct [AutoMap] -> result struct"; marks the function
    // strict.
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            THistogramIndexes histogramIndexes(builder);

            builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType);

            if (!typesOnly) {
                builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes));
            }
            builder.IsStrict();
            return true;
        } else {
            return false;
        }
    }

private:
    const THistogramIndexes HistogramIndexes;
};

// UDF "Normalize": rescales the bin frequencies of a result struct.
//   args[1] (optional, default 100.0) - target `area`
//   args[2] (optional, default false) - cdfNormalization
// Without cdfNormalization every frequency becomes area * f / (sum of all
// frequencies); with it, area * f / (frequency of the last bin) and "Cdf" is
// appended to Kind.
class THistogramNormalize: public TBoxedValue {
public:
    THistogramNormalize(const THistogramIndexes& histogramIndexes)
        : HistogramIndexes(histogramIndexes)
    {
    }

    static const TStringRef& Name() {
        static auto name = TStringRef::Of("Normalize");
        return name;
    }

    TUnboxedValue Run(
        const IValueBuilder* valueBuilder,
        const TUnboxedValuePod* args) const override {
        TUnboxedValue* fields = nullptr;
        auto result = valueBuilder->NewArray(HistogramIndexes.ResultFieldsCount, fields);
        auto bins = args[0].GetElement(HistogramIndexes.Bins);
        double minValue = args[0].GetElement(HistogramIndexes.Min).Get();
        double maxValue = args[0].GetElement(HistogramIndexes.Max).Get();
        double area = args[1].GetOrDefault(100.0);
        bool cdfNormalization = args[2].GetOrDefault(false);
        double sum = 0.0;
        double weightsSum = 0.0;
        double lastBinFrequency = 0.0;
        std::vector resultBins;
        if (bins.HasFastListLength())
            resultBins.reserve(bins.GetListLength());
        // First pass: total frequency and the frequency of the final bin.
        auto binsIterator = bins.GetListIterator();
        for (TUnboxedValue current; binsIterator.Next(current);) {
            sum += current.GetElement(HistogramIndexes.Frequency).Get();
            lastBinFrequency = current.GetElement(HistogramIndexes.Frequency).Get();
        }
        // Second pass: emit the rescaled bins.
        // NOTE(review): neither divisor is checked for zero, so bins whose
        // frequencies sum to 0 (or a last bin of 0 in cdf mode) would yield
        // non-finite frequencies - confirm whether that input can occur.
        binsIterator = bins.GetListIterator();
        for (TUnboxedValue current; binsIterator.Next(current);) {
            TUnboxedValue* binFields = nullptr;
            auto resultCurrent = valueBuilder->NewArray(HistogramIndexes.BinFieldsCount, binFields);
            double frequency = current.GetElement(HistogramIndexes.Frequency).Get();
            if (cdfNormalization) {
                frequency = area * frequency / lastBinFrequency;
            } else {
                frequency = area * frequency / sum;
            }
            weightsSum += frequency;
            binFields[HistogramIndexes.Frequency] = TUnboxedValuePod(frequency);
            binFields[HistogramIndexes.Position] = current.GetElement(HistogramIndexes.Position);
            resultBins.emplace_back(std::move(resultCurrent));
        }

        TUnboxedValue kind = args[0].GetElement(HistogramIndexes.Kind);
        if (cdfNormalization) {
            kind = valueBuilder->AppendString(kind, "Cdf");
        }

        fields[HistogramIndexes.Kind] = kind;
        fields[HistogramIndexes.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size());
        fields[HistogramIndexes.Max] = TUnboxedValuePod(maxValue);
        fields[HistogramIndexes.Min] = TUnboxedValuePod(minValue);
        fields[HistogramIndexes.WeightsSum] = TUnboxedValuePod(weightsSum);
        return result;
    }

    // Declares (result struct [AutoMap], optional area, optional cdf flag)
    // returning a result struct; marks the function strict.
    // NOTE(review): OptionalArgs is called twice (1, then 2); presumably the
    // later call is the one that takes effect - TODO confirm and drop the
    // redundant call if so.
    static bool DeclareSignature(
        const TStringRef& name,
        TType* userType,
        IFunctionTypeInfoBuilder& builder,
        bool typesOnly) {
        Y_UNUSED(userType);
        if (Name() == name) {
            THistogramIndexes histogramIndexes(builder);
            auto optionalDouble = builder.Optional()->Item().Build();
            auto optionalCdfNormalization = builder.Optional()->Item().Build();
            builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType);
            builder.OptionalArgs(1);
            builder.OptionalArgs(2);
            if (!typesOnly) {
                builder.Implementation(new THistogramNormalize(histogramIndexes));
            }
            builder.IsStrict();
            return true;
        } else {
            return false;
        }
    }

private:
    const THistogramIndexes HistogramIndexes;
};

// Common base of the generated method UDFs (GetSumAboveBound, CalcUpperBound,
// GetSumInRange, ...). Run rebuilds a THistogram protobuf from the result
// struct in args[0] and passes it, together with the raw args, to the
// subclass-provided GetResult.
// NOTE(review): the template parameter list is missing in this copy; the body
// reads a name `twoArgs`, presumably a boolean template parameter - TODO
// confirm.
template class THistogramMethodBase: public TBoxedValue {
public:
    THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos)
        : HistogramIndexes(histogramIndexes)
        , Pos_(pos)
    {
    }

    // Computes the method's value from the reconstructed histogram; `args` is
    // the full argument array, so extra parameters start at args[1].
    virtual TUnboxedValue GetResult(
        const THistogram& input,
        const TUnboxedValuePod* args) const = 0;

    TUnboxedValue Run(
        const IValueBuilder*,
        const TUnboxedValuePod* args) const override {
        try {
            auto bins = args[0].GetElement(HistogramIndexes.Bins);
            double min = args[0].GetElement(HistogramIndexes.Min).template Get();
            double max = args[0].GetElement(HistogramIndexes.Max).template Get();
            auto binsIterator = bins.GetListIterator();

            // The protobuf is always tagged HT_ADAPTIVE_HISTOGRAM, whatever
            // the Kind field of the input struct says.
            THistogram histogram;
            histogram.SetType(HT_ADAPTIVE_HISTOGRAM);
            histogram.SetMinValue(min);
            histogram.SetMaxValue(max);
            for (TUnboxedValue current; binsIterator.Next(current);) {
                double frequency = current.GetElement(HistogramIndexes.Frequency).template Get();
                double position = current.GetElement(HistogramIndexes.Position).template Get();
                histogram.AddFreq(frequency);
                histogram.AddPosition(position);
            }

            return GetResult(histogram, args);
        } catch (const std::exception& e) {
            UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data());
        }
    }

    // Declares the signature on `builder`: the result struct (AutoMap)
    // followed by two extra arguments when twoArgs is set, otherwise one.
    // Returns the indexes object so the caller can hand it to the
    // implementation.
    static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) {
        THistogramIndexes histogramIndexes(builder);

        if (twoArgs) {
            builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add().Add().Done().Returns();
        } else {
            builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add().Done().Returns();
        }
        return histogramIndexes;
    }

protected:
    const THistogramIndexes HistogramIndexes;
    TSourcePosition Pos_;
};

// Generates class T<name> deriving from THistogramMethodBase. Its UDF name is
// the literal <name>; GetResult builds a TAdaptiveWardHistogram from the
// protobuf (passing input.FreqSize() as the second constructor argument) and
// returns histo.<name>(args[1]). In DeclareSignature the const reference is
// bound to the temporary returned by value from DeclareSignatureBase.
#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \
class T##name: public THistogramMethodBase { \
public: \
    T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \
        : THistogramMethodBase(histogramIndexes, pos) { \
    } \
    static const TStringRef& Name() { \
        static auto name = TStringRef::Of(#name); \
        return name; \
    } \
    static bool DeclareSignature( \
        const TStringRef& name, \
        TType* userType, \
        IFunctionTypeInfoBuilder& builder, \
        bool typesOnly) { \
        Y_UNUSED(userType); \
        if (Name() == name) { \
            const auto& histogramIndexes = DeclareSignatureBase(builder); \
            if (!typesOnly) { \
                builder.Implementation(new T##name(histogramIndexes, \
                    builder.GetSourcePosition())); \
            } \
            return true; \
        } else { \
            return false; \
        } \
    } \
    TUnboxedValue GetResult( \
        const THistogram& input, \
        const TUnboxedValuePod* args) const override { \
        TAdaptiveWardHistogram histo(input, input.FreqSize()); \
        double result = histo.name(args[1].Get()); \
        return TUnboxedValuePod(result); \
    } \
};

// Same as DECLARE_ONE_DOUBLE_ARG_METHOD_UDF, but GetResult calls
// histo.<name>(args[1], args[2]).
#define DECLARE_TWO_DOUBLE_ARG_METHOD_UDF(name) \
class T##name: public THistogramMethodBase { \
public: \
    T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \
        : THistogramMethodBase(histogramIndexes, pos) { \
    } \
    static const TStringRef& Name() { \
        static auto name = TStringRef::Of(#name); \
        return name; \
    } \
    static bool DeclareSignature( \
        const TStringRef& name, \
        TType* userType, \
        IFunctionTypeInfoBuilder& builder, \
        bool typesOnly) { \
        Y_UNUSED(userType); \
        if (Name() == name) { \
            const auto& histogramIndexes = DeclareSignatureBase(builder); \
            if (!typesOnly) { \
                builder.Implementation(new T##name(histogramIndexes, \
                    builder.GetSourcePosition())); \
            } \
            return true; \
        } else { \
            return false; \
        } \
    } \
    TUnboxedValue GetResult( \
        const THistogram& input, \
        const TUnboxedValuePod* args) const override { \
        TAdaptiveWardHistogram histo(input, input.FreqSize()); \
        double result = histo.name(args[1].Get(), args[2].Get()); \
        return TUnboxedValuePod(result); \
    } \
};

// Expands to "THistogram_<functionName>," for the module's class list.
// NOTE(review): histogramName is not used in the visible expansion; the
// template arguments that presumably consumed it are missing in this copy.
#define DECLARE_HISTOGRAM_UDF(functionName, histogramName) \
THistogram_##functionName,

// Lists every function family (Create, AddValue, ...) for one algorithm name.
#define DECLARE_HISTOGRAM_UDFS(name) \
HISTOGRAM_FUNCTION_MAP(DECLARE_HISTOGRAM_UDF, name)

// Instantiate the generated method classes.
HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF)
HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF)

// The module: per-algorithm function families, the generated method classes,
// the Linear/Logarithmic families and the three struct-based helpers.
SIMPLE_MODULE(THistogramModule,
              HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS)
              HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF)
              HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF)
              DECLARE_HISTOGRAM_UDFS(Linear)
              DECLARE_HISTOGRAM_UDFS(Logarithmic)
              THistogramPrint,
              THistogramNormalize,
              THistogramToCumulativeDistributionFunction)
}

REGISTER_MODULES(THistogramModule)