topfreq.h 3.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #pragma once
  2. #include <yql/essentials/public/udf/udf_allocator.h>
  3. #include <yql/essentials/public/udf/udf_helpers.h>
  4. #include <yql/essentials/public/udf/udf_type_ops.h>
  5. #include <unordered_map>
  6. template <typename THash, typename TEquals>
  7. class TTopFreqBase {
  8. protected:
  9. using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod;
  10. using TUnboxedValue = NKikimr::NUdf::TUnboxedValue;
  11. using IValueBuilder = NKikimr::NUdf::IValueBuilder;
  12. using TVectorElement = std::pair<TUnboxedValue, ui64>;
  13. using TVectorType = std::vector<TVectorElement, NKikimr::NUdf::TStdAllocatorForUdf<TVectorElement>>;
  14. TVectorType Freqs_;
  15. std::unordered_map<TUnboxedValue, ui32, THash, TEquals, NKikimr::NUdf::TStdAllocatorForUdf<std::pair<const TUnboxedValue, ui32>>> Indices_;
  16. ui32 MinSize_ = 0;
  17. ui32 MaxSize_ = 0;
  18. void Add(const TTopFreqBase& otherCalc);
  19. void Update(const TUnboxedValuePod& key, const ui64 value);
  20. void TryCompress();
  21. void Compress(ui32 newSize, bool sort = false);
  22. TUnboxedValue Convert(const IValueBuilder* valueBuilder) const;
  23. protected:
  24. TTopFreqBase(THash hash, TEquals equals);
  25. void Init(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize);
  26. void Merge(const TTopFreqBase& TopFreq1, const TTopFreqBase& TopFreq2);
  27. void Deserialize(const TUnboxedValuePod& serialized);
  28. TUnboxedValue Serialize(const IValueBuilder* builder);
  29. TUnboxedValue Get(const IValueBuilder* builder, ui32 resultSize);
  30. void AddValue(const TUnboxedValuePod& value);
  31. };
  32. template <NKikimr::NUdf::EDataSlot Slot>
  33. class TTopFreqData
  34. : public TTopFreqBase<
  35. NKikimr::NUdf::TUnboxedValueHash<Slot>,
  36. NKikimr::NUdf::TUnboxedValueEquals<Slot>>
  37. {
  38. public:
  39. using TBase = TTopFreqBase<
  40. NKikimr::NUdf::TUnboxedValueHash<Slot>,
  41. NKikimr::NUdf::TUnboxedValueEquals<Slot>>;
  42. TTopFreqData(const NKikimr::NUdf::TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize);
  43. TTopFreqData(const TTopFreqData& topFreq1, const TTopFreqData& topFreq2);
  44. TTopFreqData(const NKikimr::NUdf::TUnboxedValuePod& serialized);
  45. NKikimr::NUdf::TUnboxedValue Serialize(const NKikimr::NUdf::IValueBuilder* builder);
  46. NKikimr::NUdf::TUnboxedValue Get(const NKikimr::NUdf::IValueBuilder* builder, ui32 resultSize);
  47. void AddValue(const NKikimr::NUdf::TUnboxedValuePod& value);
  48. };
  49. struct TGenericHash {
  50. NKikimr::NUdf::IHash::TPtr Hash;
  51. std::size_t operator()(const NKikimr::NUdf::TUnboxedValuePod& value) const {
  52. return Hash->Hash(value);
  53. }
  54. };
  55. struct TGenericEquals {
  56. NKikimr::NUdf::IEquate::TPtr Equate;
  57. bool operator()(
  58. const NKikimr::NUdf::TUnboxedValuePod& left,
  59. const NKikimr::NUdf::TUnboxedValuePod& right) const
  60. {
  61. return Equate->Equals(left, right);
  62. }
  63. };
  64. class TTopFreqGeneric
  65. : public TTopFreqBase<TGenericHash, TGenericEquals>
  66. {
  67. public:
  68. using TBase = TTopFreqBase<TGenericHash, TGenericEquals>;
  69. TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize,
  70. NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate);
  71. TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2,
  72. NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate);
  73. TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& serialized,
  74. NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate);
  75. NKikimr::NUdf::TUnboxedValue Serialize(const NKikimr::NUdf::IValueBuilder* builder);
  76. NKikimr::NUdf::TUnboxedValue Get(const NKikimr::NUdf::IValueBuilder* builder, ui32 resultSize);
  77. void AddValue(const NKikimr::NUdf::TUnboxedValuePod& value);
  78. };