arrow_util.h 6.6 KB


  1. #pragma once
  2. #include "arrow_defs.h"
  3. #include <arrow/array/data.h>
  4. #include <arrow/buffer_builder.h>
  5. #include <arrow/datum.h>
  6. #include <arrow/scalar.h>
  7. #include <arrow/util/bitmap.h>
  8. #include <yql/essentials/minikql/mkql_node.h>
  9. #include <yql/essentials/minikql/arrow/mkql_bit_utils.h>
  10. #include <yql/essentials/public/udf/arrow/util.h>
  11. namespace NKikimr::NMiniKQL {
  12. using NYql::NUdf::DeepSlice;
  13. using NYql::NUdf::Chop;
  14. /// \brief Remove optional from `data` as new ArrayData object
  15. std::shared_ptr<arrow::ArrayData> Unwrap(const arrow::ArrayData& data, TType* itemType);
  16. using NYql::NUdf::AllocateBitmapWithReserve;
  17. using NYql::NUdf::MakeDenseBitmap;
  18. using NYql::NUdf::MakeDenseBitmapCopy;
  19. using NYql::NUdf::MakeDenseFalseBitmap;
  20. inline arrow::internal::Bitmap GetBitmap(const arrow::ArrayData& arr, int index) {
  21. return arrow::internal::Bitmap{ arr.buffers[index], arr.offset, arr.length };
  22. }
  23. using NYql::NUdf::ForEachArrayData;
  24. using NYql::NUdf::MakeArray;
  25. template <typename T>
  26. T GetPrimitiveScalarValue(const arrow::Scalar& scalar) {
  27. return *static_cast<const T*>(dynamic_cast<const arrow::internal::PrimitiveScalarBase&>(scalar).data());
  28. }
  29. inline const void* GetPrimitiveScalarValuePtr(const arrow::Scalar& scalar) {
  30. return dynamic_cast<const arrow::internal::PrimitiveScalarBase&>(scalar).data();
  31. }
  32. inline void* GetPrimitiveScalarValueMutablePtr(arrow::Scalar& scalar) {
  33. return dynamic_cast<arrow::internal::PrimitiveScalarBase&>(scalar).mutable_data();
  34. }
  35. inline std::string_view GetStringScalarValue(const arrow::Scalar& scalar) {
  36. const auto& base = dynamic_cast<const arrow::BaseBinaryScalar&>(scalar);
  37. return std::string_view{reinterpret_cast<const char*>(base.value->data()), static_cast<size_t>(base.value->size())};
  38. }
  39. inline arrow::Datum MakeUint8Array(arrow::MemoryPool* pool, ui8 value, int64_t len) {
  40. std::shared_ptr<arrow::Buffer> data = ARROW_RESULT(arrow::AllocateBuffer(len, pool));
  41. std::memset(data->mutable_data(), value, len);
  42. return arrow::ArrayData::Make(arrow::uint8(), len, { std::shared_ptr<arrow::Buffer>{}, data });
  43. }
  44. inline arrow::Datum MakeFalseArray(arrow::MemoryPool* pool, int64_t len) {
  45. return MakeUint8Array(pool, 0, len);
  46. }
  47. inline arrow::Datum MakeTrueArray(arrow::MemoryPool* pool, int64_t len) {
  48. return MakeUint8Array(pool, 1, len);
  49. }
  50. inline arrow::Datum MakeBitmapArray(arrow::MemoryPool* pool, int64_t len, int64_t offset, const ui8* bitmap) {
  51. std::shared_ptr<arrow::Buffer> data = ARROW_RESULT(arrow::AllocateBuffer(len, pool));
  52. DecompressToSparseBitmap(data->mutable_data(), bitmap, offset, len);
  53. return arrow::ArrayData::Make(arrow::uint8(), len, { std::shared_ptr<arrow::Buffer>{}, data });
  54. }
  /// \brief Compile-time mapping from a value type `T` to its Arrow counterparts.
  ///
  /// Only declared here; each supported `T` gets an explicit specialization.
  /// Every specialization in this file provides `TResult` (an Arrow data type class)
  /// and `TScalarResult` (an Arrow scalar class); most also provide `TLayout` and
  /// `TArithmetic`.
  template <typename T>
  struct TPrimitiveDataType;
  57. template<>
  58. struct TPrimitiveDataType<bool> {
  59. using TLayout = ui8;
  60. using TArithmetic = ui8;
  61. using TResult = arrow::UInt8Type;
  62. using TScalarResult = arrow::UInt8Scalar;
  63. };
  64. template<>
  65. struct TPrimitiveDataType<i8> {
  66. using TLayout = i8;
  67. using TArithmetic = i8;
  68. using TResult = arrow::Int8Type;
  69. using TScalarResult = arrow::Int8Scalar;
  70. };
  71. template<>
  72. struct TPrimitiveDataType<ui8> {
  73. using TLayout = ui8;
  74. using TArithmetic = ui8;
  75. using TResult = arrow::UInt8Type;
  76. using TScalarResult = arrow::UInt8Scalar;
  77. };
  78. template<>
  79. struct TPrimitiveDataType<i16> {
  80. using TLayout = i16;
  81. using TArithmetic = i16;
  82. using TResult = arrow::Int16Type;
  83. using TScalarResult = arrow::Int16Scalar;
  84. };
  85. template<>
  86. struct TPrimitiveDataType<ui16> {
  87. using TLayout = ui16;
  88. using TArithmetic = ui16;
  89. using TResult = arrow::UInt16Type;
  90. using TScalarResult = arrow::UInt16Scalar;
  91. };
  92. template<>
  93. struct TPrimitiveDataType<i32> {
  94. using TLayout = i32;
  95. using TArithmetic = i32;
  96. using TResult = arrow::Int32Type;
  97. using TScalarResult = arrow::Int32Scalar;
  98. };
  99. template<>
  100. struct TPrimitiveDataType<ui32> {
  101. using TLayout = ui32;
  102. using TArithmetic = ui32;
  103. using TResult = arrow::UInt32Type;
  104. using TScalarResult = arrow::UInt32Scalar;
  105. };
  106. template<>
  107. struct TPrimitiveDataType<i64> {
  108. using TLayout = i64;
  109. using TArithmetic = i64;
  110. using TResult = arrow::Int64Type;
  111. using TScalarResult = arrow::Int64Scalar;
  112. };
  113. template<>
  114. struct TPrimitiveDataType<ui64> {
  115. using TLayout = ui64;
  116. using TArithmetic = ui64;
  117. using TResult = arrow::UInt64Type;
  118. using TScalarResult = arrow::UInt64Scalar;
  119. };
  120. template<>
  121. struct TPrimitiveDataType<float> {
  122. using TLayout = float;
  123. using TArithmetic = float;
  124. using TResult = arrow::FloatType;
  125. using TScalarResult = arrow::FloatScalar;
  126. };
  127. template<>
  128. struct TPrimitiveDataType<double> {
  129. using TLayout = double;
  130. using TArithmetic = double;
  131. using TResult = arrow::DoubleType;
  132. using TScalarResult = arrow::DoubleScalar;
  133. };
  134. template<>
  135. struct TPrimitiveDataType<char*> {
  136. using TResult = arrow::BinaryType;
  137. using TScalarResult = arrow::BinaryScalar;
  138. };
  139. template<>
  140. struct TPrimitiveDataType<NYql::NUdf::TUtf8> {
  141. using TResult = arrow::StringType;
  142. using TScalarResult = arrow::StringScalar;
  143. };
  144. template<>
  145. struct TPrimitiveDataType<NYql::NDecimal::TInt128> {
  146. using TLayout = NYql::NDecimal::TInt128;
  147. using TArithmetic = NYql::NDecimal::TDecimal;
  148. class TResult: public arrow::FixedSizeBinaryType
  149. {
  150. public:
  151. TResult(): arrow::FixedSizeBinaryType(16)
  152. { }
  153. };
  154. class TScalarResult: public arrow::FixedSizeBinaryScalar
  155. {
  156. public:
  157. TScalarResult(std::shared_ptr<arrow::Buffer> value)
  158. : arrow::FixedSizeBinaryScalar(std::move(value), arrow::fixed_size_binary(16))
  159. { }
  160. TScalarResult()
  161. : arrow::FixedSizeBinaryScalar(arrow::fixed_size_binary(16))
  162. { }
  163. };
  164. };
  165. template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
  166. inline arrow::Datum MakeScalarDatum(T value) {
  167. return arrow::Datum(std::make_shared<typename TPrimitiveDataType<T>::TScalarResult>(value));
  168. }
  169. template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
  170. inline arrow::Datum MakeDefaultScalarDatum() {
  171. return MakeScalarDatum<T>({});
  172. }
  173. template <typename T>
  174. inline std::shared_ptr<arrow::DataType> GetPrimitiveDataType() {
  175. static std::shared_ptr<arrow::DataType> result = std::make_shared<typename TPrimitiveDataType<T>::TResult>();
  176. return result;
  177. }
  178. using NYql::NUdf::TTypedBufferBuilder;
  179. }
  180. namespace arrow {
  181. template <>
  182. struct TypeTraits<typename NKikimr::NMiniKQL::TPrimitiveDataType<NYql::NDecimal::TInt128>::TResult> {
  183. static inline std::shared_ptr<DataType> type_singleton() {
  184. return arrow::fixed_size_binary(16);
  185. }
  186. };
  187. }