mkql_block_reader.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. #include "mkql_block_reader.h"
  2. #include <yql/essentials/minikql/mkql_string_util.h>
  3. #include <yql/essentials/minikql/mkql_node_builder.h>
  4. #include <yql/essentials/minikql/mkql_node_cast.h>
  5. #include <yql/essentials/public/udf/udf_type_inspection.h>
  6. #include <arrow/array/array_binary.h>
  7. #include <arrow/chunked_array.h>
  8. namespace NKikimr {
  9. namespace NMiniKQL {
  10. namespace {
  11. template <typename T, bool Nullable>
  12. class TFixedSizeBlockItemConverter : public IBlockItemConverter {
  13. public:
  14. NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final {
  15. Y_UNUSED(holderFactory);
  16. if constexpr (Nullable) {
  17. if (!item) {
  18. return {};
  19. }
  20. }
  21. return NUdf::TUnboxedValuePod(item.As<T>());
  22. }
  23. TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final {
  24. if constexpr (Nullable) {
  25. if (!value) {
  26. return {};
  27. }
  28. }
  29. return TBlockItem(value.Get<T>());
  30. }
  31. };
  32. template <bool Nullable>
  33. class TFixedSizeBlockItemConverter<NYql::NDecimal::TInt128, Nullable> : public IBlockItemConverter {
  34. public:
  35. NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final {
  36. Y_UNUSED(holderFactory);
  37. if constexpr (Nullable) {
  38. if (!item) {
  39. return {};
  40. }
  41. }
  42. return NUdf::TUnboxedValuePod(item.GetInt128());
  43. }
  44. TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final {
  45. if constexpr (Nullable) {
  46. if (!value) {
  47. return {};
  48. }
  49. }
  50. return TBlockItem(value.GetInt128());
  51. }
  52. };
  53. template <bool Nullable>
  54. class TResourceBlockItemConverter : public IBlockItemConverter {
  55. public:
  56. NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final {
  57. Y_UNUSED(holderFactory);
  58. if constexpr (Nullable) {
  59. if (!item) {
  60. return {};
  61. }
  62. }
  63. if (item.IsEmbedded()) {
  64. NUdf::TUnboxedValuePod embedded;
  65. std::memcpy(embedded.GetRawPtr(), item.GetRawPtr(), sizeof(NYql::NUdf::TUnboxedValuePod));
  66. return embedded;
  67. } else if (item.IsBoxed()) {
  68. return NYql::NUdf::TUnboxedValuePod(item.GetBoxed());
  69. } else {
  70. return NYql::NUdf::TUnboxedValuePod(item.AsStringValue());
  71. }
  72. }
  73. TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final {
  74. if constexpr (Nullable) {
  75. if (!value) {
  76. return {};
  77. }
  78. }
  79. if (value.IsEmbedded()) {
  80. TBlockItem embedded;
  81. std::memcpy(embedded.GetRawPtr(), value.GetRawPtr(), sizeof(TBlockItem));
  82. return embedded;
  83. } else if (value.IsBoxed()) {
  84. return TBlockItem(value.AsBoxed());
  85. } else {
  86. return TBlockItem(value.AsStringValue());
  87. }
  88. }
  89. };
  90. template<typename TStringType, bool Nullable, NUdf::EPgStringType PgString>
  91. class TStringBlockItemConverter : public IBlockItemConverter {
  92. public:
  93. void SetPgBuilder(const NUdf::IPgBuilder* pgBuilder, ui32 pgTypeId, i32 typeLen) {
  94. Y_ENSURE(PgString != NUdf::EPgStringType::None);
  95. PgBuilder = pgBuilder;
  96. PgTypeId = pgTypeId;
  97. TypeLen = typeLen;
  98. }
  99. NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final {
  100. Y_UNUSED(holderFactory);
  101. if constexpr (Nullable) {
  102. if (!item) {
  103. return {};
  104. }
  105. }
  106. if constexpr (PgString == NUdf::EPgStringType::CString) {
  107. return PgBuilder->MakeCString(item.AsStringRef().Data() + sizeof(void*)).Release();
  108. } else if constexpr (PgString == NUdf::EPgStringType::Text) {
  109. return PgBuilder->MakeText(item.AsStringRef().Data() + sizeof(void*)).Release();
  110. } else if constexpr (PgString == NUdf::EPgStringType::Fixed) {
  111. auto str = item.AsStringRef().Data() + sizeof(void*);
  112. auto len = item.AsStringRef().Size() - sizeof(void*);
  113. Y_DEBUG_ABORT_UNLESS(ui32(TypeLen) <= len);
  114. return PgBuilder->NewString(TypeLen, PgTypeId, NUdf::TStringRef(str, TypeLen)).Release();
  115. } else {
  116. return MakeString(item.AsStringRef());
  117. }
  118. }
  119. TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final {
  120. if constexpr (Nullable) {
  121. if (!value) {
  122. return {};
  123. }
  124. }
  125. if constexpr (PgString == NUdf::EPgStringType::CString) {
  126. auto buf = PgBuilder->AsCStringBuffer(value);
  127. return TBlockItem(NYql::NUdf::TStringRef(buf.Data() - sizeof(void*), buf.Size() + sizeof(void*)));
  128. } else if constexpr (PgString == NUdf::EPgStringType::Text) {
  129. auto buf = PgBuilder->AsTextBuffer(value);
  130. return TBlockItem(NYql::NUdf::TStringRef(buf.Data() - sizeof(void*), buf.Size() + sizeof(void*)));
  131. } else if constexpr (PgString == NUdf::EPgStringType::Fixed) {
  132. auto buf = PgBuilder->AsFixedStringBuffer(value, (ui32)TypeLen);
  133. return TBlockItem(NYql::NUdf::TStringRef(buf.Data() - sizeof(void*), buf.Size() + sizeof(void*)));
  134. } else {
  135. return TBlockItem(value.AsStringRef());
  136. }
  137. }
  138. private:
  139. const NUdf::IPgBuilder* PgBuilder = nullptr;
  140. ui32 PgTypeId = 0;
  141. i32 TypeLen = 0;
  142. };
  143. template <bool Nullable>
  144. class TTupleBlockItemConverter : public IBlockItemConverter {
  145. public:
  146. TTupleBlockItemConverter(TVector<std::unique_ptr<IBlockItemConverter>>&& children)
  147. : Children(std::move(children))
  148. {
  149. Items.resize(Children.size());
  150. Unboxed.resize(Children.size());
  151. }
  152. NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final {
  153. if constexpr (Nullable) {
  154. if (!item) {
  155. return {};
  156. }
  157. }
  158. NUdf::TUnboxedValue* values;
  159. auto result = holderFactory.CreateDirectArrayHolder(Children.size(), values);
  160. const TBlockItem* childItems = item.AsTuple();
  161. for (ui32 i = 0; i < Children.size(); ++i) {
  162. values[i] = Children[i]->MakeValue(childItems[i], holderFactory);
  163. }
  164. return result;
  165. }
  166. TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final {
  167. if constexpr (Nullable) {
  168. if (!value) {
  169. return {};
  170. }
  171. }
  172. auto elements = value.GetElements();
  173. if (!elements) {
  174. for (ui32 i = 0; i < Children.size(); ++i) {
  175. Unboxed[i] = value.GetElement(i);
  176. }
  177. elements = Unboxed.data();
  178. }
  179. for (ui32 i = 0; i < Children.size(); ++i) {
  180. Items[i] = Children[i]->MakeItem(elements[i]);
  181. }
  182. return TBlockItem{ Items.data() };
  183. }
  184. private:
  185. const TVector<std::unique_ptr<IBlockItemConverter>> Children;
  186. mutable TVector<NUdf::TUnboxedValue> Unboxed;
  187. mutable TVector<TBlockItem> Items;
  188. };
  189. template <typename TTzDate, bool Nullable>
  190. class TTzDateBlockItemConverter : public IBlockItemConverter {
  191. public:
  192. using TLayout = typename NYql::NUdf::TDataType<TTzDate>::TLayout;
  193. NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final {
  194. Y_UNUSED(holderFactory);
  195. if constexpr (Nullable) {
  196. if (!item) {
  197. return {};
  198. }
  199. }
  200. NUdf::TUnboxedValuePod value {item.Get<TLayout>()};
  201. value.SetTimezoneId(item.GetTimezoneId());
  202. return value;
  203. }
  204. TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final {
  205. if constexpr (Nullable) {
  206. if (!value) {
  207. return {};
  208. }
  209. }
  210. TBlockItem item {value.Get<TLayout>()};
  211. item.SetTimezoneId(value.GetTimezoneId());
  212. return item;
  213. }
  214. };
  215. class TExternalOptionalBlockItemConverter : public IBlockItemConverter {
  216. public:
  217. TExternalOptionalBlockItemConverter(std::unique_ptr<IBlockItemConverter>&& inner)
  218. : Inner(std::move(inner))
  219. {}
  220. NUdf::TUnboxedValuePod MakeValue(TBlockItem item, const THolderFactory& holderFactory) const final {
  221. if (!item) {
  222. return {};
  223. }
  224. return Inner->MakeValue(item.GetOptionalValue(), holderFactory).MakeOptional();
  225. }
  226. TBlockItem MakeItem(const NUdf::TUnboxedValuePod& value) const final {
  227. if (!value) {
  228. return {};
  229. }
  230. return Inner->MakeItem(value.GetOptionalValue()).MakeOptional();
  231. }
  232. private:
  233. const std::unique_ptr<IBlockItemConverter> Inner;
  234. };
  235. struct TConverterTraits {
  236. using TResult = IBlockItemConverter;
  237. template <bool Nullable>
  238. using TTuple = TTupleBlockItemConverter<Nullable>;
  239. template <typename T, bool Nullable>
  240. using TFixedSize = TFixedSizeBlockItemConverter<T, Nullable>;
  241. template <typename TStringType, bool Nullable, NUdf::EDataSlot TOriginal = NUdf::EDataSlot::String, NUdf::EPgStringType PgString = NUdf::EPgStringType::None>
  242. using TStrings = TStringBlockItemConverter<TStringType, Nullable, PgString>;
  243. using TExtOptional = TExternalOptionalBlockItemConverter;
  244. template<typename TTzDate, bool Nullable>
  245. using TTzDateConverter = TTzDateBlockItemConverter<TTzDate, Nullable>;
  246. constexpr static bool PassType = false;
  247. static std::unique_ptr<TResult> MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder) {
  248. if (desc.PassByValue) {
  249. return std::make_unique<TFixedSize<ui64, true>>();
  250. } else {
  251. if (desc.Typelen == -1) {
  252. auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EDataSlot::String, NUdf::EPgStringType::Text>>();
  253. ret->SetPgBuilder(pgBuilder, desc.TypeId, desc.Typelen);
  254. return ret;
  255. } else if (desc.Typelen == -2) {
  256. auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EDataSlot::String, NUdf::EPgStringType::CString>>();
  257. ret->SetPgBuilder(pgBuilder, desc.TypeId, desc.Typelen);
  258. return ret;
  259. } else {
  260. auto ret = std::make_unique<TStrings<arrow::BinaryType, true, NUdf::EDataSlot::String, NUdf::EPgStringType::Fixed>>();
  261. ret->SetPgBuilder(pgBuilder, desc.TypeId, desc.Typelen);
  262. return ret;
  263. }
  264. }
  265. }
  266. static std::unique_ptr<TResult> MakeResource(bool isOptional) {
  267. Y_UNUSED(isOptional);
  268. if (isOptional) {
  269. return std::make_unique<TResourceBlockItemConverter<true>>();
  270. } else {
  271. return std::make_unique<TResourceBlockItemConverter<false>>();
  272. }
  273. }
  274. template<typename TTzDate>
  275. static std::unique_ptr<TResult> MakeTzDate(bool isOptional) {
  276. if (isOptional) {
  277. return std::make_unique<TTzDateConverter<TTzDate, true>>();
  278. } else {
  279. return std::make_unique<TTzDateConverter<TTzDate, false>>();
  280. }
  281. }
  282. };
  283. } // namespace
  284. std::unique_ptr<IBlockItemConverter> MakeBlockItemConverter(const NYql::NUdf::ITypeInfoHelper& typeInfoHelper, const NYql::NUdf::TType* type, const NUdf::IPgBuilder& pgBuilder) {
  285. return NYql::NUdf::DispatchByArrowTraits<TConverterTraits>(typeInfoHelper, type, &pgBuilder);
  286. }
  287. } // namespace NMiniKQL
  288. } // namespace NKikimr