mkql_functions.cpp 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. #include "mkql_functions.h"
  2. #include <yql/essentials/minikql/mkql_node_builder.h>
  3. #include <yql/essentials/minikql/mkql_node_cast.h>
  4. #include <yql/essentials/minikql/mkql_type_builder.h>
  5. #include <yql/essentials/minikql/mkql_function_metadata.h>
  6. #include <arrow/datum.h>
  7. #include <arrow/visitor.h>
  8. #include <arrow/compute/registry.h>
  9. #include <arrow/compute/function.h>
  10. #include <arrow/compute/cast.h>
  11. namespace NKikimr::NMiniKQL {
  12. bool ConvertInputArrowType(TType* blockType, arrow::ValueDescr& descr) {
  13. auto asBlockType = AS_TYPE(TBlockType, blockType);
  14. descr.shape = asBlockType->GetShape() == TBlockType::EShape::Scalar ? arrow::ValueDescr::SCALAR : arrow::ValueDescr::ARRAY;
  15. return ConvertArrowType(asBlockType->GetItemType(), descr.type);
  16. }
  17. class TOutputTypeVisitor : public arrow::TypeVisitor
  18. {
  19. public:
  20. TOutputTypeVisitor(TTypeEnvironment& env)
  21. : Env_(env)
  22. {}
  23. arrow::Status Visit(const arrow::BooleanType&) {
  24. SetDataType(NUdf::EDataSlot::Bool);
  25. return arrow::Status::OK();
  26. }
  27. arrow::Status Visit(const arrow::Int8Type&) {
  28. SetDataType(NUdf::EDataSlot::Int8);
  29. return arrow::Status::OK();
  30. }
  31. arrow::Status Visit(const arrow::UInt8Type&) {
  32. SetDataType(NUdf::EDataSlot::Uint8);
  33. return arrow::Status::OK();
  34. }
  35. arrow::Status Visit(const arrow::Int16Type&) {
  36. SetDataType(NUdf::EDataSlot::Int16);
  37. return arrow::Status::OK();
  38. }
  39. arrow::Status Visit(const arrow::UInt16Type&) {
  40. SetDataType(NUdf::EDataSlot::Uint16);
  41. return arrow::Status::OK();
  42. }
  43. arrow::Status Visit(const arrow::Int32Type&) {
  44. SetDataType(NUdf::EDataSlot::Int32);
  45. return arrow::Status::OK();
  46. }
  47. arrow::Status Visit(const arrow::UInt32Type&) {
  48. SetDataType(NUdf::EDataSlot::Uint32);
  49. return arrow::Status::OK();
  50. }
  51. arrow::Status Visit(const arrow::Int64Type&) {
  52. SetDataType(NUdf::EDataSlot::Int64);
  53. return arrow::Status::OK();
  54. }
  55. arrow::Status Visit(const arrow::UInt64Type&) {
  56. SetDataType(NUdf::EDataSlot::Uint64);
  57. return arrow::Status::OK();
  58. }
  59. TType* GetType() const {
  60. return Type_;
  61. }
  62. private:
  63. void SetDataType(NUdf::EDataSlot slot) {
  64. Type_ = TDataType::Create(NUdf::GetDataTypeInfo(slot).TypeId, Env_);
  65. }
  66. private:
  67. TTypeEnvironment& Env_;
  68. TType* Type_ = nullptr;
  69. };
  70. bool ConvertOutputArrowType(const arrow::compute::OutputType& outType, const std::vector<arrow::ValueDescr>& values,
  71. bool optional, TType*& outputType, TTypeEnvironment& env) {
  72. arrow::ValueDescr::Shape shape;
  73. std::shared_ptr<arrow::DataType> dataType;
  74. auto execContext = arrow::compute::ExecContext();
  75. auto kernelContext = arrow::compute::KernelContext(&execContext);
  76. auto descrRes = outType.Resolve(&kernelContext, values);
  77. if (!descrRes.ok()) {
  78. return false;
  79. }
  80. const auto& descr = *descrRes;
  81. dataType = descr.type;
  82. shape = descr.shape;
  83. TOutputTypeVisitor visitor(env);
  84. if (!dataType->Accept(&visitor).ok()) {
  85. return false;
  86. }
  87. TType* itemType = visitor.GetType();
  88. if (optional) {
  89. itemType = TOptionalType::Create(itemType, env);
  90. }
  91. switch (shape) {
  92. case arrow::ValueDescr::SCALAR:
  93. outputType = TBlockType::Create(itemType, TBlockType::EShape::Scalar, env);
  94. return true;
  95. case arrow::ValueDescr::ARRAY:
  96. outputType = TBlockType::Create(itemType, TBlockType::EShape::Many, env);
  97. return true;
  98. default:
  99. return false;
  100. }
  101. }
  102. bool FindArrowFunction(TStringBuf name, const TArrayRef<TType*>& inputTypes, TType* outputType, const IBuiltinFunctionRegistry& registry) {
  103. bool hasOptionals = false;
  104. bool many = false;
  105. std::vector<NUdf::TDataTypeId> argTypes;
  106. for (const auto& t : inputTypes) {
  107. auto asBlockType = AS_TYPE(TBlockType, t);
  108. if (asBlockType->GetShape() == TBlockType::EShape::Many) {
  109. many = true;
  110. }
  111. bool isOptional;
  112. auto baseType = UnpackOptional(asBlockType->GetItemType(), isOptional);
  113. if (!baseType->IsData()) {
  114. return false;
  115. }
  116. hasOptionals = hasOptionals || isOptional;
  117. argTypes.push_back(AS_TYPE(TDataType, baseType)->GetSchemeType());
  118. }
  119. NUdf::TDataTypeId returnType;
  120. bool returnIsOptional;
  121. {
  122. auto asBlockType = AS_TYPE(TBlockType, outputType);
  123. MKQL_ENSURE(many ^ (asBlockType->GetShape() == TBlockType::EShape::Scalar), "Output shape is inconsistent with input shapes");
  124. auto baseType = UnpackOptional(asBlockType->GetItemType(), returnIsOptional);
  125. if (!baseType->IsData()) {
  126. return false;
  127. }
  128. returnType = AS_TYPE(TDataType, baseType)->GetSchemeType();
  129. }
  130. auto kernel = registry.FindKernel(name, argTypes.data(), argTypes.size(), returnType);
  131. if (!kernel) {
  132. return false;
  133. }
  134. bool match = false;
  135. switch (kernel->NullMode) {
  136. case TKernel::ENullMode::Default:
  137. match = returnIsOptional == hasOptionals;
  138. break;
  139. case TKernel::ENullMode::AlwaysNull:
  140. match = returnIsOptional;
  141. break;
  142. case TKernel::ENullMode::AlwaysNotNull:
  143. match = !returnIsOptional;
  144. break;
  145. }
  146. return match;
  147. }
  148. bool HasArrowCast(TType* from, TType* to) {
  149. std::shared_ptr<arrow::DataType> fromArrowType, toArrowType;
  150. if (!ConvertArrowType(from, fromArrowType)) {
  151. return false;
  152. }
  153. if (!ConvertArrowType(to, toArrowType)) {
  154. return false;
  155. }
  156. return arrow::compute::CanCast(*fromArrowType, *toArrowType);
  157. }
  158. }