mkql_builtins.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. #include "mkql_builtins.h"
  2. #include "mkql_builtins_impl.h" // Y_IGNORE
  3. #include "mkql_builtins_compare.h"
  4. #include "mkql_builtins_string_kernels.h"
  5. #include <yql/essentials/minikql/arrow/arrow_defs.h>
  6. #include <util/digest/murmur.h>
  7. #include <util/generic/yexception.h>
  8. #include <util/generic/maybe.h>
  9. #include <algorithm>
  10. #include <arrow/compute/registry.h>
  11. #include <arrow/compute/registry_internal.h>
  12. namespace NKikimr {
  13. namespace NMiniKQL {
  14. namespace {
  15. void RegisterDefaultOperations(IBuiltinFunctionRegistry& registry, TKernelFamilyMap& kernelFamilyMap) {
  16. RegisterAdd(registry);
  17. RegisterAdd(kernelFamilyMap);
  18. RegisterAggrAdd(registry);
  19. RegisterSub(registry);
  20. RegisterSub(kernelFamilyMap);
  21. RegisterMul(registry);
  22. RegisterMul(kernelFamilyMap);
  23. RegisterDiv(registry);
  24. RegisterDiv(kernelFamilyMap);
  25. RegisterMod(registry);
  26. RegisterMod(kernelFamilyMap);
  27. RegisterIncrement(registry);
  28. RegisterDecrement(registry);
  29. RegisterBitAnd(registry);
  30. RegisterBitOr(registry);
  31. RegisterBitXor(registry);
  32. RegisterShiftLeft(registry);
  33. RegisterShiftRight(registry);
  34. RegisterRotLeft(registry);
  35. RegisterRotRight(registry);
  36. RegisterPlus(registry);
  37. RegisterMinus(registry);
  38. RegisterMinus(kernelFamilyMap);
  39. RegisterBitNot(registry);
  40. RegisterCountBits(registry);
  41. RegisterAbs(registry);
  42. RegisterAbs(kernelFamilyMap);
  43. RegisterConvert(registry);
  44. RegisterConcat(registry);
  45. RegisterSubstring(registry);
  46. RegisterFind(registry);
  47. RegisterWith(registry);
  48. RegisterWith(kernelFamilyMap);
  49. RegisterInversePresortString(registry);
  50. RegisterInverseString(registry);
  51. RegisterNanvl(registry);
  52. RegisterByteAt(registry);
  53. RegisterMax(registry);
  54. RegisterMin(registry);
  55. RegisterAggrMax(registry);
  56. RegisterAggrMin(registry);
  57. RegisterEquals(registry);
  58. RegisterEquals(kernelFamilyMap);
  59. RegisterNotEquals(registry);
  60. RegisterNotEquals(kernelFamilyMap);
  61. RegisterLess(registry);
  62. RegisterLess(kernelFamilyMap);
  63. RegisterLessOrEqual(registry);
  64. RegisterLessOrEqual(kernelFamilyMap);
  65. RegisterGreater(registry);
  66. RegisterGreater(kernelFamilyMap);
  67. RegisterGreaterOrEqual(registry);
  68. RegisterGreaterOrEqual(kernelFamilyMap);
  69. // Size is missing in registry
  70. RegisterSizeBuiltin(kernelFamilyMap);
  71. }
  72. void PrintType(NUdf::TDataTypeId schemeType, bool isOptional, IOutputStream& out)
  73. {
  74. const auto slot = NUdf::FindDataSlot(schemeType);
  75. out << (slot ? NUdf::GetDataTypeInfo(*slot).Name : "unknown");
  76. if (isOptional) {
  77. out << '?';
  78. }
  79. }
  80. void PrintFunctionSignature(
  81. const std::string_view& funcName,
  82. const TFunctionDescriptor& desc,
  83. IOutputStream& out)
  84. {
  85. const auto* param = desc.ResultAndArgs;
  86. out << '\t';
  87. // print results type
  88. PrintType(param->SchemeType, param->IsNullable(), out);
  89. ++param;
  90. // print function name and args types
  91. out << ' ' << funcName << '(';
  92. while (param->SchemeType != 0) {
  93. PrintType(param->SchemeType, param->IsNullable(), out);
  94. ++param;
  95. if (param->SchemeType != 0) {
  96. out << ", ";
  97. }
  98. }
  99. out << ')';
  100. }
  101. bool IsArgumentsMatch(
  102. const TFunctionParamMetadata* paramsMetadata,
  103. const std::pair<NUdf::TDataTypeId, bool>* argTypes, size_t argTypesCount)
  104. {
  105. size_t index = 0;
  106. while (paramsMetadata->SchemeType) {
  107. if (index >= argTypesCount) {
  108. return false;
  109. }
  110. if (argTypes[index].first != paramsMetadata->SchemeType) {
  111. return false;
  112. }
  113. if (argTypes[index].second != paramsMetadata->IsNullable()) {
  114. return false;
  115. }
  116. ++paramsMetadata;
  117. ++index;
  118. }
  119. return index == argTypesCount;
  120. }
  121. //////////////////////////////////////////////////////////////////////////////
  122. // TBuiltinFunctionRegistry
  123. //////////////////////////////////////////////////////////////////////////////
  124. class TBuiltinFunctionRegistry: public IBuiltinFunctionRegistry
  125. {
  126. public:
  127. TBuiltinFunctionRegistry();
  128. private:
  129. TFunctionDescriptor GetBuiltin(const std::string_view& name,
  130. const std::pair<NUdf::TDataTypeId, bool>* argTypes, size_t argTypesCount) const final;
  131. bool HasBuiltin(const std::string_view& name) const final;
  132. ui64 GetMetadataEtag() const final;
  133. void PrintInfoTo(IOutputStream& out) const final;
  134. void Register(const std::string_view& name, const TFunctionDescriptor& description) final;
  135. void RegisterAll(TFunctionsMap&& functions, TFunctionParamMetadataList&& arguments) final;
  136. const TFunctionsMap& GetFunctions() const final;
  137. void CalculateMetadataEtag();
  138. std::optional<TFunctionDescriptor> FindBuiltin(const std::string_view& name, const std::pair<NUdf::TDataTypeId, bool>* argTypes, size_t argTypesCount) const;
  139. const TDescriptionList& FindCandidates(const std::string_view& name) const;
  140. const TKernel* FindKernel(const std::string_view& name, const NUdf::TDataTypeId* argTypes, size_t argTypesCount, NUdf::TDataTypeId returnType) const final;
  141. void RegisterKernelFamily(const std::string_view& name, std::unique_ptr<TKernelFamily>&& family) final;
  142. TVector<std::pair<TString, const TKernelFamily*>> GetAllKernelFamilies() const final;
  143. TFunctionsMap Functions;
  144. TFunctionParamMetadataList ArgumentsMetadata;
  145. std::optional<ui64> MetadataEtag;
  146. TKernelFamilyMap KernelFamilyMap;
  147. };
  148. TBuiltinFunctionRegistry::TBuiltinFunctionRegistry()
  149. {
  150. RegisterDefaultOperations(*this, KernelFamilyMap);
  151. CalculateMetadataEtag();
  152. }
  153. void TBuiltinFunctionRegistry::Register(const std::string_view& name, const TFunctionDescriptor& description)
  154. {
  155. Functions[TString(name)].push_back(description);
  156. }
  157. void TBuiltinFunctionRegistry::RegisterAll(TFunctionsMap&& functions, TFunctionParamMetadataList&& arguments)
  158. {
  159. Functions = std::move(functions);
  160. ArgumentsMetadata = std::move(arguments);
  161. CalculateMetadataEtag();
  162. }
  163. const TFunctionsMap& TBuiltinFunctionRegistry::GetFunctions() const
  164. {
  165. return Functions;
  166. }
  167. const TDescriptionList& TBuiltinFunctionRegistry::FindCandidates(const std::string_view& name) const {
  168. if (const auto it = Functions.find(TString(name)); it != Functions.cend())
  169. return it->second;
  170. ythrow yexception() << "Not found builtin function: '" << name << "' in " << Functions.size() << " total.";
  171. }
  172. std::optional<TFunctionDescriptor> TBuiltinFunctionRegistry::FindBuiltin(const std::string_view& name, const std::pair<NUdf::TDataTypeId, bool>* argTypes, size_t argTypesCount) const
  173. {
  174. for (const auto& desc: FindCandidates(name)) {
  175. if (IsArgumentsMatch(desc.ResultAndArgs, argTypes, argTypesCount)) {
  176. return desc;
  177. }
  178. }
  179. return std::nullopt;
  180. }
  181. TFunctionDescriptor TBuiltinFunctionRegistry::GetBuiltin(const std::string_view& name,
  182. const std::pair<NUdf::TDataTypeId, bool>* argTypes, size_t argTypesCount) const
  183. {
  184. if (const auto desc = FindBuiltin(name, argTypes, argTypesCount)) {
  185. return *desc;
  186. }
  187. TStringStream ss;
  188. PrintType(argTypes[0].first, argTypes[0].second, ss);
  189. ss << ' ' << name << '(';
  190. for (size_t i = 1U; i < argTypesCount; i++) {
  191. if (i > 1U) {
  192. ss << ", ";
  193. }
  194. PrintType(argTypes[i].first, argTypes[i].second, ss);
  195. }
  196. ss << ").\nCandidates are: [\n";
  197. ui32 i = 0;
  198. for (const TFunctionDescriptor& desc: FindCandidates(name)) {
  199. PrintFunctionSignature(name, desc, ss);
  200. ss << '\n';
  201. if (++i > 32) {
  202. ss << "\t...\n";
  203. break;
  204. }
  205. }
  206. ss << ']';
  207. ythrow yexception() << "Unsupported builtin function: " << ss.Str();
  208. }
  209. bool TBuiltinFunctionRegistry::HasBuiltin(const std::string_view& name) const
  210. {
  211. return Functions.find(TString(name)) != Functions.cend();
  212. }
  213. void TBuiltinFunctionRegistry::CalculateMetadataEtag() {
  214. using TFunctionPair = std::pair<std::string_view, const TDescriptionList*>;
  215. std::vector<TFunctionPair> operations;
  216. for (const auto& func : Functions) {
  217. operations.emplace_back(func.first, &func.second);
  218. }
  219. std::sort(operations.begin(), operations.end(), [](const TFunctionPair& x, const TFunctionPair& y) {
  220. return x.first < y.first;
  221. });
  222. ui64 hash = 0;
  223. for (const auto& op : operations) {
  224. const ui64 nameLength = op.first.size();
  225. hash = MurmurHash<ui64>(&nameLength, sizeof(nameLength), hash);
  226. hash = MurmurHash<ui64>(op.first.data(), op.first.size(), hash);
  227. const auto& descriptions = *op.second;
  228. const ui64 descriptionCount = descriptions.size();
  229. hash = MurmurHash<ui64>(&descriptionCount, sizeof(descriptionCount), hash);
  230. for (const auto& description : descriptions) {
  231. for (const auto* args = description.ResultAndArgs; args->SchemeType; ++args) {
  232. hash = MurmurHash<ui64>(args, sizeof(*args), hash);
  233. }
  234. }
  235. }
  236. MetadataEtag = hash;
  237. }
  238. ui64 TBuiltinFunctionRegistry::GetMetadataEtag() const
  239. {
  240. return *MetadataEtag;
  241. }
  242. void TBuiltinFunctionRegistry::PrintInfoTo(IOutputStream& out) const
  243. {
  244. for (const auto& f: Functions) {
  245. out << f.first << ": [\n";
  246. for (const TFunctionDescriptor& desc: f.second) {
  247. PrintFunctionSignature(f.first, desc, out);
  248. out << '\n';
  249. }
  250. out << "]\n\n";
  251. }
  252. }
  253. const TKernel* TBuiltinFunctionRegistry::FindKernel(const std::string_view& name, const NUdf::TDataTypeId* argTypes, size_t argTypesCount, NUdf::TDataTypeId returnType) const {
  254. auto fit = KernelFamilyMap.find(TString(name));
  255. if (fit == KernelFamilyMap.end()) {
  256. return nullptr;
  257. }
  258. return fit->second->FindKernel(argTypes, argTypesCount, returnType);
  259. }
  260. void TBuiltinFunctionRegistry::RegisterKernelFamily(const std::string_view& name, std::unique_ptr<TKernelFamily>&& family) {
  261. Y_ENSURE(KernelFamilyMap.emplace(TString(name), std::move(family)).second);
  262. }
  263. TVector<std::pair<TString, const TKernelFamily*>> TBuiltinFunctionRegistry::GetAllKernelFamilies() const {
  264. TVector<std::pair<TString, const TKernelFamily*>> ret;
  265. for (const auto& f : KernelFamilyMap) {
  266. ret.emplace_back(std::make_pair(f.first, f.second.get()));
  267. }
  268. return ret;
  269. }
  270. } // namespace
  271. IBuiltinFunctionRegistry::TPtr CreateBuiltinRegistry() {
  272. return MakeIntrusive<TBuiltinFunctionRegistry>();
  273. }
  274. } // namespace NMiniKQL
  275. } // namespace NKikimr