discover.cpp 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. #include "discover.h"
  2. #include <yql/essentials/utils/backtrace/backtrace.h>
  3. #include <yql/essentials/minikql/mkql_node.h>
  4. #include <yql/essentials/minikql/mkql_type_builder.h>
  5. #include <yql/essentials/minikql/mkql_utils.h>
  6. #include <yql/essentials/providers/common/schema/mkql/yql_mkql_schema.h>
  7. #include <yql/essentials/providers/common/proto/udf_resolver.pb.h>
  8. #include <library/cpp/protobuf/util/pb_io.h>
  9. #include <util/generic/hash.h>
  10. #include <util/generic/hash_set.h>
  11. using namespace NKikimr;
  12. using namespace NKikimr::NMiniKQL;
  13. namespace NUdfResolver {
  14. namespace {
  15. NYql::TResolveResult DoDiscover(const NYql::TResolve& inMsg, IMutableFunctionRegistry& functionRegistry) {
  16. NYql::TResolveResult outMsg;
  17. TScopedAlloc alloc(__LOCATION__);
  18. TTypeEnvironment env(alloc);
  19. NUdf::ITypeInfoHelper::TPtr typeInfoHelper(new TTypeInfoHelper());
  20. THashMap<std::pair<TString, TString>, THashSet<TString>> cachedModules;
  21. for (auto& import : inMsg.GetImports()) {
  22. auto importRes = outMsg.AddImports();
  23. importRes->SetFileAlias(import.GetFileAlias());
  24. importRes->SetCustomUdfPrefix(import.GetCustomUdfPrefix());
  25. auto [it, inserted] = cachedModules.emplace(std::make_pair(import.GetPath(), import.GetCustomUdfPrefix()), THashSet<TString>());
  26. if (inserted) {
  27. THashSet<TString> modules;
  28. functionRegistry.LoadUdfs(import.GetPath(),
  29. {},
  30. NUdf::IRegistrator::TFlags::TypesOnly,
  31. import.GetCustomUdfPrefix(),
  32. &modules);
  33. FillImportResultModules(modules, *importRes);
  34. it->second = modules;
  35. } else {
  36. FillImportResultModules(it->second, *importRes);
  37. }
  38. }
  39. for (const auto& module : functionRegistry.GetAllModuleNames()) {
  40. const auto& functions = functionRegistry.GetModuleFunctions(module);
  41. for (auto& f : functions) {
  42. const TString funcName = NKikimr::NMiniKQL::FullName(module, f.first);
  43. auto udfRes = outMsg.AddUdfs();
  44. udfRes->SetName(funcName);
  45. udfRes->SetIsTypeAwareness(f.second.IsTypeAwareness);
  46. TFunctionTypeInfo funcInfo;
  47. if (!f.second.IsTypeAwareness) {
  48. auto status = functionRegistry.FindFunctionTypeInfo(env, typeInfoHelper,
  49. nullptr, funcName, nullptr, nullptr, NUdf::IUdfModule::TFlags::TypesOnly, {}, nullptr, &funcInfo);
  50. if (!status.IsOk()) {
  51. udfRes->SetError("Failed to resolve signature, error: " + status.GetError());
  52. }
  53. }
  54. // nullptr for polymorphic functions
  55. if (funcInfo.FunctionType) {
  56. udfRes->SetCallableType(NYql::NCommon::WriteTypeToYson(funcInfo.FunctionType, NYT::NYson::EYsonFormat::Text));
  57. udfRes->SetArgCount(funcInfo.FunctionType->GetArgumentsCount());
  58. udfRes->SetOptionalArgCount(funcInfo.FunctionType->GetOptionalArgumentsCount());
  59. if (funcInfo.RunConfigType) {
  60. udfRes->SetRunConfigType(NYql::NCommon::WriteTypeToYson(funcInfo.RunConfigType, NYT::NYson::EYsonFormat::Text));
  61. }
  62. udfRes->SetSupportsBlocks(funcInfo.SupportsBlocks);
  63. udfRes->SetIsStrict(funcInfo.IsStrict);
  64. }
  65. }
  66. }
  67. return outMsg;
  68. }
  69. void Print(const NYql::TResolveResult& result, IOutputStream& out, bool printAsProto) {
  70. if (printAsProto) {
  71. result.SerializeToArcadiaStream(&out);
  72. return;
  73. }
  74. SerializeToTextFormat(result, out);
  75. out << "UDF count: " << result.UdfsSize() << Endl;
  76. }
  77. void DiscoverInFiles(const TVector<TString>& udfPaths, IOutputStream& out, bool printAsProto) {
  78. NYql::TResolve inMsg;
  79. for (auto& path : udfPaths) {
  80. auto import = inMsg.AddImports();
  81. import->SetPath(path);
  82. import->SetFileAlias(path);
  83. }
  84. auto functionRegistry = CreateFunctionRegistry(IBuiltinFunctionRegistry::TPtr());
  85. auto newRegistry = functionRegistry->Clone();
  86. newRegistry->SetBackTraceCallback(&NYql::NBacktrace::KikimrBackTrace);
  87. NYql::TResolveResult result = DoDiscover(inMsg, *newRegistry);
  88. Print(result, out, printAsProto);
  89. }
  90. }
  91. void DiscoverInDir(const TString& dir, IOutputStream& out, bool printAsProto) {
  92. TVector<TString> udfPaths;
  93. NMiniKQL::FindUdfsInDir(dir, &udfPaths);
  94. DiscoverInFiles(udfPaths, out, printAsProto);
  95. }
  96. void DiscoverInFile(const TString& filePath, IOutputStream& out, bool printAsProto) {
  97. DiscoverInFiles({ filePath }, out, printAsProto);
  98. }
  99. void Discover(IInputStream& in, IOutputStream& out, bool printAsProto) {
  100. NYql::TResolve inMsg;
  101. if (!inMsg.ParseFromArcadiaStream(&in)) {
  102. ythrow yexception() << "Bad input TResolve proto message";
  103. }
  104. auto functionRegistry = CreateFunctionRegistry(IBuiltinFunctionRegistry::TPtr());
  105. auto newRegistry = functionRegistry->Clone();
  106. newRegistry->SetBackTraceCallback(&NYql::NBacktrace::KikimrBackTrace);
  107. NYql::TResolveResult result = DoDiscover(inMsg, *newRegistry);
  108. Print(result, out, printAsProto);
  109. }
  110. void FillImportResultModules(const THashSet<TString>& modules, NYql::TImportResult& importRes) {
  111. for (auto& m : modules) {
  112. importRes.AddModules(m);
  113. }
  114. }
  115. }