yql_udf_index.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. #pragma once
  2. #include "yql_udf_resolver.h"
  3. #include <util/generic/hash.h>
  4. #include <util/generic/map.h>
  5. #include <util/generic/maybe.h>
  6. #include <util/generic/set.h>
  7. #include <util/generic/string.h>
  8. #include <util/generic/vector.h>
  9. namespace NYql {
  10. struct TFunctionInfo {
  11. TString Name;
  12. int ArgCount = 0;
  13. int OptionalArgCount = 0;
  14. bool IsTypeAwareness = false;
  15. TString CallableType;
  16. TString RunConfigType;
  17. bool IsStrict = false;
  18. bool SupportsBlocks = false;
  19. };
  20. // todo: specify whether path is frozen
  21. struct TDownloadLink {
  22. bool IsUrl = false;
  23. TString Path;
  24. TString Md5;
  25. TDownloadLink() {
  26. }
  27. TDownloadLink(bool isUrl, const TString& path, const TString& md5)
  28. : IsUrl(isUrl)
  29. , Path(path)
  30. , Md5(md5)
  31. {
  32. }
  33. TDownloadLink(const TDownloadLink&) = default;
  34. TDownloadLink& operator=(const TDownloadLink&) = default;
  35. static TDownloadLink Url(const TString& path, const TString& md5 = "") {
  36. return { true, path, md5 };
  37. }
  38. static TDownloadLink File(const TString& path, const TString& md5 = "") {
  39. return { false, path, md5 };
  40. }
  41. bool operator==(const TDownloadLink& other) const {
  42. return std::tie(IsUrl, Path, Md5) == std::tie(other.IsUrl, other.Path, Md5);
  43. }
  44. bool operator!=(const TDownloadLink& other) const {
  45. return !(*this == other);
  46. }
  47. size_t Hash() const {
  48. return CombineHashes(
  49. CombineHashes((size_t)IsUrl, ComputeHash(Path)),
  50. ComputeHash(Md5)
  51. );
  52. }
  53. };
  54. struct TResourceInfo : public TThrRefBase {
  55. typedef TIntrusiveConstPtr<TResourceInfo> TPtr;
  56. bool IsTrusted = false;
  57. TDownloadLink Link;
  58. TSet<TString> Modules;
  59. TMap<TString, TFunctionInfo> Functions;
  60. TMap<TString, TSet<TString>> ICaseFuncNames;
  61. void SetFunctions(const TVector<TFunctionInfo>& functions) {
  62. for (auto& f : functions) {
  63. Functions.emplace(f.Name, f);
  64. ICaseFuncNames[to_lower(f.Name)].insert(f.Name);
  65. }
  66. }
  67. };
  68. inline bool operator<(const TResourceInfo::TPtr& p1, const TResourceInfo::TPtr& p2) {
  69. return p1.Get() < p2.Get();
  70. }
  71. class TUdfIndex : public TThrRefBase {
  72. public:
  73. typedef TIntrusivePtr<TUdfIndex> TPtr;
  74. public:
  75. // todo: trusted resources should not be replaceble regardless of specified mode
  76. enum class EOverrideMode {
  77. PreserveExisting,
  78. ReplaceWithNew,
  79. RaiseError
  80. };
  81. enum class EStatus {
  82. Found,
  83. NotFound,
  84. Ambigious
  85. };
  86. public:
  87. TUdfIndex();
  88. void SetCaseSentiveSearch(bool caseSensitive);
  89. bool CanonizeModule(TString& moduleName) const;
  90. EStatus ContainsModule(const TString& moduleName) const;
  91. EStatus FindFunction(const TString& moduleName, const TString& functionName, TFunctionInfo& function) const;
  92. TResourceInfo::TPtr FindResourceByModule(const TString& moduleName) const;
  93. bool ContainsModuleStrict(const TString& moduleName) const;
  94. /*
  95. New resource can contain already registered module.
  96. In this case 'mode' will be used to resolve conflicts.
  97. For instance, if mode == ReplaceWithNew all functions from old resource will be removed and new functions will be registered.
  98. It is important to do it atomically because two .so cannot have intersecting module lists
  99. */
  100. void RegisterResource(const TResourceInfo::TPtr& resource, EOverrideMode mode);
  101. void RegisterResources(const TVector<TResourceInfo::TPtr>& resources, EOverrideMode mode);
  102. TIntrusivePtr<TUdfIndex> Clone() const;
  103. private:
  104. explicit TUdfIndex(const TMap<TString, TResourceInfo::TPtr>& resources, bool caseSensitive);
  105. bool ContainsAnyModule(const TSet<TString>& modules) const;
  106. TSet<TResourceInfo::TPtr> FindResourcesByModules(const TSet<TString>& modules) const;
  107. void UnregisterResource(TResourceInfo::TPtr resource);
  108. private:
  109. // module => Resource
  110. TMap<TString, TResourceInfo::TPtr> Resources_;
  111. bool CaseSensitive_ = true;
  112. TMap<TString, TSet<TString>> ICaseModules_;
  113. };
  114. void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TVector<TString>& paths, bool isTrusted, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);
  115. void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TMap<TString, TString>& pathsWithMd5, bool isTrusted, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);
  116. void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TVector<TUserDataBlock>& blocks, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);
  117. void LoadRichMetadataToUdfIndex(const IUdfResolver& resolver, const TUserDataBlock& block, TUdfIndex::EOverrideMode mode, TUdfIndex& registry);
  118. }