yql_facade.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. #pragma once
  2. #include <yql/essentials/core/credentials/yql_credentials.h>
  3. #include <yql/essentials/core/file_storage/file_storage.h>
  4. #include <yql/essentials/core/services/yql_plan.h>
  5. #include <yql/essentials/core/services/yql_transform_pipeline.h>
  6. #include <yql/essentials/core/url_lister/interface/url_lister_manager.h>
  7. #include <yql/essentials/core/url_preprocessing/interface/url_preprocessing.h>
  8. #include <yql/essentials/core/yql_type_annotation.h>
  9. #include <yql/essentials/core/yql_user_data.h>
  10. #include <yql/essentials/core/qplayer/storage/interface/yql_qstorage.h>
  11. #include <yql/essentials/providers/config/yql_config_provider.h>
  12. #include <yql/essentials/providers/result/provider/yql_result_provider.h>
  13. #include <yql/essentials/providers/common/proto/gateways_config.pb.h>
  14. #include <yql/essentials/public/issue/yql_issue.h>
  15. #include <yql/essentials/sql/sql.h>
  16. #include <library/cpp/random_provider/random_provider.h>
  17. #include <library/cpp/time_provider/time_provider.h>
  18. #include <library/cpp/threading/future/future.h>
  19. #include <util/system/file.h>
  20. #include <util/generic/ptr.h>
  21. #include <functional>
  // Forward declaration only: this header refers to IFunctionRegistry solely
  // through pointers, so the full definition is not required here.
  namespace NKikimr::NMiniKQL {
      class IFunctionRegistry;
  } // namespace NKikimr::NMiniKQL
  27. namespace NYql {
  28. class TProgram;
  29. using TProgramPtr = TIntrusivePtr<TProgram>;
  30. class TProgramFactory;
  31. using TProgramFactoryPtr = TIntrusivePtr<TProgramFactory>;
  32. ///////////////////////////////////////////////////////////////////////////////
  33. // TProgramFactory
  34. ///////////////////////////////////////////////////////////////////////////////
  35. class TProgramFactory: public TThrRefBase, private TMoveOnly
  36. {
  37. public:
  38. TProgramFactory(
  39. bool useRepeatableRandomAndTimeProviders,
  40. const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry,
  41. ui64 nextUniqueId,
  42. const TVector<TDataProviderInitializer>& dataProvidersInit,
  43. const TString& runner);
  44. void AddUserDataTable(const TUserDataTable& userDataTable);
  45. void SetCredentials(TCredentials::TPtr credentials);
  46. void SetGatewaysConfig(const TGatewaysConfig* gatewaysConfig);
  47. void SetModules(IModuleResolver::TPtr modules);
  48. void SetUrlListerManager(IUrlListerManagerPtr urlListerManager);
  49. void SetUdfResolver(IUdfResolver::TPtr udfResolver);
  50. void SetUdfIndex(TUdfIndex::TPtr udfIndex, TUdfIndexPackageSet::TPtr udfIndexPackageSet);
  51. void SetFileStorage(TFileStoragePtr fileStorage);
  52. void SetUrlPreprocessing(IUrlPreprocessing::TPtr urlPreprocessing);
  53. void EnableRangeComputeFor();
  54. void SetArrowResolver(IArrowResolver::TPtr arrowResolver);
  55. TProgramPtr Create(
  56. const TFile& file,
  57. const TString& sessionId = TString(),
  58. const TQContext& qContext = {});
  59. TProgramPtr Create(
  60. const TString& filename,
  61. const TString& sourceCode,
  62. const TString& sessionId = TString(),
  63. EHiddenMode hiddenMode = EHiddenMode::Disable,
  64. const TQContext& qContext = {});
  65. void UnrepeatableRandom();
  66. private:
  67. const bool UseRepeatableRandomAndTimeProviders_;
  68. bool UseUnrepeatableRandom = false;
  69. const NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry_;
  70. const ui64 NextUniqueId_;
  71. TVector<TDataProviderInitializer> DataProvidersInit_;
  72. TUserDataTable UserDataTable_;
  73. TCredentials::TPtr Credentials_;
  74. const TGatewaysConfig* GatewaysConfig_;
  75. IModuleResolver::TPtr Modules_;
  76. IUrlListerManagerPtr UrlListerManager_;
  77. IUdfResolver::TPtr UdfResolver_;
  78. TUdfIndex::TPtr UdfIndex_;
  79. TUdfIndexPackageSet::TPtr UdfIndexPackageSet_;
  80. TFileStoragePtr FileStorage_;
  81. IUrlPreprocessing::TPtr UrlPreprocessing_;
  82. TString Runner_;
  83. bool EnableRangeComputeFor_ = false;
  84. IArrowResolver::TPtr ArrowResolver_;
  85. };
  86. ///////////////////////////////////////////////////////////////////////////////
  87. // TProgram
  88. ///////////////////////////////////////////////////////////////////////////////
  89. class TProgram: public TThrRefBase, private TNonCopyable
  90. {
  91. public:
  92. friend TProgramFactory;
  93. using TStatus = IGraphTransformer::TStatus;
  94. using TFutureStatus = NThreading::TFuture<TStatus>;
  95. public:
  96. ~TProgram();
  97. void AddCredentials(const TVector<std::pair<TString, TCredential>>& credentials);
  98. void ClearCredentials();
  99. void AddUserDataTable(const TUserDataTable& userDataTable);
  100. bool ParseYql();
  101. bool ParseSql();
  102. bool ParseSql(const NSQLTranslation::TTranslationSettings& settings);
  103. bool Compile(const TString& username, bool skipLibraries = false);
  104. TStatus Discover(const TString& username);
  105. TFutureStatus DiscoverAsync(const TString& username);
  106. TStatus Lineage(const TString& username, IOutputStream* traceOut = nullptr, IOutputStream* exprOut = nullptr, bool withTypes = false);
  107. TFutureStatus LineageAsync(const TString& username, IOutputStream* traceOut = nullptr, IOutputStream* exprOut = nullptr, bool withTypes = false);
  108. TStatus Validate(const TString& username, IOutputStream* exprOut = nullptr, bool withTypes = false);
  109. TFutureStatus ValidateAsync(const TString& username, IOutputStream* exprOut = nullptr, bool withTypes = false);
  110. TStatus Optimize(
  111. const TString& username,
  112. IOutputStream* traceOut = nullptr,
  113. IOutputStream* tracePlan = nullptr,
  114. IOutputStream* exprOut = nullptr,
  115. bool withTypes = false);
  116. TFutureStatus OptimizeAsync(
  117. const TString& username,
  118. IOutputStream* traceOut = nullptr,
  119. IOutputStream* tracePlan = nullptr,
  120. IOutputStream* exprOut = nullptr,
  121. bool withTypes = false);
  122. TStatus Run(
  123. const TString& username,
  124. IOutputStream* traceOut = nullptr,
  125. IOutputStream* tracePlan = nullptr,
  126. IOutputStream* exprOut = nullptr,
  127. bool withTypes = false);
  128. TFutureStatus RunAsync(
  129. const TString& username,
  130. IOutputStream* traceOut = nullptr,
  131. IOutputStream* tracePlan = nullptr,
  132. IOutputStream* exprOut = nullptr,
  133. bool withTypes = false);
  134. TStatus LineageWithConfig(
  135. const TString& username,
  136. const IPipelineConfigurator& pipelineConf);
  137. TFutureStatus LineageAsyncWithConfig(
  138. const TString& username,
  139. const IPipelineConfigurator& pipelineConf);
  140. TStatus OptimizeWithConfig(
  141. const TString& username,
  142. const IPipelineConfigurator& pipelineConf);
  143. TFutureStatus OptimizeAsyncWithConfig(
  144. const TString& username,
  145. const IPipelineConfigurator& pipelineConf);
  146. TStatus RunWithConfig(
  147. const TString& username,
  148. const IPipelineConfigurator& pipelineConf);
  149. TFutureStatus RunAsyncWithConfig(
  150. const TString& username,
  151. const IPipelineConfigurator& pipelineConf);
  152. TFutureStatus ContinueAsync();
  153. bool HasActiveProcesses();
  154. bool NeedWaitForActiveProcesses();
  155. [[nodiscard]]
  156. NThreading::TFuture<void> Abort();
  157. TIssues Issues() const;
  158. TIssues CompletedIssues() const;
  159. void FinalizeIssues();
  160. void Print(IOutputStream* exprOut, IOutputStream* planOut, bool cleanPlan = false);
  161. inline void PrintErrorsTo(IOutputStream& out) const {
  162. Issues().PrintWithProgramTo(out, Filename_, SourceCode_);
  163. }
  164. inline const TAstNode* AstRoot() const {
  165. return AstRoot_;
  166. }
  167. inline const TExprNode::TPtr& ExprRoot() const {
  168. return ExprRoot_;
  169. }
  170. inline TExprContext& ExprCtx() const {
  171. return *ExprCtx_;
  172. }
  173. inline bool HasResults() const {
  174. return ResultProviderConfig_ &&
  175. !ResultProviderConfig_->CommittedResults.empty();
  176. }
  177. inline const TVector<TString>& Results() const {
  178. return ResultProviderConfig_->CommittedResults;
  179. }
  180. TMaybe<TString> GetQueryAst(TMaybe<size_t> memoryLimit = {});
  181. TMaybe<TString> GetQueryPlan(const TPlanSettings& settings = {});
  182. void SetDiagnosticFormat(NYson::EYsonFormat format) {
  183. DiagnosticFormat_ = format;
  184. }
  185. void SetResultType(IDataProvider::EResultFormat type) {
  186. ResultType_ = type;
  187. }
  188. TMaybe<TString> GetDiagnostics();
  189. IGraphTransformer::TStatistics GetRawDiagnostics();
  190. TMaybe<TString> GetTasksInfo();
  191. TMaybe<TString> GetStatistics(bool totalOnly = false, THashMap<TString, TStringBuf> extraYsons = {});
  192. TMaybe<TString> GetDiscoveredData();
  193. TMaybe<TString> GetLineage();
  194. TString ResultsAsString() const;
  195. void ConfigureYsonResultFormat(NYson::EYsonFormat format);
  196. inline IOutputStream* ExprStream() const { return ExprStream_; }
  197. inline IOutputStream* PlanStream() const { return PlanStream_; }
  198. NYson::EYsonFormat GetResultFormat() const { return ResultFormat_; }
  199. NYson::EYsonFormat GetOutputFormat() const { return OutputFormat_; }
  200. void SetValidateOptions(NUdf::EValidateMode validateMode);
  201. void SetDisableNativeUdfSupport(bool disable);
  202. void SetUseTableMetaFromGraph(bool use);
  203. void SetProgressWriter(TOperationProgressWriter writer) {
  204. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  205. ProgressWriter_ = ThreadSafeProgressWriter(writer);
  206. }
  207. void SetAuthenticatedUser(const TString& user) {
  208. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  209. OperationOptions_.AuthenticatedUser = user;
  210. }
  211. void SetOperationId(const TString& id) {
  212. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  213. OperationOptions_.Id = id;
  214. }
  215. void SetSharedOperationId(const TString& id) {
  216. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  217. OperationOptions_.SharedId = id;
  218. }
  219. void SetOperationTitle(const TString& title) {
  220. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  221. if (!title.Contains("YQL")) {
  222. ythrow yexception() << "Please mention YQL in the title '" << title << "'";
  223. }
  224. OperationOptions_.Title = title;
  225. }
  226. void SetOperationUrl(const TString& url) {
  227. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  228. OperationOptions_.Url = url;
  229. }
  230. void SetQueryName(const TString& name) {
  231. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  232. OperationOptions_.QueryName = name;
  233. }
  234. void SetOperationAttrsYson(const TString& attrs) {
  235. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  236. OperationOptions_.AttrsYson = attrs;
  237. }
  238. void SetParametersYson(const TString& parameters);
  239. // should be used after Compile phase
  240. bool ExtractQueryParametersMetadata();
  241. const TString& GetExtractedQueryParametersMetadataYson() const {
  242. return ExtractedQueryParametersMetadataYson_;
  243. }
  244. void EnableResultPosition() {
  245. SupportsResultPosition_ = true;
  246. }
  247. IPlanBuilder& GetPlanBuilder();
  248. void SetAbortHidden(std::function<void()>&& func) {
  249. AbortHidden_ = std::move(func);
  250. }
  251. TMaybe<TSet<TString>> GetUsedClusters() {
  252. CollectUsedClusters();
  253. return UsedClusters_;
  254. }
  255. private:
  256. TProgram(
  257. const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry,
  258. const TIntrusivePtr<IRandomProvider> randomProvider,
  259. const TIntrusivePtr<ITimeProvider> timeProvider,
  260. ui64 nextUniqueId,
  261. const TVector<TDataProviderInitializer>& dataProvidersInit,
  262. const TUserDataTable& userDataTable,
  263. const TCredentials::TPtr& credentials,
  264. const IModuleResolver::TPtr& modules,
  265. const IUrlListerManagerPtr& urlListerManager,
  266. const IUdfResolver::TPtr& udfResolver,
  267. const TUdfIndex::TPtr& udfIndex,
  268. const TUdfIndexPackageSet::TPtr& udfIndexPackageSet,
  269. const TFileStoragePtr& fileStorage,
  270. const IUrlPreprocessing::TPtr& urlPreprocessing,
  271. const TGatewaysConfig* gatewaysConfig,
  272. const TString& filename,
  273. const TString& sourceCode,
  274. const TString& sessionId,
  275. const TString& runner,
  276. bool enableRangeComputeFor,
  277. const IArrowResolver::TPtr& arrowResolver,
  278. EHiddenMode hiddenMode,
  279. const TQContext& qContext);
  280. TTypeAnnotationContextPtr BuildTypeAnnotationContext(const TString& username);
  281. TTypeAnnotationContextPtr GetAnnotationContext() const;
  282. TTypeAnnotationContextPtr ProvideAnnotationContext(const TString& username);
  283. bool CollectUsedClusters();
  284. NThreading::TFuture<void> OpenSession(const TString& username);
  285. [[nodiscard]]
  286. NThreading::TFuture<void> CleanupLastSession();
  287. [[nodiscard]]
  288. NThreading::TFuture<void> CloseLastSession();
  289. TFutureStatus RemoteKikimrValidate(const TString& cluster);
  290. TFutureStatus RemoteKikimrOptimize(const TString& cluster, const IPipelineConfigurator* pipelineConf);
  291. TFutureStatus RemoteKikimrRun(const TString& cluster, const IPipelineConfigurator* pipelineConf);
  292. bool FillParseResult(NYql::TAstParseResult&& astRes, NYql::TWarningRules* warningRules = nullptr);
  293. TString GetSessionId() const;
  294. NThreading::TFuture<IGraphTransformer::TStatus> AsyncTransformWithFallback(bool applyAsyncChanges);
  295. void SaveExprRoot();
  296. private:
  297. std::optional<bool> CheckFallbackIssues(const TIssues& issues);
  298. void HandleSourceCode(TString& sourceCode);
  299. void HandleTranslationSettings(NSQLTranslation::TTranslationSettings& loadedSettings,
  300. const NSQLTranslation::TTranslationSettings*& currentSettings);
  301. const NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry_;
  302. const TIntrusivePtr<IRandomProvider> RandomProvider_;
  303. const TIntrusivePtr<ITimeProvider> TimeProvider_;
  304. const ui64 NextUniqueId_;
  305. TAstNode* AstRoot_;
  306. std::unique_ptr<TMemoryPool> AstPool_;
  307. const IModuleResolver::TPtr Modules_;
  308. TAutoPtr<TExprContext> ExprCtx_;
  309. TTypeAnnotationContextPtr TypeCtx_;
  310. TVector<TDataProviderInitializer> DataProvidersInit_;
  311. TAdaptiveLock DataProvidersLock_;
  312. TVector<TDataProviderInfo> DataProviders_;
  313. TYqlOperationOptions OperationOptions_;
  314. TCredentials::TPtr Credentials_;
  315. const IUrlListerManagerPtr UrlListerManager_;
  316. IUdfResolver::TPtr UdfResolver_;
  317. const TUdfIndex::TPtr UdfIndex_;
  318. const TUdfIndexPackageSet::TPtr UdfIndexPackageSet_;
  319. const TFileStoragePtr FileStorage_;
  320. TUserDataTable SavedUserDataTable_;
  321. TUserDataStorage::TPtr UserDataStorage_;
  322. const TGatewaysConfig* GatewaysConfig_;
  323. TGatewaysConfig LoadedGatewaysConfig_;
  324. TString Filename_;
  325. TString SourceCode_;
  326. ESourceSyntax SourceSyntax_;
  327. ui16 SyntaxVersion_;
  328. TExprNode::TPtr ExprRoot_;
  329. TExprNode::TPtr SavedExprRoot_;
  330. mutable TAdaptiveLock SessionIdLock_;
  331. TString SessionId_;
  332. NThreading::TFuture<void> CloseLastSessionFuture_;
  333. TAutoPtr<IPlanBuilder> PlanBuilder_;
  334. TAutoPtr<IGraphTransformer> Transformer_;
  335. TIntrusivePtr<TResultProviderConfig> ResultProviderConfig_;
  336. bool SupportsResultPosition_ = false;
  337. IDataProvider::EResultFormat ResultType_;
  338. NYson::EYsonFormat ResultFormat_;
  339. NYson::EYsonFormat OutputFormat_;
  340. TMaybe<NYson::EYsonFormat> DiagnosticFormat_;
  341. NUdf::EValidateMode ValidateMode_ = NUdf::EValidateMode::None;
  342. bool DisableNativeUdfSupport_ = false;
  343. bool UseTableMetaFromGraph_ = false;
  344. TMaybe<TSet<TString>> UsedClusters_;
  345. TMaybe<TSet<TString>> UsedProviders_;
  346. TMaybe<TString> ExternalQueryAst_;
  347. TMaybe<TString> ExternalQueryPlan_;
  348. TMaybe<TString> ExternalDiagnostics_;
  349. IOutputStream* ExprStream_ = nullptr;
  350. IOutputStream* PlanStream_ = nullptr;
  351. TOperationProgressWriter ProgressWriter_ = [](const TOperationProgress&) {};
  352. TString ExtractedQueryParametersMetadataYson_;
  353. const bool EnableRangeComputeFor_;
  354. const IArrowResolver::TPtr ArrowResolver_;
  355. i64 FallbackCounter_ = 0;
  356. const EHiddenMode HiddenMode_ = EHiddenMode::Disable;
  357. THiddenQueryAborter AbortHidden_ = [](){};
  358. TMaybe<TString> LineageStr_;
  359. TQContext QContext_;
  360. TIssues FinalIssues_;
  361. };
  362. void UpdateSqlFlagsFromQContext(const TQContext& qContext, THashSet<TString>& flags);
  363. } // namespace NYql