yql_facade.h 16 KB


  1. #pragma once
  2. #include <yql/essentials/core/credentials/yql_credentials.h>
  3. #include <yql/essentials/core/file_storage/file_storage.h>
  4. #include <yql/essentials/core/services/yql_plan.h>
  5. #include <yql/essentials/core/services/yql_transform_pipeline.h>
  6. #include <yql/essentials/core/url_lister/interface/url_lister_manager.h>
  7. #include <yql/essentials/core/url_preprocessing/interface/url_preprocessing.h>
  8. #include <yql/essentials/core/yql_type_annotation.h>
  9. #include <yql/essentials/core/yql_user_data.h>
  10. #include <yql/essentials/core/qplayer/storage/interface/yql_qstorage.h>
  11. #include <yql/essentials/providers/config/yql_config_provider.h>
  12. #include <yql/essentials/providers/result/provider/yql_result_provider.h>
  13. #include <yql/essentials/providers/common/proto/gateways_config.pb.h>
  14. #include <yql/essentials/public/issue/yql_issue.h>
  15. #include <yql/essentials/sql/sql.h>
  16. #include <library/cpp/random_provider/random_provider.h>
  17. #include <library/cpp/time_provider/time_provider.h>
  18. #include <library/cpp/threading/future/future.h>
  19. #include <util/system/file.h>
  20. #include <util/generic/ptr.h>
  21. #include <functional>
  // Forward declaration of the function registry interface. This header only
  // refers to it through `const NKikimr::NMiniKQL::IFunctionRegistry*`, so the
  // full definition is not needed here.
  22. namespace NKikimr {
  23. namespace NMiniKQL {
  24. class IFunctionRegistry;
  25. }
  26. }
  27. namespace NYql {
  // Forward declarations of the two classes declared further down in this
  // header, together with their TIntrusivePtr aliases.
  28. class TProgram;
  29. using TProgramPtr = TIntrusivePtr<TProgram>;
  30. class TProgramFactory;
  31. using TProgramFactoryPtr = TIntrusivePtr<TProgramFactory>;
  32. ///////////////////////////////////////////////////////////////////////////////
  33. // TProgramFactory
  34. ///////////////////////////////////////////////////////////////////////////////
  // Factory whose Create() overloads return TProgramPtr. TProgram declares this
  // class as a friend and keeps its own constructor private, so the factory is
  // the way programs get constructed.
  // The class derives from TThrRefBase and is handled through
  // TProgramFactoryPtr (TIntrusivePtr<TProgramFactory>).
  // Only declarations are visible in this header. NOTE(review): the setters
  // presumably store their argument in the same-named private member below —
  // confirm against the implementation file.
  35. class TProgramFactory: public TThrRefBase, private TMoveOnly
  36. {
  37. public:
  // NOTE(review): the parameters appear to correspond one-to-one to the
  // members UseRepeatableRandomAndTimeProviders_, FunctionRegistry_,
  // NextUniqueId_, DataProvidersInit_ and Runner_ — confirm in the definition.
  38. TProgramFactory(
  39. bool useRepeatableRandomAndTimeProviders,
  40. const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry,
  41. ui64 nextUniqueId,
  42. const TVector<TDataProviderInitializer>& dataProvidersInit,
  43. const TString& runner);
  // Configuration setters (definitions are not in this header).
  44. void AddUserDataTable(const TUserDataTable& userDataTable);
  45. void SetCredentials(TCredentials::TPtr credentials);
  // Takes a raw pointer; see the note on GatewaysConfig_ below.
  46. void SetGatewaysConfig(const TGatewaysConfig* gatewaysConfig);
  47. void SetModules(IModuleResolver::TPtr modules);
  48. void SetUrlListerManager(IUrlListerManagerPtr urlListerManager);
  49. void SetUdfResolver(IUdfResolver::TPtr udfResolver);
  50. void SetUdfIndex(TUdfIndex::TPtr udfIndex, TUdfIndexPackageSet::TPtr udfIndexPackageSet);
  51. void SetFileStorage(TFileStoragePtr fileStorage);
  52. void SetUrlPreprocessing(IUrlPreprocessing::TPtr urlPreprocessing);
  53. void EnableRangeComputeFor();
  54. void SetArrowResolver(IArrowResolver::TPtr arrowResolver);
  // Creates a program from a TFile. NOTE(review): presumably the program text
  // is read from `file` — confirm in the definition. All arguments after
  // `file` have defaults.
  55. TProgramPtr Create(
  56. const TFile& file,
  57. const TString& sessionId = TString(),
  58. const TQContext& qContext = {},
  59. TMaybe<TString> gatewaysForMerge = {});
  // Creates a program from an explicit file name and source text.
  // hiddenMode defaults to EHiddenMode::Disable.
  60. TProgramPtr Create(
  61. const TString& filename,
  62. const TString& sourceCode,
  63. const TString& sessionId = TString(),
  64. EHiddenMode hiddenMode = EHiddenMode::Disable,
  65. const TQContext& qContext = {},
  66. TMaybe<TString> gatewaysForMerge = {});
  // NOTE(review): presumably sets UseUnrepeatableRandom below — confirm.
  67. void UnrepeatableRandom();
  68. private:
  69. const bool UseRepeatableRandomAndTimeProviders_;
  // NOTE(review): unlike every other data member here, this one has no
  // trailing underscore. Renaming it would also require changing the
  // implementation file, which is not visible from this header.
  70. bool UseUnrepeatableRandom = false;
  71. const NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry_;
  72. const ui64 NextUniqueId_;
  73. TVector<TDataProviderInitializer> DataProvidersInit_;
  74. TUserDataTable UserDataTable_;
  75. TCredentials::TPtr Credentials_;
  // Raw pointer with no in-class initializer. NOTE(review): presumably not
  // owned, so the pointee would have to outlive the factory — verify, and
  // verify that the constructor initializes it.
  76. const TGatewaysConfig* GatewaysConfig_;
  77. IModuleResolver::TPtr Modules_;
  78. IUrlListerManagerPtr UrlListerManager_;
  79. IUdfResolver::TPtr UdfResolver_;
  80. TUdfIndex::TPtr UdfIndex_;
  81. TUdfIndexPackageSet::TPtr UdfIndexPackageSet_;
  82. TFileStoragePtr FileStorage_;
  83. IUrlPreprocessing::TPtr UrlPreprocessing_;
  84. TString Runner_;
  85. bool EnableRangeComputeFor_ = false;
  86. IArrowResolver::TPtr ArrowResolver_;
  87. };
  88. ///////////////////////////////////////////////////////////////////////////////
  89. // TProgram
  90. ///////////////////////////////////////////////////////////////////////////////
  // A single YQL program. The declared interface covers parsing
  // (ParseYql / ParseSql), Compile, and the Discover / Lineage / Validate /
  // Optimize / Run steps. Each of those steps exists as a variant returning
  // TStatus and as an *Async variant returning TFutureStatus.
  // The constructor is private and TProgramFactory is a friend, so instances
  // are expected to come from TProgramFactory::Create().
  // The class derives from TThrRefBase and is handled through TProgramPtr.
  91. class TProgram: public TThrRefBase, private TNonCopyable
  92. {
  93. public:
  94. friend TProgramFactory;
  95. using TStatus = IGraphTransformer::TStatus;
  96. using TFutureStatus = NThreading::TFuture<TStatus>;
  97. public:
  98. ~TProgram();
  99. void AddCredentials(const TVector<std::pair<TString, TCredential>>& credentials);
  100. void ClearCredentials();
  101. void AddUserDataTable(const TUserDataTable& userDataTable);
  // Parsing entry points; each returns bool.
  102. bool ParseYql();
  103. bool ParseSql();
  104. bool ParseSql(const NSQLTranslation::TTranslationSettings& settings);
  105. bool Compile(const TString& username, bool skipLibraries = false);
  // Processing steps. The optional IOutputStream* arguments default to nullptr
  // and withTypes defaults to false.
  106. TStatus Discover(const TString& username);
  107. TFutureStatus DiscoverAsync(const TString& username);
  108. TStatus Lineage(const TString& username, IOutputStream* traceOut = nullptr, IOutputStream* exprOut = nullptr, bool withTypes = false);
  109. TFutureStatus LineageAsync(const TString& username, IOutputStream* traceOut = nullptr, IOutputStream* exprOut = nullptr, bool withTypes = false);
  110. TStatus Validate(const TString& username, IOutputStream* exprOut = nullptr, bool withTypes = false);
  111. TFutureStatus ValidateAsync(const TString& username, IOutputStream* exprOut = nullptr, bool withTypes = false);
  112. TStatus Optimize(
  113. const TString& username,
  114. IOutputStream* traceOut = nullptr,
  115. IOutputStream* tracePlan = nullptr,
  116. IOutputStream* exprOut = nullptr,
  117. bool withTypes = false);
  118. TFutureStatus OptimizeAsync(
  119. const TString& username,
  120. IOutputStream* traceOut = nullptr,
  121. IOutputStream* tracePlan = nullptr,
  122. IOutputStream* exprOut = nullptr,
  123. bool withTypes = false);
  124. TStatus Run(
  125. const TString& username,
  126. IOutputStream* traceOut = nullptr,
  127. IOutputStream* tracePlan = nullptr,
  128. IOutputStream* exprOut = nullptr,
  129. bool withTypes = false);
  130. TFutureStatus RunAsync(
  131. const TString& username,
  132. IOutputStream* traceOut = nullptr,
  133. IOutputStream* tracePlan = nullptr,
  134. IOutputStream* exprOut = nullptr,
  135. bool withTypes = false);
  // Variants of Lineage / Optimize / Run that take an IPipelineConfigurator
  // instead of the output-stream arguments.
  136. TStatus LineageWithConfig(
  137. const TString& username,
  138. const IPipelineConfigurator& pipelineConf);
  139. TFutureStatus LineageAsyncWithConfig(
  140. const TString& username,
  141. const IPipelineConfigurator& pipelineConf);
  142. TStatus OptimizeWithConfig(
  143. const TString& username,
  144. const IPipelineConfigurator& pipelineConf);
  145. TFutureStatus OptimizeAsyncWithConfig(
  146. const TString& username,
  147. const IPipelineConfigurator& pipelineConf);
  148. TStatus RunWithConfig(
  149. const TString& username,
  150. const IPipelineConfigurator& pipelineConf);
  151. TFutureStatus RunAsyncWithConfig(
  152. const TString& username,
  153. const IPipelineConfigurator& pipelineConf);
  154. TFutureStatus ContinueAsync();
  155. bool HasActiveProcesses();
  156. bool NeedWaitForActiveProcesses();
  // [[nodiscard]]: callers are expected to use the returned future rather than
  // drop it.
  157. [[nodiscard]]
  158. NThreading::TFuture<void> Abort();
  159. TIssues Issues() const;
  160. TIssues CompletedIssues() const;
  161. void FinalizeIssues();
  162. void Print(IOutputStream* exprOut, IOutputStream* planOut, bool cleanPlan = false);
  // Writes Issues() to `out`, passing Filename_ and SourceCode_ to
  // PrintWithProgramTo.
  163. inline void PrintErrorsTo(IOutputStream& out) const {
  164. Issues().PrintWithProgramTo(out, Filename_, SourceCode_);
  165. }
  166. inline const TAstNode* AstRoot() const {
  167. return AstRoot_;
  168. }
  169. inline const TExprNode::TPtr& ExprRoot() const {
  170. return ExprRoot_;
  171. }
  // Dereferences ExprCtx_ without a null check.
  172. inline TExprContext& ExprCtx() const {
  173. return *ExprCtx_;
  174. }
  // True only when a result provider config is set and it holds at least one
  // committed result.
  175. inline bool HasResults() const {
  176. return ResultProviderConfig_ &&
  177. !ResultProviderConfig_->CommittedResults.empty();
  178. }
  // Dereferences ResultProviderConfig_ unconditionally, whereas HasResults()
  // tests that pointer first. Call HasResults() before calling this.
  179. inline const TVector<TString>& Results() const {
  180. return ResultProviderConfig_->CommittedResults;
  181. }
  182. TMaybe<TString> GetQueryAst(TMaybe<size_t> memoryLimit = {});
  183. TMaybe<TString> GetQueryPlan(const TPlanSettings& settings = {});
  184. void SetDiagnosticFormat(NYson::EYsonFormat format) {
  185. DiagnosticFormat_ = format;
  186. }
  187. void SetResultType(IDataProvider::EResultFormat type) {
  188. ResultType_ = type;
  189. }
  190. TMaybe<TString> GetDiagnostics();
  191. IGraphTransformer::TStatistics GetRawDiagnostics();
  192. TMaybe<TString> GetTasksInfo();
  193. TMaybe<TString> GetStatistics(bool totalOnly = false, THashMap<TString, TStringBuf> extraYsons = {});
  194. TMaybe<TString> GetDiscoveredData();
  195. TMaybe<TString> GetLineage();
  196. TString ResultsAsString() const;
  197. void ConfigureYsonResultFormat(NYson::EYsonFormat format);
  198. inline IOutputStream* ExprStream() const { return ExprStream_; }
  199. inline IOutputStream* PlanStream() const { return PlanStream_; }
  200. NYson::EYsonFormat GetResultFormat() const { return ResultFormat_; }
  201. NYson::EYsonFormat GetOutputFormat() const { return OutputFormat_; }
  202. void SetValidateOptions(NUdf::EValidateMode validateMode);
  203. void SetDisableNativeUdfSupport(bool disable);
  204. void SetUseTableMetaFromGraph(bool use);
  // The inline setters from here through SetOperationAttrsYson all start with
  // Y_ENSURE(!TypeCtx_, ...): they are rejected with the message
  // "TypeCtx_ already created" once TypeCtx_ is set, so they must be called
  // before that happens.
  // SetProgressWriter stores the result of ThreadSafeProgressWriter(writer).
  205. void SetProgressWriter(TOperationProgressWriter writer) {
  206. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  207. ProgressWriter_ = ThreadSafeProgressWriter(writer);
  208. }
  209. void SetAuthenticatedUser(const TString& user) {
  210. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  211. OperationOptions_.AuthenticatedUser = user;
  212. }
  213. void SetOperationId(const TString& id) {
  214. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  215. OperationOptions_.Id = id;
  216. }
  217. void SetSharedOperationId(const TString& id) {
  218. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  219. OperationOptions_.SharedId = id;
  220. }
  // In addition to the TypeCtx_ check, throws yexception when `title` does not
  // contain the substring "YQL".
  221. void SetOperationTitle(const TString& title) {
  222. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  223. if (!title.Contains("YQL")) {
  224. ythrow yexception() << "Please mention YQL in the title '" << title << "'";
  225. }
  226. OperationOptions_.Title = title;
  227. }
  228. void SetOperationUrl(const TString& url) {
  229. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  230. OperationOptions_.Url = url;
  231. }
  232. void SetQueryName(const TString& name) {
  233. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  234. OperationOptions_.QueryName = name;
  235. }
  236. void SetOperationAttrsYson(const TString& attrs) {
  237. Y_ENSURE(!TypeCtx_, "TypeCtx_ already created");
  238. OperationOptions_.AttrsYson = attrs;
  239. }
  240. void SetParametersYson(const TString& parameters);
  241. // should be used after Compile phase
  242. bool ExtractQueryParametersMetadata();
  // Returns a reference to the stored metadata string.
  // NOTE(review): presumably filled by ExtractQueryParametersMetadata() — confirm.
  243. const TString& GetExtractedQueryParametersMetadataYson() const {
  244. return ExtractedQueryParametersMetadataYson_;
  245. }
  246. void EnableResultPosition() {
  247. SupportsResultPosition_ = true;
  248. }
  249. IPlanBuilder& GetPlanBuilder();
  // Takes the callback by rvalue reference and moves it into AbortHidden_.
  250. void SetAbortHidden(std::function<void()>&& func) {
  251. AbortHidden_ = std::move(func);
  252. }
  // Calls CollectUsedClusters() (its bool result is ignored) and then returns
  // a copy of UsedClusters_.
  253. TMaybe<TSet<TString>> GetUsedClusters() {
  254. CollectUsedClusters();
  255. return UsedClusters_;
  256. }
  257. private:
  // Private constructor: only the friend TProgramFactory can create a TProgram.
  258. TProgram(
  259. const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry,
  260. const TIntrusivePtr<IRandomProvider> randomProvider,
  261. const TIntrusivePtr<ITimeProvider> timeProvider,
  262. ui64 nextUniqueId,
  263. const TVector<TDataProviderInitializer>& dataProvidersInit,
  264. const TUserDataTable& userDataTable,
  265. const TCredentials::TPtr& credentials,
  266. const IModuleResolver::TPtr& modules,
  267. const IUrlListerManagerPtr& urlListerManager,
  268. const IUdfResolver::TPtr& udfResolver,
  269. const TUdfIndex::TPtr& udfIndex,
  270. const TUdfIndexPackageSet::TPtr& udfIndexPackageSet,
  271. const TFileStoragePtr& fileStorage,
  272. const IUrlPreprocessing::TPtr& urlPreprocessing,
  273. const TGatewaysConfig* gatewaysConfig,
  274. const TString& filename,
  275. const TString& sourceCode,
  276. const TString& sessionId,
  277. const TString& runner,
  278. bool enableRangeComputeFor,
  279. const IArrowResolver::TPtr& arrowResolver,
  280. EHiddenMode hiddenMode,
  281. const TQContext& qContext,
  282. TMaybe<TString> gatewaysForMerge);
  // Internal helpers; their definitions are not in this header.
  283. TTypeAnnotationContextPtr BuildTypeAnnotationContext(const TString& username);
  284. TTypeAnnotationContextPtr GetAnnotationContext() const;
  285. TTypeAnnotationContextPtr ProvideAnnotationContext(const TString& username);
  286. bool CollectUsedClusters();
  287. NThreading::TFuture<void> OpenSession(const TString& username);
  288. [[nodiscard]]
  289. NThreading::TFuture<void> CleanupLastSession();
  290. [[nodiscard]]
  291. NThreading::TFuture<void> CloseLastSession();
  292. TFutureStatus RemoteKikimrValidate(const TString& cluster);
  293. TFutureStatus RemoteKikimrOptimize(const TString& cluster, const IPipelineConfigurator* pipelineConf);
  294. TFutureStatus RemoteKikimrRun(const TString& cluster, const IPipelineConfigurator* pipelineConf);
  295. bool FillParseResult(NYql::TAstParseResult&& astRes, NYql::TWarningRules* warningRules = nullptr);
  296. TString GetSessionId() const;
  297. NThreading::TFuture<IGraphTransformer::TStatus> AsyncTransformWithFallback(bool applyAsyncChanges);
  298. void SaveExprRoot();
  299. private:
  300. std::optional<bool> CheckFallbackIssues(const TIssues& issues);
  301. void HandleSourceCode(TString& sourceCode);
  302. void HandleTranslationSettings(NSQLTranslation::TTranslationSettings& loadedSettings,
  303. NSQLTranslation::TTranslationSettings*& currentSettings);
  // Data members.
  // NOTE(review): AstRoot_, GatewaysConfig_, SourceSyntax_, SyntaxVersion_,
  // ResultType_, ResultFormat_ and OutputFormat_ have no in-class initializer;
  // presumably the constructor sets them — confirm in the implementation file.
  // Ownership of the raw pointers FunctionRegistry_, AstRoot_ and
  // GatewaysConfig_ is not visible from this header.
  304. const NKikimr::NMiniKQL::IFunctionRegistry* FunctionRegistry_;
  305. const TIntrusivePtr<IRandomProvider> RandomProvider_;
  306. const TIntrusivePtr<ITimeProvider> TimeProvider_;
  307. const ui64 NextUniqueId_;
  308. TAstNode* AstRoot_;
  309. std::unique_ptr<TMemoryPool> AstPool_;
  310. TAutoPtr<TExprContext> ExprCtx_;
  311. TTypeAnnotationContextPtr TypeCtx_;
  312. const IModuleResolver::TPtr Modules_;
  313. TVector<TDataProviderInitializer> DataProvidersInit_;
  314. TAdaptiveLock DataProvidersLock_;
  315. TVector<TDataProviderInfo> DataProviders_;
  316. TYqlOperationOptions OperationOptions_;
  317. TCredentials::TPtr Credentials_;
  318. const IUrlListerManagerPtr UrlListerManager_;
  319. IUdfResolver::TPtr UdfResolver_;
  320. const TUdfIndex::TPtr UdfIndex_;
  321. const TUdfIndexPackageSet::TPtr UdfIndexPackageSet_;
  322. const TFileStoragePtr FileStorage_;
  323. TUserDataTable SavedUserDataTable_;
  324. TUserDataStorage::TPtr UserDataStorage_;
  325. const TGatewaysConfig* GatewaysConfig_;
  326. TGatewaysConfig LoadedGatewaysConfig_;
  327. TString Filename_;
  328. TString SourceCode_;
  329. ESourceSyntax SourceSyntax_;
  330. ui16 SyntaxVersion_;
  331. TExprNode::TPtr ExprRoot_;
  332. TExprNode::TPtr SavedExprRoot_;
  // Declared mutable, so it can be locked from const member functions.
  // NOTE(review): presumably guards SessionId_ in GetSessionId() const — confirm.
  333. mutable TAdaptiveLock SessionIdLock_;
  334. TString SessionId_;
  335. NThreading::TFuture<void> CloseLastSessionFuture_;
  336. TAutoPtr<IPlanBuilder> PlanBuilder_;
  337. TAutoPtr<IGraphTransformer> Transformer_;
  338. TIntrusivePtr<TResultProviderConfig> ResultProviderConfig_;
  339. bool SupportsResultPosition_ = false;
  340. IDataProvider::EResultFormat ResultType_;
  341. NYson::EYsonFormat ResultFormat_;
  342. NYson::EYsonFormat OutputFormat_;
  343. TMaybe<NYson::EYsonFormat> DiagnosticFormat_;
  344. NUdf::EValidateMode ValidateMode_ = NUdf::EValidateMode::None;
  345. bool DisableNativeUdfSupport_ = false;
  346. bool UseTableMetaFromGraph_ = false;
  347. TMaybe<TSet<TString>> UsedClusters_;
  348. TMaybe<TSet<TString>> UsedProviders_;
  349. TMaybe<TString> ExternalQueryAst_;
  350. TMaybe<TString> ExternalQueryPlan_;
  351. TMaybe<TString> ExternalDiagnostics_;
  352. IOutputStream* ExprStream_ = nullptr;
  353. IOutputStream* PlanStream_ = nullptr;
  // Defaults to a no-op callback until SetProgressWriter() replaces it.
  354. TOperationProgressWriter ProgressWriter_ = [](const TOperationProgress&) {};
  355. TString ExtractedQueryParametersMetadataYson_;
  356. const bool EnableRangeComputeFor_;
  357. const IArrowResolver::TPtr ArrowResolver_;
  358. i64 FallbackCounter_ = 0;
  359. const EHiddenMode HiddenMode_ = EHiddenMode::Disable;
  // Defaults to a no-op callback until SetAbortHidden() replaces it.
  360. THiddenQueryAborter AbortHidden_ = [](){};
  361. TMaybe<TString> LineageStr_;
  362. TQContext QContext_;
  363. TMaybe<TString> GatewaysForMerge_;
  364. TIssues FinalIssues_;
  365. };
  // Free function; only the declaration is visible in this header.
  // `flags` is taken by non-const reference, so it is an in/out parameter.
  // NOTE(review): judging by the name, this presumably updates the SQL flag
  // set based on `qContext` — confirm against the definition.
  366. void UpdateSqlFlagsFromQContext(const TQContext& qContext, THashSet<TString>& flags);
  367. } // namespace NYql