yql_facade_run.cpp 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886
  1. #include "yql_facade_run.h"
  2. #include <yql/essentials/providers/pg/provider/yql_pg_provider.h>
  3. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  4. #include <yql/essentials/providers/common/proto/gateways_config.pb.h>
  5. #include <yql/essentials/providers/common/udf_resolve/yql_outproc_udf_resolver.h>
  6. #include <yql/essentials/providers/common/udf_resolve/yql_simple_udf_resolver.h>
  7. #include <yql/essentials/providers/common/udf_resolve/yql_udf_resolver_with_index.h>
  8. #include <yql/essentials/core/yql_user_data_storage.h>
  9. #include <yql/essentials/core/yql_udf_resolver.h>
  10. #include <yql/essentials/core/yql_udf_index.h>
  11. #include <yql/essentials/core/yql_udf_index_package_set.h>
  12. #include <yql/essentials/core/yql_library_compiler.h>
  13. #include <yql/essentials/core/yql_type_annotation.h>
  14. #include <yql/essentials/core/pg_ext/yql_pg_ext.h>
  15. #include <yql/essentials/core/services/mounts/yql_mounts.h>
  16. #include <yql/essentials/core/services/yql_out_transformers.h>
  17. #include <yql/essentials/core/file_storage/file_storage.h>
  18. #include <yql/essentials/core/file_storage/proto/file_storage.pb.h>
  19. #include <yql/essentials/core/peephole_opt/yql_opt_peephole_physical.h>
  20. #include <yql/essentials/core/facade/yql_facade.h>
  21. #include <yql/essentials/core/url_lister/url_lister_manager.h>
  22. #include <yql/essentials/core/url_preprocessing/url_preprocessing.h>
  23. #include <yql/essentials/core/qplayer/storage/file/yql_qstorage_file.h>
  24. #include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
  25. #include <yql/essentials/minikql/mkql_function_registry.h>
  26. #include <yql/essentials/ast/yql_expr.h>
  27. #include <yql/essentials/parser/pg_wrapper/interface/parser.h>
  28. #include <yql/essentials/parser/pg_catalog/catalog.h>
  29. #include <yql/essentials/public/udf/udf_version.h>
  30. #include <yql/essentials/public/udf/udf_registrator.h>
  31. #include <yql/essentials/public/udf/udf_validate.h>
  32. #include <yql/essentials/public/result_format/yql_result_format_response.h>
  33. #include <yql/essentials/public/result_format/yql_result_format_type.h>
  34. #include <yql/essentials/public/result_format/yql_result_format_data.h>
  35. #include <yql/essentials/utils/failure_injector/failure_injector.h>
  36. #include <yql/essentials/utils/backtrace/backtrace.h>
  37. #include <yql/essentials/utils/log/log.h>
  38. #include <yql/essentials/protos/yql_mount.pb.h>
  39. #include <yql/essentials/protos/pg_ext.pb.h>
  40. #include <yql/essentials/sql/settings/translation_settings.h>
  41. #include <yql/essentials/sql/v1/format/sql_format.h>
  42. #include <yql/essentials/sql/v1/sql.h>
  43. #include <yql/essentials/sql/sql.h>
  44. #include <library/cpp/resource/resource.h>
  45. #include <library/cpp/yson/node/node_io.h>
  46. #include <library/cpp/yson/writer.h>
  47. #include <google/protobuf/text_format.h>
  48. #include <google/protobuf/arena.h>
  49. #include <util/stream/output.h>
  50. #include <util/stream/file.h>
  51. #include <util/stream/null.h>
  52. #include <util/system/user.h>
  53. #include <util/system/env.h>
  54. #include <util/string/split.h>
  55. #include <util/string/join.h>
  56. #include <util/string/builder.h>
  57. #include <util/string/strip.h>
  58. #include <util/generic/vector.h>
  59. #include <util/generic/ptr.h>
  60. #include <util/generic/yexception.h>
  61. #include <util/datetime/base.h>
  62. #ifdef __unix__
  63. #include <sys/resource.h>
  64. #endif
  65. namespace {
  66. const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote |
  67. NYql::TAstPrintFlags::AdaptArbitraryContent;
  68. template <typename TMessage>
  69. THolder<TMessage> ParseProtoConfig(const TString& cfgFile) {
  70. auto config = MakeHolder<TMessage>();
  71. TString configData = TFileInput(cfgFile).ReadAll();
  72. using ::google::protobuf::TextFormat;
  73. if (!TextFormat::ParseFromString(configData, config.Get())) {
  74. throw yexception() << "Bad format of config file " << cfgFile;
  75. }
  76. return config;
  77. }
  78. template <typename TMessage>
  79. THolder<TMessage> ParseProtoFromResource(TStringBuf resourceName) {
  80. if (!NResource::Has(resourceName)) {
  81. return {};
  82. }
  83. auto config = MakeHolder<TMessage>();
  84. TString configData = NResource::Find(resourceName);
  85. using ::google::protobuf::TextFormat;
  86. if (!TextFormat::ParseFromString(configData, config.Get())) {
  87. throw yexception() << "Bad format of config " << resourceName;
  88. }
  89. return config;
  90. }
  91. class TOptPipelineConfigurator : public NYql::IPipelineConfigurator {
  92. public:
  93. TOptPipelineConfigurator(NYql::TProgramPtr prg, IOutputStream* planStream, IOutputStream* exprStream, bool withTypes)
  94. : Program_(std::move(prg))
  95. , PlanStream_(planStream)
  96. , ExprStream_(exprStream)
  97. , WithTypes_(withTypes)
  98. {
  99. }
  100. void AfterCreate(NYql::TTransformationPipeline* pipeline) const final {
  101. Y_UNUSED(pipeline);
  102. }
  103. void AfterTypeAnnotation(NYql::TTransformationPipeline* pipeline) const final {
  104. pipeline->Add(NYql::TExprLogTransformer::Sync("OptimizedExpr", NYql::NLog::EComponent::Core, NYql::NLog::ELevel::TRACE),
  105. "OptTrace", NYql::TIssuesIds::CORE, "OptTrace");
  106. }
  107. void AfterOptimize(NYql::TTransformationPipeline* pipeline) const final {
  108. if (ExprStream_) {
  109. pipeline->Add(NYql::TExprOutputTransformer::Sync(Program_->ExprRoot(), ExprStream_, WithTypes_), "AstOutput");
  110. }
  111. if (PlanStream_) {
  112. pipeline->Add(NYql::TPlanOutputTransformer::Sync(PlanStream_, Program_->GetPlanBuilder(), Program_->GetOutputFormat()), "PlanOutput");
  113. }
  114. }
  115. private:
  116. NYql::TProgramPtr Program_;
  117. IOutputStream* PlanStream_;
  118. IOutputStream* ExprStream_;
  119. bool WithTypes_;
  120. };
  121. class TPeepHolePipelineConfigurator : public NYql::IPipelineConfigurator {
  122. public:
  123. TPeepHolePipelineConfigurator() {
  124. }
  125. void AfterCreate(NYql::TTransformationPipeline* pipeline) const final {
  126. Y_UNUSED(pipeline);
  127. }
  128. void AfterTypeAnnotation(NYql::TTransformationPipeline* pipeline) const final {
  129. pipeline->Add(NYql::TExprLogTransformer::Sync("OptimizedExpr", NYql::NLog::EComponent::Core, NYql::NLog::ELevel::TRACE),
  130. "OptTrace", NYql::TIssuesIds::CORE, "OptTrace");
  131. }
  132. void AfterOptimize(NYql::TTransformationPipeline* pipeline) const final {
  133. pipeline->Add(NYql::MakePeepholeOptimization(pipeline->GetTypeAnnotationContext()), "PeepHole");
  134. }
  135. };
  136. } // unnamed
  137. namespace NYql {
  138. TFacadeRunOptions::TFacadeRunOptions() {
  139. }
  140. TFacadeRunOptions::~TFacadeRunOptions() {
  141. }
  142. void TFacadeRunOptions::InitLogger() {
  143. if (Verbosity != LOG_DEF_PRIORITY || ShowLog) {
  144. NLog::ELevel level = NLog::ELevelHelpers::FromInt(Verbosity);
  145. if (ShowLog) {
  146. level = Max(level, NLog::ELevel::DEBUG);
  147. }
  148. NLog::EComponentHelpers::ForEach([level](NLog::EComponent c) {
  149. NYql::NLog::YqlLogger().SetComponentLevel(c, level);
  150. });
  151. }
  152. if (TraceOptStream) {
  153. NLog::YqlLogger().SetComponentLevel(NLog::EComponent::Core, NLog::ELevel::TRACE);
  154. NLog::YqlLogger().SetComponentLevel(NLog::EComponent::CoreEval, NLog::ELevel::TRACE);
  155. NLog::YqlLogger().SetComponentLevel(NLog::EComponent::CorePeepHole, NLog::ELevel::TRACE);
  156. }
  157. }
  158. void TFacadeRunOptions::PrintInfo(const TString& msg) {
  159. if (!NoDebug && Verbosity >= TLOG_INFO) {
  160. Cerr << msg << Endl;
  161. }
  162. }
  163. void TFacadeRunOptions::Parse(int argc, const char *argv[]) {
  164. User = GetUsername();
  165. if (EnableCredentials) {
  166. Token = GetEnv("YQL_TOKEN");
  167. if (!Token) {
  168. const TString home = GetEnv("HOME");
  169. auto tokenPath = TFsPath(home) / ".yql" / "token";
  170. if (tokenPath.Exists()) {
  171. Token = StripStringRight(TFileInput(tokenPath).ReadAll());
  172. }
  173. }
  174. }
  175. NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();
  176. opts.AddHelpOption();
  177. opts.AddLongOption('p', "program", "Program file (use - to read from stdin)").Required().RequiredArgument("FILE")
  178. .Handler1T<TString>([this](const TString& file) {
  179. ProgramFile = file;
  180. if (ProgramFile == "-") {
  181. ProgramFile = "-stdin-";
  182. ProgramText = Cin.ReadAll();
  183. } else {
  184. ProgramText = TFileInput(ProgramFile).ReadAll();
  185. }
  186. User = GetUsername();
  187. });
  188. opts.AddLongOption('s', "sql", "Program is SQL query").NoArgument().StoreValue(&ProgramType, EProgramType::Sql);
  189. if (PgSupport) {
  190. opts.AddLongOption("pg", "Program has PG syntax").NoArgument().StoreValue(&ProgramType, EProgramType::Pg);
  191. opts.AddLongOption("pg-ext", "Pg extensions config file").Optional().RequiredArgument("FILE")
  192. .Handler1T<TString>([this](const TString& file) {
  193. PgExtConfig = ParseProtoConfig<NProto::TPgExtensions>(file);
  194. });
  195. }
  196. opts.AddLongOption('f', "file", "Additional files").RequiredArgument("name@path")
  197. .KVHandler([this](TString name, TString path) {
  198. if (name.empty() || path.empty()) {
  199. throw yexception() << "Incorrect file mapping, expected form name@path, e.g. MyFile@file.txt";
  200. }
  201. auto& entry = DataTable[NYql::TUserDataKey::File(NYql::GetDefaultFilePrefix() + name)];
  202. entry.Type = NYql::EUserDataType::PATH;
  203. entry.Data = path;
  204. }, '@');
  205. opts.AddLongOption('U', "url", "Additional urls").RequiredArgument("name@path")
  206. .KVHandler([this](TString name, TString url) {
  207. if (name.empty() || url.empty()) {
  208. throw yexception() << "url mapping, expected form name@url, e.g. MyUrl@http://example.com/file";
  209. }
  210. auto& entry = DataTable[NYql::TUserDataKey::File(NYql::GetDefaultFilePrefix() + name)];
  211. entry.Type = NYql::EUserDataType::URL;
  212. entry.Data = url;
  213. }, '@');
  214. opts.AddLongOption('m', "mounts", "Mount points config file.").Optional().RequiredArgument("FILE")
  215. .Handler1T<TString>([this](const TString& file) {
  216. MountConfig = ParseProtoConfig<NYqlMountConfig::TMountConfig>(file);
  217. });
  218. opts.AddLongOption("params-file", "Query parameters values in YSON format").Optional().RequiredArgument("FILE")
  219. .Handler1T<TString>([this](const TString& file) {
  220. Params = TFileInput(file).ReadAll();
  221. });
  222. opts.AddLongOption('G', "gateways", TStringBuilder() << "Used gateways, available: " << JoinSeq(",", SupportedGateways_)).DefaultValue(JoinSeq(",", GatewayTypes))
  223. .Handler1T<TString>([this](const TString& gateways) {
  224. ::StringSplitter(gateways).Split(',').Consume([&](const TStringBuf& val) {
  225. if (!SupportedGateways_.contains(val)) {
  226. throw yexception() << "Unsupported gateway \"" << val << '"';
  227. }
  228. GatewayTypes.emplace(val);
  229. });
  230. });
  231. opts.AddLongOption("gateways-cfg", "Gateways configuration file").Optional().RequiredArgument("FILE")
  232. .Handler1T<TString>([this](const TString& file) {
  233. GatewaysConfig = ParseProtoConfig<TGatewaysConfig>(file);
  234. });
  235. opts.AddLongOption("fs-cfg", "Fs configuration file").Optional().RequiredArgument("FILE")
  236. .Handler1T<TString>([this](const TString& file) {
  237. FsConfig = MakeHolder<TFileStorageConfig>();
  238. LoadFsConfigFromFile(file, *FsConfig);
  239. });
  240. opts.AddLongOption('u', "udf", "Load shared library with UDF by given path").RequiredArgument("PATH").AppendTo(&UdfsPaths);
  241. opts.AddLongOption("udfs-dir", "Load all shared libraries with UDFs found in given directory").RequiredArgument("DIR")
  242. .Handler1T<TString>([this](const TString& dir) {
  243. NKikimr::NMiniKQL::FindUdfsInDir(dir, &UdfsPaths);
  244. });
  245. opts.AddLongOption("udf-resolver", "Path to udf-resolver").Optional().RequiredArgument("PATH").StoreResult(&UdfResolverPath);
  246. opts.AddLongOption("udf-resolver-filter-syscalls", "Filter syscalls in udf resolver").Optional().NoArgument().SetFlag(&UdfResolverFilterSyscalls);
  247. opts.AddLongOption("scan-udfs", "Scan specified udfs with external udf-resolver to use static function registry").NoArgument().SetFlag(&ScanUdfs);
  248. opts.AddLongOption("parse-only", "Parse program and exit").NoArgument().StoreValue(&Mode, ERunMode::Parse);
  249. opts.AddLongOption("compile-only", "Compile program and exit").NoArgument().StoreValue(&Mode, ERunMode::Compile);
  250. opts.AddLongOption("validate", "Validate program and exit").NoArgument().StoreValue(&Mode, ERunMode::Validate);
  251. opts.AddLongOption("lineage", "Calculate program lineage and exit").NoArgument().StoreValue(&Mode, ERunMode::Lineage);
  252. opts.AddLongOption('O',"optimize", "Optimize program and exit").NoArgument().StoreValue(&Mode, ERunMode::Optimize);
  253. opts.AddLongOption('D', "discover", "Discover tables in the program and exit").NoArgument().StoreValue(&Mode, ERunMode::Discover);
  254. opts.AddLongOption("peephole", "Perform peephole program optimization and exit").NoArgument().StoreValue(&Mode, ERunMode::Peephole);
  255. opts.AddLongOption('R',"run", "Run program (use by default)").NoArgument().StoreValue(&Mode, ERunMode::Run);
  256. opts.AddLongOption('L', "show-log", "Show transformation log").Optional().NoArgument().SetFlag(&ShowLog);
  257. opts.AddLongOption('v', "verbosity", "Log verbosity level").Optional().RequiredArgument("LEVEL").StoreResult(&Verbosity);
  258. opts.AddLongOption("print-ast", "Print AST after loading").NoArgument().SetFlag(&PrintAst);
  259. opts.AddLongOption("print-expr", "Print rebuild AST before execution").NoArgument()
  260. .Handler0([this]() {
  261. if (!ExprStream) {
  262. ExprStream = &Cout;
  263. }
  264. });
  265. opts.AddLongOption("with-types", "Print types annotation").NoArgument().SetFlag(&WithTypes);
  266. opts.AddLongOption("trace-opt", "Print AST in the begin of each transformation").NoArgument()
  267. .Handler0([this]() {
  268. TraceOptStream = &Cerr;
  269. });
  270. opts.AddLongOption("expr-file", "Print AST to that file instead of stdout").Optional().RequiredArgument("FILE")
  271. .Handler1T<TString>([this](const TString& file) {
  272. ExprStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file);
  273. ExprStream = ExprStreamHolder_.Get();
  274. });
  275. opts.AddLongOption("print-result", "Print program execution result to stdout").NoArgument()
  276. .Handler0([this]() {
  277. if (!ResultStream) {
  278. ResultStream = &Cout;
  279. }
  280. });
  281. opts.AddLongOption("format", "Results format")
  282. .Optional()
  283. .RequiredArgument("STR")
  284. .Choices(THashSet<TString>{"text", "binary", "pretty"})
  285. .Handler1T<TString>([this](const TString& val) {
  286. if (val == "text") {
  287. ResultsFormat = NYson::EYsonFormat::Text;
  288. } else if (val == "binary") {
  289. ResultsFormat = NYson::EYsonFormat::Binary;
  290. } else if (val == "pretty") {
  291. ResultsFormat = NYson::EYsonFormat::Pretty;
  292. } else {
  293. throw yexception() << "Unknown result format " << val;
  294. }
  295. });
  296. opts.AddLongOption("result-file", "Print program execution result to file").Optional().RequiredArgument("FILE")
  297. .Handler1T<TString>([this](const TString& file) {
  298. ResultStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file);
  299. ResultStream = ResultStreamHolder_.Get();
  300. });
  301. opts.AddLongOption('P',"trace-plan", "Print plan before execution").NoArgument()
  302. .Handler0([this]() {
  303. if (!PlanStream) {
  304. PlanStream = &Cerr;
  305. }
  306. });
  307. opts.AddLongOption("plan-file", "Print program plan to file").Optional().RequiredArgument("FILE")
  308. .Handler1T<TString>([this](const TString& file) {
  309. PlanStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file);
  310. PlanStream = PlanStreamHolder_.Get();
  311. });
  312. opts.AddLongOption("err-file", "Print validate/optimize/runtime errors to file")
  313. .Handler1T<TString>([this](const TString& file) {
  314. ErrStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file);
  315. ErrStream = ErrStreamHolder_.Get();
  316. });
  317. opts.AddLongOption("full-expr", "Avoid buffering of expr/plan").NoArgument().SetFlag(&FullExpr);
  318. opts.AddLongOption("mem-limit", "Set memory limit in megabytes")
  319. .Handler1T<ui32>(0, [](ui32 memLimit) {
  320. if (memLimit) {
  321. #ifdef __unix__
  322. auto memLimitBytes = memLimit * 1024 * 1024;
  323. struct rlimit rl;
  324. if (getrlimit(RLIMIT_AS, &rl)) {
  325. throw TSystemError() << "Cannot getrlimit(RLIMIT_AS)";
  326. }
  327. rl.rlim_cur = memLimitBytes;
  328. if (setrlimit(RLIMIT_AS, &rl)) {
  329. throw TSystemError() << "Cannot setrlimit(RLIMIT_AS) to " << memLimitBytes << " bytes";
  330. }
  331. #else
  332. throw yexception() << "Memory limit can not be set on this platfrom";
  333. #endif
  334. }
  335. });
  336. opts.AddLongOption("validate-mode", "Validate udf mode, available values: " + NUdf::ValidateModeAvailables())
  337. .DefaultValue(NUdf::ValidateModeAsStr(NUdf::EValidateMode::Greedy))
  338. .Handler1T<TString>([this](const TString& mode) {
  339. ValidateMode = NUdf::ValidateModeByStr(mode);
  340. });
  341. opts.AddLongOption("stat", "Print execution statistics").Optional().OptionalArgument("FILE")
  342. .Handler1T<TString>([this](const TString& file) {
  343. if (file) {
  344. StatStreamHolder_ = MakeHolder<TFileOutput>(file);
  345. StatStream = StatStreamHolder_.Get();
  346. } else {
  347. StatStream = &Cerr;
  348. }
  349. });
  350. opts.AddLongOption("full-stat", "Output full execution statistics").Optional().NoArgument().SetFlag(&FullStatistics);
  351. opts.AddLongOption("sql-flags", "SQL translator pragma flags").SplitHandler(&SqlFlags, ',');
  352. opts.AddLongOption("syntax-version", "SQL syntax version").StoreResult(&SyntaxVersion).DefaultValue(1);
  353. opts.AddLongOption("ansi-lexer", "Use ansi lexer").NoArgument().SetFlag(&AnsiLexer);
  354. opts.AddLongOption("assume-ydb-on-slash", "Assume YDB provider if cluster name starts with '/'").NoArgument().SetFlag(&AssumeYdbOnClusterWithSlash);
  355. opts.AddLongOption("test-antlr4", "Check antlr4 parser").NoArgument().SetFlag(&TestAntlr4);
  356. opts.AddLongOption("with-final-issues", "Include some final messages (like statistic) in issues").NoArgument().SetFlag(&WithFinalIssues);
  357. if (FailureInjectionSupport) {
  358. opts.AddLongOption("failure-inject", "Activate failure injection")
  359. .Optional()
  360. .RequiredArgument("INJECTION_NAME=FAIL_COUNT or INJECTION_NAME=SKIP_COUNT/FAIL_COUNT")
  361. .KVHandler([](TString name, TString value) {
  362. TFailureInjector::Activate();
  363. TStringBuf fail = value;
  364. TStringBuf skip;
  365. if (TStringBuf(value).TrySplit('/', skip, fail)) {
  366. TFailureInjector::Set(name, FromString<ui32>(skip), FromString<ui32>(fail));
  367. } else {
  368. TFailureInjector::Set(name, 0, FromString<ui32>(fail));
  369. }
  370. });
  371. }
  372. if (EnableCredentials) {
  373. opts.AddLongOption("token", "YQL token")
  374. .Optional()
  375. .RequiredArgument("VALUE")
  376. .StoreResult(&Token);
  377. opts.AddLongOption("custom-tokens", "Custom tokens")
  378. .Optional()
  379. .RequiredArgument("NAME=VALUE or NAME=@PATH")
  380. .KVHandler([this](TString key, TString value) {
  381. if (value.StartsWith('@')) {
  382. value = StripStringRight(TFileInput(value.substr(1)).ReadAll());
  383. }
  384. Credentials->AddCredential(key, TCredential("custom", "", value));
  385. });
  386. }
  387. if (EnableQPlayer) {
  388. opts.AddLongOption("qstorage-dir", "Directory for QStorage").RequiredArgument("DIR")
  389. .Handler1T<TString>([this](const TString& dir) {
  390. QPlayerStorage_ = MakeFileQStorage(dir);
  391. });
  392. opts.AddLongOption("op-id", "QStorage operation id").StoreResult(&OperationId).DefaultValue("dummy_op");
  393. opts.AddLongOption("capture", "Write query metadata to QStorage").NoArgument()
  394. .Handler0([this]() {
  395. if (EQPlayerMode::Replay == QPlayerMode) {
  396. throw yexception() << "replay and capture options can't be used simultaneously";
  397. }
  398. QPlayerMode = EQPlayerMode::Capture;
  399. });
  400. opts.AddLongOption("replay", "Read query metadata from QStorage").NoArgument()
  401. .Handler0([this]() {
  402. if (EQPlayerMode::Capture == QPlayerMode) {
  403. throw yexception() << "replay and capture options can't be used simultaneously";
  404. }
  405. QPlayerMode = EQPlayerMode::Replay;
  406. });
  407. }
  408. opts.SetFreeArgsMax(0);
  409. for (auto& ext: OptExtenders_) {
  410. ext(opts);
  411. }
  412. auto res = NLastGetopt::TOptsParseResult(&opts, argc, argv);
  413. if (QPlayerMode != EQPlayerMode::None) {
  414. if (!QPlayerStorage_) {
  415. QPlayerStorage_ = MakeFileQStorage(".");
  416. }
  417. if (EQPlayerMode::Replay == QPlayerMode) {
  418. QPlayerContext = TQContext(QPlayerStorage_->MakeReader(OperationId, {}));
  419. ProgramFile = "-replay-";
  420. ProgramText = "";
  421. } else if (EQPlayerMode::Capture == QPlayerMode) {
  422. QPlayerContext = TQContext(QPlayerStorage_->MakeWriter(OperationId, {}));
  423. }
  424. }
  425. if (Mode >= ERunMode::Validate && GatewayTypes.empty()) {
  426. throw yexception() << "At least one gateway from the list " << JoinSeq(",", SupportedGateways_).Quote() << " must be specified";
  427. }
  428. if (!GatewaysConfig) {
  429. GatewaysConfig = ParseProtoFromResource<TGatewaysConfig>("gateways.conf");
  430. }
  431. if (GatewaysConfig && GatewaysConfig->HasSqlCore()) {
  432. SqlFlags.insert(GatewaysConfig->GetSqlCore().GetTranslationFlags().begin(), GatewaysConfig->GetSqlCore().GetTranslationFlags().end());
  433. }
  434. UpdateSqlFlagsFromQContext(QPlayerContext, SqlFlags);
  435. if (!FsConfig) {
  436. FsConfig = MakeHolder<TFileStorageConfig>();
  437. if (NResource::Has("fs.conf")) {
  438. LoadFsConfigFromResource("fs.conf", *FsConfig);
  439. }
  440. }
  441. if (EnableCredentials && Token) {
  442. for (auto name: SupportedGateways_) {
  443. Credentials->AddCredential(TStringBuilder() << "default_" << name, TCredential(name, "", Token));
  444. }
  445. }
  446. for (auto& handle: OptHandlers_) {
  447. handle(res);
  448. }
  449. }
  450. TFacadeRunner::TFacadeRunner(TString name)
  451. : Name_(std::move(name))
  452. {
  453. }
  454. TFacadeRunner::~TFacadeRunner() {
  455. }
  456. TIntrusivePtr<NKikimr::NMiniKQL::IFunctionRegistry> TFacadeRunner::GetFuncRegistry() {
  457. return FuncRegistry_;
  458. }
  459. int TFacadeRunner::Main(int argc, const char *argv[]) {
  460. NYql::NBacktrace::RegisterKikimrFatalActions();
  461. NYql::NBacktrace::EnableKikimrSymbolize();
  462. NYql::NLog::YqlLoggerScope logger(&Cerr);
  463. try {
  464. return DoMain(argc, argv);
  465. }
  466. catch (...) {
  467. Cerr << CurrentExceptionMessage() << Endl;
  468. return 1;
  469. }
  470. }
  471. int TFacadeRunner::DoMain(int argc, const char *argv[]) {
  472. Y_UNUSED(NUdf::GetStaticSymbols());
  473. RunOptions_.Parse(argc, argv);
  474. if (!RunOptions_.NoDebug) {
  475. Cerr << Name_ << " ABI version: " << NKikimr::NUdf::CurrentAbiVersionStr() << Endl;
  476. }
  477. RunOptions_.InitLogger();
  478. if (RunOptions_.PgSupport) {
  479. ClusterMapping_["pg_catalog"] = PgProviderName;
  480. ClusterMapping_["information_schema"] = PgProviderName;
  481. NPg::SetSqlLanguageParser(NSQLTranslationPG::CreateSqlLanguageParser());
  482. NPg::LoadSystemFunctions(*NSQLTranslationPG::CreateSystemFunctionsParser());
  483. if (RunOptions_.PgExtConfig) {
  484. TVector<NPg::TExtensionDesc> extensions;
  485. PgExtensionsFromProto(*RunOptions_.PgExtConfig, extensions);
  486. NPg::RegisterExtensions(extensions, RunOptions_.QPlayerContext.CanRead(),
  487. *NSQLTranslationPG::CreateExtensionSqlParser(),
  488. NKikimr::NMiniKQL::CreateExtensionLoader().get());
  489. }
  490. NPg::GetSqlLanguageParser()->Freeze();
  491. }
  492. FuncRegistry_ = NKikimr::NMiniKQL::CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace,
  493. NKikimr::NMiniKQL::CreateBuiltinRegistry(), true, RunOptions_.UdfsPaths);
  494. NSQLTranslation::TTranslators translators(
  495. nullptr,
  496. NSQLTranslationV1::MakeTranslator(),
  497. NSQLTranslationPG::MakeTranslator()
  498. );
  499. TExprContext ctx;
  500. if (RunOptions_.PgSupport) {
  501. ctx.NextUniqueId = NPg::GetSqlLanguageParser()->GetContext().NextUniqueId;
  502. }
  503. IModuleResolver::TPtr moduleResolver;
  504. if (RunOptions_.MountConfig) {
  505. TModulesTable modules;
  506. FillUserDataTableFromFileSystem(*RunOptions_.MountConfig, RunOptions_.DataTable);
  507. if (!CompileLibraries(translators, RunOptions_.DataTable, ctx, modules, RunOptions_.OptimizeLibs && RunOptions_.Mode >= ERunMode::Validate)) {
  508. *RunOptions_.ErrStream << "Errors on compile libraries:" << Endl;
  509. ctx.IssueManager.GetIssues().PrintTo(*RunOptions_.ErrStream);
  510. return -1;
  511. }
  512. moduleResolver = std::make_shared<TModuleResolver>(translators, std::move(modules), ctx.NextUniqueId, ClusterMapping_, RunOptions_.SqlFlags, RunOptions_.Mode >= ERunMode::Validate);
  513. } else {
  514. if (!GetYqlDefaultModuleResolver(ctx, moduleResolver, ClusterMapping_, RunOptions_.OptimizeLibs && RunOptions_.Mode >= ERunMode::Validate)) {
  515. *RunOptions_.ErrStream << "Errors loading default YQL libraries:" << Endl;
  516. ctx.IssueManager.GetIssues().PrintTo(*RunOptions_.ErrStream);
  517. return -1;
  518. }
  519. }
  520. TExprContext::TFreezeGuard freezeGuard(ctx);
  521. if (RunOptions_.Mode >= ERunMode::Validate) {
  522. std::vector<NFS::IDownloaderPtr> downloaders;
  523. for (auto& factory: FsDownloadFactories_) {
  524. if (auto download = factory()) {
  525. downloaders.push_back(std::move(download));
  526. }
  527. }
  528. FileStorage_ = WithAsync(CreateFileStorage(*RunOptions_.FsConfig, downloaders));
  529. }
  530. IUdfResolver::TPtr udfResolver;
  531. TUdfIndex::TPtr udfIndex;
  532. if (FileStorage_ && RunOptions_.ScanUdfs) {
  533. if (!RunOptions_.UdfResolverPath) {
  534. Cerr << "udf-resolver path must be specified when use 'scan-udfs'";
  535. return -1;
  536. }
  537. udfResolver = NCommon::CreateOutProcUdfResolver(FuncRegistry_.Get(), FileStorage_, RunOptions_.UdfResolverPath, {}, {}, RunOptions_.UdfResolverFilterSyscalls, {});
  538. RunOptions_.PrintInfo(TStringBuilder() << TInstant::Now().ToStringLocalUpToSeconds() << " Udf scanning started for " << RunOptions_.UdfsPaths.size() << " udfs ...");
  539. udfIndex = new TUdfIndex();
  540. LoadRichMetadataToUdfIndex(*udfResolver, RunOptions_.UdfsPaths, false, TUdfIndex::EOverrideMode::RaiseError, *udfIndex);
  541. RunOptions_.PrintInfo(TStringBuilder() << TInstant::Now().ToStringLocalUpToSeconds() << " UdfIndex done.");
  542. udfResolver = NCommon::CreateUdfResolverWithIndex(udfIndex, udfResolver, FileStorage_);
  543. RunOptions_.PrintInfo(TStringBuilder() << TInstant::Now().ToStringLocalUpToSeconds() << " Udfs scanned");
  544. } else {
  545. udfResolver = FileStorage_ && RunOptions_.UdfResolverPath
  546. ? NCommon::CreateOutProcUdfResolver(FuncRegistry_.Get(), FileStorage_, RunOptions_.UdfResolverPath, {}, {}, RunOptions_.UdfResolverFilterSyscalls, {})
  547. : NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get(), FileStorage_, true);
  548. }
  549. TVector<TDataProviderInitializer> dataProvidersInit;
  550. if (RunOptions_.PgSupport) {
  551. dataProvidersInit.push_back(GetPgDataProviderInitializer());
  552. }
  553. for (auto& factory: ProviderFactories_) {
  554. if (auto init = factory()) {
  555. dataProvidersInit.push_back(std::move(init));
  556. }
  557. }
  558. TVector<IUrlListerPtr> urlListers;
  559. for (auto& factory: UrlListerFactories_) {
  560. if (auto listener = factory()) {
  561. urlListers.push_back(std::move(listener));
  562. }
  563. }
  564. TProgramFactory factory(RunOptions_.UseRepeatableRandomAndTimeProviders, FuncRegistry_.Get(), ctx.NextUniqueId, dataProvidersInit, Name_);
  565. factory.AddUserDataTable(RunOptions_.DataTable);
  566. factory.SetModules(moduleResolver);
  567. factory.SetFileStorage(FileStorage_);
  568. if (RunOptions_.GatewaysConfig && RunOptions_.GatewaysConfig->HasFs()) {
  569. factory.SetUrlPreprocessing(new NYql::TUrlPreprocessing(*RunOptions_.GatewaysConfig));
  570. }
  571. factory.SetUdfIndex(udfIndex, new TUdfIndexPackageSet());
  572. factory.SetUdfResolver(udfResolver);
  573. factory.SetGatewaysConfig(RunOptions_.GatewaysConfig.Get());
  574. factory.SetCredentials(RunOptions_.Credentials);
  575. factory.EnableRangeComputeFor();
  576. if (!urlListers.empty()) {
  577. factory.SetUrlListerManager(MakeUrlListerManager(urlListers));
  578. }
  579. int result = DoRun(factory);
  580. if (result == 0 && EQPlayerMode::Capture == RunOptions_.QPlayerMode) {
  581. RunOptions_.QPlayerContext.GetWriter()->Commit().GetValueSync();
  582. }
  583. return result;
  584. }
  585. int TFacadeRunner::DoRun(TProgramFactory& factory) {
  586. TProgramPtr program = factory.Create(RunOptions_.ProgramFile, RunOptions_.ProgramText, RunOptions_.OperationId, EHiddenMode::Disable, RunOptions_.QPlayerContext);;
  587. if (RunOptions_.Params) {
  588. program->SetParametersYson(RunOptions_.Params);
  589. }
  590. if (RunOptions_.EnableResultPosition) {
  591. program->EnableResultPosition();
  592. }
  593. if (ProgressWriter_) {
  594. program->SetProgressWriter(ProgressWriter_);
  595. }
  596. program->SetUseTableMetaFromGraph(RunOptions_.UseMetaFromGrpah);
  597. program->SetValidateOptions(RunOptions_.ValidateMode);
  598. bool fail = false;
  599. if (RunOptions_.ProgramType != EProgramType::SExpr) {
  600. RunOptions_.PrintInfo("Parse SQL...");
  601. google::protobuf::Arena arena;
  602. NSQLTranslation::TTranslationSettings settings;
  603. settings.Arena = &arena;
  604. settings.PgParser = EProgramType::Pg == RunOptions_.ProgramType;
  605. settings.ClusterMapping = ClusterMapping_;
  606. settings.Flags = RunOptions_.SqlFlags;
  607. settings.SyntaxVersion = RunOptions_.SyntaxVersion;
  608. settings.AnsiLexer = RunOptions_.AnsiLexer;
  609. settings.TestAntlr4 = RunOptions_.TestAntlr4;
  610. settings.V0Behavior = NSQLTranslation::EV0Behavior::Report;
  611. settings.AssumeYdbOnClusterWithSlash = RunOptions_.AssumeYdbOnClusterWithSlash;
  612. if (ERunMode::Discover == RunOptions_.Mode) {
  613. settings.Mode = NSQLTranslation::ESqlMode::DISCOVERY;
  614. }
  615. if (!program->ParseSql(settings)) {
  616. program->PrintErrorsTo(*RunOptions_.ErrStream);
  617. fail = true;
  618. }
  619. if (!fail && RunOptions_.TestSqlFormat && 1 == RunOptions_.SyntaxVersion) {
  620. TString formattedProgramText;
  621. NYql::TIssues issues;
  622. auto formatter = NSQLFormat::MakeSqlFormatter(settings);
  623. if (!formatter->Format(RunOptions_.ProgramText, formattedProgramText, issues)) {
  624. *RunOptions_.ErrStream << "Format failed" << Endl;
  625. issues.PrintTo(*RunOptions_.ErrStream);
  626. return -1;
  627. }
  628. auto frmProgram = factory.Create("formatted SQL", formattedProgramText);
  629. if (!frmProgram->ParseSql(settings)) {
  630. frmProgram->PrintErrorsTo(*RunOptions_.ErrStream);
  631. return -1;
  632. }
  633. TStringStream srcQuery, frmQuery;
  634. program->AstRoot()->PrettyPrintTo(srcQuery, PRETTY_FLAGS);
  635. frmProgram->AstRoot()->PrettyPrintTo(frmQuery, PRETTY_FLAGS);
  636. if (srcQuery.Str() != frmQuery.Str()) {
  637. *RunOptions_.ErrStream << "source query's AST and formatted query's AST are not same" << Endl;
  638. return -1;
  639. }
  640. }
  641. } else {
  642. RunOptions_.PrintInfo("Parse YQL...");
  643. if (!program->ParseYql()) {
  644. program->PrintErrorsTo(*RunOptions_.ErrStream);
  645. fail = true;
  646. }
  647. }
  648. if (RunOptions_.TraceOptStream) {
  649. if (auto ast = program->GetQueryAst()) {
  650. *RunOptions_.TraceOptStream << *ast << Endl;
  651. }
  652. }
  653. if (fail) {
  654. return -1;
  655. }
  656. if (RunOptions_.PrintAst) {
  657. program->AstRoot()->PrettyPrintTo(Cout, PRETTY_FLAGS);
  658. }
  659. if (ERunMode::Parse == RunOptions_.Mode) {
  660. return 0;
  661. }
  662. RunOptions_.PrintInfo("Compile program...");
  663. if (!program->Compile(RunOptions_.User)) {
  664. program->PrintErrorsTo(*RunOptions_.ErrStream);
  665. fail = true;
  666. }
  667. if (RunOptions_.TraceOptStream) {
  668. program->Print(RunOptions_.TraceOptStream, nullptr);
  669. }
  670. if (fail) {
  671. return -1;
  672. }
  673. if (ERunMode::Compile == RunOptions_.Mode) {
  674. if (RunOptions_.ExprStream) {
  675. auto baseAst = ConvertToAst(*program->ExprRoot(), program->ExprCtx(), NYql::TExprAnnotationFlags::None, true);
  676. baseAst.Root->PrettyPrintTo(*RunOptions_.ExprStream, PRETTY_FLAGS);
  677. }
  678. return 0;
  679. }
  680. TProgram::TStatus status = DoRunProgram(program);
  681. if (ERunMode::Peephole == RunOptions_.Mode && RunOptions_.ExprStream && program->ExprRoot()) {
  682. auto ast = ConvertToAst(*program->ExprRoot(), program->ExprCtx(), RunOptions_.WithTypes ? TExprAnnotationFlags::Types : TExprAnnotationFlags::None, true);
  683. ui32 prettyFlags = TAstPrintFlags::ShortQuote;
  684. if (!RunOptions_.WithTypes) {
  685. prettyFlags |= TAstPrintFlags::PerLine;
  686. }
  687. ast.Root->PrettyPrintTo(*RunOptions_.ExprStream, prettyFlags);
  688. }
  689. if (RunOptions_.WithFinalIssues) {
  690. program->FinalizeIssues();
  691. }
  692. program->PrintErrorsTo(*RunOptions_.ErrStream);
  693. if (status == TProgram::TStatus::Error) {
  694. if (RunOptions_.TraceOptStream) {
  695. program->Print(RunOptions_.TraceOptStream, nullptr);
  696. }
  697. return -1;
  698. }
  699. if (!RunOptions_.FullExpr && ERunMode::Peephole != RunOptions_.Mode) {
  700. program->Print(RunOptions_.ExprStream, RunOptions_.PlanStream, /*cleanPlan*/true);
  701. }
  702. program->ConfigureYsonResultFormat(RunOptions_.ResultsFormat);
  703. if (RunOptions_.ResultStream) {
  704. RunOptions_.PrintInfo("Getting results...");
  705. if (ERunMode::Discover == RunOptions_.Mode) {
  706. if (auto data = program->GetDiscoveredData()) {
  707. *RunOptions_.ResultStream << *data;
  708. }
  709. } else if (ERunMode::Lineage == RunOptions_.Mode) {
  710. if (auto data = program->GetLineage()) {
  711. TStringInput in(*data);
  712. NYson::ReformatYsonStream(&in, RunOptions_.ResultStream, RunOptions_.ResultsFormat);
  713. }
  714. } else if (program->HasResults()) {
  715. if (RunOptions_.ValidateResultFormat) {
  716. auto str = program->ResultsAsString();
  717. if (!str.empty()) {
  718. auto node = NYT::NodeFromYsonString(str);
  719. for (const auto& r : NResult::ParseResponse(node)) {
  720. for (const auto& write : r.Writes) {
  721. if (write.Type) {
  722. NResult::TEmptyTypeVisitor visitor;
  723. NResult::ParseType(*write.Type, visitor);
  724. }
  725. if (write.Type && write.Data) {
  726. NResult::TEmptyDataVisitor visitor;
  727. NResult::ParseData(*write.Type, *write.Data, visitor);
  728. }
  729. }
  730. }
  731. }
  732. RunOptions_.ResultStream->Write(str.data(), str.size());
  733. } else {
  734. *RunOptions_.ResultStream << program->ResultsAsString();
  735. }
  736. }
  737. }
  738. if (RunOptions_.StatStream) {
  739. if (auto st = program->GetStatistics(!RunOptions_.FullStatistics)) {
  740. *RunOptions_.StatStream << *st;
  741. }
  742. }
  743. RunOptions_.PrintInfo("");
  744. RunOptions_.PrintInfo("Done");
  745. return 0;
  746. }
  747. TProgram::TStatus TFacadeRunner::DoRunProgram(TProgramPtr program) {
  748. TProgram::TStatus status = TProgram::TStatus::Ok;
  749. auto defOptConfig = TOptPipelineConfigurator(program, RunOptions_.FullExpr ? RunOptions_.PlanStream : nullptr, RunOptions_.FullExpr ? RunOptions_.ExprStream : nullptr, RunOptions_.WithTypes);
  750. IPipelineConfigurator* optConfig = OptPipelineConfigurator_ ? OptPipelineConfigurator_ : &defOptConfig;
  751. if (ERunMode::Peephole == RunOptions_.Mode) {
  752. RunOptions_.PrintInfo("Peephole...");
  753. auto defConfig = TPeepHolePipelineConfigurator();
  754. IPipelineConfigurator* config = PeepholePipelineConfigurator_ ? PeepholePipelineConfigurator_ : &defConfig;
  755. status = program->OptimizeWithConfig(RunOptions_.User, *config);
  756. } else if (ERunMode::Run == RunOptions_.Mode) {
  757. RunOptions_.PrintInfo("Run program...");
  758. status = program->RunWithConfig(RunOptions_.User, *optConfig);
  759. } else if (ERunMode::Optimize == RunOptions_.Mode) {
  760. RunOptions_.PrintInfo("Optimize program...");
  761. status = program->OptimizeWithConfig(RunOptions_.User, *optConfig);
  762. } else if (ERunMode::Validate == RunOptions_.Mode) {
  763. RunOptions_.PrintInfo("Validate program...");
  764. status = program->Validate(RunOptions_.User, RunOptions_.ExprStream, RunOptions_.WithTypes);
  765. } else if (ERunMode::Discover == RunOptions_.Mode) {
  766. RunOptions_.PrintInfo("Discover program...");
  767. status = program->Discover(RunOptions_.User);
  768. } else if (ERunMode::Lineage == RunOptions_.Mode) {
  769. RunOptions_.PrintInfo("Calculating lineage in program...");
  770. status = program->LineageWithConfig(RunOptions_.User, *optConfig);
  771. }
  772. return status;
  773. }
  774. } // NYql