yql_facade_run.cpp 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903
  1. #include "yql_facade_run.h"
  2. #include <yql/essentials/providers/pg/provider/yql_pg_provider.h>
  3. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  4. #include <yql/essentials/providers/common/proto/gateways_config.pb.h>
  5. #include <yql/essentials/providers/common/udf_resolve/yql_outproc_udf_resolver.h>
  6. #include <yql/essentials/providers/common/udf_resolve/yql_simple_udf_resolver.h>
  7. #include <yql/essentials/providers/common/udf_resolve/yql_udf_resolver_with_index.h>
  8. #include <yql/essentials/core/yql_user_data_storage.h>
  9. #include <yql/essentials/core/yql_udf_resolver.h>
  10. #include <yql/essentials/core/yql_udf_index.h>
  11. #include <yql/essentials/core/yql_udf_index_package_set.h>
  12. #include <yql/essentials/core/yql_library_compiler.h>
  13. #include <yql/essentials/core/yql_type_annotation.h>
  14. #include <yql/essentials/core/pg_ext/yql_pg_ext.h>
  15. #include <yql/essentials/core/services/mounts/yql_mounts.h>
  16. #include <yql/essentials/core/services/yql_out_transformers.h>
  17. #include <yql/essentials/core/file_storage/file_storage.h>
  18. #include <yql/essentials/core/file_storage/proto/file_storage.pb.h>
  19. #include <yql/essentials/core/peephole_opt/yql_opt_peephole_physical.h>
  20. #include <yql/essentials/core/facade/yql_facade.h>
  21. #include <yql/essentials/core/url_lister/url_lister_manager.h>
  22. #include <yql/essentials/core/url_preprocessing/url_preprocessing.h>
  23. #include <yql/essentials/core/qplayer/storage/file/yql_qstorage_file.h>
  24. #include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
  25. #include <yql/essentials/minikql/mkql_function_registry.h>
  26. #include <yql/essentials/ast/yql_expr.h>
  27. #include <yql/essentials/parser/pg_wrapper/interface/parser.h>
  28. #include <yql/essentials/parser/pg_catalog/catalog.h>
  29. #include <yql/essentials/public/udf/udf_version.h>
  30. #include <yql/essentials/public/udf/udf_registrator.h>
  31. #include <yql/essentials/public/udf/udf_validate.h>
  32. #include <yql/essentials/public/result_format/yql_result_format_response.h>
  33. #include <yql/essentials/public/result_format/yql_result_format_type.h>
  34. #include <yql/essentials/public/result_format/yql_result_format_data.h>
  35. #include <yql/essentials/utils/failure_injector/failure_injector.h>
  36. #include <yql/essentials/utils/backtrace/backtrace.h>
  37. #include <yql/essentials/utils/log/log.h>
  38. #include <yql/essentials/protos/yql_mount.pb.h>
  39. #include <yql/essentials/protos/pg_ext.pb.h>
  40. #include <yql/essentials/sql/settings/translation_settings.h>
  41. #include <yql/essentials/sql/v1/format/sql_format.h>
  42. #include <yql/essentials/sql/v1/sql.h>
  43. #include <yql/essentials/sql/sql.h>
  44. #include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
  45. #include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h>
  46. #include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
  47. #include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h>
  48. #include <library/cpp/resource/resource.h>
  49. #include <library/cpp/yson/node/node_io.h>
  50. #include <library/cpp/yson/writer.h>
  51. #include <google/protobuf/text_format.h>
  52. #include <google/protobuf/arena.h>
  53. #include <util/stream/output.h>
  54. #include <util/stream/file.h>
  55. #include <util/stream/null.h>
  56. #include <util/system/user.h>
  57. #include <util/system/env.h>
  58. #include <util/string/split.h>
  59. #include <util/string/join.h>
  60. #include <util/string/builder.h>
  61. #include <util/string/strip.h>
  62. #include <util/generic/vector.h>
  63. #include <util/generic/ptr.h>
  64. #include <util/generic/yexception.h>
  65. #include <util/datetime/base.h>
  66. #ifdef __unix__
  67. #include <sys/resource.h>
  68. #endif
  69. namespace {
  70. const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote |
  71. NYql::TAstPrintFlags::AdaptArbitraryContent;
  72. template <typename TMessage>
  73. THolder<TMessage> ParseProtoConfig(const TString& cfgFile) {
  74. auto config = MakeHolder<TMessage>();
  75. TString configData = TFileInput(cfgFile).ReadAll();
  76. using ::google::protobuf::TextFormat;
  77. if (!TextFormat::ParseFromString(configData, config.Get())) {
  78. throw yexception() << "Bad format of config file " << cfgFile;
  79. }
  80. return config;
  81. }
  82. template <typename TMessage>
  83. THolder<TMessage> ParseProtoFromResource(TStringBuf resourceName) {
  84. if (!NResource::Has(resourceName)) {
  85. return {};
  86. }
  87. auto config = MakeHolder<TMessage>();
  88. TString configData = NResource::Find(resourceName);
  89. using ::google::protobuf::TextFormat;
  90. if (!TextFormat::ParseFromString(configData, config.Get())) {
  91. throw yexception() << "Bad format of config " << resourceName;
  92. }
  93. return config;
  94. }
  95. class TOptPipelineConfigurator : public NYql::IPipelineConfigurator {
  96. public:
  97. TOptPipelineConfigurator(NYql::TProgramPtr prg, IOutputStream* planStream, IOutputStream* exprStream, bool withTypes)
  98. : Program_(std::move(prg))
  99. , PlanStream_(planStream)
  100. , ExprStream_(exprStream)
  101. , WithTypes_(withTypes)
  102. {
  103. }
  104. void AfterCreate(NYql::TTransformationPipeline* pipeline) const final {
  105. Y_UNUSED(pipeline);
  106. }
  107. void AfterTypeAnnotation(NYql::TTransformationPipeline* pipeline) const final {
  108. pipeline->Add(NYql::TExprLogTransformer::Sync("OptimizedExpr", NYql::NLog::EComponent::Core, NYql::NLog::ELevel::TRACE),
  109. "OptTrace", NYql::TIssuesIds::CORE, "OptTrace");
  110. }
  111. void AfterOptimize(NYql::TTransformationPipeline* pipeline) const final {
  112. if (ExprStream_) {
  113. pipeline->Add(NYql::TExprOutputTransformer::Sync(Program_->ExprRoot(), ExprStream_, WithTypes_), "AstOutput");
  114. }
  115. if (PlanStream_) {
  116. pipeline->Add(NYql::TPlanOutputTransformer::Sync(PlanStream_, Program_->GetPlanBuilder(), Program_->GetOutputFormat()), "PlanOutput");
  117. }
  118. }
  119. private:
  120. NYql::TProgramPtr Program_;
  121. IOutputStream* PlanStream_;
  122. IOutputStream* ExprStream_;
  123. bool WithTypes_;
  124. };
  125. class TPeepHolePipelineConfigurator : public NYql::IPipelineConfigurator {
  126. public:
  127. TPeepHolePipelineConfigurator() {
  128. }
  129. void AfterCreate(NYql::TTransformationPipeline* pipeline) const final {
  130. Y_UNUSED(pipeline);
  131. }
  132. void AfterTypeAnnotation(NYql::TTransformationPipeline* pipeline) const final {
  133. pipeline->Add(NYql::TExprLogTransformer::Sync("OptimizedExpr", NYql::NLog::EComponent::Core, NYql::NLog::ELevel::TRACE),
  134. "OptTrace", NYql::TIssuesIds::CORE, "OptTrace");
  135. }
  136. void AfterOptimize(NYql::TTransformationPipeline* pipeline) const final {
  137. pipeline->Add(NYql::MakePeepholeOptimization(pipeline->GetTypeAnnotationContext()), "PeepHole");
  138. }
  139. };
  140. } // unnamed
  141. namespace NYql {
  142. TFacadeRunOptions::TFacadeRunOptions() {
  143. }
  144. TFacadeRunOptions::~TFacadeRunOptions() {
  145. }
  146. void TFacadeRunOptions::InitLogger() {
  147. if (Verbosity != LOG_DEF_PRIORITY || ShowLog) {
  148. NLog::ELevel level = NLog::ELevelHelpers::FromInt(Verbosity);
  149. if (ShowLog) {
  150. level = Max(level, NLog::ELevel::DEBUG);
  151. }
  152. NLog::EComponentHelpers::ForEach([level](NLog::EComponent c) {
  153. NYql::NLog::YqlLogger().SetComponentLevel(c, level);
  154. });
  155. }
  156. if (TraceOptStream) {
  157. NLog::YqlLogger().SetComponentLevel(NLog::EComponent::Core, NLog::ELevel::TRACE);
  158. NLog::YqlLogger().SetComponentLevel(NLog::EComponent::CoreEval, NLog::ELevel::TRACE);
  159. NLog::YqlLogger().SetComponentLevel(NLog::EComponent::CorePeepHole, NLog::ELevel::TRACE);
  160. }
  161. }
  162. void TFacadeRunOptions::PrintInfo(const TString& msg) {
  163. if (!NoDebug && Verbosity >= TLOG_INFO) {
  164. Cerr << msg << Endl;
  165. }
  166. }
  167. void TFacadeRunOptions::Parse(int argc, const char *argv[]) {
  168. User = GetUsername();
  169. if (EnableCredentials) {
  170. Token = GetEnv("YQL_TOKEN");
  171. if (!Token) {
  172. const TString home = GetEnv("HOME");
  173. auto tokenPath = TFsPath(home) / ".yql" / "token";
  174. if (tokenPath.Exists()) {
  175. Token = StripStringRight(TFileInput(tokenPath).ReadAll());
  176. }
  177. }
  178. }
  179. NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();
  180. opts.AddHelpOption();
  181. opts.AddLongOption('p', "program", "Program file (use - to read from stdin)").Required().RequiredArgument("FILE")
  182. .Handler1T<TString>([this](const TString& file) {
  183. ProgramFile = file;
  184. if (ProgramFile == "-") {
  185. ProgramFile = "-stdin-";
  186. ProgramText = Cin.ReadAll();
  187. } else {
  188. ProgramText = TFileInput(ProgramFile).ReadAll();
  189. }
  190. User = GetUsername();
  191. });
  192. opts.AddLongOption('s', "sql", "Program is SQL query").NoArgument().StoreValue(&ProgramType, EProgramType::Sql);
  193. if (PgSupport) {
  194. opts.AddLongOption("pg", "Program has PG syntax").NoArgument().StoreValue(&ProgramType, EProgramType::Pg);
  195. opts.AddLongOption("pg-ext", "Pg extensions config file").Optional().RequiredArgument("FILE")
  196. .Handler1T<TString>([this](const TString& file) {
  197. PgExtConfig = ParseProtoConfig<NProto::TPgExtensions>(file);
  198. });
  199. }
  200. opts.AddLongOption('f', "file", "Additional files").RequiredArgument("name@path")
  201. .KVHandler([this](TString name, TString path) {
  202. if (name.empty() || path.empty()) {
  203. throw yexception() << "Incorrect file mapping, expected form name@path, e.g. MyFile@file.txt";
  204. }
  205. auto& entry = DataTable[NYql::TUserDataKey::File(NYql::GetDefaultFilePrefix() + name)];
  206. entry.Type = NYql::EUserDataType::PATH;
  207. entry.Data = path;
  208. }, '@');
  209. opts.AddLongOption('U', "url", "Additional urls").RequiredArgument("name@path")
  210. .KVHandler([this](TString name, TString url) {
  211. if (name.empty() || url.empty()) {
  212. throw yexception() << "url mapping, expected form name@url, e.g. MyUrl@http://example.com/file";
  213. }
  214. auto& entry = DataTable[NYql::TUserDataKey::File(NYql::GetDefaultFilePrefix() + name)];
  215. entry.Type = NYql::EUserDataType::URL;
  216. entry.Data = url;
  217. }, '@');
  218. opts.AddLongOption('m', "mounts", "Mount points config file.").Optional().RequiredArgument("FILE")
  219. .Handler1T<TString>([this](const TString& file) {
  220. MountConfig = ParseProtoConfig<NYqlMountConfig::TMountConfig>(file);
  221. });
  222. opts.AddLongOption("params-file", "Query parameters values in YSON format").Optional().RequiredArgument("FILE")
  223. .Handler1T<TString>([this](const TString& file) {
  224. Params = TFileInput(file).ReadAll();
  225. });
  226. opts.AddLongOption('G', "gateways", TStringBuilder() << "Used gateways, available: " << JoinSeq(",", SupportedGateways_)).DefaultValue(JoinSeq(",", GatewayTypes))
  227. .Handler1T<TString>([this](const TString& gateways) {
  228. ::StringSplitter(gateways).Split(',').Consume([&](const TStringBuf& val) {
  229. if (!SupportedGateways_.contains(val)) {
  230. throw yexception() << "Unsupported gateway \"" << val << '"';
  231. }
  232. GatewayTypes.emplace(val);
  233. });
  234. });
  235. opts.AddLongOption("gateways-cfg", "Gateways configuration file").Optional().RequiredArgument("FILE")
  236. .Handler1T<TString>([this](const TString& file) {
  237. GatewaysConfig = ParseProtoConfig<TGatewaysConfig>(file);
  238. });
  239. opts.AddLongOption("fs-cfg", "Fs configuration file").Optional().RequiredArgument("FILE")
  240. .Handler1T<TString>([this](const TString& file) {
  241. FsConfig = MakeHolder<TFileStorageConfig>();
  242. LoadFsConfigFromFile(file, *FsConfig);
  243. });
  244. opts.AddLongOption('u', "udf", "Load shared library with UDF by given path").RequiredArgument("PATH").AppendTo(&UdfsPaths);
  245. opts.AddLongOption("udfs-dir", "Load all shared libraries with UDFs found in given directory").RequiredArgument("DIR")
  246. .Handler1T<TString>([this](const TString& dir) {
  247. NKikimr::NMiniKQL::FindUdfsInDir(dir, &UdfsPaths);
  248. });
  249. opts.AddLongOption("udf-resolver", "Path to udf-resolver").Optional().RequiredArgument("PATH").StoreResult(&UdfResolverPath);
  250. opts.AddLongOption("udf-resolver-filter-syscalls", "Filter syscalls in udf resolver").Optional().NoArgument().SetFlag(&UdfResolverFilterSyscalls);
  251. opts.AddLongOption("scan-udfs", "Scan specified udfs with external udf-resolver to use static function registry").NoArgument().SetFlag(&ScanUdfs);
  252. opts.AddLongOption("parse-only", "Parse program and exit").NoArgument().StoreValue(&Mode, ERunMode::Parse);
  253. opts.AddLongOption("compile-only", "Compile program and exit").NoArgument().StoreValue(&Mode, ERunMode::Compile);
  254. opts.AddLongOption("validate", "Validate program and exit").NoArgument().StoreValue(&Mode, ERunMode::Validate);
  255. opts.AddLongOption("lineage", "Calculate program lineage and exit").NoArgument().StoreValue(&Mode, ERunMode::Lineage);
  256. opts.AddLongOption('O',"optimize", "Optimize program and exit").NoArgument().StoreValue(&Mode, ERunMode::Optimize);
  257. opts.AddLongOption('D', "discover", "Discover tables in the program and exit").NoArgument().StoreValue(&Mode, ERunMode::Discover);
  258. opts.AddLongOption("peephole", "Perform peephole program optimization and exit").NoArgument().StoreValue(&Mode, ERunMode::Peephole);
  259. opts.AddLongOption('R',"run", "Run program (use by default)").NoArgument().StoreValue(&Mode, ERunMode::Run);
  260. opts.AddLongOption('L', "show-log", "Show transformation log").Optional().NoArgument().SetFlag(&ShowLog);
  261. opts.AddLongOption('v', "verbosity", "Log verbosity level").Optional().RequiredArgument("LEVEL").StoreResult(&Verbosity);
  262. opts.AddLongOption("print-ast", "Print AST after loading").NoArgument().SetFlag(&PrintAst);
  263. opts.AddLongOption("print-expr", "Print rebuild AST before execution").NoArgument()
  264. .Handler0([this]() {
  265. if (!ExprStream) {
  266. ExprStream = &Cout;
  267. }
  268. });
  269. opts.AddLongOption("with-types", "Print types annotation").NoArgument().SetFlag(&WithTypes);
  270. opts.AddLongOption("trace-opt", "Print AST in the begin of each transformation").NoArgument()
  271. .Handler0([this]() {
  272. TraceOptStream = &Cerr;
  273. });
  274. opts.AddLongOption("expr-file", "Print AST to that file instead of stdout").Optional().RequiredArgument("FILE")
  275. .Handler1T<TString>([this](const TString& file) {
  276. ExprStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file);
  277. ExprStream = ExprStreamHolder_.Get();
  278. });
  279. opts.AddLongOption("print-result", "Print program execution result to stdout").NoArgument()
  280. .Handler0([this]() {
  281. if (!ResultStream) {
  282. ResultStream = &Cout;
  283. }
  284. });
  285. opts.AddLongOption("format", "Results format")
  286. .Optional()
  287. .RequiredArgument("STR")
  288. .Choices(THashSet<TString>{"text", "binary", "pretty"})
  289. .Handler1T<TString>([this](const TString& val) {
  290. if (val == "text") {
  291. ResultsFormat = NYson::EYsonFormat::Text;
  292. } else if (val == "binary") {
  293. ResultsFormat = NYson::EYsonFormat::Binary;
  294. } else if (val == "pretty") {
  295. ResultsFormat = NYson::EYsonFormat::Pretty;
  296. } else {
  297. throw yexception() << "Unknown result format " << val;
  298. }
  299. });
  300. opts.AddLongOption("result-file", "Print program execution result to file").Optional().RequiredArgument("FILE")
  301. .Handler1T<TString>([this](const TString& file) {
  302. ResultStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file);
  303. ResultStream = ResultStreamHolder_.Get();
  304. });
  305. opts.AddLongOption('P',"trace-plan", "Print plan before execution").NoArgument()
  306. .Handler0([this]() {
  307. if (!PlanStream) {
  308. PlanStream = &Cerr;
  309. }
  310. });
  311. opts.AddLongOption("plan-file", "Print program plan to file").Optional().RequiredArgument("FILE")
  312. .Handler1T<TString>([this](const TString& file) {
  313. PlanStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file);
  314. PlanStream = PlanStreamHolder_.Get();
  315. });
  316. opts.AddLongOption("err-file", "Print validate/optimize/runtime errors to file")
  317. .Handler1T<TString>([this](const TString& file) {
  318. ErrStreamHolder_ = MakeHolder<TFixedBufferFileOutput>(file);
  319. ErrStream = ErrStreamHolder_.Get();
  320. });
  321. opts.AddLongOption("full-expr", "Avoid buffering of expr/plan").NoArgument().SetFlag(&FullExpr);
  322. opts.AddLongOption("mem-limit", "Set memory limit in megabytes")
  323. .Handler1T<ui32>(0, [](ui32 memLimit) {
  324. if (memLimit) {
  325. #ifdef __unix__
  326. auto memLimitBytes = memLimit * 1024 * 1024;
  327. struct rlimit rl;
  328. if (getrlimit(RLIMIT_AS, &rl)) {
  329. throw TSystemError() << "Cannot getrlimit(RLIMIT_AS)";
  330. }
  331. rl.rlim_cur = memLimitBytes;
  332. if (setrlimit(RLIMIT_AS, &rl)) {
  333. throw TSystemError() << "Cannot setrlimit(RLIMIT_AS) to " << memLimitBytes << " bytes";
  334. }
  335. #else
  336. throw yexception() << "Memory limit can not be set on this platfrom";
  337. #endif
  338. }
  339. });
  340. opts.AddLongOption("validate-mode", "Validate udf mode, available values: " + NUdf::ValidateModeAvailables())
  341. .DefaultValue(NUdf::ValidateModeAsStr(NUdf::EValidateMode::Greedy))
  342. .Handler1T<TString>([this](const TString& mode) {
  343. ValidateMode = NUdf::ValidateModeByStr(mode);
  344. });
  345. opts.AddLongOption("stat", "Print execution statistics").Optional().OptionalArgument("FILE")
  346. .Handler1T<TString>([this](const TString& file) {
  347. if (file) {
  348. StatStreamHolder_ = MakeHolder<TFileOutput>(file);
  349. StatStream = StatStreamHolder_.Get();
  350. } else {
  351. StatStream = &Cerr;
  352. }
  353. });
  354. opts.AddLongOption("full-stat", "Output full execution statistics").Optional().NoArgument().SetFlag(&FullStatistics);
  355. opts.AddLongOption("sql-flags", "SQL translator pragma flags").SplitHandler(&SqlFlags, ',');
  356. opts.AddLongOption("syntax-version", "SQL syntax version").StoreResult(&SyntaxVersion).DefaultValue(1);
  357. opts.AddLongOption("ansi-lexer", "Use ansi lexer").NoArgument().SetFlag(&AnsiLexer);
  358. opts.AddLongOption("assume-ydb-on-slash", "Assume YDB provider if cluster name starts with '/'").NoArgument().SetFlag(&AssumeYdbOnClusterWithSlash);
  359. opts.AddLongOption("test-antlr4", "Check antlr4 parser").NoArgument().SetFlag(&TestAntlr4);
  360. opts.AddLongOption("with-final-issues", "Include some final messages (like statistic) in issues").NoArgument().SetFlag(&WithFinalIssues);
  361. if (FailureInjectionSupport) {
  362. opts.AddLongOption("failure-inject", "Activate failure injection")
  363. .Optional()
  364. .RequiredArgument("INJECTION_NAME=FAIL_COUNT or INJECTION_NAME=SKIP_COUNT/FAIL_COUNT")
  365. .KVHandler([](TString name, TString value) {
  366. TFailureInjector::Activate();
  367. TStringBuf fail = value;
  368. TStringBuf skip;
  369. if (TStringBuf(value).TrySplit('/', skip, fail)) {
  370. TFailureInjector::Set(name, FromString<ui32>(skip), FromString<ui32>(fail));
  371. } else {
  372. TFailureInjector::Set(name, 0, FromString<ui32>(fail));
  373. }
  374. });
  375. }
  376. if (EnableCredentials) {
  377. opts.AddLongOption("token", "YQL token")
  378. .Optional()
  379. .RequiredArgument("VALUE")
  380. .StoreResult(&Token);
  381. opts.AddLongOption("custom-tokens", "Custom tokens")
  382. .Optional()
  383. .RequiredArgument("NAME=VALUE or NAME=@PATH")
  384. .KVHandler([this](TString key, TString value) {
  385. if (value.StartsWith('@')) {
  386. value = StripStringRight(TFileInput(value.substr(1)).ReadAll());
  387. }
  388. Credentials->AddCredential(key, TCredential("custom", "", value));
  389. });
  390. }
  391. if (EnableQPlayer) {
  392. opts.AddLongOption("qstorage-dir", "Directory for QStorage").RequiredArgument("DIR")
  393. .Handler1T<TString>([this](const TString& dir) {
  394. QPlayerStorage_ = MakeFileQStorage(dir);
  395. });
  396. opts.AddLongOption("op-id", "QStorage operation id").StoreResult(&OperationId).DefaultValue("dummy_op");
  397. opts.AddLongOption("capture", "Write query metadata to QStorage").NoArgument()
  398. .Handler0([this]() {
  399. if (EQPlayerMode::Replay == QPlayerMode) {
  400. throw yexception() << "replay and capture options can't be used simultaneously";
  401. }
  402. QPlayerMode = EQPlayerMode::Capture;
  403. });
  404. opts.AddLongOption("replay", "Read query metadata from QStorage").NoArgument()
  405. .Handler0([this]() {
  406. if (EQPlayerMode::Capture == QPlayerMode) {
  407. throw yexception() << "replay and capture options can't be used simultaneously";
  408. }
  409. QPlayerMode = EQPlayerMode::Replay;
  410. });
  411. }
  412. opts.SetFreeArgsMax(0);
  413. for (auto& ext: OptExtenders_) {
  414. ext(opts);
  415. }
  416. auto res = NLastGetopt::TOptsParseResult(&opts, argc, argv);
  417. if (QPlayerMode != EQPlayerMode::None) {
  418. if (!QPlayerStorage_) {
  419. QPlayerStorage_ = MakeFileQStorage(".");
  420. }
  421. if (EQPlayerMode::Replay == QPlayerMode) {
  422. QPlayerContext = TQContext(QPlayerStorage_->MakeReader(OperationId, {}));
  423. ProgramFile = "-replay-";
  424. ProgramText = "";
  425. } else if (EQPlayerMode::Capture == QPlayerMode) {
  426. QPlayerContext = TQContext(QPlayerStorage_->MakeWriter(OperationId, {}));
  427. }
  428. }
  429. if (Mode >= ERunMode::Validate && GatewayTypes.empty()) {
  430. throw yexception() << "At least one gateway from the list " << JoinSeq(",", SupportedGateways_).Quote() << " must be specified";
  431. }
  432. if (!GatewaysConfig) {
  433. GatewaysConfig = ParseProtoFromResource<TGatewaysConfig>("gateways.conf");
  434. }
  435. if (GatewaysConfig && GatewaysConfig->HasSqlCore()) {
  436. SqlFlags.insert(GatewaysConfig->GetSqlCore().GetTranslationFlags().begin(), GatewaysConfig->GetSqlCore().GetTranslationFlags().end());
  437. }
  438. UpdateSqlFlagsFromQContext(QPlayerContext, SqlFlags);
  439. if (!FsConfig) {
  440. FsConfig = MakeHolder<TFileStorageConfig>();
  441. if (NResource::Has("fs.conf")) {
  442. LoadFsConfigFromResource("fs.conf", *FsConfig);
  443. }
  444. }
  445. if (EnableCredentials && Token) {
  446. for (auto name: SupportedGateways_) {
  447. Credentials->AddCredential(TStringBuilder() << "default_" << name, TCredential(name, "", Token));
  448. }
  449. }
  450. for (auto& handle: OptHandlers_) {
  451. handle(res);
  452. }
  453. }
  454. TFacadeRunner::TFacadeRunner(TString name)
  455. : Name_(std::move(name))
  456. {
  457. }
  458. TFacadeRunner::~TFacadeRunner() {
  459. }
  460. TIntrusivePtr<NKikimr::NMiniKQL::IFunctionRegistry> TFacadeRunner::GetFuncRegistry() {
  461. return FuncRegistry_;
  462. }
  463. int TFacadeRunner::Main(int argc, const char *argv[]) {
  464. NYql::NBacktrace::RegisterKikimrFatalActions();
  465. NYql::NBacktrace::EnableKikimrSymbolize();
  466. NYql::NLog::YqlLoggerScope logger(&Cerr);
  467. try {
  468. return DoMain(argc, argv);
  469. }
  470. catch (...) {
  471. Cerr << CurrentExceptionMessage() << Endl;
  472. return 1;
  473. }
  474. }
  475. int TFacadeRunner::DoMain(int argc, const char *argv[]) {
  476. Y_UNUSED(NUdf::GetStaticSymbols());
  477. RunOptions_.Parse(argc, argv);
  478. if (!RunOptions_.NoDebug) {
  479. Cerr << Name_ << " ABI version: " << NKikimr::NUdf::CurrentAbiVersionStr() << Endl;
  480. }
  481. RunOptions_.InitLogger();
  482. if (RunOptions_.PgSupport) {
  483. ClusterMapping_["pg_catalog"] = PgProviderName;
  484. ClusterMapping_["information_schema"] = PgProviderName;
  485. NPg::SetSqlLanguageParser(NSQLTranslationPG::CreateSqlLanguageParser());
  486. NPg::LoadSystemFunctions(*NSQLTranslationPG::CreateSystemFunctionsParser());
  487. if (RunOptions_.PgExtConfig) {
  488. TVector<NPg::TExtensionDesc> extensions;
  489. PgExtensionsFromProto(*RunOptions_.PgExtConfig, extensions);
  490. NPg::RegisterExtensions(extensions, RunOptions_.QPlayerContext.CanRead(),
  491. *NSQLTranslationPG::CreateExtensionSqlParser(),
  492. NKikimr::NMiniKQL::CreateExtensionLoader().get());
  493. }
  494. NPg::GetSqlLanguageParser()->Freeze();
  495. }
  496. FuncRegistry_ = NKikimr::NMiniKQL::CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace,
  497. NKikimr::NMiniKQL::CreateBuiltinRegistry(), true, RunOptions_.UdfsPaths);
  498. NSQLTranslationV1::TLexers lexers;
  499. lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
  500. lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
  501. NSQLTranslationV1::TParsers parsers;
  502. parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory();
  503. parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory();
  504. NSQLTranslation::TTranslators translators(
  505. nullptr,
  506. NSQLTranslationV1::MakeTranslator(lexers, parsers),
  507. NSQLTranslationPG::MakeTranslator()
  508. );
  509. TExprContext ctx;
  510. if (RunOptions_.PgSupport) {
  511. ctx.NextUniqueId = NPg::GetSqlLanguageParser()->GetContext().NextUniqueId;
  512. }
  513. IModuleResolver::TPtr moduleResolver;
  514. if (RunOptions_.MountConfig) {
  515. TModulesTable modules;
  516. FillUserDataTableFromFileSystem(*RunOptions_.MountConfig, RunOptions_.DataTable);
  517. if (!CompileLibraries(translators, RunOptions_.DataTable, ctx, modules, RunOptions_.OptimizeLibs && RunOptions_.Mode >= ERunMode::Validate)) {
  518. *RunOptions_.ErrStream << "Errors on compile libraries:" << Endl;
  519. ctx.IssueManager.GetIssues().PrintTo(*RunOptions_.ErrStream);
  520. return -1;
  521. }
  522. moduleResolver = std::make_shared<TModuleResolver>(translators, std::move(modules), ctx.NextUniqueId, ClusterMapping_, RunOptions_.SqlFlags, RunOptions_.Mode >= ERunMode::Validate);
  523. } else {
  524. if (!GetYqlDefaultModuleResolver(ctx, moduleResolver, ClusterMapping_, RunOptions_.OptimizeLibs && RunOptions_.Mode >= ERunMode::Validate)) {
  525. *RunOptions_.ErrStream << "Errors loading default YQL libraries:" << Endl;
  526. ctx.IssueManager.GetIssues().PrintTo(*RunOptions_.ErrStream);
  527. return -1;
  528. }
  529. }
  530. TExprContext::TFreezeGuard freezeGuard(ctx);
  531. if (RunOptions_.Mode >= ERunMode::Validate) {
  532. std::vector<NFS::IDownloaderPtr> downloaders;
  533. for (auto& factory: FsDownloadFactories_) {
  534. if (auto download = factory()) {
  535. downloaders.push_back(std::move(download));
  536. }
  537. }
  538. FileStorage_ = WithAsync(CreateFileStorage(*RunOptions_.FsConfig, downloaders));
  539. }
  540. IUdfResolver::TPtr udfResolver;
  541. TUdfIndex::TPtr udfIndex;
  542. if (FileStorage_ && RunOptions_.ScanUdfs) {
  543. if (!RunOptions_.UdfResolverPath) {
  544. Cerr << "udf-resolver path must be specified when use 'scan-udfs'";
  545. return -1;
  546. }
  547. udfResolver = NCommon::CreateOutProcUdfResolver(FuncRegistry_.Get(), FileStorage_, RunOptions_.UdfResolverPath, {}, {}, RunOptions_.UdfResolverFilterSyscalls, {});
  548. RunOptions_.PrintInfo(TStringBuilder() << TInstant::Now().ToStringLocalUpToSeconds() << " Udf scanning started for " << RunOptions_.UdfsPaths.size() << " udfs ...");
  549. udfIndex = new TUdfIndex();
  550. LoadRichMetadataToUdfIndex(*udfResolver, RunOptions_.UdfsPaths, false, TUdfIndex::EOverrideMode::RaiseError, *udfIndex);
  551. RunOptions_.PrintInfo(TStringBuilder() << TInstant::Now().ToStringLocalUpToSeconds() << " UdfIndex done.");
  552. udfResolver = NCommon::CreateUdfResolverWithIndex(udfIndex, udfResolver, FileStorage_);
  553. RunOptions_.PrintInfo(TStringBuilder() << TInstant::Now().ToStringLocalUpToSeconds() << " Udfs scanned");
  554. } else {
  555. udfResolver = FileStorage_ && RunOptions_.UdfResolverPath
  556. ? NCommon::CreateOutProcUdfResolver(FuncRegistry_.Get(), FileStorage_, RunOptions_.UdfResolverPath, {}, {}, RunOptions_.UdfResolverFilterSyscalls, {})
  557. : NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get(), FileStorage_, true);
  558. }
  559. TVector<TDataProviderInitializer> dataProvidersInit;
  560. if (RunOptions_.PgSupport) {
  561. dataProvidersInit.push_back(GetPgDataProviderInitializer());
  562. }
  563. for (auto& factory: ProviderFactories_) {
  564. if (auto init = factory()) {
  565. dataProvidersInit.push_back(std::move(init));
  566. }
  567. }
  568. TVector<IUrlListerPtr> urlListers;
  569. for (auto& factory: UrlListerFactories_) {
  570. if (auto listener = factory()) {
  571. urlListers.push_back(std::move(listener));
  572. }
  573. }
  574. TProgramFactory factory(RunOptions_.UseRepeatableRandomAndTimeProviders, FuncRegistry_.Get(), ctx.NextUniqueId, dataProvidersInit, Name_);
  575. factory.AddUserDataTable(RunOptions_.DataTable);
  576. factory.SetModules(moduleResolver);
  577. factory.SetFileStorage(FileStorage_);
  578. if (RunOptions_.GatewaysConfig && RunOptions_.GatewaysConfig->HasFs()) {
  579. factory.SetUrlPreprocessing(new NYql::TUrlPreprocessing(*RunOptions_.GatewaysConfig));
  580. }
  581. factory.SetUdfIndex(udfIndex, new TUdfIndexPackageSet());
  582. factory.SetUdfResolver(udfResolver);
  583. factory.SetGatewaysConfig(RunOptions_.GatewaysConfig.Get());
  584. factory.SetCredentials(RunOptions_.Credentials);
  585. factory.EnableRangeComputeFor();
  586. if (!urlListers.empty()) {
  587. factory.SetUrlListerManager(MakeUrlListerManager(urlListers));
  588. }
  589. int result = DoRun(factory);
  590. if (result == 0 && EQPlayerMode::Capture == RunOptions_.QPlayerMode) {
  591. RunOptions_.QPlayerContext.GetWriter()->Commit().GetValueSync();
  592. }
  593. return result;
  594. }
  595. int TFacadeRunner::DoRun(TProgramFactory& factory) {
  596. TProgramPtr program = factory.Create(RunOptions_.ProgramFile, RunOptions_.ProgramText, RunOptions_.OperationId, EHiddenMode::Disable, RunOptions_.QPlayerContext);;
  597. if (RunOptions_.Params) {
  598. program->SetParametersYson(RunOptions_.Params);
  599. }
  600. if (RunOptions_.EnableResultPosition) {
  601. program->EnableResultPosition();
  602. }
  603. if (ProgressWriter_) {
  604. program->SetProgressWriter(ProgressWriter_);
  605. }
  606. program->SetUseTableMetaFromGraph(RunOptions_.UseMetaFromGrpah);
  607. program->SetValidateOptions(RunOptions_.ValidateMode);
  608. bool fail = false;
  609. if (RunOptions_.ProgramType != EProgramType::SExpr) {
  610. RunOptions_.PrintInfo("Parse SQL...");
  611. google::protobuf::Arena arena;
  612. NSQLTranslation::TTranslationSettings settings;
  613. settings.Arena = &arena;
  614. settings.PgParser = EProgramType::Pg == RunOptions_.ProgramType;
  615. settings.ClusterMapping = ClusterMapping_;
  616. settings.Flags = RunOptions_.SqlFlags;
  617. settings.SyntaxVersion = RunOptions_.SyntaxVersion;
  618. settings.AnsiLexer = RunOptions_.AnsiLexer;
  619. settings.TestAntlr4 = RunOptions_.TestAntlr4;
  620. settings.V0Behavior = NSQLTranslation::EV0Behavior::Report;
  621. settings.AssumeYdbOnClusterWithSlash = RunOptions_.AssumeYdbOnClusterWithSlash;
  622. if (ERunMode::Discover == RunOptions_.Mode) {
  623. settings.Mode = NSQLTranslation::ESqlMode::DISCOVERY;
  624. }
  625. if (!program->ParseSql(settings)) {
  626. program->PrintErrorsTo(*RunOptions_.ErrStream);
  627. fail = true;
  628. }
  629. if (!fail && RunOptions_.TestSqlFormat && 1 == RunOptions_.SyntaxVersion) {
  630. TString formattedProgramText;
  631. NYql::TIssues issues;
  632. NSQLTranslationV1::TLexers lexers;
  633. lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
  634. lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
  635. NSQLTranslationV1::TParsers parsers;
  636. parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory();
  637. parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory();
  638. auto formatter = NSQLFormat::MakeSqlFormatter(lexers, parsers, settings);
  639. if (!formatter->Format(RunOptions_.ProgramText, formattedProgramText, issues)) {
  640. *RunOptions_.ErrStream << "Format failed" << Endl;
  641. issues.PrintTo(*RunOptions_.ErrStream);
  642. return -1;
  643. }
  644. auto frmProgram = factory.Create("formatted SQL", formattedProgramText);
  645. if (!frmProgram->ParseSql(settings)) {
  646. frmProgram->PrintErrorsTo(*RunOptions_.ErrStream);
  647. return -1;
  648. }
  649. TStringStream srcQuery, frmQuery;
  650. program->AstRoot()->PrettyPrintTo(srcQuery, PRETTY_FLAGS);
  651. frmProgram->AstRoot()->PrettyPrintTo(frmQuery, PRETTY_FLAGS);
  652. if (srcQuery.Str() != frmQuery.Str()) {
  653. *RunOptions_.ErrStream << "source query's AST and formatted query's AST are not same" << Endl;
  654. return -1;
  655. }
  656. }
  657. } else {
  658. RunOptions_.PrintInfo("Parse YQL...");
  659. if (!program->ParseYql()) {
  660. program->PrintErrorsTo(*RunOptions_.ErrStream);
  661. fail = true;
  662. }
  663. }
  664. if (RunOptions_.TraceOptStream) {
  665. if (auto ast = program->GetQueryAst()) {
  666. *RunOptions_.TraceOptStream << *ast << Endl;
  667. }
  668. }
  669. if (fail) {
  670. return -1;
  671. }
  672. if (RunOptions_.PrintAst) {
  673. program->AstRoot()->PrettyPrintTo(Cout, PRETTY_FLAGS);
  674. }
  675. if (ERunMode::Parse == RunOptions_.Mode) {
  676. return 0;
  677. }
  678. RunOptions_.PrintInfo("Compile program...");
  679. if (!program->Compile(RunOptions_.User)) {
  680. program->PrintErrorsTo(*RunOptions_.ErrStream);
  681. fail = true;
  682. }
  683. if (RunOptions_.TraceOptStream) {
  684. program->Print(RunOptions_.TraceOptStream, nullptr);
  685. }
  686. if (fail) {
  687. return -1;
  688. }
  689. if (ERunMode::Compile == RunOptions_.Mode) {
  690. if (RunOptions_.ExprStream) {
  691. auto baseAst = ConvertToAst(*program->ExprRoot(), program->ExprCtx(), NYql::TExprAnnotationFlags::None, true);
  692. baseAst.Root->PrettyPrintTo(*RunOptions_.ExprStream, PRETTY_FLAGS);
  693. }
  694. return 0;
  695. }
  696. TProgram::TStatus status = DoRunProgram(program);
  697. if (ERunMode::Peephole == RunOptions_.Mode && RunOptions_.ExprStream && program->ExprRoot()) {
  698. auto ast = ConvertToAst(*program->ExprRoot(), program->ExprCtx(), RunOptions_.WithTypes ? TExprAnnotationFlags::Types : TExprAnnotationFlags::None, true);
  699. ui32 prettyFlags = TAstPrintFlags::ShortQuote;
  700. if (!RunOptions_.WithTypes) {
  701. prettyFlags |= TAstPrintFlags::PerLine;
  702. }
  703. ast.Root->PrettyPrintTo(*RunOptions_.ExprStream, prettyFlags);
  704. }
  705. if (RunOptions_.WithFinalIssues) {
  706. program->FinalizeIssues();
  707. }
  708. program->PrintErrorsTo(*RunOptions_.ErrStream);
  709. if (status == TProgram::TStatus::Error) {
  710. if (RunOptions_.TraceOptStream) {
  711. program->Print(RunOptions_.TraceOptStream, nullptr);
  712. }
  713. return -1;
  714. }
  715. if (!RunOptions_.FullExpr && ERunMode::Peephole != RunOptions_.Mode) {
  716. program->Print(RunOptions_.ExprStream, RunOptions_.PlanStream, /*cleanPlan*/true);
  717. }
  718. program->ConfigureYsonResultFormat(RunOptions_.ResultsFormat);
  719. if (RunOptions_.ResultStream) {
  720. RunOptions_.PrintInfo("Getting results...");
  721. if (ERunMode::Discover == RunOptions_.Mode) {
  722. if (auto data = program->GetDiscoveredData()) {
  723. *RunOptions_.ResultStream << *data;
  724. }
  725. } else if (ERunMode::Lineage == RunOptions_.Mode) {
  726. if (auto data = program->GetLineage()) {
  727. TStringInput in(*data);
  728. NYson::ReformatYsonStream(&in, RunOptions_.ResultStream, RunOptions_.ResultsFormat);
  729. }
  730. } else if (program->HasResults()) {
  731. if (RunOptions_.ValidateResultFormat) {
  732. auto str = program->ResultsAsString();
  733. if (!str.empty()) {
  734. auto node = NYT::NodeFromYsonString(str);
  735. for (const auto& r : NResult::ParseResponse(node)) {
  736. for (const auto& write : r.Writes) {
  737. if (write.Type) {
  738. NResult::TEmptyTypeVisitor visitor;
  739. NResult::ParseType(*write.Type, visitor);
  740. }
  741. if (write.Type && write.Data) {
  742. NResult::TEmptyDataVisitor visitor;
  743. NResult::ParseData(*write.Type, *write.Data, visitor);
  744. }
  745. }
  746. }
  747. }
  748. RunOptions_.ResultStream->Write(str.data(), str.size());
  749. } else {
  750. *RunOptions_.ResultStream << program->ResultsAsString();
  751. }
  752. }
  753. }
  754. if (RunOptions_.StatStream) {
  755. if (auto st = program->GetStatistics(!RunOptions_.FullStatistics)) {
  756. *RunOptions_.StatStream << *st;
  757. }
  758. }
  759. RunOptions_.PrintInfo("");
  760. RunOptions_.PrintInfo("Done");
  761. return 0;
  762. }
  763. TProgram::TStatus TFacadeRunner::DoRunProgram(TProgramPtr program) {
  764. TProgram::TStatus status = TProgram::TStatus::Ok;
  765. auto defOptConfig = TOptPipelineConfigurator(program, RunOptions_.FullExpr ? RunOptions_.PlanStream : nullptr, RunOptions_.FullExpr ? RunOptions_.ExprStream : nullptr, RunOptions_.WithTypes);
  766. IPipelineConfigurator* optConfig = OptPipelineConfigurator_ ? OptPipelineConfigurator_ : &defOptConfig;
  767. if (ERunMode::Peephole == RunOptions_.Mode) {
  768. RunOptions_.PrintInfo("Peephole...");
  769. auto defConfig = TPeepHolePipelineConfigurator();
  770. IPipelineConfigurator* config = PeepholePipelineConfigurator_ ? PeepholePipelineConfigurator_ : &defConfig;
  771. status = program->OptimizeWithConfig(RunOptions_.User, *config);
  772. } else if (ERunMode::Run == RunOptions_.Mode) {
  773. RunOptions_.PrintInfo("Run program...");
  774. status = program->RunWithConfig(RunOptions_.User, *optConfig);
  775. } else if (ERunMode::Optimize == RunOptions_.Mode) {
  776. RunOptions_.PrintInfo("Optimize program...");
  777. status = program->OptimizeWithConfig(RunOptions_.User, *optConfig);
  778. } else if (ERunMode::Validate == RunOptions_.Mode) {
  779. RunOptions_.PrintInfo("Validate program...");
  780. status = program->Validate(RunOptions_.User, RunOptions_.ExprStream, RunOptions_.WithTypes);
  781. } else if (ERunMode::Discover == RunOptions_.Mode) {
  782. RunOptions_.PrintInfo("Discover program...");
  783. status = program->Discover(RunOptions_.User);
  784. } else if (ERunMode::Lineage == RunOptions_.Mode) {
  785. RunOptions_.PrintInfo("Calculating lineage in program...");
  786. status = program->LineageWithConfig(RunOptions_.User, *optConfig);
  787. }
  788. return status;
  789. }
  790. } // NYql