sql2yql.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. #include <yql/essentials/ast/yql_ast.h>
  2. #include <yql/essentials/ast/yql_ast_annotation.h>
  3. #include <yql/essentials/ast/yql_expr.h>
  4. #include <yql/essentials/parser/lexer_common/hints.h>
  5. #include <yql/essentials/sql/sql.h>
  6. #include <yql/essentials/sql/v1/sql.h>
  7. #include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
  8. #include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h>
  9. #include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
  10. #include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h>
  11. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  12. #include <yql/essentials/parser/pg_wrapper/interface/parser.h>
  13. #include <library/cpp/getopt/last_getopt.h>
  14. #include <yql/essentials/sql/v1/format/sql_format.h>
  15. #include <library/cpp/testing/unittest/registar.h>
  16. #include <util/stream/file.h>
  17. #include <util/generic/hash.h>
  18. #include <util/generic/hash_set.h>
  19. #include <util/generic/string.h>
  20. #include <util/string/escape.h>
  21. #include <google/protobuf/message.h>
  22. #include <google/protobuf/descriptor.h>
  23. #include <google/protobuf/repeated_field.h>
  24. struct TPosOutput {
  25. IOutputStream& Out;
  26. ui32 Line;
  27. ui32 Column;
  28. TPosOutput(IOutputStream& out)
  29. : Out(out)
  30. , Line(1)
  31. , Column(0)
  32. {
  33. }
  34. void Output(ui32 line, ui32 column, const TString& value) {
  35. while (Line < line) {
  36. Out << Endl;
  37. ++Line;
  38. Column = 0;
  39. }
  40. while (Column < column) {
  41. Out << " ";
  42. ++Column;
  43. }
  44. if (value != "<EOF>") {
  45. Out << value;
  46. Column += value.size();
  47. }
  48. }
  49. };
  50. static void ExtractQuery(TPosOutput& out, const google::protobuf::Message& node);
  51. static void VisitField(TPosOutput& out, const google::protobuf::FieldDescriptor& descr, const google::protobuf::Message& field) {
  52. using namespace google::protobuf;
  53. const Descriptor* d = descr.message_type();
  54. if (!d) {
  55. ythrow yexception() << "Invalid AST: non-message node encountered";
  56. }
  57. if (d->name() == "TToken") {
  58. const Reflection* r = field.GetReflection();
  59. out.Output(r->GetUInt32(field, d->field(0)), r->GetUInt32(field, d->field(1)), r->GetString(field, d->field(2)));
  60. } else {
  61. ExtractQuery(out, field);
  62. }
  63. }
  64. static void ExtractQuery(TPosOutput& out, const google::protobuf::Message& node) {
  65. using namespace google::protobuf;
  66. TVector<const FieldDescriptor*> fields;
  67. const Reflection* ref = node.GetReflection();
  68. ref->ListFields(node, &fields);
  69. for (auto it = fields.begin(); it != fields.end(); ++it) {
  70. if ((*it)->is_repeated()) {
  71. const ui32 fieldSize = ref->FieldSize(node, *it);
  72. for (ui32 i = 0; i < fieldSize; ++i) {
  73. VisitField(out, **it, ref->GetRepeatedMessage(node, *it, i));
  74. }
  75. } else {
  76. VisitField(out, **it, ref->GetMessage(node, *it));
  77. }
  78. }
  79. }
  80. bool TestFormat(
  81. const NSQLTranslation::TTranslators& translators,
  82. const TString& query,
  83. const NSQLTranslation::TTranslationSettings& settings,
  84. const TString& queryFile,
  85. const NYql::TAstParseResult& parseRes,
  86. const TString& outFileName,
  87. const bool checkDoubleFormatting
  88. ) {
  89. TStringStream yqlProgram;
  90. parseRes.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  91. TString frmQuery;
  92. NYql::TIssues issues;
  93. NSQLTranslationV1::TLexers lexers;
  94. lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
  95. lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
  96. NSQLTranslationV1::TParsers parsers;
  97. parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory();
  98. parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory();
  99. auto formatter = NSQLFormat::MakeSqlFormatter(lexers, parsers, settings);
  100. if (!formatter->Format(query, frmQuery, issues)) {
  101. Cerr << "Failed to format query: " << issues.ToString() << Endl;
  102. return false;
  103. }
  104. NYql::TAstParseResult frmParseRes = NSQLTranslation::SqlToYql(translators, frmQuery, settings);
  105. if (!frmParseRes.Issues.Empty()) {
  106. frmParseRes.Issues.PrintWithProgramTo(Cerr, queryFile, frmQuery);
  107. if (AnyOf(frmParseRes.Issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR;})) {
  108. return false;
  109. }
  110. }
  111. if (!frmParseRes.IsOk()) {
  112. Cerr << "No error reported, but no yql compiled result!" << Endl << Endl;
  113. return false;
  114. }
  115. TStringStream frmYqlProgram;
  116. frmParseRes.Root->PrettyPrintTo(frmYqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  117. if (yqlProgram.Str() != frmYqlProgram.Str()) {
  118. Cerr << "source query's AST and formatted query's AST are not same\n";
  119. return false;
  120. }
  121. TString frmQuery2;
  122. if (!formatter->Format(frmQuery, frmQuery2, issues)) {
  123. Cerr << "Failed to format already formatted query: " << issues.ToString() << Endl;
  124. return false;
  125. }
  126. if (checkDoubleFormatting && frmQuery != frmQuery2) {
  127. Cerr << "Formatting an already formatted query yielded a different resut" << Endl
  128. << "Add /* skip double format */ to suppress" << Endl;
  129. return false;
  130. }
  131. if (!outFileName.empty()) {
  132. TFixedBufferFileOutput out{outFileName};
  133. out << frmQuery;
  134. }
  135. return true;
  136. }
  137. class TStoreMappingFunctor: public NLastGetopt::IOptHandler {
  138. public:
  139. TStoreMappingFunctor(THashMap<TString, TString>* target, char delim = '@')
  140. : Target(target)
  141. , Delim(delim)
  142. {
  143. }
  144. void HandleOpt(const NLastGetopt::TOptsParser* parser) final {
  145. const TStringBuf val(parser->CurValOrDef());
  146. const auto service = TString(val.After(Delim));
  147. auto res = Target->emplace(TString(val.Before(Delim)), service);
  148. if (!res.second) {
  149. /// force replace already exist parametr
  150. res.first->second = service;
  151. }
  152. }
  153. private:
  154. THashMap<TString, TString>* Target;
  155. char Delim;
  156. };
  157. int BuildAST(int argc, char* argv[]) {
  158. NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();
  159. TString outFileName;
  160. TString queryString;
  161. ui16 syntaxVersion;
  162. TString outFileNameFormat;
  163. THashMap<TString, TString> clusterMapping;
  164. clusterMapping["plato"] = NYql::YtProviderName;
  165. clusterMapping["pg_catalog"] = NYql::PgProviderName;
  166. clusterMapping["information_schema"] = NYql::PgProviderName;
  167. THashMap<TString, TString> tables;
  168. THashSet<TString> flags;
  169. opts.AddLongOption('o', "output", "save output to file").RequiredArgument("file").StoreResult(&outFileName);
  170. opts.AddLongOption('q', "query", "query string").RequiredArgument("query").StoreResult(&queryString);
  171. opts.AddLongOption('t', "tree", "print AST proto text").NoArgument();
  172. opts.AddLongOption('d', "diff", "print inlined diff for original query and query build from AST if they differ").NoArgument();
  173. opts.AddLongOption('D', "dump", "dump inlined diff for original query and query build from AST").NoArgument();
  174. opts.AddLongOption('p', "print-query", "print given query before parsing").NoArgument();
  175. opts.AddLongOption('y', "yql", "translate result to Yql and print it").NoArgument();
  176. opts.AddLongOption('l', "lexer", "print query token stream").NoArgument();
  177. opts.AddLongOption("ansi-lexer", "use ansi lexer").NoArgument();
  178. opts.AddLongOption("pg", "use pg_query parser").NoArgument();
  179. opts.AddLongOption('a', "ann", "print Yql annotations").NoArgument();
  180. opts.AddLongOption('C', "cluster", "set cluster to service mapping").RequiredArgument("name@service").Handler(new TStoreMappingFunctor(&clusterMapping));
  181. opts.AddLongOption('T', "table", "set table to filename mapping").RequiredArgument("table@path").Handler(new TStoreMappingFunctor(&tables));
  182. opts.AddLongOption('R', "replace", "replace Output table with each statement result").NoArgument();
  183. opts.AddLongOption("sqllogictest", "input files are in sqllogictest format").NoArgument();
  184. opts.AddLongOption("syntax-version", "SQL syntax version").StoreResult(&syntaxVersion).DefaultValue(1);
  185. opts.AddLongOption('F', "flags", "SQL pragma flags").SplitHandler(&flags, ',');
  186. opts.AddLongOption("assume-ydb-on-slash", "Assume YDB provider if cluster name starts with '/'").NoArgument();
  187. opts.AddLongOption("test-format", "compare formatted query's AST with the original query's AST (only syntaxVersion=1 is supported).").NoArgument();
  188. opts.AddLongOption("test-double-format", "check if formatting already formatted query produces the same result").NoArgument();
  189. opts.AddLongOption("test-antlr4", "check antlr4 parser").NoArgument();
  190. opts.AddLongOption("format-output", "Saves formatted query to it").RequiredArgument("format-output").StoreResult(&outFileNameFormat);
  191. opts.SetFreeArgDefaultTitle("query file");
  192. opts.AddHelpOption();
  193. NLastGetopt::TOptsParseResult res(&opts, argc, argv);
  194. TVector<TString> queryFiles(res.GetFreeArgs());
  195. THolder<TFixedBufferFileOutput> outFile;
  196. if (!outFileName.empty()) {
  197. outFile.Reset(new TFixedBufferFileOutput(outFileName));
  198. }
  199. IOutputStream& out = outFile ? *outFile.Get() : Cout;
  200. if (!res.Has("query") && queryFiles.empty()) {
  201. Cerr << "No --query nor query file was specified" << Endl << Endl;
  202. opts.PrintUsage(argv[0], Cerr);
  203. }
  204. NSQLTranslationV1::TLexers lexers;
  205. lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
  206. lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
  207. NSQLTranslationV1::TParsers parsers;
  208. parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory();
  209. parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory();
  210. NSQLTranslation::TTranslators translators(
  211. nullptr,
  212. NSQLTranslationV1::MakeTranslator(lexers, parsers),
  213. NSQLTranslationPG::MakeTranslator()
  214. );
  215. TVector<TString> queries;
  216. int errors = 0;
  217. for (ui32 i = 0; i <= queryFiles.size(); ++i) {
  218. queries.clear();
  219. TString queryFile("query");
  220. if (i < queryFiles.size()) {
  221. queryFile = queryFiles[i];
  222. TAutoPtr<TFileInput> filePtr;
  223. if (queryFile != "-") {
  224. filePtr.Reset(new TFileInput(queryFile));
  225. }
  226. IInputStream& in = filePtr.Get() ? *filePtr : Cin;
  227. if (res.Has("sqllogictest")) {
  228. ui32 lineNum = 1;
  229. TString line;
  230. bool take = false;
  231. while (in.ReadLine(line)) {
  232. if (line.StartsWith("statement") || line.StartsWith("query")) {
  233. take = true;
  234. queries.emplace_back();
  235. queryFile = queryFiles[i] + " line " + ToString(lineNum + 1);
  236. } else if (line.StartsWith("----") || line.empty()) {
  237. take = false;
  238. } else if (take) {
  239. queries.back().append(line).append("\n");
  240. }
  241. ++lineNum;
  242. }
  243. } else {
  244. queries.push_back(in.ReadAll());
  245. }
  246. } else {
  247. queries.push_back(queryString);
  248. }
  249. for (const auto& query: queries) {
  250. if (query.empty()) {
  251. continue;
  252. }
  253. if (res.Has("print-query")) {
  254. out << query << Endl;
  255. }
  256. google::protobuf::Arena arena;
  257. NSQLTranslation::TTranslationSettings settings;
  258. settings.Arena = &arena;
  259. settings.ClusterMapping = clusterMapping;
  260. settings.Flags = flags;
  261. settings.SyntaxVersion = syntaxVersion;
  262. settings.AnsiLexer = res.Has("ansi-lexer");
  263. settings.WarnOnV0 = false;
  264. settings.V0ForceDisable = false;
  265. settings.AssumeYdbOnClusterWithSlash = res.Has("assume-ydb-on-slash");
  266. settings.TestAntlr4 = res.Has("test-antlr4");
  267. settings.EmitReadsForExists = true;
  268. if (res.Has("lexer")) {
  269. NYql::TIssues issues;
  270. auto lexer = NSQLTranslation::SqlLexer(translators, query, issues, settings);
  271. NSQLTranslation::TParsedTokenList tokens;
  272. if (lexer && NSQLTranslation::Tokenize(*lexer, query, queryFile, tokens, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
  273. for (auto& token : tokens) {
  274. out << token.Line << ":" << token.LinePos << "\t\t" << token.Name << "(" << EscapeC(token.Content) << ")\n";
  275. }
  276. }
  277. if (!issues.Empty()) {
  278. issues.PrintTo(Cerr);
  279. }
  280. bool hasError = AnyOf(issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR;});
  281. if (hasError) {
  282. ++errors;
  283. }
  284. continue;
  285. }
  286. NYql::TAstParseResult parseRes;
  287. if (res.Has("pg")) {
  288. parseRes = NSQLTranslationPG::PGToYql(query, settings);
  289. } else {
  290. if (res.Has("tree") || res.Has("diff") || res.Has("dump")) {
  291. google::protobuf::Message* ast(NSQLTranslation::SqlAST(translators, query, queryFile, parseRes.Issues,
  292. NSQLTranslation::SQL_MAX_PARSER_ERRORS, settings));
  293. if (ast) {
  294. if (res.Has("tree")) {
  295. out << ast->DebugString() << Endl;
  296. }
  297. if (res.Has("diff") || res.Has("dump")) {
  298. TStringStream result;
  299. TPosOutput posOut(result);
  300. ExtractQuery(posOut, *ast);
  301. if (res.Has("dump") || query != result.Str()) {
  302. out << NUnitTest::ColoredDiff(query, result.Str()) << Endl;
  303. }
  304. }
  305. NSQLTranslation::TSQLHints hints;
  306. auto lexer = SqlLexer(translators, query, parseRes.Issues, settings);
  307. if (lexer && CollectSqlHints(*lexer, query, queryFile, settings.File, hints, parseRes.Issues,
  308. settings.MaxErrors, settings.Antlr4Parser)) {
  309. parseRes = NSQLTranslation::SqlASTToYql(translators, query, *ast, hints, settings);
  310. }
  311. }
  312. } else {
  313. parseRes = NSQLTranslation::SqlToYql(translators, query, settings);
  314. }
  315. }
  316. if (parseRes.Root) {
  317. TStringStream yqlProgram;
  318. parseRes.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  319. if (res.Has("yql")) {
  320. out << yqlProgram.Str();
  321. }
  322. if (res.Has("ann")) {
  323. TMemoryPool pool(1024);
  324. NYql::AnnotatePositions(*parseRes.Root, pool)->PrettyPrintTo(out, NYql::TAstPrintFlags::PerLine);
  325. }
  326. }
  327. bool hasError = false;
  328. if (!parseRes.Issues.Empty()) {
  329. parseRes.Issues.PrintWithProgramTo(Cerr, queryFile, query);
  330. hasError = AnyOf(parseRes.Issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR;});
  331. }
  332. if (!parseRes.IsOk() && !hasError) {
  333. hasError = true;
  334. Cerr << "No error reported, but no yql compiled result!" << Endl << Endl;
  335. }
  336. if (res.Has("test-format") && syntaxVersion == 1 && !hasError && parseRes.Root) {
  337. hasError = !TestFormat(translators, query, settings, queryFile, parseRes, outFileNameFormat, res.Has("test-double-format"));
  338. }
  339. if (hasError) {
  340. ++errors;
  341. }
  342. }
  343. }
  344. return errors;
  345. }
  346. int main(int argc, char* argv[]) {
  347. try {
  348. return BuildAST(argc, argv);
  349. } catch (const yexception& e) {
  350. Cerr << "Caught exception:" << e.what() << Endl;
  351. return 1;
  352. } catch (...) {
  353. Cerr << "Caught exception" << Endl;
  354. return 1;
  355. }
  356. return 0;
  357. }