sql2yql.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. #include <yql/essentials/ast/yql_ast.h>
  2. #include <yql/essentials/ast/yql_ast_annotation.h>
  3. #include <yql/essentials/ast/yql_expr.h>
  4. #include <yql/essentials/parser/lexer_common/hints.h>
  5. #include <yql/essentials/sql/sql.h>
  6. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  7. #include <yql/essentials/parser/pg_wrapper/interface/parser.h>
  8. #include <library/cpp/getopt/last_getopt.h>
  9. #include <yql/essentials/sql/v1/format/sql_format.h>
  10. #include <library/cpp/testing/unittest/registar.h>
  11. #include <util/stream/file.h>
  12. #include <util/generic/hash.h>
  13. #include <util/generic/hash_set.h>
  14. #include <util/generic/string.h>
  15. #include <util/string/escape.h>
  16. #include <google/protobuf/message.h>
  17. #include <google/protobuf/descriptor.h>
  18. #include <google/protobuf/repeated_field.h>
  19. struct TPosOutput {
  20. IOutputStream& Out;
  21. ui32 Line;
  22. ui32 Column;
  23. TPosOutput(IOutputStream& out)
  24. : Out(out)
  25. , Line(1)
  26. , Column(0)
  27. {
  28. }
  29. void Output(ui32 line, ui32 column, const TString& value) {
  30. while (Line < line) {
  31. Out << Endl;
  32. ++Line;
  33. Column = 0;
  34. }
  35. while (Column < column) {
  36. Out << " ";
  37. ++Column;
  38. }
  39. if (value != "<EOF>") {
  40. Out << value;
  41. Column += value.size();
  42. }
  43. }
  44. };
  45. static void ExtractQuery(TPosOutput& out, const google::protobuf::Message& node);
  46. static void VisitField(TPosOutput& out, const google::protobuf::FieldDescriptor& descr, const google::protobuf::Message& field) {
  47. using namespace google::protobuf;
  48. const Descriptor* d = descr.message_type();
  49. if (!d) {
  50. ythrow yexception() << "Invalid AST: non-message node encountered";
  51. }
  52. if (d->name() == "TToken") {
  53. const Reflection* r = field.GetReflection();
  54. out.Output(r->GetUInt32(field, d->field(0)), r->GetUInt32(field, d->field(1)), r->GetString(field, d->field(2)));
  55. } else {
  56. ExtractQuery(out, field);
  57. }
  58. }
  59. static void ExtractQuery(TPosOutput& out, const google::protobuf::Message& node) {
  60. using namespace google::protobuf;
  61. TVector<const FieldDescriptor*> fields;
  62. const Reflection* ref = node.GetReflection();
  63. ref->ListFields(node, &fields);
  64. for (auto it = fields.begin(); it != fields.end(); ++it) {
  65. if ((*it)->is_repeated()) {
  66. const ui32 fieldSize = ref->FieldSize(node, *it);
  67. for (ui32 i = 0; i < fieldSize; ++i) {
  68. VisitField(out, **it, ref->GetRepeatedMessage(node, *it, i));
  69. }
  70. } else {
  71. VisitField(out, **it, ref->GetMessage(node, *it));
  72. }
  73. }
  74. }
  75. bool TestFormat(
  76. const TString& query,
  77. const NSQLTranslation::TTranslationSettings& settings,
  78. const TString& queryFile,
  79. const NYql::TAstParseResult& parseRes,
  80. const TString& outFileName,
  81. const bool checkDoubleFormatting
  82. ) {
  83. TStringStream yqlProgram;
  84. parseRes.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  85. TString frmQuery;
  86. NYql::TIssues issues;
  87. auto formatter = NSQLFormat::MakeSqlFormatter(settings);
  88. if (!formatter->Format(query, frmQuery, issues)) {
  89. Cerr << "Failed to format query: " << issues.ToString() << Endl;
  90. return false;
  91. }
  92. NYql::TAstParseResult frmParseRes = NSQLTranslation::SqlToYql(frmQuery, settings);
  93. if (!frmParseRes.Issues.Empty()) {
  94. frmParseRes.Issues.PrintWithProgramTo(Cerr, queryFile, frmQuery);
  95. if (AnyOf(frmParseRes.Issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR;})) {
  96. return false;
  97. }
  98. }
  99. if (!frmParseRes.IsOk()) {
  100. Cerr << "No error reported, but no yql compiled result!" << Endl << Endl;
  101. return false;
  102. }
  103. TStringStream frmYqlProgram;
  104. frmParseRes.Root->PrettyPrintTo(frmYqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  105. if (yqlProgram.Str() != frmYqlProgram.Str()) {
  106. Cerr << "source query's AST and formatted query's AST are not same\n";
  107. return false;
  108. }
  109. TString frmQuery2;
  110. if (!formatter->Format(frmQuery, frmQuery2, issues)) {
  111. Cerr << "Failed to format already formatted query: " << issues.ToString() << Endl;
  112. return false;
  113. }
  114. if (checkDoubleFormatting && frmQuery != frmQuery2) {
  115. Cerr << "Formatting an already formatted query yielded a different resut" << Endl
  116. << "Add /* skip double format */ to suppress" << Endl;
  117. return false;
  118. }
  119. if (!outFileName.empty()) {
  120. TFixedBufferFileOutput out{outFileName};
  121. out << frmQuery;
  122. }
  123. return true;
  124. }
  125. class TStoreMappingFunctor: public NLastGetopt::IOptHandler {
  126. public:
  127. TStoreMappingFunctor(THashMap<TString, TString>* target, char delim = '@')
  128. : Target(target)
  129. , Delim(delim)
  130. {
  131. }
  132. void HandleOpt(const NLastGetopt::TOptsParser* parser) final {
  133. const TStringBuf val(parser->CurValOrDef());
  134. const auto service = TString(val.After(Delim));
  135. auto res = Target->emplace(TString(val.Before(Delim)), service);
  136. if (!res.second) {
  137. /// force replace already exist parametr
  138. res.first->second = service;
  139. }
  140. }
  141. private:
  142. THashMap<TString, TString>* Target;
  143. char Delim;
  144. };
  145. int BuildAST(int argc, char* argv[]) {
  146. NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();
  147. TString outFileName;
  148. TString queryString;
  149. ui16 syntaxVersion;
  150. TString outFileNameFormat;
  151. THashMap<TString, TString> clusterMapping;
  152. clusterMapping["plato"] = NYql::YtProviderName;
  153. clusterMapping["pg_catalog"] = NYql::PgProviderName;
  154. clusterMapping["information_schema"] = NYql::PgProviderName;
  155. THashMap<TString, TString> tables;
  156. THashSet<TString> flags;
  157. opts.AddLongOption('o', "output", "save output to file").RequiredArgument("file").StoreResult(&outFileName);
  158. opts.AddLongOption('q', "query", "query string").RequiredArgument("query").StoreResult(&queryString);
  159. opts.AddLongOption('t', "tree", "print AST proto text").NoArgument();
  160. opts.AddLongOption('d', "diff", "print inlined diff for original query and query build from AST if they differ").NoArgument();
  161. opts.AddLongOption('D', "dump", "dump inlined diff for original query and query build from AST").NoArgument();
  162. opts.AddLongOption('p', "print-query", "print given query before parsing").NoArgument();
  163. opts.AddLongOption('y', "yql", "translate result to Yql and print it").NoArgument();
  164. opts.AddLongOption('l', "lexer", "print query token stream").NoArgument();
  165. opts.AddLongOption("ansi-lexer", "use ansi lexer").NoArgument();
  166. opts.AddLongOption("pg", "use pg_query parser").NoArgument();
  167. opts.AddLongOption('a', "ann", "print Yql annotations").NoArgument();
  168. opts.AddLongOption('C', "cluster", "set cluster to service mapping").RequiredArgument("name@service").Handler(new TStoreMappingFunctor(&clusterMapping));
  169. opts.AddLongOption('T', "table", "set table to filename mapping").RequiredArgument("table@path").Handler(new TStoreMappingFunctor(&tables));
  170. opts.AddLongOption('R', "replace", "replace Output table with each statement result").NoArgument();
  171. opts.AddLongOption("sqllogictest", "input files are in sqllogictest format").NoArgument();
  172. opts.AddLongOption("syntax-version", "SQL syntax version").StoreResult(&syntaxVersion).DefaultValue(1);
  173. opts.AddLongOption('F', "flags", "SQL pragma flags").SplitHandler(&flags, ',');
  174. opts.AddLongOption("assume-ydb-on-slash", "Assume YDB provider if cluster name starts with '/'").NoArgument();
  175. opts.AddLongOption("test-format", "compare formatted query's AST with the original query's AST (only syntaxVersion=1 is supported).").NoArgument();
  176. opts.AddLongOption("test-double-format", "check if formatting already formatted query produces the same result").NoArgument();
  177. opts.AddLongOption("test-antlr4", "check antlr4 parser").NoArgument();
  178. opts.AddLongOption("format-output", "Saves formatted query to it").RequiredArgument("format-output").StoreResult(&outFileNameFormat);
  179. opts.SetFreeArgDefaultTitle("query file");
  180. opts.AddHelpOption();
  181. NLastGetopt::TOptsParseResult res(&opts, argc, argv);
  182. TVector<TString> queryFiles(res.GetFreeArgs());
  183. THolder<TFixedBufferFileOutput> outFile;
  184. if (!outFileName.empty()) {
  185. outFile.Reset(new TFixedBufferFileOutput(outFileName));
  186. }
  187. IOutputStream& out = outFile ? *outFile.Get() : Cout;
  188. if (!res.Has("query") && queryFiles.empty()) {
  189. Cerr << "No --query nor query file was specified" << Endl << Endl;
  190. opts.PrintUsage(argv[0], Cerr);
  191. }
  192. TVector<TString> queries;
  193. int errors = 0;
  194. for (ui32 i = 0; i <= queryFiles.size(); ++i) {
  195. queries.clear();
  196. TString queryFile("query");
  197. if (i < queryFiles.size()) {
  198. queryFile = queryFiles[i];
  199. TAutoPtr<TFileInput> filePtr;
  200. if (queryFile != "-") {
  201. filePtr.Reset(new TFileInput(queryFile));
  202. }
  203. IInputStream& in = filePtr.Get() ? *filePtr : Cin;
  204. if (res.Has("sqllogictest")) {
  205. ui32 lineNum = 1;
  206. TString line;
  207. bool take = false;
  208. while (in.ReadLine(line)) {
  209. if (line.StartsWith("statement") || line.StartsWith("query")) {
  210. take = true;
  211. queries.emplace_back();
  212. queryFile = queryFiles[i] + " line " + ToString(lineNum + 1);
  213. } else if (line.StartsWith("----") || line.empty()) {
  214. take = false;
  215. } else if (take) {
  216. queries.back().append(line).append("\n");
  217. }
  218. ++lineNum;
  219. }
  220. } else {
  221. queries.push_back(in.ReadAll());
  222. }
  223. } else {
  224. queries.push_back(queryString);
  225. }
  226. for (const auto& query: queries) {
  227. if (query.empty()) {
  228. continue;
  229. }
  230. if (res.Has("print-query")) {
  231. out << query << Endl;
  232. }
  233. google::protobuf::Arena arena;
  234. NSQLTranslation::TTranslationSettings settings;
  235. settings.Arena = &arena;
  236. settings.ClusterMapping = clusterMapping;
  237. settings.Flags = flags;
  238. settings.SyntaxVersion = syntaxVersion;
  239. settings.AnsiLexer = res.Has("ansi-lexer");
  240. settings.WarnOnV0 = false;
  241. settings.V0ForceDisable = false;
  242. settings.AssumeYdbOnClusterWithSlash = res.Has("assume-ydb-on-slash");
  243. settings.TestAntlr4 = res.Has("test-antlr4");
  244. settings.EmitReadsForExists = true;
  245. if (res.Has("lexer")) {
  246. NYql::TIssues issues;
  247. auto lexer = NSQLTranslation::SqlLexer(query, issues, settings);
  248. NSQLTranslation::TParsedTokenList tokens;
  249. if (lexer && NSQLTranslation::Tokenize(*lexer, query, queryFile, tokens, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
  250. for (auto& token : tokens) {
  251. out << token.Line << ":" << token.LinePos << "\t\t" << token.Name << "(" << EscapeC(token.Content) << ")\n";
  252. }
  253. }
  254. if (!issues.Empty()) {
  255. issues.PrintTo(Cerr);
  256. }
  257. bool hasError = AnyOf(issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR;});
  258. if (hasError) {
  259. ++errors;
  260. }
  261. continue;
  262. }
  263. NYql::TAstParseResult parseRes;
  264. if (res.Has("pg")) {
  265. parseRes = NSQLTranslationPG::PGToYql(query, settings);
  266. } else {
  267. if (res.Has("tree") || res.Has("diff") || res.Has("dump")) {
  268. google::protobuf::Message* ast(NSQLTranslation::SqlAST(query, queryFile, parseRes.Issues,
  269. NSQLTranslation::SQL_MAX_PARSER_ERRORS, settings));
  270. if (ast) {
  271. if (res.Has("tree")) {
  272. out << ast->DebugString() << Endl;
  273. }
  274. if (res.Has("diff") || res.Has("dump")) {
  275. TStringStream result;
  276. TPosOutput posOut(result);
  277. ExtractQuery(posOut, *ast);
  278. if (res.Has("dump") || query != result.Str()) {
  279. out << NUnitTest::ColoredDiff(query, result.Str()) << Endl;
  280. }
  281. }
  282. NSQLTranslation::TSQLHints hints;
  283. auto lexer = SqlLexer(query, parseRes.Issues, settings);
  284. if (lexer && CollectSqlHints(*lexer, query, queryFile, settings.File, hints, parseRes.Issues,
  285. settings.MaxErrors, settings.Antlr4Parser)) {
  286. parseRes = NSQLTranslation::SqlASTToYql(query, *ast, hints, settings);
  287. }
  288. }
  289. } else {
  290. parseRes = NSQLTranslation::SqlToYql(query, settings);
  291. }
  292. }
  293. if (parseRes.Root) {
  294. TStringStream yqlProgram;
  295. parseRes.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  296. if (res.Has("yql")) {
  297. out << yqlProgram.Str();
  298. }
  299. if (res.Has("ann")) {
  300. TMemoryPool pool(1024);
  301. NYql::AnnotatePositions(*parseRes.Root, pool)->PrettyPrintTo(out, NYql::TAstPrintFlags::PerLine);
  302. }
  303. }
  304. bool hasError = false;
  305. if (!parseRes.Issues.Empty()) {
  306. parseRes.Issues.PrintWithProgramTo(Cerr, queryFile, query);
  307. hasError = AnyOf(parseRes.Issues, [](const auto& issue) { return issue.GetSeverity() == NYql::TSeverityIds::S_ERROR;});
  308. }
  309. if (!parseRes.IsOk() && !hasError) {
  310. hasError = true;
  311. Cerr << "No error reported, but no yql compiled result!" << Endl << Endl;
  312. }
  313. if (res.Has("test-format") && syntaxVersion == 1 && !hasError && parseRes.Root) {
  314. hasError = !TestFormat(query, settings, queryFile, parseRes, outFileNameFormat, res.Has("test-double-format"));
  315. }
  316. if (hasError) {
  317. ++errors;
  318. }
  319. }
  320. }
  321. return errors;
  322. }
  323. int main(int argc, char* argv[]) {
  324. try {
  325. return BuildAST(argc, argv);
  326. } catch (const yexception& e) {
  327. Cerr << "Caught exception:" << e.what() << Endl;
  328. return 1;
  329. } catch (...) {
  330. Cerr << "Caught exception" << Endl;
  331. return 1;
  332. }
  333. return 0;
  334. }