sql_ut.h 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  2. #include <yql/essentials/sql/sql.h>
  3. #include <yql/essentials/sql/v1/sql.h>
  4. #include <yql/essentials/sql/v1/lexer/antlr3/lexer.h>
  5. #include <yql/essentials/sql/v1/lexer/antlr3_ansi/lexer.h>
  6. #include <yql/essentials/sql/v1/proto_parser/antlr3/proto_parser.h>
  7. #include <yql/essentials/sql/v1/proto_parser/antlr3_ansi/proto_parser.h>
  8. #include <util/generic/map.h>
  9. #include <library/cpp/regex/pcre/pcre.h>
  10. #include <library/cpp/testing/unittest/registar.h>
  11. #include <util/string/split.h>
  12. #include <deque>
  13. #include <unordered_set>
  14. using namespace NSQLTranslation;
// Selects whether the helpers in this header echo collected issues to Cerr.
enum class EDebugOutput {
    None,   // keep the issues text only in the returned value
    ToCerr, // additionally write the issues text to Cerr
};
// Bitwise OR of the AST print flags PerLine, ShortQuote and AdaptArbitraryContent.
const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote |
                          NYql::TAstPrintFlags::AdaptArbitraryContent;
  21. inline TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) {
  22. TStringStream s;
  23. res.Issues.PrintTo(s);
  24. if (debug == EDebugOutput::ToCerr) {
  25. Cerr << s.Str() << Endl;
  26. }
  27. return s.Str();
  28. }
  29. inline NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {},
  30. EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {})
  31. {
  32. google::protobuf::Arena arena;
  33. const auto service = provider ? provider : TString(NYql::YtProviderName);
  34. const TString cluster = "plato";
  35. settings.ClusterMapping[cluster] = service;
  36. settings.ClusterMapping["hahn"] = NYql::YtProviderName;
  37. settings.ClusterMapping["mon"] = NYql::SolomonProviderName;
  38. settings.MaxErrors = maxErrors;
  39. settings.Mode = mode;
  40. settings.Arena = &arena;
  41. settings.AnsiLexer = ansiLexer;
  42. settings.Antlr4Parser = false;
  43. settings.SyntaxVersion = 1;
  44. NSQLTranslationV1::TLexers lexers;
  45. lexers.Antlr3 = NSQLTranslationV1::MakeAntlr3LexerFactory();
  46. lexers.Antlr3Ansi = NSQLTranslationV1::MakeAntlr3AnsiLexerFactory();
  47. NSQLTranslationV1::TParsers parsers;
  48. parsers.Antlr3 = NSQLTranslationV1::MakeAntlr3ParserFactory();
  49. parsers.Antlr3Ansi = NSQLTranslationV1::MakeAntlr3AnsiParserFactory();
  50. NSQLTranslation::TTranslators translators(
  51. nullptr,
  52. NSQLTranslationV1::MakeTranslator(lexers, parsers),
  53. nullptr
  54. );
  55. auto res = SqlToYql(translators, query, settings);
  56. if (debug == EDebugOutput::ToCerr) {
  57. Err2Str(res, debug);
  58. }
  59. return res;
  60. }
  61. inline NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
  62. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug);
  63. }
  64. inline NYql::TAstParseResult
  65. SqlToYqlWithSettings(const TString& query, const NSQLTranslation::TTranslationSettings& settings) {
  66. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, 10, {}, EDebugOutput::None, false, settings);
  67. }
  68. inline void ExpectFailWithError(const TString& query, const TString& error) {
  69. NYql::TAstParseResult res = SqlToYql(query);
  70. UNIT_ASSERT(!res.Root);
  71. UNIT_ASSERT_NO_DIFF(Err2Str(res), error);
  72. }
  73. inline void ExpectFailWithFuzzyError(const TString& query, const TString& errorRegex) {
  74. NYql::TAstParseResult res = SqlToYql(query);
  75. UNIT_ASSERT(!res.Root);
  76. UNIT_ASSERT(NPcre::TPcre<char>(errorRegex.c_str()).Matches(Err2Str(res)));
  77. }
  78. inline NYql::TAstParseResult SqlToYqlWithAnsiLexer(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
  79. bool ansiLexer = true;
  80. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug, ansiLexer);
  81. }
  82. inline void ExpectFailWithErrorForAnsiLexer(const TString& query, const TString& error) {
  83. NYql::TAstParseResult res = SqlToYqlWithAnsiLexer(query);
  84. UNIT_ASSERT(!res.Root);
  85. UNIT_ASSERT_NO_DIFF(Err2Str(res), error);
  86. }
  87. inline TString GetPrettyPrint(const NYql::TAstParseResult& res) {
  88. TStringStream yqlProgram;
  89. res.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  90. return yqlProgram.Str();
  91. }
  92. inline TString Quote(const char* str) {
  93. return TStringBuilder() << "'\"" << str << "\"";
  94. }
  95. class TWordCountHive: public TMap<TString, unsigned> {
  96. public:
  97. TWordCountHive(std::initializer_list<TString> strings) {
  98. for (auto& str: strings) {
  99. emplace(str, 0);
  100. }
  101. }
  102. TWordCountHive(std::initializer_list<std::pair<const TString, unsigned>> list)
  103. : TMap(list)
  104. {
  105. }
  106. };
  107. typedef std::function<void (const TString& word, const TString& line)> TVerifyLineFunc;
  108. inline TString VerifyProgram(const NYql::TAstParseResult& res, TWordCountHive& wordCounter, TVerifyLineFunc verifyLine = TVerifyLineFunc()) {
  109. const auto programm = GetPrettyPrint(res);
  110. TVector<TString> yqlProgram;
  111. Split(programm, "\n", yqlProgram);
  112. for (const auto& line: yqlProgram) {
  113. for (auto& counterIter: wordCounter) {
  114. const auto& word = counterIter.first;
  115. auto pos = line.find(word);
  116. while (pos != TString::npos) {
  117. ++counterIter.second;
  118. if (verifyLine) {
  119. verifyLine(word, line);
  120. }
  121. pos = line.find(word, pos + word.length());
  122. }
  123. }
  124. }
  125. return programm;
  126. }
  127. inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints, TMaybe<bool> ansi) {
  128. TString pragma;
  129. if (ansi.Defined()) {
  130. pragma = *ansi ? "PRAGMA AnsiInForEmptyOrNullableItemsCollections;" :
  131. "PRAGMA DisableAnsiInForEmptyOrNullableItemsCollections;";
  132. }
  133. NYql::TAstParseResult res = SqlToYql(pragma + query);
  134. UNIT_ASSERT(res.Root);
  135. TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
  136. Y_UNUSED(word);
  137. if (!ansi.Defined()) {
  138. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('warnNoAnsi)"));
  139. } else if (*ansi) {
  140. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('ansi)"));
  141. }
  142. for (auto& hint : expectedHints) {
  143. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(hint));
  144. }
  145. };
  146. TWordCountHive elementStat = {{TString("SqlIn"), 0}};
  147. VerifyProgram(res, elementStat, verifyLine);
  148. }
  149. inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints) {
  150. VerifySqlInHints(query, expectedHints, false);
  151. VerifySqlInHints(query, expectedHints, true);
  152. }
// Builds translation settings that contain a single table binding registered
// under `name`. The binding has ClusterType "s3" and a fixed set of string
// settings (cluster, path, format, compression, bar, schema, partitioned_by).
// Returns the settings object by value.
inline NSQLTranslation::TTranslationSettings GetSettingsWithS3Binding(const TString& name) {
    NSQLTranslation::TTranslationSettings settings;
    NSQLTranslation::TTableBindingSettings bindSettings;
    bindSettings.ClusterType = "s3";
    bindSettings.Settings["cluster"] = "cluster";
    bindSettings.Settings["path"] = "path";
    bindSettings.Settings["format"] = "format";
    // NOTE(review): the value is spelled "ccompression" - confirm this is intentional
    // before changing it; expectations elsewhere may depend on the exact text.
    bindSettings.Settings["compression"] = "ccompression";
    bindSettings.Settings["bar"] = "1";
    // schema is not validated in this test but should be valid YSON text
    // NOTE(review): the raw string below starts with a double quote before the
    // opening bracket - confirm whether that leading quote is intended.
    bindSettings.Settings["schema"] = R"__("[
"StructType";
[
[
"key";
[
"DataType";
"String"
]
];
[
"subkey";
[
"DataType";
"String"
]
];
[
"value";
[
"DataType";
"String"
]
]
]])__";
    // The partitioning columns are given as a JSON-like list of two column names.
    bindSettings.Settings["partitioned_by"] = "[\"key\", \"subkey\"]";
    settings.Bindings[name] = bindSettings;
    return settings;
}
  192. inline void AstBfs(NYql::TAstNode const* root, std::function<bool (NYql::TAstNode const*)> visitor) {
  193. std::deque<NYql::TAstNode const*> wishList{ root };
  194. std::unordered_set<NYql::TAstNode const*> visited;
  195. while(!wishList.empty()){
  196. auto v = wishList.front();
  197. wishList.pop_front();
  198. if (!visitor(v))
  199. return;
  200. visited.insert(v);
  201. if (v->IsList()) {
  202. for (ui32 i = 0; i != v->GetChildrenCount(); ++i) {
  203. auto child = v->GetChild(i);
  204. if (visited.find(child) == visited.cend()) {
  205. wishList.push_back(child);
  206. }
  207. }
  208. }
  209. }
  210. }
  211. inline const NYql::TAstNode* FindNodeByChildAtomContent(const NYql::TAstNode* root, uint32_t childIndex, TStringBuf name){
  212. const NYql::TAstNode* result = nullptr;
  213. AstBfs(root, [&result, childIndex, name](auto v) {
  214. if (v->IsList() && v->GetChildrenCount() > childIndex &&
  215. v->GetChild(childIndex)->IsAtom() && v->GetChild(childIndex)->GetContent() == name) {
  216. result = v;
  217. return false;
  218. }
  219. return true; });
  220. return result;
  221. }