// sql_ut_antlr4.h
  1. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  2. #include <yql/essentials/sql/sql.h>
  3. #include <util/generic/map.h>
  4. #include <library/cpp/testing/unittest/registar.h>
  5. #include <util/string/split.h>
  6. #include <deque>
  7. #include <unordered_set>
  8. using namespace NSQLTranslation;
  9. enum class EDebugOutput {
  10. None,
  11. ToCerr,
  12. };
  13. const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote |
  14. NYql::TAstPrintFlags::AdaptArbitraryContent;
  15. inline TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) {
  16. TStringStream s;
  17. res.Issues.PrintTo(s);
  18. if (debug == EDebugOutput::ToCerr) {
  19. Cerr << s.Str() << Endl;
  20. }
  21. return s.Str();
  22. }
  23. inline NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {},
  24. EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {})
  25. {
  26. google::protobuf::Arena arena;
  27. const auto service = provider ? provider : TString(NYql::YtProviderName);
  28. const TString cluster = "plato";
  29. settings.ClusterMapping[cluster] = service;
  30. settings.ClusterMapping["hahn"] = NYql::YtProviderName;
  31. settings.ClusterMapping["mon"] = NYql::SolomonProviderName;
  32. settings.MaxErrors = maxErrors;
  33. settings.Mode = mode;
  34. settings.Arena = &arena;
  35. settings.AnsiLexer = ansiLexer;
  36. settings.Antlr4Parser = true;
  37. settings.SyntaxVersion = 1;
  38. auto res = SqlToYql(query, settings);
  39. if (debug == EDebugOutput::ToCerr) {
  40. Err2Str(res, debug);
  41. }
  42. return res;
  43. }
  44. inline NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
  45. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug);
  46. }
  47. inline NYql::TAstParseResult SqlToYqlWithSettings(const TString& query, const NSQLTranslation::TTranslationSettings& settings) {
  48. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, 10, {}, EDebugOutput::None, false, settings);
  49. }
  50. inline void ExpectFailWithError(const TString& query, const TString& error) {
  51. NYql::TAstParseResult res = SqlToYql(query);
  52. UNIT_ASSERT(!res.Root);
  53. UNIT_ASSERT_NO_DIFF(Err2Str(res), error);
  54. }
  55. inline NYql::TAstParseResult SqlToYqlWithAnsiLexer(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
  56. bool ansiLexer = true;
  57. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug, ansiLexer);
  58. }
  59. inline void ExpectFailWithErrorForAnsiLexer(const TString& query, const TString& error) {
  60. NYql::TAstParseResult res = SqlToYqlWithAnsiLexer(query);
  61. UNIT_ASSERT(!res.Root);
  62. UNIT_ASSERT_NO_DIFF(Err2Str(res), error);
  63. }
  64. inline TString GetPrettyPrint(const NYql::TAstParseResult& res) {
  65. TStringStream yqlProgram;
  66. res.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  67. return yqlProgram.Str();
  68. }
  69. inline TString Quote(const char* str) {
  70. return TStringBuilder() << "'\"" << str << "\"";
  71. }
  72. class TWordCountHive: public TMap<TString, unsigned> {
  73. public:
  74. TWordCountHive(std::initializer_list<TString> strings) {
  75. for (auto& str: strings) {
  76. emplace(str, 0);
  77. }
  78. }
  79. TWordCountHive(std::initializer_list<std::pair<const TString, unsigned>> list)
  80. : TMap(list)
  81. {
  82. }
  83. };
  84. typedef std::function<void (const TString& word, const TString& line)> TVerifyLineFunc;
  85. inline TString VerifyProgram(const NYql::TAstParseResult& res, TWordCountHive& wordCounter, TVerifyLineFunc verifyLine = TVerifyLineFunc()) {
  86. const auto programm = GetPrettyPrint(res);
  87. TVector<TString> yqlProgram;
  88. Split(programm, "\n", yqlProgram);
  89. for (const auto& line: yqlProgram) {
  90. for (auto& counterIter: wordCounter) {
  91. const auto& word = counterIter.first;
  92. auto pos = line.find(word);
  93. while (pos != TString::npos) {
  94. ++counterIter.second;
  95. if (verifyLine) {
  96. verifyLine(word, line);
  97. }
  98. pos = line.find(word, pos + word.length());
  99. }
  100. }
  101. }
  102. return programm;
  103. }
  104. inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints, TMaybe<bool> ansi) {
  105. TString pragma;
  106. if (ansi.Defined()) {
  107. pragma = *ansi ? "PRAGMA AnsiInForEmptyOrNullableItemsCollections;" :
  108. "PRAGMA DisableAnsiInForEmptyOrNullableItemsCollections;";
  109. }
  110. NYql::TAstParseResult res = SqlToYql(pragma + query);
  111. UNIT_ASSERT(res.Root);
  112. TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
  113. Y_UNUSED(word);
  114. if (!ansi.Defined()) {
  115. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('warnNoAnsi)"));
  116. } else if (*ansi) {
  117. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('ansi)"));
  118. }
  119. for (auto& hint : expectedHints) {
  120. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(hint));
  121. }
  122. };
  123. TWordCountHive elementStat = {{TString("SqlIn"), 0}};
  124. VerifyProgram(res, elementStat, verifyLine);
  125. }
  126. inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints) {
  127. VerifySqlInHints(query, expectedHints, false);
  128. VerifySqlInHints(query, expectedHints, true);
  129. }
  130. inline NSQLTranslation::TTranslationSettings GetSettingsWithS3Binding(const TString& name) {
  131. NSQLTranslation::TTranslationSettings settings;
  132. NSQLTranslation::TTableBindingSettings bindSettings;
  133. bindSettings.ClusterType = "s3";
  134. bindSettings.Settings["cluster"] = "cluster";
  135. bindSettings.Settings["path"] = "path";
  136. bindSettings.Settings["format"] = "format";
  137. bindSettings.Settings["compression"] = "ccompression";
  138. bindSettings.Settings["bar"] = "1";
  139. // schema is not validated in this test but should be valid YSON text
  140. bindSettings.Settings["schema"] = R"__("[
  141. "StructType";
  142. [
  143. [
  144. "key";
  145. [
  146. "DataType";
  147. "String"
  148. ]
  149. ];
  150. [
  151. "subkey";
  152. [
  153. "DataType";
  154. "String"
  155. ]
  156. ];
  157. [
  158. "value";
  159. [
  160. "DataType";
  161. "String"
  162. ]
  163. ]
  164. ]])__";
  165. bindSettings.Settings["partitioned_by"] = "[\"key\", \"subkey\"]";
  166. settings.Bindings[name] = bindSettings;
  167. return settings;
  168. }
  169. inline void AstBfs(NYql::TAstNode const* root, std::function<bool (NYql::TAstNode const*)> visitor) {
  170. std::deque<NYql::TAstNode const*> wishList{ root };
  171. std::unordered_set<NYql::TAstNode const*> visited;
  172. while(!wishList.empty()){
  173. auto v = wishList.front();
  174. wishList.pop_front();
  175. if (!visitor(v))
  176. return;
  177. visited.insert(v);
  178. if (v->IsList()) {
  179. for (ui32 i = 0; i != v->GetChildrenCount(); ++i) {
  180. auto child = v->GetChild(i);
  181. if (visited.find(child) == visited.cend()) {
  182. wishList.push_back(child);
  183. }
  184. }
  185. }
  186. }
  187. }
  188. inline const NYql::TAstNode* FindNodeByChildAtomContent(const NYql::TAstNode* root, uint32_t childIndex, TStringBuf name){
  189. const NYql::TAstNode* result = nullptr;
  190. AstBfs(root, [&result, childIndex, name](auto v) {
  191. if (v->IsList() && v->GetChildrenCount() > childIndex &&
  192. v->GetChild(childIndex)->IsAtom() && v->GetChild(childIndex)->GetContent() == name) {
  193. result = v;
  194. return false;
  195. }
  196. return true; });
  197. return result;
  198. }