sql_ut.h 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  2. #include <yql/essentials/sql/sql.h>
  3. #include <yql/essentials/sql/v1/sql.h>
  4. #include <util/generic/map.h>
  5. #include <library/cpp/regex/pcre/pcre.h>
  6. #include <library/cpp/testing/unittest/registar.h>
  7. #include <util/string/split.h>
  8. #include <deque>
  9. #include <unordered_set>
  10. using namespace NSQLTranslation;
  11. enum class EDebugOutput {
  12. None,
  13. ToCerr,
  14. };
  15. const ui32 PRETTY_FLAGS = NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote |
  16. NYql::TAstPrintFlags::AdaptArbitraryContent;
  17. inline TString Err2Str(NYql::TAstParseResult& res, EDebugOutput debug = EDebugOutput::None) {
  18. TStringStream s;
  19. res.Issues.PrintTo(s);
  20. if (debug == EDebugOutput::ToCerr) {
  21. Cerr << s.Str() << Endl;
  22. }
  23. return s.Str();
  24. }
  25. inline NYql::TAstParseResult SqlToYqlWithMode(const TString& query, NSQLTranslation::ESqlMode mode = NSQLTranslation::ESqlMode::QUERY, size_t maxErrors = 10, const TString& provider = {},
  26. EDebugOutput debug = EDebugOutput::None, bool ansiLexer = false, NSQLTranslation::TTranslationSettings settings = {})
  27. {
  28. google::protobuf::Arena arena;
  29. const auto service = provider ? provider : TString(NYql::YtProviderName);
  30. const TString cluster = "plato";
  31. settings.ClusterMapping[cluster] = service;
  32. settings.ClusterMapping["hahn"] = NYql::YtProviderName;
  33. settings.ClusterMapping["mon"] = NYql::SolomonProviderName;
  34. settings.MaxErrors = maxErrors;
  35. settings.Mode = mode;
  36. settings.Arena = &arena;
  37. settings.AnsiLexer = ansiLexer;
  38. settings.Antlr4Parser = false;
  39. settings.SyntaxVersion = 1;
  40. NSQLTranslation::TTranslators translators(
  41. nullptr,
  42. NSQLTranslationV1::MakeTranslator(),
  43. nullptr
  44. );
  45. auto res = SqlToYql(translators, query, settings);
  46. if (debug == EDebugOutput::ToCerr) {
  47. Err2Str(res, debug);
  48. }
  49. return res;
  50. }
  51. inline NYql::TAstParseResult SqlToYql(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
  52. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug);
  53. }
  54. inline NYql::TAstParseResult
  55. SqlToYqlWithSettings(const TString& query, const NSQLTranslation::TTranslationSettings& settings) {
  56. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, 10, {}, EDebugOutput::None, false, settings);
  57. }
  58. inline void ExpectFailWithError(const TString& query, const TString& error) {
  59. NYql::TAstParseResult res = SqlToYql(query);
  60. UNIT_ASSERT(!res.Root);
  61. UNIT_ASSERT_NO_DIFF(Err2Str(res), error);
  62. }
  63. inline void ExpectFailWithFuzzyError(const TString& query, const TString& errorRegex) {
  64. NYql::TAstParseResult res = SqlToYql(query);
  65. UNIT_ASSERT(!res.Root);
  66. UNIT_ASSERT(NPcre::TPcre<char>(errorRegex.c_str()).Matches(Err2Str(res)));
  67. }
  68. inline NYql::TAstParseResult SqlToYqlWithAnsiLexer(const TString& query, size_t maxErrors = 10, const TString& provider = {}, EDebugOutput debug = EDebugOutput::None) {
  69. bool ansiLexer = true;
  70. return SqlToYqlWithMode(query, NSQLTranslation::ESqlMode::QUERY, maxErrors, provider, debug, ansiLexer);
  71. }
  72. inline void ExpectFailWithErrorForAnsiLexer(const TString& query, const TString& error) {
  73. NYql::TAstParseResult res = SqlToYqlWithAnsiLexer(query);
  74. UNIT_ASSERT(!res.Root);
  75. UNIT_ASSERT_NO_DIFF(Err2Str(res), error);
  76. }
  77. inline TString GetPrettyPrint(const NYql::TAstParseResult& res) {
  78. TStringStream yqlProgram;
  79. res.Root->PrettyPrintTo(yqlProgram, NYql::TAstPrintFlags::PerLine | NYql::TAstPrintFlags::ShortQuote);
  80. return yqlProgram.Str();
  81. }
  82. inline TString Quote(const char* str) {
  83. return TStringBuilder() << "'\"" << str << "\"";
  84. }
  85. class TWordCountHive: public TMap<TString, unsigned> {
  86. public:
  87. TWordCountHive(std::initializer_list<TString> strings) {
  88. for (auto& str: strings) {
  89. emplace(str, 0);
  90. }
  91. }
  92. TWordCountHive(std::initializer_list<std::pair<const TString, unsigned>> list)
  93. : TMap(list)
  94. {
  95. }
  96. };
  97. typedef std::function<void (const TString& word, const TString& line)> TVerifyLineFunc;
  98. inline TString VerifyProgram(const NYql::TAstParseResult& res, TWordCountHive& wordCounter, TVerifyLineFunc verifyLine = TVerifyLineFunc()) {
  99. const auto programm = GetPrettyPrint(res);
  100. TVector<TString> yqlProgram;
  101. Split(programm, "\n", yqlProgram);
  102. for (const auto& line: yqlProgram) {
  103. for (auto& counterIter: wordCounter) {
  104. const auto& word = counterIter.first;
  105. auto pos = line.find(word);
  106. while (pos != TString::npos) {
  107. ++counterIter.second;
  108. if (verifyLine) {
  109. verifyLine(word, line);
  110. }
  111. pos = line.find(word, pos + word.length());
  112. }
  113. }
  114. }
  115. return programm;
  116. }
  117. inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints, TMaybe<bool> ansi) {
  118. TString pragma;
  119. if (ansi.Defined()) {
  120. pragma = *ansi ? "PRAGMA AnsiInForEmptyOrNullableItemsCollections;" :
  121. "PRAGMA DisableAnsiInForEmptyOrNullableItemsCollections;";
  122. }
  123. NYql::TAstParseResult res = SqlToYql(pragma + query);
  124. UNIT_ASSERT(res.Root);
  125. TVerifyLineFunc verifyLine = [&](const TString& word, const TString& line) {
  126. Y_UNUSED(word);
  127. if (!ansi.Defined()) {
  128. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('warnNoAnsi)"));
  129. } else if (*ansi) {
  130. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find("'('ansi)"));
  131. }
  132. for (auto& hint : expectedHints) {
  133. UNIT_ASSERT_VALUES_UNEQUAL(TString::npos, line.find(hint));
  134. }
  135. };
  136. TWordCountHive elementStat = {{TString("SqlIn"), 0}};
  137. VerifyProgram(res, elementStat, verifyLine);
  138. }
  139. inline void VerifySqlInHints(const TString& query, const THashSet<TString>& expectedHints) {
  140. VerifySqlInHints(query, expectedHints, false);
  141. VerifySqlInHints(query, expectedHints, true);
  142. }
  143. inline NSQLTranslation::TTranslationSettings GetSettingsWithS3Binding(const TString& name) {
  144. NSQLTranslation::TTranslationSettings settings;
  145. NSQLTranslation::TTableBindingSettings bindSettings;
  146. bindSettings.ClusterType = "s3";
  147. bindSettings.Settings["cluster"] = "cluster";
  148. bindSettings.Settings["path"] = "path";
  149. bindSettings.Settings["format"] = "format";
  150. bindSettings.Settings["compression"] = "ccompression";
  151. bindSettings.Settings["bar"] = "1";
  152. // schema is not validated in this test but should be valid YSON text
  153. bindSettings.Settings["schema"] = R"__("[
  154. "StructType";
  155. [
  156. [
  157. "key";
  158. [
  159. "DataType";
  160. "String"
  161. ]
  162. ];
  163. [
  164. "subkey";
  165. [
  166. "DataType";
  167. "String"
  168. ]
  169. ];
  170. [
  171. "value";
  172. [
  173. "DataType";
  174. "String"
  175. ]
  176. ]
  177. ]])__";
  178. bindSettings.Settings["partitioned_by"] = "[\"key\", \"subkey\"]";
  179. settings.Bindings[name] = bindSettings;
  180. return settings;
  181. }
  182. inline void AstBfs(NYql::TAstNode const* root, std::function<bool (NYql::TAstNode const*)> visitor) {
  183. std::deque<NYql::TAstNode const*> wishList{ root };
  184. std::unordered_set<NYql::TAstNode const*> visited;
  185. while(!wishList.empty()){
  186. auto v = wishList.front();
  187. wishList.pop_front();
  188. if (!visitor(v))
  189. return;
  190. visited.insert(v);
  191. if (v->IsList()) {
  192. for (ui32 i = 0; i != v->GetChildrenCount(); ++i) {
  193. auto child = v->GetChild(i);
  194. if (visited.find(child) == visited.cend()) {
  195. wishList.push_back(child);
  196. }
  197. }
  198. }
  199. }
  200. }
  201. inline const NYql::TAstNode* FindNodeByChildAtomContent(const NYql::TAstNode* root, uint32_t childIndex, TStringBuf name){
  202. const NYql::TAstNode* result = nullptr;
  203. AstBfs(root, [&result, childIndex, name](auto v) {
  204. if (v->IsList() && v->GetChildrenCount() > childIndex &&
  205. v->GetChild(childIndex)->IsAtom() && v->GetChild(childIndex)->GetContent() == name) {
  206. result = v;
  207. return false;
  208. }
  209. return true; });
  210. return result;
  211. }