// yql_ast_ut.cpp - unit tests for parsing and printing the YQL AST.

  1. #include "yql_ast.h"
  2. #include "yql_ast_annotation.h"
  3. #include <library/cpp/testing/unittest/registar.h>
  4. #include <util/string/util.h>
  5. #include <util/system/sanitizers.h>
  6. namespace NYql {
  7. Y_UNIT_TEST_SUITE(TParseYqlAst) {
  8. constexpr TStringBuf TEST_PROGRAM =
  9. "(\n"
  10. "#comment\n"
  11. "(let mr_source (DataSource 'yamr 'cedar))\n"
  12. "(let x (Read! world mr_source (Key '('table (KeyString 'Input))) '('key 'value) '()))\n"
  13. "(let world (Left! x))\n"
  14. "(let table1 (Right! x))\n"
  15. "(let tresh (Int32 '100))\n"
  16. "(let table1low (Filter table1 (lambda '(item) (< (member item 'key) tresh))))\n"
  17. "(let mr_sink (DataSink 'yamr (quote cedar)))\n"
  18. "(let world (Write! world mr_sink (Key '('table (KeyString 'Output))) table1low '('('mode 'append))))\n"
  19. "(let world (Commit! world mr_sink))\n"
  20. "(return world)\n"
  21. ")";
  22. Y_UNIT_TEST(ParseAstTest) {
  23. TAstParseResult res = ParseAst(TEST_PROGRAM);
  24. UNIT_ASSERT(res.IsOk());
  25. UNIT_ASSERT(res.Root->IsList());
  26. UNIT_ASSERT(res.Issues.Empty());
  27. }
  28. Y_UNIT_TEST(ParseAstTestPerf) {
  29. #ifdef WITH_VALGRIND
  30. const ui32 n = 1000;
  31. #else
  32. const ui32 n = NSan::PlainOrUnderSanitizer(100000, 1000);
  33. #endif
  34. auto t1 = TInstant::Now();
  35. for (ui32 i = 0; i < n; ++i) {
  36. TAstParseResult res = ParseAst(TEST_PROGRAM);
  37. UNIT_ASSERT(res.IsOk());
  38. UNIT_ASSERT(res.Root->IsList());
  39. UNIT_ASSERT(res.Issues.Empty());
  40. }
  41. auto t2 = TInstant::Now();
  42. Cout << t2 - t1 << Endl;
  43. }
  44. Y_UNIT_TEST(PrintAstTest) {
  45. TAstParseResult ast = ParseAst(TEST_PROGRAM);
  46. UNIT_ASSERT(ast.IsOk());
  47. TString printedProgram = ast.Root->ToString();
  48. UNIT_ASSERT(printedProgram.find('\n') == TString::npos);
  49. TAstParseResult parsedAst = ParseAst(printedProgram);
  50. UNIT_ASSERT(parsedAst.IsOk());
  51. }
  52. Y_UNIT_TEST(PrettyPrintAst) {
  53. const ui32 testFlags[] = {
  54. TAstPrintFlags::Default,
  55. TAstPrintFlags::PerLine,
  56. //TAstPrintFlags::ShortQuote, //-- generates invalid AST
  57. TAstPrintFlags::PerLine | TAstPrintFlags::ShortQuote
  58. };
  59. TAstParseResult ast = ParseAst(TEST_PROGRAM);
  60. UNIT_ASSERT(ast.IsOk());
  61. for (ui32 i = 0; i < Y_ARRAY_SIZE(testFlags); ++i) {
  62. ui32 prettyFlags = testFlags[i];
  63. TString printedProgram1 = ast.Root->ToString(prettyFlags);
  64. TAstParseResult parsedAst = ParseAst(printedProgram1);
  65. UNIT_ASSERT(parsedAst.IsOk());
  66. TString printedProgram2 = parsedAst.Root->ToString(prettyFlags);
  67. UNIT_ASSERT_STRINGS_EQUAL(printedProgram1, printedProgram2);
  68. }
  69. }
  70. Y_UNIT_TEST(AnnotatedAstPrint) {
  71. TMemoryPool pool(4096);
  72. TAstParseResult ast = ParseAst(TEST_PROGRAM, &pool);
  73. UNIT_ASSERT(ast.IsOk());
  74. TAstParseResult astWithPositions;
  75. astWithPositions.Root = AnnotatePositions(*ast.Root, pool);
  76. UNIT_ASSERT(!!astWithPositions.Root);
  77. TString sAnn = astWithPositions.Root->ToString();
  78. UNIT_ASSERT(false == sAnn.empty());
  79. TAstParseResult annRes = ParseAst(sAnn);
  80. UNIT_ASSERT(annRes.IsOk());
  81. TAstParseResult removedAnn;
  82. removedAnn.Root = RemoveAnnotations(*annRes.Root, pool);
  83. UNIT_ASSERT(!!removedAnn.Root);
  84. TString strOriginal = ast.Root->ToString();
  85. TString strAnnRemoved = removedAnn.Root->ToString();
  86. UNIT_ASSERT_VALUES_EQUAL(strOriginal, strAnnRemoved);
  87. astWithPositions.Root->GetChild(0)->SetContent("100:100", pool);
  88. TAstParseResult appliedPositionsAnn;
  89. appliedPositionsAnn.Root = ApplyPositionAnnotations(*astWithPositions.Root, 0, pool);
  90. UNIT_ASSERT(appliedPositionsAnn.Root);
  91. TAstParseResult removedAnn2;
  92. removedAnn2.Root = RemoveAnnotations(*appliedPositionsAnn.Root, pool);
  93. UNIT_ASSERT(removedAnn2.Root);
  94. UNIT_ASSERT_VALUES_EQUAL(removedAnn2.Root->GetPosition().Row, 100);
  95. }
  96. template <typename TCharType>
  97. void TestGoodArbitraryAtom(
  98. const TString& program,
  99. const TBasicStringBuf<TCharType>& expectedValue)
  100. {
  101. TAstParseResult ast = ParseAst(program);
  102. UNIT_ASSERT(ast.IsOk());
  103. UNIT_ASSERT_VALUES_EQUAL(ast.Root->GetChildrenCount(), 1);
  104. TAstNode* atom = ast.Root->GetChild(0);
  105. UNIT_ASSERT(atom->IsAtom());
  106. UNIT_ASSERT_STRINGS_EQUAL_C(
  107. atom->GetContent(),
  108. TString((char*)expectedValue.data(), expectedValue.size()),
  109. program);
  110. }
  111. Y_UNIT_TEST(GoodArbitraryAtom) {
  112. TestGoodArbitraryAtom("(\"\")", TStringBuf());
  113. TestGoodArbitraryAtom("(\" 1 a 3 b \")", TStringBuf(" 1 a 3 b "));
  114. ui8 expectedHex[] = { 0xab, 'c', 'd', 0x00 };
  115. TestGoodArbitraryAtom("(\"\\xabcd\")", TBasicStringBuf<ui8>(expectedHex));
  116. TestGoodArbitraryAtom("(\" \\x3d \")", TStringBuf(" \x3d "));
  117. ui8 expectedOctal[] = { 056, '7', '8', 0x00 };
  118. TestGoodArbitraryAtom("(\"\\05678\")", TBasicStringBuf<ui8>(expectedOctal));
  119. TestGoodArbitraryAtom("(\" \\056 \")", TStringBuf(" \056 "));
  120. TestGoodArbitraryAtom("(\" \\177 \")", TStringBuf(" \177 "));
  121. TestGoodArbitraryAtom("(\" \\377 \")", TStringBuf(" \377 "));
  122. TestGoodArbitraryAtom("(\" \\477 \")", TStringBuf(" 477 "));
  123. {
  124. ui8 expected1[] = { 0x01, 0x00 };
  125. TestGoodArbitraryAtom("(\"\\u0001\")", TBasicStringBuf<ui8>(expected1));
  126. ui8 expected2[] = { 0xE1, 0x88, 0xB4, 0x00 };
  127. TestGoodArbitraryAtom("(\"\\u1234\")", TBasicStringBuf<ui8>(expected2));
  128. ui8 expected3[] = { 0xef, 0xbf, 0xbf, 0x00 };
  129. TestGoodArbitraryAtom("(\"\\uffff\")", TBasicStringBuf<ui8>(expected3));
  130. }
  131. {
  132. ui8 expected1[] = { 0x01, 0x00 };
  133. TestGoodArbitraryAtom("(\"\\U00000001\")", TBasicStringBuf<ui8>(expected1));
  134. ui8 expected2[] = { 0xf4, 0x8f, 0xbf, 0xbf, 0x00 };
  135. TestGoodArbitraryAtom("(\"\\U0010ffff\")", TBasicStringBuf<ui8>(expected2));
  136. }
  137. TestGoodArbitraryAtom("(\"\\t\")", TStringBuf("\t"));
  138. TestGoodArbitraryAtom("(\"\\n\")", TStringBuf("\n"));
  139. TestGoodArbitraryAtom("(\"\\r\")", TStringBuf("\r"));
  140. TestGoodArbitraryAtom("(\"\\b\")", TStringBuf("\b"));
  141. TestGoodArbitraryAtom("(\"\\f\")", TStringBuf("\f"));
  142. TestGoodArbitraryAtom("(\"\\a\")", TStringBuf("\a"));
  143. TestGoodArbitraryAtom("(\"\\v\")", TStringBuf("\v"));
  144. }
  145. void TestBadArbitraryAtom(
  146. const TString& program,
  147. const TString& expectedError)
  148. {
  149. TAstParseResult ast = ParseAst(program);
  150. UNIT_ASSERT(false == ast.IsOk());
  151. UNIT_ASSERT(false == !!ast.Root);
  152. UNIT_ASSERT(false == ast.Issues.Empty());
  153. UNIT_ASSERT_STRINGS_EQUAL(ast.Issues.begin()->GetMessage(), expectedError);
  154. }
  155. Y_UNIT_TEST(BadArbitraryAtom) {
  156. TestBadArbitraryAtom("(a\")", "Unexpected \"");
  157. TestBadArbitraryAtom("(\"++++\"11111)", "Unexpected end of \"");
  158. TestBadArbitraryAtom("(\"\\", "Expected escape sequence");
  159. TestBadArbitraryAtom("(\"\\\")", "Unexpected end of atom");
  160. TestBadArbitraryAtom("(\"abc)", "Unexpected end of atom");
  161. TestBadArbitraryAtom("(\"\\018\")", "Invalid octal value");
  162. TestBadArbitraryAtom("(\"\\01\")", "Invalid octal value");
  163. TestBadArbitraryAtom("(\"\\378\")", "Invalid octal value");
  164. TestBadArbitraryAtom("(\"\\x1g\")", "Invalid hexadecimal value");
  165. TestBadArbitraryAtom("(\"\\xf\")", "Invalid hexadecimal value");
  166. TestBadArbitraryAtom("(\"\\u\")", "Invalid unicode value");
  167. TestBadArbitraryAtom("(\"\\u1\")", "Invalid unicode value");
  168. TestBadArbitraryAtom("(\"\\u12\")", "Invalid unicode value");
  169. TestBadArbitraryAtom("(\"\\u123\")", "Invalid unicode value");
  170. TestBadArbitraryAtom("(\"\\ughij\")", "Invalid unicode value");
  171. TestBadArbitraryAtom("(\"\\U\")", "Invalid unicode value");
  172. TestBadArbitraryAtom("(\"\\U11\")", "Invalid unicode value");
  173. TestBadArbitraryAtom("(\"\\U1122\")", "Invalid unicode value");
  174. TestBadArbitraryAtom("(\"\\U112233\")", "Invalid unicode value");
  175. TestBadArbitraryAtom("(\"\\Ughijklmn\")", "Invalid unicode value");
  176. TestBadArbitraryAtom("(\"\\U00110000\")", "Invalid unicode value");
  177. TestBadArbitraryAtom("(\"\\U00123456\")", "Invalid unicode value");
  178. TestBadArbitraryAtom("(\"\\U00200000\")", "Invalid unicode value");
  179. TestBadArbitraryAtom("(\"\\Uffffffff\")", "Invalid unicode value");
  180. // surrogate range
  181. TestBadArbitraryAtom("(\"\\ud800\")", "Invalid unicode value");
  182. TestBadArbitraryAtom("(\"\\udfff\")", "Invalid unicode value");
  183. TestBadArbitraryAtom("(\"\\U0000d800\")", "Invalid unicode value");
  184. TestBadArbitraryAtom("(\"\\U0000dfff\")", "Invalid unicode value");
  185. TestBadArbitraryAtom("(x\"ag\")", "Invalid binary value");
  186. TestBadArbitraryAtom("(x\"abc\")", "Invalid binary value");
  187. TestBadArbitraryAtom("(x\"abcd)", "Invalid binary value");
  188. TestBadArbitraryAtom("(x\"abcd", "Unexpected end of atom");
  189. }
  190. void ParseAndPrint(const TString& program, const TString& expected) {
  191. TAstParseResult ast = ParseAst(program);
  192. UNIT_ASSERT_C(ast.IsOk(), program);
  193. TString result = ast.Root->ToString();
  194. UNIT_ASSERT_STRINGS_EQUAL_C(result, expected, program);
  195. }
  196. Y_UNIT_TEST(ArbitraryAtomEscaping) {
  197. ParseAndPrint(
  198. "(\"\\t\\n\\r\\b\\a\\f\\v\")",
  199. "(\"\\t\\n\\r\\b\\a\\f\\v\")");
  200. ParseAndPrint("(\"\\u1234\")", "(\"\\u1234\")");
  201. ParseAndPrint("(\"\\u1234abcd\")", "(\"\\u1234abcd\")");
  202. ParseAndPrint("(\"\\177\")", "(\"\\x7F\")");
  203. ParseAndPrint("(\"\\377\")", "(\"\\xFF\")");
  204. ParseAndPrint(
  205. "(\"тестовая строка\")",
  206. "(\"\\u0442\\u0435\\u0441\\u0442\\u043E\\u0432\\u0430"
  207. "\\u044F \\u0441\\u0442\\u0440\\u043E\\u043A\\u0430\")");
  208. ParseAndPrint("(\"\")", "(\"\")");
  209. }
  210. Y_UNIT_TEST(BinaryAtom) {
  211. ParseAndPrint("(x\"abcdef\")", "(x\"ABCDEF\")");
  212. ParseAndPrint("(x\"aBcDeF\")", "(x\"ABCDEF\")");
  213. ParseAndPrint("(x)", "(x)");
  214. ParseAndPrint("(x x)", "(x x)");
  215. ParseAndPrint("(x\"\" x)", "(x\"\" x)");
  216. ParseAndPrint("(x\"ab12cd\" x)", "(x\"AB12CD\" x)");
  217. }
  218. void ParseAndAdaptPrint(const TString& program, const TString& expected) {
  219. TAstParseResult ast = ParseAst(program);
  220. UNIT_ASSERT_C(ast.IsOk(), program);
  221. TString result = ast.Root->ToString(
  222. TAstPrintFlags::ShortQuote | TAstPrintFlags::PerLine |
  223. TAstPrintFlags::AdaptArbitraryContent);
  224. RemoveAll(result, '\n'); // for simplify expected string
  225. UNIT_ASSERT_STRINGS_EQUAL_C(result, expected, program);
  226. }
  227. Y_UNIT_TEST(AdaptArbitraryAtom) {
  228. ParseAndAdaptPrint("(\"test\")", "(test)");
  229. ParseAndAdaptPrint("(\"another test\")", "(\"another test\")");
  230. ParseAndAdaptPrint("(\"braces(in)test\")", "(\"braces(in)test\")");
  231. ParseAndAdaptPrint("(\"escaped\\u1234sequence\")", "(\"escaped\\u1234sequence\")");
  232. ParseAndAdaptPrint("(\"escaped\\x41sequence\")", "(escapedAsequence)");
  233. ParseAndAdaptPrint("(\"\")", "(\"\")");
  234. }
  235. void ParseError(const TString& program) {
  236. TAstParseResult ast = ParseAst(program);
  237. UNIT_ASSERT_C(!ast.IsOk(), program);
  238. }
  239. Y_UNIT_TEST(MultilineAtomTrivial) {
  240. TStringStream s;
  241. for (ui32 i = 4; i < 13; ++i) {
  242. TStringStream prog;
  243. prog << "(";
  244. for (ui32 j = 0; j < i; ++j) {
  245. prog << "@";
  246. }
  247. prog << ")";
  248. TAstParseResult ast = ParseAst(prog.Str());
  249. s << prog.Str() << " --> ";
  250. if (ast.IsOk()) {
  251. UNIT_ASSERT_VALUES_EQUAL(ast.Root->GetChildrenCount(), 1);
  252. TAstNode* atom = ast.Root->GetChild(0);
  253. UNIT_ASSERT(atom->IsAtom());
  254. UNIT_ASSERT(atom->GetFlags() & TNodeFlags::MultilineContent);
  255. s << "'" << atom->GetContent() << "'" << Endl;
  256. } else {
  257. s << "Error" << Endl;
  258. }
  259. }
  260. //~ Cerr << s.Str() << Endl;
  261. UNIT_ASSERT_NO_DIFF(
  262. "(@@@@) --> ''\n"
  263. "(@@@@@) --> '@'\n"
  264. "(@@@@@@) --> Error\n"
  265. "(@@@@@@@) --> Error\n"
  266. "(@@@@@@@@) --> '@@'\n"
  267. "(@@@@@@@@@) --> '@@@'\n"
  268. "(@@@@@@@@@@) --> Error\n"
  269. "(@@@@@@@@@@@) --> Error\n"
  270. "(@@@@@@@@@@@@) --> '@@@@'\n",
  271. s.Str()
  272. );
  273. }
  274. Y_UNIT_TEST(MultilineAtom) {
  275. TString s1 = "(@@multi \n"
  276. "line \n"
  277. "string@@)";
  278. ParseAndPrint(s1, s1);
  279. TString s2 = "(@@multi \n"
  280. "l@ine \n"
  281. "string@@)";
  282. ParseAndPrint(s2, s2);
  283. TString s3 = "(@@multi \n"
  284. "l@@@ine \n"
  285. "string@@)";
  286. ParseError(s3);
  287. TString s4 = "(@@multi \n"
  288. "l@@@@ine \n"
  289. "string@@)";
  290. ParseAndPrint(s4, s4);
  291. TString s5 = "(@@\n"
  292. "one@\n"
  293. "two@@@@\n"
  294. "four@@@@@@@@\n"
  295. "@@@@two\n"
  296. "@one\n"
  297. "@@)";
  298. TAstParseResult ast = ParseAst(s5);
  299. UNIT_ASSERT(ast.IsOk());
  300. UNIT_ASSERT_VALUES_EQUAL(ast.Root->GetChildrenCount(), 1);
  301. TAstNode* atom = ast.Root->GetChild(0);
  302. UNIT_ASSERT(atom->IsAtom());
  303. UNIT_ASSERT(atom->GetFlags() & TNodeFlags::MultilineContent);
  304. TString expected = "\n"
  305. "one@\n"
  306. "two@@\n"
  307. "four@@@@\n"
  308. "@@two\n"
  309. "@one\n";
  310. UNIT_ASSERT_STRINGS_EQUAL(atom->GetContent(), expected);
  311. TString printResult = ast.Root->ToString();
  312. UNIT_ASSERT_STRINGS_EQUAL(s5, printResult);
  313. }
  314. Y_UNIT_TEST(UnicodePrettyPrint) {
  315. ParseAndAdaptPrint("(\"абв αβγ ﬡ\")", "(\"\\u0430\\u0431\\u0432 \\u03B1\\u03B2\\u03B3 \\uFB21\")");
  316. }
  317. Y_UNIT_TEST(SerializeQuotedEmptyAtom) {
  318. TMemoryPool pool(1024);
  319. TPosition pos(1, 1);
  320. TAstNode* empty = TAstNode::Quote(pos, pool, TAstNode::NewAtom(pos, "", pool));
  321. TString expected = "'\"\"";
  322. UNIT_ASSERT_STRINGS_EQUAL(empty->ToString(), expected);
  323. TString pretty = empty->ToString(TAstPrintFlags::ShortQuote | TAstPrintFlags::PerLine |
  324. TAstPrintFlags::AdaptArbitraryContent);
  325. RemoveAll(pretty, '\n');
  326. UNIT_ASSERT_EQUAL(pretty, expected);
  327. pretty = empty->ToString(TAstPrintFlags::ShortQuote | TAstPrintFlags::PerLine);
  328. RemoveAll(pretty, '\n');
  329. UNIT_ASSERT_EQUAL(pretty, expected);
  330. }
  331. }
  332. } // namespace NYql