sql_match_recognize_ut.cpp 24 KB


  1. #include "sql_ut.h"
  2. #include <yql/essentials/core/sql_types/match_recognize.h>
  3. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  4. #include <yql/essentials/sql/sql.h>
  5. #include <library/cpp/testing/unittest/registar.h>
  6. NYql::TAstParseResult MatchRecognizeSqlToYql(const TString& query) {
  7. TString enablingPragma = R"(
  8. pragma FeatureR010="prototype";
  9. )";
  10. return SqlToYql(enablingPragma + query);
  11. }
  12. const NYql::TAstNode* FindMatchRecognizeParam(const NYql::TAstNode* root, TString name) {
  13. auto matchRecognizeBlock = FindNodeByChildAtomContent(root, 1, "match_recognize");
  14. UNIT_ASSERT(matchRecognizeBlock);
  15. auto paramNode = FindNodeByChildAtomContent(matchRecognizeBlock, 1, name);
  16. return paramNode->GetChild(2);
  17. }
  18. std::string_view GetAtom(const NYql::TAstNode* node) {
  19. UNIT_ASSERT(node);
  20. UNIT_ASSERT(node->IsAtom());
  21. return node->GetContent();
  22. }
  23. bool IsAtom(const NYql::TAstNode* node, std::string_view value) {
  24. UNIT_ASSERT_NO_DIFF(GetAtom(node), value);
  25. return true;
  26. }
  27. bool IsListOfSize(const NYql::TAstNode* node, ui32 size) {
  28. UNIT_ASSERT(node);
  29. UNIT_ASSERT(node->IsList());
  30. UNIT_ASSERT_EQUAL(node->GetChildrenCount(), size);
  31. return true;
  32. }
  33. template<typename Proj = std::identity>
  34. bool IsListOfAtoms(const NYql::TAstNode* node, std::vector<std::string_view> atoms, Proj proj = {}) {
  35. UNIT_ASSERT(IsListOfSize(node, atoms.size()));
  36. for (ui32 i = 0; i < atoms.size(); ++i) {
  37. const auto child = std::invoke(proj, node->GetChild(i));
  38. UNIT_ASSERT(IsAtom(child, atoms[i]));
  39. }
  40. return true;
  41. }
  42. const NYql::TAstNode* GetQuoted(const NYql::TAstNode* node) {
  43. UNIT_ASSERT(IsListOfSize(node, 2));
  44. UNIT_ASSERT(IsAtom(node->GetChild(0), "quote"));
  45. return node->GetChild(1);
  46. }
  47. bool IsLambda(const NYql::TAstNode* node, ui32 numberOfArgs) {
  48. UNIT_ASSERT(IsListOfSize(node, 3));
  49. UNIT_ASSERT(IsAtom(node->GetChild(0), "lambda"));
  50. return IsListOfSize(GetQuoted(node->GetChild(1)), numberOfArgs);
  51. }
  52. Y_UNIT_TEST_SUITE(MatchRecognize) {
  // Smallest MATCH_RECOGNIZE query used by the tests below: one pattern
  // variable and one DEFINE entry, no optional clauses.
  const char* minValidMatchRecognizeSql = R"(
USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
PATTERN ( A )
DEFINE A as A
)
)";
  61. Y_UNIT_TEST(EnabledWithPragma) {
  62. UNIT_ASSERT(not SqlToYql(minValidMatchRecognizeSql).IsOk());
  63. UNIT_ASSERT(MatchRecognizeSqlToYql(minValidMatchRecognizeSql).IsOk());
  64. }
  65. Y_UNIT_TEST(InputTableName) {
  66. auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
  67. UNIT_ASSERT(r.IsOk());
  68. auto input = FindMatchRecognizeParam(r.Root, "input");
  69. UNIT_ASSERT(IsAtom(input, "core"));
  70. }
  71. Y_UNIT_TEST(MatchRecognizeAndSample) {
  72. auto matchRecognizeAndSample = R"(
  73. USE plato;
  74. SELECT *
  75. FROM Input MATCH_RECOGNIZE(
  76. PATTERN ( A )
  77. DEFINE A as A
  78. ) TABLESAMPLE BERNOULLI(1.0)
  79. )";
  80. UNIT_ASSERT(not MatchRecognizeSqlToYql(matchRecognizeAndSample).IsOk());
  81. }
  82. Y_UNIT_TEST(NoPartitionBy) {
  83. auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
  84. UNIT_ASSERT(r.IsOk());
  85. auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector");
  86. UNIT_ASSERT(IsListOfSize(GetQuoted(partitionKeySelector->GetChild(2)), 0)); //empty tuple
  87. auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns");
  88. UNIT_ASSERT(IsListOfSize(GetQuoted(partitionColumns), 0)); //empty tuple
  89. }
  90. Y_UNIT_TEST(PartitionBy) {
  91. auto stmt = R"(
  92. USE plato;
  93. SELECT *
  94. FROM Input MATCH_RECOGNIZE(
  95. PARTITION BY col1 as c1, ~CAST(col1 as Int32) as invertedC1, c2
  96. PATTERN ( A )
  97. DEFINE A as A
  98. )
  99. )";
  100. auto r = MatchRecognizeSqlToYql(stmt);
  101. UNIT_ASSERT(r.IsOk());
  102. auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector");
  103. UNIT_ASSERT(IsListOfSize(GetQuoted(partitionKeySelector->GetChild(2)), 3));
  104. auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns");
  105. UNIT_ASSERT(IsListOfSize(GetQuoted(partitionColumns), 3));
  106. //TODO check partitioner lambdas(alias/no alias)
  107. }
  108. Y_UNIT_TEST(NoOrderBy) {
  109. auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
  110. UNIT_ASSERT(r.IsOk());
  111. auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits");
  112. UNIT_ASSERT(IsListOfAtoms(sortTraits, {"Void"}));
  113. }
  114. Y_UNIT_TEST(OrderBy) {
  115. auto stmt = R"(
  116. USE plato;
  117. SELECT *
  118. FROM Input MATCH_RECOGNIZE(
  119. ORDER BY col1, ~CAST(col1 as Int32), c2
  120. PATTERN ( A )
  121. DEFINE A as A
  122. )
  123. )";
  124. auto r = MatchRecognizeSqlToYql(stmt);
  125. UNIT_ASSERT(r.IsOk());
  126. auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits");
  127. UNIT_ASSERT(IsListOfSize(sortTraits, 4));
  128. UNIT_ASSERT(IsAtom(sortTraits->GetChild(0), "SortTraits"));
  129. UNIT_ASSERT(IsListOfSize(GetQuoted(sortTraits->GetChild(2)), 3));
  130. UNIT_ASSERT(IsListOfSize(GetQuoted(sortTraits->GetChild(3)->GetChild(2)), 3));
  131. }
  132. Y_UNIT_TEST(Measures) {
  133. auto stmt = R"(
  134. USE plato;
  135. SELECT *
  136. FROM Input MATCH_RECOGNIZE(
  137. MEASURES
  138. Last(Q.dt) as T,
  139. First(Y.key) as Key
  140. PATTERN ( Y Q )
  141. DEFINE Y as true
  142. )
  143. )";
  144. auto r = MatchRecognizeSqlToYql(stmt);
  145. UNIT_ASSERT(r.IsOk());
  146. const auto measures = FindMatchRecognizeParam(r.Root, "measures");
  147. UNIT_ASSERT(IsListOfSize(measures, 5));
  148. const auto patternVars = measures->GetChild(2);
  149. UNIT_ASSERT(IsListOfAtoms(GetQuoted(patternVars), {"Y", "Q"}, GetQuoted));
  150. const auto measuresNames = measures->GetChild(3);
  151. UNIT_ASSERT(IsListOfAtoms(GetQuoted(measuresNames), {"T", "Key"}, GetQuoted));
  152. const auto measuresCallables = measures->GetChild(4);
  153. UNIT_ASSERT(IsListOfSize(GetQuoted(measuresCallables), 2));
  154. }
  155. Y_UNIT_TEST(RowsPerMatch) {
  156. {
  157. const auto stmt = R"(
  158. USE plato;
  159. SELECT *
  160. FROM Input MATCH_RECOGNIZE(
  161. ONE ROW PER MATCH
  162. PATTERN (A)
  163. DEFINE A as A
  164. )
  165. )";
  166. auto r = MatchRecognizeSqlToYql(stmt);
  167. UNIT_ASSERT(r.IsOk());
  168. auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch");
  169. UNIT_ASSERT(IsAtom(GetQuoted(rowsPerMatch), "RowsPerMatch_OneRow"));
  170. }
  171. {
  172. const auto stmt = R"(
  173. USE plato;
  174. SELECT *
  175. FROM Input MATCH_RECOGNIZE(
  176. ALL ROWS PER MATCH
  177. PATTERN (A)
  178. DEFINE A as A
  179. )
  180. )";
  181. auto r = MatchRecognizeSqlToYql(stmt);
  182. UNIT_ASSERT(r.IsOk());
  183. }
  184. { //default
  185. const auto stmt = R"(
  186. USE plato;
  187. SELECT *
  188. FROM Input MATCH_RECOGNIZE(
  189. PATTERN (A)
  190. DEFINE A as A
  191. )
  192. )";
  193. auto r = MatchRecognizeSqlToYql(stmt);
  194. UNIT_ASSERT(r.IsOk());
  195. auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch");
  196. UNIT_ASSERT(IsAtom(GetQuoted(rowsPerMatch), "RowsPerMatch_OneRow"));
  197. }
  198. }
  199. Y_UNIT_TEST(SkipAfterMatch) {
  200. {
  201. const auto stmt = R"(
  202. USE plato;
  203. SELECT *
  204. FROM Input MATCH_RECOGNIZE(
  205. AFTER MATCH SKIP TO NEXT ROW
  206. PATTERN (A)
  207. DEFINE A as A
  208. )
  209. )";
  210. auto r = MatchRecognizeSqlToYql(stmt);
  211. UNIT_ASSERT(r.IsOk());
  212. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  213. UNIT_ASSERT(IsListOfAtoms(GetQuoted(skipTo), {"AfterMatchSkip_NextRow", ""}, GetQuoted));
  214. }
  215. {
  216. const auto stmt = R"(
  217. USE plato;
  218. SELECT *
  219. FROM Input MATCH_RECOGNIZE(
  220. AFTER MATCH SKIP PAST LAST ROW
  221. PATTERN (A)
  222. DEFINE A as A
  223. )
  224. )";
  225. auto r = MatchRecognizeSqlToYql(stmt);
  226. UNIT_ASSERT(r.IsOk());
  227. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  228. UNIT_ASSERT(IsListOfAtoms(GetQuoted(skipTo), {"AfterMatchSkip_PastLastRow", ""}, GetQuoted));
  229. }
  230. {
  231. const auto stmt = R"(
  232. USE plato;
  233. SELECT *
  234. FROM Input MATCH_RECOGNIZE(
  235. AFTER MATCH SKIP TO FIRST Y
  236. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  237. DEFINE A as A
  238. )
  239. )";
  240. auto r = MatchRecognizeSqlToYql(stmt);
  241. UNIT_ASSERT(r.IsOk());
  242. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  243. UNIT_ASSERT(IsListOfAtoms(GetQuoted(skipTo), {"AfterMatchSkip_ToFirst", "Y"}, GetQuoted));
  244. }
  245. {
  246. const auto stmt = R"(
  247. USE plato;
  248. SELECT *
  249. FROM Input MATCH_RECOGNIZE(
  250. AFTER MATCH SKIP TO FIRST T -- unknown pattern var
  251. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  252. DEFINE A as A
  253. )
  254. )";
  255. auto r = MatchRecognizeSqlToYql(stmt);
  256. UNIT_ASSERT(not r.IsOk());
  257. }
  258. {
  259. const auto stmt = R"(
  260. USE plato;
  261. SELECT *
  262. FROM Input MATCH_RECOGNIZE(
  263. AFTER MATCH SKIP TO LAST Y
  264. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  265. DEFINE A as A
  266. )
  267. )";
  268. auto r = MatchRecognizeSqlToYql(stmt);
  269. UNIT_ASSERT(r.IsOk());
  270. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  271. UNIT_ASSERT(IsListOfAtoms(GetQuoted(skipTo), {"AfterMatchSkip_ToLast", "Y"}, GetQuoted));
  272. }
  273. {
  274. const auto stmt = R"(
  275. USE plato;
  276. SELECT *
  277. FROM Input MATCH_RECOGNIZE(
  278. AFTER MATCH SKIP TO LAST T -- unknown pattern var
  279. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  280. DEFINE A as A
  281. )
  282. )";
  283. auto r = MatchRecognizeSqlToYql(stmt);
  284. UNIT_ASSERT(not r.IsOk());
  285. }
  286. {
  287. const auto stmt = R"(
  288. USE plato;
  289. SELECT *
  290. FROM Input MATCH_RECOGNIZE(
  291. AFTER MATCH SKIP TO Y
  292. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  293. DEFINE A as A
  294. )
  295. )";
  296. auto r = MatchRecognizeSqlToYql(stmt);
  297. UNIT_ASSERT(r.IsOk());
  298. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  299. UNIT_ASSERT(IsListOfAtoms(GetQuoted(skipTo), {"AfterMatchSkip_To", "Y"}, GetQuoted));
  300. }
  301. {
  302. const auto stmt = R"(
  303. USE plato;
  304. SELECT *
  305. FROM Input MATCH_RECOGNIZE(
  306. AFTER MATCH SKIP TO T -- unknown pattern var
  307. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  308. DEFINE A as A
  309. )
  310. )";
  311. auto r = MatchRecognizeSqlToYql(stmt);
  312. UNIT_ASSERT(not r.IsOk());
  313. }
  314. }
  315. Y_UNIT_TEST(row_pattern_initial) {
  316. const auto stmt = R"(
  317. USE plato;
  318. SELECT *
  319. FROM Input MATCH_RECOGNIZE(
  320. INITIAL
  321. PATTERN (A+ B* C?)
  322. DEFINE A as A
  323. )
  324. )";
  325. auto r = MatchRecognizeSqlToYql(stmt);
  326. UNIT_ASSERT(not r.IsOk());
  327. }
  328. Y_UNIT_TEST(row_pattern_seek) {
  329. const auto stmt = R"(
  330. USE plato;
  331. SELECT *
  332. FROM Input MATCH_RECOGNIZE(
  333. SEEK
  334. PATTERN (A+ B* C?)
  335. DEFINE A as A
  336. )
  337. )";
  338. auto r = MatchRecognizeSqlToYql(stmt);
  339. UNIT_ASSERT(not r.IsOk());
  340. }
  341. Y_UNIT_TEST(PatternSimple) {
  342. const auto stmt = R"(
  343. USE plato;
  344. SELECT *
  345. FROM Input MATCH_RECOGNIZE(
  346. PATTERN (A+ B* C?)
  347. DEFINE A as A
  348. )
  349. )";
  350. const auto& r = MatchRecognizeSqlToYql(stmt);
  351. UNIT_ASSERT(r.IsOk());
  352. const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
  353. UNIT_ASSERT(IsListOfSize(patternCallable, 1 + 1));
  354. UNIT_ASSERT(IsAtom(patternCallable->GetChild(0), "MatchRecognizePattern"));
  355. UNIT_ASSERT(IsListOfSize(GetQuoted(patternCallable->GetChild(1)), 3));
  356. }
  357. Y_UNIT_TEST(PatternMultiTerm) {
  358. const auto stmt = R"(
  359. USE plato;
  360. SELECT *
  361. FROM Input MATCH_RECOGNIZE(
  362. PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? I J ^)
  363. DEFINE A as A
  364. )
  365. )";
  366. const auto& r = MatchRecognizeSqlToYql(stmt);
  367. UNIT_ASSERT(r.IsOk());
  368. const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
  369. UNIT_ASSERT(IsListOfSize(patternCallable, 1 + 4));
  370. UNIT_ASSERT(IsAtom(patternCallable->GetChild(0), "MatchRecognizePattern"));
  371. UNIT_ASSERT(IsListOfSize(GetQuoted(patternCallable->GetChild(4)), 5));
  372. }
  373. Y_UNIT_TEST(PatternWithParanthesis) {
  374. const auto stmt = R"(
  375. USE plato;
  376. SELECT *
  377. FROM Input MATCH_RECOGNIZE(
  378. PATTERN (
  379. A | ($ B)+ C D
  380. )
  381. DEFINE A as A
  382. )
  383. )";
  384. const auto& r = MatchRecognizeSqlToYql(stmt);
  385. UNIT_ASSERT(r.IsOk());
  386. const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
  387. UNIT_ASSERT(IsListOfSize(patternCallable, 1 + 2));
  388. UNIT_ASSERT(IsAtom(patternCallable->GetChild(0), "MatchRecognizePattern"));
  389. const auto& firstTerm = patternCallable->GetChild(1);
  390. UNIT_ASSERT(IsListOfSize(GetQuoted(firstTerm), 1));
  391. const auto& lastTerm = patternCallable->GetChild(2);
  392. UNIT_ASSERT(IsListOfSize(GetQuoted(lastTerm), 3));
  393. const auto& firstFactorOfLastTerm = lastTerm->GetChild(1)->GetChild(0);
  394. UNIT_ASSERT(IsListOfSize(GetQuoted(firstFactorOfLastTerm), 6));
  395. const auto nestedPattern = firstFactorOfLastTerm->GetChild(1)->GetChild(0);
  396. UNIT_ASSERT(IsListOfSize(nestedPattern, 1 + 1));
  397. UNIT_ASSERT(IsAtom(nestedPattern->GetChild(0), "MatchRecognizePattern"));
  398. UNIT_ASSERT(IsListOfSize(GetQuoted(nestedPattern->GetChild(1)), 2));
  399. }
  400. Y_UNIT_TEST(PatternManyAlternatives) {
  401. const auto stmt = R"(
  402. USE plato;
  403. SELECT *
  404. FROM Input MATCH_RECOGNIZE(
  405. PATTERN (
  406. (A B C D ) | (B A C D ) | (C B A D ) | (B C A D ) | (C A B D ) | (A C B D ) | (D A B C ) | (A D B C ) | (B A D C ) | (A B D C ) | (B D A C ) | (D B A C ) | (C D A B ) | (D C A B ) | (A D C B ) | (D A C B ) | (A C D B ) | (C A D B ) | (B C D A ) | (C B D A ) | (D C B A ) | (C D B A ) | (D B C A ) | (B D C A )
  407. )
  408. DEFINE A as A
  409. )
  410. )";
  411. UNIT_ASSERT(MatchRecognizeSqlToYql(stmt).IsOk());
  412. }
  413. Y_UNIT_TEST(PatternLimitedNesting) {
  414. constexpr size_t MaxNesting = 20;
  415. for (size_t extraNesting = 0; extraNesting <= 1; ++extraNesting) {
  416. std::string pattern;
  417. for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
  418. pattern.push_back('(');
  419. pattern.push_back('A');
  420. for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
  421. pattern.push_back(')');
  422. const auto stmt = TString(R"(
  423. USE plato;
  424. SELECT *
  425. FROM Input MATCH_RECOGNIZE(
  426. PATTERN(
  427. )") + pattern + R"(
  428. )
  429. DEFINE A as A
  430. )
  431. )";
  432. const auto &r = MatchRecognizeSqlToYql(stmt);
  433. if (not extraNesting) {
  434. UNIT_ASSERT(r.IsOk());
  435. } else {
  436. UNIT_ASSERT(not r.IsOk());
  437. }
  438. }
  439. }
  440. Y_UNIT_TEST(PatternFactorQuantifiers) {
  441. auto makeRequest = [](const TString& factor) {
  442. return TString(R"(
  443. USE plato;
  444. SELECT *
  445. FROM Input MATCH_RECOGNIZE(
  446. PATTERN(
  447. )") + factor + R"(
  448. )
  449. DEFINE A as A
  450. )
  451. )";
  452. };
  453. auto getTheFactor = [](const NYql::TAstNode* root) {
  454. const auto& patternCallable = FindMatchRecognizeParam(root, "pattern");
  455. const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1);
  456. return NYql::NMatchRecognize::TRowPatternFactor{
  457. TString(), // Primary var or subexpression, not used in this test
  458. FromString<uint64_t>(GetAtom(GetQuoted(factor->GetChild(1)))), // QuantityMin
  459. FromString<uint64_t>(GetAtom(GetQuoted(factor->GetChild(2)))), // QuantityMax
  460. FromString<bool>(GetAtom(GetQuoted(factor->GetChild(3)))), // Greedy
  461. FromString<bool>(GetAtom(GetQuoted(factor->GetChild(4)))), // Output, not used in this test
  462. FromString<bool>(GetAtom(GetQuoted(factor->GetChild(5)))), // Flag "Unused", not used in this test
  463. };
  464. };
  465. {
  466. //no quantifiers
  467. const auto stmt = makeRequest("A");
  468. const auto &r = MatchRecognizeSqlToYql(stmt);
  469. UNIT_ASSERT(r.IsOk());
  470. const auto& factor = getTheFactor(r.Root);
  471. UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
  472. UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
  473. UNIT_ASSERT(factor.Greedy);
  474. }
  475. {
  476. //optional greedy(default)
  477. const auto stmt = makeRequest("A?");
  478. const auto &r = MatchRecognizeSqlToYql(stmt);
  479. UNIT_ASSERT(r.IsOk());
  480. const auto& factor = getTheFactor(r.Root);
  481. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  482. UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
  483. UNIT_ASSERT(factor.Greedy);
  484. }
  485. {
  486. //optional reluctant
  487. const auto stmt = makeRequest("A??");
  488. const auto &r = MatchRecognizeSqlToYql(stmt);
  489. UNIT_ASSERT(r.IsOk());
  490. const auto& factor = getTheFactor(r.Root);
  491. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  492. UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
  493. UNIT_ASSERT(!factor.Greedy);
  494. }
  495. {
  496. //+ greedy(default)
  497. const auto stmt = makeRequest("A+");
  498. const auto &r = MatchRecognizeSqlToYql(stmt);
  499. UNIT_ASSERT(r.IsOk());
  500. const auto& factor = getTheFactor(r.Root);
  501. UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
  502. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  503. UNIT_ASSERT(factor.Greedy);
  504. }
  505. {
  506. //+ reluctant
  507. const auto stmt = makeRequest("A+?");
  508. const auto &r = MatchRecognizeSqlToYql(stmt);
  509. UNIT_ASSERT(r.IsOk());
  510. const auto& factor = getTheFactor(r.Root);
  511. UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
  512. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  513. UNIT_ASSERT(!factor.Greedy);
  514. }
  515. {
  516. //* greedy(default)
  517. const auto stmt = makeRequest("A*");
  518. const auto &r = MatchRecognizeSqlToYql(stmt);
  519. UNIT_ASSERT(r.IsOk());
  520. const auto& factor = getTheFactor(r.Root);
  521. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  522. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  523. UNIT_ASSERT(factor.Greedy);
  524. }
  525. {
  526. //* reluctant
  527. const auto stmt = makeRequest("A*?");
  528. const auto &r = MatchRecognizeSqlToYql(stmt);
  529. UNIT_ASSERT(r.IsOk());
  530. const auto& factor = getTheFactor(r.Root);
  531. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  532. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  533. UNIT_ASSERT(!factor.Greedy);
  534. }
  535. {
  536. //exact n
  537. const auto stmt = makeRequest("A{4}");
  538. const auto &r = MatchRecognizeSqlToYql(stmt);
  539. UNIT_ASSERT(r.IsOk());
  540. const auto& factor = getTheFactor(r.Root);
  541. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  542. UNIT_ASSERT_EQUAL(4, factor.QuantityMax);
  543. }
  544. {
  545. //from n to m greedy(default
  546. const auto stmt = makeRequest("A{4, 7}");
  547. const auto &r = MatchRecognizeSqlToYql(stmt);
  548. UNIT_ASSERT(r.IsOk());
  549. const auto& factor = getTheFactor(r.Root);
  550. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  551. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  552. UNIT_ASSERT(factor.Greedy);
  553. }
  554. {
  555. //from n to m reluctant
  556. const auto stmt = makeRequest("A{4,7}?");
  557. const auto &r = MatchRecognizeSqlToYql(stmt);
  558. UNIT_ASSERT(r.IsOk());
  559. const auto& factor = getTheFactor(r.Root);
  560. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  561. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  562. UNIT_ASSERT(!factor.Greedy);
  563. }
  564. {
  565. //at least n greedy(default)
  566. const auto stmt = makeRequest("A{4,}");
  567. const auto &r = MatchRecognizeSqlToYql(stmt);
  568. UNIT_ASSERT(r.IsOk());
  569. const auto& factor = getTheFactor(r.Root);
  570. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  571. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  572. UNIT_ASSERT(factor.Greedy);
  573. }
  574. {
  575. //at least n reluctant
  576. const auto stmt = makeRequest("A{4,}?");
  577. const auto &r = MatchRecognizeSqlToYql(stmt);
  578. UNIT_ASSERT(r.IsOk());
  579. const auto& factor = getTheFactor(r.Root);
  580. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  581. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  582. UNIT_ASSERT(!factor.Greedy);
  583. }
  584. {
  585. //at most m greedy(default)
  586. const auto stmt = makeRequest("A{,7}");
  587. const auto &r = MatchRecognizeSqlToYql(stmt);
  588. UNIT_ASSERT(r.IsOk());
  589. const auto& factor = getTheFactor(r.Root);
  590. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  591. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  592. UNIT_ASSERT(factor.Greedy);
  593. }
  594. {
  595. //at least n reluctant
  596. const auto stmt = makeRequest("A{,7}?");
  597. const auto &r = MatchRecognizeSqlToYql(stmt);
  598. UNIT_ASSERT(r.IsOk());
  599. const auto& factor = getTheFactor(r.Root);
  600. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  601. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  602. UNIT_ASSERT(!factor.Greedy);
  603. }
  604. {
  605. //quantifiers on subexpression
  606. const auto stmt = makeRequest("(A B+ C | D | ^){4,7}?");
  607. const auto &r = MatchRecognizeSqlToYql(stmt);
  608. UNIT_ASSERT(r.IsOk());
  609. const auto& factor = getTheFactor(r.Root);
  610. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  611. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  612. UNIT_ASSERT(!factor.Greedy);
  613. }
  614. }
  615. Y_UNIT_TEST(Permute) {
  616. const auto stmt = R"(
  617. USE plato;
  618. SELECT *
  619. FROM Input MATCH_RECOGNIZE(
  620. PATTERN (
  621. PERMUTE(A, B, C, D, E) --5 variables produce 5! permutations
  622. )
  623. DEFINE A as A
  624. )
  625. )";
  626. const auto& r = MatchRecognizeSqlToYql(stmt);
  627. UNIT_ASSERT(r.IsOk());
  628. const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
  629. const auto permutePattern = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1)->GetChild(0);
  630. UNIT_ASSERT(IsListOfSize(permutePattern, 1 + 120)); //CallableName + 5!
  631. }
  632. Y_UNIT_TEST(PermuteTooMuch) {
  633. for (size_t n = 1; n <= 6 + 1; ++n) {
  634. std::vector<std::string> vars(n);
  635. std::generate(begin(vars), end(vars), [n = 0] () mutable { return "A" + std::to_string(n++);});
  636. const auto stmt = TString(R"(
  637. USE plato;
  638. SELECT *
  639. FROM Input MATCH_RECOGNIZE(
  640. PATTERN (
  641. PERMUTE( )" + std::accumulate(cbegin(vars) + 1, cend(vars), vars.front(),
  642. [](const std::string& acc, const std::string& v) {
  643. return acc + ", " + v;
  644. }) +
  645. R"(
  646. )
  647. )
  648. DEFINE A0 as A0
  649. )
  650. )"
  651. );
  652. const auto &r = MatchRecognizeSqlToYql(stmt);
  653. if (n <= 6) {
  654. UNIT_ASSERT(r.IsOk());
  655. } else {
  656. UNIT_ASSERT(!r.IsOk());
  657. }
  658. }
  659. }
  660. Y_UNIT_TEST(row_pattern_subset_clause) {
  661. //TODO https://st.yandex-team.ru/YQL-16186
  662. }
  663. Y_UNIT_TEST(Defines) {
  664. auto stmt = R"(
  665. USE plato;
  666. SELECT *
  667. FROM Input MATCH_RECOGNIZE(
  668. PATTERN ( Y Q L )
  669. DEFINE
  670. Y as true,
  671. Q as Q.V = "value",
  672. L as L.V = LAST(Q.T)
  673. )
  674. )";
  675. auto r = MatchRecognizeSqlToYql(stmt);
  676. UNIT_ASSERT(r.IsOk());
  677. const auto defines = FindMatchRecognizeParam(r.Root, "define");
  678. UNIT_ASSERT(IsListOfSize(defines, 7));
  679. const auto varNames = defines->GetChild(3);
  680. UNIT_ASSERT(IsListOfAtoms(GetQuoted(varNames), {"Y", "Q", "L"}, GetQuoted));
  681. UNIT_ASSERT(IsLambda(defines->GetChild(4), 3));
  682. UNIT_ASSERT(IsLambda(defines->GetChild(5), 3));
  683. UNIT_ASSERT(IsLambda(defines->GetChild(6), 3));
  684. }
  685. Y_UNIT_TEST(AbsentRowPatternVariableInDefines) {
  686. auto getStatement = [](const TString &var) {
  687. return TStringBuilder() << R"(
  688. USE plato;
  689. SELECT *
  690. FROM Input MATCH_RECOGNIZE(
  691. PATTERN ( Q )
  692. DEFINE
  693. )" << var << " AS TRUE )";
  694. };
  695. UNIT_ASSERT(MatchRecognizeSqlToYql(getStatement("Q")).IsOk());
  696. UNIT_ASSERT(!MatchRecognizeSqlToYql(getStatement("Y")).IsOk());
  697. }
  698. Y_UNIT_TEST(CheckRequiredNavigationFunction) {
  699. TString stmtPrefix = R"(
  700. USE plato;
  701. SELECT *
  702. FROM Input MATCH_RECOGNIZE(
  703. PATTERN ( Y Q L )
  704. DEFINE
  705. L as L.V =
  706. )";
  707. //Be aware that right parenthesis is added at the end of the query as required
  708. UNIT_ASSERT(MatchRecognizeSqlToYql(stmtPrefix + "LAST(Q.dt) )").IsOk());
  709. UNIT_ASSERT(!MatchRecognizeSqlToYql(stmtPrefix + "Q.dt )").IsOk());
  710. }
  711. }