sql_match_recognize_ut.cpp 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742
  1. #include "sql_ut.h"
  2. #include "match_recognize.h"
  3. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  4. #include <yql/essentials/core/sql_types/match_recognize.h>
  5. #include <yql/essentials/sql/sql.h>
  6. #include <util/generic/map.h>
  7. #include <library/cpp/testing/unittest/registar.h>
  8. #include <util/string/split.h>
  9. using namespace NSQLTranslation;
  10. NYql::TAstParseResult MatchRecognizeSqlToYql(const TString& query) {
  11. TString enablingPragma = R"(
  12. pragma FeatureR010="prototype";
  13. )";
  14. return SqlToYql(enablingPragma + query);
  15. }
  16. const NYql::TAstNode* FindMatchRecognizeParam(const NYql::TAstNode* root, TString name) {
  17. auto matchRecognizeBlock = FindNodeByChildAtomContent(root, 1, "match_recognize");
  18. UNIT_ASSERT(matchRecognizeBlock);
  19. auto paramNode = FindNodeByChildAtomContent(matchRecognizeBlock, 1, name);
  20. return paramNode->GetChild(2);
  21. }
  22. bool IsQuotedListOfSize(const NYql::TAstNode* node, ui32 size) {
  23. UNIT_ASSERT(node->IsListOfSize(2));
  24. if (!node->IsListOfSize(2))
  25. return false;
  26. UNIT_ASSERT_EQUAL(node->GetChild(0)->GetContent(), "quote");
  27. if (node->GetChild(0)->GetContent() != "quote")
  28. return false;
  29. UNIT_ASSERT_EQUAL(node->GetChild(1)->GetChildrenCount(), size);
  30. return node->GetChild(1)->IsListOfSize(size);
  31. }
  32. bool IsLambda(const NYql::TAstNode* node, ui32 numberOfArgs) {
  33. if (!node->IsListOfSize(3)) {
  34. return false;
  35. }
  36. if (!node->GetChild(0)->IsAtom() || node->GetChild(0)->GetContent() != "lambda") {
  37. return false;
  38. }
  39. return IsQuotedListOfSize(node->GetChild(1), numberOfArgs);
  40. }
  41. Y_UNIT_TEST_SUITE(MatchRecognize) {
  // Minimal MATCH_RECOGNIZE query (PATTERN and DEFINE only) shared by the tests
  // that do not need any optional clause.
  const char* minValidMatchRecognizeSql = R"(
USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
PATTERN ( A )
DEFINE A as A
)
)";
  50. Y_UNIT_TEST(EnabledWithPragma) {
  51. UNIT_ASSERT(not SqlToYql(minValidMatchRecognizeSql).IsOk());
  52. UNIT_ASSERT(MatchRecognizeSqlToYql(minValidMatchRecognizeSql).IsOk());
  53. }
  54. Y_UNIT_TEST(InputTableName) {
  55. auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
  56. UNIT_ASSERT(r.IsOk());
  57. auto input = FindMatchRecognizeParam(r.Root, "input");
  58. UNIT_ASSERT(input->IsAtom() && input->GetContent() == "core");
  59. }
  60. Y_UNIT_TEST(MatchRecognizeAndSample) {
  61. auto matchRecognizeAndSample = R"(
  62. USE plato;
  63. SELECT *
  64. FROM Input MATCH_RECOGNIZE(
  65. PATTERN ( A )
  66. DEFINE A as A
  67. ) TABLESAMPLE BERNOULLI(1.0)
  68. )";
  69. UNIT_ASSERT(not MatchRecognizeSqlToYql(matchRecognizeAndSample).IsOk());
  70. }
  71. Y_UNIT_TEST(NoPartitionBy) {
  72. auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
  73. UNIT_ASSERT(r.IsOk());
  74. auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector");
  75. UNIT_ASSERT(IsQuotedListOfSize(partitionKeySelector->GetChild(2), 0)); //empty tuple
  76. auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns");
  77. UNIT_ASSERT(IsQuotedListOfSize(partitionColumns, 0)); //empty tuple
  78. }
  79. Y_UNIT_TEST(PartitionBy) {
  80. auto stmt = R"(
  81. USE plato;
  82. SELECT *
  83. FROM Input MATCH_RECOGNIZE(
  84. PARTITION BY col1 as c1, ~CAST(col1 as Int32) as invertedC1, c2
  85. PATTERN ( A )
  86. DEFINE A as A
  87. )
  88. )";
  89. auto r = MatchRecognizeSqlToYql(stmt);
  90. UNIT_ASSERT(r.IsOk());
  91. auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector");
  92. UNIT_ASSERT(IsQuotedListOfSize(partitionKeySelector->GetChild(2), 3));
  93. auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns");
  94. UNIT_ASSERT(IsQuotedListOfSize(partitionColumns, 3));
  95. //TODO check partitioner lambdas(alias/no alias)
  96. }
  97. Y_UNIT_TEST(NoOrderBy) {
  98. auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
  99. UNIT_ASSERT(r.IsOk());
  100. auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits");
  101. UNIT_ASSERT(sortTraits && sortTraits->IsListOfSize(1));
  102. UNIT_ASSERT(sortTraits->GetChild(0)->GetContent() == "Void");
  103. }
  104. Y_UNIT_TEST(OrderBy) {
  105. auto stmt = R"(
  106. USE plato;
  107. SELECT *
  108. FROM Input MATCH_RECOGNIZE(
  109. ORDER BY col1, ~CAST(col1 as Int32), c2
  110. PATTERN ( A )
  111. DEFINE A as A
  112. )
  113. )";
  114. auto r = MatchRecognizeSqlToYql(stmt);
  115. UNIT_ASSERT(r.IsOk());
  116. auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits");
  117. UNIT_ASSERT(sortTraits && sortTraits->IsListOfSize(4));
  118. UNIT_ASSERT(sortTraits->GetChild(0)->GetContent() == "SortTraits");
  119. UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(2), 3));
  120. UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(3)->GetChild(2), 3));
  121. }
  122. Y_UNIT_TEST(Measures) {
  123. auto stmt = R"(
  124. USE plato;
  125. SELECT *
  126. FROM Input MATCH_RECOGNIZE(
  127. MEASURES
  128. Last(Q.dt) as T,
  129. First(Y.key) as Key
  130. PATTERN ( Y Q )
  131. DEFINE Y as true
  132. )
  133. )";
  134. auto r = MatchRecognizeSqlToYql(stmt);
  135. UNIT_ASSERT(r.IsOk());
  136. const auto measures = FindMatchRecognizeParam(r.Root, "measures");
  137. UNIT_ASSERT_VALUES_EQUAL(7, measures->GetChildrenCount());
  138. const auto columnNames = measures->GetChild(3);
  139. UNIT_ASSERT(IsQuotedListOfSize(columnNames, 2));
  140. UNIT_ASSERT_VALUES_EQUAL("T", columnNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
  141. UNIT_ASSERT_VALUES_EQUAL("Key", columnNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
  142. UNIT_ASSERT(IsQuotedListOfSize(measures->GetChild(4), 2));
  143. UNIT_ASSERT(IsQuotedListOfSize(measures->GetChild(5), 2));
  144. }
  145. Y_UNIT_TEST(RowsPerMatch) {
  146. {
  147. const auto stmt = R"(
  148. USE plato;
  149. SELECT *
  150. FROM Input MATCH_RECOGNIZE(
  151. ONE ROW PER MATCH
  152. PATTERN (A)
  153. DEFINE A as A
  154. )
  155. )";
  156. auto r = MatchRecognizeSqlToYql(stmt);
  157. UNIT_ASSERT(r.IsOk());
  158. auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch");
  159. UNIT_ASSERT_VALUES_EQUAL("RowsPerMatch_OneRow", rowsPerMatch->GetChild(1)->GetContent());
  160. }
  161. {
  162. const auto stmt = R"(
  163. USE plato;
  164. SELECT *
  165. FROM Input MATCH_RECOGNIZE(
  166. ALL ROWS PER MATCH
  167. PATTERN (A)
  168. DEFINE A as A
  169. )
  170. )";
  171. auto r = MatchRecognizeSqlToYql(stmt);
  172. UNIT_ASSERT(r.IsOk());
  173. }
  174. { //default
  175. const auto stmt = R"(
  176. USE plato;
  177. SELECT *
  178. FROM Input MATCH_RECOGNIZE(
  179. PATTERN (A)
  180. DEFINE A as A
  181. )
  182. )";
  183. auto r = MatchRecognizeSqlToYql(stmt);
  184. UNIT_ASSERT(r.IsOk());
  185. auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch");
  186. UNIT_ASSERT_VALUES_EQUAL("RowsPerMatch_OneRow", rowsPerMatch->GetChild(1)->GetContent());
  187. }
  188. }
  189. Y_UNIT_TEST(SkipAfterMatch) {
  190. {
  191. const auto stmt = R"(
  192. USE plato;
  193. SELECT *
  194. FROM Input MATCH_RECOGNIZE(
  195. AFTER MATCH SKIP TO NEXT ROW
  196. PATTERN (A)
  197. DEFINE A as A
  198. )
  199. )";
  200. auto r = MatchRecognizeSqlToYql(stmt);
  201. UNIT_ASSERT(r.IsOk());
  202. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  203. UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_NextRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
  204. }
  205. {
  206. const auto stmt = R"(
  207. USE plato;
  208. SELECT *
  209. FROM Input MATCH_RECOGNIZE(
  210. AFTER MATCH SKIP PAST LAST ROW
  211. PATTERN (A)
  212. DEFINE A as A
  213. )
  214. )";
  215. auto r = MatchRecognizeSqlToYql(stmt);
  216. UNIT_ASSERT(r.IsOk());
  217. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  218. UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_PastLastRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
  219. }
  220. {
  221. const auto stmt = R"(
  222. USE plato;
  223. SELECT *
  224. FROM Input MATCH_RECOGNIZE(
  225. AFTER MATCH SKIP TO FIRST Y
  226. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  227. DEFINE A as A
  228. )
  229. )";
  230. auto r = MatchRecognizeSqlToYql(stmt);
  231. UNIT_ASSERT(r.IsOk());
  232. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  233. UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToFirst", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
  234. UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
  235. }
  236. {
  237. const auto stmt = R"(
  238. USE plato;
  239. SELECT *
  240. FROM Input MATCH_RECOGNIZE(
  241. AFTER MATCH SKIP TO FIRST T -- unknown pattern var
  242. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  243. DEFINE A as A
  244. )
  245. )";
  246. auto r = MatchRecognizeSqlToYql(stmt);
  247. UNIT_ASSERT(not r.IsOk());
  248. }
  249. {
  250. const auto stmt = R"(
  251. USE plato;
  252. SELECT *
  253. FROM Input MATCH_RECOGNIZE(
  254. AFTER MATCH SKIP TO LAST Y
  255. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  256. DEFINE A as A
  257. )
  258. )";
  259. auto r = MatchRecognizeSqlToYql(stmt);
  260. UNIT_ASSERT(r.IsOk());
  261. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  262. UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToLast", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
  263. UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
  264. }
  265. {
  266. const auto stmt = R"(
  267. USE plato;
  268. SELECT *
  269. FROM Input MATCH_RECOGNIZE(
  270. AFTER MATCH SKIP TO LAST T -- unknown pattern var
  271. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  272. DEFINE A as A
  273. )
  274. )";
  275. auto r = MatchRecognizeSqlToYql(stmt);
  276. UNIT_ASSERT(not r.IsOk());
  277. }
  278. {
  279. const auto stmt = R"(
  280. USE plato;
  281. SELECT *
  282. FROM Input MATCH_RECOGNIZE(
  283. AFTER MATCH SKIP TO Y
  284. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  285. DEFINE A as A
  286. )
  287. )";
  288. auto r = MatchRecognizeSqlToYql(stmt);
  289. UNIT_ASSERT(r.IsOk());
  290. auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
  291. UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_To", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
  292. UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
  293. }
  294. {
  295. const auto stmt = R"(
  296. USE plato;
  297. SELECT *
  298. FROM Input MATCH_RECOGNIZE(
  299. AFTER MATCH SKIP TO T -- unknown pattern var
  300. PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
  301. DEFINE A as A
  302. )
  303. )";
  304. auto r = MatchRecognizeSqlToYql(stmt);
  305. UNIT_ASSERT(not r.IsOk());
  306. }
  307. }
  308. Y_UNIT_TEST(row_pattern_initial) {
  309. const auto stmt = R"(
  310. USE plato;
  311. SELECT *
  312. FROM Input MATCH_RECOGNIZE(
  313. INITIAL
  314. PATTERN (A+ B* C?)
  315. DEFINE A as A
  316. )
  317. )";
  318. auto r = MatchRecognizeSqlToYql(stmt);
  319. UNIT_ASSERT(not r.IsOk());
  320. }
  321. Y_UNIT_TEST(row_pattern_seek) {
  322. const auto stmt = R"(
  323. USE plato;
  324. SELECT *
  325. FROM Input MATCH_RECOGNIZE(
  326. SEEK
  327. PATTERN (A+ B* C?)
  328. DEFINE A as A
  329. )
  330. )";
  331. auto r = MatchRecognizeSqlToYql(stmt);
  332. UNIT_ASSERT(not r.IsOk());
  333. }
  334. Y_UNIT_TEST(PatternSimple) {
  335. const auto stmt = R"(
  336. USE plato;
  337. SELECT *
  338. FROM Input MATCH_RECOGNIZE(
  339. PATTERN (A+ B* C?)
  340. DEFINE A as A
  341. )
  342. )";
  343. const auto& r = MatchRecognizeSqlToYql(stmt);
  344. UNIT_ASSERT(r.IsOk());
  345. const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
  346. UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
  347. UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 1);
  348. const auto& term = patternCallable->GetChild(1);
  349. UNIT_ASSERT(IsQuotedListOfSize(term, 3));
  350. }
  351. Y_UNIT_TEST(PatternMultiTerm) {
  352. const auto stmt = R"(
  353. USE plato;
  354. SELECT *
  355. FROM Input MATCH_RECOGNIZE(
  356. PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? I J ^)
  357. DEFINE A as A
  358. )
  359. )";
  360. const auto& r = MatchRecognizeSqlToYql(stmt);
  361. UNIT_ASSERT(r.IsOk());
  362. const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
  363. UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
  364. UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 4);
  365. const auto& lastTerm = patternCallable->GetChild(4);
  366. UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 5));
  367. }
  368. Y_UNIT_TEST(PatternWithParanthesis) {
  369. const auto stmt = R"(
  370. USE plato;
  371. SELECT *
  372. FROM Input MATCH_RECOGNIZE(
  373. PATTERN (
  374. A | ($ B)+ C D
  375. )
  376. DEFINE A as A
  377. )
  378. )";
  379. const auto& r = MatchRecognizeSqlToYql(stmt);
  380. UNIT_ASSERT(r.IsOk());
  381. const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
  382. UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
  383. UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 2);
  384. const auto& firstTerm = patternCallable->GetChild(1);
  385. UNIT_ASSERT(IsQuotedListOfSize(firstTerm, 1));
  386. const auto& lastTerm = patternCallable->GetChild(2);
  387. UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 3));
  388. const auto& firstFactorOfLastTerm = lastTerm->GetChild(1)->GetChild(0);
  389. UNIT_ASSERT(IsQuotedListOfSize(firstFactorOfLastTerm, 6));
  390. const auto nestedPattern = firstFactorOfLastTerm->GetChild(1)->GetChild(0);
  391. UNIT_ASSERT_EQUAL(nestedPattern->GetChildrenCount(), 1 + 1);
  392. UNIT_ASSERT_EQUAL(nestedPattern->GetChild(0)->GetContent(), "MatchRecognizePattern");
  393. UNIT_ASSERT(IsQuotedListOfSize(nestedPattern->GetChild(1), 2));
  394. }
  395. Y_UNIT_TEST(PatternManyAlternatives) {
  396. const auto stmt = R"(
  397. USE plato;
  398. SELECT *
  399. FROM Input MATCH_RECOGNIZE(
  400. PATTERN (
  401. (A B C D ) | (B A C D ) | (C B A D ) | (B C A D ) | (C A B D ) | (A C B D ) | (D A B C ) | (A D B C ) | (B A D C ) | (A B D C ) | (B D A C ) | (D B A C ) | (C D A B ) | (D C A B ) | (A D C B ) | (D A C B ) | (A C D B ) | (C A D B ) | (B C D A ) | (C B D A ) | (D C B A ) | (C D B A ) | (D B C A ) | (B D C A )
  402. )
  403. DEFINE A as A
  404. )
  405. )";
  406. UNIT_ASSERT(MatchRecognizeSqlToYql(stmt).IsOk());
  407. }
  408. Y_UNIT_TEST(PatternLimitedNesting) {
  409. constexpr size_t MaxNesting = 20;
  410. for (size_t extraNesting = 0; extraNesting <= 1; ++extraNesting) {
  411. std::string pattern;
  412. for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
  413. pattern.push_back('(');
  414. pattern.push_back('A');
  415. for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
  416. pattern.push_back(')');
  417. const auto stmt = TString(R"(
  418. USE plato;
  419. SELECT *
  420. FROM Input MATCH_RECOGNIZE(
  421. PATTERN(
  422. )") + pattern + R"(
  423. )
  424. DEFINE A as A
  425. )
  426. )";
  427. const auto &r = MatchRecognizeSqlToYql(stmt);
  428. if (not extraNesting) {
  429. UNIT_ASSERT(r.IsOk());
  430. } else {
  431. UNIT_ASSERT(not r.IsOk());
  432. }
  433. }
  434. }
  435. Y_UNIT_TEST(PatternFactorQuantifiers) {
  436. auto makeRequest = [](const TString& factor) {
  437. return TString(R"(
  438. USE plato;
  439. SELECT *
  440. FROM Input MATCH_RECOGNIZE(
  441. PATTERN(
  442. )") + factor + R"(
  443. )
  444. DEFINE A as A
  445. )
  446. )";
  447. };
  448. auto getTheFactor = [](const NYql::TAstNode* root) {
  449. const auto& patternCallable = FindMatchRecognizeParam(root, "pattern");
  450. const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1);
  451. return NYql::NMatchRecognize::TRowPatternFactor{
  452. TString(), //primary var or subexpression, not used in this test
  453. FromString<uint64_t>(factor->GetChild(1)->GetChild(1)->GetContent()), //QuantityMin
  454. FromString<uint64_t>(factor->GetChild(2)->GetChild(1)->GetContent()), //QuantityMax
  455. FromString<bool>(factor->GetChild(3)->GetChild(1)->GetContent()), //Greedy
  456. false, //Output, not used in this test
  457. false, // Flag "Unused", not used in this test
  458. };
  459. };
  460. {
  461. //no quantifiers
  462. const auto stmt = makeRequest("A");
  463. const auto &r = MatchRecognizeSqlToYql(stmt);
  464. UNIT_ASSERT(r.IsOk());
  465. const auto& factor = getTheFactor(r.Root);
  466. UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
  467. UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
  468. UNIT_ASSERT(factor.Greedy);
  469. }
  470. {
  471. //optional greedy(default)
  472. const auto stmt = makeRequest("A?");
  473. const auto &r = MatchRecognizeSqlToYql(stmt);
  474. UNIT_ASSERT(r.IsOk());
  475. const auto& factor = getTheFactor(r.Root);
  476. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  477. UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
  478. UNIT_ASSERT(factor.Greedy);
  479. }
  480. {
  481. //optional reluctant
  482. const auto stmt = makeRequest("A??");
  483. const auto &r = MatchRecognizeSqlToYql(stmt);
  484. UNIT_ASSERT(r.IsOk());
  485. const auto& factor = getTheFactor(r.Root);
  486. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  487. UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
  488. UNIT_ASSERT(!factor.Greedy);
  489. }
  490. {
  491. //+ greedy(default)
  492. const auto stmt = makeRequest("A+");
  493. const auto &r = MatchRecognizeSqlToYql(stmt);
  494. UNIT_ASSERT(r.IsOk());
  495. const auto& factor = getTheFactor(r.Root);
  496. UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
  497. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  498. UNIT_ASSERT(factor.Greedy);
  499. }
  500. {
  501. //+ reluctant
  502. const auto stmt = makeRequest("A+?");
  503. const auto &r = MatchRecognizeSqlToYql(stmt);
  504. UNIT_ASSERT(r.IsOk());
  505. const auto& factor = getTheFactor(r.Root);
  506. UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
  507. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  508. UNIT_ASSERT(!factor.Greedy);
  509. }
  510. {
  511. //* greedy(default)
  512. const auto stmt = makeRequest("A*");
  513. const auto &r = MatchRecognizeSqlToYql(stmt);
  514. UNIT_ASSERT(r.IsOk());
  515. const auto& factor = getTheFactor(r.Root);
  516. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  517. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  518. UNIT_ASSERT(factor.Greedy);
  519. }
  520. {
  521. //* reluctant
  522. const auto stmt = makeRequest("A*?");
  523. const auto &r = MatchRecognizeSqlToYql(stmt);
  524. UNIT_ASSERT(r.IsOk());
  525. const auto& factor = getTheFactor(r.Root);
  526. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  527. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  528. UNIT_ASSERT(!factor.Greedy);
  529. }
  530. {
  531. //exact n
  532. const auto stmt = makeRequest("A{4}");
  533. const auto &r = MatchRecognizeSqlToYql(stmt);
  534. UNIT_ASSERT(r.IsOk());
  535. const auto& factor = getTheFactor(r.Root);
  536. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  537. UNIT_ASSERT_EQUAL(4, factor.QuantityMax);
  538. }
  539. {
  540. //from n to m greedy(default
  541. const auto stmt = makeRequest("A{4, 7}");
  542. const auto &r = MatchRecognizeSqlToYql(stmt);
  543. UNIT_ASSERT(r.IsOk());
  544. const auto& factor = getTheFactor(r.Root);
  545. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  546. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  547. UNIT_ASSERT(factor.Greedy);
  548. }
  549. {
  550. //from n to m reluctant
  551. const auto stmt = makeRequest("A{4,7}?");
  552. const auto &r = MatchRecognizeSqlToYql(stmt);
  553. UNIT_ASSERT(r.IsOk());
  554. const auto& factor = getTheFactor(r.Root);
  555. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  556. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  557. UNIT_ASSERT(!factor.Greedy);
  558. }
  559. {
  560. //at least n greedy(default)
  561. const auto stmt = makeRequest("A{4,}");
  562. const auto &r = MatchRecognizeSqlToYql(stmt);
  563. UNIT_ASSERT(r.IsOk());
  564. const auto& factor = getTheFactor(r.Root);
  565. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  566. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  567. UNIT_ASSERT(factor.Greedy);
  568. }
  569. {
  570. //at least n reluctant
  571. const auto stmt = makeRequest("A{4,}?");
  572. const auto &r = MatchRecognizeSqlToYql(stmt);
  573. UNIT_ASSERT(r.IsOk());
  574. const auto& factor = getTheFactor(r.Root);
  575. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  576. UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
  577. UNIT_ASSERT(!factor.Greedy);
  578. }
  579. {
  580. //at most m greedy(default)
  581. const auto stmt = makeRequest("A{,7}");
  582. const auto &r = MatchRecognizeSqlToYql(stmt);
  583. UNIT_ASSERT(r.IsOk());
  584. const auto& factor = getTheFactor(r.Root);
  585. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  586. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  587. UNIT_ASSERT(factor.Greedy);
  588. }
  589. {
  590. //at least n reluctant
  591. const auto stmt = makeRequest("A{,7}?");
  592. const auto &r = MatchRecognizeSqlToYql(stmt);
  593. UNIT_ASSERT(r.IsOk());
  594. const auto& factor = getTheFactor(r.Root);
  595. UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
  596. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  597. UNIT_ASSERT(!factor.Greedy);
  598. }
  599. {
  600. //quantifiers on subexpression
  601. const auto stmt = makeRequest("(A B+ C | D | ^){4,7}?");
  602. const auto &r = MatchRecognizeSqlToYql(stmt);
  603. UNIT_ASSERT(r.IsOk());
  604. const auto& factor = getTheFactor(r.Root);
  605. UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
  606. UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
  607. UNIT_ASSERT(!factor.Greedy);
  608. }
  609. }
  610. Y_UNIT_TEST(Permute) {
  611. const auto stmt = R"(
  612. USE plato;
  613. SELECT *
  614. FROM Input MATCH_RECOGNIZE(
  615. PATTERN (
  616. PERMUTE(A, B, C, D, E) --5 variables produce 5! permutations
  617. )
  618. DEFINE A as A
  619. )
  620. )";
  621. const auto& r = MatchRecognizeSqlToYql(stmt);
  622. UNIT_ASSERT(r.IsOk());
  623. const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
  624. const auto permutePattern = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1)->GetChild(0);
  625. UNIT_ASSERT(permutePattern->IsListOfSize(1 + 120)); //CallableName + 5!
  626. }
  627. Y_UNIT_TEST(PermuteTooMuch) {
  628. for (size_t n = 1; n <= 6 + 1; ++n) {
  629. std::vector<std::string> vars(n);
  630. std::generate(begin(vars), end(vars), [n = 0] () mutable { return "A" + std::to_string(n++);});
  631. const auto stmt = TString(R"(
  632. USE plato;
  633. SELECT *
  634. FROM Input MATCH_RECOGNIZE(
  635. PATTERN (
  636. PERMUTE( )" + std::accumulate(cbegin(vars) + 1, cend(vars), vars.front(),
  637. [](const std::string& acc, const std::string& v) {
  638. return acc + ", " + v;
  639. }) +
  640. R"(
  641. )
  642. )
  643. DEFINE A0 as A0
  644. )
  645. )"
  646. );
  647. const auto &r = MatchRecognizeSqlToYql(stmt);
  648. if (n <= 6) {
  649. UNIT_ASSERT(r.IsOk());
  650. } else {
  651. UNIT_ASSERT(!r.IsOk());
  652. }
  653. }
  654. }
  655. Y_UNIT_TEST(row_pattern_subset_clause) {
  656. //TODO https://st.yandex-team.ru/YQL-16186
  657. }
  658. Y_UNIT_TEST(Defines) {
  659. auto stmt = R"(
  660. USE plato;
  661. SELECT *
  662. FROM Input MATCH_RECOGNIZE(
  663. PATTERN ( Y Q L )
  664. DEFINE
  665. Y as true,
  666. Q as Q.V = "value",
  667. L as L.V = LAST(Q.T)
  668. )
  669. )";
  670. auto r = MatchRecognizeSqlToYql(stmt);
  671. UNIT_ASSERT(r.IsOk());
  672. const auto defines = FindMatchRecognizeParam(r.Root, "define");
  673. UNIT_ASSERT_VALUES_EQUAL(7, defines->GetChildrenCount());
  674. const auto varNames = defines->GetChild(3);
  675. UNIT_ASSERT(IsQuotedListOfSize(varNames, 3));
  676. UNIT_ASSERT_VALUES_EQUAL("Y", varNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
  677. UNIT_ASSERT_VALUES_EQUAL("Q", varNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
  678. UNIT_ASSERT_VALUES_EQUAL("L", varNames->GetChild(1)->GetChild(2)->GetChild(1)->GetContent());
  679. UNIT_ASSERT(IsLambda(defines->GetChild(4), 3));
  680. UNIT_ASSERT(IsLambda(defines->GetChild(5), 3));
  681. UNIT_ASSERT(IsLambda(defines->GetChild(6), 3));
  682. }
  683. Y_UNIT_TEST(AbsentRowPatternVariableInDefines) {
  684. auto getStatement = [](const TString &var) {
  685. return TStringBuilder() << R"(
  686. USE plato;
  687. SELECT *
  688. FROM Input MATCH_RECOGNIZE(
  689. PATTERN ( Q )
  690. DEFINE
  691. )" << var << " AS TRUE )";
  692. };
  693. UNIT_ASSERT(MatchRecognizeSqlToYql(getStatement("Q")).IsOk());
  694. UNIT_ASSERT(!MatchRecognizeSqlToYql(getStatement("Y")).IsOk());
  695. }
  696. Y_UNIT_TEST(CheckRequiredNavigationFunction) {
  697. TString stmtPrefix = R"(
  698. USE plato;
  699. SELECT *
  700. FROM Input MATCH_RECOGNIZE(
  701. PATTERN ( Y Q L )
  702. DEFINE
  703. L as L.V =
  704. )";
  705. //Be aware that right parenthesis is added at the end of the query as required
  706. UNIT_ASSERT(MatchRecognizeSqlToYql(stmtPrefix + "LAST(Q.dt) )").IsOk());
  707. UNIT_ASSERT(!MatchRecognizeSqlToYql(stmtPrefix + "Q.dt )").IsOk());
  708. }
  709. }