12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742 |
- #include "sql_ut.h"
- #include "match_recognize.h"
- #include <yql/essentials/providers/common/provider/yql_provider_names.h>
- #include <yql/essentials/core/sql_types/match_recognize.h>
- #include <yql/essentials/sql/sql.h>
- #include <util/generic/map.h>
- #include <library/cpp/testing/unittest/registar.h>
- #include <util/string/split.h>
- using namespace NSQLTranslation;
- NYql::TAstParseResult MatchRecognizeSqlToYql(const TString& query) {
- TString enablingPragma = R"(
- pragma FeatureR010="prototype";
- )";
- return SqlToYql(enablingPragma + query);
- }
- const NYql::TAstNode* FindMatchRecognizeParam(const NYql::TAstNode* root, TString name) {
- auto matchRecognizeBlock = FindNodeByChildAtomContent(root, 1, "match_recognize");
- UNIT_ASSERT(matchRecognizeBlock);
- auto paramNode = FindNodeByChildAtomContent(matchRecognizeBlock, 1, name);
- return paramNode->GetChild(2);
- }
- bool IsQuotedListOfSize(const NYql::TAstNode* node, ui32 size) {
- UNIT_ASSERT(node->IsListOfSize(2));
- if (!node->IsListOfSize(2))
- return false;
- UNIT_ASSERT_EQUAL(node->GetChild(0)->GetContent(), "quote");
- if (node->GetChild(0)->GetContent() != "quote")
- return false;
- UNIT_ASSERT_EQUAL(node->GetChild(1)->GetChildrenCount(), size);
- return node->GetChild(1)->IsListOfSize(size);
- }
- bool IsLambda(const NYql::TAstNode* node, ui32 numberOfArgs) {
- if (!node->IsListOfSize(3)) {
- return false;
- }
- if (!node->GetChild(0)->IsAtom() || node->GetChild(0)->GetContent() != "lambda") {
- return false;
- }
- return IsQuotedListOfSize(node->GetChild(1), numberOfArgs);
- }
- Y_UNIT_TEST_SUITE(MatchRecognize) {
// Smallest MATCH_RECOGNIZE query used by the tests below: it has only the
// PATTERN and DEFINE clauses.
const char* minValidMatchRecognizeSql = R"(
USE plato;
SELECT *
FROM Input MATCH_RECOGNIZE(
PATTERN ( A )
DEFINE A as A
)
)";
- Y_UNIT_TEST(EnabledWithPragma) {
- UNIT_ASSERT(not SqlToYql(minValidMatchRecognizeSql).IsOk());
- UNIT_ASSERT(MatchRecognizeSqlToYql(minValidMatchRecognizeSql).IsOk());
- }
- Y_UNIT_TEST(InputTableName) {
- auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
- UNIT_ASSERT(r.IsOk());
- auto input = FindMatchRecognizeParam(r.Root, "input");
- UNIT_ASSERT(input->IsAtom() && input->GetContent() == "core");
- }
- Y_UNIT_TEST(MatchRecognizeAndSample) {
- auto matchRecognizeAndSample = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN ( A )
- DEFINE A as A
- ) TABLESAMPLE BERNOULLI(1.0)
- )";
- UNIT_ASSERT(not MatchRecognizeSqlToYql(matchRecognizeAndSample).IsOk());
- }
- Y_UNIT_TEST(NoPartitionBy) {
- auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
- UNIT_ASSERT(r.IsOk());
- auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector");
- UNIT_ASSERT(IsQuotedListOfSize(partitionKeySelector->GetChild(2), 0)); //empty tuple
- auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns");
- UNIT_ASSERT(IsQuotedListOfSize(partitionColumns, 0)); //empty tuple
- }
- Y_UNIT_TEST(PartitionBy) {
- auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PARTITION BY col1 as c1, ~CAST(col1 as Int32) as invertedC1, c2
- PATTERN ( A )
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto partitionKeySelector = FindMatchRecognizeParam(r.Root, "partitionKeySelector");
- UNIT_ASSERT(IsQuotedListOfSize(partitionKeySelector->GetChild(2), 3));
- auto partitionColumns = FindMatchRecognizeParam(r.Root, "partitionColumns");
- UNIT_ASSERT(IsQuotedListOfSize(partitionColumns, 3));
- //TODO check partitioner lambdas(alias/no alias)
- }
- Y_UNIT_TEST(NoOrderBy) {
- auto r = MatchRecognizeSqlToYql(minValidMatchRecognizeSql);
- UNIT_ASSERT(r.IsOk());
- auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits");
- UNIT_ASSERT(sortTraits && sortTraits->IsListOfSize(1));
- UNIT_ASSERT(sortTraits->GetChild(0)->GetContent() == "Void");
- }
- Y_UNIT_TEST(OrderBy) {
- auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- ORDER BY col1, ~CAST(col1 as Int32), c2
- PATTERN ( A )
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto sortTraits = FindMatchRecognizeParam(r.Root, "sortTraits");
- UNIT_ASSERT(sortTraits && sortTraits->IsListOfSize(4));
- UNIT_ASSERT(sortTraits->GetChild(0)->GetContent() == "SortTraits");
- UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(2), 3));
- UNIT_ASSERT(IsQuotedListOfSize(sortTraits->GetChild(3)->GetChild(2), 3));
- }
- Y_UNIT_TEST(Measures) {
- auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- MEASURES
- Last(Q.dt) as T,
- First(Y.key) as Key
- PATTERN ( Y Q )
- DEFINE Y as true
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto measures = FindMatchRecognizeParam(r.Root, "measures");
- UNIT_ASSERT_VALUES_EQUAL(7, measures->GetChildrenCount());
- const auto columnNames = measures->GetChild(3);
- UNIT_ASSERT(IsQuotedListOfSize(columnNames, 2));
- UNIT_ASSERT_VALUES_EQUAL("T", columnNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
- UNIT_ASSERT_VALUES_EQUAL("Key", columnNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
- UNIT_ASSERT(IsQuotedListOfSize(measures->GetChild(4), 2));
- UNIT_ASSERT(IsQuotedListOfSize(measures->GetChild(5), 2));
- }
- Y_UNIT_TEST(RowsPerMatch) {
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- ONE ROW PER MATCH
- PATTERN (A)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch");
- UNIT_ASSERT_VALUES_EQUAL("RowsPerMatch_OneRow", rowsPerMatch->GetChild(1)->GetContent());
- }
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- ALL ROWS PER MATCH
- PATTERN (A)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- }
- { //default
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN (A)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto rowsPerMatch = FindMatchRecognizeParam(r.Root, "rowsPerMatch");
- UNIT_ASSERT_VALUES_EQUAL("RowsPerMatch_OneRow", rowsPerMatch->GetChild(1)->GetContent());
- }
- }
- Y_UNIT_TEST(SkipAfterMatch) {
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- AFTER MATCH SKIP TO NEXT ROW
- PATTERN (A)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
- UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_NextRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
- }
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- AFTER MATCH SKIP PAST LAST ROW
- PATTERN (A)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
- UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_PastLastRow", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
- }
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- AFTER MATCH SKIP TO FIRST Y
- PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
- UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToFirst", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
- UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
- }
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- AFTER MATCH SKIP TO FIRST T -- unknown pattern var
- PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(not r.IsOk());
- }
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- AFTER MATCH SKIP TO LAST Y
- PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
- UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_ToLast", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
- UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
- }
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- AFTER MATCH SKIP TO LAST T -- unknown pattern var
- PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(not r.IsOk());
- }
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- AFTER MATCH SKIP TO Y
- PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- auto skipTo = FindMatchRecognizeParam(r.Root, "skipTo");
- UNIT_ASSERT_VALUES_EQUAL("AfterMatchSkip_To", skipTo->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
- UNIT_ASSERT_VALUES_EQUAL("Y", skipTo->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
- }
- {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- AFTER MATCH SKIP TO T -- unknown pattern var
- PATTERN (A | (U | (Q | Y)) | ($ B)+ C D)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(not r.IsOk());
- }
- }
- Y_UNIT_TEST(row_pattern_initial) {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- INITIAL
- PATTERN (A+ B* C?)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(not r.IsOk());
- }
- Y_UNIT_TEST(row_pattern_seek) {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- SEEK
- PATTERN (A+ B* C?)
- DEFINE A as A
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(not r.IsOk());
- }
- Y_UNIT_TEST(PatternSimple) {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN (A+ B* C?)
- DEFINE A as A
- )
- )";
- const auto& r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
- UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
- UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 1);
- const auto& term = patternCallable->GetChild(1);
- UNIT_ASSERT(IsQuotedListOfSize(term, 3));
- }
- Y_UNIT_TEST(PatternMultiTerm) {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN ($ A+ B{1,3} | C{3} D{1,4} E? | F?? | G{3,}? H*? I J ^)
- DEFINE A as A
- )
- )";
- const auto& r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
- UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
- UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 4);
- const auto& lastTerm = patternCallable->GetChild(4);
- UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 5));
- }
- Y_UNIT_TEST(PatternWithParanthesis) {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN (
- A | ($ B)+ C D
- )
- DEFINE A as A
- )
- )";
- const auto& r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
- UNIT_ASSERT_EQUAL(patternCallable->GetChild(0)->GetContent(), "MatchRecognizePattern");
- UNIT_ASSERT_EQUAL(patternCallable->GetChildrenCount(), 1 + 2);
- const auto& firstTerm = patternCallable->GetChild(1);
- UNIT_ASSERT(IsQuotedListOfSize(firstTerm, 1));
- const auto& lastTerm = patternCallable->GetChild(2);
- UNIT_ASSERT(IsQuotedListOfSize(lastTerm, 3));
- const auto& firstFactorOfLastTerm = lastTerm->GetChild(1)->GetChild(0);
- UNIT_ASSERT(IsQuotedListOfSize(firstFactorOfLastTerm, 6));
- const auto nestedPattern = firstFactorOfLastTerm->GetChild(1)->GetChild(0);
- UNIT_ASSERT_EQUAL(nestedPattern->GetChildrenCount(), 1 + 1);
- UNIT_ASSERT_EQUAL(nestedPattern->GetChild(0)->GetContent(), "MatchRecognizePattern");
- UNIT_ASSERT(IsQuotedListOfSize(nestedPattern->GetChild(1), 2));
- }
- Y_UNIT_TEST(PatternManyAlternatives) {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN (
- (A B C D ) | (B A C D ) | (C B A D ) | (B C A D ) | (C A B D ) | (A C B D ) | (D A B C ) | (A D B C ) | (B A D C ) | (A B D C ) | (B D A C ) | (D B A C ) | (C D A B ) | (D C A B ) | (A D C B ) | (D A C B ) | (A C D B ) | (C A D B ) | (B C D A ) | (C B D A ) | (D C B A ) | (C D B A ) | (D B C A ) | (B D C A )
- )
- DEFINE A as A
- )
- )";
- UNIT_ASSERT(MatchRecognizeSqlToYql(stmt).IsOk());
- }
- Y_UNIT_TEST(PatternLimitedNesting) {
- constexpr size_t MaxNesting = 20;
- for (size_t extraNesting = 0; extraNesting <= 1; ++extraNesting) {
- std::string pattern;
- for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
- pattern.push_back('(');
- pattern.push_back('A');
- for (size_t i = 0; i != MaxNesting + extraNesting; ++i)
- pattern.push_back(')');
- const auto stmt = TString(R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN(
- )") + pattern + R"(
- )
- DEFINE A as A
- )
- )";
- const auto &r = MatchRecognizeSqlToYql(stmt);
- if (not extraNesting) {
- UNIT_ASSERT(r.IsOk());
- } else {
- UNIT_ASSERT(not r.IsOk());
- }
- }
- }
- Y_UNIT_TEST(PatternFactorQuantifiers) {
- auto makeRequest = [](const TString& factor) {
- return TString(R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN(
- )") + factor + R"(
- )
- DEFINE A as A
- )
- )";
- };
- auto getTheFactor = [](const NYql::TAstNode* root) {
- const auto& patternCallable = FindMatchRecognizeParam(root, "pattern");
- const auto& factor = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1);
- return NYql::NMatchRecognize::TRowPatternFactor{
- TString(), //primary var or subexpression, not used in this test
- FromString<uint64_t>(factor->GetChild(1)->GetChild(1)->GetContent()), //QuantityMin
- FromString<uint64_t>(factor->GetChild(2)->GetChild(1)->GetContent()), //QuantityMax
- FromString<bool>(factor->GetChild(3)->GetChild(1)->GetContent()), //Greedy
- false, //Output, not used in this test
- false, // Flag "Unused", not used in this test
- };
- };
- {
- //no quantifiers
- const auto stmt = makeRequest("A");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
- UNIT_ASSERT(factor.Greedy);
- }
- {
- //optional greedy(default)
- const auto stmt = makeRequest("A?");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
- UNIT_ASSERT(factor.Greedy);
- }
- {
- //optional reluctant
- const auto stmt = makeRequest("A??");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(1, factor.QuantityMax);
- UNIT_ASSERT(!factor.Greedy);
- }
- {
- //+ greedy(default)
- const auto stmt = makeRequest("A+");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
- UNIT_ASSERT(factor.Greedy);
- }
- {
- //+ reluctant
- const auto stmt = makeRequest("A+?");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(1, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
- UNIT_ASSERT(!factor.Greedy);
- }
- {
- //* greedy(default)
- const auto stmt = makeRequest("A*");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
- UNIT_ASSERT(factor.Greedy);
- }
- {
- //* reluctant
- const auto stmt = makeRequest("A*?");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
- UNIT_ASSERT(!factor.Greedy);
- }
- {
- //exact n
- const auto stmt = makeRequest("A{4}");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(4, factor.QuantityMax);
- }
- {
- //from n to m greedy(default
- const auto stmt = makeRequest("A{4, 7}");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
- UNIT_ASSERT(factor.Greedy);
- }
- {
- //from n to m reluctant
- const auto stmt = makeRequest("A{4,7}?");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
- UNIT_ASSERT(!factor.Greedy);
- }
- {
- //at least n greedy(default)
- const auto stmt = makeRequest("A{4,}");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
- UNIT_ASSERT(factor.Greedy);
- }
- {
- //at least n reluctant
- const auto stmt = makeRequest("A{4,}?");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(std::numeric_limits<uint64_t>::max(), factor.QuantityMax);
- UNIT_ASSERT(!factor.Greedy);
- }
- {
- //at most m greedy(default)
- const auto stmt = makeRequest("A{,7}");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
- UNIT_ASSERT(factor.Greedy);
- }
- {
- //at least n reluctant
- const auto stmt = makeRequest("A{,7}?");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(0, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
- UNIT_ASSERT(!factor.Greedy);
- }
- {
- //quantifiers on subexpression
- const auto stmt = makeRequest("(A B+ C | D | ^){4,7}?");
- const auto &r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& factor = getTheFactor(r.Root);
- UNIT_ASSERT_EQUAL(4, factor.QuantityMin);
- UNIT_ASSERT_EQUAL(7, factor.QuantityMax);
- UNIT_ASSERT(!factor.Greedy);
- }
- }
- Y_UNIT_TEST(Permute) {
- const auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN (
- PERMUTE(A, B, C, D, E) --5 variables produce 5! permutations
- )
- DEFINE A as A
- )
- )";
- const auto& r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto& patternCallable = FindMatchRecognizeParam(r.Root, "pattern");
- const auto permutePattern = patternCallable->GetChild(1)->GetChild(1)->GetChild(0)->GetChild(1)->GetChild(0);
- UNIT_ASSERT(permutePattern->IsListOfSize(1 + 120)); //CallableName + 5!
- }
- Y_UNIT_TEST(PermuteTooMuch) {
- for (size_t n = 1; n <= 6 + 1; ++n) {
- std::vector<std::string> vars(n);
- std::generate(begin(vars), end(vars), [n = 0] () mutable { return "A" + std::to_string(n++);});
- const auto stmt = TString(R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN (
- PERMUTE( )" + std::accumulate(cbegin(vars) + 1, cend(vars), vars.front(),
- [](const std::string& acc, const std::string& v) {
- return acc + ", " + v;
- }) +
- R"(
- )
- )
- DEFINE A0 as A0
- )
- )"
- );
- const auto &r = MatchRecognizeSqlToYql(stmt);
- if (n <= 6) {
- UNIT_ASSERT(r.IsOk());
- } else {
- UNIT_ASSERT(!r.IsOk());
- }
- }
- }
- Y_UNIT_TEST(row_pattern_subset_clause) {
- //TODO https://st.yandex-team.ru/YQL-16186
- }
- Y_UNIT_TEST(Defines) {
- auto stmt = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN ( Y Q L )
- DEFINE
- Y as true,
- Q as Q.V = "value",
- L as L.V = LAST(Q.T)
- )
- )";
- auto r = MatchRecognizeSqlToYql(stmt);
- UNIT_ASSERT(r.IsOk());
- const auto defines = FindMatchRecognizeParam(r.Root, "define");
- UNIT_ASSERT_VALUES_EQUAL(7, defines->GetChildrenCount());
- const auto varNames = defines->GetChild(3);
- UNIT_ASSERT(IsQuotedListOfSize(varNames, 3));
- UNIT_ASSERT_VALUES_EQUAL("Y", varNames->GetChild(1)->GetChild(0)->GetChild(1)->GetContent());
- UNIT_ASSERT_VALUES_EQUAL("Q", varNames->GetChild(1)->GetChild(1)->GetChild(1)->GetContent());
- UNIT_ASSERT_VALUES_EQUAL("L", varNames->GetChild(1)->GetChild(2)->GetChild(1)->GetContent());
- UNIT_ASSERT(IsLambda(defines->GetChild(4), 3));
- UNIT_ASSERT(IsLambda(defines->GetChild(5), 3));
- UNIT_ASSERT(IsLambda(defines->GetChild(6), 3));
- }
- Y_UNIT_TEST(AbsentRowPatternVariableInDefines) {
- auto getStatement = [](const TString &var) {
- return TStringBuilder() << R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN ( Q )
- DEFINE
- )" << var << " AS TRUE )";
- };
- UNIT_ASSERT(MatchRecognizeSqlToYql(getStatement("Q")).IsOk());
- UNIT_ASSERT(!MatchRecognizeSqlToYql(getStatement("Y")).IsOk());
- }
- Y_UNIT_TEST(CheckRequiredNavigationFunction) {
- TString stmtPrefix = R"(
- USE plato;
- SELECT *
- FROM Input MATCH_RECOGNIZE(
- PATTERN ( Y Q L )
- DEFINE
- L as L.V =
- )";
- //Be aware that right parenthesis is added at the end of the query as required
- UNIT_ASSERT(MatchRecognizeSqlToYql(stmtPrefix + "LAST(Q.dt) )").IsOk());
- UNIT_ASSERT(!MatchRecognizeSqlToYql(stmtPrefix + "Q.dt )").IsOk());
- }
- }
|