12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382 |
- #include "yql_constraint.h"
- #include "yql_expr.h"
- #include <util/digest/murmur.h>
- #include <util/generic/utility.h>
- #include <util/generic/algorithm.h>
- #include <util/string/join.h>
- #include <algorithm>
- #include <iterator>
- namespace NYql {
- TConstraintNode::TConstraintNode(TExprContext& ctx, std::string_view name)
- : Hash_(MurmurHash<ui64>(name.data(), name.size()))
- , Name_(ctx.AppendString(name))
- {
- }
- TConstraintNode::TConstraintNode(TConstraintNode&& constr)
- : Hash_(constr.Hash_)
- , Name_(constr.Name_)
- {
- constr.Hash_ = 0;
- constr.Name_ = {};
- }
- void TConstraintNode::Out(IOutputStream& out) const {
- out.Write(Name_);
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- TPartOfConstraintBase::TPartOfConstraintBase(TExprContext& ctx, std::string_view name)
- : TConstraintNode(ctx, name)
- {}
- TConstraintWithFieldsNode::TConstraintWithFieldsNode(TExprContext& ctx, std::string_view name)
- : TPartOfConstraintBase(ctx, name)
- {}
- const TTypeAnnotationNode* TPartOfConstraintBase::GetSubTypeByPath(const TPathType& path, const TTypeAnnotationNode& type) {
- if (path.empty() && ETypeAnnotationKind::Optional != type.GetKind())
- return &type;
- const auto tail = [](const TPathType& path) {
- auto p(path);
- p.pop_front();
- return p;
- };
- switch (type.GetKind()) {
- case ETypeAnnotationKind::Optional:
- return GetSubTypeByPath(path, *type.Cast<TOptionalExprType>()->GetItemType());
- case ETypeAnnotationKind::List: // TODO: Remove later: temporary stub for single AsList in FlatMap and same cases.
- return GetSubTypeByPath(path, *type.Cast<TListExprType>()->GetItemType());
- case ETypeAnnotationKind::Struct:
- if (const auto itemType = type.Cast<TStructExprType>()->FindItemType(path.front()))
- return GetSubTypeByPath(tail(path), *itemType);
- break;
- case ETypeAnnotationKind::Tuple:
- if (const auto index = TryFromString<ui64>(TStringBuf(path.front())))
- if (const auto typleType = type.Cast<TTupleExprType>(); typleType->GetSize() > *index)
- return GetSubTypeByPath(tail(path), *typleType->GetItems()[*index]);
- break;
- case ETypeAnnotationKind::Multi:
- if (const auto index = TryFromString<ui64>(TStringBuf(path.front())))
- if (const auto multiType = type.Cast<TMultiExprType>(); multiType->GetSize() > *index)
- return GetSubTypeByPath(tail(path), *multiType->GetItems()[*index]);
- break;
- case ETypeAnnotationKind::Variant:
- return GetSubTypeByPath(path, *type.Cast<TVariantExprType>()->GetUnderlyingType());
- case ETypeAnnotationKind::Dict:
- if (const auto index = TryFromString<ui8>(TStringBuf(path.front())))
- switch (*index) {
- case 0U: return GetSubTypeByPath(tail(path), *type.Cast<TDictExprType>()->GetKeyType());
- case 1U: return GetSubTypeByPath(tail(path), *type.Cast<TDictExprType>()->GetPayloadType());
- default: break;
- }
- break;
- default:
- break;
- }
- return nullptr;
- }
- bool TPartOfConstraintBase::HasDuplicates(const TSetOfSetsType& sets) {
- for (auto ot = sets.cbegin(); sets.cend() != ot; ++ot) {
- for (auto it = sets.cbegin(); sets.cend() != it; ++it) {
- if (ot->size() < it->size() && std::all_of(ot->cbegin(), ot->cend(), [it](const TPathType& path) { return it->contains(path); }))
- return true;
- }
- }
- return false;
- }
- NYT::TNode TPartOfConstraintBase::PathToNode(const TPartOfConstraintBase::TPathType& path) {
- if (1U == path.size())
- return TStringBuf(path.front());
- return std::accumulate(path.cbegin(), path.cend(),
- NYT::TNode::CreateList(),
- [](NYT::TNode node, std::string_view p) -> NYT::TNode { return std::move(node).Add(TStringBuf(p)); }
- );
- };
- NYT::TNode TPartOfConstraintBase::SetToNode(const TPartOfConstraintBase::TSetType& set, bool withShortcut) {
- if (withShortcut && 1U == set.size() && 1U == set.front().size())
- return TStringBuf(set.front().front());
- return std::accumulate(set.cbegin(), set.cend(),
- NYT::TNode::CreateList(),
- [](NYT::TNode node, const TPathType& path) -> NYT::TNode { return std::move(node).Add(PathToNode(path)); }
- );
- };
- NYT::TNode TPartOfConstraintBase::SetOfSetsToNode(const TPartOfConstraintBase::TSetOfSetsType& sets) {
- return std::accumulate(sets.cbegin(), sets.cend(),
- NYT::TNode::CreateList(),
- [](NYT::TNode node, const TSetType& s) {
- return std::move(node).Add(TPartOfConstraintBase::SetToNode(s, true));
- });
- }
- TPartOfConstraintBase::TPathType TPartOfConstraintBase::NodeToPath(TExprContext& ctx, const NYT::TNode& node) {
- if (node.IsString())
- return TPartOfConstraintBase::TPathType{ctx.AppendString(node.AsString())};
- TPartOfConstraintBase::TPathType path;
- for (const auto& col : node.AsList()) {
- path.emplace_back(ctx.AppendString(col.AsString()));
- }
- return path;
- };
- TPartOfConstraintBase::TSetType TPartOfConstraintBase::NodeToSet(TExprContext& ctx, const NYT::TNode& node) {
- if (node.IsString())
- return TPartOfConstraintBase::TSetType{TPartOfConstraintBase::TPathType(1U, ctx.AppendString(node.AsString()))};
- TPartOfConstraintBase::TSetType set;
- for (const auto& col : node.AsList()) {
- set.insert_unique(NodeToPath(ctx, col));
- }
- return set;
- };
- TPartOfConstraintBase::TSetOfSetsType TPartOfConstraintBase::NodeToSetOfSets(TExprContext& ctx, const NYT::TNode& node) {
- TSetOfSetsType sets;
- for (const auto& s : node.AsList()) {
- sets.insert_unique(NodeToSet(ctx, s));
- }
- return sets;
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- const TConstraintNode* TConstraintSet::GetConstraint(std::string_view name) const {
- const auto it = std::lower_bound(Constraints_.cbegin(), Constraints_.cend(), name, TConstraintNode::TCompare());
- if (it != Constraints_.cend() && (*it)->GetName() == name) {
- return *it;
- }
- return nullptr;
- }
- void TConstraintSet::AddConstraint(const TConstraintNode* node) {
- if (!node) {
- return;
- }
- const auto it = std::lower_bound(Constraints_.begin(), Constraints_.end(), node, TConstraintNode::TCompare());
- if (it == Constraints_.end() || (*it)->GetName() != node->GetName()) {
- Constraints_.insert(it, node);
- } else {
- Y_ENSURE(node->Equals(**it), "Adding unequal constraint: " << *node << " != " << **it);
- }
- }
- const TConstraintNode* TConstraintSet::RemoveConstraint(std::string_view name) {
- const TConstraintNode* res = nullptr;
- const auto it = std::lower_bound(Constraints_.begin(), Constraints_.end(), name, TConstraintNode::TCompare());
- if (it != Constraints_.end() && (*it)->GetName() == name) {
- res = *it;
- Constraints_.erase(it);
- }
- return res;
- }
- void TConstraintSet::Out(IOutputStream& out) const {
- out.Write('{');
- bool first = true;
- for (const auto& c: Constraints_) {
- if (!first)
- out.Write(',');
- out << *c;
- first = false;
- }
- out.Write('}');
- }
- void TConstraintSet::ToJson(NJson::TJsonWriter& writer) const {
- writer.OpenMap();
- for (const auto& node : Constraints_) {
- writer.WriteKey(node->GetName());
- node->ToJson(writer);
- }
- writer.CloseMap();
- }
- NYT::TNode TConstraintSet::ToYson() const {
- auto res = NYT::TNode::CreateMap();
- for (const auto& node : Constraints_) {
- auto serialized = node->ToYson();
- YQL_ENSURE(!serialized.IsUndefined(), "Cannot serialize " << node->GetName() << " constraint");
- res[node->GetName()] = std::move(serialized);
- }
- return res;
- }
- bool TConstraintSet::FilterConstraints(const TPredicate& predicate) {
- const auto size = Constraints_.size();
- for (auto it = Constraints_.begin(); Constraints_.end() != it;)
- if (predicate((*it)->GetName()))
- ++it;
- else
- it = Constraints_.erase(it);
- return Constraints_.size() != size;
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- namespace {
- size_t GetElementsCount(const TTypeAnnotationNode* type) {
- if (type) {
- switch (type->GetKind()) {
- case ETypeAnnotationKind::Tuple: return type->Cast<TTupleExprType>()->GetSize();
- case ETypeAnnotationKind::Multi: return type->Cast<TMultiExprType>()->GetSize();
- case ETypeAnnotationKind::Struct: return type->Cast<TStructExprType>()->GetSize();
- default: break;
- }
- }
- return 0U;
- }
- std::deque<std::string_view> GetAllItemTypeFields(const TTypeAnnotationNode* type, TExprContext& ctx) {
- std::deque<std::string_view> fields;
- if (type) {
- switch (type->GetKind()) {
- case ETypeAnnotationKind::Struct:
- if (const auto structType = type->Cast<TStructExprType>()) {
- fields.resize(structType->GetSize());
- std::transform(structType->GetItems().cbegin(), structType->GetItems().cend(), fields.begin(), std::bind(&TItemExprType::GetName, std::placeholders::_1));
- }
- break;
- case ETypeAnnotationKind::Tuple:
- if (const auto size = type->Cast<TTupleExprType>()->GetSize()) {
- fields.resize(size);
- ui32 i = 0U;
- std::generate(fields.begin(), fields.end(), [&]() { return ctx.GetIndexAsString(i++); });
- }
- break;
- case ETypeAnnotationKind::Multi:
- if (const auto size = type->Cast<TMultiExprType>()->GetSize()) {
- fields.resize(size);
- ui32 i = 0U;
- std::generate(fields.begin(), fields.end(), [&]() { return ctx.GetIndexAsString(i++); });
- }
- break;
- default:
- break;
- }
- }
- return fields;
- }
- TPartOfConstraintBase::TSetOfSetsType MakeFullSet(const TPartOfConstraintBase::TSetType& keys) {
- TPartOfConstraintBase::TSetOfSetsType sets;
- sets.reserve(sets.size());
- for (const auto& key : keys)
- sets.insert_unique(TPartOfConstraintBase::TSetType{key});
- return sets;
- }
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- TSortedConstraintNode::TSortedConstraintNode(TExprContext& ctx, TContainerType&& content)
- : TConstraintWithFieldsT(ctx, Name())
- , Content_(std::move(content))
- {
- YQL_ENSURE(!Content_.empty());
- for (const auto& c : Content_) {
- YQL_ENSURE(!c.first.empty());
- for (const auto& path : c.first)
- Hash_ = std::accumulate(path.cbegin(), path.cend(), c.second ? Hash_ : ~Hash_, [](ui64 hash, const std::string_view& field) { return MurmurHash<ui64>(field.data(), field.size(), hash); });
- }
- }
- TSortedConstraintNode::TSortedConstraintNode(TExprContext& ctx, const NYT::TNode& serialized)
- : TSortedConstraintNode(ctx, NodeToContainer(ctx, serialized))
- {
- }
- TSortedConstraintNode::TContainerType TSortedConstraintNode::NodeToContainer(TExprContext& ctx, const NYT::TNode& serialized) {
- TSortedConstraintNode::TContainerType sorted;
- try {
- for (const auto& pair : serialized.AsList()) {
- TPartOfConstraintBase::TSetType set = TPartOfConstraintBase::NodeToSet(ctx, pair.AsList().front());
- sorted.emplace_back(std::move(set), pair.AsList().back().AsBool());
- }
- } catch (...) {
- YQL_ENSURE(false, "Cannot deserialize " << Name() << " constraint: " << CurrentExceptionMessage());
- }
- return sorted;
- }
// Move constructor, explicitly defaulted out of line.
- TSortedConstraintNode::TSortedConstraintNode(TSortedConstraintNode&&) = default;
- bool TSortedConstraintNode::Equals(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (const auto c = dynamic_cast<const TSortedConstraintNode*>(&node)) {
- return GetContent() == c->GetContent();
- }
- return false;
- }
- bool TSortedConstraintNode::Includes(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (GetName() != node.GetName()) {
- return false;
- }
- const auto& content = static_cast<const TSortedConstraintNode&>(node).GetContent();
- if (content.size() > Content_.size())
- return false;
- for (TContainerType::size_type i = 0U; i < content.size(); ++i) {
- if (Content_[i].second != content[i].second ||
- !(std::includes(Content_[i].first.cbegin(), Content_[i].first.cend(), content[i].first.cbegin(), content[i].first.cend()) || std::includes(content[i].first.cbegin(), content[i].first.cend(), Content_[i].first.cbegin(), Content_[i].first.cend())))
- return false;
- }
- return true;
- }
- void TSortedConstraintNode::Out(IOutputStream& out) const {
- TConstraintNode::Out(out);
- out.Write('(');
- bool first = true;
- for (const auto& c : Content_) {
- if (first)
- first = false;
- else
- out.Write(';');
- out.Write(JoinSeq(',', c.first));
- out.Write('[');
- out.Write(c.second ? "asc" : "desc");
- out.Write(']');
- }
- out.Write(')');
- }
- void TSortedConstraintNode::ToJson(NJson::TJsonWriter& out) const {
- out.OpenArray();
- for (const auto& c : Content_) {
- out.OpenArray();
- out.Write(JoinSeq(';', c.first));
- out.Write(c.second);
- out.CloseArray();
- }
- out.CloseArray();
- }
- NYT::TNode TSortedConstraintNode::ToYson() const {
- return std::accumulate(Content_.cbegin(), Content_.cend(),
- NYT::TNode::CreateList(),
- [](NYT::TNode node, const std::pair<TSetType, bool>& pair) {
- return std::move(node).Add(NYT::TNode::CreateList().Add(TPartOfConstraintBase::SetToNode(pair.first, false)).Add(pair.second));
- });
- }
- bool TSortedConstraintNode::IsPrefixOf(const TSortedConstraintNode& node) const {
- return node.Includes(*this);
- }
- bool TSortedConstraintNode::StartsWith(const TSetType& prefix) const {
- auto set = prefix;
- for (const auto& key : Content_) {
- bool found = false;
- std::for_each(key.first.cbegin(), key.first.cend(), [&set, &found] (const TPathType& path) {
- if (const auto it = set.find(path); set.cend() != it) {
- set.erase(it);
- found = true;
- }
- });
- if (!found)
- break;
- }
- return set.empty();
- }
- TPartOfConstraintBase::TSetType TSortedConstraintNode::GetFullSet() const {
- TSetType set;
- set.reserve(Content_.size());
- for (const auto& key : Content_)
- set.insert_unique(key.first.cbegin(), key.first.cend());
- return set;
- }
- void TSortedConstraintNode::FilterUncompleteReferences(TSetType& references) const {
- TSetType complete;
- complete.reserve(references.size());
- for (const auto& item : Content_) {
- bool found = false;
- for (const auto& path : item.first) {
- if (references.contains(path)) {
- found = true;
- complete.insert_unique(path);
- }
- }
- if (!found)
- break;
- }
- references = std::move(complete);
- }
- const TSortedConstraintNode* TSortedConstraintNode::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) {
- if (constraints.empty()) {
- return nullptr;
- }
- if (constraints.size() == 1) {
- return constraints.front()->GetConstraint<TSortedConstraintNode>();
- }
- std::optional<TContainerType> content;
- for (size_t i = 0U; i < constraints.size(); ++i) {
- if (const auto sort = constraints[i]->GetConstraint<TSortedConstraintNode>()) {
- const auto& nextContent = sort->GetContent();
- if (content) {
- const auto size = std::min(content->size(), nextContent.size());
- content->resize(size);
- for (auto j = 0U; j < size; ++j) {
- auto& one = (*content)[j];
- auto& two = nextContent[j];
- TSetType common;
- common.reserve(std::min(one.first.size(), two.first.size()));
- std::set_intersection(one.first.cbegin(), one.first.cend(), two.first.cbegin(), two.first.cend(), std::back_inserter(common));
- if (common.empty() || one.second != two.second) {
- content->resize(j);
- break;
- } else
- one.first = std::move(common);
- }
- if (content->empty())
- break;
- } else {
- content = nextContent;
- }
- } else if (!constraints[i]->GetConstraint<TEmptyConstraintNode>()) {
- content.reset();
- break;
- }
- }
- return !content || content->empty() ? nullptr : ctx.MakeConstraint<TSortedConstraintNode>(std::move(*content));
- }
- const TSortedConstraintNode* TSortedConstraintNode::MakeCommon(const TSortedConstraintNode* other, TExprContext& ctx) const {
- if (!other) {
- return nullptr;
- } else if (this == other) {
- return this;
- }
- auto content = other->GetContent();
- const auto size = std::min(content.size(), Content_.size());
- content.resize(size);
- for (auto j = 0U; j < size; ++j) {
- auto& one = content[j];
- auto& two = Content_[j];
- TSetType common;
- common.reserve(std::min(one.first.size(), two.first.size()));
- std::set_intersection(one.first.cbegin(), one.first.cend(), two.first.cbegin(), two.first.cend(), std::back_inserter(common));
- if (common.empty() || one.second != two.second) {
- content.resize(j);
- break;
- } else
- one.first = std::move(common);
- }
- return content.empty() ? nullptr : ctx.MakeConstraint<TSortedConstraintNode>(std::move(content));
- }
- const TSortedConstraintNode* TSortedConstraintNode::CutPrefix(size_t newPrefixLength, TExprContext& ctx) const {
- if (!newPrefixLength)
- return nullptr;
- if (newPrefixLength >= Content_.size())
- return this;
- auto content = Content_;
- content.resize(newPrefixLength);
- return ctx.MakeConstraint<TSortedConstraintNode>(std::move(content));
- }
- const TConstraintWithFieldsNode* TSortedConstraintNode::DoFilterFields(TExprContext& ctx, const TPathFilter& filter) const {
- if (!filter)
- return this;
- TContainerType sorted;
- sorted.reserve(Content_.size());
- for (const auto& item : Content_) {
- TSetType newSet;
- newSet.reserve(item.first.size());
- for (const auto& path : item.first) {
- if (filter(path))
- newSet.insert_unique(path);
- }
- if (newSet.empty())
- break;
- else
- sorted.emplace_back(std::move(newSet), item.second);
- }
- return sorted.empty() ? nullptr : ctx.MakeConstraint<TSortedConstraintNode>(std::move(sorted));
- }
- const TConstraintWithFieldsNode* TSortedConstraintNode::DoRenameFields(TExprContext& ctx, const TPathReduce& reduce) const {
- if (!reduce)
- return this;
- TContainerType sorted;
- sorted.reserve(Content_.size());
- for (const auto& item : Content_) {
- TSetType newSet;
- newSet.reserve(item.first.size());
- for (const auto& path : item.first) {
- if (const auto& newPaths = reduce(path); !newPaths.empty())
- newSet.insert_unique(newPaths.cbegin(), newPaths.cend());
- }
- if (newSet.empty())
- break;
- else
- sorted.emplace_back(std::move(newSet), item.second);
- }
- return sorted.empty() ? nullptr : ctx.MakeConstraint<TSortedConstraintNode>(std::move(sorted));
- }
- bool TSortedConstraintNode::IsApplicableToType(const TTypeAnnotationNode& type) const {
- const auto& itemType = GetSeqItemType(type);
- return std::all_of(Content_.cbegin(), Content_.cend(), [&itemType](const std::pair<TSetType, bool>& pair) {
- return std::all_of(pair.first.cbegin(), pair.first.cend(), std::bind(&GetSubTypeByPath, std::placeholders::_1, std::cref(itemType)));
- });
- }
- const TConstraintWithFieldsNode*
- TSortedConstraintNode::DoGetComplicatedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const {
- const auto& rowType = GetSeqItemType(type);
- bool changed = false;
- auto content = Content_;
- for (auto it = content.begin(); content.end() != it;) {
- const auto subType = GetSubTypeByPath(it->first.front(), rowType);
- auto fields = GetAllItemTypeFields(subType, ctx);
- for (auto j = it->first.cbegin(); it->first.cend() != ++j;) {
- if (!IsSameAnnotation(*GetSubTypeByPath(*j, rowType), *subType)) {
- fields.clear();
- break;
- }
- }
- if (fields.empty() || ETypeAnnotationKind::Struct == subType->GetKind())
- ++it;
- else {
- changed = true;
- const bool dir = it->second;
- auto set = it->first;
- for (auto& path : set)
- path.emplace_back();
- for (it = content.erase(it); !fields.empty(); fields.pop_front()) {
- auto paths = set;
- for (auto& path : paths)
- path.back() = fields.front();
- it = content.emplace(it, std::move(paths), dir);
- ++it;
- }
- }
- }
- return changed ? ctx.MakeConstraint<TSortedConstraintNode>(std::move(content)) : this;
- }
- const TConstraintWithFieldsNode*
- TSortedConstraintNode::DoGetSimplifiedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const {
- if (Content_.size() == 1U && Content_.front().first.size() == 1U && Content_.front().first.front().empty())
- return DoGetComplicatedForType(type, ctx);
- const auto& rowType = GetSeqItemType(type);
- const auto getPrefix = [](TPartOfConstraintBase::TPathType path) {
- path.pop_back();
- return path;
- };
- bool changed = false;
- auto content = Content_;
- for (bool setChanged = true; setChanged;) {
- setChanged = false;
- for (auto it = content.begin(); content.end() != it;) {
- if (it->first.size() > 1U) {
- for (const auto& path : it->first) {
- if (path.size() > 1U && path.back() == ctx.GetIndexAsString(0U)) {
- const auto prefix = getPrefix(path);
- if (const auto subType = GetSubTypeByPath(prefix, rowType); ETypeAnnotationKind::Struct != subType->GetKind() && 1 == GetElementsCount(subType)) {
- it->first.erase(path);
- it->first.insert(prefix);
- changed = setChanged = true;
- }
- }
- }
- ++it;
- } else if (it->first.size() == 1U && it->first.front().size() > 1U) {
- const auto prefix = getPrefix(it->first.front());
- if (const auto subType = GetSubTypeByPath(prefix, rowType); it->first.front().back() == ctx.GetIndexAsString(0U) && ETypeAnnotationKind::Struct != subType->GetKind()) {
- auto from = it++;
- for (auto i = 1U; content.cend() != it && it->first.size() == 1U && it->first.front().size() > 1U && ctx.GetIndexAsString(i) == it->first.front().back() && prefix == getPrefix(it->first.front()) && from->second == it->second; ++i)
- ++it;
- if (ssize_t(GetElementsCount(subType)) == std::distance(from, it)) {
- *from = std::make_pair(TPartOfConstraintBase::TSetType{std::move(prefix)}, from->second);
- ++from;
- it = content.erase(from, it);
- changed = setChanged = true;
- }
- } else
- ++it;
- } else
- ++it;
- }
- }
- return changed ? ctx.MakeConstraint<TSortedConstraintNode>(std::move(content)) : this;
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- TChoppedConstraintNode::TChoppedConstraintNode(TExprContext& ctx, TSetOfSetsType&& sets)
- : TConstraintWithFieldsT(ctx, Name())
- , Sets_(std::move(sets))
- {
- YQL_ENSURE(!Sets_.empty());
- YQL_ENSURE(!HasDuplicates(Sets_));
- const auto size = Sets_.size();
- Hash_ = MurmurHash<ui64>(&size, sizeof(size), Hash_);
- for (const auto& set : Sets_) {
- YQL_ENSURE(!set.empty());
- for (const auto& path : set)
- Hash_ = std::accumulate(path.cbegin(), path.cend(), Hash_, [](ui64 hash, const std::string_view& field) { return MurmurHash<ui64>(field.data(), field.size(), hash); });
- }
- }
- TChoppedConstraintNode::TChoppedConstraintNode(TExprContext& ctx, const TSetType& keys)
- : TChoppedConstraintNode(ctx, MakeFullSet(keys))
- {}
- TChoppedConstraintNode::TChoppedConstraintNode(TExprContext& ctx, const NYT::TNode& serialized)
- : TChoppedConstraintNode(ctx, NodeToSets(ctx, serialized))
- {
- }
- TChoppedConstraintNode::TSetOfSetsType TChoppedConstraintNode::NodeToSets(TExprContext& ctx, const NYT::TNode& serialized) {
- try {
- return TPartOfConstraintBase::NodeToSetOfSets(ctx, serialized);
- } catch (...) {
- YQL_ENSURE(false, "Cannot deserialize " << Name() << " constraint: " << CurrentExceptionMessage());
- }
- Y_UNREACHABLE();
- }
// Move constructor, explicitly defaulted out of line.
- TChoppedConstraintNode::TChoppedConstraintNode(TChoppedConstraintNode&& constr) = default;
- TPartOfConstraintBase::TSetType TChoppedConstraintNode::GetFullSet() const {
- TSetType set;
- set.reserve(Sets_.size());
- for (const auto& key : Sets_)
- set.insert_unique(key.cbegin(), key.cend());
- return set;
- }
- bool TChoppedConstraintNode::Equals(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (GetHash() != node.GetHash()) {
- return false;
- }
- if (const auto c = dynamic_cast<const TChoppedConstraintNode*>(&node)) {
- return Sets_ == c->Sets_;
- }
- return false;
- }
- bool TChoppedConstraintNode::Includes(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (const auto c = dynamic_cast<const TChoppedConstraintNode*>(&node)) {
- return std::includes(Sets_.cbegin(), Sets_.cend(), c->Sets_.cbegin(), c->Sets_.cend());
- }
- return false;
- }
- void TChoppedConstraintNode::Out(IOutputStream& out) const {
- TConstraintNode::Out(out);
- out.Write('(');
- for (const auto& set : Sets_) {
- out.Write('(');
- bool first = true;
- for (const auto& path : set) {
- if (first)
- first = false;
- else
- out.Write(',');
- out << path;
- }
- out.Write(')');
- }
- out.Write(')');
- }
- void TChoppedConstraintNode::ToJson(NJson::TJsonWriter& out) const {
- out.OpenArray();
- for (const auto& set : Sets_) {
- out.OpenArray();
- for (const auto& path : set) {
- out.Write(JoinSeq(';', path));
- }
- out.CloseArray();
- }
- out.CloseArray();
- }
- NYT::TNode TChoppedConstraintNode::ToYson() const {
- return TPartOfConstraintBase::SetOfSetsToNode(Sets_);
- }
- bool TChoppedConstraintNode::Equals(const TSetType& prefix) const {
- auto set = prefix;
- for (const auto& key : Sets_) {
- bool found = false;
- std::for_each(key.cbegin(), key.cend(), [&set, &found] (const TPathType& path) {
- if (const auto it = set.find(path); set.cend() != it) {
- set.erase(it);
- found = true;
- }
- });
- if (!found)
- return false;
- }
- return set.empty();
- }
- void TChoppedConstraintNode::FilterUncompleteReferences(TSetType& references) const {
- TSetType complete;
- complete.reserve(references.size());
- for (const auto& item : Sets_) {
- bool found = false;
- for (const auto& path : item) {
- if (references.contains(path)) {
- found = true;
- complete.insert_unique(path);
- }
- }
- if (!found)
- break;
- }
- references = std::move(complete);
- }
- const TChoppedConstraintNode* TChoppedConstraintNode::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) {
- if (constraints.empty()) {
- return nullptr;
- }
- if (constraints.size() == 1) {
- return constraints.front()->GetConstraint<TChoppedConstraintNode>();
- }
- TSetOfSetsType sets;
- for (auto c: constraints) {
- if (const auto uniq = c->GetConstraint<TChoppedConstraintNode>()) {
- if (sets.empty())
- sets = uniq->GetContent();
- else {
- TSetOfSetsType both;
- both.reserve(std::min(sets.size(), uniq->GetContent().size()));
- std::set_intersection(sets.cbegin(), sets.cend(), uniq->GetContent().cbegin(), uniq->GetContent().cend(), std::back_inserter(both));
- if (both.empty()) {
- if (!c->GetConstraint<TEmptyConstraintNode>())
- return nullptr;
- } else
- sets = std::move(both);
- }
- } else if (!c->GetConstraint<TEmptyConstraintNode>()) {
- return nullptr;
- }
- }
- return sets.empty() ? nullptr : ctx.MakeConstraint<TChoppedConstraintNode>(std::move(sets));
- }
- const TConstraintWithFieldsNode*
- TChoppedConstraintNode::DoFilterFields(TExprContext& ctx, const TPathFilter& predicate) const {
- if (!predicate)
- return this;
- TSetOfSetsType chopped;
- chopped.reserve(Sets_.size());
- for (const auto& set : Sets_) {
- auto newSet = set;
- for (auto it = newSet.cbegin(); newSet.cend() != it;) {
- if (predicate(*it))
- ++it;
- else
- it = newSet.erase(it);
- }
- if (newSet.empty())
- return nullptr;;
- chopped.insert_unique(std::move(newSet));
- }
- return ctx.MakeConstraint<TChoppedConstraintNode>(std::move(chopped));
- }
- const TConstraintWithFieldsNode*
- TChoppedConstraintNode::DoRenameFields(TExprContext& ctx, const TPathReduce& reduce) const {
- if (!reduce)
- return this;
- TSetOfSetsType chopped;
- chopped.reserve(Sets_.size());
- for (const auto& set : Sets_) {
- TSetType newSet;
- newSet.reserve(set.size());
- for (const auto& path : set) {
- if (const auto& newPaths = reduce(path); !newPaths.empty())
- newSet.insert_unique(newPaths.cbegin(), newPaths.cend());
- }
- if (newSet.empty())
- return nullptr;
- chopped.insert_unique(std::move(newSet));
- }
- return ctx.MakeConstraint<TChoppedConstraintNode>(std::move(chopped));
- }
- const TChoppedConstraintNode*
- TChoppedConstraintNode::MakeCommon(const TChoppedConstraintNode* other, TExprContext& ctx) const {
- if (!other) {
- return nullptr;
- }
- if (this == other) {
- return this;
- }
- TSetOfSetsType both;
- both.reserve(std::min(Sets_.size(), other->Sets_.size()));
- std::set_intersection(Sets_.cbegin(), Sets_.cend(), other->Sets_.cbegin(), other->Sets_.cend(), std::back_inserter(both));
- return both.empty() ? nullptr : ctx.MakeConstraint<TChoppedConstraintNode>(std::move(both));
- }
- bool TChoppedConstraintNode::IsApplicableToType(const TTypeAnnotationNode& type) const {
- const auto& itemType = GetSeqItemType(type);
- return std::all_of(Sets_.cbegin(), Sets_.cend(), [&itemType](const TSetType& set) {
- return std::all_of(set.cbegin(), set.cend(), std::bind(&GetSubTypeByPath, std::placeholders::_1, std::cref(itemType)));
- });
- }
- const TConstraintWithFieldsNode*
- TChoppedConstraintNode::DoGetComplicatedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const {
- const auto& rowType = GetSeqItemType(type);
- bool changed = false;
- auto sets = Sets_;
- for (auto it = sets.begin(); sets.end() != it;) {
- auto fields = GetAllItemTypeFields(GetSubTypeByPath(it->front(), rowType), ctx);
- for (auto j = it->cbegin(); it->cend() != ++j;) {
- if (const auto& copy = GetAllItemTypeFields(GetSubTypeByPath(*j, rowType), ctx); copy != fields) {
- fields.clear();
- break;
- }
- }
- if (fields.empty())
- ++it;
- else {
- changed = true;
- auto set = *it;
- for (auto& path : set)
- path.emplace_back();
- for (it = sets.erase(it); !fields.empty(); fields.pop_front()) {
- auto paths = set;
- for (auto& path : paths)
- path.back() = fields.front();
- it = sets.insert_unique(std::move(paths)).first;
- }
- }
- }
- return changed ? ctx.MakeConstraint<TChoppedConstraintNode>(std::move(sets)) : this;
- }
- const TConstraintWithFieldsNode*
- TChoppedConstraintNode::DoGetSimplifiedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const {
- if (Sets_.size() == 1U && Sets_.front().size() == 1U && Sets_.front().front().empty())
- return DoGetComplicatedForType(type, ctx);
- const auto& rowType = GetSeqItemType(type);
- const auto getPrefix = [](TPartOfConstraintBase::TPathType path) {
- path.pop_back();
- return path;
- };
- bool changed = false;
- auto sets = Sets_;
- for (bool setChanged = true; setChanged;) {
- setChanged = false;
- for (auto it = sets.begin(); sets.end() != it;) {
- if (it->size() != 1U || it->front().size() <= 1U)
- ++it;
- else {
- auto from = it++;
- const auto prefix = getPrefix(from->front());
- while (sets.cend() != it && it->size() == 1U && it->front().size() > 1U && prefix == getPrefix(it->front()))
- ++it;
- if (ssize_t(GetElementsCount(GetSubTypeByPath(prefix, rowType))) == std::distance(from, it)) {
- *from++ = TPartOfConstraintBase::TSetType{std::move(prefix)};
- it = sets.erase(from, it);
- changed = setChanged = true;
- }
- }
- }
- }
- return changed ? ctx.MakeConstraint<TChoppedConstraintNode>(std::move(sets)) : this;
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- template<bool Distinct>
- TConstraintWithFieldsNode::TSetOfSetsType
- TUniqueConstraintNodeBase<Distinct>::ColumnsListToSets(const std::vector<std::string_view>& columns) {
- YQL_ENSURE(!columns.empty());
- TConstraintWithFieldsNode::TSetOfSetsType sets;
- sets.reserve(columns.size());
- std::for_each(columns.cbegin(), columns.cend(), [&sets](const std::string_view& column) { sets.insert_unique(TConstraintWithFieldsNode::TSetType{column.empty() ? TConstraintWithFieldsNode::TPathType() : TConstraintWithFieldsNode::TPathType(1U, column)}); });
- return sets;
- }
- template<bool Distinct>
- typename TUniqueConstraintNodeBase<Distinct>::TContentType
- TUniqueConstraintNodeBase<Distinct>::DedupSets(TContentType&& sets) {
- for (bool found = true; found && sets.size() > 1U;) {
- found = false;
- for (auto ot = sets.cbegin(); !found && sets.cend() != ot; ++ot) {
- for (auto it = sets.cbegin(); sets.cend() != it;) {
- if (ot->size() < it->size() && std::all_of(ot->cbegin(), ot->cend(), [it](const TConstraintWithFieldsNode::TSetType& set) { return it->contains(set); })) {
- it = sets.erase(it);
- found = true;
- } else
- ++it;
- }
- }
- }
- return std::move(sets);
- }
- template<bool Distinct>
- typename TUniqueConstraintNodeBase<Distinct>::TContentType
- TUniqueConstraintNodeBase<Distinct>::MakeCommonContent(const TContentType& one, const TContentType& two) {
- TContentType both;
- both.reserve(std::min(one.size(), two.size()));
- for (const auto& setsOne : one) {
- for (const auto& setsTwo : two) {
- if (setsOne.size() == setsTwo.size()) {
- TConstraintWithFieldsNode::TSetOfSetsType sets;
- sets.reserve(setsTwo.size());
- for (const auto& setOne : setsOne) {
- for (const auto& setTwo : setsTwo) {
- TConstraintWithFieldsNode::TSetType set;
- set.reserve(std::min(setOne.size(), setTwo.size()));
- std::set_intersection(setOne.cbegin(), setOne.cend(), setTwo.cbegin(), setTwo.cend(), std::back_inserter(set));
- if (!set.empty())
- sets.insert_unique(std::move(set));
- }
- }
- if (sets.size() == setsOne.size())
- both.insert_unique(std::move(sets));
- }
- }
- }
- return both;
- }
- template<bool Distinct>
- TUniqueConstraintNodeBase<Distinct>::TUniqueConstraintNodeBase(TExprContext& ctx, TContentType&& sets)
- : TBase(ctx, Name())
- , Content_(DedupSets(std::move(sets)))
- {
- YQL_ENSURE(!Content_.empty());
- const auto size = Content_.size();
- TBase::Hash_ = MurmurHash<ui64>(&size, sizeof(size), TBase::Hash_);
- for (const auto& sets : Content_) {
- YQL_ENSURE(!sets.empty());
- YQL_ENSURE(!TConstraintWithFieldsNode::HasDuplicates(sets));
- for (const auto& set : sets) {
- YQL_ENSURE(!set.empty());
- for (const auto& path : set)
- TBase::Hash_ = std::accumulate(path.cbegin(), path.cend(), TBase::Hash_, [](ui64 hash, const std::string_view& field) { return MurmurHash<ui64>(field.data(), field.size(), hash); });
- }
- }
- }
- template<bool Distinct>
- TUniqueConstraintNodeBase<Distinct>::TUniqueConstraintNodeBase(TExprContext& ctx, const std::vector<std::string_view>& columns)
- : TUniqueConstraintNodeBase(ctx, TContentType{TPartOfConstraintBase::TSetOfSetsType{ColumnsListToSets(columns)}})
- {}
- template<bool Distinct>
- TUniqueConstraintNodeBase<Distinct>::TUniqueConstraintNodeBase(TExprContext& ctx, const NYT::TNode& serialized)
- : TUniqueConstraintNodeBase(ctx, NodeToContent(ctx, serialized))
- {
- }
- template<bool Distinct>
- typename TUniqueConstraintNodeBase<Distinct>::TContentType TUniqueConstraintNodeBase<Distinct>::NodeToContent(TExprContext& ctx, const NYT::TNode& serialized) {
- TUniqueConstraintNode::TContentType content;
- try {
- for (const auto& item : serialized.AsList()) {
- content.insert_unique(TPartOfConstraintBase::NodeToSetOfSets(ctx, item));
- }
- } catch (...) {
- YQL_ENSURE(false, "Cannot deserialize " << Name() << " constraint: " << CurrentExceptionMessage());
- }
- return content;
- }
- template<bool Distinct>
- TUniqueConstraintNodeBase<Distinct>::TUniqueConstraintNodeBase(TUniqueConstraintNodeBase&& constr) = default;
- template<bool Distinct>
- TPartOfConstraintBase::TSetType
- TUniqueConstraintNodeBase<Distinct>::GetFullSet() const {
- TPartOfConstraintBase::TSetType set;
- set.reserve(Content_.size());
- for (const auto& sets : Content_)
- for (const auto& key : sets)
- set.insert_unique(key.cbegin(), key.cend());
- return set;
- }
- template<bool Distinct>
- bool TUniqueConstraintNodeBase<Distinct>::Equals(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (TBase::GetHash() != node.GetHash()) {
- return false;
- }
- if (const auto c = dynamic_cast<const TUniqueConstraintNodeBase*>(&node)) {
- return Content_ == c->Content_;
- }
- return false;
- }
- template<bool Distinct>
- bool TUniqueConstraintNodeBase<Distinct>::Includes(const TConstraintNode& node) const {
- if (this == &node)
- return true;
- if (const auto c = dynamic_cast<const TUniqueConstraintNodeBase*>(&node)) {
- return std::all_of(c->Content_.cbegin(), c->Content_.cend(), [&] (const TConstraintWithFieldsNode::TSetOfSetsType& oldSets) {
- return std::any_of(Content_.cbegin(), Content_.cend(), [&] (const TConstraintWithFieldsNode::TSetOfSetsType& newSets) {
- return oldSets.size() == newSets.size() && std::all_of(oldSets.cbegin(), oldSets.cend(), [&] (const TConstraintWithFieldsNode::TSetType& oldSet) {
- return std::any_of(newSets.cbegin(), newSets.cend(), [&] (const TConstraintWithFieldsNode::TSetType& newSet) {
- return std::includes(newSet.cbegin(), newSet.cend(), oldSet.cbegin(), oldSet.cend());
- });
- });
- });
- });
- }
- return false;
- }
- template<bool Distinct>
- void TUniqueConstraintNodeBase<Distinct>::Out(IOutputStream& out) const {
- TConstraintNode::Out(out);
- out.Write('(');
- for (const auto& sets : Content_) {
- out.Write('(');
- bool first = true;
- for (const auto& set : sets) {
- if (first)
- first = false;
- else
- out << ',';
- if (1U == set.size())
- out << set.front();
- else
- out << set;
- }
- out.Write(')');
- }
- out.Write(')');
- }
- template<bool Distinct>
- void TUniqueConstraintNodeBase<Distinct>::ToJson(NJson::TJsonWriter& out) const {
- out.OpenArray();
- for (const auto& sets : Content_) {
- out.OpenArray();
- for (const auto& set : sets) {
- out.OpenArray();
- for (const auto& path : set) {
- out.Write(JoinSeq(';', path));
- }
- out.CloseArray();
- }
- out.CloseArray();
- }
- out.CloseArray();
- }
- template<bool Distinct>
- NYT::TNode TUniqueConstraintNodeBase<Distinct>::ToYson() const {
- return std::accumulate(Content_.cbegin(), Content_.cend(),
- NYT::TNode::CreateList(),
- [](NYT::TNode node, const TConstraintWithFieldsNode::TSetOfSetsType& sets) {
- return std::move(node).Add(TConstraintWithFieldsNode::SetOfSetsToNode(sets));
- });
- }
- template<bool Distinct>
- const TUniqueConstraintNodeBase<Distinct>* TUniqueConstraintNodeBase<Distinct>::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) {
- if (constraints.empty()) {
- return nullptr;
- }
- if (constraints.size() == 1) {
- return constraints.front()->GetConstraint<TUniqueConstraintNodeBase>();
- }
- TContentType content;
- for (auto c: constraints) {
- if (const auto uniq = c->GetConstraint<TUniqueConstraintNodeBase>()) {
- if (content.empty())
- content = uniq->GetContent();
- else {
- if (auto both = MakeCommonContent(content, uniq->Content_); both.empty()) {
- if (!c->GetConstraint<TEmptyConstraintNode>())
- return nullptr;
- } else
- content = std::move(both);
- }
- } else if (!c->GetConstraint<TEmptyConstraintNode>()) {
- return nullptr;
- }
- }
- return content.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNodeBase>(std::move(content));
- }
- template<bool Distinct>
- bool TUniqueConstraintNodeBase<Distinct>::IsOrderBy(const TSortedConstraintNode& sorted) const {
- TConstraintWithFieldsNode::TSetType ordered;
- TConstraintWithFieldsNode::TSetOfSetsType columns;
- for (const auto& key : sorted.GetContent()) {
- ordered.insert_unique(key.first.cbegin(), key.first.cend());
- columns.insert_unique(key.first);
- }
- for (const auto& sets : Content_) {
- if (std::all_of(sets.cbegin(), sets.cend(), [&ordered](const TConstraintWithFieldsNode::TSetType& set) {
- return std::any_of(set.cbegin(), set.cend(), [&ordered](const TConstraintWithFieldsNode::TPathType& path) { return ordered.contains(path); });
- })) {
- std::for_each(sets.cbegin(), sets.cend(), [&columns](const TConstraintWithFieldsNode::TSetType& set) {
- std::for_each(set.cbegin(), set.cend(), [&columns](const TConstraintWithFieldsNode::TPathType& path) {
- if (const auto it = std::find_if(columns.cbegin(), columns.cend(), [&path](const TConstraintWithFieldsNode::TSetType& s) { return s.contains(path); }); columns.cend() != it)
- columns.erase(it);
- });
- });
- if (columns.empty())
- return true;
- }
- }
- return false;
- }
- template<bool Distinct>
- bool TUniqueConstraintNodeBase<Distinct>::ContainsCompleteSet(const std::vector<std::string_view>& columns) const {
- if (columns.empty())
- return false;
- const std::unordered_set<std::string_view> ordered(columns.cbegin(), columns.cend());
- for (const auto& sets : Content_) {
- if (std::all_of(sets.cbegin(), sets.cend(), [&ordered](const TConstraintWithFieldsNode::TSetType& set) {
- return std::any_of(set.cbegin(), set.cend(), [&ordered](const TConstraintWithFieldsNode::TPathType& path) { return !path.empty() && ordered.contains(path.front()); });
- }))
- return true;
- }
- return false;
- }
- template<bool Distinct>
- void TUniqueConstraintNodeBase<Distinct>::FilterUncompleteReferences(TPartOfConstraintBase::TSetType& references) const {
- TPartOfConstraintBase::TSetType input(std::move(references));
- references.clear();
- references.reserve(input.size());
- for (const auto& sets : Content_) {
- if (std::all_of(sets.cbegin(), sets.cend(), [&input] (const TPartOfConstraintBase::TSetType& set) { return std::any_of(set.cbegin(), set.cend(), std::bind(&TPartOfConstraintBase::TSetType::contains<TPartOfConstraintBase::TPathType>, std::cref(input), std::placeholders::_1)); }))
- std::for_each(sets.cbegin(), sets.cend(), [&] (const TPartOfConstraintBase::TSetType& set) { std::for_each(set.cbegin(), set.cend(), [&] (const TPartOfConstraintBase::TPathType& path) {
- if (input.contains(path))
- references.insert_unique(path);
- }); });
- }
- }
- template<bool Distinct>
- const TConstraintWithFieldsNode*
- TUniqueConstraintNodeBase<Distinct>::DoFilterFields(TExprContext& ctx, const TPartOfConstraintBase::TPathFilter& predicate) const {
- if (!predicate)
- return this;
- TContentType content;
- content.reserve(Content_.size());
- for (const auto& sets : Content_) {
- if (std::all_of(sets.cbegin(), sets.cend(), [&predicate](const TPartOfConstraintBase::TSetType& set) { return std::any_of(set.cbegin(), set.cend(), predicate); })) {
- TPartOfConstraintBase::TSetOfSetsType newSets;
- newSets.reserve(sets.size());
- std::for_each(sets.cbegin(), sets.cend(), [&](const TPartOfConstraintBase::TSetType& set) {
- TPartOfConstraintBase::TSetType newSet;
- newSet.reserve(set.size());
- std::copy_if(set.cbegin(), set.cend(), std::back_inserter(newSet), predicate);
- newSets.insert_unique(std::move(newSet));
- });
- content.insert_unique(std::move(newSets));
- }
- }
- return content.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNodeBase>(std::move(content));
- }
- template<bool Distinct>
- const TConstraintWithFieldsNode*
- TUniqueConstraintNodeBase<Distinct>::DoRenameFields(TExprContext& ctx, const TPartOfConstraintBase::TPathReduce& reduce) const {
- if (!reduce)
- return this;
- TContentType content;
- content.reserve(Content_.size());
- for (const auto& sets : Content_) {
- TPartOfConstraintBase::TSetOfSetsType newSets;
- newSets.reserve(sets.size());
- for (const auto& set : sets) {
- TPartOfConstraintBase::TSetType newSet;
- newSet.reserve(set.size());
- for (const auto& path : set) {
- const auto newPaths = reduce(path);
- newSet.insert_unique(newPaths.cbegin(), newPaths.cend());
- }
- if (!newSet.empty())
- newSets.insert_unique(std::move(newSet));
- }
- if (sets.size() == newSets.size())
- content.insert_unique(std::move(newSets));
- }
- return content.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNodeBase>(std::move(content));
- }
- template<bool Distinct>
- const TUniqueConstraintNodeBase<Distinct>*
- TUniqueConstraintNodeBase<Distinct>::MakeCommon(const TUniqueConstraintNodeBase* other, TExprContext& ctx) const {
- if (!other)
- return nullptr;
- if (this == other)
- return this;
- auto both = MakeCommonContent(Content_, other->Content_);
- return both.empty() ? nullptr : ctx.MakeConstraint<TUniqueConstraintNodeBase>(std::move(both));
- }
- template<bool Distinct>
- const TUniqueConstraintNodeBase<Distinct>* TUniqueConstraintNodeBase<Distinct>::Merge(const TUniqueConstraintNodeBase* one, const TUniqueConstraintNodeBase* two, TExprContext& ctx) {
- if (!one)
- return two;
- if (!two)
- return one;
- auto content = one->Content_;
- content.insert_unique(two->Content_.cbegin(), two->Content_.cend());
- return ctx.MakeConstraint<TUniqueConstraintNodeBase<Distinct>>(std::move(content));
- }
- template<bool Distinct>
- const TConstraintWithFieldsNode*
- TUniqueConstraintNodeBase<Distinct>::DoGetComplicatedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const {
- const auto& rowType = GetSeqItemType(type);
- bool changed = false;
- auto content = Content_;
- for (auto& sets : content) {
- for (auto it = sets.begin(); sets.end() != it;) {
- auto fields = GetAllItemTypeFields(TBase::GetSubTypeByPath(it->front(), rowType), ctx);
- for (auto j = it->cbegin(); it->cend() != ++j;) {
- if (const auto& copy = GetAllItemTypeFields(TBase::GetSubTypeByPath(*j, rowType), ctx); copy != fields) {
- fields.clear();
- break;
- }
- }
- if (fields.empty())
- ++it;
- else {
- changed = true;
- auto set = *it;
- for (auto& path : set)
- path.emplace_back();
- for (it = sets.erase(it); !fields.empty(); fields.pop_front()) {
- auto paths = set;
- for (auto& path : paths)
- path.back() = fields.front();
- it = sets.insert_unique(std::move(paths)).first;
- }
- }
- }
- }
- return changed ? ctx.MakeConstraint<TUniqueConstraintNodeBase<Distinct>>(std::move(content)) : this;
- }
- template<bool Distinct>
- const TConstraintWithFieldsNode*
- TUniqueConstraintNodeBase<Distinct>::DoGetSimplifiedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const {
- if (Content_.size() == 1U && Content_.front().size() == 1U && Content_.front().front().size() == 1U && Content_.front().front().front().empty())
- return DoGetComplicatedForType(type, ctx);
- const auto& rowType = GetSeqItemType(type);
- const auto getPrefix = [](TPartOfConstraintBase::TPathType path) {
- path.pop_back();
- return path;
- };
- bool changed = false;
- auto content = Content_;
- for (auto& sets : content) {
- for (bool setChanged = true; setChanged;) {
- setChanged = false;
- for (auto it = sets.begin(); sets.end() != it;) {
- if (!it->empty() && it->front().size() > 1U) {
- TPartOfConstraintBase::TSetType prefixes;
- prefixes.reserve(it->size());
- for (const auto& path : *it) {
- if (path.size() > 1U) {
- prefixes.emplace_back(getPrefix(path));
- }
- }
- auto from = it++;
- if (prefixes.size() < from->size())
- continue;
- while (sets.cend() != it && it->size() == prefixes.size() &&
- std::all_of(it->cbegin(), it->cend(), [&](const TPartOfConstraintBase::TPathType& path) { return path.size() > 1U && prefixes.contains(getPrefix(path)); })) {
- ++it;
- }
- if (std::all_of(prefixes.cbegin(), prefixes.cend(),
- [width = std::distance(from, it), &rowType] (const TPartOfConstraintBase::TPathType& path) { return width == ssize_t(GetElementsCount(TBase::GetSubTypeByPath(path, rowType))); })) {
- *from++ =std::move(prefixes);
- it = sets.erase(from, it);
- changed = setChanged = true;
- }
- } else
- ++it;
- }
- }
- }
- return changed ? ctx.MakeConstraint<TUniqueConstraintNodeBase<Distinct>>(std::move(content)) : this;
- }
- template<bool Distinct>
- bool TUniqueConstraintNodeBase<Distinct>::IsApplicableToType(const TTypeAnnotationNode& type) const {
- if (ETypeAnnotationKind::Dict == type.GetKind())
- return true; // TODO: check for dict.
- const auto& itemType = GetSeqItemType(type);
- return std::all_of(Content_.cbegin(), Content_.cend(), [&itemType](const TConstraintWithFieldsNode::TSetOfSetsType& sets) {
- return std::all_of(sets.cbegin(), sets.cend(), [&itemType](const TConstraintWithFieldsNode::TSetType& set) {
- return std::all_of(set.cbegin(), set.cend(), std::bind(&TConstraintWithFieldsNode::GetSubTypeByPath, std::placeholders::_1, std::cref(itemType)));
- });
- });
- }
- template class TUniqueConstraintNodeBase<false>;
- template class TUniqueConstraintNodeBase<true>;
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- template<class TOriginalConstraintNode>
- TPartOfConstraintNode<TOriginalConstraintNode>::TPartOfConstraintNode(TExprContext& ctx, TMapType&& mapping)
- : TBase(ctx, Name())
- , Mapping_(std::move(mapping))
- {
- YQL_ENSURE(!Mapping_.empty());
- for (const auto& part : Mapping_) {
- YQL_ENSURE(!part.second.empty());
- const auto hash = part.first->GetHash();
- TBase::Hash_ = MurmurHash<ui64>(&hash, sizeof(hash), TBase::Hash_);
- for (const auto& item: part.second) {
- TBase::Hash_ = std::accumulate(item.first.cbegin(), item.first.cend(), TBase::Hash_, [](ui64 hash, const std::string_view& field) { return MurmurHash<ui64>(field.data(), field.size(), hash); });
- TBase::Hash_ = std::accumulate(item.second.cbegin(), item.second.cend(), TBase::Hash_, [](ui64 hash, const std::string_view& field) { return MurmurHash<ui64>(field.data(), field.size(), hash); });
- }
- }
- }
- template<class TOriginalConstraintNode>
- TPartOfConstraintNode<TOriginalConstraintNode>::TPartOfConstraintNode(TExprContext& ctx, const NYT::TNode&)
- : TBase(ctx, Name())
- {
- YQL_ENSURE(false, "TPartOfConstraintNode cannot be deserialized");
- }
- template<class TOriginalConstraintNode>
- TPartOfConstraintNode<TOriginalConstraintNode>::TPartOfConstraintNode(TPartOfConstraintNode&& constr) = default;
- template<class TOriginalConstraintNode>
- bool TPartOfConstraintNode<TOriginalConstraintNode>::Equals(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (TBase::GetHash() != node.GetHash()) {
- return false;
- }
- if (const auto c = dynamic_cast<const TPartOfConstraintNode*>(&node)) {
- return Mapping_ == c->Mapping_;
- }
- return false;
- }
- template<class TOriginalConstraintNode>
- bool TPartOfConstraintNode<TOriginalConstraintNode>::Includes(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (const auto c = dynamic_cast<const TPartOfConstraintNode*>(&node)) {
- for (const auto& part : c->Mapping_) {
- if (const auto it = Mapping_.find(part.first); Mapping_.cend() != it) {
- for (const auto& pair : part.second) {
- if (const auto p = it->second.find(pair.first); it->second.cend() == p || p->second != pair.second) {
- return false;
- }
- }
- } else
- return false;
- }
- return true;
- }
- return false;
- }
- template<class TOriginalConstraintNode>
- void TPartOfConstraintNode<TOriginalConstraintNode>::Out(IOutputStream& out) const {
- TConstraintNode::Out(out);
- out.Write('(');
- bool first = true;
- for (const auto& part : Mapping_) {
- for (const auto& item : part.second) {
- if (first)
- first = false;
- else
- out.Write(',');
- out << item.first;
- out.Write(':');
- out << item.second;
- }
- }
- out.Write(')');
- }
- template<class TOriginalConstraintNode>
- void TPartOfConstraintNode<TOriginalConstraintNode>::ToJson(NJson::TJsonWriter& out) const {
- out.OpenMap();
- for (const auto& part : Mapping_) {
- for (const auto& [resultColumn, originalColumn] : part.second) {
- out.Write(JoinSeq(';', resultColumn), JoinSeq(';', originalColumn));
- }
- }
- out.CloseMap();
- }
- template<class TOriginalConstraintNode>
- NYT::TNode TPartOfConstraintNode<TOriginalConstraintNode>::ToYson() const {
- return {}; // cannot be serialized
- }
- template<class TOriginalConstraintNode>
- const TPartOfConstraintNode<TOriginalConstraintNode>*
- TPartOfConstraintNode<TOriginalConstraintNode>::ExtractField(TExprContext& ctx, const std::string_view& field) const {
- TMapType passtrought;
- for (const auto& part : Mapping_) {
- auto it = part.second.lower_bound(TPartOfConstraintBase::TPathType(1U, field));
- if (part.second.cend() == it || it->first.front() != field)
- continue;
- TPartType mapping;
- mapping.reserve(part.second.size());
- while (it < part.second.cend() && !it->first.empty() && field == it->first.front()) {
- auto item = *it++;
- item.first.pop_front();
- mapping.emplace_back(std::move(item));
- }
- if (!mapping.empty()) {
- passtrought.emplace(part.first, std::move(mapping));
- }
- }
- return passtrought.empty() ? nullptr : ctx.MakeConstraint<TPartOfConstraintNode>(std::move(passtrought));
- }
- template<class TOriginalConstraintNode>
- const TPartOfConstraintBase*
- TPartOfConstraintNode<TOriginalConstraintNode>::DoFilterFields(TExprContext& ctx, const TPartOfConstraintBase::TPathFilter& predicate) const {
- if (!predicate)
- return this;
- auto mapping = Mapping_;
- for (auto part = mapping.begin(); mapping.end() != part;) {
- for (auto it = part->second.cbegin(); part->second.cend() != it;) {
- if (predicate(it->first))
- ++it;
- else
- it = part->second.erase(it);
- }
- if (part->second.empty())
- part = mapping.erase(part);
- else
- ++part;
- }
- return mapping.empty() ? nullptr : ctx.MakeConstraint<TPartOfConstraintNode>(std::move(mapping));
- }
- template<class TOriginalConstraintNode>
- const TPartOfConstraintBase*
- TPartOfConstraintNode<TOriginalConstraintNode>::DoRenameFields(TExprContext& ctx, const TPartOfConstraintBase::TPathReduce& rename) const {
- if (!rename)
- return this;
- TMapType mapping(Mapping_.size());
- for (const auto& part : Mapping_) {
- TPartType map;
- map.reserve(part.second.size());
- for (const auto& item : part.second) {
- for (auto& path : rename(item.first)) {
- map.insert_unique(std::make_pair(std::move(path), item.second));
- }
- }
- if (!map.empty())
- mapping.emplace(part.first, std::move(map));
- }
- return mapping.empty() ? nullptr : ctx.MakeConstraint<TPartOfConstraintNode>(std::move(mapping));
- }
- template<class TOriginalConstraintNode>
- const TPartOfConstraintNode<TOriginalConstraintNode>*
- TPartOfConstraintNode<TOriginalConstraintNode>::CompleteOnly(TExprContext& ctx) const {
- TMapType mapping(Mapping_);
- for (auto it = mapping.begin(); mapping.end() != it;) {
- TPartOfConstraintBase::TSetType set;
- set.reserve(it->second.size());
- std::for_each(it->second.cbegin(), it->second.cend(), [&](const typename TPartType::value_type& pair) { set.insert_unique(pair.second); });
- it->first->FilterUncompleteReferences(set);
- for (auto jt = it->second.cbegin(); it->second.cend() != jt;) {
- if (set.contains(jt->second))
- ++jt;
- else
- jt = it->second.erase(jt);
- }
- if (it->second.empty())
- it = mapping.erase(it);
- else
- ++it;
- }
- return mapping.empty() ? nullptr : ctx.MakeConstraint<TPartOfConstraintNode>(std::move(mapping));
- }
- template<class TOriginalConstraintNode>
- const TPartOfConstraintNode<TOriginalConstraintNode>*
- TPartOfConstraintNode<TOriginalConstraintNode>:: RemoveOriginal(TExprContext& ctx, const TMainConstraint* original) const {
- TMapType mapping(Mapping_);
- mapping.erase(original);
- return mapping.empty() ? nullptr : ctx.MakeConstraint<TPartOfConstraintNode>(std::move(mapping));
- }
- template<class TOriginalConstraintNode>
- typename TPartOfConstraintNode<TOriginalConstraintNode>::TMapType
- TPartOfConstraintNode<TOriginalConstraintNode>::GetColumnMapping(const std::string_view& asField) const {
- auto mapping = Mapping_;
- if (!asField.empty()) {
- for (auto& item : mapping) {
- for (auto& part : item.second) {
- part.first.emplace_front(asField);
- }
- }
- }
- return mapping;
- }
- template<class TOriginalConstraintNode>
- typename TPartOfConstraintNode<TOriginalConstraintNode>::TMapType
- TPartOfConstraintNode<TOriginalConstraintNode>::GetColumnMapping(TExprContext& ctx, const std::string_view& prefix) const {
- auto mapping = Mapping_;
- if (!prefix.empty()) {
- const TString str(prefix);
- for (auto& item : mapping) {
- for (auto& part : item.second) {
- if (part.first.empty())
- part.first.emplace_front(prefix);
- else
- part.first.front() = ctx.AppendString(str + part.first.front());
- }
- }
- }
- return mapping;
- }
- template<class TOriginalConstraintNode>
- const TPartOfConstraintNode<TOriginalConstraintNode>*
- TPartOfConstraintNode<TOriginalConstraintNode>::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) {
- if (constraints.empty()) {
- return nullptr;
- }
- if (constraints.size() == 1) {
- return constraints.front()->GetConstraint<TPartOfConstraintNode>();
- }
- bool first = true;
- TMapType mapping;
- for (size_t i = 0; i < constraints.size(); ++i) {
- const auto part = constraints[i]->GetConstraint<TPartOfConstraintNode>();
- if (!part)
- return nullptr;
- if (first) {
- mapping = part->GetColumnMapping();
- first = false;
- } else {
- for (const auto& nextMapping : part->GetColumnMapping()) {
- if (const auto it = mapping.find(nextMapping.first); mapping.cend() != it) {
- TPartType result;
- std::set_intersection(
- it->second.cbegin(), it->second.cend(),
- nextMapping.second.cbegin(), nextMapping.second.cend(),
- std::back_inserter(result),
- [] (const typename TPartType::value_type& c1, const typename TPartType::value_type& c2) {
- return c1 < c2;
- }
- );
- if (result.empty())
- mapping.erase(it);
- else
- it->second = std::move(result);
- }
- }
- }
- if (mapping.empty()) {
- break;
- }
- }
- return mapping.empty() ? nullptr : ctx.MakeConstraint<TPartOfConstraintNode>(std::move(mapping));
- }
- template<class TOriginalConstraintNode>
- const typename TPartOfConstraintNode<TOriginalConstraintNode>::TMapType&
- TPartOfConstraintNode<TOriginalConstraintNode>::GetColumnMapping() const {
- return Mapping_;
- }
- template<class TOriginalConstraintNode>
- typename TPartOfConstraintNode<TOriginalConstraintNode>::TMapType
- TPartOfConstraintNode<TOriginalConstraintNode>::GetCommonMapping(const TOriginalConstraintNode* complete, const TPartOfConstraintNode* incomplete, const std::string_view& field) {
- TMapType mapping;
- if (incomplete) {
- mapping = incomplete->GetColumnMapping();
- mapping.erase(complete);
- if (!field.empty()) {
- for (auto& part : mapping) {
- std::for_each(part.second.begin(), part.second.end(), [&field](typename TPartType::value_type& map) { map.first.push_front(field); });
- }
- }
- }
- if (complete) {
- auto& part = mapping[complete];
- for (const auto& path : complete->GetFullSet()) {
- auto key = path;
- if (!field.empty())
- key.emplace_front(field);
- part.insert_unique(std::make_pair(key, path));
- }
- }
- return mapping;
- }
- template<class TOriginalConstraintNode>
- void TPartOfConstraintNode<TOriginalConstraintNode>::UniqueMerge(TMapType& output, TMapType&& input) {
- output.merge(input);
- while (!input.empty()) {
- const auto exists = input.extract(input.cbegin());
- auto& target = output[exists.key()];
- target.reserve(target.size() + exists.mapped().size());
- for (auto& item : exists.mapped())
- target.insert_unique(std::move(item));
- }
- }
- template<class TOriginalConstraintNode>
- typename TPartOfConstraintNode<TOriginalConstraintNode>::TMapType
- TPartOfConstraintNode<TOriginalConstraintNode>::ExtractField(const TMapType& mapping, const std::string_view& field) {
- TMapType parts;
- for (const auto& part : mapping) {
- auto it = part.second.lower_bound(TPartOfConstraintBase::TPathType(1U, field));
- if (part.second.cend() == it || it->first.empty() || it->first.front() != field)
- continue;
- TPartType mapping;
- mapping.reserve(part.second.size());
- while (it < part.second.cend() && !it->first.empty() && field == it->first.front()) {
- auto item = *it++;
- item.first.pop_front();
- mapping.emplace_back(std::move(item));
- }
- if (!mapping.empty()) {
- parts.emplace(part.first, std::move(mapping));
- }
- }
- return parts;
- }
- template<class TOriginalConstraintNode>
- const TOriginalConstraintNode*
- TPartOfConstraintNode<TOriginalConstraintNode>::MakeComplete(TExprContext& ctx, const TMapType& mapping, const TOriginalConstraintNode* original, const std::string_view& field) {
- if (const auto it = mapping.find(original); mapping.cend() != it) {
- TReversePartType reverseMap;
- reverseMap.reserve(it->second.size());
- for (const auto& map : it->second)
- reverseMap[map.second].insert_unique(map.first);
- const auto rename = [&](const TPartOfConstraintBase::TPathType& path) {
- const auto& set = reverseMap[path];
- std::vector<TPartOfConstraintBase::TPathType> out(set.cbegin(), set.cend());
- if (!field.empty())
- std::for_each(out.begin(), out.end(), [&field](TPartOfConstraintBase::TPathType& path) { path.emplace_front(field); });
- return out;
- };
- return it->first->RenameFields(ctx, rename);
- }
- return nullptr;
- }
- template<class TOriginalConstraintNode>
- const TOriginalConstraintNode*
- TPartOfConstraintNode<TOriginalConstraintNode>::MakeComplete(TExprContext& ctx, const TPartOfConstraintNode* partial, const TOriginalConstraintNode* original, const std::string_view& field) {
- if (!partial)
- return nullptr;
- return MakeComplete(ctx, partial->GetColumnMapping(), original, field);
- }
- template<class TOriginalConstraintNode>
- bool TPartOfConstraintNode<TOriginalConstraintNode>::IsApplicableToType(const TTypeAnnotationNode& type) const {
- if (ETypeAnnotationKind::Dict == type.GetKind())
- return true; // TODO: check for dict.
- const auto itemType = GetSeqItemType(&type);
- const auto& actualType = itemType ? *itemType : type;
- return std::all_of(Mapping_.cbegin(), Mapping_.cend(), [&actualType](const typename TMapType::value_type& pair) {
- return std::all_of(pair.second.cbegin(), pair.second.cend(), [&actualType](const typename TPartType::value_type& part) { return bool(TPartOfConstraintBase::GetSubTypeByPath(part.first, actualType)); });
- });
- }
- template class TPartOfConstraintNode<TSortedConstraintNode>;
- template class TPartOfConstraintNode<TChoppedConstraintNode>;
- template class TPartOfConstraintNode<TUniqueConstraintNode>;
- template class TPartOfConstraintNode<TDistinctConstraintNode>;
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- TEmptyConstraintNode::TEmptyConstraintNode(TExprContext& ctx)
- : TConstraintNode(ctx, Name())
- {
- }
- TEmptyConstraintNode::TEmptyConstraintNode(TEmptyConstraintNode&& constr)
- : TConstraintNode(std::move(static_cast<TConstraintNode&>(constr)))
- {
- }
- TEmptyConstraintNode::TEmptyConstraintNode(TExprContext& ctx, const NYT::TNode& serialized)
- : TConstraintNode(ctx, Name())
- {
- YQL_ENSURE(serialized.IsEntity(), "Unexpected serialized content of " << Name() << " constraint");
- }
- bool TEmptyConstraintNode::Equals(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (GetHash() != node.GetHash()) {
- return false;
- }
- return GetName() == node.GetName();
- }
- void TEmptyConstraintNode::ToJson(NJson::TJsonWriter& out) const {
- out.Write(true);
- }
- NYT::TNode TEmptyConstraintNode::ToYson() const {
- return NYT::TNode::CreateEntity();
- }
- const TEmptyConstraintNode* TEmptyConstraintNode::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& /*ctx*/) {
- if (constraints.empty()) {
- return nullptr;
- }
- auto empty = constraints.front()->GetConstraint<TEmptyConstraintNode>();
- if (AllOf(constraints.cbegin() + 1, constraints.cend(), [empty](const TConstraintSet* c) { return c->GetConstraint<TEmptyConstraintNode>() == empty; })) {
- return empty;
- }
- return nullptr;
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- TVarIndexConstraintNode::TVarIndexConstraintNode(TExprContext& ctx, const TMapType& mapping)
- : TConstraintNode(ctx, Name())
- , Mapping_(mapping)
- {
- Hash_ = MurmurHash<ui64>(Mapping_.data(), Mapping_.size() * sizeof(TMapType::value_type), Hash_);
- YQL_ENSURE(!Mapping_.empty());
- }
- TVarIndexConstraintNode::TVarIndexConstraintNode(TExprContext& ctx, const TVariantExprType& itemType)
- : TVarIndexConstraintNode(ctx, itemType.GetUnderlyingType()->Cast<TTupleExprType>()->GetSize())
- {
- }
- TVarIndexConstraintNode::TVarIndexConstraintNode(TExprContext& ctx, size_t mapItemsCount)
- : TConstraintNode(ctx, Name())
- {
- YQL_ENSURE(mapItemsCount > 0);
- for (size_t i = 0; i < mapItemsCount; ++i) {
- Mapping_.push_back(std::make_pair(i, i));
- }
- Hash_ = MurmurHash<ui64>(Mapping_.data(), Mapping_.size() * sizeof(TMapType::value_type), Hash_);
- YQL_ENSURE(!Mapping_.empty());
- }
- TVarIndexConstraintNode::TVarIndexConstraintNode(TExprContext& ctx, const NYT::TNode& serialized)
- : TVarIndexConstraintNode(ctx, NodeToMapping(serialized))
- {
- }
- TVarIndexConstraintNode::TVarIndexConstraintNode(TVarIndexConstraintNode&& constr)
- : TConstraintNode(std::move(static_cast<TConstraintNode&>(constr)))
- , Mapping_(std::move(constr.Mapping_))
- {
- }
- TVarIndexConstraintNode::TMapType TVarIndexConstraintNode::NodeToMapping(const NYT::TNode& serialized) {
- TMapType mapping;
- try {
- for (const auto& pair: serialized.AsList()) {
- mapping.insert(std::make_pair<ui32, ui32>(pair.AsList().front().AsUint64(), pair.AsList().back().AsUint64()));
- }
- } catch (...) {
- YQL_ENSURE(false, "Cannot deserialize " << Name() << " constraint: " << CurrentExceptionMessage());
- }
- return mapping;
- }
- TVarIndexConstraintNode::TMapType TVarIndexConstraintNode::GetReverseMapping() const {
- TMapType reverseMapping;
- std::transform(Mapping_.cbegin(), Mapping_.cend(),
- std::back_inserter(reverseMapping),
- [] (const std::pair<size_t, size_t>& p) { return std::make_pair(p.second, p.first); }
- );
- ::Sort(reverseMapping);
- return reverseMapping;
- }
- bool TVarIndexConstraintNode::Equals(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (GetHash() != node.GetHash()) {
- return false;
- }
- if (GetName() != node.GetName()) {
- return false;
- }
- if (auto c = dynamic_cast<const TVarIndexConstraintNode*>(&node)) {
- return GetIndexMapping() == c->GetIndexMapping();
- }
- return false;
- }
- bool TVarIndexConstraintNode::Includes(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (GetName() != node.GetName()) {
- return false;
- }
- if (auto c = dynamic_cast<const TVarIndexConstraintNode*>(&node)) {
- for (auto& pair: c->Mapping_) {
- if (auto p = Mapping_.FindPtr(pair.first)) {
- if (*p != pair.second) {
- return false;
- }
- } else {
- return false;
- }
- }
- return true;
- }
- return false;
- }
- void TVarIndexConstraintNode::Out(IOutputStream& out) const {
- TConstraintNode::Out(out);
- out.Write('(');
- bool first = true;
- for (auto& item: Mapping_) {
- if (!first) {
- out.Write(',');
- }
- out << item.first << ':' << item.second;
- first = false;
- }
- out.Write(')');
- }
- void TVarIndexConstraintNode::ToJson(NJson::TJsonWriter& out) const {
- out.OpenArray();
- for (const auto& [resultIndex, originalIndex]: Mapping_) {
- out.OpenArray();
- out.Write(resultIndex);
- out.Write(originalIndex);
- out.CloseArray();
- }
- out.CloseArray();
- }
- NYT::TNode TVarIndexConstraintNode::ToYson() const {
- return std::accumulate(Mapping_.cbegin(), Mapping_.cend(),
- NYT::TNode::CreateList(),
- [](NYT::TNode node, const TMapType::value_type& p) {
- return std::move(node).Add(NYT::TNode::CreateList().Add(p.first).Add(p.second));
- });
- }
- const TVarIndexConstraintNode* TVarIndexConstraintNode::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) {
- if (constraints.empty()) {
- return nullptr;
- }
- if (constraints.size() == 1) {
- return constraints.front()->GetConstraint<TVarIndexConstraintNode>();
- }
- TVarIndexConstraintNode::TMapType mapping;
- for (size_t i = 0; i < constraints.size(); ++i) {
- if (auto varIndex = constraints[i]->GetConstraint<TVarIndexConstraintNode>()) {
- mapping.insert(varIndex->GetIndexMapping().begin(), varIndex->GetIndexMapping().end());
- }
- }
- if (mapping.empty()) {
- return nullptr;
- }
- ::SortUnique(mapping);
- return ctx.MakeConstraint<TVarIndexConstraintNode>(std::move(mapping));
- }
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- TMultiConstraintNode::TMultiConstraintNode(TExprContext& ctx, TMapType&& items)
- : TConstraintNode(ctx, Name())
- , Items_(std::move(items))
- {
- YQL_ENSURE(Items_.size());
- for (auto& item: Items_) {
- Hash_ = MurmurHash<ui64>(&item.first, sizeof(item.first), Hash_);
- for (auto c: item.second.GetAllConstraints()) {
- const auto itemHash = c->GetHash();
- Hash_ = MurmurHash<ui64>(&itemHash, sizeof(itemHash), Hash_);
- }
- }
- }
- TMultiConstraintNode::TMultiConstraintNode(TExprContext& ctx, ui32 index, const TConstraintSet& constraints)
- : TMultiConstraintNode(ctx, TMapType{{index, constraints}})
- {
- }
- TMultiConstraintNode::TMultiConstraintNode(TExprContext& ctx, const NYT::TNode& serialized)
- : TMultiConstraintNode(ctx, NodeToMapping(ctx, serialized))
- {
- }
- TMultiConstraintNode::TMultiConstraintNode(TMultiConstraintNode&& constr)
- : TConstraintNode(std::move(static_cast<TConstraintNode&>(constr)))
- , Items_(std::move(constr.Items_))
- {
- }
- TMultiConstraintNode::TMapType TMultiConstraintNode::NodeToMapping(TExprContext& ctx, const NYT::TNode& serialized) {
- TMapType mapping;
- try {
- for (const auto& pair: serialized.AsList()) {
- mapping.insert(std::make_pair((ui32)pair.AsList().front().AsUint64(), ctx.MakeConstraintSet(pair.AsList().back())));
- }
- } catch (...) {
- YQL_ENSURE(false, "Cannot deserialize " << Name() << " constraint: " << CurrentExceptionMessage());
- }
- return mapping;
- }
- bool TMultiConstraintNode::Equals(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (GetHash() != node.GetHash()) {
- return false;
- }
- if (GetName() != node.GetName()) {
- return false;
- }
- if (auto c = dynamic_cast<const TMultiConstraintNode*>(&node)) {
- return GetItems() == c->GetItems();
- }
- return false;
- }
- bool TMultiConstraintNode::Includes(const TConstraintNode& node) const {
- if (this == &node) {
- return true;
- }
- if (GetName() != node.GetName()) {
- return false;
- }
- if (auto m = dynamic_cast<const TMultiConstraintNode*>(&node)) {
- for (auto& item: Items_) {
- const auto it = m->Items_.find(item.first);
- if (it == m->Items_.end()) {
- if (!item.second.GetConstraint<TEmptyConstraintNode>()) {
- return false;
- }
- continue;
- }
- for (auto c: it->second.GetAllConstraints()) {
- auto cit = item.second.GetConstraint(c->GetName());
- if (!cit) {
- return false;
- }
- if (!cit->Includes(*c)) {
- return false;
- }
- }
- }
- return true;
- }
- return false;
- }
- bool TMultiConstraintNode::FilteredIncludes(const TConstraintNode& node, const THashSet<TString>& blacklist) const {
- if (this == &node) {
- return true;
- }
- if (GetName() != node.GetName()) {
- return false;
- }
- if (auto m = dynamic_cast<const TMultiConstraintNode*>(&node)) {
- for (auto& item: Items_) {
- const auto it = m->Items_.find(item.first);
- if (it == m->Items_.end()) {
- if (!item.second.GetConstraint<TEmptyConstraintNode>()) {
- return false;
- }
- continue;
- }
- for (auto c: it->second.GetAllConstraints()) {
- if (!blacklist.contains(c->GetName())) {
- const auto cit = item.second.GetConstraint(c->GetName());
- if (!cit) {
- return false;
- }
- if (!cit->Includes(*c)) {
- return false;
- }
- }
- }
- }
- return true;
- }
- return false;
- }
- void TMultiConstraintNode::Out(IOutputStream& out) const {
- TConstraintNode::Out(out);
- out.Write('(');
- bool first = true;
- for (auto& item: Items_) {
- if (!first) {
- out.Write(',');
- }
- out << item.first << ':' << '{';
- bool firstConstr = true;
- for (auto c: item.second.GetAllConstraints()) {
- if (!firstConstr) {
- out.Write(',');
- }
- out << *c;
- firstConstr = false;
- }
- out << '}';
- first = false;
- }
- out.Write(')');
- }
- void TMultiConstraintNode::ToJson(NJson::TJsonWriter& out) const {
- out.OpenMap();
- for (const auto& [index, constraintSet] : Items_) {
- out.WriteKey(ToString(index));
- constraintSet.ToJson(out);
- }
- out.CloseMap();
- }
- NYT::TNode TMultiConstraintNode::ToYson() const {
- return std::accumulate(Items_.cbegin(), Items_.cend(),
- NYT::TNode::CreateList(),
- [](NYT::TNode node, const TMapType::value_type& p) {
- return std::move(node).Add(NYT::TNode::CreateList().Add(p.first).Add(p.second.ToYson()));
- });
- }
- const TMultiConstraintNode* TMultiConstraintNode::MakeCommon(const std::vector<const TConstraintSet*>& constraints, TExprContext& ctx) {
- if (constraints.empty()) {
- return nullptr;
- } else if (constraints.size() == 1) {
- return constraints.front()->GetConstraint<TMultiConstraintNode>();
- }
- TMapType multiItems;
- for (auto c: constraints) {
- if (auto m = c->GetConstraint<TMultiConstraintNode>()) {
- multiItems.insert(m->GetItems().begin(), m->GetItems().end());
- } else if (!c->GetConstraint<TEmptyConstraintNode>()) {
- return nullptr;
- }
- }
- if (multiItems.empty()) {
- return nullptr;
- }
- multiItems.sort();
- // Remove duplicates
- // For duplicated items keep only Empty constraint
- auto cur = multiItems.begin();
- while (cur != multiItems.end()) {
- auto start = cur;
- do {
- ++cur;
- } while (cur != multiItems.end() && start->first == cur->first);
- switch (std::distance(start, cur)) {
- case 0:
- break;
- case 1:
- if (start->second.GetConstraint<TEmptyConstraintNode>()) {
- cur = multiItems.erase(start, cur);
- }
- break;
- default:
- {
- std::vector<TMapType::value_type> nonEmpty;
- std::copy_if(start, cur, std::back_inserter(nonEmpty),
- [] (const TMapType::value_type& v) {
- return !v.second.GetConstraint<TEmptyConstraintNode>();
- }
- );
- start->second.Clear();
- if (nonEmpty.empty()) {
- start->second.AddConstraint(ctx.MakeConstraint<TEmptyConstraintNode>());
- } else if (nonEmpty.size() == 1) {
- start->second = nonEmpty.front().second;
- }
- cur = multiItems.erase(start + 1, cur);
- }
- }
- }
- if (!multiItems.empty()) {
- return ctx.MakeConstraint<TMultiConstraintNode>(std::move(multiItems));
- }
- return nullptr;
- }
- const TMultiConstraintNode* TMultiConstraintNode::FilterConstraints(TExprContext& ctx, const TConstraintSet::TPredicate& predicate) const {
- auto items = Items_;
- bool hasContent = false, hasChanges = false;
- for (auto& item : items) {
- hasChanges = hasChanges || item.second.FilterConstraints(predicate);
- hasContent = hasContent || item.second;
- }
- return hasContent ? hasChanges ? ctx.MakeConstraint<TMultiConstraintNode>(std::move(items)) : this : nullptr;
- }
- } // namespace NYql
- //////////////////////////////////////////////////////////////////////////////////////////////////////////////
- template<>
- void Out<NYql::TPartOfConstraintBase::TPathType>(IOutputStream& out, const NYql::TPartOfConstraintBase::TPathType& path) {
- if (path.empty())
- out.Write('/');
- else {
- bool first = true;
- for (const auto& c : path) {
- if (first)
- first = false;
- else
- out.Write('/');
- out.Write(c);
- }
- }
- }
- template<>
- void Out<NYql::TPartOfConstraintBase::TSetType>(IOutputStream& out, const NYql::TPartOfConstraintBase::TSetType& c) {
- out.Write('{');
- bool first = true;
- for (const auto& path : c) {
- if (first)
- first = false;
- else
- out.Write(',');
- out << path;
- }
- out.Write('}');
- }
- template<>
- void Out<NYql::TPartOfConstraintBase::TSetOfSetsType>(IOutputStream& out, const NYql::TPartOfConstraintBase::TSetOfSetsType& c) {
- out.Write('{');
- bool first = true;
- for (const auto& path : c) {
- if (first)
- first = false;
- else
- out.Write(',');
- out << path;
- }
- out.Write('}');
- }
- template<>
- void Out<NYql::TConstraintNode>(IOutputStream& out, const NYql::TConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TConstraintSet>(IOutputStream& out, const NYql::TConstraintSet& s) {
- s.Out(out);
- }
- template<>
- void Out<NYql::TSortedConstraintNode>(IOutputStream& out, const NYql::TSortedConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TChoppedConstraintNode>(IOutputStream& out, const NYql::TChoppedConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TUniqueConstraintNode>(IOutputStream& out, const NYql::TUniqueConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TDistinctConstraintNode>(IOutputStream& out, const NYql::TDistinctConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TPartOfSortedConstraintNode>(IOutputStream& out, const NYql::TPartOfSortedConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TPartOfChoppedConstraintNode>(IOutputStream& out, const NYql::TPartOfChoppedConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TPartOfUniqueConstraintNode>(IOutputStream& out, const NYql::TPartOfUniqueConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TPartOfDistinctConstraintNode>(IOutputStream& out, const NYql::TPartOfDistinctConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TEmptyConstraintNode>(IOutputStream& out, const NYql::TEmptyConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TVarIndexConstraintNode>(IOutputStream& out, const NYql::TVarIndexConstraintNode& c) {
- c.Out(out);
- }
- template<>
- void Out<NYql::TMultiConstraintNode>(IOutputStream& out, const NYql::TMultiConstraintNode& c) {
- c.Out(out);
- }
|