#pragma once

// NOTE(review): the eight include directives below have no target. The header names
// (and most template parameter/argument lists further down) appear to have been
// stripped from this copy of the file; restore them from the upstream source.
#include
#include
#include
#include
#include
#include
#include
#include

namespace NYql {

struct TExprContext;
class TTypeAnnotationNode;
class TStructExprType;
class TVariantExprType;

// Abstract base class of all constraints.
// Holds a name and a hash value; Equals(), ToJson() and ToYson() must be provided by subclasses.
// Constructors are protected, so instances are only created through derived classes.
class TConstraintNode {
protected:
    TConstraintNode(TExprContext& ctx, std::string_view name);
    TConstraintNode(TConstraintNode&& constr);

public:
    // NOTE(review): element type is missing here; presumably a vector of
    // `const TConstraintNode*` (the functors below all take that pointer type) - confirm.
    using TListType = std::vector;

    // Hash functor over constraint pointers: forwards to the node's GetHash().
    struct THash {
        size_t operator()(const TConstraintNode* node) const {
            return node->GetHash();
        }
    };

    // Equality functor over constraint pointers: forwards to the virtual Equals().
    struct TEqual {
        bool operator()(const TConstraintNode* one, const TConstraintNode* two) const {
            return one->Equals(*two);
        }
    };

    // Ordering functor: compares constraints by name only.
    // The mixed (name, node) overloads allow comparing a node against a bare name
    // without constructing a node.
    struct TCompare {
        inline bool operator()(const TConstraintNode* l, const TConstraintNode* r) const {
            return l->GetName() < r->GetName();
        }

        inline bool operator()(const std::string_view name, const TConstraintNode* r) const {
            return name < r->GetName();
        }

        inline bool operator()(const TConstraintNode* l, const std::string_view name) const {
            return l->GetName() < name;
        }
    };

    virtual ~TConstraintNode() = default;

    // Returns the stored hash value.
    ui64 GetHash() const {
        return Hash_;
    }

    virtual bool Equals(const TConstraintNode& node) const = 0;
    // Default implementation treats "includes" as plain equality; subclasses may override.
    virtual bool Includes(const TConstraintNode& node) const {
        return Equals(node);
    }
    virtual void Out(IOutputStream& out) const;
    virtual void ToJson(NJson::TJsonWriter& out) const = 0;
    virtual NYT::TNode ToYson() const = 0;

    // Default: a constraint is applicable to every type; subclasses may restrict this.
    virtual bool IsApplicableToType(const TTypeAnnotationNode&) const { return true; }

    // Checked downcast of this node to the concrete constraint type T.
    // The result of dynamic_cast is verified with YQL_ENSURE, so a mismatching type is
    // reported with both constraint names instead of returning nullptr
    // (presumably by raising an error - see the YQL_ENSURE definition).
    // NOTE(review): the template parameter list and the template arguments of
    // is_base_of / dynamic_cast are missing in this copy.
    template
    const T* Cast() const {
        static_assert(std::is_base_of::value,
                      "Should be derived from TConstraintNode");

        const auto ret = dynamic_cast(this);
        YQL_ENSURE(ret, "Cannot cast '" << Name_ << "' constraint to " << T::Name());
        return ret;
    }

    // Returns the constraint name.
    const std::string_view& GetName() const {
        return Name_;
    }

protected:
    ui64 Hash_;
    // Non-owning view: the characters must outlive the node
    // (presumably they are static literals or owned by the context - verify).
    std::string_view Name_;
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Base class of constraints that refer to fields by path.
// Declares the path/set type aliases, the declared conversions between those containers
// and NYT::TNode (definitions are not in this header), and the virtual hooks for
// filtering and renaming fields.
class TPartOfConstraintBase : public TConstraintNode {
protected:
    TPartOfConstraintBase(TExprContext& ctx, std::string_view name);
    TPartOfConstraintBase(TPartOfConstraintBase&& constr) = default;
public:
    // Path to constraint components through nested static containers (Struct/Tuple/Multi).
    // Each element is a struct member name or a tuple element index.
    // An empty deque means the root.
    // NOTE(review): template arguments of the five aliases below are missing in this copy.
    using TPathType = std::deque;
    using TSetType = NSorted::TSimpleSet;
    using TSetOfSetsType = NSorted::TSimpleSet;
    using TPathFilter = std::function;
    using TPathReduce = std::function(const TPathType&)>;

    static const TTypeAnnotationNode* GetSubTypeByPath(const TPathType& path, const TTypeAnnotationNode& type);

    static NYT::TNode PathToNode(const TPathType& path);
    static NYT::TNode SetToNode(const TSetType& set, bool withShortcut);
    static NYT::TNode SetOfSetsToNode(const TSetOfSetsType& sets);

    static TPathType NodeToPath(TExprContext& ctx, const NYT::TNode& node);
    static TSetType NodeToSet(TExprContext& ctx, const NYT::TNode& node);
    static TSetOfSetsType NodeToSetOfSets(TExprContext& ctx, const NYT::TNode& node);

protected:
    // Implementation hooks behind the typed FilterFields()/RenameFields() wrappers
    // declared in the *T helper templates below.
    virtual const TPartOfConstraintBase* DoFilterFields(TExprContext& ctx, const TPathFilter& predicate) const = 0;
    virtual const TPartOfConstraintBase* DoRenameFields(TExprContext& ctx, const TPathReduce& reduce) const = 0;

    static bool HasDuplicates(const TSetOfSetsType& sets);
};

// Base class of "complete" field constraints: adds the hooks for expanding/collapsing
// paths of static containers and for filtering reference sets.
class TConstraintWithFieldsNode : public TPartOfConstraintBase {
protected:
    TConstraintWithFieldsNode(TExprContext& ctx, std::string_view name);
    TConstraintWithFieldsNode(TConstraintWithFieldsNode&& constr) = default;

    // Split fields that are static containers (Struct/Tuple/Multi) into a separated list of all their components.
    // For example: (/tuple_of_two_elements) -> (/tuple_of_two_elements/0,/tuple_of_two_elements/1)
    virtual const TConstraintWithFieldsNode* DoGetComplicatedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const = 0;

    // Combine a list of separated fields of static containers (Struct/Tuple/Multi) into a single path if possible.
    // For example: (/tuple_of_two_elements/0,/tuple_of_two_elements/1) -> (/tuple_of_two_elements)
    virtual const TConstraintWithFieldsNode* DoGetSimplifiedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const = 0;
public:
    // Leaves in the set of references only those that currently contain some complete constraint. Basically all or nothing.
    virtual void FilterUncompleteReferences(TPartOfConstraintBase::TSetType& references) const = 0;
};

// Helper template that gives a derived constraint typed FilterFields()/RenameFields():
// each wrapper calls the corresponding Do* virtual and static_casts the result to a
// pointer to TTnheritConstraint.
// NOTE(review): the template parameter list (presumably declaring TTnheritConstraint)
// and the static_cast target types are missing in this copy.
template
class TPartOfConstraintBaseT : public TPartOfConstraintBase {
protected:
    TPartOfConstraintBaseT(TExprContext& ctx, std::string_view name)
        : TPartOfConstraintBase(ctx, name)
    {}
    TPartOfConstraintBaseT(TPartOfConstraintBaseT&& constr) = default;
public:
    const TTnheritConstraint* FilterFields(TExprContext& ctx, const TPathFilter& predicate) const {
        return static_cast(DoFilterFields(ctx, predicate));
    }

    const TTnheritConstraint* RenameFields(TExprContext& ctx, const TPathReduce& reduce) const {
        return static_cast(DoRenameFields(ctx, reduce));
    }
};

// Same idea as TPartOfConstraintBaseT, but on top of TConstraintWithFieldsNode:
// additionally wraps DoGetComplicatedForType()/DoGetSimplifiedForType().
// NOTE(review): the template parameter list and the static_cast target types are
// missing in this copy.
template
class TConstraintWithFieldsT : public TConstraintWithFieldsNode {
protected:
    TConstraintWithFieldsT(TExprContext& ctx, std::string_view name)
        : TConstraintWithFieldsNode(ctx, name)
    {}
    TConstraintWithFieldsT(TConstraintWithFieldsT&& constr) = default;
public:
    const TTnheritConstraint* FilterFields(TExprContext& ctx, const TPathFilter& predicate) const {
        return static_cast(DoFilterFields(ctx, predicate));
    }

    const TTnheritConstraint* RenameFields(TExprContext& ctx, const TPathReduce& reduce) const {
        return static_cast(DoRenameFields(ctx, reduce));
    }

    const TTnheritConstraint* GetComplicatedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const {
        return static_cast(DoGetComplicatedForType(type, ctx));
    }

    const TTnheritConstraint* GetSimplifiedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const {
        return static_cast(DoGetSimplifiedForType(type, ctx));
    }
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// NOTE(review): throughout the rest of this header most template parameter lists and
// template arguments (e.g. `template class ...`, `std::vector&`, `TSmallVec>;`) appear
// to have been stripped from this copy; restore them from the upstream source.

// A collection of constraint nodes stored in a TConstraintNode::TListType.
// Constraints are looked up and removed by name; the templated overloads take the name
// from TConstraintType::Name() and cast the found node to the requested type.
class TConstraintSet {
public:
    TConstraintSet() = default;
    TConstraintSet(const TConstraintSet&) = default;
    TConstraintSet(TConstraintSet&&) = default;

    TConstraintSet& operator =(const TConstraintSet&) = default;
    TConstraintSet& operator =(TConstraintSet&&) = default;

    // Typed lookup: returns nullptr when the by-name lookup yields nothing,
    // otherwise the node passed through the checked Cast().
    template
    const TConstraintType* GetConstraint() const {
        auto res = GetConstraint(TConstraintType::Name());
        return res ? res->template Cast() : nullptr;
    }

    // Typed removal: same contract as above, built on the by-name RemoveConstraint().
    template
    const TConstraintType* RemoveConstraint() {
        auto res = RemoveConstraint(TConstraintType::Name());
        return res ? res->template Cast() : nullptr;
    }

    const TConstraintNode::TListType& GetAllConstraints() const {
        return Constraints_;
    }

    void Clear() {
        Constraints_.clear();
    }

    // True when the set holds at least one constraint.
    explicit operator bool() const {
        return !Constraints_.empty();
    }

    // Comparison delegates to the underlying container's operators.
    bool operator ==(const TConstraintSet& s) const {
        return Constraints_ == s.Constraints_;
    }

    bool operator !=(const TConstraintSet& s) const {
        return Constraints_ != s.Constraints_;
    }

    const TConstraintNode* GetConstraint(std::string_view name) const;
    void AddConstraint(const TConstraintNode* node);
    const TConstraintNode* RemoveConstraint(std::string_view name);

    // NOTE(review): the std::function signature is missing in this copy.
    using TPredicate = std::function;
    bool FilterConstraints(const TPredicate& predicate);

    void Out(IOutputStream& out) const;
    void ToJson(NJson::TJsonWriter& writer) const;
    NYT::TNode ToYson() const;
private:
    TConstraintNode::TListType Constraints_;
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Constraint named "Sorted". Its content is a TContainerType stored in Content_.
// Constructors are private; TExprContext is a friend
// (presumably nodes are created through the context - verify).
class TSortedConstraintNode final: public TConstraintWithFieldsT {
public:
    // NOTE(review): template arguments of TSmallVec are missing in this copy.
    using TContainerType = TSmallVec>;
private:
    friend struct TExprContext;

    TSortedConstraintNode(TExprContext& ctx, TContainerType&& content);
    TSortedConstraintNode(TExprContext& ctx, const NYT::TNode& serialized);
    TSortedConstraintNode(TSortedConstraintNode&& constr);
public:
    static constexpr std::string_view Name() {
        return "Sorted";
    }

    const TContainerType& GetContent() const { return Content_; }

    TSetType GetFullSet() const;

    bool Equals(const TConstraintNode& node) const override;
    bool Includes(const TConstraintNode& node) const override;
    void Out(IOutputStream& out) const override;
    void ToJson(NJson::TJsonWriter& out) const override;
    NYT::TNode ToYson() const override;

    bool IsPrefixOf(const TSortedConstraintNode& node) const;
    bool StartsWith(const TSetType& prefix) const;

    const TSortedConstraintNode* CutPrefix(size_t newPrefixLength, TExprContext& ctx) const;

    void FilterUncompleteReferences(TSetType& references) const final;

    static const TSortedConstraintNode* MakeCommon(const std::vector& constraints, TExprContext& ctx);
    const TSortedConstraintNode* MakeCommon(const TSortedConstraintNode* other, TExprContext& ctx) const;

    bool IsApplicableToType(const TTypeAnnotationNode& type) const override;
private:
    const TConstraintWithFieldsNode* DoFilterFields(TExprContext& ctx, const TPathFilter& predicate) const final;
    const TConstraintWithFieldsNode* DoRenameFields(TExprContext& ctx, const TPathReduce& reduce) const final;

    const TConstraintWithFieldsNode* DoGetComplicatedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const final;
    const TConstraintWithFieldsNode* DoGetSimplifiedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const final;

    static TContainerType NodeToContainer(TExprContext& ctx, const NYT::TNode& serialized);

    TContainerType Content_;
};

// Constraint named "Chopped". Its content is a TSetOfSetsType stored in Sets_.
// Constructors are private; TExprContext is a friend.
class TChoppedConstraintNode final: public TConstraintWithFieldsT {
private:
    friend struct TExprContext;

    TChoppedConstraintNode(TExprContext& ctx, TSetOfSetsType&& sets);
    TChoppedConstraintNode(TExprContext& ctx, const TSetType& keys);
    TChoppedConstraintNode(TExprContext& ctx, const NYT::TNode& serialized);
    TChoppedConstraintNode(TChoppedConstraintNode&& constr);
public:
    static constexpr std::string_view Name() {
        return "Chopped";
    }

    const TSetOfSetsType& GetContent() const { return Sets_; }

    TSetType GetFullSet() const;

    bool Equals(const TConstraintNode& node) const override;
    bool Includes(const TConstraintNode& node) const override;
    void Out(IOutputStream& out) const override;
    void ToJson(NJson::TJsonWriter& out) const override;
    NYT::TNode ToYson() const override;

    // Non-virtual overload taking a set of paths rather than another constraint node.
    bool Equals(const TSetType& prefix) const;

    void FilterUncompleteReferences(TSetType& references) const final;

    static const TChoppedConstraintNode* MakeCommon(const std::vector& constraints, TExprContext& ctx);
    const TChoppedConstraintNode* MakeCommon(const TChoppedConstraintNode* other, TExprContext& ctx) const;

    bool IsApplicableToType(const TTypeAnnotationNode& type) const override;
private:
    const TConstraintWithFieldsNode* DoFilterFields(TExprContext& ctx, const TPathFilter& predicate) const final;
    const TConstraintWithFieldsNode* DoRenameFields(TExprContext& ctx, const TPathReduce& reduce) const final;

    const TConstraintWithFieldsNode* DoGetComplicatedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const final;
    const TConstraintWithFieldsNode* DoGetSimplifiedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const final;

    static TSetOfSetsType NodeToSets(TExprContext& ctx, const NYT::TNode& serialized);

    TSetOfSetsType Sets_;
};

// Shared implementation of the "Unique" and "Distinct" constraints.
// Name() selects the constraint name from the compile-time value `Distinct`
// (presumably a bool template parameter; the parameter list is missing in this copy).
template
class TUniqueConstraintNodeBase final: public TConstraintWithFieldsT> {
public:
    using TBase = TConstraintWithFieldsT>;
    using TContentType = NSorted::TSimpleSet;
protected:
    friend struct TExprContext;

    TUniqueConstraintNodeBase(TExprContext& ctx, const std::vector& columns);
    TUniqueConstraintNodeBase(TExprContext& ctx, TContentType&& sets);
    TUniqueConstraintNodeBase(TExprContext& ctx, const NYT::TNode& serialized);
    TUniqueConstraintNodeBase(TUniqueConstraintNodeBase&& constr);
public:
    static constexpr std::string_view Name() {
        return Distinct ? "Distinct" : "Unique";
    }

    const TContentType& GetContent() const { return Content_; }

    TPartOfConstraintBase::TSetType GetFullSet() const;

    bool Equals(const TConstraintNode& node) const override;
    bool Includes(const TConstraintNode& node) const override;
    void Out(IOutputStream& out) const override;
    void ToJson(NJson::TJsonWriter& out) const override;
    NYT::TNode ToYson() const override;

    bool IsOrderBy(const TSortedConstraintNode& sorted) const;
    bool ContainsCompleteSet(const std::vector& columns) const;

    void FilterUncompleteReferences(TPartOfConstraintBase::TSetType& references) const final;

    static const TUniqueConstraintNodeBase* MakeCommon(const std::vector& constraints, TExprContext& ctx);
    const TUniqueConstraintNodeBase* MakeCommon(const TUniqueConstraintNodeBase* other, TExprContext& ctx) const;

    static const TUniqueConstraintNodeBase* Merge(const TUniqueConstraintNodeBase* one, const TUniqueConstraintNodeBase* two, TExprContext& ctx);

    bool IsApplicableToType(const TTypeAnnotationNode& type) const override;
private:
    const TConstraintWithFieldsNode* DoFilterFields(TExprContext& ctx, const TPartOfConstraintBase::TPathFilter& predicate) const final;
    const TConstraintWithFieldsNode* DoRenameFields(TExprContext& ctx, const TPartOfConstraintBase::TPathReduce& reduce) const final;
    const TConstraintWithFieldsNode* DoGetComplicatedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const final;
    const TConstraintWithFieldsNode* DoGetSimplifiedForType(const TTypeAnnotationNode& type, TExprContext& ctx) const final;

    static TConstraintWithFieldsNode::TSetOfSetsType ColumnsListToSets(const std::vector& columns);
    static TContentType DedupSets(TContentType&& sets);
    static TContentType MakeCommonContent(const TContentType& one, const TContentType& two);

    static TContentType NodeToContent(TExprContext& ctx, const NYT::TNode& serialized);

    TContentType Content_;
};

// NOTE(review): the template arguments that distinguish these two aliases are missing
// in this copy; as written both lines are identical apart from the alias name.
using TUniqueConstraintNode = TUniqueConstraintNodeBase;
using TDistinctConstraintNode = TUniqueConstraintNodeBase;

// "Part of" counterpart of a complete constraint (TMainConstraint): keeps a mapping
// (TMapType, stored in Mapping_) keyed per original constraint.
// Name() is only declared here; its definitions are the explicit specializations
// that follow the aliases below.
// NOTE(review): the template parameter list (presumably declaring
// TOriginalConstraintNode) and the arguments of the map aliases are missing in this copy.
template
class TPartOfConstraintNode final: public TPartOfConstraintBaseT> {
public:
    using TBase = TPartOfConstraintBaseT>;
    using TMainConstraint = TOriginalConstraintNode;
    using TPartType = NSorted::TSimpleMap;
    using TReversePartType = NSorted::TSimpleMap>;
    using TMapType = std::unordered_map;
private:
    friend struct TExprContext;

    TPartOfConstraintNode(TPartOfConstraintNode&& constr);
    TPartOfConstraintNode(TExprContext& ctx, TMapType&& mapping);
    TPartOfConstraintNode(TExprContext& ctx, const NYT::TNode& serialized);
public:
    static constexpr std::string_view Name();

    const TMapType& GetColumnMapping() const;
    TMapType GetColumnMapping(const std::string_view& asField) const;
    TMapType GetColumnMapping(TExprContext& ctx, const std::string_view& prefix) const;

    bool Equals(const TConstraintNode& node) const override;
    bool Includes(const TConstraintNode& node) const override;
    void Out(IOutputStream& out) const override;
    void ToJson(NJson::TJsonWriter& out) const override;
    NYT::TNode ToYson() const override;

    const TPartOfConstraintNode* ExtractField(TExprContext& ctx, const std::string_view& field) const;
    const TPartOfConstraintNode* CompleteOnly(TExprContext& ctx) const;
    const TPartOfConstraintNode* RemoveOriginal(TExprContext& ctx, const TMainConstraint* original) const;

    static const TPartOfConstraintNode* MakeCommon(const std::vector& constraints, TExprContext& ctx);

    // `incomplete` and `field` are optional: they default to nullptr and an empty view.
    static TMapType GetCommonMapping(const TMainConstraint* complete, const TPartOfConstraintNode* incomplete = nullptr, const std::string_view& field = {});
    static void UniqueMerge(TMapType& output, TMapType&& input);
    static TMapType ExtractField(const TMapType& mapping, const std::string_view& field);

    // Two overloads: one starts from a raw mapping, the other from a partial constraint node.
    static const TMainConstraint* MakeComplete(TExprContext& ctx, const TMapType& mapping, const TMainConstraint* original, const std::string_view& field = {});
    static const TMainConstraint* MakeComplete(TExprContext& ctx, const TPartOfConstraintNode* partial, const TMainConstraint* original, const std::string_view& field = {});

    bool IsApplicableToType(const TTypeAnnotationNode& type) const override;
private:
    const TPartOfConstraintBase* DoFilterFields(TExprContext& ctx, const TPartOfConstraintBase::TPathFilter& predicate) const final;
    const TPartOfConstraintBase* DoRenameFields(TExprContext& ctx, const TPartOfConstraintBase::TPathReduce& reduce) const final;

    TMapType Mapping_;
};

// NOTE(review): the template argument of each alias is missing in this copy
// (presumably the matching complete constraint type, judging by the alias names - confirm).
using TPartOfSortedConstraintNode = TPartOfConstraintNode;
using TPartOfChoppedConstraintNode = TPartOfConstraintNode;
using TPartOfUniqueConstraintNode = TPartOfConstraintNode;
using TPartOfDistinctConstraintNode = TPartOfConstraintNode;

// Explicit specializations supplying the constraint name for each "part of" alias.
template<>
constexpr std::string_view TPartOfSortedConstraintNode::Name() {
    return "PartOfSorted";
}

template<>
constexpr std::string_view TPartOfChoppedConstraintNode::Name() {
    return "PartOfChopped";
}

template<>
constexpr std::string_view TPartOfUniqueConstraintNode::Name() {
    return "PartOfUnique";
}

template<>
constexpr std::string_view TPartOfDistinctConstraintNode::Name() {
    return "PartOfDistinct";
}

// Constraint named "Empty". Declares no data members of its own and keeps the
// base-class defaults for Includes() and Out().
class TEmptyConstraintNode final: public TConstraintNode {
protected:
    friend struct TExprContext;

    TEmptyConstraintNode(TExprContext& ctx);
    TEmptyConstraintNode(TExprContext& ctx, const NYT::TNode& serialized);
    TEmptyConstraintNode(TEmptyConstraintNode&& constr);

public:
    static constexpr std::string_view Name() {
        return "Empty";
    }

    bool Equals(const TConstraintNode& node) const override;
    void ToJson(NJson::TJsonWriter& out) const override;
    NYT::TNode ToYson() const override;

    static const TEmptyConstraintNode* MakeCommon(const std::vector& constraints, TExprContext& ctx);
};

// Constraint named "VarIndex": an index-to-index mapping stored in Mapping_.
class TVarIndexConstraintNode final: public TConstraintNode {
public:
    // NOTE(review): key/value types of the map are missing in this copy.
    using TMapType = NSorted::TSimpleMap;

protected:
    friend struct TExprContext;

    TVarIndexConstraintNode(TExprContext& ctx, const TMapType& mapping);
    TVarIndexConstraintNode(TExprContext& ctx, const TVariantExprType& itemType);
    TVarIndexConstraintNode(TExprContext& ctx, size_t mapItemsCount);
    TVarIndexConstraintNode(TExprContext& ctx, const NYT::TNode& serialized);
    TVarIndexConstraintNode(TVarIndexConstraintNode&& constr);
public:
    static constexpr std::string_view Name() {
        return "VarIndex";
    }

    // multimap: result index -> {original indices}
    const TMapType& GetIndexMapping() const {
        return Mapping_;
    }

    // original index -> {result indices}
    // Returned by value, unlike GetIndexMapping().
    TMapType GetReverseMapping() const;

    bool Equals(const TConstraintNode& node) const override;
    bool Includes(const TConstraintNode& node) const override;
    void Out(IOutputStream& out) const override;
    void ToJson(NJson::TJsonWriter& out) const override;
    NYT::TNode ToYson() const override;

    static const TVarIndexConstraintNode* MakeCommon(const std::vector& constraints, TExprContext& ctx);

private:
    static TMapType NodeToMapping(const NYT::TNode& serialized);

    TMapType Mapping_;
};

// Constraint named "Multi": a map (Items_) from a ui32 index to a TConstraintSet,
// as shown by GetItem(). Unlike the other nodes, its constructors are public.
class TMultiConstraintNode final: public TConstraintNode {
public:
    // NOTE(review): key/value types of the map are missing in this copy.
    using TMapType = NSorted::TSimpleMap;
public:
    TMultiConstraintNode(TExprContext& ctx, TMapType&& items);
    TMultiConstraintNode(TExprContext& ctx, ui32 index, const TConstraintSet& constraints);
    TMultiConstraintNode(TExprContext& ctx, const NYT::TNode& serialized);
    TMultiConstraintNode(TMultiConstraintNode&& constr);
public:
    static constexpr std::string_view Name() {
        return "Multi";
    }

    const TMapType& GetItems() const {
        return Items_;
    }

    // Returns whatever Items_.FindPtr(index) yields
    // (presumably nullptr when the index is absent - confirm against TSimpleMap).
    const TConstraintSet* GetItem(ui32 index) const {
        return Items_.FindPtr(index);
    }

    bool Equals(const TConstraintNode& node) const override;
    bool Includes(const TConstraintNode& node) const override;
    void Out(IOutputStream& out) const override;
    void ToJson(NJson::TJsonWriter& out) const override;
    NYT::TNode ToYson() const override;

    static const TMultiConstraintNode* MakeCommon(const std::vector& constraints, TExprContext& ctx);

    const TMultiConstraintNode* FilterConstraints(TExprContext& ctx, const TConstraintSet::TPredicate& predicate) const;

    bool FilteredIncludes(const TConstraintNode& node, const THashSet& blacklist) const;
private:
    static TMapType NodeToMapping(TExprContext& ctx, const NYT::TNode& serialized);

    TMapType Items_;
};

} // namespace NYql