#include "node.h" #include "context.h" #include "list_builtin.h" #include #include #include #include #include #include #include #include #include #include #include
// NOTE(review): the bare #include directives above and the missing template argument lists
// in this listing (after TVector, static_cast, ...) look like extraction damage — confirm
// against the repository copy before applying.
using namespace NYql;

namespace NSQLTranslationV0 {

/// GROUPING(...) call node.
/// Checks that every argument names a grouping column of the source, asks the source for the
/// grouping hint and appends a "Uint64" atom plus the quoted decimal hint to the node list.
class TGroupingNode final: public TAstListNode {
public:
    TGroupingNode(TPosition pos, const TVector& args)
        : TAstListNode(pos)
        , Args(args)
    {}

    bool DoInit(TContext& ctx, ISource* src) final {
        if (!src) {
            ctx.Error(Pos) << "Grouping function should have source";
            return false;
        }

        // The composite source does not depend on the argument being checked: look it up once.
        ISource* const composite = src->GetCompositeSource();

        TVector columns;
        columns.reserve(Args.size());
        for (const auto& node: Args) {
            const auto namePtr = node->GetColumnName();
            if (!namePtr || !*namePtr) {
                ctx.Error(Pos) << "Grouping function should use columns as arguments";
                return false;
            }
            // Bind by reference; the only copy is the one stored into `columns`.
            const auto& column = *namePtr;
            const bool isGroupingColumn = src->IsGroupByColumn(column)
                || src->IsAlias(EExprSeat::GroupBy, column)
                || (composite && composite->IsGroupByColumn(column));
            if (!isGroupingColumn) {
                ctx.Error(node->GetPos()) << "Column '" << column << "' not used as grouping column";
                return false;
            }
            columns.emplace_back(column);
        }

        // Zero-initialised so the value is never indeterminate, whatever the callee does.
        ui64 hint = 0;
        if (!src->CalculateGroupingHint(ctx, columns, hint)) {
            return false;
        }
        Nodes.push_back(BuildAtom(Pos, "Uint64"));
        Nodes.push_back(BuildQuotedAtom(Pos, IntToString<10>(hint)));
        return TAstListNode::DoInit(ctx, src);
    }

    TNodePtr DoClone() const final {
        return new TGroupingNode(Pos, Args);
    }

private:
    const TVector Args;
};

/// Aggregation function call bound to a data source.
/// DoInit initialises the wrapped aggregation against the source and registers it there.
class TBasicAggrFunc final: public TAstListNode {
public:
    TBasicAggrFunc(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector& args)
        : TAstListNode(pos)
        , Name(name)
        , Aggr(aggr)
        , Args(args)
    {}

    TCiString GetName() const {
        return Name;
    }

    bool DoInit(TContext& ctx, ISource* src) final {
        if (!src) {
            ctx.Error(Pos) << "Unable to use aggregation function '" << Name << "' without data source";
            return false;
        }
        if (!DoInitAggregation(ctx, src)) {
            return false;
        }
        return TAstListNode::DoInit(ctx, src);
    }

    TNodePtr DoClone() const final {
        TAggregationPtr aggrClone = static_cast(Aggr->Clone().Release());
        return new TBasicAggrFunc(Pos, Name, aggrClone, CloneContainer(Args));
    }

    TAggregationPtr GetAggregation() const override {
        return Aggr;
    }

private:
    bool DoInitAggregation(TContext& ctx, ISource* src) {
        if (!Aggr->InitAggr(ctx, false, src, *this, Args)) {
            return false;
        }
        return src->AddAggregation(ctx, Aggr);
    }

    void DoUpdateState() const final {
        // front() on an empty container is undefined behaviour, so an argument-less call is
        // reported as non-constant instead of dereferencing a missing element.
        State.Set(ENodeState::Const, !Args.empty() && Args.front()->IsConstant());
        State.Set(ENodeState::Aggregated);
    }

protected:
    const TString Name;
    TAggregationPtr Aggr;
    TVector Args;
};

/// Aggregation factory node: translates to a lambda (type, extractor) that applies the
/// aggregation traits factory to a list of `type` and to the extractor(s).
class TBasicAggrFactory final : public TAstListNode {
public:
    TBasicAggrFactory(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector& args)
        : TAstListNode(pos)
        , Name(name)
        , Aggr(aggr)
        , Args(args)
    {}

    TCiString GetName() const {
        return Name;
    }

    bool DoInit(TContext& ctx, ISource* src) final {
        if (!DoInitAggregation(ctx)) {
            return false;
        }

        auto factory = Aggr->AggregationTraitsFactory();
        auto apply = Y("Apply", factory, Y("ListType", "type"));

        auto columnIndices = Aggr->GetFactoryColumnIndices();
        if (columnIndices.size() == 1) {
            apply = L(apply, "extractor");
        } else {
            // make several extractors from main that returns a tuple
            for (ui32 arg = 0; arg < columnIndices.size(); ++arg) {
                auto partial = BuildLambda(Pos, Y("row"), Y("Nth", Y("Apply", "extractor", "row"), Q(ToString(columnIndices[arg]))));
                apply = L(apply, partial);
            }
        }

        Aggr->AddFactoryArguments(apply);
        Lambda = BuildLambda(Pos, Y("type", "extractor"), apply);
        return TAstListNode::DoInit(ctx, src);
    }

    TAstNode* Translate(TContext& ctx) const override {
        return Lambda->Translate(ctx);
    }

    TNodePtr DoClone() const final {
        TAggregationPtr aggrClone = static_cast(Aggr->Clone().Release());
        return new TBasicAggrFactory(Pos, Name, aggrClone, CloneContainer(Args));
    }

    TAggregationPtr GetAggregation() const override {
        return Aggr;
    }

private:
    // Factory mode: the aggregation is initialised without a source.
    bool DoInitAggregation(TContext& ctx) {
        return Aggr->InitAggr(ctx, true, nullptr, *this, Args);
    }

protected:
    const TString
Name; TAggregationPtr Aggr; TVector Args; TNodePtr Lambda; }; typedef THolder TAggrFuncPtr; class TLiteralStringAtom: public INode { public: TLiteralStringAtom(TPosition pos, TNodePtr node, const TString& info) : INode(pos) , Node(node) , Info(info) { } bool DoInit(TContext& ctx, ISource* src) override { Y_UNUSED(src); if (!Node) { ctx.Error(Pos) << Info; return false; } if (!Node->Init(ctx, src)) { return false; } Atom = MakeAtomFromExpression(ctx, Node).Build(); return true; } TAstNode* Translate(TContext& ctx) const override { return Atom->Translate(ctx); } TPtr DoClone() const final { return {}; } private: TNodePtr Node; TNodePtr Atom; TString Info; }; class TYqlAsAtom: public TLiteralStringAtom { public: TYqlAsAtom(TPosition pos, const TVector& args) : TLiteralStringAtom(pos, args.size() == 1 ? args[0] : nullptr, "Literal string is required as argument") { } }; class TYqlData: public TCallNode { public: TYqlData(TPosition pos, const TString& type, const TVector& args) : TCallNode(pos, type, 1, 1, args) { } bool DoInit(TContext& ctx, ISource* src) override { auto slot = NUdf::FindDataSlot(GetOpName()); if (!slot) { ctx.Error(Pos) << "Unexpected type " << GetOpName(); return false; } if (*slot == NUdf::EDataSlot::Decimal) { MinArgs = MaxArgs = 3; } if (!ValidateArguments(ctx)) { return false; } auto stringNode = Args[0]; auto atom = stringNode->GetLiteral("String"); if (!atom) { ctx.Error(Pos) << "Expected literal string as argument in " << GetOpName() << " function"; return false; } TString value; if (*slot == NUdf::EDataSlot::Decimal) { const auto precision = Args[1]->GetLiteral("Int32"); const auto scale = Args[2]->GetLiteral("Int32"); if (!NKikimr::NMiniKQL::IsValidDecimal(*atom)) { ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName(); return false; } ui8 stub; if (!(precision && TryFromString(*precision, stub))) { ctx.Error(Pos) << "Invalid precision " << (precision ? 
precision->Quote() : "") << " for type " << GetOpName(); return false; } if (!(scale && TryFromString(*scale, stub))) { ctx.Error(Pos) << "Invalid scale " << (scale ? scale->Quote() : "") << " for type " << GetOpName(); return false; } Args[0] = BuildQuotedAtom(GetPos(), *atom); Args[1] = BuildQuotedAtom(GetPos(), *precision); Args[2] = BuildQuotedAtom(GetPos(), *scale); return TCallNode::DoInit(ctx, src); } else if (NUdf::GetDataTypeInfo(*slot).Features & (NUdf::DateType | NUdf::TzDateType | NUdf::TimeIntervalType)) { const auto out = NKikimr::NMiniKQL::ValueFromString(*slot, *atom); if (!out) { ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName(); return false; } switch (*slot) { case NUdf::EDataSlot::Date: case NUdf::EDataSlot::TzDate: value = ToString(out.Get()); break; case NUdf::EDataSlot::Datetime: case NUdf::EDataSlot::TzDatetime: value = ToString(out.Get()); break; case NUdf::EDataSlot::Timestamp: case NUdf::EDataSlot::TzTimestamp: value = ToString(out.Get()); break; case NUdf::EDataSlot::Interval: value = ToString(out.Get()); if ('T' == atom->back()) { ctx.Warning(Pos, TIssuesIds::YQL_DEPRECATED_INTERVAL_CONSTANT) << "Time prefix 'T' at end of interval constant"; } break; default: Y_ABORT("Unexpected data slot"); } if (NUdf::GetDataTypeInfo(*slot).Features & NUdf::TzDateType) { value += ","; value += NKikimr::NMiniKQL::GetTimezoneIANAName(out.GetTimezoneId()); } } else if (NUdf::EDataSlot::Uuid == *slot) { char out[0x10]; if (!NKikimr::NMiniKQL::ParseUuid(*atom, out)) { ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName(); return false; } value.assign(out, sizeof(out)); } else { if (!NKikimr::NMiniKQL::IsValidStringValue(*slot, *atom)) { ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName(); return false; } value = *atom; } Args[0] = BuildQuotedAtom(GetPos(), value); return TCallNode::DoInit(ctx, src); } TPtr DoClone() const final { return new TYqlData(GetPos(), 
OpName, CloneContainer(Args)); } }; class TTableName : public TCallNode { public: TTableName(TPosition pos, const TVector& args, const TString& cluster) : TCallNode(pos, "TableName", 0, 2, args) , Cluster(cluster) { } bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (Args.empty()) { Args.push_back(Y("TablePath", Y("DependsOn", "row"))); } if (Args.size() == 2) { auto literal = Args[1]->GetLiteral("String"); if (!literal) { ctx.Error(Args[1]->GetPos()) << "Expected literal string as second argument in TableName function"; return false; } Args[1] = BuildQuotedAtom(Args[1]->GetPos(), to_lower(*literal)); } else { if (Cluster.empty()) { ctx.Error(GetPos()) << GetOpName() << " requires either one of \"yt\"/\"kikimr\"/\"rtmr\" as second argument or current cluster name"; return false; } auto service = ctx.GetClusterProvider(Cluster); if (!service) { ctx.Error() << "Unknown cluster name: " << Cluster; return false; } Args.push_back(BuildQuotedAtom(GetPos(), to_lower(*service))); } return TCallNode::DoInit(ctx, src); } TPtr DoClone() const final { return new TTableName(GetPos(), CloneContainer(Args), Cluster); } void DoUpdateState() const override { State.Set(ENodeState::Const, false); } private: TString Cluster; }; class TYqlParseType final : public INode { public: TYqlParseType(TPosition pos, const TVector& args) : INode(pos) , Args(args) {} TAstNode* Translate(TContext& ctx) const override { if (Args.size() != 1) { ctx.Error(Pos) << "Expected 1 argument in ParseType function"; return nullptr; } auto literal = Args[0]->GetLiteral("String"); if (!literal) { ctx.Error(Args[0]->GetPos()) << "Expected literal string as argument in ParseType function"; return nullptr; } auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos()); if (!parsed) { ctx.Error(Args[0]->GetPos()) << "Failed to parse type"; return nullptr; } return parsed; } TNodePtr DoClone() const final { return new TYqlParseType(Pos, Args); } 
private: TVector Args; }; class TYqlAddTimezone: public TCallNode { public: TYqlAddTimezone(TPosition pos, const TVector& args) : TCallNode(pos, "AddTimezone", 2, 2, args) { } bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } Args[1] = Y("TimezoneId", Args[1]); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlAddTimezone(Pos, CloneContainer(Args)); } }; template class TYqlTypeAssert : public TCallNode { public: TYqlTypeAssert(TPosition pos, const TVector& args) : TCallNode(pos, Strict ? "EnsureType" : "EnsureConvertibleTo", 2, 3, args) { } bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } auto literal = Args[1]->GetLiteral("String"); INode::TPtr type; if (literal) { auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos()); if (!parsed) { ctx.Error(Args[1]->GetPos()) << "Failed to parse type"; return false; } type = AstNode(parsed); } else { type = Args[1]; } if (!type->Init(ctx, src)) { return false; } Args[1] = type; if (Args.size() == 3) { if (!Args[2]->Init(ctx, src)) { return false; } auto message = MakeAtomFromExpression(ctx, Args[2]).Build(); Args[2] = message; } return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlTypeAssert(Pos, Args); } }; class TFromBytes final : public TCallNode { public: TFromBytes(TPosition pos, const TVector& args) : TCallNode(pos, "FromBytes", 2, 2, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (!Args[1]->Init(ctx, src)) { return false; } Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build(); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TFromBytes(Pos, Args); } }; class TYqlTaggedBase : public TCallNode { public: TYqlTaggedBase(TPosition pos, const TString& opName, const TVector& args) : TCallNode(pos, opName, 2, 2, args) {} bool DoInit(TContext& 
ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (!Args[1]->Init(ctx, src)) { return false; } Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build(); return TCallNode::DoInit(ctx, src); } }; class TYqlAsTagged final : public TYqlTaggedBase { public: TYqlAsTagged(TPosition pos, const TVector& args) : TYqlTaggedBase(pos, "AsTagged", args) {} TNodePtr DoClone() const final { return new TYqlAsTagged(Pos, Args); } }; class TYqlUntag final : public TYqlTaggedBase { public: TYqlUntag(TPosition pos, const TVector& args) : TYqlTaggedBase(pos, "Untag", args) {} TNodePtr DoClone() const final { return new TYqlUntag(Pos, Args); } }; class TYqlVariant final : public TCallNode { public: TYqlVariant(TPosition pos, const TVector& args) : TCallNode(pos, "Variant", 3, 3, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (!Args[1]->Init(ctx, src)) { return false; } Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build(); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlVariant(Pos, Args); } }; TNodePtr BuildFileNameArgument(TPosition pos, const TNodePtr& argument) { return new TLiteralStringAtom(pos, argument, "FilePath requires string literal as parameter"); } class TYqlAtom final: public TCallNode { public: TYqlAtom(TPosition pos, const TString& opName, const TVector& args) : TCallNode(pos, opName, 1, 1, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!Args.empty()) { Args[0] = BuildFileNameArgument(ctx.Pos(), Args[0]); } return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlAtom(Pos, OpName, Args); } }; class TAddMember final: public TCallNode { public: TAddMember(TPosition pos, const TString& opName, const TVector& args) : TCallNode(pos, opName, 3, 3, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (Args.size() != 3) { ctx.Error(Pos) << OpName << " requires exactly three arguments"; return 
false; } for (const auto& arg : Args) { if (!arg->Init(ctx, src)) { return false; } } Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build(); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TAddMember(Pos, OpName, Args); } }; class TRemoveMember final: public TCallNode { public: TRemoveMember(TPosition pos, const TString& opName, const TVector& args) : TCallNode(pos, opName, 2, 2, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (Args.size() != 2) { ctx.Error(Pos) << OpName << " requires exactly two arguments"; return false; } for (const auto& arg : Args) { if (!arg->Init(ctx, src)) { return false; } } Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build(); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TRemoveMember(Pos, OpName, Args); } }; class TCombineMembers final: public TCallNode { public: TCombineMembers(TPosition pos, const TString& opName, const TVector& args) : TCallNode(pos, opName, 1, -1, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (Args.empty()) { ctx.Error(Pos) << "CombineMembers requires at least one argument"; return false; } for (size_t i = 0; i < Args.size(); ++i) { Args[i] = Q(Y(Q(""), Args[i])); // flatten without prefix } return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TCombineMembers(Pos, OpName, Args); } }; class TFlattenMembers final: public TCallNode { public: TFlattenMembers(TPosition pos, const TString& opName, const TVector& args) : TCallNode(pos, opName, 1, -1, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (Args.empty()) { ctx.Error(Pos) << OpName << " requires at least one argument"; return false; } for (size_t i = 0; i < Args.size(); ++i) { if (!Args[i]->Init(ctx, src)) { return false; } if (Args[i]->GetTupleSize() == 2) { // flatten with prefix Args[i] = Q(Y( MakeAtomFromExpression(ctx, Args[i]->GetTupleElement(0)).Build(), Args[i]->GetTupleElement(1) )); } else { ctx.Error(Pos) 
<< OpName << " requires arguments to be tuples of size 2: prefix and struct"; return false; } } return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TFlattenMembers(Pos, OpName, Args); } }; TString NormalizeTypeString(const TString& str) { auto ret = to_title(str); if (ret.StartsWith("Tz")) { ret = "Tz" + to_title(ret.substr(2)); } return ret; } static const TSet AvailableDataTypes = {"Bool", "String", "Uint32", "Uint64", "Int32", "Int64", "Float", "Double", "Utf8", "Yson", "Json", "Date", "Datetime", "Timestamp", "Interval", "Uint8", "Int8", "Uint16", "Int16", "TzDate", "TzDatetime", "TzTimestamp", "Uuid", "Decimal"}; TNodePtr GetDataTypeStringNode(TContext& ctx, TCallNode& node, unsigned argNum, TString* outTypeStrPtr = nullptr) { auto errMsgFunc = [&node, argNum]() { static std::array numToName = {{"first", "second"}}; TStringBuilder sb; sb << "At " << numToName.at(argNum) << " argument of " << node.GetOpName() << " expected type string, available one of: " << JoinRange(", ", AvailableDataTypes.begin(), AvailableDataTypes.end()) << ";"; return TString(sb); }; auto typeStringNode = node.GetArgs().at(argNum); auto typeStringPtr = typeStringNode->GetLiteral("String"); TNodePtr dataTypeNode; if (typeStringPtr) { TString typeString = NormalizeTypeString(*typeStringPtr); if (!AvailableDataTypes.contains(typeString)) { ctx.Error(typeStringNode->GetPos()) << "Bad type string: '" << typeString << "'. 
" << errMsgFunc(); return {}; } if (outTypeStrPtr) { *outTypeStrPtr = typeString; } dataTypeNode = typeStringNode->Q(typeString); } else { ctx.Error(typeStringNode->GetPos()) << errMsgFunc(); return {}; } return dataTypeNode; } class TYqlParseFileOp final: public TCallNode { public: TYqlParseFileOp(TPosition pos, const TVector& args) : TCallNode(pos, "ParseFile", 2, 2, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } auto dataTypeStringNode = GetDataTypeStringNode(ctx, *this, 0); if (!dataTypeStringNode) { return false; } auto aliasNode = BuildFileNameArgument(Args[1]->GetPos(), Args[1]); OpName = "Apply"; Args[0] = Y("Udf", Q("File.ByLines"), Y("Void"), Y("TupleType", Y("TupleType", Y("DataType", dataTypeStringNode)), Y("StructType"), Y("TupleType"))); Args[1] = Y("FilePath", aliasNode); return TCallNode::DoInit(ctx, src); } TString GetOpName() const override { return "ParseFile"; } TNodePtr DoClone() const final { return new TYqlParseFileOp(Pos, Args); } }; class TYqlDataType final : public TCallNode { public: TYqlDataType(TPosition pos, const TVector& args) : TCallNode(pos, "DataType", 1, 1, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } auto dataTypeStringNode = GetDataTypeStringNode(ctx, *this, 0); if (!dataTypeStringNode) { return false; } Args[0] = dataTypeStringNode; return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlDataType(Pos, Args); } }; TNodePtr TryBuildDataType(TPosition pos, const TString& stringType) { auto normStringType = NormalizeTypeString(stringType); if (!AvailableDataTypes.contains(normStringType)) { return {}; } return new TYqlDataType(pos, {BuildLiteralRawString(pos, normStringType)}); } class TYqlResourceType final : public TCallNode { public: TYqlResourceType(TPosition pos, const TVector& args) : TCallNode(pos, "ResourceType", 1, 1, args) {} bool DoInit(TContext& ctx, ISource* src) 
override { if (!ValidateArguments(ctx)) { return false; } if (!Args[0]->Init(ctx, src)) { return false; } Args[0] = MakeAtomFromExpression(ctx, Args[0]).Build(); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlResourceType(Pos, Args); } }; class TYqlTaggedType final : public TCallNode { public: TYqlTaggedType(TPosition pos, const TVector& args) : TCallNode(pos, "TaggedType", 2, 2, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (!Args[1]->Init(ctx, src)) { return false; } Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build(); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlTaggedType(Pos, Args); } }; class TYqlCallableType final : public TCallNode { public: TYqlCallableType(TPosition pos, const TVector& args) : TCallNode(pos, "CallableType", 2, -1, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (!dynamic_cast(Args[0].Get())) { ui32 numOptArgs; if (!Parseui32(Args[0], numOptArgs)) { ctx.Error(Args[0]->GetPos()) << "Expected either tuple or number of optional arguments"; return false; } Args[0] = Q(Y(BuildQuotedAtom(Args[0]->GetPos(), ToString(numOptArgs)))); } if (!dynamic_cast(Args[1].Get())) { Args[1] = Q(Y(Args[1])); } for (ui32 index = 2; index < Args.size(); ++index) { if (!dynamic_cast(Args[index].Get())) { Args[index] = Q(Y(Args[index])); } } return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlCallableType(Pos, Args); } }; class TYqlTupleElementType final : public TCallNode { public: TYqlTupleElementType(TPosition pos, const TVector& args) : TCallNode(pos, "TupleElementType", 2, 2, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (!Args[1]->Init(ctx, src)) { return false; } Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build(); return TCallNode::DoInit(ctx, src); 
} TNodePtr DoClone() const final { return new TYqlTupleElementType(Pos, Args); } }; class TYqlStructMemberType final : public TCallNode { public: TYqlStructMemberType(TPosition pos, const TVector& args) : TCallNode(pos, "StructMemberType", 2, 2, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (!Args[1]->Init(ctx, src)) { return false; } Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build(); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlStructMemberType(Pos, Args); } }; class TYqlCallableArgumentType final : public TCallNode { public: TYqlCallableArgumentType(TPosition pos, const TVector& args) : TCallNode(pos, "CallableArgumentType", 2, 2, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } ui32 index; if (!Parseui32(Args[1], index)) { ctx.Error(Args[1]->GetPos()) << "Expected index of the callable argument"; return false; } Args[1] = BuildQuotedAtom(Args[1]->GetPos(), ToString(index)); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlCallableArgumentType(Pos, Args); } }; class TStructTypeNode : public TAstListNode { public: TStructTypeNode(TPosition pos, const TVector& exprs) : TAstListNode(pos) , Exprs(exprs) {} bool DoInit(TContext& ctx, ISource* src) override { Nodes.push_back(BuildAtom(Pos, "StructType", TNodeFlags::Default)); for (const auto& expr : Exprs) { const auto& label = expr->GetLabel(); if (!label) { ctx.Error(expr->GetPos()) << "Structure does not allow anonymous members"; return false; } Nodes.push_back(Q(Y(Q(label), expr))); } return TAstListNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TStructTypeNode(Pos, CloneContainer(Exprs)); } private: const TVector Exprs; }; template class TYqlIf final: public TCallNode { public: TYqlIf(TPosition pos, const TVector& args) : TCallNode(pos, IsStrict ? 
"IfStrict" : "If", 2, 3, args) {} private: TCallNode::TPtr DoClone() const override { return new TYqlIf(GetPos(), CloneContainer(Args)); } bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } Args[0] = Y("Coalesce", Args[0], Y("Bool", Q("false"))); if (Args.size() == 2) { Args.push_back(Y("Null")); } return TCallNode::DoInit(ctx, src); } }; class TYqlSubstring final: public TCallNode { public: TYqlSubstring(TPosition pos, const TVector& args) : TCallNode(pos, "Substring", 2, 3, args) {} private: TCallNode::TPtr DoClone() const override { return new TYqlSubstring(GetPos(), CloneContainer(Args)); } bool DoInit(TContext& ctx, ISource* src) override { if (Args.size() == 2) { Args.push_back(Y("Uint32", Q(ToString(Max())))); } return TCallNode::DoInit(ctx, src); } }; class TYqlIn final: public TCallNode { public: TYqlIn(TPosition pos, const TVector& args) : TCallNode(pos, "IN", 3, 3, args) {} private: TNodePtr DoClone() const final { return new TYqlIn(Pos, CloneContainer(Args)); } bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } auto key = Args[0]; auto inNode = Args[1]; auto hints = Args[2]; if (!key->Init(ctx, src)) { return false; } if (!inNode->Init(ctx, inNode->GetSource() ? nullptr : src)) { return false; } if (inNode->GetLiteral("String")) { ctx.Error(inNode->GetPos()) << "Unable to use IN predicate with string argument, it won't search substring - " "expecting tuple, list, dict or single column table source"; return false; } if (inNode->GetTupleSize() == 1) { auto singleElement = inNode->GetTupleElement(0); // TODO: 'IN ((select ...))' is parsed exactly like 'IN (select ...)' instead of a single element tuple if (singleElement->GetSource() || singleElement->IsSelect()) { TStringBuf parenKind = singleElement->GetSource() ? 
"" : "external "; ctx.Warning(inNode->GetPos(), TIssuesIds::YQL_CONST_SUBREQUEST_IN_LIST) << "Using subrequest in scalar context after IN, " << "perhaps you should remove " << parenKind << "parenthesis here"; } } if (inNode->GetSource() || inNode->IsSelect()) { TVector hintElements; for (size_t i = 0; i < hints->GetTupleSize(); ++i) { hintElements.push_back(hints->GetTupleElement(i)); } auto pos = inNode->GetPos(); auto tableSourceHint = BuildTuple(pos, { BuildQuotedAtom(pos, "tableSource", NYql::TNodeFlags::Default) }); hintElements.push_back(tableSourceHint); hints = BuildTuple(pos, hintElements); } OpName = "SqlIn"; MinArgs = MaxArgs = 3; Args = { inNode->GetSource() ? inNode->GetSource() : inNode, key, hints }; return TCallNode::DoInit(ctx, src); } TString GetOpName() const override { return "IN predicate"; } }; class TYqlUdfBase : public TCallNode { public: TYqlUdfBase(TPosition pos, const TString& name) : TCallNode(pos, "Udf", 1, 1, UdfArgs(pos, name)) {} TYqlUdfBase(TPosition pos, const TString& name, const TVector& args, ui32 argsCount = 2) : TCallNode(pos, "Udf", argsCount, argsCount, UdfArgs(pos, name, &args)) {} protected: TYqlUdfBase(TPosition pos, const TString& opName, ui32 minArgs, ui32 maxArgs, const TVector& args) : TCallNode(pos, opName, minArgs, maxArgs, args) {} private: static TVector UdfArgs(TPosition pos, const TString& name, const TVector* args = nullptr) { TVector res = { BuildQuotedAtom(pos, name) }; if (args) { res.insert(res.end(), args->begin(), args->end()); } return res; } void DoUpdateState() const override { TCallNode::DoUpdateState(); State.Set(ENodeState::Aggregated, false/*!RunConfig || RunConfig->IsAggregated()*/); State.Set(ENodeState::Const, true /* FIXME: To avoid CheckAggregationLevel issue for non-const TypeOf. 
*/); } private: TNodePtr RunConfig; }; class TYqlUdf final : public TYqlUdfBase { public: TYqlUdf(TPosition pos, const TString& name) : TYqlUdfBase(pos, name) {} TYqlUdf(TPosition pos, const TString& name, const TVector& args, ui32 argsCount = 2) : TYqlUdfBase(pos, name, args, argsCount) {} private: TYqlUdf(const TYqlUdf& other) : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args)) {} TNodePtr DoClone() const final { return new TYqlUdf(*this); } }; class TYqlTypeConfigUdf final : public TYqlUdfBase { public: TYqlTypeConfigUdf(TPosition pos, const TString& name) : TYqlUdfBase(pos, name) {} TYqlTypeConfigUdf(TPosition pos, const TString& name, const TVector& args, ui32 argsCount = 2) : TYqlUdfBase(pos, name, args, argsCount) {} private: TYqlTypeConfigUdf(const TYqlTypeConfigUdf& other) : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args)) {} bool DoInit(TContext& ctx, ISource* src) override { if (!ValidateArguments(ctx)) { return false; } if (!Args[3]->Init(ctx, src)) { return false; } Args[3] = MakeAtomFromExpression(ctx, Args[3]).Build(); return TYqlUdfBase::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TYqlTypeConfigUdf(*this); } }; class TWeakFieldOp final: public TCallNode { public: TWeakFieldOp(TPosition pos, const TVector& args) : TCallNode(pos, "WeakField", 2, 3, args) {} bool DoInit(TContext& ctx, ISource* src) override { if (!src) { ctx.Error(Pos) << GetCallExplain() << " unable use without source"; return false; } src->AllColumns(); if (!ValidateArguments(ctx)) { return false; } PrecacheState(); const auto memberPos = Args[0]->GetPos(); TVector repackArgs = {BuildAtom(memberPos, "row", NYql::TNodeFlags::Default)}; if (auto literal = Args[1]->GetLiteral("String")) { TString targetType; if (!GetDataTypeStringNode(ctx, *this, 1, &targetType)) { return false; } repackArgs.push_back(Args[1]->Q(targetType)); } else { repackArgs.push_back(Args[1]); } TVector 
column; auto namePtr = Args[0]->GetColumnName(); if (!namePtr || !*namePtr) { ctx.Error(Pos) << GetCallExplain() << " expect as first argument column name"; return false; } auto memberName = *namePtr; column.push_back(Args[0]->Q(*namePtr)); if (src->GetJoin() && !src->IsJoinKeysInitializing()) { const auto sourcePtr = Args[0]->GetSourceName(); if (!sourcePtr || !*sourcePtr) { ctx.Error(Pos) << GetOpName() << " required to have correlation name in case of JOIN for column at first parameter"; return false; } column.push_back(Args[0]->Q(*sourcePtr)); memberName = DotJoin(*sourcePtr, memberName); } if (!GetLabel()) { SetLabel(memberName); } repackArgs.push_back(BuildTuple(memberPos, column)); if (Args.size() == 3) { repackArgs.push_back(Args[2]); } ++MinArgs; ++MaxArgs; Args.swap(repackArgs); return TCallNode::DoInit(ctx, src); } TNodePtr DoClone() const final { return new TWeakFieldOp(Pos, Args); } }; class TTableRow final : public TAstAtomNode { public: TTableRow(TPosition pos, const TVector& args) : TTableRow(pos, args.size()) {} TTableRow(TPosition pos, ui32 argsCount) : TAstAtomNode(pos, "row", 0) , ArgsCount(argsCount) {} bool DoInit(TContext& ctx, ISource* src) override { if (!src || src->IsFake()) { ctx.Error(Pos) << "TableRow requires FROM section"; return false; } if (ArgsCount > 0) { ctx.Error(Pos) << "TableRow requires exactly 0 arguments"; return false; } src->AllColumns(); return true; } void DoUpdateState() const final { State.Set(ENodeState::Const, false); } TNodePtr DoClone() const final { return MakeIntrusive(Pos, ArgsCount); } private: ui32 ArgsCount; }; TNodePtr BuildUdfUserTypeArg(TPosition pos, const TVector& args, TNodePtr customUserType) { TVector argsTypeItems; for (auto& arg : args) { argsTypeItems.push_back(new TCallNodeImpl(pos, "TypeOf", TVector(1, arg))); } TVector userTypeItems; userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", argsTypeItems)); userTypeItems.push_back(new TCallNodeImpl(pos, "StructType", {})); if 
(customUserType) { userTypeItems.push_back(customUserType); } else { userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {})); } return new TCallNodeImpl(pos, "TupleType", userTypeItems); } TNodePtr BuildUdfUserTypeArg(TPosition pos, TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) { TVector userTypeItems; userTypeItems.reserve(3); userTypeItems.push_back(positionalArgs->Y("TypeOf", positionalArgs)); userTypeItems.push_back(positionalArgs->Y("TypeOf", namedArgs)); if (customUserType) { userTypeItems.push_back(customUserType); } else { userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {})); } return new TCallNodeImpl(pos, "TupleType", userTypeItems); } TVector BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector& args, TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) { if (!ctx.Settings.EnableGenericUdfs) { return {}; } TVector udfArgs; udfArgs.push_back(new TAstListNodeImpl(pos)); udfArgs[0]->Add(new TAstAtomNodeImpl(pos, "Void", 0)); if (namedArgs) { udfArgs.push_back(BuildUdfUserTypeArg(pos, positionalArgs, namedArgs, customUserType)); } else { udfArgs.push_back(BuildUdfUserTypeArg(pos, args, customUserType)); } return udfArgs; } class TCallableNode final: public INode { public: TCallableNode(TPosition pos, const TString& module, const TString& name, const TVector& args) : INode(pos) , Module(module) , Name(name) , Args(args) {} bool DoInit(TContext& ctx, ISource* src) override { if (Module == "yql") { ui32 flags; TString nameParseError; TPosition pos = Pos; TString parsedName; if (!TryStringContent(Name, parsedName, flags, nameParseError, pos)) { ctx.Error(pos) << "Failed to parse YQL: " << nameParseError; return false; } const TString yql("(" + parsedName + ")"); TAstParseResult ast = ParseAst(yql, ctx.Pool.get()); /// TODO: do not drop warnings if (ast.IsOk()) { Node = AstNode(ast.Root->GetChild(0)); } else { ctx.Error(Pos) << "Failed to parse YQL: " << ast.Issues.ToString(); return false; } 
if (src) { src->AllColumns(); } } else if (ctx.Settings.ModuleMapping.contains(Module)) { Node = Y("bind", Module + "_module", Q(Name)); if (src) { src->AllColumns(); } } else { TNodePtr customUserType = nullptr; if (Module == "Tensorflow" && Name == "RunBatch") { if (Args.size() > 2) { auto passThroughAtom = Q("PassThrough"); auto passThroughType = Y("StructMemberType", Y("ListItemType", Y("TypeOf", Args[1])), passThroughAtom); customUserType = Y("AddMemberType", Args[2], passThroughAtom, passThroughType); Args.erase(Args.begin() + 2); } } auto udfArgs = BuildUdfArgs(ctx, Pos, Args, nullptr, nullptr, customUserType); Node = BuildUdf(ctx, Pos, Module, Name, udfArgs); } return Node->Init(ctx, src); } TAstNode* Translate(TContext& ctx) const override { Y_DEBUG_ABORT_UNLESS(Node); return Node->Translate(ctx); } void DoUpdateState() const override { YQL_ENSURE(Node); State.Set(ENodeState::Const, Node->IsConstant()); State.Set(ENodeState::Aggregated, Node->IsAggregated()); } TNodePtr DoClone() const override { return new TCallableNode(Pos, Module, Name, Args); } private: TCiString Module; TString Name; TVector Args; TNodePtr Node; }; TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector& args) { return new TCallableNode(pos, module, name, args); } TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector& args) { auto fullName = module + "." 
+ name; if (!args.empty()) { return new TYqlUdf(pos, fullName, args, args.size() + 1); } else { auto varName = ctx.AddSimpleUdf(fullName); return new TAstAtomNodeImpl(pos, varName, TNodeFlags::ArbitraryContent); } } class TScriptUdf final: public INode { public: TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector& args) : INode(pos) , ModuleName(moduleName) , FuncName(funcName) , Args(args) {} bool DoInit(TContext& ctx, ISource* src) override { const bool isPython = ModuleName.find(TStringBuf("Python")) != TString::npos; if (!isPython) { if (Args.size() != 2) { ctx.Error(Pos) << ModuleName << " script declaration requires exactly two parameters"; return false; } } else { if (Args.size() < 1 || Args.size() > 2) { ctx.Error(Pos) << ModuleName << " script declaration requires one or two parameters"; return false; } } auto nameAtom = BuildQuotedAtom(Pos, FuncName); auto scriptNode = Args.back(); if (!scriptNode->Init(ctx, src)) { return false; } auto scriptStrPtr = Args.back()->GetLiteral("String"); if (scriptStrPtr && scriptStrPtr->size() > SQL_MAX_INLINE_SCRIPT_LEN) { scriptNode = ctx.UniversalAlias("scriptudf", std::move(scriptNode)); } INode::TPtr type; if (Args.size() == 2) { auto literal = Args[0]->GetLiteral("String"); if (literal) { auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos()); if (!parsed) { ctx.Error(Args[0]->GetPos()) << "Failed to parse script signature"; return false; } type = AstNode(parsed); } else { type = Args[0]; } } else { // Python supports getting functions signatures right from docstrings type = Y("EvaluateType", Y("ParseTypeHandle", Y("Apply", Y("bind", "core_module", Q("PythonFuncSignature")), Q(ModuleName), scriptNode, Y("String", nameAtom) ))); } if (!type->Init(ctx, src)) { return false; } Node = Y("ScriptUdf", Q(ModuleName), nameAtom, type, scriptNode); return true; } TAstNode* Translate(TContext& ctx) const override { Y_UNUSED(ctx); Y_DEBUG_ABORT_UNLESS(Node); return 
Node->Translate(ctx); } void DoUpdateState() const override { State.Set(ENodeState::Const, true); } TNodePtr DoClone() const final { return new TScriptUdf(GetPos(), ModuleName, FuncName, CloneContainer(Args)); } private: TString ModuleName; TString FuncName; TVector Args; TNodePtr Node; }; template class TYqlToDict final: public TCallNode { public: TYqlToDict(TPosition pos, const TString& mode, const TVector& args) : TCallNode(pos, "ToDict", 4, 4, args) , Mode(mode) {} private: TCallNode::TPtr DoClone() const override { return new TYqlToDict(GetPos(), Mode, CloneContainer(Args)); } bool DoInit(TContext& ctx, ISource* src) override { if (Args.size() != 1) { ctx.Error(Pos) << "ToDict required exactly one argument"; return false; } Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("0")))); Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("1")))); Args.push_back(Q(Y(Q(Sorted ? "Sorted" : "Hashed"), Q(Mode)))); return TCallNode::DoInit(ctx, src); } private: TString Mode; }; template class THoppingTime final: public TAstListNode { public: THoppingTime(TPosition pos, const TVector& args = {}) : TAstListNode(pos) { Y_UNUSED(args); } private: TNodePtr DoClone() const override { return new THoppingTime(GetPos()); } bool DoInit(TContext& ctx, ISource* src) override { Y_UNUSED(ctx); auto window = src->GetHoppingWindowSpec(); if (!window) { ctx.Error(Pos) << "No hopping window parameters in aggregation"; return false; } Nodes.clear(); if (!IsStart) { Add("Member", "row", Q("_yql_time")); return true; } Add("Sub", Y("Member", "row", Q("_yql_time")), window->Interval); return true; } void DoUpdateState() const override { State.Set(ENodeState::Aggregated, true); } }; class TInvalidBuiltin final: public INode { public: TInvalidBuiltin(TPosition pos, const TString& info) : INode(pos) , Info(info) { } bool DoInit(TContext& ctx, ISource*) override { ctx.Error(Pos) << Info; return false; } TAstNode* Translate(TContext&) const override { return nullptr; } TPtr 
DoClone() const override { return {}; } private: TString Info; }; enum EAggrFuncTypeCallback { NORMAL, WINDOW_AUTOARGS, KEY_PAYLOAD, PAYLOAD_PREDICATE, TWO_ARGS, COUNT, HISTOGRAM, LINEAR_HISTOGRAM, PERCENTILE, TOPFREQ, TOP, TOP_BY, COUNT_DISTINCT_ESTIMATE, LIST, UDAF }; struct TCoreFuncInfo { TString Name; ui32 MinArgs; ui32 MaxArgs; }; using TAggrFuncFactoryCallback = std::function& args, EAggregateMode aggMode, bool isFactory)>; using TAggrFuncFactoryCallbackMap = std::unordered_map>; using TBuiltinFactoryCallback = std::function& args)>; using TBuiltinFactoryCallbackMap = std::unordered_map>; using TCoreFuncMap = std::unordered_map>; TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback( const TString& functionName, const TString& factoryName, EAggrFuncTypeCallback type = NORMAL, const TString& functionNameOverride = TString(), const TVector& validModes = {}) { const TString realFunctionName = functionNameOverride.empty() ? functionName : functionNameOverride; return [functionName, realFunctionName, factoryName, type, validModes] (TPosition pos, const TVector& args, EAggregateMode aggMode, bool isFactory) -> INode::TPtr { if (!validModes.empty()) { if (!IsIn(validModes, aggMode)) { const TString errorText = TStringBuilder() << "Can't use " << functionName << " in " << ToString(aggMode) << " aggregation mode"; return INode::TPtr(new TInvalidBuiltin(pos, errorText)); } } TAggregationPtr factory = nullptr; switch (type) { case NORMAL: factory = BuildFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case WINDOW_AUTOARGS: factory = BuildFactoryAggregationWinAutoarg(pos, realFunctionName, factoryName, aggMode); break; case KEY_PAYLOAD: factory = BuildKeyPayloadFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case PAYLOAD_PREDICATE: factory = BuildPayloadPredicateFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case TWO_ARGS: factory = BuildTwoArgsFactoryAggregation(pos, realFunctionName, factoryName, 
aggMode); break; case COUNT: factory = BuildCountAggregation(pos, realFunctionName, factoryName, aggMode); break; case HISTOGRAM: factory = BuildHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case LINEAR_HISTOGRAM: factory = BuildLinearHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case PERCENTILE: factory = BuildPercentileFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case TOPFREQ: factory = BuildTopFreqFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case TOP: factory = BuildTopFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case TOP_BY: factory = BuildTopFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case COUNT_DISTINCT_ESTIMATE: factory = BuildCountDistinctEstimateFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case LIST: factory = BuildListFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; case UDAF: factory = BuildUserDefinedFactoryAggregation(pos, realFunctionName, factoryName, aggMode); break; } if (isFactory) { auto realArgs = args; realArgs.erase(realArgs.begin()); // skip function name return new TBasicAggrFactory(pos, functionName, factory, realArgs); } else { return new TBasicAggrFunc(pos, functionName, factory, args); } }; } TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback( const TString& functionName, const TString& factoryName, const TVector& validModes, EAggrFuncTypeCallback type = NORMAL, const TString& functionNameOverride = TString()) { return BuildAggrFuncFactoryCallback(functionName, factoryName, type, functionNameOverride, validModes); } template TBuiltinFactoryCallback BuildSimpleBuiltinFactoryCallback() { return [] (TPosition pos, const TVector& args) -> TNodePtr { return new TType(pos, args); }; } template TBuiltinFactoryCallback BuildNamedBuiltinFactoryCallback(const TString& name) { return [name] (TPosition pos, const TVector& 
args) -> TNodePtr { return new TType(pos, name, args); }; } template TBuiltinFactoryCallback BuildArgcBuiltinFactoryCallback(i32 minArgs, i32 maxArgs) { return [minArgs, maxArgs] (TPosition pos, const TVector& args) -> TNodePtr { return new TType(pos, minArgs, maxArgs, args); }; } template TBuiltinFactoryCallback BuildNamedArgcBuiltinFactoryCallback(const TString& name, i32 minArgs, i32 maxArgs) { return [name, minArgs, maxArgs] (TPosition pos, const TVector& args) -> TNodePtr { return new TType(pos, name, minArgs, maxArgs, args); }; } template TBuiltinFactoryCallback BuildNamedDepsArgcBuiltinFactoryCallback(ui32 reqArgsCount, const TString& name, i32 minArgs, i32 maxArgs) { return [reqArgsCount, name, minArgs, maxArgs](TPosition pos, const TVector& args) -> TNodePtr { return new TType(reqArgsCount, pos, name, minArgs, maxArgs, args); }; } template TBuiltinFactoryCallback BuildBoolBuiltinFactoryCallback(bool arg) { return [arg] (TPosition pos, const TVector& args) -> TNodePtr { return new TType(pos, args, arg); }; } template TBuiltinFactoryCallback BuildFoldBuiltinFactoryCallback(const TString& name, const TString& defaultValue) { return [name, defaultValue] (TPosition pos, const TVector& args) -> TNodePtr { return new TType(pos, name, "Bool", defaultValue, 1, args); }; } TNodePtr MakePair(TPosition pos, const TVector& args) { TNodePtr list = new TAstListNodeImpl(pos, { args[0], args.size() > 1 ? 
args[1] : new TAstListNodeImpl(pos,{ new TAstAtomNodeImpl(pos, "Null", TNodeFlags::Default) }) }); return new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "quote", TNodeFlags::Default), list }); } struct TBuiltinFuncData { const TBuiltinFactoryCallbackMap BuiltinFuncs; const TAggrFuncFactoryCallbackMap AggrFuncs; const TCoreFuncMap CoreFuncs; TBuiltinFuncData(): BuiltinFuncs(MakeBuiltinFuncs()), AggrFuncs(MakeAggrFuncs()), CoreFuncs(MakeCoreFuncs()) { } TBuiltinFactoryCallbackMap MakeBuiltinFuncs() { TBuiltinFactoryCallbackMap builtinFuncs = { // Branching {"if", BuildSimpleBuiltinFactoryCallback>()}, {"ifstrict", BuildSimpleBuiltinFactoryCallback>() }, // String builtins {"len", BuildNamedArgcBuiltinFactoryCallback("Size", 1, 1)}, {"length", BuildNamedArgcBuiltinFactoryCallback("Size", 1, 1)}, {"charlength", BuildNamedArgcBuiltinFactoryCallback("Size", 1, 1)}, {"characterlength", BuildNamedArgcBuiltinFactoryCallback("Size", 1, 1)}, {"substring", BuildSimpleBuiltinFactoryCallback()}, {"byteat", BuildNamedArgcBuiltinFactoryCallback("ByteAt", 2, 2) }, // Numeric builtins {"abs", BuildNamedArgcBuiltinFactoryCallback("Abs", 1, 1) }, {"tobytes", BuildNamedArgcBuiltinFactoryCallback("ToBytes", 1, 1) }, {"frombytes", BuildSimpleBuiltinFactoryCallback() }, // Compare builtins {"minof", BuildNamedArgcBuiltinFactoryCallback("Min", 1, -1)}, {"maxof", BuildNamedArgcBuiltinFactoryCallback("Max", 1, -1)}, {"greatest", BuildNamedArgcBuiltinFactoryCallback("Max", 1, -1)}, {"least", BuildNamedArgcBuiltinFactoryCallback("Min", 1, -1)}, {"in", BuildSimpleBuiltinFactoryCallback()}, // List builtins {"aslist", BuildNamedArgcBuiltinFactoryCallback("AsList", 1, -1)}, {"asliststrict", BuildNamedArgcBuiltinFactoryCallback("AsListStrict", 1, -1) }, {"listlength", BuildNamedArgcBuiltinFactoryCallback("Length", 1, 1)}, {"listhasitems", BuildNamedArgcBuiltinFactoryCallback("HasItems", 1, 1)}, {"listcount", BuildSimpleBuiltinFactoryCallback()}, {"listextend", 
BuildNamedArgcBuiltinFactoryCallback("Extend", 1, -1)}, {"listunionall", BuildNamedArgcBuiltinFactoryCallback("UnionAll", 1, -1) }, {"listzip", BuildNamedArgcBuiltinFactoryCallback("Zip", -1, -1)}, {"listzipall", BuildNamedArgcBuiltinFactoryCallback("ZipAll", -1, -1)}, {"listenumerate", BuildNamedArgcBuiltinFactoryCallback("Enumerate", 1, 3)}, {"listreverse", BuildNamedArgcBuiltinFactoryCallback("Reverse", 1, 1)}, {"listskip", BuildNamedArgcBuiltinFactoryCallback("Skip", 2, 2)}, {"listtake", BuildNamedArgcBuiltinFactoryCallback("Take", 2, 2)}, {"listsort", BuildBoolBuiltinFactoryCallback(true)}, {"listsortasc", BuildBoolBuiltinFactoryCallback(true)}, {"listsortdesc", BuildBoolBuiltinFactoryCallback(false)}, {"listmap", BuildBoolBuiltinFactoryCallback(false)}, {"listflatmap", BuildBoolBuiltinFactoryCallback(true)}, {"listfilter", BuildSimpleBuiltinFactoryCallback()}, {"listany", BuildFoldBuiltinFactoryCallback("Or", "false")}, {"listall", BuildFoldBuiltinFactoryCallback("And", "true")}, {"listhas", BuildSimpleBuiltinFactoryCallback()}, {"listmax", BuildNamedBuiltinFactoryCallback("AggrMax")}, {"listmin", BuildNamedBuiltinFactoryCallback("AggrMin")}, {"listsum", BuildNamedBuiltinFactoryCallback("AggrAdd")}, {"listavg", BuildSimpleBuiltinFactoryCallback()}, {"listconcat", BuildNamedBuiltinFactoryCallback("Concat")}, {"listextract", BuildSimpleBuiltinFactoryCallback()}, {"listuniq", BuildSimpleBuiltinFactoryCallback()}, {"listcreate", BuildSimpleBuiltinFactoryCallback()}, {"listfromrange", BuildNamedArgcBuiltinFactoryCallback("ListFromRange", 2, 3) }, {"listreplicate", BuildNamedArgcBuiltinFactoryCallback("Replicate", 2, 2) }, {"listtakewhile", BuildNamedArgcBuiltinFactoryCallback("TakeWhile", 2, 2) }, {"listskipwhile", BuildNamedArgcBuiltinFactoryCallback("SkipWhile", 2, 2) }, {"listcollect", BuildNamedArgcBuiltinFactoryCallback("Collect", 1, 1) }, // Dict builtins {"dictcreate", BuildSimpleBuiltinFactoryCallback()}, {"asdict", 
BuildNamedArgcBuiltinFactoryCallback("AsDict", 1, -1)}, {"asdictstrict", BuildNamedArgcBuiltinFactoryCallback("AsDictStrict", 1, -1)}, {"todict", BuildNamedBuiltinFactoryCallback>("One")}, {"tomultidict", BuildNamedBuiltinFactoryCallback>("Many")}, {"tosorteddict", BuildNamedBuiltinFactoryCallback>("One")}, {"tosortedmultidict", BuildNamedBuiltinFactoryCallback>("Many")}, {"dictkeys", BuildNamedArgcBuiltinFactoryCallback("DictKeys", 1, 1) }, {"dictpayloads", BuildNamedArgcBuiltinFactoryCallback("DictPayloads", 1, 1) }, {"dictitems", BuildNamedArgcBuiltinFactoryCallback("DictItems", 1, 1) }, {"dictlookup", BuildNamedArgcBuiltinFactoryCallback("Lookup", 2, 2) }, {"dictcontains", BuildNamedArgcBuiltinFactoryCallback("Contains", 2, 2) }, // Atom builtins {"asatom", BuildSimpleBuiltinFactoryCallback()}, {"secureparam", BuildNamedBuiltinFactoryCallback("SecureParam")}, {"void", BuildNamedArgcBuiltinFactoryCallback("Void", 0, 0)}, {"callable", BuildNamedArgcBuiltinFactoryCallback("Callable", 2, 2)}, {"way", BuildNamedArgcBuiltinFactoryCallback("Way", 1, 1) }, {"variant", BuildSimpleBuiltinFactoryCallback() }, {"astagged", BuildSimpleBuiltinFactoryCallback() }, {"untag", BuildSimpleBuiltinFactoryCallback() }, {"parsetype", BuildSimpleBuiltinFactoryCallback() }, {"ensuretype", BuildSimpleBuiltinFactoryCallback>() }, {"ensureconvertibleto", BuildSimpleBuiltinFactoryCallback>() }, {"ensure", BuildNamedArgcBuiltinFactoryCallback("Ensure", 2, 3) }, {"evaluateexpr", BuildNamedArgcBuiltinFactoryCallback("EvaluateExpr", 1, 1) }, {"evaluateatom", BuildNamedArgcBuiltinFactoryCallback("EvaluateAtom", 1, 1) }, {"evaluatetype", BuildNamedArgcBuiltinFactoryCallback("EvaluateType", 1, 1) }, {"unwrap", BuildNamedArgcBuiltinFactoryCallback("Unwrap", 1, 2) }, {"just", BuildNamedArgcBuiltinFactoryCallback("Just", 1, 1) }, {"nothing", BuildNamedArgcBuiltinFactoryCallback("Nothing", 1, 1) }, {"formattype", BuildNamedArgcBuiltinFactoryCallback("FormatType", 1, 1) }, {"typeof", 
BuildNamedArgcBuiltinFactoryCallback("TypeOf", 1, 1) }, {"instanceof", BuildNamedArgcBuiltinFactoryCallback("InstanceOf", 1, 1) }, {"datatype", BuildSimpleBuiltinFactoryCallback() }, {"optionaltype", BuildNamedArgcBuiltinFactoryCallback("OptionalType", 1, 1) }, {"listtype", BuildNamedArgcBuiltinFactoryCallback("ListType", 1, 1) }, {"streamtype", BuildNamedArgcBuiltinFactoryCallback("StreamType", 1, 1) }, {"dicttype", BuildNamedArgcBuiltinFactoryCallback("DictType", 2, 2) }, {"tupletype", BuildNamedArgcBuiltinFactoryCallback("TupleType", 0, -1) }, {"generictype", BuildNamedArgcBuiltinFactoryCallback("GenericType", 0, 0) }, {"unittype", BuildNamedArgcBuiltinFactoryCallback("UnitType", 0, 0) }, {"voidtype", BuildNamedArgcBuiltinFactoryCallback("VoidType", 0, 0) }, {"resourcetype", BuildSimpleBuiltinFactoryCallback() }, {"taggedtype", BuildSimpleBuiltinFactoryCallback() }, {"varianttype", BuildNamedArgcBuiltinFactoryCallback("VariantType", 1, 1) }, {"callabletype", BuildSimpleBuiltinFactoryCallback() }, {"optionalitemtype", BuildNamedArgcBuiltinFactoryCallback("OptionalItemType", 1, 1) }, {"listitemtype", BuildNamedArgcBuiltinFactoryCallback("ListItemType", 1, 1) }, {"streamitemtype", BuildNamedArgcBuiltinFactoryCallback("StreamItemType", 1, 1) }, {"dictkeytype", BuildNamedArgcBuiltinFactoryCallback("DictKeyType", 1, 1) }, {"dictpayloadtype", BuildNamedArgcBuiltinFactoryCallback("DictPayloadType", 1, 1) }, {"tupleelementtype", BuildSimpleBuiltinFactoryCallback() }, {"structmembertype", BuildSimpleBuiltinFactoryCallback() }, {"callableresulttype", BuildNamedArgcBuiltinFactoryCallback("CallableResultType", 1, 1) }, {"callableargumenttype", BuildSimpleBuiltinFactoryCallback() }, {"variantunderlyingtype", BuildNamedArgcBuiltinFactoryCallback("VariantUnderlyingType", 1, 1) }, {"fromysonsimpletype", BuildNamedArgcBuiltinFactoryCallback("FromYsonSimpleType", 2, 2) }, {"currentutcdate", BuildNamedDepsArgcBuiltinFactoryCallback(0, "CurrentUtcDate", 0, -1) }, 
{"currentutcdatetime", BuildNamedDepsArgcBuiltinFactoryCallback(0, "CurrentUtcDatetime", 0, -1) }, {"currentutctimestamp", BuildNamedDepsArgcBuiltinFactoryCallback(0, "CurrentUtcTimestamp", 0, -1) }, {"currentoperationid", BuildNamedArgcBuiltinFactoryCallback("CurrentOperationId", 0, 0) }, {"currentoperationsharedid", BuildNamedArgcBuiltinFactoryCallback("CurrentOperationSharedId", 0, 0) }, {"currentauthenticateduser", BuildNamedArgcBuiltinFactoryCallback("CurrentAuthenticatedUser", 0, 0) }, {"addtimezone", BuildSimpleBuiltinFactoryCallback() }, {"removetimezone", BuildNamedArgcBuiltinFactoryCallback("RemoveTimezone", 1, 1) }, {"typehandle", BuildNamedArgcBuiltinFactoryCallback("TypeHandle", 1, 1) }, {"parsetypehandle", BuildNamedArgcBuiltinFactoryCallback("ParseTypeHandle", 1, 1) }, {"typekind", BuildNamedArgcBuiltinFactoryCallback("TypeKind", 1, 1) }, {"datatypecomponents", BuildNamedArgcBuiltinFactoryCallback("DataTypeComponents", 1, 1) }, {"datatypehandle", BuildNamedArgcBuiltinFactoryCallback("DataTypeHandle", 1, 1) }, {"optionaltypehandle", BuildNamedArgcBuiltinFactoryCallback("OptionalTypeHandle", 1, 1) }, {"listtypehandle", BuildNamedArgcBuiltinFactoryCallback("ListTypeHandle", 1, 1) }, {"streamtypehandle", BuildNamedArgcBuiltinFactoryCallback("StreamTypeHandle", 1, 1) }, {"tupletypecomponents", BuildNamedArgcBuiltinFactoryCallback("TupleTypeComponents", 1, 1) }, {"tupletypehandle", BuildNamedArgcBuiltinFactoryCallback("TupleTypeHandle", 1, 1) }, {"structtypecomponents", BuildNamedArgcBuiltinFactoryCallback("StructTypeComponents", 1, 1) }, {"structtypehandle", BuildNamedArgcBuiltinFactoryCallback("StructTypeHandle", 1, 1) }, {"dicttypecomponents", BuildNamedArgcBuiltinFactoryCallback("DictTypeComponents", 1, 1) }, {"dicttypehandle", BuildNamedArgcBuiltinFactoryCallback("DictTypeHandle", 2, 2) }, {"resourcetypetag", BuildNamedArgcBuiltinFactoryCallback("ResourceTypeTag", 1, 1) }, {"resourcetypehandle", 
BuildNamedArgcBuiltinFactoryCallback("ResourceTypeHandle", 1, 1) }, {"taggedtypecomponents", BuildNamedArgcBuiltinFactoryCallback("TaggedTypeComponents", 1, 1) }, {"taggedtypehandle", BuildNamedArgcBuiltinFactoryCallback("TaggedTypeHandle", 2, 2) }, {"varianttypehandle", BuildNamedArgcBuiltinFactoryCallback("VariantTypeHandle", 1, 1) }, {"voidtypehandle", BuildNamedArgcBuiltinFactoryCallback("VoidTypeHandle", 0, 0) }, {"nulltypehandle", BuildNamedArgcBuiltinFactoryCallback("NullTypeHandle", 0, 0) }, {"callabletypecomponents", BuildNamedArgcBuiltinFactoryCallback("CallableTypeComponents", 1, 1) }, {"callableargument", BuildNamedArgcBuiltinFactoryCallback("CallableArgument", 1, 3) }, {"callabletypehandle", BuildNamedArgcBuiltinFactoryCallback("CallableTypeHandle", 2, 4) }, {"formatcode", BuildNamedArgcBuiltinFactoryCallback("FormatCode", 1, 1) }, {"worldcode", BuildNamedArgcBuiltinFactoryCallback("WorldCode", 0, 0) }, {"atomcode", BuildNamedArgcBuiltinFactoryCallback("AtomCode", 1, 1) }, {"listcode", BuildNamedArgcBuiltinFactoryCallback("ListCode", 0, -1) }, {"funccode", BuildNamedArgcBuiltinFactoryCallback("FuncCode", 1, -1) }, {"lambdacode", BuildNamedArgcBuiltinFactoryCallback("LambdaCode", 1, 2) }, {"evaluatecode", BuildNamedArgcBuiltinFactoryCallback("EvaluateCode", 1, 1) }, {"reprcode", BuildNamedArgcBuiltinFactoryCallback("ReprCode", 1, 1) }, {"quotecode", BuildNamedArgcBuiltinFactoryCallback("QuoteCode", 1, 1) }, // Tuple builtins {"astuple", BuildSimpleBuiltinFactoryCallback()}, // Struct builtins {"addmember", BuildNamedBuiltinFactoryCallback("AddMember")}, {"removemember", BuildNamedBuiltinFactoryCallback("RemoveMember")}, {"forceremovemember", BuildNamedBuiltinFactoryCallback("ForceRemoveMember")}, {"combinemembers", BuildNamedBuiltinFactoryCallback("FlattenMembers")}, {"flattenmembers", BuildNamedBuiltinFactoryCallback("FlattenMembers")}, // File builtins {"filepath", BuildNamedBuiltinFactoryCallback("FilePath")}, {"filecontent", 
BuildNamedBuiltinFactoryCallback("FileContent")}, {"folderpath", BuildNamedBuiltinFactoryCallback("FolderPath") }, {"files", BuildNamedBuiltinFactoryCallback("Files")}, {"parsefile", BuildSimpleBuiltinFactoryCallback()}, // Misc builtins {"coalesce", BuildNamedArgcBuiltinFactoryCallback("Coalesce", 1, -1)}, {"nvl", BuildNamedArgcBuiltinFactoryCallback("Coalesce", 1, -1) }, {"nanvl", BuildNamedArgcBuiltinFactoryCallback("Nanvl", 2, 2) }, {"likely", BuildNamedArgcBuiltinFactoryCallback("Likely", 1, -1)}, {"random", BuildNamedDepsArgcBuiltinFactoryCallback(0, "Random", 1, -1)}, {"randomnumber", BuildNamedDepsArgcBuiltinFactoryCallback(0, "RandomNumber", 1, -1)}, {"randomuuid", BuildNamedDepsArgcBuiltinFactoryCallback(0, "RandomUuid", 1, -1) }, {"tablepath", BuildNamedBuiltinFactoryCallback("TablePath") }, {"tablerecord", BuildNamedBuiltinFactoryCallback("TableRecord") }, {"tablerecordindex", BuildNamedBuiltinFactoryCallback("TableRecord") }, {"weakfield", BuildSimpleBuiltinFactoryCallback()}, {"tablerow", BuildSimpleBuiltinFactoryCallback() }, // Hint builtins {"grouping", BuildSimpleBuiltinFactoryCallback()}, // Window funcitons {"rownumber", BuildNamedArgcBuiltinFactoryCallback("RowNumber", 0, 0)}, /// by SQL2011 should be with sort {"lead", BuildNamedArgcBuiltinFactoryCallback("Lead", 1, 2)}, {"lag", BuildNamedArgcBuiltinFactoryCallback("Lag", 1, 2)}, // Hopping intervals time functions {"hopstart", BuildSimpleBuiltinFactoryCallback>()}, {"hopend", BuildSimpleBuiltinFactoryCallback>()}, }; return builtinFuncs; } TAggrFuncFactoryCallbackMap MakeAggrFuncs() { constexpr auto OverWindow = EAggregateMode::OverWindow; TAggrFuncFactoryCallbackMap aggrFuncs = { {"min", BuildAggrFuncFactoryCallback("Min", "min_traits_factory")}, {"max", BuildAggrFuncFactoryCallback("Max", "max_traits_factory")}, {"minby", BuildAggrFuncFactoryCallback("MinBy", "min_by_traits_factory", KEY_PAYLOAD)}, {"maxby", BuildAggrFuncFactoryCallback("MaxBy", "max_by_traits_factory", KEY_PAYLOAD)}, 
{"sum", BuildAggrFuncFactoryCallback("Sum", "sum_traits_factory")}, {"sumif", BuildAggrFuncFactoryCallback("SumIf", "sum_if_traits_factory", PAYLOAD_PREDICATE) }, {"some", BuildAggrFuncFactoryCallback("Some", "some_traits_factory")}, {"somevalue", BuildAggrFuncFactoryCallback("SomeValue", "some_traits_factory")}, {"count", BuildAggrFuncFactoryCallback("Count", "count_traits_factory", COUNT)}, {"countif", BuildAggrFuncFactoryCallback("CountIf", "count_if_traits_factory")}, {"every", BuildAggrFuncFactoryCallback("Every", "and_traits_factory")}, {"booland", BuildAggrFuncFactoryCallback("BoolAnd", "and_traits_factory")}, {"boolor", BuildAggrFuncFactoryCallback("BoolOr", "or_traits_factory")}, {"bitand", BuildAggrFuncFactoryCallback("BitAnd", "bit_and_traits_factory")}, {"bitor", BuildAggrFuncFactoryCallback("BitOr", "bit_or_traits_factory")}, {"bitxor", BuildAggrFuncFactoryCallback("BitXor", "bit_xor_traits_factory")}, {"avg", BuildAggrFuncFactoryCallback("Avg", "avg_traits_factory")}, {"avgif", BuildAggrFuncFactoryCallback("AvgIf", "avg_if_traits_factory", PAYLOAD_PREDICATE) }, {"list", BuildAggrFuncFactoryCallback("List", "list_traits_factory", LIST)}, {"agglist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)}, {"aggrlist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)}, {"aggregatelist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)}, {"listdistinct", BuildAggrFuncFactoryCallback("ListDistinct", "set_traits_factory", LIST)}, {"agglistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)}, {"aggrlistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)}, {"aggregatelistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)}, {"median", BuildAggrFuncFactoryCallback("Median", "percentile_traits_factory", PERCENTILE)}, {"percentile", 
BuildAggrFuncFactoryCallback("Percentile", "percentile_traits_factory", PERCENTILE)}, {"mode", BuildAggrFuncFactoryCallback("Mode", "topfreq_traits_factory", TOPFREQ) }, {"topfreq", BuildAggrFuncFactoryCallback("TopFreq", "topfreq_traits_factory", TOPFREQ) }, {"top", BuildAggrFuncFactoryCallback("Top", "top_traits_factory", TOP)}, {"bottom", BuildAggrFuncFactoryCallback("Bottom", "bottom_traits_factory", TOP)}, {"topby", BuildAggrFuncFactoryCallback("TopBy", "top_by_traits_factory", TOP_BY)}, {"bottomby", BuildAggrFuncFactoryCallback("BottomBy", "bottom_by_traits_factory", TOP_BY)}, {"histogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM, "Histogram")}, {"adaptivewardhistogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM)}, {"adaptiveweighthistogram", BuildAggrFuncFactoryCallback("AdaptiveWeightHistogram", "histogram_adaptive_weight_traits_factory", HISTOGRAM)}, {"adaptivedistancehistogram", BuildAggrFuncFactoryCallback("AdaptiveDistanceHistogram", "histogram_adaptive_distance_traits_factory", HISTOGRAM)}, {"blockwardhistogram", BuildAggrFuncFactoryCallback("BlockWardHistogram", "histogram_block_ward_traits_factory", HISTOGRAM)}, {"blockweighthistogram", BuildAggrFuncFactoryCallback("BlockWeightHistogram", "histogram_block_weight_traits_factory", HISTOGRAM)}, {"linearhistogram", BuildAggrFuncFactoryCallback("LinearHistogram", "histogram_linear_traits_factory", LINEAR_HISTOGRAM)}, {"logarithmichistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM)}, {"loghistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM, "LogHistogram")}, {"hyperloglog", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE)}, {"hll", BuildAggrFuncFactoryCallback("HyperLogLog", 
"hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "HLL")}, {"countdistinctestimate", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "CountDistinctEstimate")}, {"variance", BuildAggrFuncFactoryCallback("Variance", "variance_0_1_traits_factory")}, {"stddev", BuildAggrFuncFactoryCallback("StdDev", "variance_1_1_traits_factory")}, {"populationvariance", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")}, {"variancepopulation", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")}, {"populationstddev", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")}, {"stddevpopulation", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")}, {"varpop", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")}, {"stddevpop", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")}, {"varp", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")}, {"stddevp", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")}, {"variancesample", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")}, {"stddevsample", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")}, {"varsamp", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")}, {"stddevsamp", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")}, {"vars", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")}, {"stddevs", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")}, {"correlation", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS)}, {"corr", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS, "Corr")}, {"covariance", 
BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covariance")}, {"covariancesample", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS)}, {"covarsamp", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarSamp")}, {"covar", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covar")}, {"covars", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarS")}, {"covariancepopulation", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS)}, {"covarpop", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarPop")}, {"covarp", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarP")}, {"udaf", BuildAggrFuncFactoryCallback("UDAF", "udaf_traits_factory", UDAF)}, // Window functions /// by SQL2011 should be with sort {"rank", BuildAggrFuncFactoryCallback("Rank", "rank_traits_factory", WINDOW_AUTOARGS)}, {"denserank", BuildAggrFuncFactoryCallback("DenseRank", "dense_rank_traits_factory", WINDOW_AUTOARGS)}, // \todo unsupported now, required count element in window //{"ntile", BuildAggrFuncFactoryCallback("Ntile", "ntile_traits_factory")}, //{"percentrank", BuildAggrFuncFactoryCallback("PercentRank", "percent_rank_traits_factory")}, //{"cumedist", BuildAggrFuncFactoryCallback("CumeDist", "cume_dist_traits_factory")}, {"firstvalue", BuildAggrFuncFactoryCallback("FirstValue", "first_value_traits_factory", {OverWindow})}, {"lastvalue", BuildAggrFuncFactoryCallback("LastValue", "last_value_traits_factory", {OverWindow})}, {"firstvalueignorenulls", BuildAggrFuncFactoryCallback("FirstValueIgnoreNulls", "first_value_ignore_nulls_traits_factory", {OverWindow})}, {"lastvalueignorenulls", 
BuildAggrFuncFactoryCallback("LastValueIgnoreNulls", "last_value_ignore_nulls_traits_factory", {OverWindow})}, }; return aggrFuncs; } TCoreFuncMap MakeCoreFuncs() { TCoreFuncMap coreFuncs = { {"listindexof", { "IndexOf", 2, 2}}, {"testbit", { "TestBit", 2, 2}}, {"setbit", { "SetBit", 2, 2}}, {"clearbit", { "ClearBit", 2, 2}}, {"flipbit", { "FlipBit", 2, 2 }}, {"toset", { "ToSet", 1, 1 }}, {"setisdisjoint", { "SetIsDisjoint", 2, 2}}, {"setintersection", { "SetIntersection", 2, 3}}, {"setincludes", { "SetIncludes", 2, 2}}, {"setunion", { "SetUnion", 2, 3}}, {"setdifference", { "SetDifference", 2, 2}}, {"setsymmetricdifference", { "SetSymmetricDifference", 2, 3}}, {"listaggregate", { "ListAggregate", 2, 2}}, {"dictaggregate", { "DictAggregate", 2, 2}}, }; return coreFuncs; } }; TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVector& args, const TString& nameSpace, EAggregateMode aggMode, bool* mustUseNamed, TFuncPrepareNameNode funcPrepareNameNode) { const TBuiltinFuncData* funcData = Singleton(); const TBuiltinFactoryCallbackMap& builtinFuncs = funcData->BuiltinFuncs; const TAggrFuncFactoryCallbackMap& aggrFuncs = funcData->AggrFuncs; const TCoreFuncMap& coreFuncs = funcData->CoreFuncs; for (auto& arg: args) { if (!arg) { return nullptr; } } TString normalizedName(name); TString ns = to_lower(nameSpace); if (ns.empty()) { TMaybe error = NormalizeName(pos, normalizedName); if (!error.Empty()) { return new TInvalidBuiltin(pos, error->GetMessage()); } auto coreFunc = coreFuncs.find(normalizedName); if (coreFunc != coreFuncs.end()) { ns = "core"; name = coreFunc->second.Name; if (args.size() < coreFunc->second.MinArgs || args.size() > coreFunc->second.MaxArgs) { return new TInvalidBuiltin(pos, TStringBuilder() << name << " expected from " << coreFunc->second.MinArgs << " to " << coreFunc->second.MaxArgs << " arguments, but got: " << args.size()); } if (coreFunc->second.MinArgs != coreFunc->second.MaxArgs) { name += ToString(args.size()); } } 
} TString moduleResource; if (ctx.Settings.ModuleMapping.contains(ns)) { moduleResource = ctx.Settings.ModuleMapping.at(ns); } if (ns == "js") { ns = "javascript"; } auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(ns); if (scriptType == NKikimr::NMiniKQL::EScriptType::SystemPython2) { scriptType = NKikimr::NMiniKQL::EScriptType::Python2; } if (ns == "yql") { return new TCallNodeImpl(pos, name, -1, -1, args); } else if (ns == "string" && name == "SplitToList") { TNodePtr positionalArgs; TNodePtr namedArgs; if (mustUseNamed && *mustUseNamed) { YQL_ENSURE(args.size() == 2); positionalArgs = args[0]; namedArgs = args[1]; *mustUseNamed = false; } TVector reuseArgs; if (!namedArgs && args && funcPrepareNameNode) { TString reusedBaseName = TStringBuilder() << "Arg" << to_title(nameSpace) << to_title(name); reuseArgs.reserve(args.size()); for (const auto& arg: args) { reuseArgs.push_back(funcPrepareNameNode(reusedBaseName, arg)); } } auto usedArgs = reuseArgs ? reuseArgs : args; TVector positionalArgsElements; if (namedArgs) { auto positionalArgsTuple = dynamic_cast(positionalArgs.Get()); Y_DEBUG_ABORT_UNLESS(positionalArgsTuple, "unexpected value at String::SplitToList positional args"); positionalArgsElements = positionalArgsTuple->Elements(); } else { positionalArgsElements = usedArgs; } auto positionalArgsTupleSize = positionalArgsElements.size(); auto argsSize = positionalArgsTupleSize; TNodePtr trueLiteral = BuildLiteralBool(pos, "true"); TNodePtr falseLiteral = BuildLiteralBool(pos, "false"); TNodePtr namedDelimeterStringArg; TNodePtr namedSkipEmptyArg; bool hasDelimeterString = false; if (auto namedArgsStruct = dynamic_cast(namedArgs.Get())) { auto exprs = namedArgsStruct->GetExprs(); for (auto& expr : exprs) { if (expr->GetLabel() == "DelimeterString") { hasDelimeterString = true; break; } } argsSize += namedArgsStruct->GetExprs().size(); } if (argsSize < 3) { positionalArgsElements.push_back(falseLiteral); } if (argsSize < 4 && !hasDelimeterString) { 
positionalArgsElements.push_back(trueLiteral); } if (namedArgs) { positionalArgs = BuildTuple(pos, positionalArgsElements); } else { usedArgs = positionalArgsElements; } TNodePtr customUserType = nullptr; const auto& udfArgs = BuildUdfArgs(ctx, pos, usedArgs, positionalArgs, namedArgs, customUserType); TNodePtr udfNode = BuildUdf(ctx, pos, nameSpace, name, udfArgs); TVector applyArgs = { udfNode }; applyArgs.insert(applyArgs.end(), usedArgs.begin(), usedArgs.end()); return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs); } else if (moduleResource) { auto exportName = ns == "core" ? name : "$" + name; TVector applyArgs = { new TCallNodeImpl(pos, "bind", { BuildAtom(pos, ns + "_module", 0), BuildQuotedAtom(pos, exportName) }) }; applyArgs.insert(applyArgs.end(), args.begin(), args.end()); return new TCallNodeImpl(pos, "Apply", applyArgs); } else if (ns == "hyperscan" || ns == "pcre" || ns == "pire" || ns.StartsWith("re2")) { TString moduleName(nameSpace); moduleName.to_title(); if ((args.size() == 1 || args.size() == 2) && (name.StartsWith("Multi") || (ns.StartsWith("re2") && name == "Capture"))) { TVector multiArgs{ ns.StartsWith("re2") && name == "Capture" ? MakePair(pos, args) : args[0], new TCallNodeImpl(pos, "Void", 0, 0, {}), args[0] }; auto fullName = moduleName + "." + name; return new TYqlTypeConfigUdf(pos, fullName, multiArgs, multiArgs.size() + 1); } else if (!(ns.StartsWith("re2") && name == "Options")) { auto newArgs = args; if (ns.StartsWith("re2")) { // convert run config is tuple of string and optional options if (args.size() == 1 || args.size() == 2) { newArgs[0] = MakePair(pos, args); if (args.size() == 2) { newArgs.pop_back(); } } else { return new TInvalidBuiltin(pos, TStringBuilder() << ns << "." 
<< name << " expected one or two arguments."); } } return BuildUdf(ctx, pos, moduleName, name, newArgs); } } else if (ns == "datetime2" && (name == "Format" || name == "Parse")) { return BuildUdf(ctx, pos, nameSpace, name, args); } else if (scriptType != NKikimr::NMiniKQL::EScriptType::Unknown) { auto scriptName = NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType); return new TScriptUdf(pos, TString(scriptName), name, args); } else if (ns.empty()) { auto type = NormalizeTypeString(normalizedName); if (AvailableDataTypes.contains(type)) { return new TYqlData(pos, type, args); } if (normalizedName == "tablename") { return new TTableName(pos, args, ctx.CurrCluster); } if (normalizedName == "aggregationfactory") { if (args.size() < 1 || !args[0]->GetLiteral("String")) { return new TInvalidBuiltin(pos, "AGGREGATION_FACTORY requries a function name"); } auto aggNormalizedName = *args[0]->GetLiteral("String"); auto error = NormalizeName(pos, aggNormalizedName); if (!error.Empty()) { return new TInvalidBuiltin(pos, error->GetMessage()); } if (aggNormalizedName == "aggregateby") { return new TInvalidBuiltin(pos, "AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY"); } if (aggNormalizedName == "multiaggregateby") { return new TInvalidBuiltin(pos, "MULTI_AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY"); } auto aggrCallback = aggrFuncs.find(aggNormalizedName); if (aggrCallback == aggrFuncs.end()) { return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String")); } if (aggMode == EAggregateMode::Distinct) { return new TInvalidBuiltin(pos, "Only aggregation functions allow DISTINCT set specification"); } return (*aggrCallback).second(pos, args, aggMode, true).Release(); } if (normalizedName == "aggregateby" || normalizedName == "multiaggregateby") { const bool multi = (normalizedName == "multiaggregateby"); if (args.size() != 2) { return new TInvalidBuiltin(pos, TStringBuilder() << (multi ? 
"MULTI_AGGREGATE_BY" : "AGGREGATE_BY") << " requries two arguments"); } auto name = multi ? "MultiAggregateBy" : "AggregateBy"; auto aggr = BuildFactoryAggregation(pos, name, "", aggMode, multi); return new TBasicAggrFunc(pos, name, aggr, args); } auto aggrCallback = aggrFuncs.find(normalizedName); if (aggrCallback != aggrFuncs.end()) { return (*aggrCallback).second(pos, args, aggMode, false).Release(); } if (aggMode == EAggregateMode::Distinct) { return new TInvalidBuiltin(pos, "Only aggregation functions allow DISTINCT set specification"); } auto builtinCallback = builtinFuncs.find(normalizedName); if (builtinCallback != builtinFuncs.end()) { return (*builtinCallback).second(pos, args); } else if (normalizedName == "asstruct" || normalizedName == "structtype") { if (args.empty()) { return new TCallNodeImpl(pos, normalizedName == "asstruct" ? "AsStruct" : "StructType", 0, 0, args); } if (mustUseNamed && *mustUseNamed) { *mustUseNamed = false; YQL_ENSURE(args.size() == 2); Y_DEBUG_ABORT_UNLESS(dynamic_cast(args[0].Get())); auto posArgs = static_cast(args[0].Get()); if (posArgs->IsEmpty()) { if (normalizedName == "asstruct") { return args[1]; } else { Y_DEBUG_ABORT_UNLESS(dynamic_cast(args[1].Get())); auto namedArgs = static_cast(args[1].Get()); return new TStructTypeNode(pos, namedArgs->GetExprs()); } } } return new TInvalidBuiltin(pos, TStringBuilder() << (normalizedName == "asstruct" ? 
"AsStruct" : "StructType") << " requires all argument to be named"); } else if (normalizedName == "expandstruct") { if (mustUseNamed) { if (!*mustUseNamed) { return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires at least one named argument"); } *mustUseNamed = false; } YQL_ENSURE(args.size() == 2); auto posArgs = static_cast(args[0].Get()); Y_DEBUG_ABORT_UNLESS(dynamic_cast(args[0].Get())); Y_DEBUG_ABORT_UNLESS(dynamic_cast(args[1].Get())); if (posArgs->GetTupleSize() != 1) { return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires all arguments except first to be named"); } TVector flattenMembersArgs = { BuildTuple(pos, {BuildQuotedAtom(pos, ""), posArgs->GetTupleElement(0)}), BuildTuple(pos, {BuildQuotedAtom(pos, ""), args[1]}), }; return new TCallNodeImpl(pos, "FlattenMembers", 2, 2, flattenMembersArgs); } else { return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown builtin: " << name << ", to use YQL functions, try YQL::" << name); } } TNodePtr positionalArgs; TNodePtr namedArgs; if (mustUseNamed && *mustUseNamed) { YQL_ENSURE(args.size() == 2); positionalArgs = args[0]; namedArgs = args[1]; *mustUseNamed = false; } TVector reuseArgs; if (!namedArgs && args && funcPrepareNameNode) { TString reusedBaseName = TStringBuilder() << "Arg" << to_title(nameSpace) << to_title(name); reuseArgs.reserve(args.size()); for (const auto& arg: args) { reuseArgs.push_back(funcPrepareNameNode(reusedBaseName, arg)); } } auto usedArgs = reuseArgs ? reuseArgs : args; TNodePtr customUserType = nullptr; if (ns == "yson") { if (name == "ConvertTo" && usedArgs.size() > 1) { customUserType = usedArgs[1]; usedArgs.erase(usedArgs.begin() + 1); } ui32 optionsIndex = name.Contains("Lookup") ? 
2 : 1; if (usedArgs.size() <= optionsIndex && (ctx.PragmaYsonAutoConvert || ctx.PragmaYsonStrict)) { usedArgs.push_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict)); } } else if (ns == "json") { ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_JSON_UDF) << "Json UDF is deprecated and is going to be removed, please switch to Yson UDF that also supports Json input: https://yql.yandex-team.ru/docs/yt/udf/list/yson/"; } const auto& udfArgs = BuildUdfArgs(ctx, pos, usedArgs, positionalArgs, namedArgs, customUserType); TNodePtr udfNode = BuildUdf(ctx, pos, nameSpace, name, udfArgs); TVector applyArgs = { udfNode }; applyArgs.insert(applyArgs.end(), usedArgs.begin(), usedArgs.end()); return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs); } } // namespace NSQLTranslationV0