#pragma once #include "source.h" #include "sql.h" #include #include #include #include #include #include #include #include #include #include #include #include #define ANTLR3_TOKEN(NAME) SQLv1LexerTokens::TOKEN_##NAME << 16 #define ANTLR4_TOKEN(NAME) (SQLv1Antlr4Lexer::TOKEN_##NAME << 16) + 1 #define IS_TOKEN(ID, NAME) (UnifiedToken(ID) == ANTLR3_TOKEN(NAME) || UnifiedToken(ID) == ANTLR4_TOKEN(NAME)) namespace NSQLTranslationV1 { inline bool IsAnonymousName(const TString& name) { return name == "$_"; } inline bool IsStreamingService(const TString& service) { return service == NYql::RtmrProviderName || service == NYql::PqProviderName; } struct TNodeWithUsageInfo : public TThrRefBase { explicit TNodeWithUsageInfo(const TNodePtr& node, TPosition namePos, int level) : Node(node) , NamePos(namePos) , Level(level) {} TNodePtr Node; TPosition NamePos; int Level = 0; bool IsUsed = false; }; using TNodeWithUsageInfoPtr = TIntrusivePtr; using TNamedNodesMap = THashMap>; using TBlocks = TVector; struct TScopedState : public TThrRefBase { TString CurrService; TDeferredAtom CurrCluster; bool PragmaClassicDivision = true; bool PragmaCheckedOps = false; bool StrictJoinKeyTypes = false; bool UnicodeLiterals = false; bool WarnUntypedStringLiterals = false; TNamedNodesMap NamedNodes; struct TLocal { TVector> UsedClusters; THashSet UsedPlainClusters; THashSet UsedExprClusters; THashMap> ExprClustersMap; TVector ExprClusters; }; TLocal Local; void UseCluster(const TString& service, const TDeferredAtom& cluster); const TVector>& GetUsedClusters(); TNodePtr WrapCluster(const TDeferredAtom& cluster, TContext& ctx); void AddExprCluster(TNodePtr expr, TContext& ctx); void Clear(); TNodePtr LookupNode(const TString& name); }; using TScopedStatePtr = TIntrusivePtr; class TColumnRefScope; enum class EColumnRefState { Deny, Allow, AsStringLiteral, AsPgType, MatchRecognizeMeasures, MatchRecognizeDefine, MatchRecognizeDefineAggregate, }; class TContext { public: TContext(const NSQLTranslation::TTranslationSettings& settings, const NSQLTranslation::TSQLHints& hints, NYql::TIssues& issues, const TString& query = {}); virtual ~TContext(); const NYql::TPosition& Pos() const; void PushCurrentBlocks(TBlocks* blocks); void PopCurrentBlocks(); TBlocks& GetCurrentBlocks() const; TString MakeName(const TString& name); IOutputStream& Error(NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR); IOutputStream& Error(NYql::TPosition pos, NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR); IOutputStream& Warning(NYql::TPosition pos, NYql::TIssueCode code); IOutputStream& Info(NYql::TPosition pos); void SetWarningPolicyFor(NYql::TIssueCode code, NYql::EWarningAction action); const TString& Token(const NSQLv1Generated::TToken& token) { Position.Row = token.GetLine(); Position.Column = token.GetColumn() + 1; return token.GetValue(); } TPosition TokenPosition(const NSQLv1Generated::TToken& token) { TPosition pos = Position; pos.Row = token.GetLine(); pos.Column = token.GetColumn() + 1; return pos; } inline void IncrementMonCounter(const TString& name, const TString& value) { if (IncrementMonCounterFunction) { IncrementMonCounterFunction(name, value); } } bool HasCluster(const TString& cluster) const { return GetClusterProvider(cluster).Defined(); } TMaybe GetClusterProvider(const TString& cluster) const { TString unusedNormalizedClusterName; return GetClusterProvider(cluster, unusedNormalizedClusterName); } TMaybe GetClusterProvider(const TString& cluster, TString& normalizedClusterName) const { auto provider = ClusterMapping.GetClusterProvider(cluster, normalizedClusterName); if (!provider) { if (Settings.AssumeYdbOnClusterWithSlash && cluster.StartsWith('/')) { normalizedClusterName = cluster; return TString(NYql::KikimrProviderName); } if (Settings.DynamicClusterProvider) { normalizedClusterName = cluster.StartsWith('/') ? cluster : Settings.PathPrefix + "/" + cluster; return Settings.DynamicClusterProvider; } return Nothing(); } return provider; } bool IsDynamicCluster(const TDeferredAtom& cluster) const; bool HasNonYtProvider(const ISource& source) const; bool UseUnordered(const ISource& source) const; bool UseUnordered(const TTableRef& table) const; bool SetPathPrefix(const TString& value, TMaybe arg = TMaybe()); TNodePtr GetPrefixedPath(const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& path); TStringBuf GetPrefixPath(const TString& service, const TDeferredAtom& cluster) const; TNodePtr UniversalAlias(const TString& baseName, TNodePtr&& node); void BodyPart() { IntoHeading = false; } bool IsParseHeading() const { return IntoHeading; } bool IsAlreadyDeclared(const TString& varName) const; void DeclareVariable(const TString& varName, const TPosition& pos, const TNodePtr& typeNode, bool isWeak = false); bool AddExport(TPosition symbolPos, const TString& symbolName); TString AddImport(const TVector& modulePath); TString AddSimpleUdf(const TString& udf); void SetPackageVersion(const TString& packageName, ui32 version); bool IsStreamingService(const TStringBuf service) const; bool CheckColumnReference(TPosition pos, const TString& name) { const bool allowed = GetColumnReferenceState() != EColumnRefState::Deny; if (!allowed) { Error(pos) << "Column reference \"" << name << "\" is not allowed " << NoColumnErrorContext; IncrementMonCounter("sql_errors", "ColumnReferenceInScopeIsNotAllowed"); } return allowed; } EColumnRefState GetColumnReferenceState() const { return ColumnReferenceState; } EColumnRefState GetTopLevelColumnReferenceState() const { return TopLevelColumnReferenceState; } [[nodiscard]] TString GetMatchRecognizeDefineVar() const { YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState, "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas"); return MatchRecognizeDefineVar; } TString ExtractMatchRecognizeAggrVar() { YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState, "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas"); return std::exchange(MatchRecognizeAggrVar, ""); } [[nodiscard]] bool SetMatchRecognizeAggrVar(TString var) { YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState, "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas"); if (MatchRecognizeAggrVar.empty()) { MatchRecognizeAggrVar = std::move(var); } else if (MatchRecognizeAggrVar != var) { Error() << "Illegal use of aggregates or navigation operators in MATCH_RECOGNIZE"; return false; } return true; } TVector PullHintForToken(NYql::TPosition tokenPos); void WarnUnusedHints(); private: IOutputStream& MakeIssue(NYql::ESeverity severity, NYql::TIssueCode code, NYql::TPosition pos); private: NYql::TPosition Position; THolder IssueMsgHolder; NSQLTranslation::TClusterMapping ClusterMapping; TString PathPrefix; THashMap ProviderPathPrefixes; THashMap ClusterPathPrefixes; bool IntoHeading = true; NSQLTranslation::TSQLHints SQLHints; friend class TColumnRefScope; EColumnRefState ColumnReferenceState = EColumnRefState::Deny; EColumnRefState TopLevelColumnReferenceState = EColumnRefState::Deny; TString MatchRecognizeDefineVar; TString MatchRecognizeAggrVar; TString NoColumnErrorContext = "in current scope"; TVector CurrentBlocks; public: THashMap> Variables; THashSet WeakVariables; NSQLTranslation::TTranslationSettings Settings; const TString Query; std::unique_ptr Pool; NYql::TIssues& Issues; TMap UniversalAliases; THashSet Exports; THashMap ImportModuleAliases; THashMap RequiredModules; TMap SimpleUdfs; NSQLTranslation::TIncrementMonCounterFunction IncrementMonCounterFunction; TScopedStatePtr Scoped; int ScopeLevel = 0; size_t AnonymousNameIndex = 0; TDeque AllScopes; bool HasPendingErrors; THashMap GenIndexes; using TWinSpecsRef = std::reference_wrapper; TDeque WinSpecsScopes; bool PragmaRefSelect = false; bool PragmaSampleSelect = false; bool PragmaAllowDotInAlias = false; bool PragmaInferSchema = false; bool PragmaAutoCommit = false; bool PragmaUseTablePrefixForEach = false; bool SimpleColumns = true; bool CoalesceJoinKeysOnQualifiedAll = false; bool PragmaDirectRead = false; bool PragmaYsonFast = true; bool PragmaYsonAutoConvert = false; bool PragmaYsonStrict = true; bool PragmaRegexUseRe2 = true; bool PragmaPullUpFlatMapOverJoin = true; bool FilterPushdownOverJoinOptionalSide = false; bool RotateJoinTree = true; bool WarnUnnamedColumns = false; bool DiscoveryMode = false; bool EnableSystemColumns = true; bool DqEngineEnable = false; bool DqEngineForce = false; TString CostBasedOptimizer; TMaybe JsonQueryReturnsJsonDocument; TMaybe AnsiInForEmptyOrNullableItemsCollections; TMaybe AnsiRankForNullableKeys = true; const bool AnsiQuotedIdentifiers; bool AnsiOptionalAs = true; bool OrderedColumns = false; bool PositionalUnionAll = false; bool BogousStarInGroupByOverJoin = false; bool UnorderedSubqueries = true; bool PragmaDataWatermarks = true; bool WarnOnAnsiAliasShadowing = true; ui32 ResultRowsLimit = 0; ui64 ResultSizeLimit = 0; ui32 PragmaGroupByLimit = 1 << 6; ui32 PragmaGroupByCubeLimit = 5; // if FlexibleTypes=true, emit TypeOrMember callable and resolve Type/Column uncertainty on type annotation stage, otherwise always emit Type bool FlexibleTypes = true; // see YQL-10265 bool AnsiCurrentRow = false; TMaybe YsonCastToString; using TLiteralWithPosition = std::pair; using TLibraryStuff = std::tuple, std::optional>; std::unordered_map Libraries; // alias -> optional file with token using TPackageStuff = std::tuple< TPosition, TLiteralWithPosition, std::optional >; std::unordered_map Packages; // alias -> url with optional token using TOverrideLibraryStuff = std::tuple; std::unordered_map OverrideLibraries; // alias -> position THashMap PackageVersions; NYql::TWarningPolicy WarningPolicy; TString PqReadByRtmrCluster; bool EmitStartsWith = true; TMaybe EmitAggApply; bool UseBlocks = false; bool EmitTableSource = false; bool AnsiLike = false; bool FeatureR010 = false; //Row pattern recognition: FROM clause TMaybe CompactGroupBy; bool BlockEngineEnable = false; bool BlockEngineForce = false; bool UnorderedResult = false; ui64 ParallelModeCount = 0; bool CompactNamedExprs = true; bool ValidateUnusedExprs = false; bool AnsiImplicitCrossJoin = false; // select * from A,B bool DistinctOverWindow = false; bool SeqMode = false; bool EmitUnionMerge = false; TVector ForAllStatementsParts; TMaybe Engine; }; class TColumnRefScope { public: TColumnRefScope(TContext& ctx, EColumnRefState state, bool isTopLevelExpr = true, const TString& defineVar = "") : PrevTop(ctx.TopLevelColumnReferenceState) , Prev(ctx.ColumnReferenceState) , PrevErr(ctx.NoColumnErrorContext) , PrevDefineVar(ctx.MatchRecognizeDefineVar) , Ctx(ctx) { if (isTopLevelExpr) { Ctx.ColumnReferenceState = Ctx.TopLevelColumnReferenceState = state; } else { Ctx.ColumnReferenceState = state; } YQL_ENSURE( defineVar.empty() || EColumnRefState::MatchRecognizeMeasures == state || EColumnRefState::MatchRecognizeDefine == state || EColumnRefState::MatchRecognizeDefineAggregate == state, "Internal logic error" ); ctx.MatchRecognizeDefineVar = defineVar; } void SetNoColumnErrContext(const TString& msg) { Ctx.NoColumnErrorContext = msg; } ~TColumnRefScope() { Ctx.TopLevelColumnReferenceState = PrevTop; Ctx.ColumnReferenceState = Prev; std::swap(Ctx.NoColumnErrorContext, PrevErr); std::swap(Ctx.MatchRecognizeDefineVar, PrevDefineVar); } private: const EColumnRefState PrevTop; const EColumnRefState Prev; TString PrevErr; TString PrevDefineVar; TContext& Ctx; }; TMaybe GetFunctionArgColumnStatus(TContext& ctx, const TString& module, const TString& func, size_t argIndex); class TTranslation { protected: typedef TSet TSetType; protected: TTranslation(TContext& ctx); public: TContext& Context(); IOutputStream& Error(); const TString& Token(const NSQLv1Generated::TToken& token) { return Ctx.Token(token); } ui32 UnifiedToken(ui32 id) const { return Ctx.Settings.Antlr4Parser + (id << 16); } TString Identifier(const NSQLv1Generated::TToken& token) { return IdContent(Ctx, Token(token)); } TString Identifier(const TString& str) const { return IdContent(Ctx, str); } TNodePtr GetNamedNode(const TString& name); using TNodeBuilderByName = std::function; TString PushNamedNode(TPosition namePos, const TString& name, const TNodeBuilderByName& builder); TString PushNamedNode(TPosition namePos, const TString& name, TNodePtr node); TString PushNamedAtom(TPosition namePos, const TString& name); void PopNamedNode(const TString& name); void WarnUnusedNodes() const; template void AltNotImplemented(const TString& ruleName, const TNode& node) { AltNotImplemented(ruleName, node.Alt_case(), node, TNode::descriptor()); } template TString AltDescription(const TNode& node) const { return AltDescription(node, node.Alt_case(), TNode::descriptor()); } protected: void AltNotImplemented(const TString& ruleName, ui32 altCase, const google::protobuf::Message& node, const google::protobuf::Descriptor* descr); TString AltDescription(const google::protobuf::Message& node, ui32 altCase, const google::protobuf::Descriptor* descr) const; protected: TContext& Ctx; }; } // namespace NSQLTranslationV1