context.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. #pragma once
  2. #include "source.h"
  3. #include "sql.h"
  4. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  5. #include <yql/essentials/core/issue/protos/issue_id.pb.h>
  6. #include <yql/essentials/public/issue/yql_warning.h>
  7. #include <yql/essentials/sql/settings/translation_settings.h>
  8. #include <yql/essentials/sql/cluster_mapping.h>
  9. #include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
  10. #include <util/generic/hash.h>
  11. #include <util/generic/map.h>
  12. #include <util/generic/maybe.h>
  13. #include <util/generic/set.h>
  14. #include <util/generic/deque.h>
  15. #include <util/generic/vector.h>
  // Unified token ids: the lexer token id is shifted left by 16 bits; ANTLR4 ids additionally
  // have 1 added (compare with TTranslation::UnifiedToken, which adds Settings.Antlr4Parser).
  // Each expansion is fully parenthesized so it keeps its value inside any larger expression.
  #define ANTLR3_TOKEN(NAME) (SQLv1LexerTokens::TOKEN_##NAME << 16)
  #define ANTLR4_TOKEN(NAME) ((SQLv1Antlr4Lexer::TOKEN_##NAME << 16) + 1)
  // True when the unified form of ID matches NAME under either lexer.
  // Requires a UnifiedToken(id) callable in scope at the point of use.
  #define IS_TOKEN(ID, NAME) (UnifiedToken(ID) == ANTLR3_TOKEN(NAME) || UnifiedToken(ID) == ANTLR4_TOKEN(NAME))
  19. namespace NSQLTranslationV1 {
  20. inline bool IsAnonymousName(const TString& name) {
  21. return name == "$_";
  22. }
  23. inline bool IsStreamingService(const TString& service) {
  24. return service == NYql::RtmrProviderName || service == NYql::PqProviderName;
  25. }
  26. struct TNodeWithUsageInfo : public TThrRefBase {
  27. explicit TNodeWithUsageInfo(const TNodePtr& node, TPosition namePos, int level)
  28. : Node(node)
  29. , NamePos(namePos)
  30. , Level(level)
  31. {}
  32. TNodePtr Node;
  33. TPosition NamePos;
  34. int Level = 0;
  35. bool IsUsed = false;
  36. };
  37. using TNodeWithUsageInfoPtr = TIntrusivePtr<TNodeWithUsageInfo>;
  38. using TNamedNodesMap = THashMap<TString, TDeque<TNodeWithUsageInfoPtr>>;
  39. using TBlocks = TVector<TNodePtr>;
  40. struct TScopedState : public TThrRefBase {
  41. TString CurrService;
  42. TDeferredAtom CurrCluster;
  43. bool PragmaClassicDivision = true;
  44. bool PragmaCheckedOps = false;
  45. bool StrictJoinKeyTypes = false;
  46. bool UnicodeLiterals = false;
  47. bool WarnUntypedStringLiterals = false;
  48. TNamedNodesMap NamedNodes;
  49. struct TLocal {
  50. TVector<std::pair<TString, TDeferredAtom>> UsedClusters;
  51. THashSet<TString> UsedPlainClusters;
  52. THashSet<INode*> UsedExprClusters;
  53. THashMap<INode*, std::pair<TString, TNodePtr>> ExprClustersMap;
  54. TVector<TNodePtr> ExprClusters;
  55. };
  56. TLocal Local;
  57. void UseCluster(const TString& service, const TDeferredAtom& cluster);
  58. const TVector<std::pair<TString, TDeferredAtom>>& GetUsedClusters();
  59. TNodePtr WrapCluster(const TDeferredAtom& cluster, TContext& ctx);
  60. void AddExprCluster(TNodePtr expr, TContext& ctx);
  61. void Clear();
  62. TNodePtr LookupNode(const TString& name);
  63. };
  64. using TScopedStatePtr = TIntrusivePtr<TScopedState>;
  65. class TColumnRefScope;
  /// Column-reference mode kept in TContext and switched by TColumnRefScope.
  enum class EColumnRefState {
      // TContext::CheckColumnReference reports an error while this mode is active.
      Deny,
      Allow,
      AsStringLiteral,
      AsPgType,
      // The three modes below are the only ones in which TContext's MATCH_RECOGNIZE
      // accessors pass their YQL_ENSURE checks.
      MatchRecognizeMeasures,
      MatchRecognizeDefine,
      MatchRecognizeDefineAggregate,
  };
  75. class TContext {
  76. public:
  77. //FIXME remove
  78. TContext(const NSQLTranslation::TTranslationSettings& settings,
  79. const NSQLTranslation::TSQLHints& hints,
  80. NYql::TIssues& issues,
  81. const TString& query = {});
  82. TContext(const TLexers& lexers,
  83. const TParsers& parsers,
  84. const NSQLTranslation::TTranslationSettings& settings,
  85. const NSQLTranslation::TSQLHints& hints,
  86. NYql::TIssues& issues,
  87. const TString& query = {});
  88. virtual ~TContext();
  89. const NYql::TPosition& Pos() const;
  90. void PushCurrentBlocks(TBlocks* blocks);
  91. void PopCurrentBlocks();
  92. TBlocks& GetCurrentBlocks() const;
  93. TString MakeName(const TString& name);
  94. IOutputStream& Error(NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR);
  95. IOutputStream& Error(NYql::TPosition pos, NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR);
  96. IOutputStream& Warning(NYql::TPosition pos, NYql::TIssueCode code);
  97. IOutputStream& Info(NYql::TPosition pos);
  98. void SetWarningPolicyFor(NYql::TIssueCode code, NYql::EWarningAction action);
  99. const TString& Token(const NSQLv1Generated::TToken& token) {
  100. Position.Row = token.GetLine();
  101. Position.Column = token.GetColumn() + 1;
  102. return token.GetValue();
  103. }
  104. TPosition TokenPosition(const NSQLv1Generated::TToken& token) {
  105. TPosition pos = Position;
  106. pos.Row = token.GetLine();
  107. pos.Column = token.GetColumn() + 1;
  108. return pos;
  109. }
  110. inline void IncrementMonCounter(const TString& name, const TString& value) {
  111. if (IncrementMonCounterFunction) {
  112. IncrementMonCounterFunction(name, value);
  113. }
  114. }
  115. bool HasCluster(const TString& cluster) const {
  116. return GetClusterProvider(cluster).Defined();
  117. }
  118. TMaybe<TString> GetClusterProvider(const TString& cluster) const {
  119. TString unusedNormalizedClusterName;
  120. return GetClusterProvider(cluster, unusedNormalizedClusterName);
  121. }
  122. TMaybe<TString> GetClusterProvider(const TString& cluster, TString& normalizedClusterName) const {
  123. auto provider = ClusterMapping.GetClusterProvider(cluster, normalizedClusterName);
  124. if (!provider) {
  125. if (Settings.AssumeYdbOnClusterWithSlash && cluster.StartsWith('/')) {
  126. normalizedClusterName = cluster;
  127. return TString(NYql::KikimrProviderName);
  128. }
  129. if (Settings.DynamicClusterProvider) {
  130. normalizedClusterName = cluster.StartsWith('/') ? cluster : Settings.PathPrefix + "/" + cluster;
  131. return Settings.DynamicClusterProvider;
  132. }
  133. return Nothing();
  134. }
  135. return provider;
  136. }
  137. bool IsDynamicCluster(const TDeferredAtom& cluster) const;
  138. bool HasNonYtProvider(const ISource& source) const;
  139. bool UseUnordered(const ISource& source) const;
  140. bool UseUnordered(const TTableRef& table) const;
  141. bool SetPathPrefix(const TString& value, TMaybe<TString> arg = TMaybe<TString>());
  142. TNodePtr GetPrefixedPath(const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& path);
  143. TStringBuf GetPrefixPath(const TString& service, const TDeferredAtom& cluster) const;
  144. TNodePtr UniversalAlias(const TString& baseName, TNodePtr&& node);
  145. void BodyPart() {
  146. IntoHeading = false;
  147. }
  148. bool IsParseHeading() const {
  149. return IntoHeading;
  150. }
  151. bool IsAlreadyDeclared(const TString& varName) const;
  152. void DeclareVariable(const TString& varName, const TPosition& pos, const TNodePtr& typeNode, bool isWeak = false);
  153. bool AddExport(TPosition symbolPos, const TString& symbolName);
  154. TString AddImport(const TVector<TString>& modulePath);
  155. TString AddSimpleUdf(const TString& udf);
  156. void SetPackageVersion(const TString& packageName, ui32 version);
  157. bool IsStreamingService(const TStringBuf service) const;
  158. bool CheckColumnReference(TPosition pos, const TString& name) {
  159. const bool allowed = GetColumnReferenceState() != EColumnRefState::Deny;
  160. if (!allowed) {
  161. Error(pos) << "Column reference \"" << name << "\" is not allowed " << NoColumnErrorContext;
  162. IncrementMonCounter("sql_errors", "ColumnReferenceInScopeIsNotAllowed");
  163. }
  164. return allowed;
  165. }
  166. EColumnRefState GetColumnReferenceState() const {
  167. return ColumnReferenceState;
  168. }
  169. EColumnRefState GetTopLevelColumnReferenceState() const {
  170. return TopLevelColumnReferenceState;
  171. }
  172. [[nodiscard]] TString GetMatchRecognizeDefineVar() const {
  173. YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
  174. || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
  175. || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
  176. "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
  177. return MatchRecognizeDefineVar;
  178. }
  179. TString ExtractMatchRecognizeAggrVar() {
  180. YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
  181. || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
  182. || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
  183. "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
  184. return std::exchange(MatchRecognizeAggrVar, "");
  185. }
  186. [[nodiscard]] bool SetMatchRecognizeAggrVar(TString var) {
  187. YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
  188. || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
  189. || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
  190. "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
  191. if (MatchRecognizeAggrVar.empty()) {
  192. MatchRecognizeAggrVar = std::move(var);
  193. } else if (MatchRecognizeAggrVar != var) {
  194. Error() << "Illegal use of aggregates or navigation operators in MATCH_RECOGNIZE";
  195. return false;
  196. }
  197. return true;
  198. }
  199. [[nodiscard]] auto& GetMatchRecognizeAggregations() {
  200. YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
  201. || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
  202. || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
  203. "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
  204. return MatchRecognizeAggregations;
  205. }
  206. TVector<NSQLTranslation::TSQLHint> PullHintForToken(NYql::TPosition tokenPos);
  207. void WarnUnusedHints();
  208. private:
  209. IOutputStream& MakeIssue(NYql::ESeverity severity, NYql::TIssueCode code, NYql::TPosition pos);
  210. public:
  211. const TLexers Lexers;
  212. const TParsers Parsers;
  213. private:
  214. NYql::TPosition Position;
  215. THolder<TStringOutput> IssueMsgHolder;
  216. NSQLTranslation::TClusterMapping ClusterMapping;
  217. TString PathPrefix;
  218. THashMap<TString, TString> ProviderPathPrefixes;
  219. THashMap<TString, TString> ClusterPathPrefixes;
  220. bool IntoHeading = true;
  221. NSQLTranslation::TSQLHints SQLHints;
  222. friend class TColumnRefScope;
  223. EColumnRefState ColumnReferenceState = EColumnRefState::Deny;
  224. EColumnRefState TopLevelColumnReferenceState = EColumnRefState::Deny;
  225. TString MatchRecognizeDefineVar;
  226. TString MatchRecognizeAggrVar;
  227. struct TMatchRecognizeAggregation {
  228. TString Var;
  229. TAggregationPtr Aggr;
  230. };
  231. TVector<TMatchRecognizeAggregation> MatchRecognizeAggregations;
  232. TString NoColumnErrorContext = "in current scope";
  233. TVector<TBlocks*> CurrentBlocks;
  234. public:
  235. THashMap<TString, std::pair<TPosition, TNodePtr>> Variables;
  236. THashSet<TString> WeakVariables;
  237. NSQLTranslation::TTranslationSettings Settings;
  238. const TString Query;
  239. std::unique_ptr<TMemoryPool> Pool;
  240. NYql::TIssues& Issues;
  241. TMap<TString, TNodePtr> UniversalAliases;
  242. THashSet<TString> Exports;
  243. THashMap<TString, TString> ImportModuleAliases;
  244. THashMap<TString, TString> RequiredModules;
  245. TMap<TString, TString> SimpleUdfs;
  246. NSQLTranslation::TIncrementMonCounterFunction IncrementMonCounterFunction;
  247. TScopedStatePtr Scoped;
  248. int ScopeLevel = 0;
  249. size_t AnonymousNameIndex = 0;
  250. TDeque<TScopedStatePtr> AllScopes;
  251. bool HasPendingErrors;
  252. THashMap<TString, ui32> GenIndexes;
  253. using TWinSpecsRef = std::reference_wrapper<TWinSpecs>;
  254. TDeque<TWinSpecsRef> WinSpecsScopes;
  255. bool PragmaRefSelect = false;
  256. bool PragmaSampleSelect = false;
  257. bool PragmaAllowDotInAlias = false;
  258. bool PragmaInferSchema = false;
  259. bool PragmaAutoCommit = false;
  260. bool PragmaUseTablePrefixForEach = false;
  261. bool SimpleColumns = true;
  262. bool CoalesceJoinKeysOnQualifiedAll = false;
  263. bool PragmaDirectRead = false;
  264. bool PragmaYsonFast = true;
  265. bool PragmaYsonAutoConvert = false;
  266. bool PragmaYsonStrict = true;
  267. bool PragmaRegexUseRe2 = true;
  268. bool PragmaPullUpFlatMapOverJoin = true;
  269. bool FilterPushdownOverJoinOptionalSide = false;
  270. bool RotateJoinTree = true;
  271. bool WarnUnnamedColumns = false;
  272. bool DiscoveryMode = false;
  273. bool EnableSystemColumns = true;
  274. bool DqEngineEnable = false;
  275. bool DqEngineForce = false;
  276. TString CostBasedOptimizer;
  277. TMaybe<bool> JsonQueryReturnsJsonDocument;
  278. TMaybe<bool> AnsiInForEmptyOrNullableItemsCollections;
  279. TMaybe<bool> AnsiRankForNullableKeys = true;
  280. const bool AnsiQuotedIdentifiers;
  281. bool AnsiOptionalAs = true;
  282. bool OrderedColumns = false;
  283. bool PositionalUnionAll = false;
  284. bool BogousStarInGroupByOverJoin = false;
  285. bool UnorderedSubqueries = true;
  286. bool PragmaDataWatermarks = true;
  287. bool WarnOnAnsiAliasShadowing = true;
  288. ui32 ResultRowsLimit = 0;
  289. ui64 ResultSizeLimit = 0;
  290. ui32 PragmaGroupByLimit = 1 << 6;
  291. ui32 PragmaGroupByCubeLimit = 5;
  292. // if FlexibleTypes=true, emit TypeOrMember callable and resolve Type/Column uncertainty on type annotation stage, otherwise always emit Type
  293. bool FlexibleTypes = true;
  294. // see YQL-10265
  295. bool AnsiCurrentRow = false;
  296. TMaybe<bool> YsonCastToString;
  297. using TLiteralWithPosition = std::pair<TString, TPosition>;
  298. using TLibraryStuff = std::tuple<TPosition, std::optional<TLiteralWithPosition>, std::optional<TLiteralWithPosition>>;
  299. std::unordered_map<TString, TLibraryStuff> Libraries; // alias -> optional file with token
  300. using TPackageStuff = std::tuple<
  301. TPosition, TLiteralWithPosition,
  302. std::optional<TLiteralWithPosition>
  303. >;
  304. std::unordered_map<TString, TPackageStuff> Packages; // alias -> url with optional token
  305. using TOverrideLibraryStuff = std::tuple<TPosition>;
  306. std::unordered_map<TString, TOverrideLibraryStuff> OverrideLibraries; // alias -> position
  307. THashMap<TString, ui32> PackageVersions;
  308. NYql::TWarningPolicy WarningPolicy;
  309. TString PqReadByRtmrCluster;
  310. bool EmitStartsWith = true;
  311. TMaybe<bool> EmitAggApply;
  312. bool UseBlocks = false;
  313. bool EmitTableSource = false;
  314. bool AnsiLike = false;
  315. bool FeatureR010 = false; //Row pattern recognition: FROM clause
  316. TMaybe<bool> CompactGroupBy;
  317. bool BlockEngineEnable = false;
  318. bool BlockEngineForce = false;
  319. bool UnorderedResult = false;
  320. ui64 ParallelModeCount = 0;
  321. bool CompactNamedExprs = true;
  322. bool ValidateUnusedExprs = false;
  323. bool AnsiImplicitCrossJoin = false; // select * from A,B
  324. bool DistinctOverWindow = false;
  325. bool SeqMode = false;
  326. bool EmitUnionMerge = false;
  327. TVector<size_t> ForAllStatementsParts;
  328. TMaybe<TString> Engine;
  329. };
  330. class TColumnRefScope {
  331. public:
  332. TColumnRefScope(TContext& ctx, EColumnRefState state, bool isTopLevelExpr = true, const TString& defineVar = "")
  333. : PrevTop(ctx.TopLevelColumnReferenceState)
  334. , Prev(ctx.ColumnReferenceState)
  335. , PrevErr(ctx.NoColumnErrorContext)
  336. , PrevDefineVar(ctx.MatchRecognizeDefineVar)
  337. , Ctx(ctx)
  338. {
  339. if (isTopLevelExpr) {
  340. Ctx.ColumnReferenceState = Ctx.TopLevelColumnReferenceState = state;
  341. } else {
  342. Ctx.ColumnReferenceState = state;
  343. }
  344. YQL_ENSURE(
  345. defineVar.empty()
  346. || EColumnRefState::MatchRecognizeMeasures == state
  347. || EColumnRefState::MatchRecognizeDefine == state
  348. || EColumnRefState::MatchRecognizeDefineAggregate == state,
  349. "Internal logic error"
  350. );
  351. ctx.MatchRecognizeDefineVar = defineVar;
  352. }
  353. void SetNoColumnErrContext(const TString& msg) {
  354. Ctx.NoColumnErrorContext = msg;
  355. }
  356. ~TColumnRefScope() {
  357. Ctx.TopLevelColumnReferenceState = PrevTop;
  358. Ctx.ColumnReferenceState = Prev;
  359. std::swap(Ctx.NoColumnErrorContext, PrevErr);
  360. std::swap(Ctx.MatchRecognizeDefineVar, PrevDefineVar);
  361. }
  362. private:
  363. const EColumnRefState PrevTop;
  364. const EColumnRefState Prev;
  365. TString PrevErr;
  366. TString PrevDefineVar;
  367. TContext& Ctx;
  368. };
  369. TMaybe<EColumnRefState> GetFunctionArgColumnStatus(TContext& ctx, const TString& module, const TString& func, size_t argIndex);
  370. class TTranslation {
  371. protected:
  372. typedef TSet<ui32> TSetType;
  373. protected:
  374. TTranslation(TContext& ctx);
  375. public:
  376. TContext& Context();
  377. IOutputStream& Error();
  378. const TString& Token(const NSQLv1Generated::TToken& token) {
  379. return Ctx.Token(token);
  380. }
  381. ui32 UnifiedToken(ui32 id) const {
  382. return Ctx.Settings.Antlr4Parser + (id << 16);
  383. }
  384. TString Identifier(const NSQLv1Generated::TToken& token) {
  385. return IdContent(Ctx, Token(token));
  386. }
  387. TString Identifier(const TString& str) const {
  388. return IdContent(Ctx, str);
  389. }
  390. TNodePtr GetNamedNode(const TString& name);
  391. using TNodeBuilderByName = std::function<TNodePtr(const TString& effectiveName)>;
  392. TString PushNamedNode(TPosition namePos, const TString& name, const TNodeBuilderByName& builder);
  393. TString PushNamedNode(TPosition namePos, const TString& name, TNodePtr node);
  394. TString PushNamedAtom(TPosition namePos, const TString& name);
  395. void PopNamedNode(const TString& name);
  396. void WarnUnusedNodes() const;
  397. template <typename TNode>
  398. void AltNotImplemented(const TString& ruleName, const TNode& node) {
  399. AltNotImplemented(ruleName, node.Alt_case(), node, TNode::descriptor());
  400. }
  401. template <typename TNode>
  402. TString AltDescription(const TNode& node) const {
  403. return AltDescription(node, node.Alt_case(), TNode::descriptor());
  404. }
  405. protected:
  406. void AltNotImplemented(const TString& ruleName, ui32 altCase, const google::protobuf::Message& node, const google::protobuf::Descriptor* descr);
  407. TString AltDescription(const google::protobuf::Message& node, ui32 altCase, const google::protobuf::Descriptor* descr) const;
  408. protected:
  409. TContext& Ctx;
  410. };
  411. } // namespace NSQLTranslationV1