context.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. #pragma once
  2. #include "source.h"
  3. #include "sql.h"
  4. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  5. #include <yql/essentials/core/issue/protos/issue_id.pb.h>
  6. #include <yql/essentials/public/issue/yql_warning.h>
  7. #include <yql/essentials/sql/settings/translation_settings.h>
  8. #include <yql/essentials/sql/cluster_mapping.h>
  9. #include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
  10. #include <util/generic/hash.h>
  11. #include <util/generic/map.h>
  12. #include <util/generic/maybe.h>
  13. #include <util/generic/set.h>
  14. #include <util/generic/deque.h>
  15. #include <util/generic/vector.h>
  // Token id helpers.
  //
  // A "unified" token id keeps the lexer token number in the bits above bit 16;
  // the ANTLR4 variant additionally has 1 added to it. Every replacement list is
  // fully parenthesized so that the macros can be embedded into a larger
  // expression (e.g. `ANTLR3_TOKEN(X) + 1` or `2 * ANTLR4_TOKEN(X)`) without the
  // surrounding operators re-associating with `<<` or `+`.
  #define ANTLR3_TOKEN(NAME) (SQLv1LexerTokens::TOKEN_##NAME << 16)
  #define ANTLR4_TOKEN(NAME) ((SQLv1Antlr4Lexer::TOKEN_##NAME << 16) + 1)

  // True when UnifiedToken(ID) equals either the ANTLR3 or the ANTLR4 encoding of
  // token NAME. Relies on a callable named UnifiedToken being visible at the
  // point of use.
  #define IS_TOKEN(ID, NAME) \
      ((UnifiedToken(ID) == ANTLR3_TOKEN(NAME)) || (UnifiedToken(ID) == ANTLR4_TOKEN(NAME)))
  19. namespace NSQLTranslationV1 {
  20. inline bool IsAnonymousName(const TString& name) {
  21. return name == "$_";
  22. }
  23. inline bool IsStreamingService(const TString& service) {
  24. return service == NYql::RtmrProviderName || service == NYql::PqProviderName;
  25. }
  26. struct TNodeWithUsageInfo : public TThrRefBase {
  27. explicit TNodeWithUsageInfo(const TNodePtr& node, TPosition namePos, int level)
  28. : Node(node)
  29. , NamePos(namePos)
  30. , Level(level)
  31. {}
  32. TNodePtr Node;
  33. TPosition NamePos;
  34. int Level = 0;
  35. bool IsUsed = false;
  36. };
  37. using TNodeWithUsageInfoPtr = TIntrusivePtr<TNodeWithUsageInfo>;
  38. using TNamedNodesMap = THashMap<TString, TDeque<TNodeWithUsageInfoPtr>>;
  39. using TBlocks = TVector<TNodePtr>;
  40. struct TScopedState : public TThrRefBase {
  41. TString CurrService;
  42. TDeferredAtom CurrCluster;
  43. bool PragmaClassicDivision = true;
  44. bool PragmaCheckedOps = false;
  45. bool StrictJoinKeyTypes = false;
  46. bool UnicodeLiterals = false;
  47. bool WarnUntypedStringLiterals = false;
  48. TNamedNodesMap NamedNodes;
  49. struct TLocal {
  50. TVector<std::pair<TString, TDeferredAtom>> UsedClusters;
  51. THashSet<TString> UsedPlainClusters;
  52. THashSet<INode*> UsedExprClusters;
  53. THashMap<INode*, std::pair<TString, TNodePtr>> ExprClustersMap;
  54. TVector<TNodePtr> ExprClusters;
  55. };
  56. TLocal Local;
  57. void UseCluster(const TString& service, const TDeferredAtom& cluster);
  58. const TVector<std::pair<TString, TDeferredAtom>>& GetUsedClusters();
  59. TNodePtr WrapCluster(const TDeferredAtom& cluster, TContext& ctx);
  60. void AddExprCluster(TNodePtr expr, TContext& ctx);
  61. void Clear();
  62. TNodePtr LookupNode(const TString& name);
  63. };
  64. using TScopedStatePtr = TIntrusivePtr<TScopedState>;
  class TColumnRefScope;

  /// Column reference mode stored in TContext and switched by TColumnRefScope.
  enum class EColumnRefState {
      /// TContext::CheckColumnReference reports an error in this state.
      Deny,
      Allow,
      AsStringLiteral,
      AsPgType,
      // The three states below are the ones accepted by the MATCH_RECOGNIZE
      // accessors of TContext and by TColumnRefScope when a define variable is
      // supplied.
      MatchRecognizeMeasures,
      MatchRecognizeDefine,
      MatchRecognizeDefineAggregate,
  };
  75. class TContext {
  76. public:
  77. TContext(const NSQLTranslation::TTranslationSettings& settings,
  78. const NSQLTranslation::TSQLHints& hints,
  79. NYql::TIssues& issues,
  80. const TString& query = {});
  81. virtual ~TContext();
  82. const NYql::TPosition& Pos() const;
  83. void PushCurrentBlocks(TBlocks* blocks);
  84. void PopCurrentBlocks();
  85. TBlocks& GetCurrentBlocks() const;
  86. TString MakeName(const TString& name);
  87. IOutputStream& Error(NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR);
  88. IOutputStream& Error(NYql::TPosition pos, NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR);
  89. IOutputStream& Warning(NYql::TPosition pos, NYql::TIssueCode code);
  90. IOutputStream& Info(NYql::TPosition pos);
  91. void SetWarningPolicyFor(NYql::TIssueCode code, NYql::EWarningAction action);
  92. const TString& Token(const NSQLv1Generated::TToken& token) {
  93. Position.Row = token.GetLine();
  94. Position.Column = token.GetColumn() + 1;
  95. return token.GetValue();
  96. }
  97. TPosition TokenPosition(const NSQLv1Generated::TToken& token) {
  98. TPosition pos = Position;
  99. pos.Row = token.GetLine();
  100. pos.Column = token.GetColumn() + 1;
  101. return pos;
  102. }
  103. inline void IncrementMonCounter(const TString& name, const TString& value) {
  104. if (IncrementMonCounterFunction) {
  105. IncrementMonCounterFunction(name, value);
  106. }
  107. }
  108. bool HasCluster(const TString& cluster) const {
  109. return GetClusterProvider(cluster).Defined();
  110. }
  111. TMaybe<TString> GetClusterProvider(const TString& cluster) const {
  112. TString unusedNormalizedClusterName;
  113. return GetClusterProvider(cluster, unusedNormalizedClusterName);
  114. }
  115. TMaybe<TString> GetClusterProvider(const TString& cluster, TString& normalizedClusterName) const {
  116. auto provider = ClusterMapping.GetClusterProvider(cluster, normalizedClusterName);
  117. if (!provider) {
  118. if (Settings.AssumeYdbOnClusterWithSlash && cluster.StartsWith('/')) {
  119. normalizedClusterName = cluster;
  120. return TString(NYql::KikimrProviderName);
  121. }
  122. if (Settings.DynamicClusterProvider) {
  123. normalizedClusterName = cluster.StartsWith('/') ? cluster : Settings.PathPrefix + "/" + cluster;
  124. return Settings.DynamicClusterProvider;
  125. }
  126. return Nothing();
  127. }
  128. return provider;
  129. }
  130. bool IsDynamicCluster(const TDeferredAtom& cluster) const;
  131. bool HasNonYtProvider(const ISource& source) const;
  132. bool UseUnordered(const ISource& source) const;
  133. bool UseUnordered(const TTableRef& table) const;
  134. bool SetPathPrefix(const TString& value, TMaybe<TString> arg = TMaybe<TString>());
  135. TNodePtr GetPrefixedPath(const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& path);
  136. TStringBuf GetPrefixPath(const TString& service, const TDeferredAtom& cluster) const;
  137. TNodePtr UniversalAlias(const TString& baseName, TNodePtr&& node);
  138. void BodyPart() {
  139. IntoHeading = false;
  140. }
  141. bool IsParseHeading() const {
  142. return IntoHeading;
  143. }
  144. bool IsAlreadyDeclared(const TString& varName) const;
  145. void DeclareVariable(const TString& varName, const TPosition& pos, const TNodePtr& typeNode, bool isWeak = false);
  146. bool AddExport(TPosition symbolPos, const TString& symbolName);
  147. TString AddImport(const TVector<TString>& modulePath);
  148. TString AddSimpleUdf(const TString& udf);
  149. void SetPackageVersion(const TString& packageName, ui32 version);
  150. bool IsStreamingService(const TStringBuf service) const;
  151. bool CheckColumnReference(TPosition pos, const TString& name) {
  152. const bool allowed = GetColumnReferenceState() != EColumnRefState::Deny;
  153. if (!allowed) {
  154. Error(pos) << "Column reference \"" << name << "\" is not allowed " << NoColumnErrorContext;
  155. IncrementMonCounter("sql_errors", "ColumnReferenceInScopeIsNotAllowed");
  156. }
  157. return allowed;
  158. }
  159. EColumnRefState GetColumnReferenceState() const {
  160. return ColumnReferenceState;
  161. }
  162. EColumnRefState GetTopLevelColumnReferenceState() const {
  163. return TopLevelColumnReferenceState;
  164. }
  165. [[nodiscard]] TString GetMatchRecognizeDefineVar() const {
  166. YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
  167. || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
  168. || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
  169. "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
  170. return MatchRecognizeDefineVar;
  171. }
  172. TString ExtractMatchRecognizeAggrVar() {
  173. YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
  174. || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
  175. || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
  176. "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
  177. return std::exchange(MatchRecognizeAggrVar, "");
  178. }
  179. [[nodiscard]] bool SetMatchRecognizeAggrVar(TString var) {
  180. YQL_ENSURE(EColumnRefState::MatchRecognizeMeasures == ColumnReferenceState
  181. || EColumnRefState::MatchRecognizeDefine == ColumnReferenceState
  182. || EColumnRefState::MatchRecognizeDefineAggregate == ColumnReferenceState,
  183. "MATCH_RECOGNIZE Var can only be accessed within processing of MATCH_RECOGNIZE lambdas");
  184. if (MatchRecognizeAggrVar.empty()) {
  185. MatchRecognizeAggrVar = std::move(var);
  186. } else if (MatchRecognizeAggrVar != var) {
  187. Error() << "Illegal use of aggregates or navigation operators in MATCH_RECOGNIZE";
  188. return false;
  189. }
  190. return true;
  191. }
  192. TVector<NSQLTranslation::TSQLHint> PullHintForToken(NYql::TPosition tokenPos);
  193. void WarnUnusedHints();
  194. private:
  195. IOutputStream& MakeIssue(NYql::ESeverity severity, NYql::TIssueCode code, NYql::TPosition pos);
  196. private:
  197. NYql::TPosition Position;
  198. THolder<TStringOutput> IssueMsgHolder;
  199. NSQLTranslation::TClusterMapping ClusterMapping;
  200. TString PathPrefix;
  201. THashMap<TString, TString> ProviderPathPrefixes;
  202. THashMap<TString, TString> ClusterPathPrefixes;
  203. bool IntoHeading = true;
  204. NSQLTranslation::TSQLHints SQLHints;
  205. friend class TColumnRefScope;
  206. EColumnRefState ColumnReferenceState = EColumnRefState::Deny;
  207. EColumnRefState TopLevelColumnReferenceState = EColumnRefState::Deny;
  208. TString MatchRecognizeDefineVar;
  209. TString MatchRecognizeAggrVar;
  210. TString NoColumnErrorContext = "in current scope";
  211. TVector<TBlocks*> CurrentBlocks;
  212. public:
  213. THashMap<TString, std::pair<TPosition, TNodePtr>> Variables;
  214. THashSet<TString> WeakVariables;
  215. NSQLTranslation::TTranslationSettings Settings;
  216. const TString Query;
  217. std::unique_ptr<TMemoryPool> Pool;
  218. NYql::TIssues& Issues;
  219. TMap<TString, TNodePtr> UniversalAliases;
  220. THashSet<TString> Exports;
  221. THashMap<TString, TString> ImportModuleAliases;
  222. THashMap<TString, TString> RequiredModules;
  223. TMap<TString, TString> SimpleUdfs;
  224. NSQLTranslation::TIncrementMonCounterFunction IncrementMonCounterFunction;
  225. TScopedStatePtr Scoped;
  226. int ScopeLevel = 0;
  227. size_t AnonymousNameIndex = 0;
  228. TDeque<TScopedStatePtr> AllScopes;
  229. bool HasPendingErrors;
  230. THashMap<TString, ui32> GenIndexes;
  231. using TWinSpecsRef = std::reference_wrapper<TWinSpecs>;
  232. TDeque<TWinSpecsRef> WinSpecsScopes;
  233. bool PragmaRefSelect = false;
  234. bool PragmaSampleSelect = false;
  235. bool PragmaAllowDotInAlias = false;
  236. bool PragmaInferSchema = false;
  237. bool PragmaAutoCommit = false;
  238. bool PragmaUseTablePrefixForEach = false;
  239. bool SimpleColumns = true;
  240. bool CoalesceJoinKeysOnQualifiedAll = false;
  241. bool PragmaDirectRead = false;
  242. bool PragmaYsonFast = true;
  243. bool PragmaYsonAutoConvert = false;
  244. bool PragmaYsonStrict = true;
  245. bool PragmaRegexUseRe2 = true;
  246. bool PragmaPullUpFlatMapOverJoin = true;
  247. bool FilterPushdownOverJoinOptionalSide = false;
  248. bool RotateJoinTree = true;
  249. bool WarnUnnamedColumns = false;
  250. bool DiscoveryMode = false;
  251. bool EnableSystemColumns = true;
  252. bool DqEngineEnable = false;
  253. bool DqEngineForce = false;
  254. TString CostBasedOptimizer;
  255. TMaybe<bool> JsonQueryReturnsJsonDocument;
  256. TMaybe<bool> AnsiInForEmptyOrNullableItemsCollections;
  257. TMaybe<bool> AnsiRankForNullableKeys = true;
  258. const bool AnsiQuotedIdentifiers;
  259. bool AnsiOptionalAs = true;
  260. bool OrderedColumns = false;
  261. bool PositionalUnionAll = false;
  262. bool BogousStarInGroupByOverJoin = false;
  263. bool UnorderedSubqueries = true;
  264. bool PragmaDataWatermarks = true;
  265. bool WarnOnAnsiAliasShadowing = true;
  266. ui32 ResultRowsLimit = 0;
  267. ui64 ResultSizeLimit = 0;
  268. ui32 PragmaGroupByLimit = 1 << 6;
  269. ui32 PragmaGroupByCubeLimit = 5;
  270. // if FlexibleTypes=true, emit TypeOrMember callable and resolve Type/Column uncertainty on type annotation stage, otherwise always emit Type
  271. bool FlexibleTypes = true;
  272. // see YQL-10265
  273. bool AnsiCurrentRow = false;
  274. TMaybe<bool> YsonCastToString;
  275. using TLiteralWithPosition = std::pair<TString, TPosition>;
  276. using TLibraryStuff = std::tuple<TPosition, std::optional<TLiteralWithPosition>, std::optional<TLiteralWithPosition>>;
  277. std::unordered_map<TString, TLibraryStuff> Libraries; // alias -> optional file with token
  278. using TPackageStuff = std::tuple<
  279. TPosition, TLiteralWithPosition,
  280. std::optional<TLiteralWithPosition>
  281. >;
  282. std::unordered_map<TString, TPackageStuff> Packages; // alias -> url with optional token
  283. using TOverrideLibraryStuff = std::tuple<TPosition>;
  284. std::unordered_map<TString, TOverrideLibraryStuff> OverrideLibraries; // alias -> position
  285. THashMap<TString, ui32> PackageVersions;
  286. NYql::TWarningPolicy WarningPolicy;
  287. TString PqReadByRtmrCluster;
  288. bool EmitStartsWith = true;
  289. TMaybe<bool> EmitAggApply;
  290. bool UseBlocks = false;
  291. bool EmitTableSource = false;
  292. bool AnsiLike = false;
  293. bool FeatureR010 = false; //Row pattern recognition: FROM clause
  294. TMaybe<bool> CompactGroupBy;
  295. bool BlockEngineEnable = false;
  296. bool BlockEngineForce = false;
  297. bool UnorderedResult = false;
  298. ui64 ParallelModeCount = 0;
  299. bool CompactNamedExprs = true;
  300. bool ValidateUnusedExprs = false;
  301. bool AnsiImplicitCrossJoin = false; // select * from A,B
  302. bool DistinctOverWindow = false;
  303. bool SeqMode = false;
  304. bool EmitUnionMerge = false;
  305. TVector<size_t> ForAllStatementsParts;
  306. };
  307. class TColumnRefScope {
  308. public:
  309. TColumnRefScope(TContext& ctx, EColumnRefState state, bool isTopLevelExpr = true, const TString& defineVar = "")
  310. : PrevTop(ctx.TopLevelColumnReferenceState)
  311. , Prev(ctx.ColumnReferenceState)
  312. , PrevErr(ctx.NoColumnErrorContext)
  313. , PrevDefineVar(ctx.MatchRecognizeDefineVar)
  314. , Ctx(ctx)
  315. {
  316. if (isTopLevelExpr) {
  317. Ctx.ColumnReferenceState = Ctx.TopLevelColumnReferenceState = state;
  318. } else {
  319. Ctx.ColumnReferenceState = state;
  320. }
  321. YQL_ENSURE(
  322. defineVar.empty()
  323. || EColumnRefState::MatchRecognizeMeasures == state
  324. || EColumnRefState::MatchRecognizeDefine == state
  325. || EColumnRefState::MatchRecognizeDefineAggregate == state,
  326. "Internal logic error"
  327. );
  328. ctx.MatchRecognizeDefineVar = defineVar;
  329. }
  330. void SetNoColumnErrContext(const TString& msg) {
  331. Ctx.NoColumnErrorContext = msg;
  332. }
  333. ~TColumnRefScope() {
  334. Ctx.TopLevelColumnReferenceState = PrevTop;
  335. Ctx.ColumnReferenceState = Prev;
  336. std::swap(Ctx.NoColumnErrorContext, PrevErr);
  337. std::swap(Ctx.MatchRecognizeDefineVar, PrevDefineVar);
  338. }
  339. private:
  340. const EColumnRefState PrevTop;
  341. const EColumnRefState Prev;
  342. TString PrevErr;
  343. TString PrevDefineVar;
  344. TContext& Ctx;
  345. };
  346. TMaybe<EColumnRefState> GetFunctionArgColumnStatus(TContext& ctx, const TString& module, const TString& func, size_t argIndex);
  347. class TTranslation {
  348. protected:
  349. typedef TSet<ui32> TSetType;
  350. protected:
  351. TTranslation(TContext& ctx);
  352. public:
  353. TContext& Context();
  354. IOutputStream& Error();
  355. const TString& Token(const NSQLv1Generated::TToken& token) {
  356. return Ctx.Token(token);
  357. }
  358. ui32 UnifiedToken(ui32 id) const {
  359. return Ctx.Settings.Antlr4Parser + (id << 16);
  360. }
  361. TString Identifier(const NSQLv1Generated::TToken& token) {
  362. return IdContent(Ctx, Token(token));
  363. }
  364. TString Identifier(const TString& str) const {
  365. return IdContent(Ctx, str);
  366. }
  367. TNodePtr GetNamedNode(const TString& name);
  368. using TNodeBuilderByName = std::function<TNodePtr(const TString& effectiveName)>;
  369. TString PushNamedNode(TPosition namePos, const TString& name, const TNodeBuilderByName& builder);
  370. TString PushNamedNode(TPosition namePos, const TString& name, TNodePtr node);
  371. TString PushNamedAtom(TPosition namePos, const TString& name);
  372. void PopNamedNode(const TString& name);
  373. void WarnUnusedNodes() const;
  374. template <typename TNode>
  375. void AltNotImplemented(const TString& ruleName, const TNode& node) {
  376. AltNotImplemented(ruleName, node.Alt_case(), node, TNode::descriptor());
  377. }
  378. template <typename TNode>
  379. TString AltDescription(const TNode& node) const {
  380. return AltDescription(node, node.Alt_case(), TNode::descriptor());
  381. }
  382. protected:
  383. void AltNotImplemented(const TString& ruleName, ui32 altCase, const google::protobuf::Message& node, const google::protobuf::Descriptor* descr);
  384. TString AltDescription(const google::protobuf::Message& node, ui32 altCase, const google::protobuf::Descriptor* descr) const;
  385. protected:
  386. TContext& Ctx;
  387. };
  388. } // namespace NSQLTranslationV1