context.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. #pragma once
  2. #include "source.h"
  3. #include "sql.h"
  4. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  5. #include <yql/essentials/core/issue/protos/issue_id.pb.h>
  6. #include <yql/essentials/public/issue/yql_warning.h>
  7. #include <yql/essentials/sql/settings/translation_settings.h>
  8. #include <yql/essentials/sql/cluster_mapping.h>
  9. #include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
  10. #include <util/generic/hash.h>
  11. #include <util/generic/map.h>
  12. #include <util/generic/maybe.h>
  13. #include <util/generic/set.h>
  14. #include <util/generic/deque.h>
  15. #include <util/generic/vector.h>
  16. #define ANTLR3_TOKEN(NAME) SQLv1LexerTokens::TOKEN_##NAME << 16
  17. #define ANTLR4_TOKEN(NAME) (SQLv1Antlr4Lexer::TOKEN_##NAME << 16) + 1
  18. #define IS_TOKEN(ID, NAME) (UnifiedToken(ID) == ANTLR3_TOKEN(NAME) || UnifiedToken(ID) == ANTLR4_TOKEN(NAME))
  19. namespace NSQLTranslationV1 {
  20. inline bool IsAnonymousName(const TString& name) {
  21. return name == "$_";
  22. }
  23. inline bool IsStreamingService(const TString& service) {
  24. return service == NYql::RtmrProviderName || service == NYql::PqProviderName;
  25. }
  26. struct TNodeWithUsageInfo : public TThrRefBase {
  27. explicit TNodeWithUsageInfo(const TNodePtr& node, TPosition namePos, int level)
  28. : Node(node)
  29. , NamePos(namePos)
  30. , Level(level)
  31. {}
  32. TNodePtr Node;
  33. TPosition NamePos;
  34. int Level = 0;
  35. bool IsUsed = false;
  36. };
  37. using TNodeWithUsageInfoPtr = TIntrusivePtr<TNodeWithUsageInfo>;
  38. using TNamedNodesMap = THashMap<TString, TDeque<TNodeWithUsageInfoPtr>>;
  39. using TBlocks = TVector<TNodePtr>;
  40. struct TScopedState : public TThrRefBase {
  41. TString CurrService;
  42. TDeferredAtom CurrCluster;
  43. bool PragmaClassicDivision = true;
  44. bool PragmaCheckedOps = false;
  45. bool StrictJoinKeyTypes = false;
  46. bool UnicodeLiterals = false;
  47. bool WarnUntypedStringLiterals = false;
  48. TNamedNodesMap NamedNodes;
  49. struct TLocal {
  50. TVector<std::pair<TString, TDeferredAtom>> UsedClusters;
  51. THashSet<TString> UsedPlainClusters;
  52. THashSet<INode*> UsedExprClusters;
  53. THashMap<INode*, std::pair<TString, TNodePtr>> ExprClustersMap;
  54. TVector<TNodePtr> ExprClusters;
  55. };
  56. TLocal Local;
  57. void UseCluster(const TString& service, const TDeferredAtom& cluster);
  58. const TVector<std::pair<TString, TDeferredAtom>>& GetUsedClusters();
  59. TNodePtr WrapCluster(const TDeferredAtom& cluster, TContext& ctx);
  60. void AddExprCluster(TNodePtr expr, TContext& ctx);
  61. void Clear();
  62. TNodePtr LookupNode(const TString& name);
  63. };
  64. using TScopedStatePtr = TIntrusivePtr<TScopedState>;
  65. class TColumnRefScope;
  66. enum class EColumnRefState {
  67. Deny,
  68. Allow,
  69. AsStringLiteral,
  70. AsPgType,
  71. MatchRecognize,
  72. };
  73. class TContext {
  74. public:
  75. TContext(const NSQLTranslation::TTranslationSettings& settings,
  76. const NSQLTranslation::TSQLHints& hints,
  77. NYql::TIssues& issues);
  78. virtual ~TContext();
  79. const NYql::TPosition& Pos() const;
  80. void PushCurrentBlocks(TBlocks* blocks);
  81. void PopCurrentBlocks();
  82. TBlocks& GetCurrentBlocks() const;
  83. TString MakeName(const TString& name);
  84. IOutputStream& Error(NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR);
  85. IOutputStream& Error(NYql::TPosition pos, NYql::TIssueCode code = NYql::TIssuesIds::DEFAULT_ERROR);
  86. IOutputStream& Warning(NYql::TPosition pos, NYql::TIssueCode code);
  87. IOutputStream& Info(NYql::TPosition pos);
  88. void SetWarningPolicyFor(NYql::TIssueCode code, NYql::EWarningAction action);
  89. const TString& Token(const NSQLv1Generated::TToken& token) {
  90. Position.Row = token.GetLine();
  91. Position.Column = token.GetColumn() + 1;
  92. return token.GetValue();
  93. }
  94. TPosition TokenPosition(const NSQLv1Generated::TToken& token) {
  95. TPosition pos = Position;
  96. pos.Row = token.GetLine();
  97. pos.Column = token.GetColumn() + 1;
  98. return pos;
  99. }
  100. inline void IncrementMonCounter(const TString& name, const TString& value) {
  101. if (IncrementMonCounterFunction) {
  102. IncrementMonCounterFunction(name, value);
  103. }
  104. }
  105. bool HasCluster(const TString& cluster) const {
  106. return GetClusterProvider(cluster).Defined();
  107. }
  108. TMaybe<TString> GetClusterProvider(const TString& cluster) const {
  109. TString unusedNormalizedClusterName;
  110. return GetClusterProvider(cluster, unusedNormalizedClusterName);
  111. }
  112. TMaybe<TString> GetClusterProvider(const TString& cluster, TString& normalizedClusterName) const {
  113. auto provider = ClusterMapping.GetClusterProvider(cluster, normalizedClusterName);
  114. if (!provider) {
  115. if (Settings.AssumeYdbOnClusterWithSlash && cluster.StartsWith('/')) {
  116. normalizedClusterName = cluster;
  117. return TString(NYql::KikimrProviderName);
  118. }
  119. if (Settings.DynamicClusterProvider) {
  120. normalizedClusterName = cluster.StartsWith('/') ? cluster : Settings.PathPrefix + "/" + cluster;
  121. return Settings.DynamicClusterProvider;
  122. }
  123. return Nothing();
  124. }
  125. return provider;
  126. }
  127. bool IsDynamicCluster(const TDeferredAtom& cluster) const;
  128. bool HasNonYtProvider(const ISource& source) const;
  129. bool UseUnordered(const ISource& source) const;
  130. bool UseUnordered(const TTableRef& table) const;
  131. bool SetPathPrefix(const TString& value, TMaybe<TString> arg = TMaybe<TString>());
  132. TNodePtr GetPrefixedPath(const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& path);
  133. TStringBuf GetPrefixPath(const TString& service, const TDeferredAtom& cluster) const;
  134. TNodePtr UniversalAlias(const TString& baseName, TNodePtr&& node);
  135. void BodyPart() {
  136. IntoHeading = false;
  137. }
  138. bool IsParseHeading() const {
  139. return IntoHeading;
  140. }
  141. bool IsAlreadyDeclared(const TString& varName) const;
  142. void DeclareVariable(const TString& varName, const TPosition& pos, const TNodePtr& typeNode, bool isWeak = false);
  143. bool AddExport(TPosition symbolPos, const TString& symbolName);
  144. TString AddImport(const TVector<TString>& modulePath);
  145. TString AddSimpleUdf(const TString& udf);
  146. void SetPackageVersion(const TString& packageName, ui32 version);
  147. bool IsStreamingService(const TStringBuf service) const;
  148. bool CheckColumnReference(TPosition pos, const TString& name) {
  149. const bool allowed = GetColumnReferenceState() != EColumnRefState::Deny;
  150. if (!allowed) {
  151. Error(pos) << "Column reference \"" << name << "\" is not allowed " << NoColumnErrorContext;
  152. IncrementMonCounter("sql_errors", "ColumnReferenceInScopeIsNotAllowed");
  153. }
  154. return allowed;
  155. }
  156. EColumnRefState GetColumnReferenceState() const {
  157. return ColumnReferenceState;
  158. }
  159. EColumnRefState GetTopLevelColumnReferenceState() const {
  160. return TopLevelColumnReferenceState;
  161. }
  162. TStringBuf GetMatchRecognizeDefineVar() const {
  163. YQL_ENSURE(EColumnRefState::MatchRecognize == ColumnReferenceState,
  164. "DefineVar can only be accessed within processing of MATCH_RECOGNIZE lambdas");
  165. return MatchRecognizeDefineVar;
  166. }
  167. TVector<NSQLTranslation::TSQLHint> PullHintForToken(NYql::TPosition tokenPos);
  168. void WarnUnusedHints();
  169. private:
  170. IOutputStream& MakeIssue(NYql::ESeverity severity, NYql::TIssueCode code, NYql::TPosition pos);
  171. private:
  172. NYql::TPosition Position;
  173. THolder<TStringOutput> IssueMsgHolder;
  174. NSQLTranslation::TClusterMapping ClusterMapping;
  175. TString PathPrefix;
  176. THashMap<TString, TString> ProviderPathPrefixes;
  177. THashMap<TString, TString> ClusterPathPrefixes;
  178. bool IntoHeading = true;
  179. NSQLTranslation::TSQLHints SQLHints;
  180. friend class TColumnRefScope;
  181. EColumnRefState ColumnReferenceState = EColumnRefState::Deny;
  182. EColumnRefState TopLevelColumnReferenceState = EColumnRefState::Deny;
  183. TString MatchRecognizeDefineVar;
  184. TString NoColumnErrorContext = "in current scope";
  185. TVector<TBlocks*> CurrentBlocks;
  186. public:
  187. THashMap<TString, std::pair<TPosition, TNodePtr>> Variables;
  188. THashSet<TString> WeakVariables;
  189. NSQLTranslation::TTranslationSettings Settings;
  190. std::unique_ptr<TMemoryPool> Pool;
  191. NYql::TIssues& Issues;
  192. TMap<TString, TNodePtr> UniversalAliases;
  193. THashSet<TString> Exports;
  194. THashMap<TString, TString> ImportModuleAliases;
  195. THashMap<TString, TString> RequiredModules;
  196. TMap<TString, TString> SimpleUdfs;
  197. NSQLTranslation::TIncrementMonCounterFunction IncrementMonCounterFunction;
  198. TScopedStatePtr Scoped;
  199. int ScopeLevel = 0;
  200. size_t AnonymousNameIndex = 0;
  201. TDeque<TScopedStatePtr> AllScopes;
  202. bool HasPendingErrors;
  203. THashMap<TString, ui32> GenIndexes;
  204. using TWinSpecsRef = std::reference_wrapper<TWinSpecs>;
  205. TDeque<TWinSpecsRef> WinSpecsScopes;
  206. bool PragmaRefSelect = false;
  207. bool PragmaSampleSelect = false;
  208. bool PragmaAllowDotInAlias = false;
  209. bool PragmaInferSchema = false;
  210. bool PragmaAutoCommit = false;
  211. bool PragmaUseTablePrefixForEach = false;
  212. bool SimpleColumns = true;
  213. bool CoalesceJoinKeysOnQualifiedAll = false;
  214. bool PragmaDirectRead = false;
  215. bool PragmaYsonFast = true;
  216. bool PragmaYsonAutoConvert = false;
  217. bool PragmaYsonStrict = true;
  218. bool PragmaRegexUseRe2 = true;
  219. bool PragmaPullUpFlatMapOverJoin = true;
  220. bool FilterPushdownOverJoinOptionalSide = false;
  221. bool RotateJoinTree = true;
  222. bool WarnUnnamedColumns = false;
  223. bool DiscoveryMode = false;
  224. bool EnableSystemColumns = true;
  225. bool DqEngineEnable = false;
  226. bool DqEngineForce = false;
  227. TString CostBasedOptimizer;
  228. TMaybe<bool> JsonQueryReturnsJsonDocument;
  229. TMaybe<bool> AnsiInForEmptyOrNullableItemsCollections;
  230. TMaybe<bool> AnsiRankForNullableKeys = true;
  231. const bool AnsiQuotedIdentifiers;
  232. bool AnsiOptionalAs = true;
  233. bool OrderedColumns = false;
  234. bool PositionalUnionAll = false;
  235. bool BogousStarInGroupByOverJoin = false;
  236. bool UnorderedSubqueries = true;
  237. bool PragmaDataWatermarks = true;
  238. bool WarnOnAnsiAliasShadowing = true;
  239. ui32 ResultRowsLimit = 0;
  240. ui64 ResultSizeLimit = 0;
  241. ui32 PragmaGroupByLimit = 1 << 6;
  242. ui32 PragmaGroupByCubeLimit = 5;
  243. // if FlexibleTypes=true, emit TypeOrMember callable and resolve Type/Column uncertainty on type annotation stage, otherwise always emit Type
  244. bool FlexibleTypes = false;
  245. // see YQL-10265
  246. bool AnsiCurrentRow = false;
  247. TMaybe<bool> YsonCastToString;
  248. using TLiteralWithPosition = std::pair<TString, TPosition>;
  249. using TLibraryStuff = std::tuple<TPosition, std::optional<TLiteralWithPosition>, std::optional<TLiteralWithPosition>>;
  250. std::unordered_map<TString, TLibraryStuff> Libraries; // alias -> optional file with token
  251. using TPackageStuff = std::tuple<
  252. TPosition, TLiteralWithPosition,
  253. std::optional<TLiteralWithPosition>
  254. >;
  255. std::unordered_map<TString, TPackageStuff> Packages; // alias -> url with optional token
  256. using TOverrideLibraryStuff = std::tuple<TPosition>;
  257. std::unordered_map<TString, TOverrideLibraryStuff> OverrideLibraries; // alias -> position
  258. THashMap<TString, ui32> PackageVersions;
  259. NYql::TWarningPolicy WarningPolicy;
  260. TString PqReadByRtmrCluster;
  261. bool EmitStartsWith = true;
  262. TMaybe<bool> EmitAggApply;
  263. bool UseBlocks = false;
  264. bool AnsiLike = false;
  265. bool FeatureR010 = false; //Row pattern recognition: FROM clause
  266. TMaybe<bool> CompactGroupBy;
  267. bool BlockEngineEnable = false;
  268. bool BlockEngineForce = false;
  269. bool UnorderedResult = false;
  270. ui64 ParallelModeCount = 0;
  271. bool CompactNamedExprs = false;
  272. bool ValidateUnusedExprs = false;
  273. bool AnsiImplicitCrossJoin = false; // select * from A,B
  274. bool DistinctOverWindow = false;
  275. bool SeqMode = false;
  276. };
  277. class TColumnRefScope {
  278. public:
  279. TColumnRefScope(TContext& ctx, EColumnRefState state, bool isTopLevelExpr = true, const TString& defineVar = "")
  280. : PrevTop(ctx.TopLevelColumnReferenceState)
  281. , Prev(ctx.ColumnReferenceState)
  282. , PrevErr(ctx.NoColumnErrorContext)
  283. , PrevDefineVar(ctx.MatchRecognizeDefineVar)
  284. , Ctx(ctx)
  285. {
  286. if (isTopLevelExpr) {
  287. Ctx.ColumnReferenceState = Ctx.TopLevelColumnReferenceState = state;
  288. } else {
  289. Ctx.ColumnReferenceState = state;
  290. }
  291. YQL_ENSURE(defineVar.empty() || EColumnRefState::MatchRecognize == state, "Internal logic error");
  292. ctx.MatchRecognizeDefineVar = defineVar;
  293. }
  294. void SetNoColumnErrContext(const TString& msg) {
  295. Ctx.NoColumnErrorContext = msg;
  296. }
  297. ~TColumnRefScope() {
  298. Ctx.TopLevelColumnReferenceState = PrevTop;
  299. Ctx.ColumnReferenceState = Prev;
  300. std::swap(Ctx.NoColumnErrorContext, PrevErr);
  301. std::swap(Ctx.MatchRecognizeDefineVar, PrevDefineVar);
  302. }
  303. private:
  304. const EColumnRefState PrevTop;
  305. const EColumnRefState Prev;
  306. TString PrevErr;
  307. TString PrevDefineVar;
  308. TContext& Ctx;
  309. };
  310. TMaybe<EColumnRefState> GetFunctionArgColumnStatus(TContext& ctx, const TString& module, const TString& func, size_t argIndex);
  311. class TTranslation {
  312. protected:
  313. typedef TSet<ui32> TSetType;
  314. protected:
  315. TTranslation(TContext& ctx);
  316. public:
  317. TContext& Context();
  318. IOutputStream& Error();
  319. const TString& Token(const NSQLv1Generated::TToken& token) {
  320. return Ctx.Token(token);
  321. }
  322. ui32 UnifiedToken(ui32 id) const {
  323. return Ctx.Settings.Antlr4Parser + (id << 16);
  324. }
  325. TString Identifier(const NSQLv1Generated::TToken& token) {
  326. return IdContent(Ctx, Token(token));
  327. }
  328. TString Identifier(const TString& str) const {
  329. return IdContent(Ctx, str);
  330. }
  331. TNodePtr GetNamedNode(const TString& name);
  332. using TNodeBuilderByName = std::function<TNodePtr(const TString& effectiveName)>;
  333. TString PushNamedNode(TPosition namePos, const TString& name, const TNodeBuilderByName& builder);
  334. TString PushNamedNode(TPosition namePos, const TString& name, TNodePtr node);
  335. TString PushNamedAtom(TPosition namePos, const TString& name);
  336. void PopNamedNode(const TString& name);
  337. void WarnUnusedNodes() const;
  338. template <typename TNode>
  339. void AltNotImplemented(const TString& ruleName, const TNode& node) {
  340. AltNotImplemented(ruleName, node.Alt_case(), node, TNode::descriptor());
  341. }
  342. template <typename TNode>
  343. TString AltDescription(const TNode& node) const {
  344. return AltDescription(node, node.Alt_case(), TNode::descriptor());
  345. }
  346. protected:
  347. void AltNotImplemented(const TString& ruleName, ui32 altCase, const google::protobuf::Message& node, const google::protobuf::Descriptor* descr);
  348. TString AltDescription(const google::protobuf::Message& node, ui32 altCase, const google::protobuf::Descriptor* descr) const;
  349. protected:
  350. TContext& Ctx;
  351. };
  352. } // namespace NSQLTranslationV1