yql_expr_optimize.cpp 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046
  1. #include "yql_expr_optimize.h"
  2. #include <util/generic/hash.h>
  3. #include "yql_expr_type_annotation.h"
  4. #include <yql/essentials/utils/log/log.h>
  5. #include <yql/essentials/utils/log/profile.h>
  6. namespace NYql {
  7. namespace {
  8. template<typename TOptimizer>
  9. struct TOptimizationContext : IOptimizationContext {
  10. TOptimizer Optimizer;
  11. TExprContext& Expr;
  12. const TOptimizeExprSettings& Settings;
  13. TNodeOnNodeOwnedMap Memoization;
  14. const TNodeOnNodeOwnedMap* Replaces;
  15. ui64 LastNodeId;
  16. bool HasRemaps;
  17. TOptimizationContext(TOptimizer optimizer, const TNodeOnNodeOwnedMap* replaces, TExprContext& expr, const TOptimizeExprSettings& settings)
  18. : Optimizer(optimizer)
  19. , Expr(expr)
  20. , Settings(settings)
  21. , Replaces(replaces)
  22. , LastNodeId(expr.NextUniqueId)
  23. , HasRemaps(false)
  24. {
  25. }
  26. void RemapNode(const TExprNode& fromNode, const TExprNode::TPtr& toNode) final {
  27. YQL_ENSURE(fromNode.UniqueId() <= LastNodeId);
  28. YQL_ENSURE(toNode->UniqueId() > LastNodeId);
  29. Memoization[&fromNode] = toNode;
  30. HasRemaps = true;
  31. if (Settings.ProcessedNodes) {
  32. Settings.ProcessedNodes->erase(fromNode.UniqueId());
  33. }
  34. }
  35. };
  36. TExprNode::TPtr RunOptimizer(TOptimizationContext<TCallableOptimizer>& ctx, const TExprNode::TPtr& node) {
  37. return (!ctx.Settings.VisitChanges && ctx.HasRemaps) ? node : ctx.Optimizer(node, ctx.Expr);
  38. }
  39. TExprNode::TPtr RunOptimizer(TOptimizationContext<TCallableOptimizerEx>& ctx, const TExprNode::TPtr& node) {
  40. return (!ctx.Settings.VisitChanges && ctx.HasRemaps) ? node : ctx.Optimizer(node, ctx.Expr, ctx);
  41. }
  42. TExprNode::TPtr RunOptimizer(TOptimizationContext<TCallableOptimizerFast>& ctx, bool& changed, const TExprNode::TPtr& node) {
  43. return (!ctx.Settings.VisitChanges && ctx.HasRemaps) ? node : ctx.Optimizer(node, changed, ctx.Expr);
  44. }
  45. template<typename TContext>
  46. TExprNode::TPtr ApplyRemaps(const TExprNode::TPtr& node, TContext& ctx) {
  47. const auto memoization = ctx.Memoization.find(node.Get());
  48. if (ctx.Memoization.cend() != memoization && memoization->second && memoization->second != node) {
  49. return memoization->second;
  50. }
  51. TExprNode::TListType newChildren;
  52. bool hasRemaps = false;
  53. for (const auto& child : node->Children()) {
  54. auto newChild = ApplyRemaps(child, ctx);
  55. YQL_ENSURE(newChild);
  56. if (newChild != child) {
  57. hasRemaps = true;
  58. }
  59. newChildren.emplace_back(std::move(newChild));
  60. }
  61. return hasRemaps ? ctx.Expr.ChangeChildren(*node, std::move(newChildren)) : node;
  62. }
  63. void AddExpected(const TExprNode& src, const TExprNode& dst, TExprContext& ctx, const TOptimizeExprSettings& settings) {
  64. if (!src.GetTypeAnn() || !settings.Types) {
  65. return;
  66. }
  67. if (src.Type() == TExprNode::Argument || src.Type() == TExprNode::Arguments) {
  68. return;
  69. }
  70. settings.Types->ExpectedTypes[dst.UniqueId()] = src.GetTypeAnn();
  71. if (src.GetState() >= TExprNode::EState::ConstrComplete) {
  72. settings.Types->ExpectedConstraints[dst.UniqueId()] = src.GetAllConstraints();
  73. } else {
  74. settings.Types->ExpectedConstraints.erase(dst.UniqueId());
  75. }
  76. auto columnOrder = settings.Types->LookupColumnOrder(src);
  77. if (columnOrder) {
  78. settings.Types->ExpectedColumnOrders[dst.UniqueId()] = *columnOrder;
  79. } else {
  80. settings.Types->ExpectedColumnOrders.erase(dst.UniqueId());
  81. }
  82. if (dst.GetTypeAnn()) {
  83. // we should check expected types / constraints / column order immediately
  84. // TODO: check constraints
  85. CheckExpectedTypeAndColumnOrder(dst, ctx, *settings.Types);
  86. }
  87. }
  88. template<typename TContext>
  89. TExprNode::TPtr OptimizeNode(const TExprNode::TPtr& node, TContext& ctx, size_t level) {
  90. if ((!ctx.Replaces && node->Type() == TExprNode::Argument) || node->Type() == TExprNode::Atom ||
  91. node->Type() == TExprNode::Arguments || node->Type() == TExprNode::World) {
  92. return node;
  93. }
  94. if (!ctx.Settings.VisitStarted && node->StartsExecution()) {
  95. return node;
  96. }
  97. if (ctx.Settings.VisitChecker && !ctx.Settings.VisitChecker(*node)) {
  98. return node;
  99. }
  100. YQL_ENSURE(level < 3000U, "Too deep graph!");
  101. if (ctx.Settings.ProcessedNodes) {
  102. if (ctx.Settings.ProcessedNodes->find(node->UniqueId()) != ctx.Settings.ProcessedNodes->cend()) {
  103. return node;
  104. }
  105. }
  106. const auto it = ctx.Memoization.find(node.Get());
  107. if (it != ctx.Memoization.cend()) {
  108. return it->second ? it->second : node;
  109. }
  110. TExprNode::TPtr current = node;
  111. if (ctx.Replaces) {
  112. const auto it = ctx.Replaces->find(node.Get());
  113. if (it != ctx.Replaces->cend()) {
  114. current = it->second;
  115. }
  116. }
  117. TExprNode::TPtr ret;
  118. if (current->Type() == TExprNode::Lambda) {
  119. ret = current;
  120. if (ctx.Settings.VisitLambdas) {
  121. TExprNode::TListType newBody;
  122. newBody.reserve(node->ChildrenSize() - 1U);
  123. bool bodyChanged = false;
  124. for (ui32 i = 1U; i < node->ChildrenSize(); ++i) {
  125. const auto& oldNode = node->ChildPtr(i);
  126. auto newNode = OptimizeNode(oldNode, ctx, level + 1);
  127. if (!newNode)
  128. return nullptr;
  129. bodyChanged = bodyChanged || newNode != oldNode;
  130. if (newNode->ForDisclosing()) {
  131. auto list = newNode->ChildrenList();
  132. std::move(list.begin(), list.end(), std::back_inserter(newBody));
  133. } else {
  134. newBody.emplace_back(std::move(newNode));
  135. }
  136. }
  137. if (bodyChanged) {
  138. ret = ctx.Expr.DeepCopyLambda(*current, std::move(newBody));
  139. AddExpected(*node, *ret, ctx.Expr, ctx.Settings);
  140. }
  141. }
  142. } else {
  143. TExprNode::TListType newChildren;
  144. newChildren.reserve(current->ChildrenSize());
  145. bool hasRenames = false;
  146. for (auto& child : current->Children()) {
  147. auto newChild = OptimizeNode(child, ctx, level + 1);
  148. if (!newChild)
  149. return nullptr;
  150. if (newChild != child) {
  151. hasRenames = true;
  152. }
  153. if (newChild->ForDisclosing()) {
  154. auto list = newChild->ChildrenList();
  155. std::move(list.begin(), list.end(), std::back_inserter(newChildren));
  156. } else {
  157. newChildren.emplace_back(std::move(newChild));
  158. }
  159. }
  160. auto renamedNode = hasRenames ? ctx.Expr.ChangeChildren(*current, std::move(newChildren)) : TExprNode::TPtr();
  161. newChildren.clear();
  162. if (!ctx.Settings.VisitChanges && hasRenames && ctx.Settings.CustomInstantTypeTransformer) {
  163. auto root = renamedNode ? renamedNode : current;
  164. ctx.Settings.CustomInstantTypeTransformer->Rewind();
  165. auto status = InstantTransform(*ctx.Settings.CustomInstantTypeTransformer, root, ctx.Expr);
  166. if (status.Level == IGraphTransformer::TStatus::Error) {
  167. return nullptr;
  168. }
  169. YQL_ENSURE(root->GetTypeAnn());
  170. if (status.HasRestart) {
  171. ret = std::move(root);
  172. } else {
  173. renamedNode = std::move(root);
  174. hasRenames = false;
  175. }
  176. }
  177. if (!ret) {
  178. const auto& nextNode = renamedNode ? renamedNode : current;
  179. const bool visitTuples = ctx.Settings.VisitTuples && nextNode->Type() == TExprNode::List;
  180. if ((nextNode->Type() != TExprNode::Callable && !visitTuples) || (hasRenames && !ctx.Settings.VisitChanges) || !ctx.Optimizer) {
  181. ret = nextNode;
  182. } else {
  183. ret = RunOptimizer(ctx, nextNode);
  184. if (!ret)
  185. return nullptr;
  186. }
  187. }
  188. AddExpected(*node, *ret, ctx.Expr, ctx.Settings);
  189. }
  190. if (node == ret && ctx.Settings.ProcessedNodes) {
  191. ctx.Settings.ProcessedNodes->insert(node->UniqueId());
  192. }
  193. if (node == ret) {
  194. YQL_ENSURE(ctx.Memoization.emplace(node.Get(), TExprNode::TPtr()).second);
  195. } else {
  196. if (!node->Unique()) {
  197. YQL_ENSURE(ctx.Memoization.emplace(node.Get(), ret).second);
  198. }
  199. if (current != node && current != ret) {
  200. ctx.Memoization.emplace(current.Get(), ret);
  201. }
  202. }
  203. return ret;
  204. }
  205. TExprNode::TPtr OptimizeNode(const TExprNode::TPtr& node, bool& changed, TOptimizationContext<TCallableOptimizerFast>& ctx, size_t level) {
  206. if (node->Type() == TExprNode::Atom || node->Type() == TExprNode::Argument ||
  207. node->Type() == TExprNode::Arguments || node->Type() == TExprNode::World) {
  208. return node;
  209. }
  210. if (ctx.Settings.VisitChecker && !ctx.Settings.VisitChecker(*node)) {
  211. return node;
  212. }
  213. const auto it = ctx.Memoization.find(node.Get());
  214. if (it != ctx.Memoization.cend()) {
  215. changed = changed || bool(it->second);
  216. return it->second ? it->second : node;
  217. }
  218. YQL_ENSURE(level < 3000U, "Too deep graph!");
  219. TExprNode::TPtr ret;
  220. if (node->Type() == TExprNode::Lambda) {
  221. ret = node;
  222. if (ctx.Settings.VisitLambdas) {
  223. TExprNode::TListType newBody;
  224. newBody.reserve(node->ChildrenSize() - 1U);
  225. bool bodyChanged = false;
  226. for (ui32 i = 1U; i < node->ChildrenSize(); ++i) {
  227. auto newNode = OptimizeNode(node->ChildPtr(i), bodyChanged, ctx, level + 1);
  228. if (!newNode)
  229. return nullptr;
  230. if (newNode->ForDisclosing()) {
  231. auto list = newNode->ChildrenList();
  232. std::move(list.begin(), list.end(), std::back_inserter(newBody));
  233. } else {
  234. newBody.emplace_back(std::move(newNode));
  235. }
  236. }
  237. if (bodyChanged) {
  238. ret = ctx.Expr.DeepCopyLambda(*node, std::move(newBody));
  239. changed = true;
  240. }
  241. }
  242. } else {
  243. TExprNode::TListType newChildren;
  244. newChildren.reserve(node->ChildrenSize());
  245. bool hasRenames = false;
  246. for (auto& child : node->Children()) {
  247. bool childChanged = false;
  248. auto newChild = OptimizeNode(child, childChanged, ctx, level + 1);
  249. if (!newChild)
  250. return nullptr;
  251. hasRenames = hasRenames || childChanged;
  252. if (newChild->ForDisclosing()) {
  253. auto list = newChild->ChildrenList();
  254. std::move(list.begin(), list.end(), std::back_inserter(newChildren));
  255. } else {
  256. newChildren.emplace_back(std::move(newChild));
  257. }
  258. }
  259. auto renamedNode = hasRenames ? ctx.Expr.ChangeChildren(*node, std::move(newChildren)) : TExprNode::TPtr();
  260. newChildren.clear();
  261. changed = changed || hasRenames;
  262. const auto& nextNode = renamedNode ? renamedNode : node;
  263. if (nextNode->Type() != TExprNode::Callable && (nextNode->Type() != TExprNode::List || !ctx.Settings.VisitTuples)) {
  264. ret = nextNode;
  265. } else {
  266. bool optimized = false;
  267. ret = RunOptimizer(ctx, optimized, nextNode);
  268. if (!ret)
  269. return nullptr;
  270. changed = changed || optimized;
  271. }
  272. }
  273. if (!node->Unique()) {
  274. YQL_ENSURE(ctx.Memoization.emplace(node.Get(), ret == node ? TExprNode::TPtr() : ret).second);
  275. }
  276. return ret;
  277. }
  278. void VisitExprInternal(const TExprNode::TPtr& node, const TExprVisitPtrFunc& preFunc,
  279. const TExprVisitPtrFunc& postFunc, TNodeSet& visitedNodes)
  280. {
  281. if (!visitedNodes.emplace(node.Get()).second) {
  282. return;
  283. }
  284. if (!preFunc || preFunc(node)) {
  285. for (const auto& child : node->Children()) {
  286. VisitExprInternal(child, preFunc, postFunc, visitedNodes);
  287. }
  288. }
  289. if (postFunc) {
  290. postFunc(node);
  291. }
  292. }
  293. void VisitExprLambdasLastInternal(const TExprNode::TPtr& node,
  294. const TExprVisitPtrFunc& preLambdaFunc,
  295. const TExprVisitPtrFunc& postLambdaFunc,
  296. TNodeSet& visitedNodes)
  297. {
  298. if (!visitedNodes.emplace(node.Get()).second) {
  299. return;
  300. }
  301. for (auto child : node->Children()) {
  302. if (!child->IsLambda()) {
  303. VisitExprLambdasLastInternal(child, preLambdaFunc, postLambdaFunc, visitedNodes);
  304. }
  305. }
  306. preLambdaFunc(node);
  307. for (auto child : node->Children()) {
  308. if (child->IsLambda()) {
  309. VisitExprLambdasLastInternal(child, preLambdaFunc, postLambdaFunc, visitedNodes);
  310. }
  311. }
  312. postLambdaFunc(node);
  313. }
  314. void VisitExprInternal(const TExprNode& node, const TExprVisitRefFunc& preFunc,
  315. const TExprVisitRefFunc& postFunc, TNodeSet& visitedNodes)
  316. {
  317. if (!visitedNodes.emplace(&node).second) {
  318. return;
  319. }
  320. if (!preFunc || preFunc(node)) {
  321. node.ForEachChild([&](const TExprNode& child) {
  322. VisitExprInternal(child, preFunc, postFunc, visitedNodes);
  323. });
  324. }
  325. if (postFunc) {
  326. postFunc(node);
  327. }
  328. }
  329. void VisitExprByFirstInternal(const TExprNode::TPtr& node, const TExprVisitPtrFunc& preFunc,
  330. const TExprVisitPtrFunc& postFunc, TNodeSet& visitedNodes)
  331. {
  332. if (!visitedNodes.emplace(node.Get()).second) {
  333. return;
  334. }
  335. if (!preFunc || preFunc(node)) {
  336. if (node->ChildrenSize() > 0) {
  337. if (node->Content() == SyncName) {
  338. for (const auto& child : node->Children()) {
  339. VisitExprByFirstInternal(child, preFunc, postFunc, visitedNodes);
  340. }
  341. }
  342. else {
  343. VisitExprByFirstInternal(node->HeadPtr(), preFunc, postFunc, visitedNodes);
  344. }
  345. }
  346. }
  347. if (postFunc) {
  348. postFunc(node);
  349. }
  350. }
  351. void VisitExprByFirstInternal(const TExprNode& node, const TExprVisitRefFunc& preFunc,
  352. const TExprVisitRefFunc& postFunc, TNodeSet& visitedNodes)
  353. {
  354. if (!visitedNodes.emplace(&node).second) {
  355. return;
  356. }
  357. if (!preFunc || preFunc(node)) {
  358. if (node.ChildrenSize() > 0) {
  359. if (node.Content() == SyncName) {
  360. for (const auto& child : node.Children()) {
  361. VisitExprByFirstInternal(*child, preFunc, postFunc, visitedNodes);
  362. }
  363. }
  364. else {
  365. VisitExprByFirstInternal(node.Head(), preFunc, postFunc, visitedNodes);
  366. }
  367. }
  368. }
  369. if (postFunc) {
  370. postFunc(node);
  371. }
  372. }
  373. void VisitExprByPrimaryBranch(const TExprNode::TPtr& node, const TExprVisitPtrFunc& predicate, bool& primary, TNodeSet& visitedNodes)
  374. {
  375. if (!visitedNodes.emplace(node.Get()).second) {
  376. return;
  377. }
  378. if (!predicate(node) || !node->ChildrenSize())
  379. return;
  380. if (node->IsCallable({"If", "IfPresent", "And", "Or", "Xor", "Coalesce"})) {
  381. VisitExprByPrimaryBranch(node->HeadPtr(), predicate, primary, visitedNodes);
  382. } else {
  383. for (ui32 i = 0U; i < node->ChildrenSize(); ++i)
  384. VisitExprByPrimaryBranch(node->ChildPtr(i), predicate, primary, visitedNodes);
  385. return;
  386. }
  387. primary = false;
  388. for (ui32 i = 1U; i < node->ChildrenSize(); ++i)
  389. VisitExprByPrimaryBranch(node->ChildPtr(i), predicate, primary, visitedNodes);
  390. }
  391. template<typename TOptimizer>
  392. IGraphTransformer::TStatus OptimizeExprInternal(TExprNode::TPtr input, TExprNode::TPtr& output, TOptimizer optimizer,
  393. const TNodeOnNodeOwnedMap* replaces, TExprContext& ctx, const TOptimizeExprSettings& settings) try
  394. {
  395. YQL_ENSURE(&input != &output);
  396. TOptimizationContext<TOptimizer> optCtx(optimizer, replaces, ctx, settings);
  397. output = OptimizeNode(input, optCtx, 0U);
  398. if (!output)
  399. return IGraphTransformer::TStatus::Error;
  400. if (optCtx.HasRemaps) {
  401. output = ApplyRemaps(output, optCtx);
  402. if (settings.ProcessedNodes) {
  403. settings.ProcessedNodes->clear();
  404. }
  405. }
  406. if (!settings.VisitChanges && (output != input)) {
  407. return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true);
  408. }
  409. return IGraphTransformer::TStatus::Ok;
  410. } catch (const std::exception& e) {
  411. ctx.AddError(ExceptionToIssue(e, ctx.GetPosition(input->Pos())));
  412. return IGraphTransformer::TStatus::Error;
  413. }
  414. IGraphTransformer::TStatus OptimizeExprInternal(TExprNode::TPtr input, TExprNode::TPtr& output, const TCallableOptimizerFast& optimizer,
  415. TExprContext& ctx, const TOptimizeExprSettings& settings) try
  416. {
  417. YQL_ENSURE(optimizer);
  418. TOptimizationContext<TCallableOptimizerFast> optCtx(optimizer, nullptr, ctx, settings);
  419. bool changed = false;
  420. output = OptimizeNode(input, changed, optCtx, 0U);
  421. if (!output)
  422. return IGraphTransformer::TStatus::Error;
  423. if (changed)
  424. return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true);
  425. return IGraphTransformer::TStatus::Ok;
  426. } catch (const std::exception& e) {
  427. ctx.AddError(ExceptionToIssue(e, ctx.GetPosition(input->Pos())));
  428. return IGraphTransformer::TStatus::Error;
  429. }
  430. }
  431. IGraphTransformer::TStatus OptimizeExpr(const TExprNode::TPtr& input, TExprNode::TPtr& output, TCallableOptimizer optimizer,
  432. TExprContext& ctx, const TOptimizeExprSettings& settings)
  433. {
  434. return OptimizeExprInternal(input, output, optimizer, nullptr, ctx, settings);
  435. }
  436. IGraphTransformer::TStatus OptimizeExprEx(const TExprNode::TPtr& input, TExprNode::TPtr& output, TCallableOptimizerEx optimizer,
  437. TExprContext& ctx, const TOptimizeExprSettings& settings)
  438. {
  439. return OptimizeExprInternal(input, output, optimizer, nullptr, ctx, settings);
  440. }
  441. IGraphTransformer::TStatus OptimizeExpr(const TExprNode::TPtr& input, TExprNode::TPtr& output, const TCallableOptimizerFast& optimizer,
  442. TExprContext& ctx, const TOptimizeExprSettings& settings)
  443. {
  444. return OptimizeExprInternal(input, output, optimizer, ctx, settings);
  445. }
  446. IGraphTransformer::TStatus RemapExpr(const TExprNode::TPtr& input, TExprNode::TPtr& output, const TNodeOnNodeOwnedMap& remaps,
  447. TExprContext& ctx, const TOptimizeExprSettings& settings)
  448. {
  449. return OptimizeExprInternal<TCallableOptimizer>(input, output, {}, &remaps, ctx, settings);
  450. }
  451. IGraphTransformer::TStatus ExpandApply(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) {
  452. if (ctx.Step.IsDone(TExprStep::ExpandApplyForLambdas))
  453. return IGraphTransformer::TStatus::Ok;
  454. YQL_PROFILE_SCOPE(DEBUG, "ExpandApply");
  455. TOptimizeExprSettings settings(nullptr);
  456. auto ret = OptimizeExpr(input, output, [&](const TExprNode::TPtr& node, bool& changed, TExprContext& ctx) -> TExprNode::TPtr {
  457. if (node->Content() == "WithOptionalArgs") {
  458. if (!EnsureArgsCount(*node, 2, ctx)) {
  459. return nullptr;
  460. }
  461. if (!EnsureLambda(*node->Child(0), ctx)) {
  462. return nullptr;
  463. }
  464. if (!EnsureAtom(*node->Child(1), ctx)) {
  465. return nullptr;
  466. }
  467. ui32 count = 0;
  468. if (!TryFromString(node->Child(1)->Content(), count)) {
  469. ctx.AddError(TIssue(ctx.GetPosition(node->Child(1)->Pos()),
  470. TStringBuilder() << "Failed to convert to integer: " << node->Child(1)->Content()));
  471. return nullptr;
  472. }
  473. if (count > node->Child(0)->Child(0)->ChildrenSize()) {
  474. ctx.AddError(TIssue(ctx.GetPosition(node->Child(1)->Pos()),
  475. TStringBuilder() << "Too many optional arguments: " << count
  476. << ", lambda has only: " << node->Child(0)->Child(0)->ChildrenSize()));
  477. return nullptr;
  478. }
  479. return node;
  480. }
  481. if (node->Content() != "Apply" && node->Content() != "NamedApply")
  482. return node;
  483. ui32 optionalArgsCount = 0;
  484. auto lambdaNode = node;
  485. if (lambdaNode->ChildrenSize() > 0 && lambdaNode->Head().IsCallable("WithOptionalArgs")) {
  486. lambdaNode = lambdaNode->Child(0);
  487. optionalArgsCount = FromString<ui32>(lambdaNode->Child(1)->Content());
  488. }
  489. if (lambdaNode->ChildrenSize() == 0) {
  490. ctx.AddError(TIssue(ctx.GetPosition(lambdaNode->Pos()), "Expected at least one argument - lambda or callable"));
  491. return nullptr;
  492. }
  493. if (lambdaNode->Head().Type() != TExprNode::Lambda) {
  494. return node;
  495. }
  496. auto nullNode = ctx.NewCallable(input->Pos(), "Null", {});
  497. const auto& lambda = lambdaNode->Head();
  498. const auto& args = lambda.Head();
  499. TExprNode::TPtr ret;
  500. if (node->Content() == "Apply") {
  501. const ui32 maxArgs = args.ChildrenSize();
  502. const ui32 minArgs = args.ChildrenSize() - optionalArgsCount;
  503. const ui32 providedArgs = node->ChildrenSize() - 1;
  504. if (providedArgs < minArgs || providedArgs > maxArgs) {
  505. ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "Minimum arguments count: "
  506. << minArgs << ", maximum arguments count: " << maxArgs << ", but provided " << providedArgs));
  507. return nullptr;
  508. }
  509. TNodeOnNodeOwnedMap replaces;
  510. replaces.reserve(args.ChildrenSize());
  511. ui32 i = 0U;
  512. args.ForEachChild([&](const TExprNode& arg) {
  513. auto value = nullNode;
  514. if (i < providedArgs) {
  515. value = node->ChildPtr(++i);
  516. }
  517. replaces.emplace(&arg, value);
  518. });
  519. if (lambda.ChildrenSize() != 2U) {
  520. ret = ctx.NewList(node->Pos(), ctx.ReplaceNodes(GetLambdaBody(lambda), replaces));
  521. ret->SetDisclosing();
  522. } else {
  523. ret = ctx.ReplaceNodes(lambda.TailPtr(), replaces);
  524. }
  525. changed = true;
  526. } else if (node->Content() == "NamedApply") {
  527. if (!EnsureMinArgsCount(*node, 3, ctx)) {
  528. return nullptr;
  529. }
  530. if (!EnsureTuple(*node->Child(1), ctx)) {
  531. return nullptr;
  532. }
  533. if (!EnsureCallable(*node->Child(2), ctx)) {
  534. return nullptr;
  535. }
  536. if (!node->Child(2)->IsCallable("AsStruct") || node->Child(2)->ChildrenSize() != 0) {
  537. ctx.AddError(TIssue(ctx.GetPosition(node->Child(2)->Pos()), "Expected empty AsStruct in NamedApply"));
  538. return nullptr;
  539. }
  540. for (ui32 i = 3; i < node->ChildrenSize(); ++i) {
  541. if (!EnsureCallable(*node->Child(i), ctx)) {
  542. return nullptr;
  543. }
  544. if (!node->Child(i)->IsCallable("DependsOn")) {
  545. ctx.AddError(TIssue(ctx.GetPosition(node->Child(i)->Pos()), "Expected DependsOn"));
  546. return nullptr;
  547. }
  548. }
  549. const auto depArgs = node->ChildrenSize() - 3U;
  550. const auto totalArgs = depArgs + node->Child(1)->ChildrenSize();
  551. if (totalArgs < args.ChildrenSize() - optionalArgsCount) {
  552. ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "Too few arguments, lambda has "
  553. << args.ChildrenSize() << " arguments with optional "<< optionalArgsCount << " arguments, but got: " << totalArgs));
  554. return nullptr;
  555. }
  556. ui32 posArgs = Min(args.ChildrenSize(), node->Child(1)->ChildrenSize());
  557. TNodeOnNodeOwnedMap replaces;
  558. replaces.reserve(args.ChildrenSize());
  559. for (ui32 i = 0; i < posArgs; ++i) {
  560. replaces.emplace(args.Child(i), node->Child(1)->ChildPtr(i));
  561. }
  562. if (posArgs == node->Child(1)->ChildrenSize()) {
  563. for (ui32 i = 3; i < node->ChildrenSize(); ++i) {
  564. auto index = i - 3 + node->Child(1)->ChildrenSize();
  565. if (index >= args.ChildrenSize()) {
  566. break;
  567. }
  568. replaces.emplace(args.Child(index), node->ChildPtr(i));
  569. }
  570. }
  571. for (ui32 i = replaces.size(); i < args.ChildrenSize(); ++i) {
  572. replaces.emplace(args.Child(i), nullNode);
  573. }
  574. if (lambda.ChildrenSize() > 2U) {
  575. ret = ctx.NewList(node->Pos(), ctx.ReplaceNodes(GetLambdaBody(lambda), replaces));
  576. ret->SetDisclosing();
  577. } else {
  578. ret = ctx.ReplaceNodes(lambda.TailPtr(), replaces);
  579. }
  580. changed = true;
  581. }
  582. return ret;
  583. }, ctx, settings);
  584. if (ret.Level == IGraphTransformer::TStatus::Ok) {
  585. ret = ret.Combine(OptimizeExpr(output, output, [&](const TExprNode::TPtr& node, bool& changed, TExprContext& ctx) -> TExprNode::TPtr {
  586. if (node->Content() == "Combine") {
  587. if (!EnsureMinArgsCount(*node, 1, ctx)) {
  588. return nullptr;
  589. }
  590. ui32 flags = 0U;
  591. TString content;
  592. for (const auto& child : node->Children()) {
  593. if (!EnsureAtom(*child, ctx)) {
  594. return nullptr;
  595. }
  596. const auto f = child->Flags();
  597. if ((TNodeFlags::MultilineContent | TNodeFlags::BinaryContent) & f) {
  598. ctx.AddError(TIssue(ctx.GetPosition(child->Pos()), "Can't combine binary or multiline atoms."));
  599. return nullptr;
  600. }
  601. content += child->Content();
  602. flags |= f;
  603. }
  604. changed = true;
  605. return ctx.NewAtom(node->Pos(), content, flags);
  606. } else if (node->Content() == "Nth") {
  607. if (!EnsureArgsCount(*node, 2, ctx)) {
  608. return nullptr;
  609. }
  610. if (!node->Tail().IsAtom()) {
  611. return node;
  612. }
  613. if (node->Head().Type() != TExprNode::List) {
  614. return node;
  615. }
  616. ui32 index = 0U;
  617. if (const auto& indexValue = node->Tail().Content(); !TryFromString(indexValue, index)) {
  618. ctx.AddError(TIssue(ctx.GetPosition(node->Child(1)->Pos()), TStringBuilder() << "Index '" << indexValue << "' isn't UI32."));
  619. return nullptr;
  620. }
  621. if (const auto size = node->Head().ChildrenSize(); size <= index) {
  622. ctx.AddError(TIssue(ctx.GetPosition(node->Tail().Pos()), TStringBuilder() << "Index too large: (" << index << " >= " << size << ")."));
  623. return nullptr;
  624. }
  625. changed = true;
  626. return node->Head().ChildPtr(index);
  627. } else if (node->Content() == "NthArg") {
  628. if (!EnsureArgsCount(*node, 2, ctx)) {
  629. return nullptr;
  630. }
  631. if (!EnsureAtom(node->Head(), ctx)) {
  632. return nullptr;
  633. }
  634. if (!EnsureCallable(node->Tail(), ctx)) {
  635. return nullptr;
  636. }
  637. if (node->Tail().IsCallable("Apply")) {
  638. return node;
  639. }
  640. ui32 index = 0U;
  641. if (const auto& indexValue = node->Head().Content(); !TryFromString(indexValue, index)) {
  642. ctx.AddError(TIssue(ctx.GetPosition(node->Head().Pos()), TStringBuilder() << "Index '" << indexValue << "' isn't UI32."));
  643. return nullptr;
  644. }
  645. if (const auto size = node->Tail().ChildrenSize(); size <= index) {
  646. ctx.AddError(TIssue(ctx.GetPosition(node->Head().Pos()), TStringBuilder() << "Index too large: (" << index << " >= " << size << ")."));
  647. return nullptr;
  648. }
  649. changed = true;
  650. return node->Tail().ChildPtr(index);
  651. }
  652. else if (node->Content() == "Seq!") {
  653. if (!EnsureMinArgsCount(*node, 1, ctx)) {
  654. return nullptr;
  655. }
  656. auto world = node->HeadPtr();
  657. for (ui32 i = 1; i < node->ChildrenSize(); ++i) {
  658. const auto lambda = node->Child(i);
  659. if (!lambda->IsLambda()) {
  660. return node;
  661. }
  662. const auto& lambdaArgs = lambda->Head();
  663. if (!EnsureArgsCount(lambdaArgs, 1, ctx)) {
  664. return nullptr;
  665. }
  666. world = ctx.ReplaceNode(lambda->TailPtr(), lambdaArgs.Head(), std::move(world));
  667. }
  668. changed = true;
  669. return world;
  670. } else if (node->Content() == "SubqueryExtend" || node->Content() == "SubqueryUnionAll" ||
  671. node->Content() == "SubqueryMerge" || node->Content() == "SubqueryUnionMerge") {
  672. if (!EnsureMinArgsCount(*node, 1, ctx)) {
  673. return nullptr;
  674. }
  675. TMaybe<ui32> commonArgs;
  676. for (ui32 i = 0; i < node->ChildrenSize(); ++i) {
  677. const auto lambda = node->Child(i);
  678. if (!lambda->IsLambda()) {
  679. return node;
  680. }
  681. const auto& lambdaArgs = lambda->Head();
  682. if (!EnsureMinArgsCount(lambdaArgs, 1, ctx)) {
  683. return nullptr;
  684. }
  685. if (!commonArgs) {
  686. commonArgs = lambdaArgs.ChildrenSize();
  687. } else if (*commonArgs != lambdaArgs.ChildrenSize()) {
  688. ctx.AddError(TIssue(ctx.GetPosition(lambda->Pos()),
  689. TStringBuilder() << "Mismatch of arguments count in subquery, got: "
  690. << (lambdaArgs.ChildrenSize() - 1) << ", but expected: " << (*commonArgs - 1)));
  691. return nullptr;
  692. }
  693. }
  694. TExprNodeList argItems;
  695. for (ui32 i = 0; i < *commonArgs; ++i) {
  696. argItems.push_back(ctx.NewArgument(node->Pos(), "arg" + ToString(i)));
  697. }
  698. TExprNodeList inputs;
  699. for (ui32 i = 0; i < node->ChildrenSize(); ++i) {
  700. const auto lambda = node->Child(i);
  701. const auto& lambdaArgs = lambda->Head();
  702. TNodeOnNodeOwnedMap replaces;
  703. for (ui32 j = 0; j < argItems.size(); ++j) {
  704. replaces[lambdaArgs.Child(j)] = argItems[j];
  705. }
  706. inputs.push_back(ctx.ReplaceNodes(lambda->TailPtr(), replaces));
  707. }
  708. auto body = ctx.NewCallable(node->Pos(), node->Content().substr(8), std::move(inputs) );
  709. auto args = ctx.NewArguments(node->Pos(), std::move(argItems));
  710. auto merged = ctx.NewLambda(node->Pos(), std::move(args), std::move(body));
  711. changed = true;
  712. return merged;
  713. } else {
  714. return node;
  715. }
  716. }, ctx, settings));
  717. }
  718. if (ret.Level == IGraphTransformer::TStatus::Ok) {
  719. ctx.Step.Done(TExprStep::ExpandApplyForLambdas);
  720. }
  721. return ret;
  722. }
  723. TExprNode::TPtr ApplySyncListToWorld(const TExprNode::TPtr& main, const TSyncMap& syncList, TExprContext& ctx) {
  724. if (syncList.empty()) {
  725. return main;
  726. }
  727. using TPair = std::pair<TExprNode::TPtr, ui64>;
  728. TVector<TPair> sortedList(syncList.cbegin(), syncList.cend());
  729. TExprNode::TListType syncChildren;
  730. syncChildren.push_back(main);
  731. Sort(sortedList, [](const TPair& x, const TPair& y) { return x.second < y.second; });
  732. for (auto x : sortedList) {
  733. if (x.first->IsCallable(RightName)) {
  734. auto world = ctx.NewCallable(main->Pos(), LeftName, { x.first->HeadPtr() });
  735. syncChildren.push_back(world);
  736. } else if (x.first->GetTypeAnn()->GetKind() == ETypeAnnotationKind::World) {
  737. syncChildren.push_back(x.first);
  738. } else {
  739. auto world = ctx.NewCallable(main->Pos(), LeftName, { x.first });
  740. syncChildren.push_back(world);
  741. }
  742. }
  743. return ctx.NewCallable(main->Pos(), SyncName, std::move(syncChildren));
  744. }
  745. void VisitExpr(const TExprNode::TPtr& root, const TExprVisitPtrFunc& func) {
  746. TNodeSet visitedNodes;
  747. VisitExprInternal(root, func, {}, visitedNodes);
  748. }
  749. void VisitExpr(const TExprNode::TPtr& root, const TExprVisitPtrFunc& preFunc, const TExprVisitPtrFunc& postFunc) {
  750. TNodeSet visitedNodes;
  751. VisitExprInternal(root, preFunc, postFunc, visitedNodes);
  752. }
  753. void VisitExpr(const TExprNode& root, const TExprVisitRefFunc& func) {
  754. TNodeSet visitedNodes;
  755. VisitExprInternal(root, func, {}, visitedNodes);
  756. }
  757. void VisitExpr(const TExprNode& root, const TExprVisitRefFunc& preFunc, const TExprVisitRefFunc& postFunc) {
  758. TNodeSet visitedNodes;
  759. VisitExprInternal(root, preFunc, postFunc, visitedNodes);
  760. }
  761. void VisitExpr(const TExprNode::TPtr& root, const TExprVisitPtrFunc& func, TNodeSet& visitedNodes) {
  762. VisitExprInternal(root, func, {}, visitedNodes);
  763. }
  764. void VisitExprLambdasLast(const TExprNode::TPtr& root, const TExprVisitPtrFunc& preLambdaFunc, const TExprVisitPtrFunc& postLambdaFunc)
  765. {
  766. TNodeSet visitedNodes;
  767. VisitExprLambdasLastInternal(root, preLambdaFunc, postLambdaFunc, visitedNodes);
  768. }
  769. void VisitExprByFirst(const TExprNode::TPtr& root, const TExprVisitPtrFunc& func) {
  770. TNodeSet visitedNodes;
  771. VisitExprByFirstInternal(root, func, {}, visitedNodes);
  772. }
  773. void VisitExprByFirst(const TExprNode::TPtr& root, const TExprVisitPtrFunc& preFunc, const TExprVisitPtrFunc& postFunc) {
  774. TNodeSet visitedNodes;
  775. VisitExprByFirstInternal(root, preFunc, postFunc, visitedNodes);
  776. }
  777. void VisitExprByFirst(const TExprNode& root, const TExprVisitRefFunc& func) {
  778. TNodeSet visitedNodes;
  779. VisitExprByFirstInternal(root, func, {}, visitedNodes);
  780. }
  781. void VisitExprByFirst(const TExprNode::TPtr& root, const TExprVisitPtrFunc& func, TNodeSet& visitedNodes) {
  782. VisitExprByFirstInternal(root, func, {}, visitedNodes);
  783. }
  784. TExprNode::TPtr FindNode(const TExprNode::TPtr& root, const TExprVisitPtrFunc& predicate) {
  785. TExprNode::TPtr result;
  786. VisitExpr(root, [&result, &predicate] (const TExprNode::TPtr& node) {
  787. if (result)
  788. return false;
  789. if (predicate(node)) {
  790. result = node;
  791. return false;
  792. }
  793. return true;
  794. });
  795. return result;
  796. }
  797. TExprNode::TPtr FindNode(const TExprNode::TPtr& root, const TExprVisitPtrFunc& filter, const TExprVisitPtrFunc& predicate) {
  798. TExprNode::TPtr result;
  799. VisitExpr(root, filter, [&result, &predicate] (const TExprNode::TPtr& node) {
  800. if (result)
  801. return false;
  802. if (predicate(node)) {
  803. result = node;
  804. return false;
  805. }
  806. return true;
  807. });
  808. return result;
  809. }
  810. TExprNode::TListType FindNodes(const TExprNode::TPtr& root, const TExprVisitPtrFunc& predicate) {
  811. TExprNode::TListType result;
  812. VisitExpr(root, [&result, &predicate] (const TExprNode::TPtr& node) {
  813. if (predicate(node)) {
  814. result.emplace_back(node);
  815. }
  816. return true;
  817. });
  818. return result;
  819. }
  820. TExprNode::TListType FindNodes(const TExprNode::TPtr& root, const TExprVisitPtrFunc& filter, const TExprVisitPtrFunc& predicate) {
  821. TExprNode::TListType result;
  822. VisitExpr(root, filter, [&result, &predicate] (const TExprNode::TPtr& node) {
  823. if (predicate(node)) {
  824. result.emplace_back(node);
  825. }
  826. return true;
  827. });
  828. return result;
  829. }
  830. std::pair<TExprNode::TPtr, bool> FindSharedNode(const TExprNode::TPtr& firstRoot, const TExprNode::TPtr& secondRoot, const TExprVisitPtrFunc& predicate)
  831. {
  832. TNodeSet nodes, visited;
  833. VisitExpr(firstRoot, [&nodes, &predicate] (const TExprNode::TPtr& node) {
  834. if (predicate(node)) {
  835. nodes.insert(node.Get());
  836. }
  837. return true;
  838. });
  839. TExprNode::TPtr result;
  840. bool primary = true;
  841. VisitExprByPrimaryBranch(secondRoot, [&nodes, &result] (const TExprNode::TPtr& node) {
  842. if (result)
  843. return false;
  844. if (nodes.find(node.Get()) != nodes.end()) {
  845. result = node;
  846. return false;
  847. }
  848. return true;
  849. }, primary, visited);
  850. return std::make_pair(std::move(result), primary);
  851. }
  852. bool HaveSharedNodes(const TExprNode::TPtr& firstRoot, const TExprNode::TPtr& secondRoot, const TExprVisitPtrFunc& predicate)
  853. {
  854. return bool(FindSharedNode(firstRoot, secondRoot, predicate).first);
  855. }
  856. TExprNode::TPtr CloneCompleteFlow(TExprNode::TPtr&& node, TExprContext& ctx) {
  857. const TExprNode* original = nullptr;
  858. TExprNode::TPtr copy;
  859. VisitExpr(*node, [&](const TExprNode& child) {
  860. if (const auto kind = child.GetTypeAnn()->GetKind(); (ETypeAnnotationKind::Stream == kind || ETypeAnnotationKind::Flow == kind) && child.IsComplete() && child.IsCallable() && original != &child) {
  861. original = &child;
  862. copy = ctx.ShallowCopy(child);
  863. return true;
  864. }
  865. return false;
  866. });
  867. return original ? ctx.ReplaceNode(std::move(node), *original, std::move(copy)) : std::move(node);
  868. }
  869. }