mkql_wide_combine_ut.cpp 112 KB


  1. #include "mkql_computation_node_ut.h"
  2. #include <yql/essentials/minikql/mkql_node_cast.h>
  3. #include <yql/essentials/minikql/mkql_runtime_version.h>
  4. #include <yql/essentials/minikql/mkql_string_util.h>
  5. #include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
  6. #include <yql/essentials/minikql/computation/mock_spiller_factory_ut.h>
  7. #include <cstring>
  8. #include <algorithm>
  9. namespace NKikimr {
  10. namespace NMiniKQL {
  11. namespace {
// NOTE(review): not referenced in the visible part of this file; presumably a
// threshold shared by tests further down — confirm before changing or removing.
constexpr auto border = 9124596000000000ULL;
// Script for the synthetic "TestYieldStream" source used by the tests below.
struct TTestStreamParams {
    // Sentinel entry of TestYieldStreamData: when the stream reaches it, Fetch()
    // reports EFetchStatus::Yield instead of producing a row.
    static constexpr ui64 Yield = std::numeric_limits<ui64>::max();
    // How many times the decimal text of a value is repeated to build column "b".
    ui64 StringSize = 1;
    // Values (or Yield sentinels) the stream hands out, in order.
    std::vector<ui64> TestYieldStreamData;
};
  18. class TTestStreamWrapper: public TMutableComputationNode<TTestStreamWrapper> {
  19. using TBaseComputation = TMutableComputationNode<TTestStreamWrapper>;
  20. public:
  21. class TStreamValue : public TComputationValue<TStreamValue> {
  22. public:
  23. using TBase = TComputationValue<TStreamValue>;
  24. TStreamValue(TMemoryUsageInfo* memInfo, TComputationContext& compCtx, TTestStreamParams& params)
  25. : TBase(memInfo), CompCtx(compCtx), Params(params)
  26. {}
  27. private:
  28. NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override {
  29. auto size = Params.TestYieldStreamData.size();
  30. if (Index == size) {
  31. return NUdf::EFetchStatus::Finish;
  32. }
  33. const auto val = Params.TestYieldStreamData[Index];
  34. if (Params.Yield == val) {
  35. ++Index;
  36. return NUdf::EFetchStatus::Yield;
  37. }
  38. NUdf::TUnboxedValue* items = nullptr;
  39. result = CompCtx.HolderFactory.CreateDirectArrayHolder(2, items);
  40. items[0] = NUdf::TUnboxedValuePod(val);
  41. items[1] = NUdf::TUnboxedValuePod(MakeString(ToString(val) * Params.StringSize));
  42. ++Index;
  43. return NUdf::EFetchStatus::Ok;
  44. }
  45. private:
  46. TComputationContext& CompCtx;
  47. ui64 Index = 0;
  48. TTestStreamParams& Params;
  49. };
  50. TTestStreamWrapper(TComputationMutables& mutables, TTestStreamParams& params)
  51. : TBaseComputation(mutables)
  52. , Params(params)
  53. {}
  54. NUdf::TUnboxedValuePod DoCalculate(TComputationContext& ctx) const {
  55. return ctx.HolderFactory.Create<TStreamValue>(ctx, Params);
  56. }
  57. private:
  58. void RegisterDependencies() const final {}
  59. TTestStreamParams& Params;
  60. };
  61. IComputationNode* WrapTestStream(const TComputationNodeFactoryContext& ctx, TTestStreamParams& params) {
  62. return new TTestStreamWrapper(ctx.Mutables, params);
  63. }
  64. TComputationNodeFactory GetNodeFactory(TTestStreamParams& params) {
  65. return [&params](TCallable& callable, const TComputationNodeFactoryContext& ctx) -> IComputationNode* {
  66. if (callable.GetType()->GetName() == "TestYieldStream") {
  67. return WrapTestStream(ctx, params);
  68. }
  69. return GetBuiltinFactory()(callable, ctx);
  70. };
  71. }
  72. template <bool LLVM>
  73. TRuntimeNode MakeStream(TSetup<LLVM>& setup) {
  74. TProgramBuilder& pb = *setup.PgmBuilder;
  75. TCallableBuilder callableBuilder(*setup.Env, "TestYieldStream",
  76. pb.NewStreamType(
  77. pb.NewStructType({
  78. {TStringBuf("a"), pb.NewDataType(NUdf::EDataSlot::Uint64)},
  79. {TStringBuf("b"), pb.NewDataType(NUdf::EDataSlot::String)}
  80. })
  81. )
  82. );
  83. return TRuntimeNode(callableBuilder.Build(), false);
  84. }
  85. template <bool OverFlow>
  86. TRuntimeNode Combine(TProgramBuilder& pb, TRuntimeNode stream, std::function<TRuntimeNode(TRuntimeNode, TRuntimeNode)> finishLambda) {
  87. const auto keyExtractor = [&](TRuntimeNode item) {
  88. return pb.Member(item, "a");
  89. };
  90. const auto init = [&](TRuntimeNode /*key*/, TRuntimeNode item) {
  91. return item;
  92. };
  93. const auto update = [&](TRuntimeNode /*key*/, TRuntimeNode item, TRuntimeNode state) {
  94. const auto a = pb.Add(pb.Member(item, "a"), pb.Member(state, "a"));
  95. const auto b = pb.Concat(pb.Member(item, "b"), pb.Member(state, "b"));
  96. return pb.NewStruct({
  97. {TStringBuf("a"), a},
  98. {TStringBuf("b"), b},
  99. });
  100. };
  101. return OverFlow ?
  102. pb.FromFlow(pb.CombineCore(pb.ToFlow(stream), keyExtractor, init, update, finishLambda, 64ul << 20)):
  103. pb.CombineCore(stream, keyExtractor, init, update, finishLambda, 64ul << 20);
  104. }
  105. template<bool SPILLING>
  106. TRuntimeNode WideLastCombiner(TProgramBuilder& pb, TRuntimeNode flow, const TProgramBuilder::TWideLambda& extractor, const TProgramBuilder::TBinaryWideLambda& init, const TProgramBuilder::TTernaryWideLambda& update, const TProgramBuilder::TBinaryWideLambda& finish) {
  107. return SPILLING ?
  108. pb.WideLastCombinerWithSpilling(flow, extractor, init, update, finish):
  109. pb.WideLastCombiner(flow, extractor, init, update, finish);
  110. }
  111. void CheckIfStreamHasExpectedStringValues(const NUdf::TUnboxedValue& streamValue, std::unordered_set<TString>& expected) {
  112. NUdf::TUnboxedValue item;
  113. NUdf::EFetchStatus fetchStatus;
  114. while (!expected.empty()) {
  115. fetchStatus = streamValue.Fetch(item);
  116. UNIT_ASSERT_UNEQUAL(fetchStatus, NUdf::EFetchStatus::Finish);
  117. if (fetchStatus == NYql::NUdf::EFetchStatus::Yield) continue;
  118. const auto actual = TString(item.AsStringRef());
  119. auto it = expected.find(actual);
  120. UNIT_ASSERT(it != expected.end());
  121. expected.erase(it);
  122. }
  123. fetchStatus = streamValue.Fetch(item);
  124. UNIT_ASSERT_EQUAL(fetchStatus, NUdf::EFetchStatus::Finish);
  125. }
  126. } // unnamed
  127. #if !defined(MKQL_RUNTIME_VERSION) || MKQL_RUNTIME_VERSION >= 18u
  128. Y_UNIT_TEST_SUITE(TMiniKQLWideCombinerTest) {
  129. Y_UNIT_TEST_LLVM(TestLongStringsRefCounting) {
  130. TSetup<LLVM> setup;
  131. TProgramBuilder& pb = *setup.PgmBuilder;
  132. const auto dataType = pb.NewDataType(NUdf::TDataType<const char*>::Id);
  133. const auto optionalType = pb.NewOptionalType(dataType);
  134. const auto tupleType = pb.NewTupleType({dataType, dataType});
  135. const auto keyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("key one");
  136. const auto keyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("key two");
  137. const auto longKeyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long key one");
  138. const auto longKeyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long key two");
  139. const auto value1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 1");
  140. const auto value2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 2");
  141. const auto value3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 3");
  142. const auto value4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 4");
  143. const auto value5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 5");
  144. const auto value6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 6");
  145. const auto value7 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 7");
  146. const auto value8 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 8");
  147. const auto value9 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 9");
  148. const auto data1 = pb.NewTuple(tupleType, {keyOne, value1});
  149. const auto data2 = pb.NewTuple(tupleType, {keyTwo, value2});
  150. const auto data3 = pb.NewTuple(tupleType, {keyTwo, value3});
  151. const auto data4 = pb.NewTuple(tupleType, {longKeyOne, value4});
  152. const auto data5 = pb.NewTuple(tupleType, {longKeyTwo, value5});
  153. const auto data6 = pb.NewTuple(tupleType, {longKeyTwo, value6});
  154. const auto data7 = pb.NewTuple(tupleType, {longKeyTwo, value7});
  155. const auto data8 = pb.NewTuple(tupleType, {longKeyTwo, value8});
  156. const auto data9 = pb.NewTuple(tupleType, {longKeyTwo, value9});
  157. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7, data8, data9});
  158. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(list),
  159. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }), -100000LL,
  160. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  161. [&](TRuntimeNode::TList keys, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  162. return {pb.NewOptional(items.back()), pb.NewOptional(keys.front()), pb.NewEmptyOptional(optionalType), pb.NewEmptyOptional(optionalType)};
  163. },
  164. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  165. return {pb.NewOptional(items.back()), state.front(), state[1U], state[2U]};
  166. },
  167. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  168. state.erase(state.cbegin());
  169. return {pb.FlatMap(pb.NewList(optionalType, state), [&](TRuntimeNode item) { return item; } )};
  170. }),
  171. [&](TRuntimeNode::TList items) -> TRuntimeNode {
  172. return pb.Fold1(items.front(),
  173. [&](TRuntimeNode item) { return item; },
  174. [&](TRuntimeNode item, TRuntimeNode state) {
  175. return pb.AggrConcat(pb.AggrConcat(state, pb.NewDataLiteral<NUdf::EDataSlot::String>(" / ")), item);
  176. }
  177. );
  178. }
  179. ));
  180. const auto graph = setup.BuildGraph(pgmReturn);
  181. const auto iterator = graph->GetValue().GetListIterator();
  182. NUdf::TUnboxedValue item;
  183. UNIT_ASSERT(iterator.Next(item));
  184. UNBOXED_VALUE_STR_EQUAL(item, "key one");
  185. UNIT_ASSERT(iterator.Next(item));
  186. UNBOXED_VALUE_STR_EQUAL(item, "very long value 2 / key two");
  187. UNIT_ASSERT(iterator.Next(item));
  188. UNBOXED_VALUE_STR_EQUAL(item, "very long key one");
  189. UNIT_ASSERT(iterator.Next(item));
  190. UNBOXED_VALUE_STR_EQUAL(item, "very long value 8 / very long value 7 / very long value 6");
  191. UNIT_ASSERT(!iterator.Next(item));
  192. UNIT_ASSERT(!iterator.Next(item));
  193. }
  194. Y_UNIT_TEST_LLVM(TestLongStringsPasstroughtRefCounting) {
  195. TSetup<LLVM> setup;
  196. TProgramBuilder& pb = *setup.PgmBuilder;
  197. const auto dataType = pb.NewDataType(NUdf::TDataType<const char*>::Id);
  198. const auto tupleType = pb.NewTupleType({dataType, dataType});
  199. const auto keyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("key one");
  200. const auto keyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("key two");
  201. const auto longKeyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long key one");
  202. const auto longKeyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long key two");
  203. const auto value1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 1");
  204. const auto value2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 2");
  205. const auto value3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 3");
  206. const auto value4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 4");
  207. const auto value5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 5");
  208. const auto value6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 6");
  209. const auto value7 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 7");
  210. const auto value8 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 8");
  211. const auto value9 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 9");
  212. const auto data1 = pb.NewTuple(tupleType, {keyOne, value1});
  213. const auto data2 = pb.NewTuple(tupleType, {keyTwo, value2});
  214. const auto data3 = pb.NewTuple(tupleType, {keyTwo, value3});
  215. const auto data4 = pb.NewTuple(tupleType, {longKeyOne, value4});
  216. const auto data5 = pb.NewTuple(tupleType, {longKeyTwo, value5});
  217. const auto data6 = pb.NewTuple(tupleType, {longKeyTwo, value6});
  218. const auto data7 = pb.NewTuple(tupleType, {longKeyTwo, value7});
  219. const auto data8 = pb.NewTuple(tupleType, {longKeyTwo, value8});
  220. const auto data9 = pb.NewTuple(tupleType, {longKeyTwo, value9});
  221. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7, data8, data9});
  222. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(list),
  223. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }), -1000000LL,
  224. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  225. [&](TRuntimeNode::TList keys, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  226. return {items.back(), keys.front(), items.back(), items.front()};
  227. },
  228. [&](TRuntimeNode::TList keys, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  229. return {items.back(), keys.front(), state[2U], state.back()};
  230. },
  231. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  232. return state;
  233. }),
  234. [&](TRuntimeNode::TList items) -> TRuntimeNode {
  235. return pb.Fold1(pb.NewList(dataType, items),
  236. [&](TRuntimeNode item) { return item; },
  237. [&](TRuntimeNode item, TRuntimeNode state) {
  238. return pb.AggrConcat(pb.AggrConcat(state, pb.NewDataLiteral<NUdf::EDataSlot::String>(" / ")), item);
  239. }
  240. );
  241. }
  242. ));
  243. const auto graph = setup.BuildGraph(pgmReturn);
  244. const auto iterator = graph->GetValue().GetListIterator();
  245. NUdf::TUnboxedValue item;
  246. UNIT_ASSERT(iterator.Next(item));
  247. UNBOXED_VALUE_STR_EQUAL(item, "very long value 1 / key one / very long value 1 / key one");
  248. UNIT_ASSERT(iterator.Next(item));
  249. UNBOXED_VALUE_STR_EQUAL(item, "very long value 3 / key two / very long value 2 / key two");
  250. UNIT_ASSERT(iterator.Next(item));
  251. UNBOXED_VALUE_STR_EQUAL(item, "very long value 4 / very long key one / very long value 4 / very long key one");
  252. UNIT_ASSERT(iterator.Next(item));
  253. UNBOXED_VALUE_STR_EQUAL(item, "very long value 9 / very long key two / very long value 5 / very long key two");
  254. UNIT_ASSERT(!iterator.Next(item));
  255. UNIT_ASSERT(!iterator.Next(item));
  256. }
  257. Y_UNIT_TEST_LLVM(TestDoNotCalculateUnusedInput) {
  258. TSetup<LLVM> setup;
  259. TProgramBuilder& pb = *setup.PgmBuilder;
  260. const auto dataType = pb.NewDataType(NUdf::TDataType<const char*>::Id);
  261. const auto optionalType = pb.NewOptionalType(dataType);
  262. const auto tupleType = pb.NewTupleType({dataType, optionalType, dataType});
  263. const auto keyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("key one");
  264. const auto keyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("key two");
  265. const auto value1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 1");
  266. const auto value2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 2");
  267. const auto value3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 3");
  268. const auto value4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 4");
  269. const auto value5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 5");
  270. const auto empty = pb.NewDataLiteral<NUdf::EDataSlot::String>("");
  271. const auto none = pb.NewEmptyOptional(optionalType);
  272. const auto data1 = pb.NewTuple(tupleType, {keyOne, none, value1});
  273. const auto data2 = pb.NewTuple(tupleType, {keyTwo, none, value2});
  274. const auto data3 = pb.NewTuple(tupleType, {keyTwo, none, value3});
  275. const auto data4 = pb.NewTuple(tupleType, {keyOne, none, value4});
  276. const auto data5 = pb.NewTuple(tupleType, {keyOne, none, value5});
  277. const auto data6 = pb.NewTuple(tupleType, {keyOne, none, value1});
  278. const auto data7 = pb.NewTuple(tupleType, {keyOne, none, value2});
  279. const auto data8 = pb.NewTuple(tupleType, {keyTwo, none, value3});
  280. const auto data9 = pb.NewTuple(tupleType, {keyTwo, none, value4});
  281. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7, data8, data9});
  282. const auto landmine = pb.NewDataLiteral<NUdf::EDataSlot::String>("ACHTUNG MINEN!");
  283. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(list),
  284. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Unwrap(pb.Nth(item, 1U), landmine, __FILE__, __LINE__, 0), pb.Nth(item, 2U)}; }), -1000000LL,
  285. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  286. [&](TRuntimeNode::TList keys, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  287. return {items.back(), keys.front(), empty, empty};
  288. },
  289. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  290. return {items.back(), state.front(), state[1U], state[2U]};
  291. },
  292. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  293. state.insert(state.cbegin(), keys.cbegin(), keys.cend());
  294. return {pb.NewList(dataType, state)};
  295. }),
  296. [&](TRuntimeNode::TList items) -> TRuntimeNode {
  297. return pb.Fold1(items.front(),
  298. [&](TRuntimeNode item) { return item; },
  299. [&](TRuntimeNode item, TRuntimeNode state) {
  300. return pb.AggrConcat(pb.AggrConcat(state, pb.NewDataLiteral<NUdf::EDataSlot::String>(" / ")), item);
  301. }
  302. );
  303. }
  304. ));
  305. const auto graph = setup.BuildGraph(pgmReturn);
  306. const auto iterator = graph->GetValue().GetListIterator();
  307. NUdf::TUnboxedValue item;
  308. UNIT_ASSERT(iterator.Next(item));
  309. UNBOXED_VALUE_STR_EQUAL(item, "key one / value 2 / value 1 / value 5 / value 4");
  310. UNIT_ASSERT(iterator.Next(item));
  311. UNBOXED_VALUE_STR_EQUAL(item, "key two / value 4 / value 3 / value 3 / value 2");
  312. UNIT_ASSERT(!iterator.Next(item));
  313. UNIT_ASSERT(!iterator.Next(item));
  314. }
  315. Y_UNIT_TEST_LLVM(TestDoNotCalculateUnusedOutput) {
  316. TSetup<LLVM> setup;
  317. TProgramBuilder& pb = *setup.PgmBuilder;
  318. const auto dataType = pb.NewDataType(NUdf::TDataType<const char*>::Id);
  319. const auto optionalType = pb.NewOptionalType(dataType);
  320. const auto tupleType = pb.NewTupleType({dataType, optionalType, dataType});
  321. const auto keyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("key one");
  322. const auto keyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("key two");
  323. const auto value1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 1");
  324. const auto value2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 2");
  325. const auto value3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 3");
  326. const auto value4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 4");
  327. const auto value5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 5");
  328. const auto empty = pb.NewDataLiteral<NUdf::EDataSlot::String>("");
  329. const auto none = pb.NewEmptyOptional(optionalType);
  330. const auto data1 = pb.NewTuple(tupleType, {keyOne, none, value1});
  331. const auto data2 = pb.NewTuple(tupleType, {keyTwo, none, value2});
  332. const auto data3 = pb.NewTuple(tupleType, {keyTwo, none, value3});
  333. const auto data4 = pb.NewTuple(tupleType, {keyOne, none, value4});
  334. const auto data5 = pb.NewTuple(tupleType, {keyOne, none, value5});
  335. const auto data6 = pb.NewTuple(tupleType, {keyOne, none, value1});
  336. const auto data7 = pb.NewTuple(tupleType, {keyOne, none, value2});
  337. const auto data8 = pb.NewTuple(tupleType, {keyTwo, none, value3});
  338. const auto data9 = pb.NewTuple(tupleType, {keyTwo, none, value4});
  339. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7, data8, data9});
  340. const auto landmine = pb.NewDataLiteral<NUdf::EDataSlot::String>("ACHTUNG MINEN!");
  341. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(list),
  342. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)}; }), 0ULL,
  343. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  344. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  345. return {items[1U], items.back()};
  346. },
  347. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  348. return {pb.Concat(state.front(), items[1U]), pb.AggrConcat(pb.AggrConcat(state.back(), pb.NewDataLiteral<NUdf::EDataSlot::String>(", ")), items.back())};
  349. },
  350. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  351. return {pb.Unwrap(state.front(), landmine, __FILE__, __LINE__, 0), pb.AggrConcat(pb.AggrConcat(keys.front(), pb.NewDataLiteral<NUdf::EDataSlot::String>(": ")), state.back())};
  352. }),
  353. [&](TRuntimeNode::TList items) -> TRuntimeNode { return items.back(); }
  354. ));
  355. const auto graph = setup.BuildGraph(pgmReturn);
  356. const auto iterator = graph->GetValue().GetListIterator();
  357. NUdf::TUnboxedValue item;
  358. UNIT_ASSERT(iterator.Next(item));
  359. UNBOXED_VALUE_STR_EQUAL(item, "key one: value 1, value 4, value 5, value 1, value 2");
  360. UNIT_ASSERT(iterator.Next(item));
  361. UNBOXED_VALUE_STR_EQUAL(item, "key two: value 2, value 3, value 3, value 4");
  362. UNIT_ASSERT(!iterator.Next(item));
  363. UNIT_ASSERT(!iterator.Next(item));
  364. }
  365. Y_UNIT_TEST_LLVM(TestThinAllLambdas) {
  366. TSetup<LLVM> setup;
  367. TProgramBuilder& pb = *setup.PgmBuilder;
  368. const auto tupleType = pb.NewTupleType({});
  369. const auto data = pb.NewTuple({});
  370. const auto list = pb.NewList(tupleType, {data, data, data, data});
  371. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(list),
  372. [](TRuntimeNode) -> TRuntimeNode::TList { return {}; }), 0ULL,
  373. [](TRuntimeNode::TList items) { return items; },
  374. [](TRuntimeNode::TList, TRuntimeNode::TList items) { return items; },
  375. [](TRuntimeNode::TList, TRuntimeNode::TList, TRuntimeNode::TList state) { return state; },
  376. [](TRuntimeNode::TList, TRuntimeNode::TList state) { return state; }),
  377. [&](TRuntimeNode::TList) { return pb.NewTuple({}); }
  378. ));
  379. const auto graph = setup.BuildGraph(pgmReturn);
  380. const auto iterator = graph->GetValue().GetListIterator();
  381. NUdf::TUnboxedValue item;
  382. UNIT_ASSERT(!iterator.Next(item));
  383. UNIT_ASSERT(!iterator.Next(item));
  384. }
  385. #if !defined(MKQL_RUNTIME_VERSION) || MKQL_RUNTIME_VERSION >= 46u
  386. Y_UNIT_TEST_LLVM(TestHasLimitButPasstroughtYields) {
  387. TTestStreamParams params;
  388. params.TestYieldStreamData = {0, 1, 0, 2, TTestStreamParams::Yield, 0, TTestStreamParams::Yield, 1, 2, 0, 1, 3, 0, TTestStreamParams::Yield, 1, 2};
  389. TSetup<LLVM> setup(GetNodeFactory(params));
  390. TProgramBuilder& pb = *setup.PgmBuilder;
  391. const auto stream = MakeStream<LLVM>(setup);
  392. const auto pgmReturn = pb.FromFlow(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(stream),
  393. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Member(item, "a"), pb.Member(item, "b")}; }), -123456789LL,
  394. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  395. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return items; },
  396. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {state.front(), pb.AggrConcat(state.back(), items.back())}; },
  397. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList { return state; }),
  398. [&](TRuntimeNode::TList items) { return items.back(); }
  399. ));
  400. const auto graph = setup.BuildGraph(pgmReturn);
  401. const auto streamVal = graph->GetValue();
  402. NUdf::TUnboxedValue result;
  403. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Yield);
  404. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Yield);
  405. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Yield);
  406. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Ok);
  407. UNIT_ASSERT_VALUES_EQUAL(TStringBuf(result.AsStringRef()), "00000");
  408. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Ok);
  409. UNIT_ASSERT_VALUES_EQUAL(TStringBuf(result.AsStringRef()), "1111");
  410. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Ok);
  411. UNIT_ASSERT_VALUES_EQUAL(TStringBuf(result.AsStringRef()), "222");
  412. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Ok);
  413. UNIT_ASSERT_VALUES_EQUAL(TStringBuf(result.AsStringRef()), "3");
  414. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Finish);
  415. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Finish);
  416. }
  417. #endif
  418. #if !defined(MKQL_RUNTIME_VERSION) || MKQL_RUNTIME_VERSION >= 46u
  419. Y_UNIT_TEST_LLVM(TestSkipYieldRespectsMemLimit) {
  420. TTestStreamParams params;
  421. params.StringSize = 50000;
  422. params.TestYieldStreamData = {0, TTestStreamParams::Yield, 2, TTestStreamParams::Yield, 3, TTestStreamParams::Yield, 4};
  423. TSetup<LLVM> setup(GetNodeFactory(params));
  424. TProgramBuilder& pb = *setup.PgmBuilder;
  425. const auto stream = MakeStream<LLVM>(setup);
  426. const auto pgmReturn = pb.FromFlow(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(stream),
  427. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Member(item, "a"), pb.Member(item, "b")}; }), -100000LL,
  428. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  429. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return items; },
  430. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {state.front(), pb.AggrConcat(state.back(), items.back())}; },
  431. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList { return state; }),
  432. [&](TRuntimeNode::TList items) { return items.back(); }
  433. ));
  434. const auto graph = setup.BuildGraph(pgmReturn);
  435. const auto streamVal = graph->GetValue();
  436. NUdf::TUnboxedValue result;
  437. // skip first 2 yields
  438. UNIT_ASSERT_VALUES_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Yield);
  439. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Yield);
  440. // return all the collected values
  441. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Ok);
  442. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Ok);
  443. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Ok);
  444. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Yield);
  445. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Ok);
  446. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Finish);
  447. UNIT_ASSERT_EQUAL(streamVal.Fetch(result), NUdf::EFetchStatus::Finish);
  448. }
  449. #endif
  450. }
  451. Y_UNIT_TEST_SUITE(TMiniKQLWideCombinerPerfTest) {
  452. Y_UNIT_TEST_LLVM(TestSumDoubleBooleanKeys) {
  453. TSetup<LLVM> setup;
  454. double positive = 0.0, negative = 0.0;
  455. const auto t = TInstant::Now();
  456. for (const auto& sample : I8Samples) {
  457. (sample.second > 0.0 ? positive : negative) += sample.second;
  458. }
  459. const auto cppTime = TInstant::Now() - t;
  460. TProgramBuilder& pb = *setup.PgmBuilder;
  461. const auto listType = pb.NewListType(pb.NewDataType(NUdf::TDataType<double>::Id));
  462. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  463. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  464. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {item}; }), 0ULL,
  465. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {pb.AggrGreater(items.front(), pb.NewDataLiteral(0.0))}; },
  466. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return items; },
  467. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.front())}; },
  468. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList { return state; }),
  469. [&](TRuntimeNode::TList items) { return items.front(); }
  470. ));
  471. const auto graph = setup.BuildGraph(pgmReturn, {list});
  472. NUdf::TUnboxedValue* items = nullptr;
  473. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(I8Samples.size(), items));
  474. std::transform(I8Samples.cbegin(), I8Samples.cend(), items, [](const std::pair<i8, double> s){ return ToValue<double>(s.second); });
  475. const auto t1 = TInstant::Now();
  476. const auto& value = graph->GetValue();
  477. const auto first = value.GetElement(0);
  478. const auto second = value.GetElement(1);
  479. const auto t2 = TInstant::Now();
  480. if (first.template Get<double>() > 0.0) {
  481. UNIT_ASSERT_VALUES_EQUAL(first.template Get<double>(), positive);
  482. UNIT_ASSERT_VALUES_EQUAL(second.template Get<double>(), negative);
  483. } else {
  484. UNIT_ASSERT_VALUES_EQUAL(first.template Get<double>(), negative);
  485. UNIT_ASSERT_VALUES_EQUAL(second.template Get<double>(), positive);
  486. }
  487. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  488. }
  489. Y_UNIT_TEST_LLVM(TestMinMaxSumDoubleBooleanKeys) {
  490. TSetup<LLVM> setup;
  491. auto samples = I8Samples;
  492. samples.emplace_back(-1, -1.0); //ensure to have at least one negative value
  493. samples.emplace_back(1, 1.0); //ensure to have at least one positive value
  494. double pSum = 0.0, nSum = 0.0, pMax = 0.0, nMax = -1000.0, pMin = 1000.0, nMin = 0.0;
  495. const auto t = TInstant::Now();
  496. for (const auto& sample : samples) {
  497. if (sample.second > 0.0) {
  498. pSum += sample.second;
  499. pMax = std::max(pMax, sample.second);
  500. pMin = std::min(pMin, sample.second);
  501. } else {
  502. nSum += sample.second;
  503. nMax = std::max(nMax, sample.second);
  504. nMin = std::min(nMin, sample.second);
  505. }
  506. }
  507. const auto cppTime = TInstant::Now() - t;
  508. TProgramBuilder& pb = *setup.PgmBuilder;
  509. const auto listType = pb.NewListType(pb.NewDataType(NUdf::TDataType<double>::Id));
  510. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  511. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  512. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {item}; }), 0ULL,
  513. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {pb.AggrGreater(items.front(), pb.NewDataLiteral(0.0))}; },
  514. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front(), items.front(), items.front()}; },
  515. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  516. return {pb.AggrAdd(state.front(), items.front()), pb.AggrMin(state[1U], items.front()), pb.AggrMax(state.back(), items.back()) };
  517. },
  518. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList { return state; }),
  519. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  520. ));
  521. const auto graph = setup.BuildGraph(pgmReturn, {list});
  522. NUdf::TUnboxedValue* items = nullptr;
  523. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(samples.size(), items));
  524. std::transform(samples.cbegin(), samples.cend(), items, [](const std::pair<i8, double> s){ return ToValue<double>(s.second); });
  525. const auto t1 = TInstant::Now();
  526. const auto& value = graph->GetValue();
  527. const auto first = value.GetElement(0);
  528. const auto second = value.GetElement(1);
  529. const auto t2 = TInstant::Now();
  530. if (first.GetElement(0).template Get<double>() > 0.0) {
  531. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(0).template Get<double>(), pSum);
  532. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(1).template Get<double>(), pMin);
  533. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(2).template Get<double>(), pMax);
  534. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(0).template Get<double>(), nSum);
  535. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(1).template Get<double>(), nMin);
  536. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(2).template Get<double>(), nMax);
  537. } else {
  538. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(0).template Get<double>(), nSum);
  539. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(1).template Get<double>(), nMin);
  540. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(2).template Get<double>(), nMax);
  541. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(0).template Get<double>(), pSum);
  542. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(1).template Get<double>(), pMin);
  543. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(2).template Get<double>(), pMax);
  544. }
  545. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  546. }
  547. Y_UNIT_TEST_LLVM(TestSumDoubleSmallKey) {
  548. TSetup<LLVM> setup;
  549. std::unordered_map<i8, double> expects(201);
  550. const auto t = TInstant::Now();
  551. for (const auto& sample : I8Samples) {
  552. expects.emplace(sample.first, 0.0).first->second += sample.second;
  553. }
  554. const auto cppTime = TInstant::Now() - t;
  555. std::vector<std::pair<i8, double>> one, two;
  556. one.reserve(expects.size());
  557. two.reserve(expects.size());
  558. one.insert(one.cend(), expects.cbegin(), expects.cend());
  559. std::sort(one.begin(), one.end(), [](const std::pair<i8, double> l, const std::pair<i8, double> r){ return l.first < r.first; });
  560. TProgramBuilder& pb = *setup.PgmBuilder;
  561. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<i8>::Id), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  562. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  563. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  564. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }), 0ULL,
  565. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  566. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back()}; },
  567. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back())}; },
  568. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {keys.front(), state.front()}; }),
  569. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  570. ));
  571. const auto graph = setup.BuildGraph(pgmReturn, {list});
  572. NUdf::TUnboxedValue* items = nullptr;
  573. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(I8Samples.size(), items));
  574. for (const auto& sample : I8Samples) {
  575. NUdf::TUnboxedValue* pair = nullptr;
  576. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  577. pair[0] = NUdf::TUnboxedValuePod(sample.first);
  578. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  579. }
  580. const auto t1 = TInstant::Now();
  581. const auto& value = graph->GetValue();
  582. const auto t2 = TInstant::Now();
  583. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  584. const auto ptr = value.GetElements();
  585. for (size_t i = 0ULL; i < expects.size(); ++i) {
  586. two.emplace_back(ptr[i].GetElement(0).template Get<i8>(), ptr[i].GetElement(1).template Get<double>());
  587. }
  588. std::sort(two.begin(), two.end(), [](const std::pair<i8, double> l, const std::pair<i8, double> r){ return l.first < r.first; });
  589. UNIT_ASSERT_VALUES_EQUAL(one, two);
  590. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  591. }
  592. Y_UNIT_TEST_LLVM(TestMinMaxSumDoubleSmallKey) {
  593. TSetup<LLVM> setup;
  594. std::unordered_map<i8, std::array<double, 3U>> expects(201);
  595. const auto t = TInstant::Now();
  596. for (const auto& sample : I8Samples) {
  597. auto& item = expects.emplace(sample.first, std::array<double, 3U>{0.0, std::numeric_limits<double>::max(), std::numeric_limits<double>::min()}).first->second;
  598. std::get<0U>(item) += sample.second;
  599. std::get<1U>(item) = std::min(std::get<1U>(item), sample.second);
  600. std::get<2U>(item) = std::max(std::get<2U>(item), sample.second);
  601. }
  602. const auto cppTime = TInstant::Now() - t;
  603. std::vector<std::pair<i8, std::array<double, 3U>>> one, two;
  604. one.reserve(expects.size());
  605. two.reserve(expects.size());
  606. one.insert(one.cend(), expects.cbegin(), expects.cend());
  607. std::sort(one.begin(), one.end(), [](const std::pair<i8, std::array<double, 3U>> l, const std::pair<i8, std::array<double, 3U>> r){ return l.first < r.first; });
  608. TProgramBuilder& pb = *setup.PgmBuilder;
  609. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<i8>::Id), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  610. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  611. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  612. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }), 0ULL,
  613. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  614. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back(), items.back(), items.back()}; },
  615. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back()), pb.AggrMin(state[1U], items.back()), pb.AggrMax(state.back(), items.back())}; },
  616. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { state.insert(state.cbegin(), keys.front()); return state; }),
  617. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  618. ));
  619. const auto graph = setup.BuildGraph(pgmReturn, {list});
  620. NUdf::TUnboxedValue* items = nullptr;
  621. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(I8Samples.size(), items));
  622. for (const auto& sample : I8Samples) {
  623. NUdf::TUnboxedValue* pair = nullptr;
  624. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  625. pair[0] = NUdf::TUnboxedValuePod(sample.first);
  626. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  627. }
  628. const auto t1 = TInstant::Now();
  629. const auto& value = graph->GetValue();
  630. const auto t2 = TInstant::Now();
  631. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  632. const auto ptr = value.GetElements();
  633. for (size_t i = 0ULL; i < expects.size(); ++i) {
  634. two.emplace_back(ptr[i].GetElement(0).template Get<i8>(), std::array<double, 3U>{ptr[i].GetElement(1).template Get<double>(), ptr[i].GetElement(2).template Get<double>(), ptr[i].GetElement(3).template Get<double>()});
  635. }
  636. std::sort(two.begin(), two.end(), [](const std::pair<i8, std::array<double, 3U>> l, const std::pair<i8, std::array<double, 3U>> r){ return l.first < r.first; });
  637. UNIT_ASSERT_VALUES_EQUAL(one, two);
  638. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  639. }
  640. Y_UNIT_TEST_LLVM(TestSumDoubleStringKey) {
  641. TSetup<LLVM> setup;
  642. std::vector<std::pair<std::string, double>> stringI8Samples(I8Samples.size());
  643. std::transform(I8Samples.cbegin(), I8Samples.cend(), stringI8Samples.begin(), [](std::pair<i8, double> src){ return std::make_pair(ToString(src.first), src.second); });
  644. std::unordered_map<std::string, double> expects(201);
  645. const auto t = TInstant::Now();
  646. for (const auto& sample : stringI8Samples) {
  647. expects.emplace(sample.first, 0.0).first->second += sample.second;
  648. }
  649. const auto cppTime = TInstant::Now() - t;
  650. std::vector<std::pair<std::string_view, double>> one, two;
  651. one.reserve(expects.size());
  652. two.reserve(expects.size());
  653. one.insert(one.cend(), expects.cbegin(), expects.cend());
  654. std::sort(one.begin(), one.end(), [](const std::pair<std::string_view, double> l, const std::pair<std::string_view, double> r){ return l.first < r.first; });
  655. TProgramBuilder& pb = *setup.PgmBuilder;
  656. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<const char*>::Id), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  657. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  658. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  659. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }), 0ULL,
  660. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  661. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back()}; },
  662. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back())}; },
  663. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {keys.front(), state.front()}; }),
  664. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  665. ));
  666. const auto graph = setup.BuildGraph(pgmReturn, {list});
  667. NUdf::TUnboxedValue* items = nullptr;
  668. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(stringI8Samples.size(), items));
  669. for (const auto& sample : stringI8Samples) {
  670. NUdf::TUnboxedValue* pair = nullptr;
  671. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  672. pair[0] = NUdf::TUnboxedValuePod::Embedded(sample.first);
  673. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  674. }
  675. const auto t1 = TInstant::Now();
  676. const auto& value = graph->GetValue();
  677. const auto t2 = TInstant::Now();
  678. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  679. const auto ptr = value.GetElements();
  680. for (size_t i = 0ULL; i < expects.size(); ++i) {
  681. two.emplace_back(ptr[i].GetElements()->AsStringRef(), ptr[i].GetElement(1).template Get<double>());
  682. }
  683. std::sort(two.begin(), two.end(), [](const std::pair<std::string_view, double> l, const std::pair<std::string_view, double> r){ return l.first < r.first; });
  684. UNIT_ASSERT_VALUES_EQUAL(one, two);
  685. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  686. }
  687. Y_UNIT_TEST_LLVM(TestMinMaxSumDoubleStringKey) {
  688. TSetup<LLVM> setup;
  689. std::vector<std::pair<std::string, double>> stringI8Samples(I8Samples.size());
  690. std::transform(I8Samples.cbegin(), I8Samples.cend(), stringI8Samples.begin(), [](std::pair<i8, double> src){ return std::make_pair(ToString(src.first), src.second); });
  691. std::unordered_map<std::string, std::array<double, 3U>> expects(201);
  692. const auto t = TInstant::Now();
  693. for (const auto& sample : stringI8Samples) {
  694. auto& item = expects.emplace(sample.first, std::array<double, 3U>{0.0, +1E7, -1E7}).first->second;
  695. std::get<0U>(item) += sample.second;
  696. std::get<1U>(item) = std::min(std::get<1U>(item), sample.second);
  697. std::get<2U>(item) = std::max(std::get<2U>(item), sample.second);
  698. }
  699. const auto cppTime = TInstant::Now() - t;
  700. std::vector<std::pair<std::string_view, std::array<double, 3U>>> one, two;
  701. one.reserve(expects.size());
  702. two.reserve(expects.size());
  703. one.insert(one.cend(), expects.cbegin(), expects.cend());
  704. std::sort(one.begin(), one.end(), [](const std::pair<std::string_view, std::array<double, 3U>> l, const std::pair<std::string_view, std::array<double, 3U>> r){ return l.first < r.first; });
  705. TProgramBuilder& pb = *setup.PgmBuilder;
  706. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<const char*>::Id), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  707. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  708. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  709. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }), 0ULL,
  710. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  711. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back(), items.back(), items.back()}; },
  712. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back()), pb.AggrMin(state[1U], items.back()), pb.AggrMax(state.back(), items.back())}; },
  713. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { state.insert(state.cbegin(), keys.front()); return state; }),
  714. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  715. ));
  716. const auto graph = setup.BuildGraph(pgmReturn, {list});
  717. NUdf::TUnboxedValue* items = nullptr;
  718. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(stringI8Samples.size(), items));
  719. for (const auto& sample : stringI8Samples) {
  720. NUdf::TUnboxedValue* pair = nullptr;
  721. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  722. pair[0] = NUdf::TUnboxedValuePod::Embedded(sample.first);
  723. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  724. }
  725. const auto t1 = TInstant::Now();
  726. const auto& value = graph->GetValue();
  727. const auto t2 = TInstant::Now();
  728. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  729. const auto ptr = value.GetElements();
  730. for (size_t i = 0ULL; i < expects.size(); ++i) {
  731. two.emplace_back(ptr[i].GetElements()->AsStringRef(), std::array<double, 3U>{ptr[i].GetElement(1).template Get<double>(), ptr[i].GetElement(2).template Get<double>(), ptr[i].GetElement(3).template Get<double>()});
  732. }
  733. std::sort(two.begin(), two.end(), [](const std::pair<std::string_view, std::array<double, 3U>> l, const std::pair<std::string_view, std::array<double, 3U>> r){ return l.first < r.first; });
  734. UNIT_ASSERT_VALUES_EQUAL(one, two);
  735. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  736. }
  737. Y_UNIT_TEST_LLVM(TestMinMaxSumTupleKey) {
  738. TSetup<LLVM> setup;
  739. std::vector<std::pair<std::pair<ui32, std::string>, double>> pairSamples(Ui16Samples.size());
  740. std::transform(Ui16Samples.cbegin(), Ui16Samples.cend(), pairSamples.begin(), [](std::pair<ui16, double> src){ return std::make_pair(std::make_pair(ui32(src.first / 10U % 100U), ToString(src.first % 10U)), src.second); });
  741. struct TPairHash { size_t operator()(const std::pair<ui32, std::string>& p) const { return CombineHashes(std::hash<ui32>()(p.first), std::hash<std::string_view>()(p.second)); } };
  742. std::unordered_map<std::pair<ui32, std::string>, std::array<double, 3U>, TPairHash> expects;
  743. const auto t = TInstant::Now();
  744. for (const auto& sample : pairSamples) {
  745. auto& item = expects.emplace(sample.first, std::array<double, 3U>{0.0, +1E7, -1E7}).first->second;
  746. std::get<0U>(item) += sample.second;
  747. std::get<1U>(item) = std::min(std::get<1U>(item), sample.second);
  748. std::get<2U>(item) = std::max(std::get<2U>(item), sample.second);
  749. }
  750. const auto cppTime = TInstant::Now() - t;
  751. std::vector<std::pair<std::pair<ui32, std::string>, std::array<double, 3U>>> one, two;
  752. one.reserve(expects.size());
  753. two.reserve(expects.size());
  754. one.insert(one.cend(), expects.cbegin(), expects.cend());
  755. std::sort(one.begin(), one.end(), [](const std::pair<std::pair<ui32, std::string_view>, std::array<double, 3U>> l, const std::pair<std::pair<ui32, std::string_view>, std::array<double, 3U>> r){ return l.first < r.first; });
  756. TProgramBuilder& pb = *setup.PgmBuilder;
  757. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewTupleType({pb.NewDataType(NUdf::TDataType<ui32>::Id), pb.NewDataType(NUdf::TDataType<const char*>::Id)}), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  758. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  759. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  760. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(pb.Nth(item, 0U), 0U), pb.Nth(pb.Nth(item, 0U), 1U), pb.Nth(item, 1U) }; }), 0ULL,
  761. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front(), items[1U]}; },
  762. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back(), items.back(), items.back()}; },
  763. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  764. return {pb.AggrAdd(state.front(), items.back()), pb.AggrMin(state[1U], items.back()), pb.AggrMax(state.back(), items.back()) };
  765. },
  766. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {keys.front(), keys.back(), state.front(), state[1U], state.back()}; }),
  767. [&](TRuntimeNode::TList items) { return pb.NewTuple({pb.NewTuple({items[0U], items[1U]}), items[2U], items[3U], items[4U]}); }
  768. ));
  769. const auto graph = setup.BuildGraph(pgmReturn, {list});
  770. NUdf::TUnboxedValue* items = nullptr;
  771. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(pairSamples.size(), items));
  772. for (const auto& sample : pairSamples) {
  773. NUdf::TUnboxedValue* pair = nullptr;
  774. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  775. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  776. NUdf::TUnboxedValue* keys = nullptr;
  777. pair[0] = graph->GetHolderFactory().CreateDirectArrayHolder(2U, keys);
  778. keys[0] = NUdf::TUnboxedValuePod(sample.first.first);
  779. keys[1] = NUdf::TUnboxedValuePod::Embedded(sample.first.second);
  780. }
  781. const auto t1 = TInstant::Now();
  782. const auto& value = graph->GetValue();
  783. const auto t2 = TInstant::Now();
  784. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  785. const auto ptr = value.GetElements();
  786. for (size_t i = 0ULL; i < expects.size(); ++i) {
  787. const auto elements = ptr[i].GetElements();
  788. two.emplace_back(std::make_pair(elements[0].GetElement(0).template Get<ui32>(), (elements[0].GetElements()[1]).AsStringRef()), std::array<double, 3U>{elements[1].template Get<double>(), elements[2].template Get<double>(), elements[3].template Get<double>()});
  789. }
  790. std::sort(two.begin(), two.end(), [](const std::pair<std::pair<ui32, std::string_view>, std::array<double, 3U>> l, const std::pair<std::pair<ui32, std::string_view>, std::array<double, 3U>> r){ return l.first < r.first; });
  791. UNIT_ASSERT_VALUES_EQUAL(one, two);
  792. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  793. }
  794. Y_UNIT_TEST_LLVM(TestTpch) {
  795. TSetup<LLVM> setup;
  796. struct TPairHash { size_t operator()(const std::pair<std::string_view, std::string_view>& p) const { return CombineHashes(std::hash<std::string_view>()(p.first), std::hash<std::string_view>()(p.second)); } };
  797. std::unordered_map<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>, TPairHash> expects;
  798. const auto t = TInstant::Now();
  799. for (auto& sample : TpchSamples) {
  800. if (std::get<0U>(sample) <= border) {
  801. const auto& ins = expects.emplace(std::pair<std::string_view, std::string_view>{std::get<1U>(sample), std::get<2U>(sample)}, std::pair<ui64, std::array<double, 5U>>{0ULL, {0., 0., 0., 0., 0.}});
  802. auto& item = ins.first->second;
  803. ++item.first;
  804. std::get<0U>(item.second) += std::get<3U>(sample);
  805. std::get<1U>(item.second) += std::get<5U>(sample);
  806. std::get<2U>(item.second) += std::get<6U>(sample);
  807. const auto v = std::get<3U>(sample) * (1. - std::get<5U>(sample));
  808. std::get<3U>(item.second) += v;
  809. std::get<4U>(item.second) += v * (1. + std::get<4U>(sample));
  810. }
  811. }
  812. for (auto& item : expects) {
  813. std::get<1U>(item.second.second) /= item.second.first;
  814. }
  815. const auto cppTime = TInstant::Now() - t;
  816. std::vector<std::pair<std::pair<std::string, std::string>, std::pair<ui64, std::array<double, 5U>>>> one, two;
  817. one.reserve(expects.size());
  818. two.reserve(expects.size());
  819. one.insert(one.cend(), expects.cbegin(), expects.cend());
  820. std::sort(one.begin(), one.end(), [](const std::pair<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>> l, const std::pair<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>> r){ return l.first < r.first; });
  821. TProgramBuilder& pb = *setup.PgmBuilder;
  822. const auto listType = pb.NewListType(pb.NewTupleType({
  823. pb.NewDataType(NUdf::TDataType<ui64>::Id),
  824. pb.NewDataType(NUdf::TDataType<const char*>::Id),
  825. pb.NewDataType(NUdf::TDataType<const char*>::Id),
  826. pb.NewDataType(NUdf::TDataType<double>::Id),
  827. pb.NewDataType(NUdf::TDataType<double>::Id),
  828. pb.NewDataType(NUdf::TDataType<double>::Id),
  829. pb.NewDataType(NUdf::TDataType<double>::Id)
  830. }));
  831. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  832. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideCombiner(
  833. pb.WideFilter(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  834. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U), pb.Nth(item, 3U), pb.Nth(item, 4U), pb.Nth(item, 5U), pb.Nth(item, 6U)}; }),
  835. [&](TRuntimeNode::TList items) { return pb.AggrLessOrEqual(items.front(), pb.NewDataLiteral<ui64>(border)); }
  836. ), 0ULL,
  837. [&](TRuntimeNode::TList item) -> TRuntimeNode::TList { return {item[1U], item[2U]}; },
  838. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  839. const auto price = items[3U];
  840. const auto disco = items[5U];
  841. const auto v = pb.Mul(price, pb.Sub(pb.NewDataLiteral<double>(1.), disco));
  842. return {pb.NewDataLiteral<ui64>(1ULL), price, disco, items[6U], v, pb.Mul(v, pb.Add(pb.NewDataLiteral<double>(1.), items[4U]))};
  843. },
  844. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  845. const auto price = items[3U];
  846. const auto disco = items[5U];
  847. const auto v = pb.Mul(price, pb.Sub(pb.NewDataLiteral<double>(1.), disco));
  848. return {pb.Increment(state[0U]), pb.AggrAdd(state[1U], price), pb.AggrAdd(state[2U], disco), pb.AggrAdd(state[3U], items[6U]), pb.AggrAdd(state[4U], v), pb.AggrAdd(state[5U], pb.Mul(v, pb.Add(pb.NewDataLiteral<double>(1.), items[4U])))};
  849. },
  850. [&](TRuntimeNode::TList key, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {key.front(), key.back(), state[0U], state[1U], pb.Div(state[2U], state[0U]), state[3U], state[4U], state[5U]}; }),
  851. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  852. ));
  853. const auto graph = setup.BuildGraph(pgmReturn, {list});
  854. NUdf::TUnboxedValue* items = nullptr;
  855. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(TpchSamples.size(), items));
  856. for (const auto& sample : TpchSamples) {
  857. NUdf::TUnboxedValue* elements = nullptr;
  858. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(7U, elements);
  859. elements[0] = NUdf::TUnboxedValuePod(std::get<0U>(sample));
  860. elements[1] = NUdf::TUnboxedValuePod::Embedded(std::get<1U>(sample));
  861. elements[2] = NUdf::TUnboxedValuePod::Embedded(std::get<2U>(sample));
  862. elements[3] = NUdf::TUnboxedValuePod(std::get<3U>(sample));
  863. elements[4] = NUdf::TUnboxedValuePod(std::get<4U>(sample));
  864. elements[5] = NUdf::TUnboxedValuePod(std::get<5U>(sample));
  865. elements[6] = NUdf::TUnboxedValuePod(std::get<6U>(sample));
  866. }
  867. const auto t1 = TInstant::Now();
  868. const auto& value = graph->GetValue();
  869. const auto t2 = TInstant::Now();
  870. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  871. const auto ptr = value.GetElements();
  872. for (size_t i = 0ULL; i < expects.size(); ++i) {
  873. const auto elements = ptr[i].GetElements();
  874. two.emplace_back(std::make_pair(elements[0].AsStringRef(), elements[1].AsStringRef()), std::pair<ui64, std::array<double, 5U>>{elements[2].template Get<ui64>(), {elements[3].template Get<double>(), elements[4].template Get<double>(), elements[5].template Get<double>(), elements[6].template Get<double>(), elements[7].template Get<double>()}});
  875. }
  876. std::sort(two.begin(), two.end(), [](const std::pair<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>> l, const std::pair<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>> r){ return l.first < r.first; });
  877. UNIT_ASSERT_VALUES_EQUAL(one, two);
  878. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  879. }
  880. }
  881. #endif
  882. #if !defined(MKQL_RUNTIME_VERSION) || MKQL_RUNTIME_VERSION >= 29u
  883. Y_UNIT_TEST_SUITE(TMiniKQLWideLastCombinerTest) {
  884. Y_UNIT_TEST_LLVM_SPILLING(TestLongStringsRefCounting) {
  885. // callable WideLastCombinerWithSpilling was introduced in 49 version of runtime
  886. if (MKQL_RUNTIME_VERSION < 49U && SPILLING) return;
  887. TSetup<LLVM, SPILLING> setup;
  888. TProgramBuilder& pb = *setup.PgmBuilder;
  889. const auto dataType = pb.NewDataType(NUdf::TDataType<const char*>::Id);
  890. const auto optionalType = pb.NewOptionalType(dataType);
  891. const auto tupleType = pb.NewTupleType({dataType, dataType});
  892. const auto keyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("key one");
  893. const auto keyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("key two");
  894. const auto longKeyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long key one");
  895. const auto longKeyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long key two");
  896. const auto value1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 1");
  897. const auto value2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 2");
  898. const auto value3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 3");
  899. const auto value4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 4");
  900. const auto value5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 5");
  901. const auto value6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 6");
  902. const auto value7 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 7");
  903. const auto value8 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 8");
  904. const auto value9 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 9");
  905. const auto data1 = pb.NewTuple(tupleType, {keyOne, value1});
  906. const auto data2 = pb.NewTuple(tupleType, {keyTwo, value2});
  907. const auto data3 = pb.NewTuple(tupleType, {keyTwo, value3});
  908. const auto data4 = pb.NewTuple(tupleType, {longKeyOne, value4});
  909. const auto data5 = pb.NewTuple(tupleType, {longKeyTwo, value5});
  910. const auto data6 = pb.NewTuple(tupleType, {longKeyTwo, value6});
  911. const auto data7 = pb.NewTuple(tupleType, {longKeyTwo, value7});
  912. const auto data8 = pb.NewTuple(tupleType, {longKeyTwo, value8});
  913. const auto data9 = pb.NewTuple(tupleType, {longKeyTwo, value9});
  914. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7, data8, data9});
  915. const auto pgmReturn = pb.FromFlow(pb.NarrowMap(WideLastCombiner<SPILLING>(pb, pb.ExpandMap(pb.ToFlow(list),
  916. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  917. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  918. [&](TRuntimeNode::TList keys, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  919. return {pb.NewOptional(items.back()), pb.NewOptional(keys.front()), pb.NewEmptyOptional(optionalType), pb.NewEmptyOptional(optionalType)};
  920. },
  921. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  922. return {pb.NewOptional(items.back()), state.front(), state[1U], state[2U]};
  923. },
  924. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  925. state.erase(state.cbegin());
  926. return {pb.FlatMap(pb.NewList(optionalType, state), [&](TRuntimeNode item) { return item; } )};
  927. }),
  928. [&](TRuntimeNode::TList items) -> TRuntimeNode {
  929. return pb.Fold1(items.front(),
  930. [&](TRuntimeNode item) { return item; },
  931. [&](TRuntimeNode item, TRuntimeNode state) {
  932. return pb.AggrConcat(pb.AggrConcat(state, pb.NewDataLiteral<NUdf::EDataSlot::String>(" / ")), item);
  933. }
  934. );
  935. }
  936. ));
  937. const auto graph = setup.BuildGraph(pgmReturn);
  938. if (SPILLING) {
  939. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  940. }
  941. const auto streamVal = graph->GetValue();
  942. std::unordered_set<TString> expected {
  943. "key one",
  944. "very long value 2 / key two",
  945. "very long key one",
  946. "very long value 8 / very long value 7 / very long value 6"
  947. };
  948. CheckIfStreamHasExpectedStringValues(streamVal, expected);
  949. }
  950. Y_UNIT_TEST_LLVM_SPILLING(TestLongStringsPasstroughtRefCounting) {
  951. // callable WideLastCombinerWithSpilling was introduced in 49 version of runtime
  952. if (MKQL_RUNTIME_VERSION < 49U && SPILLING) return;
  953. TSetup<LLVM, SPILLING> setup;
  954. TProgramBuilder& pb = *setup.PgmBuilder;
  955. const auto dataType = pb.NewDataType(NUdf::TDataType<const char*>::Id);
  956. const auto tupleType = pb.NewTupleType({dataType, dataType});
  957. const auto keyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("key one");
  958. const auto keyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("key two");
  959. const auto longKeyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long key one");
  960. const auto longKeyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long key two");
  961. const auto value1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 1");
  962. const auto value2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 2");
  963. const auto value3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 3");
  964. const auto value4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 4");
  965. const auto value5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 5");
  966. const auto value6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 6");
  967. const auto value7 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 7");
  968. const auto value8 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 8");
  969. const auto value9 = pb.NewDataLiteral<NUdf::EDataSlot::String>("very long value 9");
  970. const auto data1 = pb.NewTuple(tupleType, {keyOne, value1});
  971. const auto data2 = pb.NewTuple(tupleType, {keyTwo, value2});
  972. const auto data3 = pb.NewTuple(tupleType, {keyTwo, value3});
  973. const auto data4 = pb.NewTuple(tupleType, {longKeyOne, value4});
  974. const auto data5 = pb.NewTuple(tupleType, {longKeyTwo, value5});
  975. const auto data6 = pb.NewTuple(tupleType, {longKeyTwo, value6});
  976. const auto data7 = pb.NewTuple(tupleType, {longKeyTwo, value7});
  977. const auto data8 = pb.NewTuple(tupleType, {longKeyTwo, value8});
  978. const auto data9 = pb.NewTuple(tupleType, {longKeyTwo, value9});
  979. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7, data8, data9});
  980. const auto pgmReturn = pb.FromFlow(pb.NarrowMap(WideLastCombiner<SPILLING>(pb, pb.ExpandMap(pb.ToFlow(list),
  981. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  982. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  983. [&](TRuntimeNode::TList keys, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  984. return {items.back(), keys.front(), items.back(), items.front()};
  985. },
  986. [&](TRuntimeNode::TList keys, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  987. return {items.back(), keys.front(), state[2U], state.back()};
  988. },
  989. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  990. return state;
  991. }),
  992. [&](TRuntimeNode::TList items) -> TRuntimeNode {
  993. return pb.Fold1(pb.NewList(dataType, items),
  994. [&](TRuntimeNode item) { return item; },
  995. [&](TRuntimeNode item, TRuntimeNode state) {
  996. return pb.AggrConcat(pb.AggrConcat(state, pb.NewDataLiteral<NUdf::EDataSlot::String>(" / ")), item);
  997. }
  998. );
  999. }
  1000. ));
  1001. const auto graph = setup.BuildGraph(pgmReturn);
  1002. if (SPILLING) {
  1003. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1004. }
  1005. const auto streamVal = graph->GetValue();
  1006. std::unordered_set<TString> expected {
  1007. "very long value 1 / key one / very long value 1 / key one",
  1008. "very long value 3 / key two / very long value 2 / key two",
  1009. "very long value 4 / very long key one / very long value 4 / very long key one",
  1010. "very long value 9 / very long key two / very long value 5 / very long key two"
  1011. };
  1012. CheckIfStreamHasExpectedStringValues(streamVal, expected);
  1013. }
  1014. Y_UNIT_TEST_LLVM_SPILLING(TestDoNotCalculateUnusedInput) {
  1015. // Test is broken. Remove this if after YQL-18808.
  1016. if (SPILLING) return;
  1017. // callable WideLastCombinerWithSpilling was introduced in 49 version of runtime
  1018. if (MKQL_RUNTIME_VERSION < 49U && SPILLING) return;
  1019. TSetup<LLVM, SPILLING> setup;
  1020. TProgramBuilder& pb = *setup.PgmBuilder;
  1021. const auto dataType = pb.NewDataType(NUdf::TDataType<const char*>::Id);
  1022. const auto optionalType = pb.NewOptionalType(dataType);
  1023. const auto tupleType = pb.NewTupleType({dataType, optionalType, dataType});
  1024. const auto keyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("key one");
  1025. const auto keyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("key two");
  1026. const auto value1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 1");
  1027. const auto value2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 2");
  1028. const auto value3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 3");
  1029. const auto value4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 4");
  1030. const auto value5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 5");
  1031. const auto empty = pb.NewDataLiteral<NUdf::EDataSlot::String>("");
  1032. const auto none = pb.NewEmptyOptional(optionalType);
  1033. const auto data1 = pb.NewTuple(tupleType, {keyOne, none, value1});
  1034. const auto data2 = pb.NewTuple(tupleType, {keyTwo, none, value2});
  1035. const auto data3 = pb.NewTuple(tupleType, {keyTwo, none, value3});
  1036. const auto data4 = pb.NewTuple(tupleType, {keyOne, none, value4});
  1037. const auto data5 = pb.NewTuple(tupleType, {keyOne, none, value5});
  1038. const auto data6 = pb.NewTuple(tupleType, {keyOne, none, value1});
  1039. const auto data7 = pb.NewTuple(tupleType, {keyOne, none, value2});
  1040. const auto data8 = pb.NewTuple(tupleType, {keyTwo, none, value3});
  1041. const auto data9 = pb.NewTuple(tupleType, {keyTwo, none, value4});
  1042. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7, data8, data9});
  1043. const auto landmine = pb.NewDataLiteral<NUdf::EDataSlot::String>("ACHTUNG MINEN!");
  1044. const auto pgmReturn = pb.FromFlow(pb.NarrowMap(WideLastCombiner<SPILLING>(pb, pb.ExpandMap(pb.ToFlow(list),
  1045. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Unwrap(pb.Nth(item, 1U), landmine, __FILE__, __LINE__, 0), pb.Nth(item, 2U)}; }),
  1046. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  1047. [&](TRuntimeNode::TList keys, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  1048. return {items.back(), keys.front(), empty, empty};
  1049. },
  1050. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  1051. return {items.back(), state.front(), state[1U], state[2U]};
  1052. },
  1053. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  1054. state.insert(state.cbegin(), keys.cbegin(), keys.cend());
  1055. return {pb.NewList(dataType, state)};
  1056. }),
  1057. [&](TRuntimeNode::TList items) -> TRuntimeNode {
  1058. return pb.Fold1(items.front(),
  1059. [&](TRuntimeNode item) { return item; },
  1060. [&](TRuntimeNode item, TRuntimeNode state) {
  1061. return pb.AggrConcat(pb.AggrConcat(state, pb.NewDataLiteral<NUdf::EDataSlot::String>(" / ")), item);
  1062. }
  1063. );
  1064. }
  1065. ));
  1066. const auto graph = setup.BuildGraph(pgmReturn);
  1067. if (SPILLING) {
  1068. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1069. }
  1070. const auto streamVal = graph->GetValue();
  1071. std::unordered_set<TString> expected {
  1072. "key one / value 2 / value 1 / value 5 / value 4",
  1073. "key two / value 4 / value 3 / value 3 / value 2"
  1074. };
  1075. CheckIfStreamHasExpectedStringValues(streamVal, expected);
  1076. }
  1077. Y_UNIT_TEST_LLVM_SPILLING(TestDoNotCalculateUnusedOutput) {
  1078. // callable WideLastCombinerWithSpilling was introduced in 49 version of runtime
  1079. if (MKQL_RUNTIME_VERSION < 49U && SPILLING) return;
  1080. TSetup<LLVM, SPILLING> setup;
  1081. TProgramBuilder& pb = *setup.PgmBuilder;
  1082. const auto dataType = pb.NewDataType(NUdf::TDataType<const char*>::Id);
  1083. const auto optionalType = pb.NewOptionalType(dataType);
  1084. const auto tupleType = pb.NewTupleType({dataType, optionalType, dataType});
  1085. const auto keyOne = pb.NewDataLiteral<NUdf::EDataSlot::String>("key one");
  1086. const auto keyTwo = pb.NewDataLiteral<NUdf::EDataSlot::String>("key two");
  1087. const auto value1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 1");
  1088. const auto value2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 2");
  1089. const auto value3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 3");
  1090. const auto value4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 4");
  1091. const auto value5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("value 5");
  1092. const auto empty = pb.NewDataLiteral<NUdf::EDataSlot::String>("");
  1093. const auto none = pb.NewEmptyOptional(optionalType);
  1094. const auto data1 = pb.NewTuple(tupleType, {keyOne, none, value1});
  1095. const auto data2 = pb.NewTuple(tupleType, {keyTwo, none, value2});
  1096. const auto data3 = pb.NewTuple(tupleType, {keyTwo, none, value3});
  1097. const auto data4 = pb.NewTuple(tupleType, {keyOne, none, value4});
  1098. const auto data5 = pb.NewTuple(tupleType, {keyOne, none, value5});
  1099. const auto data6 = pb.NewTuple(tupleType, {keyOne, none, value1});
  1100. const auto data7 = pb.NewTuple(tupleType, {keyOne, none, value2});
  1101. const auto data8 = pb.NewTuple(tupleType, {keyTwo, none, value3});
  1102. const auto data9 = pb.NewTuple(tupleType, {keyTwo, none, value4});
  1103. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7, data8, data9});
  1104. const auto landmine = pb.NewDataLiteral<NUdf::EDataSlot::String>("ACHTUNG MINEN!");
  1105. const auto pgmReturn = pb.FromFlow(pb.NarrowMap(WideLastCombiner<SPILLING>(pb, pb.ExpandMap(pb.ToFlow(list),
  1106. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)}; }),
  1107. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  1108. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  1109. return {items[1U], items.back()};
  1110. },
  1111. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  1112. return {pb.Concat(state.front(), items[1U]), pb.AggrConcat(pb.AggrConcat(state.back(), pb.NewDataLiteral<NUdf::EDataSlot::String>(", ")), items.back())};
  1113. },
  1114. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  1115. return {pb.Unwrap(state.front(), landmine, __FILE__, __LINE__, 0), pb.AggrConcat(pb.AggrConcat(keys.front(), pb.NewDataLiteral<NUdf::EDataSlot::String>(": ")), state.back())};
  1116. }),
  1117. [&](TRuntimeNode::TList items) -> TRuntimeNode { return items.back(); }
  1118. ));
  1119. const auto graph = setup.BuildGraph(pgmReturn);
  1120. if (SPILLING) {
  1121. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1122. }
  1123. const auto streamVal = graph->GetValue();
  1124. std::unordered_set<TString> expected {
  1125. "key one: value 1, value 4, value 5, value 1, value 2",
  1126. "key two: value 2, value 3, value 3, value 4"
  1127. };
  1128. CheckIfStreamHasExpectedStringValues(streamVal, expected);
  1129. }
  1130. Y_UNIT_TEST_LLVM_SPILLING(TestThinAllLambdas) {
  1131. // callable WideLastCombinerWithSpilling was introduced in 49 version of runtime
  1132. if (MKQL_RUNTIME_VERSION < 49U && SPILLING) return;
  1133. TSetup<LLVM, SPILLING> setup;
  1134. TProgramBuilder& pb = *setup.PgmBuilder;
  1135. const auto tupleType = pb.NewTupleType({});
  1136. const auto data = pb.NewTuple({});
  1137. const auto list = pb.NewList(tupleType, {data, data, data, data});
  1138. const auto pgmReturn = pb.FromFlow(pb.NarrowMap(WideLastCombiner<SPILLING>(pb, pb.ExpandMap(pb.ToFlow(list),
  1139. [](TRuntimeNode) -> TRuntimeNode::TList { return {}; }),
  1140. [](TRuntimeNode::TList items) { return items; },
  1141. [](TRuntimeNode::TList, TRuntimeNode::TList items) { return items; },
  1142. [](TRuntimeNode::TList, TRuntimeNode::TList, TRuntimeNode::TList state) { return state; },
  1143. [](TRuntimeNode::TList, TRuntimeNode::TList state) { return state; }),
  1144. [&](TRuntimeNode::TList) { return pb.NewTuple({}); }
  1145. ));
  1146. const auto graph = setup.BuildGraph(pgmReturn);
  1147. const auto streamVal = graph->GetValue();
  1148. NUdf::TUnboxedValue item;
  1149. const auto fetchStatus = streamVal.Fetch(item);
  1150. UNIT_ASSERT_EQUAL(fetchStatus, NUdf::EFetchStatus::Finish);
  1151. }
  1152. Y_UNIT_TEST_LLVM(TestSpillingBucketsDistribution) {
  1153. const size_t expectedBucketsCount = 128;
  1154. const size_t sampleSize = 8 * 128;
  1155. TSetup<LLVM, true> setup;
  1156. std::vector<std::pair<ui64, ui64>> samples(sampleSize);
  1157. std::generate(samples.begin(), samples.end(), [key = (ui64)1] () mutable -> std::pair<ui64, ui64> {
  1158. key += 64;
  1159. return {key, 1};
  1160. });
  1161. TProgramBuilder& pb = *setup.PgmBuilder;
  1162. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<ui64>::Id), pb.NewDataType(NUdf::TDataType<ui64>::Id)}));
  1163. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1164. const auto pgmReturn = pb.FromFlow(pb.NarrowMap(pb.WideLastCombinerWithSpilling(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1165. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }),
  1166. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  1167. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back()}; },
  1168. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back())}; },
  1169. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {keys.front(), state.front()}; }),
  1170. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); }
  1171. ));
  1172. const auto spillerFactory = std::make_shared<TMockSpillerFactory>();
  1173. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1174. graph->GetContext().SpillerFactory = spillerFactory;
  1175. NUdf::TUnboxedValue* items = nullptr;
  1176. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(samples.size(), items));
  1177. for (const auto& sample : samples) {
  1178. NUdf::TUnboxedValue* pair = nullptr;
  1179. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  1180. pair[0] = NUdf::TUnboxedValuePod(sample.first);
  1181. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  1182. }
  1183. const auto& value = graph->GetValue();
  1184. NUdf::TUnboxedValue item;
  1185. while (value.Fetch(item) != NUdf::EFetchStatus::Finish) {
  1186. ;
  1187. }
  1188. UNIT_ASSERT_EQUAL_C(spillerFactory->GetCreatedSpillers().size(), 1, "WideLastCombiner expected to create one spiller ");
  1189. const auto wideCombinerSpiller = std::dynamic_pointer_cast<TMockSpiller>(spillerFactory->GetCreatedSpillers()[0]);
  1190. UNIT_ASSERT_C(wideCombinerSpiller != nullptr, "MockSpillerFactory expected to create only MockSpillers");
  1191. auto flushedBucketsSizes = wideCombinerSpiller->GetPutSizes();
  1192. UNIT_ASSERT_EQUAL_C(flushedBucketsSizes.size(), expectedBucketsCount, "Spiller doesn't Put expected number of buckets");
  1193. auto anyEmpty = std::any_of(flushedBucketsSizes.begin(), flushedBucketsSizes.end(), [](size_t size) { return size == 0; });
  1194. UNIT_ASSERT_C(!anyEmpty, "Spiller flushed empty bucket");
  1195. }
  1196. }
  1197. Y_UNIT_TEST_SUITE(TMiniKQLWideLastCombinerPerfTest) {
  1198. Y_UNIT_TEST_LLVM(TestSumDoubleBooleanKeys) {
  1199. TSetup<LLVM> setup;
  1200. double positive = 0.0, negative = 0.0;
  1201. const auto t = TInstant::Now();
  1202. for (const auto& sample : I8Samples) {
  1203. (sample.second > 0.0 ? positive : negative) += sample.second;
  1204. }
  1205. const auto cppTime = TInstant::Now() - t;
  1206. TProgramBuilder& pb = *setup.PgmBuilder;
  1207. const auto listType = pb.NewListType(pb.NewDataType(NUdf::TDataType<double>::Id));
  1208. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1209. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideLastCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1210. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {item}; }),
  1211. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {pb.AggrGreater(items.front(), pb.NewDataLiteral(0.0))}; },
  1212. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return items; },
  1213. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.front())}; },
  1214. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList { return state; }),
  1215. [&](TRuntimeNode::TList items) { return items.front(); }
  1216. ));
  1217. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1218. NUdf::TUnboxedValue* items = nullptr;
  1219. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(I8Samples.size(), items));
  1220. std::transform(I8Samples.cbegin(), I8Samples.cend(), items, [](const std::pair<i8, double> s){ return ToValue<double>(s.second); });
  1221. const auto t1 = TInstant::Now();
  1222. const auto& value = graph->GetValue();
  1223. const auto first = value.GetElement(0);
  1224. const auto second = value.GetElement(1);
  1225. const auto t2 = TInstant::Now();
  1226. if (first.template Get<double>() > 0.0) {
  1227. UNIT_ASSERT_VALUES_EQUAL(first.template Get<double>(), positive);
  1228. UNIT_ASSERT_VALUES_EQUAL(second.template Get<double>(), negative);
  1229. } else {
  1230. UNIT_ASSERT_VALUES_EQUAL(first.template Get<double>(), negative);
  1231. UNIT_ASSERT_VALUES_EQUAL(second.template Get<double>(), positive);
  1232. }
  1233. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  1234. }
  1235. Y_UNIT_TEST_LLVM(TestMinMaxSumDoubleBooleanKeys) {
  1236. TSetup<LLVM> setup;
  1237. double pSum = 0.0, nSum = 0.0, pMax = 0.0, nMax = -1000.0, pMin = 1000.0, nMin = 0.0;
  1238. const auto t = TInstant::Now();
  1239. for (const auto& sample : I8Samples) {
  1240. if (sample.second > 0.0) {
  1241. pSum += sample.second;
  1242. pMax = std::max(pMax, sample.second);
  1243. pMin = std::min(pMin, sample.second);
  1244. } else {
  1245. nSum += sample.second;
  1246. nMax = std::max(nMax, sample.second);
  1247. nMin = std::min(nMin, sample.second);
  1248. }
  1249. }
  1250. const auto cppTime = TInstant::Now() - t;
  1251. TProgramBuilder& pb = *setup.PgmBuilder;
  1252. const auto listType = pb.NewListType(pb.NewDataType(NUdf::TDataType<double>::Id));
  1253. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1254. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideLastCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1255. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {item}; }),
  1256. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {pb.AggrGreater(items.front(), pb.NewDataLiteral(0.0))}; },
  1257. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front(), items.front(), items.front()}; },
  1258. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  1259. return {pb.AggrAdd(state.front(), items.front()), pb.AggrMin(state[1U], items.front()), pb.AggrMax(state.back(), items.back()) };
  1260. },
  1261. [&](TRuntimeNode::TList, TRuntimeNode::TList state) -> TRuntimeNode::TList { return state; }),
  1262. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  1263. ));
  1264. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1265. NUdf::TUnboxedValue* items = nullptr;
  1266. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(I8Samples.size(), items));
  1267. std::transform(I8Samples.cbegin(), I8Samples.cend(), items, [](const std::pair<i8, double> s){ return ToValue<double>(s.second); });
  1268. const auto t1 = TInstant::Now();
  1269. const auto& value = graph->GetValue();
  1270. const auto first = value.GetElement(0);
  1271. const auto second = value.GetElement(1);
  1272. const auto t2 = TInstant::Now();
  1273. if (first.GetElement(0).template Get<double>() > 0.0) {
  1274. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(0).template Get<double>(), pSum);
  1275. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(1).template Get<double>(), pMin);
  1276. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(2).template Get<double>(), pMax);
  1277. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(0).template Get<double>(), nSum);
  1278. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(1).template Get<double>(), nMin);
  1279. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(2).template Get<double>(), nMax);
  1280. } else {
  1281. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(0).template Get<double>(), nSum);
  1282. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(1).template Get<double>(), nMin);
  1283. UNIT_ASSERT_VALUES_EQUAL(first.GetElement(2).template Get<double>(), nMax);
  1284. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(0).template Get<double>(), pSum);
  1285. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(1).template Get<double>(), pMin);
  1286. UNIT_ASSERT_VALUES_EQUAL(second.GetElement(2).template Get<double>(), pMax);
  1287. }
  1288. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  1289. }
  1290. Y_UNIT_TEST_LLVM(TestSumDoubleSmallKey) {
  1291. TSetup<LLVM> setup;
  1292. std::unordered_map<i8, double> expects(201);
  1293. const auto t = TInstant::Now();
  1294. for (const auto& sample : I8Samples) {
  1295. expects.emplace(sample.first, 0.0).first->second += sample.second;
  1296. }
  1297. const auto cppTime = TInstant::Now() - t;
  1298. std::vector<std::pair<i8, double>> one, two;
  1299. one.reserve(expects.size());
  1300. two.reserve(expects.size());
  1301. one.insert(one.cend(), expects.cbegin(), expects.cend());
  1302. std::sort(one.begin(), one.end(), [](const std::pair<i8, double> l, const std::pair<i8, double> r){ return l.first < r.first; });
  1303. TProgramBuilder& pb = *setup.PgmBuilder;
  1304. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<i8>::Id), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  1305. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1306. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideLastCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1307. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }),
  1308. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  1309. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back()}; },
  1310. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back())}; },
  1311. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {keys.front(), state.front()}; }),
  1312. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  1313. ));
  1314. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1315. NUdf::TUnboxedValue* items = nullptr;
  1316. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(I8Samples.size(), items));
  1317. for (const auto& sample : I8Samples) {
  1318. NUdf::TUnboxedValue* pair = nullptr;
  1319. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  1320. pair[0] = NUdf::TUnboxedValuePod(sample.first);
  1321. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  1322. }
  1323. const auto t1 = TInstant::Now();
  1324. const auto& value = graph->GetValue();
  1325. const auto t2 = TInstant::Now();
  1326. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  1327. const auto ptr = value.GetElements();
  1328. for (size_t i = 0ULL; i < expects.size(); ++i) {
  1329. two.emplace_back(ptr[i].GetElement(0).template Get<i8>(), ptr[i].GetElement(1).template Get<double>());
  1330. }
  1331. std::sort(two.begin(), two.end(), [](const std::pair<i8, double> l, const std::pair<i8, double> r){ return l.first < r.first; });
  1332. UNIT_ASSERT_VALUES_EQUAL(one, two);
  1333. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  1334. }
  1335. Y_UNIT_TEST_LLVM(TestMinMaxSumDoubleSmallKey) {
  1336. TSetup<LLVM> setup;
  1337. std::unordered_map<i8, std::array<double, 3U>> expects(201);
  1338. const auto t = TInstant::Now();
  1339. for (const auto& sample : I8Samples) {
  1340. auto& item = expects.emplace(sample.first, std::array<double, 3U>{0.0, std::numeric_limits<double>::max(), std::numeric_limits<double>::min()}).first->second;
  1341. std::get<0U>(item) += sample.second;
  1342. std::get<1U>(item) = std::min(std::get<1U>(item), sample.second);
  1343. std::get<2U>(item) = std::max(std::get<2U>(item), sample.second);
  1344. }
  1345. const auto cppTime = TInstant::Now() - t;
  1346. std::vector<std::pair<i8, std::array<double, 3U>>> one, two;
  1347. one.reserve(expects.size());
  1348. two.reserve(expects.size());
  1349. one.insert(one.cend(), expects.cbegin(), expects.cend());
  1350. std::sort(one.begin(), one.end(), [](const std::pair<i8, std::array<double, 3U>> l, const std::pair<i8, std::array<double, 3U>> r){ return l.first < r.first; });
  1351. TProgramBuilder& pb = *setup.PgmBuilder;
  1352. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<i8>::Id), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  1353. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1354. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideLastCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1355. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }),
  1356. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  1357. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back(), items.back(), items.back()}; },
  1358. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back()), pb.AggrMin(state[1U], items.back()), pb.AggrMax(state.back(), items.back())}; },
  1359. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { state.insert(state.cbegin(), keys.front()); return state; }),
  1360. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  1361. ));
  1362. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1363. NUdf::TUnboxedValue* items = nullptr;
  1364. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(I8Samples.size(), items));
  1365. for (const auto& sample : I8Samples) {
  1366. NUdf::TUnboxedValue* pair = nullptr;
  1367. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  1368. pair[0] = NUdf::TUnboxedValuePod(sample.first);
  1369. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  1370. }
  1371. const auto t1 = TInstant::Now();
  1372. const auto& value = graph->GetValue();
  1373. const auto t2 = TInstant::Now();
  1374. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  1375. const auto ptr = value.GetElements();
  1376. for (size_t i = 0ULL; i < expects.size(); ++i) {
  1377. two.emplace_back(ptr[i].GetElement(0).template Get<i8>(), std::array<double, 3U>{ptr[i].GetElement(1).template Get<double>(), ptr[i].GetElement(2).template Get<double>(), ptr[i].GetElement(3).template Get<double>()});
  1378. }
  1379. std::sort(two.begin(), two.end(), [](const std::pair<i8, std::array<double, 3U>> l, const std::pair<i8, std::array<double, 3U>> r){ return l.first < r.first; });
  1380. UNIT_ASSERT_VALUES_EQUAL(one, two);
  1381. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  1382. }
  1383. Y_UNIT_TEST_LLVM(TestSumDoubleStringKey) {
  1384. TSetup<LLVM> setup;
  1385. std::vector<std::pair<std::string, double>> stringI8Samples(I8Samples.size());
  1386. std::transform(I8Samples.cbegin(), I8Samples.cend(), stringI8Samples.begin(), [](std::pair<i8, double> src){ return std::make_pair(ToString(src.first), src.second); });
  1387. std::unordered_map<std::string, double> expects(201);
  1388. const auto t = TInstant::Now();
  1389. for (const auto& sample : stringI8Samples) {
  1390. expects.emplace(sample.first, 0.0).first->second += sample.second;
  1391. }
  1392. const auto cppTime = TInstant::Now() - t;
  1393. std::vector<std::pair<std::string_view, double>> one, two;
  1394. one.reserve(expects.size());
  1395. two.reserve(expects.size());
  1396. one.insert(one.cend(), expects.cbegin(), expects.cend());
  1397. std::sort(one.begin(), one.end(), [](const std::pair<std::string_view, double> l, const std::pair<std::string_view, double> r){ return l.first < r.first; });
  1398. TProgramBuilder& pb = *setup.PgmBuilder;
  1399. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<const char*>::Id), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  1400. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1401. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideLastCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1402. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }),
  1403. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  1404. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back()}; },
  1405. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back())}; },
  1406. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {keys.front(), state.front()}; }),
  1407. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  1408. ));
  1409. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1410. NUdf::TUnboxedValue* items = nullptr;
  1411. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(stringI8Samples.size(), items));
  1412. for (const auto& sample : stringI8Samples) {
  1413. NUdf::TUnboxedValue* pair = nullptr;
  1414. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  1415. pair[0] = NUdf::TUnboxedValuePod::Embedded(sample.first);
  1416. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  1417. }
  1418. const auto t1 = TInstant::Now();
  1419. const auto& value = graph->GetValue();
  1420. const auto t2 = TInstant::Now();
  1421. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  1422. const auto ptr = value.GetElements();
  1423. for (size_t i = 0ULL; i < expects.size(); ++i) {
  1424. two.emplace_back(ptr[i].GetElements()->AsStringRef(), ptr[i].GetElement(1).template Get<double>());
  1425. }
  1426. std::sort(two.begin(), two.end(), [](const std::pair<std::string_view, double> l, const std::pair<std::string_view, double> r){ return l.first < r.first; });
  1427. UNIT_ASSERT_VALUES_EQUAL(one, two);
  1428. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  1429. }
  1430. Y_UNIT_TEST_LLVM(TestMinMaxSumDoubleStringKey) {
  1431. TSetup<LLVM> setup;
  1432. std::vector<std::pair<std::string, double>> stringI8Samples(I8Samples.size());
  1433. std::transform(I8Samples.cbegin(), I8Samples.cend(), stringI8Samples.begin(), [](std::pair<i8, double> src){ return std::make_pair(ToString(src.first), src.second); });
  1434. std::unordered_map<std::string, std::array<double, 3U>> expects(201);
  1435. const auto t = TInstant::Now();
  1436. for (const auto& sample : stringI8Samples) {
  1437. auto& item = expects.emplace(sample.first, std::array<double, 3U>{0.0, +1E7, -1E7}).first->second;
  1438. std::get<0U>(item) += sample.second;
  1439. std::get<1U>(item) = std::min(std::get<1U>(item), sample.second);
  1440. std::get<2U>(item) = std::max(std::get<2U>(item), sample.second);
  1441. }
  1442. const auto cppTime = TInstant::Now() - t;
  1443. std::vector<std::pair<std::string_view, std::array<double, 3U>>> one, two;
  1444. one.reserve(expects.size());
  1445. two.reserve(expects.size());
  1446. one.insert(one.cend(), expects.cbegin(), expects.cend());
  1447. std::sort(one.begin(), one.end(), [](const std::pair<std::string_view, std::array<double, 3U>> l, const std::pair<std::string_view, std::array<double, 3U>> r){ return l.first < r.first; });
  1448. TProgramBuilder& pb = *setup.PgmBuilder;
  1449. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewDataType(NUdf::TDataType<const char*>::Id), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  1450. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1451. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideLastCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1452. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(item, 0U), pb.Nth(item, 1U) }; }),
  1453. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front()}; },
  1454. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back(), items.back(), items.back()}; },
  1455. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {pb.AggrAdd(state.front(), items.back()), pb.AggrMin(state[1U], items.back()), pb.AggrMax(state.back(), items.back())}; },
  1456. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { state.insert(state.cbegin(), keys.front()); return state; }),
  1457. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  1458. ));
  1459. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1460. NUdf::TUnboxedValue* items = nullptr;
  1461. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(stringI8Samples.size(), items));
  1462. for (const auto& sample : stringI8Samples) {
  1463. NUdf::TUnboxedValue* pair = nullptr;
  1464. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  1465. pair[0] = NUdf::TUnboxedValuePod::Embedded(sample.first);
  1466. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  1467. }
  1468. const auto t1 = TInstant::Now();
  1469. const auto& value = graph->GetValue();
  1470. const auto t2 = TInstant::Now();
  1471. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  1472. const auto ptr = value.GetElements();
  1473. for (size_t i = 0ULL; i < expects.size(); ++i) {
  1474. two.emplace_back(ptr[i].GetElements()->AsStringRef(), std::array<double, 3U>{ptr[i].GetElement(1).template Get<double>(), ptr[i].GetElement(2).template Get<double>(), ptr[i].GetElement(3).template Get<double>()});
  1475. }
  1476. std::sort(two.begin(), two.end(), [](const std::pair<std::string_view, std::array<double, 3U>> l, const std::pair<std::string_view, std::array<double, 3U>> r){ return l.first < r.first; });
  1477. UNIT_ASSERT_VALUES_EQUAL(one, two);
  1478. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  1479. }
  1480. Y_UNIT_TEST_LLVM(TestMinMaxSumTupleKey) {
  1481. TSetup<LLVM> setup;
  1482. std::vector<std::pair<std::pair<ui32, std::string>, double>> pairSamples(Ui16Samples.size());
  1483. std::transform(Ui16Samples.cbegin(), Ui16Samples.cend(), pairSamples.begin(), [](std::pair<ui16, double> src){ return std::make_pair(std::make_pair(ui32(src.first / 10U % 100U), ToString(src.first % 10U)), src.second); });
  1484. struct TPairHash { size_t operator()(const std::pair<ui32, std::string>& p) const { return CombineHashes(std::hash<ui32>()(p.first), std::hash<std::string_view>()(p.second)); } };
  1485. std::unordered_map<std::pair<ui32, std::string>, std::array<double, 3U>, TPairHash> expects;
  1486. const auto t = TInstant::Now();
  1487. for (const auto& sample : pairSamples) {
  1488. auto& item = expects.emplace(sample.first, std::array<double, 3U>{0.0, +1E7, -1E7}).first->second;
  1489. std::get<0U>(item) += sample.second;
  1490. std::get<1U>(item) = std::min(std::get<1U>(item), sample.second);
  1491. std::get<2U>(item) = std::max(std::get<2U>(item), sample.second);
  1492. }
  1493. const auto cppTime = TInstant::Now() - t;
  1494. std::vector<std::pair<std::pair<ui32, std::string>, std::array<double, 3U>>> one, two;
  1495. one.reserve(expects.size());
  1496. two.reserve(expects.size());
  1497. one.insert(one.cend(), expects.cbegin(), expects.cend());
  1498. std::sort(one.begin(), one.end(), [](const std::pair<std::pair<ui32, std::string_view>, std::array<double, 3U>> l, const std::pair<std::pair<ui32, std::string_view>, std::array<double, 3U>> r){ return l.first < r.first; });
  1499. TProgramBuilder& pb = *setup.PgmBuilder;
  1500. const auto listType = pb.NewListType(pb.NewTupleType({pb.NewTupleType({pb.NewDataType(NUdf::TDataType<ui32>::Id), pb.NewDataType(NUdf::TDataType<const char*>::Id)}), pb.NewDataType(NUdf::TDataType<double>::Id)}));
  1501. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1502. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideLastCombiner(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1503. [&](TRuntimeNode item) -> TRuntimeNode::TList { return { pb.Nth(pb.Nth(item, 0U), 0U), pb.Nth(pb.Nth(item, 0U), 1U), pb.Nth(item, 1U) }; }),
  1504. [&](TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.front(), items[1U]}; },
  1505. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList { return {items.back(), items.back(), items.back()}; },
  1506. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  1507. return {pb.AggrAdd(state.front(), items.back()), pb.AggrMin(state[1U], items.back()), pb.AggrMax(state.back(), items.back()) };
  1508. },
  1509. [&](TRuntimeNode::TList keys, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {keys.front(), keys.back(), state.front(), state[1U], state.back()}; }),
  1510. [&](TRuntimeNode::TList items) { return pb.NewTuple({pb.NewTuple({items[0U], items[1U]}), items[2U], items[3U], items[4U]}); }
  1511. ));
  1512. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1513. NUdf::TUnboxedValue* items = nullptr;
  1514. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(pairSamples.size(), items));
  1515. for (const auto& sample : pairSamples) {
  1516. NUdf::TUnboxedValue* pair = nullptr;
  1517. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(2U, pair);
  1518. pair[1] = NUdf::TUnboxedValuePod(sample.second);
  1519. NUdf::TUnboxedValue* keys = nullptr;
  1520. pair[0] = graph->GetHolderFactory().CreateDirectArrayHolder(2U, keys);
  1521. keys[0] = NUdf::TUnboxedValuePod(sample.first.first);
  1522. keys[1] = NUdf::TUnboxedValuePod::Embedded(sample.first.second);
  1523. }
  1524. const auto t1 = TInstant::Now();
  1525. const auto& value = graph->GetValue();
  1526. const auto t2 = TInstant::Now();
  1527. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  1528. const auto ptr = value.GetElements();
  1529. for (size_t i = 0ULL; i < expects.size(); ++i) {
  1530. const auto elements = ptr[i].GetElements();
  1531. two.emplace_back(std::make_pair(elements[0].GetElement(0).template Get<ui32>(), (elements[0].GetElements()[1]).AsStringRef()), std::array<double, 3U>{elements[1].template Get<double>(), elements[2].template Get<double>(), elements[3].template Get<double>()});
  1532. }
  1533. std::sort(two.begin(), two.end(), [](const std::pair<std::pair<ui32, std::string_view>, std::array<double, 3U>> l, const std::pair<std::pair<ui32, std::string_view>, std::array<double, 3U>> r){ return l.first < r.first; });
  1534. UNIT_ASSERT_VALUES_EQUAL(one, two);
  1535. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  1536. }
  1537. Y_UNIT_TEST_LLVM(TestTpch) {
  1538. TSetup<LLVM> setup;
  1539. struct TPairHash { size_t operator()(const std::pair<std::string_view, std::string_view>& p) const { return CombineHashes(std::hash<std::string_view>()(p.first), std::hash<std::string_view>()(p.second)); } };
  1540. std::unordered_map<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>, TPairHash> expects;
  1541. const auto t = TInstant::Now();
  1542. for (auto& sample : TpchSamples) {
  1543. if (std::get<0U>(sample) <= border) {
  1544. const auto& ins = expects.emplace(std::pair<std::string_view, std::string_view>{std::get<1U>(sample), std::get<2U>(sample)}, std::pair<ui64, std::array<double, 5U>>{0ULL, {0., 0., 0., 0., 0.}});
  1545. auto& item = ins.first->second;
  1546. ++item.first;
  1547. std::get<0U>(item.second) += std::get<3U>(sample);
  1548. std::get<1U>(item.second) += std::get<5U>(sample);
  1549. std::get<2U>(item.second) += std::get<6U>(sample);
  1550. const auto v = std::get<3U>(sample) * (1. - std::get<5U>(sample));
  1551. std::get<3U>(item.second) += v;
  1552. std::get<4U>(item.second) += v * (1. + std::get<4U>(sample));
  1553. }
  1554. }
  1555. for (auto& item : expects) {
  1556. std::get<1U>(item.second.second) /= item.second.first;
  1557. }
  1558. const auto cppTime = TInstant::Now() - t;
  1559. std::vector<std::pair<std::pair<std::string, std::string>, std::pair<ui64, std::array<double, 5U>>>> one, two;
  1560. one.reserve(expects.size());
  1561. two.reserve(expects.size());
  1562. one.insert(one.cend(), expects.cbegin(), expects.cend());
  1563. std::sort(one.begin(), one.end(), [](const std::pair<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>> l, const std::pair<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>> r){ return l.first < r.first; });
  1564. TProgramBuilder& pb = *setup.PgmBuilder;
  1565. const auto listType = pb.NewListType(pb.NewTupleType({
  1566. pb.NewDataType(NUdf::TDataType<ui64>::Id),
  1567. pb.NewDataType(NUdf::TDataType<const char*>::Id),
  1568. pb.NewDataType(NUdf::TDataType<const char*>::Id),
  1569. pb.NewDataType(NUdf::TDataType<double>::Id),
  1570. pb.NewDataType(NUdf::TDataType<double>::Id),
  1571. pb.NewDataType(NUdf::TDataType<double>::Id),
  1572. pb.NewDataType(NUdf::TDataType<double>::Id)
  1573. }));
  1574. const auto list = TCallableBuilder(pb.GetTypeEnvironment(), "TestList", listType).Build();
  1575. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.WideLastCombiner(
  1576. pb.WideFilter(pb.ExpandMap(pb.ToFlow(TRuntimeNode(list, false)),
  1577. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U), pb.Nth(item, 3U), pb.Nth(item, 4U), pb.Nth(item, 5U), pb.Nth(item, 6U)}; }),
  1578. [&](TRuntimeNode::TList items) { return pb.AggrLessOrEqual(items.front(), pb.NewDataLiteral<ui64>(border)); }
  1579. ),
  1580. [&](TRuntimeNode::TList item) -> TRuntimeNode::TList { return {item[1U], item[2U]}; },
  1581. [&](TRuntimeNode::TList, TRuntimeNode::TList items) -> TRuntimeNode::TList {
  1582. const auto price = items[3U];
  1583. const auto disco = items[5U];
  1584. const auto v = pb.Mul(price, pb.Sub(pb.NewDataLiteral<double>(1.), disco));
  1585. return {pb.NewDataLiteral<ui64>(1ULL), price, disco, items[6U], v, pb.Mul(v, pb.Add(pb.NewDataLiteral<double>(1.), items[4U]))};
  1586. },
  1587. [&](TRuntimeNode::TList, TRuntimeNode::TList items, TRuntimeNode::TList state) -> TRuntimeNode::TList {
  1588. const auto price = items[3U];
  1589. const auto disco = items[5U];
  1590. const auto v = pb.Mul(price, pb.Sub(pb.NewDataLiteral<double>(1.), disco));
  1591. return {pb.Increment(state[0U]), pb.AggrAdd(state[1U], price), pb.AggrAdd(state[2U], disco), pb.AggrAdd(state[3U], items[6U]), pb.AggrAdd(state[4U], v), pb.AggrAdd(state[5U], pb.Mul(v, pb.Add(pb.NewDataLiteral<double>(1.), items[4U])))};
  1592. },
  1593. [&](TRuntimeNode::TList key, TRuntimeNode::TList state) -> TRuntimeNode::TList { return {key.front(), key.back(), state[0U], state[1U], pb.Div(state[2U], state[0U]), state[3U], state[4U], state[5U]}; }),
  1594. [&](TRuntimeNode::TList items) { return pb.NewTuple(items); }
  1595. ));
  1596. const auto graph = setup.BuildGraph(pgmReturn, {list});
  1597. NUdf::TUnboxedValue* items = nullptr;
  1598. graph->GetEntryPoint(0, true)->SetValue(graph->GetContext(), graph->GetHolderFactory().CreateDirectArrayHolder(TpchSamples.size(), items));
  1599. for (const auto& sample : TpchSamples) {
  1600. NUdf::TUnboxedValue* elements = nullptr;
  1601. *items++ = graph->GetHolderFactory().CreateDirectArrayHolder(7U, elements);
  1602. elements[0] = NUdf::TUnboxedValuePod(std::get<0U>(sample));
  1603. elements[1] = NUdf::TUnboxedValuePod::Embedded(std::get<1U>(sample));
  1604. elements[2] = NUdf::TUnboxedValuePod::Embedded(std::get<2U>(sample));
  1605. elements[3] = NUdf::TUnboxedValuePod(std::get<3U>(sample));
  1606. elements[4] = NUdf::TUnboxedValuePod(std::get<4U>(sample));
  1607. elements[5] = NUdf::TUnboxedValuePod(std::get<5U>(sample));
  1608. elements[6] = NUdf::TUnboxedValuePod(std::get<6U>(sample));
  1609. }
  1610. const auto t1 = TInstant::Now();
  1611. const auto& value = graph->GetValue();
  1612. const auto t2 = TInstant::Now();
  1613. UNIT_ASSERT_VALUES_EQUAL(value.GetListLength(), expects.size());
  1614. const auto ptr = value.GetElements();
  1615. for (size_t i = 0ULL; i < expects.size(); ++i) {
  1616. const auto elements = ptr[i].GetElements();
  1617. two.emplace_back(std::make_pair(elements[0].AsStringRef(), elements[1].AsStringRef()), std::pair<ui64, std::array<double, 5U>>{elements[2].template Get<ui64>(), {elements[3].template Get<double>(), elements[4].template Get<double>(), elements[5].template Get<double>(), elements[6].template Get<double>(), elements[7].template Get<double>()}});
  1618. }
  1619. std::sort(two.begin(), two.end(), [](const std::pair<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>> l, const std::pair<std::pair<std::string_view, std::string_view>, std::pair<ui64, std::array<double, 5U>>> r){ return l.first < r.first; });
  1620. UNIT_ASSERT_VALUES_EQUAL(one, two);
  1621. Cerr << "Runtime is " << t2 - t1 << " vs C++ " << cppTime << Endl;
  1622. }
  1623. }
  1624. #endif
  1625. }
  1626. }