mkql_block_compress_ut.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. #include "mkql_computation_node_ut.h"
  2. #include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
  3. #include <yql/essentials/minikql/computation/mkql_block_builder.h>
  4. #include <util/random/random.h>
  5. namespace NKikimr {
  6. namespace NMiniKQL {
  7. namespace {
  8. template<bool UseRandom, bool DoFilter, bool LLVM>
  9. void DoNestedTuplesCompressTest() {
  10. TSetup<LLVM> setup;
  11. TProgramBuilder& pb = *setup.PgmBuilder;
  12. const auto ui64Type = pb.NewDataType(NUdf::TDataType<ui64>::Id);
  13. const auto boolType = pb.NewDataType(NUdf::TDataType<bool>::Id);
  14. const auto utf8Type = pb.NewDataType(NUdf::EDataSlot::Utf8);
  15. const auto innerTupleType = pb.NewTupleType({ui64Type, boolType, utf8Type});
  16. const auto outerTupleType = pb.NewTupleType({ui64Type, innerTupleType, utf8Type});
  17. const auto finalTupleType = pb.NewTupleType({ui64Type, outerTupleType, boolType});
  18. const auto resultTupleType = pb.NewTupleType({ui64Type, outerTupleType});
  19. TRuntimeNode::TList items;
  20. static_assert(MaxBlockSizeInBytes % 4 == 0);
  21. constexpr size_t fixedStrSize = MaxBlockSizeInBytes / 4;
  22. if constexpr (UseRandom) {
  23. SetRandomSeed(0);
  24. }
  25. for (size_t i = 0; i < 95; ++i) {
  26. std::string str;
  27. bool filterValue;
  28. if constexpr (UseRandom) {
  29. size_t len = RandomNumber<size_t>(2 * MaxBlockSizeInBytes);
  30. str.reserve(len);
  31. for (size_t i = 0; i < len; ++i) {
  32. str.push_back((char)RandomNumber<ui8>(128));
  33. }
  34. if constexpr (DoFilter) {
  35. filterValue = RandomNumber<ui8>() & 1;
  36. } else {
  37. filterValue = true;
  38. }
  39. } else {
  40. str = std::string(fixedStrSize, ' ' + i);
  41. if constexpr (DoFilter) {
  42. filterValue = (i % 4) < 2;
  43. } else {
  44. filterValue = true;
  45. }
  46. }
  47. const auto innerTuple = pb.NewTuple(innerTupleType, {
  48. pb.NewDataLiteral<ui64>(i),
  49. pb.NewDataLiteral<bool>(i % 2),
  50. pb.NewDataLiteral<NUdf::EDataSlot::Utf8>((i % 2) ? str : std::string()),
  51. });
  52. const auto outerTuple = pb.NewTuple(outerTupleType, {
  53. pb.NewDataLiteral<ui64>(i),
  54. innerTuple,
  55. pb.NewDataLiteral<NUdf::EDataSlot::Utf8>((i % 2) ? std::string() : str),
  56. });
  57. const auto finalTuple = pb.NewTuple(finalTupleType, {
  58. pb.NewDataLiteral<ui64>(i),
  59. outerTuple,
  60. pb.NewDataLiteral(filterValue),
  61. });
  62. items.push_back(finalTuple);
  63. }
  64. const auto list = pb.NewList(finalTupleType, std::move(items));
  65. auto node = pb.ToFlow(list);
  66. node = pb.ExpandMap(node, [&](TRuntimeNode item) -> TRuntimeNode::TList {
  67. return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)};
  68. });
  69. node = pb.WideToBlocks(node);
  70. node = pb.BlockExpandChunked(node);
  71. node = pb.WideSkipBlocks(node, pb.template NewDataLiteral<ui64>(19));
  72. node = pb.BlockCompress(node, 2);
  73. node = pb.ToFlow(pb.WideFromBlocks(pb.FromFlow(node)));
  74. node = pb.NarrowMap(node, [&](TRuntimeNode::TList items) -> TRuntimeNode {
  75. return pb.NewTuple(resultTupleType, {items[0], items[1]});
  76. });
  77. const auto pgmReturn = pb.Collect(node);
  78. const auto graph = setup.BuildGraph(pgmReturn);
  79. const auto iterator = graph->GetValue().GetListIterator();
  80. if constexpr (UseRandom) {
  81. SetRandomSeed(0);
  82. }
  83. for (size_t i = 0; i < 95; ++i) {
  84. std::string str;
  85. bool filterValue;
  86. if constexpr (UseRandom) {
  87. size_t len = RandomNumber<size_t>(2 * MaxBlockSizeInBytes);
  88. str.reserve(len);
  89. for (size_t i = 0; i < len; ++i) {
  90. str.push_back((char)RandomNumber<ui8>(128));
  91. }
  92. if constexpr (DoFilter) {
  93. filterValue = RandomNumber<ui8>() & 1;
  94. } else {
  95. filterValue = true;
  96. }
  97. } else {
  98. str = std::string(fixedStrSize, ' ' + i);
  99. if constexpr (DoFilter) {
  100. filterValue = (i % 4) < 2;
  101. } else {
  102. filterValue = true;
  103. }
  104. }
  105. if (i < 19 || !filterValue) {
  106. continue;
  107. }
  108. NUdf::TUnboxedValue item;
  109. UNIT_ASSERT(iterator.Next(item));
  110. ui64 topNum = item.GetElement(0).Get<ui64>();
  111. const auto& outer = item.GetElement(1);
  112. ui64 num = outer.GetElement(0).Get<ui64>();
  113. const auto& inner = outer.GetElement(1);
  114. auto outerStrVal = outer.GetElement(2);
  115. std::string_view outerStr = outerStrVal.AsStringRef();
  116. ui64 innerNum = inner.GetElement(0).Get<ui64>();
  117. bool innerBool = inner.GetElement(1).Get<bool>();
  118. auto innerStrVal = inner.GetElement(2);
  119. std::string_view innerStr = innerStrVal.AsStringRef();
  120. UNIT_ASSERT_VALUES_EQUAL(num, i);
  121. UNIT_ASSERT_VALUES_EQUAL(topNum, i);
  122. UNIT_ASSERT_VALUES_EQUAL(innerNum, i);
  123. UNIT_ASSERT_VALUES_EQUAL(innerBool, i % 2);
  124. std::string expectedInner = (i % 2) ? str : std::string();
  125. std::string expectedOuter = (i % 2) ? std::string() : str;
  126. UNIT_ASSERT(innerStr == expectedInner);
  127. UNIT_ASSERT(outerStr == expectedOuter);
  128. }
  129. NUdf::TUnboxedValue item;
  130. UNIT_ASSERT(!iterator.Next(item));
  131. UNIT_ASSERT(!iterator.Next(item));
  132. }
  133. } //namespace
  134. Y_UNIT_TEST_SUITE(TMiniKQLBlockCompressTest) {
  135. Y_UNIT_TEST_LLVM(CompressBasic) {
  136. TSetup<LLVM> setup;
  137. TProgramBuilder& pb = *setup.PgmBuilder;
  138. const auto ui64Type = pb.NewDataType(NUdf::TDataType<ui64>::Id);
  139. const auto boolType = pb.NewDataType(NUdf::TDataType<bool>::Id);
  140. const auto tupleType = pb.NewTupleType({boolType, ui64Type, boolType});
  141. const auto data1 = pb.NewTuple(tupleType, {pb.NewDataLiteral(false), pb.NewDataLiteral<ui64>(1ULL), pb.NewDataLiteral(true)});
  142. const auto data2 = pb.NewTuple(tupleType, {pb.NewDataLiteral(true), pb.NewDataLiteral<ui64>(2ULL), pb.NewDataLiteral(false)});
  143. const auto data3 = pb.NewTuple(tupleType, {pb.NewDataLiteral(false), pb.NewDataLiteral<ui64>(3ULL), pb.NewDataLiteral(true)});
  144. const auto data4 = pb.NewTuple(tupleType, {pb.NewDataLiteral(false), pb.NewDataLiteral<ui64>(4ULL), pb.NewDataLiteral(true)});
  145. const auto data5 = pb.NewTuple(tupleType, {pb.NewDataLiteral(true), pb.NewDataLiteral<ui64>(5ULL), pb.NewDataLiteral(false)});
  146. const auto data6 = pb.NewTuple(tupleType, {pb.NewDataLiteral(true), pb.NewDataLiteral<ui64>(6ULL), pb.NewDataLiteral(true)});
  147. const auto data7 = pb.NewTuple(tupleType, {pb.NewDataLiteral(false), pb.NewDataLiteral<ui64>(7ULL), pb.NewDataLiteral(true)});
  148. const auto list = pb.NewList(tupleType, {data1, data2, data3, data4, data5, data6, data7});
  149. const auto flow = pb.ToFlow(list);
  150. const auto wideFlow = pb.ExpandMap(flow, [&](TRuntimeNode item) -> TRuntimeNode::TList {
  151. return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)};
  152. });
  153. const auto compressedBlocks = pb.BlockCompress(pb.WideToBlocks(wideFlow), 0);
  154. const auto compressedFlow = pb.ToFlow(pb.WideFromBlocks(pb.FromFlow(compressedBlocks)));
  155. const auto narrowFlow = pb.NarrowMap(compressedFlow, [&](TRuntimeNode::TList items) -> TRuntimeNode {
  156. return pb.NewTuple({items[0], items[1]});
  157. });
  158. const auto pgmReturn = pb.Collect(narrowFlow);
  159. const auto graph = setup.BuildGraph(pgmReturn);
  160. const auto res = graph->GetValue();
  161. const auto iterator = res.GetListIterator();
  162. NUdf::TUnboxedValue item;
  163. UNIT_ASSERT(iterator.Next(item));
  164. UNIT_ASSERT_VALUES_EQUAL(item.GetElement(0).Get<ui64>(), 2);
  165. UNIT_ASSERT_VALUES_EQUAL(item.GetElement(1).Get<bool>(), false);
  166. UNIT_ASSERT(iterator.Next(item));
  167. UNIT_ASSERT_VALUES_EQUAL(item.GetElement(0).Get<ui64>(), 5);
  168. UNIT_ASSERT_VALUES_EQUAL(item.GetElement(1).Get<bool>(), false);
  169. UNIT_ASSERT(iterator.Next(item));
  170. UNIT_ASSERT_VALUES_EQUAL(item.GetElement(0).Get<ui64>(), 6);
  171. UNIT_ASSERT_VALUES_EQUAL(item.GetElement(1).Get<bool>(), true);
  172. UNIT_ASSERT(!iterator.Next(item));
  173. UNIT_ASSERT(!iterator.Next(item));
  174. }
  175. Y_UNIT_TEST_LLVM(CompressNestedTuples) {
  176. DoNestedTuplesCompressTest<false, false, LLVM>();
  177. }
  178. Y_UNIT_TEST_LLVM(CompressNestedTuplesWithFilter) {
  179. DoNestedTuplesCompressTest<false, true, LLVM>();
  180. }
  181. Y_UNIT_TEST_LLVM(CompressNestedTuplesWithRandom) {
  182. DoNestedTuplesCompressTest<true, false, LLVM>();
  183. }
  184. Y_UNIT_TEST_LLVM(CompressNestedTuplesWithRandomWithFilter) {
  185. DoNestedTuplesCompressTest<true, true, LLVM>();
  186. }
  187. }
  188. } // namespace NMiniKQL
  189. } // namespace NKikimr