mkql_block_trimmer_ut.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. #include "mkql_block_trimmer.h"
  2. #include <library/cpp/testing/unittest/registar.h>
  3. #include <yql/essentials/public/udf/arrow/block_builder.h>
  4. #include <yql/essentials/public/udf/arrow/block_reader.h>
  5. #include <yql/essentials/public/udf/arrow/memory_pool.h>
  6. #include <yql/essentials/minikql/mkql_type_builder.h>
  7. #include <yql/essentials/minikql/mkql_function_registry.h>
  8. #include <yql/essentials/minikql/mkql_program_builder.h>
  9. #include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
  10. using namespace NYql::NUdf;
  11. using namespace NKikimr;
  12. struct TBlockTrimmerTestData {
  13. TBlockTrimmerTestData()
  14. : FunctionRegistry(NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry()))
  15. , Alloc(__LOCATION__)
  16. , Env(Alloc)
  17. , PgmBuilder(Env, *FunctionRegistry)
  18. , MemInfo("Memory")
  19. , ArrowPool(GetYqlMemoryPool())
  20. {
  21. }
  22. TIntrusivePtr<NMiniKQL::IFunctionRegistry> FunctionRegistry;
  23. NMiniKQL::TScopedAlloc Alloc;
  24. NMiniKQL::TTypeEnvironment Env;
  25. NMiniKQL::TProgramBuilder PgmBuilder;
  26. NMiniKQL::TMemoryUsageInfo MemInfo;
  27. arrow::MemoryPool* const ArrowPool;
  28. };
  29. Y_UNIT_TEST_SUITE(TBlockTrimmerTest) {
  30. Y_UNIT_TEST(TestFixedSize) {
  31. TBlockTrimmerTestData data;
  32. const auto int64Type = data.PgmBuilder.NewDataType(NUdf::EDataSlot::Int64, false);
  33. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(int64Type);
  34. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  35. Y_ENSURE(blockLen > 8);
  36. constexpr auto testSize = NMiniKQL::MaxBlockSizeInBytes / sizeof(i64);
  37. constexpr auto sliceSize = 1024;
  38. static_assert(testSize % sliceSize == 0);
  39. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), int64Type, *data.ArrowPool, blockLen, nullptr);
  40. auto reader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), int64Type);
  41. auto trimmer = MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), int64Type, data.ArrowPool);
  42. for (size_t i = 0; i < testSize; i++) {
  43. builder->Add(TBlockItem(i));
  44. }
  45. auto datum = builder->Build(true);
  46. Y_ENSURE(datum.is_array());
  47. auto array = datum.array();
  48. for (size_t sliceIdx = 0; sliceIdx < testSize / sliceSize; sliceIdx++) {
  49. auto slice = Chop(array, sliceSize);
  50. auto trimmedSlice = trimmer->Trim(slice);
  51. for (size_t elemIdx = 0; elemIdx < sliceSize; elemIdx++) {
  52. TBlockItem lhs = reader->GetItem(*slice, elemIdx);
  53. TBlockItem rhs = reader->GetItem(*trimmedSlice, elemIdx);
  54. UNIT_ASSERT_VALUES_EQUAL_C(lhs.Get<i64>(), rhs.Get<i64>(), "Expected the same data after trim");
  55. }
  56. }
  57. }
  58. Y_UNIT_TEST(TestString) {
  59. TBlockTrimmerTestData data;
  60. const auto stringType = data.PgmBuilder.NewDataType(NUdf::EDataSlot::String, false);
  61. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(stringType);
  62. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  63. Y_ENSURE(blockLen > 8);
  64. // To fit all strings into single block
  65. constexpr auto testSize = 512;
  66. constexpr auto sliceSize = 128;
  67. static_assert(testSize % sliceSize == 0);
  68. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), stringType, *data.ArrowPool, blockLen, nullptr);
  69. auto reader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), stringType);
  70. auto trimmer = MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), stringType, data.ArrowPool);
  71. std::string testString;
  72. testString.resize(testSize);
  73. for (size_t i = 0; i < testSize; i++) {
  74. testString[i] = static_cast<char>(i);
  75. if (i % 2) {
  76. builder->Add(TBlockItem(TStringRef(testString.data(), i + 1)));
  77. } else {
  78. // Empty string
  79. builder->Add(TBlockItem(TStringRef()));
  80. }
  81. }
  82. auto datum = builder->Build(true);
  83. Y_ENSURE(datum.is_array());
  84. auto array = datum.array();
  85. for (size_t sliceIdx = 0; sliceIdx < testSize / sliceSize; sliceIdx++) {
  86. auto slice = Chop(array, sliceSize);
  87. auto trimmedSlice = trimmer->Trim(slice);
  88. for (size_t elemIdx = 0; elemIdx < sliceSize; elemIdx++) {
  89. TBlockItem lhs = reader->GetItem(*slice, elemIdx);
  90. TBlockItem rhs = reader->GetItem(*trimmedSlice, elemIdx);
  91. UNIT_ASSERT_VALUES_EQUAL_C(lhs.AsStringRef(), rhs.AsStringRef(), "Expected the same data after trim");
  92. }
  93. }
  94. }
  95. Y_UNIT_TEST(TestOptional) {
  96. TBlockTrimmerTestData data;
  97. const auto optionalInt64Type = data.PgmBuilder.NewDataType(NUdf::EDataSlot::Int64, true);
  98. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(optionalInt64Type);
  99. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  100. Y_ENSURE(blockLen > 8);
  101. constexpr auto testSize = NMiniKQL::MaxBlockSizeInBytes / sizeof(i64);
  102. constexpr auto sliceSize = 1024;
  103. static_assert(testSize % sliceSize == 0);
  104. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), optionalInt64Type, *data.ArrowPool, blockLen, nullptr);
  105. auto reader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), optionalInt64Type);
  106. auto trimmer = MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), optionalInt64Type, data.ArrowPool);
  107. for (size_t i = 0; i < testSize; i++) {
  108. if (i % 2) {
  109. builder->Add(TBlockItem());
  110. } else {
  111. builder->Add(TBlockItem(i));
  112. }
  113. }
  114. auto datum = builder->Build(true);
  115. Y_ENSURE(datum.is_array());
  116. auto array = datum.array();
  117. for (size_t sliceIdx = 0; sliceIdx < testSize / sliceSize; sliceIdx++) {
  118. auto slice = Chop(array, sliceSize);
  119. auto trimmedSlice = trimmer->Trim(slice);
  120. for (size_t elemIdx = 0; elemIdx < sliceSize; elemIdx++) {
  121. TBlockItem lhs = reader->GetItem(*slice, elemIdx);
  122. TBlockItem rhs = reader->GetItem(*trimmedSlice, elemIdx);
  123. UNIT_ASSERT_VALUES_EQUAL_C(bool(lhs), bool(rhs), "Expected the same optionality after trim");
  124. if (lhs) {
  125. UNIT_ASSERT_VALUES_EQUAL_C(lhs.Get<i64>(), rhs.Get<i64>(), "Expected the same data after trim");
  126. }
  127. }
  128. }
  129. }
  130. Y_UNIT_TEST(TestExternalOptional) {
  131. TBlockTrimmerTestData data;
  132. const auto doubleOptInt64Type = data.PgmBuilder.NewOptionalType(data.PgmBuilder.NewDataType(NUdf::EDataSlot::Int64, true));
  133. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(doubleOptInt64Type);
  134. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  135. Y_ENSURE(blockLen > 8);
  136. constexpr auto testSize = NMiniKQL::MaxBlockSizeInBytes / sizeof(i64);
  137. constexpr auto sliceSize = 1024;
  138. static_assert(testSize % sliceSize == 0);
  139. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), doubleOptInt64Type, *data.ArrowPool, blockLen, nullptr);
  140. auto reader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), doubleOptInt64Type);
  141. auto trimmer = MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), doubleOptInt64Type, data.ArrowPool);
  142. for (size_t i = 0; i < testSize; i++) {
  143. if (i % 2) {
  144. builder->Add(TBlockItem(i).MakeOptional());
  145. } else if (i % 4) {
  146. builder->Add(TBlockItem());
  147. } else {
  148. builder->Add(TBlockItem().MakeOptional());
  149. }
  150. }
  151. auto datum = builder->Build(true);
  152. Y_ENSURE(datum.is_array());
  153. auto array = datum.array();
  154. for (size_t sliceIdx = 0; sliceIdx < testSize / sliceSize; sliceIdx++) {
  155. auto slice = Chop(array, sliceSize);
  156. auto trimmedSlice = trimmer->Trim(slice);
  157. for (size_t elemIdx = 0; elemIdx < sliceSize; elemIdx++) {
  158. TBlockItem lhs = reader->GetItem(*slice, elemIdx);
  159. TBlockItem rhs = reader->GetItem(*trimmedSlice, elemIdx);
  160. for (size_t i = 0; i < 2; i++) {
  161. UNIT_ASSERT_VALUES_EQUAL_C(bool(lhs), bool(rhs), "Expected the same optionality after trim");
  162. if (!lhs) {
  163. break;
  164. }
  165. lhs = lhs.GetOptionalValue();
  166. rhs = rhs.GetOptionalValue();
  167. }
  168. if (lhs) {
  169. UNIT_ASSERT_VALUES_EQUAL_C(lhs.Get<i64>(), rhs.Get<i64>(), "Expected the same data after trim");
  170. }
  171. }
  172. }
  173. }
  174. Y_UNIT_TEST(TestTuple) {
  175. TBlockTrimmerTestData data;
  176. std::vector<NMiniKQL::TType*> types;
  177. types.push_back(data.PgmBuilder.NewDataType(NUdf::EDataSlot::Int64));
  178. types.push_back(data.PgmBuilder.NewDataType(NUdf::EDataSlot::String));
  179. types.push_back(data.PgmBuilder.NewDataType(NUdf::EDataSlot::Int64, true));
  180. const auto tupleType = data.PgmBuilder.NewTupleType(types);
  181. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(tupleType);
  182. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  183. Y_ENSURE(blockLen > 8);
  184. // To fit all strings into single block
  185. constexpr auto testSize = 512;
  186. constexpr auto sliceSize = 128;
  187. static_assert(testSize % sliceSize == 0);
  188. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), tupleType, *data.ArrowPool, blockLen, nullptr);
  189. auto reader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), tupleType);
  190. auto trimmer = MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), tupleType, data.ArrowPool);
  191. std::string testString;
  192. testString.resize(testSize);
  193. std::vector<TBlockItem*> testTuples(testSize);
  194. for (size_t i = 0; i < testSize; i++) {
  195. testString[i] = static_cast<char>(i);
  196. TBlockItem* tupleItems = new TBlockItem[3];
  197. testTuples.push_back(tupleItems);
  198. tupleItems[0] = TBlockItem(i);
  199. tupleItems[1] = TBlockItem(TStringRef(testString.data(), i + 1));
  200. tupleItems[2] = i % 2 ? TBlockItem(i) : TBlockItem();
  201. builder->Add(TBlockItem(tupleItems));
  202. }
  203. auto datum = builder->Build(true);
  204. Y_ENSURE(datum.is_array());
  205. auto array = datum.array();
  206. for (size_t sliceIdx = 0; sliceIdx < testSize / sliceSize; sliceIdx++) {
  207. auto slice = Chop(array, sliceSize);
  208. auto trimmedSlice = trimmer->Trim(slice);
  209. for (size_t elemIdx = 0; elemIdx < sliceSize; elemIdx++) {
  210. TBlockItem lhs = reader->GetItem(*slice, elemIdx);
  211. TBlockItem rhs = reader->GetItem(*trimmedSlice, elemIdx);
  212. UNIT_ASSERT_VALUES_EQUAL_C(lhs.GetElement(0).Get<i64>(), rhs.GetElement(0).Get<i64>(), "Expected the same data after trim");
  213. UNIT_ASSERT_VALUES_EQUAL_C(lhs.GetElement(1).AsStringRef(), rhs.GetElement(1).AsStringRef(), "Expected the same data after trim");
  214. UNIT_ASSERT_VALUES_EQUAL_C(bool(lhs.GetElement(2)), bool(rhs.GetElement(2)), "Expected the same optionality after trim");
  215. if (bool(lhs.GetElement(2))) {
  216. UNIT_ASSERT_VALUES_EQUAL_C(lhs.GetElement(2).Get<i64>(), rhs.GetElement(2).Get<i64>(), "Expected the same data after trim");
  217. }
  218. }
  219. }
  220. for (auto tupleItems : testTuples) {
  221. delete[] tupleItems;
  222. }
  223. }
  224. Y_UNIT_TEST(TestTzDate) {
  225. TBlockTrimmerTestData data;
  226. using TDtLayout = TDataType<TTzDatetime>::TLayout;
  227. const auto tzDatetimeType = data.PgmBuilder.NewDataType(NUdf::EDataSlot::TzDatetime, false);
  228. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(tzDatetimeType);
  229. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  230. Y_ENSURE(blockLen > 8);
  231. constexpr auto testSize = NMiniKQL::MaxBlockSizeInBytes / (sizeof(TDtLayout) + sizeof(ui16));
  232. constexpr auto sliceSize = 1024;
  233. static_assert(testSize % sliceSize == 0);
  234. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), tzDatetimeType, *data.ArrowPool, blockLen, nullptr);
  235. auto reader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), tzDatetimeType);
  236. auto trimmer = MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), tzDatetimeType, data.ArrowPool);
  237. for (size_t i = 0; i < testSize; i++) {
  238. TBlockItem dt = TBlockItem(i);
  239. dt.SetTimezoneId(i * 2);
  240. builder->Add(dt);
  241. }
  242. auto datum = builder->Build(true);
  243. Y_ENSURE(datum.is_array());
  244. auto array = datum.array();
  245. for (size_t sliceIdx = 0; sliceIdx < testSize / sliceSize; sliceIdx++) {
  246. auto slice = Chop(array, sliceSize);
  247. auto trimmedSlice = trimmer->Trim(slice);
  248. for (size_t elemIdx = 0; elemIdx < sliceSize; elemIdx++) {
  249. TBlockItem lhs = reader->GetItem(*slice, elemIdx);
  250. TBlockItem rhs = reader->GetItem(*trimmedSlice, elemIdx);
  251. UNIT_ASSERT_VALUES_EQUAL_C(lhs.Get<TDtLayout>(), rhs.Get<TDtLayout>(), "Expected the same data after trim");
  252. UNIT_ASSERT_VALUES_EQUAL_C(lhs.GetTimezoneId(), rhs.GetTimezoneId(), "Expected the same data after trim");
  253. }
  254. }
  255. }
  256. extern const char ResourceName[] = "Resource.Name";
  257. Y_UNIT_TEST(TestResource) {
  258. TBlockTrimmerTestData data;
  259. const auto resourceType = data.PgmBuilder.NewResourceType(ResourceName);
  260. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(resourceType);
  261. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  262. Y_ENSURE(blockLen > 8);
  263. constexpr auto testSize = NMiniKQL::MaxBlockSizeInBytes / sizeof(TUnboxedValue);
  264. constexpr auto sliceSize = 1024;
  265. static_assert(testSize % sliceSize == 0);
  266. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), resourceType, *data.ArrowPool, blockLen, nullptr);
  267. auto reader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  268. auto trimmer = MakeBlockTrimmer(NMiniKQL::TTypeInfoHelper(), resourceType, data.ArrowPool);
  269. struct TWithDtor {
  270. int Payload;
  271. std::shared_ptr<int> DestructorCallsCnt;
  272. TWithDtor(int payload, std::shared_ptr<int> destructorCallsCnt):
  273. Payload(payload), DestructorCallsCnt(std::move(destructorCallsCnt)) {
  274. }
  275. ~TWithDtor() {
  276. *DestructorCallsCnt = *DestructorCallsCnt + 1;
  277. }
  278. };
  279. using TTestResource = TBoxedResource<TWithDtor, ResourceName>;
  280. auto destructorCallsCnt = std::make_shared<int>(0);
  281. {
  282. for (size_t i = 0; i < testSize; i++) {
  283. builder->Add(TUnboxedValuePod(new TTestResource(i, destructorCallsCnt)));
  284. }
  285. auto datum = builder->Build(true);
  286. Y_ENSURE(datum.is_array());
  287. auto array = datum.array();
  288. for (size_t sliceIdx = 0; sliceIdx < testSize / sliceSize; sliceIdx++) {
  289. auto slice = Chop(array, sliceSize);
  290. auto trimmedSlice = trimmer->Trim(slice);
  291. for (size_t elemIdx = 0; elemIdx < sliceSize; elemIdx++) {
  292. TBlockItem lhs = reader->GetItem(*slice, elemIdx);
  293. TBlockItem rhs = reader->GetItem(*trimmedSlice, elemIdx);
  294. auto lhsResource = reinterpret_cast<TTestResource*>(lhs.GetBoxed().Get());
  295. auto rhsResource = reinterpret_cast<TTestResource*>(rhs.GetBoxed().Get());
  296. UNIT_ASSERT_VALUES_EQUAL_C(lhsResource->Get()->Payload, rhsResource->Get()->Payload, "Expected the same data after trim");
  297. }
  298. }
  299. }
  300. UNIT_ASSERT_VALUES_EQUAL_C(*destructorCallsCnt, testSize, "Expected 1 call to resource destructor");
  301. }
  302. }