array_builder_ut.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. #include <library/cpp/testing/unittest/registar.h>
  2. #include <yql/essentials/public/udf/arrow/block_builder.h>
  3. #include <yql/essentials/public/udf/arrow/block_reader.h>
  4. #include <yql/essentials/public/udf/arrow/memory_pool.h>
  5. #include <yql/essentials/minikql/mkql_type_builder.h>
  6. #include <yql/essentials/minikql/mkql_function_registry.h>
  7. #include <yql/essentials/minikql/mkql_program_builder.h>
  8. #include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
  9. using namespace NYql::NUdf;
  10. using namespace NKikimr;
  11. constexpr size_t MAX_BLOCK_SIZE = 240_KB;
  12. struct TArrayBuilderTestData {
  13. TArrayBuilderTestData()
  14. : FunctionRegistry(NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry()))
  15. , Alloc(__LOCATION__)
  16. , Env(Alloc)
  17. , PgmBuilder(Env, *FunctionRegistry)
  18. , MemInfo("Memory")
  19. , ArrowPool(GetYqlMemoryPool())
  20. {
  21. }
  22. TIntrusivePtr<NMiniKQL::IFunctionRegistry> FunctionRegistry;
  23. NMiniKQL::TScopedAlloc Alloc;
  24. NMiniKQL::TTypeEnvironment Env;
  25. NMiniKQL::TProgramBuilder PgmBuilder;
  26. NMiniKQL::TMemoryUsageInfo MemInfo;
  27. arrow::MemoryPool* const ArrowPool;
  28. };
  29. std::unique_ptr<IArrayBuilder> MakeResourceArrayBuilder(TType* resourceType, TArrayBuilderTestData& data) {
  30. auto arrayBuilder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), resourceType,
  31. *data.ArrowPool, MAX_BLOCK_SIZE, /* pgBuilder */nullptr);
  32. UNIT_ASSERT_C(arrayBuilder, "Failed to make resource arrow array builder");
  33. return arrayBuilder;
  34. }
  35. Y_UNIT_TEST_SUITE(TArrayBuilderTest) {
  36. Y_UNIT_TEST(TestEmbeddedResourceBuilder) {
  37. TArrayBuilderTestData data;
  38. const auto resourceType = data.PgmBuilder.NewResourceType("Test.Resource");
  39. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  40. auto resource = TUnboxedValuePod::Embedded("testtest");
  41. arrayBuilder->Add(resource);
  42. auto datum = arrayBuilder->Build(true);
  43. UNIT_ASSERT(datum.is_array());
  44. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 1);
  45. auto value = datum.array()->GetValues<TUnboxedValue>(1)[0];
  46. UNIT_ASSERT(value.IsEmbedded());
  47. UNIT_ASSERT_VALUES_EQUAL_C(TStringRef(value.AsStringRef()), TStringRef(resource.AsStringRef()),
  48. "Expected equal values after building array");
  49. }
  50. extern const char ResourceName[] = "Resource.Name";
  51. Y_UNIT_TEST(TestDtorCall) {
  52. TArrayBuilderTestData data;
  53. const auto resourceType = data.PgmBuilder.NewResourceType("Test.Resource");
  54. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  55. auto destructorCallsCnt = std::make_shared<int>(0);
  56. struct TWithDtor {
  57. int Payload;
  58. std::shared_ptr<int> DestructorCallsCnt;
  59. TWithDtor(int payload, std::shared_ptr<int> destructorCallsCnt):
  60. Payload(payload), DestructorCallsCnt(std::move(destructorCallsCnt)) {
  61. }
  62. ~TWithDtor() {
  63. *DestructorCallsCnt = *DestructorCallsCnt + 1;
  64. }
  65. };
  66. int payload = 123;
  67. using TTestResource = TBoxedResource<std::shared_ptr<TWithDtor>, ResourceName>;
  68. auto resourcePtr = std::make_shared<TWithDtor>(payload, destructorCallsCnt);
  69. TUnboxedValuePod resource(new TTestResource(std::move(resourcePtr)));
  70. {
  71. arrayBuilder->Add(resource);
  72. auto datum = arrayBuilder->Build(true);
  73. UNIT_ASSERT(datum.is_array());
  74. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 1);
  75. const auto value = datum.array()->GetValues<TUnboxedValuePod>(1)[0];
  76. auto boxed = value.AsBoxed().Get();
  77. const auto resource = reinterpret_cast<TTestResource*>(boxed);
  78. UNIT_ASSERT_VALUES_EQUAL(resource->Get()->get()->Payload, payload);
  79. }
  80. UNIT_ASSERT_VALUES_EQUAL_C(*destructorCallsCnt, 1, "Expected 1 call to resource destructor");
  81. }
  82. Y_UNIT_TEST(TestBoxedResourceNullable) {
  83. TArrayBuilderTestData data;
  84. const auto resourceType = data.PgmBuilder.NewOptionalType(data.PgmBuilder.NewResourceType("Test.Resource"));
  85. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  86. struct TResourceItem {
  87. int Payload;
  88. };
  89. using TTestResource = TBoxedResource<TResourceItem, ResourceName>;
  90. for (int i = 0; i < 4; i++) {
  91. if ((i % 2) == 0) {
  92. TUnboxedValuePod resource(new TTestResource(TResourceItem{i}));
  93. arrayBuilder->Add(resource);
  94. } else {
  95. arrayBuilder->Add(TUnboxedValuePod{});
  96. }
  97. }
  98. auto datum = arrayBuilder->Build(true);
  99. const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  100. for (int i = 0; i < 4; i++) {
  101. if ((i % 2) == 0) {
  102. auto item = blockReader->GetItem(*datum.array(), i);
  103. UNIT_ASSERT_C(item.HasValue(), "Expected not null");
  104. auto* resourcePtr = reinterpret_cast<TTestResource*>(item.GetBoxed().Get());
  105. UNIT_ASSERT_EQUAL(i, resourcePtr->Get()->Payload);
  106. } else {
  107. auto item = blockReader->GetItem(*datum.array(), i);
  108. UNIT_ASSERT(!item.HasValue());
  109. }
  110. }
  111. }
  112. Y_UNIT_TEST(TestBuilderWithReader) {
  113. TArrayBuilderTestData data;
  114. const auto resourceType = data.PgmBuilder.NewResourceType("Test.Resource");
  115. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  116. const auto item1 = TUnboxedValuePod::Embedded("1");
  117. arrayBuilder->Add(item1);
  118. const auto item2 = TUnboxedValuePod::Embedded("22");
  119. arrayBuilder->Add(item2);
  120. auto datum = arrayBuilder->Build(true);
  121. UNIT_ASSERT(datum.is_array());
  122. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 2);
  123. const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  124. const auto item1AfterRead = blockReader->GetItem(*datum.array(), 0);
  125. const auto item2AfterRead = blockReader->GetItem(*datum.array(), 1);
  126. UNIT_ASSERT_C(std::memcmp(item1.GetRawPtr(), item1AfterRead.GetRawPtr(), sizeof(TBlockItem)) == 0, "Expected UnboxedValue to equal to BlockItem");
  127. UNIT_ASSERT_C(std::memcmp(item2.GetRawPtr(), item2AfterRead.GetRawPtr(), sizeof(TBlockItem)) == 0, "Expected UnboxedValue to equal to BlockItem");
  128. }
  129. Y_UNIT_TEST(TestBoxedResourceReader) {
  130. TArrayBuilderTestData data;
  131. const auto resourceType = data.PgmBuilder.NewResourceType(ResourceName);
  132. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  133. using TTestResource = TBoxedResource<int, ResourceName>;
  134. arrayBuilder->Add(TUnboxedValuePod(new TTestResource(11111111)));
  135. arrayBuilder->Add(TUnboxedValuePod(new TTestResource(22222222)));
  136. const auto datum = arrayBuilder->Build(true);
  137. UNIT_ASSERT(datum.is_array());
  138. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 2);
  139. const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  140. const auto item1AfterRead = blockReader->GetItem(*datum.array(), 0);
  141. const auto item2AfterRead = blockReader->GetItem(*datum.array(), 1);
  142. auto boxed1 = item1AfterRead.GetBoxed().Get();
  143. const auto resource1 = reinterpret_cast<TTestResource*>(boxed1);
  144. UNIT_ASSERT_VALUES_EQUAL(*resource1->Get(), 11111111);
  145. UNIT_ASSERT_VALUES_EQUAL(resource1->GetResourceTag(), ResourceName);
  146. auto boxed2 = item2AfterRead.GetBoxed().Get();
  147. const auto resource2 = reinterpret_cast<TTestResource*>(boxed2);
  148. UNIT_ASSERT_VALUES_EQUAL(*resource2->Get(), 22222222);
  149. UNIT_ASSERT_VALUES_EQUAL(resource2->GetResourceTag(), ResourceName);
  150. }
  151. Y_UNIT_TEST(TestTzDateBuilder_Layout) {
  152. TArrayBuilderTestData data;
  153. const auto tzDateType = data.PgmBuilder.NewDataType(EDataSlot::TzDate);
  154. const auto arrayBuilder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), tzDateType,
  155. *data.ArrowPool, MAX_BLOCK_SIZE, /* pgBuilder */ nullptr);
  156. auto makeTzDate = [] (ui16 val, ui16 tz) {
  157. TUnboxedValuePod tzDate {val};
  158. tzDate.SetTimezoneId(tz);
  159. return tzDate;
  160. };
  161. TVector<TUnboxedValuePod> dates{makeTzDate(1234, 1), makeTzDate(1234, 2), makeTzDate(45678, 333)};
  162. for (auto date: dates) {
  163. arrayBuilder->Add(date);
  164. }
  165. const auto datum = arrayBuilder->Build(true);
  166. UNIT_ASSERT(datum.is_array());
  167. UNIT_ASSERT_VALUES_EQUAL(datum.length(), dates.size());
  168. const auto childData = datum.array()->child_data;
  169. UNIT_ASSERT_VALUES_EQUAL_C(childData.size(), 2, "Expected date and timezone children");
  170. }
  171. Y_UNIT_TEST(TestResourceStringValueBuilderReader) {
  172. TArrayBuilderTestData data;
  173. const auto resourceType = data.PgmBuilder.NewResourceType(ResourceName);
  174. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  175. arrayBuilder->Add(TUnboxedValuePod(TStringValue("test")));
  176. arrayBuilder->Add(TUnboxedValuePod(TStringValue("1234"), /* size */ 3, /* offset */ 1));
  177. const auto datum = arrayBuilder->Build(true);
  178. UNIT_ASSERT(datum.is_array());
  179. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 2);
  180. const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  181. const auto item1AfterRead = blockReader->GetItem(*datum.array(), 0);
  182. const auto item2AfterRead = blockReader->GetItem(*datum.array(), 1);
  183. UNIT_ASSERT_VALUES_EQUAL(item1AfterRead.GetStringRefFromValue(), "test");
  184. UNIT_ASSERT_VALUES_EQUAL(item2AfterRead.GetStringRefFromValue(), "234");
  185. }
  186. Y_UNIT_TEST(TestBuilderAllocatedSize) {
  187. TArrayBuilderTestData data;
  188. const auto optStringType = data.PgmBuilder.NewDataType(NUdf::EDataSlot::String, true);
  189. const auto int64Type = data.PgmBuilder.NewDataType(NUdf::EDataSlot::Int64, false);
  190. const auto structType = data.PgmBuilder.NewStructType({{ "a", optStringType }, { "b", int64Type }});
  191. const auto optStructType = data.PgmBuilder.NewOptionalType(structType);
  192. const auto doubleOptStructType = data.PgmBuilder.NewOptionalType(optStructType);
  193. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(doubleOptStructType);
  194. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  195. Y_ENSURE(blockLen > 8);
  196. size_t bigStringSize = NMiniKQL::MaxBlockSizeInBytes / 8;
  197. size_t hugeStringSize = NMiniKQL::MaxBlockSizeInBytes * 2;
  198. const TString bString(bigStringSize, 'a');
  199. TBlockItem strItem1(bString);
  200. TBlockItem intItem1(1);
  201. TBlockItem sItems1[] = { strItem1, intItem1 };
  202. TBlockItem sItem1(sItems1);
  203. const TBlockItem bigItem = sItem1.MakeOptional();
  204. const TString hString(hugeStringSize, 'b');
  205. TBlockItem strItem2(hString);
  206. TBlockItem intItem2(2);
  207. TBlockItem sItems2[] = { strItem2, intItem2 };
  208. TBlockItem sItem2(sItems2);
  209. const TBlockItem hugeItem = sItem2.MakeOptional();
  210. const size_t stringAllocStep =
  211. arrow::BitUtil::RoundUpToMultipleOf64(blockLen + 1) + // String NullMask
  212. arrow::BitUtil::RoundUpToMultipleOf64((blockLen + 1) * 4) + // String Offsets
  213. NMiniKQL::MaxBlockSizeInBytes; // String Data
  214. const size_t initialAllocated =
  215. stringAllocStep +
  216. arrow::BitUtil::RoundUpToMultipleOf64((blockLen + 1) * 8) + // Int64 Data
  217. 2 * arrow::BitUtil::RoundUpToMultipleOf64(blockLen + 1); // Double Optional
  218. size_t totalAllocated = 0;
  219. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), doubleOptStructType, *data.ArrowPool, blockLen, nullptr, &totalAllocated);
  220. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, initialAllocated);
  221. for (ui32 i = 0; i < 8; ++i) {
  222. builder->Add(bigItem);
  223. }
  224. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, initialAllocated);
  225. // string data block is fully used here
  226. size_t beforeBlockBoundary = totalAllocated;
  227. builder->Add(bigItem);
  228. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, beforeBlockBoundary + stringAllocStep);
  229. // string data block is partially used
  230. size_t beforeHugeString = totalAllocated;
  231. builder->Add(hugeItem);
  232. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, beforeHugeString + stringAllocStep + hugeStringSize - NMiniKQL::MaxBlockSizeInBytes);
  233. totalAllocated = 0;
  234. builder->Build(false);
  235. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, initialAllocated);
  236. }
  237. }