array_builder_ut.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. #include <library/cpp/testing/unittest/registar.h>
  2. #include <yql/essentials/public/udf/arrow/block_builder.h>
  3. #include <yql/essentials/public/udf/arrow/memory_pool.h>
  4. #include <yql/essentials/minikql/mkql_type_builder.h>
  5. #include <yql/essentials/minikql/mkql_function_registry.h>
  6. #include <yql/essentials/minikql/mkql_program_builder.h>
  7. #include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
  8. using namespace NYql::NUdf;
  9. using namespace NKikimr;
  10. constexpr size_t MAX_BLOCK_SIZE = 240_KB;
  11. struct TArrayBuilderTestData {
  12. TArrayBuilderTestData()
  13. : FunctionRegistry(NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry()))
  14. , Alloc(__LOCATION__)
  15. , Env(Alloc)
  16. , PgmBuilder(Env, *FunctionRegistry)
  17. , MemInfo("Memory")
  18. , ArrowPool(GetYqlMemoryPool())
  19. {
  20. }
  21. TIntrusivePtr<NMiniKQL::IFunctionRegistry> FunctionRegistry;
  22. NMiniKQL::TScopedAlloc Alloc;
  23. NMiniKQL::TTypeEnvironment Env;
  24. NMiniKQL::TProgramBuilder PgmBuilder;
  25. NMiniKQL::TMemoryUsageInfo MemInfo;
  26. arrow::MemoryPool* const ArrowPool;
  27. };
  28. std::unique_ptr<IArrayBuilder> MakeResourceArrayBuilder(TType* resourceType, TArrayBuilderTestData& data) {
  29. auto arrayBuilder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), resourceType,
  30. *data.ArrowPool, MAX_BLOCK_SIZE, /* pgBuilder */nullptr);
  31. UNIT_ASSERT_C(arrayBuilder, "Failed to make resource arrow array builder");
  32. return arrayBuilder;
  33. }
  34. Y_UNIT_TEST_SUITE(TArrayBuilderTest) {
  35. Y_UNIT_TEST(TestEmbeddedResourceBuilder) {
  36. TArrayBuilderTestData data;
  37. const auto resourceType = data.PgmBuilder.NewResourceType("Test.Resource");
  38. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  39. auto resource = TUnboxedValuePod::Embedded("testtest");
  40. arrayBuilder->Add(resource);
  41. auto datum = arrayBuilder->Build(true);
  42. UNIT_ASSERT(datum.is_array());
  43. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 1);
  44. auto value = datum.array()->GetValues<TUnboxedValue>(1)[0];
  45. UNIT_ASSERT(value.IsEmbedded());
  46. UNIT_ASSERT_VALUES_EQUAL_C(TStringRef(value.AsStringRef()), TStringRef(resource.AsStringRef()),
  47. "Expected equal values after building array");
  48. }
  49. extern const char ResourceName[] = "Resource.Name";
  50. Y_UNIT_TEST(TestDtorCall) {
  51. TArrayBuilderTestData data;
  52. const auto resourceType = data.PgmBuilder.NewResourceType("Test.Resource");
  53. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  54. auto destructorCallsCnt = std::make_shared<int>(0);
  55. struct TWithDtor {
  56. int Payload;
  57. std::shared_ptr<int> DestructorCallsCnt;
  58. TWithDtor(int payload, std::shared_ptr<int> destructorCallsCnt):
  59. Payload(payload), DestructorCallsCnt(std::move(destructorCallsCnt)) {
  60. }
  61. ~TWithDtor() {
  62. *DestructorCallsCnt = *DestructorCallsCnt + 1;
  63. }
  64. };
  65. int payload = 123;
  66. using TTestResource = TBoxedResource<std::shared_ptr<TWithDtor>, ResourceName>;
  67. auto resourcePtr = std::make_shared<TWithDtor>(payload, destructorCallsCnt);
  68. TUnboxedValuePod resource(new TTestResource(std::move(resourcePtr)));
  69. {
  70. arrayBuilder->Add(resource);
  71. auto datum = arrayBuilder->Build(true);
  72. UNIT_ASSERT(datum.is_array());
  73. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 1);
  74. const auto value = datum.array()->GetValues<TUnboxedValuePod>(1)[0];
  75. auto boxed = value.AsBoxed().Get();
  76. const auto resource = reinterpret_cast<TTestResource*>(boxed);
  77. UNIT_ASSERT_VALUES_EQUAL(resource->Get()->get()->Payload, payload);
  78. }
  79. UNIT_ASSERT_VALUES_EQUAL_C(*destructorCallsCnt, 1, "Expected 1 call to resource destructor");
  80. }
  81. Y_UNIT_TEST(TestBoxedResourceNullable) {
  82. TArrayBuilderTestData data;
  83. const auto resourceType = data.PgmBuilder.NewOptionalType(data.PgmBuilder.NewResourceType("Test.Resource"));
  84. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  85. struct TResourceItem {
  86. int Payload;
  87. };
  88. using TTestResource = TBoxedResource<TResourceItem, ResourceName>;
  89. for (int i = 0; i < 4; i++) {
  90. if ((i % 2) == 0) {
  91. TUnboxedValuePod resource(new TTestResource(TResourceItem{i}));
  92. arrayBuilder->Add(resource);
  93. } else {
  94. arrayBuilder->Add(TUnboxedValuePod{});
  95. }
  96. }
  97. auto datum = arrayBuilder->Build(true);
  98. const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  99. for (int i = 0; i < 4; i++) {
  100. if ((i % 2) == 0) {
  101. auto item = blockReader->GetItem(*datum.array(), i);
  102. UNIT_ASSERT_C(item.HasValue(), "Expected not null");
  103. auto* resourcePtr = reinterpret_cast<TTestResource*>(item.GetBoxed().Get());
  104. UNIT_ASSERT_EQUAL(i, resourcePtr->Get()->Payload);
  105. } else {
  106. auto item = blockReader->GetItem(*datum.array(), i);
  107. UNIT_ASSERT(!item.HasValue());
  108. }
  109. }
  110. }
  111. Y_UNIT_TEST(TestBuilderWithReader) {
  112. TArrayBuilderTestData data;
  113. const auto resourceType = data.PgmBuilder.NewResourceType("Test.Resource");
  114. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  115. const auto item1 = TUnboxedValuePod::Embedded("1");
  116. arrayBuilder->Add(item1);
  117. const auto item2 = TUnboxedValuePod::Embedded("22");
  118. arrayBuilder->Add(item2);
  119. auto datum = arrayBuilder->Build(true);
  120. UNIT_ASSERT(datum.is_array());
  121. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 2);
  122. const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  123. const auto item1AfterRead = blockReader->GetItem(*datum.array(), 0);
  124. const auto item2AfterRead = blockReader->GetItem(*datum.array(), 1);
  125. UNIT_ASSERT_C(std::memcmp(item1.GetRawPtr(), item1AfterRead.GetRawPtr(), sizeof(TBlockItem)) == 0, "Expected UnboxedValue to equal to BlockItem");
  126. UNIT_ASSERT_C(std::memcmp(item2.GetRawPtr(), item2AfterRead.GetRawPtr(), sizeof(TBlockItem)) == 0, "Expected UnboxedValue to equal to BlockItem");
  127. }
  128. Y_UNIT_TEST(TestBoxedResourceReader) {
  129. TArrayBuilderTestData data;
  130. const auto resourceType = data.PgmBuilder.NewResourceType(ResourceName);
  131. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  132. using TTestResource = TBoxedResource<int, ResourceName>;
  133. arrayBuilder->Add(TUnboxedValuePod(new TTestResource(11111111)));
  134. arrayBuilder->Add(TUnboxedValuePod(new TTestResource(22222222)));
  135. const auto datum = arrayBuilder->Build(true);
  136. UNIT_ASSERT(datum.is_array());
  137. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 2);
  138. const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  139. const auto item1AfterRead = blockReader->GetItem(*datum.array(), 0);
  140. const auto item2AfterRead = blockReader->GetItem(*datum.array(), 1);
  141. auto boxed1 = item1AfterRead.GetBoxed().Get();
  142. const auto resource1 = reinterpret_cast<TTestResource*>(boxed1);
  143. UNIT_ASSERT_VALUES_EQUAL(*resource1->Get(), 11111111);
  144. UNIT_ASSERT_VALUES_EQUAL(resource1->GetResourceTag(), ResourceName);
  145. auto boxed2 = item2AfterRead.GetBoxed().Get();
  146. const auto resource2 = reinterpret_cast<TTestResource*>(boxed2);
  147. UNIT_ASSERT_VALUES_EQUAL(*resource2->Get(), 22222222);
  148. UNIT_ASSERT_VALUES_EQUAL(resource2->GetResourceTag(), ResourceName);
  149. }
  150. Y_UNIT_TEST(TestTzDateBuilder_Layout) {
  151. TArrayBuilderTestData data;
  152. const auto tzDateType = data.PgmBuilder.NewDataType(EDataSlot::TzDate);
  153. const auto arrayBuilder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), tzDateType,
  154. *data.ArrowPool, MAX_BLOCK_SIZE, /* pgBuilder */ nullptr);
  155. auto makeTzDate = [] (ui16 val, ui16 tz) {
  156. TUnboxedValuePod tzDate {val};
  157. tzDate.SetTimezoneId(tz);
  158. return tzDate;
  159. };
  160. TVector<TUnboxedValuePod> dates{makeTzDate(1234, 1), makeTzDate(1234, 2), makeTzDate(45678, 333)};
  161. for (auto date: dates) {
  162. arrayBuilder->Add(date);
  163. }
  164. const auto datum = arrayBuilder->Build(true);
  165. UNIT_ASSERT(datum.is_array());
  166. UNIT_ASSERT_VALUES_EQUAL(datum.length(), dates.size());
  167. const auto childData = datum.array()->child_data;
  168. UNIT_ASSERT_VALUES_EQUAL_C(childData.size(), 2, "Expected date and timezone children");
  169. }
  170. Y_UNIT_TEST(TestResourceStringValueBuilderReader) {
  171. TArrayBuilderTestData data;
  172. const auto resourceType = data.PgmBuilder.NewResourceType(ResourceName);
  173. const auto arrayBuilder = MakeResourceArrayBuilder(resourceType, data);
  174. arrayBuilder->Add(TUnboxedValuePod(TStringValue("test")));
  175. arrayBuilder->Add(TUnboxedValuePod(TStringValue("1234"), /* size */ 3, /* offset */ 1));
  176. const auto datum = arrayBuilder->Build(true);
  177. UNIT_ASSERT(datum.is_array());
  178. UNIT_ASSERT_VALUES_EQUAL(datum.length(), 2);
  179. const auto blockReader = MakeBlockReader(NMiniKQL::TTypeInfoHelper(), resourceType);
  180. const auto item1AfterRead = blockReader->GetItem(*datum.array(), 0);
  181. const auto item2AfterRead = blockReader->GetItem(*datum.array(), 1);
  182. UNIT_ASSERT_VALUES_EQUAL(item1AfterRead.GetStringRefFromValue(), "test");
  183. UNIT_ASSERT_VALUES_EQUAL(item2AfterRead.GetStringRefFromValue(), "234");
  184. }
  185. Y_UNIT_TEST(TestBuilderAllocatedSize) {
  186. TArrayBuilderTestData data;
  187. const auto optStringType = data.PgmBuilder.NewDataType(NUdf::EDataSlot::String, true);
  188. const auto int64Type = data.PgmBuilder.NewDataType(NUdf::EDataSlot::Int64, false);
  189. const auto structType = data.PgmBuilder.NewStructType({{ "a", optStringType }, { "b", int64Type }});
  190. const auto optStructType = data.PgmBuilder.NewOptionalType(structType);
  191. const auto doubleOptStructType = data.PgmBuilder.NewOptionalType(optStructType);
  192. size_t itemSize = NMiniKQL::CalcMaxBlockItemSize(doubleOptStructType);
  193. size_t blockLen = NMiniKQL::CalcBlockLen(itemSize);
  194. Y_ENSURE(blockLen > 8);
  195. size_t bigStringSize = NMiniKQL::MaxBlockSizeInBytes / 8;
  196. size_t hugeStringSize = NMiniKQL::MaxBlockSizeInBytes * 2;
  197. const TString bString(bigStringSize, 'a');
  198. TBlockItem strItem1(bString);
  199. TBlockItem intItem1(1);
  200. TBlockItem sItems1[] = { strItem1, intItem1 };
  201. TBlockItem sItem1(sItems1);
  202. const TBlockItem bigItem = sItem1.MakeOptional();
  203. const TString hString(hugeStringSize, 'b');
  204. TBlockItem strItem2(hString);
  205. TBlockItem intItem2(2);
  206. TBlockItem sItems2[] = { strItem2, intItem2 };
  207. TBlockItem sItem2(sItems2);
  208. const TBlockItem hugeItem = sItem2.MakeOptional();
  209. const size_t stringAllocStep =
  210. arrow::BitUtil::RoundUpToMultipleOf64(blockLen + 1) + // String NullMask
  211. arrow::BitUtil::RoundUpToMultipleOf64((blockLen + 1) * 4) + // String Offsets
  212. NMiniKQL::MaxBlockSizeInBytes; // String Data
  213. const size_t initialAllocated =
  214. stringAllocStep +
  215. arrow::BitUtil::RoundUpToMultipleOf64((blockLen + 1) * 8) + // Int64 Data
  216. 2 * arrow::BitUtil::RoundUpToMultipleOf64(blockLen + 1); // Double Optional
  217. size_t totalAllocated = 0;
  218. auto builder = MakeArrayBuilder(NMiniKQL::TTypeInfoHelper(), doubleOptStructType, *data.ArrowPool, blockLen, nullptr, &totalAllocated);
  219. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, initialAllocated);
  220. for (ui32 i = 0; i < 8; ++i) {
  221. builder->Add(bigItem);
  222. }
  223. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, initialAllocated);
  224. // string data block is fully used here
  225. size_t beforeBlockBoundary = totalAllocated;
  226. builder->Add(bigItem);
  227. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, beforeBlockBoundary + stringAllocStep);
  228. // string data block is partially used
  229. size_t beforeHugeString = totalAllocated;
  230. builder->Add(hugeItem);
  231. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, beforeHugeString + stringAllocStep + hugeStringSize - NMiniKQL::MaxBlockSizeInBytes);
  232. totalAllocated = 0;
  233. builder->Build(false);
  234. UNIT_ASSERT_VALUES_EQUAL(totalAllocated, initialAllocated);
  235. }
  236. }