mkql_value_builder_ut.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. #include "mkql_value_builder.h"
  2. #include "mkql_computation_node_holders.h"
  3. #include <yql/essentials/minikql/mkql_function_registry.h>
  4. #include <yql/essentials/minikql/mkql_type_builder.h>
  5. #include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
  6. #include <yql/essentials/parser/pg_catalog/catalog.h>
  7. #include <library/cpp/testing/unittest/registar.h>
  8. #include <arrow/array/builder_primitive.h>
  9. #include <arrow/c/abi.h>
  10. #include <arrow/scalar.h>
  11. #include <arrow/chunked_array.h>
  12. namespace NYql {
  13. namespace NCommon {
  14. TString PgValueToNativeText(const NUdf::TUnboxedValuePod& value, ui32 pgTypeId);
  15. TString PgValueToNativeBinary(const NUdf::TUnboxedValuePod& value, ui32 pgTypeId);
  16. }
  17. }
  18. namespace NKikimr {
  19. using namespace NUdf;
  20. using namespace NYql::NCommon;
  21. namespace NMiniKQL {
  22. namespace {
  23. TString AsString(const TStringValue& v) {
  24. return { v.Data(), v.Size() };
  25. }
  26. }
  27. class TMiniKQLValueBuilderTest: public TTestBase {
  28. public:
  29. TMiniKQLValueBuilderTest()
  30. : FunctionRegistry(CreateFunctionRegistry(CreateBuiltinRegistry()))
  31. , Alloc(__LOCATION__)
  32. , Env(Alloc)
  33. , MemInfo("Memory")
  34. , HolderFactory(Alloc.Ref(), MemInfo, FunctionRegistry.Get())
  35. , Builder(HolderFactory, NUdf::EValidatePolicy::Exception)
  36. , TypeInfoHelper(new TTypeInfoHelper())
  37. , FunctionTypeInfoBuilder(Env, TypeInfoHelper, "", nullptr, {})
  38. {
  39. BoolOid = NYql::NPg::LookupType("bool").TypeId;
  40. }
  41. const IPgBuilder& GetPgBuilder() const {
  42. return Builder.GetPgBuilder();
  43. }
  44. private:
  45. TIntrusivePtr<NMiniKQL::IFunctionRegistry> FunctionRegistry;
  46. TScopedAlloc Alloc;
  47. TTypeEnvironment Env;
  48. TMemoryUsageInfo MemInfo;
  49. THolderFactory HolderFactory;
  50. TDefaultValueBuilder Builder;
  51. NUdf::ITypeInfoHelper::TPtr TypeInfoHelper;
  52. TFunctionTypeInfoBuilder FunctionTypeInfoBuilder;
  53. ui32 BoolOid = 0;
  54. UNIT_TEST_SUITE(TMiniKQLValueBuilderTest);
  55. UNIT_TEST(TestEmbeddedVariant);
  56. UNIT_TEST(TestBoxedVariant);
  57. UNIT_TEST(TestSubstring);
  58. UNIT_TEST(TestPgValueFromErrors);
  59. UNIT_TEST(TestPgValueFromText);
  60. UNIT_TEST(TestPgValueFromBinary);
  61. UNIT_TEST(TestConvertToFromPg);
  62. UNIT_TEST(TestConvertToFromPgNulls);
  63. UNIT_TEST(TestPgNewString);
  64. UNIT_TEST(TestArrowBlock);
  65. UNIT_TEST_SUITE_END();
  66. void TestEmbeddedVariant() {
  67. const auto v = Builder.NewVariant(62, TUnboxedValuePod((ui64) 42));
  68. UNIT_ASSERT(v);
  69. UNIT_ASSERT(!v.IsBoxed());
  70. UNIT_ASSERT_VALUES_EQUAL(62, v.GetVariantIndex());
  71. UNIT_ASSERT_VALUES_EQUAL(42, v.GetVariantItem().Get<ui64>());
  72. }
  73. void TestBoxedVariant() {
  74. const auto v = Builder.NewVariant(63, TUnboxedValuePod((ui64) 42));
  75. UNIT_ASSERT(v);
  76. UNIT_ASSERT(v.IsBoxed());
  77. UNIT_ASSERT_VALUES_EQUAL(63, v.GetVariantIndex());
  78. UNIT_ASSERT_VALUES_EQUAL(42, v.GetVariantItem().Get<ui64>());
  79. }
  80. void TestSubstring() {
  81. const auto string = Builder.NewString("0123456789qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM");
  82. UNIT_ASSERT(string);
  83. const auto zero = Builder.SubString(string, 7, 0);
  84. UNIT_ASSERT_VALUES_EQUAL(TStringBuf(""), TStringBuf(zero.AsStringRef()));
  85. const auto tail = Builder.SubString(string, 60, 8);
  86. UNIT_ASSERT_VALUES_EQUAL(TStringBuf("NM"), TStringBuf(tail.AsStringRef()));
  87. const auto small = Builder.SubString(string, 2, 14);
  88. UNIT_ASSERT_VALUES_EQUAL(TStringBuf("23456789qwerty"), TStringBuf(small.AsStringRef()));
  89. const auto one = Builder.SubString(string, 3, 15);
  90. UNIT_ASSERT_VALUES_EQUAL(TStringBuf("3456789qwertyui"), TStringBuf(one.AsStringRef()));
  91. UNIT_ASSERT_VALUES_EQUAL(string.AsStringValue().Data(), one.AsStringValue().Data());
  92. const auto two = Builder.SubString(string, 10, 30);
  93. UNIT_ASSERT_VALUES_EQUAL(TStringBuf("qwertyuiopasdfghjklzxcvbnmQWER"), TStringBuf(two.AsStringRef()));
  94. UNIT_ASSERT_VALUES_EQUAL(string.AsStringValue().Data(), two.AsStringValue().Data());
  95. }
  96. void TestPgValueFromErrors() {
  97. const TBindTerminator bind(&Builder); // to raise exception instead of abort
  98. {
  99. TStringValue error("");
  100. auto r = GetPgBuilder().ValueFromText(BoolOid, "", error);
  101. UNIT_ASSERT(!r);
  102. UNIT_ASSERT_STRING_CONTAINS(AsString(error), "ERROR: invalid input syntax for type boolean: \"\"");
  103. }
  104. {
  105. TStringValue error("");
  106. auto r = GetPgBuilder().ValueFromText(BoolOid, "zzzz", error);
  107. UNIT_ASSERT(!r);
  108. UNIT_ASSERT_STRING_CONTAINS(AsString(error), "ERROR: invalid input syntax for type boolean: \"zzzz\"");
  109. }
  110. {
  111. TStringValue error("");
  112. auto r = GetPgBuilder().ValueFromBinary(BoolOid, "", error);
  113. UNIT_ASSERT(!r);
  114. UNIT_ASSERT_STRING_CONTAINS(AsString(error), "ERROR: no data left in message");
  115. }
  116. {
  117. TStringValue error("");
  118. auto r = GetPgBuilder().ValueFromBinary(BoolOid, "zzzz", error);
  119. UNIT_ASSERT(!r);
  120. UNIT_ASSERT_STRING_CONTAINS(AsString(error), "Not all data has been consumed by 'recv' function: boolrecv, data size: 4, consumed size: 1");
  121. }
  122. }
  123. void TestPgValueFromText() {
  124. const TBindTerminator bind(&Builder);
  125. for (auto validTrue : { "t"sv, "true"sv }) {
  126. TStringValue error("");
  127. auto r = GetPgBuilder().ValueFromText(BoolOid, validTrue, error);
  128. UNIT_ASSERT(r);
  129. UNIT_ASSERT_VALUES_EQUAL(AsString(error), "");
  130. auto s = PgValueToNativeText(r, BoolOid);
  131. UNIT_ASSERT_VALUES_EQUAL(s, "t");
  132. }
  133. for (auto validFalse : { "f"sv, "false"sv }) {
  134. TStringValue error("");
  135. auto r = GetPgBuilder().ValueFromText(BoolOid, validFalse, error);
  136. UNIT_ASSERT(r);
  137. UNIT_ASSERT_VALUES_EQUAL(AsString(error), "");
  138. auto s = PgValueToNativeText(r, BoolOid);
  139. UNIT_ASSERT_VALUES_EQUAL(s, "f");
  140. }
  141. }
  142. void TestPgValueFromBinary() {
  143. const TBindTerminator bind(&Builder);
  144. TStringValue error("");
  145. auto t = GetPgBuilder().ValueFromText(BoolOid, "true", error);
  146. UNIT_ASSERT(t);
  147. auto f = GetPgBuilder().ValueFromText(BoolOid, "false", error);
  148. UNIT_ASSERT(f);
  149. auto ts = PgValueToNativeBinary(t, BoolOid);
  150. auto fs = PgValueToNativeBinary(f, BoolOid);
  151. {
  152. auto r = GetPgBuilder().ValueFromBinary(BoolOid, ts, error);
  153. UNIT_ASSERT(r);
  154. auto s = PgValueToNativeText(r, BoolOid);
  155. UNIT_ASSERT_VALUES_EQUAL(s, "t");
  156. }
  157. {
  158. auto r = GetPgBuilder().ValueFromBinary(BoolOid, fs, error);
  159. UNIT_ASSERT(r);
  160. auto s = PgValueToNativeText(r, BoolOid);
  161. UNIT_ASSERT_VALUES_EQUAL(s, "f");
  162. }
  163. }
  164. void TestConvertToFromPg() {
  165. const TBindTerminator bind(&Builder);
  166. auto boolType = FunctionTypeInfoBuilder.SimpleType<bool>();
  167. {
  168. auto v = GetPgBuilder().ConvertToPg(TUnboxedValuePod(true), boolType, BoolOid);
  169. auto s = PgValueToNativeText(v, BoolOid);
  170. UNIT_ASSERT_VALUES_EQUAL(s, "t");
  171. auto from = GetPgBuilder().ConvertFromPg(v, BoolOid, boolType);
  172. UNIT_ASSERT_VALUES_EQUAL(from.Get<bool>(), true);
  173. }
  174. {
  175. auto v = GetPgBuilder().ConvertToPg(TUnboxedValuePod(false), boolType, BoolOid);
  176. auto s = PgValueToNativeText(v, BoolOid);
  177. UNIT_ASSERT_VALUES_EQUAL(s, "f");
  178. auto from = GetPgBuilder().ConvertFromPg(v, BoolOid, boolType);
  179. UNIT_ASSERT_VALUES_EQUAL(from.Get<bool>(), false);
  180. }
  181. }
  182. void TestConvertToFromPgNulls() {
  183. const TBindTerminator bind(&Builder);
  184. auto boolOptionalType = FunctionTypeInfoBuilder.Optional()->Item<bool>().Build();
  185. {
  186. auto v = GetPgBuilder().ConvertToPg(TUnboxedValuePod(), boolOptionalType, BoolOid);
  187. UNIT_ASSERT(!v);
  188. }
  189. {
  190. auto v = GetPgBuilder().ConvertFromPg(TUnboxedValuePod(), BoolOid, boolOptionalType);
  191. UNIT_ASSERT(!v);
  192. }
  193. }
  194. void TestPgNewString() {
  195. {
  196. auto& pgText = NYql::NPg::LookupType("text");
  197. UNIT_ASSERT_VALUES_EQUAL(pgText.TypeLen, -1);
  198. auto s = GetPgBuilder().NewString(pgText.TypeLen, pgText.TypeId, "ABC");
  199. auto utf8Type = FunctionTypeInfoBuilder.SimpleType<TUtf8>();
  200. auto from = GetPgBuilder().ConvertFromPg(s, pgText.TypeId, utf8Type);
  201. UNIT_ASSERT_VALUES_EQUAL((TStringBuf)from.AsStringRef(), "ABC"sv);
  202. }
  203. {
  204. auto& pgCString = NYql::NPg::LookupType("cstring");
  205. UNIT_ASSERT_VALUES_EQUAL(pgCString.TypeLen, -2);
  206. auto s = GetPgBuilder().NewString(pgCString.TypeLen, pgCString.TypeId, "ABC");
  207. auto utf8Type = FunctionTypeInfoBuilder.SimpleType<TUtf8>();
  208. auto from = GetPgBuilder().ConvertFromPg(s, pgCString.TypeId, utf8Type);
  209. UNIT_ASSERT_VALUES_EQUAL((TStringBuf)from.AsStringRef(), "ABC"sv);
  210. }
  211. {
  212. auto& byteaString = NYql::NPg::LookupType("bytea");
  213. UNIT_ASSERT_VALUES_EQUAL(byteaString.TypeLen, -1);
  214. auto s = GetPgBuilder().NewString(byteaString.TypeLen, byteaString.TypeId, "ABC");
  215. auto stringType = FunctionTypeInfoBuilder.SimpleType<char*>();
  216. auto from = GetPgBuilder().ConvertFromPg(s, byteaString.TypeId, stringType);
  217. UNIT_ASSERT_VALUES_EQUAL((TStringBuf)from.AsStringRef(), "ABC"sv);
  218. }
  219. }
  220. void TestArrowBlock() {
  221. auto type = FunctionTypeInfoBuilder.SimpleType<ui64>();
  222. auto atype = TypeInfoHelper->MakeArrowType(type);
  223. {
  224. arrow::Datum d1(std::make_shared<arrow::UInt64Scalar>(123));
  225. NUdf::TUnboxedValue val1 = HolderFactory.CreateArrowBlock(std::move(d1));
  226. bool isScalar;
  227. ui64 length;
  228. auto chunks = Builder.GetArrowBlockChunks(val1, isScalar, length);
  229. UNIT_ASSERT_VALUES_EQUAL(chunks, 1);
  230. UNIT_ASSERT(isScalar);
  231. UNIT_ASSERT_VALUES_EQUAL(length, 1);
  232. ArrowArray arr1;
  233. Builder.ExportArrowBlock(val1, 0, &arr1);
  234. NUdf::TUnboxedValue val2 = Builder.ImportArrowBlock(&arr1, 1, isScalar, *atype);
  235. const auto& d2 = TArrowBlock::From(val2).GetDatum();
  236. UNIT_ASSERT(d2.is_scalar());
  237. UNIT_ASSERT_VALUES_EQUAL(d2.scalar_as<arrow::UInt64Scalar>().value, 123);
  238. }
  239. {
  240. arrow::UInt64Builder builder;
  241. UNIT_ASSERT(builder.Reserve(3).ok());
  242. builder.UnsafeAppend(ui64(10));
  243. builder.UnsafeAppend(ui64(20));
  244. builder.UnsafeAppend(ui64(30));
  245. std::shared_ptr<arrow::ArrayData> builderResult;
  246. UNIT_ASSERT(builder.FinishInternal(&builderResult).ok());
  247. arrow::Datum d1(builderResult);
  248. NUdf::TUnboxedValue val1 = HolderFactory.CreateArrowBlock(std::move(d1));
  249. bool isScalar;
  250. ui64 length;
  251. auto chunks = Builder.GetArrowBlockChunks(val1, isScalar, length);
  252. UNIT_ASSERT_VALUES_EQUAL(chunks, 1);
  253. UNIT_ASSERT(!isScalar);
  254. UNIT_ASSERT_VALUES_EQUAL(length, 3);
  255. ArrowArray arr1;
  256. Builder.ExportArrowBlock(val1, 0, &arr1);
  257. NUdf::TUnboxedValue val2 = Builder.ImportArrowBlock(&arr1, 1, isScalar, *atype);
  258. const auto& d2 = TArrowBlock::From(val2).GetDatum();
  259. UNIT_ASSERT(d2.is_array());
  260. UNIT_ASSERT_VALUES_EQUAL(d2.array()->length, 3);
  261. UNIT_ASSERT_VALUES_EQUAL(d2.array()->GetNullCount(), 0);
  262. auto flat = d2.array()->GetValues<ui64>(1);
  263. UNIT_ASSERT_VALUES_EQUAL(flat[0], 10);
  264. UNIT_ASSERT_VALUES_EQUAL(flat[1], 20);
  265. UNIT_ASSERT_VALUES_EQUAL(flat[2], 30);
  266. }
  267. {
  268. arrow::UInt64Builder builder1;
  269. UNIT_ASSERT(builder1.Reserve(3).ok());
  270. builder1.UnsafeAppend(ui64(10));
  271. builder1.UnsafeAppend(ui64(20));
  272. builder1.UnsafeAppend(ui64(30));
  273. std::shared_ptr<arrow::Array> builder1Result;
  274. UNIT_ASSERT(builder1.Finish(&builder1Result).ok());
  275. arrow::UInt64Builder builder2;
  276. UNIT_ASSERT(builder2.Reserve(2).ok());
  277. builder2.UnsafeAppend(ui64(40));
  278. builder2.UnsafeAppend(ui64(50));
  279. std::shared_ptr<arrow::Array> builder2Result;
  280. UNIT_ASSERT(builder2.Finish(&builder2Result).ok());
  281. auto chunked = arrow::ChunkedArray::Make({ builder1Result, builder2Result }).ValueOrDie();
  282. arrow::Datum d1(chunked);
  283. NUdf::TUnboxedValue val1 = HolderFactory.CreateArrowBlock(std::move(d1));
  284. bool isScalar;
  285. ui64 length;
  286. auto chunks = Builder.GetArrowBlockChunks(val1, isScalar, length);
  287. UNIT_ASSERT_VALUES_EQUAL(chunks, 2);
  288. UNIT_ASSERT(!isScalar);
  289. UNIT_ASSERT_VALUES_EQUAL(length, 5);
  290. ArrowArray arrs[2];
  291. Builder.ExportArrowBlock(val1, 0, &arrs[0]);
  292. Builder.ExportArrowBlock(val1, 1, &arrs[1]);
  293. NUdf::TUnboxedValue val2 = Builder.ImportArrowBlock(arrs, 2, isScalar, *atype);
  294. const auto& d2 = TArrowBlock::From(val2).GetDatum();
  295. UNIT_ASSERT(d2.is_arraylike() && !d2.is_array());
  296. UNIT_ASSERT_VALUES_EQUAL(d2.length(), 5);
  297. UNIT_ASSERT_VALUES_EQUAL(d2.chunks().size(), 2);
  298. UNIT_ASSERT_VALUES_EQUAL(d2.chunks()[0]->data()->length, 3);
  299. UNIT_ASSERT_VALUES_EQUAL(d2.chunks()[0]->data()->GetNullCount(), 0);
  300. auto flat = d2.chunks()[0]->data()->GetValues<ui64>(1);
  301. UNIT_ASSERT_VALUES_EQUAL(flat[0], 10);
  302. UNIT_ASSERT_VALUES_EQUAL(flat[1], 20);
  303. UNIT_ASSERT_VALUES_EQUAL(flat[2], 30);
  304. UNIT_ASSERT_VALUES_EQUAL(d2.chunks()[1]->data()->length, 2);
  305. UNIT_ASSERT_VALUES_EQUAL(d2.chunks()[1]->data()->GetNullCount(), 0);
  306. flat = d2.chunks()[1]->data()->GetValues<ui64>(1);
  307. UNIT_ASSERT_VALUES_EQUAL(flat[0], 40);
  308. UNIT_ASSERT_VALUES_EQUAL(flat[1], 50);
  309. }
  310. }
  311. };
  312. UNIT_TEST_SUITE_REGISTRATION(TMiniKQLValueBuilderTest);
  313. }
  314. }