presort.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678
  1. #include "presort.h"
  2. #include "presort_impl.h"
  3. #include "mkql_computation_node_holders.h"
  4. #include <yql/essentials/minikql/defs.h>
  5. #include <yql/essentials/minikql/mkql_string_util.h>
  6. #include <yql/essentials/utils/swap_bytes.h>
  7. #include <yql/essentials/parser/pg_wrapper/interface/pack.h>
  8. #include <yql/essentials/public/decimal/yql_decimal_serialize.h>
  9. #include <util/system/unaligned_mem.h>
  10. #include <util/string/builder.h>
  11. namespace NKikimr {
  12. namespace NMiniKQL {
  13. namespace NDetail {
  14. constexpr size_t UuidSize = 16;
  15. template <bool Desc>
  16. Y_FORCE_INLINE
  17. void EncodeUuid(TVector<ui8>& output, const char* data) {
  18. output.resize(output.size() + UuidSize);
  19. auto ptr = output.end() - UuidSize;
  20. if (Desc) {
  21. for (size_t i = 0; i < UuidSize; ++i) {
  22. *ptr++ = ui8(*data++) ^ 0xFF;
  23. }
  24. }
  25. else {
  26. std::memcpy(ptr, data, UuidSize);
  27. }
  28. }
  29. template <bool Desc>
  30. Y_FORCE_INLINE
  31. TStringBuf DecodeUuid(TStringBuf& input, TVector<ui8>& value) {
  32. EnsureInputSize(input, UuidSize);
  33. auto data = input.data();
  34. input.Skip(UuidSize);
  35. value.resize(UuidSize);
  36. auto ptr = value.begin();
  37. if (Desc) {
  38. for (size_t i = 0; i < UuidSize; ++i) {
  39. *ptr++ = ui8(*data++) ^ 0xFF;
  40. }
  41. }
  42. else {
  43. std::memcpy(ptr, data, UuidSize);
  44. }
  45. return TStringBuf((const char*)value.begin(), (const char*)value.end());
  46. }
  47. template <typename TUnsigned, bool Desc>
  48. Y_FORCE_INLINE
  49. void EncodeTzUnsigned(TVector<ui8>& output, TUnsigned value, ui16 tzId) {
  50. constexpr size_t size = sizeof(TUnsigned);
  51. if (Desc) {
  52. value = ~value;
  53. tzId = ~tzId;
  54. }
  55. output.resize(output.size() + size + sizeof(ui16));
  56. WriteUnaligned<TUnsigned>(output.end() - size - sizeof(ui16), SwapBytes(value));
  57. WriteUnaligned<ui16>(output.end() - sizeof(ui16), SwapBytes(tzId));
  58. }
  59. template <typename TSigned, bool Desc>
  60. Y_FORCE_INLINE
  61. void EncodeTzSigned(TVector<ui8>& output, TSigned value, ui16 tzId) {
  62. using TUnsigned = std::make_unsigned_t<TSigned>;
  63. auto unsignedValue = static_cast<TUnsigned>(value) ^ (TUnsigned(1) << (8 * sizeof(TUnsigned) - 1));
  64. EncodeTzUnsigned<TUnsigned, Desc>(output, unsignedValue, tzId);
  65. }
  66. template <typename TUnsigned, bool Desc>
  67. Y_FORCE_INLINE
  68. void DecodeTzUnsigned(TStringBuf& input, TUnsigned& value, ui16& tzId) {
  69. constexpr size_t size = sizeof(TUnsigned);
  70. EnsureInputSize(input, size + sizeof(ui16));
  71. auto v = ReadUnaligned<TUnsigned>(input.data());
  72. auto t = ReadUnaligned<ui16>(input.data() + size);
  73. input.Skip(size + sizeof(ui16));
  74. if (Desc) {
  75. value = ~SwapBytes(v);
  76. tzId = ~SwapBytes(t);
  77. }
  78. else {
  79. value = SwapBytes(v);
  80. tzId = SwapBytes(t);
  81. }
  82. }
  83. template <typename TSigned, bool Desc>
  84. Y_FORCE_INLINE
  85. void DecodeTzSigned(TStringBuf& input, TSigned& value, ui16& tzId) {
  86. using TUnsigned = std::make_unsigned_t<TSigned>;
  87. TUnsigned unsignedValue;
  88. DecodeTzUnsigned<TUnsigned, Desc>(input, unsignedValue, tzId);
  89. value = TSigned(unsignedValue ^ (TUnsigned(1) << (8 * sizeof(TUnsigned) - 1)));
  90. }
  91. constexpr size_t DecimalSize = sizeof(NYql::NDecimal::TInt128);
  92. template <bool Desc>
  93. Y_FORCE_INLINE
  94. void EncodeDecimal(TVector<ui8>& output, NYql::NDecimal::TInt128 value) {
  95. output.resize(output.size() + DecimalSize);
  96. auto ptr = reinterpret_cast<char*>(output.end() - DecimalSize);
  97. output.resize(output.size() + NYql::NDecimal::Serialize(Desc ? -value : value, ptr) - DecimalSize);
  98. }
  99. template <bool Desc>
  100. Y_FORCE_INLINE
  101. NYql::NDecimal::TInt128 DecodeDecimal(TStringBuf& input) {
  102. const auto des = NYql::NDecimal::Deserialize(input.data(), input.size());
  103. input.Skip(des.second);
  104. auto res = Desc ? -des.first : des.first;
  105. MKQL_ENSURE(!NYql::NDecimal::IsError(res), "Bad packed data: invalid decimal.");
  106. return res;
  107. }
  108. template <bool Desc>
  109. Y_FORCE_INLINE
  110. void Encode(TVector<ui8>& output, NUdf::EDataSlot slot, const NUdf::TUnboxedValuePod& value) {
  111. switch (slot) {
  112. case NUdf::EDataSlot::Bool:
  113. EncodeBool<Desc>(output, value.Get<bool>());
  114. break;
  115. case NUdf::EDataSlot::Int8:
  116. EncodeSigned<i8, Desc>(output, value.Get<i8>());
  117. break;
  118. case NUdf::EDataSlot::Uint8:
  119. EncodeUnsigned<ui8, Desc>(output, value.Get<ui8>());
  120. break;
  121. case NUdf::EDataSlot::Int16:
  122. EncodeSigned<i16, Desc>(output, value.Get<i16>());
  123. break;
  124. case NUdf::EDataSlot::Uint16:
  125. case NUdf::EDataSlot::Date:
  126. EncodeUnsigned<ui16, Desc>(output, value.Get<ui16>());
  127. break;
  128. case NUdf::EDataSlot::Int32:
  129. case NUdf::EDataSlot::Date32:
  130. EncodeSigned<i32, Desc>(output, value.Get<i32>());
  131. break;
  132. case NUdf::EDataSlot::Uint32:
  133. case NUdf::EDataSlot::Datetime:
  134. EncodeUnsigned<ui32, Desc>(output, value.Get<ui32>());
  135. break;
  136. case NUdf::EDataSlot::Int64:
  137. case NUdf::EDataSlot::Interval:
  138. case NUdf::EDataSlot::Interval64:
  139. case NUdf::EDataSlot::Datetime64:
  140. case NUdf::EDataSlot::Timestamp64:
  141. EncodeSigned<i64, Desc>(output, value.Get<i64>());
  142. break;
  143. case NUdf::EDataSlot::Uint64:
  144. case NUdf::EDataSlot::Timestamp:
  145. EncodeUnsigned<ui64, Desc>(output, value.Get<ui64>());
  146. break;
  147. case NUdf::EDataSlot::Double:
  148. EncodeFloating<double, Desc>(output, value.Get<double>());
  149. break;
  150. case NUdf::EDataSlot::Float:
  151. EncodeFloating<float, Desc>(output, value.Get<float>());
  152. break;
  153. case NUdf::EDataSlot::DyNumber:
  154. case NUdf::EDataSlot::String:
  155. case NUdf::EDataSlot::Utf8: {
  156. auto stringRef = value.AsStringRef();
  157. EncodeString<Desc>(output, TStringBuf(stringRef.Data(), stringRef.Size()));
  158. break;
  159. }
  160. case NUdf::EDataSlot::Uuid:
  161. EncodeUuid<Desc>(output, value.AsStringRef().Data());
  162. break;
  163. case NUdf::EDataSlot::TzDate:
  164. EncodeTzUnsigned<ui16, Desc>(output, value.Get<ui16>(), value.GetTimezoneId());
  165. break;
  166. case NUdf::EDataSlot::TzDatetime:
  167. EncodeTzUnsigned<ui32, Desc>(output, value.Get<ui32>(), value.GetTimezoneId());
  168. break;
  169. case NUdf::EDataSlot::TzTimestamp:
  170. EncodeTzUnsigned<ui64, Desc>(output, value.Get<ui64>(), value.GetTimezoneId());
  171. break;
  172. case NUdf::EDataSlot::Decimal:
  173. EncodeDecimal<Desc>(output, value.GetInt128());
  174. break;
  175. case NUdf::EDataSlot::TzDate32:
  176. EncodeTzSigned<i32, Desc>(output, value.Get<i32>(), value.GetTimezoneId());
  177. break;
  178. case NUdf::EDataSlot::TzDatetime64:
  179. EncodeTzSigned<i64, Desc>(output, value.Get<i64>(), value.GetTimezoneId());
  180. break;
  181. case NUdf::EDataSlot::TzTimestamp64:
  182. EncodeTzSigned<i64, Desc>(output, value.Get<i64>(), value.GetTimezoneId());
  183. break;
  184. default:
  185. MKQL_ENSURE(false, TStringBuilder() << "unknown data slot for presort encoding: " << slot);
  186. }
  187. }
  188. template <bool Desc>
  189. Y_FORCE_INLINE
  190. NUdf::TUnboxedValue Decode(TStringBuf& input, NUdf::EDataSlot slot, TVector<ui8>& buffer)
  191. {
  192. switch (slot) {
  193. case NUdf::EDataSlot::Bool:
  194. return NUdf::TUnboxedValuePod(DecodeBool<Desc>(input));
  195. case NUdf::EDataSlot::Int8:
  196. return NUdf::TUnboxedValuePod(DecodeSigned<i8, Desc>(input));
  197. case NUdf::EDataSlot::Uint8:
  198. return NUdf::TUnboxedValuePod(DecodeUnsigned<ui8, Desc>(input));
  199. case NUdf::EDataSlot::Int16:
  200. return NUdf::TUnboxedValuePod(DecodeSigned<i16, Desc>(input));
  201. case NUdf::EDataSlot::Uint16:
  202. case NUdf::EDataSlot::Date:
  203. return NUdf::TUnboxedValuePod(DecodeUnsigned<ui16, Desc>(input));
  204. case NUdf::EDataSlot::Int32:
  205. case NUdf::EDataSlot::Date32:
  206. return NUdf::TUnboxedValuePod(DecodeSigned<i32, Desc>(input));
  207. case NUdf::EDataSlot::Uint32:
  208. case NUdf::EDataSlot::Datetime:
  209. return NUdf::TUnboxedValuePod(DecodeUnsigned<ui32, Desc>(input));
  210. case NUdf::EDataSlot::Int64:
  211. case NUdf::EDataSlot::Interval:
  212. case NUdf::EDataSlot::Interval64:
  213. case NUdf::EDataSlot::Datetime64:
  214. case NUdf::EDataSlot::Timestamp64:
  215. return NUdf::TUnboxedValuePod(DecodeSigned<i64, Desc>(input));
  216. case NUdf::EDataSlot::Uint64:
  217. case NUdf::EDataSlot::Timestamp:
  218. return NUdf::TUnboxedValuePod(DecodeUnsigned<ui64, Desc>(input));
  219. case NUdf::EDataSlot::Double:
  220. return NUdf::TUnboxedValuePod(DecodeFloating<double, Desc>(input));
  221. case NUdf::EDataSlot::Float:
  222. return NUdf::TUnboxedValuePod(DecodeFloating<float, Desc>(input));
  223. case NUdf::EDataSlot::DyNumber:
  224. case NUdf::EDataSlot::String:
  225. case NUdf::EDataSlot::Utf8:
  226. buffer.clear();
  227. return MakeString(NUdf::TStringRef(DecodeString<Desc>(input, buffer)));
  228. case NUdf::EDataSlot::Uuid:
  229. buffer.clear();
  230. return MakeString(NUdf::TStringRef(DecodeUuid<Desc>(input, buffer)));
  231. case NUdf::EDataSlot::TzDate: {
  232. ui16 date;
  233. ui16 tzId;
  234. DecodeTzUnsigned<ui16, Desc>(input, date, tzId);
  235. NUdf::TUnboxedValuePod value(date);
  236. value.SetTimezoneId(tzId);
  237. return value;
  238. }
  239. case NUdf::EDataSlot::TzDatetime: {
  240. ui32 datetime;
  241. ui16 tzId;
  242. DecodeTzUnsigned<ui32, Desc>(input, datetime, tzId);
  243. NUdf::TUnboxedValuePod value(datetime);
  244. value.SetTimezoneId(tzId);
  245. return value;
  246. }
  247. case NUdf::EDataSlot::TzTimestamp: {
  248. ui64 timestamp;
  249. ui16 tzId;
  250. DecodeTzUnsigned<ui64, Desc>(input, timestamp, tzId);
  251. NUdf::TUnboxedValuePod value(timestamp);
  252. value.SetTimezoneId(tzId);
  253. return value;
  254. }
  255. case NUdf::EDataSlot::Decimal:
  256. return NUdf::TUnboxedValuePod(DecodeDecimal<Desc>(input));
  257. case NUdf::EDataSlot::TzDate32: {
  258. i32 date;
  259. ui16 tzId;
  260. DecodeTzSigned<i32, Desc>(input, date, tzId);
  261. NUdf::TUnboxedValuePod value(date);
  262. value.SetTimezoneId(tzId);
  263. return value;
  264. }
  265. case NUdf::EDataSlot::TzDatetime64: {
  266. i64 datetime;
  267. ui16 tzId;
  268. DecodeTzSigned<i64, Desc>(input, datetime, tzId);
  269. NUdf::TUnboxedValuePod value(datetime);
  270. value.SetTimezoneId(tzId);
  271. return value;
  272. }
  273. case NUdf::EDataSlot::TzTimestamp64: {
  274. i64 timestamp;
  275. ui16 tzId;
  276. DecodeTzSigned<i64, Desc>(input, timestamp, tzId);
  277. NUdf::TUnboxedValuePod value(timestamp);
  278. value.SetTimezoneId(tzId);
  279. return value;
  280. }
  281. default:
  282. MKQL_ENSURE(false, TStringBuilder() << "unknown data slot for presort decoding: " << slot);
  283. }
  284. }
  285. struct TDictItem {
  286. TString KeyBuffer;
  287. NUdf::TUnboxedValue Payload;
  288. TDictItem(const TString& keyBuffer, const NUdf::TUnboxedValue& payload)
  289. : KeyBuffer(keyBuffer)
  290. , Payload(payload)
  291. {}
  292. bool operator<(const TDictItem& other) const {
  293. return KeyBuffer < other.KeyBuffer;
  294. }
  295. };
  296. void EncodeValue(TType* type, const NUdf::TUnboxedValue& value, TVector<ui8>& output) {
  297. switch (type->GetKind()) {
  298. case TType::EKind::Void:
  299. case TType::EKind::Null:
  300. case TType::EKind::EmptyList:
  301. case TType::EKind::EmptyDict:
  302. break;
  303. case TType::EKind::Data: {
  304. auto slot = *static_cast<TDataType*>(type)->GetDataSlot();
  305. Encode<false>(output, slot, value);
  306. break;
  307. }
  308. case TType::EKind::Optional: {
  309. auto itemType = static_cast<TOptionalType*>(type)->GetItemType();
  310. auto hasValue = (bool)value;
  311. EncodeBool<false>(output, hasValue);
  312. if (hasValue) {
  313. EncodeValue(itemType, value.GetOptionalValue(), output);
  314. }
  315. break;
  316. }
  317. case TType::EKind::List: {
  318. auto itemType = static_cast<TListType*>(type)->GetItemType();
  319. auto iterator = value.GetListIterator();
  320. NUdf::TUnboxedValue item;
  321. while (iterator.Next(item)) {
  322. EncodeBool<false>(output, true);
  323. EncodeValue(itemType, item, output);
  324. }
  325. EncodeBool<false>(output, false);
  326. break;
  327. }
  328. case TType::EKind::Tuple: {
  329. auto tupleType = static_cast<TTupleType*>(type);
  330. for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) {
  331. EncodeValue(tupleType->GetElementType(i), value.GetElement(i), output);
  332. }
  333. break;
  334. }
  335. case TType::EKind::Struct: {
  336. auto structType = static_cast<TStructType*>(type);
  337. for (ui32 i = 0; i < structType->GetMembersCount(); ++i) {
  338. EncodeValue(structType->GetMemberType(i), value.GetElement(i), output);
  339. }
  340. break;
  341. }
  342. case TType::EKind::Variant: {
  343. auto underlyingType = static_cast<TVariantType*>(type)->GetUnderlyingType();
  344. auto alt = value.GetVariantIndex();
  345. TType* altType;
  346. ui32 altCount;
  347. if (underlyingType->IsStruct()) {
  348. auto structType = static_cast<TStructType*>(underlyingType);
  349. altType = structType->GetMemberType(alt);
  350. altCount = structType->GetMembersCount();
  351. } else {
  352. auto tupleType = static_cast<TTupleType*>(underlyingType);
  353. altType = tupleType->GetElementType(alt);
  354. altCount = tupleType->GetElementsCount();
  355. }
  356. if (altCount < 256) {
  357. EncodeUnsigned<ui8, false>(output, alt);
  358. } else if (altCount < 256 * 256) {
  359. EncodeUnsigned<ui16, false>(output, alt);
  360. } else {
  361. EncodeUnsigned<ui32, false>(output, alt);
  362. }
  363. EncodeValue(altType, value.GetVariantItem(), output);
  364. break;
  365. }
  366. case TType::EKind::Dict: {
  367. auto dictType = static_cast<TDictType*>(type);
  368. auto iter = value.GetDictIterator();
  369. if (value.IsSortedDict()) {
  370. NUdf::TUnboxedValue key, payload;
  371. while (iter.NextPair(key, payload)) {
  372. EncodeBool<false>(output, true);
  373. EncodeValue(dictType->GetKeyType(), key, output);
  374. EncodeValue(dictType->GetPayloadType(), payload, output);
  375. }
  376. } else {
  377. // canonize keys
  378. TVector<TDictItem> items;
  379. items.reserve(value.GetDictLength());
  380. NUdf::TUnboxedValue key, payload;
  381. TVector<ui8> buffer;
  382. while (iter.NextPair(key, payload)) {
  383. buffer.clear();
  384. EncodeValue(dictType->GetKeyType(), key, buffer);
  385. TString keyBuffer((const char*)buffer.begin(), buffer.size());
  386. items.emplace_back(keyBuffer, payload);
  387. }
  388. Sort(items.begin(), items.end());
  389. // output values
  390. for (const auto& x : items) {
  391. EncodeBool<false>(output, true);
  392. output.insert(output.end(), x.KeyBuffer.begin(), x.KeyBuffer.end());
  393. EncodeValue(dictType->GetPayloadType(), x.Payload, output);
  394. }
  395. }
  396. EncodeBool<false>(output, false);
  397. break;
  398. }
  399. case TType::EKind::Pg: {
  400. auto pgType = static_cast<TPgType*>(type);
  401. auto hasValue = (bool)value;
  402. EncodeBool<false>(output, hasValue);
  403. if (hasValue) {
  404. EncodePresortPGValue(pgType, value, output);
  405. }
  406. break;
  407. }
  408. case TType::EKind::Tagged: {
  409. auto baseType = static_cast<TTaggedType*>(type)->GetBaseType();
  410. EncodeValue(baseType, value, output);
  411. break;
  412. }
  413. default:
  414. MKQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr());
  415. }
  416. }
  417. NUdf::TUnboxedValue DecodeImpl(TType* type, TStringBuf& input, const THolderFactory& factory, TVector<ui8>& buffer) {
  418. switch (type->GetKind()) {
  419. case TType::EKind::Void:
  420. return NUdf::TUnboxedValue::Void();
  421. case TType::EKind::Null:
  422. return NUdf::TUnboxedValue();
  423. case TType::EKind::EmptyList:
  424. return factory.GetEmptyContainerLazy();
  425. case TType::EKind::EmptyDict:
  426. return factory.GetEmptyContainerLazy();
  427. case TType::EKind::Data: {
  428. auto slot = *static_cast<TDataType*>(type)->GetDataSlot();
  429. return Decode<false>(input, slot, buffer);
  430. }
  431. case TType::EKind::Pg: {
  432. auto pgType = static_cast<TPgType*>(type);
  433. auto hasValue = DecodeBool<false>(input);
  434. if (!hasValue) {
  435. return NUdf::TUnboxedValue();
  436. }
  437. return DecodePresortPGValue(pgType, input, buffer);
  438. }
  439. case TType::EKind::Optional: {
  440. auto itemType = static_cast<TOptionalType*>(type)->GetItemType();
  441. auto hasValue = DecodeBool<false>(input);
  442. if (!hasValue) {
  443. return NUdf::TUnboxedValue();
  444. }
  445. auto value = DecodeImpl(itemType, input, factory, buffer);
  446. return value.Release().MakeOptional();
  447. }
  448. case TType::EKind::List: {
  449. auto itemType = static_cast<TListType*>(type)->GetItemType();
  450. TUnboxedValueVector values;
  451. while (DecodeBool<false>(input)) {
  452. auto value = DecodeImpl(itemType, input, factory, buffer);
  453. values.emplace_back(value);
  454. }
  455. return factory.VectorAsArray(values);
  456. }
  457. case TType::EKind::Tuple: {
  458. auto tupleType = static_cast<TTupleType*>(type);
  459. NUdf::TUnboxedValue* items;
  460. auto array = factory.CreateDirectArrayHolder(tupleType->GetElementsCount(), items);
  461. for (ui32 i = 0; i < tupleType->GetElementsCount(); ++i) {
  462. items[i] = DecodeImpl(tupleType->GetElementType(i), input, factory, buffer);
  463. }
  464. return array;
  465. }
  466. case TType::EKind::Variant: {
  467. auto underlyingType = static_cast<TVariantType*>(type)->GetUnderlyingType();
  468. ui32 altCount;
  469. MKQL_ENSURE(underlyingType->IsTuple(), "Expcted variant over tuple");
  470. auto tupleType = static_cast<TTupleType*>(underlyingType);
  471. altCount = tupleType->GetElementsCount();
  472. ui32 alt;
  473. if (altCount < 256) {
  474. alt = DecodeUnsigned<ui8, false>(input);
  475. } else if (altCount < 256 * 256) {
  476. alt = DecodeUnsigned<ui16, false>(input);
  477. } else {
  478. alt = DecodeUnsigned<ui32, false>(input);
  479. }
  480. TType* altType = tupleType->GetElementType(alt);
  481. auto value = DecodeImpl(altType, input, factory, buffer);
  482. return factory.CreateVariantHolder(value.Release(), alt);
  483. }
  484. case TType::EKind::Tagged: {
  485. auto baseType = static_cast<TTaggedType*>(type)->GetBaseType();
  486. return DecodeImpl(baseType, input, factory, buffer);
  487. }
  488. // Struct and Dict may be encoded into a presort form only to canonize dict keys. No need to decode them.
  489. case TType::EKind::Struct:
  490. case TType::EKind::Dict:
  491. default:
  492. MKQL_ENSURE(false, "Unsupported type: " << type->GetKindAsStr());
  493. }
  494. }
  495. } // NDetail
  496. void TPresortCodec::AddType(NUdf::EDataSlot slot, bool isOptional, bool isDesc) {
  497. Types.push_back({slot, isOptional, isDesc});
  498. }
  499. void TPresortEncoder::Start() {
  500. Output.clear();
  501. Current = 0;
  502. }
  503. void TPresortEncoder::Start(TStringBuf prefix) {
  504. Output.clear();
  505. auto data = reinterpret_cast<const ui8*>(prefix.data());
  506. Output.insert(Output.begin(), data, data + prefix.size());
  507. Current = 0;
  508. }
  509. void TPresortEncoder::Encode(const NUdf::TUnboxedValuePod& value) {
  510. auto& type = Types[Current++];
  511. if (type.IsDesc) {
  512. if (type.IsOptional) {
  513. auto hasValue = (bool)value;
  514. NDetail::EncodeBool<true>(Output, hasValue);
  515. if (!hasValue) {
  516. return;
  517. }
  518. }
  519. NDetail::Encode<true>(Output, type.Slot, value);
  520. } else {
  521. if (type.IsOptional) {
  522. auto hasValue = (bool)value;
  523. NDetail::EncodeBool<false>(Output, hasValue);
  524. if (!hasValue) {
  525. return;
  526. }
  527. }
  528. NDetail::Encode<false>(Output, type.Slot, value);
  529. }
  530. }
  531. TStringBuf TPresortEncoder::Finish() {
  532. MKQL_ENSURE(Current == Types.size(), "not all fields were encoded");
  533. return TStringBuf((const char*)Output.data(), Output.size());
  534. }
  535. void TPresortDecoder::Start(TStringBuf input) {
  536. Input = input;
  537. Current = 0;
  538. }
  539. NUdf::TUnboxedValue TPresortDecoder::Decode() {
  540. auto& type = Types[Current++];
  541. if (type.IsDesc) {
  542. if (type.IsOptional && !NDetail::DecodeBool<true>(Input)) {
  543. return NUdf::TUnboxedValuePod();
  544. }
  545. return NDetail::Decode<true>(Input, type.Slot, Buffer);
  546. } else {
  547. if (type.IsOptional && !NDetail::DecodeBool<false>(Input)) {
  548. return NUdf::TUnboxedValuePod();
  549. }
  550. return NDetail::Decode<false>(Input, type.Slot, Buffer);
  551. }
  552. }
  553. void TPresortDecoder::Finish() {
  554. MKQL_ENSURE(Current == Types.size(), "not all fields were decoded");
  555. MKQL_ENSURE(Input.empty(), "buffer is not empty");
  556. }
  557. TGenericPresortEncoder::TGenericPresortEncoder(TType* type)
  558. : Type(type)
  559. {}
  560. TStringBuf TGenericPresortEncoder::Encode(const NUdf::TUnboxedValue& value, bool desc) {
  561. Output.clear();
  562. NDetail::EncodeValue(Type, value, Output);
  563. if (desc) {
  564. for (auto& x : Output) {
  565. x = ~x;
  566. }
  567. }
  568. return TStringBuf((const char*)Output.data(), Output.size());
  569. }
  570. NUdf::TUnboxedValue TGenericPresortEncoder::Decode(TStringBuf buf, bool desc, const THolderFactory& factory) {
  571. if (desc) {
  572. Output.assign(buf.begin(), buf.end());
  573. for (auto& x : Output) {
  574. x = ~x;
  575. }
  576. auto newBuf = TStringBuf(reinterpret_cast<const char*>(Output.data()), Output.size());
  577. auto ret = NDetail::DecodeImpl(Type, newBuf, factory, Buffer);
  578. Output.clear();
  579. MKQL_ENSURE(newBuf.empty(), "buffer must be empty");
  580. return ret;
  581. } else {
  582. auto ret = NDetail::DecodeImpl(Type, buf, factory, Buffer);
  583. MKQL_ENSURE(buf.empty(), "buffer is not empty");
  584. return ret;
  585. }
  586. }
  587. } // NMiniKQL
  588. } // NKikimr