mkql_computation_node_holders.h 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127
  1. #pragma once
  2. #include <yql/essentials/utils/hash.h>
  3. #include "mkql_computation_node_impl.h"
  4. #include "mkql_computation_node_list.h"
  5. #include <yql/essentials/minikql/aligned_page_pool.h>
  6. #include <yql/essentials/minikql/compact_hash.h>
  7. #include <yql/essentials/minikql/mkql_type_ops.h>
  8. #include <yql/essentials/minikql/mkql_type_builder.h>
  9. #include <contrib/libs/apache/arrow/cpp/src/arrow/datum.h>
  10. #include <util/generic/maybe.h>
  11. #include <util/memory/pool.h>
  12. #include <functional>
  13. #include <unordered_map>
  14. #include <unordered_set>
  15. #include <optional>
  16. #include <vector>
  17. namespace NKikimr {
  18. namespace NMiniKQL {
  19. class TMemoryUsageInfo;
  20. const ui32 CodegenArraysFallbackLimit = 1000u;
  21. template <typename Type, EMemorySubPool MemoryPool = EMemorySubPool::Default>
  22. using TMKQLVector = std::vector<Type, TMKQLAllocator<Type, MemoryPool>>;
  23. template<typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, EMemorySubPool MemoryPool = EMemorySubPool::Default>
  24. using TMKQLHashMap = std::unordered_map<Key, T, Hash, KeyEqual, TMKQLAllocator<std::pair<const Key, T>, MemoryPool>>;
  25. using TKeyTypes = std::vector<std::pair<NUdf::EDataSlot, bool>>;
  26. using TUnboxedValueVector = std::vector<NUdf::TUnboxedValue, TMKQLAllocator<NUdf::TUnboxedValue>>;
  27. using TTemporaryUnboxedValueVector = std::vector<NUdf::TUnboxedValue, TMKQLAllocator<NUdf::TUnboxedValue, EMemorySubPool::Temporary>>;
  28. using TUnboxedValueDeque = std::deque<NUdf::TUnboxedValue, TMKQLAllocator<NUdf::TUnboxedValue>>;
  29. using TKeyPayloadPair = std::pair<NUdf::TUnboxedValue, NUdf::TUnboxedValue>;
  30. using TKeyPayloadPairVector = std::vector<TKeyPayloadPair, TMKQLAllocator<TKeyPayloadPair>>;
  31. class TUnboxedValueBatch {
  32. // TUnboxedValueBatch represents column values for RowCount rows
  33. // If wide encoding is used and each row contains Width columns, Values consists of Width * RowCount items:
  34. // first Width elements correspond to first row,
  35. // second Width elements - to second row, etc
  36. // For narrow encoding, each row is represented as a single item (a struct) - so Width is equal to 1
  37. private:
  38. using TBottomType = TUnboxedValueVector;
  39. using TTopType = std::deque<TBottomType>;
  40. public:
  41. using value_type = NUdf::TUnboxedValue;
  42. explicit TUnboxedValueBatch(const TType* rowType = nullptr)
  43. : Width_((rowType && rowType->IsMulti()) ? static_cast<const TMultiType*>(rowType)->GetElementsCount() : 1u)
  44. , IsWide_(rowType && rowType->IsMulti())
  45. , PageSize_(GetPageSize(Width_))
  46. {
  47. }
  48. TUnboxedValueBatch(const TUnboxedValueBatch& other) = default;
  49. TUnboxedValueBatch& operator=(const TUnboxedValueBatch& other) = default;
  50. TUnboxedValueBatch(TUnboxedValueBatch&& other)
  51. : Width_(other.Width_)
  52. , IsWide_(other.IsWide_)
  53. , PageSize_(other.PageSize_)
  54. , Values_(std::move(other.Values_))
  55. , RowOffset_(other.RowOffset_)
  56. , RowCount_(other.RowCount_)
  57. {
  58. other.clear();
  59. }
  60. inline void clear() {
  61. Values_.clear();
  62. RowOffset_ = RowCount_ = 0;
  63. }
  64. inline bool empty() const {
  65. return RowCount_ == 0;
  66. }
  67. inline void swap(TUnboxedValueBatch& other) {
  68. std::swap(Width_, other.Width_);
  69. std::swap(PageSize_, other.PageSize_);
  70. std::swap(Values_, other.Values_);
  71. std::swap(RowOffset_, other.RowOffset_);
  72. std::swap(RowCount_, other.RowCount_);
  73. }
  74. template<typename... TArgs>
  75. void emplace_back(TArgs&&... args) {
  76. MKQL_ENSURE(!IsWide(), "emplace_back() should not be used for wide batch");
  77. if (Values_.empty() || Values_.back().size() == Values_.back().capacity()) {
  78. Values_.emplace_back();
  79. Values_.back().reserve(PageSize_);
  80. }
  81. Values_.back().emplace_back(std::forward<TArgs>(args)...);
  82. RowCount_++;
  83. }
  84. inline void push_back(const value_type& row) {
  85. emplace_back(row);
  86. }
  87. inline void push_back(value_type&& row) {
  88. emplace_back(std::move(row));
  89. }
  90. template<typename TFunc>
  91. auto ForEachRow(const TFunc& cb) const {
  92. MKQL_ENSURE(!IsWide(), "ForEachRowWide() should be used instead");
  93. return DoForEachRow<const NUdf::TUnboxedValue, const TUnboxedValueBatch>(this,
  94. [&cb](const NUdf::TUnboxedValue* values, ui32 width) {
  95. Y_DEBUG_ABORT_UNLESS(width == 1);
  96. Y_DEBUG_ABORT_UNLESS(values);
  97. return cb(*values);
  98. });
  99. }
  100. template<typename TFunc>
  101. auto ForEachRow(const TFunc& cb) {
  102. MKQL_ENSURE(!IsWide(), "ForEachRowWide() should be used instead");
  103. return DoForEachRow<NUdf::TUnboxedValue, TUnboxedValueBatch>(this,
  104. [&cb](NUdf::TUnboxedValue* values, ui32 width) {
  105. Y_DEBUG_ABORT_UNLESS(width == 1);
  106. Y_DEBUG_ABORT_UNLESS(values);
  107. return cb(*values);
  108. });
  109. }
  110. template<typename TFunc>
  111. auto ForEachRowWide(const TFunc& cb) const {
  112. MKQL_ENSURE(IsWide(), "ForEachRow() should be used instead");
  113. return DoForEachRow<const NUdf::TUnboxedValue, const TUnboxedValueBatch>(this, cb);
  114. }
  115. template<typename TFunc>
  116. auto ForEachRowWide(const TFunc& cb) {
  117. MKQL_ENSURE(IsWide(), "ForEachRow() should be used instead");
  118. return DoForEachRow<NUdf::TUnboxedValue, TUnboxedValueBatch>(this, cb);
  119. }
  120. inline TMaybe<ui32> Width() const {
  121. return IsWide_ ? Width_ : TMaybe<ui32>{};
  122. }
  123. inline bool IsWide() const {
  124. return IsWide_;
  125. }
  126. inline ui64 RowCount() const {
  127. return RowCount_;
  128. }
  129. const value_type* Head() const {
  130. MKQL_ENSURE(RowCount_, "Head() on empty batch");
  131. return Width_ ? &Values_.front()[RowOffset_ * Width_] : nullptr;
  132. }
  133. value_type* Head() {
  134. MKQL_ENSURE(RowCount_, "Head() on empty batch");
  135. return Width_ ? &Values_.front()[RowOffset_ * Width_] : nullptr;
  136. }
  137. inline void Pop(size_t rowCount = 1) {
  138. MKQL_ENSURE(rowCount <= RowCount_, "Invalid arg");
  139. ui64 newStartOffset = (RowOffset_ + rowCount) * Width_;
  140. while (newStartOffset >= PageSize_) {
  141. MKQL_ENSURE_S(!Values_.empty());
  142. Values_.pop_front();
  143. newStartOffset -= PageSize_;
  144. }
  145. RowOffset_ = Width_ ? newStartOffset / Width_ : 0;
  146. RowCount_ -= rowCount;
  147. }
  148. template<typename TFunc>
  149. void PushRow(const TFunc& producer) {
  150. ReserveNextRow();
  151. for (ui32 i = 0; i < Width_; ++i) {
  152. Values_.back().emplace_back(producer(i));
  153. }
  154. ++RowCount_;
  155. }
  156. void PushRow(NUdf::TUnboxedValue* values, ui32 width) {
  157. Y_DEBUG_ABORT_UNLESS(width == Width_);
  158. ReserveNextRow();
  159. for (ui32 i = 0; i < Width_; ++i) {
  160. Values_.back().emplace_back(std::move(values[i]));
  161. }
  162. ++RowCount_;
  163. }
  164. private:
  165. static const size_t DesiredPageSize = 1024;
  166. static inline size_t GetPageSize(size_t width) {
  167. if (!width) {
  168. return DesiredPageSize;
  169. }
  170. size_t pageSize = DesiredPageSize + width - 1;
  171. return pageSize - pageSize % width;
  172. }
  173. inline void ReserveNextRow() {
  174. bool full = Width_ && (Values_.empty() || Values_.back().size() == PageSize_);
  175. if (full) {
  176. Values_.emplace_back();
  177. Values_.back().reserve(PageSize_);
  178. }
  179. }
  180. template<typename TValue, typename TParent, typename TFunc>
  181. static auto DoForEachRow(TParent* parent, const TFunc& cb) {
  182. using TReturn = typename std::result_of<TFunc(TValue*, ui32)>::type;
  183. auto currTop = parent->Values_.begin();
  184. Y_DEBUG_ABORT_UNLESS(parent->PageSize_ > parent->RowOffset_);
  185. Y_DEBUG_ABORT_UNLESS(parent->Width_ == 0 || (parent->PageSize_ - parent->RowOffset_) % parent->Width_ == 0);
  186. size_t valuesOnPage = parent->PageSize_ - parent->RowOffset_;
  187. TValue* values = (parent->Width_ && parent->RowCount_) ? currTop->data() + parent->RowOffset_ : nullptr;
  188. for (size_t i = 0; i < parent->RowCount_; ++i) {
  189. if constexpr (std::is_same_v<TReturn, bool>) {
  190. if (!cb(values, parent->Width_)) {
  191. return false;
  192. }
  193. } else {
  194. static_assert(std::is_same_v<TReturn, void>, "Callback should either return bool or void");
  195. cb(values, parent->Width_);
  196. }
  197. values += parent->Width_;
  198. valuesOnPage -= parent->Width_;
  199. if (!valuesOnPage) {
  200. valuesOnPage = parent->PageSize_;
  201. ++currTop;
  202. values = currTop->data();
  203. }
  204. }
  205. if constexpr (std::is_same_v<TReturn, bool>) {
  206. return true;
  207. }
  208. }
  209. ui32 Width_;
  210. bool IsWide_;
  211. size_t PageSize_;
  212. TTopType Values_;
  213. ui64 RowOffset_ = 0;
  214. ui64 RowCount_ = 0;
  215. };
  216. inline int CompareValues(NUdf::EDataSlot type, bool asc, bool isOptional, const NUdf::TUnboxedValuePod& lhs, const NUdf::TUnboxedValuePod& rhs) {
  217. int cmp;
  218. if (isOptional) {
  219. if (!lhs && !rhs) {
  220. cmp = 0;
  221. }
  222. else if (!lhs) {
  223. cmp = -1;
  224. }
  225. else if (!rhs) {
  226. cmp = 1;
  227. }
  228. else {
  229. cmp = NUdf::CompareValues(type, lhs, rhs);
  230. }
  231. }
  232. else {
  233. cmp = NUdf::CompareValues(type, lhs, rhs);
  234. }
  235. if (!asc) {
  236. cmp = -cmp;
  237. }
  238. return cmp;
  239. }
  240. inline int CompareValues(const NUdf::TUnboxedValuePod* left, const NUdf::TUnboxedValuePod* right, const TKeyTypes& types, const bool* directions) {
  241. for (ui32 i = 0; i < types.size(); ++i) {
  242. if (const auto cmp = CompareValues(types[i].first, directions[i], types[i].second, left[i], right[i])) {
  243. return cmp;
  244. }
  245. }
  246. return 0;
  247. }
  248. inline int CompareKeys(const NUdf::TUnboxedValuePod& left, const NUdf::TUnboxedValuePod& right, const TKeyTypes& types, bool isTuple) {
  249. if (isTuple) {
  250. if (left && right)
  251. for (ui32 i = 0; i < types.size(); ++i) {
  252. if (const auto cmp = CompareValues(types[i].first, true, types[i].second, left.GetElement(i), right.GetElement(i))) {
  253. return cmp;
  254. }
  255. }
  256. else if (!left && right)
  257. return -1;
  258. else if (left && !right)
  259. return 1;
  260. return 0;
  261. }
  262. else {
  263. return CompareValues(types.front().first, true, types.front().second, left, right);
  264. }
  265. }
  266. struct TKeyPayloadPairLess {
  267. TKeyPayloadPairLess(const TKeyTypes& types, bool isTuple, const NUdf::ICompare* compare)
  268. : Types(&types)
  269. , IsTuple(isTuple)
  270. , Compare(compare)
  271. {}
  272. bool operator()(const TKeyPayloadPair& left, const TKeyPayloadPair& right) const {
  273. if (Compare) {
  274. return Compare->Less(left.first, right.first);
  275. }
  276. return CompareKeys(left.first, right.first, *Types, IsTuple) < 0;
  277. }
  278. const TKeyTypes* Types;
  279. bool IsTuple;
  280. const NUdf::ICompare* Compare;
  281. };
  282. struct TKeyPayloadPairEqual {
  283. TKeyPayloadPairEqual(const TKeyTypes& types, bool isTuple, const NUdf::IEquate* equate)
  284. : Types(&types)
  285. , IsTuple(isTuple)
  286. , Equate(equate)
  287. {}
  288. bool operator()(const TKeyPayloadPair& left, const TKeyPayloadPair& right) const {
  289. if (Equate) {
  290. return Equate->Equals(left.first, right.first);
  291. }
  292. return CompareKeys(left.first, right.first, *Types, IsTuple) == 0;
  293. }
  294. const TKeyTypes* Types;
  295. bool IsTuple;
  296. const NUdf::IEquate* Equate;
  297. };
  298. struct TValueEqual {
  299. TValueEqual(const TKeyTypes& types, bool isTuple, const NUdf::IEquate* equate)
  300. : Types(&types)
  301. , IsTuple(isTuple)
  302. , Equate(equate)
  303. {}
  304. bool operator()(const NUdf::TUnboxedValue& left, const NUdf::TUnboxedValue& right) const {
  305. if (Equate) {
  306. return Equate->Equals(left, right);
  307. }
  308. return CompareKeys(left, right, *Types, IsTuple) == 0;
  309. }
  310. const TKeyTypes* Types;
  311. bool IsTuple;
  312. const NUdf::IEquate* Equate;
  313. };
  314. struct TValueLess {
  315. TValueLess(const TKeyTypes& types, bool isTuple, const NUdf::ICompare* compare)
  316. : Types(&types)
  317. , IsTuple(isTuple)
  318. , Compare(compare)
  319. {}
  320. bool operator()(const NUdf::TUnboxedValue& left, const NUdf::TUnboxedValue& right) const {
  321. if (Compare) {
  322. return Compare->Less(left, right);
  323. }
  324. return CompareKeys(left, right, *Types, IsTuple) < 0;
  325. }
  326. const TKeyTypes* Types;
  327. bool IsTuple;
  328. const NUdf::ICompare* Compare;
  329. };
  330. constexpr NUdf::THashType HashOfNull = ~0ULL;
  331. struct TValueHasher {
  332. TValueHasher(const TKeyTypes& types, bool isTuple, const NUdf::IHash* hash)
  333. : Types(&types)
  334. , IsTuple(isTuple)
  335. , Hash(hash)
  336. {}
  337. NUdf::THashType operator()(const NUdf::TUnboxedValuePod& value) const {
  338. if (Hash) {
  339. return Hash->Hash(value);
  340. }
  341. if (!value)
  342. return HashOfNull;
  343. if (IsTuple) {
  344. NUdf::THashType hash = 0ULL;
  345. if (auto elements = value.GetElements())
  346. for (const auto& type : (*Types)) {
  347. if (const auto v = *elements++)
  348. hash = CombineHashes(hash, NUdf::GetValueHash(type.first, v));
  349. else
  350. hash = CombineHashes(hash, HashOfNull);
  351. }
  352. else
  353. for (auto i = 0U; i < Types->size(); ++i) {
  354. if (const auto v = value.GetElement(i))
  355. hash = CombineHashes(hash, NUdf::GetValueHash((*Types)[i].first, v));
  356. else
  357. hash = CombineHashes(hash, HashOfNull);
  358. }
  359. return hash;
  360. }
  361. return NUdf::GetValueHash((*Types).front().first, value);
  362. }
  363. const TKeyTypes* Types;
  364. bool IsTuple;
  365. const NUdf::IHash* Hash;
  366. };
  367. template<typename T>
  368. struct TFloatHash : private std::hash<T> {
  369. std::size_t operator()(T value) const {
  370. return std::isnan(value) ? ~0ULL : std::hash<T>::operator()(value);
  371. }
  372. };
  373. template<typename T>
  374. struct TFloatEquals {
  375. bool operator()(T l, T r) const {
  376. return std::isunordered(l, r) ? std::isnan(l) == std::isnan(r) : l == r;
  377. }
  378. };
  379. template <typename T>
  380. using TMyHash = std::conditional_t<std::is_floating_point<T>::value, TFloatHash<T>, std::hash<T>>;
  381. template <typename T>
  382. using TMyEquals = std::conditional_t<std::is_floating_point<T>::value, TFloatEquals<T>, std::equal_to<T>>;
  383. constexpr float COMPACT_HASH_MAX_LOAD_FACTOR = 1.2f;
  384. using TValuesDictHashMap = std::unordered_map<
  385. NUdf::TUnboxedValue, NUdf::TUnboxedValue,
  386. NYql::TVaryingHash<NUdf::TUnboxedValue, TValueHasher>, TValueEqual,
  387. TMKQLAllocator<std::pair<const NUdf::TUnboxedValue, NUdf::TUnboxedValue>>>;
  388. using TValuesDictHashSet = std::unordered_set<
  389. NUdf::TUnboxedValue, NYql::TVaryingHash<NUdf::TUnboxedValue, TValueHasher>, TValueEqual,
  390. TMKQLAllocator<NUdf::TUnboxedValue>>;
  391. template <typename T>
  392. using TValuesDictHashSingleFixedSet = std::unordered_set<T, NYql::TVaryingHash<T, TMyHash<T>>, TMyEquals<T>, TMKQLAllocator<T>>;
  393. template <typename T>
  394. using TValuesDictHashSingleFixedCompactSet = NCHash::TCompactHashSet<T, TMyHash<T>, TMyEquals<T>>;
  395. /*
  396. * All *Small* functions expect the 'value' content to be formed by the TValuePacker class,
  397. * which embeds the encoded data length at the beginning of the buffer
  398. */
  399. inline bool IsSmallValueEmbedded(ui64 value) {
  400. return (value & 1) != 0;
  401. }
  402. inline TStringBuf GetSmallValue(const ui64& value) {
  403. if (!IsSmallValueEmbedded(value)) {
  404. // pointer
  405. const char* ptr = (const char*)value;
  406. ui32 length = *(const ui32*)ptr;
  407. return TStringBuf(ptr, length + 4);
  408. } else {
  409. // embedded
  410. ui32 length = (value & 0x0f) >> 1;
  411. return TStringBuf(((const char*)&value), length + 1);
  412. }
  413. }
  414. inline ui64 AddSmallValue(TPagedArena& pool, const TStringBuf& value) {
  415. if (value.size() <= 8) {
  416. ui64 ret = 0;
  417. memcpy((ui8*)&ret, value.data(), value.size());
  418. Y_DEBUG_ABORT_UNLESS(IsSmallValueEmbedded(ret));
  419. return ret;
  420. }
  421. else {
  422. auto ptr = pool.Alloc(value.size());
  423. memcpy((ui8*)ptr, value.data(), value.size());
  424. return (ui64)ptr;
  425. }
  426. }
  427. inline ui64 AsSmallValue(const TStringBuf& value) {
  428. if (value.size() <= 8) {
  429. ui64 ret = 0;
  430. memcpy((ui8*)&ret, value.data(), value.size());
  431. return ret;
  432. }
  433. else {
  434. return (ui64)value.data();
  435. }
  436. }
  437. struct TSmallValueEqual {
  438. bool operator()(ui64 lhs, ui64 rhs) const {
  439. return IsSmallValueEmbedded(lhs) ? lhs == rhs : GetSmallValue(lhs) == GetSmallValue(rhs);
  440. }
  441. };
  442. struct TSmallValueHash {
  443. ui64 operator()(ui64 value) const {
  444. return THash<TStringBuf>()(GetSmallValue(value));
  445. }
  446. };
  447. using TValuesDictHashCompactSet = NCHash::TCompactHashSet<ui64, TSmallValueHash, TSmallValueEqual>;
  448. using TValuesDictHashCompactMap = NCHash::TCompactHash<ui64, ui64, TSmallValueHash, TSmallValueEqual>;
  449. using TValuesDictHashCompactMultiMap = NCHash::TCompactMultiHash<ui64, ui64, TSmallValueHash, TSmallValueEqual>;
  450. template <typename T>
  451. using TValuesDictHashSingleFixedCompactMap = NCHash::TCompactHash<T, ui64, TMyHash<T>, TMyEquals<T>>;
  452. template <typename T>
  453. using TValuesDictHashSingleFixedCompactMultiMap = NCHash::TCompactMultiHash<T, ui64, TMyHash<T>, TMyEquals<T>>;
  454. template <typename T>
  455. using TValuesDictHashSingleFixedMap = std::unordered_map<T, NUdf::TUnboxedValue, NYql::TVaryingHash<T, TMyHash<T>>, TMyEquals<T>,
  456. TMKQLAllocator<std::pair<const T, NUdf::TUnboxedValue>>>;
  457. using THashedDictFiller = std::function<void(TValuesDictHashMap&)>;
  458. using THashedSetFiller = std::function<void(TValuesDictHashSet&)>;
  459. using TSortedDictFiller = std::function<void(TKeyPayloadPairVector&)>;
  460. using TSortedSetFiller = std::function<void(TUnboxedValueVector&)>;
  461. enum class EDictSortMode {
  462. RequiresSorting,
  463. SortedUniqueAscending,
  464. SortedUniqueDescening
  465. };
  466. class TTypeHolder: public TComputationValue<TTypeHolder> {
  467. public:
  468. TTypeHolder(TMemoryUsageInfo* memInfo, TType* type)
  469. : TComputationValue(memInfo)
  470. , Type(type)
  471. {}
  472. NUdf::TStringRef GetResourceTag() const override {
  473. return NUdf::TStringRef::Of("TypeHolder");
  474. }
  475. void* GetResource() override {
  476. return Type;
  477. }
  478. private:
  479. TType* const Type;
  480. };
  481. class TArrowBlock: public TComputationValue<TArrowBlock> {
  482. public:
  483. explicit TArrowBlock(TMemoryUsageInfo* memInfo, arrow::Datum&& datum)
  484. : TComputationValue(memInfo)
  485. , Datum_(std::move(datum))
  486. {
  487. }
  488. inline static const TArrowBlock& From(const NUdf::TUnboxedValuePod& value) {
  489. return *static_cast<TArrowBlock*>(value.AsRawBoxed());
  490. }
  491. inline static const TArrowBlock& From(NUdf::TUnboxedValuePod&& value) = delete;
  492. inline const arrow::Datum& GetDatum() const {
  493. return Datum_;
  494. }
  495. NUdf::TStringRef GetResourceTag() const override {
  496. return NUdf::TStringRef::Of("ArrowBlock");
  497. }
  498. void* GetResource() override {
  499. return &Datum_;
  500. }
  501. private:
  502. arrow::Datum Datum_;
  503. };
  504. template <class IFace>
  505. class TTypeOperationsRegistry {
  506. using TValuePtr = typename IFace::TPtr;
  507. public:
  508. IFace* FindOrEmplace(const TType& type) {
  509. auto it = Registry.find(type);
  510. if (it == Registry.end()) {
  511. TTypeBase tb(type);
  512. TValuePtr ptr;
  513. if constexpr (std::is_same_v<IFace, NUdf::IHash>) {
  514. ptr = MakeHashImpl(&type);
  515. } else if constexpr (std::is_same_v<IFace, NUdf::IEquate>) {
  516. ptr = MakeEquateImpl(&type);
  517. } else if constexpr (std::is_same_v<IFace, NUdf::ICompare>) {
  518. ptr = MakeCompareImpl(&type);
  519. } else {
  520. static_assert(TDependentFalse<IFace>, "unexpected type");
  521. }
  522. auto p = std::make_pair((const TTypeBase)type, ptr);
  523. it = Registry.insert(p).first;
  524. }
  525. return it->second.Get();
  526. }
  527. private:
  528. THashMap<TTypeBase, TValuePtr, THasherTType, TEqualTType> Registry;
  529. };
  530. class TDirectArrayHolderInplace : public TComputationValue<TDirectArrayHolderInplace> {
  531. public:
  532. void* operator new(size_t sz) = delete;
  533. void* operator new[](size_t sz) = delete;
  534. void operator delete(void *mem, std::size_t sz) {
  535. const auto pSize = static_cast<void*>(static_cast<ui8*>(mem) + sizeof(TComputationValue<TDirectArrayHolderInplace>));
  536. FreeWithSize(mem, sz + *static_cast<ui64*>(pSize) * sizeof(NUdf::TUnboxedValue));
  537. }
  538. void operator delete[](void *mem, std::size_t sz) = delete;
  539. TDirectArrayHolderInplace(TMemoryUsageInfo* memInfo, ui64 size)
  540. : TComputationValue(memInfo)
  541. , Size(size)
  542. {
  543. MKQL_ENSURE(Size > 0U, "Can't create empty array holder.");
  544. MKQL_MEM_TAKE(GetMemInfo(), GetPtr(), Size * sizeof(NUdf::TUnboxedValue));
  545. std::memset(GetPtr(), 0, Size * sizeof(NUdf::TUnboxedValue));
  546. }
  547. ~TDirectArrayHolderInplace() {
  548. for (ui64 i = 0U; i < Size; ++i) {
  549. (GetPtr() + i)->~TUnboxedValue();
  550. }
  551. MKQL_MEM_RETURN(GetMemInfo(), GetPtr(), Size * sizeof(NUdf::TUnboxedValue));
  552. }
  553. ui64 GetSize() const {
  554. return Size;
  555. }
  556. NUdf::TUnboxedValue* GetPtr() const {
  557. return (NUdf::TUnboxedValue*)(this + 1);
  558. }
  559. private:
  560. class TIterator : public TTemporaryComputationValue<TIterator> {
  561. public:
  562. TIterator(const TDirectArrayHolderInplace* parent)
  563. : TTemporaryComputationValue(parent->GetMemInfo()), Parent(const_cast<TDirectArrayHolderInplace*>(parent))
  564. {}
  565. private:
  566. bool Skip() final {
  567. return ++Current < Parent->GetSize();
  568. }
  569. bool Next(NUdf::TUnboxedValue& value) final {
  570. if (!Skip())
  571. return false;
  572. value = Parent->GetPtr()[Current];
  573. return true;
  574. }
  575. bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& payload) final {
  576. if (!Next(payload))
  577. return false;
  578. key = NUdf::TUnboxedValuePod(Current);
  579. return true;
  580. }
  581. const NUdf::TRefCountedPtr<TDirectArrayHolderInplace> Parent;
  582. ui64 Current = Max<ui64>();
  583. };
  584. class TKeysIterator : public TTemporaryComputationValue<TKeysIterator> {
  585. public:
  586. TKeysIterator(const TDirectArrayHolderInplace& parent)
  587. : TTemporaryComputationValue(parent.GetMemInfo()), Size(parent.GetSize())
  588. {}
  589. private:
  590. bool Skip() final {
  591. return ++Current < Size;
  592. }
  593. bool Next(NUdf::TUnboxedValue& key) final {
  594. if (!Skip())
  595. return false;
  596. key = NUdf::TUnboxedValuePod(Current);
  597. return true;
  598. }
  599. const ui64 Size;
  600. ui64 Current = Max<ui64>();
  601. };
  602. bool HasListItems() const final {
  603. return true;
  604. }
  605. bool HasDictItems() const final {
  606. return true;
  607. }
  608. bool HasFastListLength() const final {
  609. return true;
  610. }
  611. ui64 GetListLength() const final {
  612. return Size;
  613. }
  614. ui64 GetDictLength() const final {
  615. return Size;
  616. }
  617. ui64 GetEstimatedListLength() const final {
  618. return Size;
  619. }
  620. NUdf::TUnboxedValue GetListIterator() const final {
  621. return NUdf::TUnboxedValuePod(new TIterator(this));
  622. }
  623. NUdf::TUnboxedValue GetDictIterator() const final {
  624. return NUdf::TUnboxedValuePod(new TIterator(this));
  625. }
  626. NUdf::TUnboxedValue GetPayloadsIterator() const final {
  627. return NUdf::TUnboxedValuePod(new TIterator(this));
  628. }
  629. NUdf::TUnboxedValue GetKeysIterator() const final {
  630. return NUdf::TUnboxedValuePod(new TKeysIterator(*this));
  631. }
  632. NUdf::IBoxedValuePtr ReverseListImpl(const NUdf::IValueBuilder& builder) const final {
  633. if (1U >= Size)
  634. return const_cast<TDirectArrayHolderInplace*>(this);
  635. NUdf::TUnboxedValue* items = nullptr;
  636. auto result = builder.NewArray(Size, items);
  637. std::reverse_copy(GetPtr(), GetPtr() + Size, items);
  638. return result.Release().AsBoxed();
  639. }
  640. NUdf::IBoxedValuePtr SkipListImpl(const NUdf::IValueBuilder& builder, ui64 count) const final {
  641. if (!count)
  642. return const_cast<TDirectArrayHolderInplace*>(this);
  643. if (count >= Size)
  644. return builder.NewEmptyList().Release().AsBoxed();
  645. const auto newSize = Size - count;
  646. NUdf::TUnboxedValue* items = nullptr;
  647. auto result = builder.NewArray(newSize, items);
  648. std::copy_n(GetPtr() + count, newSize, items);
  649. return result.Release().AsBoxed();
  650. }
  651. NUdf::IBoxedValuePtr TakeListImpl(const NUdf::IValueBuilder& builder, ui64 count) const final {
  652. if (!count)
  653. return builder.NewEmptyList().Release().AsBoxed();
  654. if (count >= Size)
  655. return const_cast<TDirectArrayHolderInplace*>(this);
  656. const auto newSize = count;
  657. NUdf::TUnboxedValue* items = nullptr;
  658. auto result = builder.NewArray(newSize, items);
  659. std::copy_n(GetPtr(), newSize, items);
  660. return result.Release().AsBoxed();
  661. }
  662. NUdf::IBoxedValuePtr ToIndexDictImpl(const NUdf::IValueBuilder&) const final {
  663. return const_cast<TDirectArrayHolderInplace*>(this);
  664. }
  665. bool Contains(const NUdf::TUnboxedValuePod& key) const final {
  666. return key.Get<ui64>() < Size;
  667. }
  668. NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const final {
  669. const auto index = key.Get<ui64>();
  670. return index < Size ? GetPtr()[index].MakeOptional() : NUdf::TUnboxedValuePod();
  671. }
  672. NUdf::TUnboxedValue GetElement(ui32 index) const final {
  673. Y_DEBUG_ABORT_UNLESS(index < Size);
  674. return GetPtr()[index];
  675. }
  676. const NUdf::TUnboxedValue* GetElements() const final {
  677. return GetPtr();
  678. }
  679. bool IsSortedDict() const override {
  680. return true;
  681. }
  682. const ui64 Size;
  683. };
  684. //////////////////////////////////////////////////////////////////////////////
  685. // THolderFactory
  686. //////////////////////////////////////////////////////////////////////////////
  687. class THolderFactory: private TNonCopyable
  688. {
  689. public:
  690. THolderFactory(
  691. TAllocState& allocState,
  692. TMemoryUsageInfo& memInfo,
  693. const IFunctionRegistry* functionRegistry = nullptr);
  694. ~THolderFactory();
  695. template <typename T, typename... TArgs>
  696. NUdf::TUnboxedValuePod Create(TArgs&&... args) const {
  697. return NUdf::TUnboxedValuePod(AllocateOn<T>(CurrentAllocState, &MemInfo, std::forward<TArgs>(args)...));
  698. }
  699. NUdf::TUnboxedValuePod CreateTypeHolder(TType* type) const;
  700. NUdf::TUnboxedValuePod CreateDirectListHolder(TDefaultListRepresentation&& items) const;
  701. NUdf::TUnboxedValuePod CreateDirectArrayHolder(ui64 size, NUdf::TUnboxedValue*& itemsPtr) const;
  702. NUdf::TUnboxedValuePod CreateArrowBlock(arrow::Datum&& datum) const;
  703. NUdf::TUnboxedValuePod VectorAsArray(TUnboxedValueVector& values) const;
  704. NUdf::TUnboxedValuePod VectorAsVectorHolder(TUnboxedValueVector&& list) const;
  705. NUdf::TUnboxedValuePod NewVectorHolder() const;
  706. NUdf::TUnboxedValuePod NewTemporaryVectorHolder() const;
  707. const NUdf::IHash* GetHash(const TType& type, bool useIHash) const;
  708. const NUdf::IEquate* GetEquate(const TType& type, bool useIHash) const;
  709. const NUdf::ICompare* GetCompare(const TType& type, bool useIHash) const;
  710. template <class TForwardIterator>
  711. NUdf::TUnboxedValuePod RangeAsArray(TForwardIterator first, TForwardIterator last) const {
  712. auto count = std::distance(first, last);
  713. if (count == 0)
  714. return GetEmptyContainerLazy();
  715. NUdf::TUnboxedValue* itemsPtr = nullptr;
  716. auto tuple = CreateDirectArrayHolder(count, itemsPtr);
  717. while (first != last) {
  718. *itemsPtr++ = std::move(*first);
  719. ++first;
  720. }
  721. return tuple;
  722. }
  723. NUdf::TUnboxedValuePod CreateDirectSortedSetHolder(
  724. TSortedSetFiller filler,
  725. const TKeyTypes& types,
  726. bool isTuple,
  727. EDictSortMode mode,
  728. bool eagerFill,
  729. TType* encodedType,
  730. const NUdf::ICompare* compare,
  731. const NUdf::IEquate* equate) const;
  732. NUdf::TUnboxedValuePod CreateDirectSortedDictHolder(
  733. TSortedDictFiller filler,
  734. const TKeyTypes& types,
  735. bool isTuple,
  736. EDictSortMode mode,
  737. bool eagerFill,
  738. TType* encodedType,
  739. const NUdf::ICompare* compare,
  740. const NUdf::IEquate* equate) const;
  741. NUdf::TUnboxedValuePod CreateDirectHashedDictHolder(
  742. THashedDictFiller filler,
  743. const TKeyTypes& types,
  744. bool isTuple,
  745. bool eagerFill,
  746. TType* encodedType,
  747. const NUdf::IHash* hash,
  748. const NUdf::IEquate* equate) const;
  749. NUdf::TUnboxedValuePod CreateDirectHashedSetHolder(
  750. THashedSetFiller filler,
  751. const TKeyTypes& types,
  752. bool isTuple,
  753. bool eagerFill,
  754. TType* encodedType,
  755. const NUdf::IHash* hash,
  756. const NUdf::IEquate* equate) const;
  757. template <typename T, bool OptionalKey>
  758. NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedSetHolder(TValuesDictHashSingleFixedSet<T>&& set, bool hasNull) const;
  759. template <typename T, bool OptionalKey>
  760. NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactSetHolder(TValuesDictHashSingleFixedCompactSet<T>&& set, bool hasNull) const;
  761. template <typename T, bool OptionalKey>
  762. NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedMapHolder(TValuesDictHashSingleFixedMap<T>&& map, std::optional<NUdf::TUnboxedValue>&& nullPayload) const;
  763. NUdf::TUnboxedValuePod CreateDirectHashedCompactSetHolder(
  764. TValuesDictHashCompactSet&& set, TPagedArena&& pool, TType* keyType,
  765. TComputationContext* ctx) const;
  766. NUdf::TUnboxedValuePod CreateDirectHashedCompactMapHolder(
  767. TValuesDictHashCompactMap&& map, TPagedArena&& pool, TType* keyType, TType* payloadType,
  768. TComputationContext* ctx) const;
  769. NUdf::TUnboxedValuePod CreateDirectHashedCompactMultiMapHolder(
  770. TValuesDictHashCompactMultiMap&& map, TPagedArena&& pool, TType* keyType, TType* payloadType,
  771. TComputationContext* ctx) const;
  772. template <typename T, bool OptionalKey>
  773. NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactMapHolder(
  774. TValuesDictHashSingleFixedCompactMap<T>&& map, std::optional<ui64>&& nullPayload, TPagedArena&& pool, TType* payloadType,
  775. TComputationContext* ctx) const;
  776. template <typename T, bool OptionalKey>
  777. NUdf::TUnboxedValuePod CreateDirectHashedSingleFixedCompactMultiMapHolder(
  778. TValuesDictHashSingleFixedCompactMultiMap<T>&& map, std::vector<ui64>&& nullPayloads, TPagedArena&& pool, TType* payloadType,
  779. TComputationContext* ctx) const;
  780. NUdf::IDictValueBuilder::TPtr NewDict(
  781. const NUdf::TType* dictType,
  782. ui32 flags) const;
  783. NUdf::IListValueBuilder::TPtr NewList() const;
  784. NUdf::TUnboxedValuePod Cloned(const NUdf::TUnboxedValuePod& it) const;
  785. NUdf::TUnboxedValuePod Reversed(const NUdf::TUnboxedValuePod& it) const;
  786. NUdf::TUnboxedValuePod CreateLimitedList(
  787. NUdf::IBoxedValuePtr&& parent,
  788. TMaybe<ui64> skip, TMaybe<ui64> take,
  789. TMaybe<ui64> knownLength) const;
  790. NUdf::TUnboxedValuePod ReverseList(const NUdf::IValueBuilder* builder, const NUdf::TUnboxedValuePod list) const;
  791. NUdf::TUnboxedValuePod SkipList(const NUdf::IValueBuilder* builder, const NUdf::TUnboxedValuePod list, ui64 count) const;
  792. NUdf::TUnboxedValuePod TakeList(const NUdf::IValueBuilder* builder, const NUdf::TUnboxedValuePod list, ui64 count) const;
  793. NUdf::TUnboxedValuePod ToIndexDict(const NUdf::IValueBuilder* builder, const NUdf::TUnboxedValuePod list) const;
  794. template<bool IsStream>
  795. NUdf::TUnboxedValuePod Collect(NUdf::TUnboxedValuePod list) const;
  796. NUdf::TUnboxedValuePod LazyList(NUdf::TUnboxedValuePod list) const;
  797. NUdf::TUnboxedValuePod Append(NUdf::TUnboxedValuePod list, NUdf::TUnboxedValuePod last) const;
  798. NUdf::TUnboxedValuePod Prepend(NUdf::TUnboxedValuePod first, NUdf::TUnboxedValuePod list) const;
  799. NUdf::TUnboxedValuePod CreateVariantHolder(NUdf::TUnboxedValuePod item, ui32 index) const;
  800. NUdf::TUnboxedValuePod CreateBoxedVariantHolder(NUdf::TUnboxedValuePod item, ui32 index) const;
  801. NUdf::TUnboxedValuePod CreateIteratorOverList(NUdf::TUnboxedValuePod list) const;
  802. NUdf::TUnboxedValuePod CreateForwardList(NUdf::TUnboxedValuePod stream) const;
  803. NUdf::TUnboxedValuePod CloneArray(const NUdf::TUnboxedValuePod list, NUdf::TUnboxedValue*& itemsPtr) const;
  804. TMemoryUsageInfo& GetMemInfo() const {
  805. return MemInfo;
  806. }
  807. NUdf::TUnboxedValuePod GetEmptyContainerLazy() const;
  808. void CleanupModulesOnTerminate() const {
  809. if (FunctionRegistry) {
  810. FunctionRegistry->CleanupModulesOnTerminate();
  811. }
  812. }
  813. TAlignedPagePool& GetPagePool() const {
  814. return *CurrentAllocState;
  815. }
  816. ui64 GetMemoryUsed() const {
  817. return CurrentAllocState->GetUsed();
  818. }
  819. const IFunctionRegistry* GetFunctionRegistry() const {
  820. return FunctionRegistry;
  821. }
  822. template<bool FromStreams>
  823. NUdf::TUnboxedValuePod ExtendList(NUdf::TUnboxedValue* data, ui64 size) const;
  824. NUdf::TUnboxedValuePod ExtendStream(NUdf::TUnboxedValue* data, ui64 size) const;
  825. private:
  826. TAllocState* const CurrentAllocState;
  827. TMemoryUsageInfo& MemInfo;
  828. const IFunctionRegistry* const FunctionRegistry;
  829. mutable TMaybe<NUdf::TUnboxedValue> EmptyContainer;
  830. mutable TTypeOperationsRegistry<NUdf::IHash> HashRegistry;
  831. mutable TTypeOperationsRegistry<NUdf::IEquate> EquateRegistry;
  832. mutable TTypeOperationsRegistry<NUdf::ICompare> CompareRegistry;
  833. };
  834. constexpr const ui32 STEP_FOR_RSS_CHECK = 100U;
  835. // Returns true if current usage delta exceeds the memory limit
  836. // The function automatically adjusts memory limit taking into account RSS delta between calls
  837. template<bool TrackRss>
  838. inline bool TComputationContext::CheckAdjustedMemLimit(ui64 memLimit, ui64 initMemUsage) {
  839. if (!memLimit) {
  840. return false;
  841. }
  842. if (TrackRss && (RssCounter++ % STEP_FOR_RSS_CHECK == 0)) {
  843. UpdateUsageAdjustor(memLimit);
  844. }
  845. const auto currentMemUsage = HolderFactory.GetMemoryUsed();
  846. return currentMemUsage * UsageAdjustor >= initMemUsage + memLimit;
  847. }
  848. void GetDictionaryKeyTypes(const TType* keyType, TKeyTypes& types, bool& isTuple, bool& encoded, bool& useIHash, bool expandTuple = true);
  849. template<bool SupportEqual, bool SupportHash, bool SupportLess>
  850. class TKeyTypeContanerHelper {
  851. public:
  852. TKeyTypeContanerHelper() = default;
  853. TKeyTypeContanerHelper(const TType* type) {
  854. bool encoded;
  855. bool useIHash;
  856. GetDictionaryKeyTypes(type, KeyTypes, IsTuple, encoded, useIHash);
  857. if (useIHash || encoded) {
  858. if constexpr(SupportEqual) {
  859. Equate = MakeEquateImpl(type);
  860. }
  861. if constexpr(SupportHash) {
  862. Hash = MakeHashImpl(type);
  863. }
  864. if constexpr(SupportLess) {
  865. Compare = MakeCompareImpl(type);
  866. }
  867. }
  868. }
  869. public: //unavailable getters may be eliminated at compile time, but it'd make code much less readable
  870. TValueEqual GetValueEqual() const{
  871. Y_ABORT_UNLESS(SupportEqual);
  872. return TValueEqual(KeyTypes, IsTuple, Equate.Get());
  873. }
  874. TValueHasher GetValueHash() const{
  875. Y_ABORT_UNLESS(SupportHash);
  876. return TValueHasher(KeyTypes, IsTuple, Hash.Get());
  877. }
  878. TValueLess GetValueLess() const{
  879. Y_ABORT_UNLESS(SupportLess);
  880. return TValueLess(KeyTypes, IsTuple , Compare.Get());
  881. }
  882. private:
  883. TKeyTypes KeyTypes;
  884. bool IsTuple = false;
  885. //unsused pointers may be eliminated at compile time, but it'd make code much less readable
  886. NUdf::IEquate::TPtr Equate;
  887. NUdf::IHash::TPtr Hash;
  888. NUdf::ICompare::TPtr Compare;
  889. };
  890. class TPlainContainerCache {
  891. public:
  892. TPlainContainerCache();
  893. TPlainContainerCache(const TPlainContainerCache&) = delete;
  894. TPlainContainerCache& operator=(const TPlainContainerCache&) = delete;
  895. void Clear();
  896. NUdf::TUnboxedValuePod NewArray(const THolderFactory& factory, ui64 size, NUdf::TUnboxedValue*& items);
  897. private:
  898. std::array<NUdf::TUnboxedValue, 2> Cached;
  899. std::array<NUdf::TUnboxedValue*, 2> CachedItems;
  900. ui8 CacheIndex = 0U;
  901. };
  902. template<class TObject>
  903. class TMutableObjectOverBoxedValue {
  904. public:
  905. TMutableObjectOverBoxedValue(TComputationMutables& mutables)
  906. : ObjectIndex(mutables.CurValueIndex++)
  907. {}
  908. template <typename... Args>
  909. TObject& RefMutableObject(TComputationContext& ctx, Args&&... args) const {
  910. auto& unboxed = ctx.MutableValues[ObjectIndex];
  911. if (!unboxed.HasValue()) {
  912. unboxed = ctx.HolderFactory.Create<TObject>(std::forward<Args>(args)...);
  913. }
  914. auto boxed = unboxed.AsBoxed();
  915. return *static_cast<TObject*>(boxed.Get());
  916. }
  917. private:
  918. const ui32 ObjectIndex;
  919. };
  920. } // namespace NMiniKQL
  921. } // namespace NKikimr