presort_impl.h 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. #pragma once
  2. #include <yql/essentials/minikql/defs.h>
  3. #include <yql/essentials/minikql/mkql_node.h>
  4. #include <yql/essentials/public/udf/udf_value.h>
  5. #include <yql/essentials/utils/swap_bytes.h>
  6. #include <util/generic/vector.h>
  7. namespace NKikimr {
  8. namespace NMiniKQL {
  9. namespace NDetail {
  10. using NYql::SwapBytes;
  11. Y_FORCE_INLINE
  12. void EnsureInputSize(TStringBuf& input, size_t size) {
  13. MKQL_ENSURE(input.size() >= size, "premature end of input");
  14. }
  15. template <bool Desc>
  16. Y_FORCE_INLINE
  17. void EncodeBool(TVector<ui8>& output, bool value) {
  18. output.push_back(Desc ? 0xFF ^ ui8(value) : ui8(value));
  19. }
  20. template <bool Desc>
  21. Y_FORCE_INLINE
  22. bool DecodeBool(TStringBuf& input) {
  23. EnsureInputSize(input, 1);
  24. auto result = Desc ? bool(0xFF ^ ui8(input[0])) : bool(input[0]);
  25. input.Skip(1);
  26. return result;
  27. }
  28. template <typename TUnsigned, bool Desc>
  29. Y_FORCE_INLINE
  30. void EncodeUnsigned(TVector<ui8>& output, TUnsigned value) {
  31. constexpr size_t size = sizeof(TUnsigned);
  32. if (Desc) {
  33. value = ~value;
  34. }
  35. output.resize(output.size() + size);
  36. WriteUnaligned<TUnsigned>(output.end() - size, SwapBytes(value));
  37. }
  38. template <typename TUnsigned, bool Desc>
  39. Y_FORCE_INLINE
  40. TUnsigned DecodeUnsigned(TStringBuf& input) {
  41. constexpr size_t size = sizeof(TUnsigned);
  42. EnsureInputSize(input, size);
  43. auto value = ReadUnaligned<TUnsigned>(input.data());
  44. input.Skip(size);
  45. value = SwapBytes(value);
  46. if (Desc) {
  47. value = ~value;
  48. }
  49. return value;
  50. }
  51. template <typename TSigned, bool Desc>
  52. Y_FORCE_INLINE
  53. void EncodeSigned(TVector<ui8>& output, TSigned value) {
  54. using TUnsigned = std::make_unsigned_t<TSigned>;
  55. constexpr size_t size = sizeof(TUnsigned);
  56. constexpr TUnsigned shift = TUnsigned(1) << (size * 8 - 1);
  57. EncodeUnsigned<TUnsigned, Desc>(output, TUnsigned(value) + shift);
  58. }
  59. template <typename TSigned, bool Desc>
  60. Y_FORCE_INLINE
  61. TSigned DecodeSigned(TStringBuf& input) {
  62. using TUnsigned = std::make_unsigned_t<TSigned>;
  63. constexpr size_t size = sizeof(TUnsigned);
  64. constexpr TUnsigned shift = TUnsigned(1) << (size * 8 - 1);
  65. return TSigned(DecodeUnsigned<TUnsigned, Desc>(input) - shift);
  66. }
  67. enum class EFPCode : ui8 {
  68. NegInf = 0,
  69. Neg = 1,
  70. Zero = 2,
  71. Pos = 3,
  72. PosInf = 4,
  73. Nan = 5
  74. };
  75. template <typename TFloat>
  76. struct TFloatToInteger {};
  77. template <>
  78. struct TFloatToInteger<float> {
  79. using TType = ui32;
  80. };
  81. template <>
  82. struct TFloatToInteger<double> {
  83. using TType = ui64;
  84. };
  85. static_assert(std::numeric_limits<float>::is_iec559, "float type is not iec559(ieee754)");
  86. static_assert(std::numeric_limits<double>::is_iec559, "double type is not iec559(ieee754)");
  87. template <typename TFloat, bool Desc>
  88. Y_FORCE_INLINE
  89. void EncodeFloating(TVector<ui8>& output, TFloat value) {
  90. using TInteger = typename TFloatToInteger<TFloat>::TType;
  91. EFPCode code;
  92. switch (std::fpclassify(value)) {
  93. case FP_NORMAL:
  94. case FP_SUBNORMAL: {
  95. auto integer = ReadUnaligned<TInteger>(&value);
  96. if (value < 0) {
  97. integer = ~integer;
  98. code = EFPCode::Neg;
  99. }
  100. else {
  101. code = EFPCode::Pos;
  102. }
  103. output.push_back(Desc ? 0xFF ^ ui8(code) : ui8(code));
  104. EncodeUnsigned<TInteger, Desc>(output, integer);
  105. return;
  106. }
  107. case FP_ZERO:
  108. code = EFPCode::Zero;
  109. break;
  110. case FP_INFINITE:
  111. code = value < 0 ? EFPCode::NegInf : EFPCode::PosInf;
  112. break;
  113. default:
  114. code = EFPCode::Nan;
  115. break;
  116. }
  117. output.push_back(Desc ? 0xFF ^ ui8(code) : ui8(code));
  118. }
  119. template <typename TFloat, bool Desc>
  120. Y_FORCE_INLINE
  121. TFloat DecodeFloating(TStringBuf& input) {
  122. using TInteger = typename TFloatToInteger<TFloat>::TType;
  123. EnsureInputSize(input, 1);
  124. auto code = EFPCode(Desc ? 0xFF ^ input[0] : input[0]);
  125. input.Skip(1);
  126. bool negative;
  127. switch (code) {
  128. case EFPCode::Zero:
  129. return 0;
  130. case EFPCode::NegInf:
  131. return -std::numeric_limits<TFloat>::infinity();
  132. case EFPCode::PosInf:
  133. return std::numeric_limits<TFloat>::infinity();
  134. case EFPCode::Nan:
  135. return std::numeric_limits<TFloat>::quiet_NaN();
  136. case EFPCode::Neg:
  137. negative = true;
  138. break;
  139. case EFPCode::Pos:
  140. negative = false;
  141. break;
  142. default:
  143. MKQL_ENSURE(false, "floating point data is corrupted");
  144. }
  145. auto integer = DecodeUnsigned<TInteger, Desc>(input);
  146. if (negative) {
  147. integer = ~integer;
  148. }
  149. return ReadUnaligned<TFloat>(&integer);
  150. }
  151. constexpr ui8 BlockCode = 0x1F;
  152. constexpr size_t BlockSize = 15;
  153. constexpr size_t BlockSizeUi64 = BlockSize / 8 + 1;
  154. template <bool Desc>
  155. Y_FORCE_INLINE
  156. void EncodeString(TVector<ui8>& output, TStringBuf value) {
  157. size_t part = 0;
  158. while (!value.empty()) {
  159. union {
  160. ui8 buffer[BlockSize + 1];
  161. ui64 buffer64[BlockSizeUi64];
  162. };
  163. part = std::min(value.size(), BlockSize);
  164. if (part == BlockSize) {
  165. std::memcpy(buffer + 1, value.data(), BlockSize);
  166. }
  167. else {
  168. for (size_t i = 0; i < BlockSizeUi64; ++i) {
  169. buffer64[i] = 0;
  170. }
  171. std::memcpy(buffer + 1, value.data(), part);
  172. }
  173. value.Skip(part);
  174. buffer[0] = BlockCode;
  175. if (Desc) {
  176. for (size_t i = 0; i < BlockSizeUi64; ++i) {
  177. buffer64[i] ^= std::numeric_limits<ui64>::max();
  178. }
  179. }
  180. output.insert(output.end(), buffer, buffer + BlockSize + 1);
  181. }
  182. auto lastLength = ui8(part);
  183. output.push_back(Desc ? 0xFF ^ lastLength : lastLength);
  184. }
  185. template <bool Desc>
  186. Y_FORCE_INLINE
  187. TStringBuf DecodeString(TStringBuf& input, TVector<ui8>& value) {
  188. EnsureInputSize(input, 1);
  189. ui8 code = Desc ? 0xFF ^ input[0] : input[0];
  190. input.Skip(1);
  191. if (code != BlockCode) {
  192. MKQL_ENSURE(code == 0, TStringBuilder() << "unknown string block code: " << code);
  193. return TStringBuf();
  194. }
  195. while (code == BlockCode) {
  196. union {
  197. ui8 buffer[BlockSize + 1];
  198. ui64 buffer64[BlockSizeUi64];
  199. };
  200. EnsureInputSize(input, BlockSize + 1);
  201. std::memcpy(buffer, input.data(), BlockSize + 1);
  202. input.Skip(BlockSize + 1);
  203. if (Desc) {
  204. for (size_t i = 0; i < BlockSizeUi64; ++i) {
  205. buffer64[i] ^= std::numeric_limits<ui64>::max();
  206. }
  207. }
  208. value.insert(value.end(), buffer, buffer + BlockSize);
  209. code = buffer[BlockSize];
  210. }
  211. auto begin = (const char*)value.begin();
  212. auto end = (const char*)value.end() - BlockSize + code;
  213. return TStringBuf(begin, end - begin);
  214. }
  215. }
  216. } // NMiniKQL
  217. } // NKikimr