float_huffman.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. #include "float_huffman.h"
  2. #include <util/generic/array_ref.h>
  3. #include <util/generic/bitops.h>
  4. #include <util/generic/cast.h>
  5. #include <util/generic/yexception.h>
  6. #include <util/system/unaligned_mem.h>
  7. #include <util/system/yassert.h>
  8. #include <util/stream/format.h>
  9. namespace NCodecs::NFloatHuff {
  10. namespace {
  11. struct THuffEntry {
  12. ui32 CodeBase;
  13. ui16 Prefix;
  14. int PrefLength;
  15. int CodeLength;
  16. int TotalLength;
  17. ui32 Mask;
  18. ui64 Offset;
  19. THuffEntry() = default;
  20. constexpr THuffEntry(ui32 codeBase, ui16 prefix, int prefLength, int codeLength)
  21. : CodeBase(codeBase)
  22. , Prefix(prefix)
  23. , PrefLength(prefLength)
  24. , CodeLength(codeLength)
  25. , TotalLength(prefLength + codeLength)
  26. , Mask(Mask64(codeLength))
  27. , Offset(-(ui64(codeBase) << prefLength) + prefix)
  28. {}
  29. bool Fit(ui32 code) const {
  30. return code >= CodeBase && code < CodeBase + (1ULL << CodeLength);
  31. }
  32. };
  33. // NB. There is a typo in the penultimate line (34 instead of 24). It was there from the very
  34. // first commit and we cannot fix it without breaking all the clients.
  35. constexpr THuffEntry entries[16] = {
  36. {0x00000000, 0x01, 1, 0}, // Only +0.0f, 1 bit, prefix [1]
  37. {0x3f800000, 0x0e, 4, 0}, // Only +1.0f, 4 bits, prefix [0111]
  38. {0x3f700000, 0x08, 5, 20}, // [0.9375, 1.0), 25 bits, prefix [00010]
  39. {0x3f000000, 0x00, 5, 20}, // [0.5, 0.5625), 25 bits, prefx [00000]
  40. {0x3f400000, 0x06, 6, 20}, // [0.75, 0.8125), 26 bits, prefix [011000]
  41. {0x3f500000, 0x22, 6, 20}, // [0.8125, 0.875), 26 bits, prefix [010001]
  42. {0x3f200000, 0x02, 6, 20}, // [0.625, 0.6875), 26 bits, prefix [010000]
  43. {0x3f100000, 0x38, 6, 20}, // [0.5625, 0.625), 26 bits, prefix [000111]
  44. {0x3f600000, 0x18, 6, 20}, // [0.875, 0.9375), 26 bits, prefix [000110]
  45. {0x3f300000, 0x30, 6, 20}, // [0.6875, 0.75), 26 bits, prefix [000011]
  46. {0x3e800000, 0x10, 6, 20}, // [0.25, 0.28125), 26 bits, prefix [000010]
  47. {0x3e000000, 0x04, 3, 24}, // [0.125, 0.5), 27 bits, prefix [001]
  48. {0x3d000000, 0x0a, 4, 24}, // [0.03125, 0.125), 28 bits, prefix [0101]
  49. {0x3c000000, 0x12, 5, 24}, // [0.0078125, 0.03125), 29 bits, prefix [01001]
  50. {0x3b000000, 0x26, 6, 34}, // [0.001953125, end of range), 40 bits, prefix [011001]
  51. {0x00000000, 0x16, 5, 32}, // whole range, 37 bits, prefix [01101]
  52. };
  53. [[noreturn]] Y_NO_INLINE void ThrowInvalidOffset(size_t size, size_t byteOffset) {
  54. ythrow yexception() <<
  55. "Decompression error: requested decoding 8 bytes past end of input buffer of " << size << " bytes size at position " << byteOffset << ". ";
  56. }
  57. struct THuffInfo {
  58. constexpr THuffInfo() {
  59. for (size_t i = 0; i < 64; ++i) {
  60. bool init = false;
  61. for (size_t j = 0; j != 16; ++j) {
  62. ui16 prefix = i & Mask64(entries[j].PrefLength);
  63. if (entries[j].Prefix == prefix) {
  64. init = true;
  65. DecodeLookup[i] = entries[j];
  66. break;
  67. }
  68. }
  69. Y_ASSERT(init);
  70. }
  71. for (ui32 i = 0; i < (1 << 12); ++i) {
  72. // First two entries (+0.0f and +1.0f) are not present in the lookup, they are handled separately
  73. for (int value = 2; value < 16; ++value) {
  74. if (entries[value].Fit(i << 20)) {
  75. EncodeLookup[i] = value;
  76. break;
  77. }
  78. }
  79. }
  80. }
  81. std::pair<ui64, int> GetCode(ui32 value) const {
  82. // Zeros are handled separately in the main loop
  83. Y_ASSERT(value != 0);
  84. if (value == 0x3f800000) {
  85. return {0x0e, 4};
  86. }
  87. const auto& entry = entries[EncodeLookup[value >> 20]];
  88. return {
  89. (ui64(value) << entry.PrefLength) + entry.Offset,
  90. entry.TotalLength
  91. };
  92. }
  93. THuffEntry DecodeLookup[64];
  94. ui8 EncodeLookup[1 << 12];
  95. };
  96. const THuffInfo huffInfo;
  97. /// End Of Stream
  98. const ui32 EOS = ui32(-1);
  99. }
  100. TString Encode(TArrayRef<const float> factors) {
  101. TString result;
  102. result.resize((factors.size() + 1) * 40 / 8 + 8, 0); // Max code length is 40 bits
  103. int usedBits = 0;
  104. ui64 buffer = 0;
  105. char* writePtr = result.begin();
  106. auto writeBits = [&](ui64 code, int size) {
  107. const auto bitsToTransfer = Min(size, 64 - usedBits);
  108. buffer |= (code << usedBits);
  109. usedBits += bitsToTransfer;
  110. if (usedBits == 64) {
  111. memcpy(writePtr, &buffer, 8);
  112. usedBits = size - bitsToTransfer;
  113. if (bitsToTransfer != 64) {
  114. buffer = code >> bitsToTransfer;
  115. } else {
  116. buffer = 0;
  117. }
  118. writePtr += 8;
  119. }
  120. };
  121. for (size_t i = 0; i != factors.size();) {
  122. if (BitCast<ui32>(factors[i]) == 0) {
  123. int zeroCount = 1;
  124. for (;;) {
  125. ++i;
  126. if (i == factors.size() || BitCast<ui32>(factors[i]) != 0) {
  127. break;
  128. }
  129. ++zeroCount;
  130. }
  131. for (; zeroCount >= 64; zeroCount -= 64) {
  132. writeBits(ui64(-1), 64);
  133. }
  134. writeBits(Mask64(zeroCount), zeroCount);
  135. } else {
  136. const auto [code, codeSize] = huffInfo.GetCode(BitCast<ui32>(factors[i]));
  137. writeBits(code, codeSize);
  138. ++i;
  139. }
  140. }
  141. // Write EOS.
  142. // We use precomputed constants instead of the following:
  143. // auto [code, codeSize] = huffInfo.GetCode(EOS);
  144. // writeBits(code, codeSize);
  145. writeBits(211527139302, 40);
  146. memcpy(writePtr, &buffer, 8);
  147. result.resize(writePtr - result.begin() + usedBits / 8 + 8);
  148. return result;
  149. }
  150. TDecoder::TDecoder(TStringBuf data)
  151. : State{
  152. .Workspace = data.size() < 8 ? ThrowInvalidOffset(data.size(), 0), 0 : ReadUnaligned<ui64>(data.data()),
  153. .WorkspaceSize = 64,
  154. .Position = 8,
  155. .Data = data
  156. }
  157. {
  158. FillDecodeBuffer();
  159. }
  160. TVector<float> TDecoder::DecodeAll(size_t sizeHint) {
  161. TVector<float> result;
  162. result.reserve(sizeHint);
  163. while (Begin != End) {
  164. result.insert(result.end(), Begin, End);
  165. FillDecodeBuffer();
  166. }
  167. return result;
  168. }
  169. size_t TDecoder::Decode(TArrayRef<float> dest) {
  170. size_t count = 0;
  171. while (count < dest.size()) {
  172. if (dest.size() - count < size_t(End - Begin)) {
  173. const auto size = dest.size() - count;
  174. std::copy(Begin, Begin + size, dest.data() + count);
  175. Begin += size;
  176. return dest.size();
  177. } else {
  178. std::copy(Begin, End, dest.data() + count);
  179. count += End - Begin;
  180. FillDecodeBuffer();
  181. if (Begin == End) {
  182. break;
  183. }
  184. }
  185. }
  186. return count;
  187. }
  188. size_t TDecoder::Skip(size_t count) {
  189. size_t skippedCount = 0;
  190. while (skippedCount < count) {
  191. if (count - skippedCount < size_t(End - Begin)) {
  192. const auto size = count - skippedCount;
  193. Begin += size;
  194. return count;
  195. } else {
  196. skippedCount += End - Begin;
  197. FillDecodeBuffer();
  198. if (Begin == End) {
  199. break;
  200. }
  201. }
  202. }
  203. return skippedCount;
  204. }
  205. bool TDecoder::HasMore() const {
  206. return Begin != End;
  207. }
  208. void TDecoder::FillDecodeBuffer() {
  209. Begin = DecodeBuffer.data();
  210. End = DecodeBuffer.data();
  211. if (HitEos) {
  212. return;
  213. }
  214. // This helps to keep most of the variables in the registers.
  215. float* end = End;
  216. TState state = State;
  217. // It is faster to just zero all the memory here in one go
  218. // and then avoid inner loop when writing zeros. There we
  219. // can just increment end pointer.
  220. std::fill(DecodeBuffer.begin(), DecodeBuffer.end(), 0.0f);
  221. // Make sure that inside the loop we always have space to put 64 zeros and one other
  222. // value.
  223. float* cap = DecodeBuffer.data() + DecodeBuffer.size() - 64 - 1;
  224. while (end < cap) {
  225. if (state.Workspace % 2 == 1) {
  226. // Decode zeros
  227. // There we can just scan whole state.Workspace for ones because it contains
  228. // zeros outside of the WorkspaceSize bits.
  229. const auto negWorkspace = ~state.Workspace;
  230. const int zeroCount = negWorkspace ? CountTrailingZeroBits(negWorkspace) : 64;
  231. end += zeroCount;
  232. state.SkipBits(zeroCount);
  233. continue;
  234. }
  235. if (state.PeekBits(4) == 0x0e) {
  236. *end++ = 1.0f;
  237. state.SkipBits(4);
  238. continue;
  239. }
  240. const auto& entry = huffInfo.DecodeLookup[state.PeekBits(6)];
  241. const auto code = ui32((state.NextBitsUnmasked(entry.TotalLength) >> entry.PrefLength) & entry.Mask) + entry.CodeBase;
  242. if (Y_UNLIKELY(code == EOS)) {
  243. HitEos = true;
  244. break;
  245. }
  246. *end++ = BitCast<float>(code);
  247. }
  248. End = end;
  249. State = state;
  250. }
  251. ui64 TDecoder::TState::PeekBits(int count) {
  252. if (WorkspaceSize > count) {
  253. return Workspace & Mask64(count);
  254. } else {
  255. if (Y_UNLIKELY(Position + 8 > Data.size())) {
  256. ThrowInvalidOffset(Data.size(), Position);
  257. }
  258. return (Workspace | (ReadUnaligned<ui64>(Data.data() + Position) << WorkspaceSize)) & Mask64(count);
  259. }
  260. }
  261. ui64 TDecoder::TState::NextBitsUnmasked(int count) {
  262. if (WorkspaceSize > count) {
  263. const auto result = Workspace;
  264. Workspace >>= count;
  265. WorkspaceSize -= count;
  266. return result;
  267. } else {
  268. if (Y_UNLIKELY(Position + 8 > Data.size())) {
  269. ThrowInvalidOffset(Data.size(), Position);
  270. }
  271. ui64 result = Workspace;
  272. Workspace = ReadUnaligned<ui64>(Data.data() + Position);
  273. Position += 8;
  274. result |= Workspace << WorkspaceSize;
  275. Workspace >>= count - WorkspaceSize;
  276. WorkspaceSize += 64 - count;
  277. return result;
  278. }
  279. }
  280. void TDecoder::TState::SkipBits(int count) {
  281. if (WorkspaceSize > count) {
  282. Workspace >>= count;
  283. WorkspaceSize -= count;
  284. } else {
  285. if (Y_UNLIKELY(Position + 8 > Data.size())) {
  286. ThrowInvalidOffset(Data.size(), Position);
  287. }
  288. Workspace = ReadUnaligned<ui64>(Data.data() + Position);
  289. Position += 8;
  290. Workspace >>= count - WorkspaceSize;
  291. WorkspaceSize += 64 - count;
  292. }
  293. }
  294. TVector<float> Decode(TStringBuf data, size_t sizeHint) {
  295. return TDecoder(data).DecodeAll(sizeHint);
  296. }
  297. } // namespace NCodecs::NFloatHuff