bit_util.h 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. #pragma once
  2. #include "defs.h"
  3. namespace NYql {
  4. namespace NUdf {
  5. inline ui8* CompressAsSparseBitmap(const ui8* src, size_t srcOffset, const ui8* sparseBitmap, ui8* dst, size_t count) {
  6. while (count--) {
  7. ui8 inputBit = (src[srcOffset >> 3] >> (srcOffset & 7)) & 1;
  8. *dst = inputBit;
  9. ++srcOffset;
  10. dst += *sparseBitmap++;
  11. }
  12. return dst;
  13. }
  14. inline ui8* DecompressToSparseBitmap(ui8* dstSparse, const ui8* src, size_t srcOffset, size_t count) {
  15. if (srcOffset != 0) {
  16. size_t offsetBytes = srcOffset >> 3;
  17. size_t offsetTail = srcOffset & 7;
  18. src += offsetBytes;
  19. if (offsetTail != 0) {
  20. for (ui8 i = offsetTail; count > 0 && i < 8; i++, count--) {
  21. *dstSparse++ = (*src >> i) & 1u;
  22. }
  23. src++;
  24. }
  25. }
  26. while (count >= 8) {
  27. ui8 slot = *src++;
  28. *dstSparse++ = (slot >> 0) & 1u;
  29. *dstSparse++ = (slot >> 1) & 1u;
  30. *dstSparse++ = (slot >> 2) & 1u;
  31. *dstSparse++ = (slot >> 3) & 1u;
  32. *dstSparse++ = (slot >> 4) & 1u;
  33. *dstSparse++ = (slot >> 5) & 1u;
  34. *dstSparse++ = (slot >> 6) & 1u;
  35. *dstSparse++ = (slot >> 7) & 1u;
  36. count -= 8;
  37. }
  38. for (ui8 i = 0; i < count; i++) {
  39. *dstSparse++ = (*src >> i) & 1u;
  40. }
  41. return dstSparse;
  42. }
  43. template<bool Negate>
  44. inline void CompressSparseImpl(ui8* dst, const ui8* srcSparse, size_t len) {
  45. while (len >= 8) {
  46. ui8 result = 0;
  47. result |= (*srcSparse++ & 1u) << 0;
  48. result |= (*srcSparse++ & 1u) << 1;
  49. result |= (*srcSparse++ & 1u) << 2;
  50. result |= (*srcSparse++ & 1u) << 3;
  51. result |= (*srcSparse++ & 1u) << 4;
  52. result |= (*srcSparse++ & 1u) << 5;
  53. result |= (*srcSparse++ & 1u) << 6;
  54. result |= (*srcSparse++ & 1u) << 7;
  55. if constexpr (Negate) {
  56. *dst++ = ~result;
  57. } else {
  58. *dst++ = result;
  59. }
  60. len -= 8;
  61. }
  62. if (len) {
  63. ui8 result = 0;
  64. for (ui8 i = 0; i < len; ++i) {
  65. result |= (*srcSparse++ & 1u) << i;
  66. }
  67. if constexpr (Negate) {
  68. *dst++ = ~result;
  69. } else {
  70. *dst++ = result;
  71. }
  72. }
  73. }
  74. inline void CompressSparseBitmap(ui8* dst, const ui8* srcSparse, size_t len) {
  75. return CompressSparseImpl<false>(dst, srcSparse, len);
  76. }
  77. inline void CompressSparseBitmapNegate(ui8* dst, const ui8* srcSparse, size_t len) {
  78. return CompressSparseImpl<true>(dst, srcSparse, len);
  79. }
  80. template<typename T>
  81. inline T* CompressArray(const T* src, const ui8* sparseBitmap, T* dst, size_t count) {
  82. while (count--) {
  83. *dst = *src++;
  84. dst += *sparseBitmap++;
  85. }
  86. return dst;
  87. }
  88. inline void CopyDenseBitmap(ui8* dst, const ui8* src, size_t srcOffset, size_t len) {
  89. if ((srcOffset & 7) != 0) {
  90. size_t offsetBytes = srcOffset >> 3;
  91. src += offsetBytes;
  92. ui8 offsetTail = srcOffset & 7;
  93. ui8 offsetHead = 8 - offsetTail;
  94. ui8 remainder = *src++ >> offsetTail;
  95. size_t dstOffset = offsetHead;
  96. for (; dstOffset < len; dstOffset += 8) {
  97. *dst++ = remainder | (*src << offsetHead);
  98. remainder = *src >> offsetTail;
  99. src++;
  100. }
  101. // dst is guaranteed to have extra length even if it's not needed
  102. *dst++ = remainder;
  103. } else {
  104. src += srcOffset >> 3;
  105. // Round up to 8
  106. len = (len + 7u) & ~size_t(7u);
  107. memcpy(dst, src, len >> 3);
  108. }
  109. }
  110. }
  111. }