bitoutput.h 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. #pragma once
  2. #include <library/cpp/deprecated/accessors/accessors.h>
  3. #include <util/stream/output.h>
  4. #include <util/system/yassert.h>
  5. #include <util/generic/bitops.h>
  6. #include <util/generic/vector.h>
  7. #include <util/generic/yexception.h>
  8. namespace NBitIO {
  9. // Based on junk/solar/codecs/bitstream.h
  10. // Almost all code is hard tuned for sequential write performance.
  11. // Use tools/bursttrie/benchmarks/bitstreams_benchmark to check your changes.
  12. inline constexpr ui64 BytesUp(ui64 bits) {
  13. return (bits + 7ULL) >> 3ULL;
  14. }
  15. template <typename TStorage>
  16. class TBitOutputBase {
  17. protected:
  18. TStorage* Storage;
  19. ui64 FreeBits;
  20. ui64 Active;
  21. ui64 Offset;
  22. public:
  23. TBitOutputBase(TStorage* storage)
  24. : Storage(storage)
  25. , FreeBits(64)
  26. , Active()
  27. , Offset()
  28. {
  29. }
  30. ui64 GetOffset() const {
  31. return Offset + BytesUp(64ULL - FreeBits);
  32. }
  33. ui64 GetBitOffset() const {
  34. return (64ULL - FreeBits) & 7ULL;
  35. }
  36. ui64 GetByteReminder() const {
  37. return FreeBits & 7ULL;
  38. }
  39. public:
  40. // interface
  41. // Write "bits" lower bits.
  42. Y_FORCE_INLINE void Write(ui64 data, ui64 bits) {
  43. if (FreeBits < bits) {
  44. if (FreeBits) {
  45. bits -= FreeBits;
  46. Active |= (data & MaskLowerBits(FreeBits)) << (64ULL - FreeBits);
  47. data >>= FreeBits;
  48. FreeBits = 0ULL;
  49. }
  50. Flush();
  51. }
  52. Active |= bits ? ((data & MaskLowerBits(bits)) << (64ULL - FreeBits)) : 0;
  53. FreeBits -= bits;
  54. }
  55. // Write "bits" lower bits starting from "skipbits" bit.
  56. Y_FORCE_INLINE void Write(ui64 data, ui64 bits, ui64 skipbits) {
  57. Write(data >> skipbits, bits);
  58. }
  59. // Unsigned wordwise write. Underlying data is splitted in "words" of "bits(data) + 1(flag)" bits.
  60. // Like this: (unsigned char)0x2E<3> (0000 0010 1110) <=> 1110 0101
  61. // fddd fddd
  62. template <ui64 bits>
  63. Y_FORCE_INLINE void WriteWords(ui64 data) {
  64. do {
  65. ui64 part = data;
  66. data >>= bits;
  67. part |= FastZeroIfFalse(data, NthBit64(bits));
  68. Write(part, bits + 1ULL);
  69. } while (data);
  70. }
  71. Y_FORCE_INLINE ui64 /* padded bits */ Flush() {
  72. const ui64 ubytes = 8ULL - (FreeBits >> 3ULL);
  73. if (ubytes) {
  74. Active <<= FreeBits;
  75. Active >>= FreeBits;
  76. Storage->WriteData((const char*)&Active, (const char*)&Active + ubytes);
  77. Offset += ubytes;
  78. }
  79. const ui64 padded = FreeBits & 7;
  80. FreeBits = 64ULL;
  81. Active = 0ULL;
  82. return padded;
  83. }
  84. virtual ~TBitOutputBase() {
  85. Flush();
  86. }
  87. private:
  88. static Y_FORCE_INLINE ui64 FastZeroIfFalse(bool cond, ui64 iftrue) {
  89. return -i64(cond) & iftrue;
  90. }
  91. };
  92. template <typename TVec>
  93. class TBitOutputVectorImpl {
  94. TVec* Data;
  95. public:
  96. void WriteData(const char* begin, const char* end) {
  97. NAccessors::Append(*Data, begin, end);
  98. }
  99. TBitOutputVectorImpl(TVec* data)
  100. : Data(data)
  101. {
  102. }
  103. };
  104. template <typename TVec>
  105. struct TBitOutputVector: public TBitOutputVectorImpl<TVec>, public TBitOutputBase<TBitOutputVectorImpl<TVec>> {
  106. inline TBitOutputVector(TVec* data)
  107. : TBitOutputVectorImpl<TVec>(data)
  108. , TBitOutputBase<TBitOutputVectorImpl<TVec>>(this)
  109. {
  110. }
  111. };
  112. class TBitOutputArrayImpl {
  113. char* Data;
  114. size_t Left;
  115. public:
  116. void WriteData(const char* begin, const char* end) {
  117. size_t sz = end - begin;
  118. Y_ABORT_UNLESS(sz <= Left, " ");
  119. memcpy(Data, begin, sz);
  120. Data += sz;
  121. Left -= sz;
  122. }
  123. TBitOutputArrayImpl(char* begin, size_t len)
  124. : Data(begin)
  125. , Left(len)
  126. {
  127. }
  128. };
  129. struct TBitOutputArray: public TBitOutputArrayImpl, public TBitOutputBase<TBitOutputArrayImpl> {
  130. inline TBitOutputArray(char* begin, size_t len)
  131. : TBitOutputArrayImpl(begin, len)
  132. , TBitOutputBase<TBitOutputArrayImpl>(this)
  133. {
  134. }
  135. };
  136. using TBitOutputYVector = TBitOutputVector<TVector<char>>;
  137. class TBitOutputStreamImpl {
  138. IOutputStream* Out;
  139. public:
  140. void WriteData(const char* begin, const char* end) {
  141. Out->Write(begin, end - begin);
  142. }
  143. TBitOutputStreamImpl(IOutputStream* out)
  144. : Out(out)
  145. {
  146. }
  147. };
  148. struct TBitOutputStream: public TBitOutputStreamImpl, public TBitOutputBase<TBitOutputStreamImpl> {
  149. inline TBitOutputStream(IOutputStream* out)
  150. : TBitOutputStreamImpl(out)
  151. , TBitOutputBase<TBitOutputStreamImpl>(this)
  152. {
  153. }
  154. };
  155. }