delta_codec.h 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #pragma once
  2. #include "codecs.h"
  3. #include <util/generic/array_ref.h>
  4. #include <util/generic/typetraits.h>
  5. #include <util/generic/bitops.h>
  6. #include <util/string/cast.h>
  7. namespace NCodecs {
  8. template <typename T = ui64, bool UnsignedDelta = true>
  9. class TDeltaCodec: public ICodec {
  10. static_assert(std::is_integral<T>::value, "expect std::is_integral<T>::value");
  11. public:
  12. using TUnsigned = std::make_unsigned_t<T>;
  13. using TSigned = std::make_signed_t<T>;
  14. using TDelta = std::conditional_t<UnsignedDelta, TUnsigned, TSigned>;
  15. private:
  16. const TDelta MinDelta{Min<TDelta>()};
  17. const TDelta MaxDelta{Max<TDelta>() - 1};
  18. const TDelta InvalidDelta{MaxDelta + 1};
  19. Y_FORCE_INLINE static TDelta AddSafe(TUnsigned a, TUnsigned b) {
  20. return a + b;
  21. }
  22. Y_FORCE_INLINE static TDelta SubSafe(TUnsigned a, TUnsigned b) {
  23. return a - b;
  24. }
  25. public:
  26. struct TDecoder {
  27. const TDelta InvalidDelta{Max<TDelta>()};
  28. T Last = 0;
  29. T Result = 0;
  30. bool First = true;
  31. bool Invalid = false;
  32. Y_FORCE_INLINE bool Decode(TDelta t) {
  33. if (Y_UNLIKELY(First)) {
  34. First = false;
  35. Result = Last = t;
  36. return true;
  37. }
  38. if (Y_UNLIKELY(Invalid)) {
  39. Invalid = false;
  40. Last = 0;
  41. Result = t;
  42. return true;
  43. }
  44. Result = (Last += t);
  45. Invalid = t == InvalidDelta;
  46. return !Invalid;
  47. }
  48. };
  49. public:
  50. static TStringBuf MyName();
  51. TDeltaCodec() {
  52. MyTraits.SizeOfInputElement = sizeof(T);
  53. MyTraits.AssumesStructuredInput = true;
  54. }
  55. TString GetName() const override {
  56. return ToString(MyName());
  57. }
  58. template <class TItem>
  59. static void AppendTo(TBuffer& b, TItem t) {
  60. b.Append((char*)&t, sizeof(t));
  61. }
  62. ui8 Encode(TStringBuf s, TBuffer& b) const override {
  63. b.Clear();
  64. if (s.empty()) {
  65. return 0;
  66. }
  67. b.Reserve(s.size());
  68. TArrayRef<const T> tin{(const T*)s.data(), s.size() / sizeof(T)};
  69. const T* it = tin.begin();
  70. TDelta last = *(it++);
  71. AppendTo(b, last);
  72. TDelta maxt = SubSafe(MaxDelta, last);
  73. TDelta mint = AddSafe(MinDelta, last);
  74. for (; it != tin.end(); ++it) {
  75. TDelta t = *it;
  76. if (Y_LIKELY((t >= mint) & (t <= maxt))) {
  77. AppendTo(b, t - last);
  78. last = t;
  79. maxt = SubSafe(MaxDelta, last);
  80. mint = AddSafe(MinDelta, last);
  81. } else {
  82. // delta overflow
  83. AppendTo(b, InvalidDelta);
  84. AppendTo(b, t);
  85. last = 0;
  86. maxt = MaxDelta;
  87. mint = MinDelta;
  88. }
  89. }
  90. return 0;
  91. }
  92. void Decode(TStringBuf s, TBuffer& b) const override {
  93. b.Clear();
  94. if (s.empty()) {
  95. return;
  96. }
  97. b.Reserve(s.size());
  98. TArrayRef<const T> tin{(const T*)s.data(), s.size() / sizeof(T)};
  99. TDecoder dec;
  100. for (const T* it = tin.begin(); it != tin.end(); ++it) {
  101. T tmp;
  102. memcpy(&tmp, it, sizeof(tmp));
  103. if (dec.Decode(tmp)) {
  104. AppendTo(b, dec.Result);
  105. }
  106. }
  107. }
  108. protected:
  109. void DoLearn(ISequenceReader&) override {
  110. }
  111. };
  112. }