// comptable_codec.cpp (2.8 KB)
  1. #include "comptable_codec.h"
  2. #include <library/cpp/comptable/comptable.h>
  3. #include <util/string/cast.h>
  4. namespace NCodecs {
  5. class TCompTableCodec::TImpl: public TAtomicRefCount<TImpl> {
  6. public:
  7. TImpl(EQuality q)
  8. : Quality(q)
  9. {
  10. }
  11. void Init() {
  12. Compressor.Reset(new NCompTable::TChunkCompressor{(bool)Quality, Table});
  13. Decompressor.Reset(new NCompTable::TChunkDecompressor{(bool)Quality, Table});
  14. }
  15. ui8 Encode(TStringBuf in, TBuffer& out) const {
  16. out.Clear();
  17. if (!in) {
  18. return 0;
  19. }
  20. TVector<char> result;
  21. Compressor->Compress(in, &result);
  22. out.Assign(&result[0], result.size());
  23. return 0;
  24. }
  25. void Decode(TStringBuf in, TBuffer& out) const {
  26. out.Clear();
  27. if (!in) {
  28. return;
  29. }
  30. TVector<char> result;
  31. Decompressor->Decompress(in, &result);
  32. out.Assign(&result[0], result.size());
  33. }
  34. void DoLearn(ISequenceReader& in) {
  35. NCompTable::TDataSampler sampler;
  36. TStringBuf region;
  37. while (in.NextRegion(region)) {
  38. if (!region) {
  39. continue;
  40. }
  41. sampler.AddStat(region);
  42. }
  43. sampler.BuildTable(Table);
  44. Init();
  45. }
  46. void Save(IOutputStream* out) const {
  47. ::Save(out, Table);
  48. }
  49. void Load(IInputStream* in) {
  50. ::Load(in, Table);
  51. Init();
  52. }
  53. NCompTable::TCompressorTable Table;
  54. THolder<NCompTable::TChunkCompressor> Compressor;
  55. THolder<NCompTable::TChunkDecompressor> Decompressor;
  56. const EQuality Quality;
  57. static const ui32 SampleSize = Max(NCompTable::TDataSampler::Size * 4, (1 << 22) * 5);
  58. };
  59. TCompTableCodec::TCompTableCodec(EQuality q)
  60. : Impl(new TImpl{q})
  61. {
  62. MyTraits.NeedsTraining = true;
  63. MyTraits.SizeOnEncodeMultiplier = 2;
  64. MyTraits.SizeOnDecodeMultiplier = 10;
  65. MyTraits.RecommendedSampleSize = TImpl::SampleSize;
  66. }
  67. TCompTableCodec::~TCompTableCodec() = default;
  68. TString TCompTableCodec::GetName() const {
  69. return ToString(Impl->Quality ? MyNameHQ() : MyNameLQ());
  70. }
  71. ui8 TCompTableCodec::Encode(TStringBuf in, TBuffer& out) const {
  72. return Impl->Encode(in, out);
  73. }
  74. void TCompTableCodec::Decode(TStringBuf in, TBuffer& out) const {
  75. Impl->Decode(in, out);
  76. }
  77. void TCompTableCodec::DoLearn(ISequenceReader& in) {
  78. Impl->DoLearn(in);
  79. }
  80. void TCompTableCodec::Save(IOutputStream* out) const {
  81. Impl->Save(out);
  82. }
  83. void TCompTableCodec::Load(IInputStream* in) {
  84. Impl->Load(in);
  85. }
  86. }