123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379 |
- #include "zlib.h"
- #include <util/memory/addstorage.h>
- #include <util/generic/scope.h>
- #include <util/generic/utility.h>
- #include <zlib.h>
- #include <cstring>
- namespace {
// windowBits values passed to inflateInit2()/deflateInit2(). The table is
// indexed with a ZLib::StreamType value, so the entries are presumably listed
// in the enum's declaration order (Auto, ZLib, GZip, Raw) -- keep them in sync.
static const int opts[] = {
    15 + 32, // Auto
    15 + 0,  // ZLib
    15 + 16, // GZip
    -15,     // Raw
};
- class TZLibCommon {
- public:
- inline TZLibCommon() noexcept {
- memset(Z(), 0, sizeof(*Z()));
- }
- inline ~TZLibCommon() = default;
- inline const char* GetErrMsg() const noexcept {
- return Z()->msg != nullptr ? Z()->msg : "unknown error";
- }
- inline z_stream* Z() const noexcept {
- return (z_stream*)(&Z_);
- }
- private:
- z_stream Z_;
- };
- static inline ui32 MaxPortion(size_t s) noexcept {
- return (ui32)Min<size_t>(Max<ui32>(), s);
- }
- struct TChunkedZeroCopyInput {
- inline TChunkedZeroCopyInput(IZeroCopyInput* in)
- : In(in)
- , Buf(nullptr)
- , Len(0)
- {
- }
- template <class P, class T>
- inline bool Next(P** buf, T* len) {
- if (!Len) {
- Len = In->Next(&Buf);
- if (!Len) {
- return false;
- }
- }
- const T toread = (T)Min((size_t)Max<T>(), Len);
- *len = toread;
- *buf = (P*)Buf;
- Buf += toread;
- Len -= toread;
- return true;
- }
- IZeroCopyInput* In;
- const char* Buf;
- size_t Len;
- };
- } // namespace
- class TZLibDecompress::TImpl: private TZLibCommon, public TChunkedZeroCopyInput {
- public:
- inline TImpl(IZeroCopyInput* in, ZLib::StreamType type, TStringBuf dict)
- : TChunkedZeroCopyInput(in)
- , Dict(dict)
- {
- if (inflateInit2(Z(), opts[type]) != Z_OK) {
- ythrow TZLibDecompressorError() << "can not init inflate engine";
- }
- if (dict.size() && type == ZLib::Raw) {
- SetDict();
- }
- }
- virtual ~TImpl() {
- inflateEnd(Z());
- }
- void SetAllowMultipleStreams(bool allowMultipleStreams) {
- AllowMultipleStreams_ = allowMultipleStreams;
- }
- inline size_t Read(void* buf, size_t size) {
- Z()->next_out = (unsigned char*)buf;
- Z()->avail_out = size;
- while (true) {
- if (Z()->avail_in == 0) {
- if (!FillInputBuffer()) {
- return 0;
- }
- }
- switch (inflate(Z(), Z_SYNC_FLUSH)) {
- case Z_NEED_DICT: {
- SetDict();
- continue;
- }
- case Z_STREAM_END: {
- if (AllowMultipleStreams_) {
- if (inflateReset(Z()) != Z_OK) {
- ythrow TZLibDecompressorError() << "inflate reset error(" << GetErrMsg() << ")";
- }
- } else {
- return size - Z()->avail_out;
- }
- [[fallthrough]];
- }
- case Z_OK: {
- const size_t processed = size - Z()->avail_out;
- if (processed) {
- return processed;
- }
- break;
- }
- default:
- ythrow TZLibDecompressorError() << "inflate error(" << GetErrMsg() << ")";
- }
- }
- }
- private:
- inline bool FillInputBuffer() {
- return Next(&Z()->next_in, &Z()->avail_in);
- }
- void SetDict() {
- if (inflateSetDictionary(Z(), (const Bytef*)Dict.data(), Dict.size()) != Z_OK) {
- ythrow TZLibCompressorError() << "can not set inflate dictionary";
- }
- }
- bool AllowMultipleStreams_ = true;
- TStringBuf Dict;
- };
- namespace {
- class TDecompressStream: public IZeroCopyInput, public TZLibDecompress::TImpl, public TAdditionalStorage<TDecompressStream> {
- public:
- inline TDecompressStream(IInputStream* input, ZLib::StreamType type, TStringBuf dict)
- : TZLibDecompress::TImpl(this, type, dict)
- , Stream_(input)
- {
- }
- ~TDecompressStream() override = default;
- private:
- size_t DoNext(const void** ptr, size_t len) override {
- void* buf = AdditionalData();
- *ptr = buf;
- return Stream_->Read(buf, Min(len, AdditionalDataLength()));
- }
- private:
- IInputStream* Stream_;
- };
- using TZeroCopyDecompress = TZLibDecompress::TImpl;
- } // namespace
- class TZLibCompress::TImpl: public TAdditionalStorage<TImpl>, private TZLibCommon {
- static inline ZLib::StreamType Type(ZLib::StreamType type) {
- if (type == ZLib::Auto) {
- return ZLib::ZLib;
- }
- if (type >= ZLib::Invalid) {
- ythrow TZLibError() << "invalid compression type: " << static_cast<unsigned long>(type);
- }
- return type;
- }
- public:
- inline TImpl(const TParams& p)
- : Stream_(p.Out)
- {
- if (deflateInit2(Z(), Min<size_t>(9, p.CompressionLevel), Z_DEFLATED, opts[Type(p.Type)], 8, Z_DEFAULT_STRATEGY)) {
- ythrow TZLibCompressorError() << "can not init inflate engine";
- }
- // Create exactly the same files on all platforms by fixing OS field in the header.
- if (p.Type == ZLib::GZip) {
- GZHeader_ = MakeHolder<gz_header>();
- GZHeader_->os = 3; // UNIX
- deflateSetHeader(Z(), GZHeader_.Get());
- }
- if (p.Dict.size()) {
- if (deflateSetDictionary(Z(), (const Bytef*)p.Dict.data(), p.Dict.size())) {
- ythrow TZLibCompressorError() << "can not set deflate dictionary";
- }
- }
- Z()->next_out = TmpBuf();
- Z()->avail_out = TmpBufLen();
- }
- inline ~TImpl() {
- deflateEnd(Z());
- }
- inline void Write(const void* buf, size_t size) {
- const Bytef* b = (const Bytef*)buf;
- const Bytef* e = b + size;
- Y_DEFER {
- Z()->next_in = nullptr;
- Z()->avail_in = 0;
- };
- do {
- b = WritePart(b, e);
- } while (b < e);
- }
- inline const Bytef* WritePart(const Bytef* b, const Bytef* e) {
- Z()->next_in = const_cast<Bytef*>(b);
- Z()->avail_in = MaxPortion(e - b);
- while (Z()->avail_in) {
- const int ret = deflate(Z(), Z_NO_FLUSH);
- switch (ret) {
- case Z_OK:
- continue;
- case Z_BUF_ERROR:
- FlushBuffer();
- break;
- default:
- ythrow TZLibCompressorError() << "deflate error(" << GetErrMsg() << ")";
- }
- }
- return Z()->next_in;
- }
- inline void Flush() {
- int ret = deflate(Z(), Z_SYNC_FLUSH);
- while ((ret == Z_OK || ret == Z_BUF_ERROR) && !Z()->avail_out) {
- FlushBuffer();
- ret = deflate(Z(), Z_SYNC_FLUSH);
- }
- if (ret != Z_OK && ret != Z_BUF_ERROR) {
- ythrow TZLibCompressorError() << "deflate flush error(" << GetErrMsg() << ")";
- }
- if (Z()->avail_out < TmpBufLen()) {
- FlushBuffer();
- }
- }
- inline void FlushBuffer() {
- Stream_->Write(TmpBuf(), TmpBufLen() - Z()->avail_out);
- Z()->next_out = TmpBuf();
- Z()->avail_out = TmpBufLen();
- }
- inline void Finish() {
- int ret = deflate(Z(), Z_FINISH);
- while (ret == Z_OK || ret == Z_BUF_ERROR) {
- FlushBuffer();
- ret = deflate(Z(), Z_FINISH);
- }
- if (ret == Z_STREAM_END) {
- Stream_->Write(TmpBuf(), TmpBufLen() - Z()->avail_out);
- } else {
- ythrow TZLibCompressorError() << "deflate finish error(" << GetErrMsg() << ")";
- }
- }
- private:
- inline unsigned char* TmpBuf() noexcept {
- return (unsigned char*)AdditionalData();
- }
- inline size_t TmpBufLen() const noexcept {
- return AdditionalDataLength();
- }
- private:
- IOutputStream* Stream_;
- THolder<gz_header> GZHeader_;
- };
- TZLibDecompress::TZLibDecompress(IZeroCopyInput* input, ZLib::StreamType type, TStringBuf dict)
- : Impl_(new TZeroCopyDecompress(input, type, dict))
- {
- }
- TZLibDecompress::TZLibDecompress(IInputStream* input, ZLib::StreamType type, size_t buflen, TStringBuf dict)
- : Impl_(new (buflen) TDecompressStream(input, type, dict))
- {
- }
- void TZLibDecompress::SetAllowMultipleStreams(bool allowMultipleStreams) {
- Impl_->SetAllowMultipleStreams(allowMultipleStreams);
- }
- TZLibDecompress::~TZLibDecompress() = default;
- size_t TZLibDecompress::DoRead(void* buf, size_t size) {
- return Impl_->Read(buf, MaxPortion(size));
- }
- void TZLibCompress::Init(const TParams& params) {
- Y_ENSURE(params.BufLen >= 16, "ZLib buffer too small");
- Impl_.Reset(new (params.BufLen) TImpl(params));
- }
- void TZLibCompress::TDestruct::Destroy(TImpl* impl) {
- delete impl;
- }
- TZLibCompress::~TZLibCompress() {
- try {
- Finish();
- } catch (...) {
- // ¯\_(ツ)_/¯
- }
- }
- void TZLibCompress::DoWrite(const void* buf, size_t size) {
- if (!Impl_) {
- ythrow TZLibCompressorError() << "can not write to finished zlib stream";
- }
- Impl_->Write(buf, size);
- }
- void TZLibCompress::DoFlush() {
- if (Impl_) {
- Impl_->Flush();
- }
- }
- void TZLibCompress::DoFinish() {
- THolder<TImpl> impl(Impl_.Release());
- if (impl) {
- impl->Finish();
- }
- }
- TBufferedZLibDecompress::~TBufferedZLibDecompress() = default;
|