// lzma.cpp
  1. #include "lzma.h"
  2. #include <util/stream/mem.h>
  3. #include <util/system/context.h>
  4. #include <util/generic/cast.h>
  5. #include <util/memory/addstorage.h>
  6. #include <util/generic/ptr.h>
  7. #include <util/generic/intrlist.h>
  8. #include <util/generic/scope.h>
  9. extern "C" {
  10. #include <contrib/libs/lzmasdk/LzmaEnc.h>
  11. #include <contrib/libs/lzmasdk/LzmaDec.h>
  12. }
  13. namespace {
  14. class TMemoryGc {
  15. class TAllocation: public TIntrusiveListItem<TAllocation>, public TAdditionalStorage<TAllocation> {
  16. };
  17. public:
  18. inline void* Allocate(size_t len) {
  19. if (len > 1024 * 1024 * 1024) {
  20. return nullptr;
  21. }
  22. TAllocation* ret = new (len) TAllocation;
  23. Allocs_.PushBack(ret);
  24. return ret->AdditionalData();
  25. }
  26. inline void Deallocate(void* ptr) noexcept {
  27. if (ptr) {
  28. delete TAllocation::ObjectFromData(ptr);
  29. }
  30. }
  31. private:
  32. TIntrusiveListWithAutoDelete<TAllocation, TDelete> Allocs_;
  33. };
  34. template <class T>
  35. class TInverseFilter {
  36. class TTrampoLine: public ITrampoLine {
  37. public:
  38. inline TTrampoLine(TInverseFilter* parent)
  39. : Parent_(parent)
  40. {
  41. }
  42. void DoRun() override {
  43. Parent_->RunFilter();
  44. }
  45. private:
  46. TInverseFilter* Parent_;
  47. };
  48. class TInput: public IInputStream {
  49. public:
  50. inline TInput(TInverseFilter* parent)
  51. : Parent_(parent)
  52. {
  53. }
  54. ~TInput() override {
  55. }
  56. size_t DoRead(void* ptr, size_t len) override {
  57. return Parent_->ReadImpl(ptr, len);
  58. }
  59. private:
  60. TInverseFilter* Parent_;
  61. };
  62. class TOutput: public IOutputStream {
  63. public:
  64. inline TOutput(TInverseFilter* parent)
  65. : Parent_(parent)
  66. {
  67. }
  68. ~TOutput() override {
  69. }
  70. void DoWrite(const void* ptr, size_t len) override {
  71. Parent_->WriteImpl(ptr, len);
  72. }
  73. private:
  74. TInverseFilter* Parent_;
  75. };
  76. public:
  77. inline TInverseFilter(IOutputStream* slave, T* filter)
  78. : Slave_(slave)
  79. , Filter_(filter)
  80. , TrampoLine_(this)
  81. , FilterCtx_(FilterClosure())
  82. , Finished_(false)
  83. , In_(nullptr, 0)
  84. {
  85. }
  86. virtual ~TInverseFilter() {
  87. if (!UncaughtException()) {
  88. try {
  89. Finish();
  90. } catch (...) {
  91. }
  92. } else {
  93. //rely on gc
  94. }
  95. }
  96. inline void Write(const void* ptr, size_t len) {
  97. In_.Reset(ptr, len);
  98. Y_DEFER {
  99. In_.Reset(0, 0);
  100. };
  101. while (In_.Avail()) {
  102. SwitchTo();
  103. }
  104. }
  105. inline void Finish() {
  106. if (!Finished_) {
  107. Finished_ = true;
  108. SwitchTo();
  109. }
  110. }
  111. private:
  112. inline void RunFilter() {
  113. try {
  114. TInput in(this);
  115. TOutput out(this);
  116. (*Filter_)(&in, &out);
  117. } catch (...) {
  118. Err_ = std::current_exception();
  119. }
  120. SwitchFrom();
  121. }
  122. inline TContClosure FilterClosure() {
  123. return {&TrampoLine_, TArrayRef(Stack_, sizeof(Stack_))};
  124. }
  125. inline size_t ReadImpl(void* ptr, size_t len) {
  126. while (!Finished_) {
  127. const size_t ret = In_.Read(ptr, len);
  128. if (ret) {
  129. return ret;
  130. }
  131. SwitchFrom();
  132. }
  133. return 0;
  134. }
  135. inline void WriteImpl(const void* ptr, size_t len) {
  136. Y_ASSERT(!Out_.Avail());
  137. Out_.Reset(ptr, len);
  138. while (Out_.Avail()) {
  139. SwitchFrom();
  140. }
  141. }
  142. inline bool FlushImpl() {
  143. if (Out_.Avail()) {
  144. TransferData(&Out_, Slave_);
  145. Out_.Reset(nullptr, 0);
  146. return true;
  147. }
  148. return false;
  149. }
  150. inline void SwitchTo() {
  151. do {
  152. CurrentCtx_.SwitchTo(&FilterCtx_);
  153. if (Err_) {
  154. Finished_ = true;
  155. std::rethrow_exception(Err_);
  156. }
  157. } while (FlushImpl());
  158. }
  159. inline void SwitchFrom() {
  160. FilterCtx_.SwitchTo(&CurrentCtx_);
  161. }
  162. private:
  163. IOutputStream* Slave_;
  164. T* Filter_;
  165. TTrampoLine TrampoLine_;
  166. char Stack_[16 * 1024];
  167. TContMachineContext FilterCtx_;
  168. TContMachineContext CurrentCtx_;
  169. bool Finished_;
  170. TMemoryInput In_;
  171. TMemoryInput Out_;
  172. std::exception_ptr Err_;
  173. };
  174. class TLzma {
  175. public:
  176. class TLzmaInput: public ISeqInStream {
  177. public:
  178. inline TLzmaInput(IInputStream* slave)
  179. : Slave_(slave)
  180. {
  181. Read = ReadFunc;
  182. }
  183. private:
  184. static inline SRes ReadFunc(const ISeqInStream* p, void* ptr, size_t* len) {
  185. *len = const_cast<TLzmaInput*>(static_cast<const TLzmaInput*>(p))->Slave_->Read(ptr, *len);
  186. return SZ_OK;
  187. }
  188. private:
  189. IInputStream* Slave_;
  190. };
  191. class TLzmaOutput: public ISeqOutStream {
  192. public:
  193. inline TLzmaOutput(IOutputStream* slave)
  194. : Slave_(slave)
  195. {
  196. Write = WriteFunc;
  197. }
  198. private:
  199. static inline size_t WriteFunc(const ISeqOutStream* p, const void* ptr, size_t len) {
  200. const_cast<TLzmaOutput*>(static_cast<const TLzmaOutput*>(p))->Slave_->Write(ptr, len);
  201. return len;
  202. }
  203. private:
  204. IOutputStream* Slave_;
  205. };
  206. class TAlloc: public ISzAlloc {
  207. public:
  208. inline TAlloc() {
  209. Alloc = AllocFunc;
  210. Free = FreeFunc;
  211. }
  212. private:
  213. static void* AllocFunc(const ISzAlloc* t, size_t len) {
  214. return static_cast<TAlloc*>(((ISzAlloc*)t))->Gc_.Allocate(len);
  215. }
  216. static void FreeFunc(const ISzAlloc* t, void* p) {
  217. static_cast<TAlloc*>(((ISzAlloc*)t))->Gc_.Deallocate(p);
  218. }
  219. private:
  220. TMemoryGc Gc_;
  221. };
  222. inline ISzAlloc* Alloc() noexcept {
  223. return &Alloc_;
  224. }
  225. static inline void Check(SRes r) {
  226. if (r != SZ_OK) {
  227. ythrow yexception() << "lzma error(" << r << ")";
  228. }
  229. }
  230. private:
  231. TAlloc Alloc_;
  232. };
  233. class TLzmaCompressBase: public TLzma {
  234. public:
  235. inline TLzmaCompressBase(size_t level)
  236. : H_(LzmaEnc_Create(Alloc()))
  237. {
  238. if (!H_) {
  239. ythrow yexception() << "can not init lzma engine";
  240. }
  241. LzmaEncProps_Init(&Props_);
  242. Props_.level = level;
  243. Props_.dictSize = 0;
  244. Props_.lc = -1;
  245. Props_.lp = -1;
  246. Props_.pb = -1;
  247. Props_.fb = -1;
  248. Props_.numThreads = -1;
  249. Props_.writeEndMark = 1;
  250. Check(LzmaEnc_SetProps(H_, &Props_));
  251. size_t bufLen = sizeof(PropsBuf_);
  252. Zero(PropsBuf_);
  253. Check(LzmaEnc_WriteProperties(H_, PropsBuf_, &bufLen));
  254. }
  255. inline ~TLzmaCompressBase() {
  256. LzmaEnc_Destroy(H_, Alloc(), Alloc());
  257. }
  258. inline void operator()(IInputStream* in, IOutputStream* out) {
  259. TLzmaInput input(in);
  260. TLzmaOutput output(out);
  261. out->Write(PropsBuf_, sizeof(PropsBuf_));
  262. Check(LzmaEnc_Encode(H_, &output, &input, nullptr, Alloc(), Alloc()));
  263. }
  264. private:
  265. CLzmaEncHandle H_;
  266. CLzmaEncProps Props_;
  267. Byte PropsBuf_[LZMA_PROPS_SIZE];
  268. };
  269. }
  270. class TLzmaCompress::TImpl: public TLzmaCompressBase, public TInverseFilter<TLzmaCompressBase> {
  271. public:
  272. inline TImpl(IOutputStream* slave, size_t level)
  273. : TLzmaCompressBase(level)
  274. , TInverseFilter<TLzmaCompressBase>(slave, this)
  275. {
  276. }
  277. };
  278. class TLzmaDecompress::TImpl: public TLzma {
  279. public:
  280. inline TImpl()
  281. : InBegin_(nullptr)
  282. , InEnd_(nullptr)
  283. {
  284. LzmaDec_Construct(&H_);
  285. }
  286. inline virtual ~TImpl() {
  287. LzmaDec_Free(&H_, Alloc());
  288. }
  289. inline size_t Read(void* ptr, size_t len) {
  290. Byte* pos = (Byte*)ptr;
  291. Byte* end = pos + len;
  292. retry:
  293. size_t availLen = InEnd_ - InBegin_;
  294. size_t bufLen = end - pos;
  295. ELzmaStatus status;
  296. Check(LzmaDec_DecodeToBuf(&H_, pos, &bufLen, (Byte*)InBegin_, &availLen, LZMA_FINISH_ANY, &status));
  297. InBegin_ += availLen;
  298. pos += bufLen;
  299. if (status == LZMA_STATUS_NEEDS_MORE_INPUT) {
  300. Y_ASSERT(InEnd_ == InBegin_);
  301. if (!Fill()) {
  302. ythrow yexception() << "incomplete lzma stream";
  303. }
  304. goto retry;
  305. }
  306. return pos - (Byte*)ptr;
  307. }
  308. private:
  309. virtual bool Fill() = 0;
  310. protected:
  311. CLzmaDec H_;
  312. char* InBegin_;
  313. char* InEnd_;
  314. };
  315. class TLzmaDecompress::TImplStream: public TImpl {
  316. public:
  317. inline TImplStream(IInputStream* slave)
  318. : Slave_(slave)
  319. {
  320. Byte buf[LZMA_PROPS_SIZE];
  321. if (Slave_->Load(buf, sizeof(buf)) != sizeof(buf))
  322. ythrow yexception() << "can't read lzma header";
  323. Check(LzmaDec_Allocate(&H_, buf, sizeof(buf), Alloc()));
  324. LzmaDec_Init(&H_);
  325. }
  326. private:
  327. bool Fill() override {
  328. size_t size = Slave_->Read(In_, sizeof(In_));
  329. InBegin_ = In_;
  330. InEnd_ = In_ + size;
  331. return size;
  332. }
  333. private:
  334. IInputStream* Slave_;
  335. char In_[4096];
  336. };
  337. class TLzmaDecompress::TImplZeroCopy: public TLzmaDecompress::TImpl {
  338. public:
  339. inline TImplZeroCopy(IZeroCopyInput* in)
  340. : Input_(in)
  341. {
  342. if (!Fill())
  343. ythrow yexception() << "can't read lzma header";
  344. char buf[LZMA_PROPS_SIZE];
  345. char* header;
  346. if (InEnd_ - InBegin_ >= LZMA_PROPS_SIZE) {
  347. header = InBegin_;
  348. InBegin_ += LZMA_PROPS_SIZE;
  349. } else {
  350. //bad luck, first part is less than header
  351. //try to copy header part by part to the local buffer
  352. const char* end = buf + sizeof(buf);
  353. char* pos = buf;
  354. while (1) {
  355. size_t left = end - pos;
  356. size_t avail = InEnd_ - InBegin_;
  357. if (left < avail) {
  358. memcpy(pos, InBegin_, left);
  359. InBegin_ += left;
  360. break;
  361. } else {
  362. memcpy(pos, InBegin_, avail);
  363. pos += avail;
  364. if (!Fill()) {
  365. ythrow yexception() << "can't read lzma header";
  366. }
  367. }
  368. }
  369. header = buf;
  370. }
  371. Check(LzmaDec_Allocate(&H_, (Byte*)header, LZMA_PROPS_SIZE, Alloc()));
  372. LzmaDec_Init(&H_);
  373. }
  374. private:
  375. bool Fill() override {
  376. size_t size = Input_->Next(&InBegin_);
  377. if (size) {
  378. InEnd_ = InBegin_ + size;
  379. return true;
  380. }
  381. return false;
  382. }
  383. IZeroCopyInput* Input_;
  384. };
  385. TLzmaCompress::TLzmaCompress(IOutputStream* slave, size_t level)
  386. : Impl_(new TImpl(slave, level))
  387. {
  388. }
  389. TLzmaCompress::~TLzmaCompress() {
  390. }
  391. void TLzmaCompress::DoWrite(const void* buf, size_t len) {
  392. if (!Impl_) {
  393. ythrow yexception() << "can not write to finished lzma stream";
  394. }
  395. Impl_->Write(buf, len);
  396. }
  397. void TLzmaCompress::DoFinish() {
  398. THolder<TImpl> impl(Impl_.Release());
  399. if (impl) {
  400. impl->Finish();
  401. }
  402. }
  403. TLzmaDecompress::TLzmaDecompress(IInputStream* slave)
  404. : Impl_(new TImplStream(slave))
  405. {
  406. }
  407. TLzmaDecompress::TLzmaDecompress(IZeroCopyInput* input)
  408. : Impl_(new TImplZeroCopy(input))
  409. {
  410. }
  411. TLzmaDecompress::~TLzmaDecompress() {
  412. }
  413. size_t TLzmaDecompress::DoRead(void* buf, size_t len) {
  414. return Impl_->Read(buf, len);
  415. }