_lzmamodule.c 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663
  1. /* _lzma - Low-level Python interface to liblzma.
  2. Initial implementation by Per Øyvind Karlsen.
  3. Rewritten by Nadeem Vawda.
  4. */
  5. #define PY_SSIZE_T_CLEAN
  6. #include "Python.h"
  7. #include "structmember.h" // PyMemberDef
  8. #include <stdlib.h> // free()
  9. #include <string.h>
  10. #include <lzma.h>
  11. // Blocks output buffer wrappers
  12. #include "pycore_blocks_output_buffer.h"
  13. #if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
  14. #error "The maximum block size accepted by liblzma is SIZE_MAX."
  15. #endif
  16. /* On success, return value >= 0
  17. On failure, return -1 */
  18. static inline Py_ssize_t
  19. OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
  20. uint8_t **next_out, size_t *avail_out)
  21. {
  22. Py_ssize_t allocated;
  23. allocated = _BlocksOutputBuffer_InitAndGrow(
  24. buffer, max_length, (void**) next_out);
  25. *avail_out = (size_t) allocated;
  26. return allocated;
  27. }
  28. /* On success, return value >= 0
  29. On failure, return -1 */
  30. static inline Py_ssize_t
  31. OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
  32. uint8_t **next_out, size_t *avail_out)
  33. {
  34. Py_ssize_t allocated;
  35. allocated = _BlocksOutputBuffer_Grow(
  36. buffer, (void**) next_out, (Py_ssize_t) *avail_out);
  37. *avail_out = (size_t) allocated;
  38. return allocated;
  39. }
  40. static inline Py_ssize_t
  41. OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
  42. {
  43. return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
  44. }
  45. static inline PyObject *
  46. OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
  47. {
  48. return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
  49. }
  50. static inline void
  51. OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
  52. {
  53. _BlocksOutputBuffer_OnError(buffer);
  54. }
  55. #define ACQUIRE_LOCK(obj) do { \
  56. if (!PyThread_acquire_lock((obj)->lock, 0)) { \
  57. Py_BEGIN_ALLOW_THREADS \
  58. PyThread_acquire_lock((obj)->lock, 1); \
  59. Py_END_ALLOW_THREADS \
  60. } } while (0)
  61. #define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
  62. typedef struct {
  63. PyTypeObject *lzma_compressor_type;
  64. PyTypeObject *lzma_decompressor_type;
  65. PyObject *error;
  66. PyObject *empty_tuple;
  67. } _lzma_state;
  68. static inline _lzma_state*
  69. get_lzma_state(PyObject *module)
  70. {
  71. void *state = PyModule_GetState(module);
  72. assert(state != NULL);
  73. return (_lzma_state *)state;
  74. }
  75. /* Container formats: */
  76. enum {
  77. FORMAT_AUTO,
  78. FORMAT_XZ,
  79. FORMAT_ALONE,
  80. FORMAT_RAW,
  81. };
  82. #define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
  83. typedef struct {
  84. PyObject_HEAD
  85. lzma_allocator alloc;
  86. lzma_stream lzs;
  87. int flushed;
  88. PyThread_type_lock lock;
  89. } Compressor;
  90. typedef struct {
  91. PyObject_HEAD
  92. lzma_allocator alloc;
  93. lzma_stream lzs;
  94. int check;
  95. char eof;
  96. PyObject *unused_data;
  97. char needs_input;
  98. uint8_t *input_buffer;
  99. size_t input_buffer_size;
  100. PyThread_type_lock lock;
  101. } Decompressor;
  102. /* Helper functions. */
  103. static int
  104. catch_lzma_error(_lzma_state *state, lzma_ret lzret)
  105. {
  106. switch (lzret) {
  107. case LZMA_OK:
  108. case LZMA_GET_CHECK:
  109. case LZMA_NO_CHECK:
  110. case LZMA_STREAM_END:
  111. return 0;
  112. case LZMA_UNSUPPORTED_CHECK:
  113. PyErr_SetString(state->error, "Unsupported integrity check");
  114. return 1;
  115. case LZMA_MEM_ERROR:
  116. PyErr_NoMemory();
  117. return 1;
  118. case LZMA_MEMLIMIT_ERROR:
  119. PyErr_SetString(state->error, "Memory usage limit exceeded");
  120. return 1;
  121. case LZMA_FORMAT_ERROR:
  122. PyErr_SetString(state->error, "Input format not supported by decoder");
  123. return 1;
  124. case LZMA_OPTIONS_ERROR:
  125. PyErr_SetString(state->error, "Invalid or unsupported options");
  126. return 1;
  127. case LZMA_DATA_ERROR:
  128. PyErr_SetString(state->error, "Corrupt input data");
  129. return 1;
  130. case LZMA_BUF_ERROR:
  131. PyErr_SetString(state->error, "Insufficient buffer space");
  132. return 1;
  133. case LZMA_PROG_ERROR:
  134. PyErr_SetString(state->error, "Internal error");
  135. return 1;
  136. default:
  137. PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
  138. return 1;
  139. }
  140. }
  141. static void*
  142. PyLzma_Malloc(void *opaque, size_t items, size_t size)
  143. {
  144. if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
  145. return NULL;
  146. }
  147. /* PyMem_Malloc() cannot be used:
  148. the GIL is not held when lzma_code() is called */
  149. return PyMem_RawMalloc(items * size);
  150. }
  151. static void
  152. PyLzma_Free(void *opaque, void *ptr)
  153. {
  154. PyMem_RawFree(ptr);
  155. }
  156. /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
  157. since the predefined conversion specifiers do not suit our needs:
  158. uint32_t - the "I" (unsigned int) specifier is the right size, but
  159. silently ignores overflows on conversion.
  160. lzma_vli - the "K" (unsigned long long) specifier is the right
  161. size, but like "I" it silently ignores overflows on conversion.
  162. lzma_mode and lzma_match_finder - these are enumeration types, and
  163. so the size of each is implementation-defined. Worse, different
  164. enum types can be of different sizes within the same program, so
  165. to be strictly correct, we need to define two separate converters.
  166. */
  167. #define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
  168. static int \
  169. FUNCNAME(PyObject *obj, void *ptr) \
  170. { \
  171. unsigned long long val; \
  172. \
  173. val = PyLong_AsUnsignedLongLong(obj); \
  174. if (PyErr_Occurred()) \
  175. return 0; \
  176. if ((unsigned long long)(TYPE)val != val) { \
  177. PyErr_SetString(PyExc_OverflowError, \
  178. "Value too large for " #TYPE " type"); \
  179. return 0; \
  180. } \
  181. *(TYPE *)ptr = (TYPE)val; \
  182. return 1; \
  183. }
  184. INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
  185. INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
  186. INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
  187. INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
  188. #undef INT_TYPE_CONVERTER_FUNC
  189. /* Filter specifier parsing.
  190. This code handles converting filter specifiers (Python dicts) into
  191. the C lzma_filter structs expected by liblzma. */
  192. static void *
  193. parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
  194. {
  195. static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
  196. "pb", "mode", "nice_len", "mf", "depth", NULL};
  197. PyObject *id;
  198. PyObject *preset_obj;
  199. uint32_t preset = LZMA_PRESET_DEFAULT;
  200. lzma_options_lzma *options;
  201. /* First, fill in default values for all the options using a preset.
  202. Then, override the defaults with any values given by the caller. */
  203. preset_obj = PyMapping_GetItemString(spec, "preset");
  204. if (preset_obj == NULL) {
  205. if (PyErr_ExceptionMatches(PyExc_KeyError)) {
  206. PyErr_Clear();
  207. }
  208. else {
  209. return NULL;
  210. }
  211. } else {
  212. int ok = uint32_converter(preset_obj, &preset);
  213. Py_DECREF(preset_obj);
  214. if (!ok) {
  215. return NULL;
  216. }
  217. }
  218. options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
  219. if (options == NULL) {
  220. return PyErr_NoMemory();
  221. }
  222. if (lzma_lzma_preset(options, preset)) {
  223. PyMem_Free(options);
  224. PyErr_Format(state->error, "Invalid compression preset: %u", preset);
  225. return NULL;
  226. }
  227. if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
  228. "|OOO&O&O&O&O&O&O&O&", optnames,
  229. &id, &preset_obj,
  230. uint32_converter, &options->dict_size,
  231. uint32_converter, &options->lc,
  232. uint32_converter, &options->lp,
  233. uint32_converter, &options->pb,
  234. lzma_mode_converter, &options->mode,
  235. uint32_converter, &options->nice_len,
  236. lzma_mf_converter, &options->mf,
  237. uint32_converter, &options->depth)) {
  238. PyErr_SetString(PyExc_ValueError,
  239. "Invalid filter specifier for LZMA filter");
  240. PyMem_Free(options);
  241. return NULL;
  242. }
  243. return options;
  244. }
  245. static void *
  246. parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
  247. {
  248. static char *optnames[] = {"id", "dist", NULL};
  249. PyObject *id;
  250. uint32_t dist = 1;
  251. lzma_options_delta *options;
  252. if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
  253. &id, uint32_converter, &dist)) {
  254. PyErr_SetString(PyExc_ValueError,
  255. "Invalid filter specifier for delta filter");
  256. return NULL;
  257. }
  258. options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
  259. if (options == NULL) {
  260. return PyErr_NoMemory();
  261. }
  262. options->type = LZMA_DELTA_TYPE_BYTE;
  263. options->dist = dist;
  264. return options;
  265. }
  266. static void *
  267. parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
  268. {
  269. static char *optnames[] = {"id", "start_offset", NULL};
  270. PyObject *id;
  271. uint32_t start_offset = 0;
  272. lzma_options_bcj *options;
  273. if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
  274. &id, uint32_converter, &start_offset)) {
  275. PyErr_SetString(PyExc_ValueError,
  276. "Invalid filter specifier for BCJ filter");
  277. return NULL;
  278. }
  279. options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
  280. if (options == NULL) {
  281. return PyErr_NoMemory();
  282. }
  283. options->start_offset = start_offset;
  284. return options;
  285. }
  286. static int
  287. lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
  288. {
  289. lzma_filter *f = (lzma_filter *)ptr;
  290. PyObject *id_obj;
  291. if (!PyMapping_Check(spec)) {
  292. PyErr_SetString(PyExc_TypeError,
  293. "Filter specifier must be a dict or dict-like object");
  294. return 0;
  295. }
  296. id_obj = PyMapping_GetItemString(spec, "id");
  297. if (id_obj == NULL) {
  298. if (PyErr_ExceptionMatches(PyExc_KeyError))
  299. PyErr_SetString(PyExc_ValueError,
  300. "Filter specifier must have an \"id\" entry");
  301. return 0;
  302. }
  303. f->id = PyLong_AsUnsignedLongLong(id_obj);
  304. Py_DECREF(id_obj);
  305. if (PyErr_Occurred()) {
  306. return 0;
  307. }
  308. switch (f->id) {
  309. case LZMA_FILTER_LZMA1:
  310. case LZMA_FILTER_LZMA2:
  311. f->options = parse_filter_spec_lzma(state, spec);
  312. return f->options != NULL;
  313. case LZMA_FILTER_DELTA:
  314. f->options = parse_filter_spec_delta(state, spec);
  315. return f->options != NULL;
  316. case LZMA_FILTER_X86:
  317. case LZMA_FILTER_POWERPC:
  318. case LZMA_FILTER_IA64:
  319. case LZMA_FILTER_ARM:
  320. case LZMA_FILTER_ARMTHUMB:
  321. case LZMA_FILTER_SPARC:
  322. f->options = parse_filter_spec_bcj(state, spec);
  323. return f->options != NULL;
  324. default:
  325. PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
  326. return 0;
  327. }
  328. }
  329. static void
  330. free_filter_chain(lzma_filter filters[])
  331. {
  332. for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
  333. PyMem_Free(filters[i].options);
  334. }
  335. }
  336. static int
  337. parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
  338. {
  339. Py_ssize_t i, num_filters;
  340. num_filters = PySequence_Length(filterspecs);
  341. if (num_filters == -1) {
  342. return -1;
  343. }
  344. if (num_filters > LZMA_FILTERS_MAX) {
  345. PyErr_Format(PyExc_ValueError,
  346. "Too many filters - liblzma supports a maximum of %d",
  347. LZMA_FILTERS_MAX);
  348. return -1;
  349. }
  350. for (i = 0; i < num_filters; i++) {
  351. int ok = 1;
  352. PyObject *spec = PySequence_GetItem(filterspecs, i);
  353. if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
  354. ok = 0;
  355. }
  356. Py_XDECREF(spec);
  357. if (!ok) {
  358. filters[i].id = LZMA_VLI_UNKNOWN;
  359. free_filter_chain(filters);
  360. return -1;
  361. }
  362. }
  363. filters[num_filters].id = LZMA_VLI_UNKNOWN;
  364. return 0;
  365. }
  366. /* Filter specifier construction.
  367. This code handles converting C lzma_filter structs into
  368. Python-level filter specifiers (represented as dicts). */
  369. static int
  370. spec_add_field(PyObject *spec, const char *key, unsigned long long value)
  371. {
  372. PyObject *value_object = PyLong_FromUnsignedLongLong(value);
  373. if (value_object == NULL) {
  374. return -1;
  375. }
  376. PyObject *key_object = PyUnicode_InternFromString(key);
  377. if (key_object == NULL) {
  378. Py_DECREF(value_object);
  379. return -1;
  380. }
  381. int status = PyDict_SetItem(spec, key_object, value_object);
  382. Py_DECREF(key_object);
  383. Py_DECREF(value_object);
  384. return status;
  385. }
  386. static PyObject *
  387. build_filter_spec(const lzma_filter *f)
  388. {
  389. PyObject *spec;
  390. spec = PyDict_New();
  391. if (spec == NULL) {
  392. return NULL;
  393. }
  394. #define ADD_FIELD(SOURCE, FIELD) \
  395. do { \
  396. if (spec_add_field(spec, #FIELD, SOURCE->FIELD) == -1) \
  397. goto error;\
  398. } while (0)
  399. ADD_FIELD(f, id);
  400. switch (f->id) {
  401. /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
  402. lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
  403. dict_size field is used. */
  404. case LZMA_FILTER_LZMA1: {
  405. lzma_options_lzma *options = f->options;
  406. ADD_FIELD(options, lc);
  407. ADD_FIELD(options, lp);
  408. ADD_FIELD(options, pb);
  409. ADD_FIELD(options, dict_size);
  410. break;
  411. }
  412. case LZMA_FILTER_LZMA2: {
  413. lzma_options_lzma *options = f->options;
  414. ADD_FIELD(options, dict_size);
  415. break;
  416. }
  417. case LZMA_FILTER_DELTA: {
  418. lzma_options_delta *options = f->options;
  419. ADD_FIELD(options, dist);
  420. break;
  421. }
  422. case LZMA_FILTER_X86:
  423. case LZMA_FILTER_POWERPC:
  424. case LZMA_FILTER_IA64:
  425. case LZMA_FILTER_ARM:
  426. case LZMA_FILTER_ARMTHUMB:
  427. case LZMA_FILTER_SPARC: {
  428. lzma_options_bcj *options = f->options;
  429. if (options) {
  430. ADD_FIELD(options, start_offset);
  431. }
  432. break;
  433. }
  434. default:
  435. PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
  436. goto error;
  437. }
  438. #undef ADD_FIELD
  439. return spec;
  440. error:
  441. Py_DECREF(spec);
  442. return NULL;
  443. }
  444. /*[clinic input]
  445. module _lzma
  446. class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
  447. class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
  448. [clinic start generated code]*/
  449. /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
  450. #include "clinic/_lzmamodule.c.h"
  451. /*[python input]
  452. class lzma_vli_converter(CConverter):
  453. type = 'lzma_vli'
  454. converter = 'lzma_vli_converter'
  455. class lzma_filter_converter(CConverter):
  456. type = 'lzma_filter'
  457. converter = 'lzma_filter_converter'
  458. c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
  459. def cleanup(self):
  460. name = ensure_legal_c_identifier(self.name)
  461. return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
  462. ' PyMem_Free(%(name)s.options);\n') % {'name': name}
  463. [python start generated code]*/
  464. /*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
  465. /* LZMACompressor class. */
  466. static PyObject *
  467. compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
  468. {
  469. PyObject *result;
  470. _BlocksOutputBuffer buffer = {.list = NULL};
  471. _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
  472. assert(state != NULL);
  473. if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
  474. goto error;
  475. }
  476. c->lzs.next_in = data;
  477. c->lzs.avail_in = len;
  478. for (;;) {
  479. lzma_ret lzret;
  480. Py_BEGIN_ALLOW_THREADS
  481. lzret = lzma_code(&c->lzs, action);
  482. Py_END_ALLOW_THREADS
  483. if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
  484. lzret = LZMA_OK; /* That wasn't a real error */
  485. }
  486. if (catch_lzma_error(state, lzret)) {
  487. goto error;
  488. }
  489. if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
  490. (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
  491. break;
  492. } else if (c->lzs.avail_out == 0) {
  493. if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
  494. goto error;
  495. }
  496. }
  497. }
  498. result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
  499. if (result != NULL) {
  500. return result;
  501. }
  502. error:
  503. OutputBuffer_OnError(&buffer);
  504. return NULL;
  505. }
  506. /*[clinic input]
  507. _lzma.LZMACompressor.compress
  508. data: Py_buffer
  509. /
  510. Provide data to the compressor object.
  511. Returns a chunk of compressed data if possible, or b'' otherwise.
  512. When you have finished providing data to the compressor, call the
  513. flush() method to finish the compression process.
  514. [clinic start generated code]*/
  515. static PyObject *
  516. _lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
  517. /*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
  518. {
  519. PyObject *result = NULL;
  520. ACQUIRE_LOCK(self);
  521. if (self->flushed) {
  522. PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
  523. }
  524. else {
  525. result = compress(self, data->buf, data->len, LZMA_RUN);
  526. }
  527. RELEASE_LOCK(self);
  528. return result;
  529. }
  530. /*[clinic input]
  531. _lzma.LZMACompressor.flush
  532. Finish the compression process.
  533. Returns the compressed data left in internal buffers.
  534. The compressor object may not be used after this method is called.
  535. [clinic start generated code]*/
  536. static PyObject *
  537. _lzma_LZMACompressor_flush_impl(Compressor *self)
  538. /*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
  539. {
  540. PyObject *result = NULL;
  541. ACQUIRE_LOCK(self);
  542. if (self->flushed) {
  543. PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
  544. } else {
  545. self->flushed = 1;
  546. result = compress(self, NULL, 0, LZMA_FINISH);
  547. }
  548. RELEASE_LOCK(self);
  549. return result;
  550. }
  551. static int
  552. Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
  553. int check, uint32_t preset, PyObject *filterspecs)
  554. {
  555. lzma_ret lzret;
  556. if (filterspecs == Py_None) {
  557. lzret = lzma_easy_encoder(lzs, preset, check);
  558. } else {
  559. lzma_filter filters[LZMA_FILTERS_MAX + 1];
  560. if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
  561. return -1;
  562. lzret = lzma_stream_encoder(lzs, filters, check);
  563. free_filter_chain(filters);
  564. }
  565. if (catch_lzma_error(state, lzret)) {
  566. return -1;
  567. }
  568. else {
  569. return 0;
  570. }
  571. }
  572. static int
  573. Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
  574. {
  575. lzma_ret lzret;
  576. if (filterspecs == Py_None) {
  577. lzma_options_lzma options;
  578. if (lzma_lzma_preset(&options, preset)) {
  579. PyErr_Format(state->error, "Invalid compression preset: %u", preset);
  580. return -1;
  581. }
  582. lzret = lzma_alone_encoder(lzs, &options);
  583. } else {
  584. lzma_filter filters[LZMA_FILTERS_MAX + 1];
  585. if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
  586. return -1;
  587. if (filters[0].id == LZMA_FILTER_LZMA1 &&
  588. filters[1].id == LZMA_VLI_UNKNOWN) {
  589. lzret = lzma_alone_encoder(lzs, filters[0].options);
  590. } else {
  591. PyErr_SetString(PyExc_ValueError,
  592. "Invalid filter chain for FORMAT_ALONE - "
  593. "must be a single LZMA1 filter");
  594. lzret = LZMA_PROG_ERROR;
  595. }
  596. free_filter_chain(filters);
  597. }
  598. if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
  599. return -1;
  600. }
  601. else {
  602. return 0;
  603. }
  604. }
  605. static int
  606. Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
  607. {
  608. lzma_filter filters[LZMA_FILTERS_MAX + 1];
  609. lzma_ret lzret;
  610. if (filterspecs == Py_None) {
  611. PyErr_SetString(PyExc_ValueError,
  612. "Must specify filters for FORMAT_RAW");
  613. return -1;
  614. }
  615. if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
  616. return -1;
  617. }
  618. lzret = lzma_raw_encoder(lzs, filters);
  619. free_filter_chain(filters);
  620. if (catch_lzma_error(state, lzret)) {
  621. return -1;
  622. }
  623. else {
  624. return 0;
  625. }
  626. }
  627. /*[-clinic input]
  628. @classmethod
  629. _lzma.LZMACompressor.__new__
  630. format: int(c_default="FORMAT_XZ") = FORMAT_XZ
  631. The container format to use for the output. This can
  632. be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
  633. check: int(c_default="-1") = unspecified
  634. The integrity check to use. For FORMAT_XZ, the default
  635. is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity
  636. checks; for these formats, check must be omitted, or be CHECK_NONE.
  637. preset: object = None
  638. If provided should be an integer in the range 0-9, optionally
  639. OR-ed with the constant PRESET_EXTREME.
  640. filters: object = None
  641. If provided should be a sequence of dicts. Each dict should
  642. have an entry for "id" indicating the ID of the filter, plus
  643. additional entries for options to the filter.
  644. Create a compressor object for compressing data incrementally.
  645. The settings used by the compressor can be specified either as a
  646. preset compression level (with the 'preset' argument), or in detail
  647. as a custom filter chain (with the 'filters' argument). For FORMAT_XZ
  648. and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
  649. level. For FORMAT_RAW, the caller must always specify a filter chain;
  650. the raw compressor does not support preset compression levels.
  651. For one-shot compression, use the compress() function instead.
  652. [-clinic start generated code]*/
  653. static PyObject *
  654. Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
  655. {
  656. static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
  657. int format = FORMAT_XZ;
  658. int check = -1;
  659. uint32_t preset = LZMA_PRESET_DEFAULT;
  660. PyObject *preset_obj = Py_None;
  661. PyObject *filterspecs = Py_None;
  662. Compressor *self;
  663. _lzma_state *state = PyType_GetModuleState(type);
  664. assert(state != NULL);
  665. if (!PyArg_ParseTupleAndKeywords(args, kwargs,
  666. "|iiOO:LZMACompressor", arg_names,
  667. &format, &check, &preset_obj,
  668. &filterspecs)) {
  669. return NULL;
  670. }
  671. if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
  672. PyErr_SetString(PyExc_ValueError,
  673. "Integrity checks are only supported by FORMAT_XZ");
  674. return NULL;
  675. }
  676. if (preset_obj != Py_None && filterspecs != Py_None) {
  677. PyErr_SetString(PyExc_ValueError,
  678. "Cannot specify both preset and filter chain");
  679. return NULL;
  680. }
  681. if (preset_obj != Py_None && !uint32_converter(preset_obj, &preset)) {
  682. return NULL;
  683. }
  684. assert(type != NULL && type->tp_alloc != NULL);
  685. self = (Compressor *)type->tp_alloc(type, 0);
  686. if (self == NULL) {
  687. return NULL;
  688. }
  689. self->alloc.opaque = NULL;
  690. self->alloc.alloc = PyLzma_Malloc;
  691. self->alloc.free = PyLzma_Free;
  692. self->lzs.allocator = &self->alloc;
  693. self->lock = PyThread_allocate_lock();
  694. if (self->lock == NULL) {
  695. Py_DECREF(self);
  696. PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
  697. return NULL;
  698. }
  699. self->flushed = 0;
  700. switch (format) {
  701. case FORMAT_XZ:
  702. if (check == -1) {
  703. check = LZMA_CHECK_CRC64;
  704. }
  705. if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
  706. goto error;
  707. }
  708. break;
  709. case FORMAT_ALONE:
  710. if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
  711. goto error;
  712. }
  713. break;
  714. case FORMAT_RAW:
  715. if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
  716. goto error;
  717. }
  718. break;
  719. default:
  720. PyErr_Format(PyExc_ValueError,
  721. "Invalid container format: %d", format);
  722. goto error;
  723. }
  724. return (PyObject *)self;
  725. error:
  726. Py_DECREF(self);
  727. return NULL;
  728. }
  729. static void
  730. Compressor_dealloc(Compressor *self)
  731. {
  732. lzma_end(&self->lzs);
  733. if (self->lock != NULL) {
  734. PyThread_free_lock(self->lock);
  735. }
  736. PyTypeObject *tp = Py_TYPE(self);
  737. tp->tp_free((PyObject *)self);
  738. Py_DECREF(tp);
  739. }
  740. static PyMethodDef Compressor_methods[] = {
  741. _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
  742. _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
  743. {NULL}
  744. };
  745. static int
  746. Compressor_traverse(Compressor *self, visitproc visit, void *arg)
  747. {
  748. Py_VISIT(Py_TYPE(self));
  749. return 0;
  750. }
  751. PyDoc_STRVAR(Compressor_doc,
  752. "LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
  753. "\n"
  754. "Create a compressor object for compressing data incrementally.\n"
  755. "\n"
  756. "format specifies the container format to use for the output. This can\n"
  757. "be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
  758. "\n"
  759. "check specifies the integrity check to use. For FORMAT_XZ, the default\n"
  760. "is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
  761. "checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
  762. "\n"
  763. "The settings used by the compressor can be specified either as a\n"
  764. "preset compression level (with the 'preset' argument), or in detail\n"
  765. "as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
  766. "and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
  767. "level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
  768. "the raw compressor does not support preset compression levels.\n"
  769. "\n"
  770. "preset (if provided) should be an integer in the range 0-9, optionally\n"
  771. "OR-ed with the constant PRESET_EXTREME.\n"
  772. "\n"
  773. "filters (if provided) should be a sequence of dicts. Each dict should\n"
  774. "have an entry for \"id\" indicating the ID of the filter, plus\n"
  775. "additional entries for options to the filter.\n"
  776. "\n"
  777. "For one-shot compression, use the compress() function instead.\n");
  778. static PyType_Slot lzma_compressor_type_slots[] = {
  779. {Py_tp_dealloc, Compressor_dealloc},
  780. {Py_tp_methods, Compressor_methods},
  781. {Py_tp_new, Compressor_new},
  782. {Py_tp_doc, (char *)Compressor_doc},
  783. {Py_tp_traverse, Compressor_traverse},
  784. {0, 0}
  785. };
  786. static PyType_Spec lzma_compressor_type_spec = {
  787. .name = "_lzma.LZMACompressor",
  788. .basicsize = sizeof(Compressor),
  789. // Calling PyType_GetModuleState() on a subclass is not safe.
  790. // lzma_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
  791. // which prevents to create a subclass.
  792. // So calling PyType_GetModuleState() in this file is always safe.
  793. .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
  794. .slots = lzma_compressor_type_slots,
  795. };
  796. /* LZMADecompressor class. */
  797. /* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
  798. buffer is allocated dynamically and returned. At most max_length bytes are
  799. returned, so some of the input may not be consumed. d->lzs.next_in and
  800. d->lzs.avail_in are updated to reflect the consumed input. */
  801. static PyObject*
  802. decompress_buf(Decompressor *d, Py_ssize_t max_length)
  803. {
  804. PyObject *result;
  805. lzma_stream *lzs = &d->lzs;
  806. _BlocksOutputBuffer buffer = {.list = NULL};
  807. _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
  808. assert(state != NULL);
  809. if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
  810. goto error;
  811. }
  812. for (;;) {
  813. lzma_ret lzret;
  814. Py_BEGIN_ALLOW_THREADS
  815. lzret = lzma_code(lzs, LZMA_RUN);
  816. Py_END_ALLOW_THREADS
  817. if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
  818. lzret = LZMA_OK; /* That wasn't a real error */
  819. }
  820. if (catch_lzma_error(state, lzret)) {
  821. goto error;
  822. }
  823. if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
  824. d->check = lzma_get_check(&d->lzs);
  825. }
  826. if (lzret == LZMA_STREAM_END) {
  827. d->eof = 1;
  828. break;
  829. } else if (lzs->avail_out == 0) {
  830. /* Need to check lzs->avail_out before lzs->avail_in.
  831. Maybe lzs's internal state still have a few bytes
  832. can be output, grow the output buffer and continue
  833. if max_lengh < 0. */
  834. if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
  835. break;
  836. }
  837. if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
  838. goto error;
  839. }
  840. } else if (lzs->avail_in == 0) {
  841. break;
  842. }
  843. }
  844. result = OutputBuffer_Finish(&buffer, lzs->avail_out);
  845. if (result != NULL) {
  846. return result;
  847. }
  848. error:
  849. OutputBuffer_OnError(&buffer);
  850. return NULL;
  851. }
  852. static PyObject *
  853. decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
  854. {
  855. char input_buffer_in_use;
  856. PyObject *result;
  857. lzma_stream *lzs = &d->lzs;
  858. /* Prepend unconsumed input if necessary */
  859. if (lzs->next_in != NULL) {
  860. size_t avail_now, avail_total;
  861. /* Number of bytes we can append to input buffer */
  862. avail_now = (d->input_buffer + d->input_buffer_size)
  863. - (lzs->next_in + lzs->avail_in);
  864. /* Number of bytes we can append if we move existing
  865. contents to beginning of buffer (overwriting
  866. consumed input) */
  867. avail_total = d->input_buffer_size - lzs->avail_in;
  868. if (avail_total < len) {
  869. size_t offset = lzs->next_in - d->input_buffer;
  870. uint8_t *tmp;
  871. size_t new_size = d->input_buffer_size + len - avail_now;
  872. /* Assign to temporary variable first, so we don't
  873. lose address of allocated buffer if realloc fails */
  874. tmp = PyMem_Realloc(d->input_buffer, new_size);
  875. if (tmp == NULL) {
  876. PyErr_SetNone(PyExc_MemoryError);
  877. return NULL;
  878. }
  879. d->input_buffer = tmp;
  880. d->input_buffer_size = new_size;
  881. lzs->next_in = d->input_buffer + offset;
  882. }
  883. else if (avail_now < len) {
  884. memmove(d->input_buffer, lzs->next_in,
  885. lzs->avail_in);
  886. lzs->next_in = d->input_buffer;
  887. }
  888. memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
  889. lzs->avail_in += len;
  890. input_buffer_in_use = 1;
  891. }
  892. else {
  893. lzs->next_in = data;
  894. lzs->avail_in = len;
  895. input_buffer_in_use = 0;
  896. }
  897. result = decompress_buf(d, max_length);
  898. if (result == NULL) {
  899. lzs->next_in = NULL;
  900. return NULL;
  901. }
  902. if (d->eof) {
  903. d->needs_input = 0;
  904. if (lzs->avail_in > 0) {
  905. Py_XSETREF(d->unused_data,
  906. PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
  907. if (d->unused_data == NULL) {
  908. goto error;
  909. }
  910. }
  911. }
  912. else if (lzs->avail_in == 0) {
  913. lzs->next_in = NULL;
  914. if (lzs->avail_out == 0) {
  915. /* (avail_in==0 && avail_out==0)
  916. Maybe lzs's internal state still have a few bytes can
  917. be output, try to output them next time. */
  918. d->needs_input = 0;
  919. /* If max_length < 0, lzs->avail_out always > 0 */
  920. assert(max_length >= 0);
  921. } else {
  922. /* Input buffer exhausted, output buffer has space. */
  923. d->needs_input = 1;
  924. }
  925. }
  926. else {
  927. d->needs_input = 0;
  928. /* If we did not use the input buffer, we now have
  929. to copy the tail from the caller's buffer into the
  930. input buffer */
  931. if (!input_buffer_in_use) {
  932. /* Discard buffer if it's too small
  933. (resizing it may needlessly copy the current contents) */
  934. if (d->input_buffer != NULL &&
  935. d->input_buffer_size < lzs->avail_in) {
  936. PyMem_Free(d->input_buffer);
  937. d->input_buffer = NULL;
  938. }
  939. /* Allocate if necessary */
  940. if (d->input_buffer == NULL) {
  941. d->input_buffer = PyMem_Malloc(lzs->avail_in);
  942. if (d->input_buffer == NULL) {
  943. PyErr_SetNone(PyExc_MemoryError);
  944. goto error;
  945. }
  946. d->input_buffer_size = lzs->avail_in;
  947. }
  948. /* Copy tail */
  949. memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
  950. lzs->next_in = d->input_buffer;
  951. }
  952. }
  953. return result;
  954. error:
  955. Py_XDECREF(result);
  956. return NULL;
  957. }
  958. /*[clinic input]
  959. _lzma.LZMADecompressor.decompress
  960. data: Py_buffer
  961. max_length: Py_ssize_t=-1
  962. Decompress *data*, returning uncompressed data as bytes.
  963. If *max_length* is nonnegative, returns at most *max_length* bytes of
  964. decompressed data. If this limit is reached and further output can be
  965. produced, *self.needs_input* will be set to ``False``. In this case, the next
  966. call to *decompress()* may provide *data* as b'' to obtain more of the output.
  967. If all of the input data was decompressed and returned (either because this
  968. was less than *max_length* bytes, or because *max_length* was negative),
  969. *self.needs_input* will be set to True.
  970. Attempting to decompress data after the end of stream is reached raises an
  971. EOFError. Any data found after the end of the stream is ignored and saved in
  972. the unused_data attribute.
  973. [clinic start generated code]*/
  974. static PyObject *
  975. _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
  976. Py_ssize_t max_length)
  977. /*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
  978. {
  979. PyObject *result = NULL;
  980. ACQUIRE_LOCK(self);
  981. if (self->eof)
  982. PyErr_SetString(PyExc_EOFError, "Already at end of stream");
  983. else
  984. result = decompress(self, data->buf, data->len, max_length);
  985. RELEASE_LOCK(self);
  986. return result;
  987. }
  988. static int
  989. Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
  990. {
  991. lzma_filter filters[LZMA_FILTERS_MAX + 1];
  992. lzma_ret lzret;
  993. if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
  994. return -1;
  995. }
  996. lzret = lzma_raw_decoder(lzs, filters);
  997. free_filter_chain(filters);
  998. if (catch_lzma_error(state, lzret)) {
  999. return -1;
  1000. }
  1001. else {
  1002. return 0;
  1003. }
  1004. }
  1005. /*[clinic input]
  1006. @classmethod
  1007. _lzma.LZMADecompressor.__new__
  1008. format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
  1009. Specifies the container format of the input stream. If this is
  1010. FORMAT_AUTO (the default), the decompressor will automatically detect
  1011. whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with
  1012. FORMAT_RAW cannot be autodetected.
  1013. memlimit: object = None
  1014. Limit the amount of memory used by the decompressor. This will cause
  1015. decompression to fail if the input cannot be decompressed within the
  1016. given limit.
  1017. filters: object = None
  1018. A custom filter chain. This argument is required for FORMAT_RAW, and
  1019. not accepted with any other format. When provided, this should be a
  1020. sequence of dicts, each indicating the ID and options for a single
  1021. filter.
  1022. Create a decompressor object for decompressing data incrementally.
  1023. For one-shot decompression, use the decompress() function instead.
  1024. [clinic start generated code]*/
  1025. static PyObject *
  1026. _lzma_LZMADecompressor_impl(PyTypeObject *type, int format,
  1027. PyObject *memlimit, PyObject *filters)
  1028. /*[clinic end generated code: output=2d46d5e70f10bc7f input=ca40cd1cb1202b0d]*/
  1029. {
  1030. Decompressor *self;
  1031. const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
  1032. uint64_t memlimit_ = UINT64_MAX;
  1033. lzma_ret lzret;
  1034. _lzma_state *state = PyType_GetModuleState(type);
  1035. assert(state != NULL);
  1036. if (memlimit != Py_None) {
  1037. if (format == FORMAT_RAW) {
  1038. PyErr_SetString(PyExc_ValueError,
  1039. "Cannot specify memory limit with FORMAT_RAW");
  1040. return NULL;
  1041. }
  1042. memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
  1043. if (PyErr_Occurred()) {
  1044. return NULL;
  1045. }
  1046. }
  1047. if (format == FORMAT_RAW && filters == Py_None) {
  1048. PyErr_SetString(PyExc_ValueError,
  1049. "Must specify filters for FORMAT_RAW");
  1050. return NULL;
  1051. } else if (format != FORMAT_RAW && filters != Py_None) {
  1052. PyErr_SetString(PyExc_ValueError,
  1053. "Cannot specify filters except with FORMAT_RAW");
  1054. return NULL;
  1055. }
  1056. assert(type != NULL && type->tp_alloc != NULL);
  1057. self = (Decompressor *)type->tp_alloc(type, 0);
  1058. if (self == NULL) {
  1059. return NULL;
  1060. }
  1061. self->alloc.opaque = NULL;
  1062. self->alloc.alloc = PyLzma_Malloc;
  1063. self->alloc.free = PyLzma_Free;
  1064. self->lzs.allocator = &self->alloc;
  1065. self->lzs.next_in = NULL;
  1066. self->lock = PyThread_allocate_lock();
  1067. if (self->lock == NULL) {
  1068. Py_DECREF(self);
  1069. PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
  1070. return NULL;
  1071. }
  1072. self->check = LZMA_CHECK_UNKNOWN;
  1073. self->needs_input = 1;
  1074. self->input_buffer = NULL;
  1075. self->input_buffer_size = 0;
  1076. Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
  1077. if (self->unused_data == NULL) {
  1078. goto error;
  1079. }
  1080. switch (format) {
  1081. case FORMAT_AUTO:
  1082. lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
  1083. if (catch_lzma_error(state, lzret)) {
  1084. goto error;
  1085. }
  1086. break;
  1087. case FORMAT_XZ:
  1088. lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
  1089. if (catch_lzma_error(state, lzret)) {
  1090. goto error;
  1091. }
  1092. break;
  1093. case FORMAT_ALONE:
  1094. self->check = LZMA_CHECK_NONE;
  1095. lzret = lzma_alone_decoder(&self->lzs, memlimit_);
  1096. if (catch_lzma_error(state, lzret)) {
  1097. goto error;
  1098. }
  1099. break;
  1100. case FORMAT_RAW:
  1101. self->check = LZMA_CHECK_NONE;
  1102. if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
  1103. goto error;
  1104. }
  1105. break;
  1106. default:
  1107. PyErr_Format(PyExc_ValueError,
  1108. "Invalid container format: %d", format);
  1109. goto error;
  1110. }
  1111. return (PyObject *)self;
  1112. error:
  1113. Py_DECREF(self);
  1114. return NULL;
  1115. }
  1116. static void
  1117. Decompressor_dealloc(Decompressor *self)
  1118. {
  1119. if(self->input_buffer != NULL)
  1120. PyMem_Free(self->input_buffer);
  1121. lzma_end(&self->lzs);
  1122. Py_CLEAR(self->unused_data);
  1123. if (self->lock != NULL) {
  1124. PyThread_free_lock(self->lock);
  1125. }
  1126. PyTypeObject *tp = Py_TYPE(self);
  1127. tp->tp_free((PyObject *)self);
  1128. Py_DECREF(tp);
  1129. }
  1130. static int
  1131. Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
  1132. {
  1133. Py_VISIT(Py_TYPE(self));
  1134. return 0;
  1135. }
  1136. static PyMethodDef Decompressor_methods[] = {
  1137. _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
  1138. {NULL}
  1139. };
  1140. PyDoc_STRVAR(Decompressor_check_doc,
  1141. "ID of the integrity check used by the input stream.");
  1142. PyDoc_STRVAR(Decompressor_eof_doc,
  1143. "True if the end-of-stream marker has been reached.");
  1144. PyDoc_STRVAR(Decompressor_needs_input_doc,
  1145. "True if more input is needed before more decompressed data can be produced.");
  1146. PyDoc_STRVAR(Decompressor_unused_data_doc,
  1147. "Data found after the end of the compressed stream.");
  1148. static PyMemberDef Decompressor_members[] = {
  1149. {"check", T_INT, offsetof(Decompressor, check), READONLY,
  1150. Decompressor_check_doc},
  1151. {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
  1152. Decompressor_eof_doc},
  1153. {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
  1154. Decompressor_needs_input_doc},
  1155. {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
  1156. Decompressor_unused_data_doc},
  1157. {NULL}
  1158. };
  1159. static PyType_Slot lzma_decompressor_type_slots[] = {
  1160. {Py_tp_dealloc, Decompressor_dealloc},
  1161. {Py_tp_methods, Decompressor_methods},
  1162. {Py_tp_new, _lzma_LZMADecompressor},
  1163. {Py_tp_doc, (char *)_lzma_LZMADecompressor__doc__},
  1164. {Py_tp_traverse, Decompressor_traverse},
  1165. {Py_tp_members, Decompressor_members},
  1166. {0, 0}
  1167. };
  1168. static PyType_Spec lzma_decompressor_type_spec = {
  1169. .name = "_lzma.LZMADecompressor",
  1170. .basicsize = sizeof(Decompressor),
  1171. // Calling PyType_GetModuleState() on a subclass is not safe.
  1172. // lzma_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
  1173. // which prevents to create a subclass.
  1174. // So calling PyType_GetModuleState() in this file is always safe.
  1175. .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
  1176. .slots = lzma_decompressor_type_slots,
  1177. };
  1178. /* Module-level functions. */
  1179. /*[clinic input]
  1180. _lzma.is_check_supported
  1181. check_id: int
  1182. /
  1183. Test whether the given integrity check is supported.
  1184. Always returns True for CHECK_NONE and CHECK_CRC32.
  1185. [clinic start generated code]*/
  1186. static PyObject *
  1187. _lzma_is_check_supported_impl(PyObject *module, int check_id)
  1188. /*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
  1189. {
  1190. return PyBool_FromLong(lzma_check_is_supported(check_id));
  1191. }
  1192. PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
  1193. "_encode_filter_properties($module, filter, /)\n"
  1194. "--\n"
  1195. "\n"
  1196. "Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
  1197. "\n"
  1198. "The result does not include the filter ID itself, only the options.");
  1199. #define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF \
  1200. {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},
  1201. static PyObject *
  1202. _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
  1203. static PyObject *
  1204. _lzma__encode_filter_properties(PyObject *module, PyObject *arg)
  1205. {
  1206. PyObject *return_value = NULL;
  1207. lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
  1208. _lzma_state *state = get_lzma_state(module);
  1209. assert(state != NULL);
  1210. if (!lzma_filter_converter(state, arg, &filter)) {
  1211. goto exit;
  1212. }
  1213. return_value = _lzma__encode_filter_properties_impl(module, filter);
  1214. exit:
  1215. /* Cleanup for filter */
  1216. if (filter.id != LZMA_VLI_UNKNOWN) {
  1217. PyMem_Free(filter.options);
  1218. }
  1219. return return_value;
  1220. }
  1221. static PyObject *
  1222. _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
  1223. {
  1224. lzma_ret lzret;
  1225. uint32_t encoded_size;
  1226. PyObject *result = NULL;
  1227. _lzma_state *state = get_lzma_state(module);
  1228. assert(state != NULL);
  1229. lzret = lzma_properties_size(&encoded_size, &filter);
  1230. if (catch_lzma_error(state, lzret))
  1231. goto error;
  1232. result = PyBytes_FromStringAndSize(NULL, encoded_size);
  1233. if (result == NULL)
  1234. goto error;
  1235. lzret = lzma_properties_encode(
  1236. &filter, (uint8_t *)PyBytes_AS_STRING(result));
  1237. if (catch_lzma_error(state, lzret)) {
  1238. goto error;
  1239. }
  1240. return result;
  1241. error:
  1242. Py_XDECREF(result);
  1243. return NULL;
  1244. }
  1245. /*[clinic input]
  1246. _lzma._decode_filter_properties
  1247. filter_id: lzma_vli
  1248. encoded_props: Py_buffer
  1249. /
  1250. Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
  1251. The result does not include the filter ID itself, only the options.
  1252. [clinic start generated code]*/
  1253. static PyObject *
  1254. _lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
  1255. Py_buffer *encoded_props)
  1256. /*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
  1257. {
  1258. lzma_filter filter;
  1259. lzma_ret lzret;
  1260. PyObject *result = NULL;
  1261. filter.id = filter_id;
  1262. _lzma_state *state = get_lzma_state(module);
  1263. assert(state != NULL);
  1264. lzret = lzma_properties_decode(
  1265. &filter, NULL, encoded_props->buf, encoded_props->len);
  1266. if (catch_lzma_error(state, lzret)) {
  1267. return NULL;
  1268. }
  1269. result = build_filter_spec(&filter);
  1270. /* We use vanilla free() here instead of PyMem_Free() - filter.options was
  1271. allocated by lzma_properties_decode() using the default allocator. */
  1272. free(filter.options);
  1273. return result;
  1274. }
  1275. /* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
  1276. would not work correctly on platforms with 32-bit longs. */
  1277. static int
  1278. module_add_int_constant(PyObject *m, const char *name, long long value)
  1279. {
  1280. PyObject *o = PyLong_FromLongLong(value);
  1281. if (o == NULL) {
  1282. return -1;
  1283. }
  1284. if (PyModule_AddObject(m, name, o) == 0) {
  1285. return 0;
  1286. }
  1287. Py_DECREF(o);
  1288. return -1;
  1289. }
  1290. static int
  1291. lzma_exec(PyObject *module)
  1292. {
  1293. #define ADD_INT_PREFIX_MACRO(module, macro) \
  1294. do { \
  1295. if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) { \
  1296. return -1; \
  1297. } \
  1298. } while(0)
  1299. #define ADD_INT_MACRO(module, macro) \
  1300. do { \
  1301. if (PyModule_AddIntMacro(module, macro) < 0) { \
  1302. return -1; \
  1303. } \
  1304. } while (0)
  1305. _lzma_state *state = get_lzma_state(module);
  1306. state->empty_tuple = PyTuple_New(0);
  1307. if (state->empty_tuple == NULL) {
  1308. return -1;
  1309. }
  1310. ADD_INT_MACRO(module, FORMAT_AUTO);
  1311. ADD_INT_MACRO(module, FORMAT_XZ);
  1312. ADD_INT_MACRO(module, FORMAT_ALONE);
  1313. ADD_INT_MACRO(module, FORMAT_RAW);
  1314. ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
  1315. ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
  1316. ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
  1317. ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
  1318. ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
  1319. ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
  1320. ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
  1321. ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
  1322. ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
  1323. ADD_INT_PREFIX_MACRO(module, FILTER_X86);
  1324. ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
  1325. ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
  1326. ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
  1327. ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
  1328. ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
  1329. ADD_INT_PREFIX_MACRO(module, MF_HC3);
  1330. ADD_INT_PREFIX_MACRO(module, MF_HC4);
  1331. ADD_INT_PREFIX_MACRO(module, MF_BT2);
  1332. ADD_INT_PREFIX_MACRO(module, MF_BT3);
  1333. ADD_INT_PREFIX_MACRO(module, MF_BT4);
  1334. ADD_INT_PREFIX_MACRO(module, MODE_FAST);
  1335. ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
  1336. ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
  1337. ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
  1338. state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
  1339. if (state->error == NULL) {
  1340. return -1;
  1341. }
  1342. if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
  1343. return -1;
  1344. }
  1345. state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
  1346. &lzma_compressor_type_spec, NULL);
  1347. if (state->lzma_compressor_type == NULL) {
  1348. return -1;
  1349. }
  1350. if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
  1351. return -1;
  1352. }
  1353. state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
  1354. &lzma_decompressor_type_spec, NULL);
  1355. if (state->lzma_decompressor_type == NULL) {
  1356. return -1;
  1357. }
  1358. if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
  1359. return -1;
  1360. }
  1361. return 0;
  1362. }
  1363. static PyMethodDef lzma_methods[] = {
  1364. _LZMA_IS_CHECK_SUPPORTED_METHODDEF
  1365. _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
  1366. _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
  1367. {NULL}
  1368. };
  1369. static PyModuleDef_Slot lzma_slots[] = {
  1370. {Py_mod_exec, lzma_exec},
  1371. {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
  1372. {0, NULL}
  1373. };
  1374. static int
  1375. lzma_traverse(PyObject *module, visitproc visit, void *arg)
  1376. {
  1377. _lzma_state *state = get_lzma_state(module);
  1378. Py_VISIT(state->lzma_compressor_type);
  1379. Py_VISIT(state->lzma_decompressor_type);
  1380. Py_VISIT(state->error);
  1381. Py_VISIT(state->empty_tuple);
  1382. return 0;
  1383. }
  1384. static int
  1385. lzma_clear(PyObject *module)
  1386. {
  1387. _lzma_state *state = get_lzma_state(module);
  1388. Py_CLEAR(state->lzma_compressor_type);
  1389. Py_CLEAR(state->lzma_decompressor_type);
  1390. Py_CLEAR(state->error);
  1391. Py_CLEAR(state->empty_tuple);
  1392. return 0;
  1393. }
  1394. static void
  1395. lzma_free(void *module)
  1396. {
  1397. lzma_clear((PyObject *)module);
  1398. }
  1399. static PyModuleDef _lzmamodule = {
  1400. PyModuleDef_HEAD_INIT,
  1401. .m_name = "_lzma",
  1402. .m_size = sizeof(_lzma_state),
  1403. .m_methods = lzma_methods,
  1404. .m_slots = lzma_slots,
  1405. .m_traverse = lzma_traverse,
  1406. .m_clear = lzma_clear,
  1407. .m_free = lzma_free,
  1408. };
  1409. PyMODINIT_FUNC
  1410. PyInit__lzma(void)
  1411. {
  1412. return PyModuleDef_Init(&_lzmamodule);
  1413. }