xzlib.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815
  1. /**
  2. * xzlib.c: front end for the transparent support of lzma compression
  3. * at the I/O layer, based on an example file from lzma project
  4. *
  5. * See Copyright for the status of this software.
  6. *
  7. * Anders F Bjorklund <afb@users.sourceforge.net>
  8. */
  9. #define IN_LIBXML
  10. #include "libxml.h"
  11. #ifdef LIBXML_LZMA_ENABLED
  12. #include <string.h>
  13. #ifdef HAVE_ERRNO_H
  14. #include <errno.h>
  15. #endif
  16. #ifdef HAVE_SYS_TYPES_H
  17. #include <sys/types.h>
  18. #endif
  19. #ifdef HAVE_SYS_STAT_H
  20. #include <sys/stat.h>
  21. #endif
  22. #ifdef HAVE_FCNTL_H
  23. #include <fcntl.h>
  24. #endif
  25. #ifdef HAVE_UNISTD_H
  26. #include <unistd.h>
  27. #endif
  28. #ifdef HAVE_STDLIB_H
  29. #include <stdlib.h>
  30. #endif
  31. #ifdef LIBXML_ZLIB_ENABLED
  32. #include <zlib.h>
  33. #endif
  34. #ifdef LIBXML_LZMA_ENABLED
  35. #error #include <lzma.h>
  36. #endif
  37. #error #include "xzlib.h"
  38. #include <libxml/xmlmemory.h>
  39. /* values for xz_state how */
  40. #define LOOK 0 /* look for a gzip/lzma header */
  41. #define COPY 1 /* copy input directly */
  42. #define GZIP 2 /* decompress a gzip stream */
  43. #define LZMA 3 /* decompress a lzma stream */
  44. /* internal lzma file state data structure */
  45. typedef struct {
  46. int mode; /* see lzma modes above */
  47. int fd; /* file descriptor */
  48. char *path; /* path or fd for error messages */
  49. uint64_t pos; /* current position in uncompressed data */
  50. unsigned int size; /* buffer size, zero if not allocated yet */
  51. unsigned int want; /* requested buffer size, default is BUFSIZ */
  52. unsigned char *in; /* input buffer */
  53. unsigned char *out; /* output buffer (double-sized when reading) */
  54. unsigned char *next; /* next output data to deliver or write */
  55. unsigned int have; /* amount of output data unused at next */
  56. int eof; /* true if end of input file reached */
  57. uint64_t start; /* where the lzma data started, for rewinding */
  58. uint64_t raw; /* where the raw data started, for seeking */
  59. int how; /* 0: get header, 1: copy, 2: decompress */
  60. int direct; /* true if last read direct, false if lzma */
  61. /* seek request */
  62. uint64_t skip; /* amount to skip (already rewound if backwards) */
  63. int seek; /* true if seek request pending */
  64. /* error information */
  65. int err; /* error code */
  66. char *msg; /* error message */
  67. /* lzma stream */
  68. int init; /* is the inflate stream initialized */
  69. lzma_stream strm; /* stream structure in-place (not a pointer) */
  70. char padding1[32]; /* padding allowing to cope with possible
  71. extensions of above structure without
  72. too much side effect */
  73. #ifdef LIBXML_ZLIB_ENABLED
  74. /* zlib inflate or deflate stream */
  75. z_stream zstrm; /* stream structure in-place (not a pointer) */
  76. #endif
  77. char padding2[32]; /* padding allowing to cope with possible
  78. extensions of above structure without
  79. too much side effect */
  80. } xz_state, *xz_statep;
  81. static void
  82. xz_error(xz_statep state, int err, const char *msg)
  83. {
  84. /* free previously allocated message and clear */
  85. if (state->msg != NULL) {
  86. if (state->err != LZMA_MEM_ERROR)
  87. xmlFree(state->msg);
  88. state->msg = NULL;
  89. }
  90. /* set error code, and if no message, then done */
  91. state->err = err;
  92. if (msg == NULL)
  93. return;
  94. /* for an out of memory error, save as static string */
  95. if (err == LZMA_MEM_ERROR) {
  96. state->msg = (char *) msg;
  97. return;
  98. }
  99. /* construct error message with path */
  100. if ((state->msg =
  101. xmlMalloc(strlen(state->path) + strlen(msg) + 3)) == NULL) {
  102. state->err = LZMA_MEM_ERROR;
  103. state->msg = (char *) "out of memory";
  104. return;
  105. }
  106. strcpy(state->msg, state->path);
  107. strcat(state->msg, ": ");
  108. strcat(state->msg, msg);
  109. return;
  110. }
  111. static void
  112. xz_reset(xz_statep state)
  113. {
  114. state->have = 0; /* no output data available */
  115. state->eof = 0; /* not at end of file */
  116. state->how = LOOK; /* look for gzip header */
  117. state->direct = 1; /* default for empty file */
  118. state->seek = 0; /* no seek request pending */
  119. xz_error(state, LZMA_OK, NULL); /* clear error */
  120. state->pos = 0; /* no uncompressed data yet */
  121. state->strm.avail_in = 0; /* no input data yet */
  122. #ifdef LIBXML_ZLIB_ENABLED
  123. state->zstrm.avail_in = 0; /* no input data yet */
  124. #endif
  125. }
  126. static xzFile
  127. xz_open(const char *path, int fd, const char *mode ATTRIBUTE_UNUSED)
  128. {
  129. xz_statep state;
  130. /* allocate xzFile structure to return */
  131. state = xmlMalloc(sizeof(xz_state));
  132. if (state == NULL)
  133. return NULL;
  134. state->size = 0; /* no buffers allocated yet */
  135. state->want = BUFSIZ; /* requested buffer size */
  136. state->msg = NULL; /* no error message yet */
  137. state->init = 0; /* initialization of zlib data */
  138. /* save the path name for error messages */
  139. state->path = xmlMalloc(strlen(path) + 1);
  140. if (state->path == NULL) {
  141. xmlFree(state);
  142. return NULL;
  143. }
  144. strcpy(state->path, path);
  145. /* open the file with the appropriate mode (or just use fd) */
  146. state->fd = fd != -1 ? fd : open(path,
  147. #ifdef O_LARGEFILE
  148. O_LARGEFILE |
  149. #endif
  150. #ifdef O_BINARY
  151. O_BINARY |
  152. #endif
  153. O_RDONLY, 0666);
  154. if (state->fd == -1) {
  155. xmlFree(state->path);
  156. xmlFree(state);
  157. return NULL;
  158. }
  159. /* save the current position for rewinding (only if reading) */
  160. state->start = lseek(state->fd, 0, SEEK_CUR);
  161. if (state->start == (uint64_t) - 1)
  162. state->start = 0;
  163. /* initialize stream */
  164. xz_reset(state);
  165. /* return stream */
  166. return (xzFile) state;
  167. }
  168. static int
  169. xz_compressed(xzFile f) {
  170. xz_statep state;
  171. if (f == NULL)
  172. return(-1);
  173. state = (xz_statep) f;
  174. if (state->init <= 0)
  175. return(-1);
  176. switch (state->how) {
  177. case COPY:
  178. return(0);
  179. case GZIP:
  180. case LZMA:
  181. return(1);
  182. }
  183. return(-1);
  184. }
  185. xzFile
  186. __libxml2_xzopen(const char *path, const char *mode)
  187. {
  188. return xz_open(path, -1, mode);
  189. }
  190. int
  191. __libxml2_xzcompressed(xzFile f) {
  192. return xz_compressed(f);
  193. }
  194. xzFile
  195. __libxml2_xzdopen(int fd, const char *mode)
  196. {
  197. char *path; /* identifier for error messages */
  198. xzFile xz;
  199. if (fd == -1 || (path = xmlMalloc(7 + 3 * sizeof(int))) == NULL)
  200. return NULL;
  201. sprintf(path, "<fd:%d>", fd); /* for debugging */
  202. xz = xz_open(path, fd, mode);
  203. xmlFree(path);
  204. return xz;
  205. }
  206. static int
  207. xz_load(xz_statep state, unsigned char *buf, unsigned int len,
  208. unsigned int *have)
  209. {
  210. int ret;
  211. *have = 0;
  212. do {
  213. ret = read(state->fd, buf + *have, len - *have);
  214. if (ret <= 0)
  215. break;
  216. *have += ret;
  217. } while (*have < len);
  218. if (ret < 0) {
  219. xz_error(state, -1, strerror(errno));
  220. return -1;
  221. }
  222. if (ret == 0)
  223. state->eof = 1;
  224. return 0;
  225. }
  226. static int
  227. xz_avail(xz_statep state)
  228. {
  229. lzma_stream *strm = &(state->strm);
  230. if (state->err != LZMA_OK)
  231. return -1;
  232. if (state->eof == 0) {
  233. /* avail_in is size_t, which is not necessary sizeof(unsigned) */
  234. unsigned tmp = strm->avail_in;
  235. if (xz_load(state, state->in, state->size, &tmp) == -1) {
  236. strm->avail_in = tmp;
  237. return -1;
  238. }
  239. strm->avail_in = tmp;
  240. strm->next_in = state->in;
  241. }
  242. return 0;
  243. }
  244. #ifdef LIBXML_ZLIB_ENABLED
  245. static int
  246. xz_avail_zstrm(xz_statep state)
  247. {
  248. int ret;
  249. state->strm.avail_in = state->zstrm.avail_in;
  250. state->strm.next_in = state->zstrm.next_in;
  251. ret = xz_avail(state);
  252. state->zstrm.avail_in = (uInt) state->strm.avail_in;
  253. state->zstrm.next_in = (Bytef *) state->strm.next_in;
  254. return ret;
  255. }
  256. #endif
  257. static int
  258. is_format_xz(xz_statep state)
  259. {
  260. lzma_stream *strm = &(state->strm);
  261. return strm->avail_in >= 6 && memcmp(state->in, "\3757zXZ", 6) == 0;
  262. }
  263. static int
  264. is_format_lzma(xz_statep state)
  265. {
  266. lzma_stream *strm = &(state->strm);
  267. lzma_filter filter;
  268. lzma_options_lzma *opt;
  269. uint32_t dict_size;
  270. uint64_t uncompressed_size;
  271. size_t i;
  272. if (strm->avail_in < 13)
  273. return 0;
  274. filter.id = LZMA_FILTER_LZMA1;
  275. if (lzma_properties_decode(&filter, NULL, state->in, 5) != LZMA_OK)
  276. return 0;
  277. opt = filter.options;
  278. dict_size = opt->dict_size;
  279. free(opt); /* we can't use xmlFree on a string returned by zlib */
  280. /* A hack to ditch tons of false positives: We allow only dictionary
  281. * sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone
  282. * created only files with 2^n, but accepts any dictionary size.
  283. * If someone complains, this will be reconsidered.
  284. */
  285. if (dict_size != UINT32_MAX) {
  286. uint32_t d = dict_size - 1;
  287. d |= d >> 2;
  288. d |= d >> 3;
  289. d |= d >> 4;
  290. d |= d >> 8;
  291. d |= d >> 16;
  292. ++d;
  293. if (d != dict_size || dict_size == 0)
  294. return 0;
  295. }
  296. /* Another hack to ditch false positives: Assume that if the
  297. * uncompressed size is known, it must be less than 256 GiB.
  298. * Again, if someone complains, this will be reconsidered.
  299. */
  300. uncompressed_size = 0;
  301. for (i = 0; i < 8; ++i)
  302. uncompressed_size |= (uint64_t) (state->in[5 + i]) << (i * 8);
  303. if (uncompressed_size != UINT64_MAX
  304. && uncompressed_size > (UINT64_C(1) << 38))
  305. return 0;
  306. return 1;
  307. }
  308. #ifdef LIBXML_ZLIB_ENABLED
  309. /* Get next byte from input, or -1 if end or error. */
  310. #define NEXT() ((strm->avail_in == 0 && xz_avail(state) == -1) ? -1 : \
  311. (strm->avail_in == 0 ? -1 : \
  312. (strm->avail_in--, *(strm->next_in)++)))
  313. /* Same thing, but from zstrm */
  314. #define NEXTZ() ((strm->avail_in == 0 && xz_avail_zstrm(state) == -1) ? -1 : \
  315. (strm->avail_in == 0 ? -1 : \
  316. (strm->avail_in--, *(strm->next_in)++)))
  317. /* Get a four-byte little-endian integer and return 0 on success and the value
  318. in *ret. Otherwise -1 is returned and *ret is not modified. */
  319. static int
  320. gz_next4(xz_statep state, unsigned long *ret)
  321. {
  322. int ch;
  323. unsigned long val;
  324. z_streamp strm = &(state->zstrm);
  325. val = NEXTZ();
  326. val += (unsigned) NEXTZ() << 8;
  327. val += (unsigned long) NEXTZ() << 16;
  328. ch = NEXTZ();
  329. if (ch == -1)
  330. return -1;
  331. val += (unsigned long) ch << 24;
  332. *ret = val;
  333. return 0;
  334. }
  335. #endif
  336. static int
  337. xz_head(xz_statep state)
  338. {
  339. lzma_stream *strm = &(state->strm);
  340. lzma_stream init = LZMA_STREAM_INIT;
  341. int flags;
  342. unsigned len;
  343. /* allocate read buffers and inflate memory */
  344. if (state->size == 0) {
  345. /* allocate buffers */
  346. state->in = xmlMalloc(state->want);
  347. state->out = xmlMalloc(state->want << 1);
  348. if (state->in == NULL || state->out == NULL) {
  349. if (state->out != NULL)
  350. xmlFree(state->out);
  351. if (state->in != NULL)
  352. xmlFree(state->in);
  353. xz_error(state, LZMA_MEM_ERROR, "out of memory");
  354. return -1;
  355. }
  356. state->size = state->want;
  357. /* allocate decoder memory */
  358. state->strm = init;
  359. state->strm.avail_in = 0;
  360. state->strm.next_in = NULL;
  361. if (lzma_auto_decoder(&state->strm, 100000000, 0) != LZMA_OK) {
  362. xmlFree(state->out);
  363. xmlFree(state->in);
  364. state->size = 0;
  365. xz_error(state, LZMA_MEM_ERROR, "out of memory");
  366. return -1;
  367. }
  368. #ifdef LIBXML_ZLIB_ENABLED
  369. /* allocate inflate memory */
  370. state->zstrm.zalloc = Z_NULL;
  371. state->zstrm.zfree = Z_NULL;
  372. state->zstrm.opaque = Z_NULL;
  373. state->zstrm.avail_in = 0;
  374. state->zstrm.next_in = Z_NULL;
  375. if (state->init == 0) {
  376. if (inflateInit2(&(state->zstrm), -15) != Z_OK) {/* raw inflate */
  377. xmlFree(state->out);
  378. xmlFree(state->in);
  379. state->size = 0;
  380. xz_error(state, LZMA_MEM_ERROR, "out of memory");
  381. return -1;
  382. }
  383. state->init = 1;
  384. }
  385. #endif
  386. }
  387. /* get some data in the input buffer */
  388. if (strm->avail_in == 0) {
  389. if (xz_avail(state) == -1)
  390. return -1;
  391. if (strm->avail_in == 0)
  392. return 0;
  393. }
  394. /* look for the xz magic header bytes */
  395. if (is_format_xz(state) || is_format_lzma(state)) {
  396. state->how = LZMA;
  397. state->direct = 0;
  398. return 0;
  399. }
  400. #ifdef LIBXML_ZLIB_ENABLED
  401. /* look for the gzip magic header bytes 31 and 139 */
  402. if (strm->next_in[0] == 31) {
  403. strm->avail_in--;
  404. strm->next_in++;
  405. if (strm->avail_in == 0 && xz_avail(state) == -1)
  406. return -1;
  407. if (strm->avail_in && strm->next_in[0] == 139) {
  408. /* we have a gzip header, woo hoo! */
  409. strm->avail_in--;
  410. strm->next_in++;
  411. /* skip rest of header */
  412. if (NEXT() != 8) { /* compression method */
  413. xz_error(state, LZMA_DATA_ERROR,
  414. "unknown compression method");
  415. return -1;
  416. }
  417. flags = NEXT();
  418. if (flags & 0xe0) { /* reserved flag bits */
  419. xz_error(state, LZMA_DATA_ERROR,
  420. "unknown header flags set");
  421. return -1;
  422. }
  423. NEXT(); /* modification time */
  424. NEXT();
  425. NEXT();
  426. NEXT();
  427. NEXT(); /* extra flags */
  428. NEXT(); /* operating system */
  429. if (flags & 4) { /* extra field */
  430. len = (unsigned) NEXT();
  431. len += (unsigned) NEXT() << 8;
  432. while (len--)
  433. if (NEXT() < 0)
  434. break;
  435. }
  436. if (flags & 8) /* file name */
  437. while (NEXT() > 0) ;
  438. if (flags & 16) /* comment */
  439. while (NEXT() > 0) ;
  440. if (flags & 2) { /* header crc */
  441. NEXT();
  442. NEXT();
  443. }
  444. /* an unexpected end of file is not checked for here -- it will be
  445. * noticed on the first request for uncompressed data */
  446. /* set up for decompression */
  447. inflateReset(&state->zstrm);
  448. state->zstrm.adler = crc32(0L, Z_NULL, 0);
  449. state->how = GZIP;
  450. state->direct = 0;
  451. return 0;
  452. } else {
  453. /* not a gzip file -- save first byte (31) and fall to raw i/o */
  454. state->out[0] = 31;
  455. state->have = 1;
  456. }
  457. }
  458. #endif
  459. /* doing raw i/o, save start of raw data for seeking, copy any leftover
  460. * input to output -- this assumes that the output buffer is larger than
  461. * the input buffer, which also assures space for gzungetc() */
  462. state->raw = state->pos;
  463. state->next = state->out;
  464. if (strm->avail_in) {
  465. memcpy(state->next + state->have, strm->next_in, strm->avail_in);
  466. state->have += strm->avail_in;
  467. strm->avail_in = 0;
  468. }
  469. state->how = COPY;
  470. state->direct = 1;
  471. return 0;
  472. }
  473. static int
  474. xz_decomp(xz_statep state)
  475. {
  476. int ret;
  477. unsigned had;
  478. unsigned long crc, len;
  479. lzma_stream *strm = &(state->strm);
  480. lzma_action action = LZMA_RUN;
  481. /* fill output buffer up to end of deflate stream */
  482. had = strm->avail_out;
  483. do {
  484. /* get more input for inflate() */
  485. if (strm->avail_in == 0 && xz_avail(state) == -1)
  486. return -1;
  487. if (strm->avail_in == 0) {
  488. xz_error(state, LZMA_DATA_ERROR, "unexpected end of file");
  489. return -1;
  490. }
  491. if (state->eof)
  492. action = LZMA_FINISH;
  493. /* decompress and handle errors */
  494. #ifdef LIBXML_ZLIB_ENABLED
  495. if (state->how == GZIP) {
  496. state->zstrm.avail_in = (uInt) state->strm.avail_in;
  497. state->zstrm.next_in = (Bytef *) state->strm.next_in;
  498. state->zstrm.avail_out = (uInt) state->strm.avail_out;
  499. state->zstrm.next_out = (Bytef *) state->strm.next_out;
  500. ret = inflate(&state->zstrm, Z_NO_FLUSH);
  501. if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
  502. xz_error(state, Z_STREAM_ERROR,
  503. "internal error: inflate stream corrupt");
  504. return -1;
  505. }
  506. /*
  507. * FIXME: Remapping a couple of error codes and falling through
  508. * to the LZMA error handling looks fragile.
  509. */
  510. if (ret == Z_MEM_ERROR)
  511. ret = LZMA_MEM_ERROR;
  512. if (ret == Z_DATA_ERROR)
  513. ret = LZMA_DATA_ERROR;
  514. if (ret == Z_STREAM_END)
  515. ret = LZMA_STREAM_END;
  516. state->strm.avail_in = state->zstrm.avail_in;
  517. state->strm.next_in = state->zstrm.next_in;
  518. state->strm.avail_out = state->zstrm.avail_out;
  519. state->strm.next_out = state->zstrm.next_out;
  520. } else /* state->how == LZMA */
  521. #endif
  522. ret = lzma_code(strm, action);
  523. if (ret == LZMA_MEM_ERROR) {
  524. xz_error(state, LZMA_MEM_ERROR, "out of memory");
  525. return -1;
  526. }
  527. if (ret == LZMA_DATA_ERROR) {
  528. xz_error(state, LZMA_DATA_ERROR, "compressed data error");
  529. return -1;
  530. }
  531. if (ret == LZMA_PROG_ERROR) {
  532. xz_error(state, LZMA_PROG_ERROR, "compression error");
  533. return -1;
  534. }
  535. if ((state->how != GZIP) &&
  536. (ret != LZMA_OK) && (ret != LZMA_STREAM_END)) {
  537. xz_error(state, ret, "lzma error");
  538. return -1;
  539. }
  540. } while (strm->avail_out && ret != LZMA_STREAM_END);
  541. /* update available output and crc check value */
  542. state->have = had - strm->avail_out;
  543. state->next = strm->next_out - state->have;
  544. #ifdef LIBXML_ZLIB_ENABLED
  545. state->zstrm.adler =
  546. crc32(state->zstrm.adler, state->next, state->have);
  547. #endif
  548. if (ret == LZMA_STREAM_END) {
  549. #ifdef LIBXML_ZLIB_ENABLED
  550. if (state->how == GZIP) {
  551. if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) {
  552. xz_error(state, LZMA_DATA_ERROR, "unexpected end of file");
  553. return -1;
  554. }
  555. if (crc != state->zstrm.adler) {
  556. xz_error(state, LZMA_DATA_ERROR, "incorrect data check");
  557. return -1;
  558. }
  559. if (len != (state->zstrm.total_out & 0xffffffffL)) {
  560. xz_error(state, LZMA_DATA_ERROR, "incorrect length check");
  561. return -1;
  562. }
  563. state->strm.avail_in = 0;
  564. state->strm.next_in = NULL;
  565. state->strm.avail_out = 0;
  566. state->strm.next_out = NULL;
  567. } else
  568. #endif
  569. if (strm->avail_in != 0 || !state->eof) {
  570. xz_error(state, LZMA_DATA_ERROR, "trailing garbage");
  571. return -1;
  572. }
  573. state->how = LOOK; /* ready for next stream, once have is 0 (leave
  574. * state->direct unchanged to remember how) */
  575. }
  576. /* good decompression */
  577. return 0;
  578. }
  579. static int
  580. xz_make(xz_statep state)
  581. {
  582. lzma_stream *strm = &(state->strm);
  583. if (state->how == LOOK) { /* look for lzma / gzip header */
  584. if (xz_head(state) == -1)
  585. return -1;
  586. if (state->have) /* got some data from xz_head() */
  587. return 0;
  588. }
  589. if (state->how == COPY) { /* straight copy */
  590. if (xz_load(state, state->out, state->size << 1, &(state->have)) ==
  591. -1)
  592. return -1;
  593. state->next = state->out;
  594. } else if (state->how == LZMA || state->how == GZIP) { /* decompress */
  595. strm->avail_out = state->size << 1;
  596. strm->next_out = state->out;
  597. if (xz_decomp(state) == -1)
  598. return -1;
  599. }
  600. return 0;
  601. }
  602. static int
  603. xz_skip(xz_statep state, uint64_t len)
  604. {
  605. unsigned n;
  606. /* skip over len bytes or reach end-of-file, whichever comes first */
  607. while (len)
  608. /* skip over whatever is in output buffer */
  609. if (state->have) {
  610. n = (uint64_t) state->have > len ?
  611. (unsigned) len : state->have;
  612. state->have -= n;
  613. state->next += n;
  614. state->pos += n;
  615. len -= n;
  616. }
  617. /* output buffer empty -- return if we're at the end of the input */
  618. else if (state->eof && state->strm.avail_in == 0)
  619. break;
  620. /* need more data to skip -- load up output buffer */
  621. else {
  622. /* get more output, looking for header if required */
  623. if (xz_make(state) == -1)
  624. return -1;
  625. }
  626. return 0;
  627. }
  628. int
  629. __libxml2_xzread(xzFile file, void *buf, unsigned len)
  630. {
  631. unsigned got, n;
  632. xz_statep state;
  633. lzma_stream *strm;
  634. /* get internal structure */
  635. if (file == NULL)
  636. return -1;
  637. state = (xz_statep) file;
  638. strm = &(state->strm);
  639. /* check that we're reading and that there's no error */
  640. if (state->err != LZMA_OK)
  641. return -1;
  642. /* since an int is returned, make sure len fits in one, otherwise return
  643. * with an error (this avoids the flaw in the interface) */
  644. if ((int) len < 0) {
  645. xz_error(state, LZMA_BUF_ERROR,
  646. "requested length does not fit in int");
  647. return -1;
  648. }
  649. /* if len is zero, avoid unnecessary operations */
  650. if (len == 0)
  651. return 0;
  652. /* process a skip request */
  653. if (state->seek) {
  654. state->seek = 0;
  655. if (xz_skip(state, state->skip) == -1)
  656. return -1;
  657. }
  658. /* get len bytes to buf, or less than len if at the end */
  659. got = 0;
  660. do {
  661. /* first just try copying data from the output buffer */
  662. if (state->have) {
  663. n = state->have > len ? len : state->have;
  664. memcpy(buf, state->next, n);
  665. state->next += n;
  666. state->have -= n;
  667. }
  668. /* output buffer empty -- return if we're at the end of the input */
  669. else if (state->eof && strm->avail_in == 0)
  670. break;
  671. /* need output data -- for small len or new stream load up our output
  672. * buffer */
  673. else if (state->how == LOOK || len < (state->size << 1)) {
  674. /* get more output, looking for header if required */
  675. if (xz_make(state) == -1)
  676. return -1;
  677. continue; /* no progress yet -- go back to memcpy() above */
  678. /* the copy above assures that we will leave with space in the
  679. * output buffer, allowing at least one gzungetc() to succeed */
  680. }
  681. /* large len -- read directly into user buffer */
  682. else if (state->how == COPY) { /* read directly */
  683. if (xz_load(state, buf, len, &n) == -1)
  684. return -1;
  685. }
  686. /* large len -- decompress directly into user buffer */
  687. else { /* state->how == LZMA */
  688. strm->avail_out = len;
  689. strm->next_out = buf;
  690. if (xz_decomp(state) == -1)
  691. return -1;
  692. n = state->have;
  693. state->have = 0;
  694. }
  695. /* update progress */
  696. len -= n;
  697. buf = (char *) buf + n;
  698. got += n;
  699. state->pos += n;
  700. } while (len);
  701. /* return number of bytes read into user buffer (will fit in int) */
  702. return (int) got;
  703. }
  704. int
  705. __libxml2_xzclose(xzFile file)
  706. {
  707. int ret;
  708. xz_statep state;
  709. /* get internal structure */
  710. if (file == NULL)
  711. return LZMA_DATA_ERROR;
  712. state = (xz_statep) file;
  713. /* free memory and close file */
  714. if (state->size) {
  715. lzma_end(&(state->strm));
  716. #ifdef LIBXML_ZLIB_ENABLED
  717. if (state->init == 1)
  718. inflateEnd(&(state->zstrm));
  719. state->init = 0;
  720. #endif
  721. xmlFree(state->out);
  722. xmlFree(state->in);
  723. }
  724. xmlFree(state->path);
  725. if ((state->msg != NULL) && (state->err != LZMA_MEM_ERROR))
  726. xmlFree(state->msg);
  727. ret = close(state->fd);
  728. xmlFree(state);
  729. return ret ? ret : LZMA_OK;
  730. }
  731. #endif /* LIBXML_LZMA_ENABLED */