inflate.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package flate implements the DEFLATE compressed data format, described in
  5. // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
  6. // formats.
  7. package flate
  8. import (
  9. "bufio"
  10. "compress/flate"
  11. "fmt"
  12. "io"
  13. "math/bits"
  14. "sync"
  15. )
  16. const (
  17. maxCodeLen = 16 // max length of Huffman code
  18. maxCodeLenMask = 15 // mask for max length of Huffman code
  19. // The next three numbers come from the RFC section 3.2.7, with the
  20. // additional proviso in section 3.2.5 which implies that distance codes
  21. // 30 and 31 should never occur in compressed data.
  22. maxNumLit = 286
  23. maxNumDist = 30
  24. numCodes = 19 // number of codes in Huffman meta-code
  25. debugDecode = false
  26. )
  27. // Value of length - 3 and extra bits.
  28. type lengthExtra struct {
  29. length, extra uint8
  30. }
  31. var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
  32. var bitMask32 = [32]uint32{
  33. 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
  34. 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
  35. 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
  36. 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
  37. } // up to 32 bits
  38. // Initialize the fixedHuffmanDecoder only once upon first use.
  39. var fixedOnce sync.Once
  40. var fixedHuffmanDecoder huffmanDecoder
  41. // A CorruptInputError reports the presence of corrupt input at a given offset.
  42. type CorruptInputError = flate.CorruptInputError
  43. // An InternalError reports an error in the flate code itself.
  44. type InternalError string
  45. func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
  46. // A ReadError reports an error encountered while reading input.
  47. //
  48. // Deprecated: No longer returned.
  49. type ReadError = flate.ReadError
  50. // A WriteError reports an error encountered while writing output.
  51. //
  52. // Deprecated: No longer returned.
  53. type WriteError = flate.WriteError
  54. // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
  55. // to switch to a new underlying Reader. This permits reusing a ReadCloser
  56. // instead of allocating a new one.
  57. type Resetter interface {
  58. // Reset discards any buffered data and resets the Resetter as if it was
  59. // newly initialized with the given reader.
  60. Reset(r io.Reader, dict []byte) error
  61. }
  62. // The data structure for decoding Huffman tables is based on that of
  63. // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
  64. // For codes smaller than the table width, there are multiple entries
  65. // (each combination of trailing bits has the same value). For codes
  66. // larger than the table width, the table contains a link to an overflow
  67. // table. The width of each entry in the link table is the maximum code
  68. // size minus the chunk width.
  69. //
  70. // Note that you can do a lookup in the table even without all bits
  71. // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
  72. // have the property that shorter codes come before longer ones, the
  73. // bit length estimate in the result is a lower bound on the actual
  74. // number of bits.
  75. //
  76. // See the following:
  77. // http://www.gzip.org/algorithm.txt
  78. // chunk & 15 is number of bits
  79. // chunk >> 4 is value, including table link
  80. const (
  81. huffmanChunkBits = 9
  82. huffmanNumChunks = 1 << huffmanChunkBits
  83. huffmanCountMask = 15
  84. huffmanValueShift = 4
  85. )
  86. type huffmanDecoder struct {
  87. maxRead int // the maximum number of bits we can read and not overread
  88. chunks *[huffmanNumChunks]uint16 // chunks as described above
  89. links [][]uint16 // overflow links
  90. linkMask uint32 // mask the width of the link table
  91. }
  92. // Initialize Huffman decoding tables from array of code lengths.
  93. // Following this function, h is guaranteed to be initialized into a complete
  94. // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
  95. // degenerate case where the tree has only a single symbol with length 1. Empty
  96. // trees are permitted.
  97. func (h *huffmanDecoder) init(lengths []int) bool {
  98. // Sanity enables additional runtime tests during Huffman
  99. // table construction. It's intended to be used during
  100. // development to supplement the currently ad-hoc unit tests.
  101. const sanity = false
  102. if h.chunks == nil {
  103. h.chunks = &[huffmanNumChunks]uint16{}
  104. }
  105. if h.maxRead != 0 {
  106. *h = huffmanDecoder{chunks: h.chunks, links: h.links}
  107. }
  108. // Count number of codes of each length,
  109. // compute maxRead and max length.
  110. var count [maxCodeLen]int
  111. var min, max int
  112. for _, n := range lengths {
  113. if n == 0 {
  114. continue
  115. }
  116. if min == 0 || n < min {
  117. min = n
  118. }
  119. if n > max {
  120. max = n
  121. }
  122. count[n&maxCodeLenMask]++
  123. }
  124. // Empty tree. The decompressor.huffSym function will fail later if the tree
  125. // is used. Technically, an empty tree is only valid for the HDIST tree and
  126. // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
  127. // is guaranteed to fail since it will attempt to use the tree to decode the
  128. // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
  129. // guaranteed to fail later since the compressed data section must be
  130. // composed of at least one symbol (the end-of-block marker).
  131. if max == 0 {
  132. return true
  133. }
  134. code := 0
  135. var nextcode [maxCodeLen]int
  136. for i := min; i <= max; i++ {
  137. code <<= 1
  138. nextcode[i&maxCodeLenMask] = code
  139. code += count[i&maxCodeLenMask]
  140. }
  141. // Check that the coding is complete (i.e., that we've
  142. // assigned all 2-to-the-max possible bit sequences).
  143. // Exception: To be compatible with zlib, we also need to
  144. // accept degenerate single-code codings. See also
  145. // TestDegenerateHuffmanCoding.
  146. if code != 1<<uint(max) && !(code == 1 && max == 1) {
  147. if debugDecode {
  148. fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
  149. }
  150. return false
  151. }
  152. h.maxRead = min
  153. chunks := h.chunks[:]
  154. for i := range chunks {
  155. chunks[i] = 0
  156. }
  157. if max > huffmanChunkBits {
  158. numLinks := 1 << (uint(max) - huffmanChunkBits)
  159. h.linkMask = uint32(numLinks - 1)
  160. // create link tables
  161. link := nextcode[huffmanChunkBits+1] >> 1
  162. if cap(h.links) < huffmanNumChunks-link {
  163. h.links = make([][]uint16, huffmanNumChunks-link)
  164. } else {
  165. h.links = h.links[:huffmanNumChunks-link]
  166. }
  167. for j := uint(link); j < huffmanNumChunks; j++ {
  168. reverse := int(bits.Reverse16(uint16(j)))
  169. reverse >>= uint(16 - huffmanChunkBits)
  170. off := j - uint(link)
  171. if sanity && h.chunks[reverse] != 0 {
  172. panic("impossible: overwriting existing chunk")
  173. }
  174. h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
  175. if cap(h.links[off]) < numLinks {
  176. h.links[off] = make([]uint16, numLinks)
  177. } else {
  178. links := h.links[off][:0]
  179. h.links[off] = links[:numLinks]
  180. }
  181. }
  182. } else {
  183. h.links = h.links[:0]
  184. }
  185. for i, n := range lengths {
  186. if n == 0 {
  187. continue
  188. }
  189. code := nextcode[n]
  190. nextcode[n]++
  191. chunk := uint16(i<<huffmanValueShift | n)
  192. reverse := int(bits.Reverse16(uint16(code)))
  193. reverse >>= uint(16 - n)
  194. if n <= huffmanChunkBits {
  195. for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
  196. // We should never need to overwrite
  197. // an existing chunk. Also, 0 is
  198. // never a valid chunk, because the
  199. // lower 4 "count" bits should be
  200. // between 1 and 15.
  201. if sanity && h.chunks[off] != 0 {
  202. panic("impossible: overwriting existing chunk")
  203. }
  204. h.chunks[off] = chunk
  205. }
  206. } else {
  207. j := reverse & (huffmanNumChunks - 1)
  208. if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
  209. // Longer codes should have been
  210. // associated with a link table above.
  211. panic("impossible: not an indirect chunk")
  212. }
  213. value := h.chunks[j] >> huffmanValueShift
  214. linktab := h.links[value]
  215. reverse >>= huffmanChunkBits
  216. for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
  217. if sanity && linktab[off] != 0 {
  218. panic("impossible: overwriting existing chunk")
  219. }
  220. linktab[off] = chunk
  221. }
  222. }
  223. }
  224. if sanity {
  225. // Above we've sanity checked that we never overwrote
  226. // an existing entry. Here we additionally check that
  227. // we filled the tables completely.
  228. for i, chunk := range h.chunks {
  229. if chunk == 0 {
  230. // As an exception, in the degenerate
  231. // single-code case, we allow odd
  232. // chunks to be missing.
  233. if code == 1 && i%2 == 1 {
  234. continue
  235. }
  236. panic("impossible: missing chunk")
  237. }
  238. }
  239. for _, linktab := range h.links {
  240. for _, chunk := range linktab {
  241. if chunk == 0 {
  242. panic("impossible: missing chunk")
  243. }
  244. }
  245. }
  246. }
  247. return true
  248. }
  249. // The actual read interface needed by NewReader.
  250. // If the passed in io.Reader does not also have ReadByte,
  251. // the NewReader will introduce its own buffering.
  252. type Reader interface {
  253. io.Reader
  254. io.ByteReader
  255. }
  256. // Decompress state.
  257. type decompressor struct {
  258. // Input source.
  259. r Reader
  260. roffset int64
  261. // Huffman decoders for literal/length, distance.
  262. h1, h2 huffmanDecoder
  263. // Length arrays used to define Huffman codes.
  264. bits *[maxNumLit + maxNumDist]int
  265. codebits *[numCodes]int
  266. // Output history, buffer.
  267. dict dictDecoder
  268. // Next step in the decompression,
  269. // and decompression state.
  270. step func(*decompressor)
  271. stepState int
  272. err error
  273. toRead []byte
  274. hl, hd *huffmanDecoder
  275. copyLen int
  276. copyDist int
  277. // Temporary buffer (avoids repeated allocation).
  278. buf [4]byte
  279. // Input bits, in top of b.
  280. b uint32
  281. nb uint
  282. final bool
  283. }
  284. func (f *decompressor) nextBlock() {
  285. for f.nb < 1+2 {
  286. if f.err = f.moreBits(); f.err != nil {
  287. return
  288. }
  289. }
  290. f.final = f.b&1 == 1
  291. f.b >>= 1
  292. typ := f.b & 3
  293. f.b >>= 2
  294. f.nb -= 1 + 2
  295. switch typ {
  296. case 0:
  297. f.dataBlock()
  298. if debugDecode {
  299. fmt.Println("stored block")
  300. }
  301. case 1:
  302. // compressed, fixed Huffman tables
  303. f.hl = &fixedHuffmanDecoder
  304. f.hd = nil
  305. f.huffmanBlockDecoder()()
  306. if debugDecode {
  307. fmt.Println("predefinied huffman block")
  308. }
  309. case 2:
  310. // compressed, dynamic Huffman tables
  311. if f.err = f.readHuffman(); f.err != nil {
  312. break
  313. }
  314. f.hl = &f.h1
  315. f.hd = &f.h2
  316. f.huffmanBlockDecoder()()
  317. if debugDecode {
  318. fmt.Println("dynamic huffman block")
  319. }
  320. default:
  321. // 3 is reserved.
  322. if debugDecode {
  323. fmt.Println("reserved data block encountered")
  324. }
  325. f.err = CorruptInputError(f.roffset)
  326. }
  327. }
  328. func (f *decompressor) Read(b []byte) (int, error) {
  329. for {
  330. if len(f.toRead) > 0 {
  331. n := copy(b, f.toRead)
  332. f.toRead = f.toRead[n:]
  333. if len(f.toRead) == 0 {
  334. return n, f.err
  335. }
  336. return n, nil
  337. }
  338. if f.err != nil {
  339. return 0, f.err
  340. }
  341. f.step(f)
  342. if f.err != nil && len(f.toRead) == 0 {
  343. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  344. }
  345. }
  346. }
  347. // Support the io.WriteTo interface for io.Copy and friends.
  348. func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
  349. total := int64(0)
  350. flushed := false
  351. for {
  352. if len(f.toRead) > 0 {
  353. n, err := w.Write(f.toRead)
  354. total += int64(n)
  355. if err != nil {
  356. f.err = err
  357. return total, err
  358. }
  359. if n != len(f.toRead) {
  360. return total, io.ErrShortWrite
  361. }
  362. f.toRead = f.toRead[:0]
  363. }
  364. if f.err != nil && flushed {
  365. if f.err == io.EOF {
  366. return total, nil
  367. }
  368. return total, f.err
  369. }
  370. if f.err == nil {
  371. f.step(f)
  372. }
  373. if len(f.toRead) == 0 && f.err != nil && !flushed {
  374. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  375. flushed = true
  376. }
  377. }
  378. }
  379. func (f *decompressor) Close() error {
  380. if f.err == io.EOF {
  381. return nil
  382. }
  383. return f.err
  384. }
  385. // RFC 1951 section 3.2.7.
  386. // Compression with dynamic Huffman codes
  387. var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  388. func (f *decompressor) readHuffman() error {
  389. // HLIT[5], HDIST[5], HCLEN[4].
  390. for f.nb < 5+5+4 {
  391. if err := f.moreBits(); err != nil {
  392. return err
  393. }
  394. }
  395. nlit := int(f.b&0x1F) + 257
  396. if nlit > maxNumLit {
  397. if debugDecode {
  398. fmt.Println("nlit > maxNumLit", nlit)
  399. }
  400. return CorruptInputError(f.roffset)
  401. }
  402. f.b >>= 5
  403. ndist := int(f.b&0x1F) + 1
  404. if ndist > maxNumDist {
  405. if debugDecode {
  406. fmt.Println("ndist > maxNumDist", ndist)
  407. }
  408. return CorruptInputError(f.roffset)
  409. }
  410. f.b >>= 5
  411. nclen := int(f.b&0xF) + 4
  412. // numCodes is 19, so nclen is always valid.
  413. f.b >>= 4
  414. f.nb -= 5 + 5 + 4
  415. // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
  416. for i := 0; i < nclen; i++ {
  417. for f.nb < 3 {
  418. if err := f.moreBits(); err != nil {
  419. return err
  420. }
  421. }
  422. f.codebits[codeOrder[i]] = int(f.b & 0x7)
  423. f.b >>= 3
  424. f.nb -= 3
  425. }
  426. for i := nclen; i < len(codeOrder); i++ {
  427. f.codebits[codeOrder[i]] = 0
  428. }
  429. if !f.h1.init(f.codebits[0:]) {
  430. if debugDecode {
  431. fmt.Println("init codebits failed")
  432. }
  433. return CorruptInputError(f.roffset)
  434. }
  435. // HLIT + 257 code lengths, HDIST + 1 code lengths,
  436. // using the code length Huffman code.
  437. for i, n := 0, nlit+ndist; i < n; {
  438. x, err := f.huffSym(&f.h1)
  439. if err != nil {
  440. return err
  441. }
  442. if x < 16 {
  443. // Actual length.
  444. f.bits[i] = x
  445. i++
  446. continue
  447. }
  448. // Repeat previous length or zero.
  449. var rep int
  450. var nb uint
  451. var b int
  452. switch x {
  453. default:
  454. return InternalError("unexpected length code")
  455. case 16:
  456. rep = 3
  457. nb = 2
  458. if i == 0 {
  459. if debugDecode {
  460. fmt.Println("i==0")
  461. }
  462. return CorruptInputError(f.roffset)
  463. }
  464. b = f.bits[i-1]
  465. case 17:
  466. rep = 3
  467. nb = 3
  468. b = 0
  469. case 18:
  470. rep = 11
  471. nb = 7
  472. b = 0
  473. }
  474. for f.nb < nb {
  475. if err := f.moreBits(); err != nil {
  476. if debugDecode {
  477. fmt.Println("morebits:", err)
  478. }
  479. return err
  480. }
  481. }
  482. rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
  483. f.b >>= nb & regSizeMaskUint32
  484. f.nb -= nb
  485. if i+rep > n {
  486. if debugDecode {
  487. fmt.Println("i+rep > n", i, rep, n)
  488. }
  489. return CorruptInputError(f.roffset)
  490. }
  491. for j := 0; j < rep; j++ {
  492. f.bits[i] = b
  493. i++
  494. }
  495. }
  496. if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
  497. if debugDecode {
  498. fmt.Println("init2 failed")
  499. }
  500. return CorruptInputError(f.roffset)
  501. }
  502. // As an optimization, we can initialize the maxRead bits to read at a time
  503. // for the HLIT tree to the length of the EOB marker since we know that
  504. // every block must terminate with one. This preserves the property that
  505. // we never read any extra bytes after the end of the DEFLATE stream.
  506. if f.h1.maxRead < f.bits[endBlockMarker] {
  507. f.h1.maxRead = f.bits[endBlockMarker]
  508. }
  509. if !f.final {
  510. // If not the final block, the smallest block possible is
  511. // a predefined table, BTYPE=01, with a single EOB marker.
  512. // This will take up 3 + 7 bits.
  513. f.h1.maxRead += 10
  514. }
  515. return nil
  516. }
  517. // Copy a single uncompressed data block from input to output.
  518. func (f *decompressor) dataBlock() {
  519. // Uncompressed.
  520. // Discard current half-byte.
  521. left := (f.nb) & 7
  522. f.nb -= left
  523. f.b >>= left
  524. offBytes := f.nb >> 3
  525. // Unfilled values will be overwritten.
  526. f.buf[0] = uint8(f.b)
  527. f.buf[1] = uint8(f.b >> 8)
  528. f.buf[2] = uint8(f.b >> 16)
  529. f.buf[3] = uint8(f.b >> 24)
  530. f.roffset += int64(offBytes)
  531. f.nb, f.b = 0, 0
  532. // Length then ones-complement of length.
  533. nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
  534. f.roffset += int64(nr)
  535. if err != nil {
  536. f.err = noEOF(err)
  537. return
  538. }
  539. n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
  540. nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
  541. if nn != ^n {
  542. if debugDecode {
  543. ncomp := ^n
  544. fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
  545. }
  546. f.err = CorruptInputError(f.roffset)
  547. return
  548. }
  549. if n == 0 {
  550. f.toRead = f.dict.readFlush()
  551. f.finishBlock()
  552. return
  553. }
  554. f.copyLen = int(n)
  555. f.copyData()
  556. }
  557. // copyData copies f.copyLen bytes from the underlying reader into f.hist.
  558. // It pauses for reads when f.hist is full.
  559. func (f *decompressor) copyData() {
  560. buf := f.dict.writeSlice()
  561. if len(buf) > f.copyLen {
  562. buf = buf[:f.copyLen]
  563. }
  564. cnt, err := io.ReadFull(f.r, buf)
  565. f.roffset += int64(cnt)
  566. f.copyLen -= cnt
  567. f.dict.writeMark(cnt)
  568. if err != nil {
  569. f.err = noEOF(err)
  570. return
  571. }
  572. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  573. f.toRead = f.dict.readFlush()
  574. f.step = (*decompressor).copyData
  575. return
  576. }
  577. f.finishBlock()
  578. }
  579. func (f *decompressor) finishBlock() {
  580. if f.final {
  581. if f.dict.availRead() > 0 {
  582. f.toRead = f.dict.readFlush()
  583. }
  584. f.err = io.EOF
  585. }
  586. f.step = (*decompressor).nextBlock
  587. }
  588. // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
  589. func noEOF(e error) error {
  590. if e == io.EOF {
  591. return io.ErrUnexpectedEOF
  592. }
  593. return e
  594. }
  595. func (f *decompressor) moreBits() error {
  596. c, err := f.r.ReadByte()
  597. if err != nil {
  598. return noEOF(err)
  599. }
  600. f.roffset++
  601. f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
  602. f.nb += 8
  603. return nil
  604. }
  605. // Read the next Huffman-encoded symbol from f according to h.
  606. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
  607. // Since a huffmanDecoder can be empty or be composed of a degenerate tree
  608. // with single element, huffSym must error on these two edge cases. In both
  609. // cases, the chunks slice will be 0 for the invalid sequence, leading it
  610. // satisfy the n == 0 check below.
  611. n := uint(h.maxRead)
  612. // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
  613. // but is smart enough to keep local variables in registers, so use nb and b,
  614. // inline call to moreBits and reassign b,nb back to f on return.
  615. nb, b := f.nb, f.b
  616. for {
  617. for nb < n {
  618. c, err := f.r.ReadByte()
  619. if err != nil {
  620. f.b = b
  621. f.nb = nb
  622. return 0, noEOF(err)
  623. }
  624. f.roffset++
  625. b |= uint32(c) << (nb & regSizeMaskUint32)
  626. nb += 8
  627. }
  628. chunk := h.chunks[b&(huffmanNumChunks-1)]
  629. n = uint(chunk & huffmanCountMask)
  630. if n > huffmanChunkBits {
  631. chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
  632. n = uint(chunk & huffmanCountMask)
  633. }
  634. if n <= nb {
  635. if n == 0 {
  636. f.b = b
  637. f.nb = nb
  638. if debugDecode {
  639. fmt.Println("huffsym: n==0")
  640. }
  641. f.err = CorruptInputError(f.roffset)
  642. return 0, f.err
  643. }
  644. f.b = b >> (n & regSizeMaskUint32)
  645. f.nb = nb - n
  646. return int(chunk >> huffmanValueShift), nil
  647. }
  648. }
  649. }
  650. func makeReader(r io.Reader) Reader {
  651. if rr, ok := r.(Reader); ok {
  652. return rr
  653. }
  654. return bufio.NewReader(r)
  655. }
  656. func fixedHuffmanDecoderInit() {
  657. fixedOnce.Do(func() {
  658. // These come from the RFC section 3.2.6.
  659. var bits [288]int
  660. for i := 0; i < 144; i++ {
  661. bits[i] = 8
  662. }
  663. for i := 144; i < 256; i++ {
  664. bits[i] = 9
  665. }
  666. for i := 256; i < 280; i++ {
  667. bits[i] = 7
  668. }
  669. for i := 280; i < 288; i++ {
  670. bits[i] = 8
  671. }
  672. fixedHuffmanDecoder.init(bits[:])
  673. })
  674. }
  675. func (f *decompressor) Reset(r io.Reader, dict []byte) error {
  676. *f = decompressor{
  677. r: makeReader(r),
  678. bits: f.bits,
  679. codebits: f.codebits,
  680. h1: f.h1,
  681. h2: f.h2,
  682. dict: f.dict,
  683. step: (*decompressor).nextBlock,
  684. }
  685. f.dict.init(maxMatchOffset, dict)
  686. return nil
  687. }
  688. // NewReader returns a new ReadCloser that can be used
  689. // to read the uncompressed version of r.
  690. // If r does not also implement io.ByteReader,
  691. // the decompressor may read more data than necessary from r.
  692. // It is the caller's responsibility to call Close on the ReadCloser
  693. // when finished reading.
  694. //
  695. // The ReadCloser returned by NewReader also implements Resetter.
  696. func NewReader(r io.Reader) io.ReadCloser {
  697. fixedHuffmanDecoderInit()
  698. var f decompressor
  699. f.r = makeReader(r)
  700. f.bits = new([maxNumLit + maxNumDist]int)
  701. f.codebits = new([numCodes]int)
  702. f.step = (*decompressor).nextBlock
  703. f.dict.init(maxMatchOffset, nil)
  704. return &f
  705. }
  706. // NewReaderDict is like NewReader but initializes the reader
  707. // with a preset dictionary. The returned Reader behaves as if
  708. // the uncompressed data stream started with the given dictionary,
  709. // which has already been read. NewReaderDict is typically used
  710. // to read data compressed by NewWriterDict.
  711. //
  712. // The ReadCloser returned by NewReader also implements Resetter.
  713. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
  714. fixedHuffmanDecoderInit()
  715. var f decompressor
  716. f.r = makeReader(r)
  717. f.bits = new([maxNumLit + maxNumDist]int)
  718. f.codebits = new([numCodes]int)
  719. f.step = (*decompressor).nextBlock
  720. f.dict.init(maxMatchOffset, dict)
  721. return &f
  722. }