dict.go 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. package zstd
  2. import (
  3. "encoding/binary"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "github.com/klauspost/compress/huff0"
  8. )
  9. type dict struct {
  10. id uint32
  11. litEnc *huff0.Scratch
  12. llDec, ofDec, mlDec sequenceDec
  13. //llEnc, ofEnc, mlEnc []*fseEncoder
  14. offsets [3]int
  15. content []byte
  16. }
  17. const dictMagic = "\x37\xa4\x30\xec"
  18. // Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
  19. const dictMaxLength = 1 << 31
  20. // ID returns the dictionary id or 0 if d is nil.
  21. func (d *dict) ID() uint32 {
  22. if d == nil {
  23. return 0
  24. }
  25. return d.id
  26. }
  27. // ContentSize returns the dictionary content size or 0 if d is nil.
  28. func (d *dict) ContentSize() int {
  29. if d == nil {
  30. return 0
  31. }
  32. return len(d.content)
  33. }
  34. // Content returns the dictionary content.
  35. func (d *dict) Content() []byte {
  36. if d == nil {
  37. return nil
  38. }
  39. return d.content
  40. }
  41. // Offsets returns the initial offsets.
  42. func (d *dict) Offsets() [3]int {
  43. if d == nil {
  44. return [3]int{}
  45. }
  46. return d.offsets
  47. }
  48. // LitEncoder returns the literal encoder.
  49. func (d *dict) LitEncoder() *huff0.Scratch {
  50. if d == nil {
  51. return nil
  52. }
  53. return d.litEnc
  54. }
  55. // Load a dictionary as described in
  56. // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
  57. func loadDict(b []byte) (*dict, error) {
  58. // Check static field size.
  59. if len(b) <= 8+(3*4) {
  60. return nil, io.ErrUnexpectedEOF
  61. }
  62. d := dict{
  63. llDec: sequenceDec{fse: &fseDecoder{}},
  64. ofDec: sequenceDec{fse: &fseDecoder{}},
  65. mlDec: sequenceDec{fse: &fseDecoder{}},
  66. }
  67. if string(b[:4]) != dictMagic {
  68. return nil, ErrMagicMismatch
  69. }
  70. d.id = binary.LittleEndian.Uint32(b[4:8])
  71. if d.id == 0 {
  72. return nil, errors.New("dictionaries cannot have ID 0")
  73. }
  74. // Read literal table
  75. var err error
  76. d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
  77. if err != nil {
  78. return nil, fmt.Errorf("loading literal table: %w", err)
  79. }
  80. d.litEnc.Reuse = huff0.ReusePolicyMust
  81. br := byteReader{
  82. b: b,
  83. off: 0,
  84. }
  85. readDec := func(i tableIndex, dec *fseDecoder) error {
  86. if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil {
  87. return err
  88. }
  89. if br.overread() {
  90. return io.ErrUnexpectedEOF
  91. }
  92. err = dec.transform(symbolTableX[i])
  93. if err != nil {
  94. println("Transform table error:", err)
  95. return err
  96. }
  97. if debugDecoder || debugEncoder {
  98. println("Read table ok", "symbolLen:", dec.symbolLen)
  99. }
  100. // Set decoders as predefined so they aren't reused.
  101. dec.preDefined = true
  102. return nil
  103. }
  104. if err := readDec(tableOffsets, d.ofDec.fse); err != nil {
  105. return nil, err
  106. }
  107. if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil {
  108. return nil, err
  109. }
  110. if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil {
  111. return nil, err
  112. }
  113. if br.remain() < 12 {
  114. return nil, io.ErrUnexpectedEOF
  115. }
  116. d.offsets[0] = int(br.Uint32())
  117. br.advance(4)
  118. d.offsets[1] = int(br.Uint32())
  119. br.advance(4)
  120. d.offsets[2] = int(br.Uint32())
  121. br.advance(4)
  122. if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 {
  123. return nil, errors.New("invalid offset in dictionary")
  124. }
  125. d.content = make([]byte, br.remain())
  126. copy(d.content, br.unread())
  127. if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) {
  128. return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets)
  129. }
  130. return &d, nil
  131. }
  132. // InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
  133. func InspectDictionary(b []byte) (interface {
  134. ID() uint32
  135. ContentSize() int
  136. Content() []byte
  137. Offsets() [3]int
  138. LitEncoder() *huff0.Scratch
  139. }, error) {
  140. initPredefined()
  141. d, err := loadDict(b)
  142. return d, err
  143. }