fse_test.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. // Copyright 2018 Klaus Post. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
  5. package fse
  6. import (
  7. "bytes"
  8. "fmt"
  9. "os"
  10. "reflect"
  11. "strings"
  12. "testing"
  13. )
  14. type inputFn func() ([]byte, error)
  15. var testfiles = []struct {
  16. name string
  17. fn inputFn
  18. err error
  19. }{
  20. // gettysburg.txt is a small plain text.
  21. {name: "gettysburg", fn: func() ([]byte, error) { return os.ReadFile("../testdata/gettysburg.txt") }},
  22. // Digits is the digits of the irrational number e. Its decimal representation
  23. // does not repeat, but there are only 10 possible digits, so it should be
  24. // reasonably compressible.
  25. {name: "digits", fn: func() ([]byte, error) { return os.ReadFile("../testdata/e.txt") }},
  26. // Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
  27. {name: "twain", fn: func() ([]byte, error) { return os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt") }},
  28. // Random bytes
  29. {name: "random", fn: func() ([]byte, error) { return os.ReadFile("../testdata/sharnd.out") }, err: ErrIncompressible},
  30. // Low entropy
  31. {name: "low-ent", fn: func() ([]byte, error) { return []byte(strings.Repeat("1221", 10000)), nil }},
  32. // Super Low entropy
  33. {name: "superlow-ent", fn: func() ([]byte, error) { return []byte(strings.Repeat("1", 10000) + strings.Repeat("2", 500)), nil }},
  34. // Zero bytes
  35. {name: "zeroes", fn: func() ([]byte, error) { return make([]byte, 10000), nil }, err: ErrUseRLE},
  36. {name: "crash1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash1.bin") }, err: ErrIncompressible},
  37. {name: "crash2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash2.bin") }, err: ErrIncompressible},
  38. {name: "crash3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash3.bin") }, err: ErrIncompressible},
  39. {name: "endzerobits", fn: func() ([]byte, error) { return os.ReadFile("../testdata/endzerobits.bin") }, err: nil},
  40. {name: "endnonzero", fn: func() ([]byte, error) { return os.ReadFile("../testdata/endnonzero.bin") }, err: ErrIncompressible},
  41. {name: "case1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case1.bin") }, err: ErrIncompressible},
  42. {name: "case2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case2.bin") }, err: ErrIncompressible},
  43. {name: "case3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case3.bin") }, err: ErrIncompressible},
  44. {name: "pngdata.001", fn: func() ([]byte, error) { return os.ReadFile("../testdata/pngdata.bin") }, err: nil},
  45. {name: "normcount2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/normcount2.bin") }, err: nil},
  46. }
  47. var decTestfiles = []struct {
  48. name string
  49. fn inputFn
  50. err string
  51. }{
  52. // gettysburg.txt is a small plain text.
  53. {name: "hang1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang1.bin") }, err: "corruption detected (bitCount 252 > 32)"},
  54. {name: "hang2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang2.bin") }, err: "newState (0) == oldState (0) and no bits"},
  55. {name: "hang3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang3.bin") }, err: "maxSymbolValue too small"},
  56. {name: "symlen1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-symlen1.bin") }, err: "symbolLen (257) too big"},
  57. {name: "crash4", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash4.bin") }, err: "symbolLen (1) too small"},
  58. {name: "crash5", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash5.bin") }, err: "symbolLen (1) too small"},
  59. {name: "crash6", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-crash6.bin") }, err: "newState (32768) outside table size (32768)"},
  60. {name: "something", fn: func() ([]byte, error) { return os.ReadFile("../testdata/fse-artifact3.bin") }, err: "corrupt stream, did not find end of stream"},
  61. }
  62. func TestCompress(t *testing.T) {
  63. for _, test := range testfiles {
  64. t.Run(test.name, func(t *testing.T) {
  65. var s Scratch
  66. buf0, err := test.fn()
  67. if err != nil {
  68. t.Fatal(err)
  69. }
  70. b, err := Compress(buf0, &s)
  71. if err != test.err {
  72. t.Errorf("want error %v (%T), got %v (%T)", test.err, test.err, err, err)
  73. }
  74. if b == nil {
  75. t.Log(test.name + ": not compressible")
  76. return
  77. }
  78. t.Logf("%s: %d -> %d bytes (%.2f:1)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
  79. })
  80. }
  81. }
  82. func ExampleCompress() {
  83. // Read data
  84. data, err := os.ReadFile("../testdata/e.txt")
  85. if err != nil {
  86. panic(err)
  87. }
  88. // Create re-usable scratch buffer.
  89. var s Scratch
  90. b, err := Compress(data, &s)
  91. if err != nil {
  92. panic(err)
  93. }
  94. fmt.Printf("Compress: %d -> %d bytes (%.2f:1)\n", len(data), len(b), float64(len(data))/float64(len(b)))
  95. // OUTPUT: Compress: 100003 -> 41564 bytes (2.41:1)
  96. }
  97. func TestDecompress(t *testing.T) {
  98. for _, test := range decTestfiles {
  99. t.Run(test.name, func(t *testing.T) {
  100. var s Scratch
  101. s.DecompressLimit = 1 << 20
  102. buf0, err := test.fn()
  103. if err != nil {
  104. t.Fatal(err)
  105. }
  106. b, err := Decompress(buf0, &s)
  107. if fmt.Sprint(err) != test.err {
  108. t.Errorf("want error %q, got %q (%T)", test.err, err, err)
  109. return
  110. }
  111. if err != nil {
  112. return
  113. }
  114. if len(b) == 0 {
  115. t.Error(test.name + ": no output")
  116. return
  117. }
  118. t.Logf("%s: %d -> %d bytes (1:%.2f)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
  119. })
  120. }
  121. }
  122. func ExampleDecompress() {
  123. // Read data
  124. data, err := os.ReadFile("../testdata/e.txt")
  125. if err != nil {
  126. panic(err)
  127. }
  128. // Create re-usable scratch buffer.
  129. var s Scratch
  130. b, err := Compress(data, &s)
  131. if err != nil {
  132. panic(err)
  133. }
  134. // Since we use the output of compression, it cannot be used as output for decompression.
  135. s.Out = make([]byte, 0, len(data))
  136. d, err := Decompress(b, &s)
  137. if err != nil {
  138. panic(err)
  139. }
  140. fmt.Printf("Input matches: %t\n", bytes.Equal(d, data))
  141. // OUTPUT: Input matches: true
  142. }
  143. func BenchmarkCompress(b *testing.B) {
  144. for _, tt := range testfiles {
  145. test := tt
  146. b.Run(test.name, func(b *testing.B) {
  147. var s Scratch
  148. buf0, err := test.fn()
  149. if err != nil {
  150. b.Fatal(err)
  151. }
  152. _, err = Compress(buf0, &s)
  153. if err != test.err {
  154. b.Fatal("unexpected error:", err)
  155. }
  156. if err != nil {
  157. b.Skip("skipping benchmark: ", err)
  158. return
  159. }
  160. b.ResetTimer()
  161. b.ReportAllocs()
  162. b.SetBytes(int64(len(buf0)))
  163. for i := 0; i < b.N; i++ {
  164. _, _ = Compress(buf0, &s)
  165. }
  166. })
  167. }
  168. }
  169. func TestReadNCount(t *testing.T) {
  170. for i := range testfiles {
  171. var s Scratch
  172. test := testfiles[i]
  173. t.Run(test.name, func(t *testing.T) {
  174. name := test.name + ": "
  175. buf0, err := testfiles[i].fn()
  176. if err != nil {
  177. t.Fatal(err)
  178. }
  179. b, err := Compress(buf0, &s)
  180. if err != test.err {
  181. t.Error(err)
  182. return
  183. }
  184. if err != nil {
  185. t.Skip(name + err.Error())
  186. return
  187. }
  188. t.Logf("%s: %d -> %d bytes (%.2f:1)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
  189. //t.Logf("%v", b)
  190. var s2 Scratch
  191. dc, err := Decompress(b, &s2)
  192. if err != nil {
  193. t.Fatal(err)
  194. }
  195. want := s.norm[:s.symbolLen]
  196. got := s2.norm[:s2.symbolLen]
  197. if !reflect.DeepEqual(want, got) {
  198. if s.actualTableLog != s2.actualTableLog {
  199. t.Errorf(name+"norm table, want tablelog: %d, got %d", s.actualTableLog, s2.actualTableLog)
  200. }
  201. if s.symbolLen != s2.symbolLen {
  202. t.Errorf(name+"norm table, want size: %d, got %d", s.symbolLen, s2.symbolLen)
  203. }
  204. t.Errorf(name + "norm table, got delta: \n")
  205. return
  206. }
  207. for i, dec := range s2.decTable {
  208. dd := dec.symbol
  209. ee := s.ct.tableSymbol[i]
  210. if dd != ee {
  211. t.Errorf("table symbol mismatch. idx %d, enc: %v, dec:%v", i, ee, dd)
  212. break
  213. }
  214. }
  215. if dc != nil {
  216. if len(buf0) != len(dc) {
  217. t.Errorf(name+"decompressed, want size: %d, got %d", len(buf0), len(dc))
  218. if len(buf0) > len(dc) {
  219. buf0 = buf0[:len(dc)]
  220. } else {
  221. dc = dc[:len(buf0)]
  222. }
  223. if !bytes.Equal(buf0, dc) {
  224. t.Errorf(name+"decompressed, got delta: (in) %v != (out) %v\n", buf0, dc)
  225. }
  226. return
  227. }
  228. if !bytes.Equal(buf0, dc) {
  229. t.Errorf(name + "decompressed, got delta.")
  230. }
  231. if !t.Failed() {
  232. t.Log("... roundtrip ok!")
  233. }
  234. }
  235. })
  236. }
  237. }
  238. func BenchmarkDecompress(b *testing.B) {
  239. for _, tt := range testfiles {
  240. test := tt
  241. b.Run(test.name, func(b *testing.B) {
  242. var s, s2 Scratch
  243. buf0, err := test.fn()
  244. if err != nil {
  245. b.Fatal(err)
  246. }
  247. out, err := Compress(buf0, &s)
  248. if err != test.err {
  249. b.Fatal(err)
  250. }
  251. if err != nil {
  252. b.Skip(test.name + ": " + err.Error())
  253. return
  254. }
  255. got, err := Decompress(out, &s2)
  256. if err != nil {
  257. b.Fatal(err)
  258. }
  259. if !bytes.Equal(buf0, got) {
  260. b.Fatal("output mismatch")
  261. }
  262. b.ResetTimer()
  263. b.ReportAllocs()
  264. b.SetBytes(int64(len(buf0)))
  265. for i := 0; i < b.N; i++ {
  266. _, err = Decompress(out, &s2)
  267. if err != nil {
  268. b.Fatal(err)
  269. }
  270. }
  271. })
  272. }
  273. }