encoder_test.go 30 KB


  1. // Copyright 2019+ Klaus Post. All rights reserved.
  2. // License information can be found in the LICENSE file.
  3. // Based on work by Yann Collet, released under BSD License.
  4. package zstd
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "math/rand"
  10. "os"
  11. "runtime"
  12. "strings"
  13. "sync"
  14. "testing"
  15. "time"
  16. "github.com/klauspost/compress/zip"
  17. "github.com/klauspost/compress/zstd/internal/xxhash"
  18. )
// testWindowSizes lists the encoder window sizes that the option matrix and
// the per-window tests in this file iterate over.
var testWindowSizes = []int{MinWindowSize, 1 << 16, 1 << 22, 1 << 24}
// testEncOpt pairs a subtest name with the encoder options it stands for.
type testEncOpt struct {
	// name is used as the subtest name.
	name string
	// o holds the options handed to NewWriter.
	o []EOption
}
  24. func getEncOpts(cMax int) []testEncOpt {
  25. var o []testEncOpt
  26. for level := speedNotSet + 1; level < speedLast; level++ {
  27. if isRaceTest && level >= SpeedBestCompression {
  28. break
  29. }
  30. for conc := 1; conc <= 4; conc *= 2 {
  31. for _, wind := range testWindowSizes {
  32. addOpt := func(name string, options ...EOption) {
  33. opts := append([]EOption(nil), WithEncoderLevel(level), WithEncoderConcurrency(conc), WithWindowSize(wind))
  34. name = fmt.Sprintf("%s-c%d-w%dk-%s", level.String(), conc, wind/1024, name)
  35. o = append(o, testEncOpt{name: name, o: append(opts, options...)})
  36. }
  37. addOpt("default")
  38. if testing.Short() {
  39. break
  40. }
  41. addOpt("nocrc", WithEncoderCRC(false))
  42. addOpt("lowmem", WithLowerEncoderMem(true))
  43. addOpt("alllit", WithAllLitEntropyCompression(true))
  44. addOpt("nolit", WithNoEntropyCompression(true))
  45. addOpt("pad1k", WithEncoderPadding(1024))
  46. addOpt("zerof", WithZeroFrames(true))
  47. addOpt("1seg", WithSingleSegment(true))
  48. }
  49. if testing.Short() && conc == 2 {
  50. break
  51. }
  52. if conc >= cMax {
  53. break
  54. }
  55. }
  56. }
  57. return o
  58. }
  59. func TestEncoder_EncodeAllSimple(t *testing.T) {
  60. in, err := os.ReadFile("testdata/z000028")
  61. if err != nil {
  62. t.Fatal(err)
  63. }
  64. dec, err := NewReader(nil)
  65. if err != nil {
  66. t.Fatal(err)
  67. }
  68. defer dec.Close()
  69. in = append(in, in...)
  70. for _, opts := range getEncOpts(4) {
  71. t.Run(opts.name, func(t *testing.T) {
  72. runtime.GC()
  73. e, err := NewWriter(nil, opts.o...)
  74. if err != nil {
  75. t.Fatal(err)
  76. }
  77. defer e.Close()
  78. start := time.Now()
  79. dst := e.EncodeAll(in, nil)
  80. //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  81. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  82. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  83. decoded, err := dec.DecodeAll(dst, nil)
  84. if err != nil {
  85. t.Error(err, len(decoded))
  86. }
  87. if !bytes.Equal(decoded, in) {
  88. os.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm)
  89. os.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm)
  90. t.Fatal("Decoded does not match")
  91. }
  92. //t.Log("Encoded content matched")
  93. })
  94. }
  95. }
  96. func TestEncoder_EncodeAllConcurrent(t *testing.T) {
  97. in, err := os.ReadFile("testdata/z000028")
  98. if err != nil {
  99. t.Fatal(err)
  100. }
  101. in = append(in, in...)
  102. // When running race no more than 8k goroutines allowed.
  103. n := 400 / runtime.GOMAXPROCS(0)
  104. if testing.Short() {
  105. n = 20 / runtime.GOMAXPROCS(0)
  106. }
  107. dec, err := NewReader(nil)
  108. if err != nil {
  109. t.Fatal(err)
  110. }
  111. defer dec.Close()
  112. for _, opts := range getEncOpts(2) {
  113. t.Run(opts.name, func(t *testing.T) {
  114. rng := rand.New(rand.NewSource(0x1337))
  115. e, err := NewWriter(nil, opts.o...)
  116. if err != nil {
  117. t.Fatal(err)
  118. }
  119. defer e.Close()
  120. var wg sync.WaitGroup
  121. wg.Add(n)
  122. for i := 0; i < n; i++ {
  123. in := in[rng.Int()&1023:]
  124. in = in[:rng.Intn(len(in))]
  125. go func() {
  126. defer wg.Done()
  127. dst := e.EncodeAll(in, nil)
  128. if len(dst) > e.MaxEncodedSize(len(in)) {
  129. t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(dst), e.MaxEncodedSize(len(in)))
  130. }
  131. //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  132. decoded, err := dec.DecodeAll(dst, nil)
  133. if err != nil {
  134. t.Error(err, len(decoded))
  135. }
  136. if !bytes.Equal(decoded, in) {
  137. //os.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm)
  138. //os.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm)
  139. t.Error("Decoded does not match")
  140. return
  141. }
  142. }()
  143. }
  144. wg.Wait()
  145. //t.Log("Encoded content matched.", n, "goroutines")
  146. })
  147. }
  148. }
  149. func TestEncoder_EncodeAllEncodeXML(t *testing.T) {
  150. f, err := os.Open("testdata/xml.zst")
  151. if err != nil {
  152. t.Fatal(err)
  153. }
  154. defer f.Close()
  155. dec, err := NewReader(f)
  156. if err != nil {
  157. t.Fatal(err)
  158. }
  159. defer dec.Close()
  160. in, err := io.ReadAll(dec)
  161. if err != nil {
  162. t.Fatal(err)
  163. }
  164. if testing.Short() {
  165. in = in[:10000]
  166. }
  167. for level := speedNotSet + 1; level < speedLast; level++ {
  168. t.Run(level.String(), func(t *testing.T) {
  169. if isRaceTest && level >= SpeedBestCompression {
  170. t.SkipNow()
  171. }
  172. e, err := NewWriter(nil, WithEncoderLevel(level))
  173. if err != nil {
  174. t.Fatal(err)
  175. }
  176. defer e.Close()
  177. start := time.Now()
  178. dst := e.EncodeAll(in, nil)
  179. if len(dst) > e.MaxEncodedSize(len(in)) {
  180. t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(dst), e.MaxEncodedSize(len(in)))
  181. }
  182. //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  183. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  184. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  185. decoded, err := dec.DecodeAll(dst, nil)
  186. if err != nil {
  187. t.Error(err, len(decoded))
  188. }
  189. if !bytes.Equal(decoded, in) {
  190. os.WriteFile("testdata/"+t.Name()+"-xml.got", decoded, os.ModePerm)
  191. t.Error("Decoded does not match")
  192. return
  193. }
  194. //t.Log("Encoded content matched")
  195. })
  196. }
  197. }
  198. func TestEncoderRegression(t *testing.T) {
  199. defer timeout(4 * time.Minute)()
  200. data, err := os.ReadFile("testdata/comp-crashers.zip")
  201. if err != nil {
  202. t.Fatal(err)
  203. }
  204. // We can't close the decoder.
  205. dec, err := NewReader(nil)
  206. if err != nil {
  207. t.Error(err)
  208. return
  209. }
  210. defer dec.Close()
  211. for _, opts := range getEncOpts(2) {
  212. t.Run(opts.name, func(t *testing.T) {
  213. zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
  214. if err != nil {
  215. t.Fatal(err)
  216. }
  217. enc, err := NewWriter(
  218. nil,
  219. opts.o...,
  220. )
  221. if err != nil {
  222. t.Fatal(err)
  223. }
  224. defer enc.Close()
  225. for i, tt := range zr.File {
  226. if !strings.HasSuffix(t.Name(), "") {
  227. continue
  228. }
  229. if testing.Short() && i > 10 {
  230. break
  231. }
  232. t.Run(tt.Name, func(t *testing.T) {
  233. r, err := tt.Open()
  234. if err != nil {
  235. t.Error(err)
  236. return
  237. }
  238. in, err := io.ReadAll(r)
  239. if err != nil {
  240. t.Error(err)
  241. }
  242. encoded := enc.EncodeAll(in, nil)
  243. if len(encoded) > enc.MaxEncodedSize(len(in)) {
  244. t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(encoded), enc.MaxEncodedSize(len(in)))
  245. }
  246. // Usually too small...
  247. got, err := dec.DecodeAll(encoded, make([]byte, 0, len(in)))
  248. if err != nil {
  249. t.Logf("error: %v\nwant: %v\ngot: %v", err, len(in), len(got))
  250. t.Fatal(err)
  251. }
  252. // Use the Writer
  253. var dst bytes.Buffer
  254. enc.ResetContentSize(&dst, int64(len(in)))
  255. _, err = enc.Write(in)
  256. if err != nil {
  257. t.Error(err)
  258. }
  259. err = enc.Close()
  260. if err != nil {
  261. t.Error(err)
  262. }
  263. encoded = dst.Bytes()
  264. if len(encoded) > enc.MaxEncodedSize(len(in)) {
  265. t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(encoded), enc.MaxEncodedSize(len(in)))
  266. }
  267. got, err = dec.DecodeAll(encoded, make([]byte, 0, len(in)/2))
  268. if err != nil {
  269. t.Logf("error: %v\nwant: %v\ngot: %v", err, in, got)
  270. t.Error(err)
  271. }
  272. })
  273. }
  274. })
  275. }
  276. }
  277. func TestEncoder_EncodeAllTwain(t *testing.T) {
  278. in, err := os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt")
  279. if err != nil {
  280. t.Fatal(err)
  281. }
  282. testWindowSizes := testWindowSizes
  283. if testing.Short() {
  284. testWindowSizes = []int{1 << 20}
  285. }
  286. dec, err := NewReader(nil)
  287. if err != nil {
  288. t.Fatal(err)
  289. }
  290. defer dec.Close()
  291. for level := speedNotSet + 1; level < speedLast; level++ {
  292. t.Run(level.String(), func(t *testing.T) {
  293. if isRaceTest && level >= SpeedBestCompression {
  294. t.SkipNow()
  295. }
  296. for _, windowSize := range testWindowSizes {
  297. t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
  298. e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
  299. if err != nil {
  300. t.Fatal(err)
  301. }
  302. defer e.Close()
  303. start := time.Now()
  304. dst := e.EncodeAll(in, nil)
  305. t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  306. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  307. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  308. decoded, err := dec.DecodeAll(dst, nil)
  309. if err != nil {
  310. t.Error(err, len(decoded))
  311. }
  312. if !bytes.Equal(decoded, in) {
  313. os.WriteFile("testdata/"+t.Name()+"-Mark.Twain-Tom.Sawyer.txt.got", decoded, os.ModePerm)
  314. t.Fatal("Decoded does not match")
  315. }
  316. t.Log("Encoded content matched")
  317. })
  318. }
  319. })
  320. }
  321. }
  322. func TestEncoder_EncodeAllPi(t *testing.T) {
  323. in, err := os.ReadFile("../testdata/pi.txt")
  324. if err != nil {
  325. t.Fatal(err)
  326. }
  327. testWindowSizes := testWindowSizes
  328. if testing.Short() {
  329. testWindowSizes = []int{1 << 20}
  330. }
  331. dec, err := NewReader(nil)
  332. if err != nil {
  333. t.Fatal(err)
  334. }
  335. defer dec.Close()
  336. for level := speedNotSet + 1; level < speedLast; level++ {
  337. t.Run(level.String(), func(t *testing.T) {
  338. if isRaceTest && level >= SpeedBestCompression {
  339. t.SkipNow()
  340. }
  341. for _, windowSize := range testWindowSizes {
  342. t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
  343. e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
  344. if err != nil {
  345. t.Fatal(err)
  346. }
  347. defer e.Close()
  348. start := time.Now()
  349. dst := e.EncodeAll(in, nil)
  350. t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  351. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  352. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  353. decoded, err := dec.DecodeAll(dst, nil)
  354. if err != nil {
  355. t.Error(err, len(decoded))
  356. }
  357. if !bytes.Equal(decoded, in) {
  358. os.WriteFile("testdata/"+t.Name()+"-pi.txt.got", decoded, os.ModePerm)
  359. t.Fatal("Decoded does not match")
  360. }
  361. t.Log("Encoded content matched")
  362. })
  363. }
  364. })
  365. }
  366. }
  367. func TestWithEncoderPadding(t *testing.T) {
  368. n := 100
  369. if testing.Short() {
  370. n = 2
  371. }
  372. rng := rand.New(rand.NewSource(0x1337))
  373. d, err := NewReader(nil)
  374. if err != nil {
  375. t.Fatal(err)
  376. }
  377. defer d.Close()
  378. for i := 0; i < n; i++ {
  379. padding := (rng.Int() & 0xfff) + 1
  380. src := make([]byte, (rng.Int()&0xfffff)+1)
  381. for i := range src {
  382. src[i] = uint8(rng.Uint32()) & 7
  383. }
  384. e, err := NewWriter(nil, WithEncoderPadding(padding), WithEncoderCRC(rng.Uint32()&1 == 0))
  385. if err != nil {
  386. t.Fatal(err)
  387. }
  388. // Test the added padding is invisible.
  389. dst := e.EncodeAll(src, nil)
  390. if len(dst)%padding != 0 {
  391. t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
  392. }
  393. got, err := d.DecodeAll(dst, nil)
  394. if err != nil {
  395. t.Fatal(err)
  396. }
  397. if !bytes.Equal(src, got) {
  398. t.Fatal("output mismatch")
  399. }
  400. // Test when we supply data as well.
  401. dst = e.EncodeAll(src, make([]byte, rng.Int()&255))
  402. if len(dst)%padding != 0 {
  403. t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
  404. }
  405. // Test using the writer.
  406. var buf bytes.Buffer
  407. e.ResetContentSize(&buf, int64(len(src)))
  408. _, err = io.Copy(e, bytes.NewBuffer(src))
  409. if err != nil {
  410. t.Fatal(err)
  411. }
  412. err = e.Close()
  413. if err != nil {
  414. t.Fatal(err)
  415. }
  416. dst = buf.Bytes()
  417. if len(dst)%padding != 0 {
  418. t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
  419. }
  420. // Test the added padding is invisible.
  421. got, err = d.DecodeAll(dst, nil)
  422. if err != nil {
  423. t.Fatal(err)
  424. }
  425. if !bytes.Equal(src, got) {
  426. t.Fatal("output mismatch")
  427. }
  428. // Try after reset
  429. buf.Reset()
  430. e.Reset(&buf)
  431. _, err = io.Copy(e, bytes.NewBuffer(src))
  432. if err != nil {
  433. t.Fatal(err)
  434. }
  435. err = e.Close()
  436. if err != nil {
  437. t.Fatal(err)
  438. }
  439. dst = buf.Bytes()
  440. if len(dst)%padding != 0 {
  441. t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
  442. }
  443. // Test the added padding is invisible.
  444. got, err = d.DecodeAll(dst, nil)
  445. if err != nil {
  446. t.Fatal(err)
  447. }
  448. if !bytes.Equal(src, got) {
  449. t.Fatal("output mismatch")
  450. }
  451. }
  452. }
  453. func TestEncoder_EncoderXML(t *testing.T) {
  454. testEncoderRoundtrip(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe})
  455. testEncoderRoundtripWriter(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe})
  456. }
  457. func TestEncoder_EncoderTwain(t *testing.T) {
  458. testEncoderRoundtrip(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6})
  459. testEncoderRoundtripWriter(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6})
  460. }
  461. func TestEncoder_EncoderPi(t *testing.T) {
  462. testEncoderRoundtrip(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb})
  463. testEncoderRoundtripWriter(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb})
  464. }
  465. func TestEncoder_EncoderSilesia(t *testing.T) {
  466. testEncoderRoundtrip(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b})
  467. testEncoderRoundtripWriter(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b})
  468. }
  469. func TestEncoder_EncoderSimple(t *testing.T) {
  470. testEncoderRoundtrip(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95})
  471. testEncoderRoundtripWriter(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95})
  472. }
  473. func TestEncoder_EncoderHTML(t *testing.T) {
  474. testEncoderRoundtrip(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37})
  475. testEncoderRoundtripWriter(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37})
  476. }
// TestEncoder_EncoderEnwik9 is currently a no-op: both roundtrip calls for
// ./testdata/enwik9.zst are commented out.
func TestEncoder_EncoderEnwik9(t *testing.T) {
	//testEncoderRoundtrip(t, "./testdata/enwik9.zst", []byte{0x28, 0xfa, 0xf4, 0x30, 0xca, 0x4b, 0x64, 0x12})
	//testEncoderRoundtripWriter(t, "./testdata/enwik9.zst", []byte{0x28, 0xfa, 0xf4, 0x30, 0xca, 0x4b, 0x64, 0x12})
}
  481. // test roundtrip using io.ReaderFrom interface.
  482. func testEncoderRoundtrip(t *testing.T, file string, wantCRC []byte) {
  483. for _, opt := range getEncOpts(1) {
  484. t.Run(opt.name, func(t *testing.T) {
  485. opt := opt
  486. //t.Parallel()
  487. f, err := os.Open(file)
  488. if err != nil {
  489. if os.IsNotExist(err) {
  490. t.Skip("No input file:", file)
  491. return
  492. }
  493. t.Fatal(err)
  494. }
  495. defer f.Close()
  496. if stat, err := f.Stat(); testing.Short() && err == nil {
  497. if stat.Size() > 10000 {
  498. t.SkipNow()
  499. }
  500. }
  501. input := io.Reader(f)
  502. if strings.HasSuffix(file, ".zst") {
  503. dec, err := NewReader(f)
  504. if err != nil {
  505. t.Fatal(err)
  506. }
  507. input = dec
  508. defer dec.Close()
  509. }
  510. pr, pw := io.Pipe()
  511. dec2, err := NewReader(pr)
  512. if err != nil {
  513. t.Fatal(err)
  514. }
  515. defer dec2.Close()
  516. enc, err := NewWriter(pw, opt.o...)
  517. if err != nil {
  518. t.Fatal(err)
  519. }
  520. defer enc.Close()
  521. var wantSize int64
  522. start := time.Now()
  523. go func() {
  524. n, err := enc.ReadFrom(input)
  525. if err != nil {
  526. t.Error(err)
  527. return
  528. }
  529. wantSize = n
  530. err = enc.Close()
  531. if err != nil {
  532. t.Error(err)
  533. return
  534. }
  535. pw.Close()
  536. }()
  537. var gotSize int64
  538. // Check CRC
  539. d := xxhash.New()
  540. if true {
  541. gotSize, err = io.Copy(d, dec2)
  542. } else {
  543. fout, err := os.Create(file + ".got")
  544. if err != nil {
  545. t.Fatal(err)
  546. }
  547. gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2)
  548. if err != nil {
  549. t.Fatal(err)
  550. }
  551. }
  552. if wantSize != gotSize {
  553. t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize)
  554. }
  555. if err != nil {
  556. t.Fatal(err)
  557. }
  558. if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) {
  559. t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC)
  560. } else if len(wantCRC) != 8 {
  561. t.Logf("Unable to verify CRC: %#v", gotCRC)
  562. } else {
  563. t.Logf("CRC Verified: %#v", gotCRC)
  564. }
  565. t.Log("Encoder len", wantSize)
  566. mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  567. t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec)
  568. })
  569. }
  570. }
  571. type writerWrapper struct {
  572. w io.Writer
  573. }
  574. func (w writerWrapper) Write(p []byte) (n int, err error) {
  575. return w.w.Write(p)
  576. }
  577. // test roundtrip using plain io.Writer interface.
  578. func testEncoderRoundtripWriter(t *testing.T, file string, wantCRC []byte) {
  579. f, err := os.Open(file)
  580. if err != nil {
  581. if os.IsNotExist(err) {
  582. t.Skip("No input file:", file)
  583. return
  584. }
  585. t.Fatal(err)
  586. }
  587. defer f.Close()
  588. if stat, err := f.Stat(); testing.Short() && err == nil {
  589. if stat.Size() > 10000 {
  590. t.SkipNow()
  591. }
  592. }
  593. input := io.Reader(f)
  594. if strings.HasSuffix(file, ".zst") {
  595. dec, err := NewReader(f)
  596. if err != nil {
  597. t.Fatal(err)
  598. }
  599. input = dec
  600. defer dec.Close()
  601. }
  602. pr, pw := io.Pipe()
  603. dec2, err := NewReader(pr)
  604. if err != nil {
  605. t.Fatal(err)
  606. }
  607. defer dec2.Close()
  608. enc, err := NewWriter(pw, WithEncoderCRC(true))
  609. if err != nil {
  610. t.Fatal(err)
  611. }
  612. defer enc.Close()
  613. encW := writerWrapper{w: enc}
  614. var wantSize int64
  615. start := time.Now()
  616. go func() {
  617. n, err := io.CopyBuffer(encW, input, make([]byte, 1337))
  618. if err != nil {
  619. t.Error(err)
  620. return
  621. }
  622. wantSize = n
  623. err = enc.Close()
  624. if err != nil {
  625. t.Error(err)
  626. return
  627. }
  628. pw.Close()
  629. }()
  630. var gotSize int64
  631. // Check CRC
  632. d := xxhash.New()
  633. if true {
  634. gotSize, err = io.Copy(d, dec2)
  635. } else {
  636. fout, err := os.Create(file + ".got")
  637. if err != nil {
  638. t.Fatal(err)
  639. }
  640. gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2)
  641. if err != nil {
  642. t.Fatal(err)
  643. }
  644. }
  645. if wantSize != gotSize {
  646. t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize)
  647. }
  648. if err != nil {
  649. t.Fatal(err)
  650. }
  651. if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) {
  652. t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC)
  653. } else if len(wantCRC) != 8 {
  654. t.Logf("Unable to verify CRC: %#v", gotCRC)
  655. } else {
  656. t.Logf("CRC Verified: %#v", gotCRC)
  657. }
  658. t.Log("Fast Encoder len", wantSize)
  659. mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  660. t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec)
  661. }
  662. func TestEncoder_EncodeAllSilesia(t *testing.T) {
  663. if testing.Short() {
  664. t.SkipNow()
  665. }
  666. in, err := os.ReadFile("testdata/silesia.tar")
  667. if err != nil {
  668. if os.IsNotExist(err) {
  669. t.Skip("Missing testdata/silesia.tar")
  670. return
  671. }
  672. t.Fatal(err)
  673. }
  674. var e Encoder
  675. start := time.Now()
  676. dst := e.EncodeAll(in, nil)
  677. t.Log("Fast Encoder len", len(in), "-> zstd len", len(dst))
  678. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  679. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  680. dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20))
  681. if err != nil {
  682. t.Fatal(err)
  683. }
  684. defer dec.Close()
  685. decoded, err := dec.DecodeAll(dst, nil)
  686. if err != nil {
  687. t.Error(err, len(decoded))
  688. }
  689. if !bytes.Equal(decoded, in) {
  690. os.WriteFile("testdata/"+t.Name()+"-silesia.tar.got", decoded, os.ModePerm)
  691. t.Fatal("Decoded does not match")
  692. }
  693. t.Log("Encoded content matched")
  694. }
  695. func TestEncoderReadFrom(t *testing.T) {
  696. buffer := bytes.NewBuffer(nil)
  697. encoder, err := NewWriter(buffer)
  698. if err != nil {
  699. t.Fatal(err)
  700. }
  701. if _, err := encoder.ReadFrom(strings.NewReader("0")); err != nil {
  702. t.Fatal(err)
  703. }
  704. if err := encoder.Close(); err != nil {
  705. t.Fatal(err)
  706. }
  707. dec, _ := NewReader(nil)
  708. toDec := buffer.Bytes()
  709. toDec = append(toDec, toDec...)
  710. decoded, err := dec.DecodeAll(toDec, nil)
  711. if err != nil {
  712. t.Fatal(err)
  713. }
  714. if !bytes.Equal([]byte("00"), decoded) {
  715. t.Logf("encoded: % x\n", buffer.Bytes())
  716. t.Fatalf("output mismatch, got %s", string(decoded))
  717. }
  718. dec.Close()
  719. }
  720. func TestInterleavedWriteReadFrom(t *testing.T) {
  721. var encoded bytes.Buffer
  722. enc, err := NewWriter(&encoded)
  723. if err != nil {
  724. t.Fatal(err)
  725. }
  726. if _, err := enc.Write([]byte("write1")); err != nil {
  727. t.Fatal(err)
  728. }
  729. if _, err := enc.Write([]byte("write2")); err != nil {
  730. t.Fatal(err)
  731. }
  732. if _, err := enc.ReadFrom(strings.NewReader("readfrom1")); err != nil {
  733. t.Fatal(err)
  734. }
  735. if _, err := enc.Write([]byte("write3")); err != nil {
  736. t.Fatal(err)
  737. }
  738. if err := enc.Close(); err != nil {
  739. t.Fatal(err)
  740. }
  741. dec, err := NewReader(&encoded)
  742. if err != nil {
  743. t.Fatal(err)
  744. }
  745. defer dec.Close()
  746. gotb, err := io.ReadAll(dec)
  747. if err != nil {
  748. t.Fatal(err)
  749. }
  750. got := string(gotb)
  751. if want := "write1write2readfrom1write3"; got != want {
  752. t.Errorf("got decoded %q, want %q", got, want)
  753. }
  754. }
  755. func TestEncoder_EncodeAllEmpty(t *testing.T) {
  756. if testing.Short() {
  757. t.SkipNow()
  758. }
  759. var in []byte
  760. for _, opt := range getEncOpts(1) {
  761. t.Run(opt.name, func(t *testing.T) {
  762. e, err := NewWriter(nil, opt.o...)
  763. if err != nil {
  764. t.Fatal(err)
  765. }
  766. defer e.Close()
  767. dst := e.EncodeAll(in, nil)
  768. t.Log("Block Encoder len", len(in), "-> zstd len", len(dst), dst)
  769. dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20))
  770. if err != nil {
  771. t.Fatal(err)
  772. }
  773. defer dec.Close()
  774. decoded, err := dec.DecodeAll(dst, nil)
  775. if err != nil {
  776. t.Error(err, len(decoded))
  777. }
  778. if !bytes.Equal(decoded, in) {
  779. t.Fatal("Decoded does not match")
  780. }
  781. // Test buffer writer.
  782. var buf bytes.Buffer
  783. e.Reset(&buf)
  784. err = e.Close()
  785. if err != nil {
  786. t.Fatal(err)
  787. }
  788. dst = buf.Bytes()
  789. t.Log("Buffer Encoder len", len(in), "-> zstd len", len(dst))
  790. decoded, err = dec.DecodeAll(dst, nil)
  791. if err != nil {
  792. t.Error(err, len(decoded))
  793. }
  794. if !bytes.Equal(decoded, in) {
  795. t.Fatal("Decoded does not match")
  796. }
  797. t.Log("Encoded content matched")
  798. })
  799. }
  800. }
  801. func TestEncoder_EncodeAllEnwik9(t *testing.T) {
  802. if testing.Short() {
  803. t.SkipNow()
  804. }
  805. file := "testdata/enwik9.zst"
  806. f, err := os.Open(file)
  807. if err != nil {
  808. if os.IsNotExist(err) {
  809. t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" +
  810. "compress it with 'zstd -15 -T0 enwik9' and place it in " + file)
  811. }
  812. }
  813. defer f.Close()
  814. dec, err := NewReader(f)
  815. if err != nil {
  816. t.Fatal(err)
  817. }
  818. defer dec.Close()
  819. in, err := io.ReadAll(dec)
  820. if err != nil {
  821. t.Fatal(err)
  822. }
  823. start := time.Now()
  824. e, err := NewWriter(nil)
  825. dst := e.EncodeAll(in, nil)
  826. if err != nil {
  827. t.Fatal(err)
  828. }
  829. t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
  830. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  831. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  832. decoded, err := dec.DecodeAll(dst, nil)
  833. if err != nil {
  834. t.Error(err, len(decoded))
  835. }
  836. if !bytes.Equal(decoded, in) {
  837. os.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm)
  838. t.Fatal("Decoded does not match")
  839. }
  840. t.Log("Encoded content matched")
  841. }
  842. func TestEncoder_EncoderStreamEnwik9(t *testing.T) {
  843. if testing.Short() {
  844. t.SkipNow()
  845. }
  846. file := "testdata/enwik9.zst"
  847. f, err := os.Open(file)
  848. if err != nil {
  849. if os.IsNotExist(err) {
  850. t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" +
  851. "compress it with 'zstd -15 -T0 enwik9' and place it in " + file)
  852. }
  853. }
  854. defer f.Close()
  855. dec, err := NewReader(f)
  856. if err != nil {
  857. t.Fatal(err)
  858. }
  859. defer dec.Close()
  860. in, err := io.ReadAll(dec)
  861. if err != nil {
  862. t.Fatal(err)
  863. }
  864. start := time.Now()
  865. var dst bytes.Buffer
  866. e, err := NewWriter(&dst)
  867. if err != nil {
  868. t.Fatal(err)
  869. }
  870. _, err = io.Copy(e, bytes.NewBuffer(in))
  871. if err != nil {
  872. t.Fatal(err)
  873. }
  874. e.Close()
  875. t.Log("Full Encoder len", len(in), "-> zstd len", dst.Len())
  876. mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
  877. t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
  878. if false {
  879. decoded, err := dec.DecodeAll(dst.Bytes(), nil)
  880. if err != nil {
  881. t.Error(err, len(decoded))
  882. }
  883. if !bytes.Equal(decoded, in) {
  884. os.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm)
  885. t.Fatal("Decoded does not match")
  886. }
  887. t.Log("Encoded content matched")
  888. }
  889. }
  890. func BenchmarkEncoder_EncodeAllXML(b *testing.B) {
  891. f, err := os.Open("testdata/xml.zst")
  892. if err != nil {
  893. b.Fatal(err)
  894. }
  895. defer f.Close()
  896. dec, err := NewReader(f)
  897. if err != nil {
  898. b.Fatal(err)
  899. }
  900. in, err := io.ReadAll(dec)
  901. if err != nil {
  902. b.Fatal(err)
  903. }
  904. dec.Close()
  905. enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
  906. dst := enc.EncodeAll(in, nil)
  907. wantSize := len(dst)
  908. //b.Log("Output size:", len(dst))
  909. b.ResetTimer()
  910. b.ReportAllocs()
  911. b.SetBytes(int64(len(in)))
  912. for i := 0; i < b.N; i++ {
  913. dst := enc.EncodeAll(in, dst[:0])
  914. if len(dst) != wantSize {
  915. b.Fatal(len(dst), "!=", wantSize)
  916. }
  917. }
  918. }
  919. func BenchmarkEncoder_EncodeAllSimple(b *testing.B) {
  920. in, err := os.ReadFile("testdata/z000028")
  921. if err != nil {
  922. b.Fatal(err)
  923. }
  924. for level := speedNotSet + 1; level < speedLast; level++ {
  925. b.Run(level.String(), func(b *testing.B) {
  926. enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level))
  927. if err != nil {
  928. b.Fatal(err)
  929. }
  930. defer enc.Close()
  931. dst := enc.EncodeAll(in, nil)
  932. wantSize := len(dst)
  933. b.ResetTimer()
  934. b.ReportAllocs()
  935. b.SetBytes(int64(len(in)))
  936. for i := 0; i < b.N; i++ {
  937. dst := enc.EncodeAll(in, dst[:0])
  938. if len(dst) != wantSize {
  939. b.Fatal(len(dst), "!=", wantSize)
  940. }
  941. }
  942. })
  943. }
  944. }
  945. func BenchmarkEncoder_EncodeAllSimple4K(b *testing.B) {
  946. in, err := os.ReadFile("testdata/z000028")
  947. if err != nil {
  948. b.Fatal(err)
  949. }
  950. in = in[:4096]
  951. for level := speedNotSet + 1; level < speedLast; level++ {
  952. b.Run(level.String(), func(b *testing.B) {
  953. enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level))
  954. if err != nil {
  955. b.Fatal(err)
  956. }
  957. defer enc.Close()
  958. dst := enc.EncodeAll(in, nil)
  959. wantSize := len(dst)
  960. b.ResetTimer()
  961. b.ReportAllocs()
  962. b.SetBytes(int64(len(in)))
  963. for i := 0; i < b.N; i++ {
  964. dst := enc.EncodeAll(in, dst[:0])
  965. if len(dst) != wantSize {
  966. b.Fatal(len(dst), "!=", wantSize)
  967. }
  968. }
  969. })
  970. }
  971. }
  972. func BenchmarkEncoder_EncodeAllHTML(b *testing.B) {
  973. in, err := os.ReadFile("../testdata/html.txt")
  974. if err != nil {
  975. b.Fatal(err)
  976. }
  977. enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
  978. dst := enc.EncodeAll(in, nil)
  979. wantSize := len(dst)
  980. b.ResetTimer()
  981. b.ReportAllocs()
  982. b.SetBytes(int64(len(in)))
  983. for i := 0; i < b.N; i++ {
  984. dst := enc.EncodeAll(in, dst[:0])
  985. if len(dst) != wantSize {
  986. b.Fatal(len(dst), "!=", wantSize)
  987. }
  988. }
  989. }
  990. func BenchmarkEncoder_EncodeAllTwain(b *testing.B) {
  991. in, err := os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt")
  992. if err != nil {
  993. b.Fatal(err)
  994. }
  995. enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
  996. dst := enc.EncodeAll(in, nil)
  997. wantSize := len(dst)
  998. b.ResetTimer()
  999. b.ReportAllocs()
  1000. b.SetBytes(int64(len(in)))
  1001. for i := 0; i < b.N; i++ {
  1002. dst := enc.EncodeAll(in, dst[:0])
  1003. if len(dst) != wantSize {
  1004. b.Fatal(len(dst), "!=", wantSize)
  1005. }
  1006. }
  1007. }
  1008. func BenchmarkEncoder_EncodeAllPi(b *testing.B) {
  1009. in, err := os.ReadFile("../testdata/pi.txt")
  1010. if err != nil {
  1011. b.Fatal(err)
  1012. }
  1013. enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
  1014. dst := enc.EncodeAll(in, nil)
  1015. wantSize := len(dst)
  1016. b.ResetTimer()
  1017. b.ReportAllocs()
  1018. b.SetBytes(int64(len(in)))
  1019. for i := 0; i < b.N; i++ {
  1020. dst := enc.EncodeAll(in, dst[:0])
  1021. if len(dst) != wantSize {
  1022. b.Fatal(len(dst), "!=", wantSize)
  1023. }
  1024. }
  1025. }
  1026. func BenchmarkRandom4KEncodeAllFastest(b *testing.B) {
  1027. rng := rand.New(rand.NewSource(1))
  1028. data := make([]byte, 4<<10)
  1029. for i := range data {
  1030. data[i] = uint8(rng.Intn(256))
  1031. }
  1032. enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(1))
  1033. defer enc.Close()
  1034. dst := enc.EncodeAll(data, nil)
  1035. wantSize := len(dst)
  1036. b.ResetTimer()
  1037. b.ReportAllocs()
  1038. b.SetBytes(int64(len(data)))
  1039. for i := 0; i < b.N; i++ {
  1040. dst := enc.EncodeAll(data, dst[:0])
  1041. if len(dst) != wantSize {
  1042. b.Fatal(len(dst), "!=", wantSize)
  1043. }
  1044. }
  1045. }
  1046. func BenchmarkRandom10MBEncodeAllFastest(b *testing.B) {
  1047. rng := rand.New(rand.NewSource(1))
  1048. data := make([]byte, 10<<20)
  1049. rng.Read(data)
  1050. enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(2))
  1051. defer enc.Close()
  1052. dst := enc.EncodeAll(data, nil)
  1053. wantSize := len(dst)
  1054. b.ResetTimer()
  1055. b.ReportAllocs()
  1056. b.SetBytes(int64(len(data)))
  1057. for i := 0; i < b.N; i++ {
  1058. dst := enc.EncodeAll(data, dst[:0])
  1059. if len(dst) != wantSize {
  1060. b.Fatal(len(dst), "!=", wantSize)
  1061. }
  1062. }
  1063. }
  1064. func BenchmarkRandom4KEncodeAllDefault(b *testing.B) {
  1065. rng := rand.New(rand.NewSource(1))
  1066. data := make([]byte, 4<<10)
  1067. rng.Read(data)
  1068. enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
  1069. defer enc.Close()
  1070. dst := enc.EncodeAll(data, nil)
  1071. wantSize := len(dst)
  1072. b.ResetTimer()
  1073. b.ReportAllocs()
  1074. b.SetBytes(int64(len(data)))
  1075. for i := 0; i < b.N; i++ {
  1076. dst := enc.EncodeAll(data, dst[:0])
  1077. if len(dst) != wantSize {
  1078. b.Fatal(len(dst), "!=", wantSize)
  1079. }
  1080. }
  1081. }
  1082. func BenchmarkRandomEncodeAllDefault(b *testing.B) {
  1083. rng := rand.New(rand.NewSource(1))
  1084. data := make([]byte, 10<<20)
  1085. rng.Read(data)
  1086. enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
  1087. defer enc.Close()
  1088. dst := enc.EncodeAll(data, nil)
  1089. wantSize := len(dst)
  1090. b.ResetTimer()
  1091. b.ReportAllocs()
  1092. b.SetBytes(int64(len(data)))
  1093. for i := 0; i < b.N; i++ {
  1094. dst := enc.EncodeAll(data, dst[:0])
  1095. if len(dst) != wantSize {
  1096. b.Fatal(len(dst), "!=", wantSize)
  1097. }
  1098. }
  1099. }
  1100. func BenchmarkRandom10MBEncoderFastest(b *testing.B) {
  1101. rng := rand.New(rand.NewSource(1))
  1102. data := make([]byte, 10<<20)
  1103. rng.Read(data)
  1104. wantSize := int64(len(data))
  1105. enc, _ := NewWriter(io.Discard, WithEncoderLevel(SpeedFastest))
  1106. defer enc.Close()
  1107. n, err := io.Copy(enc, bytes.NewBuffer(data))
  1108. if err != nil {
  1109. b.Fatal(err)
  1110. }
  1111. if n != wantSize {
  1112. b.Fatal(n, "!=", wantSize)
  1113. }
  1114. b.ResetTimer()
  1115. b.ReportAllocs()
  1116. b.SetBytes(wantSize)
  1117. for i := 0; i < b.N; i++ {
  1118. enc.Reset(io.Discard)
  1119. n, err := io.Copy(enc, bytes.NewBuffer(data))
  1120. if err != nil {
  1121. b.Fatal(err)
  1122. }
  1123. if n != wantSize {
  1124. b.Fatal(n, "!=", wantSize)
  1125. }
  1126. }
  1127. }
  1128. func BenchmarkRandomEncoderDefault(b *testing.B) {
  1129. rng := rand.New(rand.NewSource(1))
  1130. data := make([]byte, 10<<20)
  1131. rng.Read(data)
  1132. wantSize := int64(len(data))
  1133. enc, _ := NewWriter(io.Discard, WithEncoderLevel(SpeedDefault))
  1134. defer enc.Close()
  1135. n, err := io.Copy(enc, bytes.NewBuffer(data))
  1136. if err != nil {
  1137. b.Fatal(err)
  1138. }
  1139. if n != wantSize {
  1140. b.Fatal(n, "!=", wantSize)
  1141. }
  1142. b.ResetTimer()
  1143. b.ReportAllocs()
  1144. b.SetBytes(wantSize)
  1145. for i := 0; i < b.N; i++ {
  1146. enc.Reset(io.Discard)
  1147. n, err := io.Copy(enc, bytes.NewBuffer(data))
  1148. if err != nil {
  1149. b.Fatal(err)
  1150. }
  1151. if n != wantSize {
  1152. b.Fatal(n, "!=", wantSize)
  1153. }
  1154. }
  1155. }