12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244 |
- // Copyright 2019+ Klaus Post. All rights reserved.
- // License information can be found in the LICENSE file.
- // Based on work by Yann Collet, released under BSD License.
- package zstd
- import (
- "bytes"
- "fmt"
- "io"
- "math/rand"
- "os"
- "runtime"
- "strings"
- "sync"
- "testing"
- "time"
- "github.com/klauspost/compress/zip"
- "github.com/klauspost/compress/zstd/internal/xxhash"
- )
- var testWindowSizes = []int{MinWindowSize, 1 << 16, 1 << 22, 1 << 24}
- type testEncOpt struct {
- name string
- o []EOption
- }
- func getEncOpts(cMax int) []testEncOpt {
- var o []testEncOpt
- for level := speedNotSet + 1; level < speedLast; level++ {
- if isRaceTest && level >= SpeedBestCompression {
- break
- }
- for conc := 1; conc <= 4; conc *= 2 {
- for _, wind := range testWindowSizes {
- addOpt := func(name string, options ...EOption) {
- opts := append([]EOption(nil), WithEncoderLevel(level), WithEncoderConcurrency(conc), WithWindowSize(wind))
- name = fmt.Sprintf("%s-c%d-w%dk-%s", level.String(), conc, wind/1024, name)
- o = append(o, testEncOpt{name: name, o: append(opts, options...)})
- }
- addOpt("default")
- if testing.Short() {
- break
- }
- addOpt("nocrc", WithEncoderCRC(false))
- addOpt("lowmem", WithLowerEncoderMem(true))
- addOpt("alllit", WithAllLitEntropyCompression(true))
- addOpt("nolit", WithNoEntropyCompression(true))
- addOpt("pad1k", WithEncoderPadding(1024))
- addOpt("zerof", WithZeroFrames(true))
- addOpt("1seg", WithSingleSegment(true))
- }
- if testing.Short() && conc == 2 {
- break
- }
- if conc >= cMax {
- break
- }
- }
- }
- return o
- }
- func TestEncoder_EncodeAllSimple(t *testing.T) {
- in, err := os.ReadFile("testdata/z000028")
- if err != nil {
- t.Fatal(err)
- }
- dec, err := NewReader(nil)
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- in = append(in, in...)
- for _, opts := range getEncOpts(4) {
- t.Run(opts.name, func(t *testing.T) {
- runtime.GC()
- e, err := NewWriter(nil, opts.o...)
- if err != nil {
- t.Fatal(err)
- }
- defer e.Close()
- start := time.Now()
- dst := e.EncodeAll(in, nil)
- //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
- mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
- decoded, err := dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- os.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm)
- os.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm)
- t.Fatal("Decoded does not match")
- }
- //t.Log("Encoded content matched")
- })
- }
- }
- func TestEncoder_EncodeAllConcurrent(t *testing.T) {
- in, err := os.ReadFile("testdata/z000028")
- if err != nil {
- t.Fatal(err)
- }
- in = append(in, in...)
- // When running race no more than 8k goroutines allowed.
- n := 400 / runtime.GOMAXPROCS(0)
- if testing.Short() {
- n = 20 / runtime.GOMAXPROCS(0)
- }
- dec, err := NewReader(nil)
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- for _, opts := range getEncOpts(2) {
- t.Run(opts.name, func(t *testing.T) {
- rng := rand.New(rand.NewSource(0x1337))
- e, err := NewWriter(nil, opts.o...)
- if err != nil {
- t.Fatal(err)
- }
- defer e.Close()
- var wg sync.WaitGroup
- wg.Add(n)
- for i := 0; i < n; i++ {
- in := in[rng.Int()&1023:]
- in = in[:rng.Intn(len(in))]
- go func() {
- defer wg.Done()
- dst := e.EncodeAll(in, nil)
- if len(dst) > e.MaxEncodedSize(len(in)) {
- t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(dst), e.MaxEncodedSize(len(in)))
- }
- //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
- decoded, err := dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- //os.WriteFile("testdata/"+t.Name()+"-z000028.got", decoded, os.ModePerm)
- //os.WriteFile("testdata/"+t.Name()+"-z000028.want", in, os.ModePerm)
- t.Error("Decoded does not match")
- return
- }
- }()
- }
- wg.Wait()
- //t.Log("Encoded content matched.", n, "goroutines")
- })
- }
- }
- func TestEncoder_EncodeAllEncodeXML(t *testing.T) {
- f, err := os.Open("testdata/xml.zst")
- if err != nil {
- t.Fatal(err)
- }
- defer f.Close()
- dec, err := NewReader(f)
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- in, err := io.ReadAll(dec)
- if err != nil {
- t.Fatal(err)
- }
- if testing.Short() {
- in = in[:10000]
- }
- for level := speedNotSet + 1; level < speedLast; level++ {
- t.Run(level.String(), func(t *testing.T) {
- if isRaceTest && level >= SpeedBestCompression {
- t.SkipNow()
- }
- e, err := NewWriter(nil, WithEncoderLevel(level))
- if err != nil {
- t.Fatal(err)
- }
- defer e.Close()
- start := time.Now()
- dst := e.EncodeAll(in, nil)
- if len(dst) > e.MaxEncodedSize(len(in)) {
- t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(dst), e.MaxEncodedSize(len(in)))
- }
- //t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
- mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
- decoded, err := dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- os.WriteFile("testdata/"+t.Name()+"-xml.got", decoded, os.ModePerm)
- t.Error("Decoded does not match")
- return
- }
- //t.Log("Encoded content matched")
- })
- }
- }
- func TestEncoderRegression(t *testing.T) {
- defer timeout(4 * time.Minute)()
- data, err := os.ReadFile("testdata/comp-crashers.zip")
- if err != nil {
- t.Fatal(err)
- }
- // We can't close the decoder.
- dec, err := NewReader(nil)
- if err != nil {
- t.Error(err)
- return
- }
- defer dec.Close()
- for _, opts := range getEncOpts(2) {
- t.Run(opts.name, func(t *testing.T) {
- zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
- if err != nil {
- t.Fatal(err)
- }
- enc, err := NewWriter(
- nil,
- opts.o...,
- )
- if err != nil {
- t.Fatal(err)
- }
- defer enc.Close()
- for i, tt := range zr.File {
- if !strings.HasSuffix(t.Name(), "") {
- continue
- }
- if testing.Short() && i > 10 {
- break
- }
- t.Run(tt.Name, func(t *testing.T) {
- r, err := tt.Open()
- if err != nil {
- t.Error(err)
- return
- }
- in, err := io.ReadAll(r)
- if err != nil {
- t.Error(err)
- }
- encoded := enc.EncodeAll(in, nil)
- if len(encoded) > enc.MaxEncodedSize(len(in)) {
- t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(encoded), enc.MaxEncodedSize(len(in)))
- }
- // Usually too small...
- got, err := dec.DecodeAll(encoded, make([]byte, 0, len(in)))
- if err != nil {
- t.Logf("error: %v\nwant: %v\ngot: %v", err, len(in), len(got))
- t.Fatal(err)
- }
- // Use the Writer
- var dst bytes.Buffer
- enc.ResetContentSize(&dst, int64(len(in)))
- _, err = enc.Write(in)
- if err != nil {
- t.Error(err)
- }
- err = enc.Close()
- if err != nil {
- t.Error(err)
- }
- encoded = dst.Bytes()
- if len(encoded) > enc.MaxEncodedSize(len(in)) {
- t.Errorf("max encoded size for %v: got: %d, want max: %d", len(in), len(encoded), enc.MaxEncodedSize(len(in)))
- }
- got, err = dec.DecodeAll(encoded, make([]byte, 0, len(in)/2))
- if err != nil {
- t.Logf("error: %v\nwant: %v\ngot: %v", err, in, got)
- t.Error(err)
- }
- })
- }
- })
- }
- }
- func TestEncoder_EncodeAllTwain(t *testing.T) {
- in, err := os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt")
- if err != nil {
- t.Fatal(err)
- }
- testWindowSizes := testWindowSizes
- if testing.Short() {
- testWindowSizes = []int{1 << 20}
- }
- dec, err := NewReader(nil)
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- for level := speedNotSet + 1; level < speedLast; level++ {
- t.Run(level.String(), func(t *testing.T) {
- if isRaceTest && level >= SpeedBestCompression {
- t.SkipNow()
- }
- for _, windowSize := range testWindowSizes {
- t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
- e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
- if err != nil {
- t.Fatal(err)
- }
- defer e.Close()
- start := time.Now()
- dst := e.EncodeAll(in, nil)
- t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
- mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
- decoded, err := dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- os.WriteFile("testdata/"+t.Name()+"-Mark.Twain-Tom.Sawyer.txt.got", decoded, os.ModePerm)
- t.Fatal("Decoded does not match")
- }
- t.Log("Encoded content matched")
- })
- }
- })
- }
- }
- func TestEncoder_EncodeAllPi(t *testing.T) {
- in, err := os.ReadFile("../testdata/pi.txt")
- if err != nil {
- t.Fatal(err)
- }
- testWindowSizes := testWindowSizes
- if testing.Short() {
- testWindowSizes = []int{1 << 20}
- }
- dec, err := NewReader(nil)
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- for level := speedNotSet + 1; level < speedLast; level++ {
- t.Run(level.String(), func(t *testing.T) {
- if isRaceTest && level >= SpeedBestCompression {
- t.SkipNow()
- }
- for _, windowSize := range testWindowSizes {
- t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
- e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
- if err != nil {
- t.Fatal(err)
- }
- defer e.Close()
- start := time.Now()
- dst := e.EncodeAll(in, nil)
- t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
- mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
- decoded, err := dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- os.WriteFile("testdata/"+t.Name()+"-pi.txt.got", decoded, os.ModePerm)
- t.Fatal("Decoded does not match")
- }
- t.Log("Encoded content matched")
- })
- }
- })
- }
- }
- func TestWithEncoderPadding(t *testing.T) {
- n := 100
- if testing.Short() {
- n = 2
- }
- rng := rand.New(rand.NewSource(0x1337))
- d, err := NewReader(nil)
- if err != nil {
- t.Fatal(err)
- }
- defer d.Close()
- for i := 0; i < n; i++ {
- padding := (rng.Int() & 0xfff) + 1
- src := make([]byte, (rng.Int()&0xfffff)+1)
- for i := range src {
- src[i] = uint8(rng.Uint32()) & 7
- }
- e, err := NewWriter(nil, WithEncoderPadding(padding), WithEncoderCRC(rng.Uint32()&1 == 0))
- if err != nil {
- t.Fatal(err)
- }
- // Test the added padding is invisible.
- dst := e.EncodeAll(src, nil)
- if len(dst)%padding != 0 {
- t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
- }
- got, err := d.DecodeAll(dst, nil)
- if err != nil {
- t.Fatal(err)
- }
- if !bytes.Equal(src, got) {
- t.Fatal("output mismatch")
- }
- // Test when we supply data as well.
- dst = e.EncodeAll(src, make([]byte, rng.Int()&255))
- if len(dst)%padding != 0 {
- t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
- }
- // Test using the writer.
- var buf bytes.Buffer
- e.ResetContentSize(&buf, int64(len(src)))
- _, err = io.Copy(e, bytes.NewBuffer(src))
- if err != nil {
- t.Fatal(err)
- }
- err = e.Close()
- if err != nil {
- t.Fatal(err)
- }
- dst = buf.Bytes()
- if len(dst)%padding != 0 {
- t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
- }
- // Test the added padding is invisible.
- got, err = d.DecodeAll(dst, nil)
- if err != nil {
- t.Fatal(err)
- }
- if !bytes.Equal(src, got) {
- t.Fatal("output mismatch")
- }
- // Try after reset
- buf.Reset()
- e.Reset(&buf)
- _, err = io.Copy(e, bytes.NewBuffer(src))
- if err != nil {
- t.Fatal(err)
- }
- err = e.Close()
- if err != nil {
- t.Fatal(err)
- }
- dst = buf.Bytes()
- if len(dst)%padding != 0 {
- t.Fatalf("wanted size to be mutiple of %d, got size %d with remainder %d", padding, len(dst), len(dst)%padding)
- }
- // Test the added padding is invisible.
- got, err = d.DecodeAll(dst, nil)
- if err != nil {
- t.Fatal(err)
- }
- if !bytes.Equal(src, got) {
- t.Fatal("output mismatch")
- }
- }
- }
- func TestEncoder_EncoderXML(t *testing.T) {
- testEncoderRoundtrip(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe})
- testEncoderRoundtripWriter(t, "./testdata/xml.zst", []byte{0x56, 0x54, 0x69, 0x8e, 0x40, 0x50, 0x11, 0xe})
- }
- func TestEncoder_EncoderTwain(t *testing.T) {
- testEncoderRoundtrip(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6})
- testEncoderRoundtripWriter(t, "../testdata/Mark.Twain-Tom.Sawyer.txt", []byte{0x12, 0x1f, 0x12, 0x70, 0x79, 0x37, 0x1f, 0xc6})
- }
- func TestEncoder_EncoderPi(t *testing.T) {
- testEncoderRoundtrip(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb})
- testEncoderRoundtripWriter(t, "../testdata/pi.txt", []byte{0xe7, 0xe5, 0x25, 0x39, 0x92, 0xc7, 0x4a, 0xfb})
- }
- func TestEncoder_EncoderSilesia(t *testing.T) {
- testEncoderRoundtrip(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b})
- testEncoderRoundtripWriter(t, "testdata/silesia.tar", []byte{0xa5, 0x5b, 0x5e, 0xe, 0x5e, 0xea, 0x51, 0x6b})
- }
- func TestEncoder_EncoderSimple(t *testing.T) {
- testEncoderRoundtrip(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95})
- testEncoderRoundtripWriter(t, "testdata/z000028", []byte{0x8b, 0x2, 0x37, 0x70, 0x92, 0xb, 0x98, 0x95})
- }
- func TestEncoder_EncoderHTML(t *testing.T) {
- testEncoderRoundtrip(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37})
- testEncoderRoundtripWriter(t, "../testdata/html.txt", []byte{0x35, 0xa9, 0x5c, 0x37, 0x20, 0x9e, 0xc3, 0x37})
- }
// TestEncoder_EncoderEnwik9 is currently a no-op: both round-trip calls for
// ./testdata/enwik9.zst are commented out.
func TestEncoder_EncoderEnwik9(t *testing.T) {
	// testEncoderRoundtrip(t, "./testdata/enwik9.zst", []byte{0x28, 0xfa, 0xf4, 0x30, 0xca, 0x4b, 0x64, 0x12})
	// testEncoderRoundtripWriter(t, "./testdata/enwik9.zst", []byte{0x28, 0xfa, 0xf4, 0x30, 0xca, 0x4b, 0x64, 0x12})
}
- // test roundtrip using io.ReaderFrom interface.
- func testEncoderRoundtrip(t *testing.T, file string, wantCRC []byte) {
- for _, opt := range getEncOpts(1) {
- t.Run(opt.name, func(t *testing.T) {
- opt := opt
- //t.Parallel()
- f, err := os.Open(file)
- if err != nil {
- if os.IsNotExist(err) {
- t.Skip("No input file:", file)
- return
- }
- t.Fatal(err)
- }
- defer f.Close()
- if stat, err := f.Stat(); testing.Short() && err == nil {
- if stat.Size() > 10000 {
- t.SkipNow()
- }
- }
- input := io.Reader(f)
- if strings.HasSuffix(file, ".zst") {
- dec, err := NewReader(f)
- if err != nil {
- t.Fatal(err)
- }
- input = dec
- defer dec.Close()
- }
- pr, pw := io.Pipe()
- dec2, err := NewReader(pr)
- if err != nil {
- t.Fatal(err)
- }
- defer dec2.Close()
- enc, err := NewWriter(pw, opt.o...)
- if err != nil {
- t.Fatal(err)
- }
- defer enc.Close()
- var wantSize int64
- start := time.Now()
- go func() {
- n, err := enc.ReadFrom(input)
- if err != nil {
- t.Error(err)
- return
- }
- wantSize = n
- err = enc.Close()
- if err != nil {
- t.Error(err)
- return
- }
- pw.Close()
- }()
- var gotSize int64
- // Check CRC
- d := xxhash.New()
- if true {
- gotSize, err = io.Copy(d, dec2)
- } else {
- fout, err := os.Create(file + ".got")
- if err != nil {
- t.Fatal(err)
- }
- gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2)
- if err != nil {
- t.Fatal(err)
- }
- }
- if wantSize != gotSize {
- t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize)
- }
- if err != nil {
- t.Fatal(err)
- }
- if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) {
- t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC)
- } else if len(wantCRC) != 8 {
- t.Logf("Unable to verify CRC: %#v", gotCRC)
- } else {
- t.Logf("CRC Verified: %#v", gotCRC)
- }
- t.Log("Encoder len", wantSize)
- mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec)
- })
- }
- }
- type writerWrapper struct {
- w io.Writer
- }
- func (w writerWrapper) Write(p []byte) (n int, err error) {
- return w.w.Write(p)
- }
- // test roundtrip using plain io.Writer interface.
- func testEncoderRoundtripWriter(t *testing.T, file string, wantCRC []byte) {
- f, err := os.Open(file)
- if err != nil {
- if os.IsNotExist(err) {
- t.Skip("No input file:", file)
- return
- }
- t.Fatal(err)
- }
- defer f.Close()
- if stat, err := f.Stat(); testing.Short() && err == nil {
- if stat.Size() > 10000 {
- t.SkipNow()
- }
- }
- input := io.Reader(f)
- if strings.HasSuffix(file, ".zst") {
- dec, err := NewReader(f)
- if err != nil {
- t.Fatal(err)
- }
- input = dec
- defer dec.Close()
- }
- pr, pw := io.Pipe()
- dec2, err := NewReader(pr)
- if err != nil {
- t.Fatal(err)
- }
- defer dec2.Close()
- enc, err := NewWriter(pw, WithEncoderCRC(true))
- if err != nil {
- t.Fatal(err)
- }
- defer enc.Close()
- encW := writerWrapper{w: enc}
- var wantSize int64
- start := time.Now()
- go func() {
- n, err := io.CopyBuffer(encW, input, make([]byte, 1337))
- if err != nil {
- t.Error(err)
- return
- }
- wantSize = n
- err = enc.Close()
- if err != nil {
- t.Error(err)
- return
- }
- pw.Close()
- }()
- var gotSize int64
- // Check CRC
- d := xxhash.New()
- if true {
- gotSize, err = io.Copy(d, dec2)
- } else {
- fout, err := os.Create(file + ".got")
- if err != nil {
- t.Fatal(err)
- }
- gotSize, err = io.Copy(io.MultiWriter(fout, d), dec2)
- if err != nil {
- t.Fatal(err)
- }
- }
- if wantSize != gotSize {
- t.Errorf("want size (%d) != got size (%d)", wantSize, gotSize)
- }
- if err != nil {
- t.Fatal(err)
- }
- if gotCRC := d.Sum(nil); len(wantCRC) > 0 && !bytes.Equal(gotCRC, wantCRC) {
- t.Errorf("crc mismatch %#v (want) != %#v (got)", wantCRC, gotCRC)
- } else if len(wantCRC) != 8 {
- t.Logf("Unable to verify CRC: %#v", gotCRC)
- } else {
- t.Logf("CRC Verified: %#v", gotCRC)
- }
- t.Log("Fast Encoder len", wantSize)
- mbpersec := (float64(wantSize) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded+Decoded %d bytes with %.2f MB/s", wantSize, mbpersec)
- }
- func TestEncoder_EncodeAllSilesia(t *testing.T) {
- if testing.Short() {
- t.SkipNow()
- }
- in, err := os.ReadFile("testdata/silesia.tar")
- if err != nil {
- if os.IsNotExist(err) {
- t.Skip("Missing testdata/silesia.tar")
- return
- }
- t.Fatal(err)
- }
- var e Encoder
- start := time.Now()
- dst := e.EncodeAll(in, nil)
- t.Log("Fast Encoder len", len(in), "-> zstd len", len(dst))
- mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
- dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20))
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- decoded, err := dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- os.WriteFile("testdata/"+t.Name()+"-silesia.tar.got", decoded, os.ModePerm)
- t.Fatal("Decoded does not match")
- }
- t.Log("Encoded content matched")
- }
- func TestEncoderReadFrom(t *testing.T) {
- buffer := bytes.NewBuffer(nil)
- encoder, err := NewWriter(buffer)
- if err != nil {
- t.Fatal(err)
- }
- if _, err := encoder.ReadFrom(strings.NewReader("0")); err != nil {
- t.Fatal(err)
- }
- if err := encoder.Close(); err != nil {
- t.Fatal(err)
- }
- dec, _ := NewReader(nil)
- toDec := buffer.Bytes()
- toDec = append(toDec, toDec...)
- decoded, err := dec.DecodeAll(toDec, nil)
- if err != nil {
- t.Fatal(err)
- }
- if !bytes.Equal([]byte("00"), decoded) {
- t.Logf("encoded: % x\n", buffer.Bytes())
- t.Fatalf("output mismatch, got %s", string(decoded))
- }
- dec.Close()
- }
- func TestInterleavedWriteReadFrom(t *testing.T) {
- var encoded bytes.Buffer
- enc, err := NewWriter(&encoded)
- if err != nil {
- t.Fatal(err)
- }
- if _, err := enc.Write([]byte("write1")); err != nil {
- t.Fatal(err)
- }
- if _, err := enc.Write([]byte("write2")); err != nil {
- t.Fatal(err)
- }
- if _, err := enc.ReadFrom(strings.NewReader("readfrom1")); err != nil {
- t.Fatal(err)
- }
- if _, err := enc.Write([]byte("write3")); err != nil {
- t.Fatal(err)
- }
- if err := enc.Close(); err != nil {
- t.Fatal(err)
- }
- dec, err := NewReader(&encoded)
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- gotb, err := io.ReadAll(dec)
- if err != nil {
- t.Fatal(err)
- }
- got := string(gotb)
- if want := "write1write2readfrom1write3"; got != want {
- t.Errorf("got decoded %q, want %q", got, want)
- }
- }
- func TestEncoder_EncodeAllEmpty(t *testing.T) {
- if testing.Short() {
- t.SkipNow()
- }
- var in []byte
- for _, opt := range getEncOpts(1) {
- t.Run(opt.name, func(t *testing.T) {
- e, err := NewWriter(nil, opt.o...)
- if err != nil {
- t.Fatal(err)
- }
- defer e.Close()
- dst := e.EncodeAll(in, nil)
- t.Log("Block Encoder len", len(in), "-> zstd len", len(dst), dst)
- dec, err := NewReader(nil, WithDecoderMaxMemory(220<<20))
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- decoded, err := dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- t.Fatal("Decoded does not match")
- }
- // Test buffer writer.
- var buf bytes.Buffer
- e.Reset(&buf)
- err = e.Close()
- if err != nil {
- t.Fatal(err)
- }
- dst = buf.Bytes()
- t.Log("Buffer Encoder len", len(in), "-> zstd len", len(dst))
- decoded, err = dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- t.Fatal("Decoded does not match")
- }
- t.Log("Encoded content matched")
- })
- }
- }
- func TestEncoder_EncodeAllEnwik9(t *testing.T) {
- if testing.Short() {
- t.SkipNow()
- }
- file := "testdata/enwik9.zst"
- f, err := os.Open(file)
- if err != nil {
- if os.IsNotExist(err) {
- t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" +
- "compress it with 'zstd -15 -T0 enwik9' and place it in " + file)
- }
- }
- defer f.Close()
- dec, err := NewReader(f)
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- in, err := io.ReadAll(dec)
- if err != nil {
- t.Fatal(err)
- }
- start := time.Now()
- e, err := NewWriter(nil)
- dst := e.EncodeAll(in, nil)
- if err != nil {
- t.Fatal(err)
- }
- t.Log("Simple Encoder len", len(in), "-> zstd len", len(dst))
- mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
- decoded, err := dec.DecodeAll(dst, nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- os.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm)
- t.Fatal("Decoded does not match")
- }
- t.Log("Encoded content matched")
- }
- func TestEncoder_EncoderStreamEnwik9(t *testing.T) {
- if testing.Short() {
- t.SkipNow()
- }
- file := "testdata/enwik9.zst"
- f, err := os.Open(file)
- if err != nil {
- if os.IsNotExist(err) {
- t.Skip("To run extended tests, download http://mattmahoney.net/dc/enwik9.zip unzip it \n" +
- "compress it with 'zstd -15 -T0 enwik9' and place it in " + file)
- }
- }
- defer f.Close()
- dec, err := NewReader(f)
- if err != nil {
- t.Fatal(err)
- }
- defer dec.Close()
- in, err := io.ReadAll(dec)
- if err != nil {
- t.Fatal(err)
- }
- start := time.Now()
- var dst bytes.Buffer
- e, err := NewWriter(&dst)
- if err != nil {
- t.Fatal(err)
- }
- _, err = io.Copy(e, bytes.NewBuffer(in))
- if err != nil {
- t.Fatal(err)
- }
- e.Close()
- t.Log("Full Encoder len", len(in), "-> zstd len", dst.Len())
- mbpersec := (float64(len(in)) / (1024 * 1024)) / (float64(time.Since(start)) / (float64(time.Second)))
- t.Logf("Encoded %d bytes with %.2f MB/s", len(in), mbpersec)
- if false {
- decoded, err := dec.DecodeAll(dst.Bytes(), nil)
- if err != nil {
- t.Error(err, len(decoded))
- }
- if !bytes.Equal(decoded, in) {
- os.WriteFile("testdata/"+t.Name()+"-enwik9.got", decoded, os.ModePerm)
- t.Fatal("Decoded does not match")
- }
- t.Log("Encoded content matched")
- }
- }
- func BenchmarkEncoder_EncodeAllXML(b *testing.B) {
- f, err := os.Open("testdata/xml.zst")
- if err != nil {
- b.Fatal(err)
- }
- defer f.Close()
- dec, err := NewReader(f)
- if err != nil {
- b.Fatal(err)
- }
- in, err := io.ReadAll(dec)
- if err != nil {
- b.Fatal(err)
- }
- dec.Close()
- enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
- dst := enc.EncodeAll(in, nil)
- wantSize := len(dst)
- //b.Log("Output size:", len(dst))
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(in)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(in, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- }
- func BenchmarkEncoder_EncodeAllSimple(b *testing.B) {
- in, err := os.ReadFile("testdata/z000028")
- if err != nil {
- b.Fatal(err)
- }
- for level := speedNotSet + 1; level < speedLast; level++ {
- b.Run(level.String(), func(b *testing.B) {
- enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level))
- if err != nil {
- b.Fatal(err)
- }
- defer enc.Close()
- dst := enc.EncodeAll(in, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(in)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(in, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- })
- }
- }
- func BenchmarkEncoder_EncodeAllSimple4K(b *testing.B) {
- in, err := os.ReadFile("testdata/z000028")
- if err != nil {
- b.Fatal(err)
- }
- in = in[:4096]
- for level := speedNotSet + 1; level < speedLast; level++ {
- b.Run(level.String(), func(b *testing.B) {
- enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderLevel(level))
- if err != nil {
- b.Fatal(err)
- }
- defer enc.Close()
- dst := enc.EncodeAll(in, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(in)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(in, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- })
- }
- }
- func BenchmarkEncoder_EncodeAllHTML(b *testing.B) {
- in, err := os.ReadFile("../testdata/html.txt")
- if err != nil {
- b.Fatal(err)
- }
- enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
- dst := enc.EncodeAll(in, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(in)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(in, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- }
- func BenchmarkEncoder_EncodeAllTwain(b *testing.B) {
- in, err := os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt")
- if err != nil {
- b.Fatal(err)
- }
- enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
- dst := enc.EncodeAll(in, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(in)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(in, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- }
- func BenchmarkEncoder_EncodeAllPi(b *testing.B) {
- in, err := os.ReadFile("../testdata/pi.txt")
- if err != nil {
- b.Fatal(err)
- }
- enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
- dst := enc.EncodeAll(in, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(in)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(in, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- }
- func BenchmarkRandom4KEncodeAllFastest(b *testing.B) {
- rng := rand.New(rand.NewSource(1))
- data := make([]byte, 4<<10)
- for i := range data {
- data[i] = uint8(rng.Intn(256))
- }
- enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(1))
- defer enc.Close()
- dst := enc.EncodeAll(data, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(data)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(data, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- }
- func BenchmarkRandom10MBEncodeAllFastest(b *testing.B) {
- rng := rand.New(rand.NewSource(1))
- data := make([]byte, 10<<20)
- rng.Read(data)
- enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(2))
- defer enc.Close()
- dst := enc.EncodeAll(data, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(data)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(data, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- }
- func BenchmarkRandom4KEncodeAllDefault(b *testing.B) {
- rng := rand.New(rand.NewSource(1))
- data := make([]byte, 4<<10)
- rng.Read(data)
- enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
- defer enc.Close()
- dst := enc.EncodeAll(data, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(data)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(data, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- }
- func BenchmarkRandomEncodeAllDefault(b *testing.B) {
- rng := rand.New(rand.NewSource(1))
- data := make([]byte, 10<<20)
- rng.Read(data)
- enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
- defer enc.Close()
- dst := enc.EncodeAll(data, nil)
- wantSize := len(dst)
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(data)))
- for i := 0; i < b.N; i++ {
- dst := enc.EncodeAll(data, dst[:0])
- if len(dst) != wantSize {
- b.Fatal(len(dst), "!=", wantSize)
- }
- }
- }
- func BenchmarkRandom10MBEncoderFastest(b *testing.B) {
- rng := rand.New(rand.NewSource(1))
- data := make([]byte, 10<<20)
- rng.Read(data)
- wantSize := int64(len(data))
- enc, _ := NewWriter(io.Discard, WithEncoderLevel(SpeedFastest))
- defer enc.Close()
- n, err := io.Copy(enc, bytes.NewBuffer(data))
- if err != nil {
- b.Fatal(err)
- }
- if n != wantSize {
- b.Fatal(n, "!=", wantSize)
- }
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(wantSize)
- for i := 0; i < b.N; i++ {
- enc.Reset(io.Discard)
- n, err := io.Copy(enc, bytes.NewBuffer(data))
- if err != nil {
- b.Fatal(err)
- }
- if n != wantSize {
- b.Fatal(n, "!=", wantSize)
- }
- }
- }
- func BenchmarkRandomEncoderDefault(b *testing.B) {
- rng := rand.New(rand.NewSource(1))
- data := make([]byte, 10<<20)
- rng.Read(data)
- wantSize := int64(len(data))
- enc, _ := NewWriter(io.Discard, WithEncoderLevel(SpeedDefault))
- defer enc.Close()
- n, err := io.Copy(enc, bytes.NewBuffer(data))
- if err != nil {
- b.Fatal(err)
- }
- if n != wantSize {
- b.Fatal(n, "!=", wantSize)
- }
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(wantSize)
- for i := 0; i < b.N; i++ {
- enc.Reset(io.Discard)
- n, err := io.Copy(enc, bytes.NewBuffer(data))
- if err != nil {
- b.Fatal(err)
- }
- if n != wantSize {
- b.Fatal(n, "!=", wantSize)
- }
- }
- }
|