123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288 |
- // Copyright 2018 Klaus Post. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
- package fse
- import (
- "bytes"
- "fmt"
- "os"
- "reflect"
- "strings"
- "testing"
- )
- type inputFn func() ([]byte, error)
- var testfiles = []struct {
- name string
- fn inputFn
- err error
- }{
- // gettysburg.txt is a small plain text.
- {name: "gettysburg", fn: func() ([]byte, error) { return os.ReadFile("../testdata/gettysburg.txt") }},
- // Digits is the digits of the irrational number e. Its decimal representation
- // does not repeat, but there are only 10 possible digits, so it should be
- // reasonably compressible.
- {name: "digits", fn: func() ([]byte, error) { return os.ReadFile("../testdata/e.txt") }},
- // Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
- {name: "twain", fn: func() ([]byte, error) { return os.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt") }},
- // Random bytes
- {name: "random", fn: func() ([]byte, error) { return os.ReadFile("../testdata/sharnd.out") }, err: ErrIncompressible},
- // Low entropy
- {name: "low-ent", fn: func() ([]byte, error) { return []byte(strings.Repeat("1221", 10000)), nil }},
- // Super Low entropy
- {name: "superlow-ent", fn: func() ([]byte, error) { return []byte(strings.Repeat("1", 10000) + strings.Repeat("2", 500)), nil }},
- // Zero bytes
- {name: "zeroes", fn: func() ([]byte, error) { return make([]byte, 10000), nil }, err: ErrUseRLE},
- {name: "crash1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash1.bin") }, err: ErrIncompressible},
- {name: "crash2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash2.bin") }, err: ErrIncompressible},
- {name: "crash3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash3.bin") }, err: ErrIncompressible},
- {name: "endzerobits", fn: func() ([]byte, error) { return os.ReadFile("../testdata/endzerobits.bin") }, err: nil},
- {name: "endnonzero", fn: func() ([]byte, error) { return os.ReadFile("../testdata/endnonzero.bin") }, err: ErrIncompressible},
- {name: "case1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case1.bin") }, err: ErrIncompressible},
- {name: "case2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case2.bin") }, err: ErrIncompressible},
- {name: "case3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/case3.bin") }, err: ErrIncompressible},
- {name: "pngdata.001", fn: func() ([]byte, error) { return os.ReadFile("../testdata/pngdata.bin") }, err: nil},
- {name: "normcount2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/normcount2.bin") }, err: nil},
- }
- var decTestfiles = []struct {
- name string
- fn inputFn
- err string
- }{
- // gettysburg.txt is a small plain text.
- {name: "hang1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang1.bin") }, err: "corruption detected (bitCount 252 > 32)"},
- {name: "hang2", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang2.bin") }, err: "newState (0) == oldState (0) and no bits"},
- {name: "hang3", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-hang3.bin") }, err: "maxSymbolValue too small"},
- {name: "symlen1", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-symlen1.bin") }, err: "symbolLen (257) too big"},
- {name: "crash4", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash4.bin") }, err: "symbolLen (1) too small"},
- {name: "crash5", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash5.bin") }, err: "symbolLen (1) too small"},
- {name: "crash6", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-crash6.bin") }, err: "newState (32768) outside table size (32768)"},
- {name: "something", fn: func() ([]byte, error) { return os.ReadFile("../testdata/fse-artifact3.bin") }, err: "corrupt stream, did not find end of stream"},
- }
- func TestCompress(t *testing.T) {
- for _, test := range testfiles {
- t.Run(test.name, func(t *testing.T) {
- var s Scratch
- buf0, err := test.fn()
- if err != nil {
- t.Fatal(err)
- }
- b, err := Compress(buf0, &s)
- if err != test.err {
- t.Errorf("want error %v (%T), got %v (%T)", test.err, test.err, err, err)
- }
- if b == nil {
- t.Log(test.name + ": not compressible")
- return
- }
- t.Logf("%s: %d -> %d bytes (%.2f:1)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
- })
- }
- }
- func ExampleCompress() {
- // Read data
- data, err := os.ReadFile("../testdata/e.txt")
- if err != nil {
- panic(err)
- }
- // Create re-usable scratch buffer.
- var s Scratch
- b, err := Compress(data, &s)
- if err != nil {
- panic(err)
- }
- fmt.Printf("Compress: %d -> %d bytes (%.2f:1)\n", len(data), len(b), float64(len(data))/float64(len(b)))
- // OUTPUT: Compress: 100003 -> 41564 bytes (2.41:1)
- }
- func TestDecompress(t *testing.T) {
- for _, test := range decTestfiles {
- t.Run(test.name, func(t *testing.T) {
- var s Scratch
- s.DecompressLimit = 1 << 20
- buf0, err := test.fn()
- if err != nil {
- t.Fatal(err)
- }
- b, err := Decompress(buf0, &s)
- if fmt.Sprint(err) != test.err {
- t.Errorf("want error %q, got %q (%T)", test.err, err, err)
- return
- }
- if err != nil {
- return
- }
- if len(b) == 0 {
- t.Error(test.name + ": no output")
- return
- }
- t.Logf("%s: %d -> %d bytes (1:%.2f)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
- })
- }
- }
- func ExampleDecompress() {
- // Read data
- data, err := os.ReadFile("../testdata/e.txt")
- if err != nil {
- panic(err)
- }
- // Create re-usable scratch buffer.
- var s Scratch
- b, err := Compress(data, &s)
- if err != nil {
- panic(err)
- }
- // Since we use the output of compression, it cannot be used as output for decompression.
- s.Out = make([]byte, 0, len(data))
- d, err := Decompress(b, &s)
- if err != nil {
- panic(err)
- }
- fmt.Printf("Input matches: %t\n", bytes.Equal(d, data))
- // OUTPUT: Input matches: true
- }
- func BenchmarkCompress(b *testing.B) {
- for _, tt := range testfiles {
- test := tt
- b.Run(test.name, func(b *testing.B) {
- var s Scratch
- buf0, err := test.fn()
- if err != nil {
- b.Fatal(err)
- }
- _, err = Compress(buf0, &s)
- if err != test.err {
- b.Fatal("unexpected error:", err)
- }
- if err != nil {
- b.Skip("skipping benchmark: ", err)
- return
- }
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(buf0)))
- for i := 0; i < b.N; i++ {
- _, _ = Compress(buf0, &s)
- }
- })
- }
- }
- func TestReadNCount(t *testing.T) {
- for i := range testfiles {
- var s Scratch
- test := testfiles[i]
- t.Run(test.name, func(t *testing.T) {
- name := test.name + ": "
- buf0, err := testfiles[i].fn()
- if err != nil {
- t.Fatal(err)
- }
- b, err := Compress(buf0, &s)
- if err != test.err {
- t.Error(err)
- return
- }
- if err != nil {
- t.Skip(name + err.Error())
- return
- }
- t.Logf("%s: %d -> %d bytes (%.2f:1)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)))
- //t.Logf("%v", b)
- var s2 Scratch
- dc, err := Decompress(b, &s2)
- if err != nil {
- t.Fatal(err)
- }
- want := s.norm[:s.symbolLen]
- got := s2.norm[:s2.symbolLen]
- if !reflect.DeepEqual(want, got) {
- if s.actualTableLog != s2.actualTableLog {
- t.Errorf(name+"norm table, want tablelog: %d, got %d", s.actualTableLog, s2.actualTableLog)
- }
- if s.symbolLen != s2.symbolLen {
- t.Errorf(name+"norm table, want size: %d, got %d", s.symbolLen, s2.symbolLen)
- }
- t.Errorf(name + "norm table, got delta: \n")
- return
- }
- for i, dec := range s2.decTable {
- dd := dec.symbol
- ee := s.ct.tableSymbol[i]
- if dd != ee {
- t.Errorf("table symbol mismatch. idx %d, enc: %v, dec:%v", i, ee, dd)
- break
- }
- }
- if dc != nil {
- if len(buf0) != len(dc) {
- t.Errorf(name+"decompressed, want size: %d, got %d", len(buf0), len(dc))
- if len(buf0) > len(dc) {
- buf0 = buf0[:len(dc)]
- } else {
- dc = dc[:len(buf0)]
- }
- if !bytes.Equal(buf0, dc) {
- t.Errorf(name+"decompressed, got delta: (in) %v != (out) %v\n", buf0, dc)
- }
- return
- }
- if !bytes.Equal(buf0, dc) {
- t.Errorf(name + "decompressed, got delta.")
- }
- if !t.Failed() {
- t.Log("... roundtrip ok!")
- }
- }
- })
- }
- }
- func BenchmarkDecompress(b *testing.B) {
- for _, tt := range testfiles {
- test := tt
- b.Run(test.name, func(b *testing.B) {
- var s, s2 Scratch
- buf0, err := test.fn()
- if err != nil {
- b.Fatal(err)
- }
- out, err := Compress(buf0, &s)
- if err != test.err {
- b.Fatal(err)
- }
- if err != nil {
- b.Skip(test.name + ": " + err.Error())
- return
- }
- got, err := Decompress(out, &s2)
- if err != nil {
- b.Fatal(err)
- }
- if !bytes.Equal(buf0, got) {
- b.Fatal("output mismatch")
- }
- b.ResetTimer()
- b.ReportAllocs()
- b.SetBytes(int64(len(buf0)))
- for i := 0; i < b.N; i++ {
- _, err = Decompress(out, &s2)
- if err != nil {
- b.Fatal(err)
- }
- }
- })
- }
- }
|