ec_test.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. package erasure_coding
  2. import (
  3. "bytes"
  4. "fmt"
  5. "math/rand"
  6. "os"
  7. "testing"
  8. "github.com/klauspost/reedsolomon"
  9. "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
  10. "github.com/chrislusf/seaweedfs/weed/storage/types"
  11. )
  // largeBlockSize is the large block size, in bytes, that the tests in this
  // file pass to generateEcFiles, LocateData and Interval.ToShardIdAndOffset.
  const largeBlockSize = 10000

  // smallBlockSize is the small block size, in bytes, passed alongside
  // largeBlockSize to the same helpers.
  const smallBlockSize = 100
  16. func TestEncodingDecoding(t *testing.T) {
  17. bufferSize := 50
  18. baseFileName := "1"
  19. err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
  20. if err != nil {
  21. t.Logf("generateEcFiles: %v", err)
  22. }
  23. err = WriteSortedFileFromIdx(baseFileName, ".ecx")
  24. if err != nil {
  25. t.Logf("WriteSortedFileFromIdx: %v", err)
  26. }
  27. err = validateFiles(baseFileName)
  28. if err != nil {
  29. t.Logf("WriteSortedFileFromIdx: %v", err)
  30. }
  31. removeGeneratedFiles(baseFileName)
  32. }
  33. func validateFiles(baseFileName string) error {
  34. nm, err := readNeedleMap(baseFileName)
  35. defer nm.Close()
  36. if err != nil {
  37. return fmt.Errorf("readNeedleMap: %v", err)
  38. }
  39. datFile, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
  40. if err != nil {
  41. return fmt.Errorf("failed to open dat file: %v", err)
  42. }
  43. defer datFile.Close()
  44. fi, err := datFile.Stat()
  45. if err != nil {
  46. return fmt.Errorf("failed to stat dat file: %v", err)
  47. }
  48. ecFiles, err := openEcFiles(baseFileName, true)
  49. defer closeEcFiles(ecFiles)
  50. err = nm.AscendingVisit(func(value needle_map.NeedleValue) error {
  51. return assertSame(datFile, fi.Size(), ecFiles, value.Offset, value.Size)
  52. })
  53. if err != nil {
  54. return fmt.Errorf("failed to check ec files: %v", err)
  55. }
  56. return nil
  57. }
  58. func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset types.Offset, size types.Size) error {
  59. data, err := readDatFile(datFile, offset, size)
  60. if err != nil {
  61. return fmt.Errorf("failed to read dat file: %v", err)
  62. }
  63. ecData, err := readEcFile(datSize, ecFiles, offset, size)
  64. if err != nil {
  65. return fmt.Errorf("failed to read ec file: %v", err)
  66. }
  67. if bytes.Compare(data, ecData) != 0 {
  68. return fmt.Errorf("unexpected data read")
  69. }
  70. return nil
  71. }
  72. func readDatFile(datFile *os.File, offset types.Offset, size types.Size) ([]byte, error) {
  73. data := make([]byte, size)
  74. n, err := datFile.ReadAt(data, offset.ToActualOffset())
  75. if err != nil {
  76. return nil, fmt.Errorf("failed to ReadAt dat file: %v", err)
  77. }
  78. if n != int(size) {
  79. return nil, fmt.Errorf("unexpected read size %d, expected %d", n, size)
  80. }
  81. return data, nil
  82. }
  83. func readEcFile(datSize int64, ecFiles []*os.File, offset types.Offset, size types.Size) (data []byte, err error) {
  84. intervals := LocateData(largeBlockSize, smallBlockSize, datSize, offset.ToActualOffset(), size)
  85. for i, interval := range intervals {
  86. if d, e := readOneInterval(interval, ecFiles); e != nil {
  87. return nil, e
  88. } else {
  89. if i == 0 {
  90. data = d
  91. } else {
  92. data = append(data, d...)
  93. }
  94. }
  95. }
  96. return data, nil
  97. }
  98. func readOneInterval(interval Interval, ecFiles []*os.File) (data []byte, err error) {
  99. ecFileIndex, ecFileOffset := interval.ToShardIdAndOffset(largeBlockSize, smallBlockSize)
  100. data = make([]byte, interval.Size)
  101. err = readFromFile(ecFiles[ecFileIndex], data, ecFileOffset)
  102. { // do some ec testing
  103. ecData, err := readFromOtherEcFiles(ecFiles, int(ecFileIndex), ecFileOffset, interval.Size)
  104. if err != nil {
  105. return nil, fmt.Errorf("ec reconstruct error: %v", err)
  106. }
  107. if bytes.Compare(data, ecData) != 0 {
  108. return nil, fmt.Errorf("ec compare error")
  109. }
  110. }
  111. return
  112. }
  113. func readFromOtherEcFiles(ecFiles []*os.File, ecFileIndex int, ecFileOffset int64, size types.Size) (data []byte, err error) {
  114. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  115. if err != nil {
  116. return nil, fmt.Errorf("failed to create encoder: %v", err)
  117. }
  118. bufs := make([][]byte, TotalShardsCount)
  119. for i := 0; i < DataShardsCount; {
  120. n := int(rand.Int31n(TotalShardsCount))
  121. if n == ecFileIndex || bufs[n] != nil {
  122. continue
  123. }
  124. bufs[n] = make([]byte, size)
  125. i++
  126. }
  127. for i, buf := range bufs {
  128. if buf == nil {
  129. continue
  130. }
  131. err = readFromFile(ecFiles[i], buf, ecFileOffset)
  132. if err != nil {
  133. return
  134. }
  135. }
  136. if err = enc.ReconstructData(bufs); err != nil {
  137. return nil, err
  138. }
  139. return bufs[ecFileIndex], nil
  140. }
  // readFromFile fills data with bytes read from file starting at ecFileOffset
  // and returns whatever error ReadAt reports; the byte count is discarded.
  func readFromFile(file *os.File, data []byte, ecFileOffset int64) (err error) {
  	if _, err = file.ReadAt(data, ecFileOffset); err != nil {
  		return err
  	}
  	return nil
  }
  145. func removeGeneratedFiles(baseFileName string) {
  146. for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
  147. fname := fmt.Sprintf("%s.ec%02d", baseFileName, i)
  148. os.Remove(fname)
  149. }
  150. os.Remove(baseFileName + ".ecx")
  151. }
  152. func TestLocateData(t *testing.T) {
  153. intervals := LocateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
  154. if len(intervals) != 1 {
  155. t.Errorf("unexpected interval size %d", len(intervals))
  156. }
  157. if !intervals[0].sameAs(Interval{0, 0, 1, false, 1}) {
  158. t.Errorf("unexpected interval %+v", intervals[0])
  159. }
  160. intervals = LocateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1-DataShardsCount*largeBlockSize/2-100)
  161. fmt.Printf("%+v\n", intervals)
  162. }
  163. func (this Interval) sameAs(that Interval) bool {
  164. return this.IsLargeBlock == that.IsLargeBlock &&
  165. this.InnerBlockOffset == that.InnerBlockOffset &&
  166. this.BlockIndex == that.BlockIndex &&
  167. this.Size == that.Size
  168. }