ec_test.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. package erasure_coding
  2. import (
  3. "bytes"
  4. "fmt"
  5. "math/rand"
  6. "os"
  7. "testing"
  8. "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
  9. "github.com/chrislusf/seaweedfs/weed/storage/types"
  10. "github.com/klauspost/reedsolomon"
  11. )
  12. const (
  13. largeBlockSize = 10000
  14. smallBlockSize = 100
  15. )
  16. func TestEncodingDecoding(t *testing.T) {
  17. bufferSize := 50
  18. baseFileName := "1"
  19. err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
  20. if err != nil {
  21. t.Logf("generateEcFiles: %v", err)
  22. }
  23. err = WriteSortedFileFromIdx(baseFileName, ".ecx")
  24. if err != nil {
  25. t.Logf("WriteSortedFileFromIdx: %v", err)
  26. }
  27. err = validateFiles(baseFileName)
  28. if err != nil {
  29. t.Logf("WriteSortedFileFromIdx: %v", err)
  30. }
  31. removeGeneratedFiles(baseFileName)
  32. }
  33. func validateFiles(baseFileName string) error {
  34. nm, err := readNeedleMap(baseFileName)
  35. if err != nil {
  36. return fmt.Errorf("readNeedleMap: %v", err)
  37. }
  38. datFile, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
  39. if err != nil {
  40. return fmt.Errorf("failed to open dat file: %v", err)
  41. }
  42. defer datFile.Close()
  43. fi, err := datFile.Stat()
  44. if err != nil {
  45. return fmt.Errorf("failed to stat dat file: %v", err)
  46. }
  47. ecFiles, err := openEcFiles(baseFileName, true)
  48. defer closeEcFiles(ecFiles)
  49. err = nm.AscendingVisit(func(value needle_map.NeedleValue) error {
  50. return assertSame(datFile, fi.Size(), ecFiles, value.Offset, value.Size)
  51. })
  52. if err != nil {
  53. return fmt.Errorf("failed to check ec files: %v", err)
  54. }
  55. return nil
  56. }
  57. func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) error {
  58. data, err := readDatFile(datFile, offset, size)
  59. if err != nil {
  60. return fmt.Errorf("failed to read dat file: %v", err)
  61. }
  62. ecData, err := readEcFile(datSize, ecFiles, offset, size)
  63. if err != nil {
  64. return fmt.Errorf("failed to read ec file: %v", err)
  65. }
  66. if bytes.Compare(data, ecData) != 0 {
  67. return fmt.Errorf("unexpected data read")
  68. }
  69. return nil
  70. }
  71. func readDatFile(datFile *os.File, offset types.Offset, size uint32) ([]byte, error) {
  72. data := make([]byte, size)
  73. n, err := datFile.ReadAt(data, offset.ToAcutalOffset())
  74. if err != nil {
  75. return nil, fmt.Errorf("failed to ReadAt dat file: %v", err)
  76. }
  77. if n != int(size) {
  78. return nil, fmt.Errorf("unexpected read size %d, expected %d", n, size)
  79. }
  80. return data, nil
  81. }
  82. func readEcFile(datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) (data []byte, err error) {
  83. intervals := LocateData(largeBlockSize, smallBlockSize, datSize, offset.ToAcutalOffset(), size)
  84. for i, interval := range intervals {
  85. if d, e := readOneInterval(interval, ecFiles); e != nil {
  86. return nil, e
  87. } else {
  88. if i == 0 {
  89. data = d
  90. } else {
  91. data = append(data, d...)
  92. }
  93. }
  94. }
  95. return data, nil
  96. }
  97. func readOneInterval(interval Interval, ecFiles []*os.File) (data []byte, err error) {
  98. ecFileIndex, ecFileOffset := interval.ToShardIdAndOffset(largeBlockSize, smallBlockSize)
  99. data = make([]byte, interval.Size)
  100. err = readFromFile(ecFiles[ecFileIndex], data, ecFileOffset)
  101. { // do some ec testing
  102. ecData, err := readFromOtherEcFiles(ecFiles, int(ecFileIndex), ecFileOffset, interval.Size)
  103. if err != nil {
  104. return nil, fmt.Errorf("ec reconstruct error: %v", err)
  105. }
  106. if bytes.Compare(data, ecData) != 0 {
  107. return nil, fmt.Errorf("ec compare error")
  108. }
  109. }
  110. return
  111. }
  112. func readFromOtherEcFiles(ecFiles []*os.File, ecFileIndex int, ecFileOffset int64, size uint32) (data []byte, err error) {
  113. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  114. if err != nil {
  115. return nil, fmt.Errorf("failed to create encoder: %v", err)
  116. }
  117. bufs := make([][]byte, TotalShardsCount)
  118. for i := 0; i < DataShardsCount; {
  119. n := int(rand.Int31n(TotalShardsCount))
  120. if n == ecFileIndex || bufs[n] != nil {
  121. continue
  122. }
  123. bufs[n] = make([]byte, size)
  124. i++
  125. }
  126. for i, buf := range bufs {
  127. if buf == nil {
  128. continue
  129. }
  130. err = readFromFile(ecFiles[i], buf, ecFileOffset)
  131. if err != nil {
  132. return
  133. }
  134. }
  135. if err = enc.ReconstructData(bufs); err != nil {
  136. return nil, err
  137. }
  138. return bufs[ecFileIndex], nil
  139. }
  140. func readFromFile(file *os.File, data []byte, ecFileOffset int64) (err error) {
  141. _, err = file.ReadAt(data, ecFileOffset)
  142. return
  143. }
  144. func removeGeneratedFiles(baseFileName string) {
  145. for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
  146. fname := fmt.Sprintf("%s.ec%02d", baseFileName, i)
  147. os.Remove(fname)
  148. }
  149. os.Remove(baseFileName + ".ecx")
  150. }
  151. func TestLocateData(t *testing.T) {
  152. intervals := LocateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
  153. if len(intervals) != 1 {
  154. t.Errorf("unexpected interval size %d", len(intervals))
  155. }
  156. if !intervals[0].sameAs(Interval{0, 0, 1, false, 1}) {
  157. t.Errorf("unexpected interval %+v", intervals[0])
  158. }
  159. intervals = LocateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1-DataShardsCount*largeBlockSize/2-100)
  160. fmt.Printf("%+v\n", intervals)
  161. }
  162. func (this Interval) sameAs(that Interval) bool {
  163. return this.IsLargeBlock == that.IsLargeBlock &&
  164. this.InnerBlockOffset == that.InnerBlockOffset &&
  165. this.BlockIndex == that.BlockIndex &&
  166. this.Size == that.Size
  167. }