ec_encoder.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. package erasure_coding
  2. import (
  3. "fmt"
  4. "io"
  5. "os"
  6. "github.com/chrislusf/seaweedfs/weed/glog"
  7. "github.com/chrislusf/seaweedfs/weed/storage/idx"
  8. "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
  9. "github.com/chrislusf/seaweedfs/weed/storage/types"
  10. "github.com/chrislusf/seaweedfs/weed/util"
  11. "github.com/klauspost/reedsolomon"
  12. )
  13. const (
  14. DataShardsCount = 10
  15. ParityShardsCount = 4
  16. TotalShardsCount = DataShardsCount + ParityShardsCount
  17. ErasureCodingLargeBlockSize = 1024 * 1024 * 1024 // 1GB
  18. ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB
  19. )
  20. // WriteSortedEcxFile generates .ecx file from existing .idx file
  21. // all keys are sorted in ascending order
  22. func WriteSortedEcxFile(baseFileName string) (e error) {
  23. cm, err := readCompactMap(baseFileName)
  24. if err != nil {
  25. return fmt.Errorf("readCompactMap: %v", err)
  26. }
  27. ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
  28. if err != nil {
  29. return fmt.Errorf("failed to open ecx file: %v", err)
  30. }
  31. defer ecxFile.Close()
  32. err = cm.AscendingVisit(func(value needle_map.NeedleValue) error {
  33. bytes := value.ToBytes()
  34. _, writeErr := ecxFile.Write(bytes)
  35. return writeErr
  36. })
  37. if err != nil {
  38. return fmt.Errorf("failed to visit ecx file: %v", err)
  39. }
  40. return nil
  41. }
  42. // WriteEcFiles generates .ec01 ~ .ec14 files
  43. func WriteEcFiles(baseFileName string) error {
  44. return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
  45. }
  46. func RebuildEcFiles(baseFileName string) ([]uint32, error) {
  47. return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
  48. }
  49. func ToExt(ecIndex int) string {
  50. return fmt.Sprintf(".ec%02d", ecIndex)
  51. }
  52. func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
  53. file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
  54. if err != nil {
  55. return fmt.Errorf("failed to open dat file: %v", err)
  56. }
  57. defer file.Close()
  58. fi, err := file.Stat()
  59. if err != nil {
  60. return fmt.Errorf("failed to stat dat file: %v", err)
  61. }
  62. err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
  63. if err != nil {
  64. return fmt.Errorf("encodeDatFile: %v", err)
  65. }
  66. return nil
  67. }
  68. func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) (generatedShardIds []uint32, err error) {
  69. shardHasData := make([]bool, TotalShardsCount)
  70. inputFiles := make([]*os.File, TotalShardsCount)
  71. outputFiles := make([]*os.File, TotalShardsCount)
  72. for shardId := 0; shardId < TotalShardsCount; shardId++ {
  73. shardFileName := baseFileName + ToExt(shardId)
  74. if util.FileExists(shardFileName) {
  75. shardHasData[shardId] = true
  76. inputFiles[shardId], err = os.OpenFile(shardFileName, os.O_RDONLY, 0)
  77. if err != nil {
  78. return nil, err
  79. }
  80. defer inputFiles[shardId].Close()
  81. } else {
  82. outputFiles[shardId], err = os.OpenFile(shardFileName, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644)
  83. if err != nil {
  84. return nil, err
  85. }
  86. defer outputFiles[shardId].Close()
  87. generatedShardIds = append(generatedShardIds, uint32(shardId))
  88. }
  89. }
  90. err = rebuildEcFiles(shardHasData, inputFiles, outputFiles)
  91. if err != nil {
  92. return nil, fmt.Errorf("rebuildEcFiles: %v", err)
  93. }
  94. return
  95. }
  96. func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
  97. bufferSize := int64(len(buffers[0]))
  98. batchCount := blockSize / bufferSize
  99. if blockSize%bufferSize != 0 {
  100. glog.Fatalf("unexpected block size %d buffer size %d", blockSize, bufferSize)
  101. }
  102. for b := int64(0); b < batchCount; b++ {
  103. err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs)
  104. if err != nil {
  105. return err
  106. }
  107. }
  108. return nil
  109. }
  110. func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error) {
  111. for i := 0; i < TotalShardsCount; i++ {
  112. fname := baseFileName + ToExt(i)
  113. openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY
  114. if forRead {
  115. openOption = os.O_RDONLY
  116. }
  117. f, err := os.OpenFile(fname, openOption, 0644)
  118. if err != nil {
  119. return files, fmt.Errorf("failed to open file %s: %v", fname, err)
  120. }
  121. files = append(files, f)
  122. }
  123. return
  124. }
  125. func closeEcFiles(files []*os.File) {
  126. for _, f := range files {
  127. if f != nil {
  128. f.Close()
  129. }
  130. }
  131. }
  132. func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
  133. // read data into buffers
  134. for i := 0; i < DataShardsCount; i++ {
  135. n, err := file.ReadAt(buffers[i], startOffset+blockSize*int64(i))
  136. if err != nil {
  137. if err != io.EOF {
  138. return err
  139. }
  140. }
  141. if n < len(buffers[i]) {
  142. for t := len(buffers[i]) - 1; t >= n; t-- {
  143. buffers[i][t] = 0
  144. }
  145. }
  146. }
  147. err := enc.Encode(buffers)
  148. if err != nil {
  149. return err
  150. }
  151. for i := 0; i < TotalShardsCount; i++ {
  152. _, err := outputs[i].Write(buffers[i])
  153. if err != nil {
  154. return err
  155. }
  156. }
  157. return nil
  158. }
  159. func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
  160. var processedSize int64
  161. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  162. if err != nil {
  163. return fmt.Errorf("failed to create encoder: %v", err)
  164. }
  165. buffers := make([][]byte, TotalShardsCount)
  166. for i, _ := range buffers {
  167. buffers[i] = make([]byte, bufferSize)
  168. }
  169. outputs, err := openEcFiles(baseFileName, false)
  170. defer closeEcFiles(outputs)
  171. if err != nil {
  172. return fmt.Errorf("failed to open ec files %s: %v", baseFileName, err)
  173. }
  174. for remainingSize > largeBlockSize*DataShardsCount {
  175. err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
  176. if err != nil {
  177. return fmt.Errorf("failed to encode large chunk data: %v", err)
  178. }
  179. remainingSize -= largeBlockSize * DataShardsCount
  180. processedSize += largeBlockSize * DataShardsCount
  181. }
  182. for remainingSize > 0 {
  183. encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
  184. if err != nil {
  185. return fmt.Errorf("failed to encode small chunk data: %v", err)
  186. }
  187. remainingSize -= smallBlockSize * DataShardsCount
  188. processedSize += smallBlockSize * DataShardsCount
  189. }
  190. return nil
  191. }
  192. func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File) error {
  193. enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
  194. if err != nil {
  195. return fmt.Errorf("failed to create encoder: %v", err)
  196. }
  197. buffers := make([][]byte, TotalShardsCount)
  198. for i, _ := range buffers {
  199. if shardHasData[i] {
  200. buffers[i] = make([]byte, ErasureCodingSmallBlockSize)
  201. }
  202. }
  203. var startOffset int64
  204. var inputBufferDataSize int
  205. for {
  206. // read the input data from files
  207. for i := 0; i < TotalShardsCount; i++ {
  208. if shardHasData[i] {
  209. n, _ := inputFiles[i].ReadAt(buffers[i], startOffset)
  210. if n == 0 {
  211. return nil
  212. }
  213. if inputBufferDataSize == 0 {
  214. inputBufferDataSize = n
  215. }
  216. if inputBufferDataSize != n {
  217. return fmt.Errorf("ec shard size expected %d actual %d", inputBufferDataSize, n)
  218. }
  219. } else {
  220. buffers[i] = nil
  221. }
  222. }
  223. // encode the data
  224. err = enc.Reconstruct(buffers)
  225. if err != nil {
  226. return fmt.Errorf("reconstruct: %v", err)
  227. }
  228. // write the data to output files
  229. for i := 0; i < TotalShardsCount; i++ {
  230. if !shardHasData[i] {
  231. n, _ := outputFiles[i].WriteAt(buffers[i][:inputBufferDataSize], startOffset)
  232. if inputBufferDataSize != n {
  233. return fmt.Errorf("fail to write to %s", outputFiles[i].Name())
  234. }
  235. }
  236. }
  237. startOffset += int64(inputBufferDataSize)
  238. }
  239. }
  240. func readCompactMap(baseFileName string) (*needle_map.CompactMap, error) {
  241. indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644)
  242. if err != nil {
  243. return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err)
  244. }
  245. defer indexFile.Close()
  246. cm := needle_map.NewCompactMap()
  247. err = idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
  248. if !offset.IsZero() && size != types.TombstoneFileSize {
  249. cm.Set(key, offset, size)
  250. } else {
  251. cm.Delete(key)
  252. }
  253. return nil
  254. })
  255. return cm, err
  256. }