volume_grpc_erasure_coding.go

package weed_server

import (
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"math"
	"os"
	"path"
	"path/filepath"
	"strings"

	"github.com/chrislusf/seaweedfs/weed/glog"
	"github.com/chrislusf/seaweedfs/weed/operation"
	"github.com/chrislusf/seaweedfs/weed/pb"
	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
	"github.com/chrislusf/seaweedfs/weed/storage"
	"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
	"github.com/chrislusf/seaweedfs/weed/storage/needle"
	"github.com/chrislusf/seaweedfs/weed/storage/types"
	"github.com/chrislusf/seaweedfs/weed/util"
)
/*
Steps to apply erasure coding to .dat .idx files

0. ensure the volume is readonly
1. client calls VolumeEcShardsGenerate to generate the .ecx and .ec00 ~ .ec13 files
2. client asks the master for possible servers to hold the ec files, at least 4 servers
3. client calls VolumeEcShardsCopy on the target servers to copy ec files from the source server
4. target servers report the new ec files to the master
5. master stores vid -> [14]*DataNode
6. client checks the master. If all 14 slices are ready, delete the original .dat, .idx files

(an illustrative client-side sketch of this flow follows below)
*/
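// The helper below is a minimal illustrative sketch of how a client could drive steps 1, 3
// and 4 above, using only the RPCs defined in this file. It is not an existing SeaweedFS
// API: the method name ecEncodeVolumeSketch, the single target server, and the hard-coded
// shard id list are assumptions made for the example; a real client would spread the 14
// shards over the servers returned by the master in step 2.
func (vs *VolumeServer) ecEncodeVolumeSketch(sourceServer, targetServer string, volumeId uint32, collection string) error {

	// step 1: ask the source volume server to generate .ecx and .ec00 ~ .ec13
	err := operation.WithVolumeServerClient(sourceServer, vs.grpcDialOption, func(ctx context.Context, client volume_server_pb.VolumeServerClient) error {
		_, genErr := client.VolumeEcShardsGenerate(ctx, &volume_server_pb.VolumeEcShardsGenerateRequest{
			VolumeId:   volumeId,
			Collection: collection,
		})
		return genErr
	})
	if err != nil {
		return fmt.Errorf("generate ec shards for volume %d: %v", volumeId, err)
	}

	// steps 3 and 4: ask one target server to copy a few shards plus the .ecx file from the
	// source server, then mount them so the new shard locations can be reported to the master
	shardIds := []uint32{0, 1, 2}
	return operation.WithVolumeServerClient(targetServer, vs.grpcDialOption, func(ctx context.Context, client volume_server_pb.VolumeServerClient) error {
		if _, copyErr := client.VolumeEcShardsCopy(ctx, &volume_server_pb.VolumeEcShardsCopyRequest{
			VolumeId:       volumeId,
			Collection:     collection,
			ShardIds:       shardIds,
			CopyEcxFile:    true,
			SourceDataNode: sourceServer,
		}); copyErr != nil {
			return copyErr
		}
		_, mountErr := client.VolumeEcShardsMount(ctx, &volume_server_pb.VolumeEcShardsMountRequest{
			VolumeId:   volumeId,
			Collection: collection,
			ShardIds:   shardIds,
		})
		return mountErr
	})
}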
// VolumeEcShardsGenerate generates the .ecx and .ec00 ~ .ec13 files
func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_server_pb.VolumeEcShardsGenerateRequest) (*volume_server_pb.VolumeEcShardsGenerateResponse, error) {

	v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
	if v == nil {
		return nil, fmt.Errorf("volume %d not found", req.VolumeId)
	}
	baseFileName := v.FileName()

	if v.Collection != req.Collection {
		return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
	}

	// write .ecx file
	if err := erasure_coding.WriteSortedFileFromIdx(baseFileName, ".ecx"); err != nil {
		return nil, fmt.Errorf("WriteSortedFileFromIdx %s: %v", baseFileName, err)
	}

	// write .ec00 ~ .ec13 files
	if err := erasure_coding.WriteEcFiles(baseFileName); err != nil {
		return nil, fmt.Errorf("WriteEcFiles %s: %v", baseFileName, err)
	}

	// write .vif file
	if err := pb.SaveVolumeInfo(baseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(v.Version())}); err != nil {
		return nil, fmt.Errorf("SaveVolumeInfo %s: %v", baseFileName, err)
	}

	return &volume_server_pb.VolumeEcShardsGenerateResponse{}, nil
}
// VolumeEcShardsRebuild generates any of the missing .ec00 ~ .ec13 files
func (vs *VolumeServer) VolumeEcShardsRebuild(ctx context.Context, req *volume_server_pb.VolumeEcShardsRebuildRequest) (*volume_server_pb.VolumeEcShardsRebuildResponse, error) {

	baseFileName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId))

	var rebuiltShardIds []uint32

	for _, location := range vs.store.Locations {
		if util.FileExists(path.Join(location.Directory, baseFileName+".ecx")) {
			// write .ec00 ~ .ec13 files
			baseFileName = path.Join(location.Directory, baseFileName)
			if generatedShardIds, err := erasure_coding.RebuildEcFiles(baseFileName); err != nil {
				return nil, fmt.Errorf("RebuildEcFiles %s: %v", baseFileName, err)
			} else {
				rebuiltShardIds = generatedShardIds
			}

			if err := erasure_coding.RebuildEcxFile(baseFileName); err != nil {
				return nil, fmt.Errorf("RebuildEcxFile %s: %v", baseFileName, err)
			}

			break
		}
	}

	return &volume_server_pb.VolumeEcShardsRebuildResponse{
		RebuiltShardIds: rebuiltShardIds,
	}, nil
}
// VolumeEcShardsCopy copies the .ecx file and some ec data slices
func (vs *VolumeServer) VolumeEcShardsCopy(ctx context.Context, req *volume_server_pb.VolumeEcShardsCopyRequest) (*volume_server_pb.VolumeEcShardsCopyResponse, error) {

	location := vs.store.FindFreeLocation()
	if location == nil {
		return nil, fmt.Errorf("no space left")
	}

	baseFileName := storage.VolumeFileName(location.Directory, req.Collection, int(req.VolumeId))

	err := operation.WithVolumeServerClient(req.SourceDataNode, vs.grpcDialOption, func(ctx context.Context, client volume_server_pb.VolumeServerClient) error {

		// copy ec data slices
		for _, shardId := range req.ShardIds {
			if err := vs.doCopyFile(ctx, client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, erasure_coding.ToExt(int(shardId)), false, false); err != nil {
				return err
			}
		}

		if req.CopyEcxFile {
			// copy ecx file
			if err := vs.doCopyFile(ctx, client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, ".ecx", false, false); err != nil {
				return err
			}
			return nil
		}

		if req.CopyEcjFile {
			// copy ecj file
			if err := vs.doCopyFile(ctx, client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, ".ecj", true, true); err != nil {
				return err
			}
		}

		if req.CopyVifFile {
			// copy vif file
			if err := vs.doCopyFile(ctx, client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, ".vif", false, true); err != nil {
				return err
			}
		}

		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("VolumeEcShardsCopy volume %d: %v", req.VolumeId, err)
	}

	return &volume_server_pb.VolumeEcShardsCopyResponse{}, nil
}
// VolumeEcShardsDelete locally deletes the .ecx file and some ec data slices if they are no longer needed.
// The shards should not be mounted before calling this.
func (vs *VolumeServer) VolumeEcShardsDelete(ctx context.Context, req *volume_server_pb.VolumeEcShardsDeleteRequest) (*volume_server_pb.VolumeEcShardsDeleteResponse, error) {

	baseFilename := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId))

	glog.V(0).Infof("ec volume %d shard delete %v", req.VolumeId, req.ShardIds)

	found := false
	for _, location := range vs.store.Locations {
		if util.FileExists(path.Join(location.Directory, baseFilename+".ecx")) {
			found = true
			baseFilename = path.Join(location.Directory, baseFilename)
			for _, shardId := range req.ShardIds {
				os.Remove(baseFilename + erasure_coding.ToExt(int(shardId)))
			}
			break
		}
	}

	if !found {
		return nil, nil
	}

	// check whether to delete the .ecx and .ecj files also
	hasEcxFile := false
	hasIdxFile := false
	existingShardCount := 0

	bName := filepath.Base(baseFilename)
	for _, location := range vs.store.Locations {
		fileInfos, err := ioutil.ReadDir(location.Directory)
		if err != nil {
			continue
		}
		for _, fileInfo := range fileInfos {
			if fileInfo.Name() == bName+".ecx" || fileInfo.Name() == bName+".ecj" {
				hasEcxFile = true
				continue
			}
			if fileInfo.Name() == bName+".idx" {
				hasIdxFile = true
				continue
			}
			if strings.HasPrefix(fileInfo.Name(), bName+".ec") {
				existingShardCount++
			}
		}
	}

	if hasEcxFile && existingShardCount == 0 {
		if err := os.Remove(baseFilename + ".ecx"); err != nil {
			return nil, err
		}
		if err := os.Remove(baseFilename + ".ecj"); err != nil {
			return nil, err
		}
	}

	if !hasIdxFile {
		// .vif is used for both ec volumes and normal volumes
		os.Remove(baseFilename + ".vif")
	}

	return &volume_server_pb.VolumeEcShardsDeleteResponse{}, nil
}
func (vs *VolumeServer) VolumeEcShardsMount(ctx context.Context, req *volume_server_pb.VolumeEcShardsMountRequest) (*volume_server_pb.VolumeEcShardsMountResponse, error) {

	for _, shardId := range req.ShardIds {
		err := vs.store.MountEcShards(req.Collection, needle.VolumeId(req.VolumeId), erasure_coding.ShardId(shardId))

		if err != nil {
			glog.Errorf("ec shard mount %v: %v", req, err)
			return nil, fmt.Errorf("mount %d.%d: %v", req.VolumeId, shardId, err)
		}
		glog.V(2).Infof("ec shard mount %v", req)
	}

	return &volume_server_pb.VolumeEcShardsMountResponse{}, nil
}
func (vs *VolumeServer) VolumeEcShardsUnmount(ctx context.Context, req *volume_server_pb.VolumeEcShardsUnmountRequest) (*volume_server_pb.VolumeEcShardsUnmountResponse, error) {

	for _, shardId := range req.ShardIds {
		err := vs.store.UnmountEcShards(needle.VolumeId(req.VolumeId), erasure_coding.ShardId(shardId))

		if err != nil {
			glog.Errorf("ec shard unmount %v: %v", req, err)
			return nil, fmt.Errorf("unmount %d.%d: %v", req.VolumeId, shardId, err)
		}
		glog.V(2).Infof("ec shard unmount %v", req)
	}

	return &volume_server_pb.VolumeEcShardsUnmountResponse{}, nil
}
func (vs *VolumeServer) VolumeEcShardRead(req *volume_server_pb.VolumeEcShardReadRequest, stream volume_server_pb.VolumeServer_VolumeEcShardReadServer) error {

	ecVolume, found := vs.store.FindEcVolume(needle.VolumeId(req.VolumeId))
	if !found {
		return fmt.Errorf("VolumeEcShardRead not found ec volume id %d", req.VolumeId)
	}
	ecShard, found := ecVolume.FindEcVolumeShard(erasure_coding.ShardId(req.ShardId))
	if !found {
		return fmt.Errorf("not found ec shard %d.%d", req.VolumeId, req.ShardId)
	}

	if req.FileKey != 0 {
		_, size, _ := ecVolume.FindNeedleFromEcx(types.Uint64ToNeedleId(req.FileKey))
		if size == types.TombstoneFileSize {
			return stream.Send(&volume_server_pb.VolumeEcShardReadResponse{
				IsDeleted: true,
			})
		}
	}

	bufSize := req.Size
	if bufSize > BufferSizeLimit {
		bufSize = BufferSizeLimit
	}
	buffer := make([]byte, bufSize)

	startOffset, bytesToRead := req.Offset, req.Size

	for bytesToRead > 0 {
		// read at most min(bytesToRead, bufSize) bytes per iteration
		bufferSize := bufSize
		if bufferSize > bytesToRead {
			bufferSize = bytesToRead
		}
		bytesread, err := ecShard.ReadAt(buffer[0:bufferSize], startOffset)

		// println("read", ecShard.FileName(), "startOffset", startOffset, bytesread, "bytes, with target", bufferSize)
		if bytesread > 0 {
			if int64(bytesread) > bytesToRead {
				bytesread = int(bytesToRead)
			}
			err = stream.Send(&volume_server_pb.VolumeEcShardReadResponse{
				Data: buffer[:bytesread],
			})
			if err != nil {
				// println("sending", bytesread, "bytes err", err.Error())
				return err
			}

			startOffset += int64(bytesread)
			bytesToRead -= int64(bytesread)
		}

		if err != nil {
			if err != io.EOF {
				return err
			}
			return nil
		}
	}

	return nil
}
func (vs *VolumeServer) VolumeEcBlobDelete(ctx context.Context, req *volume_server_pb.VolumeEcBlobDeleteRequest) (*volume_server_pb.VolumeEcBlobDeleteResponse, error) {

	resp := &volume_server_pb.VolumeEcBlobDeleteResponse{}

	for _, location := range vs.store.Locations {
		if localEcVolume, found := location.FindEcVolume(needle.VolumeId(req.VolumeId)); found {

			_, size, _, err := localEcVolume.LocateEcShardNeedle(types.NeedleId(req.FileKey), needle.Version(req.Version))
			if err != nil {
				return nil, fmt.Errorf("locate in local ec volume: %v", err)
			}
			if size == types.TombstoneFileSize {
				return resp, nil
			}

			err = localEcVolume.DeleteNeedleFromEcx(types.NeedleId(req.FileKey))
			if err != nil {
				return nil, err
			}

			break
		}
	}

	return resp, nil
}
// VolumeEcShardsToVolume generates the .idx, .dat files from the .ecx, .ecj and .ec00 ~ .ec09 files
func (vs *VolumeServer) VolumeEcShardsToVolume(ctx context.Context, req *volume_server_pb.VolumeEcShardsToVolumeRequest) (*volume_server_pb.VolumeEcShardsToVolumeResponse, error) {

	v, found := vs.store.FindEcVolume(needle.VolumeId(req.VolumeId))
	if !found {
		return nil, fmt.Errorf("ec volume %d not found", req.VolumeId)
	}
	baseFileName := v.FileName()

	if v.Collection != req.Collection {
		return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
	}

	// calculate .dat file size
	datFileSize, err := erasure_coding.FindDatFileSize(baseFileName)
	if err != nil {
		return nil, fmt.Errorf("FindDatFileSize %s: %v", baseFileName, err)
	}

	// write .dat file from .ec00 ~ .ec09 files
	if err := erasure_coding.WriteDatFile(baseFileName, datFileSize); err != nil {
		return nil, fmt.Errorf("WriteDatFile %s: %v", baseFileName, err)
	}

	// write .idx file from .ecx and .ecj files
	if err := erasure_coding.WriteIdxFileFromEcIndex(baseFileName); err != nil {
		return nil, fmt.Errorf("WriteIdxFileFromEcIndex %s: %v", baseFileName, err)
	}

	return &volume_server_pb.VolumeEcShardsToVolumeResponse{}, nil
}