command_ec_common.go

package shell

import (
	"context"
	"fmt"
	"math"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/storage/types"
	"golang.org/x/exp/slices"
	"google.golang.org/grpc"
)
// moveMountedShardToEcNode moves one mounted EC shard from its existing location to the
// destination node: the destination copies and mounts the shard, then the source unmounts
// and deletes it. The remote operations only run when applyBalancing is set; the in-memory
// bookkeeping is updated in either case.
func moveMountedShardToEcNode(commandEnv *CommandEnv, existingLocation *EcNode, collection string, vid needle.VolumeId, shardId erasure_coding.ShardId, destinationEcNode *EcNode, applyBalancing bool) (err error) {

	if !commandEnv.isLocked() {
		return fmt.Errorf("lock is lost")
	}

	copiedShardIds := []uint32{uint32(shardId)}

	if applyBalancing {

		existingServerAddress := pb.NewServerAddressFromDataNode(existingLocation.info)

		// ask the destination node to copy the shard and the ecx file from the source node, and mount it
		copiedShardIds, err = oneServerCopyAndMountEcShardsFromSource(commandEnv.option.GrpcDialOption, destinationEcNode, []uint32{uint32(shardId)}, vid, collection, existingServerAddress)
		if err != nil {
			return err
		}

		// unmount the to-be-deleted shards
		err = unmountEcShards(commandEnv.option.GrpcDialOption, vid, existingServerAddress, copiedShardIds)
		if err != nil {
			return err
		}

		// ask the source node to delete the shard, and maybe the ecx file
		err = sourceServerDeleteEcShards(commandEnv.option.GrpcDialOption, collection, vid, existingServerAddress, copiedShardIds)
		if err != nil {
			return err
		}

		fmt.Printf("moved ec shard %d.%d %s => %s\n", vid, shardId, existingLocation.info.Id, destinationEcNode.info.Id)
	}

	destinationEcNode.addEcVolumeShards(vid, collection, copiedShardIds)
	existingLocation.deleteEcVolumeShards(vid, copiedShardIds)

	return nil
}
// oneServerCopyAndMountEcShardsFromSource asks the target volume server to copy the given
// EC shards (plus the ecx/ecj/vif files) from the source location and mount them. It
// returns the shard ids that were actually copied; nothing is copied when the target is
// the same server as the source.
func oneServerCopyAndMountEcShardsFromSource(grpcDialOption grpc.DialOption,
	targetServer *EcNode, shardIdsToCopy []uint32,
	volumeId needle.VolumeId, collection string, existingLocation pb.ServerAddress) (copiedShardIds []uint32, err error) {

	fmt.Printf("allocate %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)

	targetAddress := pb.NewServerAddressFromDataNode(targetServer.info)
	err = operation.WithVolumeServerClient(false, targetAddress, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {

		if targetAddress != existingLocation {

			fmt.Printf("copy %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id)
			_, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{
				VolumeId:       uint32(volumeId),
				Collection:     collection,
				ShardIds:       shardIdsToCopy,
				CopyEcxFile:    true,
				CopyEcjFile:    true,
				CopyVifFile:    true,
				SourceDataNode: string(existingLocation),
			})
			if copyErr != nil {
				return fmt.Errorf("copy %d.%v %s => %s : %v", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id, copyErr)
			}
		}

		fmt.Printf("mount %d.%v on %s\n", volumeId, shardIdsToCopy, targetServer.info.Id)
		_, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
			VolumeId:   uint32(volumeId),
			Collection: collection,
			ShardIds:   shardIdsToCopy,
		})
		if mountErr != nil {
			return fmt.Errorf("mount %d.%v on %s : %v", volumeId, shardIdsToCopy, targetServer.info.Id, mountErr)
		}

		if targetAddress != existingLocation {
			copiedShardIds = shardIdsToCopy
			glog.V(0).Infof("%s ec volume %d deletes shards %+v", existingLocation, volumeId, copiedShardIds)
		}

		return nil
	})

	return
}
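
// Illustrative sketch, not part of the original file: replicating a single shard onto a
// target node and recording it in the in-memory topology. The function name and parameters
// are hypothetical; it only strings together helpers defined in this file.
func exampleReplicateOneShard(grpcDialOption grpc.DialOption, target *EcNode, source pb.ServerAddress,
	vid needle.VolumeId, collection string, shardId erasure_coding.ShardId) error {
	copied, err := oneServerCopyAndMountEcShardsFromSource(grpcDialOption, target, []uint32{uint32(shardId)}, vid, collection, source)
	if err != nil {
		return err
	}
	// keep the local view of the cluster consistent with what the volume server now holds
	target.addEcVolumeShards(vid, collection, copied)
	return nil
}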
// eachDataNode walks the topology and invokes fn for every data node, passing along its
// data center id and rack id.
func eachDataNode(topo *master_pb.TopologyInfo, fn func(dc string, rack RackId, dn *master_pb.DataNodeInfo)) {
	for _, dc := range topo.DataCenterInfos {
		for _, rack := range dc.RackInfos {
			for _, dn := range rack.DataNodeInfos {
				fn(dc.Id, RackId(rack.Id), dn)
			}
		}
	}
}
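
// Illustrative example, not part of the original file: using eachDataNode to count volume
// servers per rack. The function name is hypothetical.
func exampleCountDataNodesPerRack(topo *master_pb.TopologyInfo) map[RackId]int {
	counts := make(map[RackId]int)
	eachDataNode(topo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
		counts[rack]++
	})
	return counts
}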
func sortEcNodesByFreeslotsDescending(ecNodes []*EcNode) {
	slices.SortFunc(ecNodes, func(a, b *EcNode) int {
		return b.freeEcSlot - a.freeEcSlot
	})
}

func sortEcNodesByFreeslotsAscending(ecNodes []*EcNode) {
	slices.SortFunc(ecNodes, func(a, b *EcNode) int {
		return a.freeEcSlot - b.freeEcSlot
	})
}
type CandidateEcNode struct {
	ecNode     *EcNode
	shardCount int
}

// ensureSortedEcNodes restores the ordering defined by lessThan after the element at index
// has changed its sort key (for example, its free slot or shard count), by bubbling that
// element toward the front or the back until the slice is sorted again.
func ensureSortedEcNodes(data []*CandidateEcNode, index int, lessThan func(i, j int) bool) {
	for i := index - 1; i >= 0; i-- {
		if lessThan(i+1, i) {
			swap(data, i, i+1)
		} else {
			break
		}
	}
	for i := index + 1; i < len(data); i++ {
		if lessThan(i, i-1) {
			swap(data, i, i-1)
		} else {
			break
		}
	}
}
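
// Illustrative sketch, not part of the original file: after picking a candidate and bumping
// its shard count, only that one entry is out of place, so ensureSortedEcNodes repairs the
// order locally instead of re-sorting the whole slice. Names are hypothetical.
func exampleResortAfterPick(candidates []*CandidateEcNode, picked int) {
	candidates[picked].shardCount++
	ensureSortedEcNodes(candidates, picked, func(i, j int) bool {
		return candidates[i].shardCount > candidates[j].shardCount
	})
}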
func swap(data []*CandidateEcNode, i, j int) {
	data[i], data[j] = data[j], data[i]
}
// countShards sums the number of EC shards across the given shard information messages.
func countShards(ecShardInfos []*master_pb.VolumeEcShardInformationMessage) (count int) {
	for _, ecShardInfo := range ecShardInfos {
		shardBits := erasure_coding.ShardBits(ecShardInfo.EcIndexBits)
		count += shardBits.ShardIdCount()
	}
	return
}

// countFreeShardSlots estimates how many more EC shards the data node can hold on the given
// disk type: each unused volume slot is worth DataShardsCount shard slots, minus the EC
// shards already stored on that disk.
func countFreeShardSlots(dn *master_pb.DataNodeInfo, diskType types.DiskType) (count int) {
	if dn.DiskInfos == nil {
		return 0
	}
	diskInfo := dn.DiskInfos[string(diskType)]
	if diskInfo == nil {
		return 0
	}
	return int(diskInfo.MaxVolumeCount-diskInfo.VolumeCount)*erasure_coding.DataShardsCount - countShards(diskInfo.EcShardInfos)
}
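
// Worked example, not part of the original file and using made-up numbers: a hard-drive disk
// with MaxVolumeCount=100 and VolumeCount=90 has 10 unused volume slots; assuming the default
// 10+4 EC layout (DataShardsCount = 10) and no EC shards on the disk yet, countFreeShardSlots
// reports 10*10 - 0 = 100 free shard slots.
func exampleFreeShardSlotBudget() int {
	dn := &master_pb.DataNodeInfo{
		DiskInfos: map[string]*master_pb.DiskInfo{
			string(types.HardDriveType): {
				MaxVolumeCount: 100,
				VolumeCount:    90,
			},
		},
	}
	return countFreeShardSlots(dn, types.HardDriveType)
}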
type RackId string
type EcNodeId string

type EcNode struct {
	info       *master_pb.DataNodeInfo
	dc         string
	rack       RackId
	freeEcSlot int
}

func (ecNode *EcNode) localShardIdCount(vid uint32) int {
	for _, diskInfo := range ecNode.info.DiskInfos {
		for _, ecShardInfo := range diskInfo.EcShardInfos {
			if vid == ecShardInfo.Id {
				shardBits := erasure_coding.ShardBits(ecShardInfo.EcIndexBits)
				return shardBits.ShardIdCount()
			}
		}
	}
	return 0
}

type EcRack struct {
	ecNodes    map[EcNodeId]*EcNode
	freeEcSlot int
}
func collectEcNodes(commandEnv *CommandEnv, selectedDataCenter string) (ecNodes []*EcNode, totalFreeEcSlots int, err error) {

	// collect topology information
	topologyInfo, _, err := collectTopologyInfo(commandEnv, 0)
	if err != nil {
		return
	}

	// find all volume servers and their free EC shard slots, optionally restricted to one data center
	ecNodes, totalFreeEcSlots = collectEcVolumeServersByDc(topologyInfo, selectedDataCenter)

	sortEcNodesByFreeslotsDescending(ecNodes)

	return
}
func collectEcVolumeServersByDc(topo *master_pb.TopologyInfo, selectedDataCenter string) (ecNodes []*EcNode, totalFreeEcSlots int) {
	eachDataNode(topo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
		if selectedDataCenter != "" && selectedDataCenter != dc {
			return
		}

		freeEcSlots := countFreeShardSlots(dn, types.HardDriveType)
		ecNodes = append(ecNodes, &EcNode{
			info:       dn,
			dc:         dc,
			rack:       rack,
			freeEcSlot: freeEcSlots,
		})
		totalFreeEcSlots += freeEcSlots
	})
	return
}
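
// Illustrative sketch, not part of the original file: picking the volume server with the most
// free EC shard slots in a data center, using the collector and sort helpers above. The
// function name is hypothetical.
func examplePickEmptiestEcNode(topo *master_pb.TopologyInfo, dc string) *EcNode {
	ecNodes, _ := collectEcVolumeServersByDc(topo, dc)
	if len(ecNodes) == 0 {
		return nil
	}
	sortEcNodesByFreeslotsDescending(ecNodes)
	return ecNodes[0]
}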
func sourceServerDeleteEcShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation pb.ServerAddress, toBeDeletedShardIds []uint32) error {

	fmt.Printf("delete %d.%v from %s\n", volumeId, toBeDeletedShardIds, sourceLocation)

	return operation.WithVolumeServerClient(false, sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
		_, deleteErr := volumeServerClient.VolumeEcShardsDelete(context.Background(), &volume_server_pb.VolumeEcShardsDeleteRequest{
			VolumeId:   uint32(volumeId),
			Collection: collection,
			ShardIds:   toBeDeletedShardIds,
		})
		return deleteErr
	})
}

func unmountEcShards(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceLocation pb.ServerAddress, toBeUnmountedShardIds []uint32) error {

	fmt.Printf("unmount %d.%v from %s\n", volumeId, toBeUnmountedShardIds, sourceLocation)

	return operation.WithVolumeServerClient(false, sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
		_, unmountErr := volumeServerClient.VolumeEcShardsUnmount(context.Background(), &volume_server_pb.VolumeEcShardsUnmountRequest{
			VolumeId: uint32(volumeId),
			ShardIds: toBeUnmountedShardIds,
		})
		return unmountErr
	})
}

func mountEcShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation pb.ServerAddress, toBeMountedShardIds []uint32) error {

	fmt.Printf("mount %d.%v on %s\n", volumeId, toBeMountedShardIds, sourceLocation)

	return operation.WithVolumeServerClient(false, sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
		_, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
			VolumeId:   uint32(volumeId),
			Collection: collection,
			ShardIds:   toBeMountedShardIds,
		})
		return mountErr
	})
}
// ceilDivide returns total divided by n, rounded up.
func ceilDivide(total, n int) int {
	return int(math.Ceil(float64(total) / float64(n)))
}
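
// Example, not part of the original file: when spreading 14 shards evenly over 4 nodes, the
// per-node ceiling is ceilDivide(14, 4) = 4.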
func findEcVolumeShards(ecNode *EcNode, vid needle.VolumeId) erasure_coding.ShardBits {

	if diskInfo, found := ecNode.info.DiskInfos[string(types.HardDriveType)]; found {
		for _, shardInfo := range diskInfo.EcShardInfos {
			if needle.VolumeId(shardInfo.Id) == vid {
				return erasure_coding.ShardBits(shardInfo.EcIndexBits)
			}
		}
	}

	return 0
}
// addEcVolumeShards records the given shards of the volume on this node's hard-drive disk
// info and decreases freeEcSlot accordingly. It only updates the in-memory view; no RPC is
// made.
func (ecNode *EcNode) addEcVolumeShards(vid needle.VolumeId, collection string, shardIds []uint32) *EcNode {

	foundVolume := false
	diskInfo, found := ecNode.info.DiskInfos[string(types.HardDriveType)]
	if found {
		for _, shardInfo := range diskInfo.EcShardInfos {
			if needle.VolumeId(shardInfo.Id) == vid {
				oldShardBits := erasure_coding.ShardBits(shardInfo.EcIndexBits)
				newShardBits := oldShardBits
				for _, shardId := range shardIds {
					newShardBits = newShardBits.AddShardId(erasure_coding.ShardId(shardId))
				}
				shardInfo.EcIndexBits = uint32(newShardBits)
				ecNode.freeEcSlot -= newShardBits.ShardIdCount() - oldShardBits.ShardIdCount()
				foundVolume = true
				break
			}
		}
	} else {
		diskInfo = &master_pb.DiskInfo{
			Type: string(types.HardDriveType),
		}
		ecNode.info.DiskInfos[string(types.HardDriveType)] = diskInfo
	}

	if !foundVolume {
		var newShardBits erasure_coding.ShardBits
		for _, shardId := range shardIds {
			newShardBits = newShardBits.AddShardId(erasure_coding.ShardId(shardId))
		}
		diskInfo.EcShardInfos = append(diskInfo.EcShardInfos, &master_pb.VolumeEcShardInformationMessage{
			Id:          uint32(vid),
			Collection:  collection,
			EcIndexBits: uint32(newShardBits),
			DiskType:    string(types.HardDriveType),
		})
		ecNode.freeEcSlot -= len(shardIds)
	}

	return ecNode
}

// deleteEcVolumeShards removes the given shards of the volume from this node's in-memory
// shard bits and gives the freed slots back to freeEcSlot.
func (ecNode *EcNode) deleteEcVolumeShards(vid needle.VolumeId, shardIds []uint32) *EcNode {

	if diskInfo, found := ecNode.info.DiskInfos[string(types.HardDriveType)]; found {
		for _, shardInfo := range diskInfo.EcShardInfos {
			if needle.VolumeId(shardInfo.Id) == vid {
				oldShardBits := erasure_coding.ShardBits(shardInfo.EcIndexBits)
				newShardBits := oldShardBits
				for _, shardId := range shardIds {
					newShardBits = newShardBits.RemoveShardId(erasure_coding.ShardId(shardId))
				}
				shardInfo.EcIndexBits = uint32(newShardBits)
				ecNode.freeEcSlot -= newShardBits.ShardIdCount() - oldShardBits.ShardIdCount()
			}
		}
	}

	return ecNode
}
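
// Illustrative sketch, not part of the original file: even when a shard move is only planned
// (applyBalancing == false in moveMountedShardToEcNode), the same bookkeeping pair keeps the
// simulated topology consistent so later placement decisions see the shard at its new
// location. Names are hypothetical.
func exampleTrackPlannedMove(src, dst *EcNode, vid needle.VolumeId, collection string, shardId uint32) {
	dst.addEcVolumeShards(vid, collection, []uint32{shardId})
	src.deleteEcVolumeShards(vid, []uint32{shardId})
}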
// groupByCount buckets the nodes by the id returned from identifierFn and sums the
// accompanying counts per bucket.
func groupByCount(data []*EcNode, identifierFn func(*EcNode) (id string, count int)) map[string]int {
	countMap := make(map[string]int)
	for _, d := range data {
		id, count := identifierFn(d)
		countMap[id] += count
	}
	return countMap
}

// groupBy buckets the nodes by the id returned from identifierFn.
func groupBy(data []*EcNode, identifierFn func(*EcNode) (id string)) map[string][]*EcNode {
	groupMap := make(map[string][]*EcNode)
	for _, d := range data {
		id := identifierFn(d)
		groupMap[id] = append(groupMap[id], d)
	}
	return groupMap
}
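
// Illustrative example, not part of the original file: grouping EC nodes by rack and counting
// how many shards of one volume live in each data center. The function name is hypothetical;
// it only combines helpers defined above.
func exampleGroupings(ecNodes []*EcNode, vid needle.VolumeId) (byRack map[string][]*EcNode, shardsPerDc map[string]int) {
	byRack = groupBy(ecNodes, func(n *EcNode) string { return string(n.rack) })
	shardsPerDc = groupByCount(ecNodes, func(n *EcNode) (string, int) {
		return n.dc, findEcVolumeShards(n, vid).ShardIdCount()
	})
	return
}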