command_volume_fix_replication.go

package shell

import (
	"context"
	"fmt"
	"io"
	"math/rand"
	"sort"

	"github.com/chrislusf/seaweedfs/weed/operation"
	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
	"github.com/chrislusf/seaweedfs/weed/storage/super_block"
)
func init() {
	Commands = append(Commands, &commandVolumeFixReplication{})
}

type commandVolumeFixReplication struct {
}

func (c *commandVolumeFixReplication) Name() string {
	return "volume.fix.replication"
}
func (c *commandVolumeFixReplication) Help() string {
	return `add replicas to volumes that are missing replicas

	This command finds all under-replicated volumes and looks for volume servers with free slots.
	If the free slots satisfy the replication requirement, the volume content is copied over and mounted.

	volume.fix.replication -n # do not take action
	volume.fix.replication    # actually copy the volume files and mount the volume

	Note:
		* each time this will only add back one replica for one volume id. If multiple replicas
		  are missing, e.g. multiple volume servers are new, you may need to run this multiple times.
		* do not re-run this within a few seconds, since the new volume replica may take a few seconds
		  to register itself with the master.

`
}
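// Do scans the topology for under-replicated volumes and, unless -n is given,
// copies each one to a volume server that has a free slot and satisfies the
// volume's replica placement.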
func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {

	if err = commandEnv.confirmIsLocked(); err != nil {
		return
	}

	takeAction := true
	if len(args) > 0 && args[0] == "-n" {
		takeAction = false
	}

	var resp *master_pb.VolumeListResponse
	err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
		resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
		return err
	})
	if err != nil {
		return err
	}

	// find all volumes that need replication
	// collect all data nodes
	replicatedVolumeLocations := make(map[uint32][]location)
	replicatedVolumeInfo := make(map[uint32]*master_pb.VolumeInformationMessage)
	var allLocations []location
	eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
		loc := newLocation(dc, string(rack), dn)
		for _, v := range dn.VolumeInfos {
			if v.ReplicaPlacement > 0 {
				replicatedVolumeLocations[v.Id] = append(replicatedVolumeLocations[v.Id], loc)
				replicatedVolumeInfo[v.Id] = v
			}
		}
		allLocations = append(allLocations, loc)
	})

	// find all under-replicated volumes
	underReplicatedVolumeLocations := make(map[uint32][]location)
	for vid, locations := range replicatedVolumeLocations {
		volumeInfo := replicatedVolumeInfo[vid]
		replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(volumeInfo.ReplicaPlacement))
		if replicaPlacement.GetCopyCount() > len(locations) {
			underReplicatedVolumeLocations[vid] = locations
		}
	}

	if len(underReplicatedVolumeLocations) == 0 {
		return fmt.Errorf("no under replicated volumes")
	}

	if len(allLocations) == 0 {
		return fmt.Errorf("no data nodes at all")
	}

	// find the most under-populated data nodes
	keepDataNodesSorted(allLocations)

	for vid, locations := range underReplicatedVolumeLocations {
		volumeInfo := replicatedVolumeInfo[vid]
		replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(volumeInfo.ReplicaPlacement))
		foundNewLocation := false
		for _, dst := range allLocations {
			// check whether the data node satisfies the constraints
			if dst.dataNode.FreeVolumeCount > 0 && satisfyReplicaPlacement(replicaPlacement, locations, dst) {
				// ask the volume server to replicate the volume
				sourceNodes := underReplicatedVolumeLocations[vid]
				sourceNode := sourceNodes[rand.Intn(len(sourceNodes))]
				foundNewLocation = true
				fmt.Fprintf(writer, "replicating volume %d %s from %s to dataNode %s ...\n", volumeInfo.Id, replicaPlacement, sourceNode.dataNode.Id, dst.dataNode.Id)

				if !takeAction {
					break
				}

				err := operation.WithVolumeServerClient(dst.dataNode.Id, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
					_, replicateErr := volumeServerClient.VolumeCopy(context.Background(), &volume_server_pb.VolumeCopyRequest{
						VolumeId:       volumeInfo.Id,
						SourceDataNode: sourceNode.dataNode.Id,
					})
					if replicateErr != nil {
						return fmt.Errorf("copying from %s => %s : %v", sourceNode.dataNode.Id, dst.dataNode.Id, replicateErr)
					}
					return nil
				})

				if err != nil {
					return err
				}

				// adjust free volume count
				dst.dataNode.FreeVolumeCount--
				keepDataNodesSorted(allLocations)
				break
			}
		}
		if !foundNewLocation {
			fmt.Fprintf(writer, "failed to place volume %d replica as %s, existing:%+v\n", volumeInfo.Id, replicaPlacement, locations)
		}
	}

	return nil
}
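// keepDataNodesSorted orders the candidate data nodes by free volume slots,
// most free slots first, so the least loaded server is tried first.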
func keepDataNodesSorted(dataNodes []location) {
	sort.Slice(dataNodes, func(i, j int) bool {
		return dataNodes[i].dataNode.FreeVolumeCount > dataNodes[j].dataNode.FreeVolumeCount
	})
}
/*
	if on an existing data node {
		return false
	}
	if different from existing dcs {
		if lack on different dcs {
			return true
		} else {
			return false
		}
	}
	if not on primary dc {
		return false
	}
	if different from existing racks {
		if lack on different racks {
			return true
		} else {
			return false
		}
	}
	if not on primary rack {
		return false
	}
	if lacks on same rack {
		return true
	} else {
		return false
	}
*/
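// satisfyReplicaPlacement reports whether adding one more replica at possibleLocation
// still conforms to the replica placement (DiffDataCenterCount / DiffRackCount /
// SameRackCount), given the replicas that already exist at existingLocations.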
func satisfyReplicaPlacement(replicaPlacement *super_block.ReplicaPlacement, existingLocations []location, possibleLocation location) bool {

	existingDataNodes := make(map[string]int)
	for _, loc := range existingLocations {
		existingDataNodes[loc.String()] += 1
	}
	sameDataNodeCount := existingDataNodes[possibleLocation.String()]
	// avoid duplicated volume on the same data node
	if sameDataNodeCount > 0 {
		return false
	}

	existingDataCenters := make(map[string]int)
	for _, loc := range existingLocations {
		existingDataCenters[loc.DataCenter()] += 1
	}
	primaryDataCenters, _ := findTopKeys(existingDataCenters)

	// ensure data center count is within limit
	if _, found := existingDataCenters[possibleLocation.DataCenter()]; !found {
		// different from existing dcs
		if len(existingDataCenters) < replicaPlacement.DiffDataCenterCount+1 {
			// lack on different dcs
			return true
		} else {
			// adding this would go over the different dcs limit
			return false
		}
	}
	// now this is same as one of the existing data centers
	if !isAmong(possibleLocation.DataCenter(), primaryDataCenters) {
		// not on one of the primary dcs
		return false
	}

	// now this is one of the primary dcs
	existingRacks := make(map[string]int)
	for _, loc := range existingLocations {
		if loc.DataCenter() != possibleLocation.DataCenter() {
			continue
		}
		existingRacks[loc.Rack()] += 1
	}
	primaryRacks, _ := findTopKeys(existingRacks)
	sameRackCount := existingRacks[possibleLocation.Rack()]

	// ensure rack count is within limit
	if _, found := existingRacks[possibleLocation.Rack()]; !found {
		// different from existing racks
		if len(existingRacks) < replicaPlacement.DiffRackCount+1 {
			// lack on different racks
			return true
		} else {
			// adding this would go over the different racks limit
			return false
		}
	}
	// now this is same as one of the existing racks
	if !isAmong(possibleLocation.Rack(), primaryRacks) {
		// not on the primary rack
		return false
	}

	// now this is on the primary rack
	// different from existing data nodes
	if sameRackCount < replicaPlacement.SameRackCount+1 {
		// lack on same rack
		return true
	} else {
		// adding this would go over the same data node limit
		return false
	}
}
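// findTopKeys returns every key in m whose count equals the maximum count, along with that maximum.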
func findTopKeys(m map[string]int) (topKeys []string, max int) {
	for k, c := range m {
		if max < c {
			topKeys = topKeys[:0]
			topKeys = append(topKeys, k)
			max = c
		} else if max == c {
			topKeys = append(topKeys, k)
		}
	}
	return
}
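// isAmong reports whether key is one of keys.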
func isAmong(key string, keys []string) bool {
	for _, k := range keys {
		if k == key {
			return true
		}
	}
	return false
}
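// location identifies where one replica lives: its data center, rack, and data node.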
type location struct {
	dc       string
	rack     string
	dataNode *master_pb.DataNodeInfo
}

func newLocation(dc, rack string, dataNode *master_pb.DataNodeInfo) location {
	return location{
		dc:       dc,
		rack:     rack,
		dataNode: dataNode,
	}
}

func (l location) String() string {
	return fmt.Sprintf("%s %s %s", l.dc, l.rack, l.dataNode.Id)
}
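// Rack returns the rack name qualified by its data center, so racks with the
// same name in different data centers are counted separately.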
func (l location) Rack() string {
	return fmt.Sprintf("%s %s", l.dc, l.rack)
}

func (l location) DataCenter() string {
	return l.dc
}