command_volume_tier_move.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. package shell
  2. import (
  3. "flag"
  4. "fmt"
  5. "github.com/chrislusf/seaweedfs/weed/glog"
  6. "github.com/chrislusf/seaweedfs/weed/pb"
  7. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  8. "github.com/chrislusf/seaweedfs/weed/storage/types"
  9. "github.com/chrislusf/seaweedfs/weed/wdclient"
  10. "io"
  11. "path/filepath"
  12. "sync"
  13. "time"
  14. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  15. )
  16. func init() {
  17. Commands = append(Commands, &commandVolumeTierMove{})
  18. }
  19. type commandVolumeTierMove struct {
  20. activeServers map[pb.ServerAddress]struct{}
  21. activeServersLock sync.Mutex
  22. activeServersCond *sync.Cond
  23. }
  24. func (c *commandVolumeTierMove) Name() string {
  25. return "volume.tier.move"
  26. }
  27. func (c *commandVolumeTierMove) Help() string {
  28. return `change a volume from one disk type to another
  29. volume.tier.move -fromDiskType=hdd -toDiskType=ssd [-collectionPattern=""] [-fullPercent=95] [-quietFor=1h]
  30. Even if the volume is replicated, only one replica will be changed and the rest replicas will be dropped.
  31. So "volume.fix.replication" and "volume.balance" should be followed.
  32. `
  33. }
  34. func (c *commandVolumeTierMove) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  35. c.activeServers = make(map[pb.ServerAddress]struct{})
  36. c.activeServersCond = sync.NewCond(new(sync.Mutex))
  37. tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  38. collectionPattern := tierCommand.String("collectionPattern", "", "match with wildcard characters '*' and '?'")
  39. fullPercentage := tierCommand.Float64("fullPercent", 95, "the volume reaches the percentage of max volume size")
  40. quietPeriod := tierCommand.Duration("quietFor", 24*time.Hour, "select volumes without no writes for this period")
  41. source := tierCommand.String("fromDiskType", "", "the source disk type")
  42. target := tierCommand.String("toDiskType", "", "the target disk type")
  43. applyChange := tierCommand.Bool("force", false, "actually apply the changes")
  44. if err = tierCommand.Parse(args); err != nil {
  45. return nil
  46. }
  47. if err = commandEnv.confirmIsLocked(args); err != nil {
  48. return
  49. }
  50. fromDiskType := types.ToDiskType(*source)
  51. toDiskType := types.ToDiskType(*target)
  52. if fromDiskType == toDiskType {
  53. return fmt.Errorf("source tier %s is the same as target tier %s", fromDiskType, toDiskType)
  54. }
  55. // collect topology information
  56. topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv)
  57. if err != nil {
  58. return err
  59. }
  60. // collect all volumes that should change
  61. volumeIds, err := collectVolumeIdsForTierChange(commandEnv, topologyInfo, volumeSizeLimitMb, fromDiskType, *collectionPattern, *fullPercentage, *quietPeriod)
  62. if err != nil {
  63. return err
  64. }
  65. fmt.Printf("tier move volumes: %v\n", volumeIds)
  66. _, allLocations := collectVolumeReplicaLocations(topologyInfo)
  67. for _, vid := range volumeIds {
  68. if err = c.doVolumeTierMove(commandEnv, writer, vid, toDiskType, allLocations, *applyChange); err != nil {
  69. fmt.Printf("tier move volume %d: %v\n", vid, err)
  70. }
  71. }
  72. return nil
  73. }
  74. func isOneOf(server string, locations []wdclient.Location) bool {
  75. for _, loc := range locations {
  76. if server == loc.Url {
  77. return true
  78. }
  79. }
  80. return false
  81. }
  82. func (c *commandVolumeTierMove) doVolumeTierMove(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, allLocations []location, applyChanges bool) (err error) {
  83. // find volume location
  84. locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
  85. if !found {
  86. return fmt.Errorf("volume %d not found", vid)
  87. }
  88. // find one server with the most empty volume slots with target disk type
  89. hasFoundTarget := false
  90. keepDataNodesSorted(allLocations, toDiskType)
  91. fn := capacityByFreeVolumeCount(toDiskType)
  92. wg := sync.WaitGroup{}
  93. for _, dst := range allLocations {
  94. if fn(dst.dataNode) > 0 && !hasFoundTarget {
  95. // ask the volume server to replicate the volume
  96. if isOneOf(dst.dataNode.Id, locations) {
  97. continue
  98. }
  99. var sourceVolumeServer pb.ServerAddress
  100. for _, loc := range locations {
  101. if loc.Url != dst.dataNode.Id {
  102. sourceVolumeServer = loc.ServerAddress()
  103. }
  104. }
  105. if sourceVolumeServer == "" {
  106. continue
  107. }
  108. fmt.Fprintf(writer, "moving volume %d from %s to %s with disk type %s ...\n", vid, sourceVolumeServer, dst.dataNode.Id, toDiskType.ReadableString())
  109. hasFoundTarget = true
  110. if !applyChanges {
  111. // adjust volume count
  112. dst.dataNode.DiskInfos[string(toDiskType)].VolumeCount++
  113. break
  114. }
  115. destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
  116. c.activeServersCond.L.Lock()
  117. _, isSourceActive := c.activeServers[sourceVolumeServer]
  118. _, isDestActive := c.activeServers[destServerAddress]
  119. for isSourceActive || isDestActive {
  120. c.activeServersCond.Wait()
  121. _, isSourceActive = c.activeServers[sourceVolumeServer]
  122. _, isDestActive = c.activeServers[destServerAddress]
  123. }
  124. c.activeServers[sourceVolumeServer] = struct{}{}
  125. c.activeServers[destServerAddress] = struct{}{}
  126. c.activeServersCond.L.Unlock()
  127. wg.Add(1)
  128. go func(dst location) {
  129. if err := c.doMoveOneVolume(commandEnv, writer, vid, toDiskType, locations, sourceVolumeServer, dst); err != nil {
  130. fmt.Fprintf(writer, "move volume %d %s => %s: %v\n", vid, sourceVolumeServer, dst.dataNode.Id, err)
  131. }
  132. delete(c.activeServers, sourceVolumeServer)
  133. delete(c.activeServers, destServerAddress)
  134. c.activeServersCond.Signal()
  135. wg.Done()
  136. }(dst)
  137. }
  138. }
  139. wg.Wait()
  140. if !hasFoundTarget {
  141. fmt.Fprintf(writer, "can not find disk type %s for volume %d\n", toDiskType.ReadableString(), vid)
  142. }
  143. return nil
  144. }
  145. func (c *commandVolumeTierMove) doMoveOneVolume(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, locations []wdclient.Location, sourceVolumeServer pb.ServerAddress, dst location) (err error) {
  146. // mark all replicas as read only
  147. if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, false); err != nil {
  148. return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
  149. }
  150. if err = LiveMoveVolume(commandEnv.option.GrpcDialOption, writer, vid, sourceVolumeServer, pb.NewServerAddressFromDataNode(dst.dataNode), 5*time.Second, toDiskType.ReadableString(), true); err != nil {
  151. // mark all replicas as writable
  152. if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, true); err != nil {
  153. glog.Errorf("mark volume %d as writable on %s: %v", vid, locations[0].Url, err)
  154. }
  155. return fmt.Errorf("move volume %d %s => %s : %v", vid, locations[0].Url, dst.dataNode.Id, err)
  156. }
  157. // adjust volume count
  158. dst.dataNode.DiskInfos[string(toDiskType)].VolumeCount++
  159. // remove the remaining replicas
  160. for _, loc := range locations {
  161. if loc.Url != dst.dataNode.Id && loc.ServerAddress() != sourceVolumeServer {
  162. if err = deleteVolume(commandEnv.option.GrpcDialOption, vid, loc.ServerAddress()); err != nil {
  163. fmt.Fprintf(writer, "failed to delete volume %d on %s: %v\n", vid, loc.Url, err)
  164. }
  165. }
  166. }
  167. return nil
  168. }
  169. func collectVolumeIdsForTierChange(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeSizeLimitMb uint64, sourceTier types.DiskType, collectionPattern string, fullPercentage float64, quietPeriod time.Duration) (vids []needle.VolumeId, err error) {
  170. quietSeconds := int64(quietPeriod / time.Second)
  171. nowUnixSeconds := time.Now().Unix()
  172. fmt.Printf("collect %s volumes quiet for: %d seconds\n", sourceTier, quietSeconds)
  173. vidMap := make(map[uint32]bool)
  174. eachDataNode(topologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
  175. for _, diskInfo := range dn.DiskInfos {
  176. for _, v := range diskInfo.VolumeInfos {
  177. // check collection name pattern
  178. if collectionPattern != "" {
  179. matched, err := filepath.Match(collectionPattern, v.Collection)
  180. if err != nil {
  181. return
  182. }
  183. if !matched {
  184. continue
  185. }
  186. }
  187. if v.ModifiedAtSecond+quietSeconds < nowUnixSeconds && types.ToDiskType(v.DiskType) == sourceTier {
  188. if float64(v.Size) > fullPercentage/100*float64(volumeSizeLimitMb)*1024*1024 {
  189. vidMap[v.Id] = true
  190. }
  191. }
  192. }
  193. }
  194. })
  195. for vid := range vidMap {
  196. vids = append(vids, needle.VolumeId(vid))
  197. }
  198. return
  199. }