command_volume_tier_move.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. package shell
  2. import (
  3. "context"
  4. "errors"
  5. "flag"
  6. "fmt"
  7. "github.com/seaweedfs/seaweedfs/weed/glog"
  8. "github.com/seaweedfs/seaweedfs/weed/pb"
  9. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  11. "github.com/seaweedfs/seaweedfs/weed/wdclient"
  12. "io"
  13. "path/filepath"
  14. "sync"
  15. "time"
  16. "github.com/seaweedfs/seaweedfs/weed/operation"
  17. "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
  18. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  19. )
  20. func init() {
  21. Commands = append(Commands, &commandVolumeTierMove{})
  22. }
  // volumeTierMoveJob describes one queued move: which volume to relocate and
  // the volume server it is copied from. Jobs are sent over the per-destination
  // channels kept in commandVolumeTierMove.queues.
  type volumeTierMoveJob struct {
  	// src is the volume server the volume is moved away from.
  	src pb.ServerAddress
  	// vid is the id of the volume to move.
  	vid needle.VolumeId
  }
  // commandVolumeTierMove implements the "volume.tier.move" shell command.
  type commandVolumeTierMove struct {
  	// activeServers maps a server address to the *sync.Mutex handed out by Lock.
  	activeServers sync.Map
  	// queues holds one job channel per destination server; Do recreates the
  	// map on every invocation.
  	queues map[pb.ServerAddress]chan volumeTierMoveJob
  	// Earlier locking fields, left commented out:
  	//activeServers map[pb.ServerAddress]struct{}
  	//activeServersLock sync.Mutex
  	//activeServersCond *sync.Cond
  }
  34. func (c *commandVolumeTierMove) Name() string {
  35. return "volume.tier.move"
  36. }
  37. func (c *commandVolumeTierMove) Help() string {
  38. return `change a volume from one disk type to another
  39. volume.tier.move -fromDiskType=hdd -toDiskType=ssd [-collectionPattern=""] [-fullPercent=95] [-quietFor=1h] [-parallelLimit=4] [-toReplication=XYZ]
  40. Even if the volume is replicated, only one replica will be changed and the rest replicas will be dropped.
  41. So "volume.fix.replication" and "volume.balance" should be followed.
  42. `
  43. }
  44. func (c *commandVolumeTierMove) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  45. tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  46. collectionPattern := tierCommand.String("collectionPattern", "", "match with wildcard characters '*' and '?'")
  47. fullPercentage := tierCommand.Float64("fullPercent", 95, "the volume reaches the percentage of max volume size")
  48. quietPeriod := tierCommand.Duration("quietFor", 24*time.Hour, "select volumes without no writes for this period")
  49. source := tierCommand.String("fromDiskType", "", "the source disk type")
  50. target := tierCommand.String("toDiskType", "", "the target disk type")
  51. parallelLimit := tierCommand.Int("parallelLimit", 0, "limit the number of parallel copying jobs")
  52. applyChange := tierCommand.Bool("force", false, "actually apply the changes")
  53. ioBytePerSecond := tierCommand.Int64("ioBytePerSecond", 0, "limit the speed of move")
  54. replicationString := tierCommand.String("toReplication", "", "the new target replication setting")
  55. if err = tierCommand.Parse(args); err != nil {
  56. return nil
  57. }
  58. infoAboutSimulationMode(writer, *applyChange, "-force")
  59. if err = commandEnv.confirmIsLocked(args); err != nil {
  60. return
  61. }
  62. fromDiskType := types.ToDiskType(*source)
  63. toDiskType := types.ToDiskType(*target)
  64. if fromDiskType == toDiskType {
  65. return fmt.Errorf("source tier %s is the same as target tier %s", fromDiskType, toDiskType)
  66. }
  67. // collect topology information
  68. topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv, 0)
  69. if err != nil {
  70. return err
  71. }
  72. // collect all volumes that should change
  73. volumeIds, err := collectVolumeIdsForTierChange(commandEnv, topologyInfo, volumeSizeLimitMb, fromDiskType, *collectionPattern, *fullPercentage, *quietPeriod)
  74. if err != nil {
  75. return err
  76. }
  77. fmt.Printf("tier move volumes: %v\n", volumeIds)
  78. _, allLocations := collectVolumeReplicaLocations(topologyInfo)
  79. allLocations = filterLocationsByDiskType(allLocations, toDiskType)
  80. keepDataNodesSorted(allLocations, toDiskType)
  81. if len(allLocations) > 0 && *parallelLimit > 0 && *parallelLimit < len(allLocations) {
  82. allLocations = allLocations[:*parallelLimit]
  83. }
  84. wg := sync.WaitGroup{}
  85. bufferLen := len(allLocations)
  86. c.queues = make(map[pb.ServerAddress]chan volumeTierMoveJob)
  87. for _, dst := range allLocations {
  88. destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
  89. c.queues[destServerAddress] = make(chan volumeTierMoveJob, bufferLen)
  90. wg.Add(1)
  91. go func(dst location, jobs <-chan volumeTierMoveJob, applyChanges bool) {
  92. defer wg.Done()
  93. for job := range jobs {
  94. fmt.Fprintf(writer, "moving volume %d from %s to %s with disk type %s ...\n", job.vid, job.src, dst.dataNode.Id, toDiskType.ReadableString())
  95. locations, found := commandEnv.MasterClient.GetLocations(uint32(job.vid))
  96. if !found {
  97. fmt.Printf("volume %d not found", job.vid)
  98. continue
  99. }
  100. unlock := c.Lock(job.src)
  101. if applyChanges {
  102. if err := c.doMoveOneVolume(commandEnv, writer, job.vid, toDiskType, locations, job.src, dst, *ioBytePerSecond, replicationString); err != nil {
  103. fmt.Fprintf(writer, "move volume %d %s => %s: %v\n", job.vid, job.src, dst.dataNode.Id, err)
  104. }
  105. }
  106. unlock()
  107. }
  108. }(dst, c.queues[destServerAddress], *applyChange)
  109. }
  110. for _, vid := range volumeIds {
  111. if err = c.doVolumeTierMove(commandEnv, writer, vid, toDiskType, allLocations); err != nil {
  112. fmt.Printf("tier move volume %d: %v\n", vid, err)
  113. }
  114. allLocations = rotateDataNodes(allLocations)
  115. }
  116. for key, _ := range c.queues {
  117. close(c.queues[key])
  118. }
  119. wg.Wait()
  120. return nil
  121. }
  122. func (c *commandVolumeTierMove) Lock(key pb.ServerAddress) func() {
  123. value, _ := c.activeServers.LoadOrStore(key, &sync.Mutex{})
  124. mtx := value.(*sync.Mutex)
  125. mtx.Lock()
  126. return func() { mtx.Unlock() }
  127. }
  128. func filterLocationsByDiskType(dataNodes []location, diskType types.DiskType) (ret []location) {
  129. for _, loc := range dataNodes {
  130. _, found := loc.dataNode.DiskInfos[string(diskType)]
  131. if found {
  132. ret = append(ret, loc)
  133. }
  134. }
  135. return
  136. }
  137. func rotateDataNodes(dataNodes []location) []location {
  138. if len(dataNodes) > 0 {
  139. return append(dataNodes[1:], dataNodes[0])
  140. } else {
  141. return dataNodes
  142. }
  143. }
  144. func isOneOf(server string, locations []wdclient.Location) bool {
  145. for _, loc := range locations {
  146. if server == loc.Url {
  147. return true
  148. }
  149. }
  150. return false
  151. }
  152. func (c *commandVolumeTierMove) doVolumeTierMove(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, allLocations []location) (err error) {
  153. // find volume location
  154. locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
  155. if !found {
  156. return fmt.Errorf("volume %d not found", vid)
  157. }
  158. // find one server with the most empty volume slots with target disk type
  159. hasFoundTarget := false
  160. fn := capacityByFreeVolumeCount(toDiskType)
  161. for _, dst := range allLocations {
  162. if fn(dst.dataNode) > 0 && !hasFoundTarget {
  163. // ask the volume server to replicate the volume
  164. if isOneOf(dst.dataNode.Id, locations) {
  165. continue
  166. }
  167. var sourceVolumeServer pb.ServerAddress
  168. for _, loc := range locations {
  169. if loc.Url != dst.dataNode.Id {
  170. sourceVolumeServer = loc.ServerAddress()
  171. }
  172. }
  173. if sourceVolumeServer == "" {
  174. continue
  175. }
  176. hasFoundTarget = true
  177. // adjust volume count
  178. dst.dataNode.DiskInfos[string(toDiskType)].VolumeCount++
  179. destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
  180. c.queues[destServerAddress] <- volumeTierMoveJob{sourceVolumeServer, vid}
  181. }
  182. }
  183. if !hasFoundTarget {
  184. fmt.Fprintf(writer, "can not find disk type %s for volume %d\n", toDiskType.ReadableString(), vid)
  185. }
  186. return nil
  187. }
  188. func (c *commandVolumeTierMove) doMoveOneVolume(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, locations []wdclient.Location, sourceVolumeServer pb.ServerAddress, dst location, ioBytePerSecond int64, replicationString *string) (err error) {
  189. if !commandEnv.isLocked() {
  190. return fmt.Errorf("lock is lost")
  191. }
  192. // mark all replicas as read only
  193. if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, false); err != nil {
  194. return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
  195. }
  196. newAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
  197. if err = LiveMoveVolume(commandEnv.option.GrpcDialOption, writer, vid, sourceVolumeServer, newAddress, 5*time.Second, toDiskType.ReadableString(), ioBytePerSecond, true); err != nil {
  198. // mark all replicas as writable
  199. if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, true); err != nil {
  200. glog.Errorf("mark volume %d as writable on %s: %v", vid, locations[0].Url, err)
  201. }
  202. return fmt.Errorf("move volume %d %s => %s : %v", vid, locations[0].Url, dst.dataNode.Id, err)
  203. }
  204. // If move is successful and replication is not empty, alter moved volume's replication setting
  205. if *replicationString != "" {
  206. err = operation.WithVolumeServerClient(false, newAddress, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  207. resp, configureErr := volumeServerClient.VolumeConfigure(context.Background(), &volume_server_pb.VolumeConfigureRequest{
  208. VolumeId: uint32(vid),
  209. Replication: *replicationString,
  210. })
  211. if configureErr != nil {
  212. return configureErr
  213. }
  214. if resp.Error != "" {
  215. return errors.New(resp.Error)
  216. }
  217. return nil
  218. })
  219. if err != nil {
  220. glog.Errorf("update volume %d replication on %s: %v", vid, locations[0].Url, err)
  221. }
  222. }
  223. // remove the remaining replicas
  224. for _, loc := range locations {
  225. if loc.Url != dst.dataNode.Id && loc.ServerAddress() != sourceVolumeServer {
  226. if err = deleteVolume(commandEnv.option.GrpcDialOption, vid, loc.ServerAddress()); err != nil {
  227. fmt.Fprintf(writer, "failed to delete volume %d on %s: %v\n", vid, loc.Url, err)
  228. }
  229. // reduce volume count? Not really necessary since they are "more" full and will not be a candidate to move to
  230. }
  231. }
  232. return nil
  233. }
  234. func collectVolumeIdsForTierChange(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeSizeLimitMb uint64, sourceTier types.DiskType, collectionPattern string, fullPercentage float64, quietPeriod time.Duration) (vids []needle.VolumeId, err error) {
  235. quietSeconds := int64(quietPeriod / time.Second)
  236. nowUnixSeconds := time.Now().Unix()
  237. fmt.Printf("collect %s volumes quiet for: %d seconds\n", sourceTier, quietSeconds)
  238. vidMap := make(map[uint32]bool)
  239. eachDataNode(topologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
  240. for _, diskInfo := range dn.DiskInfos {
  241. for _, v := range diskInfo.VolumeInfos {
  242. // check collection name pattern
  243. if collectionPattern != "" {
  244. matched, err := filepath.Match(collectionPattern, v.Collection)
  245. if err != nil {
  246. return
  247. }
  248. if !matched {
  249. continue
  250. }
  251. }
  252. if v.ModifiedAtSecond+quietSeconds < nowUnixSeconds && types.ToDiskType(v.DiskType) == sourceTier {
  253. if float64(v.Size) > fullPercentage/100*float64(volumeSizeLimitMb)*1024*1024 {
  254. vidMap[v.Id] = true
  255. }
  256. }
  257. }
  258. }
  259. })
  260. for vid := range vidMap {
  261. vids = append(vids, needle.VolumeId(vid))
  262. }
  263. return
  264. }