Просмотр исходного кода

shell: add volumeServer.evacuate command

Chris Lu 4 года назад
Родитель
Сommit
a595916342

+ 1 - 1
weed/shell/command_ec_balance.go

@@ -28,7 +28,7 @@ func (c *commandEcBalance) Help() string {
 
 	Algorithm:
 
-	For each type of volume server (different max volume count limit){
+	func EcBalance() {
 		for each collection:
 			balanceEcVolumes(collectionName)
 		for each rack:

+ 19 - 4
weed/shell/command_ec_common.go

@@ -173,6 +173,16 @@ type EcNode struct {
 	freeEcSlot int
 }
 
+// localShardIdCount reports how many EC shards of volume vid are recorded for
+// this node in its EcShardInfos. It returns 0 when no entry matches vid.
+func (ecNode *EcNode) localShardIdCount(vid uint32) int {
+	for _, shardInfo := range ecNode.info.EcShardInfos {
+		if shardInfo.Id != vid {
+			continue
+		}
+		// Only the first entry matching vid is counted.
+		return erasure_coding.ShardBits(shardInfo.EcIndexBits).ShardIdCount()
+	}
+	return 0
+}
+
 type EcRack struct {
 	ecNodes    map[EcNodeId]*EcNode
 	freeEcSlot int
@@ -191,7 +201,15 @@ func collectEcNodes(commandEnv *CommandEnv, selectedDataCenter string) (ecNodes
 	}
 
 	// find out all volume servers with one slot left.
-	eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+	ecNodes, totalFreeEcSlots = collectEcVolumeServersByDc(resp.TopologyInfo, selectedDataCenter)
+
+	sortEcNodesByFreeslotsDecending(ecNodes)
+
+	return
+}
+
+func collectEcVolumeServersByDc(topo *master_pb.TopologyInfo, selectedDataCenter string) (ecNodes []*EcNode, totalFreeEcSlots int) {
+	eachDataNode(topo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
 		if selectedDataCenter != "" && selectedDataCenter != dc {
 			return
 		}
@@ -205,9 +223,6 @@ func collectEcNodes(commandEnv *CommandEnv, selectedDataCenter string) (ecNodes
 		})
 		totalFreeEcSlots += freeEcSlots
 	})
-
-	sortEcNodesByFreeslotsDecending(ecNodes)
-
 	return
 }
 

+ 29 - 16
weed/shell/command_volume_balance.go

@@ -244,32 +244,43 @@ func balanceSelectedVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*
 func attemptToMoveOneVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, fullNode *Node, candidateVolumes []*master_pb.VolumeInformationMessage, emptyNode *Node, applyBalancing bool) (hasMoved bool, err error) {
 
 	for _, v := range candidateVolumes {
-		if v.ReplicaPlacement > 0 {
-			replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(v.ReplicaPlacement))
-			if !isGoodMove(replicaPlacement, volumeReplicas[v.Id], fullNode, emptyNode) {
-				continue
-			}
+		hasMoved, err = maybeMoveOneVolume(commandEnv, volumeReplicas, fullNode, v, emptyNode, applyBalancing)
+		if err != nil {
+			return
 		}
-		if _, found := emptyNode.selectedVolumes[v.Id]; !found {
-			if err = moveVolume(commandEnv, v, fullNode, emptyNode, applyBalancing); err == nil {
-				adjustAfterMove(v, volumeReplicas, fullNode, emptyNode)
-				hasMoved = true
-				break
-			} else {
-				return
-			}
+		if hasMoved {
+			break
+		}
+	}
+	return
+}
+
+// maybeMoveOneVolume attempts to move candidateVolume from fullNode to emptyNode.
+//
+// The move is skipped (false, nil) when the volume has a non-zero
+// ReplicaPlacement and isGoodMove rejects the destination, or when emptyNode
+// already lists the volume in its selectedVolumes. Otherwise moveVolume is
+// called; on success the bookkeeping is updated through adjustAfterMove and
+// hasMoved is true, on failure the error is returned with hasMoved false.
+func maybeMoveOneVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, fullNode *Node, candidateVolume *master_pb.VolumeInformationMessage, emptyNode *Node, applyChange bool) (hasMoved bool, err error) {
+	if candidateVolume.ReplicaPlacement > 0 {
+		// The conversion error is ignored here, as in the code this was extracted from.
+		placement, _ := super_block.NewReplicaPlacementFromByte(byte(candidateVolume.ReplicaPlacement))
+		if !isGoodMove(placement, volumeReplicas[candidateVolume.Id], fullNode, emptyNode) {
+			return false, nil
+		}
+	}
+	if _, alreadyThere := emptyNode.selectedVolumes[candidateVolume.Id]; !alreadyThere {
+		err = moveVolume(commandEnv, candidateVolume, fullNode, emptyNode, applyChange)
+		if err == nil {
+			adjustAfterMove(candidateVolume, volumeReplicas, fullNode, emptyNode)
+			hasMoved = true
 		}
 	}
 	return
 }
 
-func moveVolume(commandEnv *CommandEnv, v *master_pb.VolumeInformationMessage, fullNode *Node, emptyNode *Node, applyBalancing bool) error {
+func moveVolume(commandEnv *CommandEnv, v *master_pb.VolumeInformationMessage, fullNode *Node, emptyNode *Node, applyChange bool) error {
 	collectionPrefix := v.Collection + "_"
 	if v.Collection == "" {
 		collectionPrefix = ""
 	}
 	fmt.Fprintf(os.Stdout, "moving volume %s%d %s => %s\n", collectionPrefix, v.Id, fullNode.info.Id, emptyNode.info.Id)
-	if applyBalancing {
+	if applyChange {
 		return LiveMoveVolume(commandEnv.option.GrpcDialOption, needle.VolumeId(v.Id), fullNode.info.Id, emptyNode.info.Id, 5*time.Second)
 	}
 	return nil
@@ -315,7 +326,9 @@ func isGoodMove(placement *super_block.ReplicaPlacement, existingReplicas []*Vol
 
 func adjustAfterMove(v *master_pb.VolumeInformationMessage, volumeReplicas map[uint32][]*VolumeReplica, fullNode *Node, emptyNode *Node) {
 	delete(fullNode.selectedVolumes, v.Id)
-	emptyNode.selectedVolumes[v.Id] = v
+	if emptyNode.selectedVolumes != nil {
+		emptyNode.selectedVolumes[v.Id] = v
+	}
 	existingReplicas := volumeReplicas[v.Id]
 	for _, replica := range existingReplicas {
 		if replica.location.dataNode.Id == fullNode.info.Id &&

+ 192 - 0
weed/shell/command_volume_server_evacuate.go

@@ -0,0 +1,192 @@
+package shell
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"io"
+	"sort"
+)
+
+// init appends the volumeServer.evacuate command to the package-level Commands list.
+func init() {
+	evacuate := &commandVolumeServerEvacuate{}
+	Commands = append(Commands, evacuate)
+}
+
+// commandVolumeServerEvacuate implements the "volumeServer.evacuate" command.
+// It has no fields of its own.
+type commandVolumeServerEvacuate struct{}
+
+// Name returns the command's name, "volumeServer.evacuate".
+func (c *commandVolumeServerEvacuate) Name() string {
+	const commandName = "volumeServer.evacuate"
+	return commandName
+}
+
+// Help returns the usage text for the command. It lists both flags that Do
+// registers: -node (required) and -force (apply the changes).
+func (c *commandVolumeServerEvacuate) Help() string {
+	return `move out all data on a volume server
+
+	volumeServer.evacuate -node <host:port> [-force]
+
+	This command moves all data away from the volume server.
+	The volumes on the volume servers will be redistributed.
+	The changes are only applied when -force is specified.
+
+	Usually this is used to prepare to shutdown or upgrade the volume server.
+
+`
+}
+
+// Do parses args and runs the evacuation of the selected volume server.
+//
+// It returns early with the error from commandEnv.confirmIsLocked when that
+// check fails. Recognized flags:
+//
+//	-node   <host>:<port> of the volume server (required)
+//	-force  actually apply the changes
+//
+// A flag parsing failure makes Do return nil rather than the parse error.
+// An empty -node value is reported as an error.
+func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+	if err = commandEnv.confirmIsLocked(); err != nil {
+		return
+	}
+
+	flags := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	var (
+		node  = flags.String("node", "", "<host>:<port> of the volume server")
+		force = flags.Bool("force", false, "actually apply the changes")
+	)
+	if err = flags.Parse(args); err != nil {
+		return nil
+	}
+
+	if len(*node) == 0 {
+		return fmt.Errorf("need to specify volume server by -node=<host>:<port>")
+	}
+	return volumeServerEvacuate(commandEnv, *node, *force, writer)
+}
+
+// volumeServerEvacuate fetches the volume list from the master and then moves
+// the normal volumes, followed by the EC volumes, away from volumeServer.
+// The first error encountered is returned. writer is currently unused here.
+func volumeServerEvacuate(commandEnv *CommandEnv, volumeServer string, applyChange bool, writer io.Writer) (err error) {
+	// Plan:
+	// 1. confirm the volume server is part of the cluster
+	// 2. collect all other volume servers, sort by empty slots
+	// 3. move to any other volume server as long as it satisfies the replication requirements
+
+	// Ask the master for the list of all volumes.
+	var volumeList *master_pb.VolumeListResponse
+	err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
+		var listErr error
+		volumeList, listErr = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
+		return listErr
+	})
+	if err != nil {
+		return err
+	}
+
+	if err = evacuateNormalVolumes(commandEnv, volumeList, volumeServer, applyChange); err != nil {
+		return err
+	}
+	return evacuateEcVolumes(commandEnv, volumeList, volumeServer, applyChange)
+}
+
+// evacuateNormalVolumes moves every volume listed in the VolumeInfos of
+// volumeServer to one of the other volume servers.
+//
+// It fails when volumeServer is not among the collected servers, when a move
+// returns an error, or when no destination accepted a volume; processing
+// stops at the first such failure.
+func evacuateNormalVolumes(commandEnv *CommandEnv, resp *master_pb.VolumeListResponse, volumeServer string, applyChange bool) error {
+	// Locate the server to evacuate; the empty data center argument presumably
+	// selects servers from all data centers (confirm in collectVolumeServersByDc).
+	allNodes := collectVolumeServersByDc(resp.TopologyInfo, "")
+	source, targets := nodesOtherThan(allNodes, volumeServer)
+	if source == nil {
+		return fmt.Errorf("%s is not found in this cluster", volumeServer)
+	}
+
+	// Move the normal volumes away one by one.
+	replicasByVolume, _ := collectVolumeReplicaLocations(resp)
+	for _, vol := range source.info.VolumeInfos {
+		moved, moveErr := moveAwayOneNormalVolume(commandEnv, replicasByVolume, vol, source, targets, applyChange)
+		switch {
+		case moveErr != nil:
+			return fmt.Errorf("move away volume %d from %s: %v", vol.Id, volumeServer, moveErr)
+		case !moved:
+			return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer)
+		}
+	}
+	return nil
+}
+
+// evacuateEcVolumes moves every EC shard listed in the EcShardInfos of
+// volumeServer to the other EC-capable volume servers.
+//
+// It fails when volumeServer is not among the collected servers, when a shard
+// move returns an error, or when moveAwayOneEcVolume reports that nothing was
+// moved; processing stops at the first such failure. Both error messages name
+// the volume as an "ec volume" so they can be told apart from the errors of
+// the normal-volume pass.
+func evacuateEcVolumes(commandEnv *CommandEnv, resp *master_pb.VolumeListResponse, volumeServer string, applyChange bool) error {
+	// find this ec volume server; an empty data center selects all data centers
+	ecNodes, _ := collectEcVolumeServersByDc(resp.TopologyInfo, "")
+	thisNode, otherNodes := ecNodesOtherThan(ecNodes, volumeServer)
+	if thisNode == nil {
+		return fmt.Errorf("%s is not found in this cluster", volumeServer)
+	}
+
+	// move away ec volumes
+	for _, ecShardInfo := range thisNode.info.EcShardInfos {
+		hasMoved, err := moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange)
+		if err != nil {
+			return fmt.Errorf("move away ec volume %d from %s: %v", ecShardInfo.Id, volumeServer, err)
+		}
+		if !hasMoved {
+			return fmt.Errorf("failed to move ec volume %d from %s", ecShardInfo.Id, volumeServer)
+		}
+	}
+	return nil
+}
+
+// moveAwayOneEcVolume moves each shard of one EC volume held by thisNode to
+// another node.
+//
+// Before every shard, otherNodes is re-sorted in place by ascending
+// localShardIdCount for this volume, and the shard is sent to the first node
+// of that order. A failed move aborts immediately with the error; no further
+// node is tried. hasMoved is never reset between shards, so it reports
+// whether at least one shard has been moved so far.
+func moveAwayOneEcVolume(commandEnv *CommandEnv, ecShardInfo *master_pb.VolumeEcShardInformationMessage, thisNode *EcNode, otherNodes []*EcNode, applyChange bool) (hasMoved bool, err error) {
+	shardIds := erasure_coding.ShardBits(ecShardInfo.EcIndexBits).ShardIds()
+
+	for _, shardId := range shardIds {
+		sort.Slice(otherNodes, func(a, b int) bool {
+			return otherNodes[a].localShardIdCount(ecShardInfo.Id) < otherNodes[b].localShardIdCount(ecShardInfo.Id)
+		})
+
+		for _, target := range otherNodes {
+			err = moveMountedShardToEcNode(commandEnv, thisNode, ecShardInfo.Collection, needle.VolumeId(ecShardInfo.Id), shardId, target, applyChange)
+			if err != nil {
+				return
+			}
+			hasMoved = true
+			break
+		}
+		// Reached with hasMoved still false only when there is no other node at all.
+		if !hasMoved {
+			return
+		}
+	}
+
+	return
+}
+
+// moveAwayOneNormalVolume tries to move vol from thisNode to one of otherNodes.
+//
+// otherNodes is sorted in place by ascending localVolumeRatio, and the nodes
+// are tried in that order through maybeMoveOneVolume until one accepts the
+// volume or an error occurs. hasMoved is false when every node was skipped.
+func moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) {
+	sort.Slice(otherNodes, func(a, b int) bool {
+		return otherNodes[a].localVolumeRatio() < otherNodes[b].localVolumeRatio()
+	})
+
+	for _, candidate := range otherNodes {
+		hasMoved, err = maybeMoveOneVolume(commandEnv, volumeReplicas, thisNode, vol, candidate, applyChange)
+		if err != nil || hasMoved {
+			return
+		}
+	}
+	return
+}
+
+// nodesOtherThan splits volumeServers into the node whose info.Id equals
+// thisServer and all remaining nodes, which keep their input order.
+// thisNode is nil when no node matches.
+func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, otherNodes []*Node) {
+	for _, candidate := range volumeServers {
+		if candidate.info.Id != thisServer {
+			otherNodes = append(otherNodes, candidate)
+		} else {
+			thisNode = candidate
+		}
+	}
+	return thisNode, otherNodes
+}
+
+// ecNodesOtherThan splits volumeServers into the EC node whose info.Id equals
+// thisServer and all remaining EC nodes, which keep their input order.
+// thisNode is nil when no node matches.
+func ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNode *EcNode, otherNodes []*EcNode) {
+	for idx := range volumeServers {
+		switch ecNode := volumeServers[idx]; ecNode.info.Id {
+		case thisServer:
+			thisNode = ecNode
+		default:
+			otherNodes = append(otherNodes, ecNode)
+		}
+	}
+	return thisNode, otherNodes
+}