command_volume_fix_replication.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. package shell
  2. import (
  3. "context"
  4. "flag"
  5. "fmt"
  6. "io"
  7. "path/filepath"
  8. "strconv"
  9. "time"
  10. "github.com/seaweedfs/seaweedfs/weed/pb"
  11. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  12. "github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
  13. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  14. "golang.org/x/exp/slices"
  15. "google.golang.org/grpc"
  16. "github.com/seaweedfs/seaweedfs/weed/operation"
  17. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  18. "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
  19. "github.com/seaweedfs/seaweedfs/weed/storage/super_block"
  20. )
  21. func init() {
  22. Commands = append(Commands, &commandVolumeFixReplication{})
  23. }
  24. type commandVolumeFixReplication struct {
  25. collectionPattern *string
  26. }
  27. func (c *commandVolumeFixReplication) Name() string {
  28. return "volume.fix.replication"
  29. }
  30. func (c *commandVolumeFixReplication) Help() string {
  31. return `add or remove replicas to volumes that are missing replicas or over-replicated
  32. This command finds all over-replicated volumes. If found, it will purge the oldest copies and stop.
  33. This command also finds all under-replicated volumes, and finds volume servers with free slots.
  34. If the free slots satisfy the replication requirement, the volume content is copied over and mounted.
  35. volume.fix.replication -n # do not take action
  36. volume.fix.replication # actually deleting or copying the volume files and mount the volume
  37. volume.fix.replication -collectionPattern=important* # fix any collections with prefix "important"
  38. Note:
  39. * each time this will only add back one replica for each volume id that is under replicated.
  40. If there are multiple replicas are missing, e.g. replica count is > 2, you may need to run this multiple times.
  41. * do not run this too quickly within seconds, since the new volume replica may take a few seconds
  42. to register itself to the master.
  43. `
  44. }
  45. func (c *commandVolumeFixReplication) HasTag(tag CommandTag) bool {
  46. return false && tag == ResourceHeavy // resource intensive only when deleting and checking with replicas.
  47. }
  48. func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  49. volFixReplicationCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  50. c.collectionPattern = volFixReplicationCommand.String("collectionPattern", "", "match with wildcard characters '*' and '?'")
  51. skipChange := volFixReplicationCommand.Bool("n", false, "skip the changes")
  52. doDelete := volFixReplicationCommand.Bool("doDelete", true, "Also delete over-replicated volumes besides fixing under-replication")
  53. doCheck := volFixReplicationCommand.Bool("doCheck", true, "Also check synchronization before deleting")
  54. retryCount := volFixReplicationCommand.Int("retry", 5, "how many times to retry")
  55. volumesPerStep := volFixReplicationCommand.Int("volumesPerStep", 0, "how many volumes to fix in one cycle")
  56. if err = volFixReplicationCommand.Parse(args); err != nil {
  57. return nil
  58. }
  59. if err = commandEnv.confirmIsLocked(args); err != nil {
  60. return
  61. }
  62. takeAction := !*skipChange
  63. underReplicatedVolumeIdsCount := 1
  64. for underReplicatedVolumeIdsCount > 0 {
  65. fixedVolumeReplicas := map[string]int{}
  66. // collect topology information
  67. topologyInfo, _, err := collectTopologyInfo(commandEnv, 15*time.Second)
  68. if err != nil {
  69. return err
  70. }
  71. // find all volumes that needs replication
  72. // collect all data nodes
  73. volumeReplicas, allLocations := collectVolumeReplicaLocations(topologyInfo)
  74. if len(allLocations) == 0 {
  75. return fmt.Errorf("no data nodes at all")
  76. }
  77. // find all under replicated volumes
  78. var underReplicatedVolumeIds, overReplicatedVolumeIds, misplacedVolumeIds []uint32
  79. for vid, replicas := range volumeReplicas {
  80. replica := replicas[0]
  81. replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(replica.info.ReplicaPlacement))
  82. switch {
  83. case replicaPlacement.GetCopyCount() > len(replicas) || !satisfyReplicaCurrentLocation(replicaPlacement, replicas):
  84. underReplicatedVolumeIds = append(underReplicatedVolumeIds, vid)
  85. case isMisplaced(replicas, replicaPlacement):
  86. misplacedVolumeIds = append(misplacedVolumeIds, vid)
  87. fmt.Fprintf(writer, "volume %d replication %s is not well placed %s\n", replica.info.Id, replicaPlacement, replica.location.dataNode.Id)
  88. case replicaPlacement.GetCopyCount() < len(replicas):
  89. overReplicatedVolumeIds = append(overReplicatedVolumeIds, vid)
  90. fmt.Fprintf(writer, "volume %d replication %s, but over replicated %+d\n", replica.info.Id, replicaPlacement, len(replicas))
  91. }
  92. }
  93. if !commandEnv.isLocked() {
  94. return fmt.Errorf("lock is lost")
  95. }
  96. if len(overReplicatedVolumeIds) > 0 && *doDelete {
  97. if err := c.deleteOneVolume(commandEnv, writer, takeAction, *doCheck, overReplicatedVolumeIds, volumeReplicas, allLocations, pickOneReplicaToDelete); err != nil {
  98. return err
  99. }
  100. }
  101. if len(misplacedVolumeIds) > 0 && *doDelete {
  102. if err := c.deleteOneVolume(commandEnv, writer, takeAction, *doCheck, misplacedVolumeIds, volumeReplicas, allLocations, pickOneMisplacedVolume); err != nil {
  103. return err
  104. }
  105. }
  106. underReplicatedVolumeIdsCount = len(underReplicatedVolumeIds)
  107. if underReplicatedVolumeIdsCount > 0 {
  108. // find the most under populated data nodes
  109. fixedVolumeReplicas, err = c.fixUnderReplicatedVolumes(commandEnv, writer, takeAction, underReplicatedVolumeIds, volumeReplicas, allLocations, *retryCount, *volumesPerStep)
  110. if err != nil {
  111. return err
  112. }
  113. }
  114. if *skipChange {
  115. break
  116. }
  117. // check that the topology has been updated
  118. if len(fixedVolumeReplicas) > 0 {
  119. fixedVolumes := make([]string, 0, len(fixedVolumeReplicas))
  120. for k, _ := range fixedVolumeReplicas {
  121. fixedVolumes = append(fixedVolumes, k)
  122. }
  123. volumeIdLocations, err := lookupVolumeIds(commandEnv, fixedVolumes)
  124. if err != nil {
  125. return err
  126. }
  127. for _, volumeIdLocation := range volumeIdLocations {
  128. volumeId := volumeIdLocation.VolumeOrFileId
  129. volumeIdLocationCount := len(volumeIdLocation.Locations)
  130. i := 0
  131. for fixedVolumeReplicas[volumeId] >= volumeIdLocationCount {
  132. fmt.Fprintf(writer, "the number of locations for volume %s has not increased yet, let's wait\n", volumeId)
  133. time.Sleep(time.Duration(i+1) * time.Second * 7)
  134. volumeLocIds, err := lookupVolumeIds(commandEnv, []string{volumeId})
  135. if err != nil {
  136. return err
  137. }
  138. volumeIdLocationCount = len(volumeLocIds[0].Locations)
  139. if *retryCount <= i {
  140. return fmt.Errorf("replicas volume %s mismatch in topology", volumeId)
  141. }
  142. i += 1
  143. }
  144. }
  145. }
  146. }
  147. return nil
  148. }
  149. func collectVolumeReplicaLocations(topologyInfo *master_pb.TopologyInfo) (map[uint32][]*VolumeReplica, []location) {
  150. volumeReplicas := make(map[uint32][]*VolumeReplica)
  151. var allLocations []location
  152. eachDataNode(topologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
  153. loc := newLocation(dc, string(rack), dn)
  154. for _, diskInfo := range dn.DiskInfos {
  155. for _, v := range diskInfo.VolumeInfos {
  156. volumeReplicas[v.Id] = append(volumeReplicas[v.Id], &VolumeReplica{
  157. location: &loc,
  158. info: v,
  159. })
  160. }
  161. }
  162. allLocations = append(allLocations, loc)
  163. })
  164. return volumeReplicas, allLocations
  165. }
  166. type SelectOneVolumeFunc func(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) *VolumeReplica
  167. func checkOneVolume(a *VolumeReplica, b *VolumeReplica, writer io.Writer, grpcDialOption grpc.DialOption) (err error) {
  168. aDB, bDB := needle_map.NewMemDb(), needle_map.NewMemDb()
  169. defer func() {
  170. aDB.Close()
  171. bDB.Close()
  172. }()
  173. // read index db
  174. readIndexDbCutoffFrom := uint64(time.Now().UnixNano())
  175. if err = readIndexDatabase(aDB, a.info.Collection, a.info.Id, pb.NewServerAddressFromDataNode(a.location.dataNode), false, writer, grpcDialOption); err != nil {
  176. return fmt.Errorf("readIndexDatabase %s volume %d: %v", a.location.dataNode, a.info.Id, err)
  177. }
  178. if err := readIndexDatabase(bDB, b.info.Collection, b.info.Id, pb.NewServerAddressFromDataNode(b.location.dataNode), false, writer, grpcDialOption); err != nil {
  179. return fmt.Errorf("readIndexDatabase %s volume %d: %v", b.location.dataNode, b.info.Id, err)
  180. }
  181. if _, err = doVolumeCheckDisk(aDB, bDB, a, b, false, writer, true, false, float64(1), readIndexDbCutoffFrom, grpcDialOption); err != nil {
  182. return fmt.Errorf("doVolumeCheckDisk source:%s target:%s volume %d: %v", a.location.dataNode.Id, b.location.dataNode.Id, a.info.Id, err)
  183. }
  184. return
  185. }
  186. func (c *commandVolumeFixReplication) deleteOneVolume(commandEnv *CommandEnv, writer io.Writer, takeAction bool, doCheck bool, overReplicatedVolumeIds []uint32, volumeReplicas map[uint32][]*VolumeReplica, allLocations []location, selectOneVolumeFn SelectOneVolumeFunc) error {
  187. for _, vid := range overReplicatedVolumeIds {
  188. replicas := volumeReplicas[vid]
  189. replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(replicas[0].info.ReplicaPlacement))
  190. replica := selectOneVolumeFn(replicas, replicaPlacement)
  191. // check collection name pattern
  192. if *c.collectionPattern != "" {
  193. matched, err := filepath.Match(*c.collectionPattern, replica.info.Collection)
  194. if err != nil {
  195. return fmt.Errorf("match pattern %s with collection %s: %v", *c.collectionPattern, replica.info.Collection, err)
  196. }
  197. if !matched {
  198. break
  199. }
  200. }
  201. collectionIsMismatch := false
  202. for _, volumeReplica := range replicas {
  203. if volumeReplica.info.Collection != replica.info.Collection {
  204. fmt.Fprintf(writer, "skip delete volume %d as collection %s is mismatch: %s\n", replica.info.Id, replica.info.Collection, volumeReplica.info.Collection)
  205. collectionIsMismatch = true
  206. }
  207. }
  208. if collectionIsMismatch {
  209. continue
  210. }
  211. fmt.Fprintf(writer, "deleting volume %d from %s ...\n", replica.info.Id, replica.location.dataNode.Id)
  212. if !takeAction {
  213. break
  214. }
  215. if doCheck {
  216. for _, replicaB := range replicas {
  217. if replicaB.location.dataNode == replica.location.dataNode {
  218. continue
  219. }
  220. if err := checkOneVolume(replica, replicaB, writer, commandEnv.option.GrpcDialOption); err != nil {
  221. return fmt.Errorf("sync volume %d on %s and %s: %v\n", replica.info.Id, replica.location.dataNode.Id, replicaB.location.dataNode.Id, err)
  222. }
  223. }
  224. }
  225. if err := deleteVolume(commandEnv.option.GrpcDialOption, needle.VolumeId(replica.info.Id),
  226. pb.NewServerAddressFromDataNode(replica.location.dataNode), false); err != nil {
  227. return fmt.Errorf("deleting volume %d from %s : %v", replica.info.Id, replica.location.dataNode.Id, err)
  228. }
  229. }
  230. return nil
  231. }
  232. func (c *commandVolumeFixReplication) fixUnderReplicatedVolumes(commandEnv *CommandEnv, writer io.Writer, takeAction bool, underReplicatedVolumeIds []uint32, volumeReplicas map[uint32][]*VolumeReplica, allLocations []location, retryCount int, volumesPerStep int) (fixedVolumes map[string]int, err error) {
  233. fixedVolumes = map[string]int{}
  234. if len(underReplicatedVolumeIds) > volumesPerStep && volumesPerStep > 0 {
  235. underReplicatedVolumeIds = underReplicatedVolumeIds[0:volumesPerStep]
  236. }
  237. for _, vid := range underReplicatedVolumeIds {
  238. for i := 0; i < retryCount+1; i++ {
  239. if err = c.fixOneUnderReplicatedVolume(commandEnv, writer, takeAction, volumeReplicas, vid, allLocations); err == nil {
  240. if takeAction {
  241. fixedVolumes[strconv.FormatUint(uint64(vid), 10)] = len(volumeReplicas[vid])
  242. }
  243. break
  244. } else {
  245. fmt.Fprintf(writer, "fixing under replicated volume %d: %v\n", vid, err)
  246. }
  247. }
  248. }
  249. return fixedVolumes, nil
  250. }
  251. func (c *commandVolumeFixReplication) fixOneUnderReplicatedVolume(commandEnv *CommandEnv, writer io.Writer, takeAction bool, volumeReplicas map[uint32][]*VolumeReplica, vid uint32, allLocations []location) error {
  252. replicas := volumeReplicas[vid]
  253. replica := pickOneReplicaToCopyFrom(replicas)
  254. replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(replica.info.ReplicaPlacement))
  255. foundNewLocation := false
  256. hasSkippedCollection := false
  257. keepDataNodesSorted(allLocations, types.ToDiskType(replica.info.DiskType))
  258. fn := capacityByFreeVolumeCount(types.ToDiskType(replica.info.DiskType))
  259. for _, dst := range allLocations {
  260. // check whether data nodes satisfy the constraints
  261. if fn(dst.dataNode) > 0 && satisfyReplicaPlacement(replicaPlacement, replicas, dst) {
  262. // check collection name pattern
  263. if *c.collectionPattern != "" {
  264. matched, err := filepath.Match(*c.collectionPattern, replica.info.Collection)
  265. if err != nil {
  266. return fmt.Errorf("match pattern %s with collection %s: %v", *c.collectionPattern, replica.info.Collection, err)
  267. }
  268. if !matched {
  269. hasSkippedCollection = true
  270. break
  271. }
  272. }
  273. // ask the volume server to replicate the volume
  274. foundNewLocation = true
  275. fmt.Fprintf(writer, "replicating volume %d %s from %s to dataNode %s ...\n", replica.info.Id, replicaPlacement, replica.location.dataNode.Id, dst.dataNode.Id)
  276. if !takeAction {
  277. // adjust volume count
  278. addVolumeCount(dst.dataNode.DiskInfos[replica.info.DiskType], 1)
  279. break
  280. }
  281. err := operation.WithVolumeServerClient(false, pb.NewServerAddressFromDataNode(dst.dataNode), commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  282. stream, replicateErr := volumeServerClient.VolumeCopy(context.Background(), &volume_server_pb.VolumeCopyRequest{
  283. VolumeId: replica.info.Id,
  284. SourceDataNode: string(pb.NewServerAddressFromDataNode(replica.location.dataNode)),
  285. })
  286. if replicateErr != nil {
  287. return fmt.Errorf("copying from %s => %s : %v", replica.location.dataNode.Id, dst.dataNode.Id, replicateErr)
  288. }
  289. for {
  290. resp, recvErr := stream.Recv()
  291. if recvErr != nil {
  292. if recvErr == io.EOF {
  293. break
  294. } else {
  295. return recvErr
  296. }
  297. }
  298. if resp.ProcessedBytes > 0 {
  299. fmt.Fprintf(writer, "volume %d processed %d bytes\n", replica.info.Id, resp.ProcessedBytes)
  300. }
  301. }
  302. return nil
  303. })
  304. if err != nil {
  305. return err
  306. }
  307. // adjust volume count
  308. addVolumeCount(dst.dataNode.DiskInfos[replica.info.DiskType], 1)
  309. break
  310. }
  311. }
  312. if !foundNewLocation && !hasSkippedCollection {
  313. fmt.Fprintf(writer, "failed to place volume %d replica as %s, existing:%+v\n", replica.info.Id, replicaPlacement, len(replicas))
  314. }
  315. return nil
  316. }
  317. func addVolumeCount(info *master_pb.DiskInfo, count int) {
  318. if info == nil {
  319. return
  320. }
  321. info.VolumeCount += int64(count)
  322. info.FreeVolumeCount -= int64(count)
  323. }
  324. func keepDataNodesSorted(dataNodes []location, diskType types.DiskType) {
  325. fn := capacityByFreeVolumeCount(diskType)
  326. slices.SortFunc(dataNodes, func(a, b location) int {
  327. return int(fn(b.dataNode) - fn(a.dataNode))
  328. })
  329. }
  330. func satisfyReplicaCurrentLocation(replicaPlacement *super_block.ReplicaPlacement, replicas []*VolumeReplica) bool {
  331. existingDataCenters, existingRacks, _ := countReplicas(replicas)
  332. if replicaPlacement.DiffDataCenterCount+1 > len(existingDataCenters) {
  333. return false
  334. }
  335. if replicaPlacement.DiffRackCount+1 > len(existingRacks) {
  336. return false
  337. }
  338. if replicaPlacement.SameRackCount > 0 {
  339. foundSatisfyRack := false
  340. for _, rackCount := range existingRacks {
  341. if rackCount >= replicaPlacement.SameRackCount+1 {
  342. foundSatisfyRack = true
  343. }
  344. }
  345. return foundSatisfyRack
  346. }
  347. return true
  348. }
/*
	if on an existing data node {
		return false
	}
	if different from existing dcs {
		if lacking different dcs {
			return true
		} else {
			return false
		}
	}
	if not on primary dc {
		return false
	}
	if different from existing racks {
		if lacking different racks {
			return true
		} else {
			return false
		}
	}
	if not on primary rack {
		return false
	}
	if lacking copies on the same rack {
		return true
	} else {
		return false
	}
*/
  379. func satisfyReplicaPlacement(replicaPlacement *super_block.ReplicaPlacement, replicas []*VolumeReplica, possibleLocation location) bool {
  380. existingDataCenters, _, existingDataNodes := countReplicas(replicas)
  381. if _, found := existingDataNodes[possibleLocation.String()]; found {
  382. // avoid duplicated volume on the same data node
  383. return false
  384. }
  385. primaryDataCenters, _ := findTopKeys(existingDataCenters)
  386. // ensure data center count is within limit
  387. if _, found := existingDataCenters[possibleLocation.DataCenter()]; !found {
  388. // different from existing dcs
  389. if len(existingDataCenters) < replicaPlacement.DiffDataCenterCount+1 {
  390. // lack on different dcs
  391. return true
  392. } else {
  393. // adding this would go over the different dcs limit
  394. return false
  395. }
  396. }
  397. // now this is same as one of the existing data center
  398. if !isAmong(possibleLocation.DataCenter(), primaryDataCenters) {
  399. // not on one of the primary dcs
  400. return false
  401. }
  402. // now this is one of the primary dcs
  403. primaryDcRacks := make(map[string]int)
  404. for _, replica := range replicas {
  405. if replica.location.DataCenter() != possibleLocation.DataCenter() {
  406. continue
  407. }
  408. primaryDcRacks[replica.location.Rack()] += 1
  409. }
  410. primaryRacks, _ := findTopKeys(primaryDcRacks)
  411. sameRackCount := primaryDcRacks[possibleLocation.Rack()]
  412. // ensure rack count is within limit
  413. if _, found := primaryDcRacks[possibleLocation.Rack()]; !found {
  414. // different from existing racks
  415. if len(primaryDcRacks) < replicaPlacement.DiffRackCount+1 {
  416. // lack on different racks
  417. return true
  418. } else {
  419. // adding this would go over the different racks limit
  420. return false
  421. }
  422. }
  423. // now this is same as one of the existing racks
  424. if !isAmong(possibleLocation.Rack(), primaryRacks) {
  425. // not on the primary rack
  426. return false
  427. }
  428. // now this is on the primary rack
  429. // different from existing data nodes
  430. if sameRackCount < replicaPlacement.SameRackCount+1 {
  431. // lack on same rack
  432. return true
  433. } else {
  434. // adding this would go over the same data node limit
  435. return false
  436. }
  437. }
  438. func findTopKeys(m map[string]int) (topKeys []string, max int) {
  439. for k, c := range m {
  440. if max < c {
  441. topKeys = topKeys[:0]
  442. topKeys = append(topKeys, k)
  443. max = c
  444. } else if max == c {
  445. topKeys = append(topKeys, k)
  446. }
  447. }
  448. return
  449. }
  450. func isAmong(key string, keys []string) bool {
  451. for _, k := range keys {
  452. if k == key {
  453. return true
  454. }
  455. }
  456. return false
  457. }
  458. type VolumeReplica struct {
  459. location *location
  460. info *master_pb.VolumeInformationMessage
  461. }
  462. type location struct {
  463. dc string
  464. rack string
  465. dataNode *master_pb.DataNodeInfo
  466. }
  467. func newLocation(dc, rack string, dataNode *master_pb.DataNodeInfo) location {
  468. return location{
  469. dc: dc,
  470. rack: rack,
  471. dataNode: dataNode,
  472. }
  473. }
  474. func (l location) String() string {
  475. return fmt.Sprintf("%s %s %s", l.dc, l.rack, l.dataNode.Id)
  476. }
  477. func (l location) Rack() string {
  478. return fmt.Sprintf("%s %s", l.dc, l.rack)
  479. }
  480. func (l location) DataCenter() string {
  481. return l.dc
  482. }
  483. func pickOneReplicaToCopyFrom(replicas []*VolumeReplica) *VolumeReplica {
  484. mostRecent := replicas[0]
  485. for _, replica := range replicas {
  486. if replica.info.ModifiedAtSecond > mostRecent.info.ModifiedAtSecond {
  487. mostRecent = replica
  488. }
  489. }
  490. return mostRecent
  491. }
  492. func countReplicas(replicas []*VolumeReplica) (diffDc, diffRack, diffNode map[string]int) {
  493. diffDc = make(map[string]int)
  494. diffRack = make(map[string]int)
  495. diffNode = make(map[string]int)
  496. for _, replica := range replicas {
  497. diffDc[replica.location.DataCenter()] += 1
  498. diffRack[replica.location.Rack()] += 1
  499. diffNode[replica.location.String()] += 1
  500. }
  501. return
  502. }
  503. func pickOneReplicaToDelete(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) *VolumeReplica {
  504. slices.SortFunc(replicas, func(a, b *VolumeReplica) int {
  505. if a.info.Size != b.info.Size {
  506. return int(a.info.Size - b.info.Size)
  507. }
  508. if a.info.ModifiedAtSecond != b.info.ModifiedAtSecond {
  509. return int(a.info.ModifiedAtSecond - b.info.ModifiedAtSecond)
  510. }
  511. if a.info.CompactRevision != b.info.CompactRevision {
  512. return int(a.info.CompactRevision - b.info.CompactRevision)
  513. }
  514. return 0
  515. })
  516. return replicas[0]
  517. }
  518. // check and fix misplaced volumes
  519. func isMisplaced(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) bool {
  520. for i := 0; i < len(replicas); i++ {
  521. others := otherThan(replicas, i)
  522. if !satisfyReplicaPlacement(replicaPlacement, others, *replicas[i].location) {
  523. return true
  524. }
  525. }
  526. return false
  527. }
  528. func otherThan(replicas []*VolumeReplica, index int) (others []*VolumeReplica) {
  529. for i := 0; i < len(replicas); i++ {
  530. if index != i {
  531. others = append(others, replicas[i])
  532. }
  533. }
  534. return
  535. }
  536. func pickOneMisplacedVolume(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) (toDelete *VolumeReplica) {
  537. var deletionCandidates []*VolumeReplica
  538. for i := 0; i < len(replicas); i++ {
  539. others := otherThan(replicas, i)
  540. if !isMisplaced(others, replicaPlacement) {
  541. deletionCandidates = append(deletionCandidates, replicas[i])
  542. }
  543. }
  544. if len(deletionCandidates) > 0 {
  545. return pickOneReplicaToDelete(deletionCandidates, replicaPlacement)
  546. }
  547. return pickOneReplicaToDelete(replicas, replicaPlacement)
  548. }