123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364 |
- package topology
- import (
- "context"
- "io"
- "sync"
- "sync/atomic"
- "time"
- "github.com/seaweedfs/seaweedfs/weed/util"
- "github.com/seaweedfs/seaweedfs/weed/pb"
- "google.golang.org/grpc"
- "github.com/seaweedfs/seaweedfs/weed/storage/needle"
- "github.com/seaweedfs/seaweedfs/weed/glog"
- "github.com/seaweedfs/seaweedfs/weed/operation"
- "github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
- )
- func (t *Topology) batchVacuumVolumeCheck(grpcDialOption grpc.DialOption, vid needle.VolumeId,
- locationlist *VolumeLocationList, garbageThreshold float64) (*VolumeLocationList, bool) {
- ch := make(chan int, locationlist.Length())
- errCount := int32(0)
- for index, dn := range locationlist.list {
- go func(index int, url pb.ServerAddress, vid needle.VolumeId) {
- err := operation.WithVolumeServerClient(false, url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
- resp, err := volumeServerClient.VacuumVolumeCheck(context.Background(), &volume_server_pb.VacuumVolumeCheckRequest{
- VolumeId: uint32(vid),
- })
- if err != nil {
- atomic.AddInt32(&errCount, 1)
- ch <- -1
- return err
- }
- if resp.GarbageRatio >= garbageThreshold {
- ch <- index
- } else {
- ch <- -1
- }
- return nil
- })
- if err != nil {
- glog.V(0).Infof("Checking vacuuming %d on %s: %v", vid, url, err)
- }
- }(index, dn.ServerAddress(), vid)
- }
- vacuumLocationList := NewVolumeLocationList()
- waitTimeout := time.NewTimer(time.Minute * time.Duration(t.volumeSizeLimit/1024/1024/1000+1))
- defer waitTimeout.Stop()
- for range locationlist.list {
- select {
- case index := <-ch:
- if index != -1 {
- vacuumLocationList.list = append(vacuumLocationList.list, locationlist.list[index])
- }
- case <-waitTimeout.C:
- return vacuumLocationList, false
- }
- }
- return vacuumLocationList, errCount == 0 && len(vacuumLocationList.list) > 0
- }
- func (t *Topology) batchVacuumVolumeCompact(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId,
- locationlist *VolumeLocationList, preallocate int64) bool {
- vl.accessLock.Lock()
- vl.removeFromWritable(vid)
- vl.accessLock.Unlock()
- ch := make(chan bool, locationlist.Length())
- for index, dn := range locationlist.list {
- go func(index int, url pb.ServerAddress, vid needle.VolumeId) {
- glog.V(0).Infoln(index, "Start vacuuming", vid, "on", url)
- err := operation.WithVolumeServerClient(true, url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
- stream, err := volumeServerClient.VacuumVolumeCompact(context.Background(), &volume_server_pb.VacuumVolumeCompactRequest{
- VolumeId: uint32(vid),
- Preallocate: preallocate,
- })
- if err != nil {
- return err
- }
- for {
- resp, recvErr := stream.Recv()
- if recvErr != nil {
- if recvErr == io.EOF {
- break
- } else {
- return recvErr
- }
- }
- glog.V(0).Infof("%d vacuum %d on %s processed %d bytes, loadAvg %.02f%%",
- index, vid, url, resp.ProcessedBytes, resp.LoadAvg_1M*100)
- }
- return nil
- })
- if err != nil {
- glog.Errorf("Error when vacuuming %d on %s: %v", vid, url, err)
- ch <- false
- } else {
- glog.V(0).Infof("Complete vacuuming %d on %s", vid, url)
- ch <- true
- }
- }(index, dn.ServerAddress(), vid)
- }
- isVacuumSuccess := true
- waitTimeout := time.NewTimer(3 * time.Minute * time.Duration(t.volumeSizeLimit/1024/1024/1000+1))
- defer waitTimeout.Stop()
- for range locationlist.list {
- select {
- case canCommit := <-ch:
- isVacuumSuccess = isVacuumSuccess && canCommit
- case <-waitTimeout.C:
- return false
- }
- }
- return isVacuumSuccess
- }
- func (t *Topology) batchVacuumVolumeCommit(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, vacuumLocationList, locationList *VolumeLocationList) bool {
- isCommitSuccess := true
- isReadOnly := false
- isFullCapacity := false
- for _, dn := range vacuumLocationList.list {
- glog.V(0).Infoln("Start Committing vacuum", vid, "on", dn.Url())
- err := operation.WithVolumeServerClient(false, dn.ServerAddress(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
- resp, err := volumeServerClient.VacuumVolumeCommit(context.Background(), &volume_server_pb.VacuumVolumeCommitRequest{
- VolumeId: uint32(vid),
- })
- if resp != nil {
- if resp.IsReadOnly {
- isReadOnly = true
- }
- if resp.VolumeSize > t.volumeSizeLimit {
- isFullCapacity = true
- }
- }
- return err
- })
- if err != nil {
- glog.Errorf("Error when committing vacuum %d on %s: %v", vid, dn.Url(), err)
- isCommitSuccess = false
- } else {
- glog.V(0).Infof("Complete Committing vacuum %d on %s", vid, dn.Url())
- }
- }
- //we should check the status of all replicas
- if len(locationList.list) > len(vacuumLocationList.list) {
- for _, dn := range locationList.list {
- isFound := false
- for _, dnVaccum := range vacuumLocationList.list {
- if dn.id == dnVaccum.id {
- isFound = true
- break
- }
- }
- if !isFound {
- err := operation.WithVolumeServerClient(false, dn.ServerAddress(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
- resp, err := volumeServerClient.VolumeStatus(context.Background(), &volume_server_pb.VolumeStatusRequest{
- VolumeId: uint32(vid),
- })
- if resp != nil {
- if resp.IsReadOnly {
- isReadOnly = true
- }
- if resp.VolumeSize > t.volumeSizeLimit {
- isFullCapacity = true
- }
- }
- return err
- })
- if err != nil {
- glog.Errorf("Error when checking volume %d status on %s: %v", vid, dn.Url(), err)
- //we mark volume read-only, since the volume state is unknown
- isReadOnly = true
- }
- }
- }
- }
- if isCommitSuccess {
- //record vacuum time of volume
- vl.accessLock.Lock()
- vl.vacuumedVolumes[vid] = time.Now()
- vl.accessLock.Unlock()
- for _, dn := range vacuumLocationList.list {
- vl.SetVolumeAvailable(dn, vid, isReadOnly, isFullCapacity)
- }
- }
- return isCommitSuccess
- }
- func (t *Topology) batchVacuumVolumeCleanup(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) {
- for _, dn := range locationlist.list {
- glog.V(0).Infoln("Start cleaning up", vid, "on", dn.Url())
- err := operation.WithVolumeServerClient(false, dn.ServerAddress(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
- _, err := volumeServerClient.VacuumVolumeCleanup(context.Background(), &volume_server_pb.VacuumVolumeCleanupRequest{
- VolumeId: uint32(vid),
- })
- return err
- })
- if err != nil {
- glog.Errorf("Error when cleaning up vacuum %d on %s: %v", vid, dn.Url(), err)
- } else {
- glog.V(0).Infof("Complete cleaning up vacuum %d on %s", vid, dn.Url())
- }
- }
- }
- func (t *Topology) Vacuum(grpcDialOption grpc.DialOption, garbageThreshold float64, maxParallelVacuumPerServer int, volumeId uint32, collection string, preallocate int64, automatic bool) {
- // if there is vacuum going on, return immediately
- swapped := atomic.CompareAndSwapInt64(&t.vacuumLockCounter, 0, 1)
- if !swapped {
- glog.V(0).Infof("Vacuum is already running")
- return
- }
- defer atomic.StoreInt64(&t.vacuumLockCounter, 0)
- // now only one vacuum process going on
- glog.V(1).Infof("Start vacuum on demand with threshold: %f collection: %s volumeId: %d",
- garbageThreshold, collection, volumeId)
- for _, col := range t.collectionMap.Items() {
- c := col.(*Collection)
- if collection != "" && collection != c.Name {
- continue
- }
- for _, vl := range c.storageType2VolumeLayout.Items() {
- if vl != nil {
- volumeLayout := vl.(*VolumeLayout)
- if volumeId > 0 {
- vid := needle.VolumeId(volumeId)
- volumeLayout.accessLock.RLock()
- locationList, ok := volumeLayout.vid2location[vid]
- volumeLayout.accessLock.RUnlock()
- if ok {
- t.vacuumOneVolumeId(grpcDialOption, volumeLayout, c, garbageThreshold, locationList, vid, preallocate)
- }
- } else {
- t.vacuumOneVolumeLayout(grpcDialOption, volumeLayout, c, garbageThreshold, maxParallelVacuumPerServer, preallocate, automatic)
- }
- }
- if automatic && t.isDisableVacuum {
- break
- }
- }
- if automatic && t.isDisableVacuum {
- glog.V(0).Infof("Vacuum is disabled")
- break
- }
- }
- }
- func (t *Topology) vacuumOneVolumeLayout(grpcDialOption grpc.DialOption, volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, maxParallelVacuumPerServer int, preallocate int64, automatic bool) {
- volumeLayout.accessLock.RLock()
- todoVolumeMap := make(map[needle.VolumeId]*VolumeLocationList)
- for vid, locationList := range volumeLayout.vid2location {
- todoVolumeMap[vid] = locationList.Copy()
- }
- volumeLayout.accessLock.RUnlock()
- // limiter for each volume server
- limiter := make(map[NodeId]int)
- var limiterLock sync.Mutex
- for _, locationList := range todoVolumeMap {
- for _, dn := range locationList.list {
- if _, ok := limiter[dn.Id()]; !ok {
- limiter[dn.Id()] = maxParallelVacuumPerServer
- }
- }
- }
- executor := util.NewLimitedConcurrentExecutor(100)
- var wg sync.WaitGroup
- for len(todoVolumeMap) > 0 {
- pendingVolumeMap := make(map[needle.VolumeId]*VolumeLocationList)
- for vid, locationList := range todoVolumeMap {
- hasEnoughQuota := true
- for _, dn := range locationList.list {
- limiterLock.Lock()
- quota := limiter[dn.Id()]
- limiterLock.Unlock()
- if quota <= 0 {
- hasEnoughQuota = false
- break
- }
- }
- if !hasEnoughQuota {
- pendingVolumeMap[vid] = locationList
- continue
- }
- // debit the quota
- for _, dn := range locationList.list {
- limiterLock.Lock()
- limiter[dn.Id()]--
- limiterLock.Unlock()
- }
- wg.Add(1)
- executor.Execute(func() {
- defer wg.Done()
- t.vacuumOneVolumeId(grpcDialOption, volumeLayout, c, garbageThreshold, locationList, vid, preallocate)
- // credit the quota
- for _, dn := range locationList.list {
- limiterLock.Lock()
- limiter[dn.Id()]++
- limiterLock.Unlock()
- }
- })
- if automatic && t.isDisableVacuum {
- break
- }
- }
- if automatic && t.isDisableVacuum {
- break
- }
- if len(todoVolumeMap) == len(pendingVolumeMap) {
- time.Sleep(10 * time.Second)
- }
- todoVolumeMap = pendingVolumeMap
- }
- wg.Wait()
- }
- func (t *Topology) vacuumOneVolumeId(grpcDialOption grpc.DialOption, volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, locationList *VolumeLocationList, vid needle.VolumeId, preallocate int64) {
- volumeLayout.accessLock.RLock()
- isReadOnly := volumeLayout.readonlyVolumes.IsTrue(vid)
- isEnoughCopies := volumeLayout.enoughCopies(vid)
- volumeLayout.accessLock.RUnlock()
- if isReadOnly {
- return
- }
- if !isEnoughCopies {
- glog.Warningf("skip vacuuming: not enough copies for volume:%d", vid)
- return
- }
- glog.V(1).Infof("check vacuum on collection:%s volume:%d", c.Name, vid)
- if vacuumLocationList, needVacuum := t.batchVacuumVolumeCheck(
- grpcDialOption, vid, locationList, garbageThreshold); needVacuum {
- if t.batchVacuumVolumeCompact(grpcDialOption, volumeLayout, vid, vacuumLocationList, preallocate) {
- t.batchVacuumVolumeCommit(grpcDialOption, volumeLayout, vid, vacuumLocationList, locationList)
- } else {
- t.batchVacuumVolumeCleanup(grpcDialOption, volumeLayout, vid, vacuumLocationList)
- }
- }
- }
|