topology_vacuum.go 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. package topology
  2. import (
  3. "context"
  4. "sync/atomic"
  5. "time"
  6. "google.golang.org/grpc"
  7. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  8. "github.com/chrislusf/seaweedfs/weed/glog"
  9. "github.com/chrislusf/seaweedfs/weed/operation"
  10. "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
  11. )
// batchVacuumVolumeCheck asks every replica of volume vid, in parallel, for its
// garbage ratio and collects the replicas whose ratio is at or above
// garbageThreshold.
//
// It returns the locations that should be vacuumed, plus true only when no RPC
// failed, no timeout occurred, and at least one replica crossed the threshold.
func (t *Topology) batchVacuumVolumeCheck(grpcDialOption grpc.DialOption, vid needle.VolumeId,
	locationlist *VolumeLocationList, garbageThreshold float64) (*VolumeLocationList, bool) {
	// Buffered to the replica count so a goroutine that answers after a
	// timeout return below can still send without blocking (no goroutine leak).
	ch := make(chan int, locationlist.Length())
	errCount := int32(0)
	for index, dn := range locationlist.list {
		// index and url are passed as arguments so each goroutine sees its own
		// copy rather than the shared loop variables.
		go func(index int, url string, vid needle.VolumeId) {
			err := operation.WithVolumeServerClient(url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
				resp, err := volumeServerClient.VacuumVolumeCheck(context.Background(), &volume_server_pb.VacuumVolumeCheckRequest{
					VolumeId: uint32(vid),
				})
				if err != nil {
					atomic.AddInt32(&errCount, 1)
					ch <- -1 // -1 means "this replica contributes nothing"
					return err
				}
				if resp.GarbageRatio >= garbageThreshold {
					ch <- index // replica qualifies; report its position in locationlist
				} else {
					ch <- -1
				}
				return nil
			})
			if err != nil {
				glog.V(0).Infof("Checking vacuuming %d on %s: %v", vid, url, err)
			}
		}(index, dn.Url(), vid)
	}
	vacuumLocationList := NewVolumeLocationList()
	// Timeout scales with the volume size limit — roughly one minute per GB
	// (limit/1024/1024/1000), plus one minute of slack.
	waitTimeout := time.NewTimer(time.Minute * time.Duration(t.volumeSizeLimit/1024/1024/1000+1))
	defer waitTimeout.Stop()
	// Expect exactly one answer per replica; give up early on timeout and
	// report failure so no partial vacuum is started.
	for range locationlist.list {
		select {
		case index := <-ch:
			if index != -1 {
				vacuumLocationList.list = append(vacuumLocationList.list, locationlist.list[index])
			}
		case <-waitTimeout.C:
			return vacuumLocationList, false
		}
	}
	return vacuumLocationList, errCount == 0 && len(vacuumLocationList.list) > 0
}
// batchVacuumVolumeCompact runs the compact phase of a vacuum on every listed
// replica of volume vid in parallel. The volume is first removed from the
// writable list so no new writes land while compaction is in progress.
//
// Returns true only if every replica reports a successful compaction before
// the timeout elapses.
func (t *Topology) batchVacuumVolumeCompact(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId,
	locationlist *VolumeLocationList, preallocate int64) bool {
	// Stop routing writes to this volume for the duration of the vacuum.
	vl.accessLock.Lock()
	vl.removeFromWritable(vid)
	vl.accessLock.Unlock()
	// Buffered to the replica count so late goroutines never block on send
	// even if we return early on timeout.
	ch := make(chan bool, locationlist.Length())
	for index, dn := range locationlist.list {
		// index and url are passed as arguments to avoid sharing the loop variables.
		go func(index int, url string, vid needle.VolumeId) {
			glog.V(0).Infoln(index, "Start vacuuming", vid, "on", url)
			err := operation.WithVolumeServerClient(url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
				_, err := volumeServerClient.VacuumVolumeCompact(context.Background(), &volume_server_pb.VacuumVolumeCompactRequest{
					VolumeId:    uint32(vid),
					Preallocate: preallocate,
				})
				return err
			})
			if err != nil {
				glog.Errorf("Error when vacuuming %d on %s: %v", vid, url, err)
				ch <- false
			} else {
				glog.V(0).Infof("Complete vacuuming %d on %s", vid, url)
				ch <- true
			}
		}(index, dn.Url(), vid)
	}
	isVacuumSuccess := true
	// Compaction rewrites the whole volume, so allow three times the budget of
	// the check phase (three minutes per GB of the size limit, plus slack).
	waitTimeout := time.NewTimer(3 * time.Minute * time.Duration(t.volumeSizeLimit/1024/1024/1000+1))
	defer waitTimeout.Stop()
	// Wait for one verdict per replica; any timeout fails the whole batch.
	for range locationlist.list {
		select {
		case canCommit := <-ch:
			isVacuumSuccess = isVacuumSuccess && canCommit
		case <-waitTimeout.C:
			return false
		}
	}
	return isVacuumSuccess
}
  92. func (t *Topology) batchVacuumVolumeCommit(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) bool {
  93. isCommitSuccess := true
  94. isReadOnly := false
  95. for _, dn := range locationlist.list {
  96. glog.V(0).Infoln("Start Committing vacuum", vid, "on", dn.Url())
  97. err := operation.WithVolumeServerClient(dn.Url(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  98. resp, err := volumeServerClient.VacuumVolumeCommit(context.Background(), &volume_server_pb.VacuumVolumeCommitRequest{
  99. VolumeId: uint32(vid),
  100. })
  101. if resp != nil && resp.IsReadOnly {
  102. isReadOnly = true
  103. }
  104. return err
  105. })
  106. if err != nil {
  107. glog.Errorf("Error when committing vacuum %d on %s: %v", vid, dn.Url(), err)
  108. isCommitSuccess = false
  109. } else {
  110. glog.V(0).Infof("Complete Committing vacuum %d on %s", vid, dn.Url())
  111. }
  112. }
  113. if isCommitSuccess {
  114. for _, dn := range locationlist.list {
  115. vl.SetVolumeAvailable(dn, vid, isReadOnly)
  116. }
  117. }
  118. return isCommitSuccess
  119. }
  120. func (t *Topology) batchVacuumVolumeCleanup(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) {
  121. for _, dn := range locationlist.list {
  122. glog.V(0).Infoln("Start cleaning up", vid, "on", dn.Url())
  123. err := operation.WithVolumeServerClient(dn.Url(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
  124. _, err := volumeServerClient.VacuumVolumeCleanup(context.Background(), &volume_server_pb.VacuumVolumeCleanupRequest{
  125. VolumeId: uint32(vid),
  126. })
  127. return err
  128. })
  129. if err != nil {
  130. glog.Errorf("Error when cleaning up vacuum %d on %s: %v", vid, dn.Url(), err)
  131. } else {
  132. glog.V(0).Infof("Complete cleaning up vacuum %d on %s", vid, dn.Url())
  133. }
  134. }
  135. }
  136. func (t *Topology) Vacuum(grpcDialOption grpc.DialOption, garbageThreshold float64, preallocate int64) {
  137. // if there is vacuum going on, return immediately
  138. swapped := atomic.CompareAndSwapInt64(&t.vacuumLockCounter, 0, 1)
  139. if !swapped {
  140. return
  141. }
  142. defer atomic.StoreInt64(&t.vacuumLockCounter, 0)
  143. // now only one vacuum process going on
  144. glog.V(1).Infof("Start vacuum on demand with threshold: %f", garbageThreshold)
  145. for _, col := range t.collectionMap.Items() {
  146. c := col.(*Collection)
  147. for _, vl := range c.storageType2VolumeLayout.Items() {
  148. if vl != nil {
  149. volumeLayout := vl.(*VolumeLayout)
  150. t.vacuumOneVolumeLayout(grpcDialOption, volumeLayout, c, garbageThreshold, preallocate)
  151. }
  152. }
  153. }
  154. }
  155. func (t *Topology) vacuumOneVolumeLayout(grpcDialOption grpc.DialOption, volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, preallocate int64) {
  156. volumeLayout.accessLock.RLock()
  157. tmpMap := make(map[needle.VolumeId]*VolumeLocationList)
  158. for vid, locationList := range volumeLayout.vid2location {
  159. tmpMap[vid] = locationList.Copy()
  160. }
  161. volumeLayout.accessLock.RUnlock()
  162. for vid, locationList := range tmpMap {
  163. volumeLayout.accessLock.RLock()
  164. isReadOnly := volumeLayout.readonlyVolumes.IsTrue(vid)
  165. volumeLayout.accessLock.RUnlock()
  166. if isReadOnly {
  167. continue
  168. }
  169. glog.V(2).Infof("check vacuum on collection:%s volume:%d", c.Name, vid)
  170. if vacuumLocationList, needVacuum := t.batchVacuumVolumeCheck(grpcDialOption, vid, locationList, garbageThreshold); needVacuum {
  171. if t.batchVacuumVolumeCompact(grpcDialOption, volumeLayout, vid, vacuumLocationList, preallocate) {
  172. t.batchVacuumVolumeCommit(grpcDialOption, volumeLayout, vid, vacuumLocationList)
  173. } else {
  174. t.batchVacuumVolumeCleanup(grpcDialOption, volumeLayout, vid, vacuumLocationList)
  175. }
  176. }
  177. }
  178. }