topology.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. package topology
  2. import (
  3. "errors"
  4. "fmt"
  5. "github.com/chrislusf/seaweedfs/weed/storage/types"
  6. "math/rand"
  7. "sync"
  8. "time"
  9. "github.com/chrislusf/raft"
  10. "github.com/chrislusf/seaweedfs/weed/glog"
  11. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  12. "github.com/chrislusf/seaweedfs/weed/sequence"
  13. "github.com/chrislusf/seaweedfs/weed/storage"
  14. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  15. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  16. "github.com/chrislusf/seaweedfs/weed/util"
  17. )
// Topology is the root node of the cluster tree. It embeds NodeImpl for the
// generic tree behaviour and adds cluster-wide state: the collection registry,
// erasure-coding shard locations, the file id sequencer and the raft server
// used for leader election.
type Topology struct {
	// NOTE(review): presumably updated atomically and kept first for 64-bit
	// alignment; its users are not visible in this part of the file — confirm.
	vacuumLockCounter int64

	// NodeImpl supplies id, nodeType, value, diskUsages and children, all of
	// which NewTopology initialises.
	NodeImpl

	// collectionMap maps a collection name to its *Collection.
	collectionMap *util.ConcurrentReadMap

	// ecShardMap maps a volume id to its EC shard locations. ListCollections
	// reads it while holding ecShardMapLock.
	ecShardMap     map[needle.VolumeId]*EcShardLocations
	ecShardMapLock sync.RWMutex

	// pulse is copied from the NewTopology argument; its unit is not visible here.
	pulse int64

	// volumeSizeLimit and replicationAsMin are handed to every Collection
	// created through GetVolumeLayout.
	volumeSizeLimit  uint64
	replicationAsMin bool

	// Sequence hands out file ids in PickForWrite.
	Sequence sequence.Sequencer

	// Both channels are created unbuffered by NewTopology; their producers and
	// consumers are outside this part of the file.
	chanFullVolumes    chan storage.VolumeInfo
	chanCrowdedVolumes chan storage.VolumeInfo

	// Configuration starts out as an empty Configuration.
	Configuration *Configuration

	// RaftServer is not set by NewTopology; Leader and IsLeader treat a nil
	// value as "not ready yet".
	RaftServer raft.Server
}
  33. func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int, replicationAsMin bool) *Topology {
  34. t := &Topology{}
  35. t.id = NodeId(id)
  36. t.nodeType = "Topology"
  37. t.NodeImpl.value = t
  38. t.diskUsages = newDiskUsages()
  39. t.children = make(map[NodeId]Node)
  40. t.collectionMap = util.NewConcurrentReadMap()
  41. t.ecShardMap = make(map[needle.VolumeId]*EcShardLocations)
  42. t.pulse = int64(pulse)
  43. t.volumeSizeLimit = volumeSizeLimit
  44. t.replicationAsMin = replicationAsMin
  45. t.Sequence = seq
  46. t.chanFullVolumes = make(chan storage.VolumeInfo)
  47. t.chanCrowdedVolumes = make(chan storage.VolumeInfo)
  48. t.Configuration = &Configuration{}
  49. return t
  50. }
  51. func (t *Topology) IsLeader() bool {
  52. if t.RaftServer != nil {
  53. if t.RaftServer.State() == raft.Leader {
  54. return true
  55. }
  56. if leader, err := t.Leader(); err == nil {
  57. if t.RaftServer.Name() == leader {
  58. return true
  59. }
  60. }
  61. }
  62. return false
  63. }
  64. func (t *Topology) Leader() (string, error) {
  65. l := ""
  66. for count := 0; count < 3; count++ {
  67. if t.RaftServer != nil {
  68. l = t.RaftServer.Leader()
  69. } else {
  70. return "", errors.New("Raft Server not ready yet!")
  71. }
  72. if l != "" {
  73. break
  74. } else {
  75. time.Sleep(time.Duration(5+count) * time.Second)
  76. }
  77. }
  78. return l, nil
  79. }
  80. func (t *Topology) Lookup(collection string, vid needle.VolumeId) (dataNodes []*DataNode) {
  81. // maybe an issue if lots of collections?
  82. if collection == "" {
  83. for _, c := range t.collectionMap.Items() {
  84. if list := c.(*Collection).Lookup(vid); list != nil {
  85. return list
  86. }
  87. }
  88. } else {
  89. if c, ok := t.collectionMap.Find(collection); ok {
  90. return c.(*Collection).Lookup(vid)
  91. }
  92. }
  93. if locations, found := t.LookupEcShards(vid); found {
  94. for _, loc := range locations.Locations {
  95. dataNodes = append(dataNodes, loc...)
  96. }
  97. return dataNodes
  98. }
  99. return nil
  100. }
  101. func (t *Topology) NextVolumeId() (needle.VolumeId, error) {
  102. vid := t.GetMaxVolumeId()
  103. next := vid.Next()
  104. if _, err := t.RaftServer.Do(NewMaxVolumeIdCommand(next)); err != nil {
  105. return 0, err
  106. }
  107. return next, nil
  108. }
  109. // deprecated
  110. func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool {
  111. vl := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
  112. active, _ := vl.GetActiveVolumeCount(option)
  113. return active > 0
  114. }
  115. func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *DataNode, error) {
  116. vid, count, datanodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType).PickForWrite(count, option)
  117. if err != nil {
  118. return "", 0, nil, fmt.Errorf("failed to find writable volumes for collection:%s replication:%s ttl:%s error: %v", option.Collection, option.ReplicaPlacement.String(), option.Ttl.String(), err)
  119. }
  120. if datanodes.Length() == 0 {
  121. return "", 0, nil, fmt.Errorf("no writable volumes available for collection:%s replication:%s ttl:%s", option.Collection, option.ReplicaPlacement.String(), option.Ttl.String())
  122. }
  123. fileId := t.Sequence.NextFileId(count)
  124. return needle.NewFileId(*vid, fileId, rand.Uint32()).String(), count, datanodes.Head(), nil
  125. }
  126. func (t *Topology) GetVolumeLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) *VolumeLayout {
  127. return t.collectionMap.Get(collectionName, func() interface{} {
  128. return NewCollection(collectionName, t.volumeSizeLimit, t.replicationAsMin)
  129. }).(*Collection).GetOrCreateVolumeLayout(rp, ttl, diskType)
  130. }
  131. func (t *Topology) ListCollections(includeNormalVolumes, includeEcVolumes bool) (ret []string) {
  132. mapOfCollections := make(map[string]bool)
  133. for _, c := range t.collectionMap.Items() {
  134. mapOfCollections[c.(*Collection).Name] = true
  135. }
  136. if includeEcVolumes {
  137. t.ecShardMapLock.RLock()
  138. for _, ecVolumeLocation := range t.ecShardMap {
  139. mapOfCollections[ecVolumeLocation.Collection] = true
  140. }
  141. t.ecShardMapLock.RUnlock()
  142. }
  143. for k := range mapOfCollections {
  144. ret = append(ret, k)
  145. }
  146. return ret
  147. }
  148. func (t *Topology) FindCollection(collectionName string) (*Collection, bool) {
  149. c, hasCollection := t.collectionMap.Find(collectionName)
  150. if !hasCollection {
  151. return nil, false
  152. }
  153. return c.(*Collection), hasCollection
  154. }
// DeleteCollection removes collectionName from collectionMap.
func (t *Topology) DeleteCollection(collectionName string) {
	t.collectionMap.Delete(collectionName)
}
  158. func (t *Topology) DeleteLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) {
  159. collection, found := t.FindCollection(collectionName)
  160. if !found {
  161. return
  162. }
  163. collection.DeleteVolumeLayout(rp, ttl, diskType)
  164. if len(collection.storageType2VolumeLayout.Items()) == 0 {
  165. t.DeleteCollection(collectionName)
  166. }
  167. }
  168. func (t *Topology) RegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
  169. diskType := types.ToDiskType(v.DiskType)
  170. vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  171. vl.RegisterVolume(&v, dn)
  172. vl.EnsureCorrectWritables(&v)
  173. }
  174. func (t *Topology) UnRegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
  175. glog.Infof("removing volume info: %+v", v)
  176. diskType := types.ToDiskType(v.DiskType)
  177. volumeLayout := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  178. volumeLayout.UnRegisterVolume(&v, dn)
  179. if volumeLayout.isEmpty() {
  180. t.DeleteLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  181. }
  182. }
  183. func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter {
  184. for _, c := range t.Children() {
  185. dc := c.(*DataCenter)
  186. if string(dc.Id()) == dcName {
  187. return dc
  188. }
  189. }
  190. dc := NewDataCenter(dcName)
  191. t.LinkChildNode(dc)
  192. return dc
  193. }
  194. func (t *Topology) SyncDataNodeRegistration(volumes []*master_pb.VolumeInformationMessage, dn *DataNode) (newVolumes, deletedVolumes []storage.VolumeInfo) {
  195. // convert into in memory struct storage.VolumeInfo
  196. var volumeInfos []storage.VolumeInfo
  197. for _, v := range volumes {
  198. if vi, err := storage.NewVolumeInfo(v); err == nil {
  199. volumeInfos = append(volumeInfos, vi)
  200. } else {
  201. glog.V(0).Infof("Fail to convert joined volume information: %v", err)
  202. }
  203. }
  204. // find out the delta volumes
  205. var changedVolumes []storage.VolumeInfo
  206. newVolumes, deletedVolumes, changedVolumes = dn.UpdateVolumes(volumeInfos)
  207. for _, v := range newVolumes {
  208. t.RegisterVolumeLayout(v, dn)
  209. }
  210. for _, v := range deletedVolumes {
  211. t.UnRegisterVolumeLayout(v, dn)
  212. }
  213. for _, v := range changedVolumes {
  214. diskType := types.ToDiskType(v.DiskType)
  215. vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  216. vl.EnsureCorrectWritables(&v)
  217. }
  218. return
  219. }
  220. func (t *Topology) IncrementalSyncDataNodeRegistration(newVolumes, deletedVolumes []*master_pb.VolumeShortInformationMessage, dn *DataNode) {
  221. var newVis, oldVis []storage.VolumeInfo
  222. for _, v := range newVolumes {
  223. vi, err := storage.NewVolumeInfoFromShort(v)
  224. if err != nil {
  225. glog.V(0).Infof("NewVolumeInfoFromShort %v: %v", v, err)
  226. continue
  227. }
  228. newVis = append(newVis, vi)
  229. }
  230. for _, v := range deletedVolumes {
  231. vi, err := storage.NewVolumeInfoFromShort(v)
  232. if err != nil {
  233. glog.V(0).Infof("NewVolumeInfoFromShort %v: %v", v, err)
  234. continue
  235. }
  236. oldVis = append(oldVis, vi)
  237. }
  238. dn.DeltaUpdateVolumes(newVis, oldVis)
  239. for _, vi := range newVis {
  240. t.RegisterVolumeLayout(vi, dn)
  241. }
  242. for _, vi := range oldVis {
  243. t.UnRegisterVolumeLayout(vi, dn)
  244. }
  245. return
  246. }