topology.go 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. package topology
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "math/rand"
  7. "sync"
  8. "time"
  9. "github.com/seaweedfs/seaweedfs/weed/pb"
  10. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  11. hashicorpRaft "github.com/hashicorp/raft"
  12. "github.com/seaweedfs/raft"
  13. "github.com/seaweedfs/seaweedfs/weed/glog"
  14. "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
  15. "github.com/seaweedfs/seaweedfs/weed/sequence"
  16. "github.com/seaweedfs/seaweedfs/weed/storage"
  17. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  18. "github.com/seaweedfs/seaweedfs/weed/storage/super_block"
  19. "github.com/seaweedfs/seaweedfs/weed/util"
  20. )
  21. type Topology struct {
  22. vacuumLockCounter int64
  23. NodeImpl
  24. collectionMap *util.ConcurrentReadMap
  25. ecShardMap map[needle.VolumeId]*EcShardLocations
  26. ecShardMapLock sync.RWMutex
  27. pulse int64
  28. volumeSizeLimit uint64
  29. replicationAsMin bool
  30. isDisableVacuum bool
  31. Sequence sequence.Sequencer
  32. chanFullVolumes chan storage.VolumeInfo
  33. chanCrowdedVolumes chan storage.VolumeInfo
  34. Configuration *Configuration
  35. RaftServer raft.Server
  36. RaftServerAccessLock sync.RWMutex
  37. HashicorpRaft *hashicorpRaft.Raft
  38. UuidAccessLock sync.RWMutex
  39. UuidMap map[string][]string
  40. }
  41. func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int, replicationAsMin bool) *Topology {
  42. t := &Topology{}
  43. t.id = NodeId(id)
  44. t.nodeType = "Topology"
  45. t.NodeImpl.value = t
  46. t.diskUsages = newDiskUsages()
  47. t.children = make(map[NodeId]Node)
  48. t.collectionMap = util.NewConcurrentReadMap()
  49. t.ecShardMap = make(map[needle.VolumeId]*EcShardLocations)
  50. t.pulse = int64(pulse)
  51. t.volumeSizeLimit = volumeSizeLimit
  52. t.replicationAsMin = replicationAsMin
  53. t.Sequence = seq
  54. t.chanFullVolumes = make(chan storage.VolumeInfo)
  55. t.chanCrowdedVolumes = make(chan storage.VolumeInfo)
  56. t.Configuration = &Configuration{}
  57. return t
  58. }
  59. func (t *Topology) IsLeader() bool {
  60. t.RaftServerAccessLock.RLock()
  61. defer t.RaftServerAccessLock.RUnlock()
  62. if t.RaftServer != nil {
  63. if t.RaftServer.State() == raft.Leader {
  64. return true
  65. }
  66. if leader, err := t.Leader(); err == nil {
  67. if pb.ServerAddress(t.RaftServer.Name()) == leader {
  68. return true
  69. }
  70. }
  71. } else if t.HashicorpRaft != nil {
  72. if t.HashicorpRaft.State() == hashicorpRaft.Leader {
  73. return true
  74. }
  75. }
  76. return false
  77. }
  78. func (t *Topology) Leader() (l pb.ServerAddress, err error) {
  79. for count := 0; count < 3; count++ {
  80. l, err = t.MaybeLeader()
  81. if err != nil {
  82. return
  83. }
  84. if l != "" {
  85. break
  86. }
  87. time.Sleep(time.Duration(5+count) * time.Second)
  88. }
  89. return
  90. }
  91. func (t *Topology) MaybeLeader() (l pb.ServerAddress, err error) {
  92. t.RaftServerAccessLock.RLock()
  93. defer t.RaftServerAccessLock.RUnlock()
  94. if t.RaftServer != nil {
  95. l = pb.ServerAddress(t.RaftServer.Leader())
  96. } else if t.HashicorpRaft != nil {
  97. l = pb.ServerAddress(t.HashicorpRaft.Leader())
  98. } else {
  99. err = errors.New("Raft Server not ready yet!")
  100. }
  101. return
  102. }
  103. func (t *Topology) Lookup(collection string, vid needle.VolumeId) (dataNodes []*DataNode) {
  104. // maybe an issue if lots of collections?
  105. if collection == "" {
  106. for _, c := range t.collectionMap.Items() {
  107. if list := c.(*Collection).Lookup(vid); list != nil {
  108. return list
  109. }
  110. }
  111. } else {
  112. if c, ok := t.collectionMap.Find(collection); ok {
  113. return c.(*Collection).Lookup(vid)
  114. }
  115. }
  116. if locations, found := t.LookupEcShards(vid); found {
  117. for _, loc := range locations.Locations {
  118. dataNodes = append(dataNodes, loc...)
  119. }
  120. return dataNodes
  121. }
  122. return nil
  123. }
  124. func (t *Topology) NextVolumeId() (needle.VolumeId, error) {
  125. vid := t.GetMaxVolumeId()
  126. next := vid.Next()
  127. t.RaftServerAccessLock.RLock()
  128. defer t.RaftServerAccessLock.RUnlock()
  129. if t.RaftServer != nil {
  130. if _, err := t.RaftServer.Do(NewMaxVolumeIdCommand(next)); err != nil {
  131. return 0, err
  132. }
  133. } else if t.HashicorpRaft != nil {
  134. b, err := json.Marshal(NewMaxVolumeIdCommand(next))
  135. if err != nil {
  136. return 0, fmt.Errorf("failed marshal NewMaxVolumeIdCommand: %+v", err)
  137. }
  138. if future := t.HashicorpRaft.Apply(b, time.Second); future.Error() != nil {
  139. return 0, future.Error()
  140. }
  141. }
  142. return next, nil
  143. }
  144. // deprecated
  145. func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool {
  146. vl := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
  147. active, _ := vl.GetActiveVolumeCount(option)
  148. return active > 0
  149. }
  150. func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *VolumeLocationList, error) {
  151. vid, count, datanodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType).PickForWrite(count, option)
  152. if err != nil {
  153. return "", 0, nil, fmt.Errorf("failed to find writable volumes for collection:%s replication:%s ttl:%s error: %v", option.Collection, option.ReplicaPlacement.String(), option.Ttl.String(), err)
  154. }
  155. if datanodes.Length() == 0 {
  156. return "", 0, nil, fmt.Errorf("no writable volumes available for collection:%s replication:%s ttl:%s", option.Collection, option.ReplicaPlacement.String(), option.Ttl.String())
  157. }
  158. fileId := t.Sequence.NextFileId(count)
  159. return needle.NewFileId(*vid, fileId, rand.Uint32()).String(), count, datanodes, nil
  160. }
  161. func (t *Topology) GetVolumeLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) *VolumeLayout {
  162. return t.collectionMap.Get(collectionName, func() interface{} {
  163. return NewCollection(collectionName, t.volumeSizeLimit, t.replicationAsMin)
  164. }).(*Collection).GetOrCreateVolumeLayout(rp, ttl, diskType)
  165. }
  166. func (t *Topology) ListCollections(includeNormalVolumes, includeEcVolumes bool) (ret []string) {
  167. mapOfCollections := make(map[string]bool)
  168. for _, c := range t.collectionMap.Items() {
  169. mapOfCollections[c.(*Collection).Name] = true
  170. }
  171. if includeEcVolumes {
  172. t.ecShardMapLock.RLock()
  173. for _, ecVolumeLocation := range t.ecShardMap {
  174. mapOfCollections[ecVolumeLocation.Collection] = true
  175. }
  176. t.ecShardMapLock.RUnlock()
  177. }
  178. for k := range mapOfCollections {
  179. ret = append(ret, k)
  180. }
  181. return ret
  182. }
  183. func (t *Topology) FindCollection(collectionName string) (*Collection, bool) {
  184. c, hasCollection := t.collectionMap.Find(collectionName)
  185. if !hasCollection {
  186. return nil, false
  187. }
  188. return c.(*Collection), hasCollection
  189. }
  190. func (t *Topology) DeleteCollection(collectionName string) {
  191. t.collectionMap.Delete(collectionName)
  192. }
  193. func (t *Topology) DeleteLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) {
  194. collection, found := t.FindCollection(collectionName)
  195. if !found {
  196. return
  197. }
  198. collection.DeleteVolumeLayout(rp, ttl, diskType)
  199. if len(collection.storageType2VolumeLayout.Items()) == 0 {
  200. t.DeleteCollection(collectionName)
  201. }
  202. }
  203. func (t *Topology) RegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
  204. diskType := types.ToDiskType(v.DiskType)
  205. vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  206. vl.RegisterVolume(&v, dn)
  207. vl.EnsureCorrectWritables(&v)
  208. }
  209. func (t *Topology) UnRegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
  210. glog.Infof("removing volume info: %+v from %v", v, dn.id)
  211. diskType := types.ToDiskType(v.DiskType)
  212. volumeLayout := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  213. volumeLayout.UnRegisterVolume(&v, dn)
  214. if volumeLayout.isEmpty() {
  215. t.DeleteLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  216. }
  217. }
  218. func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter {
  219. t.Lock()
  220. defer t.Unlock()
  221. for _, c := range t.children {
  222. dc := c.(*DataCenter)
  223. if string(dc.Id()) == dcName {
  224. return dc
  225. }
  226. }
  227. dc := NewDataCenter(dcName)
  228. t.doLinkChildNode(dc)
  229. return dc
  230. }
  231. func (t *Topology) SyncDataNodeRegistration(volumes []*master_pb.VolumeInformationMessage, dn *DataNode) (newVolumes, deletedVolumes []storage.VolumeInfo) {
  232. // convert into in memory struct storage.VolumeInfo
  233. var volumeInfos []storage.VolumeInfo
  234. for _, v := range volumes {
  235. if vi, err := storage.NewVolumeInfo(v); err == nil {
  236. volumeInfos = append(volumeInfos, vi)
  237. } else {
  238. glog.V(0).Infof("Fail to convert joined volume information: %v", err)
  239. }
  240. }
  241. // find out the delta volumes
  242. var changedVolumes []storage.VolumeInfo
  243. newVolumes, deletedVolumes, changedVolumes = dn.UpdateVolumes(volumeInfos)
  244. for _, v := range newVolumes {
  245. t.RegisterVolumeLayout(v, dn)
  246. }
  247. for _, v := range deletedVolumes {
  248. t.UnRegisterVolumeLayout(v, dn)
  249. }
  250. for _, v := range changedVolumes {
  251. diskType := types.ToDiskType(v.DiskType)
  252. vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
  253. vl.EnsureCorrectWritables(&v)
  254. }
  255. return
  256. }
  257. func (t *Topology) IncrementalSyncDataNodeRegistration(newVolumes, deletedVolumes []*master_pb.VolumeShortInformationMessage, dn *DataNode) {
  258. var newVis, oldVis []storage.VolumeInfo
  259. for _, v := range newVolumes {
  260. vi, err := storage.NewVolumeInfoFromShort(v)
  261. if err != nil {
  262. glog.V(0).Infof("NewVolumeInfoFromShort %v: %v", v, err)
  263. continue
  264. }
  265. newVis = append(newVis, vi)
  266. }
  267. for _, v := range deletedVolumes {
  268. vi, err := storage.NewVolumeInfoFromShort(v)
  269. if err != nil {
  270. glog.V(0).Infof("NewVolumeInfoFromShort %v: %v", v, err)
  271. continue
  272. }
  273. oldVis = append(oldVis, vi)
  274. }
  275. dn.DeltaUpdateVolumes(newVis, oldVis)
  276. for _, vi := range newVis {
  277. t.RegisterVolumeLayout(vi, dn)
  278. }
  279. for _, vi := range oldVis {
  280. t.UnRegisterVolumeLayout(vi, dn)
  281. }
  282. return
  283. }
  284. func (t *Topology) DataNodeRegistration(dcName, rackName string, dn *DataNode) {
  285. if dn.Parent() != nil {
  286. return
  287. }
  288. // registration to topo
  289. dc := t.GetOrCreateDataCenter(dcName)
  290. rack := dc.GetOrCreateRack(rackName)
  291. rack.LinkChildNode(dn)
  292. glog.Infof("[%s] reLink To topo ", dn.Id())
  293. }
  294. func (t *Topology) DisableVacuum() {
  295. glog.V(0).Infof("DisableVacuum")
  296. t.isDisableVacuum = true
  297. }
  298. func (t *Topology) EnableVacuum() {
  299. glog.V(0).Infof("EnableVacuum")
  300. t.isDisableVacuum = false
  301. }