store.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. package storage
  2. import (
  3. "fmt"
  4. "github.com/chrislusf/seaweedfs/weed/util"
  5. "path/filepath"
  6. "strings"
  7. "sync/atomic"
  8. "google.golang.org/grpc"
  9. "github.com/chrislusf/seaweedfs/weed/glog"
  10. "github.com/chrislusf/seaweedfs/weed/pb"
  11. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  12. "github.com/chrislusf/seaweedfs/weed/stats"
  13. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  14. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  15. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  16. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  17. )
const (
	// MAX_TTL_VOLUME_REMOVAL_DELAY is the delay passed to Volume.expiredLongEnough
	// by CollectHeartbeat before an expired volume is scheduled for deletion.
	MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
)
// ReadOption carries per-request flags that Store.ReadVolumeNeedle forwards
// to the volume's readNeedle call.
type ReadOption struct {
	// ReadDeleted presumably allows reading needles that are already marked
	// deleted; NOTE(review): confirm against Volume.readNeedle.
	ReadDeleted bool
}
// Store groups the disk locations of one volume server together with the
// identity it reports to the master in heartbeats.
//
// A VolumeServer contains one Store.
type Store struct {
	MasterAddress   string
	grpcDialOption  grpc.DialOption
	volumeSizeLimit uint64 // read from the master; accessed via sync/atomic in Set/GetVolumeSizeLimit
	Ip              string
	Port            int
	PublicUrl       string
	Locations       []*DiskLocation
	dataCenter      string // optional information, overwriting master setting if exists
	rack            string // optional information, overwriting master setting if exists
	connected       bool
	NeedleMapKind   NeedleMapKind
	// The four channels below are created with a buffer of 3 in NewStore.
	// Volume mount/add sends on NewVolumesChan, unmount/delete sends on
	// DeletedVolumesChan; the EC shard channels are not written in this file.
	NewVolumesChan      chan master_pb.VolumeShortInformationMessage
	DeletedVolumesChan  chan master_pb.VolumeShortInformationMessage
	NewEcShardsChan     chan master_pb.VolumeEcShardInformationMessage
	DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
	isStopping          bool // set by SetStopping; consulted by WriteVolumeNeedle
}
  45. func (s *Store) String() (str string) {
  46. str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  47. return
  48. }
  49. func NewStore(grpcDialOption grpc.DialOption, port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int,
  50. minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType) (s *Store) {
  51. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
  52. s.Locations = make([]*DiskLocation, 0)
  53. for i := 0; i < len(dirnames); i++ {
  54. location := NewDiskLocation(dirnames[i], maxVolumeCounts[i], minFreeSpaces[i], idxFolder, diskTypes[i])
  55. location.loadExistingVolumes(needleMapKind)
  56. s.Locations = append(s.Locations, location)
  57. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  58. }
  59. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  60. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  61. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  62. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  63. return
  64. }
  65. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32, diskType DiskType) error {
  66. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  67. if e != nil {
  68. return e
  69. }
  70. ttl, e := needle.ReadTTL(ttlString)
  71. if e != nil {
  72. return e
  73. }
  74. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb, diskType)
  75. return e
  76. }
  77. func (s *Store) DeleteCollection(collection string) (e error) {
  78. for _, location := range s.Locations {
  79. e = location.DeleteCollectionFromDiskLocation(collection)
  80. if e != nil {
  81. return
  82. }
  83. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  84. }
  85. return
  86. }
  87. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  88. for _, location := range s.Locations {
  89. if v, found := location.FindVolume(vid); found {
  90. return v
  91. }
  92. }
  93. return nil
  94. }
  95. func (s *Store) FindFreeLocation(diskType DiskType) (ret *DiskLocation) {
  96. max := 0
  97. for _, location := range s.Locations {
  98. if diskType != location.DiskType {
  99. continue
  100. }
  101. if location.isDiskSpaceLow {
  102. continue
  103. }
  104. currentFreeCount := location.MaxVolumeCount - location.VolumesLen()
  105. currentFreeCount *= erasure_coding.DataShardsCount
  106. currentFreeCount -= location.EcVolumesLen()
  107. currentFreeCount /= erasure_coding.DataShardsCount
  108. if currentFreeCount > max {
  109. max = currentFreeCount
  110. ret = location
  111. }
  112. }
  113. return ret
  114. }
  115. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32, diskType DiskType) error {
  116. if s.findVolume(vid) != nil {
  117. return fmt.Errorf("Volume Id %d already exists!", vid)
  118. }
  119. if location := s.FindFreeLocation(diskType); location != nil {
  120. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  121. location.Directory, vid, collection, replicaPlacement, ttl)
  122. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  123. location.SetVolume(vid, volume)
  124. glog.V(0).Infof("add volume %d", vid)
  125. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  126. Id: uint32(vid),
  127. Collection: collection,
  128. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  129. Version: uint32(volume.Version()),
  130. Ttl: ttl.ToUint32(),
  131. DiskType: string(diskType),
  132. }
  133. return nil
  134. } else {
  135. return err
  136. }
  137. }
  138. return fmt.Errorf("No more free space left")
  139. }
  140. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  141. for _, location := range s.Locations {
  142. stats := collectStatsForOneLocation(location)
  143. allStats = append(allStats, stats...)
  144. }
  145. sortVolumeInfos(allStats)
  146. return allStats
  147. }
  148. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  149. location.volumesLock.RLock()
  150. defer location.volumesLock.RUnlock()
  151. for k, v := range location.volumes {
  152. s := collectStatForOneVolume(k, v)
  153. stats = append(stats, s)
  154. }
  155. return stats
  156. }
  157. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  158. s = &VolumeInfo{
  159. Id: vid,
  160. Collection: v.Collection,
  161. ReplicaPlacement: v.ReplicaPlacement,
  162. Version: v.Version(),
  163. ReadOnly: v.IsReadOnly(),
  164. Ttl: v.Ttl,
  165. CompactRevision: uint32(v.CompactionRevision),
  166. DiskType: v.DiskType().String(),
  167. }
  168. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  169. v.dataFileAccessLock.RLock()
  170. defer v.dataFileAccessLock.RUnlock()
  171. if v.nm == nil {
  172. return
  173. }
  174. s.FileCount = v.nm.FileCount()
  175. s.DeleteCount = v.nm.DeletedCount()
  176. s.DeletedByteCount = v.nm.DeletedSize()
  177. s.Size = v.nm.ContentSize()
  178. return
  179. }
  180. func (s *Store) SetDataCenter(dataCenter string) {
  181. s.dataCenter = dataCenter
  182. }
  183. func (s *Store) SetRack(rack string) {
  184. s.rack = rack
  185. }
  186. func (s *Store) GetDataCenter() string {
  187. return s.dataCenter
  188. }
  189. func (s *Store) GetRack() string {
  190. return s.rack
  191. }
  192. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  193. var volumeMessages []*master_pb.VolumeInformationMessage
  194. maxVolumeCounts := make(map[string]uint32)
  195. var maxFileKey NeedleId
  196. collectionVolumeSize := make(map[string]uint64)
  197. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  198. for _, location := range s.Locations {
  199. var deleteVids []needle.VolumeId
  200. maxVolumeCounts[string(location.DiskType)] += uint32(location.MaxVolumeCount)
  201. location.volumesLock.RLock()
  202. for _, v := range location.volumes {
  203. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  204. if volumeMessage == nil {
  205. continue
  206. }
  207. if maxFileKey < curMaxFileKey {
  208. maxFileKey = curMaxFileKey
  209. }
  210. deleteVolume := false
  211. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  212. volumeMessages = append(volumeMessages, volumeMessage)
  213. } else {
  214. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  215. deleteVids = append(deleteVids, v.Id)
  216. deleteVolume = true
  217. } else {
  218. glog.V(0).Infof("volume %d is expired", v.Id)
  219. }
  220. if v.lastIoError != nil {
  221. deleteVids = append(deleteVids, v.Id)
  222. deleteVolume = true
  223. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  224. }
  225. }
  226. if _, exist := collectionVolumeSize[v.Collection]; !exist {
  227. collectionVolumeSize[v.Collection] = 0
  228. }
  229. if !deleteVolume {
  230. collectionVolumeSize[v.Collection] += volumeMessage.Size
  231. } else {
  232. collectionVolumeSize[v.Collection] -= volumeMessage.Size
  233. if collectionVolumeSize[v.Collection] <= 0 {
  234. delete(collectionVolumeSize, v.Collection)
  235. }
  236. }
  237. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  238. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  239. "IsReadOnly": 0,
  240. "noWriteOrDelete": 0,
  241. "noWriteCanDelete": 0,
  242. "isDiskSpaceLow": 0,
  243. }
  244. }
  245. if !deleteVolume && v.IsReadOnly() {
  246. collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
  247. if v.noWriteOrDelete {
  248. collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
  249. }
  250. if v.noWriteCanDelete {
  251. collectionVolumeReadOnlyCount[v.Collection]["noWriteCanDelete"] += 1
  252. }
  253. if v.location.isDiskSpaceLow {
  254. collectionVolumeReadOnlyCount[v.Collection]["isDiskSpaceLow"] += 1
  255. }
  256. }
  257. }
  258. location.volumesLock.RUnlock()
  259. if len(deleteVids) > 0 {
  260. // delete expired volumes.
  261. location.volumesLock.Lock()
  262. for _, vid := range deleteVids {
  263. found, err := location.deleteVolumeById(vid)
  264. if err == nil {
  265. if found {
  266. glog.V(0).Infof("volume %d is deleted", vid)
  267. }
  268. } else {
  269. glog.Warningf("delete volume %d: %v", vid, err)
  270. }
  271. }
  272. location.volumesLock.Unlock()
  273. }
  274. }
  275. for col, size := range collectionVolumeSize {
  276. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  277. }
  278. for col, types := range collectionVolumeReadOnlyCount {
  279. for t, count := range types {
  280. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  281. }
  282. }
  283. return &master_pb.Heartbeat{
  284. Ip: s.Ip,
  285. Port: uint32(s.Port),
  286. PublicUrl: s.PublicUrl,
  287. MaxVolumeCounts: maxVolumeCounts,
  288. MaxFileKey: NeedleIdToUint64(maxFileKey),
  289. DataCenter: s.dataCenter,
  290. Rack: s.rack,
  291. Volumes: volumeMessages,
  292. HasNoVolumes: len(volumeMessages) == 0,
  293. }
  294. }
  295. func (s *Store) SetStopping() {
  296. s.isStopping = true
  297. }
  298. func (s *Store) Close() {
  299. for _, location := range s.Locations {
  300. location.Close()
  301. }
  302. }
  303. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, fsync bool) (isUnchanged bool, err error) {
  304. if v := s.findVolume(i); v != nil {
  305. if v.IsReadOnly() {
  306. err = fmt.Errorf("volume %d is read only", i)
  307. return
  308. }
  309. _, _, isUnchanged, err = v.writeNeedle2(n, fsync && s.isStopping)
  310. return
  311. }
  312. glog.V(0).Infoln("volume", i, "not found!")
  313. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  314. return
  315. }
  316. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  317. if v := s.findVolume(i); v != nil {
  318. if v.noWriteOrDelete {
  319. return 0, fmt.Errorf("volume %d is read only", i)
  320. }
  321. return v.deleteNeedle2(n)
  322. }
  323. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  324. }
  325. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption) (int, error) {
  326. if v := s.findVolume(i); v != nil {
  327. return v.readNeedle(n, readOption)
  328. }
  329. return 0, fmt.Errorf("volume %d not found", i)
  330. }
  331. func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  332. return s.findVolume(i)
  333. }
  334. func (s *Store) HasVolume(i needle.VolumeId) bool {
  335. v := s.findVolume(i)
  336. return v != nil
  337. }
  338. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  339. v := s.findVolume(i)
  340. if v == nil {
  341. return fmt.Errorf("volume %d not found", i)
  342. }
  343. v.noWriteLock.Lock()
  344. v.noWriteOrDelete = true
  345. v.noWriteLock.Unlock()
  346. return nil
  347. }
  348. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  349. v := s.findVolume(i)
  350. if v == nil {
  351. return fmt.Errorf("volume %d not found", i)
  352. }
  353. v.noWriteLock.Lock()
  354. v.noWriteOrDelete = false
  355. v.noWriteLock.Unlock()
  356. return nil
  357. }
  358. func (s *Store) MountVolume(i needle.VolumeId) error {
  359. for _, location := range s.Locations {
  360. if found := location.LoadVolume(i, s.NeedleMapKind); found == true {
  361. glog.V(0).Infof("mount volume %d", i)
  362. v := s.findVolume(i)
  363. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  364. Id: uint32(v.Id),
  365. Collection: v.Collection,
  366. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  367. Version: uint32(v.Version()),
  368. Ttl: v.Ttl.ToUint32(),
  369. DiskType: string(v.location.DiskType),
  370. }
  371. return nil
  372. }
  373. }
  374. return fmt.Errorf("volume %d not found on disk", i)
  375. }
  376. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  377. v := s.findVolume(i)
  378. if v == nil {
  379. return nil
  380. }
  381. message := master_pb.VolumeShortInformationMessage{
  382. Id: uint32(v.Id),
  383. Collection: v.Collection,
  384. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  385. Version: uint32(v.Version()),
  386. Ttl: v.Ttl.ToUint32(),
  387. DiskType: string(v.location.DiskType),
  388. }
  389. for _, location := range s.Locations {
  390. if err := location.UnloadVolume(i); err == nil {
  391. glog.V(0).Infof("UnmountVolume %d", i)
  392. s.DeletedVolumesChan <- message
  393. return nil
  394. }
  395. }
  396. return fmt.Errorf("volume %d not found on disk", i)
  397. }
  398. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  399. v := s.findVolume(i)
  400. if v == nil {
  401. return fmt.Errorf("delete volume %d not found on disk", i)
  402. }
  403. message := master_pb.VolumeShortInformationMessage{
  404. Id: uint32(v.Id),
  405. Collection: v.Collection,
  406. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  407. Version: uint32(v.Version()),
  408. Ttl: v.Ttl.ToUint32(),
  409. DiskType: string(v.location.DiskType),
  410. }
  411. for _, location := range s.Locations {
  412. if err := location.DeleteVolume(i); err == nil {
  413. glog.V(0).Infof("DeleteVolume %d", i)
  414. s.DeletedVolumesChan <- message
  415. return nil
  416. } else {
  417. glog.Errorf("DeleteVolume %d: %v", i, err)
  418. }
  419. }
  420. return fmt.Errorf("volume %d not found on disk", i)
  421. }
  422. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  423. for _, location := range s.Locations {
  424. fileInfo, found := location.LocateVolume(i)
  425. if !found {
  426. continue
  427. }
  428. // load, modify, save
  429. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  430. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  431. volumeInfo, _, _, err := pb.MaybeLoadVolumeInfo(vifFile)
  432. if err != nil {
  433. return fmt.Errorf("volume %d fail to load vif", i)
  434. }
  435. volumeInfo.Replication = replication
  436. err = pb.SaveVolumeInfo(vifFile, volumeInfo)
  437. if err != nil {
  438. return fmt.Errorf("volume %d fail to save vif", i)
  439. }
  440. return nil
  441. }
  442. return fmt.Errorf("volume %d not found on disk", i)
  443. }
  444. func (s *Store) SetVolumeSizeLimit(x uint64) {
  445. atomic.StoreUint64(&s.volumeSizeLimit, x)
  446. }
  447. func (s *Store) GetVolumeSizeLimit() uint64 {
  448. return atomic.LoadUint64(&s.volumeSizeLimit)
  449. }
  450. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  451. volumeSizeLimit := s.GetVolumeSizeLimit()
  452. if volumeSizeLimit == 0 {
  453. return
  454. }
  455. for _, diskLocation := range s.Locations {
  456. if diskLocation.OriginalMaxVolumeCount == 0 {
  457. currentMaxVolumeCount := diskLocation.MaxVolumeCount
  458. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  459. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  460. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  461. volCount := diskLocation.VolumesLen()
  462. maxVolumeCount := volCount
  463. if unclaimedSpaces > int64(volumeSizeLimit) {
  464. maxVolumeCount += int(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  465. }
  466. diskLocation.MaxVolumeCount = maxVolumeCount
  467. glog.V(2).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  468. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  469. hasChanges = hasChanges || currentMaxVolumeCount != diskLocation.MaxVolumeCount
  470. }
  471. }
  472. return
  473. }