store.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. package storage
  2. import (
  3. "fmt"
  4. "github.com/chrislusf/seaweedfs/weed/pb"
  5. "github.com/chrislusf/seaweedfs/weed/storage/volume_info"
  6. "github.com/chrislusf/seaweedfs/weed/util"
  7. "path/filepath"
  8. "strings"
  9. "sync/atomic"
  10. "google.golang.org/grpc"
  11. "github.com/chrislusf/seaweedfs/weed/glog"
  12. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  13. "github.com/chrislusf/seaweedfs/weed/stats"
  14. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  15. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  16. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  17. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  18. )
  19. const (
  20. MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
  21. )
// ReadOption carries per-request read flags; ReadVolumeNeedle forwards it
// unchanged to Volume.readNeedle.
type ReadOption struct {
	// ReadDeleted presumably permits reading needles that have already been
	// deleted; it is interpreted outside this file — TODO confirm.
	ReadDeleted bool
}
  25. /*
  26. * A VolumeServer contains one Store
  27. */
  28. type Store struct {
  29. MasterAddress pb.ServerAddress
  30. grpcDialOption grpc.DialOption
  31. volumeSizeLimit uint64 // read from the master
  32. Ip string
  33. Port int
  34. GrpcPort int
  35. PublicUrl string
  36. Locations []*DiskLocation
  37. dataCenter string // optional informaton, overwriting master setting if exists
  38. rack string // optional information, overwriting master setting if exists
  39. connected bool
  40. NeedleMapKind NeedleMapKind
  41. NewVolumesChan chan master_pb.VolumeShortInformationMessage
  42. DeletedVolumesChan chan master_pb.VolumeShortInformationMessage
  43. NewEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  44. DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  45. isStopping bool
  46. }
  47. func (s *Store) String() (str string) {
  48. str = fmt.Sprintf("Ip:%s, Port:%d, GrpcPort:%d PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.GrpcPort, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  49. return
  50. }
  51. func NewStore(grpcDialOption grpc.DialOption, ip string, port int, grpcPort int, publicUrl string, dirnames []string, maxVolumeCounts []int,
  52. minFreeSpaces []util.MinFreeSpace, idxFolder string, needleMapKind NeedleMapKind, diskTypes []DiskType) (s *Store) {
  53. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, GrpcPort: grpcPort, PublicUrl: publicUrl, NeedleMapKind: needleMapKind}
  54. s.Locations = make([]*DiskLocation, 0)
  55. for i := 0; i < len(dirnames); i++ {
  56. location := NewDiskLocation(dirnames[i], maxVolumeCounts[i], minFreeSpaces[i], idxFolder, diskTypes[i])
  57. location.loadExistingVolumes(needleMapKind)
  58. s.Locations = append(s.Locations, location)
  59. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  60. }
  61. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  62. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  63. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  64. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  65. return
  66. }
  67. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32, diskType DiskType) error {
  68. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  69. if e != nil {
  70. return e
  71. }
  72. ttl, e := needle.ReadTTL(ttlString)
  73. if e != nil {
  74. return e
  75. }
  76. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb, diskType)
  77. return e
  78. }
  79. func (s *Store) DeleteCollection(collection string) (e error) {
  80. for _, location := range s.Locations {
  81. e = location.DeleteCollectionFromDiskLocation(collection)
  82. if e != nil {
  83. return
  84. }
  85. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  86. }
  87. return
  88. }
  89. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  90. for _, location := range s.Locations {
  91. if v, found := location.FindVolume(vid); found {
  92. return v
  93. }
  94. }
  95. return nil
  96. }
  97. func (s *Store) FindFreeLocation(diskType DiskType) (ret *DiskLocation) {
  98. max := 0
  99. for _, location := range s.Locations {
  100. if diskType != location.DiskType {
  101. continue
  102. }
  103. if location.isDiskSpaceLow {
  104. continue
  105. }
  106. currentFreeCount := location.MaxVolumeCount - location.VolumesLen()
  107. currentFreeCount *= erasure_coding.DataShardsCount
  108. currentFreeCount -= location.EcVolumesLen()
  109. currentFreeCount /= erasure_coding.DataShardsCount
  110. if currentFreeCount > max {
  111. max = currentFreeCount
  112. ret = location
  113. }
  114. }
  115. return ret
  116. }
  117. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32, diskType DiskType) error {
  118. if s.findVolume(vid) != nil {
  119. return fmt.Errorf("Volume Id %d already exists!", vid)
  120. }
  121. if location := s.FindFreeLocation(diskType); location != nil {
  122. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  123. location.Directory, vid, collection, replicaPlacement, ttl)
  124. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  125. location.SetVolume(vid, volume)
  126. glog.V(0).Infof("add volume %d", vid)
  127. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  128. Id: uint32(vid),
  129. Collection: collection,
  130. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  131. Version: uint32(volume.Version()),
  132. Ttl: ttl.ToUint32(),
  133. DiskType: string(diskType),
  134. }
  135. return nil
  136. } else {
  137. return err
  138. }
  139. }
  140. return fmt.Errorf("No more free space left")
  141. }
  142. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  143. for _, location := range s.Locations {
  144. stats := collectStatsForOneLocation(location)
  145. allStats = append(allStats, stats...)
  146. }
  147. sortVolumeInfos(allStats)
  148. return allStats
  149. }
  150. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  151. location.volumesLock.RLock()
  152. defer location.volumesLock.RUnlock()
  153. for k, v := range location.volumes {
  154. s := collectStatForOneVolume(k, v)
  155. stats = append(stats, s)
  156. }
  157. return stats
  158. }
  159. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  160. s = &VolumeInfo{
  161. Id: vid,
  162. Collection: v.Collection,
  163. ReplicaPlacement: v.ReplicaPlacement,
  164. Version: v.Version(),
  165. ReadOnly: v.IsReadOnly(),
  166. Ttl: v.Ttl,
  167. CompactRevision: uint32(v.CompactionRevision),
  168. DiskType: v.DiskType().String(),
  169. }
  170. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  171. v.dataFileAccessLock.RLock()
  172. defer v.dataFileAccessLock.RUnlock()
  173. if v.nm == nil {
  174. return
  175. }
  176. s.FileCount = v.nm.FileCount()
  177. s.DeleteCount = v.nm.DeletedCount()
  178. s.DeletedByteCount = v.nm.DeletedSize()
  179. s.Size = v.nm.ContentSize()
  180. return
  181. }
  182. func (s *Store) SetDataCenter(dataCenter string) {
  183. s.dataCenter = dataCenter
  184. }
  185. func (s *Store) SetRack(rack string) {
  186. s.rack = rack
  187. }
  188. func (s *Store) GetDataCenter() string {
  189. return s.dataCenter
  190. }
  191. func (s *Store) GetRack() string {
  192. return s.rack
  193. }
  194. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  195. var volumeMessages []*master_pb.VolumeInformationMessage
  196. maxVolumeCounts := make(map[string]uint32)
  197. var maxFileKey NeedleId
  198. collectionVolumeSize := make(map[string]uint64)
  199. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  200. for _, location := range s.Locations {
  201. var deleteVids []needle.VolumeId
  202. maxVolumeCounts[string(location.DiskType)] += uint32(location.MaxVolumeCount)
  203. location.volumesLock.RLock()
  204. for _, v := range location.volumes {
  205. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  206. if volumeMessage == nil {
  207. continue
  208. }
  209. if maxFileKey < curMaxFileKey {
  210. maxFileKey = curMaxFileKey
  211. }
  212. deleteVolume := false
  213. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  214. volumeMessages = append(volumeMessages, volumeMessage)
  215. } else {
  216. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  217. deleteVids = append(deleteVids, v.Id)
  218. deleteVolume = true
  219. } else {
  220. glog.V(0).Infof("volume %d is expired", v.Id)
  221. }
  222. if v.lastIoError != nil {
  223. deleteVids = append(deleteVids, v.Id)
  224. deleteVolume = true
  225. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  226. }
  227. }
  228. if _, exist := collectionVolumeSize[v.Collection]; !exist {
  229. collectionVolumeSize[v.Collection] = 0
  230. }
  231. if !deleteVolume {
  232. collectionVolumeSize[v.Collection] += volumeMessage.Size
  233. } else {
  234. collectionVolumeSize[v.Collection] -= volumeMessage.Size
  235. if collectionVolumeSize[v.Collection] <= 0 {
  236. delete(collectionVolumeSize, v.Collection)
  237. }
  238. }
  239. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  240. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  241. "IsReadOnly": 0,
  242. "noWriteOrDelete": 0,
  243. "noWriteCanDelete": 0,
  244. "isDiskSpaceLow": 0,
  245. }
  246. }
  247. if !deleteVolume && v.IsReadOnly() {
  248. collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
  249. if v.noWriteOrDelete {
  250. collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
  251. }
  252. if v.noWriteCanDelete {
  253. collectionVolumeReadOnlyCount[v.Collection]["noWriteCanDelete"] += 1
  254. }
  255. if v.location.isDiskSpaceLow {
  256. collectionVolumeReadOnlyCount[v.Collection]["isDiskSpaceLow"] += 1
  257. }
  258. }
  259. }
  260. location.volumesLock.RUnlock()
  261. if len(deleteVids) > 0 {
  262. // delete expired volumes.
  263. location.volumesLock.Lock()
  264. for _, vid := range deleteVids {
  265. found, err := location.deleteVolumeById(vid)
  266. if err == nil {
  267. if found {
  268. glog.V(0).Infof("volume %d is deleted", vid)
  269. }
  270. } else {
  271. glog.Warningf("delete volume %d: %v", vid, err)
  272. }
  273. }
  274. location.volumesLock.Unlock()
  275. }
  276. }
  277. for col, size := range collectionVolumeSize {
  278. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  279. }
  280. for col, types := range collectionVolumeReadOnlyCount {
  281. for t, count := range types {
  282. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  283. }
  284. }
  285. return &master_pb.Heartbeat{
  286. Ip: s.Ip,
  287. Port: uint32(s.Port),
  288. GrpcPort: uint32(s.GrpcPort),
  289. PublicUrl: s.PublicUrl,
  290. MaxVolumeCounts: maxVolumeCounts,
  291. MaxFileKey: NeedleIdToUint64(maxFileKey),
  292. DataCenter: s.dataCenter,
  293. Rack: s.rack,
  294. Volumes: volumeMessages,
  295. HasNoVolumes: len(volumeMessages) == 0,
  296. }
  297. }
  298. func (s *Store) SetStopping() {
  299. s.isStopping = true
  300. for _, location := range s.Locations {
  301. location.SetStopping()
  302. }
  303. }
  304. func (s *Store) Close() {
  305. for _, location := range s.Locations {
  306. location.Close()
  307. }
  308. }
  309. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, checkCookie bool, fsync bool) (isUnchanged bool, err error) {
  310. if v := s.findVolume(i); v != nil {
  311. if v.IsReadOnly() {
  312. err = fmt.Errorf("volume %d is read only", i)
  313. return
  314. }
  315. _, _, isUnchanged, err = v.writeNeedle2(n, checkCookie, fsync && s.isStopping)
  316. return
  317. }
  318. glog.V(0).Infoln("volume", i, "not found!")
  319. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  320. return
  321. }
  322. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  323. if v := s.findVolume(i); v != nil {
  324. if v.noWriteOrDelete {
  325. return 0, fmt.Errorf("volume %d is read only", i)
  326. }
  327. return v.deleteNeedle2(n)
  328. }
  329. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  330. }
  331. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption, onReadSizeFn func(size Size)) (int, error) {
  332. if v := s.findVolume(i); v != nil {
  333. return v.readNeedle(n, readOption, onReadSizeFn)
  334. }
  335. return 0, fmt.Errorf("volume %d not found", i)
  336. }
  337. func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  338. return s.findVolume(i)
  339. }
  340. func (s *Store) HasVolume(i needle.VolumeId) bool {
  341. v := s.findVolume(i)
  342. return v != nil
  343. }
  344. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  345. v := s.findVolume(i)
  346. if v == nil {
  347. return fmt.Errorf("volume %d not found", i)
  348. }
  349. v.noWriteLock.Lock()
  350. v.noWriteOrDelete = true
  351. v.noWriteLock.Unlock()
  352. return nil
  353. }
  354. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  355. v := s.findVolume(i)
  356. if v == nil {
  357. return fmt.Errorf("volume %d not found", i)
  358. }
  359. v.noWriteLock.Lock()
  360. v.noWriteOrDelete = false
  361. v.noWriteLock.Unlock()
  362. return nil
  363. }
  364. func (s *Store) MountVolume(i needle.VolumeId) error {
  365. for _, location := range s.Locations {
  366. if found := location.LoadVolume(i, s.NeedleMapKind); found == true {
  367. glog.V(0).Infof("mount volume %d", i)
  368. v := s.findVolume(i)
  369. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  370. Id: uint32(v.Id),
  371. Collection: v.Collection,
  372. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  373. Version: uint32(v.Version()),
  374. Ttl: v.Ttl.ToUint32(),
  375. DiskType: string(v.location.DiskType),
  376. }
  377. return nil
  378. }
  379. }
  380. return fmt.Errorf("volume %d not found on disk", i)
  381. }
  382. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  383. v := s.findVolume(i)
  384. if v == nil {
  385. return nil
  386. }
  387. message := master_pb.VolumeShortInformationMessage{
  388. Id: uint32(v.Id),
  389. Collection: v.Collection,
  390. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  391. Version: uint32(v.Version()),
  392. Ttl: v.Ttl.ToUint32(),
  393. DiskType: string(v.location.DiskType),
  394. }
  395. for _, location := range s.Locations {
  396. if err := location.UnloadVolume(i); err == nil || err == ErrVolumeNotFound {
  397. glog.V(0).Infof("UnmountVolume %d", i)
  398. s.DeletedVolumesChan <- message
  399. return nil
  400. }
  401. }
  402. return fmt.Errorf("volume %d not found on disk", i)
  403. }
  404. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  405. v := s.findVolume(i)
  406. if v == nil {
  407. return fmt.Errorf("delete volume %d not found on disk", i)
  408. }
  409. message := master_pb.VolumeShortInformationMessage{
  410. Id: uint32(v.Id),
  411. Collection: v.Collection,
  412. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  413. Version: uint32(v.Version()),
  414. Ttl: v.Ttl.ToUint32(),
  415. DiskType: string(v.location.DiskType),
  416. }
  417. for _, location := range s.Locations {
  418. if err := location.DeleteVolume(i); err == nil || err == ErrVolumeNotFound {
  419. glog.V(0).Infof("DeleteVolume %d", i)
  420. s.DeletedVolumesChan <- message
  421. return nil
  422. } else {
  423. glog.Errorf("DeleteVolume %d: %v", i, err)
  424. }
  425. }
  426. return fmt.Errorf("volume %d not found on disk", i)
  427. }
  428. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  429. for _, location := range s.Locations {
  430. fileInfo, found := location.LocateVolume(i)
  431. if !found {
  432. continue
  433. }
  434. // load, modify, save
  435. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  436. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  437. volumeInfo, _, _, err := volume_info.MaybeLoadVolumeInfo(vifFile)
  438. if err != nil {
  439. return fmt.Errorf("volume %d fail to load vif", i)
  440. }
  441. volumeInfo.Replication = replication
  442. err = volume_info.SaveVolumeInfo(vifFile, volumeInfo)
  443. if err != nil {
  444. return fmt.Errorf("volume %d fail to save vif", i)
  445. }
  446. return nil
  447. }
  448. return fmt.Errorf("volume %d not found on disk", i)
  449. }
  450. func (s *Store) SetVolumeSizeLimit(x uint64) {
  451. atomic.StoreUint64(&s.volumeSizeLimit, x)
  452. }
  453. func (s *Store) GetVolumeSizeLimit() uint64 {
  454. return atomic.LoadUint64(&s.volumeSizeLimit)
  455. }
  456. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  457. volumeSizeLimit := s.GetVolumeSizeLimit()
  458. if volumeSizeLimit == 0 {
  459. return
  460. }
  461. for _, diskLocation := range s.Locations {
  462. if diskLocation.OriginalMaxVolumeCount == 0 {
  463. currentMaxVolumeCount := diskLocation.MaxVolumeCount
  464. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  465. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  466. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  467. volCount := diskLocation.VolumesLen()
  468. maxVolumeCount := volCount
  469. if unclaimedSpaces > int64(volumeSizeLimit) {
  470. maxVolumeCount += int(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  471. }
  472. diskLocation.MaxVolumeCount = maxVolumeCount
  473. glog.V(2).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  474. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  475. hasChanges = hasChanges || currentMaxVolumeCount != diskLocation.MaxVolumeCount
  476. }
  477. }
  478. return
  479. }