store.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. package storage
  2. import (
  3. "fmt"
  4. "path/filepath"
  5. "strings"
  6. "sync/atomic"
  7. "google.golang.org/grpc"
  8. "github.com/chrislusf/seaweedfs/weed/util/log"
  9. "github.com/chrislusf/seaweedfs/weed/pb"
  10. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  11. "github.com/chrislusf/seaweedfs/weed/stats"
  12. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  13. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  14. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  15. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  16. "github.com/chrislusf/seaweedfs/weed/util"
  17. )
  const (
  	// MAX_TTL_VOLUME_REMOVAL_DELAY is the delay that CollectHeartbeat passes to
  	// Volume.expiredLongEnough before it deletes an expired volume.
  	MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
  )
  // ReadOption carries per-request flags that Store.ReadVolumeNeedle forwards
  // to Volume.readNeedle.
  type ReadOption struct {
  	// ReadDeleted presumably allows reading needles that are already marked as
  	// deleted; it is interpreted by Volume.readNeedle -- TODO confirm there.
  	ReadDeleted bool
  }
  /*
   * A VolumeServer contains one Store.
   *
   * A Store groups the disk locations of one volume server together with the
   * identity (ip, port, public url, data center, rack) that is reported to the
   * master in heartbeats.
   */
  type Store struct {
  	MasterAddress   string
  	grpcDialOption  grpc.DialOption // supplied to NewStore
  	volumeSizeLimit uint64          // read from the master; accessed atomically via Set/GetVolumeSizeLimit
  	Ip              string
  	Port            int
  	PublicUrl       string
  	Locations       []*DiskLocation // one entry per data directory given to NewStore
  	dataCenter      string          // optional information, overwriting master setting if exists
  	rack            string          // optional information, overwriting master setting if exists
  	connected       bool
  	NeedleMapType   NeedleMapType // needle map kind used when mounting volumes
  	// The four channels below are created by NewStore with a buffer of 3.
  	// NewVolumesChan receives a message when a volume is added or mounted.
  	NewVolumesChan chan master_pb.VolumeShortInformationMessage
  	// DeletedVolumesChan receives a message when a volume is unmounted or deleted.
  	DeletedVolumesChan  chan master_pb.VolumeShortInformationMessage
  	NewEcShardsChan     chan master_pb.VolumeEcShardInformationMessage
  	DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  }
  44. func (s *Store) String() (str string) {
  45. str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  46. return
  47. }
  48. func NewStore(grpcDialOption grpc.DialOption, port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, minFreeSpacePercents []float32, needleMapKind NeedleMapType) (s *Store) {
  49. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, PublicUrl: publicUrl, NeedleMapType: needleMapKind}
  50. s.Locations = make([]*DiskLocation, 0)
  51. for i := 0; i < len(dirnames); i++ {
  52. location := NewDiskLocation(util.ResolvePath(dirnames[i]), maxVolumeCounts[i], minFreeSpacePercents[i])
  53. location.loadExistingVolumes(needleMapKind)
  54. s.Locations = append(s.Locations, location)
  55. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  56. }
  57. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  58. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  59. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  60. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  61. return
  62. }
  63. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32) error {
  64. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  65. if e != nil {
  66. return e
  67. }
  68. ttl, e := needle.ReadTTL(ttlString)
  69. if e != nil {
  70. return e
  71. }
  72. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb)
  73. return e
  74. }
  75. func (s *Store) DeleteCollection(collection string) (e error) {
  76. for _, location := range s.Locations {
  77. e = location.DeleteCollectionFromDiskLocation(collection)
  78. if e != nil {
  79. return
  80. }
  81. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  82. }
  83. return
  84. }
  85. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  86. for _, location := range s.Locations {
  87. if v, found := location.FindVolume(vid); found {
  88. return v
  89. }
  90. }
  91. return nil
  92. }
  93. func (s *Store) FindFreeLocation() (ret *DiskLocation) {
  94. max := 0
  95. for _, location := range s.Locations {
  96. currentFreeCount := location.MaxVolumeCount - location.VolumesLen()
  97. currentFreeCount *= erasure_coding.DataShardsCount
  98. currentFreeCount -= location.EcVolumesLen()
  99. currentFreeCount /= erasure_coding.DataShardsCount
  100. if currentFreeCount > max {
  101. max = currentFreeCount
  102. ret = location
  103. }
  104. }
  105. return ret
  106. }
  107. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32) error {
  108. if s.findVolume(vid) != nil {
  109. return fmt.Errorf("Volume Id %d already exists!", vid)
  110. }
  111. if location := s.FindFreeLocation(); location != nil {
  112. log.Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  113. location.Directory, vid, collection, replicaPlacement, ttl)
  114. if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  115. location.SetVolume(vid, volume)
  116. log.Infof("add volume %d", vid)
  117. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  118. Id: uint32(vid),
  119. Collection: collection,
  120. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  121. Version: uint32(volume.Version()),
  122. Ttl: ttl.ToUint32(),
  123. }
  124. return nil
  125. } else {
  126. return err
  127. }
  128. }
  129. return fmt.Errorf("No more free space left")
  130. }
  131. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  132. for _, location := range s.Locations {
  133. stats := collectStatsForOneLocation(location)
  134. allStats = append(allStats, stats...)
  135. }
  136. sortVolumeInfos(allStats)
  137. return allStats
  138. }
  139. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  140. location.volumesLock.RLock()
  141. defer location.volumesLock.RUnlock()
  142. for k, v := range location.volumes {
  143. s := collectStatForOneVolume(k, v)
  144. stats = append(stats, s)
  145. }
  146. return stats
  147. }
  148. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  149. s = &VolumeInfo{
  150. Id: vid,
  151. Collection: v.Collection,
  152. ReplicaPlacement: v.ReplicaPlacement,
  153. Version: v.Version(),
  154. ReadOnly: v.IsReadOnly(),
  155. Ttl: v.Ttl,
  156. CompactRevision: uint32(v.CompactionRevision),
  157. }
  158. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  159. v.dataFileAccessLock.RLock()
  160. defer v.dataFileAccessLock.RUnlock()
  161. if v.nm == nil {
  162. return
  163. }
  164. s.FileCount = v.nm.FileCount()
  165. s.DeleteCount = v.nm.DeletedCount()
  166. s.DeletedByteCount = v.nm.DeletedSize()
  167. s.Size = v.nm.ContentSize()
  168. return
  169. }
  // SetDataCenter records the data center name that CollectHeartbeat reports
  // to the master.
  func (s *Store) SetDataCenter(dataCenter string) {
  	s.dataCenter = dataCenter
  }
  // SetRack records the rack name that CollectHeartbeat reports to the master.
  func (s *Store) SetRack(rack string) {
  	s.rack = rack
  }
  // GetDataCenter returns the data center name stored by SetDataCenter, or the
  // empty string when none was set.
  func (s *Store) GetDataCenter() string {
  	return s.dataCenter
  }
  // GetRack returns the rack name stored by SetRack, or the empty string when
  // none was set.
  func (s *Store) GetRack() string {
  	return s.rack
  }
  182. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  183. var volumeMessages []*master_pb.VolumeInformationMessage
  184. maxVolumeCount := 0
  185. var maxFileKey NeedleId
  186. collectionVolumeSize := make(map[string]uint64)
  187. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  188. for _, location := range s.Locations {
  189. var deleteVids []needle.VolumeId
  190. maxVolumeCount = maxVolumeCount + location.MaxVolumeCount
  191. location.volumesLock.RLock()
  192. for _, v := range location.volumes {
  193. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  194. if maxFileKey < curMaxFileKey {
  195. maxFileKey = curMaxFileKey
  196. }
  197. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  198. volumeMessages = append(volumeMessages, volumeMessage)
  199. } else {
  200. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  201. deleteVids = append(deleteVids, v.Id)
  202. } else {
  203. log.Infoln("volume", v.Id, "is expired.")
  204. }
  205. }
  206. collectionVolumeSize[v.Collection] += volumeMessage.Size
  207. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  208. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  209. "IsReadOnly": 0,
  210. "noWriteOrDelete": 0,
  211. "noWriteCanDelete": 0,
  212. "isDiskSpaceLow": 0,
  213. }
  214. }
  215. if v.IsReadOnly() {
  216. collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
  217. if v.noWriteOrDelete {
  218. collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
  219. }
  220. if v.noWriteCanDelete {
  221. collectionVolumeReadOnlyCount[v.Collection]["noWriteCanDelete"] += 1
  222. }
  223. if v.location.isDiskSpaceLow {
  224. collectionVolumeReadOnlyCount[v.Collection]["isDiskSpaceLow"] += 1
  225. }
  226. }
  227. }
  228. location.volumesLock.RUnlock()
  229. if len(deleteVids) > 0 {
  230. // delete expired volumes.
  231. location.volumesLock.Lock()
  232. for _, vid := range deleteVids {
  233. found, err := location.deleteVolumeById(vid)
  234. if found {
  235. if err == nil {
  236. log.Infof("volume %d is deleted", vid)
  237. } else {
  238. log.Infof("delete volume %d: %v", vid, err)
  239. }
  240. }
  241. }
  242. location.volumesLock.Unlock()
  243. }
  244. }
  245. for col, size := range collectionVolumeSize {
  246. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  247. }
  248. for col, types := range collectionVolumeReadOnlyCount {
  249. for t, count := range types {
  250. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  251. }
  252. }
  253. return &master_pb.Heartbeat{
  254. Ip: s.Ip,
  255. Port: uint32(s.Port),
  256. PublicUrl: s.PublicUrl,
  257. MaxVolumeCount: uint32(maxVolumeCount),
  258. MaxFileKey: NeedleIdToUint64(maxFileKey),
  259. DataCenter: s.dataCenter,
  260. Rack: s.rack,
  261. Volumes: volumeMessages,
  262. HasNoVolumes: len(volumeMessages) == 0,
  263. }
  264. }
  265. func (s *Store) Close() {
  266. for _, location := range s.Locations {
  267. location.Close()
  268. }
  269. }
  270. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, fsync bool) (isUnchanged bool, err error) {
  271. if v := s.findVolume(i); v != nil {
  272. if v.IsReadOnly() {
  273. err = fmt.Errorf("volume %d is read only", i)
  274. return
  275. }
  276. _, _, isUnchanged, err = v.writeNeedle2(n, fsync)
  277. return
  278. }
  279. log.Infoln("volume", i, "not found!")
  280. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  281. return
  282. }
  283. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  284. if v := s.findVolume(i); v != nil {
  285. if v.noWriteOrDelete {
  286. return 0, fmt.Errorf("volume %d is read only", i)
  287. }
  288. return v.deleteNeedle2(n)
  289. }
  290. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  291. }
  292. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption) (int, error) {
  293. if v := s.findVolume(i); v != nil {
  294. return v.readNeedle(n, readOption)
  295. }
  296. return 0, fmt.Errorf("volume %d not found", i)
  297. }
  // GetVolume returns the volume registered under id i in any disk location,
  // or nil when this store does not have it.
  func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  	return s.findVolume(i)
  }
  301. func (s *Store) HasVolume(i needle.VolumeId) bool {
  302. v := s.findVolume(i)
  303. return v != nil
  304. }
  305. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  306. v := s.findVolume(i)
  307. if v == nil {
  308. return fmt.Errorf("volume %d not found", i)
  309. }
  310. v.noWriteLock.Lock()
  311. v.noWriteOrDelete = true
  312. v.noWriteLock.Unlock()
  313. return nil
  314. }
  315. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  316. v := s.findVolume(i)
  317. if v == nil {
  318. return fmt.Errorf("volume %d not found", i)
  319. }
  320. v.noWriteLock.Lock()
  321. v.noWriteOrDelete = false
  322. v.noWriteLock.Unlock()
  323. return nil
  324. }
  325. func (s *Store) MountVolume(i needle.VolumeId) error {
  326. for _, location := range s.Locations {
  327. if found := location.LoadVolume(i, s.NeedleMapType); found == true {
  328. log.Infof("mount volume %d", i)
  329. v := s.findVolume(i)
  330. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  331. Id: uint32(v.Id),
  332. Collection: v.Collection,
  333. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  334. Version: uint32(v.Version()),
  335. Ttl: v.Ttl.ToUint32(),
  336. }
  337. return nil
  338. }
  339. }
  340. return fmt.Errorf("volume %d not found on disk", i)
  341. }
  342. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  343. v := s.findVolume(i)
  344. if v == nil {
  345. return nil
  346. }
  347. message := master_pb.VolumeShortInformationMessage{
  348. Id: uint32(v.Id),
  349. Collection: v.Collection,
  350. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  351. Version: uint32(v.Version()),
  352. Ttl: v.Ttl.ToUint32(),
  353. }
  354. for _, location := range s.Locations {
  355. if err := location.UnloadVolume(i); err == nil {
  356. log.Infof("UnmountVolume %d", i)
  357. s.DeletedVolumesChan <- message
  358. return nil
  359. }
  360. }
  361. return fmt.Errorf("volume %d not found on disk", i)
  362. }
  363. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  364. v := s.findVolume(i)
  365. if v == nil {
  366. return fmt.Errorf("delete volume %d not found on disk", i)
  367. }
  368. message := master_pb.VolumeShortInformationMessage{
  369. Id: uint32(v.Id),
  370. Collection: v.Collection,
  371. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  372. Version: uint32(v.Version()),
  373. Ttl: v.Ttl.ToUint32(),
  374. }
  375. for _, location := range s.Locations {
  376. if err := location.DeleteVolume(i); err == nil {
  377. log.Infof("DeleteVolume %d", i)
  378. s.DeletedVolumesChan <- message
  379. return nil
  380. } else {
  381. log.Errorf("DeleteVolume %d: %v", i, err)
  382. }
  383. }
  384. return fmt.Errorf("volume %d not found on disk", i)
  385. }
  386. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  387. for _, location := range s.Locations {
  388. fileInfo, found := location.LocateVolume(i)
  389. if !found {
  390. continue
  391. }
  392. // load, modify, save
  393. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  394. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  395. volumeInfo, _, err := pb.MaybeLoadVolumeInfo(vifFile)
  396. if err != nil {
  397. return fmt.Errorf("volume %d fail to load vif", i)
  398. }
  399. volumeInfo.Replication = replication
  400. err = pb.SaveVolumeInfo(vifFile, volumeInfo)
  401. if err != nil {
  402. return fmt.Errorf("volume %d fail to save vif", i)
  403. }
  404. return nil
  405. }
  406. return fmt.Errorf("volume %d not found on disk", i)
  407. }
  // SetVolumeSizeLimit atomically stores x as the volume size limit.
  func (s *Store) SetVolumeSizeLimit(x uint64) {
  	atomic.StoreUint64(&s.volumeSizeLimit, x)
  }
  // GetVolumeSizeLimit atomically loads the volume size limit; it is 0 until
  // SetVolumeSizeLimit has been called.
  func (s *Store) GetVolumeSizeLimit() uint64 {
  	return atomic.LoadUint64(&s.volumeSizeLimit)
  }
  414. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  415. volumeSizeLimit := s.GetVolumeSizeLimit()
  416. for _, diskLocation := range s.Locations {
  417. if diskLocation.OriginalMaxVolumeCount == 0 {
  418. currentMaxVolumeCount := diskLocation.MaxVolumeCount
  419. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  420. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  421. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  422. volCount := diskLocation.VolumesLen()
  423. maxVolumeCount := volCount
  424. if unclaimedSpaces > int64(volumeSizeLimit) {
  425. maxVolumeCount += int(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  426. }
  427. diskLocation.MaxVolumeCount = maxVolumeCount
  428. log.Debugf("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  429. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  430. hasChanges = hasChanges || currentMaxVolumeCount != diskLocation.MaxVolumeCount
  431. }
  432. }
  433. return
  434. }