store.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. package storage
  2. import (
  3. "fmt"
  4. "path/filepath"
  5. "strings"
  6. "sync/atomic"
  7. "google.golang.org/grpc"
  8. "github.com/chrislusf/seaweedfs/weed/glog"
  9. "github.com/chrislusf/seaweedfs/weed/pb"
  10. "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
  11. "github.com/chrislusf/seaweedfs/weed/stats"
  12. "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
  13. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  14. "github.com/chrislusf/seaweedfs/weed/storage/super_block"
  15. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  16. )
  // MAX_TTL_VOLUME_REMOVAL_DELAY is the delay, in minutes, handed to
  // Volume.expiredLongEnough when deciding whether an expired volume
  // should be removed during heartbeat collection.
  const MAX_TTL_VOLUME_REMOVAL_DELAY = 10
  // ReadOption carries per-request flags for needle reads; it is passed
  // through unchanged to Volume.readNeedle by Store.ReadVolumeNeedle.
  type ReadOption struct {
  	// ReadDeleted presumably allows deleted needles to be read;
  	// the flag is interpreted by the volume layer, not here.
  	ReadDeleted bool
  }
  23. /*
  24. * A VolumeServer contains one Store
  25. */
  26. type Store struct {
  27. MasterAddress string
  28. grpcDialOption grpc.DialOption
  29. volumeSizeLimit uint64 // read from the master
  30. Ip string
  31. Port int
  32. PublicUrl string
  33. Locations []*DiskLocation
  34. dataCenter string // optional informaton, overwriting master setting if exists
  35. rack string // optional information, overwriting master setting if exists
  36. connected bool
  37. NeedleMapType NeedleMapType
  38. NewVolumesChan chan master_pb.VolumeShortInformationMessage
  39. DeletedVolumesChan chan master_pb.VolumeShortInformationMessage
  40. NewEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  41. DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage
  42. }
  43. func (s *Store) String() (str string) {
  44. str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit())
  45. return
  46. }
  47. func NewStore(grpcDialOption grpc.DialOption, port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, minFreeSpacePercents []float32, idxFolder string, needleMapKind NeedleMapType) (s *Store) {
  48. s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, PublicUrl: publicUrl, NeedleMapType: needleMapKind}
  49. s.Locations = make([]*DiskLocation, 0)
  50. for i := 0; i < len(dirnames); i++ {
  51. location := NewDiskLocation(dirnames[i], maxVolumeCounts[i], minFreeSpacePercents[i], idxFolder)
  52. location.loadExistingVolumes(needleMapKind)
  53. s.Locations = append(s.Locations, location)
  54. stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i]))
  55. }
  56. s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  57. s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3)
  58. s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  59. s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3)
  60. return
  61. }
  62. func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32) error {
  63. rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement)
  64. if e != nil {
  65. return e
  66. }
  67. ttl, e := needle.ReadTTL(ttlString)
  68. if e != nil {
  69. return e
  70. }
  71. e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb)
  72. return e
  73. }
  74. func (s *Store) DeleteCollection(collection string) (e error) {
  75. for _, location := range s.Locations {
  76. e = location.DeleteCollectionFromDiskLocation(collection)
  77. if e != nil {
  78. return
  79. }
  80. // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan
  81. }
  82. return
  83. }
  84. func (s *Store) findVolume(vid needle.VolumeId) *Volume {
  85. for _, location := range s.Locations {
  86. if v, found := location.FindVolume(vid); found {
  87. return v
  88. }
  89. }
  90. return nil
  91. }
  92. func (s *Store) FindFreeLocation() (ret *DiskLocation) {
  93. max := 0
  94. for _, location := range s.Locations {
  95. currentFreeCount := location.MaxVolumeCount - location.VolumesLen()
  96. currentFreeCount *= erasure_coding.DataShardsCount
  97. currentFreeCount -= location.EcVolumesLen()
  98. currentFreeCount /= erasure_coding.DataShardsCount
  99. if currentFreeCount > max {
  100. max = currentFreeCount
  101. ret = location
  102. }
  103. }
  104. return ret
  105. }
  106. func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32) error {
  107. if s.findVolume(vid) != nil {
  108. return fmt.Errorf("Volume Id %d already exists!", vid)
  109. }
  110. if location := s.FindFreeLocation(); location != nil {
  111. glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
  112. location.Directory, vid, collection, replicaPlacement, ttl)
  113. if volume, err := NewVolume(location.Directory, location.IdxDirectory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil {
  114. location.SetVolume(vid, volume)
  115. glog.V(0).Infof("add volume %d", vid)
  116. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  117. Id: uint32(vid),
  118. Collection: collection,
  119. ReplicaPlacement: uint32(replicaPlacement.Byte()),
  120. Version: uint32(volume.Version()),
  121. Ttl: ttl.ToUint32(),
  122. }
  123. return nil
  124. } else {
  125. return err
  126. }
  127. }
  128. return fmt.Errorf("No more free space left")
  129. }
  130. func (s *Store) VolumeInfos() (allStats []*VolumeInfo) {
  131. for _, location := range s.Locations {
  132. stats := collectStatsForOneLocation(location)
  133. allStats = append(allStats, stats...)
  134. }
  135. sortVolumeInfos(allStats)
  136. return allStats
  137. }
  138. func collectStatsForOneLocation(location *DiskLocation) (stats []*VolumeInfo) {
  139. location.volumesLock.RLock()
  140. defer location.volumesLock.RUnlock()
  141. for k, v := range location.volumes {
  142. s := collectStatForOneVolume(k, v)
  143. stats = append(stats, s)
  144. }
  145. return stats
  146. }
  147. func collectStatForOneVolume(vid needle.VolumeId, v *Volume) (s *VolumeInfo) {
  148. s = &VolumeInfo{
  149. Id: vid,
  150. Collection: v.Collection,
  151. ReplicaPlacement: v.ReplicaPlacement,
  152. Version: v.Version(),
  153. ReadOnly: v.IsReadOnly(),
  154. Ttl: v.Ttl,
  155. CompactRevision: uint32(v.CompactionRevision),
  156. }
  157. s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey()
  158. v.dataFileAccessLock.RLock()
  159. defer v.dataFileAccessLock.RUnlock()
  160. if v.nm == nil {
  161. return
  162. }
  163. s.FileCount = v.nm.FileCount()
  164. s.DeleteCount = v.nm.DeletedCount()
  165. s.DeletedByteCount = v.nm.DeletedSize()
  166. s.Size = v.nm.ContentSize()
  167. return
  168. }
  169. func (s *Store) SetDataCenter(dataCenter string) {
  170. s.dataCenter = dataCenter
  171. }
  172. func (s *Store) SetRack(rack string) {
  173. s.rack = rack
  174. }
  175. func (s *Store) GetDataCenter() string {
  176. return s.dataCenter
  177. }
  178. func (s *Store) GetRack() string {
  179. return s.rack
  180. }
  181. func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
  182. var volumeMessages []*master_pb.VolumeInformationMessage
  183. maxVolumeCount := 0
  184. var maxFileKey NeedleId
  185. collectionVolumeSize := make(map[string]uint64)
  186. collectionVolumeReadOnlyCount := make(map[string]map[string]uint8)
  187. for _, location := range s.Locations {
  188. var deleteVids []needle.VolumeId
  189. maxVolumeCount = maxVolumeCount + location.MaxVolumeCount
  190. location.volumesLock.RLock()
  191. for _, v := range location.volumes {
  192. curMaxFileKey, volumeMessage := v.ToVolumeInformationMessage()
  193. if maxFileKey < curMaxFileKey {
  194. maxFileKey = curMaxFileKey
  195. }
  196. if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
  197. volumeMessages = append(volumeMessages, volumeMessage)
  198. } else {
  199. if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
  200. deleteVids = append(deleteVids, v.Id)
  201. } else {
  202. glog.V(0).Infof("volume %d is expired", v.Id)
  203. }
  204. if v.lastIoError != nil {
  205. deleteVids = append(deleteVids, v.Id)
  206. glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
  207. }
  208. }
  209. collectionVolumeSize[v.Collection] += volumeMessage.Size
  210. if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
  211. collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
  212. "IsReadOnly": 0,
  213. "noWriteOrDelete": 0,
  214. "noWriteCanDelete": 0,
  215. "isDiskSpaceLow": 0,
  216. }
  217. }
  218. if v.IsReadOnly() {
  219. collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
  220. if v.noWriteOrDelete {
  221. collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
  222. }
  223. if v.noWriteCanDelete {
  224. collectionVolumeReadOnlyCount[v.Collection]["noWriteCanDelete"] += 1
  225. }
  226. if v.location.isDiskSpaceLow {
  227. collectionVolumeReadOnlyCount[v.Collection]["isDiskSpaceLow"] += 1
  228. }
  229. }
  230. }
  231. location.volumesLock.RUnlock()
  232. if len(deleteVids) > 0 {
  233. // delete expired volumes.
  234. location.volumesLock.Lock()
  235. for _, vid := range deleteVids {
  236. found, err := location.deleteVolumeById(vid)
  237. if err == nil {
  238. if found {
  239. glog.V(0).Infof("volume %d is deleted", vid)
  240. }
  241. } else {
  242. glog.V(0).Infof("delete volume %d: %v", vid, err)
  243. }
  244. }
  245. location.volumesLock.Unlock()
  246. }
  247. }
  248. for col, size := range collectionVolumeSize {
  249. stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size))
  250. }
  251. for col, types := range collectionVolumeReadOnlyCount {
  252. for t, count := range types {
  253. stats.VolumeServerReadOnlyVolumeGauge.WithLabelValues(col, t).Set(float64(count))
  254. }
  255. }
  256. return &master_pb.Heartbeat{
  257. Ip: s.Ip,
  258. Port: uint32(s.Port),
  259. PublicUrl: s.PublicUrl,
  260. MaxVolumeCount: uint32(maxVolumeCount),
  261. MaxFileKey: NeedleIdToUint64(maxFileKey),
  262. DataCenter: s.dataCenter,
  263. Rack: s.rack,
  264. Volumes: volumeMessages,
  265. HasNoVolumes: len(volumeMessages) == 0,
  266. }
  267. }
  268. func (s *Store) Close() {
  269. for _, location := range s.Locations {
  270. location.Close()
  271. }
  272. }
  273. func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle, fsync bool) (isUnchanged bool, err error) {
  274. if v := s.findVolume(i); v != nil {
  275. if v.IsReadOnly() {
  276. err = fmt.Errorf("volume %d is read only", i)
  277. return
  278. }
  279. _, _, isUnchanged, err = v.writeNeedle2(n, fsync)
  280. return
  281. }
  282. glog.V(0).Infoln("volume", i, "not found!")
  283. err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  284. return
  285. }
  286. func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (Size, error) {
  287. if v := s.findVolume(i); v != nil {
  288. if v.noWriteOrDelete {
  289. return 0, fmt.Errorf("volume %d is read only", i)
  290. }
  291. return v.deleteNeedle2(n)
  292. }
  293. return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port)
  294. }
  295. func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle, readOption *ReadOption) (int, error) {
  296. if v := s.findVolume(i); v != nil {
  297. return v.readNeedle(n, readOption)
  298. }
  299. return 0, fmt.Errorf("volume %d not found", i)
  300. }
  301. func (s *Store) GetVolume(i needle.VolumeId) *Volume {
  302. return s.findVolume(i)
  303. }
  304. func (s *Store) HasVolume(i needle.VolumeId) bool {
  305. v := s.findVolume(i)
  306. return v != nil
  307. }
  308. func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error {
  309. v := s.findVolume(i)
  310. if v == nil {
  311. return fmt.Errorf("volume %d not found", i)
  312. }
  313. v.noWriteLock.Lock()
  314. v.noWriteOrDelete = true
  315. v.noWriteLock.Unlock()
  316. return nil
  317. }
  318. func (s *Store) MarkVolumeWritable(i needle.VolumeId) error {
  319. v := s.findVolume(i)
  320. if v == nil {
  321. return fmt.Errorf("volume %d not found", i)
  322. }
  323. v.noWriteLock.Lock()
  324. v.noWriteOrDelete = false
  325. v.noWriteLock.Unlock()
  326. return nil
  327. }
  328. func (s *Store) MountVolume(i needle.VolumeId) error {
  329. for _, location := range s.Locations {
  330. if found := location.LoadVolume(i, s.NeedleMapType); found == true {
  331. glog.V(0).Infof("mount volume %d", i)
  332. v := s.findVolume(i)
  333. s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{
  334. Id: uint32(v.Id),
  335. Collection: v.Collection,
  336. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  337. Version: uint32(v.Version()),
  338. Ttl: v.Ttl.ToUint32(),
  339. }
  340. return nil
  341. }
  342. }
  343. return fmt.Errorf("volume %d not found on disk", i)
  344. }
  345. func (s *Store) UnmountVolume(i needle.VolumeId) error {
  346. v := s.findVolume(i)
  347. if v == nil {
  348. return nil
  349. }
  350. message := master_pb.VolumeShortInformationMessage{
  351. Id: uint32(v.Id),
  352. Collection: v.Collection,
  353. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  354. Version: uint32(v.Version()),
  355. Ttl: v.Ttl.ToUint32(),
  356. }
  357. for _, location := range s.Locations {
  358. if err := location.UnloadVolume(i); err == nil {
  359. glog.V(0).Infof("UnmountVolume %d", i)
  360. s.DeletedVolumesChan <- message
  361. return nil
  362. }
  363. }
  364. return fmt.Errorf("volume %d not found on disk", i)
  365. }
  366. func (s *Store) DeleteVolume(i needle.VolumeId) error {
  367. v := s.findVolume(i)
  368. if v == nil {
  369. return fmt.Errorf("delete volume %d not found on disk", i)
  370. }
  371. message := master_pb.VolumeShortInformationMessage{
  372. Id: uint32(v.Id),
  373. Collection: v.Collection,
  374. ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
  375. Version: uint32(v.Version()),
  376. Ttl: v.Ttl.ToUint32(),
  377. }
  378. for _, location := range s.Locations {
  379. if err := location.DeleteVolume(i); err == nil {
  380. glog.V(0).Infof("DeleteVolume %d", i)
  381. s.DeletedVolumesChan <- message
  382. return nil
  383. } else {
  384. glog.Errorf("DeleteVolume %d: %v", i, err)
  385. }
  386. }
  387. return fmt.Errorf("volume %d not found on disk", i)
  388. }
  389. func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
  390. for _, location := range s.Locations {
  391. fileInfo, found := location.LocateVolume(i)
  392. if !found {
  393. continue
  394. }
  395. // load, modify, save
  396. baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
  397. vifFile := filepath.Join(location.Directory, baseFileName+".vif")
  398. volumeInfo, _, err := pb.MaybeLoadVolumeInfo(vifFile)
  399. if err != nil {
  400. return fmt.Errorf("volume %d fail to load vif", i)
  401. }
  402. volumeInfo.Replication = replication
  403. err = pb.SaveVolumeInfo(vifFile, volumeInfo)
  404. if err != nil {
  405. return fmt.Errorf("volume %d fail to save vif", i)
  406. }
  407. return nil
  408. }
  409. return fmt.Errorf("volume %d not found on disk", i)
  410. }
  411. func (s *Store) SetVolumeSizeLimit(x uint64) {
  412. atomic.StoreUint64(&s.volumeSizeLimit, x)
  413. }
  414. func (s *Store) GetVolumeSizeLimit() uint64 {
  415. return atomic.LoadUint64(&s.volumeSizeLimit)
  416. }
  417. func (s *Store) MaybeAdjustVolumeMax() (hasChanges bool) {
  418. volumeSizeLimit := s.GetVolumeSizeLimit()
  419. for _, diskLocation := range s.Locations {
  420. if diskLocation.OriginalMaxVolumeCount == 0 {
  421. currentMaxVolumeCount := diskLocation.MaxVolumeCount
  422. diskStatus := stats.NewDiskStatus(diskLocation.Directory)
  423. unusedSpace := diskLocation.UnUsedSpace(volumeSizeLimit)
  424. unclaimedSpaces := int64(diskStatus.Free) - int64(unusedSpace)
  425. volCount := diskLocation.VolumesLen()
  426. maxVolumeCount := volCount
  427. if unclaimedSpaces > int64(volumeSizeLimit) {
  428. maxVolumeCount += int(uint64(unclaimedSpaces)/volumeSizeLimit) - 1
  429. }
  430. diskLocation.MaxVolumeCount = maxVolumeCount
  431. glog.V(2).Infof("disk %s max %d unclaimedSpace:%dMB, unused:%dMB volumeSizeLimit:%dMB",
  432. diskLocation.Directory, maxVolumeCount, unclaimedSpaces/1024/1024, unusedSpace/1024/1024, volumeSizeLimit/1024/1024)
  433. hasChanges = hasChanges || currentMaxVolumeCount != diskLocation.MaxVolumeCount
  434. }
  435. }
  436. return
  437. }