rocksdb_store.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. //go:build rocksdb
  2. // +build rocksdb
  3. package rocksdb
  4. import (
  5. "bytes"
  6. "context"
  7. "crypto/md5"
  8. "fmt"
  9. "io"
  10. "os"
  11. gorocksdb "github.com/linxGnu/grocksdb"
  12. "github.com/seaweedfs/seaweedfs/weed/filer"
  13. "github.com/seaweedfs/seaweedfs/weed/glog"
  14. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  15. weed_util "github.com/seaweedfs/seaweedfs/weed/util"
  16. )
  17. func init() {
  18. filer.Stores = append(filer.Stores, &RocksDBStore{})
  19. }
  20. type options struct {
  21. opt *gorocksdb.Options
  22. bto *gorocksdb.BlockBasedTableOptions
  23. ro *gorocksdb.ReadOptions
  24. wo *gorocksdb.WriteOptions
  25. }
  26. func (opt *options) init() {
  27. opt.opt = gorocksdb.NewDefaultOptions()
  28. opt.bto = gorocksdb.NewDefaultBlockBasedTableOptions()
  29. opt.ro = gorocksdb.NewDefaultReadOptions()
  30. opt.wo = gorocksdb.NewDefaultWriteOptions()
  31. }
  32. func (opt *options) close() {
  33. opt.opt.Destroy()
  34. opt.bto.Destroy()
  35. opt.ro.Destroy()
  36. opt.wo.Destroy()
  37. }
  38. type RocksDBStore struct {
  39. path string
  40. db *gorocksdb.DB
  41. options
  42. }
  43. func (store *RocksDBStore) GetName() string {
  44. return "rocksdb"
  45. }
  46. func (store *RocksDBStore) Initialize(configuration weed_util.Configuration, prefix string) (err error) {
  47. dir := configuration.GetString(prefix + "dir")
  48. return store.initialize(dir)
  49. }
  50. func (store *RocksDBStore) initialize(dir string) (err error) {
  51. glog.Infof("filer store rocksdb dir: %s", dir)
  52. os.MkdirAll(dir, 0755)
  53. if err := weed_util.TestFolderWritable(dir); err != nil {
  54. return fmt.Errorf("Check Level Folder %s Writable: %s", dir, err)
  55. }
  56. store.options.init()
  57. store.opt.SetCreateIfMissing(true)
  58. // reduce write amplification
  59. // also avoid expired data stored in highest level never get compacted
  60. store.opt.SetLevelCompactionDynamicLevelBytes(true)
  61. store.opt.SetCompactionFilter(NewTTLFilter())
  62. // store.opt.SetMaxBackgroundCompactions(2)
  63. // https://github.com/tecbot/gorocksdb/issues/132
  64. store.bto.SetFilterPolicy(gorocksdb.NewBloomFilterFull(8))
  65. store.opt.SetBlockBasedTableFactory(store.bto)
  66. // store.opt.EnableStatistics()
  67. store.db, err = gorocksdb.OpenDb(store.opt, dir)
  68. return
  69. }
  70. func (store *RocksDBStore) BeginTransaction(ctx context.Context) (context.Context, error) {
  71. return ctx, nil
  72. }
  73. func (store *RocksDBStore) CommitTransaction(ctx context.Context) error {
  74. return nil
  75. }
  76. func (store *RocksDBStore) RollbackTransaction(ctx context.Context) error {
  77. return nil
  78. }
  79. func (store *RocksDBStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) {
  80. dir, name := entry.DirAndName()
  81. key := genKey(dir, name)
  82. value, err := entry.EncodeAttributesAndChunks()
  83. if err != nil {
  84. return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err)
  85. }
  86. err = store.db.Put(store.wo, key, value)
  87. if err != nil {
  88. return fmt.Errorf("persisting %s : %v", entry.FullPath, err)
  89. }
  90. // println("saved", entry.FullPath, "chunks", len(entry.GetChunks()))
  91. return nil
  92. }
  93. func (store *RocksDBStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) {
  94. return store.InsertEntry(ctx, entry)
  95. }
  96. func (store *RocksDBStore) FindEntry(ctx context.Context, fullpath weed_util.FullPath) (entry *filer.Entry, err error) {
  97. dir, name := fullpath.DirAndName()
  98. key := genKey(dir, name)
  99. data, err := store.db.Get(store.ro, key)
  100. if data == nil || !data.Exists() {
  101. return nil, filer_pb.ErrNotFound
  102. }
  103. defer data.Free()
  104. if err != nil {
  105. return nil, fmt.Errorf("get %s : %v", fullpath, err)
  106. }
  107. entry = &filer.Entry{
  108. FullPath: fullpath,
  109. }
  110. err = entry.DecodeAttributesAndChunks(data.Data())
  111. if err != nil {
  112. return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err)
  113. }
  114. // println("read", entry.FullPath, "chunks", len(entry.GetChunks()), "data", len(data), string(data))
  115. return entry, nil
  116. }
  117. func (store *RocksDBStore) DeleteEntry(ctx context.Context, fullpath weed_util.FullPath) (err error) {
  118. dir, name := fullpath.DirAndName()
  119. key := genKey(dir, name)
  120. err = store.db.Delete(store.wo, key)
  121. if err != nil {
  122. return fmt.Errorf("delete %s : %v", fullpath, err)
  123. }
  124. return nil
  125. }
  126. func (store *RocksDBStore) DeleteFolderChildren(ctx context.Context, fullpath weed_util.FullPath) (err error) {
  127. directoryPrefix := genDirectoryKeyPrefix(fullpath, "")
  128. batch := gorocksdb.NewWriteBatch()
  129. defer batch.Destroy()
  130. ro := gorocksdb.NewDefaultReadOptions()
  131. defer ro.Destroy()
  132. ro.SetFillCache(false)
  133. iter := store.db.NewIterator(ro)
  134. defer iter.Close()
  135. err = enumerate(iter, directoryPrefix, nil, false, -1, func(key, value []byte) bool {
  136. batch.Delete(key)
  137. return true
  138. })
  139. if err != nil {
  140. return fmt.Errorf("delete list %s : %v", fullpath, err)
  141. }
  142. err = store.db.Write(store.wo, batch)
  143. if err != nil {
  144. return fmt.Errorf("delete %s : %v", fullpath, err)
  145. }
  146. return nil
  147. }
  148. func enumerate(iter *gorocksdb.Iterator, prefix, lastKey []byte, includeLastKey bool, limit int64, fn func(key, value []byte) bool) (err error) {
  149. if len(lastKey) == 0 {
  150. iter.Seek(prefix)
  151. } else {
  152. iter.Seek(lastKey)
  153. if !includeLastKey {
  154. if iter.Valid() {
  155. if bytes.Equal(iter.Key().Data(), lastKey) {
  156. iter.Next()
  157. }
  158. }
  159. }
  160. }
  161. i := int64(0)
  162. for ; iter.Valid(); iter.Next() {
  163. if limit > 0 {
  164. i++
  165. if i > limit {
  166. break
  167. }
  168. }
  169. key := iter.Key().Data()
  170. if !bytes.HasPrefix(key, prefix) {
  171. break
  172. }
  173. ret := fn(key, iter.Value().Data())
  174. if !ret {
  175. break
  176. }
  177. }
  178. if err := iter.Err(); err != nil {
  179. return fmt.Errorf("prefix scan iterator: %v", err)
  180. }
  181. return nil
  182. }
  183. func (store *RocksDBStore) ListDirectoryEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
  184. return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc)
  185. }
  186. func (store *RocksDBStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) {
  187. directoryPrefix := genDirectoryKeyPrefix(dirPath, prefix)
  188. lastFileStart := directoryPrefix
  189. if startFileName != "" {
  190. lastFileStart = genDirectoryKeyPrefix(dirPath, startFileName)
  191. }
  192. ro := gorocksdb.NewDefaultReadOptions()
  193. defer ro.Destroy()
  194. ro.SetFillCache(false)
  195. iter := store.db.NewIterator(ro)
  196. defer iter.Close()
  197. err = enumerate(iter, directoryPrefix, lastFileStart, includeStartFile, limit, func(key, value []byte) bool {
  198. fileName := getNameFromKey(key)
  199. if fileName == "" {
  200. return true
  201. }
  202. entry := &filer.Entry{
  203. FullPath: weed_util.NewFullPath(string(dirPath), fileName),
  204. }
  205. lastFileName = fileName
  206. // println("list", entry.FullPath, "chunks", len(entry.GetChunks()))
  207. if decodeErr := entry.DecodeAttributesAndChunks(value); decodeErr != nil {
  208. err = decodeErr
  209. glog.V(0).Infof("list %s : %v", entry.FullPath, err)
  210. return false
  211. }
  212. if !eachEntryFunc(entry) {
  213. return false
  214. }
  215. return true
  216. })
  217. if err != nil {
  218. return lastFileName, fmt.Errorf("prefix list %s : %v", dirPath, err)
  219. }
  220. return lastFileName, err
  221. }
  222. func genKey(dirPath, fileName string) (key []byte) {
  223. key = hashToBytes(dirPath)
  224. key = append(key, []byte(fileName)...)
  225. return key
  226. }
  227. func genDirectoryKeyPrefix(fullpath weed_util.FullPath, startFileName string) (keyPrefix []byte) {
  228. keyPrefix = hashToBytes(string(fullpath))
  229. if len(startFileName) > 0 {
  230. keyPrefix = append(keyPrefix, []byte(startFileName)...)
  231. }
  232. return keyPrefix
  233. }
  234. func getNameFromKey(key []byte) string {
  235. return string(key[md5.Size:])
  236. }
  237. // hash directory, and use last byte for partitioning
  238. func hashToBytes(dir string) []byte {
  239. h := md5.New()
  240. io.WriteString(h, dir)
  241. b := h.Sum(nil)
  242. return b
  243. }
  244. func (store *RocksDBStore) Shutdown() {
  245. store.db.Close()
  246. store.options.close()
  247. }