command_remote_cache.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. package shell
  2. import (
  3. "flag"
  4. "fmt"
  5. "github.com/seaweedfs/seaweedfs/weed/filer"
  6. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  7. "github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
  8. "github.com/seaweedfs/seaweedfs/weed/util"
  9. "io"
  10. "sync"
  11. )
  12. func init() {
  13. Commands = append(Commands, &commandRemoteCache{})
  14. }
  15. type commandRemoteCache struct {
  16. }
  17. func (c *commandRemoteCache) Name() string {
  18. return "remote.cache"
  19. }
  20. func (c *commandRemoteCache) Help() string {
  21. return `cache the file content for mounted directories or files
  22. # assume a remote storage is configured to name "cloud1"
  23. remote.configure -name=cloud1 -type=s3 -s3.access_key=xxx -s3.secret_key=yyy
  24. # mount and pull one bucket
  25. remote.mount -dir=/xxx -remote=cloud1/bucket
  26. # after mount, run one of these command to cache the content of the files
  27. remote.cache -dir=/xxx
  28. remote.cache -dir=/xxx/some/sub/dir
  29. remote.cache -dir=/xxx/some/sub/dir -include=*.pdf
  30. remote.cache -dir=/xxx/some/sub/dir -exclude=*.txt
  31. remote.cache -maxSize=1024000 # cache files smaller than 100K
  32. remote.cache -maxAge=3600 # cache files less than 1 hour old
  33. This is designed to run regularly. So you can add it to some cronjob.
  34. If a file is already synchronized with the remote copy, the file will be skipped to avoid unnecessary copy.
  35. The actual data copying goes through volume severs in parallel.
  36. `
  37. }
  38. func (c *commandRemoteCache) HasTag(CommandTag) bool {
  39. return false
  40. }
  41. func (c *commandRemoteCache) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
  42. remoteMountCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
  43. dir := remoteMountCommand.String("dir", "", "a mounted directory or one of its sub folders in filer")
  44. concurrency := remoteMountCommand.Int("concurrent", 32, "concurrent file downloading")
  45. fileFiler := newFileFilter(remoteMountCommand)
  46. if err = remoteMountCommand.Parse(args); err != nil {
  47. return nil
  48. }
  49. if *dir != "" {
  50. if err := c.doCacheOneDirectory(commandEnv, writer, *dir, fileFiler, *concurrency); err != nil {
  51. return err
  52. }
  53. return nil
  54. }
  55. mappings, err := filer.ReadMountMappings(commandEnv.option.GrpcDialOption, commandEnv.option.FilerAddress)
  56. if err != nil {
  57. return err
  58. }
  59. for key, _ := range mappings.Mappings {
  60. if err := c.doCacheOneDirectory(commandEnv, writer, key, fileFiler, *concurrency); err != nil {
  61. return err
  62. }
  63. }
  64. return nil
  65. }
  66. func (c *commandRemoteCache) doCacheOneDirectory(commandEnv *CommandEnv, writer io.Writer, dir string, fileFiler *FileFilter, concurrency int) error {
  67. mappings, localMountedDir, remoteStorageMountedLocation, remoteStorageConf, detectErr := detectMountInfo(commandEnv, writer, dir)
  68. if detectErr != nil {
  69. jsonPrintln(writer, mappings)
  70. return detectErr
  71. }
  72. // pull content from remote
  73. if err := c.cacheContentData(commandEnv, writer, util.FullPath(localMountedDir), remoteStorageMountedLocation, util.FullPath(dir), fileFiler, remoteStorageConf, concurrency); err != nil {
  74. return fmt.Errorf("cache content data on %s: %v", localMountedDir, err)
  75. }
  76. return nil
  77. }
  78. func recursivelyTraverseDirectory(filerClient filer_pb.FilerClient, dirPath util.FullPath, visitEntry func(dir util.FullPath, entry *filer_pb.Entry) bool) (err error) {
  79. err = filer_pb.ReadDirAllEntries(filerClient, dirPath, "", func(entry *filer_pb.Entry, isLast bool) error {
  80. if entry.IsDirectory {
  81. if !visitEntry(dirPath, entry) {
  82. return nil
  83. }
  84. subDir := dirPath.Child(entry.Name)
  85. if err := recursivelyTraverseDirectory(filerClient, subDir, visitEntry); err != nil {
  86. return err
  87. }
  88. } else {
  89. if !visitEntry(dirPath, entry) {
  90. return nil
  91. }
  92. }
  93. return nil
  94. })
  95. return
  96. }
  97. func shouldCacheToLocal(entry *filer_pb.Entry) bool {
  98. if entry.IsDirectory {
  99. return false
  100. }
  101. if entry.RemoteEntry == nil {
  102. return false
  103. }
  104. if entry.RemoteEntry.LastLocalSyncTsNs == 0 && entry.RemoteEntry.RemoteSize > 0 {
  105. return true
  106. }
  107. return false
  108. }
  109. func mayHaveCachedToLocal(entry *filer_pb.Entry) bool {
  110. if entry.IsDirectory {
  111. return false
  112. }
  113. if entry.RemoteEntry == nil {
  114. return false // should not uncache an entry that is not in remote
  115. }
  116. if entry.RemoteEntry.LastLocalSyncTsNs > 0 {
  117. return true
  118. }
  119. return false
  120. }
  121. func (c *commandRemoteCache) cacheContentData(commandEnv *CommandEnv, writer io.Writer, localMountedDir util.FullPath, remoteMountedLocation *remote_pb.RemoteStorageLocation, dirToCache util.FullPath, fileFilter *FileFilter, remoteConf *remote_pb.RemoteConf, concurrency int) error {
  122. var wg sync.WaitGroup
  123. limitedConcurrentExecutor := util.NewLimitedConcurrentExecutor(concurrency)
  124. var executionErr error
  125. traverseErr := recursivelyTraverseDirectory(commandEnv, dirToCache, func(dir util.FullPath, entry *filer_pb.Entry) bool {
  126. if !shouldCacheToLocal(entry) {
  127. return true // true means recursive traversal should continue
  128. }
  129. if !fileFilter.matches(entry) {
  130. return true
  131. }
  132. wg.Add(1)
  133. limitedConcurrentExecutor.Execute(func() {
  134. defer wg.Done()
  135. fmt.Fprintf(writer, "Cache %+v ...\n", dir.Child(entry.Name))
  136. remoteLocation := filer.MapFullPathToRemoteStorageLocation(localMountedDir, remoteMountedLocation, dir.Child(entry.Name))
  137. if err := filer.CacheRemoteObjectToLocalCluster(commandEnv, remoteConf, remoteLocation, dir, entry); err != nil {
  138. fmt.Fprintf(writer, "CacheRemoteObjectToLocalCluster %+v: %v\n", remoteLocation, err)
  139. if executionErr == nil {
  140. executionErr = fmt.Errorf("CacheRemoteObjectToLocalCluster %+v: %v\n", remoteLocation, err)
  141. }
  142. return
  143. }
  144. fmt.Fprintf(writer, "Cache %+v Done\n", dir.Child(entry.Name))
  145. })
  146. return true
  147. })
  148. wg.Wait()
  149. if traverseErr != nil {
  150. return traverseErr
  151. }
  152. if executionErr != nil {
  153. return executionErr
  154. }
  155. return nil
  156. }