123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191 |
- package shell
- import (
- "flag"
- "fmt"
- "github.com/seaweedfs/seaweedfs/weed/filer"
- "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
- "github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
- "github.com/seaweedfs/seaweedfs/weed/util"
- "io"
- "sync"
- )
- func init() {
- Commands = append(Commands, &commandRemoteCache{})
- }
- type commandRemoteCache struct {
- }
- func (c *commandRemoteCache) Name() string {
- return "remote.cache"
- }
- func (c *commandRemoteCache) Help() string {
- return `cache the file content for mounted directories or files
- # assume a remote storage is configured to name "cloud1"
- remote.configure -name=cloud1 -type=s3 -s3.access_key=xxx -s3.secret_key=yyy
- # mount and pull one bucket
- remote.mount -dir=/xxx -remote=cloud1/bucket
- # after mount, run one of these command to cache the content of the files
- remote.cache -dir=/xxx
- remote.cache -dir=/xxx/some/sub/dir
- remote.cache -dir=/xxx/some/sub/dir -include=*.pdf
- remote.cache -dir=/xxx/some/sub/dir -exclude=*.txt
- remote.cache -maxSize=1024000 # cache files smaller than 100K
- remote.cache -maxAge=3600 # cache files less than 1 hour old
- This is designed to run regularly. So you can add it to some cronjob.
- If a file is already synchronized with the remote copy, the file will be skipped to avoid unnecessary copy.
- The actual data copying goes through volume severs in parallel.
- `
- }
- func (c *commandRemoteCache) HasTag(CommandTag) bool {
- return false
- }
- func (c *commandRemoteCache) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
- remoteMountCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
- dir := remoteMountCommand.String("dir", "", "a mounted directory or one of its sub folders in filer")
- concurrency := remoteMountCommand.Int("concurrent", 32, "concurrent file downloading")
- fileFiler := newFileFilter(remoteMountCommand)
- if err = remoteMountCommand.Parse(args); err != nil {
- return nil
- }
- if *dir != "" {
- if err := c.doCacheOneDirectory(commandEnv, writer, *dir, fileFiler, *concurrency); err != nil {
- return err
- }
- return nil
- }
- mappings, err := filer.ReadMountMappings(commandEnv.option.GrpcDialOption, commandEnv.option.FilerAddress)
- if err != nil {
- return err
- }
- for key, _ := range mappings.Mappings {
- if err := c.doCacheOneDirectory(commandEnv, writer, key, fileFiler, *concurrency); err != nil {
- return err
- }
- }
- return nil
- }
- func (c *commandRemoteCache) doCacheOneDirectory(commandEnv *CommandEnv, writer io.Writer, dir string, fileFiler *FileFilter, concurrency int) error {
- mappings, localMountedDir, remoteStorageMountedLocation, remoteStorageConf, detectErr := detectMountInfo(commandEnv, writer, dir)
- if detectErr != nil {
- jsonPrintln(writer, mappings)
- return detectErr
- }
- // pull content from remote
- if err := c.cacheContentData(commandEnv, writer, util.FullPath(localMountedDir), remoteStorageMountedLocation, util.FullPath(dir), fileFiler, remoteStorageConf, concurrency); err != nil {
- return fmt.Errorf("cache content data on %s: %v", localMountedDir, err)
- }
- return nil
- }
- func recursivelyTraverseDirectory(filerClient filer_pb.FilerClient, dirPath util.FullPath, visitEntry func(dir util.FullPath, entry *filer_pb.Entry) bool) (err error) {
- err = filer_pb.ReadDirAllEntries(filerClient, dirPath, "", func(entry *filer_pb.Entry, isLast bool) error {
- if entry.IsDirectory {
- if !visitEntry(dirPath, entry) {
- return nil
- }
- subDir := dirPath.Child(entry.Name)
- if err := recursivelyTraverseDirectory(filerClient, subDir, visitEntry); err != nil {
- return err
- }
- } else {
- if !visitEntry(dirPath, entry) {
- return nil
- }
- }
- return nil
- })
- return
- }
- func shouldCacheToLocal(entry *filer_pb.Entry) bool {
- if entry.IsDirectory {
- return false
- }
- if entry.RemoteEntry == nil {
- return false
- }
- if entry.RemoteEntry.LastLocalSyncTsNs == 0 && entry.RemoteEntry.RemoteSize > 0 {
- return true
- }
- return false
- }
- func mayHaveCachedToLocal(entry *filer_pb.Entry) bool {
- if entry.IsDirectory {
- return false
- }
- if entry.RemoteEntry == nil {
- return false // should not uncache an entry that is not in remote
- }
- if entry.RemoteEntry.LastLocalSyncTsNs > 0 {
- return true
- }
- return false
- }
- func (c *commandRemoteCache) cacheContentData(commandEnv *CommandEnv, writer io.Writer, localMountedDir util.FullPath, remoteMountedLocation *remote_pb.RemoteStorageLocation, dirToCache util.FullPath, fileFilter *FileFilter, remoteConf *remote_pb.RemoteConf, concurrency int) error {
- var wg sync.WaitGroup
- limitedConcurrentExecutor := util.NewLimitedConcurrentExecutor(concurrency)
- var executionErr error
- traverseErr := recursivelyTraverseDirectory(commandEnv, dirToCache, func(dir util.FullPath, entry *filer_pb.Entry) bool {
- if !shouldCacheToLocal(entry) {
- return true // true means recursive traversal should continue
- }
- if !fileFilter.matches(entry) {
- return true
- }
- wg.Add(1)
- limitedConcurrentExecutor.Execute(func() {
- defer wg.Done()
- fmt.Fprintf(writer, "Cache %+v ...\n", dir.Child(entry.Name))
- remoteLocation := filer.MapFullPathToRemoteStorageLocation(localMountedDir, remoteMountedLocation, dir.Child(entry.Name))
- if err := filer.CacheRemoteObjectToLocalCluster(commandEnv, remoteConf, remoteLocation, dir, entry); err != nil {
- fmt.Fprintf(writer, "CacheRemoteObjectToLocalCluster %+v: %v\n", remoteLocation, err)
- if executionErr == nil {
- executionErr = fmt.Errorf("CacheRemoteObjectToLocalCluster %+v: %v\n", remoteLocation, err)
- }
- return
- }
- fmt.Fprintf(writer, "Cache %+v Done\n", dir.Child(entry.Name))
- })
- return true
- })
- wg.Wait()
- if traverseErr != nil {
- return traverseErr
- }
- if executionErr != nil {
- return executionErr
- }
- return nil
- }
|