filer_backup.go 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. package command
  2. import (
  3. "fmt"
  4. "github.com/seaweedfs/seaweedfs/weed/glog"
  5. "github.com/seaweedfs/seaweedfs/weed/pb"
  6. "github.com/seaweedfs/seaweedfs/weed/replication/source"
  7. "github.com/seaweedfs/seaweedfs/weed/security"
  8. "github.com/seaweedfs/seaweedfs/weed/util"
  9. "google.golang.org/grpc"
  10. "regexp"
  11. "time"
  12. )
  13. type FilerBackupOptions struct {
  14. isActivePassive *bool
  15. filer *string
  16. path *string
  17. excludePaths *string
  18. excludeFileName *string
  19. debug *bool
  20. proxyByFiler *bool
  21. doDeleteFiles *bool
  22. timeAgo *time.Duration
  23. retentionDays *int
  24. }
  25. var (
  26. filerBackupOptions FilerBackupOptions
  27. )
  28. func init() {
  29. cmdFilerBackup.Run = runFilerBackup // break init cycle
  30. filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster")
  31. filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer")
  32. filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer")
  33. filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "exclude file names that match the regexp to sync on filer")
  34. filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers")
  35. filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination")
  36. filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files")
  37. filerBackupOptions.timeAgo = cmdFilerBackup.Flag.Duration("timeAgo", 0, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
  38. filerBackupOptions.retentionDays = cmdFilerBackup.Flag.Int("retentionDays", 0, "incremental backup retention days")
  39. }
  40. var cmdFilerBackup = &Command{
  41. UsageLine: "filer.backup -filer=<filerHost>:<filerPort> ",
  42. Short: "resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml",
  43. Long: `resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml
  44. filer.backup listens on filer notifications. If any file is updated, it will fetch the updated content,
  45. and write to the destination. This is to replace filer.replicate command since additional message queue is not needed.
  46. If restarted and "-timeAgo" is not set, the synchronization will resume from the previous checkpoints, persisted every minute.
  47. A fresh sync will start from the earliest metadata logs. To reset the checkpoints, just set "-timeAgo" to a high value.
  48. `,
  49. }
  50. func runFilerBackup(cmd *Command, args []string) bool {
  51. util.LoadConfiguration("security", false)
  52. util.LoadConfiguration("replication", true)
  53. grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
  54. clientId := util.RandomInt32()
  55. var clientEpoch int32
  56. for {
  57. clientEpoch++
  58. err := doFilerBackup(grpcDialOption, &filerBackupOptions, clientId, clientEpoch)
  59. if err != nil {
  60. glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err)
  61. time.Sleep(1747 * time.Millisecond)
  62. }
  63. }
  64. return true
  65. }
  66. const (
  67. BackupKeyPrefix = "backup."
  68. )
  69. func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, clientId int32, clientEpoch int32) error {
  70. // find data sink
  71. config := util.GetViper()
  72. dataSink := findSink(config)
  73. if dataSink == nil {
  74. return fmt.Errorf("no data sink configured in replication.toml")
  75. }
  76. sourceFiler := pb.ServerAddress(*backupOption.filer)
  77. sourcePath := *backupOption.path
  78. excludePaths := util.StringSplit(*backupOption.excludePaths, ",")
  79. var reExcludeFileName *regexp.Regexp
  80. if *backupOption.excludeFileName != "" {
  81. var err error
  82. if reExcludeFileName, err = regexp.Compile(*backupOption.excludeFileName); err != nil {
  83. return fmt.Errorf("error compile regexp %v for exclude file name: %+v", *backupOption.excludeFileName, err)
  84. }
  85. }
  86. timeAgo := *backupOption.timeAgo
  87. targetPath := dataSink.GetSinkToDirectory()
  88. debug := *backupOption.debug
  89. // get start time for the data sink
  90. startFrom := time.Unix(0, 0)
  91. sinkId := util.HashStringToLong(dataSink.GetName() + dataSink.GetSinkToDirectory())
  92. if timeAgo.Milliseconds() == 0 {
  93. lastOffsetTsNs, err := getOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId))
  94. if err != nil {
  95. glog.V(0).Infof("starting from %v", startFrom)
  96. } else {
  97. startFrom = time.Unix(0, lastOffsetTsNs)
  98. glog.V(0).Infof("resuming from %v", startFrom)
  99. }
  100. } else {
  101. startFrom = time.Now().Add(-timeAgo)
  102. glog.V(0).Infof("start time is set to %v", startFrom)
  103. }
  104. // create filer sink
  105. filerSource := &source.FilerSource{}
  106. filerSource.DoInitialize(
  107. sourceFiler.ToHttpAddress(),
  108. sourceFiler.ToGrpcAddress(),
  109. sourcePath,
  110. *backupOption.proxyByFiler)
  111. dataSink.SetSourceFiler(filerSource)
  112. processEventFn := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, dataSink, *backupOption.doDeleteFiles, debug)
  113. processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error {
  114. glog.V(0).Infof("backup %s progressed to %v %0.2f/sec", sourceFiler, time.Unix(0, lastTsNs), float64(counter)/float64(3))
  115. return setOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId), lastTsNs)
  116. })
  117. if dataSink.IsIncremental() && *filerBackupOptions.retentionDays > 0 {
  118. go func() {
  119. for {
  120. now := time.Now()
  121. time.Sleep(time.Hour * 24)
  122. key := util.Join(targetPath, now.Add(-1*time.Hour*24*time.Duration(*filerBackupOptions.retentionDays)).Format("2006-01-02"))
  123. _ = dataSink.DeleteEntry(util.Join(targetPath, key), true, true, nil)
  124. glog.V(0).Infof("incremental backup delete directory:%s", key)
  125. }
  126. }()
  127. }
  128. metadataFollowOption := &pb.MetadataFollowOption{
  129. ClientName: "backup_" + dataSink.GetName(),
  130. ClientId: clientId,
  131. ClientEpoch: clientEpoch,
  132. SelfSignature: 0,
  133. PathPrefix: sourcePath,
  134. AdditionalPathPrefixes: nil,
  135. DirectoriesToWatch: nil,
  136. StartTsNs: startFrom.UnixNano(),
  137. StopTsNs: 0,
  138. EventErrorType: pb.TrivialOnError,
  139. }
  140. return pb.FollowMetadata(sourceFiler, grpcDialOption, metadataFollowOption, processEventFnWithOffset)
  141. }