export.go 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. package command
  2. import (
  3. "archive/tar"
  4. "bytes"
  5. "fmt"
  6. "os"
  7. "path"
  8. "path/filepath"
  9. "strconv"
  10. "strings"
  11. "text/template"
  12. "time"
  13. "github.com/chrislusf/seaweedfs/weed/glog"
  14. "github.com/chrislusf/seaweedfs/weed/storage"
  15. )
  16. const (
  17. defaultFnFormat = `{{.Mime}}/{{.Id}}:{{.Name}}`
  18. timeFormat = "2006-01-02T15:04:05"
  19. )
  20. var (
  21. export ExportOptions
  22. )
  23. type ExportOptions struct {
  24. dir *string
  25. collection *string
  26. volumeId *int
  27. }
  28. var cmdExport = &Command{
  29. UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
  30. Short: "list or export files from one volume data file",
  31. Long: `List all files in a volume, or Export all files in a volume to a tar file if the output is specified.
  32. The format of file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
  33. `,
  34. }
  35. func init() {
  36. cmdExport.Run = runExport // break init cycle
  37. export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
  38. export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
  39. export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
  40. }
  41. var (
  42. output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout")
  43. format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}")
  44. newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05")
  45. tarOutputFile *tar.Writer
  46. tarHeader tar.Header
  47. fileNameTemplate *template.Template
  48. fileNameTemplateBuffer = bytes.NewBuffer(nil)
  49. newerThan time.Time
  50. newerThanUnix int64 = -1
  51. localLocation, _ = time.LoadLocation("Local")
  52. )
  53. func runExport(cmd *Command, args []string) bool {
  54. var err error
  55. if *newer != "" {
  56. if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
  57. fmt.Println("cannot parse 'newer' argument: " + err.Error())
  58. return false
  59. }
  60. newerThanUnix = newerThan.Unix()
  61. }
  62. if *export.volumeId == -1 {
  63. return false
  64. }
  65. if *output != "" {
  66. if *output != "-" && !strings.HasSuffix(*output, ".tar") {
  67. fmt.Println("the output file", *output, "should be '-' or end with .tar")
  68. return false
  69. }
  70. if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
  71. fmt.Println("cannot parse format " + *format + ": " + err.Error())
  72. return false
  73. }
  74. var outputFile *os.File
  75. if *output == "-" {
  76. outputFile = os.Stdout
  77. } else {
  78. if outputFile, err = os.Create(*output); err != nil {
  79. glog.Fatalf("cannot open output tar %s: %s", *output, err)
  80. }
  81. }
  82. defer outputFile.Close()
  83. tarOutputFile = tar.NewWriter(outputFile)
  84. defer tarOutputFile.Close()
  85. t := time.Now()
  86. tarHeader = tar.Header{Mode: 0644,
  87. ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
  88. Typeflag: tar.TypeReg,
  89. AccessTime: t, ChangeTime: t}
  90. }
  91. fileName := strconv.Itoa(*export.volumeId)
  92. if *export.collection != "" {
  93. fileName = *export.collection + "_" + fileName
  94. }
  95. vid := storage.VolumeId(*export.volumeId)
  96. indexFile, err := os.OpenFile(path.Join(*export.dir, fileName+".idx"), os.O_RDONLY, 0644)
  97. if err != nil {
  98. glog.Fatalf("Create Volume Index [ERROR] %s\n", err)
  99. }
  100. defer indexFile.Close()
  101. needleMap, err := storage.LoadBtreeNeedleMap(indexFile)
  102. if err != nil {
  103. glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err)
  104. }
  105. var version storage.Version
  106. err = storage.ScanVolumeFile(*export.dir, *export.collection, vid,
  107. storage.NeedleMapInMemory,
  108. func(superBlock storage.SuperBlock) error {
  109. version = superBlock.Version()
  110. return nil
  111. }, true, func(n *storage.Needle, offset int64) error {
  112. nv, ok := needleMap.Get(n.Id)
  113. glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
  114. n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped(), ok, nv)
  115. if ok && nv.Size > 0 && int64(nv.Offset)*8 == offset {
  116. if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
  117. glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
  118. n.LastModified, newerThanUnix)
  119. return nil
  120. }
  121. return walker(vid, n, version)
  122. }
  123. if !ok {
  124. glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
  125. } else {
  126. glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
  127. }
  128. return nil
  129. })
  130. if err != nil {
  131. glog.Fatalf("Export Volume File [ERROR] %s\n", err)
  132. }
  133. return true
  134. }
  135. type nameParams struct {
  136. Name string
  137. Id uint64
  138. Mime string
  139. Key string
  140. Ext string
  141. }
  142. func walker(vid storage.VolumeId, n *storage.Needle, version storage.Version) (err error) {
  143. key := storage.NewFileIdFromNeedle(vid, n).String()
  144. if tarOutputFile != nil {
  145. fileNameTemplateBuffer.Reset()
  146. if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
  147. nameParams{
  148. Name: string(n.Name),
  149. Id: n.Id,
  150. Mime: string(n.Mime),
  151. Key: key,
  152. Ext: filepath.Ext(string(n.Name)),
  153. },
  154. ); err != nil {
  155. return err
  156. }
  157. fileName := fileNameTemplateBuffer.String()
  158. if n.IsGzipped() && path.Ext(fileName) != ".gz" {
  159. fileName = fileName + ".gz"
  160. }
  161. tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
  162. if n.HasLastModifiedDate() {
  163. tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
  164. } else {
  165. tarHeader.ModTime = time.Unix(0, 0)
  166. }
  167. tarHeader.ChangeTime = tarHeader.ModTime
  168. if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
  169. return err
  170. }
  171. _, err = tarOutputFile.Write(n.Data)
  172. } else {
  173. size := n.DataSize
  174. if version == storage.Version1 {
  175. size = n.Size
  176. }
  177. fmt.Printf("key=%s Name=%s Size=%d gzip=%t mime=%s\n",
  178. key,
  179. n.Name,
  180. size,
  181. n.IsGzipped(),
  182. n.Mime,
  183. )
  184. }
  185. return
  186. }