export.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. package command
  2. import (
  3. "archive/tar"
  4. "bytes"
  5. "fmt"
  6. "os"
  7. "path"
  8. "path/filepath"
  9. "strconv"
  10. "strings"
  11. "text/template"
  12. "time"
  13. "io"
  14. "github.com/chrislusf/seaweedfs/weed/glog"
  15. "github.com/chrislusf/seaweedfs/weed/storage"
  16. "github.com/chrislusf/seaweedfs/weed/storage/needle"
  17. "github.com/chrislusf/seaweedfs/weed/storage/types"
  18. )
  19. const (
  20. defaultFnFormat = `{{.Mime}}/{{.Id}}:{{.Name}}`
  21. timeFormat = "2006-01-02T15:04:05"
  22. )
  23. var (
  24. export ExportOptions
  25. )
  26. type ExportOptions struct {
  27. dir *string
  28. collection *string
  29. volumeId *int
  30. }
  31. var cmdExport = &Command{
  32. UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
  33. Short: "list or export files from one volume data file",
  34. Long: `List all files in a volume, or Export all files in a volume to a tar file if the output is specified.
  35. The format of file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
  36. `,
  37. }
  38. func init() {
  39. cmdExport.Run = runExport // break init cycle
  40. export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
  41. export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
  42. export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
  43. }
  44. var (
  45. output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout")
  46. format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}")
  47. newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05")
  48. showDeleted = cmdExport.Flag.Bool("deleted", false, "export deleted files. only applies if -o is not specified")
  49. limit = cmdExport.Flag.Int("limit", 0, "only show first n entries if specified")
  50. tarOutputFile *tar.Writer
  51. tarHeader tar.Header
  52. fileNameTemplate *template.Template
  53. fileNameTemplateBuffer = bytes.NewBuffer(nil)
  54. newerThan time.Time
  55. newerThanUnix int64 = -1
  56. localLocation, _ = time.LoadLocation("Local")
  57. )
  58. func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool) {
  59. key := needle.NewFileIdFromNeedle(vid, n).String()
  60. size := n.DataSize
  61. if version == needle.Version1 {
  62. size = n.Size
  63. }
  64. fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n",
  65. key,
  66. n.Name,
  67. size,
  68. n.IsGzipped(),
  69. n.Mime,
  70. n.LastModifiedString(),
  71. n.Ttl.String(),
  72. deleted,
  73. )
  74. }
  75. type VolumeFileScanner4Export struct {
  76. version needle.Version
  77. counter int
  78. needleMap *storage.NeedleMap
  79. vid needle.VolumeId
  80. }
  81. func (scanner *VolumeFileScanner4Export) VisitSuperBlock(superBlock storage.SuperBlock) error {
  82. scanner.version = superBlock.Version()
  83. return nil
  84. }
  85. func (scanner *VolumeFileScanner4Export) ReadNeedleBody() bool {
  86. return true
  87. }
  88. func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
  89. needleMap := scanner.needleMap
  90. vid := scanner.vid
  91. nv, ok := needleMap.Get(n.Id)
  92. glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
  93. n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped(), ok, nv)
  94. if ok && nv.Size > 0 && nv.Size != types.TombstoneFileSize && nv.Offset.ToAcutalOffset() == offset {
  95. if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
  96. glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
  97. n.LastModified, newerThanUnix)
  98. return nil
  99. }
  100. scanner.counter++
  101. if *limit > 0 && scanner.counter > *limit {
  102. return io.EOF
  103. }
  104. if tarOutputFile != nil {
  105. return writeFile(vid, n)
  106. } else {
  107. printNeedle(vid, n, scanner.version, false)
  108. return nil
  109. }
  110. }
  111. if !ok {
  112. if *showDeleted && tarOutputFile == nil {
  113. if n.DataSize > 0 {
  114. printNeedle(vid, n, scanner.version, true)
  115. } else {
  116. n.Name = []byte("*tombstone")
  117. printNeedle(vid, n, scanner.version, true)
  118. }
  119. }
  120. glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
  121. } else {
  122. glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
  123. }
  124. return nil
  125. }
  126. func runExport(cmd *Command, args []string) bool {
  127. var err error
  128. if *newer != "" {
  129. if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
  130. fmt.Println("cannot parse 'newer' argument: " + err.Error())
  131. return false
  132. }
  133. newerThanUnix = newerThan.Unix()
  134. }
  135. if *export.volumeId == -1 {
  136. return false
  137. }
  138. if *output != "" {
  139. if *output != "-" && !strings.HasSuffix(*output, ".tar") {
  140. fmt.Println("the output file", *output, "should be '-' or end with .tar")
  141. return false
  142. }
  143. if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
  144. fmt.Println("cannot parse format " + *format + ": " + err.Error())
  145. return false
  146. }
  147. var outputFile *os.File
  148. if *output == "-" {
  149. outputFile = os.Stdout
  150. } else {
  151. if outputFile, err = os.Create(*output); err != nil {
  152. glog.Fatalf("cannot open output tar %s: %s", *output, err)
  153. }
  154. }
  155. defer outputFile.Close()
  156. tarOutputFile = tar.NewWriter(outputFile)
  157. defer tarOutputFile.Close()
  158. t := time.Now()
  159. tarHeader = tar.Header{Mode: 0644,
  160. ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
  161. Typeflag: tar.TypeReg,
  162. AccessTime: t, ChangeTime: t}
  163. }
  164. fileName := strconv.Itoa(*export.volumeId)
  165. if *export.collection != "" {
  166. fileName = *export.collection + "_" + fileName
  167. }
  168. vid := needle.VolumeId(*export.volumeId)
  169. indexFile, err := os.OpenFile(path.Join(*export.dir, fileName+".idx"), os.O_RDONLY, 0644)
  170. if err != nil {
  171. glog.Fatalf("Create Volume Index [ERROR] %s\n", err)
  172. }
  173. defer indexFile.Close()
  174. needleMap, err := storage.LoadBtreeNeedleMap(indexFile)
  175. if err != nil {
  176. glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err)
  177. }
  178. volumeFileScanner := &VolumeFileScanner4Export{
  179. needleMap: needleMap,
  180. vid: vid,
  181. }
  182. if tarOutputFile == nil {
  183. fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n")
  184. }
  185. err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner)
  186. if err != nil && err != io.EOF {
  187. glog.Fatalf("Export Volume File [ERROR] %s\n", err)
  188. }
  189. return true
  190. }
  191. type nameParams struct {
  192. Name string
  193. Id types.NeedleId
  194. Mime string
  195. Key string
  196. Ext string
  197. }
  198. func writeFile(vid needle.VolumeId, n *needle.Needle) (err error) {
  199. key := needle.NewFileIdFromNeedle(vid, n).String()
  200. fileNameTemplateBuffer.Reset()
  201. if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
  202. nameParams{
  203. Name: string(n.Name),
  204. Id: n.Id,
  205. Mime: string(n.Mime),
  206. Key: key,
  207. Ext: filepath.Ext(string(n.Name)),
  208. },
  209. ); err != nil {
  210. return err
  211. }
  212. fileName := fileNameTemplateBuffer.String()
  213. if n.IsGzipped() && path.Ext(fileName) != ".gz" {
  214. fileName = fileName + ".gz"
  215. }
  216. tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
  217. if n.HasLastModifiedDate() {
  218. tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
  219. } else {
  220. tarHeader.ModTime = time.Unix(0, 0)
  221. }
  222. tarHeader.ChangeTime = tarHeader.ModTime
  223. if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
  224. return err
  225. }
  226. _, err = tarOutputFile.Write(n.Data)
  227. return
  228. }