export.go 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. package command
  2. import (
  3. "archive/tar"
  4. "bytes"
  5. "fmt"
  6. "io"
  7. "os"
  8. "path"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "text/template"
  13. "time"
  14. "github.com/seaweedfs/seaweedfs/weed/glog"
  15. "github.com/seaweedfs/seaweedfs/weed/storage"
  16. "github.com/seaweedfs/seaweedfs/weed/storage/needle"
  17. "github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
  18. "github.com/seaweedfs/seaweedfs/weed/storage/super_block"
  19. "github.com/seaweedfs/seaweedfs/weed/storage/types"
  20. "github.com/seaweedfs/seaweedfs/weed/util"
  21. )
  22. const (
  23. defaultFnFormat = `{{.Id}}_{{.Name}}{{.Ext}}`
  24. timeFormat = "2006-01-02T15:04:05"
  25. )
  26. var (
  27. export ExportOptions
  28. )
  29. type ExportOptions struct {
  30. dir *string
  31. collection *string
  32. volumeId *int
  33. }
  34. var cmdExport = &Command{
  35. UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
  36. Short: "list or export files from one volume data file",
  37. Long: `List all files in a volume, or Export all files in a volume to a tar file if the output is specified.
  38. The format of file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
  39. `,
  40. }
  41. func init() {
  42. cmdExport.Run = runExport // break init cycle
  43. export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
  44. export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
  45. export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
  46. }
  47. var (
  48. output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout")
  49. format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Id}} {{.Name}} {{.Ext}}")
  50. newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05")
  51. showDeleted = cmdExport.Flag.Bool("deleted", false, "export deleted files. only applies if -o is not specified")
  52. limit = cmdExport.Flag.Int("limit", 0, "only show first n entries if specified")
  53. tarOutputFile *tar.Writer
  54. tarHeader tar.Header
  55. fileNameTemplate *template.Template
  56. fileNameTemplateBuffer = bytes.NewBuffer(nil)
  57. newerThan time.Time
  58. newerThanUnix int64 = -1
  59. localLocation, _ = time.LoadLocation("Local")
  60. )
  61. func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool, offset int64, onDiskSize int64) {
  62. key := needle.NewFileIdFromNeedle(vid, n).String()
  63. size := int32(n.DataSize)
  64. if version == needle.Version1 {
  65. size = int32(n.Size)
  66. }
  67. fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\t%d\t%d\n",
  68. key,
  69. n.Name,
  70. size,
  71. n.IsCompressed(),
  72. n.Mime,
  73. n.LastModifiedString(),
  74. n.Ttl.String(),
  75. deleted,
  76. offset,
  77. offset+onDiskSize,
  78. )
  79. }
  80. type VolumeFileScanner4Export struct {
  81. version needle.Version
  82. counter int
  83. needleMap *needle_map.MemDb
  84. vid needle.VolumeId
  85. }
  86. func (scanner *VolumeFileScanner4Export) VisitSuperBlock(superBlock super_block.SuperBlock) error {
  87. scanner.version = superBlock.Version
  88. return nil
  89. }
  90. func (scanner *VolumeFileScanner4Export) ReadNeedleBody() bool {
  91. return true
  92. }
  93. func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
  94. needleMap := scanner.needleMap
  95. vid := scanner.vid
  96. nv, ok := needleMap.Get(n.Id)
  97. glog.V(3).Infof("key %d offset %d size %d disk_size %d compressed %v ok %v nv %+v",
  98. n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed(), ok, nv)
  99. if *showDeleted && n.Size > 0 || ok && nv.Size.IsValid() && nv.Offset.ToActualOffset() == offset {
  100. if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
  101. glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
  102. n.LastModified, newerThanUnix)
  103. return nil
  104. }
  105. scanner.counter++
  106. if *limit > 0 && scanner.counter > *limit {
  107. return io.EOF
  108. }
  109. if tarOutputFile != nil {
  110. return writeFile(vid, n)
  111. } else {
  112. printNeedle(vid, n, scanner.version, false, offset, n.DiskSize(scanner.version))
  113. return nil
  114. }
  115. }
  116. if !ok {
  117. if *showDeleted && tarOutputFile == nil {
  118. if n.DataSize > 0 {
  119. printNeedle(vid, n, scanner.version, true, offset, n.DiskSize(scanner.version))
  120. } else {
  121. n.Name = []byte("*tombstone")
  122. printNeedle(vid, n, scanner.version, true, offset, n.DiskSize(scanner.version))
  123. }
  124. }
  125. glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
  126. } else {
  127. glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
  128. }
  129. return nil
  130. }
  131. func runExport(cmd *Command, args []string) bool {
  132. var err error
  133. if *newer != "" {
  134. if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
  135. fmt.Println("cannot parse 'newer' argument: " + err.Error())
  136. return false
  137. }
  138. newerThanUnix = newerThan.Unix()
  139. }
  140. if *export.volumeId == -1 {
  141. return false
  142. }
  143. if *output != "" {
  144. if *output != "-" && !strings.HasSuffix(*output, ".tar") {
  145. fmt.Println("the output file", *output, "should be '-' or end with .tar")
  146. return false
  147. }
  148. if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
  149. fmt.Println("cannot parse format " + *format + ": " + err.Error())
  150. return false
  151. }
  152. var outputFile *os.File
  153. if *output == "-" {
  154. outputFile = os.Stdout
  155. } else {
  156. if outputFile, err = os.Create(*output); err != nil {
  157. glog.Fatalf("cannot open output tar %s: %s", *output, err)
  158. }
  159. }
  160. defer outputFile.Close()
  161. tarOutputFile = tar.NewWriter(outputFile)
  162. defer tarOutputFile.Close()
  163. t := time.Now()
  164. tarHeader = tar.Header{Mode: 0644,
  165. ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
  166. Typeflag: tar.TypeReg,
  167. AccessTime: t, ChangeTime: t}
  168. }
  169. fileName := strconv.Itoa(*export.volumeId)
  170. if *export.collection != "" {
  171. fileName = *export.collection + "_" + fileName
  172. }
  173. vid := needle.VolumeId(*export.volumeId)
  174. needleMap := needle_map.NewMemDb()
  175. defer needleMap.Close()
  176. if err := needleMap.LoadFromIdx(path.Join(util.ResolvePath(*export.dir), fileName+".idx")); err != nil {
  177. glog.Fatalf("cannot load needle map from %s.idx: %s", fileName, err)
  178. }
  179. volumeFileScanner := &VolumeFileScanner4Export{
  180. needleMap: needleMap,
  181. vid: vid,
  182. }
  183. if tarOutputFile == nil {
  184. fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\tstart\tstop\n")
  185. }
  186. err = storage.ScanVolumeFile(util.ResolvePath(*export.dir), *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner)
  187. if err != nil && err != io.EOF {
  188. glog.Errorf("Export Volume File [ERROR] %s\n", err)
  189. }
  190. return true
  191. }
  192. type nameParams struct {
  193. Name string
  194. Id types.NeedleId
  195. Mime string
  196. Key string
  197. Ext string
  198. }
  199. func writeFile(vid needle.VolumeId, n *needle.Needle) (err error) {
  200. key := needle.NewFileIdFromNeedle(vid, n).String()
  201. fileNameTemplateBuffer.Reset()
  202. if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
  203. nameParams{
  204. Name: string(n.Name),
  205. Id: n.Id,
  206. Mime: string(n.Mime),
  207. Key: key,
  208. Ext: filepath.Ext(string(n.Name)),
  209. },
  210. ); err != nil {
  211. return err
  212. }
  213. fileName := fileNameTemplateBuffer.String()
  214. if n.IsCompressed() {
  215. if util.IsGzippedContent(n.Data) && path.Ext(fileName) != ".gz" {
  216. fileName = fileName + ".gz"
  217. }
  218. // TODO other compression method
  219. }
  220. tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
  221. if n.HasLastModifiedDate() {
  222. tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
  223. } else {
  224. tarHeader.ModTime = time.Unix(0, 0)
  225. }
  226. tarHeader.ChangeTime = tarHeader.ModTime
  227. if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
  228. return err
  229. }
  230. _, err = tarOutputFile.Write(n.Data)
  231. return
  232. }