123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 |
- package command
- import (
- "archive/tar"
- "bytes"
- "fmt"
- "io"
- "os"
- "path"
- "path/filepath"
- "strconv"
- "strings"
- "text/template"
- "time"
- "github.com/chrislusf/seaweedfs/weed/glog"
- "github.com/chrislusf/seaweedfs/weed/storage"
- "github.com/chrislusf/seaweedfs/weed/storage/needle"
- "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
- "github.com/chrislusf/seaweedfs/weed/storage/super_block"
- "github.com/chrislusf/seaweedfs/weed/storage/types"
- )
// defaultFnFormat is the default text/template pattern used to name each
// exported file inside the tar archive (the default of -fileNameFormat).
const defaultFnFormat = "{{.Mime}}/{{.Id}}:{{.Name}}"

// timeFormat is the time layout accepted by the -newer flag; it carries no
// time zone, and runExport parses it relative to localLocation.
const timeFormat = "2006-01-02T15:04:05"
// ExportOptions groups the flag-backed settings that select which volume the
// export command reads. Each field points at the value registered on
// cmdExport.Flag by init().
type ExportOptions struct {
	dir        *string // -dir: directory holding the volume files
	collection *string // -collection: collection name, may be empty
	volumeId   *int    // -volumeId: numeric volume id; -1 means "not given"
}

// export holds the parsed options of the export command.
var export ExportOptions
- var cmdExport = &Command{
- UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
- Short: "list or export files from one volume data file",
- Long: `List all files in a volume, or Export all files in a volume to a tar file if the output is specified.
- The format of file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
- `,
- }
- func init() {
- cmdExport.Run = runExport // break init cycle
- export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
- export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
- export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
- }
- var (
- output = cmdExport.Flag.String("o", "", "output tar file name, must ends with .tar, or just a \"-\" for stdout")
- format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}")
- newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone, e.g. 2006-01-02T15:04:05")
- showDeleted = cmdExport.Flag.Bool("deleted", false, "export deleted files. only applies if -o is not specified")
- limit = cmdExport.Flag.Int("limit", 0, "only show first n entries if specified")
- tarOutputFile *tar.Writer
- tarHeader tar.Header
- fileNameTemplate *template.Template
- fileNameTemplateBuffer = bytes.NewBuffer(nil)
- newerThan time.Time
- newerThanUnix int64 = -1
- localLocation, _ = time.LoadLocation("Local")
- )
- func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool) {
- key := needle.NewFileIdFromNeedle(vid, n).String()
- size := n.DataSize
- if version == needle.Version1 {
- size = n.Size
- }
- fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n",
- key,
- n.Name,
- size,
- n.IsGzipped(),
- n.Mime,
- n.LastModifiedString(),
- n.Ttl.String(),
- deleted,
- )
- }
- type VolumeFileScanner4Export struct {
- version needle.Version
- counter int
- needleMap *needle_map.MemDb
- vid needle.VolumeId
- }
- func (scanner *VolumeFileScanner4Export) VisitSuperBlock(superBlock super_block.SuperBlock) error {
- scanner.version = superBlock.Version
- return nil
- }
- func (scanner *VolumeFileScanner4Export) ReadNeedleBody() bool {
- return true
- }
- func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
- needleMap := scanner.needleMap
- vid := scanner.vid
- nv, ok := needleMap.Get(n.Id)
- glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
- n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped(), ok, nv)
- if ok && nv.Size > 0 && nv.Size != types.TombstoneFileSize && nv.Offset.ToAcutalOffset() == offset {
- if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
- glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
- n.LastModified, newerThanUnix)
- return nil
- }
- scanner.counter++
- if *limit > 0 && scanner.counter > *limit {
- return io.EOF
- }
- if tarOutputFile != nil {
- return writeFile(vid, n)
- } else {
- printNeedle(vid, n, scanner.version, false)
- return nil
- }
- }
- if !ok {
- if *showDeleted && tarOutputFile == nil {
- if n.DataSize > 0 {
- printNeedle(vid, n, scanner.version, true)
- } else {
- n.Name = []byte("*tombstone")
- printNeedle(vid, n, scanner.version, true)
- }
- }
- glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
- } else {
- glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
- }
- return nil
- }
- func runExport(cmd *Command, args []string) bool {
- var err error
- if *newer != "" {
- if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
- fmt.Println("cannot parse 'newer' argument: " + err.Error())
- return false
- }
- newerThanUnix = newerThan.Unix()
- }
- if *export.volumeId == -1 {
- return false
- }
- if *output != "" {
- if *output != "-" && !strings.HasSuffix(*output, ".tar") {
- fmt.Println("the output file", *output, "should be '-' or end with .tar")
- return false
- }
- if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
- fmt.Println("cannot parse format " + *format + ": " + err.Error())
- return false
- }
- var outputFile *os.File
- if *output == "-" {
- outputFile = os.Stdout
- } else {
- if outputFile, err = os.Create(*output); err != nil {
- glog.Fatalf("cannot open output tar %s: %s", *output, err)
- }
- }
- defer outputFile.Close()
- tarOutputFile = tar.NewWriter(outputFile)
- defer tarOutputFile.Close()
- t := time.Now()
- tarHeader = tar.Header{Mode: 0644,
- ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
- Typeflag: tar.TypeReg,
- AccessTime: t, ChangeTime: t}
- }
- fileName := strconv.Itoa(*export.volumeId)
- if *export.collection != "" {
- fileName = *export.collection + "_" + fileName
- }
- vid := needle.VolumeId(*export.volumeId)
- needleMap := needle_map.NewMemDb()
- if err := needleMap.LoadFromIdx(path.Join(*export.dir, fileName+".idx")); err != nil {
- glog.Fatalf("cannot load needle map from %s.idx: %s", fileName, err)
- }
- volumeFileScanner := &VolumeFileScanner4Export{
- needleMap: needleMap,
- vid: vid,
- }
- if tarOutputFile == nil {
- fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n")
- }
- err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner)
- if err != nil && err != io.EOF {
- glog.Fatalf("Export Volume File [ERROR] %s\n", err)
- }
- return true
- }
- type nameParams struct {
- Name string
- Id types.NeedleId
- Mime string
- Key string
- Ext string
- }
- func writeFile(vid needle.VolumeId, n *needle.Needle) (err error) {
- key := needle.NewFileIdFromNeedle(vid, n).String()
- fileNameTemplateBuffer.Reset()
- if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
- nameParams{
- Name: string(n.Name),
- Id: n.Id,
- Mime: string(n.Mime),
- Key: key,
- Ext: filepath.Ext(string(n.Name)),
- },
- ); err != nil {
- return err
- }
- fileName := fileNameTemplateBuffer.String()
- if n.IsGzipped() && path.Ext(fileName) != ".gz" {
- fileName = fileName + ".gz"
- }
- tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
- if n.HasLastModifiedDate() {
- tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
- } else {
- tarHeader.ModTime = time.Unix(0, 0)
- }
- tarHeader.ChangeTime = tarHeader.ModTime
- if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
- return err
- }
- _, err = tarOutputFile.Write(n.Data)
- return
- }
|