123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
- package logstore
- import (
- "fmt"
- "github.com/seaweedfs/seaweedfs/weed/filer"
- "github.com/seaweedfs/seaweedfs/weed/glog"
- "github.com/seaweedfs/seaweedfs/weed/mq/topic"
- "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
- "github.com/seaweedfs/seaweedfs/weed/util"
- util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
- "github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
- "google.golang.org/protobuf/proto"
- "math"
- "strings"
- "time"
- )
- func GenLogOnDiskReadFunc(filerClient filer_pb.FilerClient, t topic.Topic, p topic.Partition) log_buffer.LogReadFromDiskFuncType {
- partitionDir := topic.PartitionDir(t, p)
- lookupFileIdFn := filer.LookupFn(filerClient)
- eachChunkFn := func(buf []byte, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
- for pos := 0; pos+4 < len(buf); {
- size := util.BytesToUint32(buf[pos : pos+4])
- if pos+4+int(size) > len(buf) {
- err = fmt.Errorf("GenLogOnDiskReadFunc: read [%d,%d) from [0,%d)", pos, pos+int(size)+4, len(buf))
- return
- }
- entryData := buf[pos+4 : pos+4+int(size)]
- logEntry := &filer_pb.LogEntry{}
- if err = proto.Unmarshal(entryData, logEntry); err != nil {
- pos += 4 + int(size)
- err = fmt.Errorf("unexpected unmarshal mq_pb.Message: %v", err)
- return
- }
- if logEntry.TsNs < starTsNs {
- pos += 4 + int(size)
- continue
- }
- if stopTsNs != 0 && logEntry.TsNs > stopTsNs {
- println("stopTsNs", stopTsNs, "logEntry.TsNs", logEntry.TsNs)
- return
- }
- // fmt.Printf(" read logEntry: %v, ts %v\n", string(logEntry.Key), time.Unix(0, logEntry.TsNs).UTC())
- if _, err = eachLogEntryFn(logEntry); err != nil {
- err = fmt.Errorf("process log entry %v: %v", logEntry, err)
- return
- }
- processedTsNs = logEntry.TsNs
- pos += 4 + int(size)
- }
- return
- }
- eachFileFn := func(entry *filer_pb.Entry, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
- if len(entry.Content) > 0 {
- // skip .offset files
- return
- }
- var urlStrings []string
- for _, chunk := range entry.Chunks {
- if chunk.Size == 0 {
- continue
- }
- if chunk.IsChunkManifest {
- glog.Warningf("this should not happen. unexpected chunk manifest in %s/%s", partitionDir, entry.Name)
- return
- }
- urlStrings, err = lookupFileIdFn(chunk.FileId)
- if err != nil {
- err = fmt.Errorf("lookup %s: %v", chunk.FileId, err)
- return
- }
- if len(urlStrings) == 0 {
- err = fmt.Errorf("no url found for %s", chunk.FileId)
- return
- }
- // try one of the urlString until util.Get(urlString) succeeds
- var processed bool
- for _, urlString := range urlStrings {
- // TODO optimization opportunity: reuse the buffer
- var data []byte
- // fmt.Printf("reading %s/%s %s\n", partitionDir, entry.Name, urlString)
- if data, _, err = util_http.Get(urlString); err == nil {
- processed = true
- if processedTsNs, err = eachChunkFn(data, eachLogEntryFn, starTsNs, stopTsNs); err != nil {
- return
- }
- break
- }
- }
- if !processed {
- err = fmt.Errorf("no data processed for %s %s", entry.Name, chunk.FileId)
- return
- }
- }
- return
- }
- return func(startPosition log_buffer.MessagePosition, stopTsNs int64, eachLogEntryFn log_buffer.EachLogEntryFuncType) (lastReadPosition log_buffer.MessagePosition, isDone bool, err error) {
- startFileName := startPosition.UTC().Format(topic.TIME_FORMAT)
- startTsNs := startPosition.Time.UnixNano()
- stopTime := time.Unix(0, stopTsNs)
- var processedTsNs int64
- err = filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
- return filer_pb.SeaweedList(client, partitionDir, "", func(entry *filer_pb.Entry, isLast bool) error {
- if entry.IsDirectory {
- return nil
- }
- if strings.HasSuffix(entry.Name, ".parquet") {
- return nil
- }
- // FIXME: this is a hack to skip the .offset files
- if strings.HasSuffix(entry.Name, ".offset") {
- return nil
- }
- if stopTsNs != 0 && entry.Name > stopTime.UTC().Format(topic.TIME_FORMAT) {
- isDone = true
- return nil
- }
- if entry.Name < startPosition.UTC().Format(topic.TIME_FORMAT) {
- return nil
- }
- if processedTsNs, err = eachFileFn(entry, eachLogEntryFn, startTsNs, stopTsNs); err != nil {
- return err
- }
- return nil
- }, startFileName, true, math.MaxInt32)
- })
- lastReadPosition = log_buffer.NewMessagePosition(processedTsNs, -2)
- return
- }
- }
|