read_log_from_disk.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. package logstore
  2. import (
  3. "fmt"
  4. "github.com/seaweedfs/seaweedfs/weed/filer"
  5. "github.com/seaweedfs/seaweedfs/weed/glog"
  6. "github.com/seaweedfs/seaweedfs/weed/mq/topic"
  7. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  8. "github.com/seaweedfs/seaweedfs/weed/util"
  9. util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
  10. "github.com/seaweedfs/seaweedfs/weed/util/log_buffer"
  11. "google.golang.org/protobuf/proto"
  12. "math"
  13. "strings"
  14. "time"
  15. )
  16. func GenLogOnDiskReadFunc(filerClient filer_pb.FilerClient, t topic.Topic, p topic.Partition) log_buffer.LogReadFromDiskFuncType {
  17. partitionDir := topic.PartitionDir(t, p)
  18. lookupFileIdFn := filer.LookupFn(filerClient)
  19. eachChunkFn := func(buf []byte, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
  20. for pos := 0; pos+4 < len(buf); {
  21. size := util.BytesToUint32(buf[pos : pos+4])
  22. if pos+4+int(size) > len(buf) {
  23. err = fmt.Errorf("GenLogOnDiskReadFunc: read [%d,%d) from [0,%d)", pos, pos+int(size)+4, len(buf))
  24. return
  25. }
  26. entryData := buf[pos+4 : pos+4+int(size)]
  27. logEntry := &filer_pb.LogEntry{}
  28. if err = proto.Unmarshal(entryData, logEntry); err != nil {
  29. pos += 4 + int(size)
  30. err = fmt.Errorf("unexpected unmarshal mq_pb.Message: %v", err)
  31. return
  32. }
  33. if logEntry.TsNs < starTsNs {
  34. pos += 4 + int(size)
  35. continue
  36. }
  37. if stopTsNs != 0 && logEntry.TsNs > stopTsNs {
  38. println("stopTsNs", stopTsNs, "logEntry.TsNs", logEntry.TsNs)
  39. return
  40. }
  41. // fmt.Printf(" read logEntry: %v, ts %v\n", string(logEntry.Key), time.Unix(0, logEntry.TsNs).UTC())
  42. if _, err = eachLogEntryFn(logEntry); err != nil {
  43. err = fmt.Errorf("process log entry %v: %v", logEntry, err)
  44. return
  45. }
  46. processedTsNs = logEntry.TsNs
  47. pos += 4 + int(size)
  48. }
  49. return
  50. }
  51. eachFileFn := func(entry *filer_pb.Entry, eachLogEntryFn log_buffer.EachLogEntryFuncType, starTsNs, stopTsNs int64) (processedTsNs int64, err error) {
  52. if len(entry.Content) > 0 {
  53. // skip .offset files
  54. return
  55. }
  56. var urlStrings []string
  57. for _, chunk := range entry.Chunks {
  58. if chunk.Size == 0 {
  59. continue
  60. }
  61. if chunk.IsChunkManifest {
  62. glog.Warningf("this should not happen. unexpected chunk manifest in %s/%s", partitionDir, entry.Name)
  63. return
  64. }
  65. urlStrings, err = lookupFileIdFn(chunk.FileId)
  66. if err != nil {
  67. err = fmt.Errorf("lookup %s: %v", chunk.FileId, err)
  68. return
  69. }
  70. if len(urlStrings) == 0 {
  71. err = fmt.Errorf("no url found for %s", chunk.FileId)
  72. return
  73. }
  74. // try one of the urlString until util.Get(urlString) succeeds
  75. var processed bool
  76. for _, urlString := range urlStrings {
  77. // TODO optimization opportunity: reuse the buffer
  78. var data []byte
  79. // fmt.Printf("reading %s/%s %s\n", partitionDir, entry.Name, urlString)
  80. if data, _, err = util_http.Get(urlString); err == nil {
  81. processed = true
  82. if processedTsNs, err = eachChunkFn(data, eachLogEntryFn, starTsNs, stopTsNs); err != nil {
  83. return
  84. }
  85. break
  86. }
  87. }
  88. if !processed {
  89. err = fmt.Errorf("no data processed for %s %s", entry.Name, chunk.FileId)
  90. return
  91. }
  92. }
  93. return
  94. }
  95. return func(startPosition log_buffer.MessagePosition, stopTsNs int64, eachLogEntryFn log_buffer.EachLogEntryFuncType) (lastReadPosition log_buffer.MessagePosition, isDone bool, err error) {
  96. startFileName := startPosition.UTC().Format(topic.TIME_FORMAT)
  97. startTsNs := startPosition.Time.UnixNano()
  98. stopTime := time.Unix(0, stopTsNs)
  99. var processedTsNs int64
  100. err = filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
  101. return filer_pb.SeaweedList(client, partitionDir, "", func(entry *filer_pb.Entry, isLast bool) error {
  102. if entry.IsDirectory {
  103. return nil
  104. }
  105. if strings.HasSuffix(entry.Name, ".parquet") {
  106. return nil
  107. }
  108. // FIXME: this is a hack to skip the .offset files
  109. if strings.HasSuffix(entry.Name, ".offset") {
  110. return nil
  111. }
  112. if stopTsNs != 0 && entry.Name > stopTime.UTC().Format(topic.TIME_FORMAT) {
  113. isDone = true
  114. return nil
  115. }
  116. if entry.Name < startPosition.UTC().Format(topic.TIME_FORMAT) {
  117. return nil
  118. }
  119. if processedTsNs, err = eachFileFn(entry, eachLogEntryFn, startTsNs, stopTsNs); err != nil {
  120. return err
  121. }
  122. return nil
  123. }, startFileName, true, math.MaxInt32)
  124. })
  125. lastReadPosition = log_buffer.NewMessagePosition(processedTsNs, -2)
  126. return
  127. }
  128. }