log_buffer.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. package log_buffer
  2. import (
  3. "bytes"
  4. "sync"
  5. "time"
  6. "google.golang.org/protobuf/proto"
  7. "github.com/seaweedfs/seaweedfs/weed/glog"
  8. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  9. "github.com/seaweedfs/seaweedfs/weed/util"
  10. )
  11. const BufferSize = 4 * 1024 * 1024
  12. const PreviousBufferCount = 3
  13. type dataToFlush struct {
  14. startTime time.Time
  15. stopTime time.Time
  16. data *bytes.Buffer
  17. }
  18. type LogBuffer struct {
  19. name string
  20. prevBuffers *SealedBuffers
  21. buf []byte
  22. idx []int
  23. pos int
  24. startTime time.Time
  25. stopTime time.Time
  26. lastFlushTime time.Time
  27. sizeBuf []byte
  28. flushInterval time.Duration
  29. flushFn func(startTime, stopTime time.Time, buf []byte)
  30. notifyFn func()
  31. isStopping bool
  32. flushChan chan *dataToFlush
  33. lastTsNs int64
  34. sync.RWMutex
  35. }
  36. func NewLogBuffer(name string, flushInterval time.Duration, flushFn func(startTime, stopTime time.Time, buf []byte), notifyFn func()) *LogBuffer {
  37. lb := &LogBuffer{
  38. name: name,
  39. prevBuffers: newSealedBuffers(PreviousBufferCount),
  40. buf: make([]byte, BufferSize),
  41. sizeBuf: make([]byte, 4),
  42. flushInterval: flushInterval,
  43. flushFn: flushFn,
  44. notifyFn: notifyFn,
  45. flushChan: make(chan *dataToFlush, 256),
  46. }
  47. go lb.loopFlush()
  48. go lb.loopInterval()
  49. return lb
  50. }
  51. func (m *LogBuffer) AddToBuffer(partitionKey, data []byte, processingTsNs int64) {
  52. var toFlush *dataToFlush
  53. m.Lock()
  54. defer func() {
  55. m.Unlock()
  56. if toFlush != nil {
  57. m.flushChan <- toFlush
  58. }
  59. if m.notifyFn != nil {
  60. m.notifyFn()
  61. }
  62. }()
  63. // need to put the timestamp inside the lock
  64. var ts time.Time
  65. if processingTsNs == 0 {
  66. ts = time.Now()
  67. processingTsNs = ts.UnixNano()
  68. } else {
  69. ts = time.Unix(0, processingTsNs)
  70. }
  71. if m.lastTsNs >= processingTsNs {
  72. // this is unlikely to happen, but just in case
  73. processingTsNs = m.lastTsNs + 1
  74. ts = time.Unix(0, processingTsNs)
  75. }
  76. m.lastTsNs = processingTsNs
  77. logEntry := &filer_pb.LogEntry{
  78. TsNs: processingTsNs,
  79. PartitionKeyHash: util.HashToInt32(partitionKey),
  80. Data: data,
  81. }
  82. logEntryData, _ := proto.Marshal(logEntry)
  83. size := len(logEntryData)
  84. if m.pos == 0 {
  85. m.startTime = ts
  86. }
  87. if m.startTime.Add(m.flushInterval).Before(ts) || len(m.buf)-m.pos < size+4 {
  88. // glog.V(4).Infof("%s copyToFlush1 start time %v, ts %v, remaining %d bytes", m.name, m.startTime, ts, len(m.buf)-m.pos)
  89. toFlush = m.copyToFlush()
  90. m.startTime = ts
  91. if len(m.buf) < size+4 {
  92. m.buf = make([]byte, 2*size+4)
  93. }
  94. }
  95. m.stopTime = ts
  96. m.idx = append(m.idx, m.pos)
  97. util.Uint32toBytes(m.sizeBuf, uint32(size))
  98. copy(m.buf[m.pos:m.pos+4], m.sizeBuf)
  99. copy(m.buf[m.pos+4:m.pos+4+size], logEntryData)
  100. m.pos += size + 4
  101. // fmt.Printf("entry size %d total %d count %d, buffer:%p\n", size, m.pos, len(m.idx), m)
  102. }
  103. func (m *LogBuffer) Shutdown() {
  104. m.Lock()
  105. defer m.Unlock()
  106. if m.isStopping {
  107. return
  108. }
  109. m.isStopping = true
  110. toFlush := m.copyToFlush()
  111. m.flushChan <- toFlush
  112. close(m.flushChan)
  113. }
  114. func (m *LogBuffer) loopFlush() {
  115. for d := range m.flushChan {
  116. if d != nil {
  117. // glog.V(4).Infof("%s flush [%v, %v] size %d", m.name, d.startTime, d.stopTime, len(d.data.Bytes()))
  118. m.flushFn(d.startTime, d.stopTime, d.data.Bytes())
  119. d.releaseMemory()
  120. // local logbuffer is different from aggregate logbuffer here
  121. m.lastFlushTime = d.stopTime
  122. }
  123. }
  124. }
  125. func (m *LogBuffer) loopInterval() {
  126. for !m.isStopping {
  127. time.Sleep(m.flushInterval)
  128. m.Lock()
  129. if m.isStopping {
  130. m.Unlock()
  131. return
  132. }
  133. toFlush := m.copyToFlush()
  134. m.Unlock()
  135. if toFlush != nil {
  136. m.flushChan <- toFlush
  137. }
  138. }
  139. }
  140. func (m *LogBuffer) copyToFlush() *dataToFlush {
  141. if m.pos > 0 {
  142. // fmt.Printf("flush buffer %d pos %d empty space %d\n", len(m.buf), m.pos, len(m.buf)-m.pos)
  143. var d *dataToFlush
  144. if m.flushFn != nil {
  145. d = &dataToFlush{
  146. startTime: m.startTime,
  147. stopTime: m.stopTime,
  148. data: copiedBytes(m.buf[:m.pos]),
  149. }
  150. // glog.V(4).Infof("%s flushing [0,%d) with %d entries [%v, %v]", m.name, m.pos, len(m.idx), m.startTime, m.stopTime)
  151. } else {
  152. // glog.V(4).Infof("%s removed from memory [0,%d) with %d entries [%v, %v]", m.name, m.pos, len(m.idx), m.startTime, m.stopTime)
  153. m.lastFlushTime = m.stopTime
  154. }
  155. m.buf = m.prevBuffers.SealBuffer(m.startTime, m.stopTime, m.buf, m.pos)
  156. m.startTime = time.Unix(0, 0)
  157. m.stopTime = time.Unix(0, 0)
  158. m.pos = 0
  159. m.idx = m.idx[:0]
  160. return d
  161. }
  162. return nil
  163. }
  164. func (d *dataToFlush) releaseMemory() {
  165. d.data.Reset()
  166. bufferPool.Put(d.data)
  167. }
  168. func (m *LogBuffer) ReadFromBuffer(lastReadTime time.Time) (bufferCopy *bytes.Buffer, err error) {
  169. m.RLock()
  170. defer m.RUnlock()
  171. // Read from disk and memory
  172. // 1. read from disk, last time is = td
  173. // 2. in memory, the earliest time = tm
  174. // if tm <= td, case 2.1
  175. // read from memory
  176. // if tm is empty, case 2.2
  177. // read from memory
  178. // if td < tm, case 2.3
  179. // read from disk again
  180. var tsMemory time.Time
  181. if !m.startTime.IsZero() {
  182. tsMemory = m.startTime
  183. }
  184. for _, prevBuf := range m.prevBuffers.buffers {
  185. if !prevBuf.startTime.IsZero() && prevBuf.startTime.Before(tsMemory) {
  186. tsMemory = prevBuf.startTime
  187. }
  188. }
  189. if tsMemory.IsZero() { // case 2.2
  190. return nil, nil
  191. } else if lastReadTime.Before(tsMemory) { // case 2.3
  192. if !m.lastFlushTime.IsZero() {
  193. glog.V(0).Infof("resume with last flush time: %v", m.lastFlushTime)
  194. return nil, ResumeFromDiskError
  195. }
  196. }
  197. // the following is case 2.1
  198. if lastReadTime.Equal(m.stopTime) {
  199. return nil, nil
  200. }
  201. if lastReadTime.After(m.stopTime) {
  202. // glog.Fatalf("unexpected last read time %v, older than latest %v", lastReadTime, m.stopTime)
  203. return nil, nil
  204. }
  205. if lastReadTime.Before(m.startTime) {
  206. // println("checking ", lastReadTime.UnixNano())
  207. for _, buf := range m.prevBuffers.buffers {
  208. if buf.startTime.After(lastReadTime) {
  209. // glog.V(4).Infof("%s return the %d sealed buffer %v", m.name, i, buf.startTime)
  210. // println("return the", i, "th in memory", buf.startTime.UnixNano())
  211. return copiedBytes(buf.buf[:buf.size]), nil
  212. }
  213. if !buf.startTime.After(lastReadTime) && buf.stopTime.After(lastReadTime) {
  214. pos := buf.locateByTs(lastReadTime)
  215. // fmt.Printf("locate buffer[%d] pos %d\n", i, pos)
  216. return copiedBytes(buf.buf[pos:buf.size]), nil
  217. }
  218. }
  219. // glog.V(4).Infof("%s return the current buf %v", m.name, lastReadTime)
  220. return copiedBytes(m.buf[:m.pos]), nil
  221. }
  222. lastTs := lastReadTime.UnixNano()
  223. l, h := 0, len(m.idx)-1
  224. /*
  225. for i, pos := range m.idx {
  226. logEntry, ts := readTs(m.buf, pos)
  227. event := &filer_pb.SubscribeMetadataResponse{}
  228. proto.Unmarshal(logEntry.Data, event)
  229. entry := event.EventNotification.OldEntry
  230. if entry == nil {
  231. entry = event.EventNotification.NewEntry
  232. }
  233. fmt.Printf("entry %d ts: %v offset:%d dir:%s name:%s\n", i, time.Unix(0, ts), pos, event.Directory, entry.Name)
  234. }
  235. fmt.Printf("l=%d, h=%d\n", l, h)
  236. */
  237. for l <= h {
  238. mid := (l + h) / 2
  239. pos := m.idx[mid]
  240. _, t := readTs(m.buf, pos)
  241. if t <= lastTs {
  242. l = mid + 1
  243. } else if lastTs < t {
  244. var prevT int64
  245. if mid > 0 {
  246. _, prevT = readTs(m.buf, m.idx[mid-1])
  247. }
  248. if prevT <= lastTs {
  249. // fmt.Printf("found l=%d, m-1=%d(ts=%d), m=%d(ts=%d), h=%d [%d, %d) \n", l, mid-1, prevT, mid, t, h, pos, m.pos)
  250. return copiedBytes(m.buf[pos:m.pos]), nil
  251. }
  252. h = mid
  253. }
  254. // fmt.Printf("l=%d, h=%d\n", l, h)
  255. }
  256. // FIXME: this could be that the buffer has been flushed already
  257. return nil, nil
  258. }
  259. func (m *LogBuffer) ReleaseMemory(b *bytes.Buffer) {
  260. bufferPool.Put(b)
  261. }
  262. var bufferPool = sync.Pool{
  263. New: func() interface{} {
  264. return new(bytes.Buffer)
  265. },
  266. }
  267. func copiedBytes(buf []byte) (copied *bytes.Buffer) {
  268. copied = bufferPool.Get().(*bytes.Buffer)
  269. copied.Reset()
  270. copied.Write(buf)
  271. return
  272. }
  273. func readTs(buf []byte, pos int) (size int, ts int64) {
  274. size = int(util.BytesToUint32(buf[pos : pos+4]))
  275. entryData := buf[pos+4 : pos+4+size]
  276. logEntry := &filer_pb.LogEntry{}
  277. err := proto.Unmarshal(entryData, logEntry)
  278. if err != nil {
  279. glog.Fatalf("unexpected unmarshal filer_pb.LogEntry: %v", err)
  280. }
  281. return size, logEntry.TsNs
  282. }