stream.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. package filer
  2. import (
  3. "bytes"
  4. "fmt"
  5. "golang.org/x/exp/slices"
  6. "io"
  7. "math"
  8. "strings"
  9. "sync"
  10. "time"
  11. "github.com/seaweedfs/seaweedfs/weed/glog"
  12. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  13. "github.com/seaweedfs/seaweedfs/weed/stats"
  14. "github.com/seaweedfs/seaweedfs/weed/util"
  15. "github.com/seaweedfs/seaweedfs/weed/wdclient"
  16. )
// getLookupFileIdBackoffSchedule holds the back-off delays used when retrying
// file id lookups in StreamContentWithThrottler; one lookup attempt is made
// per entry.
var getLookupFileIdBackoffSchedule = []time.Duration{
	150 * time.Millisecond,
	600 * time.Millisecond,
	1800 * time.Millisecond,
}
  22. func HasData(entry *filer_pb.Entry) bool {
  23. if len(entry.Content) > 0 {
  24. return true
  25. }
  26. return len(entry.GetChunks()) > 0
  27. }
  28. func IsSameData(a, b *filer_pb.Entry) bool {
  29. if len(a.Content) > 0 || len(b.Content) > 0 {
  30. return bytes.Equal(a.Content, b.Content)
  31. }
  32. return isSameChunks(a.Chunks, b.Chunks)
  33. }
  34. func isSameChunks(a, b []*filer_pb.FileChunk) bool {
  35. if len(a) != len(b) {
  36. return false
  37. }
  38. slices.SortFunc(a, func(i, j *filer_pb.FileChunk) int {
  39. return strings.Compare(i.ETag, j.ETag)
  40. })
  41. slices.SortFunc(b, func(i, j *filer_pb.FileChunk) int {
  42. return strings.Compare(i.ETag, j.ETag)
  43. })
  44. for i := 0; i < len(a); i++ {
  45. if a[i].ETag != b[i].ETag {
  46. return false
  47. }
  48. }
  49. return true
  50. }
  51. func NewFileReader(filerClient filer_pb.FilerClient, entry *filer_pb.Entry) io.Reader {
  52. if len(entry.Content) > 0 {
  53. return bytes.NewReader(entry.Content)
  54. }
  55. return NewChunkStreamReader(filerClient, entry.GetChunks())
  56. }
  57. func StreamContent(masterClient wdclient.HasLookupFileIdFunction, writer io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64) error {
  58. return StreamContentWithThrottler(masterClient, writer, chunks, offset, size, 0)
  59. }
  60. func StreamContentWithThrottler(masterClient wdclient.HasLookupFileIdFunction, writer io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64, downloadMaxBytesPs int64) error {
  61. glog.V(4).Infof("start to stream content for chunks: %d", len(chunks))
  62. chunkViews := ViewFromChunks(masterClient.GetLookupFileIdFunction(), chunks, offset, size)
  63. fileId2Url := make(map[string][]string)
  64. for x := chunkViews.Front(); x != nil; x = x.Next {
  65. chunkView := x.Value
  66. var urlStrings []string
  67. var err error
  68. for _, backoff := range getLookupFileIdBackoffSchedule {
  69. urlStrings, err = masterClient.GetLookupFileIdFunction()(chunkView.FileId)
  70. if err == nil && len(urlStrings) > 0 {
  71. break
  72. }
  73. glog.V(4).Infof("waiting for chunk: %s", chunkView.FileId)
  74. time.Sleep(backoff)
  75. }
  76. if err != nil {
  77. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  78. return err
  79. } else if len(urlStrings) == 0 {
  80. errUrlNotFound := fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
  81. glog.Error(errUrlNotFound)
  82. return errUrlNotFound
  83. }
  84. fileId2Url[chunkView.FileId] = urlStrings
  85. }
  86. downloadThrottler := util.NewWriteThrottler(downloadMaxBytesPs)
  87. remaining := size
  88. for x := chunkViews.Front(); x != nil; x = x.Next {
  89. chunkView := x.Value
  90. if offset < chunkView.ViewOffset {
  91. gap := chunkView.ViewOffset - offset
  92. remaining -= gap
  93. glog.V(4).Infof("zero [%d,%d)", offset, chunkView.ViewOffset)
  94. err := writeZero(writer, gap)
  95. if err != nil {
  96. return fmt.Errorf("write zero [%d,%d)", offset, chunkView.ViewOffset)
  97. }
  98. offset = chunkView.ViewOffset
  99. }
  100. urlStrings := fileId2Url[chunkView.FileId]
  101. start := time.Now()
  102. err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize))
  103. offset += int64(chunkView.ViewSize)
  104. remaining -= int64(chunkView.ViewSize)
  105. stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
  106. if err != nil {
  107. stats.FilerRequestCounter.WithLabelValues("chunkDownloadError").Inc()
  108. return fmt.Errorf("read chunk: %v", err)
  109. }
  110. stats.FilerRequestCounter.WithLabelValues("chunkDownload").Inc()
  111. downloadThrottler.MaybeSlowdown(int64(chunkView.ViewSize))
  112. }
  113. if remaining > 0 {
  114. glog.V(4).Infof("zero [%d,%d)", offset, offset+remaining)
  115. err := writeZero(writer, remaining)
  116. if err != nil {
  117. return fmt.Errorf("write zero [%d,%d)", offset, offset+remaining)
  118. }
  119. }
  120. return nil
  121. }
  122. // ---------------- ReadAllReader ----------------------------------
  123. func writeZero(w io.Writer, size int64) (err error) {
  124. zeroPadding := make([]byte, 1024)
  125. var written int
  126. for size > 0 {
  127. if size > 1024 {
  128. written, err = w.Write(zeroPadding)
  129. } else {
  130. written, err = w.Write(zeroPadding[:size])
  131. }
  132. size -= int64(written)
  133. if err != nil {
  134. return
  135. }
  136. }
  137. return
  138. }
  139. func ReadAll(buffer []byte, masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) error {
  140. lookupFileIdFn := func(fileId string) (targetUrls []string, err error) {
  141. return masterClient.LookupFileId(fileId)
  142. }
  143. chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, int64(len(buffer)))
  144. idx := 0
  145. for x := chunkViews.Front(); x != nil; x = x.Next {
  146. chunkView := x.Value
  147. urlStrings, err := lookupFileIdFn(chunkView.FileId)
  148. if err != nil {
  149. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  150. return err
  151. }
  152. n, err := retriedFetchChunkData(buffer[idx:idx+int(chunkView.ViewSize)], urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk)
  153. if err != nil {
  154. return err
  155. }
  156. idx += n
  157. }
  158. return nil
  159. }
  160. // ---------------- ChunkStreamReader ----------------------------------
// ChunkStreamReader reads a chunked file as one continuous byte stream,
// buffering one chunk view at a time. Read, ReadAt and Seek take bufferLock
// for their whole duration.
type ChunkStreamReader struct {
	// head is the first chunk view interval, as set by the constructor.
	head *Interval[*ChunkView]
	// chunkView is the interval where prepareBufferFor starts its search;
	// it is updated whenever a different view is fetched.
	chunkView *Interval[*ChunkView]
	// totalSize is the sum of the ViewSize of all chunk views.
	totalSize int64
	// logicOffset is the current read position within the stream.
	logicOffset int64
	// buffer holds the bytes of the most recently fetched chunk view.
	buffer []byte
	// bufferOffset is the ViewOffset of the view held in buffer.
	bufferOffset int64
	// bufferLock serializes Read, ReadAt and Seek.
	bufferLock sync.Mutex
	// chunk is the file id of the view held in buffer.
	chunk string
	// lookupFileId resolves a file id to the URLs it can be read from.
	lookupFileId wdclient.LookupFileIdFunctionType
}
  172. var _ = io.ReadSeeker(&ChunkStreamReader{})
  173. var _ = io.ReaderAt(&ChunkStreamReader{})
  174. func doNewChunkStreamReader(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  175. chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
  176. var totalSize int64
  177. for x := chunkViews.Front(); x != nil; x = x.Next {
  178. chunk := x.Value
  179. totalSize += int64(chunk.ViewSize)
  180. }
  181. return &ChunkStreamReader{
  182. head: chunkViews.Front(),
  183. chunkView: chunkViews.Front(),
  184. lookupFileId: lookupFileIdFn,
  185. totalSize: totalSize,
  186. }
  187. }
  188. func NewChunkStreamReaderFromFiler(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  189. lookupFileIdFn := func(fileId string) (targetUrl []string, err error) {
  190. return masterClient.LookupFileId(fileId)
  191. }
  192. return doNewChunkStreamReader(lookupFileIdFn, chunks)
  193. }
  194. func NewChunkStreamReader(filerClient filer_pb.FilerClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  195. lookupFileIdFn := LookupFn(filerClient)
  196. return doNewChunkStreamReader(lookupFileIdFn, chunks)
  197. }
  198. func (c *ChunkStreamReader) ReadAt(p []byte, off int64) (n int, err error) {
  199. c.bufferLock.Lock()
  200. defer c.bufferLock.Unlock()
  201. if err = c.prepareBufferFor(off); err != nil {
  202. return
  203. }
  204. c.logicOffset = off
  205. return c.doRead(p)
  206. }
  207. func (c *ChunkStreamReader) Read(p []byte) (n int, err error) {
  208. c.bufferLock.Lock()
  209. defer c.bufferLock.Unlock()
  210. return c.doRead(p)
  211. }
  212. func (c *ChunkStreamReader) doRead(p []byte) (n int, err error) {
  213. // fmt.Printf("do read [%d,%d) at %s[%d,%d)\n", c.logicOffset, c.logicOffset+int64(len(p)), c.chunk, c.bufferOffset, c.bufferOffset+int64(len(c.buffer)))
  214. for n < len(p) {
  215. // println("read", c.logicOffset)
  216. if err = c.prepareBufferFor(c.logicOffset); err != nil {
  217. return
  218. }
  219. t := copy(p[n:], c.buffer[c.logicOffset-c.bufferOffset:])
  220. n += t
  221. c.logicOffset += int64(t)
  222. }
  223. return
  224. }
  225. func (c *ChunkStreamReader) isBufferEmpty() bool {
  226. return len(c.buffer) <= int(c.logicOffset-c.bufferOffset)
  227. }
  228. func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
  229. c.bufferLock.Lock()
  230. defer c.bufferLock.Unlock()
  231. var err error
  232. switch whence {
  233. case io.SeekStart:
  234. case io.SeekCurrent:
  235. offset += c.logicOffset
  236. case io.SeekEnd:
  237. offset = c.totalSize + offset
  238. }
  239. if offset > c.totalSize {
  240. err = io.ErrUnexpectedEOF
  241. } else {
  242. c.logicOffset = offset
  243. }
  244. return offset, err
  245. }
  246. func insideChunk(offset int64, chunk *ChunkView) bool {
  247. return chunk.ViewOffset <= offset && offset < chunk.ViewOffset+int64(chunk.ViewSize)
  248. }
// prepareBufferFor makes sure the buffer holds the chunk view that contains
// offset.
//
// If offset already lies inside the buffered range nothing is done. Otherwise
// the view list is walked starting at c.chunkView — towards Prev when offset
// is before the buffered range, towards Next otherwise — until a view
// containing offset is found; that view becomes c.chunkView and is fetched
// into the buffer.
//
// It returns the fetch error, if any, or io.EOF when no view on the walked
// path contains offset.
func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
	// stay in the same chunk
	if c.bufferOffset <= offset && offset < c.bufferOffset+int64(len(c.buffer)) {
		return nil
	}
	// glog.V(2).Infof("c.chunkView: %v buffer:[%d,%d) offset:%d totalSize:%d", c.chunkView, c.bufferOffset, c.bufferOffset+int64(len(c.buffer)), offset, c.totalSize)
	// find a possible chunk view
	p := c.chunkView
	for p != nil {
		chunk := p.Value
		// glog.V(2).Infof("prepareBufferFor check chunk:[%d,%d)", chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
		if insideChunk(offset, chunk) {
			// Fetch only when the buffer is used up at logicOffset or holds a
			// different view; note isBufferEmpty looks at c.logicOffset, not
			// at the offset argument.
			if c.isBufferEmpty() || c.bufferOffset != chunk.ViewOffset {
				c.chunkView = p
				return c.fetchChunkToBuffer(chunk)
			}
		}
		// The direction is chosen relative to the buffered range, not to the
		// view currently being inspected.
		if offset < c.bufferOffset {
			p = p.Prev
		} else {
			p = p.Next
		}
	}
	return io.EOF
}
  274. func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
  275. urlStrings, err := c.lookupFileId(chunkView.FileId)
  276. if err != nil {
  277. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  278. return err
  279. }
  280. var buffer bytes.Buffer
  281. var shouldRetry bool
  282. for _, urlString := range urlStrings {
  283. shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize), func(data []byte) {
  284. buffer.Write(data)
  285. })
  286. if !shouldRetry {
  287. break
  288. }
  289. if err != nil {
  290. glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err)
  291. buffer.Reset()
  292. } else {
  293. break
  294. }
  295. }
  296. if err != nil {
  297. return err
  298. }
  299. c.buffer = buffer.Bytes()
  300. c.bufferOffset = chunkView.ViewOffset
  301. c.chunk = chunkView.FileId
  302. // glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.ViewOffset, chunkView.ViewOffset+int64(chunkView.ViewSize))
  303. return nil
  304. }
// Close currently does nothing; the reader holds no resources that are
// released here.
func (c *ChunkStreamReader) Close() {
	// TODO try to release and reuse buffer
}
  308. func VolumeId(fileId string) string {
  309. lastCommaIndex := strings.LastIndex(fileId, ",")
  310. if lastCommaIndex > 0 {
  311. return fileId[:lastCommaIndex]
  312. }
  313. return fileId
  314. }