stream.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. package filer
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "math"
  7. "strings"
  8. "sync"
  9. "time"
  10. "slices"
  11. "github.com/seaweedfs/seaweedfs/weed/glog"
  12. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  13. "github.com/seaweedfs/seaweedfs/weed/stats"
  14. "github.com/seaweedfs/seaweedfs/weed/util"
  15. util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
  16. "github.com/seaweedfs/seaweedfs/weed/wdclient"
  17. )
  18. var getLookupFileIdBackoffSchedule = []time.Duration{
  19. 150 * time.Millisecond,
  20. 600 * time.Millisecond,
  21. 1800 * time.Millisecond,
  22. }
  23. func HasData(entry *filer_pb.Entry) bool {
  24. if len(entry.Content) > 0 {
  25. return true
  26. }
  27. return len(entry.GetChunks()) > 0
  28. }
  29. func IsSameData(a, b *filer_pb.Entry) bool {
  30. if len(a.Content) > 0 || len(b.Content) > 0 {
  31. return bytes.Equal(a.Content, b.Content)
  32. }
  33. return isSameChunks(a.Chunks, b.Chunks)
  34. }
  35. func isSameChunks(a, b []*filer_pb.FileChunk) bool {
  36. if len(a) != len(b) {
  37. return false
  38. }
  39. slices.SortFunc(a, func(i, j *filer_pb.FileChunk) int {
  40. return strings.Compare(i.ETag, j.ETag)
  41. })
  42. slices.SortFunc(b, func(i, j *filer_pb.FileChunk) int {
  43. return strings.Compare(i.ETag, j.ETag)
  44. })
  45. for i := 0; i < len(a); i++ {
  46. if a[i].ETag != b[i].ETag {
  47. return false
  48. }
  49. }
  50. return true
  51. }
  52. func NewFileReader(filerClient filer_pb.FilerClient, entry *filer_pb.Entry) io.Reader {
  53. if len(entry.Content) > 0 {
  54. return bytes.NewReader(entry.Content)
  55. }
  56. return NewChunkStreamReader(filerClient, entry.GetChunks())
  57. }
// DoStreamContent writes already-prepared content to writer and returns the
// first error encountered. Values of this type are produced by
// PrepareStreamContent and PrepareStreamContentWithThrottler.
type DoStreamContent func(writer io.Writer) error
// PrepareStreamContent is PrepareStreamContentWithThrottler called with a
// downloadMaxBytesPs of 0.
func PrepareStreamContent(masterClient wdclient.HasLookupFileIdFunction, jwtFunc VolumeServerJwtFunction, chunks []*filer_pb.FileChunk, offset int64, size int64) (DoStreamContent, error) {
	return PrepareStreamContentWithThrottler(masterClient, jwtFunc, chunks, offset, size, 0)
}
// VolumeServerJwtFunction maps a file id to the JWT string that is passed
// along when that file id's chunk data is fetched.
type VolumeServerJwtFunction func(fileId string) string
  63. func noJwtFunc(string) string {
  64. return ""
  65. }
  66. func PrepareStreamContentWithThrottler(masterClient wdclient.HasLookupFileIdFunction, jwtFunc VolumeServerJwtFunction, chunks []*filer_pb.FileChunk, offset int64, size int64, downloadMaxBytesPs int64) (DoStreamContent, error) {
  67. glog.V(4).Infof("prepare to stream content for chunks: %d", len(chunks))
  68. chunkViews := ViewFromChunks(masterClient.GetLookupFileIdFunction(), chunks, offset, size)
  69. fileId2Url := make(map[string][]string)
  70. for x := chunkViews.Front(); x != nil; x = x.Next {
  71. chunkView := x.Value
  72. var urlStrings []string
  73. var err error
  74. for _, backoff := range getLookupFileIdBackoffSchedule {
  75. urlStrings, err = masterClient.GetLookupFileIdFunction()(chunkView.FileId)
  76. if err == nil && len(urlStrings) > 0 {
  77. break
  78. }
  79. glog.V(4).Infof("waiting for chunk: %s", chunkView.FileId)
  80. time.Sleep(backoff)
  81. }
  82. if err != nil {
  83. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  84. return nil, err
  85. } else if len(urlStrings) == 0 {
  86. errUrlNotFound := fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
  87. glog.Error(errUrlNotFound)
  88. return nil, errUrlNotFound
  89. }
  90. fileId2Url[chunkView.FileId] = urlStrings
  91. }
  92. return func(writer io.Writer) error {
  93. downloadThrottler := util.NewWriteThrottler(downloadMaxBytesPs)
  94. remaining := size
  95. for x := chunkViews.Front(); x != nil; x = x.Next {
  96. chunkView := x.Value
  97. if offset < chunkView.ViewOffset {
  98. gap := chunkView.ViewOffset - offset
  99. remaining -= gap
  100. glog.V(4).Infof("zero [%d,%d)", offset, chunkView.ViewOffset)
  101. err := writeZero(writer, gap)
  102. if err != nil {
  103. return fmt.Errorf("write zero [%d,%d)", offset, chunkView.ViewOffset)
  104. }
  105. offset = chunkView.ViewOffset
  106. }
  107. urlStrings := fileId2Url[chunkView.FileId]
  108. start := time.Now()
  109. jwt := jwtFunc(chunkView.FileId)
  110. err := retriedStreamFetchChunkData(writer, urlStrings, jwt, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize))
  111. offset += int64(chunkView.ViewSize)
  112. remaining -= int64(chunkView.ViewSize)
  113. stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
  114. if err != nil {
  115. stats.FilerHandlerCounter.WithLabelValues("chunkDownloadError").Inc()
  116. return fmt.Errorf("read chunk: %v", err)
  117. }
  118. stats.FilerHandlerCounter.WithLabelValues("chunkDownload").Inc()
  119. downloadThrottler.MaybeSlowdown(int64(chunkView.ViewSize))
  120. }
  121. if remaining > 0 {
  122. glog.V(4).Infof("zero [%d,%d)", offset, offset+remaining)
  123. err := writeZero(writer, remaining)
  124. if err != nil {
  125. return fmt.Errorf("write zero [%d,%d)", offset, offset+remaining)
  126. }
  127. }
  128. return nil
  129. }, nil
  130. }
  131. func StreamContent(masterClient wdclient.HasLookupFileIdFunction, writer io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64) error {
  132. streamFn, err := PrepareStreamContent(masterClient, noJwtFunc, chunks, offset, size)
  133. if err != nil {
  134. return err
  135. }
  136. return streamFn(writer)
  137. }
  138. // ---------------- ReadAllReader ----------------------------------
  139. func writeZero(w io.Writer, size int64) (err error) {
  140. zeroPadding := make([]byte, 1024)
  141. var written int
  142. for size > 0 {
  143. if size > 1024 {
  144. written, err = w.Write(zeroPadding)
  145. } else {
  146. written, err = w.Write(zeroPadding[:size])
  147. }
  148. size -= int64(written)
  149. if err != nil {
  150. return
  151. }
  152. }
  153. return
  154. }
  155. func ReadAll(buffer []byte, masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) error {
  156. lookupFileIdFn := func(fileId string) (targetUrls []string, err error) {
  157. return masterClient.LookupFileId(fileId)
  158. }
  159. chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, int64(len(buffer)))
  160. idx := 0
  161. for x := chunkViews.Front(); x != nil; x = x.Next {
  162. chunkView := x.Value
  163. urlStrings, err := lookupFileIdFn(chunkView.FileId)
  164. if err != nil {
  165. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  166. return err
  167. }
  168. n, err := util_http.RetriedFetchChunkData(buffer[idx:idx+int(chunkView.ViewSize)], urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk)
  169. if err != nil {
  170. return err
  171. }
  172. idx += n
  173. }
  174. return nil
  175. }
  176. // ---------------- ChunkStreamReader ----------------------------------
// ChunkStreamReader reads the logical content described by a list of chunk
// views, keeping the data of one fetched chunk view in memory at a time.
// Its ReadAt, Read and Seek methods serialize on bufferLock.
type ChunkStreamReader struct {
	// head is the first chunk view, as returned by Front() at construction.
	head *Interval[*ChunkView]
	// chunkView is where prepareBufferFor starts searching; it is set when a
	// chunk view is selected for fetching.
	chunkView *Interval[*ChunkView]
	// totalSize is the sum of ViewSize over all chunk views.
	totalSize int64
	// logicOffset is the position the next Read continues from.
	logicOffset int64
	// buffer holds the bytes of the most recently fetched chunk view.
	buffer []byte
	// bufferOffset is the ViewOffset of the chunk view held in buffer.
	bufferOffset int64
	// bufferLock is held for the duration of ReadAt, Read and Seek.
	bufferLock sync.Mutex
	// chunk is the FileId of the chunk view held in buffer.
	chunk string
	// lookupFileId resolves a file id to the URLs it can be fetched from.
	lookupFileId wdclient.LookupFileIdFunctionType
}
  188. var _ = io.ReadSeeker(&ChunkStreamReader{})
  189. var _ = io.ReaderAt(&ChunkStreamReader{})
  190. func doNewChunkStreamReader(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  191. chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
  192. var totalSize int64
  193. for x := chunkViews.Front(); x != nil; x = x.Next {
  194. chunk := x.Value
  195. totalSize += int64(chunk.ViewSize)
  196. }
  197. return &ChunkStreamReader{
  198. head: chunkViews.Front(),
  199. chunkView: chunkViews.Front(),
  200. lookupFileId: lookupFileIdFn,
  201. totalSize: totalSize,
  202. }
  203. }
  204. func NewChunkStreamReaderFromFiler(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  205. lookupFileIdFn := func(fileId string) (targetUrl []string, err error) {
  206. return masterClient.LookupFileId(fileId)
  207. }
  208. return doNewChunkStreamReader(lookupFileIdFn, chunks)
  209. }
  210. func NewChunkStreamReader(filerClient filer_pb.FilerClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
  211. lookupFileIdFn := LookupFn(filerClient)
  212. return doNewChunkStreamReader(lookupFileIdFn, chunks)
  213. }
  214. func (c *ChunkStreamReader) ReadAt(p []byte, off int64) (n int, err error) {
  215. c.bufferLock.Lock()
  216. defer c.bufferLock.Unlock()
  217. if err = c.prepareBufferFor(off); err != nil {
  218. return
  219. }
  220. c.logicOffset = off
  221. return c.doRead(p)
  222. }
// Read implements io.Reader. While holding bufferLock it delegates to doRead,
// which reads from the reader's current logical offset and advances that
// offset by the number of bytes copied into p.
func (c *ChunkStreamReader) Read(p []byte) (n int, err error) {
	c.bufferLock.Lock()
	defer c.bufferLock.Unlock()
	return c.doRead(p)
}
  228. func (c *ChunkStreamReader) doRead(p []byte) (n int, err error) {
  229. // fmt.Printf("do read [%d,%d) at %s[%d,%d)\n", c.logicOffset, c.logicOffset+int64(len(p)), c.chunk, c.bufferOffset, c.bufferOffset+int64(len(c.buffer)))
  230. for n < len(p) {
  231. // println("read", c.logicOffset)
  232. if err = c.prepareBufferFor(c.logicOffset); err != nil {
  233. return
  234. }
  235. t := copy(p[n:], c.buffer[c.logicOffset-c.bufferOffset:])
  236. n += t
  237. c.logicOffset += int64(t)
  238. }
  239. return
  240. }
  241. func (c *ChunkStreamReader) isBufferEmpty() bool {
  242. return len(c.buffer) <= int(c.logicOffset-c.bufferOffset)
  243. }
  244. func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
  245. c.bufferLock.Lock()
  246. defer c.bufferLock.Unlock()
  247. var err error
  248. switch whence {
  249. case io.SeekStart:
  250. case io.SeekCurrent:
  251. offset += c.logicOffset
  252. case io.SeekEnd:
  253. offset = c.totalSize + offset
  254. }
  255. if offset > c.totalSize {
  256. err = io.ErrUnexpectedEOF
  257. } else {
  258. c.logicOffset = offset
  259. }
  260. return offset, err
  261. }
  262. func insideChunk(offset int64, chunk *ChunkView) bool {
  263. return chunk.ViewOffset <= offset && offset < chunk.ViewOffset+int64(chunk.ViewSize)
  264. }
  265. func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
  266. // stay in the same chunk
  267. if c.bufferOffset <= offset && offset < c.bufferOffset+int64(len(c.buffer)) {
  268. return nil
  269. }
  270. // glog.V(2).Infof("c.chunkView: %v buffer:[%d,%d) offset:%d totalSize:%d", c.chunkView, c.bufferOffset, c.bufferOffset+int64(len(c.buffer)), offset, c.totalSize)
  271. // find a possible chunk view
  272. p := c.chunkView
  273. for p != nil {
  274. chunk := p.Value
  275. // glog.V(2).Infof("prepareBufferFor check chunk:[%d,%d)", chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
  276. if insideChunk(offset, chunk) {
  277. if c.isBufferEmpty() || c.bufferOffset != chunk.ViewOffset {
  278. c.chunkView = p
  279. return c.fetchChunkToBuffer(chunk)
  280. }
  281. }
  282. if offset < c.bufferOffset {
  283. p = p.Prev
  284. } else {
  285. p = p.Next
  286. }
  287. }
  288. return io.EOF
  289. }
  290. func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
  291. urlStrings, err := c.lookupFileId(chunkView.FileId)
  292. if err != nil {
  293. glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
  294. return err
  295. }
  296. var buffer bytes.Buffer
  297. var shouldRetry bool
  298. for _, urlString := range urlStrings {
  299. shouldRetry, err = util_http.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize), func(data []byte) {
  300. buffer.Write(data)
  301. })
  302. if !shouldRetry {
  303. break
  304. }
  305. if err != nil {
  306. glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err)
  307. buffer.Reset()
  308. } else {
  309. break
  310. }
  311. }
  312. if err != nil {
  313. return err
  314. }
  315. c.buffer = buffer.Bytes()
  316. c.bufferOffset = chunkView.ViewOffset
  317. c.chunk = chunkView.FileId
  318. // glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.ViewOffset, chunkView.ViewOffset+int64(chunkView.ViewSize))
  319. return nil
  320. }
// Close currently does nothing; the reader holds no resources that are
// released here.
func (c *ChunkStreamReader) Close() {
	// TODO try to release and reuse buffer
}
  324. func VolumeId(fileId string) string {
  325. lastCommaIndex := strings.LastIndex(fileId, ",")
  326. if lastCommaIndex > 0 {
  327. return fileId[:lastCommaIndex]
  328. }
  329. return fileId
  330. }