123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393 |
- package filer
- import (
- "bytes"
- "fmt"
- "golang.org/x/exp/slices"
- "io"
- "math"
- "sort"
- "strings"
- "sync"
- "time"
- "github.com/seaweedfs/seaweedfs/weed/glog"
- "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
- "github.com/seaweedfs/seaweedfs/weed/stats"
- "github.com/seaweedfs/seaweedfs/weed/util"
- "github.com/seaweedfs/seaweedfs/weed/wdclient"
- )
// getLookupFileIdBackoffSchedule lists the pauses taken after each failed
// file id lookup in StreamContentWithThrottler. Its length is also the number
// of lookup attempts made per chunk.
var getLookupFileIdBackoffSchedule = []time.Duration{
	150 * time.Millisecond,  // after the first failed attempt
	600 * time.Millisecond,  // after the second failed attempt
	1800 * time.Millisecond, // after the third failed attempt
}
- func HasData(entry *filer_pb.Entry) bool {
- if len(entry.Content) > 0 {
- return true
- }
- return len(entry.GetChunks()) > 0
- }
- func IsSameData(a, b *filer_pb.Entry) bool {
- if len(a.Content) > 0 || len(b.Content) > 0 {
- return bytes.Equal(a.Content, b.Content)
- }
- return isSameChunks(a.Chunks, b.Chunks)
- }
- func isSameChunks(a, b []*filer_pb.FileChunk) bool {
- if len(a) != len(b) {
- return false
- }
- slices.SortFunc(a, func(i, j *filer_pb.FileChunk) bool {
- return strings.Compare(i.ETag, j.ETag) < 0
- })
- slices.SortFunc(b, func(i, j *filer_pb.FileChunk) bool {
- return strings.Compare(i.ETag, j.ETag) < 0
- })
- for i := 0; i < len(a); i++ {
- if a[i].ETag != b[i].ETag {
- return false
- }
- }
- return true
- }
- func NewFileReader(filerClient filer_pb.FilerClient, entry *filer_pb.Entry) io.Reader {
- if len(entry.Content) > 0 {
- return bytes.NewReader(entry.Content)
- }
- return NewChunkStreamReader(filerClient, entry.GetChunks())
- }
- func StreamContent(masterClient wdclient.HasLookupFileIdFunction, writer io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64) error {
- return StreamContentWithThrottler(masterClient, writer, chunks, offset, size, 0)
- }
- func StreamContentWithThrottler(masterClient wdclient.HasLookupFileIdFunction, writer io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64, downloadMaxBytesPs int64) error {
- glog.V(4).Infof("start to stream content for chunks: %d", len(chunks))
- chunkViews := ViewFromChunks(masterClient.GetLookupFileIdFunction(), chunks, offset, size)
- fileId2Url := make(map[string][]string)
- for _, chunkView := range chunkViews {
- var urlStrings []string
- var err error
- for _, backoff := range getLookupFileIdBackoffSchedule {
- urlStrings, err = masterClient.GetLookupFileIdFunction()(chunkView.FileId)
- if err == nil && len(urlStrings) > 0 {
- break
- }
- glog.V(4).Infof("waiting for chunk: %s", chunkView.FileId)
- time.Sleep(backoff)
- }
- if err != nil {
- glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
- return err
- } else if len(urlStrings) == 0 {
- errUrlNotFound := fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
- glog.Error(errUrlNotFound)
- return errUrlNotFound
- }
- fileId2Url[chunkView.FileId] = urlStrings
- }
- downloadThrottler := util.NewWriteThrottler(downloadMaxBytesPs)
- remaining := size
- for _, chunkView := range chunkViews {
- if offset < chunkView.LogicOffset {
- gap := chunkView.LogicOffset - offset
- remaining -= gap
- glog.V(4).Infof("zero [%d,%d)", offset, chunkView.LogicOffset)
- err := writeZero(writer, gap)
- if err != nil {
- return fmt.Errorf("write zero [%d,%d)", offset, chunkView.LogicOffset)
- }
- offset = chunkView.LogicOffset
- }
- urlStrings := fileId2Url[chunkView.FileId]
- start := time.Now()
- err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size))
- offset += int64(chunkView.Size)
- remaining -= int64(chunkView.Size)
- stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
- if err != nil {
- stats.FilerRequestCounter.WithLabelValues("chunkDownloadError").Inc()
- return fmt.Errorf("read chunk: %v", err)
- }
- stats.FilerRequestCounter.WithLabelValues("chunkDownload").Inc()
- downloadThrottler.MaybeSlowdown(int64(chunkView.Size))
- }
- if remaining > 0 {
- glog.V(4).Infof("zero [%d,%d)", offset, offset+remaining)
- err := writeZero(writer, remaining)
- if err != nil {
- return fmt.Errorf("write zero [%d,%d)", offset, offset+remaining)
- }
- }
- return nil
- }
- // ---------------- ReadAllReader ----------------------------------
// writeZero writes size zero bytes to w in blocks of at most 1024 bytes. A
// non-positive size writes nothing. The first write error stops the loop and
// is returned.
func writeZero(w io.Writer, size int64) (err error) {
	const blockSize = 1024
	zeros := make([]byte, blockSize)

	for size > 0 {
		block := zeros
		if size <= blockSize {
			block = zeros[:size]
		}
		n, writeErr := w.Write(block)
		if writeErr != nil {
			return writeErr
		}
		size -= int64(n)
	}
	return nil
}
- func ReadAll(buffer []byte, masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) error {
- lookupFileIdFn := func(fileId string) (targetUrls []string, err error) {
- return masterClient.LookupFileId(fileId)
- }
- chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, int64(len(buffer)))
- idx := 0
- for _, chunkView := range chunkViews {
- urlStrings, err := lookupFileIdFn(chunkView.FileId)
- if err != nil {
- glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
- return err
- }
- n, err := retriedFetchChunkData(buffer[idx:idx+int(chunkView.Size)], urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset)
- if err != nil {
- return err
- }
- idx += n
- }
- return nil
- }
- // ---------------- ChunkStreamReader ----------------------------------
- type ChunkStreamReader struct {
- chunkViews []*ChunkView
- totalSize int64
- logicOffset int64
- buffer []byte
- bufferOffset int64
- bufferLock sync.Mutex
- chunk string
- lookupFileId wdclient.LookupFileIdFunctionType
- }
- var _ = io.ReadSeeker(&ChunkStreamReader{})
- var _ = io.ReaderAt(&ChunkStreamReader{})
- func doNewChunkStreamReader(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
- chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
- slices.SortFunc(chunkViews, func(a, b *ChunkView) bool {
- return a.LogicOffset < b.LogicOffset
- })
- var totalSize int64
- for _, chunk := range chunkViews {
- totalSize += int64(chunk.Size)
- }
- return &ChunkStreamReader{
- chunkViews: chunkViews,
- lookupFileId: lookupFileIdFn,
- totalSize: totalSize,
- }
- }
- func NewChunkStreamReaderFromFiler(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
- lookupFileIdFn := func(fileId string) (targetUrl []string, err error) {
- return masterClient.LookupFileId(fileId)
- }
- return doNewChunkStreamReader(lookupFileIdFn, chunks)
- }
- func NewChunkStreamReader(filerClient filer_pb.FilerClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
- lookupFileIdFn := LookupFn(filerClient)
- return doNewChunkStreamReader(lookupFileIdFn, chunks)
- }
- func (c *ChunkStreamReader) ReadAt(p []byte, off int64) (n int, err error) {
- c.bufferLock.Lock()
- defer c.bufferLock.Unlock()
- if err = c.prepareBufferFor(off); err != nil {
- return
- }
- c.logicOffset = off
- return c.doRead(p)
- }
- func (c *ChunkStreamReader) Read(p []byte) (n int, err error) {
- c.bufferLock.Lock()
- defer c.bufferLock.Unlock()
- return c.doRead(p)
- }
- func (c *ChunkStreamReader) doRead(p []byte) (n int, err error) {
- // fmt.Printf("do read [%d,%d) at %s[%d,%d)\n", c.logicOffset, c.logicOffset+int64(len(p)), c.chunk, c.bufferOffset, c.bufferOffset+int64(len(c.buffer)))
- for n < len(p) {
- // println("read", c.logicOffset)
- if err = c.prepareBufferFor(c.logicOffset); err != nil {
- return
- }
- t := copy(p[n:], c.buffer[c.logicOffset-c.bufferOffset:])
- n += t
- c.logicOffset += int64(t)
- }
- return
- }
- func (c *ChunkStreamReader) isBufferEmpty() bool {
- return len(c.buffer) <= int(c.logicOffset-c.bufferOffset)
- }
- func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
- c.bufferLock.Lock()
- defer c.bufferLock.Unlock()
- var err error
- switch whence {
- case io.SeekStart:
- case io.SeekCurrent:
- offset += c.logicOffset
- case io.SeekEnd:
- offset = c.totalSize + offset
- }
- if offset > c.totalSize {
- err = io.ErrUnexpectedEOF
- } else {
- c.logicOffset = offset
- }
- return offset, err
- }
- func insideChunk(offset int64, chunk *ChunkView) bool {
- return chunk.LogicOffset <= offset && offset < chunk.LogicOffset+int64(chunk.Size)
- }
- func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
- // stay in the same chunk
- if c.bufferOffset <= offset && offset < c.bufferOffset+int64(len(c.buffer)) {
- return nil
- }
- // fmt.Printf("fetch for offset %d\n", offset)
- // need to seek to a different chunk
- currentChunkIndex := sort.Search(len(c.chunkViews), func(i int) bool {
- return offset < c.chunkViews[i].LogicOffset
- })
- if currentChunkIndex == len(c.chunkViews) {
- // not found
- if insideChunk(offset, c.chunkViews[0]) {
- // fmt.Printf("select0 chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
- currentChunkIndex = 0
- } else if insideChunk(offset, c.chunkViews[len(c.chunkViews)-1]) {
- currentChunkIndex = len(c.chunkViews) - 1
- // fmt.Printf("select last chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
- } else {
- return io.EOF
- }
- } else if currentChunkIndex > 0 {
- if insideChunk(offset, c.chunkViews[currentChunkIndex]) {
- // good hit
- } else if insideChunk(offset, c.chunkViews[currentChunkIndex-1]) {
- currentChunkIndex -= 1
- // fmt.Printf("select -1 chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
- } else {
- // glog.Fatalf("unexpected1 offset %d", offset)
- return fmt.Errorf("unexpected1 offset %d", offset)
- }
- } else {
- // glog.Fatalf("unexpected2 offset %d", offset)
- return fmt.Errorf("unexpected2 offset %d", offset)
- }
- // positioning within the new chunk
- chunk := c.chunkViews[currentChunkIndex]
- if insideChunk(offset, chunk) {
- if c.isBufferEmpty() || c.bufferOffset != chunk.LogicOffset {
- if err = c.fetchChunkToBuffer(chunk); err != nil {
- return
- }
- }
- } else {
- // glog.Fatalf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
- return fmt.Errorf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
- }
- return
- }
- func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
- urlStrings, err := c.lookupFileId(chunkView.FileId)
- if err != nil {
- glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
- return err
- }
- var buffer bytes.Buffer
- var shouldRetry bool
- for _, urlString := range urlStrings {
- shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
- buffer.Write(data)
- })
- if !shouldRetry {
- break
- }
- if err != nil {
- glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err)
- buffer.Reset()
- } else {
- break
- }
- }
- if err != nil {
- return err
- }
- c.buffer = buffer.Bytes()
- c.bufferOffset = chunkView.LogicOffset
- c.chunk = chunkView.FileId
- // glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
- return nil
- }
- func (c *ChunkStreamReader) Close() {
- // TODO try to release and reuse buffer
- }
// VolumeId returns the part of fileId before its last comma. A fileId with no
// comma, or whose only comma is the first character, is returned unchanged.
func VolumeId(fileId string) string {
	if cut := strings.LastIndexByte(fileId, ','); cut > 0 {
		return fileId[:cut]
	}
	return fileId
}
|