Просмотр исходного кода

make reader_at handle random reads more efficiently for FUSE

Nathan Hawkins 3 лет назад
Родитель
Commit
042de9359c

+ 19 - 3
weed/filer/reader_at.go

@@ -139,13 +139,15 @@ func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) {
 		}
 		glog.V(4).Infof("read [%d,%d), %d/%d chunk %s [%d,%d)", chunkStart, chunkStop, i, len(c.chunkViews), chunk.FileId, chunk.LogicOffset-chunk.Offset, chunk.LogicOffset-chunk.Offset+int64(chunk.Size))
 		var buffer []byte
-		buffer, err = c.readFromWholeChunkData(chunk, nextChunk)
+		bufferOffset := chunkStart - chunk.LogicOffset + chunk.Offset
+		bufferLength := chunkStop - chunkStart
+		buffer, err = c.readChunkSlice(chunk, nextChunk, uint64(bufferOffset), uint64(bufferLength))
 		if err != nil {
 			glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
 			return
 		}
-		bufferOffset := chunkStart - chunk.LogicOffset + chunk.Offset
-		copied := copy(p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], buffer[bufferOffset:bufferOffset+chunkStop-chunkStart])
+
+		copied := copy(p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], buffer)
 		n += copied
 		startOffset, remaining = startOffset+int64(copied), remaining-int64(copied)
 	}
@@ -167,6 +169,20 @@ func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) {
 
 }
 
+func (c *ChunkReadAt) readChunkSlice(chunkView *ChunkView, nextChunkViews *ChunkView, offset, length uint64) ([]byte, error) {
+
+	chunkSlice := c.chunkCache.GetChunkSlice(chunkView.FileId, offset, length)
+	if len(chunkSlice) > 0 {
+		return chunkSlice, nil
+	}
+	chunkData, err := c.readFromWholeChunkData(chunkView, nextChunkViews)
+	if err != nil {
+		return nil, err
+	}
+	wanted := min(int64(length), int64(len(chunkData))-int64(offset))
+	return chunkData[offset : int64(offset)+wanted], nil
+}
+
 func (c *ChunkReadAt) readFromWholeChunkData(chunkView *ChunkView, nextChunkViews ...*ChunkView) (chunkData []byte, err error) {
 
 	if c.lastChunkFileId == chunkView.FileId {

+ 5 - 0
weed/filer/reader_at_test.go

@@ -20,6 +20,11 @@ func (m *mockChunkCache) GetChunk(fileId string, minSize uint64) (data []byte) {
 	}
 	return data
 }
+
+func(m *mockChunkCache) GetChunkSlice(fileId string, offset, length uint64) []byte {
+	return nil
+}
+
 func (m *mockChunkCache) SetChunk(fileId string, data []byte) {
 }
 

+ 65 - 0
weed/util/chunk_cache/chunk_cache.go

@@ -1,14 +1,18 @@
 package chunk_cache
 
 import (
+	"errors"
 	"sync"
 
 	"github.com/chrislusf/seaweedfs/weed/glog"
 	"github.com/chrislusf/seaweedfs/weed/storage/needle"
 )
 
+var ErrorOutOfBounds = errors.New("attempt to read out of bounds")
+
// ChunkCache caches chunk contents keyed by file id and can serve either
// the whole chunk or an arbitrary byte slice of it.
type ChunkCache interface {
	GetChunk(fileId string, minSize uint64) (data []byte)
	// GetChunkSlice returns the cached bytes [offset, offset+length),
	// or nil on a cache miss.
	GetChunkSlice(fileId string, offset, length uint64) []byte
	SetChunk(fileId string, data []byte)
}
 
@@ -22,6 +26,8 @@ type TieredChunkCache struct {
 	onDiskCacheSizeLimit2 uint64
 }
 
+var _ ChunkCache = &TieredChunkCache{}
+
 func NewTieredChunkCache(maxEntries int64, dir string, diskSizeInUnit int64, unitSize int64) *TieredChunkCache {
 
 	c := &TieredChunkCache{
@@ -87,6 +93,58 @@ func (c *TieredChunkCache) doGetChunk(fileId string, minSize uint64) (data []byt
 
 }
 
+func (c *TieredChunkCache) GetChunkSlice(fileId string, offset, length uint64) []byte {
+	if c == nil {
+		return nil
+	}
+
+	c.RLock()
+	defer c.RUnlock()
+
+	return c.doGetChunkSlice(fileId, offset, length)
+}
+
+func (c *TieredChunkCache) doGetChunkSlice(fileId string, offset, length uint64) (data []byte) {
+
+	minSize := offset + length
+	if minSize <= c.onDiskCacheSizeLimit0 {
+		data, err := c.memCache.getChunkSlice(fileId, offset, length)
+		if err != nil {
+			glog.Errorf("failed to read from memcache: %s", err)
+		}
+		if len(data) >= int(minSize) {
+			return data
+		}
+	}
+
+	fid, err := needle.ParseFileIdFromString(fileId)
+	if err != nil {
+		glog.Errorf("failed to parse file id %s", fileId)
+		return nil
+	}
+
+	if minSize <= c.onDiskCacheSizeLimit0 {
+		data = c.diskCaches[0].getChunkSlice(fid.Key, offset, length)
+		if len(data) >= int(minSize) {
+			return data
+		}
+	}
+	if minSize <= c.onDiskCacheSizeLimit1 {
+		data = c.diskCaches[1].getChunkSlice(fid.Key, offset, length)
+		if len(data) >= int(minSize) {
+			return data
+		}
+	}
+	{
+		data = c.diskCaches[2].getChunkSlice(fid.Key, offset, length)
+		if len(data) >= int(minSize) {
+			return data
+		}
+	}
+
+	return nil
+}
+
 func (c *TieredChunkCache) SetChunk(fileId string, data []byte) {
 	if c == nil {
 		return
@@ -131,3 +189,10 @@ func (c *TieredChunkCache) Shutdown() {
 		diskCache.shutdown()
 	}
 }
+
// min returns the smaller of two ints.
func min(x, y int) int {
	if y < x {
		return y
	}
	return x
}

+ 14 - 0
weed/util/chunk_cache/chunk_cache_in_memory.go

@@ -31,6 +31,20 @@ func (c *ChunkCacheInMemory) GetChunk(fileId string) []byte {
 	return data
 }
 
+func (c *ChunkCacheInMemory) getChunkSlice(fileId string, offset, length uint64) ([]byte, error) {
+	item := c.cache.Get(fileId)
+	if item == nil {
+		return nil, nil
+	}
+	data := item.Value().([]byte)
+	item.Extend(time.Hour)
+	wanted := min(int(length), len(data)-int(offset))
+	if wanted < 0 {
+		return nil, ErrorOutOfBounds
+	}
+	return data[offset : int(offset)+wanted], nil
+}
+
 func (c *ChunkCacheInMemory) SetChunk(fileId string, data []byte) {
 	localCopy := make([]byte, len(data))
 	copy(localCopy, data)

+ 27 - 4
weed/util/chunk_cache/chunk_cache_on_disk.go

@@ -90,11 +90,11 @@ func (v *ChunkCacheVolume) Shutdown() {
 
 func (v *ChunkCacheVolume) doReset() {
 	v.Shutdown()
-	os.Truncate(v.fileName + ".dat", 0)
-	os.Truncate(v.fileName + ".idx", 0)
-	glog.V(4).Infof("cache removeAll %s ...", v.fileName + ".ldb")
+	os.Truncate(v.fileName+".dat", 0)
+	os.Truncate(v.fileName+".idx", 0)
+	glog.V(4).Infof("cache removeAll %s ...", v.fileName+".ldb")
 	os.RemoveAll(v.fileName + ".ldb")
-	glog.V(4).Infof("cache removed %s", v.fileName + ".ldb")
+	glog.V(4).Infof("cache removed %s", v.fileName+".ldb")
 }
 
 func (v *ChunkCacheVolume) Reset() (*ChunkCacheVolume, error) {
@@ -121,6 +121,29 @@ func (v *ChunkCacheVolume) GetNeedle(key types.NeedleId) ([]byte, error) {
 	return data, nil
 }
 
+func (v *ChunkCacheVolume) getNeedleSlice(key types.NeedleId, offset, length uint64) ([]byte, error) {
+	nv, ok := v.nm.Get(key)
+	if !ok {
+		return nil, storage.ErrorNotFound
+	}
+	wanted := min(int(length), int(nv.Size)-int(offset))
+	if wanted < 0 {
+		// should never happen, but better than panicing
+		return nil, ErrorOutOfBounds
+	}
+	data := make([]byte, wanted)
+	if readSize, readErr := v.DataBackend.ReadAt(data, nv.Offset.ToActualOffset()+int64(offset)); readErr != nil {
+		return nil, fmt.Errorf("read %s.dat [%d,%d): %v",
+			v.fileName, nv.Offset.ToActualOffset()+int64(offset), int(nv.Offset.ToActualOffset())+int(offset)+wanted, readErr)
+	} else {
+		if readSize != wanted {
+			return nil, fmt.Errorf("read %d, expected %d", readSize, wanted)
+		}
+	}
+
+	return data, nil
+}
+
 func (v *ChunkCacheVolume) WriteNeedle(key types.NeedleId, data []byte) error {
 
 	offset := v.fileSize

+ 22 - 0
weed/util/chunk_cache/on_disk_cache_layer.go

@@ -82,6 +82,28 @@ func (c *OnDiskCacheLayer) getChunk(needleId types.NeedleId) (data []byte) {
 
 }
 
+func (c *OnDiskCacheLayer) getChunkSlice(needleId types.NeedleId, offset, length uint64) (data []byte) {
+
+	var err error
+
+	for _, diskCache := range c.diskCaches {
+		data, err = diskCache.getNeedleSlice(needleId, offset, length)
+		if err == storage.ErrorNotFound {
+			continue
+		}
+		if err != nil {
+			glog.Errorf("failed to read cache file %s id %d", diskCache.fileName, needleId)
+			continue
+		}
+		if len(data) != 0 {
+			return
+		}
+	}
+
+	return nil
+
+}
+
 func (c *OnDiskCacheLayer) shutdown() {
 
 	for _, diskCache := range c.diskCaches {