filechunk_manifest.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. package filer
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/chrislusf/seaweedfs/weed/wdclient"
  6. "io"
  7. "math"
  8. "time"
  9. "github.com/golang/protobuf/proto"
  10. "github.com/chrislusf/seaweedfs/weed/glog"
  11. "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
  12. "github.com/chrislusf/seaweedfs/weed/util"
  13. )
  // ManifestBatch is the merge factor MaybeManifestize hands to
  // doMaybeManifestize: the number of data chunks that are folded into a
  // single manifest chunk.
  const ManifestBatch = 1000
  17. func HasChunkManifest(chunks []*filer_pb.FileChunk) bool {
  18. for _, chunk := range chunks {
  19. if chunk.IsChunkManifest {
  20. return true
  21. }
  22. }
  23. return false
  24. }
  25. func SeparateManifestChunks(chunks []*filer_pb.FileChunk) (manifestChunks, nonManifestChunks []*filer_pb.FileChunk) {
  26. for _, c := range chunks {
  27. if c.IsChunkManifest {
  28. manifestChunks = append(manifestChunks, c)
  29. } else {
  30. nonManifestChunks = append(nonManifestChunks, c)
  31. }
  32. }
  33. return
  34. }
  35. func ResolveChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) (dataChunks, manifestChunks []*filer_pb.FileChunk, manifestResolveErr error) {
  36. // TODO maybe parallel this
  37. for _, chunk := range chunks {
  38. if !chunk.IsChunkManifest {
  39. dataChunks = append(dataChunks, chunk)
  40. continue
  41. }
  42. resolvedChunks, err := ResolveOneChunkManifest(lookupFileIdFn, chunk)
  43. if err != nil {
  44. return chunks, nil, err
  45. }
  46. manifestChunks = append(manifestChunks, chunk)
  47. // recursive
  48. dchunks, mchunks, subErr := ResolveChunkManifest(lookupFileIdFn, resolvedChunks)
  49. if subErr != nil {
  50. return chunks, nil, subErr
  51. }
  52. dataChunks = append(dataChunks, dchunks...)
  53. manifestChunks = append(manifestChunks, mchunks...)
  54. }
  55. return
  56. }
  57. func ResolveOneChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunk *filer_pb.FileChunk) (dataChunks []*filer_pb.FileChunk, manifestResolveErr error) {
  58. if !chunk.IsChunkManifest {
  59. return
  60. }
  61. // IsChunkManifest
  62. data, err := fetchChunk(lookupFileIdFn, chunk.GetFileIdString(), chunk.CipherKey, chunk.IsCompressed)
  63. if err != nil {
  64. return nil, fmt.Errorf("fail to read manifest %s: %v", chunk.GetFileIdString(), err)
  65. }
  66. m := &filer_pb.FileChunkManifest{}
  67. if err := proto.Unmarshal(data, m); err != nil {
  68. return nil, fmt.Errorf("fail to unmarshal manifest %s: %v", chunk.GetFileIdString(), err)
  69. }
  70. // recursive
  71. filer_pb.AfterEntryDeserialization(m.Chunks)
  72. return m.Chunks, nil
  73. }
  74. // TODO fetch from cache for weed mount?
  75. func fetchChunk(lookupFileIdFn wdclient.LookupFileIdFunctionType, fileId string, cipherKey []byte, isGzipped bool) ([]byte, error) {
  76. urlStrings, err := lookupFileIdFn(fileId)
  77. if err != nil {
  78. glog.Errorf("operation LookupFileId %s failed, err: %v", fileId, err)
  79. return nil, err
  80. }
  81. return retriedFetchChunkData(urlStrings, cipherKey, isGzipped, true, 0, 0)
  82. }
  83. func retriedFetchChunkData(urlStrings []string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64, size int) ([]byte, error) {
  84. var err error
  85. var buffer bytes.Buffer
  86. var shouldRetry bool
  87. for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
  88. for _, urlString := range urlStrings {
  89. shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, isFullChunk, offset, size, func(data []byte) {
  90. buffer.Write(data)
  91. })
  92. if !shouldRetry {
  93. break
  94. }
  95. if err != nil {
  96. glog.V(0).Infof("read %s failed, err: %v", urlString, err)
  97. buffer.Reset()
  98. } else {
  99. break
  100. }
  101. }
  102. if err != nil && shouldRetry {
  103. glog.V(0).Infof("retry reading in %v", waitTime)
  104. time.Sleep(waitTime)
  105. } else {
  106. break
  107. }
  108. }
  109. return buffer.Bytes(), err
  110. }
  111. func MaybeManifestize(saveFunc SaveDataAsChunkFunctionType, inputChunks []*filer_pb.FileChunk) (chunks []*filer_pb.FileChunk, err error) {
  112. return doMaybeManifestize(saveFunc, inputChunks, ManifestBatch, mergeIntoManifest)
  113. }
  114. func doMaybeManifestize(saveFunc SaveDataAsChunkFunctionType, inputChunks []*filer_pb.FileChunk, mergeFactor int, mergefn func(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error)) (chunks []*filer_pb.FileChunk, err error) {
  115. var dataChunks []*filer_pb.FileChunk
  116. for _, chunk := range inputChunks {
  117. if !chunk.IsChunkManifest {
  118. dataChunks = append(dataChunks, chunk)
  119. } else {
  120. chunks = append(chunks, chunk)
  121. }
  122. }
  123. remaining := len(dataChunks)
  124. for i := 0; i+mergeFactor <= len(dataChunks); i += mergeFactor {
  125. chunk, err := mergefn(saveFunc, dataChunks[i:i+mergeFactor])
  126. if err != nil {
  127. return dataChunks, err
  128. }
  129. chunks = append(chunks, chunk)
  130. remaining -= mergeFactor
  131. }
  132. // remaining
  133. for i := len(dataChunks) - remaining; i < len(dataChunks); i++ {
  134. chunks = append(chunks, dataChunks[i])
  135. }
  136. return
  137. }
  138. func mergeIntoManifest(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error) {
  139. filer_pb.BeforeEntrySerialization(dataChunks)
  140. // create and serialize the manifest
  141. data, serErr := proto.Marshal(&filer_pb.FileChunkManifest{
  142. Chunks: dataChunks,
  143. })
  144. if serErr != nil {
  145. return nil, fmt.Errorf("serializing manifest: %v", serErr)
  146. }
  147. minOffset, maxOffset := int64(math.MaxInt64), int64(math.MinInt64)
  148. for _, chunk := range dataChunks {
  149. if minOffset > int64(chunk.Offset) {
  150. minOffset = chunk.Offset
  151. }
  152. if maxOffset < int64(chunk.Size)+chunk.Offset {
  153. maxOffset = int64(chunk.Size) + chunk.Offset
  154. }
  155. }
  156. manifestChunk, _, _, err = saveFunc(bytes.NewReader(data), "", 0)
  157. if err != nil {
  158. return nil, err
  159. }
  160. manifestChunk.IsChunkManifest = true
  161. manifestChunk.Offset = minOffset
  162. manifestChunk.Size = uint64(maxOffset - minOffset)
  163. return
  164. }
  165. type SaveDataAsChunkFunctionType func(reader io.Reader, name string, offset int64) (chunk *filer_pb.FileChunk, collection, replication string, err error)