volume_read.go

package storage

import (
	"fmt"
	"io"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/stats"
	"github.com/seaweedfs/seaweedfs/weed/storage/backend"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
	. "github.com/seaweedfs/seaweedfs/weed/storage/types"
	"github.com/seaweedfs/seaweedfs/weed/util/mem"
)

// PagedReadLimit is the size above which a read may first fetch only the needle metadata.
const PagedReadLimit = 1024 * 1024
// readNeedle fills in the Needle content by looking up n.Id from the NeedleMapper.
func (v *Volume) readNeedle(n *needle.Needle, readOption *ReadOption, onReadSizeFn func(size Size)) (count int, err error) {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()

	nv, ok := v.nm.Get(n.Id)
	if !ok || nv.Offset.IsZero() {
		return -1, ErrorNotFound
	}
	readSize := nv.Size
	if readSize.IsDeleted() {
		if readOption != nil && readOption.ReadDeleted && readSize != TombstoneFileSize {
			glog.V(3).Infof("reading deleted %s", n.String())
			readSize = -readSize
		} else {
			return -1, ErrorDeleted
		}
	}
	if readSize == 0 {
		return 0, nil
	}
	if onReadSizeFn != nil {
		onReadSizeFn(readSize)
	}
	if readOption != nil && readOption.AttemptMetaOnly && readSize > PagedReadLimit {
		readOption.VolumeRevision = v.SuperBlock.CompactionRevision
		err = n.ReadNeedleMeta(v.DataBackend, nv.Offset.ToActualOffset(), readSize, v.Version())
		if err == needle.ErrorSizeMismatch && OffsetSize == 4 {
			// with 4-byte offsets the needle may actually sit MaxPossibleVolumeSize beyond the recorded offset; retry there
			readOption.IsOutOfRange = true
			err = n.ReadNeedleMeta(v.DataBackend, nv.Offset.ToActualOffset()+int64(MaxPossibleVolumeSize), readSize, v.Version())
		}
		if err != nil {
			return 0, err
		}
		if !n.IsCompressed() && !n.IsChunkedManifest() {
			readOption.IsMetaOnly = true
		}
	}
	if readOption == nil || !readOption.IsMetaOnly {
		err = n.ReadData(v.DataBackend, nv.Offset.ToActualOffset(), readSize, v.Version())
		v.checkReadWriteError(err)
		if err != nil {
			return 0, err
		}
	}
	count = int(n.DataSize)
	if !n.HasTtl() {
		return
	}
	ttlMinutes := n.Ttl.Minutes()
	if ttlMinutes == 0 {
		return
	}
	if !n.HasLastModifiedDate() {
		return
	}
	if time.Now().Before(time.Unix(0, int64(n.AppendAtNs)).Add(time.Duration(ttlMinutes) * time.Minute)) {
		return
	}
	return -1, ErrorNotFound
}
// readNeedleMetaAt reads the needle metadata at a specific offset.
func (v *Volume) readNeedleMetaAt(n *needle.Needle, offset int64, size int32) (err error) {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()
	// a negative size marks a deleted needle; read only its metadata
	if size < 0 {
		size = 0
	}
	err = n.ReadNeedleMeta(v.DataBackend, offset, Size(size), v.Version())
	if err == needle.ErrorSizeMismatch && OffsetSize == 4 {
		err = n.ReadNeedleMeta(v.DataBackend, offset+int64(MaxPossibleVolumeSize), Size(size), v.Version())
	}
	if err != nil {
		return err
	}
	return nil
}
// readNeedleDataInto streams [offset, offset+size) of the needle data into writer,
// looking up n.Id from the NeedleMapper.
func (v *Volume) readNeedleDataInto(n *needle.Needle, readOption *ReadOption, writer io.Writer, offset int64, size int64) (err error) {
	if !readOption.HasSlowRead {
		v.dataFileAccessLock.RLock()
		defer v.dataFileAccessLock.RUnlock()
	}
	if readOption.HasSlowRead {
		v.dataFileAccessLock.RLock()
	}
	nv, ok := v.nm.Get(n.Id)
	if readOption.HasSlowRead {
		v.dataFileAccessLock.RUnlock()
	}
	if !ok || nv.Offset.IsZero() {
		return ErrorNotFound
	}
	readSize := nv.Size
	if readSize.IsDeleted() {
		if readOption != nil && readOption.ReadDeleted && readSize != TombstoneFileSize {
			glog.V(3).Infof("reading deleted %s", n.String())
			readSize = -readSize
		} else {
			return ErrorDeleted
		}
	}
	if readSize == 0 {
		return nil
	}
	actualOffset := nv.Offset.ToActualOffset()
	if readOption.IsOutOfRange {
		actualOffset += int64(MaxPossibleVolumeSize)
	}

	buf := mem.Allocate(min(readOption.ReadBufferSize, int(size)))
	defer mem.Free(buf)

	// read needle data in buffer-sized chunks
	crc := needle.CRC(0)
	for x := offset; x < offset+size; x += int64(len(buf)) {
		if readOption.HasSlowRead {
			v.dataFileAccessLock.RLock()
		}
		// possibly re-read the needle offset if the volume has been compacted
		if readOption.VolumeRevision != v.SuperBlock.CompactionRevision {
			// the volume is compacted
			nv, ok = v.nm.Get(n.Id)
			if !ok || nv.Offset.IsZero() {
				if readOption.HasSlowRead {
					v.dataFileAccessLock.RUnlock()
				}
				return ErrorNotFound
			}
			actualOffset = nv.Offset.ToActualOffset()
			readOption.VolumeRevision = v.SuperBlock.CompactionRevision
		}
		count, err := n.ReadNeedleData(v.DataBackend, actualOffset, buf, x)
		if readOption.HasSlowRead {
			v.dataFileAccessLock.RUnlock()
		}
		toWrite := min(count, int(offset+size-x))
		if toWrite > 0 {
			crc = crc.Update(buf[0:toWrite])
			// The crc.Value() function is to be deprecated. This double check keeps backward compatibility
			// with seaweed versions that used crc.Value() instead of uint32(crc); the change appears in
			// commit 056c480eb and the switch shipped in version 3.09.
			if offset == 0 && size == int64(n.DataSize) && int64(count) == size && (n.Checksum != crc && uint32(n.Checksum) != crc.Value()) {
				// This check works only if the buffer is big enough to hold the whole needle data
				// and we ask for all of the needle data.
				// Otherwise we cannot check the validity of partially acquired data.
				stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorCRC).Inc()
				return fmt.Errorf("ReadNeedleData checksum %v expected %v for Needle: %v,%v", crc, n.Checksum, v.Id, n)
			}
			if _, err = writer.Write(buf[0:toWrite]); err != nil {
				return fmt.Errorf("ReadNeedleData write: %v", err)
			}
		}
		if err != nil {
			if err == io.EOF {
				err = nil
				break
			}
			return fmt.Errorf("ReadNeedleData: %v", err)
		}
		if count <= 0 {
			break
		}
	}
	if offset == 0 && size == int64(n.DataSize) && (n.Checksum != crc && uint32(n.Checksum) != crc.Value()) {
		// The crc.Value() function is to be deprecated. This double check keeps backward compatibility
		// with seaweed versions that used crc.Value() instead of uint32(crc); the change appears in
		// commit 056c480eb and the switch shipped in version 3.09.
		stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorCRC).Inc()
		return fmt.Errorf("ReadNeedleData checksum %v expected %v for Needle: %v,%v", crc, n.Checksum, v.Id, n)
	}
	return nil
}
func min(x, y int) int {
	if x < y {
		return x
	}
	return y
}
// ReadNeedleBlob reads the raw needle bytes at the given offset and size.
func (v *Volume) ReadNeedleBlob(offset int64, size Size) ([]byte, error) {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()

	return needle.ReadNeedleBlob(v.DataBackend, offset, size, v.Version())
}
// VolumeFileScanner is the callback interface used when walking a volume data file.
type VolumeFileScanner interface {
	VisitSuperBlock(super_block.SuperBlock) error
	ReadNeedleBody() bool
	VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error
}
// ScanVolumeFile loads the volume without its index and visits every needle with the given scanner.
func ScanVolumeFile(dirname string, collection string, id needle.VolumeId,
	needleMapKind NeedleMapKind,
	volumeFileScanner VolumeFileScanner) (err error) {
	var v *Volume
	if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil {
		return fmt.Errorf("failed to load volume %d: %v", id, err)
	}
	if err = volumeFileScanner.VisitSuperBlock(v.SuperBlock); err != nil {
		return fmt.Errorf("failed to process volume %d super block: %v", id, err)
	}
	defer v.Close()

	version := v.Version()

	offset := int64(v.SuperBlock.BlockSize())

	return ScanVolumeFileFrom(version, v.DataBackend, offset, volumeFileScanner)
}
func ScanVolumeFileFrom(version needle.Version, datBackend backend.BackendStorageFile, offset int64, volumeFileScanner VolumeFileScanner) (err error) {
	n, nh, rest, e := needle.ReadNeedleHeader(datBackend, version, offset)
	if e != nil {
		if e == io.EOF {
			return nil
		}
		return fmt.Errorf("cannot read %s at offset %d: %v", datBackend.Name(), offset, e)
	}
	for n != nil {
		var needleBody []byte
		if volumeFileScanner.ReadNeedleBody() {
			// println("needle", n.Id.String(), "offset", offset, "size", n.Size, "rest", rest)
			if needleBody, err = n.ReadNeedleBody(datBackend, version, offset+NeedleHeaderSize, rest); err != nil {
				glog.V(0).Infof("cannot read needle head [%d, %d) body [%d, %d) body length %d: %v", offset, offset+NeedleHeaderSize, offset+NeedleHeaderSize, offset+NeedleHeaderSize+rest, rest, err)
				// err = fmt.Errorf("cannot read needle body: %v", err)
				// return
			}
		}
		err := volumeFileScanner.VisitNeedle(n, offset, nh, needleBody)
		if err == io.EOF {
			return nil
		}
		if err != nil {
			glog.V(0).Infof("visit needle error: %v", err)
			return fmt.Errorf("visit needle error: %v", err)
		}
		offset += NeedleHeaderSize + rest
		glog.V(4).Infof("==> new entry offset %d", offset)
		if n, nh, rest, err = needle.ReadNeedleHeader(datBackend, version, offset); err != nil {
			if err == io.EOF {
				return nil
			}
			return fmt.Errorf("cannot read needle header at offset %d: %v", offset, err)
		}
		glog.V(4).Infof("new entry needle size:%d rest:%d", n.Size, rest)
	}
	return nil
}
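
// The following is a minimal usage sketch of the VolumeFileScanner interface and
// ScanVolumeFile above, not part of the original file: a hypothetical scanner that
// only counts the needle entries visited. The type names, the countNeedleEntries
// helper, and the NeedleMapInMemory argument are illustrative assumptions.
type needleCounter struct {
	count int
}

func (s *needleCounter) VisitSuperBlock(sb super_block.SuperBlock) error { return nil }

// Returning false tells ScanVolumeFileFrom to read only needle headers, keeping the scan cheap.
func (s *needleCounter) ReadNeedleBody() bool { return false }

func (s *needleCounter) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
	s.count++
	return nil
}

// countNeedleEntries is a hypothetical helper showing how a caller might drive the scan.
func countNeedleEntries(dir, collection string, id needle.VolumeId) (int, error) {
	scanner := &needleCounter{}
	// NeedleMapInMemory is assumed here as the in-memory NeedleMapKind value.
	if err := ScanVolumeFile(dir, collection, id, NeedleMapInMemory, scanner); err != nil {
		return 0, err
	}
	return scanner.count, nil
}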