needle_read_write.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. package needle
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "github.com/chrislusf/seaweedfs/weed/glog"
  7. "github.com/chrislusf/seaweedfs/weed/storage/backend"
  8. . "github.com/chrislusf/seaweedfs/weed/storage/types"
  9. "github.com/chrislusf/seaweedfs/weed/util"
  10. "io"
  11. "math"
  12. "sync"
  13. )
  // Bit masks tested and set on Needle.Flags by the Has*/Set* accessors in
  // this file, followed by the byte widths used when the optional
  // last-modified and TTL fields are written to and read from a needle body.
  const (
  	FlagIsCompressed        = 1 << 0 // 0x01
  	FlagHasName             = 1 << 1 // 0x02
  	FlagHasMime             = 1 << 2 // 0x04
  	FlagHasLastModifiedDate = 1 << 3 // 0x08
  	FlagHasTtl              = 1 << 4 // 0x10
  	FlagHasPairs            = 1 << 5 // 0x20
  	FlagIsChunkManifest     = 1 << 7 // 0x80

  	// LastModifiedBytesLength is how many low-order bytes of LastModified are stored.
  	LastModifiedBytesLength = 5
  	// TtlBytesLength is how many bytes the TTL field occupies.
  	TtlBytesLength = 2
  )
  var (
  	// ErrorSizeMismatch is returned by ReadBytes when the size parsed from
  	// the needle header differs from the size the caller expected.
  	ErrorSizeMismatch = errors.New("size mismatch")
  )
  26. func (n *Needle) DiskSize(version Version) int64 {
  27. return GetActualSize(n.Size, version)
  28. }
  // bufPool hands out *bytes.Buffer scratch buffers; Append borrows one per
  // call to assemble the bytes of a needle before writing them.
  var bufPool = sync.Pool{
  	New: func() interface{} {
  		return &bytes.Buffer{}
  	},
  }
  34. func (n *Needle) prepareWriteBuffer(version Version, writeBytes *bytes.Buffer) (Size, int64, error) {
  35. writeBytes.Reset()
  36. switch version {
  37. case Version1:
  38. header := make([]byte, NeedleHeaderSize)
  39. CookieToBytes(header[0:CookieSize], n.Cookie)
  40. NeedleIdToBytes(header[CookieSize:CookieSize+NeedleIdSize], n.Id)
  41. n.Size = Size(len(n.Data))
  42. SizeToBytes(header[CookieSize+NeedleIdSize:CookieSize+NeedleIdSize+SizeSize], n.Size)
  43. size := n.Size
  44. actualSize := NeedleHeaderSize + int64(n.Size)
  45. writeBytes.Write(header)
  46. writeBytes.Write(n.Data)
  47. padding := PaddingLength(n.Size, version)
  48. util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value())
  49. writeBytes.Write(header[0 : NeedleChecksumSize+padding])
  50. return size, actualSize, nil
  51. case Version2, Version3:
  52. header := make([]byte, NeedleHeaderSize+TimestampSize) // adding timestamp to reuse it and avoid extra allocation
  53. CookieToBytes(header[0:CookieSize], n.Cookie)
  54. NeedleIdToBytes(header[CookieSize:CookieSize+NeedleIdSize], n.Id)
  55. if len(n.Name) >= math.MaxUint8 {
  56. n.NameSize = math.MaxUint8
  57. } else {
  58. n.NameSize = uint8(len(n.Name))
  59. }
  60. n.DataSize, n.MimeSize = uint32(len(n.Data)), uint8(len(n.Mime))
  61. if n.DataSize > 0 {
  62. n.Size = 4 + Size(n.DataSize) + 1
  63. if n.HasName() {
  64. n.Size = n.Size + 1 + Size(n.NameSize)
  65. }
  66. if n.HasMime() {
  67. n.Size = n.Size + 1 + Size(n.MimeSize)
  68. }
  69. if n.HasLastModifiedDate() {
  70. n.Size = n.Size + LastModifiedBytesLength
  71. }
  72. if n.HasTtl() {
  73. n.Size = n.Size + TtlBytesLength
  74. }
  75. if n.HasPairs() {
  76. n.Size += 2 + Size(n.PairsSize)
  77. }
  78. } else {
  79. n.Size = 0
  80. }
  81. SizeToBytes(header[CookieSize+NeedleIdSize:CookieSize+NeedleIdSize+SizeSize], n.Size)
  82. writeBytes.Write(header[0:NeedleHeaderSize])
  83. if n.DataSize > 0 {
  84. util.Uint32toBytes(header[0:4], n.DataSize)
  85. writeBytes.Write(header[0:4])
  86. writeBytes.Write(n.Data)
  87. util.Uint8toBytes(header[0:1], n.Flags)
  88. writeBytes.Write(header[0:1])
  89. if n.HasName() {
  90. util.Uint8toBytes(header[0:1], n.NameSize)
  91. writeBytes.Write(header[0:1])
  92. writeBytes.Write(n.Name[:n.NameSize])
  93. }
  94. if n.HasMime() {
  95. util.Uint8toBytes(header[0:1], n.MimeSize)
  96. writeBytes.Write(header[0:1])
  97. writeBytes.Write(n.Mime)
  98. }
  99. if n.HasLastModifiedDate() {
  100. util.Uint64toBytes(header[0:8], n.LastModified)
  101. writeBytes.Write(header[8-LastModifiedBytesLength : 8])
  102. }
  103. if n.HasTtl() && n.Ttl != nil {
  104. n.Ttl.ToBytes(header[0:TtlBytesLength])
  105. writeBytes.Write(header[0:TtlBytesLength])
  106. }
  107. if n.HasPairs() {
  108. util.Uint16toBytes(header[0:2], n.PairsSize)
  109. writeBytes.Write(header[0:2])
  110. writeBytes.Write(n.Pairs)
  111. }
  112. }
  113. padding := PaddingLength(n.Size, version)
  114. util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value())
  115. if version == Version2 {
  116. writeBytes.Write(header[0 : NeedleChecksumSize+padding])
  117. } else {
  118. // version3
  119. util.Uint64toBytes(header[NeedleChecksumSize:NeedleChecksumSize+TimestampSize], n.AppendAtNs)
  120. writeBytes.Write(header[0 : NeedleChecksumSize+TimestampSize+padding])
  121. }
  122. return Size(n.DataSize), GetActualSize(n.Size, version), nil
  123. }
  124. return 0, 0, fmt.Errorf("Unsupported Version! (%d)", version)
  125. }
  126. func (n *Needle) Append(w backend.BackendStorageFile, version Version) (offset uint64, size Size, actualSize int64, err error) {
  127. if end, _, e := w.GetStat(); e == nil {
  128. defer func(w backend.BackendStorageFile, off int64) {
  129. if err != nil {
  130. if te := w.Truncate(end); te != nil {
  131. glog.V(0).Infof("Failed to truncate %s back to %d with error: %v", w.Name(), end, te)
  132. }
  133. }
  134. }(w, end)
  135. offset = uint64(end)
  136. } else {
  137. err = fmt.Errorf("Cannot Read Current Volume Position: %v", e)
  138. return
  139. }
  140. if offset >= MaxPossibleVolumeSize && n.Size.IsValid() {
  141. err = fmt.Errorf("Volume Size %d Exeededs %d", offset, MaxPossibleVolumeSize)
  142. return
  143. }
  144. bytesBuffer := bufPool.Get().(*bytes.Buffer)
  145. defer bufPool.Put(bytesBuffer)
  146. size, actualSize, err = n.prepareWriteBuffer(version, bytesBuffer)
  147. if err == nil {
  148. _, err = w.WriteAt(bytesBuffer.Bytes(), int64(offset))
  149. }
  150. return offset, size, actualSize, err
  151. }
  152. func WriteNeedleBlob(w backend.BackendStorageFile, dataSlice []byte, size Size, appendAtNs uint64, version Version) (offset uint64, err error) {
  153. if end, _, e := w.GetStat(); e == nil {
  154. defer func(w backend.BackendStorageFile, off int64) {
  155. if err != nil {
  156. if te := w.Truncate(end); te != nil {
  157. glog.V(0).Infof("Failed to truncate %s back to %d with error: %v", w.Name(), end, te)
  158. }
  159. }
  160. }(w, end)
  161. offset = uint64(end)
  162. } else {
  163. err = fmt.Errorf("Cannot Read Current Volume Position: %v", e)
  164. return
  165. }
  166. if version == Version3 {
  167. tsOffset := NeedleHeaderSize + size + NeedleChecksumSize
  168. util.Uint64toBytes(dataSlice[tsOffset:tsOffset+TimestampSize], appendAtNs)
  169. }
  170. if err == nil {
  171. _, err = w.WriteAt(dataSlice, int64(offset))
  172. }
  173. return
  174. }
  175. func ReadNeedleBlob(r backend.BackendStorageFile, offset int64, size Size, version Version) (dataSlice []byte, err error) {
  176. dataSize := GetActualSize(size, version)
  177. dataSlice = make([]byte, int(dataSize))
  178. var n int
  179. n, err = r.ReadAt(dataSlice, offset)
  180. if err != nil && int64(n) == dataSize {
  181. err = nil
  182. }
  183. if err != nil {
  184. fileSize, _, _ := r.GetStat()
  185. println("n", n, "dataSize", dataSize, "offset", offset, "fileSize", fileSize)
  186. }
  187. return dataSlice, err
  188. }
  189. // ReadBytes hydrates the needle from the bytes buffer, with only n.Id is set.
  190. func (n *Needle) ReadBytes(bytes []byte, offset int64, size Size, version Version) (err error) {
  191. n.ParseNeedleHeader(bytes)
  192. if n.Size != size {
  193. // cookie is not always passed in for this API. Use size to do preliminary checking.
  194. if OffsetSize == 4 && offset < int64(MaxPossibleVolumeSize) {
  195. glog.Errorf("entry not found1: offset %d found id %x size %d, expected size %d", offset, n.Id, n.Size, size)
  196. return ErrorSizeMismatch
  197. }
  198. return fmt.Errorf("entry not found: offset %d found id %x size %d, expected size %d", offset, n.Id, n.Size, size)
  199. }
  200. switch version {
  201. case Version1:
  202. n.Data = bytes[NeedleHeaderSize : NeedleHeaderSize+size]
  203. case Version2, Version3:
  204. err = n.readNeedleDataVersion2(bytes[NeedleHeaderSize : NeedleHeaderSize+int(n.Size)])
  205. }
  206. if err != nil && err != io.EOF {
  207. return err
  208. }
  209. if size > 0 {
  210. checksum := util.BytesToUint32(bytes[NeedleHeaderSize+size : NeedleHeaderSize+size+NeedleChecksumSize])
  211. newChecksum := NewCRC(n.Data)
  212. if checksum != newChecksum.Value() {
  213. return errors.New("CRC error! Data On Disk Corrupted")
  214. }
  215. n.Checksum = newChecksum
  216. }
  217. if version == Version3 {
  218. tsOffset := NeedleHeaderSize + size + NeedleChecksumSize
  219. n.AppendAtNs = util.BytesToUint64(bytes[tsOffset : tsOffset+TimestampSize])
  220. }
  221. return nil
  222. }
  223. // ReadData hydrates the needle from the file, with only n.Id is set.
  224. func (n *Needle) ReadData(r backend.BackendStorageFile, offset int64, size Size, version Version) (err error) {
  225. bytes, err := ReadNeedleBlob(r, offset, size, version)
  226. if err != nil {
  227. return err
  228. }
  229. return n.ReadBytes(bytes, offset, size, version)
  230. }
  231. func (n *Needle) ParseNeedleHeader(bytes []byte) {
  232. n.Cookie = BytesToCookie(bytes[0:CookieSize])
  233. n.Id = BytesToNeedleId(bytes[CookieSize : CookieSize+NeedleIdSize])
  234. n.Size = BytesToSize(bytes[CookieSize+NeedleIdSize : NeedleHeaderSize])
  235. }
  236. func (n *Needle) readNeedleDataVersion2(bytes []byte) (err error) {
  237. index, lenBytes := 0, len(bytes)
  238. if index < lenBytes {
  239. n.DataSize = util.BytesToUint32(bytes[index : index+4])
  240. index = index + 4
  241. if int(n.DataSize)+index > lenBytes {
  242. return fmt.Errorf("index out of range %d", 1)
  243. }
  244. n.Data = bytes[index : index+int(n.DataSize)]
  245. index = index + int(n.DataSize)
  246. n.Flags = bytes[index]
  247. index = index + 1
  248. }
  249. if index < lenBytes && n.HasName() {
  250. n.NameSize = uint8(bytes[index])
  251. index = index + 1
  252. if int(n.NameSize)+index > lenBytes {
  253. return fmt.Errorf("index out of range %d", 2)
  254. }
  255. n.Name = bytes[index : index+int(n.NameSize)]
  256. index = index + int(n.NameSize)
  257. }
  258. if index < lenBytes && n.HasMime() {
  259. n.MimeSize = uint8(bytes[index])
  260. index = index + 1
  261. if int(n.MimeSize)+index > lenBytes {
  262. return fmt.Errorf("index out of range %d", 3)
  263. }
  264. n.Mime = bytes[index : index+int(n.MimeSize)]
  265. index = index + int(n.MimeSize)
  266. }
  267. if index < lenBytes && n.HasLastModifiedDate() {
  268. if LastModifiedBytesLength+index > lenBytes {
  269. return fmt.Errorf("index out of range %d", 4)
  270. }
  271. n.LastModified = util.BytesToUint64(bytes[index : index+LastModifiedBytesLength])
  272. index = index + LastModifiedBytesLength
  273. }
  274. if index < lenBytes && n.HasTtl() {
  275. if TtlBytesLength+index > lenBytes {
  276. return fmt.Errorf("index out of range %d", 5)
  277. }
  278. n.Ttl = LoadTTLFromBytes(bytes[index : index+TtlBytesLength])
  279. index = index + TtlBytesLength
  280. }
  281. if index < lenBytes && n.HasPairs() {
  282. if 2+index > lenBytes {
  283. return fmt.Errorf("index out of range %d", 6)
  284. }
  285. n.PairsSize = util.BytesToUint16(bytes[index : index+2])
  286. index += 2
  287. if int(n.PairsSize)+index > lenBytes {
  288. return fmt.Errorf("index out of range %d", 7)
  289. }
  290. end := index + int(n.PairsSize)
  291. n.Pairs = bytes[index:end]
  292. index = end
  293. }
  294. return nil
  295. }
  296. func ReadNeedleHeader(r backend.BackendStorageFile, version Version, offset int64) (n *Needle, bytes []byte, bodyLength int64, err error) {
  297. n = new(Needle)
  298. if version == Version1 || version == Version2 || version == Version3 {
  299. bytes = make([]byte, NeedleHeaderSize)
  300. var count int
  301. count, err = r.ReadAt(bytes, offset)
  302. if count <= 0 || err != nil {
  303. return nil, bytes, 0, err
  304. }
  305. n.ParseNeedleHeader(bytes)
  306. bodyLength = NeedleBodyLength(n.Size, version)
  307. }
  308. return
  309. }
  310. func PaddingLength(needleSize Size, version Version) Size {
  311. if version == Version3 {
  312. // this is same value as version2, but just listed here for clarity
  313. return NeedlePaddingSize - ((NeedleHeaderSize + needleSize + NeedleChecksumSize + TimestampSize) % NeedlePaddingSize)
  314. }
  315. return NeedlePaddingSize - ((NeedleHeaderSize + needleSize + NeedleChecksumSize) % NeedlePaddingSize)
  316. }
  317. func NeedleBodyLength(needleSize Size, version Version) int64 {
  318. if version == Version3 {
  319. return int64(needleSize) + NeedleChecksumSize + TimestampSize + int64(PaddingLength(needleSize, version))
  320. }
  321. return int64(needleSize) + NeedleChecksumSize + int64(PaddingLength(needleSize, version))
  322. }
  323. //n should be a needle already read the header
  324. //the input stream will read until next file entry
  325. func (n *Needle) ReadNeedleBody(r backend.BackendStorageFile, version Version, offset int64, bodyLength int64) (bytes []byte, err error) {
  326. if bodyLength <= 0 {
  327. return nil, nil
  328. }
  329. bytes = make([]byte, bodyLength)
  330. if _, err = r.ReadAt(bytes, offset); err != nil {
  331. return
  332. }
  333. err = n.ReadNeedleBodyBytes(bytes, version)
  334. return
  335. }
  336. func (n *Needle) ReadNeedleBodyBytes(needleBody []byte, version Version) (err error) {
  337. if len(needleBody) <= 0 {
  338. return nil
  339. }
  340. switch version {
  341. case Version1:
  342. n.Data = needleBody[:n.Size]
  343. n.Checksum = NewCRC(n.Data)
  344. case Version2, Version3:
  345. err = n.readNeedleDataVersion2(needleBody[0:n.Size])
  346. n.Checksum = NewCRC(n.Data)
  347. if version == Version3 {
  348. tsOffset := n.Size + NeedleChecksumSize
  349. n.AppendAtNs = util.BytesToUint64(needleBody[tsOffset : tsOffset+TimestampSize])
  350. }
  351. default:
  352. err = fmt.Errorf("unsupported version %d!", version)
  353. }
  354. return
  355. }
  356. func (n *Needle) IsCompressed() bool {
  357. return n.Flags&FlagIsCompressed > 0
  358. }
  359. func (n *Needle) SetIsCompressed() {
  360. n.Flags = n.Flags | FlagIsCompressed
  361. }
  362. func (n *Needle) HasName() bool {
  363. return n.Flags&FlagHasName > 0
  364. }
  365. func (n *Needle) SetHasName() {
  366. n.Flags = n.Flags | FlagHasName
  367. }
  368. func (n *Needle) HasMime() bool {
  369. return n.Flags&FlagHasMime > 0
  370. }
  371. func (n *Needle) SetHasMime() {
  372. n.Flags = n.Flags | FlagHasMime
  373. }
  374. func (n *Needle) HasLastModifiedDate() bool {
  375. return n.Flags&FlagHasLastModifiedDate > 0
  376. }
  377. func (n *Needle) SetHasLastModifiedDate() {
  378. n.Flags = n.Flags | FlagHasLastModifiedDate
  379. }
  380. func (n *Needle) HasTtl() bool {
  381. return n.Flags&FlagHasTtl > 0
  382. }
  383. func (n *Needle) SetHasTtl() {
  384. n.Flags = n.Flags | FlagHasTtl
  385. }
  386. func (n *Needle) IsChunkedManifest() bool {
  387. return n.Flags&FlagIsChunkManifest > 0
  388. }
  389. func (n *Needle) SetIsChunkManifest() {
  390. n.Flags = n.Flags | FlagIsChunkManifest
  391. }
  392. func (n *Needle) HasPairs() bool {
  393. return n.Flags&FlagHasPairs != 0
  394. }
  395. func (n *Needle) SetHasPairs() {
  396. n.Flags = n.Flags | FlagHasPairs
  397. }
  398. func GetActualSize(size Size, version Version) int64 {
  399. return NeedleHeaderSize + NeedleBodyLength(size, version)
  400. }