needle_parse_upload.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. package needle
  2. import (
  3. "crypto/md5"
  4. "encoding/base64"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "mime"
  9. "net/http"
  10. "path"
  11. "path/filepath"
  12. "strconv"
  13. "strings"
  14. "github.com/chrislusf/seaweedfs/weed/glog"
  15. "github.com/chrislusf/seaweedfs/weed/util"
  16. )
  17. type ParsedUpload struct {
  18. FileName string
  19. Data []byte
  20. MimeType string
  21. PairMap map[string]string
  22. IsGzipped bool
  23. // IsZstd bool
  24. OriginalDataSize int
  25. ModifiedTime uint64
  26. Ttl *TTL
  27. IsChunkedFile bool
  28. UncompressedData []byte
  29. ContentMd5 string
  30. }
  31. func ParseUpload(r *http.Request, sizeLimit int64) (pu *ParsedUpload, e error) {
  32. pu = &ParsedUpload{}
  33. pu.PairMap = make(map[string]string)
  34. for k, v := range r.Header {
  35. if len(v) > 0 && strings.HasPrefix(k, PairNamePrefix) {
  36. pu.PairMap[k] = v[0]
  37. }
  38. }
  39. if r.Method == "POST" {
  40. e = parseMultipart(r, sizeLimit, pu)
  41. } else {
  42. e = parsePut(r, sizeLimit, pu)
  43. }
  44. if e != nil {
  45. return
  46. }
  47. pu.ModifiedTime, _ = strconv.ParseUint(r.FormValue("ts"), 10, 64)
  48. pu.Ttl, _ = ReadTTL(r.FormValue("ttl"))
  49. pu.OriginalDataSize = len(pu.Data)
  50. pu.UncompressedData = pu.Data
  51. // println("received data", len(pu.Data), "isGzipped", pu.IsGzipped, "mime", pu.MimeType, "name", pu.FileName)
  52. if pu.IsGzipped {
  53. if unzipped, e := util.DecompressData(pu.Data); e == nil {
  54. pu.OriginalDataSize = len(unzipped)
  55. pu.UncompressedData = unzipped
  56. // println("ungzipped data size", len(unzipped))
  57. }
  58. } else {
  59. ext := filepath.Base(pu.FileName)
  60. mimeType := pu.MimeType
  61. if mimeType == "" {
  62. mimeType = http.DetectContentType(pu.Data)
  63. }
  64. // println("detected mimetype to", pu.MimeType)
  65. if mimeType == "application/octet-stream" {
  66. mimeType = ""
  67. }
  68. if shouldBeCompressed, iAmSure := util.IsCompressableFileType(ext, mimeType); mimeType == "" && !iAmSure || shouldBeCompressed && iAmSure {
  69. // println("ext", ext, "iAmSure", iAmSure, "shouldBeCompressed", shouldBeCompressed, "mimeType", pu.MimeType)
  70. if compressedData, err := util.GzipData(pu.Data); err == nil {
  71. if len(compressedData)*10 < len(pu.Data)*9 {
  72. pu.Data = compressedData
  73. pu.IsGzipped = true
  74. }
  75. // println("gzipped data size", len(compressedData))
  76. }
  77. }
  78. }
  79. // md5
  80. h := md5.New()
  81. h.Write(pu.UncompressedData)
  82. pu.ContentMd5 = base64.StdEncoding.EncodeToString(h.Sum(nil))
  83. if expectedChecksum := r.Header.Get("Content-MD5"); expectedChecksum != "" {
  84. if expectedChecksum != pu.ContentMd5 {
  85. e = fmt.Errorf("Content-MD5 did not match md5 of file data expected [%s] received [%s] size %d", expectedChecksum, pu.ContentMd5, len(pu.UncompressedData))
  86. return
  87. }
  88. }
  89. return
  90. }
  91. func parsePut(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
  92. pu.IsGzipped = r.Header.Get("Content-Encoding") == "gzip"
  93. // pu.IsZstd = r.Header.Get("Content-Encoding") == "zstd"
  94. pu.MimeType = r.Header.Get("Content-Type")
  95. pu.FileName = ""
  96. pu.Data, e = ioutil.ReadAll(io.LimitReader(r.Body, sizeLimit+1))
  97. if e == io.EOF || int64(pu.OriginalDataSize) == sizeLimit+1 {
  98. io.Copy(ioutil.Discard, r.Body)
  99. }
  100. r.Body.Close()
  101. return nil
  102. }
  103. func parseMultipart(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
  104. defer func() {
  105. if e != nil && r.Body != nil {
  106. io.Copy(ioutil.Discard, r.Body)
  107. r.Body.Close()
  108. }
  109. }()
  110. form, fe := r.MultipartReader()
  111. if fe != nil {
  112. glog.V(0).Infoln("MultipartReader [ERROR]", fe)
  113. e = fe
  114. return
  115. }
  116. // first multi-part item
  117. part, fe := form.NextPart()
  118. if fe != nil {
  119. glog.V(0).Infoln("Reading Multi part [ERROR]", fe)
  120. e = fe
  121. return
  122. }
  123. pu.FileName = part.FileName()
  124. if pu.FileName != "" {
  125. pu.FileName = path.Base(pu.FileName)
  126. }
  127. pu.Data, e = ioutil.ReadAll(io.LimitReader(part, sizeLimit+1))
  128. if e != nil {
  129. glog.V(0).Infoln("Reading Content [ERROR]", e)
  130. return
  131. }
  132. if len(pu.Data) == int(sizeLimit)+1 {
  133. e = fmt.Errorf("file over the limited %d bytes", sizeLimit)
  134. return
  135. }
  136. // if the filename is empty string, do a search on the other multi-part items
  137. for pu.FileName == "" {
  138. part2, fe := form.NextPart()
  139. if fe != nil {
  140. break // no more or on error, just safely break
  141. }
  142. fName := part2.FileName()
  143. // found the first <file type> multi-part has filename
  144. if fName != "" {
  145. data2, fe2 := ioutil.ReadAll(io.LimitReader(part2, sizeLimit+1))
  146. if fe2 != nil {
  147. glog.V(0).Infoln("Reading Content [ERROR]", fe2)
  148. e = fe2
  149. return
  150. }
  151. if len(data2) == int(sizeLimit)+1 {
  152. e = fmt.Errorf("file over the limited %d bytes", sizeLimit)
  153. return
  154. }
  155. // update
  156. pu.Data = data2
  157. pu.FileName = path.Base(fName)
  158. break
  159. }
  160. }
  161. pu.IsChunkedFile, _ = strconv.ParseBool(r.FormValue("cm"))
  162. if !pu.IsChunkedFile {
  163. dotIndex := strings.LastIndex(pu.FileName, ".")
  164. ext, mtype := "", ""
  165. if dotIndex > 0 {
  166. ext = strings.ToLower(pu.FileName[dotIndex:])
  167. mtype = mime.TypeByExtension(ext)
  168. }
  169. contentType := part.Header.Get("Content-Type")
  170. if contentType != "" && contentType != "application/octet-stream" && mtype != contentType {
  171. pu.MimeType = contentType // only return mime type if not deductable
  172. mtype = contentType
  173. }
  174. }
  175. pu.IsGzipped = part.Header.Get("Content-Encoding") == "gzip"
  176. // pu.IsZstd = part.Header.Get("Content-Encoding") == "zstd"
  177. return
  178. }