needle_parse_upload.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. package needle
  2. import (
  3. "bytes"
  4. "crypto/md5"
  5. "encoding/base64"
  6. "fmt"
  7. "io"
  8. "mime"
  9. "net/http"
  10. "path"
  11. "path/filepath"
  12. "strconv"
  13. "strings"
  14. "github.com/seaweedfs/seaweedfs/weed/glog"
  15. "github.com/seaweedfs/seaweedfs/weed/util"
  16. )
  17. type ParsedUpload struct {
  18. FileName string
  19. Data []byte
  20. bytesBuffer *bytes.Buffer
  21. MimeType string
  22. PairMap map[string]string
  23. IsGzipped bool
  24. // IsZstd bool
  25. OriginalDataSize int
  26. ModifiedTime uint64
  27. Ttl *TTL
  28. IsChunkedFile bool
  29. UncompressedData []byte
  30. ContentMd5 string
  31. }
  32. func ParseUpload(r *http.Request, sizeLimit int64, bytesBuffer *bytes.Buffer) (pu *ParsedUpload, e error) {
  33. bytesBuffer.Reset()
  34. pu = &ParsedUpload{bytesBuffer: bytesBuffer}
  35. pu.PairMap = make(map[string]string)
  36. for k, v := range r.Header {
  37. if len(v) > 0 && strings.HasPrefix(k, PairNamePrefix) {
  38. pu.PairMap[k] = v[0]
  39. }
  40. }
  41. if r.Method == http.MethodPost {
  42. contentType := r.Header.Get("Content-Type")
  43. // If content-type is explicitly set, upload the file without parsing form-data
  44. if contentType != "" && !strings.Contains(contentType, "form-data") {
  45. e = parseRawPost(r, sizeLimit, pu)
  46. } else {
  47. e = parseMultipart(r, sizeLimit, pu)
  48. }
  49. } else {
  50. e = parsePut(r, sizeLimit, pu)
  51. }
  52. if e != nil {
  53. return
  54. }
  55. pu.ModifiedTime, _ = strconv.ParseUint(r.FormValue("ts"), 10, 64)
  56. pu.Ttl, _ = ReadTTL(r.FormValue("ttl"))
  57. pu.OriginalDataSize = len(pu.Data)
  58. pu.UncompressedData = pu.Data
  59. // println("received data", len(pu.Data), "isGzipped", pu.IsGzipped, "mime", pu.MimeType, "name", pu.FileName)
  60. if pu.IsGzipped {
  61. if unzipped, e := util.DecompressData(pu.Data); e == nil {
  62. pu.OriginalDataSize = len(unzipped)
  63. pu.UncompressedData = unzipped
  64. // println("ungzipped data size", len(unzipped))
  65. }
  66. } else {
  67. ext := filepath.Base(pu.FileName)
  68. mimeType := pu.MimeType
  69. if mimeType == "" {
  70. mimeType = http.DetectContentType(pu.Data)
  71. }
  72. // println("detected mimetype to", pu.MimeType)
  73. if mimeType == "application/octet-stream" {
  74. mimeType = ""
  75. }
  76. if shouldBeCompressed, iAmSure := util.IsCompressableFileType(ext, mimeType); shouldBeCompressed && iAmSure {
  77. // println("ext", ext, "iAmSure", iAmSure, "shouldBeCompressed", shouldBeCompressed, "mimeType", pu.MimeType)
  78. if compressedData, err := util.GzipData(pu.Data); err == nil {
  79. if len(compressedData)*10 < len(pu.Data)*9 {
  80. pu.Data = compressedData
  81. pu.IsGzipped = true
  82. }
  83. // println("gzipped data size", len(compressedData))
  84. }
  85. }
  86. }
  87. // md5
  88. h := md5.New()
  89. h.Write(pu.UncompressedData)
  90. pu.ContentMd5 = base64.StdEncoding.EncodeToString(h.Sum(nil))
  91. if expectedChecksum := r.Header.Get("Content-MD5"); expectedChecksum != "" {
  92. if expectedChecksum != pu.ContentMd5 {
  93. e = fmt.Errorf("Content-MD5 did not match md5 of file data expected [%s] received [%s] size %d", expectedChecksum, pu.ContentMd5, len(pu.UncompressedData))
  94. return
  95. }
  96. }
  97. return
  98. }
  99. func parsePut(r *http.Request, sizeLimit int64, pu *ParsedUpload) error {
  100. pu.IsGzipped = r.Header.Get("Content-Encoding") == "gzip"
  101. // pu.IsZstd = r.Header.Get("Content-Encoding") == "zstd"
  102. pu.MimeType = r.Header.Get("Content-Type")
  103. pu.FileName = ""
  104. dataSize, err := pu.bytesBuffer.ReadFrom(io.LimitReader(r.Body, sizeLimit+1))
  105. if err == io.EOF || dataSize == sizeLimit+1 {
  106. io.Copy(io.Discard, r.Body)
  107. }
  108. pu.Data = pu.bytesBuffer.Bytes()
  109. r.Body.Close()
  110. return nil
  111. }
  112. func parseMultipart(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
  113. defer func() {
  114. if e != nil && r.Body != nil {
  115. io.Copy(io.Discard, r.Body)
  116. r.Body.Close()
  117. }
  118. }()
  119. form, fe := r.MultipartReader()
  120. if fe != nil {
  121. glog.V(0).Infoln("MultipartReader [ERROR]", fe)
  122. e = fe
  123. return
  124. }
  125. // first multi-part item
  126. part, fe := form.NextPart()
  127. if fe != nil {
  128. glog.V(0).Infoln("Reading Multi part [ERROR]", fe)
  129. e = fe
  130. return
  131. }
  132. pu.FileName = part.FileName()
  133. if pu.FileName != "" {
  134. pu.FileName = path.Base(pu.FileName)
  135. }
  136. var dataSize int64
  137. dataSize, e = pu.bytesBuffer.ReadFrom(io.LimitReader(part, sizeLimit+1))
  138. if e != nil {
  139. glog.V(0).Infoln("Reading Content [ERROR]", e)
  140. return
  141. }
  142. if dataSize == sizeLimit+1 {
  143. e = fmt.Errorf("file over the limited %d bytes", sizeLimit)
  144. return
  145. }
  146. pu.Data = pu.bytesBuffer.Bytes()
  147. // if the filename is empty string, do a search on the other multi-part items
  148. for pu.FileName == "" {
  149. part2, fe := form.NextPart()
  150. if fe != nil {
  151. break // no more or on error, just safely break
  152. }
  153. fName := part2.FileName()
  154. // found the first <file type> multi-part has filename
  155. if fName != "" {
  156. pu.bytesBuffer.Reset()
  157. dataSize2, fe2 := pu.bytesBuffer.ReadFrom(io.LimitReader(part2, sizeLimit+1))
  158. if fe2 != nil {
  159. glog.V(0).Infoln("Reading Content [ERROR]", fe2)
  160. e = fe2
  161. return
  162. }
  163. if dataSize2 == sizeLimit+1 {
  164. e = fmt.Errorf("file over the limited %d bytes", sizeLimit)
  165. return
  166. }
  167. // update
  168. pu.Data = pu.bytesBuffer.Bytes()
  169. pu.FileName = path.Base(fName)
  170. break
  171. }
  172. }
  173. pu.IsChunkedFile, _ = strconv.ParseBool(r.FormValue("cm"))
  174. if !pu.IsChunkedFile {
  175. dotIndex := strings.LastIndex(pu.FileName, ".")
  176. ext, mtype := "", ""
  177. if dotIndex > 0 {
  178. ext = strings.ToLower(pu.FileName[dotIndex:])
  179. mtype = mime.TypeByExtension(ext)
  180. }
  181. contentType := part.Header.Get("Content-Type")
  182. if contentType != "" && contentType != "application/octet-stream" && mtype != contentType {
  183. pu.MimeType = contentType // only return mime type if not deducible
  184. mtype = contentType
  185. }
  186. }
  187. pu.IsGzipped = part.Header.Get("Content-Encoding") == "gzip"
  188. // pu.IsZstd = part.Header.Get("Content-Encoding") == "zstd"
  189. return
  190. }
  191. func parseRawPost(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
  192. defer func() {
  193. if e != nil && r.Body != nil {
  194. io.Copy(io.Discard, r.Body)
  195. r.Body.Close()
  196. }
  197. }()
  198. pu.FileName = r.Header.Get("Content-Disposition")
  199. if pu.FileName != "" && strings.Contains(pu.FileName, "filename=") {
  200. parts := strings.Split(pu.FileName, "filename=")
  201. parts = strings.Split(parts[1], "\"")
  202. pu.FileName = parts[1]
  203. } else {
  204. pu.FileName = ""
  205. }
  206. if pu.FileName != "" {
  207. pu.FileName = path.Base(pu.FileName)
  208. } else {
  209. pu.FileName = path.Base(r.URL.Path)
  210. }
  211. var dataSize int64
  212. dataSize, e = pu.bytesBuffer.ReadFrom(io.LimitReader(r.Body, sizeLimit+1))
  213. if e != nil {
  214. glog.V(0).Infoln("Reading Content [ERROR]", e)
  215. return
  216. }
  217. if dataSize == sizeLimit+1 {
  218. e = fmt.Errorf("file over the limited %d bytes", sizeLimit)
  219. return
  220. }
  221. pu.Data = pu.bytesBuffer.Bytes()
  222. pu.IsChunkedFile, _ = strconv.ParseBool(r.FormValue("cm"))
  223. if !pu.IsChunkedFile {
  224. dotIndex := strings.LastIndex(pu.FileName, ".")
  225. ext, mtype := "", ""
  226. if dotIndex > 0 {
  227. ext = strings.ToLower(pu.FileName[dotIndex:])
  228. mtype = mime.TypeByExtension(ext)
  229. }
  230. contentType := r.Header.Get("Content-Type")
  231. if contentType != "" && contentType != "application/octet-stream" && mtype != contentType {
  232. pu.MimeType = contentType // only return mime type if not deducible
  233. mtype = contentType
  234. }
  235. }
  236. pu.IsGzipped = r.Header.Get("Content-Encoding") == "gzip"
  237. // pu.IsZstd = r.Header.Get("Content-Encoding") == "zstd"
  238. return
  239. }