upload_content.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. package operation
  2. import (
  3. "bytes"
  4. "context"
  5. "encoding/json"
  6. "fmt"
  7. "github.com/valyala/bytebufferpool"
  8. "io"
  9. "mime"
  10. "mime/multipart"
  11. "net"
  12. "net/http"
  13. "net/textproto"
  14. "path/filepath"
  15. "strings"
  16. "time"
  17. "github.com/seaweedfs/seaweedfs/weed/glog"
  18. "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
  19. "github.com/seaweedfs/seaweedfs/weed/security"
  20. "github.com/seaweedfs/seaweedfs/weed/stats"
  21. "github.com/seaweedfs/seaweedfs/weed/util"
  22. )
  23. type UploadOption struct {
  24. UploadUrl string
  25. Filename string
  26. Cipher bool
  27. IsInputCompressed bool
  28. MimeType string
  29. PairMap map[string]string
  30. Jwt security.EncodedJwt
  31. RetryForever bool
  32. Md5 string
  33. BytesBuffer *bytes.Buffer
  34. }
  35. type UploadResult struct {
  36. Name string `json:"name,omitempty"`
  37. Size uint32 `json:"size,omitempty"`
  38. Error string `json:"error,omitempty"`
  39. ETag string `json:"eTag,omitempty"`
  40. CipherKey []byte `json:"cipherKey,omitempty"`
  41. Mime string `json:"mime,omitempty"`
  42. Gzip uint32 `json:"gzip,omitempty"`
  43. ContentMd5 string `json:"contentMd5,omitempty"`
  44. RetryCount int `json:"-"`
  45. }
  46. func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64, tsNs int64) *filer_pb.FileChunk {
  47. fid, _ := filer_pb.ToFileIdObject(fileId)
  48. return &filer_pb.FileChunk{
  49. FileId: fileId,
  50. Offset: offset,
  51. Size: uint64(uploadResult.Size),
  52. ModifiedTsNs: tsNs,
  53. ETag: uploadResult.ContentMd5,
  54. CipherKey: uploadResult.CipherKey,
  55. IsCompressed: uploadResult.Gzip > 0,
  56. Fid: fid,
  57. }
  58. }
  59. // HTTPClient interface for testing
  60. type HTTPClient interface {
  61. Do(req *http.Request) (*http.Response, error)
  62. }
  63. var (
  64. HttpClient HTTPClient
  65. )
  66. func init() {
  67. HttpClient = &http.Client{Transport: &http.Transport{
  68. DialContext: (&net.Dialer{
  69. Timeout: 10 * time.Second,
  70. KeepAlive: 10 * time.Second,
  71. }).DialContext,
  72. MaxIdleConns: 1024,
  73. MaxIdleConnsPerHost: 1024,
  74. }}
  75. }
  76. // UploadWithRetry will retry both assigning volume request and uploading content
  77. // The option parameter does not need to specify UploadUrl and Jwt, which will come from assigning volume.
  78. func UploadWithRetry(filerClient filer_pb.FilerClient, assignRequest *filer_pb.AssignVolumeRequest, uploadOption *UploadOption, genFileUrlFn func(host, fileId string) string, reader io.Reader) (fileId string, uploadResult *UploadResult, err error, data []byte) {
  79. doUploadFunc := func() error {
  80. var host string
  81. var auth security.EncodedJwt
  82. // grpc assign volume
  83. if grpcAssignErr := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
  84. resp, assignErr := client.AssignVolume(context.Background(), assignRequest)
  85. if assignErr != nil {
  86. glog.V(0).Infof("assign volume failure %v: %v", assignRequest, assignErr)
  87. return assignErr
  88. }
  89. if resp.Error != "" {
  90. return fmt.Errorf("assign volume failure %v: %v", assignRequest, resp.Error)
  91. }
  92. fileId, auth = resp.FileId, security.EncodedJwt(resp.Auth)
  93. loc := resp.Location
  94. host = filerClient.AdjustedUrl(loc)
  95. return nil
  96. }); grpcAssignErr != nil {
  97. return fmt.Errorf("filerGrpcAddress assign volume: %v", grpcAssignErr)
  98. }
  99. uploadOption.UploadUrl = genFileUrlFn(host, fileId)
  100. uploadOption.Jwt = auth
  101. var uploadErr error
  102. uploadResult, uploadErr, data = doUpload(reader, uploadOption)
  103. return uploadErr
  104. }
  105. if uploadOption.RetryForever {
  106. util.RetryUntil("uploadWithRetryForever", doUploadFunc, func(err error) (shouldContinue bool) {
  107. glog.V(0).Infof("upload content: %v", err)
  108. return true
  109. })
  110. } else {
  111. uploadErrList := []string{"transport", "is read only"}
  112. err = util.MultiRetry("uploadWithRetry", uploadErrList, doUploadFunc)
  113. }
  114. return
  115. }
  116. var fileNameEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", "")
  117. // Upload sends a POST request to a volume server to upload the content with adjustable compression level
  118. func UploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  119. uploadResult, err = retriedUploadData(data, option)
  120. return
  121. }
  122. // Upload sends a POST request to a volume server to upload the content with fast compression
  123. func Upload(reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
  124. uploadResult, err, data = doUpload(reader, option)
  125. return
  126. }
  127. func doUpload(reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
  128. bytesReader, ok := reader.(*util.BytesReader)
  129. if ok {
  130. data = bytesReader.Bytes
  131. } else {
  132. data, err = io.ReadAll(reader)
  133. if err != nil {
  134. err = fmt.Errorf("read input: %v", err)
  135. return
  136. }
  137. }
  138. uploadResult, uploadErr := retriedUploadData(data, option)
  139. return uploadResult, uploadErr, data
  140. }
  141. func retriedUploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  142. for i := 0; i < 3; i++ {
  143. if i > 0 {
  144. time.Sleep(time.Millisecond * time.Duration(237*(i+1)))
  145. }
  146. uploadResult, err = doUploadData(data, option)
  147. if err == nil {
  148. uploadResult.RetryCount = i
  149. return
  150. }
  151. glog.Warningf("uploading %d to %s: %v", i, option.UploadUrl, err)
  152. }
  153. return
  154. }
  155. func doUploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
  156. contentIsGzipped := option.IsInputCompressed
  157. shouldGzipNow := false
  158. if !option.IsInputCompressed {
  159. if option.MimeType == "" {
  160. option.MimeType = http.DetectContentType(data)
  161. // println("detect1 mimetype to", MimeType)
  162. if option.MimeType == "application/octet-stream" {
  163. option.MimeType = ""
  164. }
  165. }
  166. if shouldBeCompressed, iAmSure := util.IsCompressableFileType(filepath.Base(option.Filename), option.MimeType); iAmSure && shouldBeCompressed {
  167. shouldGzipNow = true
  168. } else if !iAmSure && option.MimeType == "" && len(data) > 16*1024 {
  169. var compressed []byte
  170. compressed, err = util.GzipData(data[0:128])
  171. if err != nil {
  172. return
  173. }
  174. shouldGzipNow = len(compressed)*10 < 128*9 // can not compress to less than 90%
  175. }
  176. }
  177. var clearDataLen int
  178. // gzip if possible
  179. // this could be double copying
  180. clearDataLen = len(data)
  181. clearData := data
  182. if shouldGzipNow && !option.Cipher {
  183. compressed, compressErr := util.GzipData(data)
  184. // fmt.Printf("data is compressed from %d ==> %d\n", len(data), len(compressed))
  185. if compressErr == nil {
  186. data = compressed
  187. contentIsGzipped = true
  188. }
  189. } else if option.IsInputCompressed {
  190. // just to get the clear data length
  191. clearData, err = util.DecompressData(data)
  192. if err == nil {
  193. clearDataLen = len(clearData)
  194. }
  195. }
  196. if option.Cipher {
  197. // encrypt(gzip(data))
  198. // encrypt
  199. cipherKey := util.GenCipherKey()
  200. encryptedData, encryptionErr := util.Encrypt(clearData, cipherKey)
  201. if encryptionErr != nil {
  202. err = fmt.Errorf("encrypt input: %v", encryptionErr)
  203. return
  204. }
  205. // upload data
  206. uploadResult, err = upload_content(func(w io.Writer) (err error) {
  207. _, err = w.Write(encryptedData)
  208. return
  209. }, len(encryptedData), &UploadOption{
  210. UploadUrl: option.UploadUrl,
  211. Filename: "",
  212. Cipher: false,
  213. IsInputCompressed: false,
  214. MimeType: "",
  215. PairMap: nil,
  216. Jwt: option.Jwt,
  217. })
  218. if uploadResult == nil {
  219. return
  220. }
  221. uploadResult.Name = option.Filename
  222. uploadResult.Mime = option.MimeType
  223. uploadResult.CipherKey = cipherKey
  224. uploadResult.Size = uint32(clearDataLen)
  225. } else {
  226. // upload data
  227. uploadResult, err = upload_content(func(w io.Writer) (err error) {
  228. _, err = w.Write(data)
  229. return
  230. }, len(data), &UploadOption{
  231. UploadUrl: option.UploadUrl,
  232. Filename: option.Filename,
  233. Cipher: false,
  234. IsInputCompressed: contentIsGzipped,
  235. MimeType: option.MimeType,
  236. PairMap: option.PairMap,
  237. Jwt: option.Jwt,
  238. Md5: option.Md5,
  239. BytesBuffer: option.BytesBuffer,
  240. })
  241. if uploadResult == nil {
  242. return
  243. }
  244. uploadResult.Size = uint32(clearDataLen)
  245. if contentIsGzipped {
  246. uploadResult.Gzip = 1
  247. }
  248. }
  249. return uploadResult, err
  250. }
  251. func upload_content(fillBufferFunction func(w io.Writer) error, originalDataSize int, option *UploadOption) (*UploadResult, error) {
  252. var body_writer *multipart.Writer
  253. var reqReader *bytes.Reader
  254. var buf *bytebufferpool.ByteBuffer
  255. if option.BytesBuffer == nil {
  256. buf = GetBuffer()
  257. defer PutBuffer(buf)
  258. body_writer = multipart.NewWriter(buf)
  259. } else {
  260. option.BytesBuffer.Reset()
  261. body_writer = multipart.NewWriter(option.BytesBuffer)
  262. }
  263. h := make(textproto.MIMEHeader)
  264. filename := fileNameEscaper.Replace(option.Filename)
  265. h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, filename))
  266. h.Set("Idempotency-Key", option.UploadUrl)
  267. if option.MimeType == "" {
  268. option.MimeType = mime.TypeByExtension(strings.ToLower(filepath.Ext(option.Filename)))
  269. }
  270. if option.MimeType != "" {
  271. h.Set("Content-Type", option.MimeType)
  272. }
  273. if option.IsInputCompressed {
  274. h.Set("Content-Encoding", "gzip")
  275. }
  276. if option.Md5 != "" {
  277. h.Set("Content-MD5", option.Md5)
  278. }
  279. file_writer, cp_err := body_writer.CreatePart(h)
  280. if cp_err != nil {
  281. glog.V(0).Infoln("error creating form file", cp_err.Error())
  282. return nil, cp_err
  283. }
  284. if err := fillBufferFunction(file_writer); err != nil {
  285. glog.V(0).Infoln("error copying data", err)
  286. return nil, err
  287. }
  288. content_type := body_writer.FormDataContentType()
  289. if err := body_writer.Close(); err != nil {
  290. glog.V(0).Infoln("error closing body", err)
  291. return nil, err
  292. }
  293. if option.BytesBuffer == nil {
  294. reqReader = bytes.NewReader(buf.Bytes())
  295. } else {
  296. reqReader = bytes.NewReader(option.BytesBuffer.Bytes())
  297. }
  298. req, postErr := http.NewRequest("POST", option.UploadUrl, reqReader)
  299. if postErr != nil {
  300. glog.V(1).Infof("create upload request %s: %v", option.UploadUrl, postErr)
  301. return nil, fmt.Errorf("create upload request %s: %v", option.UploadUrl, postErr)
  302. }
  303. req.Header.Set("Content-Type", content_type)
  304. for k, v := range option.PairMap {
  305. req.Header.Set(k, v)
  306. }
  307. if option.Jwt != "" {
  308. req.Header.Set("Authorization", "BEARER "+string(option.Jwt))
  309. }
  310. // print("+")
  311. resp, post_err := HttpClient.Do(req)
  312. defer util.CloseResponse(resp)
  313. if post_err != nil {
  314. if strings.Contains(post_err.Error(), "connection reset by peer") ||
  315. strings.Contains(post_err.Error(), "use of closed network connection") {
  316. glog.V(1).Infof("repeat error upload request %s: %v", option.UploadUrl, postErr)
  317. stats.FilerHandlerCounter.WithLabelValues(stats.RepeatErrorUploadContent).Inc()
  318. resp, post_err = HttpClient.Do(req)
  319. defer util.CloseResponse(resp)
  320. }
  321. }
  322. if post_err != nil {
  323. return nil, fmt.Errorf("upload %s %d bytes to %v: %v", option.Filename, originalDataSize, option.UploadUrl, post_err)
  324. }
  325. // print("-")
  326. var ret UploadResult
  327. etag := getEtag(resp)
  328. if resp.StatusCode == http.StatusNoContent {
  329. ret.ETag = etag
  330. return &ret, nil
  331. }
  332. resp_body, ra_err := io.ReadAll(resp.Body)
  333. if ra_err != nil {
  334. return nil, fmt.Errorf("read response body %v: %v", option.UploadUrl, ra_err)
  335. }
  336. unmarshal_err := json.Unmarshal(resp_body, &ret)
  337. if unmarshal_err != nil {
  338. glog.Errorf("unmarshal %s: %v", option.UploadUrl, string(resp_body))
  339. return nil, fmt.Errorf("unmarshal %v: %v", option.UploadUrl, unmarshal_err)
  340. }
  341. if ret.Error != "" {
  342. return nil, fmt.Errorf("unmarshalled error %v: %v", option.UploadUrl, ret.Error)
  343. }
  344. ret.ETag = etag
  345. ret.ContentMd5 = resp.Header.Get("Content-MD5")
  346. return &ret, nil
  347. }
  348. func getEtag(r *http.Response) (etag string) {
  349. etag = r.Header.Get("ETag")
  350. if strings.HasPrefix(etag, "\"") && strings.HasSuffix(etag, "\"") {
  351. etag = etag[1 : len(etag)-1]
  352. }
  353. return
  354. }