Improve the s3 uploader for better performance.

Signed-off-by: dkkb <dabkb@aol.com>
This commit is contained in:
dkkb
2021-06-15 16:53:02 +08:00
parent d97bdffd94
commit 3ec5a5aea6
9 changed files with 58 additions and 16 deletions

View File

@@ -43,7 +43,7 @@ type Object struct {
Body io.Reader
}
func (s *FakeS3) Upload(key, fileName string, body io.Reader) error {
func (s *FakeS3) Upload(key, fileName string, body io.Reader, size int) error {
s.Storage[key] = &Object{
Key: key,
FileName: fileName,

View File

@@ -25,7 +25,7 @@ func TestFakeS3(t *testing.T) {
s3 := NewFakeS3()
key := "hello"
fileName := "world"
err := s3.Upload(key, fileName, nil)
err := s3.Upload(key, fileName, nil, 0)
if err != nil {
t.Fatal(err)
}

View File

@@ -16,16 +16,14 @@ limitations under the License.
package s3
import (
"io"
)
import "io"
type Interface interface {
// Read returns the full content of the object stored under key.
Read(key string) ([]byte, error)
// Upload uploads an object to storage and returns an error if it fails.
Upload(key, fileName string, body io.Reader) error
Upload(key, fileName string, body io.Reader, size int) error
GetDownloadURL(key string, fileName string) (string, error)

View File

@@ -19,6 +19,7 @@ package s3
import (
"fmt"
"io"
"math"
"time"
"code.cloudfoundry.org/bytefmt"
@@ -36,10 +37,40 @@ type Client struct {
bucket string
}
func (s *Client) Upload(key, fileName string, body io.Reader) error {
const (
// DefaultPartSize is the default size in bytes (5 MiB) of a single part
// in a multipart upload; it is also the minimum part size Amazon S3 accepts.
DefaultPartSize = 5 * bytefmt.MEGABYTE
// MinConcurrency is the minimum concurrency when uploading a part to Amazon S3,
// it's also the default value of Concurrency in aws-sdk-go.
MinConcurrency = 5
// MaxConcurrency is the maximum concurrency to limit the goroutines.
MaxConcurrency = 128
)
// calculateConcurrency derives the upload concurrency from the payload size
// so that, ideally, one goroutine handles one DefaultPartSize part. The
// result is clamped to the range [MinConcurrency, MaxConcurrency]; a
// non-positive (unknown) size falls back to MinConcurrency.
func calculateConcurrency(size int) int {
	if size <= 0 {
		return MinConcurrency
	}
	// Integer ceiling division: the number of parts needed to cover size.
	// Avoids the float64 round-trip of math.Ceil, which can lose precision
	// for sizes beyond 2^53, and cannot overflow.
	parts := size / DefaultPartSize
	if size%DefaultPartSize != 0 {
		parts++
	}
	switch {
	case parts < MinConcurrency:
		return MinConcurrency
	case parts > MaxConcurrency:
		return MaxConcurrency
	}
	return parts
}
// Upload uses multipart upload to store a single object as a set of parts.
// If the data length is known, pass it as size — it is used to calculate the
// upload concurrency. Otherwise, size can be 0 and the default concurrency
// of 5 is used, the same as aws-sdk-go.
// See https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html for more details.
func (s *Client) Upload(key, fileName string, body io.Reader, size int) error {
uploader := s3manager.NewUploader(s.s3Session, func(uploader *s3manager.Uploader) {
uploader.PartSize = 5 * bytefmt.MEGABYTE
uploader.PartSize = DefaultPartSize
uploader.LeavePartsOnError = true
uploader.Concurrency = calculateConcurrency(size)
})
_, err := uploader.Upload(&s3manager.UploadInput{
Bucket: aws.String(s.bucket),

View File

@@ -0,0 +1,14 @@
package s3
import (
"testing"
"gotest.tools/assert"
)
// TestCalculateConcurrency checks the clamping behaviour of
// calculateConcurrency at and around the [MinConcurrency, MaxConcurrency]
// boundaries.
func TestCalculateConcurrency(t *testing.T) {
	cases := []struct {
		size int
		want int
	}{
		{1 * 1024 * 1024, 5},         // below one part: clamped up to the minimum
		{5 * 1024 * 1024, 5},         // exactly one part: still the minimum
		{99 * 1024 * 1024, 20},       // ceil(99/5) parts
		{129 * 5 * 1024 * 1024, 128}, // above the cap: clamped to the maximum
	}
	for _, tc := range cases {
		assert.Equal(t, tc.want, calculateConcurrency(tc.size))
	}
}