update dependencies

Signed-off-by: hongming <talonwan@yunify.com>
This commit is contained in:
hongming
2020-12-22 16:48:26 +08:00
parent 4a11a50544
commit fe6c5de00f
2857 changed files with 252134 additions and 115656 deletions

View File

@@ -0,0 +1,81 @@
package s3manager
import (
"io"
"github.com/aws/aws-sdk-go/internal/sdkio"
)
// BufferedReadSeeker is a buffered io.ReadSeeker
type BufferedReadSeeker struct {
r io.ReadSeeker
buffer []byte
readIdx, writeIdx int
}
// NewBufferedReadSeeker returns a new BufferedReadSeeker.
// If len(b) == 0 then the buffer will be initialized to 64 KiB.
func NewBufferedReadSeeker(r io.ReadSeeker, b []byte) *BufferedReadSeeker {
if len(b) == 0 {
b = make([]byte, 64*1024)
}
return &BufferedReadSeeker{r: r, buffer: b}
}
func (b *BufferedReadSeeker) reset(r io.ReadSeeker) {
b.r = r
b.readIdx, b.writeIdx = 0, 0
}
// Read will read up to len(p) bytes into p and will return
// the number of bytes read and any error that occurred.
// If len(p) is greater than the buffer size then a single read request
// will be issued to the underlying io.ReadSeeker for len(p) bytes.
// A Read request will at most perform a single Read to the underlying
// io.ReadSeeker, and may return < len(p) if serviced from the buffer.
func (b *BufferedReadSeeker) Read(p []byte) (n int, err error) {
if len(p) == 0 {
return n, err
}
if b.readIdx == b.writeIdx {
if len(p) >= len(b.buffer) {
n, err = b.r.Read(p)
return n, err
}
b.readIdx, b.writeIdx = 0, 0
n, err = b.r.Read(b.buffer)
if n == 0 {
return n, err
}
b.writeIdx += n
}
n = copy(p, b.buffer[b.readIdx:b.writeIdx])
b.readIdx += n
return n, err
}
// Seek will position the underlying io.ReadSeeker to the given offset
// and will clear the buffer.
func (b *BufferedReadSeeker) Seek(offset int64, whence int) (int64, error) {
n, err := b.r.Seek(offset, whence)
b.reset(b.r)
return n, err
}
// ReadAt will read up to len(p) bytes at the given file offset.
// This will result in the buffer being cleared.
func (b *BufferedReadSeeker) ReadAt(p []byte, off int64) (int, error) {
_, err := b.Seek(off, sdkio.SeekStart)
if err != nil {
return 0, err
}
return b.Read(p)
}
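
A usage sketch of the type above (not part of the commit; the payload and buffer sizes are arbitrary illustrations):

package main

import (
	"bytes"
	"fmt"
	"strings"

	"github.com/aws/aws-sdk-go/service/s3/s3manager"
)

func main() {
	// Wrap an io.ReadSeeker with a deliberately tiny 4-byte buffer so several
	// buffered fills are needed to drain the source.
	brs := s3manager.NewBufferedReadSeeker(strings.NewReader("example payload"), make([]byte, 4))

	var out bytes.Buffer
	p := make([]byte, 3) // smaller than the buffer, so reads are serviced from it
	for {
		n, err := brs.Read(p)
		out.Write(p[:n])
		if err != nil {
			break // io.EOF once the source is drained
		}
	}
	fmt.Println(out.String()) // example payload
}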

View File

@@ -0,0 +1,7 @@
// +build !windows
package s3manager
func defaultUploadBufferProvider() ReadSeekerWriteToProvider {
return nil
}

View File

@@ -0,0 +1,5 @@
package s3manager
func defaultUploadBufferProvider() ReadSeekerWriteToProvider {
return NewBufferedReadSeekerWriteToPool(1024 * 1024)
}

View File

@@ -0,0 +1,7 @@
// +build !windows
package s3manager
func defaultDownloadBufferProvider() WriterReadFromProvider {
return nil
}

View File

@@ -0,0 +1,5 @@
package s3manager
func defaultDownloadBufferProvider() WriterReadFromProvider {
return NewPooledBufferedWriterReadFromProvider(1024 * 1024)
}
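
Taken together, the four stubs above enable 1 MiB pooled buffering by default on Windows only, and leave buffering off elsewhere. A sketch of overriding the platform default explicitly, assuming an existing session sess:

// Force the pooled upload provider on any platform, or clear the download
// provider to opt out of buffering even on Windows.
uploader := s3manager.NewUploader(sess, func(u *s3manager.Uploader) {
	u.BufferProvider = s3manager.NewBufferedReadSeekerWriteToPool(1024 * 1024)
})
downloader := s3manager.NewDownloader(sess, func(d *s3manager.Downloader) {
	d.BufferProvider = nil
})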

View File

@@ -25,13 +25,25 @@ const DefaultDownloadPartSize = 1024 * 1024 * 5
// when using Download().
const DefaultDownloadConcurrency = 5
type errReadingBody struct {
err error
}
func (e *errReadingBody) Error() string {
return fmt.Sprintf("failed to read part body: %v", e.err)
}
func (e *errReadingBody) Unwrap() error {
return e.err
}
// The Downloader structure that calls Download(). It is safe to call Download()
// on this structure for multiple objects and across concurrent goroutines.
// Mutating the Downloader's properties is not safe to be done concurrently.
type Downloader struct {
// The buffer size (in bytes) to use when buffering data into chunks and
// sending them as parts to S3. The minimum allowed part size is 5MB, and
// if this value is set to zero, the DefaultDownloadPartSize value will be used.
// The size (in bytes) to request from S3 for each part.
// The minimum allowed part size is 5MB, and if this value is set to zero,
// the DefaultDownloadPartSize value will be used.
//
// PartSize is ignored if the Range input parameter is provided.
PartSize int64
@@ -50,6 +62,14 @@ type Downloader struct {
// List of request options that will be passed down to individual API
// operation requests made by the downloader.
RequestOptions []request.Option
// Defines the buffer strategy used when downloading a part.
//
// If a WriterReadFromProvider is given the Download manager
// will pass the io.WriterAt of the Download request to the provider
// and will use the returned WriterReadFrom from the provider as the
// destination writer when copying from the HTTP response body.
BufferProvider WriterReadFromProvider
}
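
The provider contract is small; a hypothetical passthrough implementation (names invented for illustration) shows the minimum a custom BufferProvider must supply:

// passthroughProvider satisfies WriterReadFromProvider without buffering:
// it hands back the writer unchanged plus a no-op cleanup.
type passthroughProvider struct{}

type plainReadFrom struct{ io.Writer }

// ReadFrom delegates straight to io.Copy, so no intermediate buffer is pooled.
func (w plainReadFrom) ReadFrom(r io.Reader) (int64, error) {
	return io.Copy(w.Writer, r)
}

func (passthroughProvider) GetReadFrom(w io.Writer) (WriterReadFrom, func()) {
	return plainReadFrom{w}, func() {}
}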
// WithDownloaderRequestOptions appends to the Downloader's API request options.
@@ -77,10 +97,15 @@ func WithDownloaderRequestOptions(opts ...request.Option) func(*Downloader) {
// d.PartSize = 64 * 1024 * 1024 // 64MB per part
// })
func NewDownloader(c client.ConfigProvider, options ...func(*Downloader)) *Downloader {
return newDownloader(s3.New(c), options...)
}
func newDownloader(client s3iface.S3API, options ...func(*Downloader)) *Downloader {
d := &Downloader{
S3: s3.New(c),
PartSize: DefaultDownloadPartSize,
Concurrency: DefaultDownloadConcurrency,
S3: client,
PartSize: DefaultDownloadPartSize,
Concurrency: DefaultDownloadConcurrency,
BufferProvider: defaultDownloadBufferProvider(),
}
for _, option := range options {
option(d)
@@ -99,7 +124,7 @@ func NewDownloader(c client.ConfigProvider, options ...func(*Downloader)) *Downl
// sess := session.Must(session.NewSession())
//
// // The S3 client the S3 Downloader will use
// s3Svc := s3.new(sess)
// s3Svc := s3.New(sess)
//
// // Create a downloader with the s3 client and default options
// downloader := s3manager.NewDownloaderWithClient(s3Svc)
@@ -109,16 +134,7 @@ func NewDownloader(c client.ConfigProvider, options ...func(*Downloader)) *Downl
// d.PartSize = 64 * 1024 * 1024 // 64MB per part
// })
func NewDownloaderWithClient(svc s3iface.S3API, options ...func(*Downloader)) *Downloader {
d := &Downloader{
S3: svc,
PartSize: DefaultDownloadPartSize,
Concurrency: DefaultDownloadConcurrency,
}
for _, option := range options {
option(d)
}
return d
return newDownloader(svc, options...)
}
type maxRetrier interface {
@@ -405,18 +421,20 @@ func (d *downloader) downloadChunk(chunk dlchunk) error {
var n int64
var err error
for retry := 0; retry <= d.partBodyMaxRetries; retry++ {
var resp *s3.GetObjectOutput
resp, err = d.cfg.S3.GetObjectWithContext(d.ctx, in, d.cfg.RequestOptions...)
if err != nil {
return err
}
d.setTotalBytes(resp) // Set total if not yet set.
n, err = io.Copy(&chunk, resp.Body)
resp.Body.Close()
n, err = d.tryDownloadChunk(in, &chunk)
if err == nil {
break
}
// Check if the returned error is an errReadingBody.
// If err is errReadingBody this indicates that an error
// occurred while copying the http response body.
// If this occurs we unwrap the err to set the underlying error
// and attempt any remaining retries.
if bodyErr, ok := err.(*errReadingBody); ok {
err = bodyErr.Unwrap()
} else {
return err
}
chunk.cur = 0
logMessage(d.cfg.S3, aws.LogDebugWithRequestRetries,
@@ -429,6 +447,28 @@ func (d *downloader) downloadChunk(chunk dlchunk) error {
return err
}
func (d *downloader) tryDownloadChunk(in *s3.GetObjectInput, w io.Writer) (int64, error) {
cleanup := func() {}
if d.cfg.BufferProvider != nil {
w, cleanup = d.cfg.BufferProvider.GetReadFrom(w)
}
defer cleanup()
resp, err := d.cfg.S3.GetObjectWithContext(d.ctx, in, d.cfg.RequestOptions...)
if err != nil {
return 0, err
}
d.setTotalBytes(resp) // Set total if not yet set.
n, err := io.Copy(w, resp.Body)
resp.Body.Close()
if err != nil {
return n, &errReadingBody{err: err}
}
return n, nil
}
func logMessage(svc s3iface.S3API, level aws.LogLevelType, msg string) {
s, ok := svc.(*s3.S3)
if !ok {

View File

@@ -0,0 +1,244 @@
package s3manager
import (
"fmt"
"sync"
"github.com/aws/aws-sdk-go/aws"
)
type byteSlicePool interface {
Get(aws.Context) (*[]byte, error)
Put(*[]byte)
ModifyCapacity(int)
SliceSize() int64
Close()
}
type maxSlicePool struct {
// allocator is defined as a function pointer to allow
// for test cases to instrument custom tracers when allocations
// occur.
allocator sliceAllocator
slices chan *[]byte
allocations chan struct{}
capacityChange chan struct{}
max int
sliceSize int64
mtx sync.RWMutex
}
func newMaxSlicePool(sliceSize int64) *maxSlicePool {
p := &maxSlicePool{sliceSize: sliceSize}
p.allocator = p.newSlice
return p
}
var errZeroCapacity = fmt.Errorf("get called on zero capacity pool")
func (p *maxSlicePool) Get(ctx aws.Context) (*[]byte, error) {
// check if context is canceled before attempting to get a slice
// this ensures priority is given to the cancel case first
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
p.mtx.RLock()
for {
select {
case bs, ok := <-p.slices:
p.mtx.RUnlock()
if !ok {
// attempt to get on a zero capacity pool
return nil, errZeroCapacity
}
return bs, nil
case _, ok := <-p.allocations:
p.mtx.RUnlock()
if !ok {
// attempt to get on a zero capacity pool
return nil, errZeroCapacity
}
return p.allocator(), nil
case <-ctx.Done():
p.mtx.RUnlock()
return nil, ctx.Err()
default:
// In the event that there are no slices or allocations available,
// release the read lock and wait for a capacity notification.
// This prevents some deadlock situations that can occur around sync.RWMutex:
// when a lock request occurs on ModifyCapacity, no new readers are allowed to acquire a read lock.
// If Get held the read lock indefinitely waiting for capacity, ModifyCapacity would wait for the
// write lock, and a Put would block trying to acquire a read lock behind ModifyCapacity.
// Short-circuit if the pool capacity is zero.
if p.max == 0 {
p.mtx.RUnlock()
return nil, errZeroCapacity
}
// Since we will be releasing the read-lock we need to take the reference to the channel.
// Since channels are references we will still get notified if slices are added, or if
// the channel is closed due to a capacity modification. This specifically avoids a data race condition
// where ModifyCapacity both closes a channel and initializes a new one while we don't have a read-lock.
c := p.capacityChange
p.mtx.RUnlock()
select {
case <-c:
p.mtx.RLock()
case <-ctx.Done():
return nil, ctx.Err()
}
}
}
}
func (p *maxSlicePool) Put(bs *[]byte) {
p.mtx.RLock()
defer p.mtx.RUnlock()
if p.max == 0 {
return
}
select {
case p.slices <- bs:
p.notifyCapacity()
default:
// If the slices channel is full when attempting to add the slice then we drop it.
// The logic here is to prevent a deadlock situation if the channel is already at max capacity.
// Allows us to reap allocations that are returned and are no longer needed.
}
}
func (p *maxSlicePool) ModifyCapacity(delta int) {
if delta == 0 {
return
}
p.mtx.Lock()
defer p.mtx.Unlock()
p.max += delta
if p.max == 0 {
p.empty()
return
}
if p.capacityChange != nil {
close(p.capacityChange)
}
p.capacityChange = make(chan struct{}, p.max)
origAllocations := p.allocations
p.allocations = make(chan struct{}, p.max)
newAllocs := len(origAllocations) + delta
for i := 0; i < newAllocs; i++ {
p.allocations <- struct{}{}
}
if origAllocations != nil {
close(origAllocations)
}
origSlices := p.slices
p.slices = make(chan *[]byte, p.max)
if origSlices == nil {
return
}
close(origSlices)
for bs := range origSlices {
select {
case p.slices <- bs:
default:
// If the new channel blocks while adding slices from the old channel
// then we drop the slice. The logic here is to prevent a deadlock situation
// if the new channel has a smaller capacity than the old.
}
}
}
func (p *maxSlicePool) notifyCapacity() {
select {
case p.capacityChange <- struct{}{}:
default:
// This *shouldn't* happen as the channel is both buffered to the max pool capacity size and is resized
// on capacity modifications. This is just a safety to ensure that a blocking situation can't occur.
}
}
func (p *maxSlicePool) SliceSize() int64 {
return p.sliceSize
}
func (p *maxSlicePool) Close() {
p.mtx.Lock()
defer p.mtx.Unlock()
p.empty()
}
func (p *maxSlicePool) empty() {
p.max = 0
if p.capacityChange != nil {
close(p.capacityChange)
p.capacityChange = nil
}
if p.allocations != nil {
close(p.allocations)
for range p.allocations {
// drain channel
}
p.allocations = nil
}
if p.slices != nil {
close(p.slices)
for range p.slices {
// drain channel
}
p.slices = nil
}
}
func (p *maxSlicePool) newSlice() *[]byte {
bs := make([]byte, p.sliceSize)
return &bs
}
type returnCapacityPoolCloser struct {
byteSlicePool
returnCapacity int
}
func (n *returnCapacityPoolCloser) ModifyCapacity(delta int) {
if delta > 0 {
n.returnCapacity = -1 * delta
}
n.byteSlicePool.ModifyCapacity(delta)
}
func (n *returnCapacityPoolCloser) Close() {
if n.returnCapacity < 0 {
n.byteSlicePool.ModifyCapacity(n.returnCapacity)
}
}
type sliceAllocator func() *[]byte
var newByteSlicePool = func(sliceSize int64) byteSlicePool {
return newMaxSlicePool(sliceSize)
}
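
A lifecycle sketch of the pool (internal API; payload stands in for real part data):

pool := newMaxSlicePool(5 * 1024 * 1024) // hand out 5 MiB slices
pool.ModifyCapacity(3)                   // at most three slices live at once
defer pool.Close()

bs, err := pool.Get(aws.BackgroundContext()) // blocks while all three are out
if err != nil {
	return err
}
copy(*bs, payload) // fill the slice with part data
pool.Put(bs)       // return it so other goroutines can reuse it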

View File

@@ -0,0 +1,65 @@
package s3manager
import (
"io"
"sync"
)
// ReadSeekerWriteTo defines an interface implementing io.WriterTo and io.ReadSeeker
type ReadSeekerWriteTo interface {
io.ReadSeeker
io.WriterTo
}
// BufferedReadSeekerWriteTo wraps a BufferedReadSeeker with an io.WriterTo
// implementation.
type BufferedReadSeekerWriteTo struct {
*BufferedReadSeeker
}
// WriteTo writes to the given io.Writer from BufferedReadSeeker until there's no more data to write or
// an error occurs. Returns the number of bytes written and any error encountered during the write.
func (b *BufferedReadSeekerWriteTo) WriteTo(writer io.Writer) (int64, error) {
return io.Copy(writer, b.BufferedReadSeeker)
}
// ReadSeekerWriteToProvider provides an implementation of io.WriteTo for an io.ReadSeeker
type ReadSeekerWriteToProvider interface {
GetWriteTo(seeker io.ReadSeeker) (r ReadSeekerWriteTo, cleanup func())
}
// BufferedReadSeekerWriteToPool uses a sync.Pool to create and reuse
// []byte slices for buffering parts in memory
type BufferedReadSeekerWriteToPool struct {
pool sync.Pool
}
// NewBufferedReadSeekerWriteToPool will return a new BufferedReadSeekerWriteToPool that will create
// a pool of reusable buffers. If size is less than 64 KiB then the buffer
// will default to 64 KiB. Reason: io.Copy from writers or readers that don't support io.WriteTo or io.ReadFrom
// respectively will default to copying 32 KiB.
func NewBufferedReadSeekerWriteToPool(size int) *BufferedReadSeekerWriteToPool {
if size < 65536 {
size = 65536
}
return &BufferedReadSeekerWriteToPool{
pool: sync.Pool{New: func() interface{} {
return make([]byte, size)
}},
}
}
// GetWriteTo will wrap the provided io.ReadSeeker with a BufferedReadSeekerWriteTo.
// The provided cleanup must be called after operations have been completed on the
// returned io.ReadSeekerWriteTo in order to signal the return of resources to the pool.
func (p *BufferedReadSeekerWriteToPool) GetWriteTo(seeker io.ReadSeeker) (r ReadSeekerWriteTo, cleanup func()) {
buffer := p.pool.Get().([]byte)
r = &BufferedReadSeekerWriteTo{BufferedReadSeeker: NewBufferedReadSeeker(seeker, buffer)}
cleanup = func() {
p.pool.Put(buffer)
}
return r, cleanup
}
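
A usage sketch honoring the cleanup contract (the reader and sink are placeholders):

pool := s3manager.NewBufferedReadSeekerWriteToPool(1024 * 1024)
r, cleanup := pool.GetWriteTo(strings.NewReader("part payload"))

var sink bytes.Buffer
if _, err := r.WriteTo(&sink); err != nil {
	// handle the copy error
}
cleanup() // hand the pooled buffer back for reuse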

View File

@@ -162,6 +162,12 @@ type Uploader struct {
// List of request options that will be passed down to individual API
// operation requests made by the uploader.
RequestOptions []request.Option
// Defines the buffer strategy used when uploading a part
BufferProvider ReadSeekerWriteToProvider
// partPool allows for the reuse of streaming payload part buffers between upload calls
partPool byteSlicePool
}
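
Because partPool slices are sized to PartSize, init() below reallocates the pool whenever PartSize differs from the pool's slice size. A construction sketch, assuming an existing session sess:

uploader := s3manager.NewUploader(sess, func(u *s3manager.Uploader) {
	u.PartSize = 10 * 1024 * 1024 // 10 MiB parts; pool slices follow this size
	u.Concurrency = 8             // pool capacity becomes Concurrency+1 in init()
})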
// NewUploader creates a new Uploader instance to upload objects to S3. Pass In
@@ -181,18 +187,25 @@ type Uploader struct {
// u.PartSize = 64 * 1024 * 1024 // 64MB per part
// })
func NewUploader(c client.ConfigProvider, options ...func(*Uploader)) *Uploader {
return newUploader(s3.New(c), options...)
}
func newUploader(client s3iface.S3API, options ...func(*Uploader)) *Uploader {
u := &Uploader{
S3: s3.New(c),
S3: client,
PartSize: DefaultUploadPartSize,
Concurrency: DefaultUploadConcurrency,
LeavePartsOnError: false,
MaxUploadParts: MaxUploadParts,
BufferProvider: defaultUploadBufferProvider(),
}
for _, option := range options {
option(u)
}
u.partPool = newByteSlicePool(u.PartSize)
return u
}
@@ -215,19 +228,7 @@ func NewUploader(c client.ConfigProvider, options ...func(*Uploader)) *Uploader
// u.PartSize = 64 * 1024 * 1024 // 64MB per part
// })
func NewUploaderWithClient(svc s3iface.S3API, options ...func(*Uploader)) *Uploader {
u := &Uploader{
S3: svc,
PartSize: DefaultUploadPartSize,
Concurrency: DefaultUploadConcurrency,
LeavePartsOnError: false,
MaxUploadParts: MaxUploadParts,
}
for _, option := range options {
option(u)
}
return u
return newUploader(svc, options...)
}
// Upload uploads an object to S3, intelligently buffering large files into
@@ -287,6 +288,7 @@ func (u Uploader) UploadWithContext(ctx aws.Context, input *UploadInput, opts ..
for _, opt := range opts {
opt(&i.cfg)
}
i.cfg.RequestOptions = append(i.cfg.RequestOptions, request.WithAppendUserAgent("S3Manager"))
return i.upload()
@@ -356,8 +358,6 @@ type uploader struct {
readerPos int64 // current reader position
totalSize int64 // set to -1 if the size is not known
bufferPool sync.Pool
}
// internal logic for deciding whether to upload a single part or use a
@@ -366,6 +366,7 @@ func (u *uploader) upload() (*UploadOutput, error) {
if err := u.init(); err != nil {
return nil, awserr.New("ReadRequestBody", "unable to initialize upload", err)
}
defer u.cfg.partPool.Close()
if u.cfg.PartSize < MinUploadPartSize {
msg := fmt.Sprintf("part size must be at least %d bytes", MinUploadPartSize)
@@ -373,15 +374,16 @@ func (u *uploader) upload() (*UploadOutput, error) {
}
// Do one read to determine if we have more than one part
reader, _, part, err := u.nextReader()
reader, _, cleanup, err := u.nextReader()
if err == io.EOF { // single part
return u.singlePart(reader)
return u.singlePart(reader, cleanup)
} else if err != nil {
cleanup()
return nil, awserr.New("ReadRequestBody", "read upload data failed", err)
}
mu := multiuploader{uploader: u}
return mu.upload(reader, part)
return mu.upload(reader, cleanup)
}
// init will initialize all default options.
@@ -396,12 +398,23 @@ func (u *uploader) init() error {
u.cfg.MaxUploadParts = MaxUploadParts
}
u.bufferPool = sync.Pool{
New: func() interface{} { return make([]byte, u.cfg.PartSize) },
// Try to get the total size for some optimizations
if err := u.initSize(); err != nil {
return err
}
// Try to get the total size for some optimizations
return u.initSize()
// If PartSize was changed or partPool was never set up then we need to allocate a new pool
// so that we return []byte slices of the correct size
poolCap := u.cfg.Concurrency + 1
if u.cfg.partPool == nil || u.cfg.partPool.SliceSize() != u.cfg.PartSize {
u.cfg.partPool = newByteSlicePool(u.cfg.PartSize)
u.cfg.partPool.ModifyCapacity(poolCap)
} else {
u.cfg.partPool = &returnCapacityPoolCloser{byteSlicePool: u.cfg.partPool}
u.cfg.partPool.ModifyCapacity(poolCap)
}
return nil
}
// initSize tries to detect the total stream size, setting u.totalSize. If
@@ -433,11 +446,7 @@ func (u *uploader) initSize() error {
// This operation increases the shared u.readerPos counter, but note that it
// does not need to be wrapped in a mutex because nextReader is only called
// from the main thread.
func (u *uploader) nextReader() (io.ReadSeeker, int, []byte, error) {
type readerAtSeeker interface {
io.ReaderAt
io.ReadSeeker
}
func (u *uploader) nextReader() (io.ReadSeeker, int, func(), error) {
switch r := u.in.Body.(type) {
case readerAtSeeker:
var err error
@@ -452,17 +461,36 @@ func (u *uploader) nextReader() (io.ReadSeeker, int, []byte, error) {
}
}
reader := io.NewSectionReader(r, u.readerPos, n)
var (
reader io.ReadSeeker
cleanup func()
)
reader = io.NewSectionReader(r, u.readerPos, n)
if u.cfg.BufferProvider != nil {
reader, cleanup = u.cfg.BufferProvider.GetWriteTo(reader)
} else {
cleanup = func() {}
}
u.readerPos += n
return reader, int(n), nil, err
return reader, int(n), cleanup, err
default:
part := u.bufferPool.Get().([]byte)
n, err := readFillBuf(r, part)
part, err := u.cfg.partPool.Get(u.ctx)
if err != nil {
return nil, 0, func() {}, err
}
n, err := readFillBuf(r, *part)
u.readerPos += int64(n)
return bytes.NewReader(part[0:n]), n, part, err
cleanup := func() {
u.cfg.partPool.Put(part)
}
return bytes.NewReader((*part)[0:n]), n, cleanup, err
}
}
@@ -479,10 +507,12 @@ func readFillBuf(r io.Reader, b []byte) (offset int, err error) {
// singlePart contains upload logic for uploading a single chunk via
// a regular PutObject request. Multipart requests require at least two
// parts, or at least 5MB of data.
func (u *uploader) singlePart(buf io.ReadSeeker) (*UploadOutput, error) {
func (u *uploader) singlePart(r io.ReadSeeker, cleanup func()) (*UploadOutput, error) {
defer cleanup()
params := &s3.PutObjectInput{}
awsutil.Copy(params, u.in)
params.Body = buf
params.Body = r
// Need to use request form because URL generated in request is
// used in return.
@@ -512,9 +542,9 @@ type multiuploader struct {
// keeps track of a single chunk of data being sent to S3.
type chunk struct {
buf io.ReadSeeker
part []byte
num int64
buf io.ReadSeeker
num int64
cleanup func()
}
// completedParts is a wrapper to make parts sortable by their part number,
@@ -527,13 +557,14 @@ func (a completedParts) Less(i, j int) bool { return *a[i].PartNumber < *a[j].Pa
// upload will perform a multipart upload using the firstBuf buffer containing
// the first chunk of data.
func (u *multiuploader) upload(firstBuf io.ReadSeeker, firstPart []byte) (*UploadOutput, error) {
func (u *multiuploader) upload(firstBuf io.ReadSeeker, cleanup func()) (*UploadOutput, error) {
params := &s3.CreateMultipartUploadInput{}
awsutil.Copy(params, u.in)
// Create the multipart
resp, err := u.cfg.S3.CreateMultipartUploadWithContext(u.ctx, params, u.cfg.RequestOptions...)
if err != nil {
cleanup()
return nil, err
}
u.uploadID = *resp.UploadId
@@ -547,46 +578,29 @@ func (u *multiuploader) upload(firstBuf io.ReadSeeker, firstPart []byte) (*Uploa
// Send part 1 to the workers
var num int64 = 1
ch <- chunk{buf: firstBuf, part: firstPart, num: num}
ch <- chunk{buf: firstBuf, num: num, cleanup: cleanup}
// Read and queue the rest of the parts
for u.geterr() == nil && err == nil {
var reader io.ReadSeeker
var nextChunkLen int
var part []byte
reader, nextChunkLen, part, err = u.nextReader()
var (
reader io.ReadSeeker
nextChunkLen int
ok bool
)
if err != nil && err != io.EOF {
u.seterr(awserr.New(
"ReadRequestBody",
"read multipart upload data failed",
err))
break
}
if nextChunkLen == 0 {
// No need to upload an empty part; if the file was empty to start
// with, an empty single part would have been created and a
// multipart upload never started.
reader, nextChunkLen, cleanup, err = u.nextReader()
ok, err = u.shouldContinue(num, nextChunkLen, err)
if !ok {
cleanup()
if err != nil {
u.seterr(err)
}
break
}
num++
// This upload exceeded maximum number of supported parts, error now.
if num > int64(u.cfg.MaxUploadParts) || num > int64(MaxUploadParts) {
var msg string
if num > int64(u.cfg.MaxUploadParts) {
msg = fmt.Sprintf("exceeded total allowed configured MaxUploadParts (%d). Adjust PartSize to fit in this limit",
u.cfg.MaxUploadParts)
} else {
msg = fmt.Sprintf("exceeded total allowed S3 limit MaxUploadParts (%d). Adjust PartSize to fit in this limit",
MaxUploadParts)
}
u.seterr(awserr.New("TotalPartsExceeded", msg, nil))
break
}
ch <- chunk{buf: reader, part: part, num: num}
ch <- chunk{buf: reader, num: num, cleanup: cleanup}
}
// Close the channel, wait for workers, and complete upload
@@ -611,6 +625,7 @@ func (u *multiuploader) upload(firstBuf io.ReadSeeker, firstPart []byte) (*Uploa
Key: u.in.Key,
})
getReq.Config.Credentials = credentials.AnonymousCredentials
getReq.SetContext(u.ctx)
uploadLocation, _, _ := getReq.PresignRequest(1)
return &UploadOutput{
@@ -620,6 +635,35 @@ func (u *multiuploader) upload(firstBuf io.ReadSeeker, firstPart []byte) (*Uploa
}, nil
}
func (u *multiuploader) shouldContinue(part int64, nextChunkLen int, err error) (bool, error) {
if err != nil && err != io.EOF {
return false, awserr.New("ReadRequestBody", "read multipart upload data failed", err)
}
if nextChunkLen == 0 {
// No need to upload an empty part; if the file was empty to start
// with, an empty single part would have been created and a
// multipart upload never started.
return false, nil
}
part++
// This upload exceeded maximum number of supported parts, error now.
if part > int64(u.cfg.MaxUploadParts) || part > int64(MaxUploadParts) {
var msg string
if part > int64(u.cfg.MaxUploadParts) {
msg = fmt.Sprintf("exceeded total allowed configured MaxUploadParts (%d). Adjust PartSize to fit in this limit",
u.cfg.MaxUploadParts)
} else {
msg = fmt.Sprintf("exceeded total allowed S3 limit MaxUploadParts (%d). Adjust PartSize to fit in this limit",
MaxUploadParts)
}
return false, awserr.New("TotalPartsExceeded", msg, nil)
}
return true, err
}
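
The cap enforced here bounds the largest stream a multipart upload can carry. A back-of-envelope check with the package defaults:

const maxParts = 10000                  // MaxUploadParts
const partSize = int64(5 * 1024 * 1024) // DefaultUploadPartSize, 5 MiB
fmt.Println(partSize * maxParts)        // 52428800000 bytes, roughly 48.8 GiB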
// readChunk runs in worker goroutines to pull chunks off of the ch channel
// and send() them as UploadPart requests.
func (u *multiuploader) readChunk(ch chan chunk) {
@@ -636,6 +680,8 @@ func (u *multiuploader) readChunk(ch chan chunk) {
u.seterr(err)
}
}
data.cleanup()
}
}
@@ -651,9 +697,8 @@ func (u *multiuploader) send(c chunk) error {
SSECustomerKey: u.in.SSECustomerKey,
PartNumber: &c.num,
}
resp, err := u.cfg.S3.UploadPartWithContext(u.ctx, params, u.cfg.RequestOptions...)
// put the byte array back into the pool to conserve memory
u.bufferPool.Put(c.part)
if err != nil {
return err
}
@@ -725,3 +770,8 @@ func (u *multiuploader) complete() *s3.CompleteMultipartUploadOutput {
return resp
}
type readerAtSeeker interface {
io.ReaderAt
io.ReadSeeker
}
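
An end-to-end usage sketch, assuming an uploader built as above, a context ctx, and an open file (the bucket and key are placeholders):

out, err := uploader.UploadWithContext(ctx, &s3manager.UploadInput{
	Bucket: aws.String("my-bucket"),
	Key:    aws.String("my-key"),
	Body:   file, // seekable bodies take the io.SectionReader path and skip the part pool
})
if err != nil {
	return err
}
fmt.Println("uploaded to", out.Location)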

View File

@@ -12,42 +12,59 @@ import (
// package's PutObjectInput with the exception that the Body member is an
// io.Reader instead of an io.ReadSeeker.
type UploadInput struct {
_ struct{} `type:"structure" payload:"Body"`
_ struct{} `locationName:"PutObjectRequest" type:"structure" payload:"Body"`
// The canned ACL to apply to the object.
// The canned ACL to apply to the object. For more information, see Canned ACL
// (https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#CannedACL).
ACL *string `location:"header" locationName:"x-amz-acl" type:"string" enum:"ObjectCannedACL"`
// The readable body payload to send to S3.
Body io.Reader
// Name of the bucket to which the PUT operation was initiated.
// Bucket name to which the PUT operation was initiated.
//
// When using this API with an access point, you must direct requests to the
// access point hostname. The access point hostname takes the form AccessPointName-AccountId.s3-accesspoint.Region.amazonaws.com.
// When using this operation using an access point through the AWS SDKs, you
// provide the access point ARN in place of the bucket name. For more information
// about access point ARNs, see Using Access Points (https://docs.aws.amazon.com/AmazonS3/latest/dev/using-access-points.html)
// in the Amazon Simple Storage Service Developer Guide.
//
// Bucket is a required field
Bucket *string `location:"uri" locationName:"Bucket" type:"string" required:"true"`
// Specifies caching behavior along the request/reply chain.
// Can be used to specify caching behavior along the request/reply chain. For
// more information, see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9
// (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.9).
CacheControl *string `location:"header" locationName:"Cache-Control" type:"string"`
// Specifies presentational information for the object.
// Specifies presentational information for the object. For more information,
// see http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.5.1 (http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.5.1).
ContentDisposition *string `location:"header" locationName:"Content-Disposition" type:"string"`
// Specifies what content encodings have been applied to the object and thus
// what decoding mechanisms must be applied to obtain the media-type referenced
// by the Content-Type header field.
// by the Content-Type header field. For more information, see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.11
// (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.11).
ContentEncoding *string `location:"header" locationName:"Content-Encoding" type:"string"`
// The language the content is in.
ContentLanguage *string `location:"header" locationName:"Content-Language" type:"string"`
// The base64-encoded 128-bit MD5 digest of the part data. This parameter is
// auto-populated when using the command from the CLI. This parameter is required
// if object lock parameters are specified.
// The base64-encoded 128-bit MD5 digest of the message (without the headers)
// according to RFC 1864. This header can be used as a message integrity check
// to verify that the data is the same data that was originally sent. Although
// it is optional, we recommend using the Content-MD5 mechanism as an end-to-end
// integrity check. For more information about REST request authentication,
// see REST Authentication (https://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html).
ContentMD5 *string `location:"header" locationName:"Content-MD5" type:"string"`
// A standard MIME type describing the format of the object data.
// A standard MIME type describing the format of the contents. For more information,
// see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17 (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17).
ContentType *string `location:"header" locationName:"Content-Type" type:"string"`
// The date and time at which the object is no longer cacheable.
// The date and time at which the object is no longer cacheable. For more information,
// see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.21 (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.21).
Expires *time.Time `location:"header" locationName:"Expires" type:"timestamp"`
// Gives the grantee READ, READ_ACP, and WRITE_ACP permissions on the object.
@@ -70,34 +87,37 @@ type UploadInput struct {
// A map of metadata to store with the object in S3.
Metadata map[string]*string `location:"headers" locationName:"x-amz-meta-" type:"map"`
// The Legal Hold status that you want to apply to the specified object.
// Specifies whether a legal hold will be applied to this object. For more information
// about S3 Object Lock, see Object Lock (https://docs.aws.amazon.com/AmazonS3/latest/dev/object-lock.html).
ObjectLockLegalHoldStatus *string `location:"header" locationName:"x-amz-object-lock-legal-hold" type:"string" enum:"ObjectLockLegalHoldStatus"`
// The object lock mode that you want to apply to this object.
// The Object Lock mode that you want to apply to this object.
ObjectLockMode *string `location:"header" locationName:"x-amz-object-lock-mode" type:"string" enum:"ObjectLockMode"`
// The date and time when you want this object's object lock to expire.
// The date and time when you want this object's Object Lock to expire.
ObjectLockRetainUntilDate *time.Time `location:"header" locationName:"x-amz-object-lock-retain-until-date" type:"timestamp" timestampFormat:"iso8601"`
// Confirms that the requester knows that she or he will be charged for the
// request. Bucket owners need not specify this parameter in their requests.
// Documentation on downloading objects from requester pays buckets can be found
// at http://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectsinRequesterPaysBuckets.html
// Confirms that the requester knows that they will be charged for the request.
// Bucket owners need not specify this parameter in their requests. For information
// about downloading objects from requester pays buckets, see Downloading Objects
// in Requester Pays Buckets (https://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectsinRequesterPaysBuckets.html)
// in the Amazon S3 Developer Guide.
RequestPayer *string `location:"header" locationName:"x-amz-request-payer" type:"string" enum:"RequestPayer"`
// Specifies the algorithm to use when encrypting the object (e.g., AES256).
// Specifies the algorithm to use when encrypting the object (for example,
// AES256).
SSECustomerAlgorithm *string `location:"header" locationName:"x-amz-server-side-encryption-customer-algorithm" type:"string"`
// Specifies the customer-provided encryption key for Amazon S3 to use in encrypting
// data. This value is used to store the object and then it is discarded; Amazon
// does not store the encryption key. The key must be appropriate for use with
// the algorithm specified in the x-amz-server-side-encryption-customer-algorithm
// S3 does not store the encryption key. The key must be appropriate for use
// with the algorithm specified in the x-amz-server-side-encryption-customer-algorithm
// header.
SSECustomerKey *string `marshal-as:"blob" location:"header" locationName:"x-amz-server-side-encryption-customer-key" type:"string" sensitive:"true"`
// Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321.
// Amazon S3 uses this header for a message integrity check to ensure the encryption
// key was transmitted without error.
// Amazon S3 uses this header for a message integrity check to ensure that the
// encryption key was transmitted without error.
SSECustomerKeyMD5 *string `location:"header" locationName:"x-amz-server-side-encryption-customer-key-MD5" type:"string"`
// Specifies the AWS KMS Encryption Context to use for object encryption. The
@@ -105,17 +125,24 @@ type UploadInput struct {
// encryption context key-value pairs.
SSEKMSEncryptionContext *string `location:"header" locationName:"x-amz-server-side-encryption-context" type:"string" sensitive:"true"`
// Specifies the AWS KMS key ID to use for object encryption. All GET and PUT
// requests for an object protected by AWS KMS will fail if not made via SSL
// or using SigV4. Documentation on configuring any of the officially supported
// AWS SDKs and CLI can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingAWSSDK.html#specify-signature-version
// If x-amz-server-side-encryption is present and has the value of aws:kms,
// this header specifies the ID of the AWS Key Management Service (AWS KMS)
// symmetric customer managed customer master key (CMK) that was used for
// the object.
//
// If the value of x-amz-server-side-encryption is aws:kms, this header specifies
// the ID of the symmetric customer managed AWS KMS CMK that will be used for
// the object. If you specify x-amz-server-side-encryption:aws:kms, but do not
// provide x-amz-server-side-encryption-aws-kms-key-id, Amazon S3 uses the AWS
// managed CMK in AWS KMS to protect the data.
SSEKMSKeyId *string `location:"header" locationName:"x-amz-server-side-encryption-aws-kms-key-id" type:"string" sensitive:"true"`
// The Server-side encryption algorithm used when storing this object in S3
// (e.g., AES256, aws:kms).
// The server-side encryption algorithm used when storing this object in Amazon
// S3 (for example, AES256, aws:kms).
ServerSideEncryption *string `location:"header" locationName:"x-amz-server-side-encryption" type:"string" enum:"ServerSideEncryption"`
// The type of storage to use for the object. Defaults to 'STANDARD'.
// If you don't specify, Standard is the default storage class. Amazon S3 supports
// other storage classes.
StorageClass *string `location:"header" locationName:"x-amz-storage-class" type:"string" enum:"StorageClass"`
// The tag-set for the object. The tag-set must be encoded as URL Query parameters.
@@ -124,6 +151,21 @@ type UploadInput struct {
// If the bucket is configured as a website, redirects requests for this object
// to another object in the same bucket or to an external URL. Amazon S3 stores
// the value of this header in the object metadata.
// the value of this header in the object metadata. For information about object
// metadata, see Object Key and Metadata (https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html).
//
// In the following example, the request header sets the redirect to an object
// (anotherPage.html) in the same bucket:
//
// x-amz-website-redirect-location: /anotherPage.html
//
// In the following example, the request header sets the object redirect to
// another website:
//
// x-amz-website-redirect-location: http://www.example.com/
//
// For more information about website hosting in Amazon S3, see Hosting Websites
// on Amazon S3 (https://docs.aws.amazon.com/AmazonS3/latest/dev/WebsiteHosting.html)
// and How to Configure Website Page Redirects (https://docs.aws.amazon.com/AmazonS3/latest/dev/how-to-page-redirect.html).
WebsiteRedirectLocation *string `location:"header" locationName:"x-amz-website-redirect-location" type:"string"`
}
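
An illustrative input exercising several of the documented fields (all values are placeholders):

input := &s3manager.UploadInput{
	Bucket:                  aws.String("my-bucket"),
	Key:                     aws.String("docs/index.html"),
	Body:                    strings.NewReader("<html>...</html>"),
	ContentType:             aws.String("text/html"),
	CacheControl:            aws.String("max-age=3600"),
	ServerSideEncryption:    aws.String("aws:kms"),
	WebsiteRedirectLocation: aws.String("/anotherPage.html"),
}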

View File

@@ -0,0 +1,75 @@
package s3manager
import (
"bufio"
"io"
"sync"
"github.com/aws/aws-sdk-go/internal/sdkio"
)
// WriterReadFrom defines an interface implementing io.Writer and io.ReaderFrom
type WriterReadFrom interface {
io.Writer
io.ReaderFrom
}
// WriterReadFromProvider provides an implementation of io.ReadFrom for the given io.Writer
type WriterReadFromProvider interface {
GetReadFrom(writer io.Writer) (w WriterReadFrom, cleanup func())
}
type bufferedWriter interface {
WriterReadFrom
Flush() error
Reset(io.Writer)
}
type bufferedReadFrom struct {
bufferedWriter
}
func (b *bufferedReadFrom) ReadFrom(r io.Reader) (int64, error) {
n, err := b.bufferedWriter.ReadFrom(r)
if flushErr := b.Flush(); flushErr != nil && err == nil {
err = flushErr
}
return n, err
}
// PooledBufferedReadFromProvider is a WriterReadFromProvider that uses a sync.Pool
// to manage allocation and reuse of *bufio.Writer structures.
type PooledBufferedReadFromProvider struct {
pool sync.Pool
}
// NewPooledBufferedWriterReadFromProvider returns a new PooledBufferedReadFromProvider.
// Size is used to control the size of the underlying *bufio.Writer created for
// calls to GetReadFrom.
func NewPooledBufferedWriterReadFromProvider(size int) *PooledBufferedReadFromProvider {
if size < int(32*sdkio.KibiByte) {
size = int(64 * sdkio.KibiByte)
}
return &PooledBufferedReadFromProvider{
pool: sync.Pool{
New: func() interface{} {
return &bufferedReadFrom{bufferedWriter: bufio.NewWriterSize(nil, size)}
},
},
}
}
// GetReadFrom takes an io.Writer and wraps it with a type which satisfies the WriterReadFrom
// interface. Additionally a cleanup function is provided which must be called after usage of the WriterReadFrom
// has been completed in order to allow the reuse of the *bufio.Writer
func (p *PooledBufferedReadFromProvider) GetReadFrom(writer io.Writer) (r WriterReadFrom, cleanup func()) {
buffer := p.pool.Get().(*bufferedReadFrom)
buffer.Reset(writer)
r = buffer
cleanup = func() {
buffer.Reset(nil) // Reset to nil writer to release reference
p.pool.Put(buffer)
}
return r, cleanup
}
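
A usage sketch of the provider (destination and source are placeholders):

provider := s3manager.NewPooledBufferedWriterReadFromProvider(1024 * 1024)

var dst bytes.Buffer
w, cleanup := provider.GetReadFrom(&dst)
if _, err := w.ReadFrom(strings.NewReader("response body")); err != nil {
	// handle the error; ReadFrom has already flushed what it could
}
cleanup() // reset and recycle the *bufio.Writer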