s3store: Use queue for parallel uploads
parent 86a329cef2
commit 29b59a2c90
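In short, this change gives S3Store a lazily created uploadQueue so WriteChunk can hand each completed part to a fixed pool of uploader goroutines (ConcurrentPartUploads of them, 8 by default) instead of uploading parts one at a time, and it joins the workers back with a sync.WaitGroup plus a per-call result channel. A minimal, self-contained sketch of that producer/worker/collector shape, with simplified types (the real s3UploadJob also carries the context, the *s3.UploadPartInput, the temporary file, and the part size):

package main

import (
	"fmt"
	"sync"
)

// job stands in for s3UploadJob: one unit of work plus the channel on
// which the worker reports back. Names here are illustrative only.
type job struct {
	partNumber    int
	err           error
	resultChannel chan *job
}

// queue stands in for s3UploadQueue: a buffered channel drained by a
// fixed number of worker goroutines, like newS3UploadQueue/uploadLoop.
type queue struct {
	jobs chan *job
}

func newQueue(concurrency, maxBufferedParts int) *queue {
	q := &queue{jobs: make(chan *job, maxBufferedParts)}
	for i := 0; i < concurrency; i++ {
		go q.uploadLoop()
	}
	return q
}

// push appends another item to the queue and returns immediately,
// unless the buffer is full and all workers are busy.
func (q *queue) push(j *job) { q.jobs <- j }

func (q *queue) uploadLoop() {
	for j := range q.jobs { // workers run until the program exits
		j.err = nil // stand-in for the actual UploadPartWithContext call
		j.resultChannel <- j
	}
}

func main() {
	q := newQueue(8, 20) // mirrors ConcurrentPartUploads: 8, MaxBufferedParts: 20
	results := make(chan *job)
	var wg sync.WaitGroup

	// Producer side, as in WriteChunk: enqueue one job per finished part.
	for p := 1; p <= 5; p++ {
		wg.Add(1)
		q.push(&job{partNumber: p, resultChannel: results})
	}

	// Collector side: a single goroutine aggregates the results.
	go func() {
		for j := range results {
			fmt.Println("Job completed", j.partNumber, j.err)
			wg.Done()
		}
	}()

	wg.Wait() // all parts uploaded (or failed) before returning
}

Because the jobs channel is buffered with maxBufferedParts, push applies backpressure once too many parts are queued; presumably the real queue uses MaxBufferedParts the same way.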
@@ -159,6 +159,8 @@ type S3Store struct {
 	// CPU, so it might be desirable to disable them.
 	// Note that this property is experimental and might be removed in the future!
 	DisableContentHashes bool
+
+	uploadQueue *s3UploadQueue
 }

 type S3API interface {
@@ -177,15 +179,16 @@ type S3API interface {
 // New constructs a new storage using the supplied bucket and service object.
 func New(bucket string, service S3API) S3Store {
 	return S3Store{
-		Bucket:             bucket,
-		Service:            service,
-		MaxPartSize:        5 * 1024 * 1024 * 1024,
-		MinPartSize:        5 * 1024 * 1024,
-		PreferredPartSize:  50 * 1024 * 1024,
-		MaxMultipartParts:  10000,
-		MaxObjectSize:      5 * 1024 * 1024 * 1024 * 1024,
-		MaxBufferedParts:   20,
-		TemporaryDirectory: "",
+		Bucket:                bucket,
+		Service:               service,
+		MaxPartSize:           5 * 1024 * 1024 * 1024,
+		MinPartSize:           5 * 1024 * 1024,
+		PreferredPartSize:     50 * 1024 * 1024,
+		MaxMultipartParts:     10000,
+		MaxObjectSize:         5 * 1024 * 1024 * 1024 * 1024,
+		MaxBufferedParts:      20,
+		ConcurrentPartUploads: 8,
+		TemporaryDirectory:    "",
 	}

 	// TODO: Start goroutine
@@ -328,6 +331,10 @@ func (upload s3Upload) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) {
 		return 0, err
 	}

+	if store.uploadQueue == nil {
+		store.uploadQueue = newS3UploadQueue(store.Service, store.ConcurrentPartUploads, store.MaxBufferedParts, store.DisableContentHashes)
+	}
+
 	numParts := len(parts)
 	nextPartNum := int64(numParts + 1)

@@ -357,6 +364,27 @@ func (upload s3Upload) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) {
 		}
 	}()

+	resultChannel := make(chan *s3UploadJob)
+	var wg sync.WaitGroup
+	var uploadErr error
+	defer close(resultChannel)
+	go func() {
+		for {
+			job, more := <-resultChannel
+			if !more {
+				break
+			}
+
+			wg.Done()
+			fmt.Println("Job completed", *job.uploadPartInput.PartNumber, job.err)
+			if job.err != nil {
+				uploadErr = job.err
+			} else {
+				bytesUploaded += job.size
+			}
+		}
+	}()
+
 	partProducer := s3PartProducer{
 		store: store,
 		done:  doneChan,
@@ -380,10 +408,27 @@ func (upload s3Upload) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) {
 				UploadId:   aws.String(multipartId),
 				PartNumber: aws.Int64(nextPartNum),
 			}
-			if err := upload.putPartForUpload(ctx, uploadPartInput, file, n); err != nil {
-				return bytesUploaded, err
-			}
+			store.uploadQueue.push(&s3UploadJob{
+				ctx:             ctx,
+				uploadPartInput: uploadPartInput,
+				file:            file,
+				size:            n,
+				resultChannel:   resultChannel,
+			})
+			wg.Add(1)
+			//if err := upload.putPartForUpload(ctx, uploadPartInput, file, n); err != nil {
+			//	return bytesUploaded, err
+			//}
 		} else {
+			wg.Wait()
+
+			if uploadErr != nil {
+				cleanUpTempFile(file)
+				return bytesUploaded, uploadErr
+			}
+
+			fmt.Println("putIncompletePartForUpload", n >= store.MinPartSize, isFinalChunk)
+
 			if err := store.putIncompletePartForUpload(ctx, uploadId, file); err != nil {
 				return bytesUploaded, err
 			}
@@ -394,10 +439,11 @@ func (upload s3Upload) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) {
 		}

 		offset += n
 		bytesUploaded += n
 		nextPartNum += 1
 	}

+	wg.Wait()
+
 	return bytesUploaded - incompletePartSize, partProducer.err
 }
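The test hunks below follow from the storage changes above: once parts are uploaded by concurrent workers, the UploadPartWithContext calls can reach the mock in any order, so the gomock.InOrder wrappers are commented out and each expectation is registered individually.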
@@ -726,34 +726,34 @@ func TestWriteChunk(t *testing.T) {
 		Key:    aws.String("uploadId.part"),
 	}).Return(&s3.GetObjectOutput{}, awserr.New("NoSuchKey", "The specified key does not exist.", nil))

-	gomock.InOrder(
-		s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
-			Bucket:     aws.String("bucket"),
-			Key:        aws.String("uploadId"),
-			UploadId:   aws.String("multipartId"),
-			PartNumber: aws.Int64(3),
-			Body:       bytes.NewReader([]byte("1234")),
-		})).Return(nil, nil),
-		s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
-			Bucket:     aws.String("bucket"),
-			Key:        aws.String("uploadId"),
-			UploadId:   aws.String("multipartId"),
-			PartNumber: aws.Int64(4),
-			Body:       bytes.NewReader([]byte("5678")),
-		})).Return(nil, nil),
-		s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
-			Bucket:     aws.String("bucket"),
-			Key:        aws.String("uploadId"),
-			UploadId:   aws.String("multipartId"),
-			PartNumber: aws.Int64(5),
-			Body:       bytes.NewReader([]byte("90AB")),
-		})).Return(nil, nil),
-		s3obj.EXPECT().PutObjectWithContext(context.Background(), NewPutObjectInputMatcher(&s3.PutObjectInput{
-			Bucket: aws.String("bucket"),
-			Key:    aws.String("uploadId.part"),
-			Body:   bytes.NewReader([]byte("CD")),
-		})).Return(nil, nil),
-	)
+	//gomock.InOrder(
+	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
+		Bucket:     aws.String("bucket"),
+		Key:        aws.String("uploadId"),
+		UploadId:   aws.String("multipartId"),
+		PartNumber: aws.Int64(3),
+		Body:       bytes.NewReader([]byte("1234")),
+	})).Return(nil, nil)
+	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
+		Bucket:     aws.String("bucket"),
+		Key:        aws.String("uploadId"),
+		UploadId:   aws.String("multipartId"),
+		PartNumber: aws.Int64(4),
+		Body:       bytes.NewReader([]byte("5678")),
+	})).Return(nil, nil)
+	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
+		Bucket:     aws.String("bucket"),
+		Key:        aws.String("uploadId"),
+		UploadId:   aws.String("multipartId"),
+		PartNumber: aws.Int64(5),
+		Body:       bytes.NewReader([]byte("90AB")),
+	})).Return(nil, nil)
+	s3obj.EXPECT().PutObjectWithContext(context.Background(), NewPutObjectInputMatcher(&s3.PutObjectInput{
+		Bucket: aws.String("bucket"),
+		Key:    aws.String("uploadId.part"),
+		Body:   bytes.NewReader([]byte("CD")),
+	})).Return(nil, nil)
+	//)

 	upload, err := store.GetUpload(context.Background(), "uploadId+multipartId")
 	assert.Nil(err)
@@ -861,22 +861,22 @@ func TestWriteChunkPrependsIncompletePart(t *testing.T) {
 		PartNumberMarker: aws.Int64(0),
 	}).Return(&s3.ListPartsOutput{Parts: []*s3.Part{}}, nil).Times(2)

-	gomock.InOrder(
-		s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
-			Bucket:     aws.String("bucket"),
-			Key:        aws.String("uploadId"),
-			UploadId:   aws.String("multipartId"),
-			PartNumber: aws.Int64(1),
-			Body:       bytes.NewReader([]byte("1234")),
-		})).Return(nil, nil),
-		s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
-			Bucket:     aws.String("bucket"),
-			Key:        aws.String("uploadId"),
-			UploadId:   aws.String("multipartId"),
-			PartNumber: aws.Int64(2),
-			Body:       bytes.NewReader([]byte("5")),
-		})).Return(nil, nil),
-	)
+	//gomock.InOrder(
+	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
+		Bucket:     aws.String("bucket"),
+		Key:        aws.String("uploadId"),
+		UploadId:   aws.String("multipartId"),
+		PartNumber: aws.Int64(1),
+		Body:       bytes.NewReader([]byte("1234")),
+	})).Return(nil, nil)
+	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
+		Bucket:     aws.String("bucket"),
+		Key:        aws.String("uploadId"),
+		UploadId:   aws.String("multipartId"),
+		PartNumber: aws.Int64(2),
+		Body:       bytes.NewReader([]byte("5")),
+	})).Return(nil, nil)
+	//)

 	upload, err := store.GetUpload(context.Background(), "uploadId+multipartId")
 	assert.Nil(err)
@@ -1267,6 +1267,7 @@ type s3APIWithTempFileAssertion struct {
 }

 func (s s3APIWithTempFileAssertion) UploadPartWithContext(context.Context, *s3.UploadPartInput, ...request.Option) (*s3.UploadPartOutput, error) {
+	fmt.Println("Helloz")
 	assert := s.assert

 	// Make sure that only the two temporary files from tusd are in here.
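The remaining hunks touch the new upload queue itself (newS3UploadQueue, push, uploadLoop, and putPartForUpload, which per the TODO below moved here out of s3store).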
@@ -44,7 +44,7 @@ func newS3UploadQueue(service S3API, concurrency int64, maxBufferedParts int64, disableContentHashes bool) *s3UploadQueue {
 		disableContentHashes: disableContentHashes,
 	}

-	for i := 0; i < concurrency; i++ {
+	for i := 0; i < int(concurrency); i++ {
 		go s.uploadLoop()
 	}
@@ -58,6 +58,7 @@ func (s s3UploadQueue) queueLength() int {

 // push appends another item to the queue and returns immediately.
 func (s s3UploadQueue) push(job *s3UploadJob) {
+	// TODO: Handle closed channel
 	s.queue <- job
 }
@@ -75,6 +76,7 @@ func (s s3UploadQueue) uploadLoop() {
 		}

 		job.etag, job.err = s.putPartForUpload(job.ctx, job.uploadPartInput, job.file, job.size)
+		// TODO: Handle closed channel
 		job.resultChannel <- job
 	}
 }
@@ -82,23 +84,24 @@ func (s s3UploadQueue) uploadLoop() {
 func (s s3UploadQueue) putPartForUpload(ctx context.Context, uploadPartInput *s3.UploadPartInput, file *os.File, size int64) (etag string, err error) {
 	// TODO: Move this back into s3store where the file is created
 	defer cleanUpTempFile(file)
+	fmt.Println("Job started", *uploadPartInput.PartNumber)

 	if !s.disableContentHashes {
 		// By default, use the traditional approach to upload data
 		uploadPartInput.Body = file
-		res, err := s.service.UploadPartWithContext(ctx, uploadPartInput)
-		if res.ETag != nil {
-			etag = *res.ETag
-		}
+		_, err = s.service.UploadPartWithContext(ctx, uploadPartInput)
+		//if res.ETag != nil {
+		//	etag = *res.ETag
+		//}
 		return etag, err
 	} else {
 		// Experimental feature to prevent the AWS SDK from calculating the SHA256 hash
 		// for the parts we upload to S3.
 		// We compute the presigned URL without the body attached and then send the request
 		// on our own. This way, the body is not included in the SHA256 calculation.
-		s3api, ok := s.service.Service.(s3APIForPresigning)
+		s3api, ok := s.service.(s3APIForPresigning)
 		if !ok {
-			return fmt.Errorf("s3store: failed to cast S3 service for presigning")
+			return "", fmt.Errorf("s3store: failed to cast S3 service for presigning")
 		}

 		s3Req, _ := s3api.UploadPartRequest(uploadPartInput)
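The hunk ends just after the UploadPartRequest call, so the rest of the presigning path is not shown. Purely as an illustrative sketch of the approach the comments describe, and not necessarily how this commit continues, the part could be sent without the SDK hashing the body roughly like this (AWS SDK for Go v1's request.Request.Presign; assumes net/http and time are imported):

// Sign the UploadPart request without the body attached, then send the
// body ourselves so the SDK never computes a SHA256 over it.
// Sketch only; error handling and names are illustrative.
url, err := s3Req.Presign(15 * time.Minute)
if err != nil {
	return "", err
}

httpReq, err := http.NewRequest("PUT", url, file)
if err != nil {
	return "", err
}
httpReq.ContentLength = size

res, err := http.DefaultClient.Do(httpReq)
if err != nil {
	return "", err
}
defer res.Body.Close()

if res.StatusCode != http.StatusOK {
	return "", fmt.Errorf("s3store: unexpected response %d for presigned part upload", res.StatusCode)
}
return res.Header.Get("ETag"), nil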