s3store: Use queue for parallel uploads

Marius 2021-04-27 00:34:23 +02:00
parent 86a329cef2
commit 29b59a2c90
3 changed files with 114 additions and 64 deletions
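
Note: the diff below leans on an s3UploadQueue helper whose type definitions are not part of the changed hunks shown here. As a rough orientation only, the structures it references might look like the following sketch; field and method names are taken from their usages in this commit, everything else is an assumption:

// Hypothetical sketch, not the committed code: shapes inferred from how the
// diff below uses them (push, uploadLoop, resultChannel, etag, err, ...).
type s3UploadJob struct {
	ctx             context.Context     // request context passed through to UploadPartWithContext
	uploadPartInput *s3.UploadPartInput // bucket, key, upload ID and part number for this part
	file            *os.File            // temporary file holding the part's data
	size            int64               // number of bytes in the part
	resultChannel   chan *s3UploadJob   // the job is sent back here once the upload finished

	// Filled in by the worker goroutine after the upload attempt.
	etag string
	err  error
}

type s3UploadQueue struct {
	service              S3API             // S3 client used by the worker goroutines
	queue                chan *s3UploadJob // assumed to be buffered with maxBufferedParts entries
	disableContentHashes bool              // forwarded to the presigned-request code path
}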

View File

@@ -159,6 +159,8 @@ type S3Store struct {
	// CPU, so it might be desirable to disable them.
	// Note that this property is experimental and might be removed in the future!
	DisableContentHashes bool
+	uploadQueue *s3UploadQueue
}

type S3API interface {
@@ -177,15 +179,16 @@ type S3API interface {
// New constructs a new storage using the supplied bucket and service object.
func New(bucket string, service S3API) S3Store {
	return S3Store{
		Bucket:                bucket,
		Service:               service,
		MaxPartSize:           5 * 1024 * 1024 * 1024,
		MinPartSize:           5 * 1024 * 1024,
		PreferredPartSize:     50 * 1024 * 1024,
		MaxMultipartParts:     10000,
		MaxObjectSize:         5 * 1024 * 1024 * 1024 * 1024,
		MaxBufferedParts:      20,
-		TemporaryDirectory:    "",
+		ConcurrentPartUploads: 8,
+		TemporaryDirectory:    "",
	}
+	// TODO: Start goroutine
@@ -328,6 +331,10 @@ func (upload s3Upload) WriteChunk(ctx context.Context, offset int64, src io.Read
		return 0, err
	}

+	if store.uploadQueue == nil {
+		store.uploadQueue = newS3UploadQueue(store.Service, store.ConcurrentPartUploads, store.MaxBufferedParts, store.DisableContentHashes)
+	}

	numParts := len(parts)
	nextPartNum := int64(numParts + 1)
@@ -357,6 +364,27 @@ func (upload s3Upload) WriteChunk(ctx context.Context, offset int64, src io.Read
		}
	}()

+	resultChannel := make(chan *s3UploadJob)
+	var wg sync.WaitGroup
+	var uploadErr error
+	defer close(resultChannel)
+	go func() {
+		for {
+			job, more := <-resultChannel
+			if !more {
+				break
+			}
+			wg.Done()
+			fmt.Println("Job completed", *job.uploadPartInput.PartNumber, job.err)
+			if job.err != nil {
+				uploadErr = job.err
+			} else {
+				bytesUploaded += job.size
+			}
+		}
+	}()

	partProducer := s3PartProducer{
		store: store,
		done:  doneChan,
@@ -380,10 +408,27 @@ func (upload s3Upload) WriteChunk(ctx context.Context, offset int64, src io.Read
				UploadId:   aws.String(multipartId),
				PartNumber: aws.Int64(nextPartNum),
			}
-			if err := upload.putPartForUpload(ctx, uploadPartInput, file, n); err != nil {
-				return bytesUploaded, err
-			}
+			store.uploadQueue.push(&s3UploadJob{
+				ctx:             ctx,
+				uploadPartInput: uploadPartInput,
+				file:            file,
+				size:            n,
+				resultChannel:   resultChannel,
+			})
+			wg.Add(1)
+			//if err := upload.putPartForUpload(ctx, uploadPartInput, file, n); err != nil {
+			//	return bytesUploaded, err
+			//}
		} else {
+			wg.Wait()
+			if uploadErr != nil {
+				cleanUpTempFile(file)
+				return bytesUploaded, uploadErr
+			}
+			fmt.Println("putIncompletePartForUpload", n >= store.MinPartSize, isFinalChunk)
			if err := store.putIncompletePartForUpload(ctx, uploadId, file); err != nil {
				return bytesUploaded, err
			}
@@ -394,10 +439,11 @@ func (upload s3Upload) WriteChunk(ctx context.Context, offset int64, src io.Read
		}

		offset += n
-		bytesUploaded += n
		nextPartNum += 1
	}

+	wg.Wait()
	return bytesUploaded - incompletePartSize, partProducer.err
}
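
Taken out of context, the concurrency pattern WriteChunk adopts above is a plain fan-out/fan-in: each full part is pushed onto the queue, a single collector goroutine drains the result channel, and a sync.WaitGroup gates the final return. A minimal, self-contained sketch of that pattern (simplified names; not code from this repository):

package main

import (
	"fmt"
	"sync"
)

// job is a stand-in for s3UploadJob, reduced to what the pattern needs.
type job struct {
	size   int64
	err    error
	result chan *job
}

func main() {
	results := make(chan *job)
	var wg sync.WaitGroup
	var uploaded int64

	// Collector: mirrors the goroutine added to WriteChunk, accumulating the
	// byte counts of jobs that finished without an error.
	go func() {
		for j := range results {
			if j.err == nil {
				uploaded += j.size
			}
			wg.Done()
		}
	}()

	// Producer: one wg.Add(1) per job handed to the workers, as in the diff.
	for i := int64(1); i <= 3; i++ {
		wg.Add(1)
		j := &job{size: i * 1024, result: results}
		go func(j *job) { // stand-in for s3UploadQueue's worker goroutines
			j.result <- j
		}(j)
	}

	wg.Wait()      // all results have been collected
	close(results) // let the collector exit
	fmt.Println("bytes uploaded:", uploaded)
}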

View File

@@ -726,34 +726,34 @@ func TestWriteChunk(t *testing.T) {
		Key:    aws.String("uploadId.part"),
	}).Return(&s3.GetObjectOutput{}, awserr.New("NoSuchKey", "The specified key does not exist.", nil))

-	gomock.InOrder(
+	//gomock.InOrder(
	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
		Bucket:     aws.String("bucket"),
		Key:        aws.String("uploadId"),
		UploadId:   aws.String("multipartId"),
		PartNumber: aws.Int64(3),
		Body:       bytes.NewReader([]byte("1234")),
-	})).Return(nil, nil),
+	})).Return(nil, nil)
	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
		Bucket:     aws.String("bucket"),
		Key:        aws.String("uploadId"),
		UploadId:   aws.String("multipartId"),
		PartNumber: aws.Int64(4),
		Body:       bytes.NewReader([]byte("5678")),
-	})).Return(nil, nil),
+	})).Return(nil, nil)
	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
		Bucket:     aws.String("bucket"),
		Key:        aws.String("uploadId"),
		UploadId:   aws.String("multipartId"),
		PartNumber: aws.Int64(5),
		Body:       bytes.NewReader([]byte("90AB")),
-	})).Return(nil, nil),
+	})).Return(nil, nil)
	s3obj.EXPECT().PutObjectWithContext(context.Background(), NewPutObjectInputMatcher(&s3.PutObjectInput{
		Bucket: aws.String("bucket"),
		Key:    aws.String("uploadId.part"),
		Body:   bytes.NewReader([]byte("CD")),
-	})).Return(nil, nil),
-	)
+	})).Return(nil, nil)
+	//)

	upload, err := store.GetUpload(context.Background(), "uploadId+multipartId")
	assert.Nil(err)
@@ -861,22 +861,22 @@ func TestWriteChunkPrependsIncompletePart(t *testing.T) {
		PartNumberMarker: aws.Int64(0),
	}).Return(&s3.ListPartsOutput{Parts: []*s3.Part{}}, nil).Times(2)

-	gomock.InOrder(
+	//gomock.InOrder(
	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
		Bucket:     aws.String("bucket"),
		Key:        aws.String("uploadId"),
		UploadId:   aws.String("multipartId"),
		PartNumber: aws.Int64(1),
		Body:       bytes.NewReader([]byte("1234")),
-	})).Return(nil, nil),
+	})).Return(nil, nil)
	s3obj.EXPECT().UploadPartWithContext(context.Background(), NewUploadPartInputMatcher(&s3.UploadPartInput{
		Bucket:     aws.String("bucket"),
		Key:        aws.String("uploadId"),
		UploadId:   aws.String("multipartId"),
		PartNumber: aws.Int64(2),
		Body:       bytes.NewReader([]byte("5")),
-	})).Return(nil, nil),
-	)
+	})).Return(nil, nil)
+	//)

	upload, err := store.GetUpload(context.Background(), "uploadId+multipartId")
	assert.Nil(err)
@@ -1267,6 +1267,7 @@ type s3APIWithTempFileAssertion struct {
}

func (s s3APIWithTempFileAssertion) UploadPartWithContext(context.Context, *s3.UploadPartInput, ...request.Option) (*s3.UploadPartOutput, error) {
+	fmt.Println("Helloz")
	assert := s.assert

	// Make sure that only the two temporary files from tusd are in here.

View File

@@ -44,7 +44,7 @@ func newS3UploadQueue(service S3API, concurrency int64, maxBufferedParts int64,
		disableContentHashes: disableContentHashes,
	}

-	for i := 0; i < concurrency; i++ {
+	for i := 0; i < int(concurrency); i++ {
		go s.uploadLoop()
	}
@@ -58,6 +58,7 @@ func (s s3UploadQueue) queueLength() int {
// push appends another item to the queue and returns immediately.
func (s s3UploadQueue) push(job *s3UploadJob) {
+	// TODO: Handle closed channel
	s.queue <- job
}
@@ -75,6 +76,7 @@ func (s s3UploadQueue) uploadLoop() {
		}

		job.etag, job.err = s.putPartForUpload(job.ctx, job.uploadPartInput, job.file, job.size)

+		// TODO: Handle closed channel
		job.resultChannel <- job
	}
}
@@ -82,23 +84,24 @@
func (s s3UploadQueue) putPartForUpload(ctx context.Context, uploadPartInput *s3.UploadPartInput, file *os.File, size int64) (etag string, err error) {
	// TODO: Move this back into s3store where the file is created
	defer cleanUpTempFile(file)
+	fmt.Println("Job started", *uploadPartInput.PartNumber)

	if !s.disableContentHashes {
		// By default, use the traditional approach to upload data
		uploadPartInput.Body = file
-		res, err := s.service.UploadPartWithContext(ctx, uploadPartInput)
-		if res.ETag != nil {
-			etag = *res.ETag
-		}
+		_, err = s.service.UploadPartWithContext(ctx, uploadPartInput)
+		//if res.ETag != nil {
+		//	etag = *res.ETag
+		//}
		return etag, err
	} else {
		// Experimental feature to prevent the AWS SDK from calculating the SHA256 hash
		// for the parts we upload to S3.
		// We compute the presigned URL without the body attached and then send the request
		// on our own. This way, the body is not included in the SHA256 calculation.
-		s3api, ok := s.service.Service.(s3APIForPresigning)
+		s3api, ok := s.service.(s3APIForPresigning)
		if !ok {
-			return fmt.Errorf("s3store: failed to cast S3 service for presigning")
+			return "", fmt.Errorf("s3store: failed to cast S3 service for presigning")
		}

		s3Req, _ := s3api.UploadPartRequest(uploadPartInput)