From 9831c8b4dae2b3e4a31510f4705146db0651f43e Mon Sep 17 00:00:00 2001 From: Marius Date: Sat, 1 Aug 2020 14:58:31 +0200 Subject: [PATCH] s3store: Increase default part size (#410) * s3store: Increase default part size * Update tests and add flag * Fix minor issues --- cmd/tusd/cli/composer.go | 1 + cmd/tusd/cli/flags.go | 2 ++ pkg/s3store/calcpartsize_test.go | 7 +++++++ pkg/s3store/s3store.go | 19 +++++++++++++------ pkg/s3store/s3store_test.go | 5 +++++ 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/cmd/tusd/cli/composer.go b/cmd/tusd/cli/composer.go index 606e2f2..6b0239a 100644 --- a/cmd/tusd/cli/composer.go +++ b/cmd/tusd/cli/composer.go @@ -38,6 +38,7 @@ func CreateComposer() { // as per https://github.com/aws/aws-sdk-go#configuring-credentials store := s3store.New(Flags.S3Bucket, s3.New(session.Must(session.NewSession()), s3Config)) store.ObjectPrefix = Flags.S3ObjectPrefix + store.PreferredPartSize = Flags.S3PartSize store.UseIn(Composer) locker := memorylocker.New() diff --git a/cmd/tusd/cli/flags.go b/cmd/tusd/cli/flags.go index 2f47b65..1e0ae91 100644 --- a/cmd/tusd/cli/flags.go +++ b/cmd/tusd/cli/flags.go @@ -19,6 +19,7 @@ var Flags struct { S3Bucket string S3ObjectPrefix string S3Endpoint string + S3PartSize int64 GCSBucket string GCSObjectPrefix string EnabledHooksString string @@ -51,6 +52,7 @@ func ParseFlags() { flag.StringVar(&Flags.S3Bucket, "s3-bucket", "", "Use AWS S3 with this bucket as storage backend (requires the AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_REGION environment variables to be set)") flag.StringVar(&Flags.S3ObjectPrefix, "s3-object-prefix", "", "Prefix for S3 object names") flag.StringVar(&Flags.S3Endpoint, "s3-endpoint", "", "Endpoint to use S3 compatible implementations like minio (requires s3-bucket to be pass)") + flag.Int64Var(&Flags.S3PartSize, "s3-part-size", 50*1024*1024, "Size in bytes of the individual upload requests made to the S3 API. Defaults to 50MiB (experimental and may be removed in the future)") flag.StringVar(&Flags.GCSBucket, "gcs-bucket", "", "Use Google Cloud Storage with this bucket as storage backend (requires the GCS_SERVICE_ACCOUNT_FILE environment variable to be set)") flag.StringVar(&Flags.GCSObjectPrefix, "gcs-object-prefix", "", "Prefix for GCS object names (can't contain underscore character)") flag.StringVar(&Flags.EnabledHooksString, "hooks-enabled-events", "pre-create,post-create,post-receive,post-terminate,post-finish", "Comma separated list of enabled hook events (e.g. post-create,post-finish). Leave empty to enable default events") diff --git a/pkg/s3store/calcpartsize_test.go b/pkg/s3store/calcpartsize_test.go index 4b0f9d7..f2ed509 100644 --- a/pkg/s3store/calcpartsize_test.go +++ b/pkg/s3store/calcpartsize_test.go @@ -46,6 +46,7 @@ func TestCalcOptimalPartSize(t *testing.T) { /* store.MinPartSize = 2 store.MaxPartSize = 10 + store.PreferredPartSize = 5 store.MaxMultipartParts = 20 store.MaxObjectSize = 200 */ @@ -67,6 +68,11 @@ func TestCalcOptimalPartSize(t *testing.T) { testcases := []int64{ 0, 1, + + store.PreferredPartSize - 1, + store.PreferredPartSize, + store.PreferredPartSize + 1, + store.MinPartSize - 1, store.MinPartSize, store.MinPartSize + 1, @@ -136,6 +142,7 @@ func TestCalcOptimalPartSize_AllUploadSizes(t *testing.T) { store.MinPartSize = 5 store.MaxPartSize = 5 * 1024 + store.PreferredPartSize = 10 store.MaxMultipartParts = 1000 store.MaxObjectSize = store.MaxPartSize * store.MaxMultipartParts diff --git a/pkg/s3store/s3store.go b/pkg/s3store/s3store.go index 3c6b583..e83b81f 100644 --- a/pkg/s3store/s3store.go +++ b/pkg/s3store/s3store.go @@ -128,6 +128,12 @@ type S3Store struct { // in bytes. This number needs to match with the underlying S3 backend or else // uploaded parts will be reject. AWS S3, for example, uses 5MB for this value. MinPartSize int64 + // PreferredPartSize specifies the preferred size of a single part uploaded to + // S3. S3Store will attempt to slice the incoming data into parts with this + // size whenever possible. In some cases, smaller parts are necessary, so + // not every part may reach this value. The PreferredPartSize must be inside the + // range of MinPartSize to MaxPartSize. + PreferredPartSize int64 // MaxMultipartParts is the maximum number of parts an S3 multipart upload is // allowed to have according to AWS S3 API specifications. // See: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html @@ -166,6 +172,7 @@ func New(bucket string, service S3API) S3Store { Service: service, MaxPartSize: 5 * 1024 * 1024 * 1024, MinPartSize: 5 * 1024 * 1024, + PreferredPartSize: 50 * 1024 * 1024, MaxMultipartParts: 10000, MaxObjectSize: 5 * 1024 * 1024 * 1024 * 1024, MaxBufferedParts: 20, @@ -948,12 +955,12 @@ func isAwsError(err error, code string) bool { func (store S3Store) calcOptimalPartSize(size int64) (optimalPartSize int64, err error) { switch { - // When upload is smaller or equal MinPartSize, we upload in just one part. - case size <= store.MinPartSize: - optimalPartSize = store.MinPartSize - // Does the upload fit in MaxMultipartParts parts or less with MinPartSize. - case size <= store.MinPartSize*store.MaxMultipartParts: - optimalPartSize = store.MinPartSize + // When upload is smaller or equal to PreferredPartSize, we upload in just one part. + case size <= store.PreferredPartSize: + optimalPartSize = store.PreferredPartSize + // Does the upload fit in MaxMultipartParts parts or less with PreferredPartSize. + case size <= store.PreferredPartSize*store.MaxMultipartParts: + optimalPartSize = store.PreferredPartSize // Prerequisite: Be aware, that the result of an integer division (x/y) is // ALWAYS rounded DOWN, as there are no digits behind the comma. // In order to find out, whether we have an exact result or a rounded down diff --git a/pkg/s3store/s3store_test.go b/pkg/s3store/s3store_test.go index a30c4b5..002923d 100644 --- a/pkg/s3store/s3store_test.go +++ b/pkg/s3store/s3store_test.go @@ -690,6 +690,7 @@ func TestWriteChunk(t *testing.T) { store := New("bucket", s3obj) store.MaxPartSize = 8 store.MinPartSize = 4 + store.PreferredPartSize = 4 store.MaxMultipartParts = 10000 store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024 @@ -772,6 +773,7 @@ func TestWriteChunkWithUnexpectedEOF(t *testing.T) { store := New("bucket", s3obj) store.MaxPartSize = 500 store.MinPartSize = 100 + store.PreferredPartSize = 100 store.MaxMultipartParts = 10000 store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024 @@ -888,6 +890,7 @@ func TestWriteChunkPrependsIncompletePart(t *testing.T) { store := New("bucket", s3obj) store.MaxPartSize = 8 store.MinPartSize = 4 + store.PreferredPartSize = 4 store.MaxMultipartParts = 10000 store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024 @@ -956,6 +959,7 @@ func TestWriteChunkPrependsIncompletePartAndWritesANewIncompletePart(t *testing. store := New("bucket", s3obj) store.MaxPartSize = 8 store.MinPartSize = 4 + store.PreferredPartSize = 4 store.MaxMultipartParts = 10000 store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024 @@ -1366,6 +1370,7 @@ func TestWriteChunkCleansUpTempFiles(t *testing.T) { store := New("bucket", s3api) store.MaxPartSize = 10 store.MinPartSize = 10 + store.PreferredPartSize = 10 store.MaxMultipartParts = 10000 store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024 store.TemporaryDirectory = tempDir