s3store: Increase default part size (#410)

* s3store: Increase default part size

* Update tests and add flag

* Fix minor issues
This commit is contained in:
Marius 2020-08-01 14:58:31 +02:00 committed by GitHub
parent 6662f43d01
commit 9831c8b4da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 28 additions and 6 deletions

View File

@@ -38,6 +38,7 @@ func CreateComposer() {
// as per https://github.com/aws/aws-sdk-go#configuring-credentials
store := s3store.New(Flags.S3Bucket, s3.New(session.Must(session.NewSession()), s3Config))
store.ObjectPrefix = Flags.S3ObjectPrefix
store.PreferredPartSize = Flags.S3PartSize
store.UseIn(Composer)
locker := memorylocker.New()

View File

@@ -19,6 +19,7 @@ var Flags struct {
S3Bucket string
S3ObjectPrefix string
S3Endpoint string
S3PartSize int64
GCSBucket string
GCSObjectPrefix string
EnabledHooksString string
@@ -51,6 +52,7 @@ func ParseFlags() {
flag.StringVar(&Flags.S3Bucket, "s3-bucket", "", "Use AWS S3 with this bucket as storage backend (requires the AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_REGION environment variables to be set)")
flag.StringVar(&Flags.S3ObjectPrefix, "s3-object-prefix", "", "Prefix for S3 object names")
flag.StringVar(&Flags.S3Endpoint, "s3-endpoint", "", "Endpoint to use S3 compatible implementations like minio (requires s3-bucket to be pass)")
flag.Int64Var(&Flags.S3PartSize, "s3-part-size", 50*1024*1024, "Size in bytes of the individual upload requests made to the S3 API. Defaults to 50MiB (experimental and may be removed in the future)")
flag.StringVar(&Flags.GCSBucket, "gcs-bucket", "", "Use Google Cloud Storage with this bucket as storage backend (requires the GCS_SERVICE_ACCOUNT_FILE environment variable to be set)")
flag.StringVar(&Flags.GCSObjectPrefix, "gcs-object-prefix", "", "Prefix for GCS object names (can't contain underscore character)")
flag.StringVar(&Flags.EnabledHooksString, "hooks-enabled-events", "pre-create,post-create,post-receive,post-terminate,post-finish", "Comma separated list of enabled hook events (e.g. post-create,post-finish). Leave empty to enable default events")

View File

@@ -46,6 +46,7 @@ func TestCalcOptimalPartSize(t *testing.T) {
/*
store.MinPartSize = 2
store.MaxPartSize = 10
store.PreferredPartSize = 5
store.MaxMultipartParts = 20
store.MaxObjectSize = 200
*/
@@ -67,6 +68,11 @@ func TestCalcOptimalPartSize(t *testing.T) {
testcases := []int64{
0,
1,
store.PreferredPartSize - 1,
store.PreferredPartSize,
store.PreferredPartSize + 1,
store.MinPartSize - 1,
store.MinPartSize,
store.MinPartSize + 1,
@@ -136,6 +142,7 @@ func TestCalcOptimalPartSize_AllUploadSizes(t *testing.T) {
store.MinPartSize = 5
store.MaxPartSize = 5 * 1024
store.PreferredPartSize = 10
store.MaxMultipartParts = 1000
store.MaxObjectSize = store.MaxPartSize * store.MaxMultipartParts

View File

@@ -128,6 +128,12 @@ type S3Store struct {
// in bytes. This number needs to match with the underlying S3 backend or else
// uploaded parts will be reject. AWS S3, for example, uses 5MB for this value.
MinPartSize int64
// PreferredPartSize specifies the preferred size of a single part uploaded to
// S3. S3Store will attempt to slice the incoming data into parts with this
// size whenever possible. In some cases, smaller parts are necessary, so
// not every part may reach this value. The PreferredPartSize must be inside the
// range of MinPartSize to MaxPartSize.
PreferredPartSize int64
// MaxMultipartParts is the maximum number of parts an S3 multipart upload is
// allowed to have according to AWS S3 API specifications.
// See: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html
@@ -166,6 +172,7 @@ func New(bucket string, service S3API) S3Store {
Service: service,
MaxPartSize: 5 * 1024 * 1024 * 1024,
MinPartSize: 5 * 1024 * 1024,
PreferredPartSize: 50 * 1024 * 1024,
MaxMultipartParts: 10000,
MaxObjectSize: 5 * 1024 * 1024 * 1024 * 1024,
MaxBufferedParts: 20,
@@ -948,12 +955,12 @@ func isAwsError(err error, code string) bool {
func (store S3Store) calcOptimalPartSize(size int64) (optimalPartSize int64, err error) {
switch {
// When upload is smaller or equal to PreferredPartSize, we upload in just one part.
case size <= store.PreferredPartSize:
optimalPartSize = store.PreferredPartSize
// Does the upload fit in MaxMultipartParts parts or less with PreferredPartSize.
case size <= store.PreferredPartSize*store.MaxMultipartParts:
optimalPartSize = store.PreferredPartSize
// Prerequisite: Be aware, that the result of an integer division (x/y) is
// ALWAYS rounded DOWN, as there are no digits behind the comma.
// In order to find out, whether we have an exact result or a rounded down

View File

@@ -690,6 +690,7 @@ func TestWriteChunk(t *testing.T) {
store := New("bucket", s3obj)
store.MaxPartSize = 8
store.MinPartSize = 4
store.PreferredPartSize = 4
store.MaxMultipartParts = 10000
store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024
@@ -772,6 +773,7 @@ func TestWriteChunkWithUnexpectedEOF(t *testing.T) {
store := New("bucket", s3obj)
store.MaxPartSize = 500
store.MinPartSize = 100
store.PreferredPartSize = 100
store.MaxMultipartParts = 10000
store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024
@@ -888,6 +890,7 @@ func TestWriteChunkPrependsIncompletePart(t *testing.T) {
store := New("bucket", s3obj)
store.MaxPartSize = 8
store.MinPartSize = 4
store.PreferredPartSize = 4
store.MaxMultipartParts = 10000
store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024
@@ -956,6 +959,7 @@ func TestWriteChunkPrependsIncompletePartAndWritesANewIncompletePart(t *testing.
store := New("bucket", s3obj)
store.MaxPartSize = 8
store.MinPartSize = 4
store.PreferredPartSize = 4
store.MaxMultipartParts = 10000
store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024
@@ -1366,6 +1370,7 @@ func TestWriteChunkCleansUpTempFiles(t *testing.T) {
store := New("bucket", s3api)
store.MaxPartSize = 10
store.MinPartSize = 10
store.PreferredPartSize = 10
store.MaxMultipartParts = 10000
store.MaxObjectSize = 5 * 1024 * 1024 * 1024 * 1024
store.TemporaryDirectory = tempDir