From 015ae39d180c9d39464f83c685ebfcd73ff8d1aa Mon Sep 17 00:00:00 2001 From: Marius Date: Wed, 6 Jul 2016 16:25:06 +0200 Subject: [PATCH] Replace non-ASCII chars in metadata for multipart uploads with question mark AWS does not handle non-ASCII metadata values well because they are transported in HTTP header values, which, by specification, should only contain ASCII characters. If you nevertheless supply AWS with, for example, UTF-8 encoded strings, it will reject the request due to mismatching signatures. Our solution is to replace every non-ASCII character with a question mark. --- s3store/s3store.go | 19 +++++++++++++++---- s3store/s3store_test.go | 10 ++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/s3store/s3store.go b/s3store/s3store.go index 0980bf4..7b467e6 100644 --- a/s3store/s3store.go +++ b/s3store/s3store.go @@ -31,7 +31,13 @@ // // If meta data is associated with the upload during creation, it will be added // to the multipart upload and after finishing it, the meta data will be passed -// to the final object. +// to the final object. However, the metadata which will be attached to the +// final object can only contain ASCII characters and every non-ASCII character +// will be replaced by a question mark (for example, "Menü" will be "Men?"). +// This does not apply to the metadata returned by the GetInfo function, +// since it relies on the info object for reading the metadata. Therefore, +// HEAD responses will always contain the unchanged metadata, Base64-encoded, +// even if it contains non-ASCII characters. // // Once the upload is finish, the multipart upload is completed, resulting in // the entire file being stored in the bucket. 
The info object, containing @@ -81,6 +87,7 @@ import ( "io" "io/ioutil" "os" + "regexp" "strings" "sync" @@ -93,6 +100,10 @@ import ( "github.com/aws/aws-sdk-go/service/s3/s3iface" ) +// This regular expression matches every character which is not defined in the +// ASCII tables which range from 00 to 7F, inclusive. +var nonASCIIRegexp = regexp.MustCompile(`([^\x00-\x7F])`) + // See the tusd.DataStore interface for documentation about the different // methods. type S3Store struct { @@ -162,7 +173,7 @@ func (store S3Store) NewUpload(info tusd.FileInfo) (id string, err error) { ContentLength: aws.Int64(int64(len(infoJson))), }) if err != nil { - return "", err + return "", fmt.Errorf("s3store: unable to create info file:\n%s", err) } // Convert meta data into a map of pointers for AWS Go SDK, sigh. @@ -170,7 +181,7 @@ func (store S3Store) NewUpload(info tusd.FileInfo) (id string, err error) { for key, value := range info.MetaData { // Copying the value is required in order to prevent it from being // overwritten by the next iteration. - v := value + v := nonASCIIRegexp.ReplaceAllString(value, "?") metadata[key] = &v } @@ -181,7 +192,7 @@ func (store S3Store) NewUpload(info tusd.FileInfo) (id string, err error) { Metadata: metadata, }) if err != nil { - return "", err + return "", fmt.Errorf("s3store: unable to create multipart upload:\n%s", err) } id = uploadId + "+" + *res.UploadId diff --git a/s3store/s3store_test.go b/s3store/s3store_test.go index c77b13f..de45129 100644 --- a/s3store/s3store_test.go +++ b/s3store/s3store_test.go @@ -36,13 +36,13 @@ func TestNewUpload(t *testing.T) { assert.Equal(store.Service, s3obj) s1 := "hello" - s2 := "world" + s2 := "men?" 
gomock.InOrder( s3obj.EXPECT().PutObject(&s3.PutObjectInput{ Bucket: aws.String("bucket"), Key: aws.String("uploadId.info"), - Body: bytes.NewReader([]byte(`{"ID":"uploadId","Size":500,"Offset":0,"MetaData":{"bar":"world","foo":"hello"},"IsPartial":false,"IsFinal":false,"PartialUploads":null}`)), + Body: bytes.NewReader([]byte(`{"ID":"uploadId","Size":500,"Offset":0,"MetaData":{"bar":"menü","foo":"hello"},"IsPartial":false,"IsFinal":false,"PartialUploads":null}`)), ContentLength: aws.Int64(int64(136)), }), s3obj.EXPECT().CreateMultipartUpload(&s3.CreateMultipartUploadInput{ @@ -62,7 +62,7 @@ func TestNewUpload(t *testing.T) { Size: 500, MetaData: map[string]string{ "foo": "hello", - "bar": "world", + "bar": "menü", }, } @@ -101,7 +101,7 @@ func TestGetInfo(t *testing.T) { Bucket: aws.String("bucket"), Key: aws.String("uploadId.info"), }).Return(&s3.GetObjectOutput{ - Body: ioutil.NopCloser(bytes.NewReader([]byte(`{"ID":"uploadId","Size":500,"Offset":0,"MetaData":null,"IsPartial":false,"IsFinal":false,"PartialUploads":null}`))), + Body: ioutil.NopCloser(bytes.NewReader([]byte(`{"ID":"uploadId","Size":500,"Offset":0,"MetaData":{"bar":"menü","foo":"hello"},"IsPartial":false,"IsFinal":false,"PartialUploads":null}`))), }, nil), s3obj.EXPECT().ListParts(&s3.ListPartsInput{ Bucket: aws.String("bucket"), @@ -124,6 +124,8 @@ func TestGetInfo(t *testing.T) { assert.Equal(int64(500), info.Size) assert.Equal(int64(300), info.Offset) assert.Equal("uploadId+multipartId", info.ID) + assert.Equal("hello", info.MetaData["foo"]) + assert.Equal("menü", info.MetaData["bar"]) } func TestGetInfoFinished(t *testing.T) {