Replace non-ASCII chars in metadata for multipart uploads with question mark

AWS does not handle non-ASCII metadata values well, because they are
transported as HTTP header values which, by specification, should only
contain ASCII characters. If you nevertheless supply AWS with, for example,
UTF-8 encoded strings, it will reject the request due to mismatching
signatures. Our solution is to replace these characters with question
marks.
This commit is contained in:
Marius 2016-07-06 16:25:06 +02:00
parent a490dc01de
commit 015ae39d18
2 changed files with 21 additions and 8 deletions

View File

@ -31,7 +31,13 @@
// //
// If meta data is associated with the upload during creation, it will be added // If meta data is associated with the upload during creation, it will be added
// to the multipart upload and after finishing it, the meta data will be passed // to the multipart upload and after finishing it, the meta data will be passed
// to the final object. // to the final object. However, the metadata which will be attached to the
// final object can only contain ASCII characters and every non-ASCII character
// will be replaced by a question mark (for example, "Menü" will be "Men?").
// Note that this does not apply to the metadata returned by the GetInfo
// function since it relies on the info object for reading the metadata.
// Therefore, HEAD responses will always contain the unchanged metadata, Base64-
// encoded, even if it contains non-ASCII characters.
// //
// Once the upload is finished, the multipart upload is completed, resulting in // Once the upload is finished, the multipart upload is completed, resulting in
// the entire file being stored in the bucket. The info object, containing // the entire file being stored in the bucket. The info object, containing
@ -81,6 +87,7 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
"os" "os"
"regexp"
"strings" "strings"
"sync" "sync"
@ -93,6 +100,10 @@ import (
"github.com/aws/aws-sdk-go/service/s3/s3iface" "github.com/aws/aws-sdk-go/service/s3/s3iface"
) )
// nonASCIIRegexp matches every single character that lies outside the ASCII
// table, i.e. outside the range from 0x00 to 0x7F, inclusive. NewUpload uses
// it to replace each such character in a metadata value with a question mark.
var nonASCIIRegexp = regexp.MustCompile(`([^\x00-\x7F])`)
// See the tusd.DataStore interface for documentation about the different // See the tusd.DataStore interface for documentation about the different
// methods. // methods.
type S3Store struct { type S3Store struct {
@ -162,7 +173,7 @@ func (store S3Store) NewUpload(info tusd.FileInfo) (id string, err error) {
ContentLength: aws.Int64(int64(len(infoJson))), ContentLength: aws.Int64(int64(len(infoJson))),
}) })
if err != nil { if err != nil {
return "", err return "", fmt.Errorf("s3store: unable to create info file:\n%s", err)
} }
// Convert meta data into a map of pointers for AWS Go SDK, sigh. // Convert meta data into a map of pointers for AWS Go SDK, sigh.
@ -170,7 +181,7 @@ func (store S3Store) NewUpload(info tusd.FileInfo) (id string, err error) {
for key, value := range info.MetaData { for key, value := range info.MetaData {
// Copying the value is required in order to prevent it from being // Copying the value is required in order to prevent it from being
// overwritten by the next iteration. // overwritten by the next iteration.
v := value v := nonASCIIRegexp.ReplaceAllString(value, "?")
metadata[key] = &v metadata[key] = &v
} }
@ -181,7 +192,7 @@ func (store S3Store) NewUpload(info tusd.FileInfo) (id string, err error) {
Metadata: metadata, Metadata: metadata,
}) })
if err != nil { if err != nil {
return "", err return "", fmt.Errorf("s3store: unable to create multipart upload:\n%s", err)
} }
id = uploadId + "+" + *res.UploadId id = uploadId + "+" + *res.UploadId

View File

@ -36,13 +36,13 @@ func TestNewUpload(t *testing.T) {
assert.Equal(store.Service, s3obj) assert.Equal(store.Service, s3obj)
s1 := "hello" s1 := "hello"
s2 := "world" s2 := "men?"
gomock.InOrder( gomock.InOrder(
s3obj.EXPECT().PutObject(&s3.PutObjectInput{ s3obj.EXPECT().PutObject(&s3.PutObjectInput{
Bucket: aws.String("bucket"), Bucket: aws.String("bucket"),
Key: aws.String("uploadId.info"), Key: aws.String("uploadId.info"),
Body: bytes.NewReader([]byte(`{"ID":"uploadId","Size":500,"Offset":0,"MetaData":{"bar":"world","foo":"hello"},"IsPartial":false,"IsFinal":false,"PartialUploads":null}`)), Body: bytes.NewReader([]byte(`{"ID":"uploadId","Size":500,"Offset":0,"MetaData":{"bar":"menü","foo":"hello"},"IsPartial":false,"IsFinal":false,"PartialUploads":null}`)),
ContentLength: aws.Int64(int64(136)), ContentLength: aws.Int64(int64(136)),
}), }),
s3obj.EXPECT().CreateMultipartUpload(&s3.CreateMultipartUploadInput{ s3obj.EXPECT().CreateMultipartUpload(&s3.CreateMultipartUploadInput{
@ -62,7 +62,7 @@ func TestNewUpload(t *testing.T) {
Size: 500, Size: 500,
MetaData: map[string]string{ MetaData: map[string]string{
"foo": "hello", "foo": "hello",
"bar": "world", "bar": "menü",
}, },
} }
@ -101,7 +101,7 @@ func TestGetInfo(t *testing.T) {
Bucket: aws.String("bucket"), Bucket: aws.String("bucket"),
Key: aws.String("uploadId.info"), Key: aws.String("uploadId.info"),
}).Return(&s3.GetObjectOutput{ }).Return(&s3.GetObjectOutput{
Body: ioutil.NopCloser(bytes.NewReader([]byte(`{"ID":"uploadId","Size":500,"Offset":0,"MetaData":null,"IsPartial":false,"IsFinal":false,"PartialUploads":null}`))), Body: ioutil.NopCloser(bytes.NewReader([]byte(`{"ID":"uploadId","Size":500,"Offset":0,"MetaData":{"bar":"menü","foo":"hello"},"IsPartial":false,"IsFinal":false,"PartialUploads":null}`))),
}, nil), }, nil),
s3obj.EXPECT().ListParts(&s3.ListPartsInput{ s3obj.EXPECT().ListParts(&s3.ListPartsInput{
Bucket: aws.String("bucket"), Bucket: aws.String("bucket"),
@ -124,6 +124,8 @@ func TestGetInfo(t *testing.T) {
assert.Equal(int64(500), info.Size) assert.Equal(int64(500), info.Size)
assert.Equal(int64(300), info.Offset) assert.Equal(int64(300), info.Offset)
assert.Equal("uploadId+multipartId", info.ID) assert.Equal("uploadId+multipartId", info.ID)
assert.Equal("hello", info.MetaData["foo"])
assert.Equal("menü", info.MetaData["bar"])
} }
func TestGetInfoFinished(t *testing.T) { func TestGetInfoFinished(t *testing.T) {