diff --git a/.hooks/post-receive b/.hooks/post-receive
new file mode 100755
index 0000000..5f98009
--- /dev/null
+++ b/.hooks/post-receive
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+id="$TUS_ID"
+offset="$TUS_OFFSET"
+size="$TUS_SIZE"
+progress=$((100 * $offset/$size))
+
+echo "Upload ${id} is at ${progress}% (${offset}/${size})"
diff --git a/.hooks/pre-create b/.hooks/pre-create
index 06215b2..4501f89 100755
--- a/.hooks/pre-create
+++ b/.hooks/pre-create
@@ -1,8 +1,7 @@
 #!/bin/bash
 
 filename=$(cat /dev/stdin | jq .MetaData.filename)
-
-if [ -n "$filename" ]; then
+if [ -z "$filename" ]; then
   echo "Error: no filename provided"
   exit 1
 fi
diff --git a/.infra/Freyfile.hcl b/.infra/Freyfile.hcl
index ee56805..328160c 100644
--- a/.infra/Freyfile.hcl
+++ b/.infra/Freyfile.hcl
@@ -206,7 +206,7 @@ setup {
     name = "Setup nginx"
     tasks {
       name           = "nginx | Add nginx PPA"
-      apt_repository = "repo='ppa:nginx/stable'"
+      apt_repository = "repo='ppa:ondrej/nginx'"
     }
     tasks {
       name = "nginx | Create public www directory"
diff --git a/.infra/group_vars/all/_frey.yml b/.infra/group_vars/all/_frey.yml
index 81fd465..4553cfd 100644
--- a/.infra/group_vars/all/_frey.yml
+++ b/.infra/group_vars/all/_frey.yml
@@ -1,9 +1,26 @@
-# Frey specific overrides
+# Frey specific overrides, DO NOT EDIT THIS FILE
 ---
 # apt role
 apt_manage_sources_list: true
 apt_src_enable: false
-apt_install_state: present
-
-# Temporary fix for failing build: https://travis-ci.org/tus/tusd/jobs/190891590
+apt_update_cache_valid_time: 86400
 apt_upgrade: false
+apt_dpkg_configure: true
+apt_install_state: present
+apt_clean: true
+apt_autoremove: true
+
+# ansistrano roles
+ansistrano_shared_paths:
+  - logs
+ansistrano_keep_releases: 10
+ansistrano_npm: no
+ansistrano_owner: www-data
+ansistrano_group: www-data
+ansistrano_allow_anonymous_stats: no
+
+ansistrano_remove_rolled_back: no
+
+# fqdn
+fqdn: "{{ lookup('env', 'FREY_DOMAIN') }}"
+hostname: "{{ fqdn.split('.')[0] }}"
diff --git a/LICENSE.txt b/LICENSE.txt
index d09e4df..bdf1382 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2013-2015 Transloadit Ltd and Contributors
+Copyright (c) 2013-2017 Transloadit Ltd and Contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
diff --git a/README.md b/README.md
index fe9651e..c65ed86 100644
--- a/README.md
+++ b/README.md
@@ -155,6 +155,12 @@ Yes, it is absolutely possible to do so. Firstly, you should execute the tusd bi
 - *Forward hostname and scheme.* If the proxy rewrites the request URL, the tusd server does not know the original URL which was used to reach the proxy. This behavior can lead to situations, where tusd returns a redirect to a URL which can not be reached by the client. To avoid this confusion, you can explicitly tell tusd which hostname and scheme to use by supplying the `X-Forwarded-Host` and `X-Forwarded-Proto` headers.
 
 Explicit examples for the above points can be found in the [Nginx configuration](https://github.com/tus/tusd/blob/master/.infra/files/nginx.conf) which is used to power the [master.tus.io](https://master.tus.io) instace.
+
+### Can I run custom verification/authentication checks before an upload begins?
+
+Yes, this is made possible by the [hook system](/docs/hooks.md) inside the tusd binary. It enables custom routines to be executed when certain events occur, such as a new upload being created, which can be handled by the `pre-create` hook. Inside the corresponding hook file, you can run your own validations against the provided upload metadata to determine whether the action is actually allowed or should be rejected by tusd. Please have a look at the [corresponding documentation](docs/hooks.md#pre-create) for a more detailed explanation.
+
+
 ## License
 
 This project is licensed under the MIT license, see `LICENSE.txt`.
diff --git a/appveyor.yml b/appveyor.yml
index 77d1c42..5dcb125 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -8,10 +8,12 @@ install:
   - git submodule update --init --recursive
 
 build_script:
+  - set PATH=%GOPATH%\bin;%PATH%
   - go env
   - go version
   - go get ./s3store
   - go get ./consullocker
+  - go get github.com/hashicorp/consul
 
 test_script:
   - go test .
diff --git a/cmd/tusd/cli/composer.go b/cmd/tusd/cli/composer.go
index b68d090..3590ef7 100644
--- a/cmd/tusd/cli/composer.go
+++ b/cmd/tusd/cli/composer.go
@@ -32,12 +32,20 @@ func CreateComposer() {
 		store := filestore.New(dir)
 		store.UseIn(Composer)
 	} else {
-		stdout.Printf("Using 's3://%s' as S3 bucket for storage.\n", Flags.S3Bucket)
+		s3Config := aws.NewConfig()
+
+		if Flags.S3Endpoint == "" {
+			stdout.Printf("Using 's3://%s' as S3 bucket for storage.\n", Flags.S3Bucket)
+		} else {
+			stdout.Printf("Using '%s/%s' as S3 endpoint and bucket for storage.\n", Flags.S3Endpoint, Flags.S3Bucket)
+
+			s3Config = s3Config.WithEndpoint(Flags.S3Endpoint).WithS3ForcePathStyle(true)
+		}
 
 		// Derive credentials from AWS_SECRET_ACCESS_KEY, AWS_ACCESS_KEY_ID and
 		// AWS_REGION environment variables.
-		credentials := aws.NewConfig().WithCredentials(credentials.NewEnvCredentials())
-		store := s3store.New(Flags.S3Bucket, s3.New(session.New(), credentials))
+		s3Config = s3Config.WithCredentials(credentials.NewEnvCredentials())
+		store := s3store.New(Flags.S3Bucket, s3.New(session.New(), s3Config))
 		store.UseIn(Composer)
 
 		locker := memorylocker.New()
diff --git a/cmd/tusd/cli/flags.go b/cmd/tusd/cli/flags.go
index f8f4dcf..b61f4e7 100644
--- a/cmd/tusd/cli/flags.go
+++ b/cmd/tusd/cli/flags.go
@@ -14,6 +14,7 @@ var Flags struct {
 	Basepath      string
 	Timeout       int64
 	S3Bucket      string
+	S3Endpoint    string
 	HooksDir      string
 	ShowVersion   bool
 	ExposeMetrics bool
@@ -32,6 +33,7 @@ func ParseFlags() {
 	flag.StringVar(&Flags.Basepath, "base-path", "/files/", "Basepath of the HTTP server")
 	flag.Int64Var(&Flags.Timeout, "timeout", 30*1000, "Read timeout for connections in milliseconds. A zero value means that reads will not timeout")
 	flag.StringVar(&Flags.S3Bucket, "s3-bucket", "", "Use AWS S3 with this bucket as storage backend (requires the AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_REGION environment variables to be set)")
+	flag.StringVar(&Flags.S3Endpoint, "s3-endpoint", "", "Endpoint to use S3 compatible implementations like minio (requires s3-bucket to be set)")
 	flag.StringVar(&Flags.HooksDir, "hooks-dir", "", "Directory to search for available hooks scripts")
 	flag.BoolVar(&Flags.ShowVersion, "version", false, "Print tusd version information")
 	flag.BoolVar(&Flags.ExposeMetrics, "expose-metrics", true, "Expose metrics about tusd usage")
diff --git a/cmd/tusd/cli/hooks.go b/cmd/tusd/cli/hooks.go
index 7cb8d7f..5caf28e 100644
--- a/cmd/tusd/cli/hooks.go
+++ b/cmd/tusd/cli/hooks.go
@@ -16,6 +16,7 @@ type HookType string
 const (
 	HookPostFinish    HookType = "post-finish"
 	HookPostTerminate HookType = "post-terminate"
+	HookPostReceive   HookType = "post-receive"
 	HookPreCreate     HookType = "pre-create"
 )
 
@@ -44,6 +45,8 @@ func SetupPostHooks(handler *tusd.Handler) {
 				invokeHook(HookPostFinish, info)
 			case info := <-handler.TerminatedUploads:
 				invokeHook(HookPostTerminate, info)
+			case info := <-handler.UploadProgress:
+				invokeHook(HookPostReceive, info)
 			}
 		}
 	}()
@@ -75,6 +78,7 @@ func invokeHookSync(typ HookType, info tusd.FileInfo, captureOutput bool) ([]byt
 	env := os.Environ()
 	env = append(env, "TUS_ID="+info.ID)
 	env = append(env, "TUS_SIZE="+strconv.FormatInt(info.Size, 10))
+	env = append(env, "TUS_OFFSET="+strconv.FormatInt(info.Offset, 10))
 
 	jsonInfo, err := json.Marshal(info)
 	if err != nil {
diff --git a/cmd/tusd/cli/serve.go b/cmd/tusd/cli/serve.go
index 37d6317..594d66e 100644
--- a/cmd/tusd/cli/serve.go
+++ b/cmd/tusd/cli/serve.go
@@ -17,6 +17,7 @@ func Serve() {
 		StoreComposer:           Composer,
 		NotifyCompleteUploads:   true,
 		NotifyTerminatedUploads: true,
+		NotifyUploadProgress:    true,
 	})
 	if err != nil {
 		stderr.Fatalf("Unable to create handler: %s", err)
diff --git a/config.go b/config.go
index 1ed82cf..f2517a2 100644
--- a/config.go
+++ b/config.go
@@ -31,6 +31,7 @@ type Config struct {
 	// NotifyTerminatedUploads indicates whether sending notifications about
 	// terminated uploads using the TerminatedUploads channel should be enabled.
 	NotifyTerminatedUploads bool
+	NotifyUploadProgress bool
 	// Logger is the logger to use internally, mostly for printing requests.
 	Logger *log.Logger
 	// Respect the X-Forwarded-Host, X-Forwarded-Proto and Forwarded headers
diff --git a/docs/hooks.md b/docs/hooks.md
new file mode 100644
index 0000000..973e27a
--- /dev/null
+++ b/docs/hooks.md
@@ -0,0 +1,71 @@
+# Hooks
+
+When integrating tusd into an application, it is important to establish a communication channel between the two components. The tusd binary accomplishes this by providing a hook system which will execute custom code when certain events happen, such as an upload being created or finished. While simple, this system is powerful enough to enable uses ranging from logging, over validation and authorization, to processing the uploaded files.
+
+If you have previously worked with the hook system provided by [Git](https://git-scm.com/book/it/v2/Customizing-Git-Git-Hooks), you will see a lot of parallels. If this does not apply to you, don't worry, it is not complicated. Before getting started, it is good to have a high-level overview of what a hook actually is: it is a regular file, located in a specific directory, which will be executed once a certain event occurs. This file can either be a script interpreted by a runtime, such as Bash or Python, or a compiled binary. When invoked, the process will be provided with information about the event that triggered it and the associated upload.
+
+## The Hook Directory
+
+By default, the hook system is disabled. To enable it, pass the `--hook-dir` option to the tusd binary. The flag's value will be a path, the **hook directory**, relative to the current working directory, pointing to the folder containing the executable **hook files**:
+
+```bash
+$ tusd --hook-dir ./path/to/hooks/
+
+[tusd] Using './path/to/hooks/' for hooks
+[tusd] Using './data' as directory storage.
+...
+```
+
+If an event occurs, the tusd binary will look for a file named exactly after the event, which will then be executed, as long as the file exists. In the example above, the binary `./path/to/hooks/pre-create` will be invoked before an upload is created, which can be used to e.g. validate certain metadata. Please note that the hook file *must not* have an extension, such as `.sh` or `.py`, or else tusd will not recognize it and will ignore it. A detailed list of all events can be found at the end of this document.
+
+## The Hook's Environment
+
+The hook process is provided with information about the event and the upload using two methods:
+* The `TUS_ID` and `TUS_SIZE` environment variables will contain the ID and the total size in bytes of the upload which triggered the event. Please be aware that in the `pre-create` hook the upload ID will be an empty string, as the entity has not been created yet and therefore this piece of information is not available.
+* On `stdin` a JSON-encoded object can be read which contains more details about the corresponding upload in the following format:
+
+```js
+{
+  // The upload's ID. Will be empty during the pre-create event
+  "ID": "14b1c4c77771671a8479bc0444bbc5ce",
+  // The upload's total size in bytes.
+  "Size": 46205,
+  // The upload's current offset in bytes.
+  "Offset": 1592,
+  // These properties will be set to true, if the upload is a final or partial
+  // one. See the Concatenation extension for details:
+  // http://tus.io/protocols/resumable-upload.html#concatenation
+  "IsFinal": false,
+  "IsPartial": false,
+  // If the upload is a final one, this value will be an array of upload IDs
+  // which are concatenated to produce the upload.
+  "PartialUploads": null,
+  // The upload's metadata which can be supplied by the client as it wishes.
+  // All keys and values in this object will be strings.
+  // Be aware that it may contain maliciously crafted values and you must not
+  // trust it without escaping it first!
+  "MetaData": {
+    "filename": "transloadit.png"
+  }
+}
+```
+
+## Blocking and Non-Blocking Hooks
+
+If not otherwise noted, all hooks are invoked in a *non-blocking* way, meaning that tusd will not wait until the hook process has finished and exited. Therefore, the hook process is not able to influence how tusd may continue handling the current request, regardless of which exit code it may set. Furthermore, the hook process' stdout and stderr will be piped to tusd's stdout and stderr correspondingly, allowing one to use these channels for additional logging.
+
+On the other hand, there are a few *blocking* hooks, such as the one caused by the `pre-create` event. Because their exit code will dictate whether tusd will accept the current incoming request, tusd will wait until the hook process has exited. Therefore, in order to keep response times low, one should avoid performing time-consuming operations inside the processes for blocking hooks. An exit code of `0` indicates that tusd should continue handling the request as normal. On the other hand, a non-zero exit code tells tusd to reject the request with a `500 Internal Server Error` response containing the process' output from stderr. For the sake of logging, the process' output from stdout will always be piped to tusd's stdout.
+
+## List of Available Hooks
+
+### pre-create
+
+This event will be triggered before an upload is created, allowing you to run certain routines, for example validating that specific metadata values are set, or verifying that a corresponding entity belonging to the upload (e.g. a user) exists. Because this event results in a blocking hook, you can determine whether the upload should be created or rejected using the exit code. An exit code of `0` will allow the upload to be created and continued as usual. A non-zero exit code will reject an upload creation request, making it a good place for authentication and authorization. Please be aware that during this stage the upload ID will be an empty string, as the entity has not been created yet and therefore this piece of information is not available.
+
+### post-finish
+
+This event will be triggered after an upload is fully finished, meaning that all chunks have been transferred and saved in the storage. After this point, no further modifications, except a possible deletion, can be made to the upload entity and it may be desirable to use the file for further processing or to notify other applications of the completion of this upload.
+
+### post-terminate
+
+This event will be triggered after an upload has been terminated, meaning that the upload has been completely stopped and all associated chunks have been fully removed from the storage. Therefore, one is not able to retrieve the upload's content anymore and one may wish to notify further applications that this upload will never be resumed nor finished.
diff --git a/filestore/filestore.go b/filestore/filestore.go
index 5808cac..f1b5c5b 100644
--- a/filestore/filestore.go
+++ b/filestore/filestore.go
@@ -17,6 +17,7 @@ package filestore
 
 import (
 	"encoding/json"
+	"fmt"
 	"io"
 	"io/ioutil"
 	"os"
@@ -62,7 +63,10 @@ func (store FileStore) NewUpload(info tusd.FileInfo) (id string, err error) {
 	// Create .bin file with no content
 	file, err := os.OpenFile(store.binPath(id), os.O_CREATE|os.O_WRONLY, defaultFilePerm)
 	if err != nil {
-		return
+		if os.IsNotExist(err) {
+			err = fmt.Errorf("upload directory does not exist: %s", store.Path)
+		}
+		return "", err
 	}
 	defer file.Close()
 
diff --git a/filestore/filestore_test.go b/filestore/filestore_test.go
index b66d299..06c1d39 100644
--- a/filestore/filestore_test.go
+++ b/filestore/filestore_test.go
@@ -72,6 +72,17 @@ func TestFilestore(t *testing.T) {
 	a.True(os.IsNotExist(err))
 }
 
+func TestMissingPath(t *testing.T) {
+	a := assert.New(t)
+
+	store := FileStore{"./path-that-does-not-exist"}
+
+	id, err := store.NewUpload(tusd.FileInfo{})
+	a.Error(err)
+	a.Equal(err.Error(), "upload directory does not exist: ./path-that-does-not-exist")
+	a.Equal(id, "")
+}
+
 func TestFileLocker(t *testing.T) {
 	a := assert.New(t)
 
diff --git a/metrics.go b/metrics.go
index 92df351..177d965 100644
--- a/metrics.go
+++ b/metrics.go
@@ -31,11 +31,14 @@ func (m Metrics) incRequestsTotal(method string) {
 // incErrorsTotal increases the counter for this error atomically by one.
 func (m Metrics) incErrorsTotal(err error) {
 	msg := err.Error()
 
-	if _, ok := ErrStatusCodes[err]; !ok {
-		msg = "system error"
-	}
-	atomic.AddUint64(m.ErrorsTotal[msg], 1)
+	if addr, ok := m.ErrorsTotal[msg]; ok {
+		atomic.AddUint64(addr, 1)
+	} else {
+		addr := new(uint64)
+		*addr = 1
+		m.ErrorsTotal[msg] = addr
+	}
 }
 
 // incBytesReceived increases the number of received bytes atomically be the
@@ -78,13 +81,6 @@ func newMetrics() Metrics {
 }
 
 func newErrorsTotalMap() map[string]*uint64 {
-	m := make(map[string]*uint64, len(ErrStatusCodes)+1)
-
-	for err := range ErrStatusCodes {
-		m[err.Error()] = new(uint64)
-	}
-
-	m["system error"] = new(uint64)
-
+	m := make(map[string]*uint64, 20)
 	return m
 }
diff --git a/unrouted_handler.go b/unrouted_handler.go
index 8ceaf3e..74bc6e9 100644
--- a/unrouted_handler.go
+++ b/unrouted_handler.go
@@ -10,6 +10,8 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"sync/atomic"
+	"time"
 )
 
 var (
@@ -18,39 +20,46 @@ var (
 	reForwardedProto = regexp.MustCompile(`proto=(https?)`)
 )
 
-var (
-	ErrUnsupportedVersion  = errors.New("unsupported version")
-	ErrMaxSizeExceeded     = errors.New("maximum size exceeded")
-	ErrInvalidContentType  = errors.New("missing or invalid Content-Type header")
-	ErrInvalidUploadLength = errors.New("missing or invalid Upload-Length header")
-	ErrInvalidOffset       = errors.New("missing or invalid Upload-Offset header")
-	ErrNotFound            = errors.New("upload not found")
-	ErrFileLocked          = errors.New("file currently locked")
-	ErrMismatchOffset      = errors.New("mismatched offset")
-	ErrSizeExceeded        = errors.New("resource's size exceeded")
-	ErrNotImplemented      = errors.New("feature not implemented")
-	ErrUploadNotFinished   = errors.New("one of the partial uploads is not finished")
-	ErrInvalidConcat       = errors.New("invalid Upload-Concat header")
-	ErrModifyFinal         = errors.New("modifying a final upload is not allowed")
-)
-
-// HTTP status codes sent in the response when the specific error is returned.
-var ErrStatusCodes = map[error]int{
-	ErrUnsupportedVersion:  http.StatusPreconditionFailed,
-	ErrMaxSizeExceeded:     http.StatusRequestEntityTooLarge,
-	ErrInvalidContentType:  http.StatusBadRequest,
-	ErrInvalidUploadLength: http.StatusBadRequest,
-	ErrInvalidOffset:       http.StatusBadRequest,
-	ErrNotFound:            http.StatusNotFound,
-	ErrFileLocked:          423, // Locked (WebDAV) (RFC 4918)
-	ErrMismatchOffset:      http.StatusConflict,
-	ErrSizeExceeded:        http.StatusRequestEntityTooLarge,
-	ErrNotImplemented:      http.StatusNotImplemented,
-	ErrUploadNotFinished:   http.StatusBadRequest,
-	ErrInvalidConcat:       http.StatusBadRequest,
-	ErrModifyFinal:         http.StatusForbidden,
+// HTTPError represents an error with an additional status code attached
+// which may be used when this error is sent in a HTTP response.
+// See the net/http package for standardized status codes.
+type HTTPError interface {
+	error
+	StatusCode() int
 }
 
+type httpError struct {
+	error
+	statusCode int
+}
+
+func (err httpError) StatusCode() int {
+	return err.statusCode
+}
+
+// NewHTTPError adds the given status code to the provided error and returns
+// the new error instance. The status code may be used in corresponding HTTP
+// responses. See the net/http package for standardized status codes.
+func NewHTTPError(err error, statusCode int) HTTPError {
+	return httpError{err, statusCode}
+}
+
+var (
+	ErrUnsupportedVersion  = NewHTTPError(errors.New("unsupported version"), http.StatusPreconditionFailed)
+	ErrMaxSizeExceeded     = NewHTTPError(errors.New("maximum size exceeded"), http.StatusRequestEntityTooLarge)
+	ErrInvalidContentType  = NewHTTPError(errors.New("missing or invalid Content-Type header"), http.StatusBadRequest)
+	ErrInvalidUploadLength = NewHTTPError(errors.New("missing or invalid Upload-Length header"), http.StatusBadRequest)
+	ErrInvalidOffset       = NewHTTPError(errors.New("missing or invalid Upload-Offset header"), http.StatusBadRequest)
+	ErrNotFound            = NewHTTPError(errors.New("upload not found"), http.StatusNotFound)
+	ErrFileLocked          = NewHTTPError(errors.New("file currently locked"), 423) // Locked (WebDAV) (RFC 4918)
+	ErrMismatchOffset      = NewHTTPError(errors.New("mismatched offset"), http.StatusConflict)
+	ErrSizeExceeded        = NewHTTPError(errors.New("resource's size exceeded"), http.StatusRequestEntityTooLarge)
+	ErrNotImplemented      = NewHTTPError(errors.New("feature not implemented"), http.StatusNotImplemented)
+	ErrUploadNotFinished   = NewHTTPError(errors.New("one of the partial uploads is not finished"), http.StatusBadRequest)
+	ErrInvalidConcat       = NewHTTPError(errors.New("invalid Upload-Concat header"), http.StatusBadRequest)
+	ErrModifyFinal         = NewHTTPError(errors.New("modifying a final upload is not allowed"), http.StatusForbidden)
+)
+
 // UnroutedHandler exposes methods to handle requests as part of the tus protocol,
 // such as PostFile, HeadFile, PatchFile and DelFile. In addition the GetFile method
 // is provided which is, however, not part of the specification.
@@ -75,6 +84,7 @@ type UnroutedHandler struct {
 	// happen if the NotifyTerminatedUploads field is set to true in the Config
 	// structure.
 	TerminatedUploads chan FileInfo
+	UploadProgress chan FileInfo
 	// Metrics provides numbers of the usage for this handler.
 	Metrics Metrics
 }
@@ -104,6 +114,7 @@ func NewUnroutedHandler(config Config) (*UnroutedHandler, error) {
 		isBasePathAbs:     config.isAbs,
 		CompleteUploads:   make(chan FileInfo),
 		TerminatedUploads: make(chan FileInfo),
+		UploadProgress:    make(chan FileInfo),
 		logger:            config.Logger,
 		extensions:        extensions,
 		Metrics:           newMetrics(),
@@ -428,12 +439,18 @@ func (handler *UnroutedHandler) writeChunk(id string, info FileInfo, w http.Resp
 	handler.log("ChunkWriteStart", "id", id, "maxSize", i64toa(maxSize), "offset", i64toa(offset))
 
 	var bytesWritten int64
-	// Prevent a nil pointer derefernce when accessing the body which may not be
+	// Prevent a nil pointer dereference when accessing the body which may not be
 	// available in the case of a malicious request.
 	if r.Body != nil {
-		// Limit the data read from the request's body to the allowed maxiumum
+		// Limit the data read from the request's body to the allowed maximum
 		reader := io.LimitReader(r.Body, maxSize)
 
+		if handler.config.NotifyUploadProgress {
+			var stop chan<- struct{}
+			reader, stop = handler.sendProgressMessages(info, reader)
+			defer close(stop)
+		}
+
 		var err error
 		bytesWritten, err = handler.composer.Core.WriteChunk(id, offset, reader)
 		if err != nil {
@@ -583,9 +600,9 @@ func (handler *UnroutedHandler) sendError(w http.ResponseWriter, r *http.Request
 		err = ErrNotFound
 	}
 
-	status, ok := ErrStatusCodes[err]
-	if !ok {
-		status = 500
+	status := 500
+	if statusErr, ok := err.(HTTPError); ok {
+		status = statusErr.StatusCode()
 	}
 
 	reason := err.Error() + "\n"
@@ -625,6 +642,39 @@ func (handler *UnroutedHandler) absFileURL(r *http.Request, id string) string {
 	return url
 }
 
+type progressWriter struct {
+	Offset int64
+}
+
+func (w *progressWriter) Write(b []byte) (int, error) {
+	atomic.AddInt64(&w.Offset, int64(len(b)))
+	return len(b), nil
+}
+
+func (handler *UnroutedHandler) sendProgressMessages(info FileInfo, reader io.Reader) (io.Reader, chan<- struct{}) {
+	progress := &progressWriter{
+		Offset: info.Offset,
+	}
+	stop := make(chan struct{}, 1)
+	reader = io.TeeReader(reader, progress)
+
+	go func() {
+		for {
+			select {
+			case <-stop:
+				info.Offset = atomic.LoadInt64(&progress.Offset)
+				handler.UploadProgress <- info
+				return
+			case <-time.After(1 * time.Second):
+				info.Offset = atomic.LoadInt64(&progress.Offset)
+				handler.UploadProgress <- info
+			}
+		}
+	}()
+
+	return reader, stop
+}
+
 // getHostAndProtocol extracts the host and used protocol (either HTTP or HTTPS)
 // from the given request. If `allowForwarded` is set, the X-Forwarded-Host,
 // X-Forwarded-Proto and Forwarded headers will also be checked to