From 74fca8c6c3033b7a55735095359c6e42e7ae532f Mon Sep 17 00:00:00 2001 From: oliverpool Date: Tue, 28 Feb 2017 20:39:25 +0100 Subject: [PATCH] Squashed commit of the following: commit d214ad5c92073cb754c70ab73e6cf229cba01c72 Author: Marius Date: Tue Feb 28 20:38:47 2017 +0100 Minor code and comment cleanups commit 2826227404296d98a2d83519efaa754cc07a47b1 Merge: db47c89 1a58d6e Author: Marius Date: Tue Feb 28 20:13:15 2017 +0100 Merge branch 'metrics_race_condition' of https://github.com/oliverpool/tusd into oliverpool-metrics_race_condition commit 1a58d6e084631f5c039dd64a303b524abce6d4f2 Author: oliverpool Date: Tue Feb 28 19:14:21 2017 +0100 prometheus metrics typo commit db47c8976489917a8afc53878335ab66d73fe60d Merge: 168942b 12054be Author: Marius Date: Tue Feb 28 17:36:47 2017 +0100 Merge branch 'metrics_race_condition' of https://github.com/oliverpool/tusd into oliverpool-metrics_race_condition commit 12054be4e76814f286f4630e7fedae36a9cdba77 Author: oliverpool Date: Tue Feb 28 12:07:43 2017 +0100 add go vet on travis commit 16e95d2d91d318493c3090eeadf8eeb74d8088ad Author: oliverpool Date: Tue Feb 28 11:58:58 2017 +0100 add one go vet on appveyor commit 13bc64517ba84741ad26ade44c1e8aa3ed451eba Author: oliverpool Date: Tue Feb 28 11:57:56 2017 +0100 use even less pointers commit db7130f18a9c69f060cf65e6909f753a4b3cc238 Author: oliverpool Date: Mon Feb 27 22:12:45 2017 +0100 use pointer only when needed commit 647f7390c6599aa7e4a8747d62ebdfb4ddc5b980 Author: oliverpool Date: Thu Feb 23 16:29:01 2017 +0100 Prevent lock malfunction literal copies lock value from metrics commit 1ce196ed35492684666d4512f82fcd5f6c3e24b0 Author: oliverpool Date: Wed Feb 22 12:22:19 2017 +0100 handle read timeout simplification in metrics commit 338017c9f4b24e5733787a6b0fd5fd8ea1bd8991 Merge: c378bc9 45a9278 Author: oliverpool Date: Wed Feb 22 12:21:42 2017 +0100 Merge branch 'master' into metrics_race_condition commit c378bc97988363707d0ec3d185cc7ca830d7a5e0 Author: oliverpool Date: Wed Feb 22 11:51:56 2017 +0100 consistent naming commit e2b0050b8d00f5df9bf7941cca986457b2d91df1 Author: oliverpool Date: Wed Feb 22 11:44:05 2017 +0100 Use a simplfied error structure for error counting commit 3da4095c082fca8be187fb58a25c8b1fa6b316a6 Author: oliverpool Date: Mon Feb 20 16:38:27 2017 +0100 split ptr retrieval and incrementation commit ebd6873cbbf669590ef2946ee98c55ed194cb150 Author: oliverpool Date: Sun Feb 5 22:01:34 2017 +0100 Useless initialization commit 691224cdbf6b16027c598102d561c16239d959b7 Author: oliverpool Date: Thu Feb 2 08:25:55 2017 +0100 increment the pointer commit 3d9395e1df1df279e7f54de9e00dd4c37c882042 Author: oliverpool Date: Thu Feb 2 08:25:42 2017 +0100 Simplify lock logic commit d7a46190802a5b8312d44839228ffe46947e69d2 Author: oliverpool Date: Wed Feb 1 08:37:46 2017 +0100 Prevent race condition on errorstotal increment The HTTPError interface is used to have detailed metrics (by code as well) --- .scripts/test_all.sh | 2 + appveyor.yml | 2 + metrics.go | 88 ++++++++++++++++++---- prometheuscollector/prometheuscollector.go | 12 +-- unrouted_handler.go | 21 ++---- 5 files changed, 92 insertions(+), 33 deletions(-) diff --git a/.scripts/test_all.sh b/.scripts/test_all.sh index aefb293..7d85877 100755 --- a/.scripts/test_all.sh +++ b/.scripts/test_all.sh @@ -26,3 +26,5 @@ go get -u github.com/prometheus/client_golang/prometheus # Test all packages which are allowed on all Go versions go test $packages + +go vet $packages diff --git a/appveyor.yml b/appveyor.yml index f97e076..444b35b 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -23,3 +23,5 @@ test_script: - go test ./memorylocker - go test ./consullocker - go test ./s3store + + - go vet ./prometheuscollector diff --git a/metrics.go b/metrics.go index 177d965..3639f23 100644 --- a/metrics.go +++ b/metrics.go @@ -1,6 +1,9 @@ package tusd import ( + "errors" + "net" + "sync" "sync/atomic" ) @@ -13,7 +16,7 @@ type Metrics struct { // RequestTotal counts the number of incoming requests per method RequestsTotal map[string]*uint64 // ErrorsTotal counts the number of returned errors by their message - ErrorsTotal map[string]*uint64 + ErrorsTotal *ErrorsTotalMap BytesReceived *uint64 UploadsFinished *uint64 UploadsCreated *uint64 @@ -29,16 +32,9 @@ func (m Metrics) incRequestsTotal(method string) { } // incErrorsTotal increases the counter for this error atomically by one. -func (m Metrics) incErrorsTotal(err error) { - msg := err.Error() - - if addr, ok := m.ErrorsTotal[msg]; ok { - atomic.AddUint64(addr, 1) - } else { - addr := new(uint64) - *addr = 1 - m.ErrorsTotal[msg] = addr - } +func (m Metrics) incErrorsTotal(err HTTPError) { + ptr := m.ErrorsTotal.retrievePointerFor(err) + atomic.AddUint64(ptr, 1) } // incBytesReceived increases the number of received bytes atomically be the @@ -80,7 +76,73 @@ func newMetrics() Metrics { } } -func newErrorsTotalMap() map[string]*uint64 { - m := make(map[string]*uint64, 20) +// ErrorsTotalMap stores the counters for the different HTTP errors. +type ErrorsTotalMap struct { + lock sync.RWMutex + counter map[simpleHTTPError]*uint64 +} + +type simpleHTTPError struct { + Message string + StatusCode int +} + +func simplifyHTTPError(err HTTPError) simpleHTTPError { + var msg string + // Errors for read timeouts contain too much information which is not + // necessary for us and makes grouping for the metrics harder. The error + // message looks like: read tcp 127.0.0.1:1080->127.0.0.1:53673: i/o timeout + // Therefore, we use a common error message for all of them. + if netErr, ok := err.(net.Error); ok && netErr.Timeout() { + msg = "read tcp: i/o timeout" + } else { + msg = err.Error() + } + return simpleHTTPError{ + Message: msg, + StatusCode: err.StatusCode(), + } +} + +func newErrorsTotalMap() *ErrorsTotalMap { + m := make(map[simpleHTTPError]*uint64, 20) + return &ErrorsTotalMap{ + counter: m, + } +} + +// retrievePointerFor returns (after creating it if necessary) the pointer to +// the counter for the error. +func (e *ErrorsTotalMap) retrievePointerFor(err HTTPError) *uint64 { + serr := simplifyHTTPError(err) + e.lock.RLock() + ptr, ok := e.counter[serr] + e.lock.RUnlock() + if ok { + return ptr + } + + // For pointer creation, a write-lock is required + e.lock.Lock() + // We ensure that the pointer wasn't created in the meantime + if ptr, ok = e.counter[serr]; !ok { + ptr = new(uint64) + e.counter[serr] = ptr + } + e.lock.Unlock() + + return ptr +} + +// Load retrieves the map of the counter pointers atomically +func (e *ErrorsTotalMap) Load() map[HTTPError]*uint64 { + m := make(map[HTTPError]*uint64, len(e.counter)) + e.lock.RLock() + for err, ptr := range e.counter { + httpErr := NewHTTPError(errors.New(err.Message), err.StatusCode) + m[httpErr] = ptr + } + e.lock.RUnlock() + return m } diff --git a/prometheuscollector/prometheuscollector.go b/prometheuscollector/prometheuscollector.go index 25c8943..8486f23 100644 --- a/prometheuscollector/prometheuscollector.go +++ b/prometheuscollector/prometheuscollector.go @@ -9,6 +9,7 @@ package prometheuscollector import ( + "strconv" "sync/atomic" "github.com/tus/tusd" @@ -23,8 +24,8 @@ var ( []string{"method"}, nil) errorsTotalDesc = prometheus.NewDesc( "tusd_errors_total", - "Total number of erorrs per cause.", - []string{"cause"}, nil) + "Total number of errors per status.", + []string{"status", "message"}, nil) bytesReceivedDesc = prometheus.NewDesc( "tusd_bytes_received", "Number of bytes received for uploads.", @@ -39,7 +40,7 @@ var ( nil, nil) uploadsTerminatedDesc = prometheus.NewDesc( "tusd_uploads_terminated", - "Number of terminted uploads.", + "Number of terminated uploads.", nil, nil) ) @@ -73,12 +74,13 @@ func (c Collector) Collect(metrics chan<- prometheus.Metric) { ) } - for error, valuePtr := range c.metrics.ErrorsTotal { + for httpError, valuePtr := range c.metrics.ErrorsTotal.Load() { metrics <- prometheus.MustNewConstMetric( errorsTotalDesc, prometheus.GaugeValue, float64(atomic.LoadUint64(valuePtr)), - error, + strconv.Itoa(httpError.StatusCode()), + httpError.Error(), ) } diff --git a/unrouted_handler.go b/unrouted_handler.go index 73425e8..6fd8d9c 100644 --- a/unrouted_handler.go +++ b/unrouted_handler.go @@ -5,7 +5,6 @@ import ( "errors" "io" "log" - "net" "net/http" "os" "regexp" @@ -612,17 +611,9 @@ func (handler *UnroutedHandler) sendError(w http.ResponseWriter, r *http.Request err = ErrNotFound } - // Errors for read timeouts contain too much information which is not - // necessary for us and makes grouping for the metrics harder. The error - // message looks like: read tcp 127.0.0.1:1080->127.0.0.1:53673: i/o timeout - // Therefore, we use a common error message for all of them. - if netErr, ok := err.(net.Error); ok && netErr.Timeout() { - err = errors.New("read tcp: i/o timeout") - } - - status := 500 - if statusErr, ok := err.(HTTPError); ok { - status = statusErr.StatusCode() + statusErr, ok := err.(HTTPError) + if !ok { + statusErr = NewHTTPError(err, http.StatusInternalServerError) } reason := err.Error() + "\n" @@ -632,12 +623,12 @@ func (handler *UnroutedHandler) sendError(w http.ResponseWriter, r *http.Request w.Header().Set("Content-Type", "text/plain; charset=utf-8") w.Header().Set("Content-Length", strconv.Itoa(len(reason))) - w.WriteHeader(status) + w.WriteHeader(statusErr.StatusCode()) w.Write([]byte(reason)) - handler.log("ResponseOutgoing", "status", strconv.Itoa(status), "method", r.Method, "path", r.URL.Path, "error", err.Error()) + handler.log("ResponseOutgoing", "status", strconv.Itoa(statusErr.StatusCode()), "method", r.Method, "path", r.URL.Path, "error", err.Error()) - go handler.Metrics.incErrorsTotal(err) + go handler.Metrics.incErrorsTotal(statusErr) } // sendResp writes the header to w with the specified status code.