Squashed commit of the following:
commit d214ad5c92073cb754c70ab73e6cf229cba01c72 Author: Marius <maerious@gmail.com> Date: Tue Feb 28 20:38:47 2017 +0100 Minor code and comment cleanups commit 2826227404296d98a2d83519efaa754cc07a47b1 Merge: db47c891a58d6e
Author: Marius <maerious@gmail.com> Date: Tue Feb 28 20:13:15 2017 +0100 Merge branch 'metrics_race_condition' of https://github.com/oliverpool/tusd into oliverpool-metrics_race_condition commit1a58d6e084
Author: oliverpool <oliverpool@hotmail.fr> Date: Tue Feb 28 19:14:21 2017 +0100 prometheus metrics typo commit db47c8976489917a8afc53878335ab66d73fe60d Merge:168942b
12054be
Author: Marius <maerious@gmail.com> Date: Tue Feb 28 17:36:47 2017 +0100 Merge branch 'metrics_race_condition' of https://github.com/oliverpool/tusd into oliverpool-metrics_race_condition commit12054be4e7
Author: oliverpool <oliverpool@hotmail.fr> Date: Tue Feb 28 12:07:43 2017 +0100 add go vet on travis commit16e95d2d91
Author: oliverpool <oliverpool@hotmail.fr> Date: Tue Feb 28 11:58:58 2017 +0100 add one go vet on appveyor commit13bc64517b
Author: oliverpool <oliverpool@hotmail.fr> Date: Tue Feb 28 11:57:56 2017 +0100 use even less pointers commitdb7130f18a
Author: oliverpool <oliverpool@hotmail.fr> Date: Mon Feb 27 22:12:45 2017 +0100 use pointer only when needed commit647f7390c6
Author: oliverpool <oliverpool@hotmail.fr> Date: Thu Feb 23 16:29:01 2017 +0100 Prevent lock malfunction literal copies lock value from metrics commit1ce196ed35
Author: oliverpool <oliverpool@hotmail.fr> Date: Wed Feb 22 12:22:19 2017 +0100 handle read timeout simplification in metrics commit338017c9f4
Merge:c378bc9
45a9278
Author: oliverpool <oliverpool@hotmail.fr> Date: Wed Feb 22 12:21:42 2017 +0100 Merge branch 'master' into metrics_race_condition commitc378bc9798
Author: oliverpool <oliverpool@hotmail.fr> Date: Wed Feb 22 11:51:56 2017 +0100 consistent naming commite2b0050b8d
Author: oliverpool <oliverpool@hotmail.fr> Date: Wed Feb 22 11:44:05 2017 +0100 Use a simplfied error structure for error counting commit3da4095c08
Author: oliverpool <oliverpool@hotmail.fr> Date: Mon Feb 20 16:38:27 2017 +0100 split ptr retrieval and incrementation commitebd6873cbb
Author: oliverpool <oliverpool@hotmail.fr> Date: Sun Feb 5 22:01:34 2017 +0100 Useless initialization commit691224cdbf
Author: oliverpool <oliverpool@hotmail.fr> Date: Thu Feb 2 08:25:55 2017 +0100 increment the pointer commit3d9395e1df
Author: oliverpool <oliverpool@hotmail.fr> Date: Thu Feb 2 08:25:42 2017 +0100 Simplify lock logic commitd7a4619080
Author: oliverpool <oliverpool@hotmail.fr> Date: Wed Feb 1 08:37:46 2017 +0100 Prevent race condition on errorstotal increment The HTTPError interface is used to have detailed metrics (by code as well)
This commit is contained in:
parent
168942bfb6
commit
74fca8c6c3
|
@ -26,3 +26,5 @@ go get -u github.com/prometheus/client_golang/prometheus
|
||||||
|
|
||||||
# Test all packages which are allowed on all Go versions
|
# Test all packages which are allowed on all Go versions
|
||||||
go test $packages
|
go test $packages
|
||||||
|
|
||||||
|
go vet $packages
|
||||||
|
|
|
@ -23,3 +23,5 @@ test_script:
|
||||||
- go test ./memorylocker
|
- go test ./memorylocker
|
||||||
- go test ./consullocker
|
- go test ./consullocker
|
||||||
- go test ./s3store
|
- go test ./s3store
|
||||||
|
|
||||||
|
- go vet ./prometheuscollector
|
||||||
|
|
88
metrics.go
88
metrics.go
|
@ -1,6 +1,9 @@
|
||||||
package tusd
|
package tusd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
|
"net"
|
||||||
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -13,7 +16,7 @@ type Metrics struct {
|
||||||
// RequestTotal counts the number of incoming requests per method
|
// RequestTotal counts the number of incoming requests per method
|
||||||
RequestsTotal map[string]*uint64
|
RequestsTotal map[string]*uint64
|
||||||
// ErrorsTotal counts the number of returned errors by their message
|
// ErrorsTotal counts the number of returned errors by their message
|
||||||
ErrorsTotal map[string]*uint64
|
ErrorsTotal *ErrorsTotalMap
|
||||||
BytesReceived *uint64
|
BytesReceived *uint64
|
||||||
UploadsFinished *uint64
|
UploadsFinished *uint64
|
||||||
UploadsCreated *uint64
|
UploadsCreated *uint64
|
||||||
|
@ -29,16 +32,9 @@ func (m Metrics) incRequestsTotal(method string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// incErrorsTotal increases the counter for this error atomically by one.
|
// incErrorsTotal increases the counter for this error atomically by one.
|
||||||
func (m Metrics) incErrorsTotal(err error) {
|
func (m Metrics) incErrorsTotal(err HTTPError) {
|
||||||
msg := err.Error()
|
ptr := m.ErrorsTotal.retrievePointerFor(err)
|
||||||
|
atomic.AddUint64(ptr, 1)
|
||||||
if addr, ok := m.ErrorsTotal[msg]; ok {
|
|
||||||
atomic.AddUint64(addr, 1)
|
|
||||||
} else {
|
|
||||||
addr := new(uint64)
|
|
||||||
*addr = 1
|
|
||||||
m.ErrorsTotal[msg] = addr
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// incBytesReceived increases the number of received bytes atomically be the
|
// incBytesReceived increases the number of received bytes atomically be the
|
||||||
|
@ -80,7 +76,73 @@ func newMetrics() Metrics {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newErrorsTotalMap() map[string]*uint64 {
|
// ErrorsTotalMap stores the counters for the different HTTP errors.
|
||||||
m := make(map[string]*uint64, 20)
|
type ErrorsTotalMap struct {
|
||||||
|
lock sync.RWMutex
|
||||||
|
counter map[simpleHTTPError]*uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
type simpleHTTPError struct {
|
||||||
|
Message string
|
||||||
|
StatusCode int
|
||||||
|
}
|
||||||
|
|
||||||
|
func simplifyHTTPError(err HTTPError) simpleHTTPError {
|
||||||
|
var msg string
|
||||||
|
// Errors for read timeouts contain too much information which is not
|
||||||
|
// necessary for us and makes grouping for the metrics harder. The error
|
||||||
|
// message looks like: read tcp 127.0.0.1:1080->127.0.0.1:53673: i/o timeout
|
||||||
|
// Therefore, we use a common error message for all of them.
|
||||||
|
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
|
||||||
|
msg = "read tcp: i/o timeout"
|
||||||
|
} else {
|
||||||
|
msg = err.Error()
|
||||||
|
}
|
||||||
|
return simpleHTTPError{
|
||||||
|
Message: msg,
|
||||||
|
StatusCode: err.StatusCode(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newErrorsTotalMap() *ErrorsTotalMap {
|
||||||
|
m := make(map[simpleHTTPError]*uint64, 20)
|
||||||
|
return &ErrorsTotalMap{
|
||||||
|
counter: m,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// retrievePointerFor returns (after creating it if necessary) the pointer to
|
||||||
|
// the counter for the error.
|
||||||
|
func (e *ErrorsTotalMap) retrievePointerFor(err HTTPError) *uint64 {
|
||||||
|
serr := simplifyHTTPError(err)
|
||||||
|
e.lock.RLock()
|
||||||
|
ptr, ok := e.counter[serr]
|
||||||
|
e.lock.RUnlock()
|
||||||
|
if ok {
|
||||||
|
return ptr
|
||||||
|
}
|
||||||
|
|
||||||
|
// For pointer creation, a write-lock is required
|
||||||
|
e.lock.Lock()
|
||||||
|
// We ensure that the pointer wasn't created in the meantime
|
||||||
|
if ptr, ok = e.counter[serr]; !ok {
|
||||||
|
ptr = new(uint64)
|
||||||
|
e.counter[serr] = ptr
|
||||||
|
}
|
||||||
|
e.lock.Unlock()
|
||||||
|
|
||||||
|
return ptr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load retrieves the map of the counter pointers atomically
|
||||||
|
func (e *ErrorsTotalMap) Load() map[HTTPError]*uint64 {
|
||||||
|
m := make(map[HTTPError]*uint64, len(e.counter))
|
||||||
|
e.lock.RLock()
|
||||||
|
for err, ptr := range e.counter {
|
||||||
|
httpErr := NewHTTPError(errors.New(err.Message), err.StatusCode)
|
||||||
|
m[httpErr] = ptr
|
||||||
|
}
|
||||||
|
e.lock.RUnlock()
|
||||||
|
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
package prometheuscollector
|
package prometheuscollector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strconv"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
"github.com/tus/tusd"
|
"github.com/tus/tusd"
|
||||||
|
@ -23,8 +24,8 @@ var (
|
||||||
[]string{"method"}, nil)
|
[]string{"method"}, nil)
|
||||||
errorsTotalDesc = prometheus.NewDesc(
|
errorsTotalDesc = prometheus.NewDesc(
|
||||||
"tusd_errors_total",
|
"tusd_errors_total",
|
||||||
"Total number of erorrs per cause.",
|
"Total number of errors per status.",
|
||||||
[]string{"cause"}, nil)
|
[]string{"status", "message"}, nil)
|
||||||
bytesReceivedDesc = prometheus.NewDesc(
|
bytesReceivedDesc = prometheus.NewDesc(
|
||||||
"tusd_bytes_received",
|
"tusd_bytes_received",
|
||||||
"Number of bytes received for uploads.",
|
"Number of bytes received for uploads.",
|
||||||
|
@ -39,7 +40,7 @@ var (
|
||||||
nil, nil)
|
nil, nil)
|
||||||
uploadsTerminatedDesc = prometheus.NewDesc(
|
uploadsTerminatedDesc = prometheus.NewDesc(
|
||||||
"tusd_uploads_terminated",
|
"tusd_uploads_terminated",
|
||||||
"Number of terminted uploads.",
|
"Number of terminated uploads.",
|
||||||
nil, nil)
|
nil, nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -73,12 +74,13 @@ func (c Collector) Collect(metrics chan<- prometheus.Metric) {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
for error, valuePtr := range c.metrics.ErrorsTotal {
|
for httpError, valuePtr := range c.metrics.ErrorsTotal.Load() {
|
||||||
metrics <- prometheus.MustNewConstMetric(
|
metrics <- prometheus.MustNewConstMetric(
|
||||||
errorsTotalDesc,
|
errorsTotalDesc,
|
||||||
prometheus.GaugeValue,
|
prometheus.GaugeValue,
|
||||||
float64(atomic.LoadUint64(valuePtr)),
|
float64(atomic.LoadUint64(valuePtr)),
|
||||||
error,
|
strconv.Itoa(httpError.StatusCode()),
|
||||||
|
httpError.Error(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,6 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
@ -612,17 +611,9 @@ func (handler *UnroutedHandler) sendError(w http.ResponseWriter, r *http.Request
|
||||||
err = ErrNotFound
|
err = ErrNotFound
|
||||||
}
|
}
|
||||||
|
|
||||||
// Errors for read timeouts contain too much information which is not
|
statusErr, ok := err.(HTTPError)
|
||||||
// necessary for us and makes grouping for the metrics harder. The error
|
if !ok {
|
||||||
// message looks like: read tcp 127.0.0.1:1080->127.0.0.1:53673: i/o timeout
|
statusErr = NewHTTPError(err, http.StatusInternalServerError)
|
||||||
// Therefore, we use a common error message for all of them.
|
|
||||||
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
|
|
||||||
err = errors.New("read tcp: i/o timeout")
|
|
||||||
}
|
|
||||||
|
|
||||||
status := 500
|
|
||||||
if statusErr, ok := err.(HTTPError); ok {
|
|
||||||
status = statusErr.StatusCode()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
reason := err.Error() + "\n"
|
reason := err.Error() + "\n"
|
||||||
|
@ -632,12 +623,12 @@ func (handler *UnroutedHandler) sendError(w http.ResponseWriter, r *http.Request
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||||
w.Header().Set("Content-Length", strconv.Itoa(len(reason)))
|
w.Header().Set("Content-Length", strconv.Itoa(len(reason)))
|
||||||
w.WriteHeader(status)
|
w.WriteHeader(statusErr.StatusCode())
|
||||||
w.Write([]byte(reason))
|
w.Write([]byte(reason))
|
||||||
|
|
||||||
handler.log("ResponseOutgoing", "status", strconv.Itoa(status), "method", r.Method, "path", r.URL.Path, "error", err.Error())
|
handler.log("ResponseOutgoing", "status", strconv.Itoa(statusErr.StatusCode()), "method", r.Method, "path", r.URL.Path, "error", err.Error())
|
||||||
|
|
||||||
go handler.Metrics.incErrorsTotal(err)
|
go handler.Metrics.incErrorsTotal(statusErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// sendResp writes the header to w with the specified status code.
|
// sendResp writes the header to w with the specified status code.
|
||||||
|
|
Loading…
Reference in New Issue