From a3435fde97de91e9522d8c6af4cc070f95dc9782 Mon Sep 17 00:00:00 2001 From: Brian Dittmer Date: Sat, 3 Oct 2020 14:28:28 -0400 Subject: [PATCH 1/3] Introduce `metric.Collector` interface This commit introduces interfaces for collecting metrics. It allows users to implement their own custom metric collection system instead of the default Prometheus implementation. --- cache/disk/disk.go | 26 +++++++++-------- cache/disk/lru.go | 32 ++++++++++----------- cache/gcsproxy/gcsproxy.go | 5 ++-- cache/httpproxy/httpproxy.go | 24 ++++++++-------- cache/s3proxy/s3proxy.go | 23 ++++++++------- main.go | 37 +++++++++--------------- metric/collector.go | 34 ++++++++++++++++++++++ metric/prometheus/prometheus.go | 51 +++++++++++++++++++++++++++++++++ 8 files changed, 155 insertions(+), 77 deletions(-) create mode 100644 metric/collector.go create mode 100644 metric/prometheus/prometheus.go diff --git a/cache/disk/disk.go b/cache/disk/disk.go index 40c4a6a7c..6d46b3874 100644 --- a/cache/disk/disk.go +++ b/cache/disk/disk.go @@ -17,25 +17,18 @@ import ( "syscall" "github.com/buchgr/bazel-remote/cache" + "github.com/buchgr/bazel-remote/metric" "github.com/buchgr/bazel-remote/utils/tempfile" "github.com/djherbis/atime" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" pb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" "github.com/golang/protobuf/proto" ) var ( - cacheHits = promauto.NewCounter(prometheus.CounterOpts{ - Name: "bazel_remote_disk_cache_hits", - Help: "The total number of disk backend cache hits", - }) - cacheMisses = promauto.NewCounter(prometheus.CounterOpts{ - Name: "bazel_remote_disk_cache_misses", - Help: "The total number of disk backend cache misses", - }) + cacheHits metric.Counter + cacheMisses metric.Counter ) var tfc = tempfile.NewCreator() @@ -70,7 +63,7 @@ const emptySha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b785 // New returns a new instance of a filesystem-based cache rooted at `dir`, // with a maximum size of `maxSizeBytes` bytes and an optional backend `proxy`. // Cache is safe for concurrent use. -func New(dir string, maxSizeBytes int64, proxy cache.Proxy) *Cache { +func New(dir string, maxSizeBytes int64, proxy cache.Proxy, collector metric.Collector) *Cache { // Create the directory structure. 
hexLetters := []byte("0123456789abcdef") for _, c1 := range hexLetters { @@ -105,7 +98,7 @@ func New(dir string, maxSizeBytes int64, proxy cache.Proxy) *Cache { c := &Cache{ dir: filepath.Clean(dir), proxy: proxy, - lru: NewSizedLRU(maxSizeBytes, onEvict), + lru: NewSizedLRU(maxSizeBytes, onEvict, collector), } err := c.migrateDirectories() @@ -118,6 +111,15 @@ func New(dir string, maxSizeBytes int64, proxy cache.Proxy) *Cache { log.Fatalf("Loading of existing cache entries failed due to error: %v", err) } + // Setup metrics + if collector != nil { + cacheHits = collector.NewCounter("bazel_remote_disk_cache_hits") + cacheMisses = collector.NewCounter("bazel_remote_disk_cache_misses") + } else { + cacheHits = metric.NoOpCounter() + cacheMisses = metric.NoOpCounter() + } + return c } diff --git a/cache/disk/lru.go b/cache/disk/lru.go index f63b1cd22..d0e4d17dc 100644 --- a/cache/disk/lru.go +++ b/cache/disk/lru.go @@ -5,25 +5,13 @@ import ( "errors" "fmt" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/buchgr/bazel-remote/metric" ) var ( - gaugeCacheSizeBytes = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "bazel_remote_disk_cache_size_bytes", - Help: "The current number of bytes in the disk backend", - }) - - counterEvictedBytes = promauto.NewCounter(prometheus.CounterOpts{ - Name: "bazel_remote_disk_cache_evicted_bytes_total", - Help: "The total number of bytes evicted from disk backend, due to full cache", - }) - - counterOverwrittenBytes = promauto.NewCounter(prometheus.CounterOpts{ - Name: "bazel_remote_disk_cache_overwritten_bytes_total", - Help: "The total number of bytes removed from disk backend, due to put of already existing key", - }) + gaugeCacheSizeBytes metric.Gauge + counterEvictedBytes metric.Counter + counterOverwrittenBytes metric.Counter ) type sizedItem interface { @@ -84,7 +72,17 @@ type entry struct { } // NewSizedLRU returns a new sizedLRU cache -func NewSizedLRU(maxSize int64, onEvict EvictCallback) SizedLRU { +func NewSizedLRU(maxSize int64, onEvict EvictCallback, collector metric.Collector) SizedLRU { + if collector != nil { + gaugeCacheSizeBytes = collector.NewGuage("bazel_remote_disk_cache_size_bytes") + counterEvictedBytes = collector.NewCounter("bazel_remote_disk_cache_evicted_bytes_total") + counterOverwrittenBytes = collector.NewCounter("bazel_remote_disk_cache_overwritten_bytes_total") + } else { + gaugeCacheSizeBytes = metric.NoOpGauge() + counterEvictedBytes = metric.NoOpCounter() + counterOverwrittenBytes = metric.NoOpCounter() + } + return &sizedLRU{ maxSize: maxSize, ll: list.New(), diff --git a/cache/gcsproxy/gcsproxy.go b/cache/gcsproxy/gcsproxy.go index 59275b2e7..e985e78e6 100644 --- a/cache/gcsproxy/gcsproxy.go +++ b/cache/gcsproxy/gcsproxy.go @@ -10,6 +10,7 @@ import ( "github.com/buchgr/bazel-remote/cache" "github.com/buchgr/bazel-remote/cache/httpproxy" + "github.com/buchgr/bazel-remote/metric" "golang.org/x/oauth2" "golang.org/x/oauth2/google" @@ -17,7 +18,7 @@ import ( // New creates a cache that proxies requests to Google Cloud Storage. 
func New(bucket string, useDefaultCredentials bool, jsonCredentialsFile string, - accessLogger cache.Logger, errorLogger cache.Logger, numUploaders, maxQueuedUploads int) (cache.Proxy, error) { + accessLogger cache.Logger, errorLogger cache.Logger, numUploaders, maxQueuedUploads int, collector metric.Collector) (cache.Proxy, error) { var remoteClient *http.Client var err error @@ -53,5 +54,5 @@ func New(bucket string, useDefaultCredentials bool, jsonCredentialsFile string, Path: bucket, } - return httpproxy.New(&baseURL, remoteClient, accessLogger, errorLogger, numUploaders, maxQueuedUploads), nil + return httpproxy.New(&baseURL, remoteClient, accessLogger, errorLogger, numUploaders, maxQueuedUploads, collector), nil } diff --git a/cache/httpproxy/httpproxy.go b/cache/httpproxy/httpproxy.go index d640a2239..4b08b1f10 100644 --- a/cache/httpproxy/httpproxy.go +++ b/cache/httpproxy/httpproxy.go @@ -12,9 +12,7 @@ import ( "strconv" "github.com/buchgr/bazel-remote/cache" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/buchgr/bazel-remote/metric" ) type uploadReq struct { @@ -33,14 +31,8 @@ type remoteHTTPProxyCache struct { } var ( - cacheHits = promauto.NewCounter(prometheus.CounterOpts{ - Name: "bazel_remote_http_cache_hits", - Help: "The total number of HTTP backend cache hits", - }) - cacheMisses = promauto.NewCounter(prometheus.CounterOpts{ - Name: "bazel_remote_http_cache_misses", - Help: "The total number of HTTP backend cache misses", - }) + cacheHits metric.Counter + cacheMisses metric.Counter ) func uploadFile(remote *http.Client, baseURL *url.URL, accessLogger cache.Logger, @@ -84,7 +76,15 @@ func uploadFile(remote *http.Client, baseURL *url.URL, accessLogger cache.Logger // New creates a cache that proxies requests to a HTTP remote cache. func New(baseURL *url.URL, remote *http.Client, accessLogger cache.Logger, - errorLogger cache.Logger, numUploaders, maxQueuedUploads int) cache.Proxy { + errorLogger cache.Logger, numUploaders, maxQueuedUploads int, collector metric.Collector) cache.Proxy { + + if collector != nil { + cacheHits = collector.NewCounter("bazel_remote_http_cache_hits") + cacheMisses = collector.NewCounter("bazel_remote_http_cache_misses") + } else { + cacheHits = metric.NoOpCounter() + cacheMisses = metric.NoOpCounter() + } proxy := &remoteHTTPProxyCache{ remote: remote, diff --git a/cache/s3proxy/s3proxy.go b/cache/s3proxy/s3proxy.go index 32377a5fc..37d57b7cc 100644 --- a/cache/s3proxy/s3proxy.go +++ b/cache/s3proxy/s3proxy.go @@ -9,10 +9,9 @@ import ( "github.com/buchgr/bazel-remote/cache" "github.com/buchgr/bazel-remote/config" + "github.com/buchgr/bazel-remote/metric" "github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7/pkg/credentials" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" ) type uploadReq struct { @@ -33,14 +32,8 @@ type s3Cache struct { } var ( - cacheHits = promauto.NewCounter(prometheus.CounterOpts{ - Name: "bazel_remote_s3_cache_hits", - Help: "The total number of s3 backend cache hits", - }) - cacheMisses = promauto.NewCounter(prometheus.CounterOpts{ - Name: "bazel_remote_s3_cache_misses", - Help: "The total number of s3 backend cache misses", - }) + cacheHits metric.Counter + cacheMisses metric.Counter ) // Used in place of minio's verbose "NoSuchKey" error. 
@@ -48,10 +41,18 @@ var errNotFound = errors.New("NOT FOUND") // New returns a new instance of the S3-API based cache func New(s3Config *config.S3CloudStorageConfig, accessLogger cache.Logger, - errorLogger cache.Logger, numUploaders, maxQueuedUploads int) cache.Proxy { + errorLogger cache.Logger, numUploaders, maxQueuedUploads int, collector metric.Collector) cache.Proxy { fmt.Println("Using S3 backend.") + if collector != nil { + cacheHits = collector.NewCounter("bazel_remote_http_cache_hits") + cacheMisses = collector.NewCounter("bazel_remote_http_cache_misses") + } else { + cacheHits = metric.NoOpCounter() + cacheMisses = metric.NoOpCounter() + } + var minioCore *minio.Core var err error diff --git a/main.go b/main.go index b055cc946..a00cc9da1 100644 --- a/main.go +++ b/main.go @@ -16,6 +16,8 @@ import ( "github.com/buchgr/bazel-remote/cache/disk" "github.com/buchgr/bazel-remote/cache/gcsproxy" "github.com/buchgr/bazel-remote/cache/s3proxy" + "github.com/buchgr/bazel-remote/metric" + "github.com/buchgr/bazel-remote/metric/prometheus" "github.com/buchgr/bazel-remote/cache/httpproxy" @@ -25,10 +27,6 @@ import ( "github.com/buchgr/bazel-remote/utils/rlimit" grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - httpmetrics "github.com/slok/go-http-metrics/metrics/prometheus" - middleware "github.com/slok/go-http-metrics/middleware" - middlewarestd "github.com/slok/go-http-metrics/middleware/std" "github.com/urfave/cli/v2" "google.golang.org/grpc" @@ -43,9 +41,6 @@ const ( // is set through linker options. var gitCommit string -// durationBuckets is the buckets used for Prometheus histograms in seconds. -var durationBuckets = []float64{.5, 1, 2.5, 5, 10, 20, 40, 80, 160, 320} - func main() { log.SetFlags(logFlags) @@ -312,11 +307,16 @@ func main() { accessLogger := log.New(os.Stdout, "", logFlags) errorLogger := log.New(os.Stderr, "", logFlags) + var metricCollector metric.Collector + if c.EnableEndpointMetrics { + metricCollector = prometheus.NewCollector() + } + var proxyCache cache.Proxy if c.GoogleCloudStorage != nil { proxyCache, err = gcsproxy.New(c.GoogleCloudStorage.Bucket, c.GoogleCloudStorage.UseDefaultCredentials, c.GoogleCloudStorage.JSONCredentialsFile, - accessLogger, errorLogger, c.NumUploaders, c.MaxQueuedUploads) + accessLogger, errorLogger, c.NumUploaders, c.MaxQueuedUploads, metricCollector) if err != nil { log.Fatal(err) } @@ -328,12 +328,12 @@ func main() { log.Fatal(err) } proxyCache = httpproxy.New(baseURL, - httpClient, accessLogger, errorLogger, c.NumUploaders, c.MaxQueuedUploads) + httpClient, accessLogger, errorLogger, c.NumUploaders, c.MaxQueuedUploads, metricCollector) } else if c.S3CloudStorage != nil { - proxyCache = s3proxy.New(c.S3CloudStorage, accessLogger, errorLogger, c.NumUploaders, c.MaxQueuedUploads) + proxyCache = s3proxy.New(c.S3CloudStorage, accessLogger, errorLogger, c.NumUploaders, c.MaxQueuedUploads, metricCollector) } - diskCache := disk.New(c.Dir, int64(c.MaxSize)*1024*1024*1024, proxyCache) + diskCache := disk.New(c.Dir, int64(c.MaxSize)*1024*1024*1024, proxyCache, metricCollector) mux := http.NewServeMux() httpServer := &http.Server{ @@ -365,17 +365,8 @@ func main() { } log.Println("Mangling non-empty instance names with AC keys:", acKeyManglingStatus) - if c.EnableEndpointMetrics { - metricsMdlw := middleware.New(middleware.Config{ - Recorder: httpmetrics.NewRecorder(httpmetrics.Config{ - DurationBuckets: durationBuckets, - }), - }) - mux.Handle("/metrics", 
middlewarestd.Handler("metrics", metricsMdlw, promhttp.Handler())) - mux.Handle("/status", middlewarestd.Handler("status", metricsMdlw, http.HandlerFunc(h.StatusPageHandler))) - mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - middlewarestd.Handler(r.Method, metricsMdlw, http.HandlerFunc(cacheHandler)).ServeHTTP(w, r) - }) + if metricCollector != nil { + prometheus.WrapEndpoints(mux, cacheHandler, h.StatusPageHandler) } else { mux.HandleFunc("/status", h.StatusPageHandler) mux.HandleFunc("/", cacheHandler) @@ -397,7 +388,7 @@ func main() { if c.EnableEndpointMetrics { streamInterceptors = append(streamInterceptors, grpc_prometheus.StreamServerInterceptor) unaryInterceptors = append(unaryInterceptors, grpc_prometheus.UnaryServerInterceptor) - grpc_prometheus.EnableHandlingTimeHistogram(grpc_prometheus.WithHistogramBuckets(durationBuckets)) + grpc_prometheus.EnableHandlingTimeHistogram(grpc_prometheus.WithHistogramBuckets([]float64{.5, 1, 2.5, 5, 10, 20, 40, 80, 160, 320})) } if len(c.TLSCertFile) > 0 && len(c.TLSKeyFile) > 0 { diff --git a/metric/collector.go b/metric/collector.go new file mode 100644 index 000000000..6055b25c1 --- /dev/null +++ b/metric/collector.go @@ -0,0 +1,34 @@ +package metric + +// Counter is a standard metric counter +type Counter interface { + Inc() + Add(value float64) +} + +type noop struct{} + +func (c *noop) Inc() {} +func (c *noop) Set(v float64) {} +func (c *noop) Add(value float64) {} + +// NoOpCounter is a Counter that does nothing +func NoOpCounter() Counter { + return &noop{} +} + +// Gauge is a standard metric gauge +type Gauge interface { + Set(value float64) +} + +// NoOpGauge is a Gauge that does nothing +func NoOpGauge() Gauge { + return &noop{} +} + +// Collector is an interface for creating metrics +type Collector interface { + NewCounter(name string) Counter + NewGuage(name string) Gauge +} diff --git a/metric/prometheus/prometheus.go b/metric/prometheus/prometheus.go new file mode 100644 index 000000000..794b5597c --- /dev/null +++ b/metric/prometheus/prometheus.go @@ -0,0 +1,51 @@ +package prometheus + +import ( + "net/http" + + "github.com/buchgr/bazel-remote/metric" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/client_golang/prometheus/promhttp" + httpmetrics "github.com/slok/go-http-metrics/metrics/prometheus" + "github.com/slok/go-http-metrics/middleware" + middlewarestd "github.com/slok/go-http-metrics/middleware/std" +) + +// durationBuckets is the buckets used for Prometheus histograms in seconds. 
+var durationBuckets = []float64{.5, 1, 2.5, 5, 10, 20, 40, 80, 160, 320} + +// NewCollector returns a prometheus backed collector +func NewCollector() metric.Collector { + return &collector{} +} + +// WrapEndpoints attaches the prometheus metrics endpoints to a mux +func WrapEndpoints(mux *http.ServeMux, cache http.HandlerFunc, status http.HandlerFunc) { + metricsMdlw := middleware.New(middleware.Config{ + Recorder: httpmetrics.NewRecorder(httpmetrics.Config{ + DurationBuckets: durationBuckets, + }), + }) + mux.Handle("/metrics", middlewarestd.Handler("metrics", metricsMdlw, promhttp.Handler())) + mux.Handle("/status", middlewarestd.Handler("status", metricsMdlw, http.HandlerFunc(status))) + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + middlewarestd.Handler(r.Method, metricsMdlw, http.HandlerFunc(cache)).ServeHTTP(w, r) + }) +} + +type collector struct{} + +func (c *collector) NewCounter(name string) metric.Counter { + return promauto.NewCounter(prometheus.CounterOpts{ + Name: name, + Help: "The total number of disk backend cache hits", + }) +} + +func (c *collector) NewGuage(name string) metric.Gauge { + return promauto.NewGauge(prometheus.GaugeOpts{ + Name: "bazel_remote_disk_cache_size_bytes", + Help: "The current number of bytes in the disk backend", + }) +} From 94d39cb113a3cd896c7a772f829faf0da8e1cf22 Mon Sep 17 00:00:00 2001 From: Brian Dittmer Date: Sat, 3 Oct 2020 14:59:33 -0400 Subject: [PATCH 2/3] Add a mapping between metric names and their help messages. --- metric/prometheus/prometheus.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/metric/prometheus/prometheus.go b/metric/prometheus/prometheus.go index 794b5597c..f08bfc967 100644 --- a/metric/prometheus/prometheus.go +++ b/metric/prometheus/prometheus.go @@ -15,6 +15,19 @@ import ( // durationBuckets is the buckets used for Prometheus histograms in seconds. 
var durationBuckets = []float64{.5, 1, 2.5, 5, 10, 20, 40, 80, 160, 320} +// map metric names to their help message +var help = map[string]string{ + "bazel_remote_disk_cache_hits": "The total number of disk backend cache hits", + "bazel_remote_disk_cache_misses": "The total number of disk backend cache misses", + "bazel_remote_disk_cache_size_bytes": "The current number of bytes in the disk backend", + "bazel_remote_disk_cache_evicted_bytes_total": "The total number of bytes evicted from disk backend, due to full cache", + "bazel_remote_disk_cache_overwritten_bytes_total": "The total number of bytes removed from disk backend, due to put of already existing key", + "bazel_remote_http_cache_hits": "The total number of HTTP backend cache hits", + "bazel_remote_http_cache_misses": "The total number of HTTP backend cache misses", + "bazel_remote_s3_cache_hits": "The total number of s3 backend cache hits", + "bazel_remote_s3_cache_misses": "The total number of s3 backend cache misses", +} + // NewCollector returns a prometheus backed collector func NewCollector() metric.Collector { return &collector{} @@ -39,13 +52,13 @@ type collector struct{} func (c *collector) NewCounter(name string) metric.Counter { return promauto.NewCounter(prometheus.CounterOpts{ Name: name, - Help: "The total number of disk backend cache hits", + Help: help["name"], }) } func (c *collector) NewGuage(name string) metric.Gauge { return promauto.NewGauge(prometheus.GaugeOpts{ Name: "bazel_remote_disk_cache_size_bytes", - Help: "The current number of bytes in the disk backend", + Help: help["name"], }) } From 13364dc51ac76d9a6aee4b58c1c865d08202cc97 Mon Sep 17 00:00:00 2001 From: Brian Dittmer Date: Sat, 3 Oct 2020 15:10:10 -0400 Subject: [PATCH 3/3] Fix silly typo --- metric/prometheus/prometheus.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric/prometheus/prometheus.go b/metric/prometheus/prometheus.go index f08bfc967..ee086db64 100644 --- a/metric/prometheus/prometheus.go +++ b/metric/prometheus/prometheus.go @@ -52,13 +52,13 @@ type collector struct{} func (c *collector) NewCounter(name string) metric.Counter { return promauto.NewCounter(prometheus.CounterOpts{ Name: name, - Help: help["name"], + Help: help[name], }) } func (c *collector) NewGuage(name string) metric.Gauge { return promauto.NewGauge(prometheus.GaugeOpts{ Name: "bazel_remote_disk_cache_size_bytes", - Help: help["name"], + Help: help[name], }) }
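
A note on extending this: the point of the new metric.Collector interface (metric/collector.go above) is that callers can supply their own metrics backend instead of the default Prometheus one. Below is a minimal sketch of such a backend using only the Go standard library's expvar package. The package name expvarmetrics, the expvarCollector type, and the float wrapper are illustrative and not part of these patches; the method set (NewCounter, NewGuage, Inc, Add, Set) follows the interfaces exactly as declared in metric/collector.go.

package expvarmetrics

import (
	"expvar"

	"github.com/buchgr/bazel-remote/metric"
)

// expvarCollector publishes each metric as an expvar.Float, which the
// expvar package exposes on /debug/vars of the default HTTP mux.
type expvarCollector struct{}

// NewCollector returns a metric.Collector backed by expvar.
func NewCollector() metric.Collector {
	return &expvarCollector{}
}

// float adapts *expvar.Float to both metric.Counter and metric.Gauge.
type float struct {
	v *expvar.Float
}

func (f *float) Inc()              { f.v.Add(1) }
func (f *float) Add(value float64) { f.v.Add(value) }
func (f *float) Set(value float64) { f.v.Set(value) }

func (c *expvarCollector) NewCounter(name string) metric.Counter {
	// expvar.NewFloat registers the variable on the process-wide expvar registry.
	return &float{v: expvar.NewFloat(name)}
}

// NewGuage matches the method name as declared in metric.Collector.
func (c *expvarCollector) NewGuage(name string) metric.Gauge {
	return &float{v: expvar.NewFloat(name)}
}

Wiring it up would mirror what main.go does with the Prometheus collector in this series, e.g. disk.New(c.Dir, int64(c.MaxSize)*1024*1024*1024, proxyCache, expvarmetrics.NewCollector()); passing a nil collector instead falls back to the metric.NoOpCounter/NoOpGauge implementations added in metric/collector.go.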