Skip to content

Commit

Permalink
feat: add agent injector telemetry (#703)
Browse files Browse the repository at this point in the history
* feat: add agent injector telemetry

Add Prometheus metrics to monitor the Agent Injector's performance. New
metrics include a gauge of current requests being processed by the
webhook, a summary of request processing times, and a count of
successful and failed injections by Kubernetes namespace.

Successful injections are broken down by injection type. The `injection_type`
label can assume the value `init_only` for injections with only an
initContainer (no sidecar) and `sidecar` for all other cases (sidecar
only or sidecar + initContainer).

Fixes AG-005161.

* refactor(metrics): add metadata to mutate response

Update the `Mutate()` method to return a struct that extends the
existing return data (AdmissionResponse) with metadata on the types of
Vault Agent injections made. The metadata informs the count of
injections by namespace, which are now further broken down by type of
injection.
  • Loading branch information
gsantos-hc authored Dec 2, 2024
1 parent f40afeb commit b05b4ca
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 15 deletions.
66 changes: 52 additions & 14 deletions agent-inject/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"io"
"net/http"
"strings"
"time"

"github.com/hashicorp/go-hclog"
"github.com/hashicorp/vault-k8s/agent-inject/agent"
Expand Down Expand Up @@ -85,6 +86,14 @@ type Handler struct {
func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
h.Log.Info("Request received", "Method", r.Method, "URL", r.URL)

// Measure request processing duration and monitor request queue
requestQueue.Inc()
requestStart := time.Now()
defer func() {
requestProcessingTime.Observe(float64(time.Since(requestStart).Milliseconds()))
requestQueue.Dec()
}()

if ct := r.Header.Get("Content-Type"); ct != "application/json" {
msg := fmt.Sprintf("Invalid content-type: %q", ct)
http.Error(w, msg, http.StatusBadRequest)
Expand All @@ -109,7 +118,10 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
return
}

var admResp admissionv1.AdmissionReview
var (
mutateResp MutateResponse
admResp admissionv1.AdmissionReview
)

// Both v1 and v1beta1 AdmissionReview types are exactly the same, so the v1beta1 type can
// be decoded into the v1 type. However the runtime codec's decoder guesses which type to
Expand All @@ -126,7 +138,8 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
h.Log.Error("error on request", "Error", msg, "Code", http.StatusInternalServerError)
return
} else {
admResp.Response = h.Mutate(admReq.Request)
mutateResp = h.Mutate(admReq.Request)
admResp.Response = mutateResp.Resp
}

// Default to a v1 AdmissionReview, otherwise the API server may not recognize the request
Expand All @@ -142,26 +155,43 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
msg := fmt.Sprintf("error marshalling admission response: %s", err)
http.Error(w, msg, http.StatusInternalServerError)
h.Log.Error("error on request", "Error", msg, "Code", http.StatusInternalServerError)
incrementInjectionFailures(admReq.Request.Namespace)
return
}

if _, err := w.Write(resp); err != nil {
h.Log.Error("error writing response", "Error", err)
incrementInjectionFailures(admReq.Request.Namespace)
return
}

if admResp.Response.Allowed {
incrementInjections(admReq.Request.Namespace, mutateResp)
} else {
incrementInjectionFailures(admReq.Request.Namespace)
}
}

type MutateResponse struct {
Resp *admissionv1.AdmissionResponse
InjectedInit bool
InjectedSidecar bool
}

// Mutate takes an admission request and performs mutation if necessary,
// returning the final API response.
func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.AdmissionResponse {
func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) MutateResponse {
// Decode the pod from the request
var pod corev1.Pod
if err := json.Unmarshal(req.Object.Raw, &pod); err != nil {
h.Log.Error("could not unmarshal request to pod: %s", err)
h.Log.Debug("%s", req.Object.Raw)
return &admissionv1.AdmissionResponse{
UID: req.UID,
Result: &metav1.Status{
Message: err.Error(),
return MutateResponse{
Resp: &admissionv1.AdmissionResponse{
UID: req.UID,
Result: &metav1.Status{
Message: err.Error(),
},
},
}
}
Expand All @@ -178,7 +208,9 @@ func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.Admissi
err := fmt.Errorf("error checking if should inject agent: %s", err)
return admissionError(req.UID, err)
} else if !inject {
return resp
return MutateResponse{
Resp: resp,
}
}

h.Log.Debug("checking namespaces..")
Expand Down Expand Up @@ -249,14 +281,20 @@ func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.Admissi
patchType := admissionv1.PatchTypeJSONPatch
resp.PatchType = &patchType

return resp
return MutateResponse{
Resp: resp,
InjectedInit: agentSidecar.PrePopulate,
InjectedSidecar: !agentSidecar.PrePopulateOnly,
}
}

func admissionError(UID types.UID, err error) *admissionv1.AdmissionResponse {
return &admissionv1.AdmissionResponse{
UID: UID,
Result: &metav1.Status{
Message: err.Error(),
func admissionError(UID types.UID, err error) MutateResponse {
return MutateResponse{
Resp: &admissionv1.AdmissionResponse{
UID: UID,
Result: &metav1.Status{
Message: err.Error(),
},
},
}
}
Expand Down
2 changes: 1 addition & 1 deletion agent-inject/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ func TestHandlerHandle(t *testing.T) {
for _, tt := range cases {
t.Run(tt.Name, func(t *testing.T) {
req := require.New(t)
resp := tt.Handler.Mutate(&tt.Req)
resp := (tt.Handler.Mutate(&tt.Req)).Resp
if (tt.Err == "") != resp.Allowed {
t.Fatalf("allowed: %v, expected err: %v", resp.Allowed, tt.Err)
}
Expand Down
77 changes: 77 additions & 0 deletions agent-inject/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package agent_inject

import (
"github.com/prometheus/client_golang/prometheus"
)

const (
metricsNamespace = "vault"
metricsSubsystem = "agent_injector"
metricsLabelNamespace = "namespace"
metricsLabelType = "injection_type"
metricsLabelTypeBoth = "init_and_sidecar"
metricsLabelTypeInitOnly = "init_only"
metricsLabelTypeSidecarOnly = "sidecar_only"
)

var (
requestQueue = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "request_queue_length",
Help: "Count of webhook requests in the injector's queue",
})

requestProcessingTime = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "request_processing_duration_ms",
Help: "Webhook request processing times in milliseconds",
Buckets: []float64{5, 10, 25, 50, 75, 100, 250, 500, 1000, 2500, 5000, 7500, 10000},
})

injectionsByNamespace = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "injections_by_namespace_total",
Help: "Total count of Agent Sidecar injections by namespace",
}, []string{metricsLabelNamespace, metricsLabelType})

failedInjectionsByNamespace = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "failed_injections_by_namespace_total",
Help: "Total count of failed Agent Sidecar injections by namespace",
}, []string{metricsLabelNamespace})
)

func incrementInjections(namespace string, res MutateResponse) {
// Injection type can be one of: init_and_sidecar (default); init_only; or sidecar_only
typeLabel := metricsLabelTypeBoth
if res.InjectedInit && !res.InjectedSidecar {
typeLabel = metricsLabelTypeInitOnly
} else if res.InjectedSidecar && !res.InjectedInit {
typeLabel = metricsLabelTypeSidecarOnly
}

injectionsByNamespace.With(prometheus.Labels{
metricsLabelNamespace: namespace,
metricsLabelType: typeLabel,
}).Inc()
}

func incrementInjectionFailures(namespace string) {
failedInjectionsByNamespace.With(prometheus.Labels{metricsLabelNamespace: namespace}).Inc()
}

func MustRegisterInjectorMetrics(registry prometheus.Registerer) {
registry.MustRegister(
requestQueue,
requestProcessingTime,
injectionsByNamespace,
failedInjectionsByNamespace,
)
}
2 changes: 2 additions & 0 deletions subcommand/injector/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/hashicorp/vault-k8s/leader"
"github.com/hashicorp/vault-k8s/version"
"github.com/mitchellh/cli"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
adminv1 "k8s.io/api/admissionregistration/v1"
adminv1beta "k8s.io/api/admissionregistration/v1beta1"
Expand Down Expand Up @@ -231,6 +232,7 @@ func (c *Command) Run(args []string) int {

// Registering path to expose metrics
if c.flagTelemetryPath != "" {
agentInject.MustRegisterInjectorMetrics(prometheus.DefaultRegisterer)
c.UI.Info(fmt.Sprintf("Registering telemetry path on %q", c.flagTelemetryPath))
mux.Handle(c.flagTelemetryPath, promhttp.Handler())
}
Expand Down

0 comments on commit b05b4ca

Please sign in to comment.