Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ffresty] [metric] HTTP Response Time and Complete Gauge Support #160

Merged
merged 18 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ coverage.txt
**/debug.test
.DS_Store
__debug*
.vscode/*.log
.vscode/*.log
*.iml
.idea/
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ coverage.html:
$(VGO) tool cover -html=coverage.txt
coverage: test coverage.html
lint: ${LINT}
GOGC=20 $(LINT) run -v --timeout 5m
GOGC=20 $(LINT) run -v --timeout 5m --fast --allow-parallel-runners
${MOCKERY}:
$(VGO) install github.com/vektra/mockery/v2@latest
${LINT}:
Expand Down
68 changes: 47 additions & 21 deletions pkg/ffresty/ffresty.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright © 2024 Kaleido, Inc.
// Copyright © 2025 Kaleido, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
Expand Down Expand Up @@ -40,7 +40,14 @@ import (
"golang.org/x/time/rate"
)

const (
metricsHTTPResponsesTotal = "http_responses_total"
metricsNetworkErrorsTotal = "network_errors_total"
metricsHTTPResponseTime = "http_response_time_seconds"
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some size metrics for data transmitted would be useful to add as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Chengxuan still required? or follow up PR for @onelapahead

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@EnriqueL8 this is a suggestion, it should not block the merge of this PR. thanks for checking.


type retryCtxKey struct{}
type hostCtxKey struct{}

type retryCtx struct {
id string
Expand Down Expand Up @@ -98,27 +105,21 @@ func EnableClientMetrics(ctx context.Context, metricsRegistry metric.MetricsRegi
if err != nil {
return err
}
metricsManager.NewCounterMetricWithLabels(ctx, "http_response", "HTTP response", []string{"status", "error", "host", "method"}, false)
metricsManager.NewCounterMetricWithLabels(ctx, "network_error", "Network error", []string{"host", "method"}, false)
metricsManager.NewCounterMetricWithLabels(ctx, metricsHTTPResponsesTotal, "HTTP response", []string{"status", "error", "host", "method"}, false)
metricsManager.NewCounterMetricWithLabels(ctx, metricsNetworkErrorsTotal, "Network error", []string{"host", "method"}, false)
metricsManager.NewSummaryMetricWithLabels(ctx, metricsHTTPResponseTime, "HTTP response time", []string{"status", "host", "method"}, false)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@onelapahead what's your consideration between Histogram and Summary for this timing metrics?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Histogram is more costly in terms of cardinality bc of all the buckets you make. So this felt light enough to only calculate average response time across a few basic dimensions.

Would love to toggle btwn histogram and summaries based on certain settings eventually, but that was a bigger change than I had an appetite for.


// create hooks
onErrorMetricsHook := func(req *resty.Request, _ error) {
method := req.Method
u, _ := url.Parse(req.URL)
host := u.Host
// whilst there it is a possibility to get an response returned in the error here (and resty doc for OnError shows this) it seems to be a special case and the statuscode in such cases was not set.
// therefore we log all cases as network_error we may in future find reason to extract more detail from the error
metricsManager.IncCounterMetricWithLabels(ctx, "network_error", map[string]string{"host": host, "method": method}, nil)
metricsManager.IncCounterMetricWithLabels(ctx, metricsNetworkErrorsTotal, map[string]string{"host": req.Context().Value(hostCtxKey{}).(string), "method": req.Method}, nil)
}
RegisterGlobalOnError(onErrorMetricsHook)

onSuccessMetricsHook := func(_ *resty.Client, resp *resty.Response) {
method := resp.Request.Method
u, _ := url.Parse(resp.Request.URL)
host := u.Host
code := resp.RawResponse.StatusCode
metricsManager.IncCounterMetricWithLabels(ctx, "http_response", map[string]string{"status": fmt.Sprintf("%d", code), "error": "false", "host": host, "method": method}, nil)
metricsManager.IncCounterMetricWithLabels(ctx, metricsHTTPResponsesTotal, map[string]string{"status": fmt.Sprintf("%d", resp.RawResponse.StatusCode), "error": "false", "host": resp.Request.Context().Value(hostCtxKey{}).(string), "method": resp.Request.Method}, nil)
}
RegisterGlobalOnSuccess(onSuccessMetricsHook)
return nil
Expand All @@ -142,13 +143,17 @@ func OnAfterResponse(c *resty.Client, resp *resty.Response) {
}
rCtx := resp.Request.Context()
rc := rCtx.Value(retryCtxKey{}).(*retryCtx)
elapsed := float64(time.Since(rc.start)) / float64(time.Millisecond)
elapsed := time.Since(rc.start)
level := logrus.DebugLevel
status := resp.StatusCode()
if status >= 300 {
level = logrus.ErrorLevel
}
log.L(rCtx).Logf(level, "<== %s %s [%d] (%.2fms)", resp.Request.Method, resp.Request.URL, status, elapsed)
log.L(rCtx).Logf(level, "<== %s %s [%d] (%dms)", resp.Request.Method, resp.Request.URL, status, time.Since(rc.start).Milliseconds())
if metricsManager != nil {
metricsManager.ObserveSummaryMetricWithLabels(rCtx, metricsHTTPResponseTime, elapsed.Seconds(), map[string]string{"status": fmt.Sprintf("%d", status), "host": rCtx.Value(hostCtxKey{}).(string), "method": resp.Request.Method}, nil)
}
// TODO use req.TraceInfo() for richer metrics at the DNS and transport layer
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

leaving this todo here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

correct, dont think its worth of an issue (unless you feel otherwise) but a good reminder for future contributors reading code that more is possible with the metrics and where to get started with it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good


func OnError(req *resty.Request, err error) {
Expand Down Expand Up @@ -193,7 +198,7 @@ func GetRateLimiter(rps, burst int) *rate.Limiter {
//
// You can use the normal Resty builder pattern, to set per-instance configuration
// as required.
func NewWithConfig(ctx context.Context, ffrestyConfig Config) (client *resty.Client) {
func NewWithConfig(ctx context.Context, ffrestyConfig Config) (client *resty.Client) { //nolint:gocyclo
if ffrestyConfig.HTTPCustomClient != nil {
if httpClient, ok := ffrestyConfig.HTTPCustomClient.(*http.Client); ok {
client = resty.NewWithClient(httpClient)
Expand Down Expand Up @@ -232,10 +237,10 @@ func NewWithConfig(ctx context.Context, ffrestyConfig Config) (client *resty.Cli

rateLimiterMap[client] = GetRateLimiter(ffrestyConfig.ThrottleRequestsPerSecond, ffrestyConfig.ThrottleBurst)

url := strings.TrimSuffix(ffrestyConfig.URL, "/")
if url != "" {
client.SetBaseURL(url)
log.L(ctx).Debugf("Created REST client to %s", url)
_url := strings.TrimSuffix(ffrestyConfig.URL, "/")
if _url != "" {
client.SetBaseURL(_url)
log.L(ctx).Debugf("Created REST client to %s", _url)
}

if ffrestyConfig.ProxyURL != "" {
Expand All @@ -244,7 +249,7 @@ func NewWithConfig(ctx context.Context, ffrestyConfig Config) (client *resty.Cli

client.SetTimeout(time.Duration(ffrestyConfig.HTTPRequestTimeout))

client.OnBeforeRequest(func(_ *resty.Client, req *resty.Request) error {
client.OnBeforeRequest(func(c *resty.Client, req *resty.Request) error {
if rateLimiterMap[client] != nil {
// Wait for permission to proceed with the request
err := rateLimiterMap[client].Wait(req.Context())
Expand All @@ -253,6 +258,26 @@ func NewWithConfig(ctx context.Context, ffrestyConfig Config) (client *resty.Cli
}
}
rCtx := req.Context()
// Record host in context to avoid redundant parses in hooks
var u *url.URL
if req.URL != "" {
u, _ = url.Parse(req.URL)
}
// The req.URL might have only set a path i.e. /home, fallbacking to the base URL of the client.
// So if the URL is nil, that's likely the case and we'll derive the host from the configured
// base instead.
if (u == nil || u.Host == "") && _url != "" {
u, _ = url.Parse(_url)
}
if (u == nil || u.Host == "") && c.BaseURL != "" {
u, _ = url.Parse(c.BaseURL)
}
if u != nil && u.Host != "" {
host := u.Host
rCtx = context.WithValue(rCtx, hostCtxKey{}, host)
} else {
rCtx = context.WithValue(rCtx, hostCtxKey{}, "unknown")
}
rc := rCtx.Value(retryCtxKey{})
if rc == nil {
// First attempt
Expand Down Expand Up @@ -289,8 +314,9 @@ func NewWithConfig(ctx context.Context, ffrestyConfig Config) (client *resty.Cli
}
}

log.L(rCtx).Debugf("==> %s %s%s", req.Method, url, req.URL)
log.L(rCtx).Debugf("==> %s %s%s", req.Method, _url, req.URL)
log.L(rCtx).Tracef("==> (body) %+v", req.Body)

return nil
})

Expand Down
6 changes: 3 additions & 3 deletions pkg/ffresty/ffresty_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright © 2024 Kaleido, Inc.
// Copyright © 2025 Kaleido, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
Expand Down Expand Up @@ -681,7 +681,7 @@ func TestMTLSClientWithServer(t *testing.T) {
var restyConfig = config.RootSection("resty")
InitConfig(restyConfig)
clientTLSSection := restyConfig.SubSection("tls")
restyConfig.Set(HTTPConfigURL, ln.Addr())
restyConfig.Set(HTTPConfigURL, ln.Addr()) // note this does not have https:// in the URL
clientTLSSection.Set(fftls.HTTPConfTLSEnabled, true)
clientTLSSection.Set(fftls.HTTPConfTLSKeyFile, privateKeyFile.Name())
clientTLSSection.Set(fftls.HTTPConfTLSCertFile, publicKeyFile.Name())
Expand Down Expand Up @@ -722,7 +722,6 @@ func TestEnableClientMetrics(t *testing.T) {

err := EnableClientMetrics(ctx, mr)
assert.NoError(t, err)

}

func TestEnableClientMetricsIdempotent(t *testing.T) {
Expand All @@ -749,6 +748,7 @@ func TestHooks(t *testing.T) {
}

customOnSuccess := func(c *resty.Client, resp *resty.Response) {
assert.Equal(t, "localhost:12345", resp.Request.Context().Value(hostCtxKey{}).(string))
onSuccessCount++
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/metric/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ type MetricsManager interface {
// functions for emitting metrics
SetGaugeMetric(ctx context.Context, metricName string, number float64, defaultLabels *FireflyDefaultLabels)
SetGaugeMetricWithLabels(ctx context.Context, metricName string, number float64, labels map[string]string, defaultLabels *FireflyDefaultLabels)
IncGaugeMetric(ctx context.Context, metricName string, defaultLabels *FireflyDefaultLabels)
IncGaugeMetricWithLabels(ctx context.Context, metricName string, labels map[string]string, defaultLabels *FireflyDefaultLabels)
DecGaugeMetric(ctx context.Context, metricName string, defaultLabels *FireflyDefaultLabels)
DecGaugeMetricWithLabels(ctx context.Context, metricName string, labels map[string]string, defaultLabels *FireflyDefaultLabels)

IncCounterMetric(ctx context.Context, metricName string, defaultLabels *FireflyDefaultLabels)
IncCounterMetricWithLabels(ctx context.Context, metricName string, labels map[string]string, defaultLabels *FireflyDefaultLabels)
ObserveHistogramMetric(ctx context.Context, metricName string, number float64, defaultLabels *FireflyDefaultLabels)
Expand Down
34 changes: 20 additions & 14 deletions pkg/metric/metric_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright © 2023 Kaleido, Inc.
// Copyright © 2025 Kaleido, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
Expand Down Expand Up @@ -68,12 +68,12 @@ func TestMetricsManager(t *testing.T) {
mm.IncCounterMetric(ctx, "tx_request", nil)
mm.IncCounterMetricWithLabels(ctx, "tx_process", map[string]string{"status": "success"}, nil)
mm.SetGaugeMetric(ctx, "tx_stalled", 2, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "singing:)"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "signing"}, nil)
mm.ObserveHistogramMetric(ctx, "tx_timeout_seconds", 2000, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "signing"}, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{}, nil) // no label provided
mm.ObserveSummaryMetric(ctx, "tx_request_bytes", 2000, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "signing"}, nil)
}

func TestMetricsManagerWithDefaultLabels(t *testing.T) {
Expand All @@ -87,6 +87,8 @@ func TestMetricsManagerWithDefaultLabels(t *testing.T) {
mm.NewCounterMetricWithLabels(ctx, "tx_process", "Transaction processed", []string{"status"}, true)
mm.NewGaugeMetric(ctx, "tx_stalled", "Transactions that are stuck in a loop", true)
mm.NewGaugeMetricWithLabels(ctx, "tx_inflight", "Transactions that are in flight", []string{"stage"}, true)
mm.NewGaugeMetric(ctx, "tx_deadlettered", "Transactions that are put onto the deadletter queue", true)
mm.NewGaugeMetricWithLabels(ctx, "tx_confirming", "Transactions that are waiting to be confirmed", []string{"stage"}, true)
mm.NewHistogramMetric(ctx, "tx_timeout_seconds", "Duration of timed out transactions", []float64{}, true)
mm.NewHistogramMetricWithLabels(ctx, "tx_stage_seconds", "Duration of each transaction stage", []float64{}, []string{"stage"}, true)
mm.NewSummaryMetric(ctx, "tx_request_bytes", "Request size of timed out transactions", true)
Expand All @@ -96,21 +98,25 @@ func TestMetricsManagerWithDefaultLabels(t *testing.T) {
mm.IncCounterMetric(ctx, "tx_request", nil)
mm.IncCounterMetricWithLabels(ctx, "tx_process", map[string]string{"status": "success"}, nil)
mm.SetGaugeMetric(ctx, "tx_stalled", 2, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "singing:)"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "signing"}, nil)
mm.IncGaugeMetric(ctx, "tx_deadlettered", nil)
mm.IncGaugeMetricWithLabels(ctx, "tx_confirming", map[string]string{"stage": "signing"}, nil)
mm.DecGaugeMetric(ctx, "tx_deadlettered", nil)
mm.DecGaugeMetricWithLabels(ctx, "tx_confirming", map[string]string{"stage": "signing"}, nil)
mm.ObserveHistogramMetric(ctx, "tx_timeout_seconds", 2000, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "signing"}, nil)
mm.ObserveSummaryMetric(ctx, "tx_request_bytes", 2000, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "signing"}, nil)

mm.IncCounterMetric(ctx, "tx_request", &FireflyDefaultLabels{Namespace: "ns_1"})
mm.IncCounterMetricWithLabels(ctx, "tx_process", map[string]string{"status": "success"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.SetGaugeMetric(ctx, "tx_stalled", 2, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "singing:)"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "signing"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveHistogramMetric(ctx, "tx_timeout_seconds", 2000, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "singing:)"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "signing"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{}, nil) // no label provided
mm.ObserveSummaryMetric(ctx, "tx_request_bytes", 2000, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "singing:)"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "signing"}, &FireflyDefaultLabels{Namespace: "ns_1"})

}

Expand Down Expand Up @@ -142,12 +148,12 @@ func TestMetricsManagerErrors(t *testing.T) {
mm.IncCounterMetric(ctx, "tx_not_exist", nil)
mm.IncCounterMetricWithLabels(ctx, "tx_not_exist", map[string]string{"status": "success"}, nil)
mm.SetGaugeMetric(ctx, "tx_not_exist", 2, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_not_exist", 2, map[string]string{"stage": "singing:)"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_not_exist", 2, map[string]string{"stage": "signing"}, nil)
mm.ObserveHistogramMetric(ctx, "tx_not_exist", 2000, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_not_exist", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_not_exist", 2000, map[string]string{"stage": "signing"}, nil)
mm.ObserveSummaryMetric(ctx, "tx_not_exist", 2000, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_not_exist", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight_invalid_labels", 2, map[string]string{"ff_stage": "singing:)"}, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_not_exist", 2000, map[string]string{"stage": "signing"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight_invalid_labels", 2, map[string]string{"ff_stage": "signing"}, nil)
}

func TestHTTPMetricsInstrumentations(t *testing.T) {
Expand Down
Loading
Loading