From f6d8964ebd3587272cc75b40bbaa6c2b7dc46d58 Mon Sep 17 00:00:00 2001 From: Pankaj Mishra Date: Mon, 9 Sep 2024 09:40:39 +0000 Subject: [PATCH] Record metrics for temporary error. --- pkg/metrics/metrics.go | 19 ++++++++- pkg/metrics/metrics_test.go | 83 +++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index a2128fb5d..3156032c3 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -14,6 +14,7 @@ limitations under the License. package metrics import ( + "errors" "fmt" "net/http" "os" @@ -23,6 +24,8 @@ import ( "google.golang.org/grpc/status" "k8s.io/component-base/metrics" "k8s.io/klog/v2" + "sigs.k8s.io/gcp-filestore-csi-driver/pkg/cloud_provider/file" + "sigs.k8s.io/gcp-filestore-csi-driver/pkg/common" ) const ( @@ -147,7 +150,7 @@ func (mm *MetricsManager) recordComponentVersionMetric() error { } func (mm *MetricsManager) RecordOperationMetrics(opErr error, methodName string, filestoreMode string, opDuration time.Duration) { - operationSeconds.WithLabelValues(getErrorCode(opErr), methodName, filestoreMode).Observe(opDuration.Seconds()) + operationSeconds.WithLabelValues(errorCodeLabelValue(opErr), methodName, filestoreMode).Observe(opDuration.Seconds()) } func (mm *MetricsManager) RecordKubeAPIMetrics(opErr error, resourceType, opType, opSource string, opDuration time.Duration) { @@ -194,6 +197,20 @@ func (mm *MetricsManager) EmitGKEComponentVersion() error { return nil } +// errorCodeLabelValue returns the label value for the given operation error. +func errorCodeLabelValue(operationErr error) string { + err := codes.OK.String() + if operationErr != nil { + // If the operationErr is a TemporaryError, unwrap the temporary error before passing it to CodeForError. + var tempErr *common.TemporaryError + if errors.As(operationErr, &tempErr) { + operationErr = tempErr.Unwrap() + } + err = getErrorCode(file.StatusError(operationErr)) + } + return err +} + // Server represents any type that could serve HTTP requests for the metrics // endpoint. type Server interface { diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go index 31fd37ac0..30c958231 100644 --- a/pkg/metrics/metrics_test.go +++ b/pkg/metrics/metrics_test.go @@ -1,7 +1,17 @@ package metrics import ( + "context" + "errors" + "fmt" + "net/http" "testing" + + "google.golang.org/api/googleapi" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "github.com/google/go-cmp/cmp" + "sigs.k8s.io/gcp-filestore-csi-driver/pkg/common" ) const ( @@ -24,3 +34,76 @@ func TestProcessStartTimeMetricExist(t *testing.T) { t.Fatalf("Metrics does not contain %v. Scraped content: %v", ProcessStartTimeMetric, metricsFamilies) } + + +func TestErrorCodeLabelValue(t *testing.T) { + testCases := []struct { + name string + operationErr error + wantErrorCode string + }{ + { + name: "Not googleapi.Error", + operationErr: errors.New("I am not a googleapi.Error"), + wantErrorCode: "Internal", + }, + { + name: "User error", + operationErr: &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"}, + wantErrorCode: "InvalidArgument", + }, + { + name: "googleapi.Error but not a user error", + operationErr: &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"}, + wantErrorCode: "Internal", + }, + { + name: "context canceled error", + operationErr: context.Canceled, + wantErrorCode: "Canceled", + }, + { + name: "context deadline exceeded error", + operationErr: context.DeadlineExceeded, + wantErrorCode: "DeadlineExceeded", + }, + { + name: "status error with Aborted error code", + operationErr: status.Error(codes.Aborted, "aborted error"), + wantErrorCode: "Aborted", + }, + { + name: "user multiattach error", + operationErr: fmt.Errorf("The disk resource 'projects/foo/disk/bar' is already being used by 'projects/foo/instances/1'"), + wantErrorCode: "Internal", + }, + { + name: "TemporaryError that wraps googleapi error", + operationErr: common.NewTemporaryError(codes.Unavailable, &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"}), + wantErrorCode: "InvalidArgument", + }, + { + name: "TemporaryError that wraps fmt.Errorf, which wraps googleapi error", + operationErr: common.NewTemporaryError(codes.Aborted, fmt.Errorf("got error: %w", &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"})), + wantErrorCode: "InvalidArgument", + }, + { + name: "TemporaryError that wraps status error", + operationErr: common.NewTemporaryError(codes.Aborted, status.Error(codes.InvalidArgument, "User error with bad request")), + wantErrorCode: "InvalidArgument", + }, + { + name: "TemporaryError that wraps multiattach error", + operationErr: common.NewTemporaryError(codes.Unavailable, fmt.Errorf("The disk resource 'projects/foo/disk/bar' is already being used by 'projects/foo/instances/1'")), + wantErrorCode: "Internal", + }, + } + + for _, tc := range testCases { + t.Logf("Running test: %v", tc.name) + errCode := errorCodeLabelValue(tc.operationErr) + if diff := cmp.Diff(tc.wantErrorCode, errCode); diff != "" { + t.Errorf("%s: -want err, +got err\n%s", tc.name, diff) + } + } +} \ No newline at end of file