From b7307c5279bf6c5337be2999053a42cc8a278dc4 Mon Sep 17 00:00:00 2001 From: Michael Shitrit Date: Sun, 14 Jan 2024 16:40:24 +0200 Subject: [PATCH 1/4] ECOPROJECT-1826 - multiple messages of "Automatically selected OutOfServiceTaint Remediation strategy" Signed-off-by: Michael Shitrit --- controllers/selfnoderemediation_controller.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/controllers/selfnoderemediation_controller.go b/controllers/selfnoderemediation_controller.go index 6a938a3a..f1a75740 100644 --- a/controllers/selfnoderemediation_controller.go +++ b/controllers/selfnoderemediation_controller.go @@ -230,7 +230,7 @@ func (r *SelfNodeRemediationReconciler) Reconcile(ctx context.Context, req ctrl. return ctrl.Result{}, nil } - strategy := r.getRuntimeStrategy(snr.Spec.RemediationStrategy) + strategy := r.getRuntimeStrategy(snr) switch strategy { case v1alpha1.ResourceDeletionRemediationStrategy: result, err = r.remediateWithResourceDeletion(snr, node) @@ -897,18 +897,23 @@ func (r *SelfNodeRemediationReconciler) isResourceDeletionExpired(snr *v1alpha1. return true, 0 } -func (r *SelfNodeRemediationReconciler) getRuntimeStrategy(strategy v1alpha1.RemediationStrategyType) v1alpha1.RemediationStrategyType { +func (r *SelfNodeRemediationReconciler) getRuntimeStrategy(snr *v1alpha1.SelfNodeRemediation) v1alpha1.RemediationStrategyType { + strategy := snr.Spec.RemediationStrategy if strategy != v1alpha1.AutomaticRemediationStrategy { return strategy } + remediationStrategy := v1alpha1.ResourceDeletionRemediationStrategy if utils.IsOutOfServiceTaintGA { - r.logger.Info("Automatically selected OutOfServiceTaint Remediation strategy") - return v1alpha1.OutOfServiceTaintRemediationStrategy + remediationStrategy = v1alpha1.OutOfServiceTaintRemediationStrategy } - r.logger.Info("Automatically selected ResourceDeletion Remediation strategy") - return v1alpha1.ResourceDeletionRemediationStrategy + //used as an indication not to spam the log + if isFinalizerAlreadyAdded := controllerutil.ContainsFinalizer(snr, SNRFinalizer); !isFinalizerAlreadyAdded { + r.logger.Info(fmt.Sprintf("Automatically selected %s Remediation strategy", remediationStrategy)) + } + + return remediationStrategy } From 96d6d179ca527204a159a586798c99dc04525490 Mon Sep 17 00:00:00 2001 From: Michael Shitrit Date: Mon, 15 Jan 2024 09:09:37 +0200 Subject: [PATCH 2/4] Reorder jobs in full-gen in order for it to work properly on vendor packages version change Signed-off-by: Michael Shitrit --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9a7e568d..82b59da6 100644 --- a/Makefile +++ b/Makefile @@ -446,4 +446,4 @@ fix-imports: sort-imports ## Sort imports $(SORT_IMPORTS) -w . .PHONY: full-gen -full-gen: generate manifests vendor tidy bundle fix-imports bundle-reset ## generates all automatically generated content +full-gen: tidy vendor generate manifests bundle fix-imports bundle-reset ## generates all automatically generated content From af81a4477d97538ddc2093dd79b195f72e4e6168 Mon Sep 17 00:00:00 2001 From: Michael Shitrit Date: Mon, 15 Jan 2024 09:21:37 +0200 Subject: [PATCH 3/4] ECOPROJECT-1827 Use common events and create an event signaling remediation is finished Signed-off-by: Michael Shitrit --- controllers/selfnoderemediation_controller.go | 11 ++-- go.mod | 2 +- go.sum | 4 +- .../medik8s/common/pkg/events/events.go | 51 +++++++++++++++++++ .../medik8s/common/pkg/labels/labels.go | 2 + vendor/modules.txt | 3 +- 6 files changed, 63 insertions(+), 10 deletions(-) create mode 100644 vendor/github.com/medik8s/common/pkg/events/events.go diff --git a/controllers/selfnoderemediation_controller.go b/controllers/selfnoderemediation_controller.go index f1a75740..236c9ae0 100644 --- a/controllers/selfnoderemediation_controller.go +++ b/controllers/selfnoderemediation_controller.go @@ -22,6 +22,7 @@ import ( "time" "github.com/go-logr/logr" + "github.com/medik8s/common/pkg/events" "github.com/medik8s/common/pkg/resources" "github.com/pkg/errors" @@ -49,7 +50,6 @@ const ( nhcTimeOutAnnotation = "remediation.medik8s.io/nhc-timed-out" excludeRemediationLabel = "remediation.medik8s.io/exclude-from-remediation" - eventReasonRemediationCreated = "RemediationCreated" eventReasonRemediationStopped = "RemediationStopped" eventReasonRemediationSkipped = "RemediationSkipped" @@ -174,8 +174,7 @@ func (r *SelfNodeRemediationReconciler) Reconcile(ctx context.Context, req ctrl. r.logger.Error(err, "failed to get SNR") return ctrl.Result{}, err } - - r.Recorder.Event(snr, eventTypeNormal, eventReasonRemediationCreated, "Remediation started") + events.RemediationStarted(r.Recorder, snr) defer func() { if updateErr := r.updateSnrStatus(ctx, snr); updateErr != nil { @@ -195,9 +194,8 @@ func (r *SelfNodeRemediationReconciler) Reconcile(ctx context.Context, req ctrl. }() if r.isStoppedByNHC(snr) { - msg := "SNR remediation was stopped by Node Healthcheck" - r.logger.Info(msg) - r.Recorder.Event(snr, eventTypeNormal, eventReasonRemediationStopped, msg) + r.logger.Info("NHC added the timed-out annotation, remediation will be stopped") + events.RemediationStoppedByNHC(r.Recorder, snr) return ctrl.Result{}, r.updateConditions(remediationTimeoutByNHC, snr) } @@ -483,6 +481,7 @@ func (r *SelfNodeRemediationReconciler) recoverNode(node *v1.Node, snr *v1alpha1 return ctrl.Result{}, err } r.Recorder.Event(snr, eventTypeNormal, eventReasonRemoveFinalizer, "Remediation process - remove finalizer from snr") + events.RemediationFinished(r.Recorder, snr) } return ctrl.Result{}, nil diff --git a/go.mod b/go.mod index cb427d80..3f1b6855 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/Masterminds/sprig v2.22.0+incompatible github.com/go-logr/logr v1.2.3 github.com/go-ping/ping v1.1.0 - github.com/medik8s/common v1.10.0 + github.com/medik8s/common v1.12.0 github.com/onsi/ginkgo/v2 v2.9.1 github.com/onsi/gomega v1.27.4 github.com/openshift/api v0.0.0-20230414143018-3367bc7e6ac7 // release-4.13 diff --git a/go.sum b/go.sum index 7320563a..6bbf0558 100644 --- a/go.sum +++ b/go.sum @@ -215,8 +215,8 @@ github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.2 h1:hAHbPm5IJGijwng3PWk09JkG9WeqChjprR5s9bBZ+OM= github.com/matttproud/golang_protobuf_extensions v1.0.2/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= -github.com/medik8s/common v1.10.0 h1:ghvW/Kiv9xdkJbFhzeh38dSmx0ltoZNT7cHrCh3WpX4= -github.com/medik8s/common v1.10.0/go.mod h1:ZT/3hfMXJLmZEcqmxRWB5LGC8Wl+qKGGQ4zM8hOE7PY= +github.com/medik8s/common v1.12.0 h1:UJ5VS4rbo/K0snfuqRiYam1NhXTwo4Q7fTng34YSlmA= +github.com/medik8s/common v1.12.0/go.mod h1:Q6YR2rgyiLl6ob4Mx2uDBmybAB3d94fImwoHPbeiE54= github.com/mitchellh/copystructure v1.1.2 h1:Th2TIvG1+6ma3e/0/bopBKohOTY7s4dA8V2q4EUcBJ0= github.com/mitchellh/copystructure v1.1.2/go.mod h1:EBArHfARyrSWO/+Wyr9zwEkc6XMFB9XyNgFNmRkZZU4= github.com/mitchellh/reflectwalk v1.0.1 h1:FVzMWA5RllMAKIdUSC8mdWo3XtwoecrH79BY70sEEpE= diff --git a/vendor/github.com/medik8s/common/pkg/events/events.go b/vendor/github.com/medik8s/common/pkg/events/events.go new file mode 100644 index 00000000..7550d3c8 --- /dev/null +++ b/vendor/github.com/medik8s/common/pkg/events/events.go @@ -0,0 +1,51 @@ +package events + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" +) + +// Event message format "medik8s " +const customFmt = "[remediation] %s" + +// NormalEvent will record an event with type Normal and fixed message. +func NormalEvent(recorder record.EventRecorder, object runtime.Object, reason, message string) { + recorder.Event(object, corev1.EventTypeNormal, reason, fmt.Sprintf(customFmt, message)) +} + +// NormalEventf will record an event with type Normal and formatted message. +func NormalEventf(recorder record.EventRecorder, object runtime.Object, reason, messageFmt string, a ...interface{}) { + message := fmt.Sprintf(messageFmt, a...) + recorder.Event(object, corev1.EventTypeNormal, reason, fmt.Sprintf(customFmt, message)) +} + +// WarningEvent will record an event with type Warning and fixed message. +func WarningEvent(recorder record.EventRecorder, object runtime.Object, reason, message string) { + recorder.Event(object, corev1.EventTypeWarning, reason, fmt.Sprintf(customFmt, message)) +} + +// WarningEventf will record an event with type Warning and formatted message. +func WarningEventf(recorder record.EventRecorder, object runtime.Object, reason, messageFmt string, a ...interface{}) { + message := fmt.Sprintf(messageFmt, a...) + recorder.Event(object, corev1.EventTypeWarning, reason, fmt.Sprintf(customFmt, message)) +} + +// Special case events + +// RemediationStarted will record a Normal event with reason RemediationStarted and message Remediation started. +func RemediationStarted(recorder record.EventRecorder, object runtime.Object) { + NormalEvent(recorder, object, "RemediationStarted", "Remediation started") +} + +// RemediationStoppedByNHC will record a Normal event with reason RemediationStopped. +func RemediationStoppedByNHC(recorder record.EventRecorder, object runtime.Object) { + NormalEvent(recorder, object, "RemediationStopped", "NHC added the timed-out annotation, remediation will be stopped") +} + +// RemediationFinished will record a Normal event with reason RemediationFinished and message Remediation finished. +func RemediationFinished(recorder record.EventRecorder, object runtime.Object) { + NormalEvent(recorder, object, "RemediationFinished", "Remediation finished") +} diff --git a/vendor/github.com/medik8s/common/pkg/labels/labels.go b/vendor/github.com/medik8s/common/pkg/labels/labels.go index 001cafa8..87409558 100644 --- a/vendor/github.com/medik8s/common/pkg/labels/labels.go +++ b/vendor/github.com/medik8s/common/pkg/labels/labels.go @@ -9,4 +9,6 @@ const ( ControlPlaneRole = "node-role.kubernetes.io/control-plane" // DefaultTemplate label indicates to third party tools (e.g. UI) the default remediation template in case several exists. DefaultTemplate = "remediation.medik8s.io/default-template" + // ExcludeFromRemediation label would be put on a node with value "true" in order to indicate this node should not be remediated. + ExcludeFromRemediation = "remediation.medik8s.io/exclude-from-remediation" ) diff --git a/vendor/modules.txt b/vendor/modules.txt index f708c858..e8d71b12 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -107,8 +107,9 @@ github.com/mailru/easyjson/jwriter # github.com/matttproud/golang_protobuf_extensions v1.0.2 ## explicit; go 1.9 github.com/matttproud/golang_protobuf_extensions/pbutil -# github.com/medik8s/common v1.10.0 +# github.com/medik8s/common v1.12.0 ## explicit; go 1.20 +github.com/medik8s/common/pkg/events github.com/medik8s/common/pkg/labels github.com/medik8s/common/pkg/nodes github.com/medik8s/common/pkg/resources From 953ded51c2cf0b0e1a491def8f70f158cd303bbd Mon Sep 17 00:00:00 2001 From: Michael Shitrit Date: Mon, 15 Jan 2024 09:30:12 +0200 Subject: [PATCH 4/4] ECOPROJECT-1829 Verify that Remediation started event isn't spammed Signed-off-by: Michael Shitrit --- controllers/selfnoderemediation_controller.go | 6 +++++- .../tests/controller/selfnoderemediation_controller_test.go | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/controllers/selfnoderemediation_controller.go b/controllers/selfnoderemediation_controller.go index 236c9ae0..8705f2f3 100644 --- a/controllers/selfnoderemediation_controller.go +++ b/controllers/selfnoderemediation_controller.go @@ -174,7 +174,11 @@ func (r *SelfNodeRemediationReconciler) Reconcile(ctx context.Context, req ctrl. r.logger.Error(err, "failed to get SNR") return ctrl.Result{}, err } - events.RemediationStarted(r.Recorder, snr) + + //used as an indication not to spam the event + if isFinalizerAlreadyAdded := controllerutil.ContainsFinalizer(snr, SNRFinalizer); !isFinalizerAlreadyAdded { + events.RemediationStarted(r.Recorder, snr) + } defer func() { if updateErr := r.updateSnrStatus(ctx, snr); updateErr != nil { diff --git a/controllers/tests/controller/selfnoderemediation_controller_test.go b/controllers/tests/controller/selfnoderemediation_controller_test.go index c64b7c75..dbd5d30d 100644 --- a/controllers/tests/controller/selfnoderemediation_controller_test.go +++ b/controllers/tests/controller/selfnoderemediation_controller_test.go @@ -173,7 +173,7 @@ var _ = Describe("SNR Controller", func() { It("Remediation flow", func() { node := verifyNodeIsUnschedulable() - verifyEvent("Normal", "RemediationCreated", "Remediation started") + verifyEvent("Normal", "RemediationStarted", "[remediation] Remediation started") verifyEvent("Normal", "MarkUnschedulable", "Remediation process - unhealthy node marked as unschedulable")