From 1deb25b2f8ba28df46eca69dce2b4274e3e51e36 Mon Sep 17 00:00:00 2001 From: googs1025 Date: Sun, 27 Oct 2024 01:01:16 +0800 Subject: [PATCH] feature(eviction): add event when EvictPod failed --- pkg/descheduler/evictions/evictions.go | 4 + pkg/descheduler/evictions/evictions_test.go | 225 +++++++++++++++----- 2 files changed, 174 insertions(+), 55 deletions(-) diff --git a/pkg/descheduler/evictions/evictions.go b/pkg/descheduler/evictions/evictions.go index 04b242702a..7b0036f472 100644 --- a/pkg/descheduler/evictions/evictions.go +++ b/pkg/descheduler/evictions/evictions.go @@ -134,6 +134,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio } span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error()))) klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictTotal) + pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: total eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictTotal) return err } @@ -145,6 +146,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio } span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error()))) klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName) + pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: node eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictPerNode) return err } } @@ -156,6 +158,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio } span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error()))) klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace) + pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: namespace eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictPerNamespace) return err } @@ -167,6 +170,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio if pe.metricsEnabled { metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc() } + pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler failed: %v", pod.Spec.NodeName, err.Error()) return err } diff --git a/pkg/descheduler/evictions/evictions_test.go b/pkg/descheduler/evictions/evictions_test.go index 475b0bc9d8..fcc8cb1164 100644 --- a/pkg/descheduler/evictions/evictions_test.go +++ b/pkg/descheduler/evictions/evictions_test.go @@ -18,12 +18,17 @@ package evictions import ( "context" + "fmt" + "reflect" "testing" + "time" v1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" "k8s.io/client-go/tools/events" @@ -40,35 +45,57 @@ func TestEvictPod(t *testing.T) { tests := []struct { description string node *v1.Node - pod *v1.Pod - pods []v1.Pod - want error + evictedPod *v1.Pod + pods []runtime.Object + wantErr bool + wantErrMsg string }{ { description: "test pod eviction - pod present", node: node1, - pod: pod1, - pods: []v1.Pod{*pod1}, - want: nil, + evictedPod: pod1, + pods: []runtime.Object{pod1}, + wantErr: false, }, { - description: "test pod eviction - pod absent", + description: "test pod eviction - pod absent (not found error)", node: node1, - pod: pod1, - pods: []v1.Pod{*test.BuildTestPod("p2", 400, 0, "node1", nil), *test.BuildTestPod("p3", 450, 0, "node1", nil)}, - want: nil, + evictedPod: pod1, + pods: []runtime.Object{test.BuildTestPod("p2", 400, 0, "node1", nil), test.BuildTestPod("p3", 450, 0, "node1", nil)}, + wantErr: true, + wantErrMsg: fmt.Sprintf("pod not found when evicting \"%s\": pods \"%s\" not found", pod1.Name, pod1.Name), + }, + { + description: "test pod eviction - pod absent (too many requests error)", + node: node1, + evictedPod: pod1, + pods: []runtime.Object{test.BuildTestPod("p2", 400, 0, "node1", nil), test.BuildTestPod("p3", 450, 0, "node1", nil)}, + wantErr: true, + wantErrMsg: fmt.Sprintf("error when evicting pod (ignoring) \"%s\": Too many requests: too many requests", pod1.Name), }, } for _, test := range tests { - fakeClient := &fake.Clientset{} - fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) { - return true, &v1.PodList{Items: test.pods}, nil + t.Run(test.description, func(t *testing.T) { + fakeClient := fake.NewClientset(test.pods...) + fakeClient.PrependReactor("create", "pods/eviction", func(action core.Action) (handled bool, ret runtime.Object, err error) { + if test.wantErrMsg == fmt.Sprintf("pod not found when evicting \"%s\": pods \"%s\" not found", test.evictedPod.Name, test.evictedPod.Name) { + return true, nil, k8serrors.NewNotFound(v1.Resource("pods"), test.evictedPod.Name) + } + if test.wantErrMsg == fmt.Sprintf("error when evicting pod (ignoring) \"%s\": Too many requests: too many requests", test.evictedPod.Name) { + return true, nil, k8serrors.NewTooManyRequestsError("too many requests") + } + return true, nil, nil + }) + + gotErr := evictPod(ctx, fakeClient, test.evictedPod, "v1") + if (gotErr != nil) != test.wantErr { + t.Errorf("Test error for Desc: %s. Expected error: %t, got: %v", test.description, test.wantErr, gotErr) + } + if gotErr != nil && test.wantErrMsg != "" && gotErr.Error() != test.wantErrMsg { + t.Errorf("Test error for Desc: %s. Expected error message: %q, got: %q", test.description, test.wantErrMsg, gotErr.Error()) + } }) - got := evictPod(ctx, fakeClient, test.pod, "v1") - if got != test.want { - t.Errorf("Test error for Desc: %s. Expected %v pod eviction to be %v, got %v", test.description, test.pod.Name, test.want, got) - } } } @@ -117,51 +144,139 @@ func TestPodTypes(t *testing.T) { } } -func TestNewPodEvictor(t *testing.T) { +func TestNewPodEvictor1(t *testing.T) { pod1 := test.BuildTestPod("pod", 400, 0, "node", nil) - - fakeClient := fake.NewSimpleClientset(pod1) - - eventRecorder := &events.FakeRecorder{} - - podEvictor := NewPodEvictor( - fakeClient, - eventRecorder, - NewOptions().WithMaxPodsToEvictPerNode(utilptr.To[uint](1)), - ) - - stubNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node"}} - - // 0 evictions expected - if evictions := podEvictor.NodeEvicted(stubNode); evictions != 0 { - t.Errorf("Expected 0 node evictions, got %q instead", evictions) + type podEvictorTest struct { + description string + pod *v1.Pod + maxPodsToEvictTotal *uint + maxPodsToEvictPerNode *uint + maxPodsToEvictPerNamespace *uint + expectedNodeEvictions uint + expectedTotalEvictions uint + expectedError error + // expectedEvent is a slice of strings representing expected events. + // Each string in the slice should follow the format: "EventType Reason Message". + // - "Warning Failed processing failed" + events []string } - // 0 evictions expected - if evictions := podEvictor.TotalEvicted(); evictions != 0 { - t.Errorf("Expected 0 total evictions, got %q instead", evictions) + tests := []podEvictorTest{ + { + description: "one eviction expected", + pod: pod1, + maxPodsToEvictTotal: utilptr.To[uint](1), + maxPodsToEvictPerNode: utilptr.To[uint](1), + maxPodsToEvictPerNamespace: utilptr.To[uint](1), + expectedNodeEvictions: 1, + expectedTotalEvictions: 1, + expectedError: nil, + events: []string{"Normal NotSet pod evicted from node node by sigs.k8s.io/descheduler"}, + }, + { + description: "eviction limit exceeded on total", + pod: pod1, + maxPodsToEvictTotal: utilptr.To[uint](0), + maxPodsToEvictPerNode: utilptr.To[uint](1), + maxPodsToEvictPerNamespace: utilptr.To[uint](1), + expectedNodeEvictions: 0, + expectedTotalEvictions: 0, + expectedError: NewEvictionTotalLimitError(), + events: []string{"Warning EvictionFailed pod eviction from node node by sigs.k8s.io/descheduler failed: total eviction limit exceeded (0)"}, + }, + { + description: "eviction limit exceeded on node", + pod: pod1, + maxPodsToEvictTotal: utilptr.To[uint](1), + maxPodsToEvictPerNode: utilptr.To[uint](0), + maxPodsToEvictPerNamespace: utilptr.To[uint](1), + expectedNodeEvictions: 0, + expectedTotalEvictions: 0, + expectedError: NewEvictionNodeLimitError("node"), + events: []string{"Warning EvictionFailed pod eviction from node node by sigs.k8s.io/descheduler failed: node eviction limit exceeded (0)"}, + }, + { + description: "eviction limit exceeded on node", + pod: pod1, + maxPodsToEvictTotal: utilptr.To[uint](1), + maxPodsToEvictPerNode: utilptr.To[uint](1), + maxPodsToEvictPerNamespace: utilptr.To[uint](0), + expectedNodeEvictions: 0, + expectedTotalEvictions: 0, + expectedError: NewEvictionNamespaceLimitError("default"), + events: []string{"Warning EvictionFailed pod eviction from node node by sigs.k8s.io/descheduler failed: namespace eviction limit exceeded (0)"}, + }, + { + description: "eviction error", + pod: pod1, + maxPodsToEvictTotal: utilptr.To[uint](1), + maxPodsToEvictPerNode: utilptr.To[uint](1), + maxPodsToEvictPerNamespace: utilptr.To[uint](1), + expectedNodeEvictions: 0, + expectedTotalEvictions: 0, + expectedError: fmt.Errorf("eviction error"), + events: []string{"Warning EvictionFailed pod evicted from node node by sigs.k8s.io/descheduler failed: eviction error"}, + }, } - if err := podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{}); err != nil { - t.Errorf("Expected a pod eviction, got an eviction error instead: %v", err) - } + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + fakeClient := fake.NewSimpleClientset(pod1) + fakeClient.PrependReactor("create", "pods/eviction", func(action core.Action) (handled bool, ret runtime.Object, err error) { + if test.expectedError != nil { + return true, nil, test.expectedError + } + return true, nil, nil + }) + eventRecorder := events.NewFakeRecorder(100) + podEvictor := NewPodEvictor( + fakeClient, + eventRecorder, + NewOptions(). + WithMaxPodsToEvictTotal(test.maxPodsToEvictTotal). + WithMaxPodsToEvictPerNode(test.maxPodsToEvictPerNode). + WithMaxPodsToEvictPerNamespace(test.maxPodsToEvictPerNamespace), + ) + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + stubNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node"}} - // 1 node eviction expected - if evictions := podEvictor.NodeEvicted(stubNode); evictions != 1 { - t.Errorf("Expected 1 node eviction, got %q instead", evictions) - } - // 1 total eviction expected - if evictions := podEvictor.TotalEvicted(); evictions != 1 { - t.Errorf("Expected 1 total evictions, got %q instead", evictions) + if err := podEvictor.EvictPod(ctx, test.pod, EvictOptions{}); err != nil && err.Error() != test.expectedError.Error() { + t.Errorf("Expected error: %v, got: %v", test.expectedError, err) + } + + if evictions := podEvictor.NodeEvicted(stubNode); evictions != test.expectedNodeEvictions { + t.Errorf("Expected %d node evictions, got %d instead", test.expectedNodeEvictions, evictions) + } + + if evictions := podEvictor.TotalEvicted(); evictions != test.expectedTotalEvictions { + t.Errorf("Expected %d total evictions, got %d instead", test.expectedTotalEvictions, evictions) + } + // Assert that the events are correct. + assertEqualEvents(t, test.events, eventRecorder.Events) + }) } +} - err := podEvictor.EvictPod(context.TODO(), pod1, EvictOptions{}) - if err == nil { - t.Errorf("Expected a pod eviction error, got nil instead") +func assertEqualEvents(t *testing.T, expected []string, actual <-chan string) { + t.Logf("Assert for events: %v", expected) + c := time.After(wait.ForeverTestTimeout) + for _, e := range expected { + select { + case a := <-actual: + if !reflect.DeepEqual(a, e) { + t.Errorf("Expected event %q, got %q instead", e, a) + } + case <-c: + t.Errorf("Expected event %q, got nothing", e) + // continue iterating to print all expected events + } } - switch err.(type) { - case *EvictionNodeLimitError: - // all good - default: - t.Errorf("Expected a pod eviction EvictionNodeLimitError error, got a different error instead: %v", err) + for { + select { + case a := <-actual: + t.Errorf("Unexpected event: %q", a) + default: + return // No more events, as expected. + } } }