diff --git a/README.md b/README.md index e9a23ca034..36dce8aa0d 100644 --- a/README.md +++ b/README.md @@ -103,17 +103,17 @@ See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/ku Run As A Job ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.22.1' | kubectl apply -f - ``` Run As A CronJob ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.22.1' | kubectl apply -f - ``` Run As A Deployment ``` -kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.22.0' | kubectl apply -f - +kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.22.1' | kubectl apply -f - ``` ## User Guide diff --git a/charts/descheduler/Chart.yaml b/charts/descheduler/Chart.yaml index b194a1b7ea..dab3328232 100644 --- a/charts/descheduler/Chart.yaml +++ b/charts/descheduler/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 name: descheduler -version: 0.22.0 -appVersion: 0.22.0 +version: 0.22.1 +appVersion: 0.22.1 description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that. keywords: - kubernetes diff --git a/docs/user-guide.md b/docs/user-guide.md index f4a4881e6d..c7996020d5 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -4,6 +4,7 @@ Starting with descheduler release v0.10.0 container images are available in the Descheduler Version | Container Image | Architectures | ------------------- |-----------------------------------------------------|-------------------------| +v0.22.1 | k8s.gcr.io/descheduler/descheduler:v0.22.1 | AMD64
ARM64
ARMv7 | v0.22.0 | k8s.gcr.io/descheduler/descheduler:v0.22.0 | AMD64
ARM64
ARMv7 | v0.21.0 | k8s.gcr.io/descheduler/descheduler:v0.21.0 | AMD64
ARM64
ARMv7 | v0.20.0 | k8s.gcr.io/descheduler/descheduler:v0.20.0 | AMD64
ARM64 | diff --git a/kubernetes/cronjob/cronjob.yaml b/kubernetes/cronjob/cronjob.yaml index c38d52efae..8f30b53797 100644 --- a/kubernetes/cronjob/cronjob.yaml +++ b/kubernetes/cronjob/cronjob.yaml @@ -16,7 +16,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 volumeMounts: - mountPath: /policy-dir name: policy-volume diff --git a/kubernetes/deployment/deployment.yaml b/kubernetes/deployment/deployment.yaml index 478cc69072..82ff868065 100644 --- a/kubernetes/deployment/deployment.yaml +++ b/kubernetes/deployment/deployment.yaml @@ -19,7 +19,7 @@ spec: serviceAccountName: descheduler-sa containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 imagePullPolicy: IfNotPresent command: - "/bin/descheduler" diff --git a/kubernetes/job/job.yaml b/kubernetes/job/job.yaml index 9f28d29767..1c544fee54 100644 --- a/kubernetes/job/job.yaml +++ b/kubernetes/job/job.yaml @@ -14,7 +14,7 @@ spec: priorityClassName: system-cluster-critical containers: - name: descheduler - image: k8s.gcr.io/descheduler/descheduler:v0.22.0 + image: k8s.gcr.io/descheduler/descheduler:v0.22.1 volumeMounts: - mountPath: /policy-dir name: policy-volume diff --git a/pkg/descheduler/strategies/failedpods.go b/pkg/descheduler/strategies/failedpods.go index e9c7ad23c6..74d79956c9 100644 --- a/pkg/descheduler/strategies/failedpods.go +++ b/pkg/descheduler/strategies/failedpods.go @@ -19,7 +19,7 @@ import ( // validatedFailedPodsStrategyParams contains validated strategy parameters type validatedFailedPodsStrategyParams struct { - *validation.ValidatedStrategyParams + validation.ValidatedStrategyParams includingInitContainers bool reasons sets.String excludeOwnerKinds sets.String @@ -46,9 +46,15 @@ func RemoveFailedPods( evictions.WithLabelSelector(strategyParams.LabelSelector), ) + var labelSelector *metav1.LabelSelector + if strategy.Params != nil { + labelSelector = strategy.Params.LabelSelector + } + for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) fieldSelectorString := "spec.nodeName=" + node.Name + ",status.phase=" + string(v1.PodFailed) + pods, err := podutil.ListPodsOnANodeWithFieldSelector( ctx, client, @@ -57,7 +63,7 @@ func RemoveFailedPods( podutil.WithFilter(evictable.IsEvictable), podutil.WithNamespaces(strategyParams.IncludedNamespaces.UnsortedList()), podutil.WithoutNamespaces(strategyParams.ExcludedNamespaces.UnsortedList()), - podutil.WithLabelSelector(strategy.Params.LabelSelector), + podutil.WithLabelSelector(labelSelector), ) if err != nil { klog.ErrorS(err, "Error listing a nodes failed pods", "node", klog.KObj(node)) @@ -84,7 +90,9 @@ func validateAndParseRemoveFailedPodsParams( params *api.StrategyParameters, ) (*validatedFailedPodsStrategyParams, error) { if params == nil { - return &validatedFailedPodsStrategyParams{}, nil + return &validatedFailedPodsStrategyParams{ + ValidatedStrategyParams: validation.DefaultValidatedStrategyParams(), + }, nil } strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, params) @@ -103,7 +111,7 @@ func validateAndParseRemoveFailedPodsParams( } return &validatedFailedPodsStrategyParams{ - ValidatedStrategyParams: strategyParams, + ValidatedStrategyParams: *strategyParams, includingInitContainers: includingInitContainers, reasons: reasons, excludeOwnerKinds: excludeOwnerKinds, diff --git a/pkg/descheduler/strategies/failedpods_test.go b/pkg/descheduler/strategies/failedpods_test.go index 52f4f05740..c5caf5a43c 100644 --- a/pkg/descheduler/strategies/failedpods_test.go +++ b/pkg/descheduler/strategies/failedpods_test.go @@ -44,6 +44,13 @@ func TestRemoveFailedPods(t *testing.T) { expectedEvictedPodCount int pods []v1.Pod }{ + { + description: "default empty strategy, 0 failures, 0 evictions", + strategy: api.DeschedulerStrategy{}, + nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)}, + expectedEvictedPodCount: 0, + pods: []v1.Pod{}, // no pods come back with field selector phase=Failed + }, { description: "0 failures, 0 evictions", strategy: createStrategy(true, false, nil, nil, nil, false), @@ -225,6 +232,7 @@ func TestValidRemoveFailedPodsParams(t *testing.T) { params *api.StrategyParameters }{ {name: "validate nil params", params: nil}, + {name: "validate empty params", params: &api.StrategyParameters{}}, {name: "validate reasons params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{ Reasons: []string{"CreateContainerConfigError"}, }}}, diff --git a/pkg/descheduler/strategies/validation/strategyparams.go b/pkg/descheduler/strategies/validation/strategyparams.go index cd6d4570af..33446e6fa1 100644 --- a/pkg/descheduler/strategies/validation/strategyparams.go +++ b/pkg/descheduler/strategies/validation/strategyparams.go @@ -22,20 +22,22 @@ type ValidatedStrategyParams struct { NodeFit bool } +func DefaultValidatedStrategyParams() ValidatedStrategyParams { + return ValidatedStrategyParams{ThresholdPriority: utils.SystemCriticalPriority} +} + func ValidateAndParseStrategyParams( ctx context.Context, client clientset.Interface, params *api.StrategyParameters, ) (*ValidatedStrategyParams, error) { - var includedNamespaces, excludedNamespaces sets.String if params == nil { - return &ValidatedStrategyParams{ - IncludedNamespaces: includedNamespaces, - ExcludedNamespaces: excludedNamespaces, - }, nil + defaultValidatedStrategyParams := DefaultValidatedStrategyParams() + return &defaultValidatedStrategyParams, nil } // At most one of include/exclude can be set + var includedNamespaces, excludedNamespaces sets.String if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 { return nil, fmt.Errorf("only one of Include/Exclude namespaces can be set") } diff --git a/test/e2e/e2e_failedpods_test.go b/test/e2e/e2e_failedpods_test.go new file mode 100644 index 0000000000..4b4a9665fe --- /dev/null +++ b/test/e2e/e2e_failedpods_test.go @@ -0,0 +1,154 @@ +package e2e + +import ( + "context" + batchv1 "k8s.io/api/batch/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "strings" + "testing" + "time" + + deschedulerapi "sigs.k8s.io/descheduler/pkg/api" + "sigs.k8s.io/descheduler/pkg/descheduler/strategies" +) + +var oneHourPodLifetimeSeconds uint = 3600 + +func TestFailedPods(t *testing.T) { + ctx := context.Background() + clientSet, _, stopCh := initializeClient(t) + defer close(stopCh) + nodeList, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + t.Errorf("Error listing node with %v", err) + } + nodes, _ := splitNodesAndWorkerNodes(nodeList.Items) + t.Log("Creating testing namespace") + testNamespace := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "e2e-" + strings.ToLower(t.Name())}} + if _, err := clientSet.CoreV1().Namespaces().Create(ctx, testNamespace, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unable to create ns %v", testNamespace.Name) + } + defer clientSet.CoreV1().Namespaces().Delete(ctx, testNamespace.Name, metav1.DeleteOptions{}) + testCases := map[string]struct { + expectedEvictedCount int + strategyParams *deschedulerapi.StrategyParameters + }{ + "test-failed-pods-nil-strategy": { + expectedEvictedCount: 1, + strategyParams: nil, + }, + "test-failed-pods-default-strategy": { + expectedEvictedCount: 1, + strategyParams: &deschedulerapi.StrategyParameters{}, + }, + "test-failed-pods-default-failed-pods": { + expectedEvictedCount: 1, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{}, + }, + }, + "test-failed-pods-reason-unmatched": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{Reasons: []string{"ReasonDoesNotMatch"}}, + }, + }, + "test-failed-pods-min-age-unmet": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{MinPodLifetimeSeconds: &oneHourPodLifetimeSeconds}, + }, + }, + "test-failed-pods-exclude-job-kind": { + expectedEvictedCount: 0, + strategyParams: &deschedulerapi.StrategyParameters{ + FailedPods: &deschedulerapi.FailedPods{ExcludeOwnerKinds: []string{"Job"}}, + }, + }, + } + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + job := initFailedJob(name, testNamespace.Namespace) + t.Logf("Creating job %s in %s namespace", job.Name, job.Namespace) + jobClient := clientSet.BatchV1().Jobs(testNamespace.Name) + if _, err := jobClient.Create(ctx, job, metav1.CreateOptions{}); err != nil { + t.Fatalf("Error creating Job %s: %v", name, err) + } + deletePropagationPolicy := metav1.DeletePropagationForeground + defer jobClient.Delete(ctx, job.Name, metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}) + waitForJobPodPhase(ctx, t, clientSet, job, v1.PodFailed) + + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + + t.Logf("Running RemoveFailedPods strategy for %s", name) + strategies.RemoveFailedPods( + ctx, + clientSet, + deschedulerapi.DeschedulerStrategy{ + Enabled: true, + Params: tc.strategyParams, + }, + nodes, + podEvictor, + ) + t.Logf("Finished RemoveFailedPods strategy for %s", name) + + if actualEvictedCount := podEvictor.TotalEvicted(); actualEvictedCount == tc.expectedEvictedCount { + t.Logf("Total of %d Pods were evicted for %s", actualEvictedCount, name) + } else { + t.Errorf("Unexpected number of pods have been evicted, got %v, expected %v", actualEvictedCount, tc.expectedEvictedCount) + } + }) + } +} + +func initFailedJob(name, namespace string) *batchv1.Job { + podSpec := MakePodSpec("", nil) + podSpec.Containers[0].Command = []string{"/bin/false"} + podSpec.RestartPolicy = v1.RestartPolicyNever + labelsSet := labels.Set{"test": name, "name": name} + jobBackoffLimit := int32(0) + return &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labelsSet, + Name: name, + Namespace: namespace, + }, + Spec: batchv1.JobSpec{ + Template: v1.PodTemplateSpec{ + Spec: podSpec, + ObjectMeta: metav1.ObjectMeta{Labels: labelsSet}, + }, + BackoffLimit: &jobBackoffLimit, + }, + } +} + +func waitForJobPodPhase(ctx context.Context, t *testing.T, clientSet clientset.Interface, job *batchv1.Job, phase v1.PodPhase) { + podClient := clientSet.CoreV1().Pods(job.Namespace) + if err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) { + t.Log(labels.FormatLabels(job.Labels)) + if podList, err := podClient.List(ctx, metav1.ListOptions{LabelSelector: labels.FormatLabels(job.Labels)}); err != nil { + return false, err + } else { + if len(podList.Items) == 0 { + t.Logf("Job controller has not created Pod for job %s yet", job.Name) + return false, nil + } + for _, pod := range podList.Items { + if pod.Status.Phase != phase { + t.Logf("Pod %v not in %s phase yet, is %v instead", pod.Name, phase, pod.Status.Phase) + return false, nil + } + } + t.Logf("Job %v Pod is in %s phase now", job.Name, phase) + return true, nil + } + }); err != nil { + t.Fatalf("Error waiting for pods in %s phase: %v", phase, err) + } +} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 9a3008f2a6..0c1d435d57 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -290,20 +290,7 @@ func TestLowNodeUtilization(t *testing.T) { waitForRCPodsRunning(ctx, t, clientSet, rc) // Run LowNodeUtilization strategy - evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) - if err != nil || len(evictionPolicyGroupVersion) == 0 { - t.Fatalf("%v", err) - } - podEvictor := evictions.NewPodEvictor( - clientSet, - evictionPolicyGroupVersion, - false, - 0, - nodes, - true, - false, - false, - ) + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) podsOnMosttUtilizedNode, err := podutil.ListPodsOnANode(ctx, clientSet, workerNodes[0], podutil.WithFilter(podEvictor.Evictable().IsEvictable)) if err != nil { @@ -1293,3 +1280,20 @@ func splitNodesAndWorkerNodes(nodes []v1.Node) ([]*v1.Node, []*v1.Node) { } return allNodes, workerNodes } + +func initPodEvictorOrFail(t *testing.T, clientSet clientset.Interface, nodes []*v1.Node) *evictions.PodEvictor { + evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) + if err != nil || len(evictionPolicyGroupVersion) == 0 { + t.Fatalf("Error creating eviction policy group: %v", err) + } + return evictions.NewPodEvictor( + clientSet, + evictionPolicyGroupVersion, + false, + 0, + nodes, + true, + false, + false, + ) +} diff --git a/test/e2e/e2e_topologyspreadconstraint_test.go b/test/e2e/e2e_topologyspreadconstraint_test.go index ad814e0275..d6e1c8b427 100644 --- a/test/e2e/e2e_topologyspreadconstraint_test.go +++ b/test/e2e/e2e_topologyspreadconstraint_test.go @@ -11,8 +11,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" deschedulerapi "sigs.k8s.io/descheduler/pkg/api" - "sigs.k8s.io/descheduler/pkg/descheduler/evictions" - eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" "sigs.k8s.io/descheduler/pkg/descheduler/strategies" ) @@ -79,22 +77,9 @@ func TestTopologySpreadConstraint(t *testing.T) { defer deleteRC(ctx, t, clientSet, violatorRc) waitForRCPodsRunning(ctx, t, clientSet, violatorRc) - // Run TopologySpreadConstraint strategy - evictionPolicyGroupVersion, err := eutils.SupportEviction(clientSet) - if err != nil || len(evictionPolicyGroupVersion) == 0 { - t.Fatalf("Error creating eviction policy group for %s: %v", name, err) - } - podEvictor := evictions.NewPodEvictor( - clientSet, - evictionPolicyGroupVersion, - false, - 0, - nodes, - true, - false, - false, - ) + podEvictor := initPodEvictorOrFail(t, clientSet, nodes) + // Run TopologySpreadConstraint strategy t.Logf("Running RemovePodsViolatingTopologySpreadConstraint strategy for %s", name) strategies.RemovePodsViolatingTopologySpreadConstraint( ctx, @@ -119,6 +104,9 @@ func TestTopologySpreadConstraint(t *testing.T) { t.Fatalf("Pods were not evicted for %s TopologySpreadConstraint", name) } + // Ensure recently evicted Pod are rescheduled and running before asserting for a balanced topology spread + waitForRCPodsRunning(ctx, t, clientSet, rc) + pods, err := clientSet.CoreV1().Pods(testNamespace.Name).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", tc.labelKey, tc.labelValue)}) if err != nil { t.Errorf("Error listing pods for %s: %v", name, err)