Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature(eviction): add event when EvictPod failed #1536

Merged
merged 1 commit into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ type DeschedulerPolicy struct {
// MaxNoOfPodsToTotal restricts maximum of pods to be evicted total.
MaxNoOfPodsToEvictTotal *uint

// EvictionFailureEventNotification should be set to true to enable eviction failure event notification.
// Default is false.
EvictionFailureEventNotification *bool

// MetricsCollector configures collection of metrics about actual resource utilization
MetricsCollector MetricsCollector
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/api/v1alpha2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ type DeschedulerPolicy struct {
// MaxNoOfPodsToTotal restricts maximum of pods to be evicted total.
MaxNoOfPodsToEvictTotal *uint `json:"maxNoOfPodsToEvictTotal,omitempty"`

// EvictionFailureEventNotification should be set to true to enable eviction failure event notification.
// Default is false.
EvictionFailureEventNotification *bool

// MetricsCollector configures collection of metrics for actual resource utilization
MetricsCollector MetricsCollector `json:"metricsCollector,omitempty"`
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/v1alpha2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/api/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/api/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/descheduler/descheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ func newDescheduler(ctx context.Context, rs *options.DeschedulerServer, deschedu
WithMaxPodsToEvictPerNode(deschedulerPolicy.MaxNoOfPodsToEvictPerNode).
WithMaxPodsToEvictPerNamespace(deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace).
WithMaxPodsToEvictTotal(deschedulerPolicy.MaxNoOfPodsToEvictTotal).
WithEvictionFailureEventNotification(deschedulerPolicy.EvictionFailureEventNotification).
WithDryRun(rs.DryRun).
WithMetricsEnabled(!rs.DisableMetrics),
)
Expand Down
66 changes: 40 additions & 26 deletions pkg/descheduler/evictions/evictions.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,20 +206,21 @@ type (
)

type PodEvictor struct {
mu sync.RWMutex
client clientset.Interface
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
maxPodsToEvictTotal *uint
nodePodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
totalPodCount uint
metricsEnabled bool
eventRecorder events.EventRecorder
erCache *evictionRequestsCache
featureGates featuregate.FeatureGate
mu sync.RWMutex
client clientset.Interface
policyGroupVersion string
dryRun bool
evictionFailureEventNotification bool
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
maxPodsToEvictTotal *uint
nodePodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
totalPodCount uint
metricsEnabled bool
eventRecorder events.EventRecorder
erCache *evictionRequestsCache
featureGates featuregate.FeatureGate

// registeredHandlers contains the registrations of all handlers. It's used to check if all handlers have finished syncing before the scheduling cycles start.
registeredHandlers []cache.ResourceEventHandlerRegistration
Expand All @@ -238,17 +239,18 @@ func NewPodEvictor(
}

podEvictor := &PodEvictor{
client: client,
eventRecorder: eventRecorder,
policyGroupVersion: options.policyGroupVersion,
dryRun: options.dryRun,
maxPodsToEvictPerNode: options.maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: options.maxPodsToEvictPerNamespace,
maxPodsToEvictTotal: options.maxPodsToEvictTotal,
metricsEnabled: options.metricsEnabled,
nodePodCount: make(nodePodEvictedCount),
namespacePodCount: make(namespacePodEvictCount),
featureGates: featureGates,
client: client,
eventRecorder: eventRecorder,
policyGroupVersion: options.policyGroupVersion,
dryRun: options.dryRun,
evictionFailureEventNotification: options.evictionFailureEventNotification,
maxPodsToEvictPerNode: options.maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: options.maxPodsToEvictPerNamespace,
maxPodsToEvictTotal: options.maxPodsToEvictTotal,
metricsEnabled: options.metricsEnabled,
nodePodCount: make(nodePodEvictedCount),
namespacePodCount: make(namespacePodEvictCount),
featureGates: featureGates,
}

if featureGates.Enabled(features.EvictionsInBackground) {
Expand Down Expand Up @@ -481,6 +483,9 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictTotal)
if pe.evictionFailureEventNotification {
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: total eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictTotal)
}
return err
}

Expand All @@ -492,6 +497,9 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
if pe.evictionFailureEventNotification {
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: node eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictPerNode)
}
return err
}
}
Expand All @@ -503,6 +511,9 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
}
span.AddEvent("Eviction Failed", trace.WithAttributes(attribute.String("node", pod.Spec.NodeName), attribute.String("err", err.Error())))
klog.ErrorS(err, "Error evicting pod", "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace, "pod", klog.KObj(pod))
if pe.evictionFailureEventNotification {
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: namespace eviction limit exceeded (%v)", pod.Spec.NodeName, *pe.maxPodsToEvictPerNamespace)
}
return err
}

Expand All @@ -514,6 +525,9 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": opts.StrategyName, "namespace": pod.Namespace, "node": pod.Spec.NodeName, "profile": opts.ProfileName}).Inc()
}
if pe.evictionFailureEventNotification {
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeWarning, "EvictionFailed", "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler failed: %v", pod.Spec.NodeName, err.Error())
}
return err
}

Expand Down Expand Up @@ -542,7 +556,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptio
reason = "NotSet"
}
}
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod eviction from %v node by sigs.k8s.io/descheduler", pod.Spec.NodeName)
}
return nil
}
Expand Down
Loading
Loading