Skip to content

Commit

Permalink
[nodeutilization]: actual usage client through kubernetes metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
ingvagabund committed Nov 15, 2024
1 parent b672f2e commit 0eac57f
Show file tree
Hide file tree
Showing 26 changed files with 1,427 additions and 119 deletions.
2 changes: 2 additions & 0 deletions cmd/descheduler/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
componentbaseconfig "k8s.io/component-base/config"
componentbaseoptions "k8s.io/component-base/config/options"
"k8s.io/klog/v2"
metricsclient "k8s.io/metrics/pkg/client/clientset/versioned"

"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
Expand All @@ -46,6 +47,7 @@ type DeschedulerServer struct {

Client clientset.Interface
EventClient clientset.Interface
MetricsClient metricsclient.Interface
SecureServing *apiserveroptions.SecureServingOptionsWithLoopback
SecureServingInfo *apiserver.SecureServingInfo
DisableMetrics bool
Expand Down
3 changes: 3 additions & 0 deletions kubernetes/base/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ rules:
resources: ["leases"]
resourceNames: ["descheduler"]
verbs: ["get", "patch", "delete"]
- apiGroups: ["metrics.k8s.io"]
resources: ["nodes", "pods"]
verbs: ["get", "list"]
---
apiVersion: v1
kind: ServiceAccount
Expand Down
10 changes: 10 additions & 0 deletions pkg/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ type DeschedulerPolicy struct {

// MaxNoOfPodsToTotal restricts maximum of pods to be evicted total.
MaxNoOfPodsToEvictTotal *uint

// MetricsCollector configures collection of metrics about actual resource utilization
MetricsCollector MetricsCollector
}

// Namespaces carries a list of included/excluded namespaces
Expand Down Expand Up @@ -84,3 +87,10 @@ type PluginSet struct {
Enabled []string
Disabled []string
}

// MetricsCollector configures collection of metrics about actual resource utilization
type MetricsCollector struct {
// Enabled metrics collection from kubernetes metrics.
// Later, the collection can be extended to other providers.
Enabled bool
}
10 changes: 10 additions & 0 deletions pkg/api/v1alpha2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ type DeschedulerPolicy struct {

// MaxNoOfPodsToTotal restricts maximum of pods to be evicted total.
MaxNoOfPodsToEvictTotal *uint `json:"maxNoOfPodsToEvictTotal,omitempty"`

// MetricsCollector configures collection of metrics about actual resource utilization
MetricsCollector MetricsCollector `json:"metricsCollector,omitempty"`
}

type DeschedulerProfile struct {
Expand All @@ -66,3 +69,10 @@ type PluginSet struct {
Enabled []string `json:"enabled"`
Disabled []string `json:"disabled"`
}

// MetricsCollector configures collection of metrics about actual resource utilization
type MetricsCollector struct {
// Enabled metrics collection from kubernetes metrics.
// Later, the collection can be extended to other providers.
Enabled bool `json:"enabled"`
}
36 changes: 36 additions & 0 deletions pkg/api/v1alpha2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions pkg/api/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions pkg/api/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 21 additions & 1 deletion pkg/descheduler/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ import (

clientset "k8s.io/client-go/kubernetes"
componentbaseconfig "k8s.io/component-base/config"
metricsclient "k8s.io/metrics/pkg/client/clientset/versioned"

// Ensure to load all auth plugins.
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
)

func CreateClient(clientConnection componentbaseconfig.ClientConnectionConfiguration, userAgt string) (clientset.Interface, error) {
func createConfig(clientConnection componentbaseconfig.ClientConnectionConfiguration, userAgt string) (*rest.Config, error) {
var cfg *rest.Config
if len(clientConnection.Kubeconfig) != 0 {
master, err := GetMasterFromKubeconfig(clientConnection.Kubeconfig)
Expand Down Expand Up @@ -56,9 +57,28 @@ func CreateClient(clientConnection componentbaseconfig.ClientConnectionConfigura
cfg = rest.AddUserAgent(cfg, userAgt)
}

return cfg, nil
}

func CreateClient(clientConnection componentbaseconfig.ClientConnectionConfiguration, userAgt string) (clientset.Interface, error) {
cfg, err := createConfig(clientConnection, userAgt)
if err != nil {
return nil, fmt.Errorf("unable to create config: %v", err)
}

return clientset.NewForConfig(cfg)
}

func CreateMetricsClient(clientConnection componentbaseconfig.ClientConnectionConfiguration, userAgt string) (metricsclient.Interface, error) {
cfg, err := createConfig(clientConnection, userAgt)
if err != nil {
return nil, fmt.Errorf("unable to create config: %v", err)
}

// Create the metrics clientset to access the metrics.k8s.io API
return metricsclient.NewForConfig(cfg)
}

func GetMasterFromKubeconfig(filename string) (string, error) {
config, err := clientcmd.LoadFromFile(filename)
if err != nil {
Expand Down
63 changes: 45 additions & 18 deletions pkg/descheduler/descheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,44 +23,42 @@ import (
"strconv"
"time"

policyv1 "k8s.io/api/policy/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
"k8s.io/apimachinery/pkg/runtime/schema"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/client-go/discovery"
"k8s.io/client-go/informers"
"k8s.io/client-go/tools/events"
componentbaseconfig "k8s.io/component-base/config"
"k8s.io/klog/v2"

v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1"
policyv1 "k8s.io/api/policy/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
utilversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/discovery"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
fakeclientset "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"

"sigs.k8s.io/descheduler/pkg/descheduler/client"
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
"sigs.k8s.io/descheduler/pkg/tracing"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/pkg/version"
"k8s.io/client-go/tools/events"
componentbaseconfig "k8s.io/component-base/config"
"k8s.io/klog/v2"

"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/metrics"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/client"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/framework/pluginregistry"
frameworkprofile "sigs.k8s.io/descheduler/pkg/framework/profile"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
"sigs.k8s.io/descheduler/pkg/tracing"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/pkg/version"
)

type eprunner func(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status
Expand All @@ -79,6 +77,7 @@ type descheduler struct {
eventRecorder events.EventRecorder
podEvictor *evictions.PodEvictor
podEvictionReactionFnc func(*fakeclientset.Clientset) func(action core.Action) (bool, runtime.Object, error)
metricsCollector *metricscollector.MetricsCollector
}

type informerResources struct {
Expand Down Expand Up @@ -156,6 +155,11 @@ func newDescheduler(rs *options.DeschedulerServer, deschedulerPolicy *api.Desche
WithMetricsEnabled(!rs.DisableMetrics),
)

var metricsCollector *metricscollector.MetricsCollector
if deschedulerPolicy.MetricsCollector.Enabled {
metricsCollector = metricscollector.NewMetricsCollector(rs.Client, rs.MetricsClient)
}

return &descheduler{
rs: rs,
ir: ir,
Expand All @@ -165,6 +169,7 @@ func newDescheduler(rs *options.DeschedulerServer, deschedulerPolicy *api.Desche
eventRecorder: eventRecorder,
podEvictor: podEvictor,
podEvictionReactionFnc: podEvictionReactionFnc,
metricsCollector: metricsCollector,
}, nil
}

Expand Down Expand Up @@ -244,6 +249,7 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac
frameworkprofile.WithSharedInformerFactory(d.sharedInformerFactory),
frameworkprofile.WithPodEvictor(d.podEvictor),
frameworkprofile.WithGetPodsAssignedToNodeFnc(d.getPodsAssignedToNode),
frameworkprofile.WithMetricsCollector(d.metricsCollector),
)
if err != nil {
klog.ErrorS(err, "unable to create a profile", "profile", profile.Name)
Expand Down Expand Up @@ -308,6 +314,14 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error {
return err
}

if deschedulerPolicy.MetricsCollector.Enabled {
metricsClient, err := client.CreateMetricsClient(clientConnection, "descheduler")
if err != nil {
return err
}
rs.MetricsClient = metricsClient
}

runFn := func() error {
return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion)
}
Expand Down Expand Up @@ -415,6 +429,19 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())

if deschedulerPolicy.MetricsCollector.Enabled {
go func() {
klog.V(2).Infof("Starting metrics collector")
descheduler.metricsCollector.Run(ctx)
klog.V(2).Infof("Stopped metrics collector")
}()
wait.PollUntilWithContext(ctx, time.Second, func(context.Context) (done bool, err error) {
synced := descheduler.metricsCollector.HasSynced()
fmt.Printf("synced: %v\n", synced)
return synced, nil
})
}

wait.NonSlidingUntil(func() {
// A next context is created here intentionally to avoid nesting the spans via context.
sCtx, sSpan := tracing.Tracer().Start(ctx, "NonSlidingUntil")
Expand Down
Loading

0 comments on commit 0eac57f

Please sign in to comment.