Skip to content

Commit

Permalink
Implementation of GetStatsSummary and GetMetricsResource for Virt…
Browse files Browse the repository at this point in the history
…ual Kubelet (#163)

* implemented  GetStatsSummary and GetMetricsResource for Virtual Kubelet

* fixed ClusterRole for node proxy

* limit the clusterrole with get and list

* remove unused Metrics client interface
  • Loading branch information
enrichman authored Dec 27, 2024
1 parent 70a098d commit 7fdd48d
Show file tree
Hide file tree
Showing 7 changed files with 407 additions and 31 deletions.
25 changes: 24 additions & 1 deletion charts/k3k/templates/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,27 @@ roleRef:
subjects:
- kind: ServiceAccount
name: {{ include "k3k.serviceAccountName" . }}
namespace: {{ .Values.namespace }}
namespace: {{ .Values.namespace }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "k3k.fullname" . }}-node-proxy
rules:
- apiGroups:
- ""
resources:
- "nodes"
- "nodes/proxy"
verbs:
- "get"
- "list"
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: {{ include "k3k.fullname" . }}-node-proxy
roleRef:
kind: ClusterRole
name: {{ include "k3k.fullname" . }}-node-proxy
apiGroup: rbac.authorization.k8s.io
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ require (
github.com/go-logr/zapr v1.3.0
github.com/onsi/ginkgo/v2 v2.20.1
github.com/onsi/gomega v1.36.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_model v0.6.1
github.com/rancher/dynamiclistener v1.27.5
github.com/sirupsen/logrus v1.9.3
Expand All @@ -29,6 +30,7 @@ require (
k8s.io/apimachinery v0.29.11
k8s.io/apiserver v0.29.11
k8s.io/client-go v0.29.11
k8s.io/component-base v0.29.11
k8s.io/metrics v0.29.11
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
sigs.k8s.io/controller-runtime v0.17.5
Expand Down Expand Up @@ -80,7 +82,6 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
Expand Down Expand Up @@ -119,7 +120,6 @@ require (
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.29.2 // indirect
k8s.io/component-base v0.29.11 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kms v0.31.0 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
Expand Down
4 changes: 2 additions & 2 deletions k3k-kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,10 @@ func (k *kubelet) newProviderFunc(namespace, name, hostname, agentIP, serverIP,
if err != nil {
return nil, nil, errors.New("unable to make nodeutil provider: " + err.Error())
}
nodeProvider := provider.Node{}

provider.ConfigureNode(pc.Node, hostname, k.port, agentIP)
return utilProvider, &nodeProvider, nil

return utilProvider, &provider.Node{}, nil
}
}

Expand Down
196 changes: 196 additions & 0 deletions k3k-kubelet/provider/collectors/kubelet_resource_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
/*
Copyright (c) Microsoft Corporation.
Licensed under the Apache 2.0 license.
See https://github.com/virtual-kubelet/azure-aci/tree/master/pkg/metrics/collectors
*/

package collectors

import (
"time"

stats "github.com/virtual-kubelet/virtual-kubelet/node/api/statsv1alpha1"
compbasemetrics "k8s.io/component-base/metrics"
)

// defining metrics
var (
nodeCPUUsageDesc = compbasemetrics.NewDesc("node_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the node in core-seconds",
nil,
nil,
compbasemetrics.ALPHA,
"")

nodeMemoryUsageDesc = compbasemetrics.NewDesc("node_memory_working_set_bytes",
"Current working set of the node in bytes",
nil,
nil,
compbasemetrics.ALPHA,
"")

containerCPUUsageDesc = compbasemetrics.NewDesc("container_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the container in core-seconds",
[]string{"container", "pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")

containerMemoryUsageDesc = compbasemetrics.NewDesc("container_memory_working_set_bytes",
"Current working set of the container in bytes",
[]string{"container", "pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")

podCPUUsageDesc = compbasemetrics.NewDesc("pod_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the pod in core-seconds",
[]string{"pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")

podMemoryUsageDesc = compbasemetrics.NewDesc("pod_memory_working_set_bytes",
"Current working set of the pod in bytes",
[]string{"pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")

resourceScrapeResultDesc = compbasemetrics.NewDesc("scrape_error",
"1 if there was an error while getting container metrics, 0 otherwise",
nil,
nil,
compbasemetrics.ALPHA,
"")

containerStartTimeDesc = compbasemetrics.NewDesc("container_start_time_seconds",
"Start time of the container since unix epoch in seconds",
[]string{"container", "pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")
)

// NewResourceMetricsCollector returns a metrics.StableCollector which exports resource metrics
func NewKubeletResourceMetricsCollector(podStats *stats.Summary) compbasemetrics.StableCollector {
return &resourceMetricsCollector{
providerPodStats: podStats,
}
}

type resourceMetricsCollector struct {
compbasemetrics.BaseStableCollector

providerPodStats *stats.Summary
}

// Check if resourceMetricsCollector implements necessary interface
var _ compbasemetrics.StableCollector = &resourceMetricsCollector{}

// DescribeWithStability implements compbasemetrics.StableCollector
func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *compbasemetrics.Desc) {
ch <- nodeCPUUsageDesc
ch <- nodeMemoryUsageDesc
ch <- containerStartTimeDesc
ch <- containerCPUUsageDesc
ch <- containerMemoryUsageDesc
ch <- podCPUUsageDesc
ch <- podMemoryUsageDesc
ch <- resourceScrapeResultDesc
}

// CollectWithStability implements compbasemetrics.StableCollector
// Since new containers are frequently created and removed, using the Gauge would
// leak metric collectors for containers or pods that no longer exist. Instead, implement
// custom collector in a way that only collects metrics for active containers.
func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- compbasemetrics.Metric) {
var errorCount float64
defer func() {
ch <- compbasemetrics.NewLazyConstMetric(resourceScrapeResultDesc, compbasemetrics.GaugeValue, errorCount)
}()

statsSummary := *rc.providerPodStats
rc.collectNodeCPUMetrics(ch, statsSummary.Node)
rc.collectNodeMemoryMetrics(ch, statsSummary.Node)

for _, pod := range statsSummary.Pods {
for _, container := range pod.Containers {
rc.collectContainerStartTime(ch, pod, container)
rc.collectContainerCPUMetrics(ch, pod, container)
rc.collectContainerMemoryMetrics(ch, pod, container)
}
rc.collectPodCPUMetrics(ch, pod)
rc.collectPodMemoryMetrics(ch, pod)
}
}

// implement collector methods and validate that correct data is used

func (rc *resourceMetricsCollector) collectNodeCPUMetrics(ch chan<- compbasemetrics.Metric, s stats.NodeStats) {
if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
compbasemetrics.NewLazyConstMetric(nodeCPUUsageDesc, compbasemetrics.CounterValue, float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second)))
}

func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- compbasemetrics.Metric, s stats.NodeStats) {
if s.Memory == nil || s.Memory.WorkingSetBytes == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time,
compbasemetrics.NewLazyConstMetric(nodeMemoryUsageDesc, compbasemetrics.GaugeValue, float64(*s.Memory.WorkingSetBytes)))
}

func (rc *resourceMetricsCollector) collectContainerStartTime(ch chan<- compbasemetrics.Metric, pod stats.PodStats, s stats.ContainerStats) {
if s.StartTime.Unix() <= 0 {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.StartTime.Time,
compbasemetrics.NewLazyConstMetric(containerStartTimeDesc, compbasemetrics.GaugeValue, float64(s.StartTime.UnixNano())/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
}

func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- compbasemetrics.Metric, pod stats.PodStats, s stats.ContainerStats) {
if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
compbasemetrics.NewLazyConstMetric(containerCPUUsageDesc, compbasemetrics.CounterValue,
float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
}

func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- compbasemetrics.Metric, pod stats.PodStats, s stats.ContainerStats) {
if s.Memory == nil || s.Memory.WorkingSetBytes == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time,
compbasemetrics.NewLazyConstMetric(containerMemoryUsageDesc, compbasemetrics.GaugeValue,
float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
}

func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- compbasemetrics.Metric, pod stats.PodStats) {
if pod.CPU == nil || pod.CPU.UsageCoreNanoSeconds == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(pod.CPU.Time.Time,
compbasemetrics.NewLazyConstMetric(podCPUUsageDesc, compbasemetrics.CounterValue,
float64(*pod.CPU.UsageCoreNanoSeconds)/float64(time.Second), pod.PodRef.Name, pod.PodRef.Namespace))
}

func (rc *resourceMetricsCollector) collectPodMemoryMetrics(ch chan<- compbasemetrics.Metric, pod stats.PodStats) {
if pod.Memory == nil || pod.Memory.WorkingSetBytes == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(pod.Memory.Time.Time,
compbasemetrics.NewLazyConstMetric(podMemoryUsageDesc, compbasemetrics.GaugeValue,
float64(*pod.Memory.WorkingSetBytes), pod.PodRef.Name, pod.PodRef.Namespace))
}
Loading

0 comments on commit 7fdd48d

Please sign in to comment.