Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of GetStatsSummary and GetMetricsResource for Virtual Kubelet #163

Merged
merged 4 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion charts/k3k/templates/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,27 @@ roleRef:
subjects:
- kind: ServiceAccount
name: {{ include "k3k.serviceAccountName" . }}
namespace: {{ .Values.namespace }}
namespace: {{ .Values.namespace }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "k3k.fullname" . }}-node-proxy
rules:
- apiGroups:
- ""
resources:
- "nodes"
- "nodes/proxy"
verbs:
- "get"
- "list"
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: {{ include "k3k.fullname" . }}-node-proxy
roleRef:
kind: ClusterRole
name: {{ include "k3k.fullname" . }}-node-proxy
apiGroup: rbac.authorization.k8s.io
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ require (
github.com/go-logr/zapr v1.3.0
github.com/onsi/ginkgo/v2 v2.20.1
github.com/onsi/gomega v1.36.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_model v0.6.1
github.com/rancher/dynamiclistener v1.27.5
github.com/sirupsen/logrus v1.9.3
Expand All @@ -29,6 +30,7 @@ require (
k8s.io/apimachinery v0.29.11
k8s.io/apiserver v0.29.11
k8s.io/client-go v0.29.11
k8s.io/component-base v0.29.11
k8s.io/metrics v0.29.11
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
sigs.k8s.io/controller-runtime v0.17.5
Expand Down Expand Up @@ -80,7 +82,6 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
Expand Down Expand Up @@ -119,7 +120,6 @@ require (
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.29.2 // indirect
k8s.io/component-base v0.29.11 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kms v0.31.0 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
Expand Down
4 changes: 2 additions & 2 deletions k3k-kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,10 @@ func (k *kubelet) newProviderFunc(namespace, name, hostname, agentIP, serverIP,
if err != nil {
return nil, nil, errors.New("unable to make nodeutil provider: " + err.Error())
}
nodeProvider := provider.Node{}

provider.ConfigureNode(pc.Node, hostname, k.port, agentIP)
return utilProvider, &nodeProvider, nil

return utilProvider, &provider.Node{}, nil
}
}

Expand Down
196 changes: 196 additions & 0 deletions k3k-kubelet/provider/collectors/kubelet_resource_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
/*
Copyright (c) Microsoft Corporation.
Licensed under the Apache 2.0 license.

See https://github.com/virtual-kubelet/azure-aci/tree/master/pkg/metrics/collectors
*/

package collectors

import (
"time"

stats "github.com/virtual-kubelet/virtual-kubelet/node/api/statsv1alpha1"
compbasemetrics "k8s.io/component-base/metrics"
)

// defining metrics
var (
nodeCPUUsageDesc = compbasemetrics.NewDesc("node_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the node in core-seconds",
nil,
nil,
compbasemetrics.ALPHA,
"")

nodeMemoryUsageDesc = compbasemetrics.NewDesc("node_memory_working_set_bytes",
"Current working set of the node in bytes",
nil,
nil,
compbasemetrics.ALPHA,
"")

containerCPUUsageDesc = compbasemetrics.NewDesc("container_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the container in core-seconds",
[]string{"container", "pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")

containerMemoryUsageDesc = compbasemetrics.NewDesc("container_memory_working_set_bytes",
"Current working set of the container in bytes",
[]string{"container", "pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")

podCPUUsageDesc = compbasemetrics.NewDesc("pod_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the pod in core-seconds",
[]string{"pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")

podMemoryUsageDesc = compbasemetrics.NewDesc("pod_memory_working_set_bytes",
"Current working set of the pod in bytes",
[]string{"pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")

resourceScrapeResultDesc = compbasemetrics.NewDesc("scrape_error",
"1 if there was an error while getting container metrics, 0 otherwise",
nil,
nil,
compbasemetrics.ALPHA,
"")

containerStartTimeDesc = compbasemetrics.NewDesc("container_start_time_seconds",
"Start time of the container since unix epoch in seconds",
[]string{"container", "pod", "namespace"},
nil,
compbasemetrics.ALPHA,
"")
)

// NewResourceMetricsCollector returns a metrics.StableCollector which exports resource metrics
func NewKubeletResourceMetricsCollector(podStats *stats.Summary) compbasemetrics.StableCollector {
return &resourceMetricsCollector{
providerPodStats: podStats,
}
}

type resourceMetricsCollector struct {
compbasemetrics.BaseStableCollector

providerPodStats *stats.Summary
}

// Check if resourceMetricsCollector implements necessary interface
var _ compbasemetrics.StableCollector = &resourceMetricsCollector{}

// DescribeWithStability implements compbasemetrics.StableCollector
func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *compbasemetrics.Desc) {
ch <- nodeCPUUsageDesc
ch <- nodeMemoryUsageDesc
ch <- containerStartTimeDesc
ch <- containerCPUUsageDesc
ch <- containerMemoryUsageDesc
ch <- podCPUUsageDesc
ch <- podMemoryUsageDesc
ch <- resourceScrapeResultDesc
}

// CollectWithStability implements compbasemetrics.StableCollector
// Since new containers are frequently created and removed, using the Gauge would
// leak metric collectors for containers or pods that no longer exist. Instead, implement
// custom collector in a way that only collects metrics for active containers.
func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- compbasemetrics.Metric) {
var errorCount float64
defer func() {
ch <- compbasemetrics.NewLazyConstMetric(resourceScrapeResultDesc, compbasemetrics.GaugeValue, errorCount)
}()

statsSummary := *rc.providerPodStats
rc.collectNodeCPUMetrics(ch, statsSummary.Node)
rc.collectNodeMemoryMetrics(ch, statsSummary.Node)

for _, pod := range statsSummary.Pods {
for _, container := range pod.Containers {
rc.collectContainerStartTime(ch, pod, container)
rc.collectContainerCPUMetrics(ch, pod, container)
rc.collectContainerMemoryMetrics(ch, pod, container)
}
rc.collectPodCPUMetrics(ch, pod)
rc.collectPodMemoryMetrics(ch, pod)
}
}

// implement collector methods and validate that correct data is used

func (rc *resourceMetricsCollector) collectNodeCPUMetrics(ch chan<- compbasemetrics.Metric, s stats.NodeStats) {
if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
compbasemetrics.NewLazyConstMetric(nodeCPUUsageDesc, compbasemetrics.CounterValue, float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second)))
}

func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- compbasemetrics.Metric, s stats.NodeStats) {
if s.Memory == nil || s.Memory.WorkingSetBytes == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time,
compbasemetrics.NewLazyConstMetric(nodeMemoryUsageDesc, compbasemetrics.GaugeValue, float64(*s.Memory.WorkingSetBytes)))
}

func (rc *resourceMetricsCollector) collectContainerStartTime(ch chan<- compbasemetrics.Metric, pod stats.PodStats, s stats.ContainerStats) {
if s.StartTime.Unix() <= 0 {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.StartTime.Time,
compbasemetrics.NewLazyConstMetric(containerStartTimeDesc, compbasemetrics.GaugeValue, float64(s.StartTime.UnixNano())/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
}

func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- compbasemetrics.Metric, pod stats.PodStats, s stats.ContainerStats) {
if s.CPU == nil || s.CPU.UsageCoreNanoSeconds == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
compbasemetrics.NewLazyConstMetric(containerCPUUsageDesc, compbasemetrics.CounterValue,
float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
}

func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- compbasemetrics.Metric, pod stats.PodStats, s stats.ContainerStats) {
if s.Memory == nil || s.Memory.WorkingSetBytes == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time,
compbasemetrics.NewLazyConstMetric(containerMemoryUsageDesc, compbasemetrics.GaugeValue,
float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
}

func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- compbasemetrics.Metric, pod stats.PodStats) {
if pod.CPU == nil || pod.CPU.UsageCoreNanoSeconds == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(pod.CPU.Time.Time,
compbasemetrics.NewLazyConstMetric(podCPUUsageDesc, compbasemetrics.CounterValue,
float64(*pod.CPU.UsageCoreNanoSeconds)/float64(time.Second), pod.PodRef.Name, pod.PodRef.Namespace))
}

func (rc *resourceMetricsCollector) collectPodMemoryMetrics(ch chan<- compbasemetrics.Metric, pod stats.PodStats) {
if pod.Memory == nil || pod.Memory.WorkingSetBytes == nil {
return
}

ch <- compbasemetrics.NewLazyMetricWithTimestamp(pod.Memory.Time.Time,
compbasemetrics.NewLazyConstMetric(podMemoryUsageDesc, compbasemetrics.GaugeValue,
float64(*pod.Memory.WorkingSetBytes), pod.PodRef.Name, pod.PodRef.Namespace))
}
Loading
Loading