-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add KaaS robustness feature tests #714
base: main
Are you sure you want to change the base?
Changes from 5 commits
3c2aaba
4d11602
7013a9d
9f4ec85
cbcca65
cb8e7a3
363d62a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,4 +7,4 @@ nodes: | |
- role: control-plane | ||
- role: worker | ||
- role: worker | ||
- role: worker | ||
- role: worker |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,319 @@ | ||
package scs_k8s_tests | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"strings" | ||
"testing" | ||
"time" | ||
|
||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/client-go/kubernetes" | ||
"k8s.io/client-go/rest" | ||
) | ||
|
||
// ==================== Helper Functions ==================== | ||
|
||
// isKindCluster determines if we're running on a kind cluster | ||
func isKindCluster(clientset *kubernetes.Clientset) bool { | ||
return false | ||
} | ||
|
||
// setupClientset creates a Kubernetes clientset | ||
func setupClientset() (*kubernetes.Clientset, error) { | ||
config, err := rest.InClusterConfig() | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to load in-cluster config: %v", err) | ||
} | ||
return kubernetes.NewForConfig(config) | ||
} | ||
|
||
// ==================== Test Cases ==================== | ||
|
||
func Test_scs_0215_requestLimits(t *testing.T) { | ||
clientset, err := setupClientset() | ||
if err != nil { | ||
t.Fatalf("Failed to setup clientset: %v", err) | ||
} | ||
|
||
t.Run("Check_Request_Limit_Configuration", func(t *testing.T) { | ||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) | ||
defer cancel() | ||
|
||
pods, err := clientset.CoreV1().Pods("kube-system").List(ctx, metav1.ListOptions{ | ||
LabelSelector: "component=kube-apiserver", | ||
}) | ||
if err != nil || len(pods.Items) == 0 { | ||
t.Fatalf("Failed to find kube-apiserver pod: %v", err) | ||
} | ||
|
||
apiServer := pods.Items[0] | ||
var foundSettings = make(map[string]bool) | ||
requiredSettings := []string{ | ||
"max-requests-inflight", | ||
"max-mutating-requests-inflight", | ||
"min-request-timeout", | ||
} | ||
|
||
for _, container := range apiServer.Spec.Containers { | ||
for _, arg := range container.Command { | ||
for _, setting := range requiredSettings { | ||
if strings.Contains(arg, setting) { | ||
foundSettings[setting] = true | ||
} | ||
} | ||
if strings.Contains(arg, "enable-admission-plugins") && | ||
strings.Contains(arg, "EventRateLimit") { | ||
foundSettings["EventRateLimit"] = true | ||
} | ||
} | ||
} | ||
|
||
for _, setting := range requiredSettings { | ||
if !foundSettings[setting] { | ||
t.Errorf("Required setting %s not found in API server configuration", setting) | ||
} | ||
} | ||
if !foundSettings["EventRateLimit"] { | ||
t.Error("EventRateLimit admission plugin not enabled") | ||
} | ||
}) | ||
} | ||
|
||
func Test_scs_0215_minRequestTimeout(t *testing.T) { | ||
clientset, err := setupClientset() | ||
if err != nil { | ||
t.Fatalf("Failed to setup clientset: %v", err) | ||
} | ||
|
||
t.Run("Check_minRequestTimeout_Configuration", func(t *testing.T) { | ||
pods, err := clientset.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{ | ||
LabelSelector: "component=kube-apiserver", | ||
}) | ||
if err != nil || len(pods.Items) == 0 { | ||
t.Fatalf("Failed to find kube-apiserver pod: %v", err) | ||
} | ||
|
||
found := false | ||
for _, container := range pods.Items[0].Spec.Containers { | ||
for _, arg := range container.Command { | ||
if strings.Contains(arg, "--min-request-timeout=") { | ||
found = true | ||
break | ||
} | ||
} | ||
} | ||
|
||
if !found { | ||
t.Error("min-request-timeout not configured for API server") | ||
} | ||
}) | ||
} | ||
|
||
func Test_scs_0215_eventRateLimit(t *testing.T) { | ||
clientset, err := setupClientset() | ||
if err != nil { | ||
t.Fatalf("Failed to setup clientset: %v", err) | ||
} | ||
|
||
if isKindCluster(clientset) { | ||
t.Skip("Running on kind cluster - skipping EventRateLimit test") | ||
} | ||
|
||
t.Run("Check_EventRateLimit_Configuration", func(t *testing.T) { | ||
configLocations := []struct { | ||
name string | ||
namespace string | ||
key string | ||
}{ | ||
{"admission-configuration", "kube-system", "eventratelimit.yaml"}, | ||
{"kube-apiserver", "kube-system", "config.yaml"}, | ||
} | ||
|
||
for _, loc := range configLocations { | ||
config, err := clientset.CoreV1().ConfigMaps(loc.namespace).Get(context.Background(), loc.name, metav1.GetOptions{}) | ||
if err == nil { | ||
if data, ok := config.Data[loc.key]; ok { | ||
if strings.Contains(data, "eventratelimit.admission.k8s.io") { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How is this different from the test in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, these are different tests. The first test checks if the EventRateLimit is enabled in the API server command line flags, while the second test specifically looks for EventRateLimit configuration in ConfigMaps. The second test is more thorough as it searches multiple locations for the actual configuration details. |
||
t.Logf("Found EventRateLimit configuration in %s/%s", loc.namespace, loc.name) | ||
return | ||
} | ||
} | ||
} | ||
} | ||
|
||
configMaps, _ := clientset.CoreV1().ConfigMaps("kube-system").List(context.Background(), metav1.ListOptions{}) | ||
for _, cm := range configMaps.Items { | ||
if strings.Contains(cm.Name, "event-rate-limit") { | ||
t.Logf("Found standalone EventRateLimit configuration in ConfigMap: %s", cm.Name) | ||
return | ||
} | ||
} | ||
|
||
t.Error("No EventRateLimit configuration found in production cluster") | ||
}) | ||
} | ||
|
||
func Test_scs_0215_apiPriorityAndFairness(t *testing.T) { | ||
clientset, err := setupClientset() | ||
if err != nil { | ||
t.Fatalf("Failed to setup clientset: %v", err) | ||
} | ||
|
||
if isKindCluster(clientset) { | ||
t.Skip("Running on kind cluster - skipping APF test") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This must raise an error as well. Otherwise this will be unnoticed by the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought about the "skip tests if kind cluster" topic again. I tend to exclude those skipping statements. The tests should fail if the cluster cannot support the test features, that is the whole purpose of the tests. What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. we should not allow to skip any tests. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will change that |
||
} | ||
|
||
t.Run("Check_APF_Configuration", func(t *testing.T) { | ||
pods, err := clientset.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{ | ||
LabelSelector: "component=kube-apiserver", | ||
}) | ||
if err != nil || len(pods.Items) == 0 { | ||
t.Fatal("Failed to find kube-apiserver pod") | ||
} | ||
|
||
for _, container := range pods.Items[0].Spec.Containers { | ||
for _, arg := range container.Command { | ||
if strings.Contains(arg, "--enable-priority-and-fairness=true") { | ||
t.Log("APF enabled via Command Line Flags") | ||
return | ||
} | ||
} | ||
} | ||
|
||
t.Error("API Priority and Fairness not enabled") | ||
}) | ||
} | ||
|
||
func Test_scs_0215_rateLimitValues(t *testing.T) { | ||
clientset, err := setupClientset() | ||
if err != nil { | ||
t.Fatalf("Failed to setup clientset: %v", err) | ||
} | ||
|
||
if isKindCluster(clientset) { | ||
t.Skip("Running on kind cluster - skipping rate limit values test") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above and to all other There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changed |
||
} | ||
|
||
t.Run("Check_Rate_Limit_Values", func(t *testing.T) { | ||
expectedValues := map[string]string{ | ||
"qps": "5000", | ||
"burst": "20000", | ||
} | ||
|
||
configMaps, _ := clientset.CoreV1().ConfigMaps("kube-system").List(context.Background(), metav1.ListOptions{}) | ||
for _, cm := range configMaps.Items { | ||
var config string | ||
switch { | ||
case strings.Contains(cm.Name, "event-rate-limit"): | ||
config = cm.Data["config.yaml"] | ||
case cm.Name == "admission-configuration": | ||
config = cm.Data["eventratelimit.yaml"] | ||
case cm.Name == "kube-apiserver": | ||
config = cm.Data["config.yaml"] | ||
} | ||
|
||
if config != "" { | ||
allFound := true | ||
for k, v := range expectedValues { | ||
if !strings.Contains(config, fmt.Sprintf("%s: %s", k, v)) { | ||
allFound = false | ||
break | ||
} | ||
} | ||
if allFound { | ||
return | ||
} | ||
} | ||
} | ||
|
||
t.Error("Recommended rate limit values (qps: 5000, burst: 20000) not found") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the standards, these values are described as RECOMMENDED and furthermore “SHOULD be adapted to the needs of the environment and the expected load”. See: ../scs-0215-v1-robustness-features.md#kube-api-rate-limiting-1 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes you are right here, I have overseen that it is just recommended. However, for that reason I exclude the test for now. Maybe some test for this could be added in the future if needed. The overall check for the presence of event rate limits is there. |
||
}) | ||
} | ||
|
||
func Test_scs_0215_etcdCompaction(t *testing.T) { | ||
clientset, err := setupClientset() | ||
if err != nil { | ||
t.Fatalf("Failed to setup clientset: %v", err) | ||
} | ||
|
||
if isKindCluster(clientset) { | ||
t.Skip("Running on kind cluster - skipping etcd compaction test") | ||
} | ||
|
||
t.Run("Check_Etcd_Compaction_Settings", func(t *testing.T) { | ||
pods, err := clientset.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{ | ||
LabelSelector: "component=etcd", | ||
}) | ||
if err != nil || len(pods.Items) == 0 { | ||
t.Skip("No etcd pods found") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above this must throw an error as well. We currently don't consider someone using something else as etcd for k8s. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changed |
||
return | ||
} | ||
|
||
requiredSettings := map[string]string{ | ||
"auto-compaction-mode": "periodic", | ||
"auto-compaction-retention": "8h", | ||
} | ||
|
||
for _, pod := range pods.Items { | ||
for _, container := range pod.Spec.Containers { | ||
foundSettings := make(map[string]bool) | ||
for _, arg := range container.Command { | ||
for setting, value := range requiredSettings { | ||
if strings.Contains(arg, fmt.Sprintf("--%s=%s", setting, value)) { | ||
foundSettings[setting] = true | ||
} | ||
} | ||
} | ||
|
||
if len(foundSettings) == len(requiredSettings) { | ||
t.Log("Found correct etcd compaction settings") | ||
return | ||
} | ||
} | ||
} | ||
|
||
t.Error("Required etcd compaction settings not found") | ||
}) | ||
} | ||
|
||
func Test_scs_0215_etcdBackup(t *testing.T) { | ||
clientset, err := setupClientset() | ||
if err != nil { | ||
t.Fatalf("Failed to setup clientset: %v", err) | ||
} | ||
|
||
if isKindCluster(clientset) { | ||
t.Skip("Running on kind cluster - skipping etcd backup test") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changed |
||
} | ||
|
||
t.Run("Check_Etcd_Backup_Configuration", func(t *testing.T) { | ||
cronJobs, err := clientset.BatchV1().CronJobs("").List(context.Background(), metav1.ListOptions{}) | ||
if err == nil { | ||
for _, job := range cronJobs.Items { | ||
if strings.Contains(strings.ToLower(job.Name), "etcd") && | ||
strings.Contains(strings.ToLower(job.Name), "backup") { | ||
t.Log("Found etcd backup solution: CronJob") | ||
return | ||
} | ||
} | ||
} | ||
t.Error("No etcd backup solution found") | ||
}) | ||
} | ||
|
||
func Test_scs_0215_certificateRotation(t *testing.T) { | ||
clientset, err := setupClientset() | ||
if err != nil { | ||
t.Skip("Failed to setup clientset") | ||
} | ||
|
||
t.Run("Check_Certificate_Controller", func(t *testing.T) { | ||
_, err := clientset.AppsV1().Deployments("cert-manager").Get(context.Background(), "cert-manager", metav1.GetOptions{}) | ||
if err != nil { | ||
t.Error("cert-manager not found - certificate controller required") | ||
} else { | ||
t.Log("Found active certificate controller: cert-manager") | ||
} | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this check also be carried out in the conditional statement in line 67? I think the error can already be logged there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you are right, I changed that.