Lifecycle ts: fix to avoid some checks being skipped. #1655

Merged
12 changes: 7 additions & 5 deletions cnf-certification-test/lifecycle/podsets/podsets.go
@@ -181,7 +181,10 @@ func getNotReadyStatefulSets(statefulSets []*provider.StatefulSet) []*provider.S
return notReadyStatefulSets
}

func WaitForAllPodSetsReady(env *provider.TestEnvironment, timeout time.Duration) (claimsLog loghelper.CuratedLogLines, atLeastOnePodsetNotReady bool) {
func WaitForAllPodSetsReady(env *provider.TestEnvironment, timeout time.Duration) (
claimsLog loghelper.CuratedLogLines,
notReadyDeployments []*provider.Deployment,
notReadyStatefulSets []*provider.StatefulSet) {
const queryInterval = 15 * time.Second

deploymentsToCheck := env.Deployments
@@ -190,10 +193,10 @@ func WaitForAllPodSetsReady(env *provider.TestEnvironment, timeout time.Duration
logrus.Infof("Waiting %s for %d podsets to be ready.", timeout, len(deploymentsToCheck)+len(statefulSetsToCheck))
for startTime := time.Now(); time.Since(startTime) < timeout; {
logrus.Infof("Checking Deployments readiness of Deployments %v", getDeploymentsInfo(deploymentsToCheck))
notReadyDeployments := getNotReadyDeployments(deploymentsToCheck)
notReadyDeployments = getNotReadyDeployments(deploymentsToCheck)

logrus.Infof("Checking StatefulSets readiness of StatefulSets %v", getStatefulSetsInfo(statefulSetsToCheck))
notReadyStatefulSets := getNotReadyStatefulSets(statefulSetsToCheck)
notReadyStatefulSets = getNotReadyStatefulSets(statefulSetsToCheck)

logrus.Infof("Not ready Deployments: %v", getDeploymentsInfo(notReadyDeployments))
logrus.Infof("Not ready StatefulSets: %v", getStatefulSetsInfo(notReadyStatefulSets))
@@ -213,8 +216,7 @@ func WaitForAllPodSetsReady(env *provider.TestEnvironment, timeout time.Duration
claimsLog.AddLogLine("Not ready Deployments: %v", getDeploymentsInfo(deploymentsToCheck))
claimsLog.AddLogLine("Not ready StatefulSets: %v", getStatefulSetsInfo(statefulSetsToCheck))

atLeastOnePodsetNotReady = len(deploymentsToCheck) > 0 || len(statefulSetsToCheck) > 0
return claimsLog, atLeastOnePodsetNotReady
return claimsLog, deploymentsToCheck, statefulSetsToCheck
}

func GetAllNodesForAllPodSets(pods []*provider.Pod) (nodes map[string]bool) {
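Note on the signature change above: `WaitForAllPodSetsReady` now hands back the actual lists of not-ready Deployments and StatefulSets instead of a single boolean, so callers can report each offending podset individually. A minimal caller sketch (not part of this PR; it assumes the usual `time`/`logrus` imports and uses the same `Namespace`/`Name` field accesses that appear later in this diff):

```go
// Sketch only: one way a caller could consume the new return values.
func logNotReadyPodSets(env *provider.TestEnvironment) {
	claimsLog, notReadyDeps, notReadySts := podsets.WaitForAllPodSetsReady(env, 10*time.Minute)
	for _, dep := range notReadyDeps {
		logrus.Warnf("deployment %s/%s not ready", dep.Namespace, dep.Name)
	}
	for _, sts := range notReadySts {
		logrus.Warnf("statefulset %s/%s not ready", sts.Namespace, sts.Name)
	}
	_ = claimsLog // curated log lines; can be forwarded to the claim file
}
```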
110 changes: 75 additions & 35 deletions cnf-certification-test/lifecycle/suite.go
@@ -56,6 +56,14 @@ var (
env = provider.GetTestEnvironment()
return nil
}

// podset = deployment or statefulset
skipIfNoPodSetsetsUnderTest = func() (bool, string) {
if len(env.Deployments) == 0 && len(env.StatefulSets) == 0 {
return true, "no deployments nor statefulsets to check found"
}
return false, ""
}
)

//nolint:funlen
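The new `skipIfNoPodSetsetsUnderTest` helper is the core of the fix. Previously the "no deployments" and "no statefulsets" conditions were registered as two separate skip functions; assuming the framework skips a check as soon as any of its skip functions fires (an assumption about checksdb consistent with this change, not a statement of its internals), a cluster that has only StatefulSets would skip the check even though there is something to test. Folding both conditions into one function skips only when neither kind of podset exists. A self-contained sketch of that difference:

```go
// Illustration only, under an assumed semantic: a checksdb-style check is
// skipped if ANY registered skip function returns true.
package main

import "fmt"

type skipFn func() (bool, string)

// anySkips mimics the assumed OR semantics over a set of skip functions.
func anySkips(fns ...skipFn) bool {
	for _, fn := range fns {
		if skip, _ := fn(); skip {
			return true
		}
	}
	return false
}

func main() {
	deployments, statefulSets := 0, 3 // e.g. a workload made only of statefulsets

	// Old registration: two independent skip functions -> skipped (deployments == 0).
	old := anySkips(
		func() (bool, string) { return deployments == 0, "no deployments" },
		func() (bool, string) { return statefulSets == 0, "no statefulsets" },
	)

	// New registration: one combined function -> not skipped.
	combined := anySkips(func() (bool, string) {
		return deployments == 0 && statefulSets == 0, "no deployments nor statefulsets to check found"
	})

	fmt.Println(old, combined) // true false
}
```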
@@ -144,10 +152,8 @@ func init() {
// High availability test
testID, tags = identifiers.GetGinkgoTestIDAndLabels(identifiers.TestPodHighAvailabilityBestPractices)
checksGroup.Add(checksdb.NewCheck(testID, tags).
WithSkipCheckFn(
testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle),
testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
testhelper.GetNoStatefulSetsUnderTestSkipFn(&env)).
WithSkipCheckFn(testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle)).
WithSkipCheckFn(skipIfNoPodSetsetsUnderTest).
WithCheckFn(func(c *checksdb.Check) error {
testHighAvailability(c, &env)
return nil
@@ -169,9 +175,8 @@ func init() {
checksGroup.Add(checksdb.NewCheck(testID, tags).
WithSkipCheckFn(
testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle),
testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
testhelper.GetNoStatefulSetsUnderTestSkipFn(&env),
testhelper.GetNotIntrusiveSkipFn(&env)).
WithSkipCheckFn(skipIfNoPodSetsetsUnderTest).
WithCheckFn(func(c *checksdb.Check) error {
testPodsRecreation(c, &env)
return nil
@@ -182,9 +187,8 @@ func init() {
checksGroup.Add(checksdb.NewCheck(testID, tags).
WithSkipCheckFn(
testhelper.GetNotIntrusiveSkipFn(&env),
testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle),
testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
testhelper.GetNoStatefulSetsUnderTestSkipFn(&env)).
testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle)).
WithSkipCheckFn(skipIfNoPodSetsetsUnderTest).
WithCheckFn(func(c *checksdb.Check) error {
testDeploymentScaling(&env, timeout, c)
return nil
@@ -195,9 +199,8 @@ func init() {
checksGroup.Add(checksdb.NewCheck(testID, tags).
WithSkipCheckFn(
testhelper.GetNotIntrusiveSkipFn(&env),
testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle),
testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
testhelper.GetNoStatefulSetsUnderTestSkipFn(&env)).
testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle)).
WithSkipCheckFn(skipIfNoPodSetsetsUnderTest).
WithCheckFn(func(c *checksdb.Check) error {
testStatefulSetScaling(&env, timeout, c)
return nil
@@ -244,7 +247,10 @@ func init() {
// Storage provisioner test
testID, tags = identifiers.GetGinkgoTestIDAndLabels(identifiers.TestStorageProvisioner)
checksGroup.Add(checksdb.NewCheck(testID, tags).
WithSkipCheckFn(testhelper.GetNoPodsUnderTestSkipFn(&env)).
WithSkipCheckFn(
testhelper.GetNoPodsUnderTestSkipFn(&env),
testhelper.GetNoStorageClassesSkipFn(&env),
testhelper.GetNoPersistentVolumeClaimsSkipFn(&env)).
WithCheckFn(func(c *checksdb.Check) error {
testStorageProvisioner(c, &env)
return nil
@@ -575,7 +581,7 @@ func testHighAvailability(check *checksdb.Check, env *provider.TestEnvironment)
}

// testPodsRecreation tests that pods belonging to deployments and statefulsets are re-created and ready in case a node is lost
func testPodsRecreation(check *checksdb.Check, env *provider.TestEnvironment) { //nolint:funlen
func testPodsRecreation(check *checksdb.Check, env *provider.TestEnvironment) { //nolint:funlen,gocyclo
var compliantObjects []*testhelper.ReportObject
var nonCompliantObjects []*testhelper.ReportObject

@@ -594,10 +600,15 @@ func testPodsRecreation(check *checksdb.Check, env *provider.TestEnvironment) {
// Before draining any node, wait until all podsets are ready. The timeout depends on the number of podsets to check.
// timeout = k-mins + (1min * (num-deployments + num-statefulsets))
allPodsetsReadyTimeout := timeoutPodSetReady + time.Minute*time.Duration(len(env.Deployments)+len(env.StatefulSets))
claimsLog, atLeastOnePodsetNotReady := podsets.WaitForAllPodSetsReady(env, allPodsetsReadyTimeout)
if atLeastOnePodsetNotReady {
claimsLog, notReadyDeployments, notReadyStatefulSets := podsets.WaitForAllPodSetsReady(env, allPodsetsReadyTimeout)
if len(notReadyDeployments) > 0 || len(notReadyStatefulSets) > 0 {
tnf.ClaimFilePrintf("%s", claimsLog.GetLogLines())
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewPodReportObject("", "", "Some deployments or stateful sets are not in a good initial state. Cannot perform test.", false))
for _, dep := range notReadyDeployments {
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewDeploymentReportObject(dep.Namespace, dep.Name, "Deployment was not ready before draining any node.", false))
}
for _, sts := range notReadyStatefulSets {
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewStatefulSetReportObject(sts.Namespace, sts.Name, "Statefulset was not ready before draining any node.", false))
}
return
}

@@ -615,43 +626,64 @@ func testPodsRecreation(check *checksdb.Check, env *provider.TestEnvironment) {
}
if len(podsWithNodeAssignment) > 0 {
logrus.Errorf("Pod(s) have been found to contain a node assignment and cannot perform the pod-recreation test: %v", podsWithNodeAssignment)
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewPodReportObject("", "", "Some pods have node assignments and cannot perform the pod-recreation test", false))
for _, pod := range podsWithNodeAssignment {
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewPodReportObject(pod.Namespace, pod.Name, "Pod has node assignment.", false))
}

return
}

for n := range podsets.GetAllNodesForAllPodSets(env.Pods) {
defer podrecreation.CordonCleanup(n) //nolint:gocritic // The defer in loop is intentional, calling the cleanup function once per node
err := podrecreation.CordonHelper(n, podrecreation.Cordon)
for nodeName := range podsets.GetAllNodesForAllPodSets(env.Pods) {
defer podrecreation.CordonCleanup(nodeName) //nolint:gocritic // The defer in loop is intentional, calling the cleanup function once per node
err := podrecreation.CordonHelper(nodeName, podrecreation.Cordon)
if err != nil {
logrus.Errorf("error cordoning the node: %s", n)
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(n, "Node cordoning failed", false))
logrus.Errorf("error cordoning the node: %s", nodeName)
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(nodeName, "Node cordoning failed", false))
return
}
tnf.Logf(logrus.InfoLevel, fmt.Sprintf("Draining and Cordoning node %s: ", n))
logrus.Debugf("node: %s cordoned", n)
count, err := podrecreation.CountPodsWithDelete(env.Pods, n, podrecreation.NoDelete)
tnf.Logf(logrus.InfoLevel, fmt.Sprintf("Draining and Cordoning node %s: ", nodeName))
logrus.Debugf("node: %s cordoned", nodeName)
count, err := podrecreation.CountPodsWithDelete(env.Pods, nodeName, podrecreation.NoDelete)
if err != nil {
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(n, "Getting pods list to drain failed", false))
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(nodeName, "Getting pods list to drain failed", false))
return
}
nodeTimeout := timeoutPodSetReady + timeoutPodRecreationPerPod*time.Duration(count)
logrus.Debugf("draining node: %s with timeout: %s", n, nodeTimeout)
_, err = podrecreation.CountPodsWithDelete(env.Pods, n, podrecreation.DeleteForeground)
logrus.Debugf("draining node: %s with timeout: %s", nodeName, nodeTimeout)
_, err = podrecreation.CountPodsWithDelete(env.Pods, nodeName, podrecreation.DeleteForeground)
if err != nil {
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(n, "Draining node failed", false))
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(nodeName, "Draining node failed", false))
return
}

claimsLog, podsNotReady := podsets.WaitForAllPodSetsReady(env, nodeTimeout)
if podsNotReady {
claimsLog, notReadyDeployments, notReadyStatefulSets := podsets.WaitForAllPodSetsReady(env, nodeTimeout)
if len(notReadyDeployments) > 0 || len(notReadyStatefulSets) > 0 {
tnf.ClaimFilePrintf("%s", claimsLog.GetLogLines())
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(n, "Some pods are not ready after draining the node", false))
for _, dep := range notReadyDeployments {
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewDeploymentReportObject(dep.Namespace, dep.Name, "Deployment not ready after draining node "+nodeName, false))
}
for _, sts := range notReadyStatefulSets {
nonCompliantObjects = append(nonCompliantObjects, testhelper.NewStatefulSetReportObject(sts.Namespace, sts.Name, "Statefulset not ready after draining node "+nodeName, false))
}
return
}

err = podrecreation.CordonHelper(n, podrecreation.Uncordon)
err = podrecreation.CordonHelper(nodeName, podrecreation.Uncordon)
if err != nil {
logrus.Fatalf("error uncordoning the node: %s", n)
logrus.Fatalf("error uncordoning the node: %s", nodeName)
}
}

// If everything went well for all nodes, the nonCompliantObjects should be empty. We need to
// manually add all the deps/sts into the compliant object lists so the check is not marked as skipped.
// ToDo: Improve this.
if len(nonCompliantObjects) == 0 {
for _, dep := range env.Deployments {
compliantObjects = append(compliantObjects, testhelper.NewDeploymentReportObject(dep.Namespace, dep.Name, "Deployment's pods successfully re-scheduled after node draining.", true))
}

for _, sts := range env.StatefulSets {
compliantObjects = append(compliantObjects, testhelper.NewStatefulSetReportObject(sts.Namespace, sts.Name, "Statefulset's pods successfully re-scheduled after node draining.", true))
}
}
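The ToDo above works around how the framework appears to classify results: when both the compliant and non-compliant lists are empty the check seems to be marked as skipped, so a fully successful run has to be populated explicitly. A small illustration of that assumed rule (not the actual checksdb code):

```go
// Assumed classification rule the comment above works around (illustration only).
func classifyResult(compliant, nonCompliant int) string {
	switch {
	case nonCompliant > 0:
		return "failed"
	case compliant > 0:
		return "passed"
	default:
		return "skipped" // nothing recorded: without the manual additions, a clean run would end up here
	}
}
```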

@@ -773,6 +805,7 @@ func testStorageProvisioner(check *checksdb.Check, env *provider.TestEnvironment
var Pvc = env.PersistentVolumeClaims
snoSingleLocalStorageProvisionner := ""
for _, put := range env.Pods {
usesPvcAndStorageClass := false
for pvIndex := range put.Spec.Volumes {
// Skip any nil persistentClaims.
volume := put.Spec.Volumes[pvIndex]
@@ -785,6 +818,7 @@ func testStorageProvisioner(check *checksdb.Check, env *provider.TestEnvironment
if Pvc[i].Name == put.Spec.Volumes[pvIndex].PersistentVolumeClaim.ClaimName && Pvc[i].Namespace == put.Namespace {
for j := range StorageClasses {
if Pvc[i].Spec.StorageClassName != nil && StorageClasses[j].Name == *Pvc[i].Spec.StorageClassName {
usesPvcAndStorageClass = true
tnf.ClaimFilePrintf("%s pvc_name: %s, storageclass_name: %s, provisioner_name: %s", put.String(), put.Spec.Volumes[pvIndex].PersistentVolumeClaim.ClaimName,
StorageClasses[j].Name, StorageClasses[j].Provisioner)

@@ -831,6 +865,12 @@ func testStorageProvisioner(check *checksdb.Check, env *provider.TestEnvironment
}
}
}
// Save the pod as compliant in case it's not using any of the cluster's existing PVCs/StorageClasses.
// Otherwise, in these cases the check would be marked as skipped.
// ToDo: improve this function.
if !usesPvcAndStorageClass {
compliantObjects = append(compliantObjects, testhelper.NewPodReportObject(put.Namespace, put.Name, "Pod not configured to use local storage.", true))
}
}
}
check.SetResult(compliantObjects, nonCompliantObjects)
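The new `usesPvcAndStorageClass` flag records whether any of the pod's volumes resolved to a PVC bound to a known StorageClass; pods that never match are reported as compliant so the whole check is not skipped on clusters without local storage. A condensed sketch of that matching walk, using the same field accesses that appear in this diff (the parameter types and the `corev1`/`storagev1` imports from k8s.io/api are assumptions; the env lists may be wrapper types in the real code, and the provisioner-specific checks elided in the diff are omitted):

```go
// Simplified sketch of how usesPvcAndStorageClass is derived (illustration only).
func podUsesClusterStorage(put *provider.Pod, pvcs []corev1.PersistentVolumeClaim, scs []storagev1.StorageClass) bool {
	for _, vol := range put.Spec.Volumes {
		if vol.PersistentVolumeClaim == nil {
			continue // not backed by a PVC
		}
		for i := range pvcs {
			if pvcs[i].Name != vol.PersistentVolumeClaim.ClaimName || pvcs[i].Namespace != put.Namespace {
				continue
			}
			for j := range scs {
				if pvcs[i].Spec.StorageClassName != nil && scs[j].Name == *pvcs[i].Spec.StorageClassName {
					return true // pod consumes a PVC bound to a known StorageClass
				}
			}
		}
	}
	return false
}
```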