From 0858540f7487b926caf32928500e047e137dbf76 Mon Sep 17 00:00:00 2001
From: Gonzalo Reyero Ferreras
Date: Wed, 22 Nov 2023 03:44:19 -0600
Subject: [PATCH] Lifecycle ts: fix to avoid some checks being skipped.

WithSkipCheckFn applies either an OR (default behaviour) or an AND
operation to **all** the skip functions that have been passed. It's not
possible to mix both operations, so I had to create a helper function to
check whether both the env.Deployments and env.StatefulSets slices are
empty.

Examples:

  check1.WithSkipCheckFn(skipFn1, skipFn2)

    check1 will be skipped if **any** of those skip functions returns true
    (OR op, because check.SkipMode=SkipModeAny by default).

  check2.WithSkipCheckFn(skipFn1, skipFn2).WithSkipModeAll()

    check2 will be skipped only if **both** skipFn1 and skipFn2 return true
    (AND op, check.SkipMode=SkipModeAll).

Also:
- Some refactoring was done in the lifecycle-pod-recreation check to create
  the compliant/non-compliant report object lists for deployments and
  statefulsets.
- Added skip functions for the lifecycle-storage-provisioner check and set
  the pod as compliant if it's not using any PVC/StorageClass.
- Added passing checks from the lifecycle and networking test suites to the
  gradetool's policy json.

ToDos:
- Refactor the testPodsRecreation() check function to avoid the cyclomatic
  complexity linting issue.
- Refactor the testStorageProvisioner() check function to add some
  short-circuits to avoid the for-if-for-if-for-if... pattern.
---
 .../lifecycle/podsets/podsets.go              |  12 +-
 cnf-certification-test/lifecycle/suite.go     | 110 +++++++++++-----
 generated_policy.json                         | 124 +++++++++++++++---
 pkg/testhelper/testhelper.go                  |  18 +++
 4 files changed, 207 insertions(+), 57 deletions(-)

diff --git a/cnf-certification-test/lifecycle/podsets/podsets.go b/cnf-certification-test/lifecycle/podsets/podsets.go
index d1e7a0237..7169bd35a 100644
--- a/cnf-certification-test/lifecycle/podsets/podsets.go
+++ b/cnf-certification-test/lifecycle/podsets/podsets.go
@@ -181,7 +181,10 @@ func getNotReadyStatefulSets(statefulSets []*provider.StatefulSet) []*provider.S
 	return notReadyStatefulSets
 }
 
-func WaitForAllPodSetsReady(env *provider.TestEnvironment, timeout time.Duration) (claimsLog loghelper.CuratedLogLines, atLeastOnePodsetNotReady bool) {
+func WaitForAllPodSetsReady(env *provider.TestEnvironment, timeout time.Duration) (
+	claimsLog loghelper.CuratedLogLines,
+	notReadyDeployments []*provider.Deployment,
+	notReadyStatefulSets []*provider.StatefulSet) {
 	const queryInterval = 15 * time.Second
 
 	deploymentsToCheck := env.Deployments
@@ -190,10 +193,10 @@ func WaitForAllPodSetsReady(env *provider.TestEnvironment, timeout time.Duration
 	logrus.Infof("Waiting %s for %d podsets to be ready.", timeout, len(deploymentsToCheck)+len(statefulSetsToCheck))
 	for startTime := time.Now(); time.Since(startTime) < timeout; {
 		logrus.Infof("Checking Deployments readiness of Deployments %v", getDeploymentsInfo(deploymentsToCheck))
-		notReadyDeployments := getNotReadyDeployments(deploymentsToCheck)
+		notReadyDeployments = getNotReadyDeployments(deploymentsToCheck)
 
 		logrus.Infof("Checking StatefulSets readiness of StatefulSets %v", getStatefulSetsInfo(statefulSetsToCheck))
-		notReadyStatefulSets := getNotReadyStatefulSets(statefulSetsToCheck)
+		notReadyStatefulSets = getNotReadyStatefulSets(statefulSetsToCheck)
 
 		logrus.Infof("Not ready Deployments: %v", getDeploymentsInfo(notReadyDeployments))
 		logrus.Infof("Not ready StatefulSets: %v", getStatefulSetsInfo(notReadyStatefulSets))
@@ -213,8 +216,7 @@ func WaitForAllPodSetsReady(env *provider.TestEnvironment, timeout time.Duration
 	claimsLog.AddLogLine("Not ready Deployments: %v", getDeploymentsInfo(deploymentsToCheck))
 	claimsLog.AddLogLine("Not ready StatefulSets: %v", getStatefulSetsInfo(statefulSetsToCheck))
 
-	atLeastOnePodsetNotReady = len(deploymentsToCheck) > 0 || len(statefulSetsToCheck) > 0
-	return claimsLog, atLeastOnePodsetNotReady
+	return claimsLog, deploymentsToCheck, statefulSetsToCheck
 }
 
 func GetAllNodesForAllPodSets(pods []*provider.Pod) (nodes map[string]bool) {
diff --git a/cnf-certification-test/lifecycle/suite.go b/cnf-certification-test/lifecycle/suite.go
index 92d5bb1a0..9a1daa674 100644
--- a/cnf-certification-test/lifecycle/suite.go
+++ b/cnf-certification-test/lifecycle/suite.go
@@ -56,6 +56,14 @@ var (
 		env = provider.GetTestEnvironment()
 		return nil
 	}
+
+	// podset = deployment or statefulset
+	skipIfNoPodSetsUnderTest = func() (bool, string) {
+		if len(env.Deployments) == 0 && len(env.StatefulSets) == 0 {
+			return true, "no deployments or statefulsets found to check"
+		}
+		return false, ""
+	}
 )
 
 //nolint:funlen
@@ -144,10 +152,8 @@ func init() {
 	// High availability test
 	testID, tags = identifiers.GetGinkgoTestIDAndLabels(identifiers.TestPodHighAvailabilityBestPractices)
 	checksGroup.Add(checksdb.NewCheck(testID, tags).
-		WithSkipCheckFn(
-			testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle),
-			testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
-			testhelper.GetNoStatefulSetsUnderTestSkipFn(&env)).
+		WithSkipCheckFn(testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle)).
+		WithSkipCheckFn(skipIfNoPodSetsUnderTest).
 		WithCheckFn(func(c *checksdb.Check) error {
 			testHighAvailability(c, &env)
 			return nil
@@ -169,9 +175,8 @@ func init() {
 	checksGroup.Add(checksdb.NewCheck(testID, tags).
 		WithSkipCheckFn(
 			testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle),
-			testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
-			testhelper.GetNoStatefulSetsUnderTestSkipFn(&env),
 			testhelper.GetNotIntrusiveSkipFn(&env)).
+		WithSkipCheckFn(skipIfNoPodSetsUnderTest).
 		WithCheckFn(func(c *checksdb.Check) error {
 			testPodsRecreation(c, &env)
 			return nil
@@ -182,9 +187,8 @@ func init() {
 	checksGroup.Add(checksdb.NewCheck(testID, tags).
 		WithSkipCheckFn(
 			testhelper.GetNotIntrusiveSkipFn(&env),
-			testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle),
-			testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
-			testhelper.GetNoStatefulSetsUnderTestSkipFn(&env)).
+			testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle)).
+		WithSkipCheckFn(skipIfNoPodSetsUnderTest).
 		WithCheckFn(func(c *checksdb.Check) error {
 			testDeploymentScaling(&env, timeout, c)
 			return nil
@@ -195,9 +199,8 @@ func init() {
 	checksGroup.Add(checksdb.NewCheck(testID, tags).
 		WithSkipCheckFn(
 			testhelper.GetNotIntrusiveSkipFn(&env),
-			testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle),
-			testhelper.GetNoDeploymentsUnderTestSkipFn(&env),
-			testhelper.GetNoStatefulSetsUnderTestSkipFn(&env)).
+			testhelper.GetNotEnoughWorkersSkipFn(&env, minWorkerNodesForLifecycle)).
+		WithSkipCheckFn(skipIfNoPodSetsUnderTest).
 		WithCheckFn(func(c *checksdb.Check) error {
 			testStatefulSetScaling(&env, timeout, c)
 			return nil
@@ -244,7 +247,10 @@ func init() {
 	// Storage provisioner test
 	testID, tags = identifiers.GetGinkgoTestIDAndLabels(identifiers.TestStorageProvisioner)
 	checksGroup.Add(checksdb.NewCheck(testID, tags).
-		WithSkipCheckFn(testhelper.GetNoPodsUnderTestSkipFn(&env)).
+		WithSkipCheckFn(
+			testhelper.GetNoPodsUnderTestSkipFn(&env),
+			testhelper.GetNoStorageClassesSkipFn(&env),
+			testhelper.GetNoPersistentVolumeClaimsSkipFn(&env)).
 		WithCheckFn(func(c *checksdb.Check) error {
 			testStorageProvisioner(c, &env)
 			return nil
@@ -575,7 +581,7 @@ func testHighAvailability(check *checksdb.Check, env *provider.TestEnvironment)
 }
 
 // testPodsRecreation tests that pods belonging to deployments and statefulsets are re-created and ready in case a node is lost
-func testPodsRecreation(check *checksdb.Check, env *provider.TestEnvironment) { //nolint:funlen
+func testPodsRecreation(check *checksdb.Check, env *provider.TestEnvironment) { //nolint:funlen,gocyclo
 	var compliantObjects []*testhelper.ReportObject
 	var nonCompliantObjects []*testhelper.ReportObject
 
@@ -594,10 +600,15 @@ func testPodsRecreation(check *checksdb.Check, env *provider.TestEnvironment) {
 	// Before draining any node, wait until all podsets are ready. The timeout depends on the number of podsets to check.
 	// timeout = k-mins + (1min * (num-deployments + num-statefulsets))
 	allPodsetsReadyTimeout := timeoutPodSetReady + time.Minute*time.Duration(len(env.Deployments)+len(env.StatefulSets))
-	claimsLog, atLeastOnePodsetNotReady := podsets.WaitForAllPodSetsReady(env, allPodsetsReadyTimeout)
-	if atLeastOnePodsetNotReady {
+	claimsLog, notReadyDeployments, notReadyStatefulSets := podsets.WaitForAllPodSetsReady(env, allPodsetsReadyTimeout)
+	if len(notReadyDeployments) > 0 || len(notReadyStatefulSets) > 0 {
 		tnf.ClaimFilePrintf("%s", claimsLog.GetLogLines())
-		nonCompliantObjects = append(nonCompliantObjects, testhelper.NewPodReportObject("", "", "Some deployments or stateful sets are not in a good initial state. Cannot perform test.", false))
+		for _, dep := range notReadyDeployments {
+			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewDeploymentReportObject(dep.Namespace, dep.Name, "Deployment was not ready before draining any node.", false))
+		}
+		for _, sts := range notReadyStatefulSets {
+			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewStatefulSetReportObject(sts.Namespace, sts.Name, "Statefulset was not ready before draining any node.", false))
+		}
 		return
 	}
 
@@ -615,43 +626,64 @@ func testPodsRecreation(check *checksdb.Check, env *provider.TestEnvironment) {
 	}
 	if len(podsWithNodeAssignment) > 0 {
 		logrus.Errorf("Pod(s) have been found to contain a node assignment and cannot perform the pod-recreation test: %v", podsWithNodeAssignment)
-		nonCompliantObjects = append(nonCompliantObjects, testhelper.NewPodReportObject("", "", "Some pods have node assignments and cannot perform the pod-recreation test", false))
+		for _, pod := range podsWithNodeAssignment {
+			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewPodReportObject(pod.Namespace, pod.Name, "Pod has node assignment.", false))
+		}
+
+		return
 	}
 
-	for n := range podsets.GetAllNodesForAllPodSets(env.Pods) {
-		defer podrecreation.CordonCleanup(n) //nolint:gocritic // The defer in loop is intentional, calling the cleanup function once per node
-		err := podrecreation.CordonHelper(n, podrecreation.Cordon)
+	for nodeName := range podsets.GetAllNodesForAllPodSets(env.Pods) {
+		defer podrecreation.CordonCleanup(nodeName) //nolint:gocritic // The defer in loop is intentional, calling the cleanup function once per node
+		err := podrecreation.CordonHelper(nodeName, podrecreation.Cordon)
 		if err != nil {
-			logrus.Errorf("error cordoning the node: %s", n)
-			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(n, "Node cordoning failed", false))
+			logrus.Errorf("error cordoning the node: %s", nodeName)
+			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(nodeName, "Node cordoning failed", false))
 			return
 		}
-		tnf.Logf(logrus.InfoLevel, fmt.Sprintf("Draining and Cordoning node %s: ", n))
-		logrus.Debugf("node: %s cordoned", n)
-		count, err := podrecreation.CountPodsWithDelete(env.Pods, n, podrecreation.NoDelete)
+		tnf.Logf(logrus.InfoLevel, fmt.Sprintf("Draining and Cordoning node %s: ", nodeName))
+		logrus.Debugf("node: %s cordoned", nodeName)
+		count, err := podrecreation.CountPodsWithDelete(env.Pods, nodeName, podrecreation.NoDelete)
 		if err != nil {
-			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(n, "Getting pods list to drain failed", false))
+			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(nodeName, "Getting pods list to drain failed", false))
 			return
 		}
 		nodeTimeout := timeoutPodSetReady + timeoutPodRecreationPerPod*time.Duration(count)
-		logrus.Debugf("draining node: %s with timeout: %s", n, nodeTimeout)
-		_, err = podrecreation.CountPodsWithDelete(env.Pods, n, podrecreation.DeleteForeground)
+		logrus.Debugf("draining node: %s with timeout: %s", nodeName, nodeTimeout)
+		_, err = podrecreation.CountPodsWithDelete(env.Pods, nodeName, podrecreation.DeleteForeground)
 		if err != nil {
-			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(n, "Draining node failed", false))
+			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(nodeName, "Draining node failed", false))
 			return
 		}
-		claimsLog, podsNotReady := podsets.WaitForAllPodSetsReady(env, nodeTimeout)
-		if podsNotReady {
+		claimsLog, notReadyDeployments, notReadyStatefulSets := podsets.WaitForAllPodSetsReady(env, nodeTimeout)
+		if len(notReadyDeployments) > 0 || len(notReadyStatefulSets) > 0 {
 			tnf.ClaimFilePrintf("%s", claimsLog.GetLogLines())
-			nonCompliantObjects = append(nonCompliantObjects, testhelper.NewNodeReportObject(n, "Some pods are not ready after draining the node", false))
+			for _, dep := range notReadyDeployments {
+				nonCompliantObjects = append(nonCompliantObjects, testhelper.NewDeploymentReportObject(dep.Namespace, dep.Name, "Deployment not ready after draining node "+nodeName, false))
+			}
+			for _, sts := range notReadyStatefulSets {
+				nonCompliantObjects = append(nonCompliantObjects, testhelper.NewStatefulSetReportObject(sts.Namespace, sts.Name, "Statefulset not ready after draining node "+nodeName, false))
+			}
 			return
 		}
 
-		err = podrecreation.CordonHelper(n, podrecreation.Uncordon)
+		err = podrecreation.CordonHelper(nodeName, podrecreation.Uncordon)
 		if err != nil {
-			logrus.Fatalf("error uncordoning the node: %s", n)
+			logrus.Fatalf("error uncordoning the node: %s", nodeName)
+		}
+	}
+
+	// If everything went well for all nodes, the nonCompliantObjects should be empty. We need to
+	// manually add all the deps/sts into the compliant object lists so the check is not marked as skipped.
+	// ToDo: Improve this.
+	if len(nonCompliantObjects) == 0 {
+		for _, dep := range env.Deployments {
+			compliantObjects = append(compliantObjects, testhelper.NewDeploymentReportObject(dep.Namespace, dep.Name, "Deployment's pods successfully re-scheduled after node draining.", true))
+		}
+
+		for _, sts := range env.StatefulSets {
+			compliantObjects = append(compliantObjects, testhelper.NewStatefulSetReportObject(sts.Namespace, sts.Name, "Statefulset's pods successfully re-scheduled after node draining.", true))
+		}
 	}
 
@@ -773,6 +805,7 @@ func testStorageProvisioner(check *checksdb.Check, env *provider.TestEnvironment
 	var Pvc = env.PersistentVolumeClaims
 	snoSingleLocalStorageProvisionner := ""
 	for _, put := range env.Pods {
+		usesPvcAndStorageClass := false
 		for pvIndex := range put.Spec.Volumes {
 			// Skip any nil persistentClaims.
 			volume := put.Spec.Volumes[pvIndex]
@@ -785,6 +818,7 @@ func testStorageProvisioner(check *checksdb.Check, env *provider.TestEnvironment
 			if Pvc[i].Name == put.Spec.Volumes[pvIndex].PersistentVolumeClaim.ClaimName && Pvc[i].Namespace == put.Namespace {
 				for j := range StorageClasses {
 					if Pvc[i].Spec.StorageClassName != nil && StorageClasses[j].Name == *Pvc[i].Spec.StorageClassName {
+						usesPvcAndStorageClass = true
 						tnf.ClaimFilePrintf("%s pvc_name: %s, storageclass_name: %s, provisioner_name: %s", put.String(), put.Spec.Volumes[pvIndex].PersistentVolumeClaim.ClaimName,
 							StorageClasses[j].Name, StorageClasses[j].Provisioner)
@@ -831,6 +865,12 @@ func testStorageProvisioner(check *checksdb.Check, env *provider.TestEnvironment
 				}
 			}
 		}
+		// Save the pod as compliant in case it's not using any of the existing PVCs/StorageClasses of the cluster.
+		// Otherwise, in these cases the check would be marked as skipped.
+		// ToDo: improve this function.
+		if !usesPvcAndStorageClass {
+			compliantObjects = append(compliantObjects, testhelper.NewPodReportObject(put.Namespace, put.Name, "Pod not configured to use local storage.", true))
+		}
 		}
 	}
 	check.SetResult(compliantObjects, nonCompliantObjects)
diff --git a/generated_policy.json b/generated_policy.json
index edfc128e6..3c99927f5 100644
--- a/generated_policy.json
+++ b/generated_policy.json
@@ -1,11 +1,6 @@
 {
   "grades": {
     "requiredPassingTests": [
-      {
-        "id": "affiliated-certification-operator-is-certified",
-        "suite": "affiliated-certification",
-        "tags": "common"
-      },
       {
         "id": "access-control-bpf-capability-check",
         "suite": "access-control",
@@ -101,21 +96,11 @@
         "suite": "access-control",
         "tags": "telco"
       },
-      {
-        "id": "access-control-security-context",
-        "suite": "access-control",
-        "tags": "extended"
-      },
       {
         "id": "access-control-security-context-non-root-user-check",
         "suite": "access-control",
         "tags": "common"
       },
-      {
-        "id": "access-control-security-context-privilege-escalation",
-        "suite": "access-control",
-        "tags": "common"
-      },
       {
         "id": "access-control-service-type",
         "suite": "access-control",
@@ -137,8 +122,68 @@
         "tags": "telco"
       },
       {
-        "id": "access-control-sys-ptrace-capability",
-        "suite": "access-control",
+        "id": "affiliated-certification-operator-is-certified",
+        "suite": "affiliated-certification",
+        "tags": "common"
+      },
+      {
+        "id": "lifecycle-container-shutdown",
+        "suite": "lifecycle",
+        "tags": "telco"
+      },
+      {
+        "id": "lifecycle-container-startup",
+        "suite": "lifecycle",
+        "tags": "telco"
+      },
+      {
+        "id": "lifecycle-deployment-scaling",
+        "suite": "lifecycle",
+        "tags": "common"
+      },
+      {
+        "id": "lifecycle-image-pull-policy",
+        "suite": "lifecycle",
+        "tags": "telco"
+      },
+      {
+        "id": "lifecycle-liveness-probe",
+        "suite": "lifecycle",
+        "tags": "telco"
"telco" + }, + { + "id": "lifecycle-pod-high-availability", + "suite": "lifecycle", + "tags": "common" + }, + { + "id": "lifecycle-pod-owner-type", + "suite": "lifecycle", + "tags": "telco" + }, + { + "id": "lifecycle-pod-recreation", + "suite": "lifecycle", + "tags": "common" + }, + { + "id": "lifecycle-pod-scheduling", + "suite": "lifecycle", + "tags": "telco" + }, + { + "id": "lifecycle-pod-toleration-bypass", + "suite": "lifecycle", + "tags": "telco" + }, + { + "id": "lifecycle-readiness-probe", + "suite": "lifecycle", + "tags": "telco" + }, + { + "id": "lifecycle-startup-probe", + "suite": "lifecycle", "tags": "telco" }, { @@ -151,6 +196,51 @@ "suite": "manageability", "tags": "extended" }, + { + "id": "networking-dual-stack-service", + "suite": "networking", + "tags": "extended" + }, + { + "id": "networking-icmpv4-connectivity", + "suite": "networking", + "tags": "common" + }, + { + "id": "networking-icmpv4-connectivity-multus", + "suite": "networking", + "tags": "telco" + }, + { + "id": "networking-icmpv6-connectivity", + "suite": "networking", + "tags": "common" + }, + { + "id": "networking-icmpv6-connectivity-multus", + "suite": "networking", + "tags": "telco" + }, + { + "id": "networking-network-policy-deny-all", + "suite": "networking", + "tags": "common" + }, + { + "id": "networking-ocp-reserved-ports-usage", + "suite": "networking", + "tags": "common" + }, + { + "id": "networking-reserved-partner-ports", + "suite": "networking", + "tags": "extended" + }, + { + "id": "networking-undeclared-container-ports-usage", + "suite": "networking", + "tags": "extended" + }, { "id": "observability-container-logging", "suite": "observability", diff --git a/pkg/testhelper/testhelper.go b/pkg/testhelper/testhelper.go index fd3c0c224..f28dbc1cd 100644 --- a/pkg/testhelper/testhelper.go +++ b/pkg/testhelper/testhelper.go @@ -511,6 +511,24 @@ func GetNoAffinityRequiredPodsSkipFn(env *provider.TestEnvironment) func() (bool } } +func GetNoStorageClassesSkipFn(env *provider.TestEnvironment) func() (bool, string) { + return func() (bool, string) { + if len(env.StorageClassList) == 0 { + return true, "no storage classes found" + } + return false, "" + } +} + +func GetNoPersistentVolumeClaimsSkipFn(env *provider.TestEnvironment) func() (bool, string) { + return func() (bool, string) { + if len(env.PersistentVolumeClaims) == 0 { + return true, "no persistent volume claims found" + } + return false, "" + } +} + func SkipIfEmptyAny(skip func(string, ...int), object ...[2]interface{}) { for _, o := range object { s := reflect.ValueOf(o[0])