e2e: must-gather: fix the setup process #1120

Merged · 2 commits · Dec 17, 2024

Changes from all commits
4 changes: 2 additions & 2 deletions test/e2e/install/install_test.go
@@ -77,7 +77,7 @@ var _ = Describe("[Install] continuousIntegration", Serial, func() {
Context("with a running cluster with all the components", func() {
It("[test_id:47574][tier0] should perform overall deployment and verify the condition is reported as available", func() {
deployer := deploy.NewForPlatform(configuration.Plat)
nroObj := deployer.Deploy(context.TODO())
nroObj := deployer.Deploy(context.TODO(), configuration.MachineConfigPoolUpdateTimeout)
nname := client.ObjectKeyFromObject(nroObj)
Expect(nname.Name).ToNot(BeEmpty())

@@ -205,7 +205,7 @@ var _ = Describe("[Install] durability", Serial, func() {

BeforeEach(func() {
deployer = deploy.NewForPlatform(configuration.Plat)
nroObj = deployer.Deploy(context.TODO())
nroObj = deployer.Deploy(context.TODO(), configuration.MachineConfigPoolUpdateTimeout)
})

AfterEach(func() {
30 changes: 22 additions & 8 deletions test/e2e/must-gather/must_gather_suite_test.go
@@ -25,8 +25,11 @@ import (

"github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"

nropv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1"
e2eclient "github.com/openshift-kni/numaresources-operator/test/utils/clients"
"github.com/openshift-kni/numaresources-operator/test/utils/configuration"
"github.com/openshift-kni/numaresources-operator/test/utils/deploy"
"github.com/openshift-kni/numaresources-operator/test/utils/objects"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
@@ -38,10 +41,14 @@ const (

defaultMustGatherImage = "quay.io/openshift-kni/numaresources-must-gather"
defaultMustGatherTag = "4.19.999-snapshot"

nroSchedTimeout = 5 * time.Minute
)

var (
deployment deploy.NroDeploymentWithSched
deployment deploy.Deployer

nroSchedObj *nropv1.NUMAResourcesScheduler

mustGatherImage string
mustGatherTag string
@@ -62,17 +69,23 @@ var _ = ginkgo.BeforeSuite(func() {
mustGatherTag = getStringValueFromEnv(envVarMustGatherTag, defaultMustGatherTag)
ginkgo.By(fmt.Sprintf("Using must-gather image %q tag %q", mustGatherImage, mustGatherTag))

ctx := context.Background()

if _, ok := os.LookupEnv("E2E_NROP_INFRA_SETUP_SKIP"); ok {
ginkgo.By("Fetching up cluster data")

var err error
deployment, err = deploy.GetDeploymentWithSched(context.TODO())
gomega.Expect(err).ToNot(gomega.HaveOccurred())
// assume cluster is set up correctly, so just fetch what we have already;
// fail loudly if we can't get it; that means the assumption was wrong
nroSchedObj = &nropv1.NUMAResourcesScheduler{}
gomega.Expect(e2eclient.Client.Get(ctx, objects.NROSchedObjectKey(), nroSchedObj)).To(gomega.Succeed())
return
}

ginkgo.By("Setting up the cluster")
deployment.Deploy(context.TODO())
deployment.NroSchedObj = deploy.DeployNROScheduler()

deployment = deploy.NewForPlatform(configuration.Plat)
_ = deployment.Deploy(ctx, configuration.MachineConfigPoolUpdateTimeout) // we don't care about the nrop instance
nroSchedObj = deploy.DeployNROScheduler(ctx, nroSchedTimeout)
})

var _ = ginkgo.AfterSuite(func() {
@@ -81,8 +94,9 @@ var _ = ginkgo.AfterSuite(func() {
return
}
ginkgo.By("tearing down the cluster")
deploy.TeardownNROScheduler(deployment.NroSchedObj, 5*time.Minute)
deployment.Teardown(context.TODO(), 5*time.Minute)
ctx := context.Background()
deploy.TeardownNROScheduler(ctx, nroSchedObj, nroSchedTimeout)
deployment.Teardown(ctx, 5*time.Minute)
})

func getStringValueFromEnv(envVar, fallback string) string {
4 changes: 3 additions & 1 deletion test/e2e/must-gather/must_gather_test.go
@@ -45,6 +45,8 @@ var _ = Describe("[must-gather] NRO data collected", func() {
var destDir string

BeforeEach(func() {
Expect(nroSchedObj).ToNot(BeNil(), "missing scheduler object reference")

var err error
destDir, err = os.MkdirTemp("", "*-e2e-data")
Expect(err).ToNot(HaveOccurred())
@@ -106,7 +108,7 @@

By("Looking for resources instances")
nropInstanceFileName := fmt.Sprintf("%s.yaml", filepath.Join("cluster-scoped-resources/nodetopology.openshift.io/numaresourcesoperators", objects.NROObjectKey().Name))
nroschedInstanceFileName := fmt.Sprintf("%s.yaml", filepath.Join("cluster-scoped-resources/nodetopology.openshift.io/numaresourcesschedulers", deployment.NroSchedObj.Name))
nroschedInstanceFileName := fmt.Sprintf("%s.yaml", filepath.Join("cluster-scoped-resources/nodetopology.openshift.io/numaresourcesschedulers", nroSchedObj.Name))

collectedMCPs, err := getMachineConfigPools(filepath.Join(mgContentFolder, "cluster-scoped-resources/machineconfiguration.openshift.io/machineconfigpools"))
Expect(err).ToNot(HaveOccurred())
51 changes: 14 additions & 37 deletions test/utils/deploy/deploy.go
@@ -43,20 +43,19 @@ import (
"github.com/openshift-kni/numaresources-operator/test/utils/objects"
)

const (
NROSchedulerPollingInterval = 10 * time.Second
)

type Deployer interface {
// Deploy deploys the NUMAResourcesOperator object and creates other dependencies
// per the platform that implements it
Deploy(ctx context.Context) *nropv1.NUMAResourcesOperator
Deploy(ctx context.Context, timeout time.Duration) *nropv1.NUMAResourcesOperator
// Teardown tears down the NUMAResourcesOperator object and deletes other dependencies
// per the platform that implements it
Teardown(ctx context.Context, timeout time.Duration)
}

type NroDeploymentWithSched struct {
Deployer
NroSchedObj *nropv1.NUMAResourcesScheduler
}

func NewForPlatform(plat platform.Platform) Deployer {
switch plat {
case platform.OpenShift:
@@ -70,19 +69,6 @@ func NewForPlatform(plat platform.Platform) Deployer {
}
}

func GetDeploymentWithSched(ctx context.Context) (NroDeploymentWithSched, error) {
sd := NroDeploymentWithSched{}
nroSchedKey := objects.NROSchedObjectKey()
nroSchedObj := nropv1.NUMAResourcesScheduler{}
err := e2eclient.Client.Get(ctx, nroSchedKey, &nroSchedObj)
if err != nil {
return sd, err
}
sd.NroSchedObj = &nroSchedObj

return sd, nil
}

func WaitForMCPUpdatedAfterNRODeleted(nroObj *nropv1.NUMAResourcesOperator) {
GinkgoHelper()

@@ -122,20 +108,16 @@ func isMachineConfigPoolsUpdatedAfterDeletion(nro *nropv1.NUMAResourcesOperator)
// or a timeout happens (5 min right now).
//
// see: `TestNROScheduler` to see the specific object characteristics.
func DeployNROScheduler() *nropv1.NUMAResourcesScheduler {
func DeployNROScheduler(ctx context.Context, timeout time.Duration) *nropv1.NUMAResourcesScheduler {
GinkgoHelper()

nroSchedObj := objects.TestNROScheduler()

err := e2eclient.Client.Create(context.TODO(), nroSchedObj)
Expect(err).NotTo(HaveOccurred())

err = e2eclient.Client.Get(context.TODO(), client.ObjectKeyFromObject(nroSchedObj), nroSchedObj)
Expect(err).NotTo(HaveOccurred())

Expect(e2eclient.Client.Create(ctx, nroSchedObj)).To(Succeed())
Expect(e2eclient.Client.Get(ctx, client.ObjectKeyFromObject(nroSchedObj), nroSchedObj)).To(Succeed())
Eventually(func() bool {
updatedNROObj := &nropv1.NUMAResourcesScheduler{}
err := e2eclient.Client.Get(context.TODO(), client.ObjectKeyFromObject(nroSchedObj), updatedNROObj)
err := e2eclient.Client.Get(ctx, client.ObjectKeyFromObject(nroSchedObj), updatedNROObj)
if err != nil {
klog.Warningf("failed to get the NRO Scheduler resource: %v", err)
return false
@@ -151,20 +133,15 @@ func DeployNROScheduler() *nropv1.NUMAResourcesScheduler {
klog.Infof("condition: %v", cond)

return cond.Status == metav1.ConditionTrue
}).WithTimeout(5*time.Minute).WithPolling(10*time.Second).Should(BeTrue(), "NRO Scheduler condition did not become available")
}).WithTimeout(timeout).WithPolling(NROSchedulerPollingInterval).Should(BeTrue(), "NRO Scheduler condition did not become available")
return nroSchedObj
}

func TeardownNROScheduler(nroSched *nropv1.NUMAResourcesScheduler, timeout time.Duration) {
func TeardownNROScheduler(ctx context.Context, nroSched *nropv1.NUMAResourcesScheduler, timeout time.Duration) {
GinkgoHelper()

if nroSched != nil {
err := e2eclient.Client.Delete(context.TODO(), nroSched)
Expect(err).ToNot(HaveOccurred())

err = wait.With(e2eclient.Client).Interval(10*time.Second).Timeout(timeout).ForNUMAResourcesSchedulerDeleted(context.TODO(), nroSched)
Expect(err).ToNot(HaveOccurred(), "NROScheduler %q failed to be deleted", nroSched.Name)
}
Expect(nroSched).ToNot(BeNil())
Expect(e2eclient.Client.Delete(ctx, nroSched)).To(Succeed())
Expect(wait.With(e2eclient.Client).Interval(NROSchedulerPollingInterval).Timeout(timeout).ForNUMAResourcesSchedulerDeleted(ctx, nroSched)).To(Succeed(), "NROScheduler %q failed to be deleted", nroSched.Name)
}

func WaitForMCPsCondition(cli client.Client, ctx context.Context, mcps []*machineconfigv1.MachineConfigPool, condition machineconfigv1.MachineConfigPoolConditionType) error {
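
For orientation, here is a minimal sketch of how a Ginkgo suite would drive the reworked deploy helpers after this change. It is not part of this PR; the file name, suite wiring, and the 5-minute timeouts are assumptions, and the RunSpecs entry point is omitted.

package example_test

import (
	"context"
	"time"

	"github.com/onsi/ginkgo/v2"

	nropv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1"
	"github.com/openshift-kni/numaresources-operator/test/utils/configuration"
	"github.com/openshift-kni/numaresources-operator/test/utils/deploy"
)

var (
	deployer    deploy.Deployer
	nroSchedObj *nropv1.NUMAResourcesScheduler
)

var _ = ginkgo.BeforeSuite(func() {
	ctx := context.Background()

	// Deploy now takes the MCP update timeout explicitly instead of hardcoding it.
	deployer = deploy.NewForPlatform(configuration.Plat)
	_ = deployer.Deploy(ctx, configuration.MachineConfigPoolUpdateTimeout)

	// DeployNROScheduler now also takes a context and a timeout.
	nroSchedObj = deploy.DeployNROScheduler(ctx, 5*time.Minute)
})

var _ = ginkgo.AfterSuite(func() {
	ctx := context.Background()

	// TeardownNROScheduler now takes a context and expects a non-nil scheduler object.
	deploy.TeardownNROScheduler(ctx, nroSchedObj, 5*time.Minute)
	deployer.Teardown(ctx, 5*time.Minute)
})
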
6 changes: 3 additions & 3 deletions test/utils/deploy/hypershift.go
@@ -39,7 +39,7 @@ const (
ConfigDataKey = "config"
)

func (h *HyperShiftNRO) Deploy(ctx context.Context) *nropv1.NUMAResourcesOperator {
func (h *HyperShiftNRO) Deploy(ctx context.Context, _ time.Duration) *nropv1.NUMAResourcesOperator {
GinkgoHelper()

hostedClusterName, err := hypershift.GetHostedClusterName()
@@ -105,7 +105,7 @@ func (h *HyperShiftNRO) Teardown(ctx context.Context, timeout time.Duration) {
Namespace: h.NroObj.Status.DaemonSets[0].Namespace,
}
Eventually(func() bool {
if err := e2eclient.Client.Get(context.TODO(), key, cm); !errors.IsNotFound(err) {
if err := e2eclient.Client.Get(ctx, key, cm); !errors.IsNotFound(err) {
if err == nil {
klog.Warningf("configmap %s still exists", key.String())
} else {
@@ -114,7 +114,7 @@
return false
}
return true
}).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).Should(BeTrue())
}).WithTimeout(timeout).WithPolling(10 * time.Second).Should(BeTrue())
}
Expect(e2eclient.Client.Delete(ctx, h.NroObj)).To(Succeed())
}
4 changes: 2 additions & 2 deletions test/utils/deploy/kubernetes.go
@@ -21,7 +21,7 @@ type KubernetesNRO struct {

// Deploy returns a struct containing all the deployed objects,
// so it will be easier to introspect and delete them later.
func (k *KubernetesNRO) Deploy(ctx context.Context) *v1.NUMAResourcesOperator {
func (k *KubernetesNRO) Deploy(ctx context.Context, timeout time.Duration) *v1.NUMAResourcesOperator {
GinkgoHelper()

mcpObj := objects.TestMCP()
@@ -31,7 +31,7 @@ func (k *KubernetesNRO) Deploy(ctx context.Context) *v1.NUMAResourcesOperator {
k.McpObj = mcpObj
matchLabels := map[string]string{"test": "test"}

return k.OpenShiftNRO.deployWithLabels(ctx, matchLabels)
return k.OpenShiftNRO.deployWithLabels(ctx, timeout, matchLabels)
}

func (k *KubernetesNRO) Teardown(ctx context.Context, timeout time.Duration) {
8 changes: 4 additions & 4 deletions test/utils/deploy/openshift.go
@@ -35,13 +35,13 @@ type OpenShiftNRO struct {
// Deploy deploys NUMAResourcesOperator object and
// other dependencies, so the controller will be able to install TAS
// stack properly
func (o *OpenShiftNRO) Deploy(ctx context.Context) *nropv1.NUMAResourcesOperator {
func (o *OpenShiftNRO) Deploy(ctx context.Context, timeout time.Duration) *nropv1.NUMAResourcesOperator {
GinkgoHelper()

return o.deployWithLabels(ctx, objects.OpenshiftMatchLabels())
return o.deployWithLabels(ctx, timeout, objects.OpenshiftMatchLabels())
}

func (o *OpenShiftNRO) deployWithLabels(ctx context.Context, matchLabels map[string]string) *nropv1.NUMAResourcesOperator {
func (o *OpenShiftNRO) deployWithLabels(ctx context.Context, timeout time.Duration, matchLabels map[string]string) *nropv1.NUMAResourcesOperator {
GinkgoHelper()
nroObj := objects.TestNRO(objects.NROWithMCPSelector(matchLabels))
kcObj, err := objects.TestKC(matchLabels)
@@ -72,7 +72,7 @@ func (o *OpenShiftNRO) deployWithLabels(ctx context.Context, matchLabels map[str
o.NroObj = nroObj

By("unpausing the target MCPs")
Eventually(unpause).WithTimeout(configuration.MachineConfigPoolUpdateTimeout).WithPolling(configuration.MachineConfigPoolUpdateInterval).Should(Succeed())
Eventually(unpause).WithTimeout(timeout).WithPolling(configuration.MachineConfigPoolUpdateInterval).Should(Succeed())

By("updating the target NRO object")
Expect(e2eclient.Client.Get(ctx, client.ObjectKeyFromObject(nroObj), nroObj)).To(Succeed())