rte: use per-nodegroup annotations
Now that we have per-nodegroup annotations, we can re-enable the custom
SELinux policy per node group (instead of per cluster), allowing granular
upgrades.

Signed-off-by: Francesco Romani <[email protected]>
ffromani committed Jan 7, 2025
1 parent 8274e2f commit 64857ba
Showing 5 changed files with 115 additions and 81 deletions.
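
For context, a minimal sketch (not part of this commit) of how a node group opts into the custom SELinux policy via the new per-nodegroup annotation; the constant and helper names are the ones used in the tests below, while the pool name is hypothetical:

	pn := "worker-cnf" // hypothetical pool name
	ng := nropv1.NodeGroup{
		PoolName: &pn,
		Annotations: map[string]string{
			annotations.SELinuxPolicyConfigAnnotation: annotations.SELinuxPolicyCustom,
		},
	}
	nro := testobjs.NewNUMAResourcesOperator(objectnames.DefaultNUMAResourcesOperatorCrName, ng)

Node groups without the annotation fall back to the cluster-wide setting, which is what makes the granular, per-pool upgrade possible.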
4 changes: 3 additions & 1 deletion controllers/numaresourcesoperator_controller.go
@@ -546,7 +546,9 @@ func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx
}
rteupdate.DaemonSetHashAnnotation(r.RTEManifests.Core.DaemonSet, cmHash)
}
rteupdate.SecurityContextConstraint(r.RTEManifests.Core.SecurityContextConstraint, annotations.IsCustomPolicyEnabled(instance.Annotations))

rteupdate.SecurityContextConstraint(r.RTEManifests.Core.SecurityContextConstraint, true) // force to legacy context
// SCC v2 needs no updates

existing = existing.WithManifestsUpdater(func(poolName string, gdm *rtestate.GeneratedDesiredManifest) error {
err := daemonsetUpdater(poolName, gdm)
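
With the per-nodegroup decision moved into the manifest updaters (see machineconfigpool.go and nodegroup.go below), the cluster-scoped SCC is now unconditionally rendered for the legacy custom policy, while SCC v2 relies on the RHCOS 4.18+ built-in policy. A hypothetical sketch of what an updater with the signature above could do; the real rteupdate.SecurityContextConstraint implementation lives elsewhere in this repository and may differ:

	// SecurityContextConstraint pins the SCC to the legacy custom policy when
	// requested; the v2 SCC needs no tweaks because the policy ships with the OS.
	func SecurityContextConstraint(scc *securityv1.SecurityContextConstraints, legacy bool) {
		if scc == nil || !legacy {
			return
		}
		scc.SELinuxContext = securityv1.SELinuxContextStrategyOptions{
			Type: securityv1.SELinuxStrategyMustRunAs,
			SELinuxOptions: &corev1.SELinuxOptions{
				Type: "rte.process", // hypothetical custom policy type name
			},
		}
	}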
164 changes: 97 additions & 67 deletions controllers/numaresourcesoperator_controller_test.go
@@ -333,9 +333,11 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
By("Update NRO to have just one NodeGroup")
Expect(reconciler.Client.Get(context.TODO(), nroKey, nro)).NotTo(HaveOccurred())

nro.Spec.NodeGroups = []nropv1.NodeGroup{{
PoolName: &pn1,
}}
nro.Spec.NodeGroups = []nropv1.NodeGroup{
{
PoolName: &pn1,
},
}
Expect(reconciler.Client.Update(context.TODO(), nro)).NotTo(HaveOccurred())

// immediate update reflection with no reboot needed -> no need to reconcile after this
@@ -1891,8 +1893,10 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {

})

Context("[openshift] emulating upgrade from 4.1X to 4.18 which has a built-in selinux policy for RTE pods", Label("platform:openshift"), func() {
When("[openshift] we have support for the RHCOS 4.18+ built-in selinux policy for RTE pods", Label("platform:openshift"), func() {
var nro *nropv1.NUMAResourcesOperator
var ng1 nropv1.NodeGroup
var ng2 nropv1.NodeGroup
var mcp1 *machineconfigv1.MachineConfigPool
var mcp2 *machineconfigv1.MachineConfigPool

@@ -1906,98 +1910,59 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
"test2": "test2",
}

ng1 := nropv1.NodeGroup{
ng1 = nropv1.NodeGroup{
MachineConfigPoolSelector: &metav1.LabelSelector{
MatchLabels: label1,
},
Annotations: map[string]string{},
}
ng2 := nropv1.NodeGroup{
ng2 = nropv1.NodeGroup{
MachineConfigPoolSelector: &metav1.LabelSelector{
MatchLabels: label2,
},
Annotations: map[string]string{},
}
nro = testobjs.NewNUMAResourcesOperator(objectnames.DefaultNUMAResourcesOperatorCrName, ng1, ng2)
// reconciling the NRO object with the cluster-wide custom policy annotation emulates the behavior of older versions
nro.Annotations = map[string]string{annotations.SELinuxPolicyConfigAnnotation: annotations.SELinuxPolicyCustom}

mcp1 = testobjs.NewMachineConfigPool("test1", label1, &metav1.LabelSelector{MatchLabels: label1}, &metav1.LabelSelector{MatchLabels: label1})
mcp2 = testobjs.NewMachineConfigPool("test2", label2, &metav1.LabelSelector{MatchLabels: label2}, &metav1.LabelSelector{MatchLabels: label2})

var err error
reconciler, err = NewFakeNUMAResourcesOperatorReconciler(platform.OpenShift, defaultOCPVersion, nro, mcp1, mcp2)
Expect(err).ToNot(HaveOccurred())

key := client.ObjectKeyFromObject(nro)
// on the first iteration we expect the CRDs and MCPs to be created; however, the reconciler waits one minute before updating the MC, so the RTE daemonsets and the complete status update are not achieved at this point
firstLoopResult, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expect(firstLoopResult).To(Equal(reconcile.Result{RequeueAfter: time.Minute}))

// Ensure mcp1 is ready
Expect(reconciler.Client.Get(context.TODO(), client.ObjectKeyFromObject(mcp1), mcp1)).To(Succeed())
mcp1.Status.Configuration.Source = []corev1.ObjectReference{
{
Name: objectnames.GetMachineConfigName(nro.Name, mcp1.Name),
},
}
mcp1.Status.Conditions = []machineconfigv1.MachineConfigPoolCondition{
{
Type: machineconfigv1.MachineConfigPoolUpdated,
Status: corev1.ConditionTrue,
},
}
Expect(reconciler.Client.Update(context.TODO(), mcp1)).To(Succeed())
nro = testobjs.NewNUMAResourcesOperator(objectnames.DefaultNUMAResourcesOperatorCrName, ng1, ng2)
})

// ensure mcp2 is ready
Expect(reconciler.Client.Get(context.TODO(), client.ObjectKeyFromObject(mcp2), mcp2)).To(Succeed())
mcp2.Status.Configuration.Source = []corev1.ObjectReference{
{
Name: objectnames.GetMachineConfigName(nro.Name, mcp2.Name),
},
}
mcp2.Status.Conditions = []machineconfigv1.MachineConfigPoolCondition{
{
Type: machineconfigv1.MachineConfigPoolUpdated,
Status: corev1.ConditionTrue,
},
}
Expect(reconciler.Client.Update(context.TODO(), mcp2)).To(Succeed())
It("should keep creating custom policy and use it with per-object annotation", func() {
nro.Annotations = map[string]string{annotations.SELinuxPolicyConfigAnnotation: annotations.SELinuxPolicyCustom}
reconciler = checkSELinuxPolicyProcessing(nro, mcp1, mcp2)
// TODO: check both DSes security context
})

// triggering a second reconcile will create the RTEs and fully update the statuses, moving the operator to the Available condition -> no more reconciliation needed, thus the result is clean
secondLoopResult, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expect(secondLoopResult).To(Equal(reconcile.Result{RequeueAfter: 0}))
It("should keep creating custom policy and use it with per-nodegroup annotation", func() {
nro.Spec.NodeGroups[0].Annotations[annotations.SELinuxPolicyConfigAnnotation] = annotations.SELinuxPolicyCustom
reconciler = checkSELinuxPolicyProcessing(nro, mcp1, mcp2)
// TODO: check ng1 DS security context
})

By("Check DaemonSets are created")
mcp1DSKey := client.ObjectKey{
Name: objectnames.GetComponentName(nro.Name, mcp1.Name),
Namespace: testNamespace,
}
ds := &appsv1.DaemonSet{}
Expect(reconciler.Client.Get(context.TODO(), mcp1DSKey, ds)).ToNot(HaveOccurred())
It("should delete existing mc on upgrade", func() {
// reconciling the NRO object with the cluster-wide custom policy annotation emulates the behavior of older versions
nro.Annotations = map[string]string{annotations.SELinuxPolicyConfigAnnotation: annotations.SELinuxPolicyCustom}

mcp2DSKey := client.ObjectKey{
Name: objectnames.GetComponentName(nro.Name, mcp2.Name),
Namespace: testNamespace,
}
Expect(reconciler.Client.Get(context.TODO(), mcp2DSKey, ds)).To(Succeed())
reconciler = checkSELinuxPolicyProcessing(nro, mcp1, mcp2)

By("upgrading from 4.1X to 4.18")
Expect(reconciler.Client.Get(context.TODO(), client.ObjectKeyFromObject(nro), nro)).To(Succeed())
nro.Annotations = map[string]string{}
Expect(reconciler.Client.Update(context.TODO(), nro)).To(Succeed())

key := client.ObjectKeyFromObject(nro)
// removing the annotation triggers a reboot, which requires a resync after 1 minute
thirdLoopResult, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expect(thirdLoopResult).To(Equal(reconcile.Result{RequeueAfter: time.Minute}))
})
It("should delete existing mc", func() {

mc1Key := client.ObjectKey{
Name: objectnames.GetMachineConfigName(nro.Name, mcp1.Name),
}
mc := &machineconfigv1.MachineConfig{}
err := reconciler.Client.Get(context.TODO(), mc1Key, mc)
err = reconciler.Client.Get(context.TODO(), mc1Key, mc)
Expect(apierrors.IsNotFound(err)).To(BeTrue(), "MachineConfig %s expected to be deleted; err=%v", mc1Key.Name, err)

mc2Key := client.ObjectKey{
@@ -2007,10 +1972,75 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Expect(apierrors.IsNotFound(err)).To(BeTrue(), "MachineConfig %s expected to be deleted; err=%v", mc2Key.Name, err)
})
})

})
})

func checkSELinuxPolicyProcessing(nro *nropv1.NUMAResourcesOperator, mcp1, mcp2 *machineconfigv1.MachineConfigPool) *NUMAResourcesOperatorReconciler {
GinkgoHelper()

var err error
var reconciler *NUMAResourcesOperatorReconciler
reconciler, err = NewFakeNUMAResourcesOperatorReconciler(platform.OpenShift, defaultOCPVersion, nro, mcp1, mcp2)
Expect(err).ToNot(HaveOccurred())

key := client.ObjectKeyFromObject(nro)
// on the first iteration we expect the CRDs and MCPs to be created; however, the reconciler waits one minute before updating the MC, so the RTE daemonsets and the complete status update are not achieved at this point
firstLoopResult, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expect(firstLoopResult).To(Equal(reconcile.Result{RequeueAfter: time.Minute}))

// Ensure mcp1 is ready
Expect(reconciler.Client.Get(context.TODO(), client.ObjectKeyFromObject(mcp1), mcp1)).To(Succeed())
mcp1.Status.Configuration.Source = []corev1.ObjectReference{
{
Name: objectnames.GetMachineConfigName(nro.Name, mcp1.Name),
},
}
mcp1.Status.Conditions = []machineconfigv1.MachineConfigPoolCondition{
{
Type: machineconfigv1.MachineConfigPoolUpdated,
Status: corev1.ConditionTrue,
},
}
Expect(reconciler.Client.Update(context.TODO(), mcp1)).To(Succeed())

// ensure mcp2 is ready
Expect(reconciler.Client.Get(context.TODO(), client.ObjectKeyFromObject(mcp2), mcp2)).To(Succeed())
mcp2.Status.Configuration.Source = []corev1.ObjectReference{
{
Name: objectnames.GetMachineConfigName(nro.Name, mcp2.Name),
},
}
mcp2.Status.Conditions = []machineconfigv1.MachineConfigPoolCondition{
{
Type: machineconfigv1.MachineConfigPoolUpdated,
Status: corev1.ConditionTrue,
},
}
Expect(reconciler.Client.Update(context.TODO(), mcp2)).To(Succeed())

// triggering a second reconcile will create the RTEs and fully update the statuses, moving the operator to the Available condition -> no more reconciliation needed, thus the result is clean
secondLoopResult, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expect(secondLoopResult).To(Equal(reconcile.Result{RequeueAfter: 0}))

By("Check DaemonSets are created")
mcp1DSKey := client.ObjectKey{
Name: objectnames.GetComponentName(nro.Name, mcp1.Name),
Namespace: testNamespace,
}
ds := &appsv1.DaemonSet{}
Expect(reconciler.Client.Get(context.TODO(), mcp1DSKey, ds)).ToNot(HaveOccurred())

mcp2DSKey := client.ObjectKey{
Name: objectnames.GetComponentName(nro.Name, mcp2.Name),
Namespace: testNamespace,
}
Expect(reconciler.Client.Get(context.TODO(), mcp2DSKey, ds)).To(Succeed())

return reconciler
}

func getConditionByType(conditions []metav1.Condition, conditionType string) *metav1.Condition {
for i := range conditions {
c := &conditions[i]
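
The refactor extracts the shared reconcile-and-assert flow into the checkSELinuxPolicyProcessing helper introduced above, so each It covers exactly one annotation combination. A hypothetical further case following the same pattern, not part of this commit:

	It("should keep creating custom policy when only the second node group opts in", func() {
		nro.Spec.NodeGroups[1].Annotations[annotations.SELinuxPolicyConfigAnnotation] = annotations.SELinuxPolicyCustom
		reconciler = checkSELinuxPolicyProcessing(nro, mcp1, mcp2)
	})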
4 changes: 3 additions & 1 deletion pkg/objectstate/rte/machineconfigpool.go
@@ -27,6 +27,7 @@ import (

nropv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1"
nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1/helper/nodegroup"
"github.com/openshift-kni/numaresources-operator/internal/api/annotations"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/compare"
@@ -95,12 +96,13 @@ func (obj machineConfigPoolFinder) FindState(mf Manifests, tree nodegroupv1.Tree
updateError = fmt.Errorf("the machine config pool %q does not have node selector", mcp.Name)
}

cpEnabled := obj.em.customPolicyEnabled || annotations.IsCustomPolicyEnabled(tree.NodeGroup.Annotations)
gdm := GeneratedDesiredManifest{
ClusterPlatform: obj.em.plat,
MachineConfigPool: mcp.DeepCopy(),
NodeGroup: tree.NodeGroup.DeepCopy(),
DaemonSet: desiredDaemonSet,
IsCustomPolicyEnabled: obj.em.customPolicyEnabled,
IsCustomPolicyEnabled: cpEnabled,
}

err := obj.em.updater(mcp.Name, &gdm)
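
Both finders now OR the cluster-scoped flag with the per-nodegroup annotation, so a single opted-in pool keeps the custom policy even when the cluster default is the built-in one. A minimal sketch of what the helper presumably checks, inferred from the constants used in the tests above; the real implementation lives in internal/api/annotations and may differ:

	func IsCustomPolicyEnabled(annots map[string]string) bool {
		return annots[SELinuxPolicyConfigAnnotation] == SELinuxPolicyCustom
	}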
4 changes: 3 additions & 1 deletion pkg/objectstate/rte/nodegroup.go
@@ -25,6 +25,7 @@ import (

nropv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1"
nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1/helper/nodegroup"
"github.com/openshift-kni/numaresources-operator/internal/api/annotations"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/compare"
@@ -79,12 +80,13 @@ func (obj nodeGroupFinder) FindState(mf Manifests, tree nodegroupv1.Tree) []obje
HyperShiftNodePoolLabel: poolName,
}

cpEnabled := obj.em.customPolicyEnabled || annotations.IsCustomPolicyEnabled(tree.NodeGroup.Annotations)
gdm := GeneratedDesiredManifest{
ClusterPlatform: obj.em.plat,
MachineConfigPool: nil,
NodeGroup: tree.NodeGroup.DeepCopy(),
DaemonSet: desiredDaemonSet,
IsCustomPolicyEnabled: obj.em.customPolicyEnabled,
IsCustomPolicyEnabled: cpEnabled,
}

err := obj.em.updater(poolName, &gdm)
20 changes: 9 additions & 11 deletions pkg/objectstate/rte/rte.go
@@ -114,6 +114,7 @@ func (em *ExistingManifests) MachineConfigsState(mf Manifests) ([]objectstate.Ob
if mf.Core.MachineConfig == nil {
return ret, nullMachineConfigPoolUpdated
}
enabledMCCount := 0
for _, tree := range em.trees {
for _, mcp := range tree.MachineConfigPools {
mcName := objectnames.GetMachineConfigName(em.instance.Name, mcp.Name)
@@ -128,7 +129,7 @@ func (em *ExistingManifests) MachineConfigsState(mf Manifests) ([]objectstate.Ob
continue
}

if !em.customPolicyEnabled {
if !em.customPolicyEnabled && !annotations.IsCustomPolicyEnabled(tree.NodeGroup.Annotations) {
// caution here: we want a *nil interface value*, not an *interface which points to nil*.
// the latter would lead to apparently correct code leading to runtime panics. See:
// https://trstringer.com/go-nil-interface-and-interface-with-nil-concrete-value/
@@ -157,17 +158,14 @@ func (em *ExistingManifests) MachineConfigsState(mf Manifests) ([]objectstate.Ob
Merge: merge.ObjectForUpdate,
},
)
enabledMCCount++
}
}

return ret, em.getWaitMCPUpdatedFunc()
}

func (em *ExistingManifests) getWaitMCPUpdatedFunc() MCPWaitForUpdatedFunc {
if em.customPolicyEnabled {
return IsMachineConfigPoolUpdated
if enabledMCCount > 0 {
return ret, IsMachineConfigPoolUpdated
}
return IsMachineConfigPoolUpdatedAfterDeletion
return ret, IsMachineConfigPoolUpdatedAfterDeletion
}

func nullMachineConfigPoolUpdated(instanceName string, mcp *machineconfigv1.MachineConfigPool) bool {
@@ -286,11 +284,11 @@ func (em *ExistingManifests) State(mf Manifests) []objectstate.ObjectState {
Merge: merge.ObjectForUpdate,
})
}
if mf.SecurityContextConstraintV2 != nil {
if mf.Core.SecurityContextConstraintV2 != nil {
ret = append(ret, objectstate.ObjectState{
Existing: em.existing.SecurityContextConstraintV2,
Existing: em.existing.Core.SecurityContextConstraintV2,
Error: em.errs.Core.SCCv2,
Desired: mf.SecurityContextConstraintV2.DeepCopy(),
Desired: mf.Core.SecurityContextConstraintV2.DeepCopy(),
Compare: compare.Object,
Merge: merge.ObjectForUpdate,
})
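
The "caution" comment in the hunk above points at a classic Go pitfall: storing a typed nil value in an interface produces an interface that compares non-nil. A self-contained illustration (the type name mirrors the one in this file, with a simplified signature):

	package main

	import "fmt"

	type MCPWaitForUpdatedFunc func() bool

	func typedNil() MCPWaitForUpdatedFunc {
		return nil // nil concrete value, carried with a concrete type
	}

	func main() {
		var i interface{}

		i = typedNil()        // interface holds (MCPWaitForUpdatedFunc, nil)
		fmt.Println(i == nil) // false: not a nil interface value

		i = nil               // a true nil interface value
		fmt.Println(i == nil) // true
	}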
