Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove VA deletion #158

Merged
merged 3 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 1 addition & 19 deletions controllers/selfnoderemediation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ func (r *SelfNodeRemediationReconciler) deleteResources(node *v1.Node) error {

namespaces := v1.NamespaceList{}
if err := r.Client.List(context.Background(), &namespaces); err != nil {
r.logger.Error(err, "failed to list namespaces", err)
r.logger.Error(err, "failed to list namespaces")
return err
}

Expand All @@ -355,24 +355,6 @@ func (r *SelfNodeRemediationReconciler) deleteResources(node *v1.Node) error {
}
}

volumeAttachments := &storagev1.VolumeAttachmentList{}
if err := r.Client.List(context.Background(), volumeAttachments); err != nil {
r.logger.Error(err, "failed to get volumeAttachments list")
return err
}
forceDeleteOption := &client.DeleteOptions{
GracePeriodSeconds: &zero,
}
for _, va := range volumeAttachments.Items {
if va.Spec.NodeName == node.Name {
err := r.Client.Delete(context.Background(), &va, forceDeleteOption)
if err != nil {
r.logger.Error(err, "failed to delete volumeAttachment", "name", va.Name)
return err
}
}
}

r.logger.Info("done deleting node resources", "node name", node.Name)

return nil
Expand Down
5 changes: 2 additions & 3 deletions controllers/tests/config/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,8 @@ var _ = BeforeSuite(func() {
Expect(err).ToNot(HaveOccurred())

k8sClient = &shared.K8sClientWrapper{
Client: k8sManager.GetClient(),
Reader: k8sManager.GetAPIReader(),
VaFailureMessage: "simulation of client error for VA",
Client: k8sManager.GetClient(),
Reader: k8sManager.GetAPIReader(),
}
Expect(k8sClient).ToNot(BeNil())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,11 @@ var _ = Describe("SNR Controller", func() {

AfterEach(func() {
k8sClient.ShouldSimulateFailure = false
k8sClient.ShouldSimulateVaFailure = false
k8sClient.ShouldSimulatePodDeleteFailure = false
isAdditionalSetupNeeded = false
deleteRemediations()
deleteSelfNodeRemediationPod()
deleteVolumeAttachment(vaName, false)
verifyVaDeleted(vaName)
//clear node's state, this is important to remove taints, label etc.
Expect(k8sClient.Update(context.Background(), getNode(shared.UnhealthyNodeName)))
Expect(k8sClient.Update(context.Background(), getNode(shared.PeerNodeName)))
Expand Down Expand Up @@ -184,8 +183,6 @@ var _ = Describe("SNR Controller", func() {

verifySelfNodeRemediationPodDoesntExist()

verifyVaDeleted(vaName)

verifyEvent("Normal", "DeleteResources", "Remediation process - finished deleting unhealthy node resources")

verifyFinalizerExists()
Expand Down Expand Up @@ -219,7 +216,7 @@ var _ = Describe("SNR Controller", func() {
It("The snr agent attempts to keep deleting node resources during temporary api-server failure", func() {
node := verifyNodeIsUnschedulable()

k8sClient.ShouldSimulateVaFailure = true
k8sClient.ShouldSimulatePodDeleteFailure = true

addUnschedulableTaint(node)

Expand All @@ -229,16 +226,16 @@ var _ = Describe("SNR Controller", func() {

verifyNoWatchdogFood()

verifySelfNodeRemediationPodDoesntExist()

verifyVaNotDeleted(vaName)

// The kube-api calls for VA fail intentionally. In this case, we expect the snr agent to try
// to delete node resources again. So LastError is set to the error every time Reconcile()
// is triggered. If it becomes another error, it means something unintended happens.
verifyLastErrorKeepsApiErrorForVa()
verifyLastErrorKeepsApiError()

k8sClient.ShouldSimulateVaFailure = false
k8sClient.ShouldSimulatePodDeleteFailure = false

verifySelfNodeRemediationPodDoesntExist()

deleteVolumeAttachment(vaName, true)

Expand Down Expand Up @@ -391,20 +388,6 @@ func deleteVolumeAttachment(vaName string, verifyExist bool) {
ExpectWithOffset(1, err).To(Succeed())
}

func verifyVaDeleted(vaName string) {
razo7 marked this conversation as resolved.
Show resolved Hide resolved
vaKey := client.ObjectKey{
Namespace: shared.Namespace,
Name: vaName,
}

EventuallyWithOffset(1, func() bool {
va := &storagev1.VolumeAttachment{}
err := k8sClient.Get(context.Background(), vaKey, va)
return apierrors.IsNotFound(err)

}, 5*time.Second, 100*time.Millisecond).Should(BeTrue())
}

func verifyVaNotDeleted(vaName string) {
vaKey := client.ObjectKey{
Namespace: shared.Namespace,
Expand All @@ -419,15 +402,15 @@ func verifyVaNotDeleted(vaName string) {
}, 5*time.Second, 250*time.Millisecond).Should(BeFalse())
}

func verifyLastErrorKeepsApiErrorForVa() {
func verifyLastErrorKeepsApiError() {
By("Verify that LastError in SNR status has been kept kube-api error for VA")
snr := &v1alpha1.SelfNodeRemediation{}
ConsistentlyWithOffset(1, func() bool {
snrNamespacedName := client.ObjectKey{Name: shared.UnhealthyNodeName, Namespace: snrNamespace}
if err := k8sClient.Client.Get(context.Background(), snrNamespacedName, snr); err != nil {
return false
}
return snr.Status.LastError == k8sClient.VaFailureMessage
return snr.Status.LastError == k8sClient.SimulatedFailureMessage
}, 5*time.Second, 250*time.Millisecond).Should(BeTrue())
}

Expand Down
6 changes: 3 additions & 3 deletions controllers/tests/controller/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ var _ = BeforeSuite(func() {
Expect(err).ToNot(HaveOccurred())

k8sClient = &shared.K8sClientWrapper{
Client: k8sManager.GetClient(),
Reader: k8sManager.GetAPIReader(),
VaFailureMessage: "simulation of client error for VA",
Client: k8sManager.GetClient(),
Reader: k8sManager.GetAPIReader(),
SimulatedFailureMessage: "simulation of client error for delete when listing namespace",
}
Expect(k8sClient).ToNot(BeNil())

Expand Down
16 changes: 8 additions & 8 deletions controllers/tests/shared/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"errors"
"time"

storagev1 "k8s.io/api/storage/v1"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand All @@ -20,10 +20,10 @@ const (

type K8sClientWrapper struct {
client.Client
Reader client.Reader
ShouldSimulateFailure bool
ShouldSimulateVaFailure bool
VaFailureMessage string
Reader client.Reader
ShouldSimulateFailure bool
ShouldSimulatePodDeleteFailure bool
SimulatedFailureMessage string
}

type MockCalculator struct {
Expand All @@ -34,9 +34,9 @@ type MockCalculator struct {
func (kcw *K8sClientWrapper) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
if kcw.ShouldSimulateFailure {
return errors.New("simulation of client error")
} else if kcw.ShouldSimulateVaFailure {
if _, ok := list.(*storagev1.VolumeAttachmentList); ok {
return errors.New(kcw.VaFailureMessage)
} else if kcw.ShouldSimulatePodDeleteFailure {
if _, ok := list.(*corev1.NamespaceList); ok {
return errors.New(kcw.SimulatedFailureMessage)
}
}
return kcw.Client.List(ctx, list, opts...)
Expand Down
40 changes: 1 addition & 39 deletions e2e/self_node_remediation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
gomegatypes "github.com/onsi/gomega/types"

v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
Expand Down Expand Up @@ -108,17 +107,14 @@ var _ = Describe("Self Node Remediation E2E", func() {

Context("Resource Deletion Strategy", func() {
var oldPodCreationTime time.Time
var va *storagev1.VolumeAttachment

BeforeEach(func() {
remediationStrategy = v1alpha1.ResourceDeletionRemediationStrategy
oldPodCreationTime = findSnrPod(node).CreationTimestamp.Time
va = createVolumeAttachment(node)
})

It("should delete pods and volume attachments", func() {
checkPodRecreated(node, oldPodCreationTime)
checkVaDeleted(va)
//Simulate NHC trying to delete SNR
deleteAndWait(snr)
snr = nil
Expand Down Expand Up @@ -167,17 +163,15 @@ var _ = Describe("Self Node Remediation E2E", func() {
// b) unhealthy
// - kill connectivity on one node
// - create SNR
// - verify node does reboot and and is deleted / re-created
// - verify node does reboot and is deleted / re-created

var snr *v1alpha1.SelfNodeRemediation
var va *storagev1.VolumeAttachment
var oldPodCreationTime time.Time

BeforeEach(func() {
killApiConnection(node, apiIPs, false)
snr = createSNR(node, v1alpha1.ResourceDeletionRemediationStrategy)
oldPodCreationTime = findSnrPod(node).CreationTimestamp.Time
va = createVolumeAttachment(node)
})

AfterEach(func() {
Expand All @@ -192,7 +186,6 @@ var _ = Describe("Self Node Remediation E2E", func() {
// - because the 2nd check has a small timeout only
checkReboot(node, oldBootTime)
checkPodRecreated(node, oldPodCreationTime)
checkVaDeleted(va)
if _, isExist := os.LookupEnv(skipLogsEnvVarName); !isExist {
// we can't check logs of unhealthy node anymore, check peer logs
peer := &workers.Items[1]
Expand Down Expand Up @@ -333,17 +326,14 @@ var _ = Describe("Self Node Remediation E2E", func() {

Context("Resource Deletion Strategy", func() {
var oldPodCreationTime time.Time
var va *storagev1.VolumeAttachment

BeforeEach(func() {
remediationStrategy = v1alpha1.ResourceDeletionRemediationStrategy
oldPodCreationTime = findSnrPod(controlPlaneNode).CreationTimestamp.Time
va = createVolumeAttachment(controlPlaneNode)
})

It("should delete pods and volume attachments", func() {
checkPodRecreated(controlPlaneNode, oldPodCreationTime)
checkVaDeleted(va)
//Simulate NHC trying to delete SNR
deleteAndWait(snr)
snr = nil
Expand All @@ -358,34 +348,6 @@ var _ = Describe("Self Node Remediation E2E", func() {
})
})

func checkVaDeleted(va *storagev1.VolumeAttachment) {
EventuallyWithOffset(1, func() bool {
newVa := &storagev1.VolumeAttachment{}
err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(va), newVa)
return errors.IsNotFound(err)

}, 10*time.Second, 250*time.Millisecond).Should(BeTrue())
}

func createVolumeAttachment(node *v1.Node) *storagev1.VolumeAttachment {
foo := "foo"
va := &storagev1.VolumeAttachment{
ObjectMeta: metav1.ObjectMeta{
Name: "some-va",
Namespace: testNamespace,
},
Spec: storagev1.VolumeAttachmentSpec{
Attacher: foo,
Source: storagev1.VolumeAttachmentSource{},
NodeName: node.Name,
},
}

va.Spec.Source.PersistentVolumeName = &foo
ExpectWithOffset(1, k8sClient.Create(context.Background(), va)).To(Succeed())
return va
}

func checkPodRecreated(node *v1.Node, oldPodCreationTime time.Time) bool {
return EventuallyWithOffset(1, func() time.Time {
pod := findSnrPod(node)
Expand Down