diff --git a/api/v1alpha2/terraform_types.go b/api/v1alpha2/terraform_types.go index d838bb47..1e2e8cee 100644 --- a/api/v1alpha2/terraform_types.go +++ b/api/v1alpha2/terraform_types.go @@ -933,7 +933,7 @@ func (in *Terraform) ShouldRetry() bool { return true } - return in.GetReconciliationFailures() <= in.Spec.Remediation.Retries + return in.GetReconciliationFailures() < in.Spec.Remediation.Retries } func (in *TerraformSpec) GetAlwaysCleanupRunnerPod() bool { diff --git a/controllers/tc000243_remediation_retry_test.go b/controllers/tc000243_remediation_retry_test.go new file mode 100644 index 00000000..19183e97 --- /dev/null +++ b/controllers/tc000243_remediation_retry_test.go @@ -0,0 +1,210 @@ +package controllers + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/gomega" + + sourcev1 "github.com/fluxcd/source-controller/api/v1" + infrav1 "github.com/weaveworks/tf-controller/api/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +// +kubebuilder:docs-gen:collapse=Imports + +func Test_000243_remediation_retry_test(t *testing.T) { + Spec("This spec describes the behaviour of a Terraform resource that obtains a bad tar.gz file blob from its Source reference.") + It("should report the error and stop reconcile.") + + const ( + sourceName = "test-tf-controller-retry" + terraformName = "retry" + ) + g := NewWithT(t) + ctx := context.Background() + + Given("a GitRepository") + By("defining a new GitRepository resource") + testRepo := sourcev1.GitRepository{ + ObjectMeta: metav1.ObjectMeta{ + Name: sourceName, + Namespace: "flux-system", + }, + Spec: sourcev1.GitRepositorySpec{ + URL: "https://github.com/openshift-fluxv2-poc/podinfo", + Reference: &sourcev1.GitRepositoryRef{ + Branch: "master", + }, + Interval: metav1.Duration{Duration: time.Second * 30}, + }, + } + + By("creating the GitRepository resource in the cluster.") + It("should be created successfully.") + g.Expect(k8sClient.Create(ctx, &testRepo)).Should(Succeed()) + + Given("the GitRepository's reconciled status.") + By("setting the GitRepository's status, with the downloadable *bad* BLOB's URL, with the correct checksum.") + updatedTime := time.Now() + testRepo.Status = sourcev1.GitRepositoryStatus{ + ObservedGeneration: int64(1), + Conditions: []metav1.Condition{ + { + Type: "Ready", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{Time: updatedTime}, + Reason: "GitOperationSucceed", + Message: "Fetched revision: master/b8e362c206e3d0cbb7ed22ced771a0056455a2fb", + }, + }, + Artifact: &sourcev1.Artifact{ + Path: "gitrepository/flux-system/test-tf-controller/b8e362c206e3d0cbb7ed22ced771a0056455a2fb.tar.gz", + URL: server.URL() + "/bad.tar.gz", + Revision: "master/b8e362c206e3d0cbb7ed22ced771a0056455a2fb", + Digest: "sha256:196d115c43583ccd10107d631d8a594be542a75911f9832a5ec2c1e22b65387b", + LastUpdateTime: metav1.Time{Time: updatedTime}, + }, + } + It("should be updated successfully.") + g.Expect(k8sClient.Status().Update(ctx, &testRepo)).Should(Succeed()) + defer waitResourceToBeDelete(g, &testRepo) + + Given("a Terraform object with auto approve, and attaching it to the bad GitRepository resource.") + By("creating a new TF resource and attaching to the bad repo via `sourceRef`.") + var helloWorldTF infrav1.Terraform + err := helloWorldTF.FromBytes([]byte(fmt.Sprintf(` +apiVersion: infra.contrib.fluxcd.io/v1alpha2 +kind: Terraform +metadata: + name: %s + namespace: flux-system +spec: + approvePlan: auto + path: ./terraform-hello-world-example + remediation: + retries: 3 + retryInterval: 5s + sourceRef: + kind: GitRepository + name: %s + namespace: flux-system + interval: 10s +`, terraformName, sourceName)), runnerServer.Scheme) + g.Expect(err).ToNot(HaveOccurred()) + + It("should be created and attached successfully.") + g.Expect(k8sClient.Create(ctx, &helloWorldTF)).Should(Succeed()) + defer waitResourceToBeDelete(g, &helloWorldTF) + + By("checking that the TF resource existed inside the cluster.") + helloWorldTFKey := types.NamespacedName{Namespace: "flux-system", Name: terraformName} + createdHelloWorldTF := infrav1.Terraform{} + g.Eventually(func() bool { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + It("should be reconciled and contain some status conditions.") + By("checking that the TF resource's status conditions has 1 element.") + g.Eventually(func() int { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return -1 + } + return len(createdHelloWorldTF.Status.Conditions) + }, timeout, interval).Should(Equal(1)) + + It("should be an error") + By("checking that the Ready's reason of the TF resource become `ArtifactFailed`.") + type failedStatusCheckResult struct { + Type string + Reason string + Message string + Status metav1.ConditionStatus + Retries int64 + } + expected := failedStatusCheckResult{ + Type: "Ready", + Reason: "ArtifactFailed", + Message: "rpc error: code = Unknown desc = failed to untar artifact, error: requires gzip-compressed body: gzip: invalid header", + Status: metav1.ConditionFalse, + Retries: 3, + } + g.Eventually(func() interface{} { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return nil + } + for _, c := range createdHelloWorldTF.Status.Conditions { + if c.Type == "Ready" { + return failedStatusCheckResult{ + Type: c.Type, + Reason: c.Reason, + Message: c.Message, + Status: c.Status, + Retries: createdHelloWorldTF.Status.ReconciliationFailures, + } + } + } + return createdHelloWorldTF.Status + }, timeout, interval).Should(Equal(expected)) + + // It should never reach 4 when Retry is set to 3. + expected = failedStatusCheckResult{ + Retries: 4, + } + time.Sleep(15 * time.Second) + g.Eventually(func() interface{} { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return nil + } + for _, c := range createdHelloWorldTF.Status.Conditions { + if c.Type == "Ready" { + return failedStatusCheckResult{ + Retries: createdHelloWorldTF.Status.ReconciliationFailures, + } + } + } + return createdHelloWorldTF.Status + }, timeout, interval).Should(Not(Equal(expected))) + + // After changing the resource, retry count should be set back to 0. + // With setting Retries lower than the previous one, we can check if it was + // reset to 0 as it would never reach 2 from 3. + createdHelloWorldTF.Spec.Remediation.Retries = 2 + g.Expect(k8sClient.Update(ctx, &createdHelloWorldTF)).Should(Succeed()) + + expected = failedStatusCheckResult{ + Type: "Ready", + Reason: "ArtifactFailed", + Message: "rpc error: code = Unknown desc = failed to untar artifact, error: requires gzip-compressed body: gzip: invalid header", + Status: metav1.ConditionFalse, + Retries: 2, + } + g.Eventually(func() interface{} { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return nil + } + for _, c := range createdHelloWorldTF.Status.Conditions { + if c.Type == "Ready" { + return failedStatusCheckResult{ + Type: c.Type, + Reason: c.Reason, + Message: c.Message, + Status: c.Status, + Retries: createdHelloWorldTF.Status.ReconciliationFailures, + } + } + } + return createdHelloWorldTF.Status + }, timeout, interval).Should(Equal(expected)) +} diff --git a/controllers/tf_controller.go b/controllers/tf_controller.go index 62de3555..62f56146 100644 --- a/controllers/tf_controller.go +++ b/controllers/tf_controller.go @@ -295,6 +295,28 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( r.RecordReadiness(ctx, &terraform) } + // Reset retry count if necessary. + revisionChanged := sourceObj.GetArtifact().Revision != terraform.Status.LastAttemptedRevision + generationChanges := terraform.Generation != terraform.Status.ObservedGeneration + if revisionChanged || generationChanges { + log.Info("Reset reconciliation failures count. Reason: resource changed") + terraform.ResetReconciliationFailures() + if err := r.patchStatus(ctx, req.NamespacedName, terraform.Status); err != nil { + log.Error(err, "unable to update status after planing") + return ctrl.Result{Requeue: true}, err + } + } + + if !terraform.ShouldRetry() { + log.Info(fmt.Sprintf( + "Resource reached maximum number of retries (%d/%d). Generation: %d", + terraform.GetReconciliationFailures(), + terraform.Spec.Remediation.Retries, + terraform.GetGeneration(), + )) + return ctrl.Result{Requeue: false}, nil + } + if !isBeingDeleted(terraform) { // case 1: // If revision is changed, and there's no intend to apply, @@ -432,6 +454,7 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Check remediation. if reconcileErr == nil { + log.Info("Reset reconciliation failures count. Reason: successful reconciliation") reconciledTerraform.ResetReconciliationFailures() } else { reconciledTerraform.IncrementReconciliationFailures() @@ -469,11 +492,6 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( traceLog.Info("Record an event for the failure") r.event(ctx, *reconciledTerraform, sourceObj.GetArtifact().Revision, eventv1.EventSeverityError, reconcileErr.Error(), nil) - if !reconciledTerraform.ShouldRetry() { - log.Info(fmt.Sprintf("Resource reached maximum number of retries. Generation: %d", reconciledTerraform.GetGeneration())) - return ctrl.Result{Requeue: false}, nil - } - if reconciledTerraform.Spec.Remediation != nil { log.Info(fmt.Sprintf( "Reconciliation failed, retry (%d/%d) after %s. Generation: %d",