diff --git a/api/v1alpha1/zz_generated.conversion.go b/api/v1alpha1/zz_generated.conversion.go
index ff04e847..a09a5171 100644
--- a/api/v1alpha1/zz_generated.conversion.go
+++ b/api/v1alpha1/zz_generated.conversion.go
@@ -1494,6 +1494,7 @@ func autoConvert_v1beta1_Spec_To_v1alpha1_Spec(in *v1beta1.Spec, out *Spec, s co
     out.ValidateHealths = *(*[]ValidateHealth)(unsafe.Pointer(&in.ValidateHealths))
     // WARNING: in.Patches requires manual conversion: does not exist in peer-type
     // WARNING: in.DriftExclusions requires manual conversion: does not exist in peer-type
+    // WARNING: in.MaxConsecutiveFailures requires manual conversion: does not exist in peer-type
     out.ExtraLabels = *(*map[string]string)(unsafe.Pointer(&in.ExtraLabels))
     out.ExtraAnnotations = *(*map[string]string)(unsafe.Pointer(&in.ExtraAnnotations))
     return nil
diff --git a/api/v1beta1/clustersummary_types.go b/api/v1beta1/clustersummary_types.go
index 6e096111..f1bd66ae 100644
--- a/api/v1beta1/clustersummary_types.go
+++ b/api/v1beta1/clustersummary_types.go
@@ -91,6 +91,11 @@ type FeatureSummary struct {
     // +optional
     Hash []byte `json:"hash,omitempty"`

+    // ConsecutiveFailures is the number of consecutive times Sveltos has failed
+    // to deploy this feature in the workload cluster. It is reset to zero on a
+    // successful deployment or when the feature's configuration changes.
+    ConsecutiveFailures uint `json:"consecutiveFailures"`
+
     // Status represents the state of the feature in the workload cluster
     // +optional
     Status FeatureStatus `json:"status,omitempty"`
diff --git a/api/v1beta1/spec.go b/api/v1beta1/spec.go
index 26faee3b..805fbafe 100644
--- a/api/v1beta1/spec.go
+++ b/api/v1beta1/spec.go
@@ -724,6 +724,13 @@ type Spec struct {
     // +optional
     DriftExclusions []DriftExclusion `json:"driftExclusions,omitempty"`

+    // The maximum number of consecutive deployment failures that Sveltos will permit.
+    // After this many consecutive failures, the deployment will be considered failed, and Sveltos will stop retrying.
+    // This setting applies only to feature deployments, not resource removal.
+    // This field is optional. If not set, Sveltos default behavior is to keep retrying.
+    // +optional
+    MaxConsecutiveFailures *uint `json:"maxConsecutiveFailures,omitempty"`
+
     // ExtraLabels: These labels will be added by Sveltos to all Kubernetes resources deployed in
     // a managed cluster based on this ClusterProfile/Profile instance.
     // **Important:** If a resource deployed by Sveltos already has a label with a key present in
diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go
index 7611fe87..f415cff5 100644
--- a/api/v1beta1/zz_generated.deepcopy.go
+++ b/api/v1beta1/zz_generated.deepcopy.go
@@ -986,6 +986,11 @@ func (in *Spec) DeepCopyInto(out *Spec) {
             (*in)[i].DeepCopyInto(&(*out)[i])
         }
     }
+    if in.MaxConsecutiveFailures != nil {
+        in, out := &in.MaxConsecutiveFailures, &out.MaxConsecutiveFailures
+        *out = new(uint)
+        **out = **in
+    }
     if in.ExtraLabels != nil {
         in, out := &in.ExtraLabels, &out.ExtraLabels
         *out = make(map[string]string, len(*in))
diff --git a/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml b/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml
index 4c14681b..4e855e92 100644
--- a/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml
+++ b/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml
@@ -1638,6 +1638,13 @@ spec:
                   - namespace
                   type: object
                 type: array
+              maxConsecutiveFailures:
+                description: |-
+                  The maximum number of consecutive deployment failures that Sveltos will permit.
+                  After this many consecutive failures, the deployment will be considered failed, and Sveltos will stop retrying.
+                  This setting applies only to feature deployments, not resource removal.
+                  This field is optional. If not set, Sveltos default behavior is to keep retrying.
+                type: integer
               maxUpdate:
                 anyOf:
                 - type: integer
diff --git a/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml b/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml
index 37620308..162a868a 100644
--- a/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml
+++ b/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml
@@ -1666,6 +1666,13 @@ spec:
                     - namespace
                     type: object
                   type: array
+                maxConsecutiveFailures:
+                  description: |-
+                    The maximum number of consecutive deployment failures that Sveltos will permit.
+                    After this many consecutive failures, the deployment will be considered failed, and Sveltos will stop retrying.
+                    This setting applies only to feature deployments, not resource removal.
+                    This field is optional. If not set, Sveltos default behavior is to keep retrying.
+                  type: integer
                 maxUpdate:
                   anyOf:
                   - type: integer
@@ -2069,6 +2076,12 @@ spec:
                     FeatureSummary contains a summary of the state of a workload
                     cluster feature.
                   properties:
+                    consecutiveFailures:
+                      description: |-
+                        ConsecutiveFailures is the number of consecutive times Sveltos has failed
+                        to deploy this feature in the workload cluster. It is reset to zero on a
+                        successful deployment or when the feature's configuration changes.
+                      type: integer
                     deployedGroupVersionKind:
                       description: |-
                         DeployedGroupVersionKind contains all GroupVersionKinds deployed in either
@@ -2116,6 +2129,7 @@
                       - Removed
                       type: string
                     required:
+                    - consecutiveFailures
                     - featureID
                     type: object
                   type: array
diff --git a/config/crd/bases/config.projectsveltos.io_profiles.yaml b/config/crd/bases/config.projectsveltos.io_profiles.yaml
index 5e14b981..8b349937 100644
--- a/config/crd/bases/config.projectsveltos.io_profiles.yaml
+++ b/config/crd/bases/config.projectsveltos.io_profiles.yaml
@@ -1638,6 +1638,13 @@ spec:
                   - namespace
                   type: object
                 type: array
+              maxConsecutiveFailures:
+                description: |-
+                  The maximum number of consecutive deployment failures that Sveltos will permit.
+                  After this many consecutive failures, the deployment will be considered failed, and Sveltos will stop retrying.
+                  This setting applies only to feature deployments, not resource removal.
+                  This field is optional. If not set, Sveltos default behavior is to keep retrying.
+                type: integer
               maxUpdate:
                 anyOf:
                 - type: integer
diff --git a/controllers/clustersummary_controller.go b/controllers/clustersummary_controller.go
index 28b4ba47..e70715a0 100644
--- a/controllers/clustersummary_controller.go
+++ b/controllers/clustersummary_controller.go
@@ -1285,13 +1285,13 @@ func (r *ClusterSummaryReconciler) setFailureMessage(clusterSummaryScope *scope.
 func (r *ClusterSummaryReconciler) resetFeatureStatus(clusterSummaryScope *scope.ClusterSummaryScope, status configv1beta1.FeatureStatus) {
     if clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.HelmCharts != nil {
-        clusterSummaryScope.SetFeatureStatus(configv1beta1.FeatureHelm, status, nil)
+        clusterSummaryScope.SetFeatureStatus(configv1beta1.FeatureHelm, status, nil, nil)
     }

     if clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.PolicyRefs != nil {
-        clusterSummaryScope.SetFeatureStatus(configv1beta1.FeatureResources, status, nil)
+        clusterSummaryScope.SetFeatureStatus(configv1beta1.FeatureResources, status, nil, nil)
     }

     if clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.KustomizationRefs != nil {
-        clusterSummaryScope.SetFeatureStatus(configv1beta1.FeatureKustomize, status, nil)
+        clusterSummaryScope.SetFeatureStatus(configv1beta1.FeatureKustomize, status, nil, nil)
     }
 }
diff --git a/controllers/clustersummary_deployer.go b/controllers/clustersummary_deployer.go
index 276039a0..445a72ae 100644
--- a/controllers/clustersummary_deployer.go
+++ b/controllers/clustersummary_deployer.go
@@ -102,6 +102,7 @@ func (r *ClusterSummaryReconciler) deployFeature(ctx context.Context, clusterSum
     if !isConfigSame {
         logger.V(logs.LogDebug).Info(fmt.Sprintf("configuration has changed. Current hash %x. Previous hash %x",
             currentHash, hash))
+        clusterSummaryScope.ResetConsecutiveFailures(f.id)
     }

     if !r.shouldRedeploy(clusterSummaryScope, f, isConfigSame, logger) {
@@ -109,6 +110,14 @@ func (r *ClusterSummaryReconciler) deployFeature(ctx context.Context, clusterSum
         return nil
     }

+    return r.proceedDeployingFeature(ctx, clusterSummaryScope, f, isConfigSame, currentHash, logger)
+}
+
+func (r *ClusterSummaryReconciler) proceedDeployingFeature(ctx context.Context, clusterSummaryScope *scope.ClusterSummaryScope,
+    f feature, isConfigSame bool, currentHash []byte, logger logr.Logger) error {
+
+    clusterSummary := clusterSummaryScope.ClusterSummary
+
     var status *configv1beta1.FeatureStatus
     var resultError error

@@ -135,6 +144,12 @@ func (r *ClusterSummaryReconciler) deployFeature(ctx context.Context, clusterSum
             r.updateFeatureStatus(clusterSummaryScope, f.id, &nonRetriableStatus, currentHash, resultError, logger)
             return nil
         }
+        if r.maxNumberOfConsecutiveFailureReached(clusterSummaryScope, f, logger) {
+            nonRetriableStatus := configv1beta1.FeatureStatusFailedNonRetriable
+            resultError := errors.New("the maximum number of consecutive errors has been reached")
+            r.updateFeatureStatus(clusterSummaryScope, f.id, &nonRetriableStatus, currentHash, resultError, logger)
+            return nil
+        }
     }
     if *status == configv1beta1.FeatureStatusProvisioning {
         return fmt.Errorf("feature is still being provisioned")
@@ -355,6 +370,20 @@ func (r *ClusterSummaryReconciler) getHash(clusterSummaryScope *scope.ClusterSum
     return nil
 }

+// getConsecutiveFailures returns, if available, the number of consecutive failures corresponding to the
+// featureID
+func (r *ClusterSummaryReconciler) getConsecutiveFailures(clusterSummaryScope *scope.ClusterSummaryScope,
+    featureID configv1beta1.FeatureID) uint {
+
+    clusterSummary := clusterSummaryScope.ClusterSummary
+
+    if fs := getFeatureSummaryForFeatureID(clusterSummary, featureID); fs != nil {
+        return fs.ConsecutiveFailures
+    }
+
+    return 0
+}
+
 func (r *ClusterSummaryReconciler) updateFeatureStatus(clusterSummaryScope *scope.ClusterSummaryScope,
     featureID configv1beta1.FeatureID, status *configv1beta1.FeatureStatus, hash []byte, statusError error,
     logger logr.Logger) {
@@ -368,17 +397,20 @@
     switch *status {
     case configv1beta1.FeatureStatusProvisioned:
-        clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusProvisioned, hash)
+        failed := false
+        clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusProvisioned, hash, &failed)
         clusterSummaryScope.SetFailureMessage(featureID, nil)
     case configv1beta1.FeatureStatusRemoved:
-        clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusRemoved, hash)
+        failed := false
+        clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusRemoved, hash, &failed)
         clusterSummaryScope.SetFailureMessage(featureID, nil)
     case configv1beta1.FeatureStatusProvisioning:
-        clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusProvisioning, hash)
+        clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusProvisioning, hash, nil)
     case configv1beta1.FeatureStatusRemoving:
-        clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusRemoving, hash)
+        clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusRemoving, hash, nil)
     case configv1beta1.FeatureStatusFailed, configv1beta1.FeatureStatusFailedNonRetriable:
-        clusterSummaryScope.SetFeatureStatus(featureID, *status, hash)
+        failed := true
+        clusterSummaryScope.SetFeatureStatus(featureID, *status, hash, &failed)
         err := statusError.Error()
         clusterSummaryScope.SetFailureMessage(featureID, &err)
     }
 }
@@ -436,3 +468,19 @@

     return true
 }
+
+// maxNumberOfConsecutiveFailureReached returns true if max number of consecutive failures has been reached.
+func (r *ClusterSummaryReconciler) maxNumberOfConsecutiveFailureReached(clusterSummaryScope *scope.ClusterSummaryScope, f feature,
+    logger logr.Logger) bool {
+
+    if clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.MaxConsecutiveFailures != nil {
+        consecutiveFailures := r.getConsecutiveFailures(clusterSummaryScope, f.id)
+        if consecutiveFailures >= *clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.MaxConsecutiveFailures {
+            msg := fmt.Sprintf("max number of consecutive failures reached %d", consecutiveFailures)
+            logger.V(logs.LogDebug).Info(msg)
+            return true
+        }
+    }
+
+    return false
+}
diff --git a/controllers/conflicts.go b/controllers/conflicts.go
index 5f38d886..f277cc15 100644
--- a/controllers/conflicts.go
+++ b/controllers/conflicts.go
@@ -125,7 +125,7 @@ func requeueClusterSummary(ctx context.Context, featureID configv1beta1.FeatureI
     // Reset the hash a deployment happens again
     logger.V(logs.LogDebug).Info(fmt.Sprintf("reset status of ClusterSummary %s/%s",
         clusterSummary.Namespace, clusterSummary.Name))
-    clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusProvisioning, nil)
+    clusterSummaryScope.SetFeatureStatus(featureID, configv1beta1.FeatureStatusProvisioning, nil, nil)

     return c.Status().Update(ctx, clusterSummaryScope.ClusterSummary)
 }
diff --git a/controllers/handlers_helm.go b/controllers/handlers_helm.go
index 71b60836..1d52d701 100644
--- a/controllers/handlers_helm.go
+++ b/controllers/handlers_helm.go
@@ -1343,8 +1343,8 @@ func newRegistryClientWithTLS(certFile, keyFile, caFile string, insecureSkipTLSv
     return registryClient, nil
 }

-func actionConfigInit(namespace, kubeconfig string, registryOptions *registryClientOptions, enableClientCache bool,
-) (*action.Configuration, error) {
+func actionConfigInit(namespace, kubeconfig string, registryOptions *registryClientOptions,
+    enableClientCache bool) (*action.Configuration, error) {

     actionConfig := new(action.Configuration)
diff --git a/manifest/manifest.yaml b/manifest/manifest.yaml
index 8f883a6f..3cd69440 100644
--- a/manifest/manifest.yaml
+++ b/manifest/manifest.yaml
@@ -2419,6 +2419,13 @@ spec:
                   - namespace
                   type: object
                 type: array
+              maxConsecutiveFailures:
+                description: |-
+                  The maximum number of consecutive deployment failures that Sveltos will permit.
+                  After this many consecutive failures, the deployment will be considered failed, and Sveltos will stop retrying.
+                  This setting applies only to feature deployments, not resource removal.
+                  This field is optional. If not set, Sveltos default behavior is to keep retrying.
+                type: integer
               maxUpdate:
                 anyOf:
                 - type: integer
@@ -5269,6 +5276,13 @@ spec:
                     - namespace
                     type: object
                   type: array
+                maxConsecutiveFailures:
+                  description: |-
+                    The maximum number of consecutive deployment failures that Sveltos will permit.
+                    After this many consecutive failures, the deployment will be considered failed, and Sveltos will stop retrying.
+                    This setting applies only to feature deployments, not resource removal.
+                    This field is optional. If not set, Sveltos default behavior is to keep retrying.
+                  type: integer
                 maxUpdate:
                   anyOf:
                   - type: integer
@@ -5672,6 +5686,12 @@ spec:
                     FeatureSummary contains a summary of the state of a workload
                     cluster feature.
                   properties:
+                    consecutiveFailures:
+                      description: |-
+                        ConsecutiveFailures is the number of consecutive times Sveltos has failed
+                        to deploy this feature in the workload cluster. It is reset to zero on a
+                        successful deployment or when the feature's configuration changes.
+                      type: integer
                     deployedGroupVersionKind:
                       description: |-
                         DeployedGroupVersionKind contains all GroupVersionKinds deployed in either
@@ -5719,6 +5739,7 @@
                       - Removed
                       type: string
                     required:
+                    - consecutiveFailures
                     - featureID
                     type: object
                   type: array
@@ -7422,6 +7443,13 @@
                   - namespace
                   type: object
                 type: array
+              maxConsecutiveFailures:
+                description: |-
+                  The maximum number of consecutive deployment failures that Sveltos will permit.
+                  After this many consecutive failures, the deployment will be considered failed, and Sveltos will stop retrying.
+                  This setting applies only to feature deployments, not resource removal.
+                  This field is optional. If not set, Sveltos default behavior is to keep retrying.
+                type: integer
               maxUpdate:
                 anyOf:
                 - type: integer
diff --git a/pkg/scope/clustersummary.go b/pkg/scope/clustersummary.go
index 6b1ad2fd..610fa4bf 100644
--- a/pkg/scope/clustersummary.go
+++ b/pkg/scope/clustersummary.go
@@ -102,28 +102,51 @@ func (s *ClusterSummaryScope) initializeFeatureStatusSummary() {

 // SetFeatureStatus sets the feature status.
 func (s *ClusterSummaryScope) SetFeatureStatus(featureID configv1beta1.FeatureID,
-    status configv1beta1.FeatureStatus, hash []byte) {
+    status configv1beta1.FeatureStatus, hash []byte, failed *bool) {

     for i := range s.ClusterSummary.Status.FeatureSummaries {
         if s.ClusterSummary.Status.FeatureSummaries[i].FeatureID == featureID {
             s.ClusterSummary.Status.FeatureSummaries[i].Status = status
             s.ClusterSummary.Status.FeatureSummaries[i].Hash = hash
+            if failed != nil {
+                if *failed {
+                    s.ClusterSummary.Status.FeatureSummaries[i].ConsecutiveFailures++
+                } else {
+                    s.ClusterSummary.Status.FeatureSummaries[i].ConsecutiveFailures = 0
+                }
+            }
             return
         }
     }

     s.initializeFeatureStatusSummary()

+    consecutiveFailures := uint(0)
+    if failed != nil && *failed {
+        consecutiveFailures = 1
+    }
+
     s.ClusterSummary.Status.FeatureSummaries = append(
         s.ClusterSummary.Status.FeatureSummaries,
         configv1beta1.FeatureSummary{
-            FeatureID: featureID,
-            Status:    status,
-            Hash:      hash,
+            FeatureID:           featureID,
+            Status:              status,
+            Hash:                hash,
+            ConsecutiveFailures: consecutiveFailures,
         },
     )
 }

+// ResetConsecutiveFailures resets the status consecutiveFailures counter for featureID
+func (s *ClusterSummaryScope) ResetConsecutiveFailures(featureID configv1beta1.FeatureID) {
+    for i := range s.ClusterSummary.Status.FeatureSummaries {
+        if s.ClusterSummary.Status.FeatureSummaries[i].FeatureID == featureID {
+            s.ClusterSummary.Status.FeatureSummaries[i].ConsecutiveFailures = 0
+            return
+        }
+    }
+}
+
 // SetDependenciesMessage sets the dependencies status.
 func (s *ClusterSummaryScope) SetDependenciesMessage(message *string) {
     s.ClusterSummary.Status.Dependencies = message
diff --git a/pkg/scope/clustersummary_test.go b/pkg/scope/clustersummary_test.go
index e63a5918..d9b3b7f0 100644
--- a/pkg/scope/clustersummary_test.go
+++ b/pkg/scope/clustersummary_test.go
@@ -114,11 +114,13 @@ var _ = Describe("ClusterSummaryScope", func() {
         Expect(scope).ToNot(BeNil())

         hash := []byte(randomString())
-        scope.SetFeatureStatus(configv1beta1.FeatureResources, configv1beta1.FeatureStatusProvisioned, hash)
+        failed := false
+        scope.SetFeatureStatus(configv1beta1.FeatureResources, configv1beta1.FeatureStatusProvisioned, hash, &failed)
         Expect(clusterSummary.Status.FeatureSummaries).ToNot(BeNil())
         Expect(len(clusterSummary.Status.FeatureSummaries)).To(Equal(1))
         Expect(clusterSummary.Status.FeatureSummaries[0].FeatureID).To(Equal(configv1beta1.FeatureResources))
         Expect(clusterSummary.Status.FeatureSummaries[0].Hash).To(Equal(hash))
+        Expect(clusterSummary.Status.FeatureSummaries[0].ConsecutiveFailures).To(BeZero())
         Expect(clusterSummary.Status.FeatureSummaries[0].Status).To(Equal(configv1beta1.FeatureStatusProvisioned))
     })

@@ -183,17 +185,35 @@ var _ = Describe("ClusterSummaryScope", func() {
             Logger:         textlogger.NewLogger(textlogger.NewConfig()),
         }

+        consecutiveFailures := uint(3)
         clusterSummary.Status.FeatureSummaries = []configv1beta1.FeatureSummary{
-            {FeatureID: configv1beta1.FeatureHelm, Status: configv1beta1.FeatureStatusProvisioned, Hash: []byte(randomString())},
+            {
+                FeatureID: configv1beta1.FeatureHelm, Status: configv1beta1.FeatureStatusProvisioned,
+                Hash: []byte(randomString()), ConsecutiveFailures: consecutiveFailures},
         }

         scope, err := scope.NewClusterSummaryScope(params)
         Expect(err).ToNot(HaveOccurred())
         Expect(scope).ToNot(BeNil())

-        found := false
         hash := []byte(randomString())
-        scope.SetFeatureStatus(configv1beta1.FeatureResources, configv1beta1.FeatureStatusProvisioning, hash)
+        failed := true
+        found := false
+        scope.SetFeatureStatus(configv1beta1.FeatureHelm, configv1beta1.FeatureStatusProvisioning, hash, &failed)
+        Expect(clusterSummary.Status.FeatureSummaries).ToNot(BeNil())
+        Expect(len(clusterSummary.Status.FeatureSummaries)).To(Equal(1))
+        for i := range clusterSummary.Status.FeatureSummaries {
+            fs := clusterSummary.Status.FeatureSummaries[i]
+            if fs.FeatureID == configv1beta1.FeatureHelm {
+                found = true
+                Expect(fs.Status).To(Equal(configv1beta1.FeatureStatusProvisioning))
+                Expect(fs.ConsecutiveFailures).To(Equal(consecutiveFailures + 1))
+            }
+        }
+        Expect(found).To(Equal(true))
+
+        found = false
+        scope.SetFeatureStatus(configv1beta1.FeatureResources, configv1beta1.FeatureStatusProvisioning, hash, &failed)
         Expect(clusterSummary.Status.FeatureSummaries).ToNot(BeNil())
         Expect(len(clusterSummary.Status.FeatureSummaries)).To(Equal(2))
         for i := range clusterSummary.Status.FeatureSummaries {
@@ -201,6 +221,7 @@
             if fs.FeatureID == configv1beta1.FeatureResources {
                 found = true
                 Expect(fs.Status).To(Equal(configv1beta1.FeatureStatusProvisioning))
+                Expect(fs.ConsecutiveFailures).To(Equal(uint(1)))
             }
         }
         Expect(found).To(Equal(true))
@@ -214,8 +235,11 @@
             Logger:         textlogger.NewLogger(textlogger.NewConfig()),
         }

+        consecutiveFailures := uint(2)
         clusterSummary.Status.FeatureSummaries = []configv1beta1.FeatureSummary{
-            {FeatureID: configv1beta1.FeatureResources, Status: configv1beta1.FeatureStatusProvisioned, Hash: []byte(randomString())},
+            {
+                FeatureID: configv1beta1.FeatureResources, Status: configv1beta1.FeatureStatusProvisioned,
+                Hash: []byte(randomString()), ConsecutiveFailures: consecutiveFailures},
         }

         scope, err := scope.NewClusterSummaryScope(params)
@@ -223,11 +247,13 @@
         Expect(scope).ToNot(BeNil())

         hash := []byte(randomString())
-        scope.SetFeatureStatus(configv1beta1.FeatureResources, configv1beta1.FeatureStatusProvisioning, hash)
+        failed := false
+        scope.SetFeatureStatus(configv1beta1.FeatureResources, configv1beta1.FeatureStatusProvisioning, hash, &failed)
         Expect(clusterSummary.Status.FeatureSummaries).ToNot(BeNil())
         Expect(len(clusterSummary.Status.FeatureSummaries)).To(Equal(1))
         Expect(clusterSummary.Status.FeatureSummaries[0].Status).To(Equal(configv1beta1.FeatureStatusProvisioning))
         Expect(clusterSummary.Status.FeatureSummaries[0].Hash).To(Equal(hash))
+        Expect(clusterSummary.Status.FeatureSummaries[0].ConsecutiveFailures).To(BeZero())
     })

     It("SetFeatureStatus updates ClusterSummary Status FeatureSummary when nil", func() {
@@ -243,7 +269,7 @@
         Expect(scope).ToNot(BeNil())

         hash := []byte(randomString())
-        scope.SetFeatureStatus(configv1beta1.FeatureHelm, configv1beta1.FeatureStatusProvisioning, hash)
+        scope.SetFeatureStatus(configv1beta1.FeatureHelm, configv1beta1.FeatureStatusProvisioning, hash, nil)
         Expect(clusterSummary.Status.FeatureSummaries).ToNot(BeNil())
         Expect(len(clusterSummary.Status.FeatureSummaries)).To(Equal(1))
         Expect(clusterSummary.Status.FeatureSummaries[0].FeatureID).To(Equal(configv1beta1.FeatureHelm))
diff --git a/test/fv/continue_on_error_test.go b/test/fv/continue_on_error_test.go
index 283fcb09..6afd29fb 100644
--- a/test/fv/continue_on_error_test.go
+++ b/test/fv/continue_on_error_test.go
@@ -94,6 +94,8 @@ var _ = Describe("Feature", Serial, func() {
         Byf("Create a ClusterProfile matching Cluster %s/%s", kindWorkloadCluster.Namespace, kindWorkloadCluster.Name)
         clusterProfile := getClusterProfile(namePrefix, map[string]string{key: value})
         clusterProfile.Spec.SyncMode = configv1beta1.SyncModeContinuous
+        maxConsecutiveFailures := uint(3)
+        clusterProfile.Spec.MaxConsecutiveFailures = &maxConsecutiveFailures
         Expect(k8sClient.Create(context.TODO(), clusterProfile)).To(Succeed())

         verifyClusterProfileMatches(clusterProfile)
@@ -110,7 +112,7 @@
         // Cert-manager installation will fails as CRDs are not present and we are not deploying those
         // ALso sets timeout otherwise helm takes too long before giving up on cert-manager failures (due to CRDs not being installed)
         const two = 2
-        timeout := metav1.Duration{Duration: two * time.Minute}
+        helmTimeout := metav1.Duration{Duration: two * time.Minute}
         currentClusterProfile.Spec.HelmCharts = []configv1beta1.HelmChart{
             {
                 RepositoryURL: "https://charts.konghq.com",
@@ -130,7 +132,7 @@
                 ReleaseNamespace: "cert-manager",
                 HelmChartAction:  configv1beta1.HelmChartActionInstall,
                 Options: &configv1beta1.HelmOptions{
-                    Timeout: &timeout,
+                    Timeout: &helmTimeout,
                 },
             },
             {
@@ -187,6 +189,25 @@
             clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName,
             configv1beta1.FeatureResources, policies, nil)

+        const six = 6
+        Eventually(func() bool {
+            currentClusterSummary := &configv1beta1.ClusterSummary{}
+            err := k8sClient.Get(context.TODO(),
+                types.NamespacedName{Namespace: clusterSummary.Namespace, Name: clusterSummary.Name},
+                currentClusterSummary)
+            if err != nil {
+                return false
+            }
+            for i := range currentClusterSummary.Status.FeatureSummaries {
+                fs := &currentClusterSummary.Status.FeatureSummaries[i]
+                if fs.FeatureID == configv1beta1.FeatureHelm {
+                    return fs.FailureMessage != nil &&
+                        *fs.FailureMessage == "the maximum number of consecutive errors has been reached"
+                }
+            }
+            return false
+        }, six*time.Minute, pollingInterval).Should(BeTrue())
+
         deleteClusterProfile(clusterProfile)
     })
 })
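
Note (not part of the patch): a minimal usage sketch of the new knob. With this change applied, a profile can cap retries the same way the functional test above does. The import path, profile name, and the fact that ClusterProfile.Spec is the shared Spec type are assumptions based on the repo layout, not statements from the patch.

package main

import (
    "context"
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "sigs.k8s.io/controller-runtime/pkg/client"

    // Assumed import path for the v1beta1 API touched by this patch.
    configv1beta1 "github.com/projectsveltos/addon-controller/api/v1beta1"
)

// createRetryCappedProfile creates a ClusterProfile that stops retrying a
// failed feature deployment after three consecutive failures, mirroring the
// fv test in this patch. Name and sync mode are illustrative.
func createRetryCappedProfile(ctx context.Context, c client.Client) error {
    maxConsecutiveFailures := uint(3)
    profile := &configv1beta1.ClusterProfile{
        ObjectMeta: metav1.ObjectMeta{Name: "retry-capped"}, // hypothetical name
        Spec: configv1beta1.Spec{
            SyncMode:               configv1beta1.SyncModeContinuous,
            MaxConsecutiveFailures: &maxConsecutiveFailures,
        },
    }
    if err := c.Create(ctx, profile); err != nil {
        return fmt.Errorf("creating ClusterProfile: %w", err)
    }
    return nil
}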
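
The failed *bool parameter added to SetFeatureStatus is effectively tri-state: nil leaves the counter untouched (transient states such as Provisioning/Removing), false resets it (Provisioned/Removed), and true increments it (Failed/FailedNonRetriable). A self-contained sketch of that contract, independent of the scope types:

package main

import "fmt"

// updateConsecutiveFailures mirrors the counter logic SetFeatureStatus applies
// to a FeatureSummary in this patch: nil leaves the count unchanged, true
// increments it, false resets it to zero.
func updateConsecutiveFailures(count uint, failed *bool) uint {
    if failed == nil {
        return count
    }
    if *failed {
        return count + 1
    }
    return 0
}

func main() {
    t, f := true, false
    count := uint(0)
    count = updateConsecutiveFailures(count, &t)  // 1: first failure
    count = updateConsecutiveFailures(count, nil) // 1: provisioning, unchanged
    count = updateConsecutiveFailures(count, &t)  // 2: second consecutive failure
    count = updateConsecutiveFailures(count, &f)  // 0: success resets the streak
    fmt.Println(count)
}

With Spec.MaxConsecutiveFailures set, maxNumberOfConsecutiveFailureReached compares this counter against the cap on the next failed attempt and flips the feature to FeatureStatusFailedNonRetriable, which is exactly the failure message the fv test waits for.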