Skip to content

Commit

Permalink
fix(health): sync cooldown before health check (#2205)
Browse files Browse the repository at this point in the history
Signed-off-by: Hidde Beydals <[email protected]>
  • Loading branch information
hiddeco authored Jun 28, 2024
1 parent c6ddffa commit 9ae2aba
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 1 deletion.
27 changes: 26 additions & 1 deletion internal/argocd/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"slices"
"time"

"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -183,6 +184,29 @@ func (h *applicationHealth) GetApplicationHealth(
if healthState, err := stageHealthForAppSync(app, desiredRevision); err != nil {
return healthState, healthStatus, syncStatus, err
}

// If we care about revisions, and recently finished an operation, we
// should wait for a cooldown period before assessing the health of the
// application. This is to ensure the health check has a chance to run
// after the sync operation has finished.
//
// xref: https://github.com/akuity/kargo/issues/2196
//
// TODO: revisit this when https://github.com/argoproj/argo-cd/pull/18660
// is merged and released.
cooldown := app.Status.OperationState.FinishedAt.Time.Add(10 * time.Second)
if duration := time.Until(cooldown); duration > 0 {
time.Sleep(duration)

// Re-fetch the application to get the latest state.
if err := h.Client.Get(ctx, key, app); err != nil {
err = fmt.Errorf("error finding Argo CD Application %q in namespace %q: %w", key.Name, key.Namespace, err)
if client.IgnoreNotFound(err) == nil {
err = fmt.Errorf("unable to find Argo CD Application %q in namespace %q", key.Name, key.Namespace)
}
return kargoapi.HealthStateUnknown, healthStatus, syncStatus, err
}
}
}

// With all the above checks passed, we can now assume the Argo CD
Expand All @@ -197,7 +221,8 @@ func stageHealthForAppSync(app *argocd.Application, revision string) (kargoapi.H
switch {
case revision == "":
return kargoapi.HealthStateHealthy, nil
case app.Operation != nil && app.Operation.Sync != nil:
case app.Operation != nil && app.Operation.Sync != nil,
app.Status.OperationState == nil || app.Status.OperationState.FinishedAt.IsZero():
err := fmt.Errorf(
"Argo CD Application %q in namespace %q is being synced",
app.GetName(),
Expand Down
122 changes: 122 additions & 0 deletions internal/argocd/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"context"
"errors"
"testing"
"time"

"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/client/interceptor"
Expand Down Expand Up @@ -56,6 +58,9 @@ func TestApplicationHealth_EvaluateHealth(t *testing.T) {
Status: argocd.SyncStatusCodeSynced,
Revision: "v1.2.3",
},
OperationState: &argocd.OperationState{
FinishedAt: &metav1.Time{Time: metav1.Now().Add(-10 * time.Second)},
},
},
},
},
Expand Down Expand Up @@ -443,6 +448,9 @@ func TestApplicationHealth_GetApplicationHealth(t *testing.T) {
Status: argocd.SyncStatusCodeSynced,
Revision: "fake-revision",
},
OperationState: &argocd.OperationState{
FinishedAt: ptr.To(metav1.Now()),
},
},
},
freight: kargoapi.FreightReference{
Expand Down Expand Up @@ -535,6 +543,9 @@ func TestApplicationHealth_GetApplicationHealth(t *testing.T) {
Status: argocd.SyncStatusCodeSynced,
Revision: "fake-revision",
},
OperationState: &argocd.OperationState{
FinishedAt: &metav1.Time{Time: metav1.Now().Add(-10 * time.Second)},
},
},
},
freight: kargoapi.FreightReference{
Expand Down Expand Up @@ -583,6 +594,80 @@ func TestApplicationHealth_GetApplicationHealth(t *testing.T) {
testCase.assertions(t, state, healthStatus, syncStatus, err)
})
}

t.Run("waits for operation cooldown", func(t *testing.T) {
app := &argocd.Application{
Spec: argocd.ApplicationSpec{
Source: &argocd.ApplicationSource{
RepoURL: "https://example.com/universe/42",
},
},
Status: argocd.ApplicationStatus{
Health: argocd.HealthStatus{
Status: argocd.HealthStatusProgressing,
},
Sync: argocd.SyncStatus{
Status: argocd.SyncStatusCodeSynced,
Revision: "fake-revision",
},
OperationState: &argocd.OperationState{
FinishedAt: ptr.To(metav1.Now()),
},
},
}
freight := kargoapi.FreightReference{
Commits: []kargoapi.GitCommit{
{
RepoURL: "https://example.com/universe/42",
ID: "fake-revision",
},
},
}

var count int
c := fake.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{
Get: func(
_ context.Context,
_ client.WithWatch,
_ client.ObjectKey,
obj client.Object,
_ ...client.GetOption,
) error {
count++

appCopy := app.DeepCopy()
if count > 1 {
appCopy.Status.Health.Status = argocd.HealthStatusHealthy
}

*obj.(*argocd.Application) = *appCopy // nolint: forcetypeassert
return nil
},
})
h := &applicationHealth{
Client: c.Build(),
}

_, _, _, err := h.GetApplicationHealth(
context.TODO(),
types.NamespacedName{
Namespace: "fake-namespace",
Name: "fake-name",
},
freight,
)
elapsed := time.Since(app.Status.OperationState.FinishedAt.Time)

require.NoError(t, err)

// We wait for 10 seconds after the sync operation has finished.
// As such, the elapsed time should be greater than 8 seconds,
// but less than 12 seconds. To ensure we do not introduce
// flakes in the tests.
require.Greater(t, elapsed, 8*time.Second)
require.Less(t, elapsed, 12*time.Second)
require.Equal(t, 2, count)
})
}

func Test_stageHealthForAppSync(t *testing.T) {
Expand Down Expand Up @@ -612,6 +697,37 @@ func Test_stageHealthForAppSync(t *testing.T) {
require.Equal(t, kargoapi.HealthStateUnknown, state)
},
},
{
name: "no operation state",
revision: "fake-revision",
app: &argocd.Application{
Status: argocd.ApplicationStatus{
Sync: argocd.SyncStatus{
Revision: "fake-revision",
},
},
},
assertions: func(t *testing.T, state kargoapi.HealthState, err error) {
require.ErrorContains(t, err, "is being synced")
require.Equal(t, kargoapi.HealthStateUnknown, state)
},
},
{
name: "operation state without finished time",
revision: "fake-revision",
app: &argocd.Application{
Status: argocd.ApplicationStatus{
Sync: argocd.SyncStatus{
Revision: "fake-revision",
},
OperationState: &argocd.OperationState{},
},
},
assertions: func(t *testing.T, state kargoapi.HealthState, err error) {
require.ErrorContains(t, err, "is being synced")
require.Equal(t, kargoapi.HealthStateUnknown, state)
},
},
{
name: "sync revision mismatch",
revision: "fake-revision",
Expand All @@ -620,6 +736,9 @@ func Test_stageHealthForAppSync(t *testing.T) {
Sync: argocd.SyncStatus{
Revision: "other-fake-revision",
},
OperationState: &argocd.OperationState{
FinishedAt: ptr.To(metav1.Now()),
},
},
},
assertions: func(t *testing.T, state kargoapi.HealthState, err error) {
Expand All @@ -635,6 +754,9 @@ func Test_stageHealthForAppSync(t *testing.T) {
Sync: argocd.SyncStatus{
Revision: "fake-revision",
},
OperationState: &argocd.OperationState{
FinishedAt: &metav1.Time{Time: metav1.Now().Add(-10 * time.Second)},
},
},
},
assertions: func(t *testing.T, state kargoapi.HealthState, err error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ type OperationState struct {
Phase OperationPhase `json:"phase,omitempty"`
Message string `json:"message,omitempty"`
SyncResult *SyncOperationResult `json:"syncResult,omitempty"`
FinishedAt *metav1.Time `json:"finishedAt,omitempty"`
}

type SyncOperationResult struct {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9ae2aba

Please sign in to comment.