-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add healthz endpoint and graceful shutdown (#70)
- Loading branch information
Showing
8 changed files
with
353 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package controller | ||
|
||
import ( | ||
"fmt" | ||
"net/http" | ||
"sync" | ||
"time" | ||
|
||
"castai-agent/internal/config" | ||
) | ||
|
||
func NewHealthzProvider(cfg config.Config) *HealthzProvider { | ||
return &HealthzProvider{ | ||
cfg: cfg, | ||
initHardTimeout: cfg.Controller.PrepTimeout + cfg.Controller.InitialSleepDuration + cfg.Controller.InitializationTimeoutExtension, | ||
} | ||
} | ||
|
||
type HealthzProvider struct { | ||
cfg config.Config | ||
initHardTimeout time.Duration | ||
|
||
initializeStartedAt *time.Time | ||
lastHealthyActionAt *time.Time | ||
} | ||
|
||
func (h *HealthzProvider) Check(_ *http.Request) error { | ||
if h.lastHealthyActionAt != nil { | ||
if time.Since(*h.lastHealthyActionAt) > h.cfg.Controller.HealthySnapshotIntervalLimit { | ||
return fmt.Errorf("time since initialization or last snapshot sent is over the considered healthy limit of %s", h.cfg.Controller.HealthySnapshotIntervalLimit) | ||
} | ||
return nil | ||
} | ||
|
||
if h.initializeStartedAt != nil { | ||
if time.Since(*h.initializeStartedAt) > h.initHardTimeout { | ||
return fmt.Errorf("controller initialization is taking longer than the hard timeout of %s", h.initHardTimeout) | ||
} | ||
return nil | ||
} | ||
|
||
return fmt.Errorf("healthz not initialized") | ||
} | ||
|
||
func (h *HealthzProvider) Initializing() { | ||
if h.initializeStartedAt == nil { | ||
h.initializeStartedAt = nowPtr() | ||
h.lastHealthyActionAt = nil | ||
} | ||
} | ||
|
||
func (h *HealthzProvider) Initialized() { | ||
h.healthyAction() | ||
} | ||
|
||
func (h *HealthzProvider) SnapshotSent() { | ||
h.healthyAction() | ||
} | ||
|
||
func (h *HealthzProvider) healthyAction() { | ||
h.initializeStartedAt = nil | ||
h.lastHealthyActionAt = nowPtr() | ||
} | ||
|
||
// nowPtr returns a pointer to a freshly allocated time.Time holding the
// current time. Each call yields a distinct pointer.
func nowPtr() *time.Time {
	ts := new(time.Time)
	*ts = time.Now()
	return ts
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package controller | ||
|
||
import ( | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/require" | ||
|
||
"castai-agent/internal/config" | ||
) | ||
|
||
func TestNewHealthzProvider(t *testing.T) { | ||
t.Run("unhealthy statuses", func(t *testing.T) { | ||
cfg := config.Config{Controller: &config.Controller{ | ||
Interval: 15 * time.Second, | ||
PrepTimeout: time.Millisecond, | ||
InitialSleepDuration: time.Millisecond, | ||
InitializationTimeoutExtension: time.Millisecond, | ||
HealthySnapshotIntervalLimit: time.Millisecond, | ||
}} | ||
|
||
h := NewHealthzProvider(cfg) | ||
|
||
t.Run("should return not initialized error", func(t *testing.T) { | ||
require.Error(t, h.Check(nil)) | ||
}) | ||
|
||
t.Run("should return initialize timeout error", func(t *testing.T) { | ||
h.Initializing() | ||
|
||
time.Sleep(5 * time.Millisecond) | ||
|
||
require.Error(t, h.Check(nil)) | ||
}) | ||
|
||
t.Run("should return snapshot timeout error", func(t *testing.T) { | ||
h.healthyAction() | ||
|
||
time.Sleep(5 * time.Millisecond) | ||
|
||
require.Error(t, h.Check(nil)) | ||
}) | ||
}) | ||
|
||
t.Run("healthy statuses", func(t *testing.T) { | ||
cfg := config.Config{Controller: &config.Controller{ | ||
Interval: 15 * time.Second, | ||
PrepTimeout: 10 * time.Minute, | ||
InitialSleepDuration: 30 * time.Second, | ||
InitializationTimeoutExtension: 5 * time.Minute, | ||
HealthySnapshotIntervalLimit: 10 * time.Minute, | ||
}} | ||
|
||
h := NewHealthzProvider(cfg) | ||
|
||
t.Run("should return no error when still initializing", func(t *testing.T) { | ||
h.Initializing() | ||
|
||
require.NoError(t, h.Check(nil)) | ||
}) | ||
|
||
t.Run("should return no error when timeout after initialization has not yet passed", func(t *testing.T) { | ||
h.Initialized() | ||
|
||
require.NoError(t, h.Check(nil)) | ||
}) | ||
|
||
t.Run("should return no error when time since last snapshot has not been long", func(t *testing.T) { | ||
h.SnapshotSent() | ||
|
||
require.NoError(t, h.Check(nil)) | ||
}) | ||
}) | ||
} |
Oops, something went wrong.