diff --git a/pkg/apicheck/check.go b/pkg/apicheck/check.go index 532e4c11..0eb454ba 100644 --- a/pkg/apicheck/check.go +++ b/pkg/apicheck/check.go @@ -193,7 +193,7 @@ func (c *ApiConnectivityCheck) getWorkerPeersResponse() peers.Response { return peers.Response{IsHealthy: false, Reason: peers.UnHealthyBecauseNodeIsIsolated} } else { c.config.Log.Info("Ignoring no peers response error, time is below threshold for no peers response", "time without peers response (seconds)", now.Sub(c.timeOfLastPeerResponse).Seconds(), "threshold (seconds)", c.config.MaxTimeForNoPeersResponse.Seconds()) - return peers.Response{IsHealthy: true, Reason: peers.HealthyBecauseNoPeersResponseNotReachedMaxAttempts} + return peers.Response{IsHealthy: true, Reason: peers.HealthyBecauseNoPeersResponseNotReachedTimeout} } } diff --git a/pkg/controlplane/manager.go b/pkg/controlplane/manager.go index 500e7ee9..10a9840a 100644 --- a/pkg/controlplane/manager.go +++ b/pkg/controlplane/manager.go @@ -65,7 +65,7 @@ func (manager *Manager) IsControlPlaneHealthy(workerPeerResponse peers.Response, case peers.UnHealthyBecauseNodeIsIsolated: return canOtherControlPlanesBeReached //reported healthy by worker peers - case peers.HealthyBecauseErrorsThresholdNotReached, peers.HealthyBecauseCRNotFound: + case peers.HealthyBecauseErrorsThresholdNotReached, peers.HealthyBecauseCRNotFound, peers.HealthyBecauseNoPeersResponseNotReachedTimeout: return true //controlPlane node has connection to most workers, we assume it's not isolated (or at least that the controlPlane node that does not have worker peers quorum will reboot) case peers.HealthyBecauseMostPeersCantAccessAPIServer: diff --git a/pkg/peers/response.go b/pkg/peers/response.go index ae17bebd..66535e4c 100644 --- a/pkg/peers/response.go +++ b/pkg/peers/response.go @@ -8,11 +8,11 @@ type Response struct { type reason string const ( - HealthyBecauseCRNotFound reason = "CR Not found, node is considered healthy" - HealthyBecauseErrorsThresholdNotReached reason = "Errors number hasn't reached threshold not querying peers yet, node is considered healthy" - HealthyBecauseNoPeersResponseNotReachedMaxAttempts reason = "No response from peer hasn't passed the non responsive time threshold so still considered healthy" - HealthyBecauseNoPeersWereFound reason = "No Peers where found, node is considered healthy" - HealthyBecauseMostPeersCantAccessAPIServer reason = "Most peers couldn't access API server, node is considered healthy" + HealthyBecauseCRNotFound reason = "CR Not found, node is considered healthy" + HealthyBecauseErrorsThresholdNotReached reason = "Errors number hasn't reached threshold not querying peers yet, node is considered healthy" + HealthyBecauseNoPeersResponseNotReachedTimeout reason = "No response from peer. The duration of peer not responding hasn't passed the threshold so still considered healthy" + HealthyBecauseNoPeersWereFound reason = "No Peers where found, node is considered healthy" + HealthyBecauseMostPeersCantAccessAPIServer reason = "Most peers couldn't access API server, node is considered healthy" UnHealthyBecausePeersResponse reason = "Node is reported unhealthy by it's peers" UnHealthyBecauseNodeIsIsolated reason = "Node is isolated, node is considered unhealthy"