Skip to content

Commit

Permalink
fixes for flakes in raft removed tests (#29270)
Browse files Browse the repository at this point in the history
* fixes for flakes in raft removed tests

* one more fix
  • Loading branch information
miagilepner authored Jan 7, 2025
1 parent 6d566cb commit 4f32443
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 7 deletions.
12 changes: 12 additions & 0 deletions helper/testhelpers/testhelpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -1003,3 +1003,15 @@ func WaitForNodesExcludingSelectedStandbys(t testing.TB, cluster *vault.TestClus
// IsLocalOrRegressionTests reports whether the CI environment variable is
// unset or empty, or the VAULT_REGRESSION_TESTS environment variable is
// exactly "true".
func IsLocalOrRegressionTests() bool {
	// An empty CI variable is enough on its own; the regression-test flag is
	// only consulted when CI is set.
	if os.Getenv("CI") == "" {
		return true
	}
	return os.Getenv("VAULT_REGRESSION_TESTS") == "true"
}

// RaftDataDir returns the data directory of the raft backend used by core.
// It checks core.UnderlyingStorage first and falls back to
// core.UnderlyingHAStorage. If neither is a *raft.RaftBackend, the test is
// failed via t.Fatal.
func RaftDataDir(t testing.TB, core *vault.TestClusterCore) string {
	t.Helper()
	if backend, isRaft := core.UnderlyingStorage.(*raft.RaftBackend); isRaft {
		return backend.DataDir(t)
	}
	// The primary storage is not raft; try the HA storage instead.
	backend, isRaft := core.UnderlyingHAStorage.(*raft.RaftBackend)
	if !isRaft {
		t.Fatal("no raft backend")
	}
	return backend.DataDir(t)
}
5 changes: 5 additions & 0 deletions physical/raft/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ import (
"github.com/hashicorp/go-uuid"
)

// DataDir returns the backend's dataDir field. The testing.TB argument is
// used only to mark this method as a test helper.
func (b *RaftBackend) DataDir(t testing.TB) string {
	t.Helper()
	dir := b.dataDir
	return dir
}

// GetRaft builds the configuration returned by defaultRaftConfig for the
// given bootstrap and noStoreState flags and passes it to getRaftInternal
// with the three trailing optional arguments left nil. It returns
// getRaftInternal's results unchanged.
// NOTE(review): the returned string is presumably the backend's data
// directory — confirm against getRaftInternal.
func GetRaft(t testing.TB, bootstrap bool, noStoreState bool) (*RaftBackend, string) {
	conf := defaultRaftConfig(t, bootstrap, noStoreState)
	return getRaftInternal(t, bootstrap, conf, nil, nil, nil)
}
Expand Down
28 changes: 22 additions & 6 deletions vault/external_tests/raft/raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1378,14 +1378,26 @@ func TestRaftCluster_Removed(t *testing.T) {
})
require.NoError(t, err)

_, err = cluster.Cores[0].Client.Logical().Write("/sys/storage/raft/remove-peer", map[string]interface{}{
leaderClient := cluster.Cores[0].Client
_, err = leaderClient.Logical().Write("/sys/storage/raft/remove-peer", map[string]interface{}{
"server_id": follower.NodeID,
})
require.NoError(t, err)
followerClient.SetCheckRedirect(func(request *http.Request, requests []*http.Request) error {
require.Fail(t, "request caused a redirect", request.URL.Path)
return fmt.Errorf("no redirects allowed")
})
require.NoError(t, err)
configChanged := func() bool {
config, err := leaderClient.Logical().Read("sys/storage/raft/configuration")
require.NoError(t, err)
cfg := config.Data["config"].(map[string]interface{})
servers := cfg["servers"].([]interface{})
return len(servers) == 2
}
// raft config changes happen async, so block until the config change is
// applied
require.Eventually(t, configChanged, 3*time.Second, 50*time.Millisecond)

_, err = followerClient.Logical().Write("secret/foo", map[string]interface{}{
"test": "other_data",
})
Expand Down Expand Up @@ -1514,8 +1526,7 @@ func TestSysHealth_Raft(t *testing.T) {
var erroredResponse *api.Response

// now that the node can connect again, it will start getting the removed
// error when trying to connect. The code should be removed, and the ha
// connection will be nil because there is no ha connection
// error when trying to connect. The code should be removed
testhelpers.RetryUntil(t, 10*time.Second, func() error {
resp, err := followerClient.Logical().ReadRawWithData("sys/health", map[string][]string{
"perfstandbyok": {"true"},
Expand All @@ -1536,7 +1547,12 @@ func TestSysHealth_Raft(t *testing.T) {
})
r := parseHealthBody(t, erroredResponse)
require.True(t, true, *r.RemovedFromCluster)
require.Nil(t, r.HAConnectionHealthy)
// The HA connection health should either be nil or false. It's possible
// for it to be false if we got the response in between the node marking
// itself removed and sealing
if r.HAConnectionHealthy != nil {
require.False(t, *r.HAConnectionHealthy)
}
})
}

Expand All @@ -1555,7 +1571,7 @@ func TestRaftCluster_Removed_ReAdd(t *testing.T) {
"server_id": follower.NodeID,
})
require.NoError(t, err)
require.Eventually(t, follower.Sealed, 3*time.Second, 250*time.Millisecond)
require.Eventually(t, follower.Sealed, 10*time.Second, 250*time.Millisecond)

joinReq := &api.RaftJoinRequest{LeaderAPIAddr: leader.Address.String()}
_, err = follower.Client.Sys().RaftJoin(joinReq)
Expand Down
2 changes: 1 addition & 1 deletion vault/external_tests/raftha/raft_ha_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ func TestRaftHACluster_Removed_ReAdd(t *testing.T) {
"server_id": follower.NodeID,
})
require.NoError(t, err)
require.Eventually(t, follower.Sealed, 3*time.Second, 250*time.Millisecond)
require.Eventually(t, follower.Sealed, 10*time.Second, 250*time.Millisecond)

_, err = follower.Client.Sys().RaftJoin(joinReq)
require.Error(t, err)
Expand Down

0 comments on commit 4f32443

Please sign in to comment.