Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Failsafes to prevent a consensus round from taking too long #5277

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/consensus.md
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ struct ConsensusResult
ConsensusTimer roundTime;

// Indicates state in which consensus ended. Once in the accept phase
// will be either Yes or MovedOn
// will be either Yes or MovedOn or Expired
ConsensusState state = ConsensusState::No;
};

Expand Down
175 changes: 168 additions & 7 deletions src/test/consensus/Consensus_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <xrpld/consensus/ConsensusProposal.h>
#include <xrpl/beast/clock/manual_clock.h>
#include <xrpl/beast/unit_test.h>
#include <xrpl/json/to_string.h>
#include <utility>

namespace ripple {
Expand All @@ -40,6 +41,7 @@ class Consensus_test : public beast::unit_test::suite
testShouldCloseLedger()
{
using namespace std::chrono_literals;
testcase("should close ledger");

// Use default parameters
ConsensusParms const p{};
Expand Down Expand Up @@ -78,53 +80,109 @@ class Consensus_test : public beast::unit_test::suite
testCheckConsensus()
{
using namespace std::chrono_literals;
testcase("check consensus");

// Use default parameterss
ConsensusParms const p{};

///////////////
// Disputes still in doubt
//
// Not enough time has elapsed
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 2, 0, 3s, 2s, p, true, journal_));
checkConsensus(10, 2, 2, 0, 3s, 2s, false, p, true, journal_));

// If not enough peers have propsed, ensure
// more time for proposals
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 2, 0, 3s, 4s, p, true, journal_));
checkConsensus(10, 2, 2, 0, 3s, 4s, false, p, true, journal_));

// Enough time has elapsed and we all agree
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 2, 2, 0, 3s, 10s, p, true, journal_));
checkConsensus(10, 2, 2, 0, 3s, 10s, false, p, true, journal_));

// Enough time has elapsed and we don't yet agree
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 1, 0, 3s, 10s, p, true, journal_));
checkConsensus(10, 2, 1, 0, 3s, 10s, false, p, true, journal_));

// Our peers have moved on
// Enough time has elapsed and we all agree
BEAST_EXPECT(
ConsensusState::MovedOn ==
checkConsensus(10, 2, 1, 8, 3s, 10s, p, true, journal_));
checkConsensus(10, 2, 1, 8, 3s, 10s, false, p, true, journal_));

// If no peers, don't agree until time has passed.
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(0, 0, 0, 0, 3s, 10s, p, true, journal_));
checkConsensus(0, 0, 0, 0, 3s, 10s, false, p, true, journal_));

// Agree if no peers and enough time has passed.
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(0, 0, 0, 0, 3s, 16s, p, true, journal_));
checkConsensus(0, 0, 0, 0, 3s, 16s, false, p, true, journal_));

// Expire if too much time has passed without agreement
BEAST_EXPECT(
ConsensusState::Expired ==
checkConsensus(10, 8, 1, 0, 1s, 19s, false, p, true, journal_));
///////////////
// Stable state
//
// Not enough time has elapsed
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 2, 0, 3s, 2s, true, p, true, journal_));

// If not enough peers have propsed, ensure
// more time for proposals
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(10, 2, 2, 0, 3s, 4s, true, p, true, journal_));

// Enough time has elapsed and we all agree
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 2, 2, 0, 3s, 10s, true, p, true, journal_));

// Enough time has elapsed and we don't yet agree, but there's nothing
// left to dispute
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 2, 1, 0, 3s, 10s, true, p, true, journal_));

// Our peers have moved on
// Enough time has elapsed and we all agree, nothing left to dispute
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 2, 1, 8, 3s, 10s, true, p, true, journal_));

// If no peers, don't agree until time has passed.
BEAST_EXPECT(
ConsensusState::No ==
checkConsensus(0, 0, 0, 0, 3s, 10s, true, p, true, journal_));

// Agree if no peers and enough time has passed.
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(0, 0, 0, 0, 3s, 16s, true, p, true, journal_));

// We are done if there's nothing left to dispute, no matter how much
// time has passed
BEAST_EXPECT(
ConsensusState::Yes ==
checkConsensus(10, 8, 1, 0, 1s, 19s, true, p, true, journal_));
}

void
testStandalone()
{
using namespace std::chrono_literals;
using namespace csf;
testcase("standalone");

Sim s;
PeerGroup peers = s.createGroup(1);
Expand All @@ -149,6 +207,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("peers agree");

ConsensusParms const parms{};
Sim sim;
Expand Down Expand Up @@ -186,6 +245,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("slow peers");

// Several tests of a complete trust graph with a subset of peers
// that have significantly longer network delays to the rest of the
Expand Down Expand Up @@ -351,6 +411,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("close time disagree");

// This is a very specialized test to get ledgers to disagree on
// the close time. It unfortunately assumes knowledge about current
Expand Down Expand Up @@ -417,6 +478,8 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("wrong LCL");

// Specialized test to exercise a temporary fork in which some peers
// are working on an incorrect prior ledger.

Expand Down Expand Up @@ -589,6 +652,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("consensus close time rounding");
Copy link
Collaborator

@vlntb vlntb Feb 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The re-assignment below in the test

for (Peer* peer : network)
            peer->consensusParms = parms;

does not make sense and should be removed, since parms is not modified from its default values.
It, however, conflicts with the other suggested change to make avalancheCutoffs const


// This is a specialized test engineered to yield ledgers with different
// close times even though the peers believe they had close time
Expand Down Expand Up @@ -692,6 +756,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("fork");

std::uint32_t numPeers = 10;
// Vary overlap between two UNLs
Expand Down Expand Up @@ -748,6 +813,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("hub network");

// Simulate a set of 5 validators that aren't directly connected but
// rely on a single hub node for communication
Expand Down Expand Up @@ -835,6 +901,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("preferred by branch");

// Simulate network splits that are prevented from forking when using
// preferred ledger by trie. This is a contrived example that involves
Expand Down Expand Up @@ -967,6 +1034,7 @@ class Consensus_test : public beast::unit_test::suite
{
using namespace csf;
using namespace std::chrono;
testcase("pause for laggards");

// Test that validators that jump ahead of the network slow
// down.
Expand Down Expand Up @@ -1052,6 +1120,98 @@ class Consensus_test : public beast::unit_test::suite
BEAST_EXPECT(sim.synchronized());
}

void
testDisputes()
{
// WIP: Try to create conditions where messaging is unreliable and all
// peers have different initial proposals
using namespace csf;
using namespace std::chrono;
testcase("disputes");

std::uint32_t const numPeers = 35;

ConsensusParms const parms{};
Sim sim;

std::vector<PeerGroup> peerGroups;
peerGroups.reserve(numPeers);
PeerGroup network;
for (int i = 0; i < numPeers; ++i)
{
peerGroups.emplace_back(sim.createGroup(1));
network = network + peerGroups.back();
}

// Fully connected trust graph
network.trust(network);

for (int i = 0; i < peerGroups.size(); ++i)
{
auto const delay = i * 0.2 * parms.ledgerGRANULARITY;
auto& group = peerGroups[i];
auto& nextGroup = peerGroups[(i + 1) % numPeers];
group.connect(nextGroup, round<milliseconds>(delay));
auto& oppositeGroup = peerGroups[(i + numPeers / 2) % numPeers];
group.connect(oppositeGroup, round<milliseconds>(delay));
}

// Initial round to set prior state
sim.run(1);
for (Peer* peer : network)
{
// Every transaction will be seen by every node but two.
// To accomplish that, every peer will add the ids of every peer
// except itself, and the one following.
auto const myId = peer->id;
auto const nextId = (peer->id + PeerID(1)) % PeerID(numPeers);
for (Peer* to : sim.trustGraph.trustedPeers(peer))
{
if (to->id == myId || to->id == nextId)
continue;
peer->openTxs.insert(Tx{static_cast<std::uint32_t>(to->id)});
}
}
sim.run(1);

// Peers are out of sync
if (BEAST_EXPECT(!sim.synchronized()))
{
BEAST_EXPECT(sim.branches() == 1);
for (auto const& grp : peerGroups)
{
Peer const* peer = grp[0];
BEAST_EXPECT(
peer->fullyValidatedLedger.seq() == Ledger::Seq{1});

auto const& lcl = peer->lastClosedLedger;
BEAST_EXPECT(lcl.id() == peer->prevLedgerID());
log << "Peer " << peer->id << ", lcl seq: " << lcl.seq()
<< ", prevProposers: " << peer->prevProposers
<< ", txs in lcl: " << lcl.txs().size() << ", validations: "
<< peer->validations.sizeOfByLedgerCache() << std::endl;
for (auto const& [id, positions] : peer->peerPositions)
{
log << "\tLedger ID: " << id
<< ", #positions: " << positions.size() << std::endl;
}
/*
log << "\t" << to_string(peer->consensus.getJson(true))
<< std::endl
<< std::endl;
*/
/*
BEAST_EXPECT(lcl.seq() == Ledger::Seq{1}, to_string);
// All peers proposed
BEAST_EXPECT(peer->prevProposers == network.size() - 1);
// All transactions were accepted
for (std::uint32_t i = 0; i < network.size(); ++i)
BEAST_EXPECT(lcl.txs().find(Tx{i}) != lcl.txs().end());
*/
}
}
}

void
run() override
{
Expand All @@ -1068,6 +1228,7 @@ class Consensus_test : public beast::unit_test::suite
testHubNetwork();
testPreferredByBranch();
testPauseForLaggards();
testDisputes();
}
};

Expand Down
5 changes: 3 additions & 2 deletions src/xrpld/app/consensus/RCLValidations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,9 @@ handleNewValidation(
auto& validations = app.getValidations();

// masterKey is seated only if validator is trusted or listed
auto const outcome =
validations.add(calcNodeID(masterKey.value_or(signingKey)), val);
auto const nodeKey = masterKey.value_or(signingKey);
// assert(nodeKey != app.getValidationPublicKey());
auto const outcome = validations.add(calcNodeID(nodeKey), val);

if (outcome == ValStatus::current)
{
Expand Down
9 changes: 9 additions & 0 deletions src/xrpld/app/misc/NetworkOPs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1887,6 +1887,15 @@
bool
NetworkOPsImp::processTrustedProposal(RCLCxPeerPos peerPos)
{
if (auto const localPk = app_.getValidationPublicKey())
{
auto const localID = calcNodeID(*localPk);
auto const& peerID = peerPos.proposal().nodeID();

Check warning on line 1893 in src/xrpld/app/misc/NetworkOPs.cpp

View check run for this annotation

Codecov / codecov/patch

src/xrpld/app/misc/NetworkOPs.cpp#L1893

Added line #L1893 was not covered by tests
assert(localID != peerID);
if (localID == peerID)
return false;

Check warning on line 1896 in src/xrpld/app/misc/NetworkOPs.cpp

View check run for this annotation

Codecov / codecov/patch

src/xrpld/app/misc/NetworkOPs.cpp#L1896

Added line #L1896 was not covered by tests
}

return mConsensus.peerProposal(app_.timeKeeper().closeTime(), peerPos);
}

Expand Down
Loading
Loading