Skip to content

Commit

Permalink
Merge pull request #8406 from ziggie1984/fix-channel-opening-issue
Browse files Browse the repository at this point in the history
Fix case where Opening Channels get stuck forever.
  • Loading branch information
yyforyongyu authored Jan 22, 2024
2 parents e1259cd + 13e557d commit ec5b824
Show file tree
Hide file tree
Showing 10 changed files with 218 additions and 6 deletions.
6 changes: 6 additions & 0 deletions docs/release-notes/release-notes-0.17.4.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@

# Bug Fixes

* [Fix the removal of failed
channels](https://github.com/lightningnetwork/lnd/pull/8406). When a pending
channel opening was pruned from memory no more channels were able to be
created nor accepted. This PR fixes this issue and enhances the test suite
for this behavior.

# New Features
## Functional Enhancements
## RPC Additions
Expand Down
4 changes: 4 additions & 0 deletions itest/list_on_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -562,4 +562,8 @@ var allTestCases = []*lntest.TestCase{
Name: "removetx",
TestFunc: testRemoveTx,
},
{
Name: "fail funding flow psbt",
TestFunc: testPsbtChanFundingFailFlow,
},
}
48 changes: 48 additions & 0 deletions itest/lnd_psbt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/lightningnetwork/lnd/lnrpc/walletrpc"
"github.com/lightningnetwork/lnd/lntest"
"github.com/lightningnetwork/lnd/lntest/node"
"github.com/lightningnetwork/lnd/lnwallet/chanfunding"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -1340,3 +1341,50 @@ func sendAllCoinsToAddrType(ht *lntest.HarnessTest,
ht.MineBlocksAndAssertNumTxes(1, 1)
ht.WaitForBlockchainSync(hn)
}

// testPsbtChanFundingFailFlow tests the failing of a funding flow by the
// remote peer and that the initiator receives the expected error and aborts
// the channel opening. The psbt funding flow is used to simulate this behavior
// because we can easily let the remote peer run into the timeout.
func testPsbtChanFundingFailFlow(ht *lntest.HarnessTest) {
const chanSize = funding.MaxBtcFundingAmount

// Decrease the timeout window for the remote peer to accelerate the
// funding fail process.
args := []string{
"--dev.reservationtimeout=1s",
"--dev.zombiesweeperinterval=1s",
}
ht.RestartNodeWithExtraArgs(ht.Bob, args)

// Before we start the test, we'll ensure both sides are connected so
// the funding flow can be properly executed.
alice := ht.Alice
bob := ht.Bob
ht.EnsureConnected(alice, bob)

// At this point, we can begin our PSBT channel funding workflow. We'll
// start by generating a pending channel ID externally that will be used
// to track this new funding type.
pendingChanID := ht.Random32Bytes()

// Now that we have the pending channel ID, Alice will open the channel
// by specifying a PSBT shim.
chanUpdates, _ := ht.OpenChannelPsbt(
alice, bob, lntest.OpenChannelParams{
Amt: chanSize,
FundingShim: &lnrpc.FundingShim{
Shim: &lnrpc.FundingShim_PsbtShim{
PsbtShim: &lnrpc.PsbtShim{
PendingChanId: pendingChanID,
},
},
},
},
)

// We received the AcceptChannel msg from our peer but we are not going
// to fund this channel but instead wait for our peer to fail the
// funding workflow with an internal error.
ht.ReceiveOpenChannelError(chanUpdates, chanfunding.ErrRemoteCanceled)
}
9 changes: 9 additions & 0 deletions lncfg/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"os/user"
"path/filepath"
"strings"
"time"
)

const (
Expand Down Expand Up @@ -67,6 +68,14 @@ const (
// peer and a block arriving during that round trip to trigger force
// closure.
DefaultOutgoingCltvRejectDelta = DefaultOutgoingBroadcastDelta + 3

// DefaultReservationTimeout is the default time we wait until we remove
// an unfinished (zombiestate) open channel flow from memory.
DefaultReservationTimeout = 10 * time.Minute

// DefaultZombieSweeperInterval is the default time interval at which
// unfinished (zombiestate) open channel flows are purged from memory.
DefaultZombieSweeperInterval = 1 * time.Minute
)

// CleanAndExpandPath expands environment variables and leading ~ in the
Expand Down
14 changes: 13 additions & 1 deletion lncfg/dev.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

package lncfg

import "time"
import (
"time"
)

// IsDevBuild returns a bool to indicate whether we are in a development
// environment.
Expand All @@ -21,3 +23,13 @@ type DevConfig struct{}
func (d *DevConfig) ChannelReadyWait() time.Duration {
return 0
}

// GetReservationTimeout returns the config value for `ReservationTimeout`.
func (d *DevConfig) GetReservationTimeout() time.Duration {
return DefaultReservationTimeout
}

// GetZombieSweeperInterval returns the config value for`ZombieSweeperInterval`.
func (d *DevConfig) GetZombieSweeperInterval() time.Duration {
return DefaultZombieSweeperInterval
}
24 changes: 23 additions & 1 deletion lncfg/dev_integration.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

package lncfg

import "time"
import (
"time"
)

// IsDevBuild returns a bool to indicate whether we are in a development
// environment.
Expand All @@ -18,9 +20,29 @@ func IsDevBuild() bool {
//nolint:lll
type DevConfig struct {
ProcessChannelReadyWait time.Duration `long:"processchannelreadywait" description:"Time to sleep before processing remote node's channel_ready message."`
ReservationTimeout time.Duration `long:"reservationtimeout" description:"The maximum time we keep a pending channel open flow in memory."`
ZombieSweeperInterval time.Duration `long:"zombiesweeperinterval" description:"The time interval at which channel opening flows are evaluated for zombie status."`
}

// ChannelReadyWait returns the config value `ProcessChannelReadyWait`.
func (d *DevConfig) ChannelReadyWait() time.Duration {
return d.ProcessChannelReadyWait
}

// GetReservationTimeout returns the config value for `ReservationTimeout`.
func (d *DevConfig) GetReservationTimeout() time.Duration {
if d.ReservationTimeout == 0 {
return DefaultReservationTimeout
}

return d.ReservationTimeout
}

// GetZombieSweeperInterval returns the config value for`ZombieSweeperInterval`.
func (d *DevConfig) GetZombieSweeperInterval() time.Duration {
if d.ZombieSweeperInterval == 0 {
return DefaultZombieSweeperInterval
}

return d.ZombieSweeperInterval
}
10 changes: 10 additions & 0 deletions lntest/harness_assertion.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,16 @@ func (h *HarnessTest) ReceiveOpenChannelUpdate(
return update
}

// ReceiveOpenChannelError waits for the expected error during the open channel
// flow from the peer or times out.
func (h *HarnessTest) ReceiveOpenChannelError(
stream rpc.OpenChanClient, expectedErr error) {

_, err := h.receiveOpenChannelUpdate(stream)
require.Contains(h, err.Error(), expectedErr.Error(),
"error not matched")
}

// receiveOpenChannelUpdate waits until a message or an error is received on
// the stream or the timeout is reached.
//
Expand Down
5 changes: 3 additions & 2 deletions peer/brontide.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,9 @@ func NewBrontide(cfg Config) *Brontide {
activeChannels: &lnutils.SyncMap[
lnwire.ChannelID, *lnwallet.LightningChannel,
]{},
newActiveChannel: make(chan *newChannelMsg, 1),
newPendingChannel: make(chan *newChannelMsg, 1),
newActiveChannel: make(chan *newChannelMsg, 1),
newPendingChannel: make(chan *newChannelMsg, 1),
removePendingChannel: make(chan *newChannelMsg),

activeMsgStreams: make(map[lnwire.ChannelID]*msgStream),
activeChanCloses: make(map[lnwire.ChannelID]*chancloser.ChanCloser),
Expand Down
87 changes: 87 additions & 0 deletions peer/brontide_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package peer

import (
"bytes"
"fmt"
"testing"
"time"

Expand All @@ -16,6 +17,7 @@ import (
"github.com/lightningnetwork/lnd/contractcourt"
"github.com/lightningnetwork/lnd/htlcswitch"
"github.com/lightningnetwork/lnd/lntest/mock"
"github.com/lightningnetwork/lnd/lntest/wait"
"github.com/lightningnetwork/lnd/lnwallet"
"github.com/lightningnetwork/lnd/lnwallet/chancloser"
"github.com/lightningnetwork/lnd/lnwire"
Expand Down Expand Up @@ -1451,3 +1453,88 @@ func TestStartupWriteMessageRace(t *testing.T) {
}
}
}

// TestRemovePendingChannel checks that we are able to remove a pending channel
// successfully from the peers channel map. This also makes sure the
// removePendingChannel is initialized so we don't send to a nil channel and
// get stuck.
func TestRemovePendingChannel(t *testing.T) {
t.Parallel()

// Set up parameters for createTestPeer.
notifier := &mock.ChainNotifier{
SpendChan: make(chan *chainntnfs.SpendDetail),
EpochChan: make(chan *chainntnfs.BlockEpoch),
ConfChan: make(chan *chainntnfs.TxConfirmation),
}
broadcastTxChan := make(chan *wire.MsgTx)
mockSwitch := &mockMessageSwitch{}

// createTestPeer creates a peer and a channel with that peer.
peer, _, err := createTestPeer(
t, notifier, broadcastTxChan, noUpdate, mockSwitch,
)
require.NoError(t, err, "unable to create test channel")

// Add a pending channel to the peer Alice.
errChan := make(chan error, 1)
pendingChanID := lnwire.ChannelID{1}
req := &newChannelMsg{
channelID: pendingChanID,
err: errChan,
}

select {
case peer.newPendingChannel <- req:
// Operation completed successfully
case <-time.After(timeout):
t.Fatalf("not able to remove pending channel")
}

// Make sure the channel was added as a pending channel.
// The peer was already created with one active channel therefore the
// `activeChannels` had already one channel prior to adding the new one.
// The `addedChannels` map only tracks new channels in the current life
// cycle therefore the initial channel is not part of it.
err = wait.NoError(func() error {
if peer.activeChannels.Len() == 2 &&
peer.addedChannels.Len() == 1 {

return nil
}

return fmt.Errorf("pending channel not successfully added")
}, wait.DefaultTimeout)

require.NoError(t, err)

// Now try to remove it, the errChan needs to be reopened because it was
// closed during the pending channel registration above.
errChan = make(chan error, 1)
req = &newChannelMsg{
channelID: pendingChanID,
err: errChan,
}

select {
case peer.removePendingChannel <- req:
// Operation completed successfully
case <-time.After(timeout):
t.Fatalf("not able to remove pending channel")
}

// Make sure the pending channel is successfully removed from both
// channel maps.
// The initial channel between the peer is still active at this point.
err = wait.NoError(func() error {
if peer.activeChannels.Len() == 1 &&
peer.addedChannels.Len() == 0 {

return nil
}

return fmt.Errorf("pending channel not successfully removed")
}, wait.DefaultTimeout)

require.NoError(t, err)
}
17 changes: 15 additions & 2 deletions server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1270,13 +1270,26 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
return ourPolicy, err
}

// For the reservationTimeout and the zombieSweeperInterval different
// values are set in case we are in a dev environment so enhance test
// capacilities.
reservationTimeout := lncfg.DefaultReservationTimeout
zombieSweeperInterval := lncfg.DefaultZombieSweeperInterval

// Get the development config for funding manager. If we are not in
// development mode, this would be nil.
var devCfg *funding.DevConfig
if lncfg.IsDevBuild() {
devCfg = &funding.DevConfig{
ProcessChannelReadyWait: cfg.Dev.ChannelReadyWait(),
}

reservationTimeout = cfg.Dev.GetReservationTimeout()
zombieSweeperInterval = cfg.Dev.GetZombieSweeperInterval()

srvrLog.Debugf("Using the dev config for the fundingMgr: %v, "+
"reservationTimeout=%v, zombieSweeperInterval=%v",
devCfg, reservationTimeout, zombieSweeperInterval)
}

//nolint:lll
Expand Down Expand Up @@ -1436,8 +1449,8 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
// channel bandwidth.
return uint16(input.MaxHTLCNumber / 2)
},
ZombieSweeperInterval: 1 * time.Minute,
ReservationTimeout: 10 * time.Minute,
ZombieSweeperInterval: zombieSweeperInterval,
ReservationTimeout: reservationTimeout,
MinChanSize: btcutil.Amount(cfg.MinChanSize),
MaxChanSize: btcutil.Amount(cfg.MaxChanSize),
MaxPendingChannels: cfg.MaxPendingChannels,
Expand Down

0 comments on commit ec5b824

Please sign in to comment.