diff --git a/light/client.go b/light/client.go index 35186965575..a91152fe03c 100644 --- a/light/client.go +++ b/light/client.go @@ -125,6 +125,13 @@ func MaxBlockLag(d time.Duration) Option { } } +// DisableProviderRemoval disables the removal of misbehaving providers. +func DisableProviderRemoval() Option { + return func(c *Client) { + c.disableProviderRemoval = true + } +} + // Client represents a light client, connected to a single chain, which gets // light blocks from a primary provider, verifies them either sequentially or by // skipping some and stores them in a trusted store (usually, a local FS). @@ -146,6 +153,8 @@ type Client struct { // Providers used to "witness" new headers. witnesses []provider.Provider + disableProviderRemoval bool + // Where trusted light blocks are stored. trustedStore store.Store // Highest trusted light block from the store (height=H). @@ -748,7 +757,7 @@ func (c *Client) verifySkipping( if depth == len(blockCache)-1 { pivotHeight := verifiedBlock.Height + (blockCache[depth].Height-verifiedBlock. Height)*verifySkippingNumerator/verifySkippingDenominator - interimBlock, providerErr := source.LightBlock(ctx, pivotHeight) + interimBlock, peer, providerErr := source.LightBlockWithPeerID(ctx, pivotHeight) switch providerErr { case nil: blockCache = append(blockCache, interimBlock) @@ -760,6 +769,7 @@ func (c *Client) verifySkipping( // all other errors such as ErrBadLightBlock or ErrUnreliableProvider are seen as malevolent and the // provider is removed default: + source.MalevolentProvider(peer) return nil, ErrVerificationFailed{From: verifiedBlock.Height, To: pivotHeight, Reason: providerErr} } blockCache = append(blockCache, interimBlock) @@ -989,7 +999,7 @@ func (c *Client) backwards( // any other error, the primary is permanently dropped and is replaced by a witness. 
func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*types.LightBlock, error) { c.providerMutex.Lock() - l, err := c.primary.LightBlock(ctx, height) + l, peer, err := c.primary.LightBlockWithPeerID(ctx, height) c.providerMutex.Unlock() switch err { @@ -1011,6 +1021,9 @@ func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*type // These errors mean that the light client should drop the primary and try with another provider instead c.logger.Info("error from light block request from primary, removing...", "error", err, "height", height, "primary", c.primary) + c.providerMutex.Lock() + c.primary.MalevolentProvider(peer) + c.providerMutex.Unlock() return c.findNewPrimary(ctx, height, true) } } @@ -1026,6 +1039,12 @@ func (c *Client) removeWitnesses(indexes []int) error { // order so as to not affect the indexes themselves sort.Ints(indexes) for i := len(indexes) - 1; i >= 0; i-- { + // The primary needs to be removed even if disableProviderRemoval is enabled, + // because it has been copied into c.primary. + if c.witnesses[indexes[i]] != c.primary && c.disableProviderRemoval { + continue + } + c.witnesses[indexes[i]] = c.witnesses[len(c.witnesses)-1] c.witnesses = c.witnesses[:len(c.witnesses)-1] } @@ -1036,6 +1055,7 @@ func (c *Client) removeWitnesses(indexes []int) error { type witnessResponse struct { lb *types.LightBlock witnessIndex int + peer string err error } @@ -1044,6 +1064,10 @@ type witnessResponse struct { // entire removed or just appended to the back of the witnesses list. This method also handles witness // errors. If no witness is available, it returns the last error of the witness. 
func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool) (*types.LightBlock, error) { + if c.disableProviderRemoval { + remove = false + } + c.providerMutex.Lock() defer c.providerMutex.Unlock() @@ -1066,8 +1090,8 @@ func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool) go func(witnessIndex int, witnessResponsesC chan witnessResponse) { defer wg.Done() - lb, err := c.witnesses[witnessIndex].LightBlock(subctx, height) - witnessResponsesC <- witnessResponse{lb, witnessIndex, err} + lb, peer, err := c.witnesses[witnessIndex].LightBlockWithPeerID(subctx, height) + witnessResponsesC <- witnessResponse{lb, witnessIndex, peer, err} }(index, witnessResponsesC) } @@ -1115,6 +1139,7 @@ func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool) c.logger.Error("error on light block request from witness, removing...", "error", response.err, "primary", c.witnesses[response.witnessIndex]) witnessesToRemove = append(witnessesToRemove, response.witnessIndex) + c.witnesses[response.witnessIndex].MalevolentProvider(response.peer) } } @@ -1163,6 +1188,7 @@ and remove witness. 
Otherwise, use the different primary`, e.WitnessIndex), "wit "witness", c.witnesses[e.WitnessIndex], "err", err) witnessesToRemove = append(witnessesToRemove, e.WitnessIndex) + c.witnesses[e.WitnessIndex].MalevolentProvider(e.peer) default: // benign errors can be ignored with the exception of context errors if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { return err diff --git a/light/detector.go b/light/detector.go index 162a0e0d332..3238f0f70a0 100644 --- a/light/detector.go +++ b/light/detector.go @@ -72,6 +72,7 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig } // if attempt to generate conflicting headers failed then remove witness witnessesToRemove = append(witnessesToRemove, e.WitnessIndex) + c.witnesses[e.WitnessIndex].MalevolentProvider(e.peer) case errBadWitness: // these are all melevolent errors and should result in removing the @@ -79,6 +80,7 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig c.logger.Info("witness returned an error during header comparison, removing...", "witness", c.witnesses[e.WitnessIndex], "err", err) witnessesToRemove = append(witnessesToRemove, e.WitnessIndex) + c.witnesses[e.WitnessIndex].MalevolentProvider(e.peer) default: // Benign errors which can be ignored unless there was a context // canceled @@ -114,9 +116,9 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig // // 3: nil -> the hashes of the two headers match func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan error, h *types.SignedHeader, - witness provider.Provider, witnessIndex int) { - - lightBlock, err := witness.LightBlock(ctx, h.Height) + witness provider.Provider, witnessIndex int, +) { + lightBlock, peer, err := witness.LightBlockWithPeerID(ctx, h.Height) switch err { // no error means we move on to checking the hash of the two headers case nil: @@ -150,7 +152,7 @@ func (c *Client) 
compareNewHeaderWithWitness(ctx context.Context, errc chan erro // witness' last header is below the primary's header. We check the times to see if the blocks // have conflicting times if !lightBlock.Time.Before(h.Time) { - errc <- errConflictingHeaders{Block: lightBlock, WitnessIndex: witnessIndex} + errc <- errConflictingHeaders{Block: lightBlock, WitnessIndex: witnessIndex, peer: peer} return } @@ -164,7 +166,7 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { errc <- err } else { - errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex} + errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex, peer: peer} } return } @@ -175,7 +177,7 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro // the witness still doesn't have a block at the height of the primary. // Check if there is a conflicting time if !lightBlock.Time.Before(h.Time) { - errc <- errConflictingHeaders{Block: lightBlock, WitnessIndex: witnessIndex} + errc <- errConflictingHeaders{Block: lightBlock, WitnessIndex: witnessIndex, peer: peer} return } @@ -192,12 +194,12 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro default: // all other errors (i.e. 
invalid block, closed connection or unreliable provider) we mark the // witness as bad and remove it - errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex} + errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex, peer: peer} return } if !bytes.Equal(h.Hash(), lightBlock.Hash()) { - errc <- errConflictingHeaders{Block: lightBlock, WitnessIndex: witnessIndex} + errc <- errConflictingHeaders{Block: lightBlock, WitnessIndex: witnessIndex, peer: peer} } c.logger.Debug("Matching header received by witness", "height", h.Height, "witness", witnessIndex) diff --git a/light/errors.go b/light/errors.go index bc6357def66..a9c320a1af9 100644 --- a/light/errors.go +++ b/light/errors.go @@ -81,6 +81,7 @@ var ErrNoWitnesses = errors.New("no witnesses connected. please reset light clie type errConflictingHeaders struct { Block *types.LightBlock WitnessIndex int + peer string } func (e errConflictingHeaders) Error() string { @@ -94,6 +95,7 @@ func (e errConflictingHeaders) Error() string { type errBadWitness struct { Reason error WitnessIndex int + peer string } func (e errBadWitness) Error() string { diff --git a/light/provider/http/http.go b/light/provider/http/http.go index b73f7bd4524..03da7a60b9a 100644 --- a/light/provider/http/http.go +++ b/light/provider/http/http.go @@ -107,6 +107,14 @@ func (p *http) ReportEvidence(ctx context.Context, ev types.Evidence) error { return err } +func (p *http) LightBlockWithPeerID(ctx context.Context, height int64) (*types.LightBlock, string, error) { + lb, err := p.LightBlock(ctx, height) + return lb, "", err +} + +func (p *http) MalevolentProvider(peerID string) { +} + func (p *http) validatorSet(ctx context.Context, height *int64) (*types.ValidatorSet, error) { // Since the malicious node could report a massive number of pages, making us // spend a considerable time iterating, we restrict the number of pages here. 
diff --git a/light/provider/mock/deadmock.go b/light/provider/mock/deadmock.go index 8e388107380..acb11917537 100644 --- a/light/provider/mock/deadmock.go +++ b/light/provider/mock/deadmock.go @@ -24,6 +24,12 @@ func (p *deadMock) LightBlock(_ context.Context, height int64) (*types.LightBloc return nil, provider.ErrNoResponse } +func (p *deadMock) LightBlockWithPeerID(_ context.Context, height int64) (*types.LightBlock, string, error) { + return nil, "", provider.ErrNoResponse +} + +func (p *deadMock) MalevolentProvider(peerID string) {} + func (p *deadMock) ReportEvidence(_ context.Context, ev types.Evidence) error { return provider.ErrNoResponse } diff --git a/light/provider/mock/mock.go b/light/provider/mock/mock.go index 57ecef09e51..cdd2e38b1e6 100644 --- a/light/provider/mock/mock.go +++ b/light/provider/mock/mock.go @@ -107,6 +107,14 @@ func (p *Mock) ReportEvidence(_ context.Context, ev types.Evidence) error { return nil } +func (p *Mock) LightBlockWithPeerID(ctx context.Context, height int64) (*types.LightBlock, string, error) { + l, err := p.LightBlock(ctx, height) + return l, "", err +} + +func (p *Mock) MalevolentProvider(peerID string) { +} + func (p *Mock) HasEvidence(ev types.Evidence) bool { _, ok := p.evidenceToReport[string(ev.Hash())] return ok diff --git a/light/provider/provider.go b/light/provider/provider.go index 333d8c1e891..4af8e985be2 100644 --- a/light/provider/provider.go +++ b/light/provider/provider.go @@ -26,4 +26,23 @@ type Provider interface { // ReportEvidence reports an evidence of misbehavior. ReportEvidence(context.Context, types.Evidence) error + + // LightBlockWithPeerID is the same as LightBlock, but the response includes + // an identifier of the peer that served the block. This is to be used with + // MalevolentProvider method. + LightBlockWithPeerID(ctx context.Context, height int64) (*types.LightBlock, string, error) + + // MalevolentProvider notifies the provider that the peer identified by peerID is misbehaving. 
+ // + // XXX: This is an Oasis hack to have a callback from the LightClient + // to the providers in case of malevolent light blocks provided by peers. + // Because LightClient uses a static witness set (no support for + // adding/removing witnesses once the client is initialized), we use + // "virtual-providers" and in case of a misbehaving peer the provider + // will internally blacklist the peer and switch to a new one. + // A more proper/involved solution would be updating the LightClient + // provider to support a dynamic witness set and adding support for subscribing + // to notifications on failures. But we're trying to keep changes in the + // fork minimal. + MalevolentProvider(peerID string) }