diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e4dc328cf6..c1e473b2a5 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -13,7 +13,7 @@ on:
 jobs:
   build:
     name: Build
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
@@ -23,7 +23,7 @@ jobs:
         goos: ["linux"]
     timeout-minutes: 55
     steps:
-      - uses: actions/setup-go@v3.4.0
+      - uses: actions/setup-go@v3.5.0
         with:
           go-version: "1.19"
       - uses: actions/checkout@v3
@@ -50,11 +50,11 @@ jobs:
         shell: bash

   test_abci_cli:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     needs: build
     timeout-minutes: 5
     steps:
-      - uses: actions/setup-go@v3.4.0
+      - uses: actions/setup-go@v3.5.0
         with:
           go-version: "1.19"
       - uses: actions/checkout@v3
@@ -75,11 +75,11 @@ jobs:
         if: "env.GIT_DIFF != ''"

   test_apps:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     needs: build
     timeout-minutes: 5
     steps:
-      - uses: actions/setup-go@v3.4.0
+      - uses: actions/setup-go@v3.5.0
         with:
           go-version: "1.19"
       - uses: actions/checkout@v3
diff --git a/.github/workflows/check-generated.yml b/.github/workflows/check-generated.yml
index 9ee794502c..3ac125bfc7 100644
--- a/.github/workflows/check-generated.yml
+++ b/.github/workflows/check-generated.yml
@@ -14,9 +14,9 @@ permissions:
 jobs:
   check-mocks:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
-      - uses: actions/setup-go@v3.4.0
+      - uses: actions/setup-go@v3.5.0
         with:
           go-version: '1.19'

@@ -40,9 +40,9 @@ jobs:
         fi

   check-proto:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
-      - uses: actions/setup-go@v3.4.0
+      - uses: actions/setup-go@v3.5.0
         with:
           go-version: '1.19'
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index c6302abdeb..0fcf36b876 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -1,7 +1,7 @@
 ---
 name: Docker
-# Build & Push rebuilds the Tenderdash docker image every time a release is published
-# and pushes the image to https://hub.docker.com/r/dashpay/tenderdash/tags
+# Build & Push rebuilds the Tenderdash docker image every time a release is
+# published and pushes the image to https://hub.docker.com/r/dashpay/tenderdash
 on:
   workflow_dispatch:
     inputs:
@@ -15,7 +15,7 @@ on:

 jobs:
   build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v2.3.4

@@ -42,6 +42,17 @@ jobs:
           result-encoding: string
           script: "return (context.payload.release.tag_name.includes('-dev') ? '-dev' : '');"

+      - name: Determine TENDERMINT_BUILD_OPTIONS
+        uses: actions/github-script@v6
+        id: TENDERMINT_BUILD_OPTIONS
+        with:
+          result-encoding: string
+          script: |
+            if (context.ref.startsWith('refs/tags/') && !context.ref.includes('-dev')) {
+              return 'tenderdash,stable'
+            }
+            return 'tenderdash,dev,deadlock'
+
       - name: Set Docker tags and labels
         id: docker_meta
         uses: docker/metadata-action@v3
@@ -70,6 +81,7 @@ jobs:
           labels: ${{ steps.docker_meta.outputs.labels }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
-
+          build-args: |
+            TENDERMINT_BUILD_OPTIONS="${{ steps.TENDERMINT_BUILD_OPTIONS.outputs.result }}"
       - name: Show Docker image digest
         run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/.github/workflows/docs-deployment.yml b/.github/workflows/docs-deployment.yml
index 082484dd58..a1654f01ab 100644
--- a/.github/workflows/docs-deployment.yml
+++ b/.github/workflows/docs-deployment.yml
@@ -19,7 +19,7 @@ jobs:
   # whole workflow read-only.
build: name: VuePress build - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 container: image: alpine:latest permissions: @@ -44,7 +44,7 @@ jobs: deploy: name: Deploy to GitHub Pages - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 needs: build permissions: contents: write diff --git a/.github/workflows/e2e-manual.yml b/.github/workflows/e2e-manual.yml index cf55f76df3..a58c56be99 100644 --- a/.github/workflows/e2e-manual.yml +++ b/.github/workflows/e2e-manual.yml @@ -12,10 +12,10 @@ jobs: fail-fast: false matrix: group: ['00', '01', '02', '03', '04'] - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 timeout-minutes: 60 steps: - - uses: actions/setup-go@v3.4.0 + - uses: actions/setup-go@v3.5.0 with: go-version: '1.19' diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 44267f2d2c..ac1b18da39 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -12,7 +12,7 @@ on: jobs: e2e-test: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: true matrix: @@ -23,7 +23,7 @@ jobs: CGO_LDFLAGS: "-L/usr/local/lib -ldashbls -lrelic_s -lgmp -lminialloc" CGO_CXXFLAGS: "-I/usr/local/include" steps: - - uses: actions/setup-go@v3.4.0 + - uses: actions/setup-go@v3.5.0 with: go-version: "1.19" - uses: actions/checkout@v3 diff --git a/.github/workflows/janitor.yml b/.github/workflows/janitor.yml index ceb21941d1..7e3bc6b6ea 100644 --- a/.github/workflows/janitor.yml +++ b/.github/workflows/janitor.yml @@ -7,7 +7,7 @@ on: jobs: cancel: name: "Cancel Previous Runs" - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 timeout-minutes: 3 steps: - uses: styfle/cancel-workflow-action@0.10.0 diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 04e599564a..b254f1eebb 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -43,7 +43,7 @@ on: jobs: jepsen-test: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout the Jepsen repository uses: actions/checkout@v3 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d8835abb2b..f86ec3b7e2 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,7 +24,7 @@ jobs: - uses: actions/checkout@v3 with: submodules: true - - uses: actions/setup-go@v3.4.0 + - uses: actions/setup-go@v3.5.0 with: go-version: '^1.19' - uses: technote-space/get-diff-action@v6 diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 290542bdea..c06a10355c 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -16,7 +16,7 @@ on: jobs: build: name: Super linter - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout Code uses: actions/checkout@v3 diff --git a/.github/workflows/markdown-links.yml b/.github/workflows/markdown-links.yml index 7af7e3ce90..7881ee14eb 100644 --- a/.github/workflows/markdown-links.yml +++ b/.github/workflows/markdown-links.yml @@ -9,7 +9,7 @@ on: jobs: markdown-link-check: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - uses: technote-space/get-diff-action@v6 diff --git a/.github/workflows/proto-lint.yml b/.github/workflows/proto-lint.yml index f52f62a497..8384d36e7d 100644 --- a/.github/workflows/proto-lint.yml +++ b/.github/workflows/proto-lint.yml @@ -11,11 +11,11 @@ on: jobs: lint: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 timeout-minutes: 5 steps: - uses: actions/checkout@v3 - - uses: bufbuild/buf-setup-action@v1.10.0 + - uses: bufbuild/buf-setup-action@v1.11.0 - uses: bufbuild/buf-lint-action@v1 with: input: 
'proto' diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c16c12f8af..499d1a9989 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,19 +5,19 @@ on: jobs: goreleaser: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout uses: actions/checkout@v3 with: fetch-depth: 0 - - uses: actions/setup-go@v3.4.0 + - uses: actions/setup-go@v3.5.0 with: go-version: '1.19' - name: Build - uses: goreleaser/goreleaser-action@v3 + uses: goreleaser/goreleaser-action@v4 if: ${{ github.event_name == 'pull_request' }} with: version: latest @@ -26,7 +26,7 @@ jobs: - run: echo https://github.com/tendermint/tendermint/blob/${GITHUB_REF#refs/tags/}/CHANGELOG.md#${GITHUB_REF#refs/tags/} > ../release_notes.md - name: Release - uses: goreleaser/goreleaser-action@v3 + uses: goreleaser/goreleaser-action@v4 if: startsWith(github.ref, 'refs/tags/') with: version: latest diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 51229cfb70..5b459b6353 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -5,9 +5,9 @@ on: jobs: stale: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - - uses: actions/stale@v6 + - uses: actions/stale@v7 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-pr-message: "This pull request has been automatically marked as stale because it has not had diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dc70d49e37..95077f93ba 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,13 +11,13 @@ on: - v0.*-dev jobs: tests: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: part: ["00", "01", "02", "03", "04", "05"] steps: - - uses: actions/setup-go@v3.4.0 + - uses: actions/setup-go@v3.5.0 with: go-version: "1.19" - uses: actions/checkout@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index f5c2433353..2b86af10b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,41 @@ ## [0.11.0-dev.1] - 2022-12-15 +## [0.10.0-dev.7] - 2022-12-20 + +### Bug Fixes + +- Decrease log verbosity by logging send/recv logs on trace level (#533) +- Ensure process proposal was called on commit processing (#534) + +### Features + +- Seed connectivity tuning options (max-incoming-connection-time,incoming-connection-window) (#532) + +### Build + +- Improve release script (#522) + +## [0.10.0-dev.6] - 2022-12-15 + +### Bug Fixes + +- ProcessProposal executed twice for a block (#516) +- Proposer-based timestamp broken during backport (#523) +- Improve wal replay mechanism (#510) + +### Miscellaneous Tasks + +- [**breaking**] Rename genesis.json quorum fields (#515) +- [**breaking**] Remove Snapshot.core_chain_locked_height (#527) +- Update changelog and version to 0.10.0-dev.6 (#526) + +### Build + +- Bump actions/setup-go from 3.3.1 to 3.4.0 (#524) +- Bump bufbuild/buf-setup-action from 1.9.0 to 1.10.0 (#525) + +## [0.8.0] - 2022-12-07 + ### Bug Fixes - Commits received during state sync are lost (#513) diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md deleted file mode 100644 index 6443f16305..0000000000 --- a/CHANGELOG_PENDING.md +++ /dev/null @@ -1,104 +0,0 @@ -# Unreleased Changes - -Friendly reminder: We have a [bug bounty program](https://hackerone.com/cosmos). - -## v0.35.3 - -Month, DD, YYYY - -Special thanks to external contributors on this release: - -### BREAKING CHANGES - -- CLI/RPC/Config - - - [rpc] \#7121 Remove the deprecated gRPC interface to the RPC service. 
(@creachadair) - - [blocksync] \#7159 Remove support for disabling blocksync in any circumstance. (@tychoish) - - [mempool] \#7171 Remove legacy mempool implementation. (@tychoish) - - [rpc] \#7575 Rework how RPC responses are written back via HTTP. (@creachadair) - - [rpc] \#7713 Remove unused options for websocket clients. (@creachadair) - - [config] \#7930 Add new event subscription options and defaults. (@creachadair) - - [rpc] \#7982 Add new Events interface and deprecate Subscribe. (@creachadair) - - [cli] \#8081 make the reset command safe to use by intoducing `reset-state` command. Fixed by \#8259. (@marbar3778, @cmwaters) - - [config] \#8222 default indexer configuration to null. (@creachadair) - - [rpc] \#8570 rework timeouts to be per-method instead of global. (@creachadair) - - [rpc] \#8624 deprecate `broadcast_tx_commit` and `braodcast_tx_sync` and `broadcast_tx_async` in favor of `braodcast_tx`. (@tychoish) - - [config] \#8654 remove deprecated `seeds` field from config. Users should switch to `bootstrap-peers` instead. (@cmwaters) - -- Apps - - - [tendermint/spec] \#7804 Migrate spec from [spec repo](https://github.com/tendermint/spec). - - [abci] \#7984 Remove the locks preventing concurrent use of ABCI applications by Tendermint. (@tychoish) - - [abci] \#8605 Remove info, log, events, gasUsed and mempoolError fields from ResponseCheckTx as they are not used by Tendermint. (@jmalicevic) - - [abci] \#8664 Move `app_hash` parameter from `Commit` to `FinalizeBlock`. (@sergio-mena) - - [abci] \#8656 Added cli command for `PrepareProposal`. (@jmalicevic) - - [sink/psql] \#8637 tx_results emitted from psql sink are now json encoded, previously they were protobuf encoded - - [abci] \#8901 Added cli command for `ProcessProposal`. (@hvanz) - -- P2P Protocol - - - [p2p] \#7035 Remove legacy P2P routing implementation and associated configuration options. (@tychoish) - - [p2p] \#7265 Peer manager reduces peer score for each failed dial attempts for peers that have not successfully dialed. (@tychoish) - - [p2p] [\#7594](https://github.com/tendermint/tendermint/pull/7594) always advertise self, to enable mutual address discovery. (@altergui) - - [p2p] \#8737 Introduce "inactive" peer label to avoid re-dialing incompatible peers. (@tychoish) - - [p2p] \#8737 Increase frequency of dialing attempts to reduce latency for peer acquisition. (@tychoish) - - [p2p] \#8737 Improvements to peer scoring and sorting to gossip a greater variety of peers during PEX. (@tychoish) - - [p2p] \#8737 Track incoming and outgoing peers separately to ensure more peer slots open for incoming connections. (@tychoish) - -- Go API - - - [rpc] \#7474 Remove the "URI" RPC client. (@creachadair) - - [libs/pubsub] \#7451 Internalize the pubsub packages. (@creachadair) - - [libs/sync] \#7450 Internalize and remove the library. (@creachadair) - - [libs/async] \#7449 Move library to internal. (@creachadair) - - [pubsub] \#7231 Remove unbuffered subscriptions and rework the Subscription interface. (@creachadair) - - [eventbus] \#7231 Move the EventBus type to the internal/eventbus package. (@creachadair) - - [blocksync] \#7046 Remove v2 implementation of the blocksync service and recactor, which was disabled in the previous release. (@tychoish) - - [p2p] \#7064 Remove WDRR queue implementation. (@tychoish) - - [config] \#7169 `WriteConfigFile` now returns an error. (@tychoish) - - [libs/service] \#7288 Remove SetLogger method on `service.Service` interface. 
(@tychoish) - - [abci/client] \#7607 Simplify client interface (removes most "async" methods). (@creachadair) - - [libs/json] \#7673 Remove the libs/json (tmjson) library. (@creachadair) - - [crypto] \#8412 \#8432 Remove `crypto/tmhash` package in favor of small functions in `crypto` package and cleanup of unused functions. (@tychoish) - -- Blockchain Protocol - -### FEATURES - -- [rpc] [\#7270](https://github.com/tendermint/tendermint/pull/7270) Add `header` and `header_by_hash` RPC Client queries. (@fedekunze) -- [rpc] [\#7701] Add `ApplicationInfo` to `status` rpc call which contains the application version. (@jonasbostoen) -- [cli] [#7033](https://github.com/tendermint/tendermint/pull/7033) Add a `rollback` command to rollback to the previous tendermint state in the event of non-determinstic app hash or reverting an upgrade. -- [mempool, rpc] \#7041 Add removeTx operation to the RPC layer. (@tychoish) -- [consensus] \#7354 add a new `synchrony` field to the `ConsensusParams` struct for controlling the parameters of the proposer-based timestamp algorithm. (@williambanfield) -- [consensus] \#7376 Update the proposal logic per the Propose-based timestamps specification so that the proposer will wait for the previous block time to occur before proposing the next block. (@williambanfield) -- [consensus] \#7391 Use the proposed block timestamp as the proposal timestamp. Update the block validation logic to ensure that the proposed block's timestamp matches the timestamp in the proposal message. (@williambanfield) -- [consensus] \#7415 Update proposal validation logic to Prevote nil if a proposal does not meet the conditions for Timelyness per the proposer-based timestamp specification. (@anca) -- [consensus] \#7382 Update block validation to no longer require the block timestamp to be the median of the timestamps of the previous commit. (@anca) -- [consensus] \#7711 Use the proposer timestamp for the first height instead of the genesis time. Chains will still start consensus at the genesis time. (@anca) -- [cli] \#8281 Add a tool to update old config files to the latest version. (@creachadair) -- [consenus] \#8514 move `RecheckTx` from the local node mempool config to a global `ConsensusParams` field in `BlockParams` (@cmwaters) -- [abci] ABCI++ [specified](https://github.com/tendermint/tendermint/tree/master/spec/abci%2B%2B). (@sergio-mena, @cmwaters, @josef-widder) -- [abci] ABCI++ [implemented](https://github.com/orgs/tendermint/projects/9). (@williambanfield, @thanethomson, @sergio-mena) - -### IMPROVEMENTS - -- [internal/protoio] \#7325 Optimized `MarshalDelimited` by inlining the common case and using a `sync.Pool` in the worst case. (@odeke-em) -- [consensus] \#6969 remove logic to 'unlock' a locked block. -- [evidence] \#7700 Evidence messages contain single Evidence instead of EvidenceList (@jmalicevic) -- [evidence] \#7802 Evidence pool emits events when evidence is validated and updates a metric when the number of evidence in the evidence pool changes. (@jmalicevic) -- [pubsub] \#7319 Performance improvements for the event query API (@creachadair) -- [node] \#7521 Define concrete type for seed node implementation (@spacech1mp) -- [rpc] \#7612 paginate mempool /unconfirmed_txs rpc endpoint (@spacech1mp) -- [light] [\#7536](https://github.com/tendermint/tendermint/pull/7536) rpc /status call returns info about the light client (@jmalicevic) -- [types] \#7765 Replace EvidenceData with EvidenceList to avoid unnecessary nesting of evidence fields within a block. 
(@jmalicevic) - -### BUG FIXES - -- fix: assignment copies lock value in `BitArray.UnmarshalJSON()` (@lklimek) -- [light] \#7640 Light Client: fix absence proof verification (@ashcherbakov) -- [light] \#7641 Light Client: fix querying against the latest height (@ashcherbakov) -- [cli] [#7837](https://github.com/tendermint/tendermint/pull/7837) fix app hash in state rollback. (@yihuang) -- [cli] \#8276 scmigrate: ensure target key is correctly renamed. (@creachadair) -- [cli] \#8294 keymigrate: ensure block hash keys are correctly translated. (@creachadair) -- [cli] \#8352 keymigrate: ensure transaction hash keys are correctly translated. (@creachadair) -- (indexer) \#8625 Fix overriding tx index of duplicated txs. diff --git a/DOCKER/Dockerfile b/DOCKER/Dockerfile index 6f2226a40c..f8d919d26b 100644 --- a/DOCKER/Dockerfile +++ b/DOCKER/Dockerfile @@ -5,11 +5,26 @@ RUN apk update && \ apk upgrade && \ apk add bash git gmp-dev sudo cmake build-base python3-dev -COPY . /src - -WORKDIR /src - -RUN make build-bls +WORKDIR /src/tenderdash + +# Fetch dependencies separately (for layer caching) +# TODO: move below BLS install once on new BLS version +COPY go.mod go.sum ./ +RUN go mod download + +# Install BLS library +COPY third_party ./third_party +COPY test/Makefile ./test/Makefile +COPY Makefile ./ +RUN make install-bls + +# Copy Tenderdash source +# Avoid overwriting third-party libs +RUN mv third_party third_party.bak +COPY . . +RUN rm -r third_party && mv third_party.bak third_party + +ARG TENDERMINT_BUILD_OPTIONS=tenderdash RUN make build-linux # stage 2 @@ -44,7 +59,7 @@ EXPOSE 26656 26657 26660 STOPSIGNAL SIGTERM -COPY --from=builder /src/build/tenderdash /usr/bin/tenderdash +COPY --from=builder /src/tenderdash/build/tenderdash /usr/bin/tenderdash # You can overwrite these before the first run to influence # config.json and genesis.json. Additionally, you can override diff --git a/abci/cmd/abci-cli/abci-cli.go b/abci/cmd/abci-cli/abci-cli.go index bad0993c8b..f5c0f62e96 100644 --- a/abci/cmd/abci-cli/abci-cli.go +++ b/abci/cmd/abci-cli/abci-cli.go @@ -719,9 +719,13 @@ func makeKVStoreCmd(logger log.Logger) func(*cobra.Command, []string) error { err error ) if flagPersist == "" { - app, err = kvstore.NewMemoryApp() + app, err = kvstore.NewMemoryApp(kvstore.WithDuplicateRequestDetection(false)) } else { - app, err = kvstore.NewPersistentApp(kvstore.DefaultConfig(flagPersist), kvstore.WithLogger(logger.With("module", "kvstore"))) + app, err = kvstore.NewPersistentApp( + kvstore.DefaultConfig(flagPersist), + kvstore.WithLogger(logger.With("module", "kvstore")), + kvstore.WithDuplicateRequestDetection(false), + ) } if err != nil { return err diff --git a/abci/example/kvstore/kvstore.go b/abci/example/kvstore/kvstore.go index c9914207e3..e033eab545 100644 --- a/abci/example/kvstore/kvstore.go +++ b/abci/example/kvstore/kvstore.go @@ -46,7 +46,18 @@ type Application struct { // roundStates contains state for each round, indexed by roundKey() roundStates map[string]State RetainBlocks int64 // blocks to retain after commit (via ResponseCommit.RetainHeight) - logger log.Logger + + // preparedProposals stores info about all rounds that got PrepareProposal executed, used to detect + // duplicate PrepareProposal calls. + // If `nil`, duplicate call detection is disabled. + preparedProposals map[int32]bool + + // processedProposals stores info about all rounds that got ProcessProposal executed, used to detect + // duplicate ProcessProposal calls. + // If `nil`, duplicate call detection is disabled. 
+ processedProposals map[int32]bool + + logger log.Logger validatorSetUpdates map[int64]abci.ValidatorSetUpdate consensusParamsUpdates map[int64]types1.ConsensusParams @@ -165,6 +176,15 @@ func WithPrepareTxsFunc(prepareTxs PrepareTxsFunc) OptFunc { } } +// WithDuplicateRequestDetection makes it possible to disable duplicate request detection. +// (enabled by default) +func WithDuplicateRequestDetection(enabled bool) OptFunc { + return func(app *Application) error { + app.resetDuplicateDetection(enabled) + return nil + } +} + // NewMemoryApp creates new Key/value store application that stores data to memory. // Data is lost when the app stops. // The application can be used for testing or as an example of ABCI @@ -182,6 +202,8 @@ func newApplication(stateStore StoreFactory, opts ...OptFunc) (*Application, err logger: log.NewNopLogger(), LastCommittedState: NewKvState(dbm.NewMemDB(), initialHeight), // initial state to avoid InitChain() in unit tests roundStates: map[string]State{}, + preparedProposals: map[int32]bool{}, + processedProposals: map[int32]bool{}, validatorSetUpdates: map[int64]abci.ValidatorSetUpdate{}, consensusParamsUpdates: map[int64]types1.ConsensusParams{}, initialHeight: initialHeight, @@ -268,6 +290,9 @@ func (app *Application) InitChain(_ context.Context, req *abci.RequestInitChain) if vsu == nil { return nil, errors.New("validator-set update cannot be nil") } + + app.resetDuplicateDetection(app.preparedProposals != nil && app.processedProposals != nil) + resp := &abci.ResponseInitChain{ AppHash: app.LastCommittedState.GetAppHash(), ConsensusParams: &consensusParams, @@ -289,6 +314,13 @@ func (app *Application) PrepareProposal(_ context.Context, req *abci.RequestPrep return &abci.ResponsePrepareProposal{}, fmt.Errorf("MaxTxBytes must be positive, got: %d", req.MaxTxBytes) } + if app.preparedProposals != nil { + if app.preparedProposals[req.Round] { + return &abci.ResponsePrepareProposal{}, fmt.Errorf("duplicate PrepareProposal call at height %d, round %d", req.Height, req.Round) + } + app.preparedProposals[req.Round] = true + } + txRecords, err := app.prepareTxs(*req) if err != nil { return &abci.ResponsePrepareProposal{}, err @@ -323,6 +355,13 @@ func (app *Application) ProcessProposal(_ context.Context, req *abci.RequestProc app.mu.Lock() defer app.mu.Unlock() + if app.processedProposals != nil { + if app.processedProposals[req.Round] { + return &abci.ResponseProcessProposal{}, fmt.Errorf("duplicate ProcessProposal call at height %d, round %d", req.Height, req.Round) + } + app.processedProposals[req.Round] = true + } + roundState, txResults, err := app.executeProposal(req.Height, req.Round, types.NewTxs(req.Txs)) if err != nil { return &abci.ResponseProcessProposal{ @@ -664,6 +703,8 @@ func (app *Application) newHeight(committedAppHash tmbytes.HexBytes, height int6 return err } + app.resetDuplicateDetection(app.preparedProposals != nil && app.processedProposals != nil) + app.resetRoundStates() if err := app.persistInterval(); err != nil { return err @@ -672,6 +713,16 @@ func (app *Application) newHeight(committedAppHash tmbytes.HexBytes, height int6 return nil } +func (app *Application) resetDuplicateDetection(enabled bool) { + if enabled { + app.preparedProposals = map[int32]bool{} + app.processedProposals = map[int32]bool{} + } else { + app.preparedProposals = nil + app.processedProposals = nil + } +} + // resetRoundStates closes and cleans up uncommitted round states func (app *Application) resetRoundStates() { for _, state := range app.roundStates { diff --git 
a/abci/example/kvstore/kvstore_test.go b/abci/example/kvstore/kvstore_test.go index 596e878c51..14a36d4a3c 100644 --- a/abci/example/kvstore/kvstore_test.go +++ b/abci/example/kvstore/kvstore_test.go @@ -43,6 +43,23 @@ func testKVStore(ctx context.Context, t *testing.T, app types.Application, tx [] require.Equal(t, 1, len(respPrep.TxResults)) require.False(t, respPrep.TxResults[0].IsErr(), respPrep.TxResults[0].Log) + // Duplicate PrepareProposal should return error + _, err = app.PrepareProposal(ctx, &reqPrep) + require.ErrorContains(t, err, "duplicate PrepareProposal call") + + reqProcess := &types.RequestProcessProposal{ + Txs: [][]byte{tx}, + Height: height, + } + respProcess, err := app.ProcessProposal(ctx, reqProcess) + require.NoError(t, err) + require.Len(t, respProcess.TxResults, 1) + require.False(t, respProcess.TxResults[0].IsErr(), respProcess.TxResults[0].Log) + + // Duplicate ProcessProposal calls should return error + _, err = app.ProcessProposal(ctx, reqProcess) + require.ErrorContains(t, err, "duplicate ProcessProposal call") + reqFin := &types.RequestFinalizeBlock{Height: height} reqFin.Block, reqFin.BlockID = makeBlock(t, height, [][]byte{tx}, respPrep.AppHash) respFin, err := app.FinalizeBlock(ctx, reqFin) diff --git a/abci/tests/test_cli/test.sh b/abci/tests/test_cli/test.sh index d160d59c9e..34e5a81265 100755 --- a/abci/tests/test_cli/test.sh +++ b/abci/tests/test_cli/test.sh @@ -18,7 +18,7 @@ function testExample() { echo "Example $N: $APP" $APP &> /dev/null & sleep 2 - abci-cli --log_level=error --verbose batch < "$INPUT" > "${INPUT}.out.new" + abci-cli --log_level=debug --verbose batch < "$INPUT" > "${INPUT}.out.new" killall "$3" pre=$(shasum < "${INPUT}.out") diff --git a/cmd/tenderdash/commands/rollback_test.go b/cmd/tenderdash/commands/rollback_test.go index 3f2fd824fa..55817cc9c4 100644 --- a/cmd/tenderdash/commands/rollback_test.go +++ b/cmd/tenderdash/commands/rollback_test.go @@ -28,7 +28,7 @@ func TestRollbackIntegration(t *testing.T) { require.NoError(t, err) cfg.BaseConfig.DBBackend = "goleveldb" - app, err := e2e.NewApplication(kvstore.DefaultConfig(dir)) + app, err := e2e.NewApplication(kvstore.DefaultConfig(dir), kvstore.WithDuplicateRequestDetection(false)) require.NoError(t, err) t.Run("First run", func(t *testing.T) { diff --git a/cmd/tenderdash/commands/version.go b/cmd/tenderdash/commands/version.go index d1a7fba582..4927ea1348 100644 --- a/cmd/tenderdash/commands/version.go +++ b/cmd/tenderdash/commands/version.go @@ -1,18 +1,33 @@ package commands import ( - "fmt" + "runtime" + "github.com/sasha-s/go-deadlock" "github.com/spf13/cobra" "github.com/tendermint/tendermint/version" ) // VersionCmd ... 
-var VersionCmd = &cobra.Command{ - Use: "version", - Short: "Show version info", - Run: func(cmd *cobra.Command, args []string) { - fmt.Println(version.TMCoreSemVer) - }, -} +var VersionCmd *cobra.Command = func() *cobra.Command { + verbose := false + cmd := &cobra.Command{ + Use: "version", + Short: "Show version info", + + Run: func(cmd *cobra.Command, args []string) { + cmd.Println(version.TMCoreSemVer) + if verbose { + cmd.Println("Go version: " + runtime.Version()) + if deadlock.Opts.Disable { + cmd.Println("Deadlock detection: disabled") + } else { + cmd.Println("Deadlock detection: enabled, timeout: ", deadlock.Opts.DeadlockTimeout.String()) + } + } + }, + } + cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "display additional compilation info") + return cmd +}() diff --git a/config/config.go b/config/config.go index 3a48dedc64..702f3e54dd 100644 --- a/config/config.go +++ b/config/config.go @@ -668,6 +668,16 @@ type P2PConfig struct { //nolint: maligned // attempts per IP address. MaxIncomingConnectionAttempts uint `mapstructure:"max-incoming-connection-attempts"` + // MaxIncomingConnectionTime limits maximum duration after which incoming peer will be evicted. + // Defaults to 0 which disables this mechanism. + // Used on seed nodes to evict peers and make space for others. + MaxIncomingConnectionTime time.Duration `mapstructure:"max-incoming-connection-time"` + + // IncomingConnectionWindow describes how often an IP address + // can attempt to create a new connection. Defaults to 10 + // milliseconds, and cannot be less than 1 millisecond. + IncomingConnectionWindow time.Duration `mapstructure:"incoming-connection-window"` + // Comma separated list of peer IDs to keep private (will not be gossiped to // other peers) PrivatePeerIDs string `mapstructure:"private-peer-ids"` @@ -703,6 +713,8 @@ func DefaultP2PConfig() *P2PConfig { MaxConnections: 64, MaxOutgoingConnections: 12, MaxIncomingConnectionAttempts: 100, + MaxIncomingConnectionTime: 0, + IncomingConnectionWindow: 10 * time.Millisecond, FlushThrottleTimeout: 100 * time.Millisecond, // The MTU (Maximum Transmission Unit) for Ethernet is 1500 bytes. // The IP header and the TCP header take up 20 bytes each at least (unless @@ -736,6 +748,12 @@ func (cfg *P2PConfig) ValidateBasic() error { if cfg.MaxOutgoingConnections > cfg.MaxConnections { return errors.New("max-outgoing-connections cannot be larger than max-connections") } + if cfg.MaxIncomingConnectionTime < 0 { + return errors.New("max-incoming-connection-time can't be negative") + } + if cfg.IncomingConnectionWindow < 1*time.Millisecond { + return errors.New("incoming-connection-window must be set to at least 1ms") + } return nil } diff --git a/config/toml.go b/config/toml.go index 77f9abb42f..08dee49949 100644 --- a/config/toml.go +++ b/config/toml.go @@ -328,6 +328,16 @@ max-outgoing-connections = {{ .P2P.MaxOutgoingConnections }} # Rate limits the number of incoming connection attempts per IP address. max-incoming-connection-attempts = {{ .P2P.MaxIncomingConnectionAttempts }} +# Limits maximum duration after which incoming peer will be evicted. +# Defaults to 0 which disables this mechanism. +# Used on seed nodes to evict peers and make space for others. +max-incoming-connection-time = "{{ .P2P.MaxIncomingConnectionTime }}" + +# incoming-connection-window describes how often an IP address +# can attempt to create a new connection. Defaults to 10 +# milliseconds, and cannot be less than 1 millisecond. 
+incoming-connection-window = "{{ .P2P.IncomingConnectionWindow }}" + # Comma separated list of peer IDs to keep private (will not be gossiped to other peers) # Warning: IPs will be exposed at /net_info, for more information https://github.com/tendermint/tendermint/issues/3055 private-peer-ids = "{{ .P2P.PrivatePeerIDs }}" diff --git a/internal/blocksync/pool.go b/internal/blocksync/pool.go index 1732b4751e..62879dac45 100644 --- a/internal/blocksync/pool.go +++ b/internal/blocksync/pool.go @@ -49,6 +49,11 @@ const ( var peerTimeout = 15 * time.Second // not const so we can override with tests +var ( + errPeerNotResponded = errors.New("peer did not send us anything") + errUnableToFindPeer = errors.New("unable to find a peer, a requester is stopped") +) + /* Peers self report their heights when we join the block pool. Starting from our latest pool.height, we request blocks @@ -547,9 +552,8 @@ func (peer *bpPeer) onTimeout() { peer.pool.mtx.Lock() defer peer.pool.mtx.Unlock() - err := errors.New("peer did not send us anything") - peer.pool.sendError(err, peer.id) - peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout) + peer.pool.sendError(errPeerNotResponded, peer.id) + peer.logger.Error("SendTimeout", "reason", errPeerNotResponded, "timeout", peerTimeout) peer.didTimeout = true } @@ -591,19 +595,25 @@ func (bpr *bpRequester) OnStart(ctx context.Context) error { func (*bpRequester) OnStop() {} -// Returns true if the peer matches and block doesn't already exist. -func (bpr *bpRequester) setBlock(block *types.Block, commit *types.Commit, peerID types.NodeID) bool { +func (bpr *bpRequester) updateBlock(block *types.Block, commit *types.Commit, peerID types.NodeID) bool { bpr.mtx.Lock() + defer bpr.mtx.Unlock() if bpr.block != nil || bpr.peerID != peerID { - bpr.mtx.Unlock() return false } bpr.block = block if commit != nil { bpr.commit = commit } - bpr.mtx.Unlock() + return true +} +// Returns true if the peer matches and block doesn't already exist. +func (bpr *bpRequester) setBlock(block *types.Block, commit *types.Commit, peerID types.NodeID) bool { + updated := bpr.updateBlock(block, commit, peerID) + if !updated { + return false + } select { case bpr.gotBlockCh <- struct{}{}: default: @@ -656,52 +666,63 @@ func (bpr *bpRequester) redo(peerID types.NodeID) { // Responsible for making more requests as necessary // Returns only when a block is found (e.g. AddBlock() is called) func (bpr *bpRequester) requestRoutine(ctx context.Context) { -OUTER_LOOP: - for { + for bpr.isReqRoutineRunning() { // Pick a peer to send request to. - var peer *bpPeer - PICK_PEER_LOOP: - for { - if !bpr.IsRunning() || !bpr.pool.IsRunning() { - return - } - if ctx.Err() != nil { - return - } - - peer = bpr.pool.pickIncrAvailablePeer(bpr.height) - if peer == nil { - // This is preferable to using a timer because the request - // interval is so small. Larger request intervals may - // necessitate using a timer/ticker. - time.Sleep(requestInterval) - continue PICK_PEER_LOOP - } - break PICK_PEER_LOOP + peer, err := bpr.findPeer(ctx) + if err != nil { + return } - bpr.mtx.Lock() - bpr.peerID = peer.id - bpr.mtx.Unlock() - + bpr.updatePeerID(peer) // Send request and wait. bpr.pool.sendRequest(bpr.height, peer.id) - WAIT_LOOP: - for { - select { - case <-ctx.Done(): - return - case peerID := <-bpr.redoCh: - if peerID == bpr.peerID { - bpr.reset() - continue OUTER_LOOP - } else { - continue WAIT_LOOP - } - case <-bpr.gotBlockCh: - // We got a block! - // Continue the for-loop and wait til Quit. 
- continue WAIT_LOOP + shouldStop := bpr.waitForResponse(ctx) + if shouldStop { + return + } + } +} + +func (bpr *bpRequester) isReqRoutineRunning() bool { + return bpr.IsRunning() && bpr.pool.IsRunning() +} + +func (bpr *bpRequester) updatePeerID(peer *bpPeer) { + bpr.mtx.Lock() + defer bpr.mtx.Unlock() + bpr.peerID = peer.id +} + +func (bpr *bpRequester) findPeer(ctx context.Context) (*bpPeer, error) { + var peer *bpPeer + for bpr.isReqRoutineRunning() { + if ctx.Err() != nil { + return nil, ctx.Err() + } + peer = bpr.pool.pickIncrAvailablePeer(bpr.height) + if peer != nil { + return peer, nil + } + // This is preferable to using a timer because the request + // interval is so small. Larger request intervals may + // necessitate using a timer/ticker. + time.Sleep(requestInterval) + } + return nil, errUnableToFindPeer +} + +func (bpr *bpRequester) waitForResponse(ctx context.Context) bool { + for { + select { + case <-ctx.Done(): + return true + case peerID := <-bpr.redoCh: + if peerID == bpr.peerID { + bpr.reset() + return false } + case <-bpr.gotBlockCh: + // We got a block! + return true } } } diff --git a/internal/blocksync/reactor.go b/internal/blocksync/reactor.go index 95f7ba6fc8..0d124e7146 100644 --- a/internal/blocksync/reactor.go +++ b/internal/blocksync/reactor.go @@ -173,7 +173,7 @@ func (r *Reactor) OnStart(ctx context.Context) error { } go r.processBlockSyncCh(ctx, blockSyncCh) - go r.processPeerUpdates(ctx, r.peerEvents(ctx), blockSyncCh) + go r.processPeerUpdates(ctx, r.peerEvents(ctx, "blocksync"), blockSyncCh) return nil } diff --git a/internal/blocksync/reactor_test.go b/internal/blocksync/reactor_test.go index 29dae706b8..a34e9a4b62 100644 --- a/internal/blocksync/reactor_test.go +++ b/internal/blocksync/reactor_test.go @@ -2,6 +2,7 @@ package blocksync import ( "context" + "fmt" "os" "testing" "time" @@ -173,7 +174,7 @@ func (rts *reactorTestSuite) addNode( require.NoError(t, rts.app[nodeID].Start(ctx)) rts.peerChans[nodeID] = make(chan p2p.PeerUpdate) - rts.peerUpdates[nodeID] = p2p.NewPeerUpdates(rts.peerChans[nodeID], 1) + rts.peerUpdates[nodeID] = p2p.NewPeerUpdates(rts.peerChans[nodeID], 1, "blocksync") rts.network.Nodes[nodeID].PeerManager.Register(ctx, rts.peerUpdates[nodeID]) chCreator := func(ctx context.Context, chdesc *p2p.ChannelDescriptor) (p2p.Channel, error) { @@ -181,7 +182,7 @@ func (rts *reactorTestSuite) addNode( } proTxHash := rts.network.Nodes[nodeID].NodeInfo.ProTxHash - peerEvents := func(ctx context.Context) *p2p.PeerUpdates { return rts.peerUpdates[nodeID] } + peerEvents := func(ctx context.Context, _ string) *p2p.PeerUpdates { return rts.peerUpdates[nodeID] } reactor := makeReactor(ctx, t, proTxHash, nodeID, genDoc, privVal, chCreator, peerEvents) commit := types.NewCommit(0, 0, types.BlockID{}, nil) @@ -354,13 +355,12 @@ func TestReactor_NoBlockResponse(t *testing.T) { "expected node to be fully synced", ) - for _, tc := range testCases { - block := rts.reactors[rts.nodes[1]].store.LoadBlock(tc.height) - if tc.existent { - require.True(t, block != nil) - } else { - require.Nil(t, block) - } + reactor := rts.reactors[rts.nodes[1]] + for i, tc := range testCases { + t.Run(fmt.Sprintf("test-case #%d", i), func(t *testing.T) { + block := reactor.store.LoadBlock(tc.height) + require.Equal(t, tc.existent, block != nil) + }) } } diff --git a/internal/consensus/common_test.go b/internal/consensus/common_test.go index 56982c01f3..c8137a4704 100644 --- a/internal/consensus/common_test.go +++ b/internal/consensus/common_test.go @@ -545,9 +545,11 @@ 
func makeState(ctx context.Context, t *testing.T, args makeStateArgs) (*State, [ validators = args.validators } var app abci.Application - app, err := kvstore.NewMemoryApp() - require.NoError(t, err) - if args.application != nil { + if args.application == nil { + var err error + app, err = kvstore.NewMemoryApp() + require.NoError(t, err) + } else { app = args.application } if args.config == nil { diff --git a/internal/consensus/helper_test.go b/internal/consensus/helper_test.go index 6fafe834b6..734496aaee 100644 --- a/internal/consensus/helper_test.go +++ b/internal/consensus/helper_test.go @@ -25,14 +25,15 @@ import ( ) type nodeGen struct { - cfg *config.Config - app abci.Application - logger log.Logger - state *sm.State - storeDB dbm.DB - mempool mempool.Mempool - proxyApp abciclient.Client - eventBus *eventbus.EventBus + cfg *config.Config + app abci.Application + logger log.Logger + state *sm.State + storeDB dbm.DB + mempool mempool.Mempool + proxyApp abciclient.Client + eventBus *eventbus.EventBus + stateOpts []StateOption } func (g *nodeGen) initState(t *testing.T) { @@ -105,7 +106,7 @@ func (g *nodeGen) Generate(ctx context.Context, t *testing.T) *fakeNode { blockStore, g.eventBus, ) - csState, err := NewState(g.logger, g.cfg.Consensus, stateStore, blockExec, blockStore, g.mempool, evpool, g.eventBus) + csState, err := NewState(g.logger, g.cfg.Consensus, stateStore, blockExec, blockStore, g.mempool, evpool, g.eventBus, g.stateOpts...) require.NoError(t, err) privValidator := privval.MustLoadOrGenFilePVFromConfig(g.cfg) @@ -238,3 +239,9 @@ func (c *ChainGenerator) Generate(ctx context.Context, t *testing.T) Chain { chain.GenesisState.Validators = valSet return chain } + +func stopConsensusAtHeight(height int64, round int32) func(cs *State) bool { + return func(cs *State) bool { + return cs.Height == height && cs.Round == round + } +} diff --git a/internal/consensus/reactor.go b/internal/consensus/reactor.go index ac8844140d..b46e03e662 100644 --- a/internal/consensus/reactor.go +++ b/internal/consensus/reactor.go @@ -175,7 +175,7 @@ type channelBundle struct { func (r *Reactor) OnStart(ctx context.Context) error { r.logger.Debug("consensus wait sync", "wait_sync", r.WaitSync()) - peerUpdates := r.peerEvents(ctx) + peerUpdates := r.peerEvents(ctx, "consensus") var chBundle channelBundle var err error diff --git a/internal/consensus/reactor_test.go b/internal/consensus/reactor_test.go index dc20f2ac0a..8c40b5dcbc 100644 --- a/internal/consensus/reactor_test.go +++ b/internal/consensus/reactor_test.go @@ -124,7 +124,7 @@ func setup( state.logger.With("node", nodeID), state, chCreator(nodeID), - func(ctx context.Context) *p2p.PeerUpdates { return node.MakePeerUpdates(ctx, t) }, + func(ctx context.Context, _ string) *p2p.PeerUpdates { return node.MakePeerUpdates(ctx, t) }, state.eventBus, true, NopMetrics(), diff --git a/internal/consensus/replay_test.go b/internal/consensus/replay_test.go index c0b36f7f9e..e8a121c141 100644 --- a/internal/consensus/replay_test.go +++ b/internal/consensus/replay_test.go @@ -1354,10 +1354,21 @@ func TestHandshakeInitialCoreLockHeight(t *testing.T) { func TestWALRoundsSkipper(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + const ( + chainLen int64 = 5 + maxRound int32 = 10 + ) cfg := getConfig(t) cfg.Consensus.WalSkipRoundsToLast = true - logger := log.NewNopLogger() - ng := nodeGen{cfg: cfg, logger: logger} + logger := log.NewTestingLogger(t) + ng := nodeGen{ + cfg: cfg, + logger: logger, + stateOpts: 
[]StateOption{WithStopFunc( + stopConsensusAtHeight(chainLen+1, 0), + stopConsensusAtHeight(chainLen, maxRound+1), + )}, + } node := ng.Generate(ctx, t) originDoPrevote := node.csState.doPrevote node.csState.doPrevote = func(ctx context.Context, height int64, round int32, allowOldBlocks bool) { @@ -1367,10 +1378,6 @@ func TestWALRoundsSkipper(t *testing.T) { } originDoPrevote(ctx, height, round, allowOldBlocks) } - const ( - chainLen int64 = 5 - maxRound int32 = 10 - ) walBody, err := WALWithNBlocks(ctx, t, logger, node, chainLen) require.NoError(t, err) walFile := tempWALWithData(t, walBody) @@ -1413,11 +1420,11 @@ func TestWALRoundsSkipper(t *testing.T) { blockStore, ) - cs := newStateWithConfigAndBlockStore(ctx, t, logger, cfg, state, privVal, app, blockStore) + cs := newStateWithConfigAndBlockStore(ctx, t, log.NewTestingLogger(t), cfg, state, privVal, app, blockStore) commit := blockStore.commits[len(blockStore.commits)-1] require.Equal(t, int64(4), commit.Height) - require.Equal(t, int32(10), commit.Round) + require.Equal(t, maxRound, commit.Round) require.NoError(t, cs.Start(ctx)) defer cs.Stop() @@ -1428,7 +1435,7 @@ func TestWALRoundsSkipper(t *testing.T) { Query: types.EventQueryNewBlock, }) require.NoError(t, err) - ctxto, cancel := context.WithTimeout(ctx, 120*time.Second) + ctxto, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() msg, err := newBlockSub.Next(ctxto) require.NoError(t, err) diff --git a/internal/consensus/state.go b/internal/consensus/state.go index 5343666a7d..b8e52d35a0 100644 --- a/internal/consensus/state.go +++ b/internal/consensus/state.go @@ -182,6 +182,8 @@ type State struct { // wait the channel event happening for shutting down the state gracefully onStopCh chan *cstypes.RoundState + + stopFn func(cs *State) bool } // StateOption sets an optional parameter on the State. @@ -193,6 +195,21 @@ func SkipStateStoreBootstrap(sm *State) { sm.skipBootstrapping = true } +func WithStopFunc(stopFns ...func(cs *State) bool) func(cs *State) { + return func(cs *State) { + // we assume that even if one function returns true, then the consensus must be stopped + cs.stopFn = func(cs *State) bool { + for _, fn := range stopFns { + ret := fn(cs) + if ret { + return true + } + } + return false + } + } +} + // NewState returns a new State. func NewState( logger log.Logger, @@ -466,7 +483,7 @@ func (cs *State) OnStart(ctx context.Context) error { } // now start the receiveRoutine - go cs.receiveRoutine(ctx, 0) + go cs.receiveRoutine(ctx, cs.stopFn) // schedule the first round! // use GetRoundState so we don't race the receiveRoutine for access @@ -486,7 +503,7 @@ func (cs *State) startRoutines(ctx context.Context, maxSteps int) { return } - go cs.receiveRoutine(ctx, maxSteps) + go cs.receiveRoutine(ctx, stopStateByMaxStepFunc(maxSteps)) } // loadWalFile loads WAL data from file. It overwrites cs.wal. @@ -888,7 +905,7 @@ func (cs *State) newStep() { // It keeps the RoundState and is the only thing that updates it. // Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities. // State must be locked before any internal state is updated. 
-func (cs *State) receiveRoutine(ctx context.Context, maxSteps int) { +func (cs *State) receiveRoutine(ctx context.Context, stopFn func(*State) bool) { onExit := func(cs *State) { // NOTE: the internalMsgQueue may have signed messages from our // priv_val that haven't hit the WAL, but its ok because @@ -943,12 +960,8 @@ func (cs *State) receiveRoutine(ctx context.Context, maxSteps int) { }() for { - if maxSteps > 0 { - if cs.nSteps >= maxSteps { - cs.logger.Debug("reached max steps; exiting receive routine") - cs.nSteps = 0 - return - } + if stopFn != nil && stopFn(cs) { + return } rs := cs.GetRoundState() @@ -1017,12 +1030,6 @@ func (cs *State) handleMsg(ctx context.Context, mi msgInfo, fromReplay bool) { // if the proposal is complete, we'll enterPrevote or tryFinalizeCommit added, err = cs.addProposalBlockPart(ctx, msg, peerID) - if added && cs.ProposalBlockParts != nil && cs.ProposalBlockParts.IsComplete() && fromReplay { - if err := cs.ensureProcessProposal(ctx, cs.ProposalBlock, msg.Round, cs.state); err != nil { - panic(err) - } - } - // We unlock here to yield to any routines that need to read the the RoundState. // Previously, this code held the lock from the point at which the final block // part was received until the block executed against the application. @@ -1712,6 +1719,12 @@ func (cs *State) defaultDoPrevote(ctx context.Context, height int64, round int32 // Unknown error, so we panic panic(fmt.Sprintf("ProcessProposal: %v", err)) } + + // Validate the block. + if err := cs.blockExec.ValidateBlockWithRoundState(ctx, cs.state, cs.CurrentRoundState, cs.ProposalBlock); err != nil { + panic(fmt.Sprintf("prevote on invalid block: %v", err)) + } + cs.metrics.MarkProposalProcessed(true) /* @@ -2099,10 +2112,6 @@ func (cs *State) finalizeCommit(ctx context.Context, height int64) { panic("cannot finalize commit; proposal block does not hash to commit hash") } - if err := cs.blockExec.ValidateBlockWithRoundState(ctx, cs.state, cs.CurrentRoundState, block); err != nil { - panic(fmt.Errorf("+2/3 committed an invalid block %X: %w", cs.CurrentRoundState.AppHash, err)) - } - logger.Info( "finalizing commit of block", "hash", tmstrings.LazyBlockHash(block), @@ -2254,6 +2263,11 @@ func (cs *State) verifyCommit(ctx context.Context, commit *types.Commit, peerID return false, fmt.Errorf("cannot finalize commit; proposal block does not hash to commit hash") } + // We have a correct block, let's process it before applying the commit + if err := cs.ensureProcessProposal(ctx, block, commit.Round, cs.state); err != nil { + return false, fmt.Errorf("unable to process proposal: %w", err) + } + if err := cs.blockExec.ValidateBlockWithRoundState(ctx, cs.state, cs.CurrentRoundState, block); err != nil { return false, fmt.Errorf("+2/3 committed an invalid block: %w", err) } @@ -2303,16 +2317,28 @@ func (cs *State) applyCommit(ctx context.Context, commit *types.Commit, logger l ) block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts - // Save to blockStore. 
+ if commit != nil { height = commit.Height round = commit.Round - cs.blockStore.SaveBlock(block, blockParts, commit) } else { height = cs.Height round = cs.Round } + if err := cs.ensureProcessProposal(ctx, block, round, cs.state); err != nil { + panic("cannot finalize commit; cannot process proposal block: " + err.Error()) + } + + if err := cs.blockExec.ValidateBlockWithRoundState(ctx, cs.state, cs.CurrentRoundState, block); err != nil { + panic(fmt.Errorf("+2/3 committed an invalid block %X: %w", cs.CurrentRoundState.AppHash, err)) + } + + // Save to blockStore. + if commit != nil { + cs.blockStore.SaveBlock(block, blockParts, commit) + } + // Write EndHeightMessage{} for this height, implying that the blockstore // has saved the block. // @@ -2334,13 +2360,6 @@ func (cs *State) applyCommit(ctx context.Context, commit *types.Commit, logger l )) } - // Execute and commit the block, update and save the state, and update the mempool. - // NOTE The block.AppHash wont reflect these txs until the next block. - if err := cs.ensureProcessProposal(ctx, block, round, cs.state); err != nil { - logger.Error("cannot apply commit", "error", err) - return - } - // Create a copy of the state for staging and an event cache for txs. stateCopy := cs.state.Copy() rs := cs.RoundState @@ -3200,3 +3219,14 @@ func (pv *privValidator) init(ctx context.Context) error { pv.ProTxHash, err = pv.GetProTxHash(ctx) return err } + +func stopStateByMaxStepFunc(maxSteps int) func(cs *State) bool { + return func(cs *State) bool { + if maxSteps > 0 && cs.nSteps >= maxSteps { + cs.logger.Debug("reached max steps; exiting receive routine") + cs.nSteps = 0 + return true + } + return false + } +} diff --git a/internal/consensus/state_test.go b/internal/consensus/state_test.go index 82cb1576b6..e538d7cd03 100644 --- a/internal/consensus/state_test.go +++ b/internal/consensus/state_test.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/tendermint/tendermint/abci/example/kvstore" abci "github.com/tendermint/tendermint/abci/types" abcimocks "github.com/tendermint/tendermint/abci/types/mocks" "github.com/tendermint/tendermint/crypto" @@ -20,6 +21,9 @@ import ( "github.com/tendermint/tendermint/internal/mempool" tmpubsub "github.com/tendermint/tendermint/internal/pubsub" tmquery "github.com/tendermint/tendermint/internal/pubsub/query" + sf "github.com/tendermint/tendermint/internal/state/test/factory" + "github.com/tendermint/tendermint/internal/test/factory" + tmbytes "github.com/tendermint/tendermint/libs/bytes" "github.com/tendermint/tendermint/libs/log" tmrand "github.com/tendermint/tendermint/libs/rand" @@ -262,7 +266,9 @@ func TestStateProposalTime(t *testing.T) { config := configSetup(t) - cs1, _ := makeState(ctx, t, makeStateArgs{config: config, validators: 1}) + app, err := kvstore.NewMemoryApp(kvstore.WithDuplicateRequestDetection(false)) + require.NoError(t, err) + cs1, _ := makeState(ctx, t, makeStateArgs{config: config, validators: 1, application: app}) cs1.config.DontAutoPropose = true cs1.config.CreateEmptyBlocksInterval = 0 cs1.state.ConsensusParams.Synchrony.MessageDelay = 5 * time.Millisecond @@ -3027,6 +3033,71 @@ func TestStateTimestamp_ProposalNotMatch(t *testing.T) { validatePrecommit(ctx, t, cs1, round, -1, vss[0], nil, nil) } +// TestStateTryAddCommitCallsProcessProposal ensures that CurrentRoundState is updated by calling Process Proposal +// before commit received from peer is verified. 
+// +// Proposer generates a proposal, creates a commit and sends it to otherNode. OtherNode correctly verifies the commit. +// +// This test ensures that a bug "2/3 committed an invalid block" when processing received commits will not reappear. +func TestStateTryAddCommitCallsProcessProposal(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + config := configSetup(t) + + css := makeConsensusState( + ctx, + t, + config, + 2, + t.Name(), + newTickerFunc(), + ) + privvals := []types.PrivValidator{} + for _, c := range css { + privvals = append(privvals, c.privValidator.PrivValidator) + } + proposer := css[0] + otherNode := css[1] + + block, err := sf.MakeBlock(proposer.state, 1, &types.Commit{}, kvstore.ProtocolVersion) + require.NoError(t, err) + block.CoreChainLockedHeight = 1 + + commit, err := factory.MakeCommit( + ctx, + block.BlockID(nil), + block.Height, + 0, + proposer.Votes.Precommits(0), + proposer.Validators, + privvals, + block.StateID(), + ) + require.NoError(t, err) + + proposal := types.NewProposal( + block.Height, + block.CoreChainLockedHeight, + 0, + -1, + commit.BlockID, + block.Time) + + parts, err := block.MakePartSet(999999999) + require.NoError(t, err) + + peerID := proposer.Validators.Proposer.NodeAddress.NodeID + otherNode.Proposal = proposal + otherNode.ProposalBlock = block + otherNode.ProposalBlockParts = parts + otherNode.updateRoundStep(commit.Round, cstypes.RoundStepPrevote) + + // This is where error "2/3 committed an invalid block" occurred before + added, err := otherNode.tryAddCommit(ctx, commit, peerID) + assert.True(t, added) + assert.NoError(t, err) +} + // TestStateTimestamp_ProposalMatch tests that a validator prevotes a // proposed block if the timestamp in the block matches the timestamp in the // corresponding proposal message. 
diff --git a/internal/evidence/reactor.go b/internal/evidence/reactor.go index 080203ac75..f03bc89780 100644 --- a/internal/evidence/reactor.go +++ b/internal/evidence/reactor.go @@ -90,7 +90,7 @@ func (r *Reactor) OnStart(ctx context.Context) error { } go r.processEvidenceCh(ctx, ch) - go r.processPeerUpdates(ctx, r.peerEvents(ctx), ch) + go r.processPeerUpdates(ctx, r.peerEvents(ctx, "evidence"), ch) return nil } diff --git a/internal/evidence/reactor_test.go b/internal/evidence/reactor_test.go index fd29003a2e..d1a0b30f39 100644 --- a/internal/evidence/reactor_test.go +++ b/internal/evidence/reactor_test.go @@ -93,7 +93,7 @@ func setup(ctx context.Context, t *testing.T, stateStores []sm.Store) *reactorTe require.NoError(t, err) rts.peerChans[nodeID] = make(chan p2p.PeerUpdate) - pu := p2p.NewPeerUpdates(rts.peerChans[nodeID], 1) + pu := p2p.NewPeerUpdates(rts.peerChans[nodeID], 1, "evidence") rts.peerUpdates[nodeID] = pu rts.network.Nodes[nodeID].PeerManager.Register(ctx, pu) rts.nodes = append(rts.nodes, rts.network.Nodes[nodeID]) @@ -105,7 +105,7 @@ func setup(ctx context.Context, t *testing.T, stateStores []sm.Store) *reactorTe rts.reactors[nodeID] = evidence.NewReactor( logger, chCreator, - func(ctx context.Context) *p2p.PeerUpdates { return pu }, + func(ctx context.Context, _ string) *p2p.PeerUpdates { return pu }, rts.pools[nodeID]) require.NoError(t, rts.reactors[nodeID].Start(ctx)) diff --git a/internal/libs/sync/waker.go b/internal/libs/sync/waker.go index 0aff3ddf83..4aa78dc64c 100644 --- a/internal/libs/sync/waker.go +++ b/internal/libs/sync/waker.go @@ -1,10 +1,17 @@ package sync +import ( + "sync" + "time" +) + // Waker is used to wake up a sleeper when some event occurs. It debounces // multiple wakeup calls occurring between each sleep, and wakeups are // non-blocking to avoid having to coordinate goroutines. type Waker struct { wakeCh chan struct{} + mtx sync.Mutex + timers []*time.Timer } // NewWaker creates a new Waker. @@ -28,3 +35,24 @@ func (w *Waker) Wake() { default: } } + +// WakeAfter wakes up the sleeper after some delay. 
+func (w *Waker) WakeAfter(delay time.Duration) { + w.mtx.Lock() + defer w.mtx.Unlock() + + w.timers = append(w.timers, time.AfterFunc(delay, w.Wake)) +} + +// Close closes the waker and cleans up its resources +func (w *Waker) Close() error { + w.mtx.Lock() + defer w.mtx.Unlock() + + for _, timer := range w.timers { + if timer != nil { + timer.Stop() + } + } + return nil +} diff --git a/internal/mempool/reactor.go b/internal/mempool/reactor.go index f68c852dbd..db47ed068f 100644 --- a/internal/mempool/reactor.go +++ b/internal/mempool/reactor.go @@ -105,7 +105,7 @@ func (r *Reactor) OnStart(ctx context.Context) error { } go r.processMempoolCh(ctx, ch) - go r.processPeerUpdates(ctx, r.peerEvents(ctx), ch) + go r.processPeerUpdates(ctx, r.peerEvents(ctx, "mempool"), ch) return nil } diff --git a/internal/mempool/reactor_test.go b/internal/mempool/reactor_test.go index 22c418bc36..5d72541e33 100644 --- a/internal/mempool/reactor_test.go +++ b/internal/mempool/reactor_test.go @@ -73,7 +73,7 @@ func setupReactors(ctx context.Context, t *testing.T, logger log.Logger, numNode rts.mempools[nodeID] = mempool rts.peerChans[nodeID] = make(chan p2p.PeerUpdate, chBuf) - rts.peerUpdates[nodeID] = p2p.NewPeerUpdates(rts.peerChans[nodeID], 1) + rts.peerUpdates[nodeID] = p2p.NewPeerUpdates(rts.peerChans[nodeID], 1, "mempool") rts.network.Nodes[nodeID].PeerManager.Register(ctx, rts.peerUpdates[nodeID]) chCreator := func(ctx context.Context, chDesc *p2p.ChannelDescriptor) (p2p.Channel, error) { @@ -85,7 +85,7 @@ func setupReactors(ctx context.Context, t *testing.T, logger log.Logger, numNode cfg.Mempool, mempool, chCreator, - func(ctx context.Context) *p2p.PeerUpdates { return rts.peerUpdates[nodeID] }, + func(ctx context.Context, n string) *p2p.PeerUpdates { return rts.peerUpdates[nodeID] }, ) rts.nodes = append(rts.nodes, nodeID) diff --git a/internal/p2p/conn/connection.go b/internal/p2p/conn/connection.go index 6a5b57f865..8f54984ef3 100644 --- a/internal/p2p/conn/connection.go +++ b/internal/p2p/conn/connection.go @@ -279,7 +279,7 @@ func (c *MConnection) String() string { } func (c *MConnection) flush() { - c.logger.Debug("Flush", "conn", c) + c.logger.Trace("Flush", "conn", c) err := c.bufConnWriter.Flush() if err != nil { c.logger.Debug("MConnection flush failed", "err", err) @@ -310,7 +310,7 @@ func (c *MConnection) Send(chID ChannelID, msgBytes []byte) bool { return false } - c.logger.Debug("Send", "channel", chID, "conn", c, "msgBytes", msgBytes) + c.logger.Trace("Send", "channel", chID, "conn", c, "msgBytes", msgBytes) // Send message to channel. channel, ok := c.channelsIdx[chID] @@ -563,7 +563,7 @@ FOR_LOOP: break FOR_LOOP } if msgBytes != nil { - c.logger.Debug("Received bytes", "chID", channelID, "msgBytes", msgBytes) + c.logger.Trace("Received bytes", "chID", channelID, "msgBytes", msgBytes) // NOTE: This means the reactor.Receive runs in the same thread as the p2p recv routine c.onReceive(ctx, channelID, msgBytes) } @@ -733,7 +733,7 @@ func (ch *channel) writePacketMsgTo(w io.Writer) (n int, err error) { // complete, which is owned by the caller and will not be modified. 
// Not goroutine-safe func (ch *channel) recvPacketMsg(packet tmp2p.PacketMsg) ([]byte, error) { - ch.logger.Debug("Read PacketMsg", "conn", ch.conn, "packet", packet) + ch.logger.Trace("Read PacketMsg", "conn", ch.conn, "packet", packet) var recvCap, recvReceived = ch.desc.RecvMessageCapacity, len(ch.recving) + len(packet.Data) if recvCap < recvReceived { return nil, fmt.Errorf("received message exceeds available capacity: %v < %v", recvCap, recvReceived) diff --git a/internal/p2p/conn_tracker_test.go b/internal/p2p/conn_tracker_test.go index daa3351f24..bf302f365a 100644 --- a/internal/p2p/conn_tracker_test.go +++ b/internal/p2p/conn_tracker_test.go @@ -28,7 +28,7 @@ func TestConnTracker(t *testing.T) { }, } { t.Run(name, func(t *testing.T) { - factory := factory // nolint:scopelint + factory := factory //nolint:scopelint t.Run("Initialized", func(t *testing.T) { ct := factory() require.Equal(t, 0, ct.Len()) diff --git a/internal/p2p/p2ptest/network.go b/internal/p2p/p2ptest/network.go index 7eb9cc28cf..ab9468f587 100644 --- a/internal/p2p/p2ptest/network.go +++ b/internal/p2p/p2ptest/network.go @@ -86,7 +86,7 @@ func (n *Network) Start(ctx context.Context, t *testing.T) { defer subcancel() for _, node := range n.Nodes { dialQueue = append(dialQueue, node.NodeAddress) - subs[node.NodeID] = node.PeerManager.Subscribe(subctx) + subs[node.NodeID] = node.PeerManager.Subscribe(subctx, "p2ptest") } // For each node, dial the nodes that it still doesn't have a connection to @@ -193,7 +193,7 @@ func (n *Network) RandomNode() *Node { for _, node := range n.Nodes { nodes = append(nodes, node) } - return nodes[rand.Intn(len(nodes))] // nolint:gosec + return nodes[rand.Intn(len(nodes))] //nolint:gosec } // Peers returns a node's peers (i.e. everyone except itself). @@ -218,7 +218,7 @@ func (n *Network) Remove(ctx context.Context, t *testing.T, id types.NodeID) { subctx, subcancel := context.WithCancel(ctx) defer subcancel() for _, peer := range n.Nodes { - sub := peer.PeerManager.Subscribe(subctx) + sub := peer.PeerManager.Subscribe(subctx, "p2ptest") subs = append(subs, sub) } @@ -270,7 +270,7 @@ func (n *Network) MakeNode(ctx context.Context, t *testing.T, proTxHash crypto.P require.NoError(t, err) require.NotNil(t, ep, "transport not listening an endpoint") - peerManager, err := p2p.NewPeerManager(nodeID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, nodeID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MinRetryTime: 10 * time.Millisecond, DisconnectCooldownPeriod: 10 * time.Millisecond, MaxRetryTime: 100 * time.Millisecond, @@ -280,6 +280,7 @@ func (n *Network) MakeNode(ctx context.Context, t *testing.T, proTxHash crypto.P Metrics: p2p.NopMetrics(), }) require.NoError(t, err) + peerManager.SetLogger(n.logger.With("module", "peer_manager")) router, err := p2p.NewRouter( n.logger.With(deriveLoggerAttrsFromCtx(ctx)), @@ -352,7 +353,7 @@ func (n *Node) MakeChannelNoCleanup( // It checks that all updates have been consumed during cleanup. func (n *Node) MakePeerUpdates(ctx context.Context, t *testing.T) *p2p.PeerUpdates { t.Helper() - sub := n.PeerManager.Subscribe(ctx) + sub := n.PeerManager.Subscribe(ctx, "p2ptest") t.Cleanup(func() { RequireNoUpdates(ctx, t, sub) }) @@ -364,7 +365,7 @@ func (n *Node) MakePeerUpdates(ctx context.Context, t *testing.T) *p2p.PeerUpdat // It does *not* check that all updates have been consumed, but will // close the update channel. 
func (n *Node) MakePeerUpdatesNoRequireEmpty(ctx context.Context, t *testing.T) *p2p.PeerUpdates { - return n.PeerManager.Subscribe(ctx) + return n.PeerManager.Subscribe(ctx, "p2ptest") } func MakeChannelDesc(chID p2p.ChannelID) *p2p.ChannelDescriptor { diff --git a/internal/p2p/peermanager.go b/internal/p2p/peermanager.go index 4e4865a43c..21ad92480f 100644 --- a/internal/p2p/peermanager.go +++ b/internal/p2p/peermanager.go @@ -9,13 +9,14 @@ import ( "sort" "time" - sync "github.com/sasha-s/go-deadlock" - "github.com/gogo/protobuf/proto" "github.com/google/orderedcode" + "github.com/rs/zerolog" + sync "github.com/sasha-s/go-deadlock" dbm "github.com/tendermint/tm-db" tmsync "github.com/tendermint/tendermint/internal/libs/sync" + "github.com/tendermint/tendermint/libs/log" p2pproto "github.com/tendermint/tendermint/proto/tendermint/p2p" "github.com/tendermint/tendermint/types" ) @@ -23,6 +24,10 @@ import ( const ( // retryNever is returned by retryDelay() when retries are disabled. retryNever time.Duration = math.MaxInt64 + // broadcastSubscriptionChannelCapacity defines how many messages can be buffered for each subscriber + broadcastSubscriptionChannelCapacity = 3 + // broadcastTimeout defines how long we will wait when broadcast channel is full + broadcastTimeout time.Duration = 60 * time.Second ) // PeerStatus is a peer status. @@ -42,7 +47,8 @@ const ( type peerConnectionDirection int const ( - peerConnectionIncoming peerConnectionDirection = iota + 1 + peerConnectionNone peerConnectionDirection = iota + peerConnectionIncoming peerConnectionOutgoing ) @@ -73,15 +79,18 @@ func (pu *PeerUpdate) SetProTxHash(proTxHash types.ProTxHash) { type PeerUpdates struct { routerUpdatesCh chan PeerUpdate reactorUpdatesCh chan PeerUpdate + // subscriberName is a label used for debugging + subscriberName string } // NewPeerUpdates creates a new PeerUpdates subscription. It is primarily for // internal use, callers should typically use PeerManager.Subscribe(). The // subscriber must call Close() when done. -func NewPeerUpdates(updatesCh chan PeerUpdate, buf int) *PeerUpdates { +func NewPeerUpdates(updatesCh chan PeerUpdate, routerUpdatesBufSize int, subscriberName string) *PeerUpdates { return &PeerUpdates{ reactorUpdatesCh: updatesCh, - routerUpdatesCh: make(chan PeerUpdate, buf), + routerUpdatesCh: make(chan PeerUpdate, routerUpdatesBufSize), + subscriberName: subscriberName, } } @@ -136,6 +145,11 @@ type PeerManagerOptions struct { // the connection and evict a lower-scored peer. MaxConnectedUpgrade uint16 + // MaxIncomingConnectionTime limits maximum duration after which incoming peer will be evicted. + // Defaults to 0 which disables this mechanism. + // Used on seed nodes to evict peers and make space for others. + MaxIncomingConnectionTime time.Duration + // MinRetryTime is the minimum time to wait between retries. Retry times // double for each retry, up to MaxRetryTime. 0 disables retries. MinRetryTime time.Duration @@ -298,6 +312,7 @@ type PeerManager struct { rand *rand.Rand dialWaker *tmsync.Waker // wakes up DialNext() on relevant peer changes evictWaker *tmsync.Waker // wakes up EvictNext() on relevant peer changes + logger log.Logger mtx sync.Mutex store *peerStore @@ -311,7 +326,7 @@ type PeerManager struct { } // NewPeerManager creates a new peer manager. 
-func NewPeerManager(selfID types.NodeID, peerDB dbm.DB, options PeerManagerOptions) (*PeerManager, error) { +func NewPeerManager(ctx context.Context, selfID types.NodeID, peerDB dbm.DB, options PeerManagerOptions) (*PeerManager, error) { if selfID == "" { return nil, errors.New("self ID not given") } @@ -329,9 +344,10 @@ func NewPeerManager(selfID types.NodeID, peerDB dbm.DB, options PeerManagerOptio peerManager := &PeerManager{ selfID: selfID, options: options, - rand: rand.New(rand.NewSource(time.Now().UnixNano())), // nolint:gosec + rand: rand.New(rand.NewSource(time.Now().UnixNano())), //nolint:gosec dialWaker: tmsync.NewWaker(), evictWaker: tmsync.NewWaker(), + logger: log.NewNopLogger(), metrics: NopMetrics(), store: store, @@ -354,9 +370,22 @@ func NewPeerManager(selfID types.NodeID, peerDB dbm.DB, options PeerManagerOptio if err = peerManager.prunePeers(); err != nil { return nil, err } + return peerManager, nil } +// SetLogger sets a logger for the PeerManager +func (m *PeerManager) SetLogger(logger log.Logger) { + m.logger = logger +} + +// Close closes peer manager and frees up all resources +func (m *PeerManager) Close() error { + m.evictWaker.Close() + m.dialWaker.Close() + return nil +} + // configurePeers configures peers in the peer store with ephemeral runtime // configuration, e.g. PersistentPeers. It also removes ourself, if we're in the // peer store. The caller must hold the mutex lock. @@ -524,11 +553,17 @@ func (m *PeerManager) HasDialedMaxPeers() bool { // becomes available. The caller must call Dialed() or DialFailed() for the // returned peer. func (m *PeerManager) DialNext(ctx context.Context) (NodeAddress, error) { - for { + for counter := uint32(0); ; counter++ { if address := m.TryDialNext(); (address != NodeAddress{}) { return address, nil } + // If we have zero peers connected, we need to schedule a retry. + // This can happen, for example, when some retry delay is not fulfilled + if m.numDialingOrConnected() == 0 { + m.scheduleDial(ctx, m.retryDelay(counter+1, false)) + } + select { case <-m.dialWaker.Sleep(): continue @@ -548,29 +583,36 @@ func (m *PeerManager) TryDialNext() NodeAddress { // MaxConnectedUpgrade allows us to probe additional peers that have a // higher score than any other peers, and if successful evict it. 
if m.options.MaxConnected > 0 && len(m.connected)+len(m.dialing) >= int(m.options.MaxConnected)+int(m.options.MaxConnectedUpgrade) { + m.logger.Trace("max connected reached, skipping dial attempt") return NodeAddress{} } cinfo := m.getConnectedInfo() if m.options.MaxOutgoingConnections > 0 && cinfo.outgoing >= m.options.MaxOutgoingConnections { + m.logger.Trace("max outgoing connections reached, skipping dial attempt") return NodeAddress{} } for _, peer := range m.store.Ranked() { if m.dialing[peer.ID] || m.isConnected(peer.ID) { + m.logger.Trace("peer dialing or connected, skipping", "peer", peer) continue } if !peer.LastDisconnected.IsZero() && time.Since(peer.LastDisconnected) < m.options.DisconnectCooldownPeriod { + m.logger.Trace("peer within disconnect cooldown period, skipping", "peer", peer, "cooldown_period", m.options.DisconnectCooldownPeriod) continue } for _, addressInfo := range peer.AddressInfo { - if time.Since(addressInfo.LastDialFailure) < m.retryDelay(addressInfo.DialFailures, peer.Persistent) { + delay := m.retryDelay(addressInfo.DialFailures, peer.Persistent) + if time.Since(addressInfo.LastDialFailure) < delay { + m.logger.Trace("not dialing peer due to retry delay", "peer", peer, "delay", delay, "last_failure", addressInfo.LastDialFailure) continue } if id, ok := m.store.Resolve(addressInfo.Address); ok && (m.isConnected(id) || m.dialing[id]) { + m.logger.Trace("peer address already dialing", "peer", peer, "address", addressInfo.Address.String()) continue } @@ -583,6 +625,12 @@ func (m *PeerManager) TryDialNext() NodeAddress { // peer (since they're ordered by score via peerStore.Ranked). if m.options.MaxConnected > 0 && len(m.connected) >= int(m.options.MaxConnected) { upgradeFromPeer := m.findUpgradeCandidate(peer.ID, peer.Score()) + m.logger.Trace("max connected reached, checking upgrade candidate", + "peer", peer, + "max_connected", m.options.MaxConnected, + "connected", len(m.connected), + "upgrade_candidate", upgradeFromPeer, + ) if upgradeFromPeer == "" { return NodeAddress{} } @@ -626,27 +674,19 @@ func (m *PeerManager) DialFailed(ctx context.Context, address NodeAddress) error return err } - // We spawn a goroutine that notifies DialNext() again when the retry - // timeout has elapsed, so that we can consider dialing it again. We - // calculate the retry delay outside the goroutine, since it must hold - // the mutex lock. - if d := m.retryDelay(addressInfo.DialFailures, peer.Persistent); d != 0 && d != retryNever { - go func() { - // Use an explicit timer with deferred cleanup instead of - // time.After(), to avoid leaking goroutines on PeerManager.Close(). - timer := time.NewTimer(d) - defer timer.Stop() - select { - case <-timer.C: - m.dialWaker.Wake() - case <-ctx.Done(): - } - }() + delay := m.retryDelay(addressInfo.DialFailures, peer.Persistent) + m.scheduleDial(ctx, delay) + + return nil +} + +// scheduleDial will dial peers after some delay +func (m *PeerManager) scheduleDial(ctx context.Context, delay time.Duration) { + if delay > 0 && delay != retryNever { + m.dialWaker.WakeAfter(delay) } else { m.dialWaker.Wake() } - - return nil } // Dialed marks a peer as successfully dialed. 
Any further connections will be @@ -791,6 +831,9 @@ func (m *PeerManager) Accepted(peerID types.NodeID, peerOpts ...func(*peerInfo)) if upgradeFromPeer != "" { m.evict[upgradeFromPeer] = true } + + evictPeerAfterTimeout(m, peerID, peerConnectionIncoming, m.options.MaxIncomingConnectionTime) + m.evictWaker.Wake() return nil } @@ -817,7 +860,13 @@ func (m *PeerManager) Ready(ctx context.Context, peerID types.NodeID, channels C if ok && len(peer.ProTxHash) > 0 { pu.SetProTxHash(peer.ProTxHash) } - m.broadcast(ctx, pu) + if err := m.broadcast(ctx, pu); err != nil { + m.logger.Error("error during broadcast ready", "error", err) + if errors.Is(err, context.DeadlineExceeded) { + // this implies deadlock condition which we really need to detect and fix + panic("possible deadlock when sending ready broadcast: " + err.Error()) + } + } } } @@ -919,7 +968,13 @@ func (m *PeerManager) Disconnected(ctx context.Context, peerID types.NodeID) { if ok && len(peer.ProTxHash) > 0 { pu.SetProTxHash(peer.ProTxHash) } - m.broadcast(ctx, pu) + if err := m.broadcast(ctx, pu); err != nil { + m.logger.Error("error during broadcast disconnected", "error", err) + if errors.Is(err, context.DeadlineExceeded) { + // this implies deadlock condition which we really need to detect and fix + panic("possible deadlock when sending disconnected broadcast: " + err.Error()) + } + } } m.dialWaker.Wake() @@ -1052,7 +1107,7 @@ func (m *PeerManager) Advertise(peerID types.NodeID, limit uint16) []NodeAddress // 10% of the time we'll randomly insert a "loosing" // peer. - // nolint:gosec // G404: Use of weak random number generator + //nolint:gosec // G404: Use of weak random number generator if numAddresses <= int(limit) || rand.Intn((meanAbsScore*2)+1) <= scores[peer.ID]+1 || rand.Intn((idx+1)*10) <= idx+1 { addresses = append(addresses, addressInfo.Address) addedLastIteration = true @@ -1079,19 +1134,19 @@ func (m *PeerManager) Advertise(peerID types.NodeID, limit uint16) []NodeAddress // PeerEventSubscriber describes the type of the subscription method, to assist // in isolating reactors specific construction and lifecycle from the // peer manager. -type PeerEventSubscriber func(context.Context) *PeerUpdates +type PeerEventSubscriber func(context.Context, string) *PeerUpdates // Subscribe subscribes to peer updates. The caller must consume the peer // updates in a timely fashion and close the subscription when done, otherwise // the PeerManager will halt. -func (m *PeerManager) Subscribe(ctx context.Context) *PeerUpdates { - // FIXME: We use a size 1 buffer here. When we broadcast a peer update - // we have to loop over all of the subscriptions, and we want to avoid - // having to block and wait for a context switch before continuing on - // to the next subscriptions. This also prevents tail latencies from - // compounding. Limiting it to 1 means that the subscribers are still - // reasonably in sync. However, this should probably be benchmarked. - peerUpdates := NewPeerUpdates(make(chan PeerUpdate, 1), 1) +func (m *PeerManager) Subscribe(ctx context.Context, subscriberName string) *PeerUpdates { + // Note: When we broadcast a peer update we have to loop over all of + // the subscriptions, and we want to avoid having to block and wait + // for a context switch before continuing on to the next subscriptions. + // This also prevents tail latencies from compounding. + // It should be limited to ensure that the subscribers are still + // reasonably in sync. 
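
Concretely, each subscription now buffers broadcastSubscriptionChannelCapacity (3) updates, and broadcast gives up after broadcastTimeout (60s) with an error that names the slow subscriber; that is what the new subscriberName argument is for. The consumer side is unchanged and still has to drain Updates() promptly. A rough sketch (the "my-reactor" label is illustrative):

    // A subscriber that stalls for longer than broadcastTimeout shows up in the
    // broadcast error as "peer update <name> capacity <n> exceeded".
    sub := peerManager.Subscribe(ctx, "my-reactor")
    for {
        select {
        case pu := <-sub.Updates():
            _ = pu // handle the PeerUpdate (status change, channels, proTxHash) in a timely fashion
        case <-ctx.Done():
            return
        }
    }
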
+ peerUpdates := NewPeerUpdates(make(chan PeerUpdate, broadcastSubscriptionChannelCapacity), 1, subscriberName) m.Register(ctx, peerUpdates) return peerUpdates } @@ -1159,19 +1214,19 @@ func (m *PeerManager) processPeerEvent(ctx context.Context, pu PeerUpdate) { // as the PeerManager processes them, but this means subscribers must be // responsive at all times or the entire PeerManager will halt. // -// FIXME: Consider using an internal channel to buffer updates while also -// maintaining order if this is a problem. -func (m *PeerManager) broadcast(ctx context.Context, peerUpdate PeerUpdate) { - for _, sub := range m.subscriptions { - if ctx.Err() != nil { - return - } +// Broadcast is asynchronous, what means that returning doesn't mean successful delivery +func (m *PeerManager) broadcast(ctx context.Context, peerUpdate PeerUpdate) error { + for pu, sub := range m.subscriptions { select { case <-ctx.Done(): - return + return ctx.Err() + case <-time.After(broadcastTimeout): + return fmt.Errorf("peer update %s capacity %d exceeded", pu.subscriberName, cap(sub.reactorUpdatesCh)) case sub.reactorUpdatesCh <- peerUpdate: } } + + return nil } // Addresses returns all known addresses for a peer, primarily for testing. @@ -1609,6 +1664,37 @@ func (p *peerInfo) Validate() error { return nil } +func (p *peerInfo) IsZero() bool { + return p == nil || len(p.ID) == 0 +} + +func (p *peerInfo) MarshalZerologObject(e *zerolog.Event) { + if p == nil { + return + } + + e.Str("node_id", string(p.ID)) + if len(p.ProTxHash) != 0 { + e.Str("protxhash", p.ProTxHash.ShortString()) + } + e.Time("last_connected", p.LastConnected) + e.Time("last_disconnected", p.LastDisconnected) + if p.Persistent { + e.Bool("persistent", p.Persistent) + } + e.Int64("height", p.Height) + if p.FixedScore != 0 { + e.Int16("fixed_score", int16(p.FixedScore)) + } + if p.MutableScore != 0 { + e.Int64("mutable_score", p.MutableScore) + } + if p.Inactive { + e.Bool("inactive", p.Inactive) + } + e.Int16("score", int16(p.Score())) +} + // peerAddressInfo contains information and statistics about a peer address. type peerAddressInfo struct { Address NodeAddress @@ -1712,6 +1798,20 @@ func (m *PeerManager) UpdatePeerInfo(nodeID types.NodeID, modifier func(peerInfo return m.store.Set(peer) } +// getPeer() loads and returns peer from store, together with last connection direction, if any +func (m *PeerManager) getPeer(peerID types.NodeID) (peerInfo, peerConnectionDirection) { + m.mtx.Lock() + defer m.mtx.Unlock() + + p, ok := m.store.Get(peerID) + if !ok { + return peerInfo{}, peerConnectionNone + } + + connType := m.connected[peerID] + return p, connType +} + // IsDialingOrConnected returns true if dialing to a peer at the moment or already connected otherwise false func (m *PeerManager) IsDialingOrConnected(nodeID types.NodeID) bool { m.mtx.Lock() @@ -1720,9 +1820,28 @@ func (m *PeerManager) IsDialingOrConnected(nodeID types.NodeID) bool { return m.dialing[nodeID] || ok } +func (m *PeerManager) numDialingOrConnected() int { + m.mtx.Lock() + defer m.mtx.Unlock() + return len(m.connected) + len(m.dialing) +} + // SetProTxHashToPeerInfo sets a proTxHash in peerInfo.proTxHash to keep this value in a store func SetProTxHashToPeerInfo(proTxHash types.ProTxHash) func(info *peerInfo) { return func(info *peerInfo) { info.ProTxHash = proTxHash.Copy() } } + +// evictPeerAfterTimeout evicts incoming peer for which the timeout expired. 
+func evictPeerAfterTimeout(m *PeerManager, peerID types.NodeID, direction peerConnectionDirection, timeout time.Duration) { + if timeout > 0 { + time.AfterFunc(timeout, func() { + olderThan := time.Now().Add(-timeout) + p, connType := m.getPeer(peerID) + if !p.IsZero() && connType == direction && !p.Persistent && p.LastConnected.Before(olderThan) { + m.EvictPeer(peerID) + } + }) + } +} diff --git a/internal/p2p/peermanager_scoring_test.go b/internal/p2p/peermanager_scoring_test.go index b454da151f..717123b173 100644 --- a/internal/p2p/peermanager_scoring_test.go +++ b/internal/p2p/peermanager_scoring_test.go @@ -14,13 +14,16 @@ import ( ) func TestPeerScoring(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + // coppied from p2p_test shared variables selfKey := ed25519.GenPrivKeyFromSecret([]byte{0xf9, 0x1b, 0x08, 0xaa, 0x38, 0xee, 0x34, 0xdd}) selfID := types.NodeIDFromPubKey(selfKey.PubKey()) // create a mock peer manager db := dbm.NewMemDB() - peerManager, err := NewPeerManager(selfID, db, PeerManagerOptions{}) + peerManager, err := NewPeerManager(ctx, selfID, db, PeerManagerOptions{}) require.NoError(t, err) // create a fake node @@ -29,9 +32,6 @@ func TestPeerScoring(t *testing.T) { require.NoError(t, err) require.True(t, added) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - t.Run("Synchronous", func(t *testing.T) { // update the manager and make sure it's correct require.Zero(t, peerManager.Scores()[id]) @@ -56,7 +56,7 @@ func TestPeerScoring(t *testing.T) { }) t.Run("AsynchronousIncrement", func(t *testing.T) { start := peerManager.Scores()[id] - pu := peerManager.Subscribe(ctx) + pu := peerManager.Subscribe(ctx, "p2p") pu.SendUpdate(ctx, PeerUpdate{ NodeID: id, Status: PeerStatusGood, @@ -69,7 +69,7 @@ func TestPeerScoring(t *testing.T) { }) t.Run("AsynchronousDecrement", func(t *testing.T) { start := peerManager.Scores()[id] - pu := peerManager.Subscribe(ctx) + pu := peerManager.Subscribe(ctx, "p2p") pu.SendUpdate(ctx, PeerUpdate{ NodeID: id, Status: PeerStatusBad, diff --git a/internal/p2p/peermanager_test.go b/internal/p2p/peermanager_test.go index 3e72c333b2..9ee80539e6 100644 --- a/internal/p2p/peermanager_test.go +++ b/internal/p2p/peermanager_test.go @@ -111,26 +111,28 @@ func TestPeerManagerOptions_Validate(t *testing.T) { } func TestNewPeerManager(t *testing.T) { + ctx := context.TODO() // Zero options should be valid. - _, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + _, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // Invalid options should error. - _, err = p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + _, err = p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PersistentPeers: []types.NodeID{"foo"}, }) require.Error(t, err) // Invalid database should error. - _, err = p2p.NewPeerManager(selfID, nil, p2p.PeerManagerOptions{}) + _, err = p2p.NewPeerManager(ctx, selfID, nil, p2p.PeerManagerOptions{}) require.Error(t, err) // Empty self ID should error. 
- _, err = p2p.NewPeerManager("", nil, p2p.PeerManagerOptions{}) + _, err = p2p.NewPeerManager(ctx, "", nil, p2p.PeerManagerOptions{}) require.Error(t, err) } func TestNewPeerManager_Persistence(t *testing.T) { + ctx := context.TODO() aID := types.NodeID(strings.Repeat("a", 40)) aAddresses := []p2p.NodeAddress{ {Protocol: "tcp", NodeID: aID, Hostname: "127.0.0.1", Port: 26657, Path: "/path"}, @@ -151,7 +153,7 @@ func TestNewPeerManager_Persistence(t *testing.T) { // Create an initial peer manager and add the peers. db := dbm.NewMemDB() - peerManager, err := p2p.NewPeerManager(selfID, db, p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, db, p2p.PeerManagerOptions{ PersistentPeers: []types.NodeID{aID}, PeerScores: map[types.NodeID]p2p.PeerScore{bID: 1}, }) @@ -175,7 +177,7 @@ func TestNewPeerManager_Persistence(t *testing.T) { // Creating a new peer manager with the same database should retain the // peers, but they should have updated scores from the new PersistentPeers // configuration. - peerManager, err = p2p.NewPeerManager(selfID, db, p2p.PeerManagerOptions{ + peerManager, err = p2p.NewPeerManager(ctx, selfID, db, p2p.PeerManagerOptions{ PersistentPeers: []types.NodeID{bID}, PeerScores: map[types.NodeID]p2p.PeerScore{cID: 1}, }) @@ -192,11 +194,12 @@ func TestNewPeerManager_Persistence(t *testing.T) { } func TestNewPeerManager_SelfIDChange(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} db := dbm.NewMemDB() - peerManager, err := p2p.NewPeerManager(selfID, db, p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, db, p2p.PeerManagerOptions{}) require.NoError(t, err) added, err := peerManager.Add(a) @@ -209,17 +212,18 @@ func TestNewPeerManager_SelfIDChange(t *testing.T) { // If we change our selfID to one of the peers in the peer store, it // should be removed from the store. - peerManager, err = p2p.NewPeerManager(a.NodeID, db, p2p.PeerManagerOptions{}) + peerManager, err = p2p.NewPeerManager(ctx, a.NodeID, db, p2p.PeerManagerOptions{}) require.NoError(t, err) require.Equal(t, []types.NodeID{b.NodeID}, peerManager.Peers()) } func TestPeerManager_Add(t *testing.T) { + ctx := context.TODO() aID := types.NodeID(strings.Repeat("a", 40)) bID := types.NodeID(strings.Repeat("b", 40)) cID := types.NodeID(strings.Repeat("c", 40)) - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PersistentPeers: []types.NodeID{aID, cID}, MaxPeers: 2, MaxConnected: 2, @@ -275,7 +279,7 @@ func TestPeerManager_DialNext(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // Add an address. DialNext should return it. 
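
These tests now thread a context into the constructor. Outside of tests the expected wiring is roughly the following sketch; the logger label mirrors the p2ptest helper above, the in-memory DB is for brevity, and ctx, selfID and logger come from the caller:

    peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{})
    if err != nil {
        return err
    }
    peerManager.SetLogger(logger.With("module", "peer_manager"))
    defer peerManager.Close() // stops the dial/evict wakers' pending timers
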
@@ -309,7 +313,7 @@ func TestPeerManager_DialNext_Retry(t *testing.T) { MinRetryTime: 100 * time.Millisecond, MaxRetryTime: 500 * time.Millisecond, } - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), options) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), options) require.NoError(t, err) added, err := peerManager.Add(a) @@ -353,7 +357,7 @@ func TestPeerManager_DialNext_WakeOnAdd(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // Spawn a goroutine to add a peer after a delay. @@ -376,7 +380,7 @@ func TestPeerManager_DialNext_WakeOnDialFailed(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 1, }) require.NoError(t, err) @@ -420,7 +424,7 @@ func TestPeerManager_DialNext_WakeOnDialFailedRetry(t *testing.T) { defer cancel() options := p2p.PeerManagerOptions{MinRetryTime: 200 * time.Millisecond} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), options) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), options) require.NoError(t, err) a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} @@ -450,7 +454,7 @@ func TestPeerManager_DialNext_WakeOnDisconnected(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) added, err := peerManager.Add(a) @@ -477,11 +481,12 @@ func TestPeerManager_DialNext_WakeOnDisconnected(t *testing.T) { } func TestPeerManager_TryDialNext_MaxConnected(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 2, }) require.NoError(t, err) @@ -519,7 +524,7 @@ func TestPeerManager_TryDialNext_MaxConnectedUpgrade(t *testing.T) { d := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("d", 40))} e := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("e", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PeerScores: map[types.NodeID]p2p.PeerScore{ a.NodeID: p2p.PeerScore(0), b.NodeID: p2p.PeerScore(1), @@ -590,11 +595,12 @@ func TestPeerManager_TryDialNext_MaxConnectedUpgrade(t *testing.T) { } func TestPeerManager_TryDialNext_UpgradeReservesPeer(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", 
NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PeerScores: map[types.NodeID]p2p.PeerScore{b.NodeID: p2p.PeerScore(1), c.NodeID: 1}, MaxConnected: 1, MaxConnectedUpgrade: 2, @@ -626,6 +632,7 @@ func TestPeerManager_TryDialNext_UpgradeReservesPeer(t *testing.T) { } func TestPeerManager_TryDialNext_DialingConnected(t *testing.T) { + ctx := context.TODO() aID := types.NodeID(strings.Repeat("a", 40)) a := p2p.NodeAddress{Protocol: "memory", NodeID: aID} aTCP := p2p.NodeAddress{Protocol: "tcp", NodeID: aID, Hostname: "localhost"} @@ -633,7 +640,7 @@ func TestPeerManager_TryDialNext_DialingConnected(t *testing.T) { bID := types.NodeID(strings.Repeat("b", 40)) b := p2p.NodeAddress{Protocol: "memory", NodeID: bID} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 2, }) require.NoError(t, err) @@ -679,7 +686,7 @@ func TestPeerManager_TryDialNext_Multiple(t *testing.T) { {Protocol: "tcp", NodeID: bID, Hostname: "::1"}, } - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) for _, address := range addresses { @@ -703,6 +710,7 @@ func TestPeerManager_TryDialNext_Multiple(t *testing.T) { } func TestPeerManager_DialFailed(t *testing.T) { + ctx := context.TODO() // DialFailed is tested through other tests, we'll just check a few basic // things here, e.g. reporting unknown addresses. aID := types.NodeID(strings.Repeat("a", 40)) @@ -710,7 +718,7 @@ func TestPeerManager_DialFailed(t *testing.T) { bID := types.NodeID(strings.Repeat("b", 40)) b := p2p.NodeAddress{Protocol: "memory", NodeID: bID} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) added, err := peerManager.Add(a) @@ -750,7 +758,7 @@ func TestPeerManager_DialFailed_UnreservePeer(t *testing.T) { b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PeerScores: map[types.NodeID]p2p.PeerScore{ b.NodeID: p2p.PeerScore(1), c.NodeID: p2p.PeerScore(2), @@ -791,10 +799,11 @@ func TestPeerManager_DialFailed_UnreservePeer(t *testing.T) { } func TestPeerManager_Dialed_Connected(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // Marking a as dialed twice should error. 
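
The PeerEventSubscriber signature change ripples into every reactor constructor: the extra string is just the subscriber label that ends up in Subscribe. A caller wiring a reactor against a real PeerManager would presumably pass something like the following (logger, chCreator and pool as in the evidence test setup above):

    peerEvents := func(ctx context.Context, name string) *p2p.PeerUpdates {
        return peerManager.Subscribe(ctx, name)
    }
    reactor := evidence.NewReactor(logger, chCreator, peerEvents, pool)
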
@@ -819,7 +828,8 @@ func TestPeerManager_Dialed_Connected(t *testing.T) { } func TestPeerManager_Dialed_Self(t *testing.T) { - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + ctx := context.TODO() + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // Dialing self should error. @@ -828,10 +838,11 @@ func TestPeerManager_Dialed_Self(t *testing.T) { } func TestPeerManager_Dialed_MaxConnected(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 1, }) require.NoError(t, err) @@ -855,12 +866,13 @@ func TestPeerManager_Dialed_MaxConnected(t *testing.T) { } func TestPeerManager_Dialed_MaxConnectedUpgrade(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} d := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("d", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 2, MaxConnectedUpgrade: 1, PeerScores: map[types.NodeID]p2p.PeerScore{c.NodeID: p2p.PeerScore(1), d.NodeID: 1}, @@ -895,9 +907,10 @@ func TestPeerManager_Dialed_MaxConnectedUpgrade(t *testing.T) { } func TestPeerManager_Dialed_Unknown(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // Marking an unknown node as dialed should error. 
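
For unit tests that bypass the PeerManager entirely, the reactor suites above build a PeerUpdates by hand; the third argument is only a debug label. A trimmed-down version of that pattern (nodeID is whatever peer the test impersonates):

    peerCh := make(chan p2p.PeerUpdate, 1)
    peerUpdates := p2p.NewPeerUpdates(peerCh, 1, "test")
    peerEvents := func(context.Context, string) *p2p.PeerUpdates { return peerUpdates }
    // hand peerEvents to the reactor under test, then drive it directly:
    peerCh <- p2p.PeerUpdate{NodeID: nodeID, Status: p2p.PeerStatusUp}
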
@@ -905,11 +918,12 @@ func TestPeerManager_Dialed_Unknown(t *testing.T) { } func TestPeerManager_Dialed_Upgrade(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 1, MaxConnectedUpgrade: 2, PeerScores: map[types.NodeID]p2p.PeerScore{b.NodeID: p2p.PeerScore(1), c.NodeID: 1}, @@ -953,7 +967,7 @@ func TestPeerManager_Dialed_UpgradeEvenLower(t *testing.T) { c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} d := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("d", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 2, MaxConnectedUpgrade: 1, PeerScores: map[types.NodeID]p2p.PeerScore{ @@ -1008,7 +1022,7 @@ func TestPeerManager_Dialed_UpgradeNoEvict(t *testing.T) { b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 2, MaxConnectedUpgrade: 1, PeerScores: map[types.NodeID]p2p.PeerScore{ @@ -1051,12 +1065,13 @@ func TestPeerManager_Dialed_UpgradeNoEvict(t *testing.T) { } func TestPeerManager_Accepted(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} d := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("d", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // Accepting a connection from self should error. 
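
The new MaxIncomingConnectionTime option (exercised by TestPeerManager_Accepted_Timeout below) only affects inbound, non-persistent peers: Accepted arms a timer via the evictPeerAfterTimeout helper above, and when it fires the peer is evicted unless it has since disconnected or reconnected. A seed-node style configuration might look like this sketch (the 30-minute value is an assumption, not a recommended default):

    peerManager, err := p2p.NewPeerManager(ctx, selfID, db, p2p.PeerManagerOptions{
        // Rotate inbound peers so a seed node keeps making room for new ones.
        // 0 (the default) keeps the old behaviour and never evicts on age.
        MaxIncomingConnectionTime: 30 * time.Minute,
    })
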
@@ -1096,11 +1111,12 @@ func TestPeerManager_Accepted(t *testing.T) { } func TestPeerManager_Accepted_MaxConnected(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 2, }) require.NoError(t, err) @@ -1124,12 +1140,13 @@ func TestPeerManager_Accepted_MaxConnected(t *testing.T) { } func TestPeerManager_Accepted_MaxConnectedUpgrade(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} d := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("d", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PeerScores: map[types.NodeID]p2p.PeerScore{ c.NodeID: p2p.PeerScore(1), d.NodeID: p2p.PeerScore(2), @@ -1177,7 +1194,7 @@ func TestPeerManager_Accepted_Upgrade(t *testing.T) { b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PeerScores: map[types.NodeID]p2p.PeerScore{ b.NodeID: p2p.PeerScore(1), c.NodeID: p2p.PeerScore(1), @@ -1216,11 +1233,12 @@ func TestPeerManager_Accepted_Upgrade(t *testing.T) { } func TestPeerManager_Accepted_UpgradeDialing(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} c := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("c", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PeerScores: map[types.NodeID]p2p.PeerScore{ b.NodeID: p2p.PeerScore(1), c.NodeID: p2p.PeerScore(1), @@ -1261,6 +1279,58 @@ func TestPeerManager_Accepted_UpgradeDialing(t *testing.T) { require.Error(t, peerManager.Dialed(b)) } +// TestPeerManager_Accepted_Timeout ensures that an incoming peer will be evicted after `MaxIncomingConnectionTime` +func TestPeerManager_Accepted_Timeout(t *testing.T) { + ctx := context.Background() + + // FIXME: maxIncomingTime might require tuning on low-resource runners (eg. 
github) + // Feel free to increase it a bit if it fails - it should not affect the test logic + const maxIncomingTime = 10 * time.Millisecond + const processingTime = maxIncomingTime / 10 + + address := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} + + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + MaxIncomingConnectionTime: maxIncomingTime, + DisconnectCooldownPeriod: 1 * time.Nanosecond, + }) + + require.NoError(t, err) + + // Accepting a connection from a known peer should work. + added, err := peerManager.Add(address) + require.NoError(t, err) + require.True(t, added) + require.NoError(t, peerManager.Accepted(address.NodeID)) + + // Initially, no peers are marked for eviction + evict, err := peerManager.TryEvictNext() + assert.NoError(t, err) + assert.Zero(t, evict, "No peer should be evicted") + + // After 1/2 of the timeout, we disconnect and reconnect + time.Sleep(maxIncomingTime / 2) + evict, err = peerManager.TryEvictNext() + assert.NoError(t, err) + assert.Zero(t, evict, "No peer should be evicted") + + peerManager.Disconnected(ctx, address.NodeID) + time.Sleep(processingTime) + require.NoError(t, peerManager.Accepted(address.NodeID)) + + // After another 1/2 of the timeout, we still don't expect the peer to be evicted + time.Sleep(maxIncomingTime / 2) + evict, err = peerManager.TryEvictNext() + assert.NoError(t, err) + assert.Zero(t, evict, "second connection should not be evicted based on the first connection's timeout") + + // But an additional 1/2 of the timeout, plus some processing time, should evict the peer + time.Sleep(maxIncomingTime/2 + processingTime) + evict, err = peerManager.TryEvictNext() + assert.NoError(t, err) + assert.Equal(t, address.NodeID, evict, "peer should be evicted after MaxIncomingConnectionTime") +} + func TestPeerManager_Ready(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} @@ -1268,10 +1338,10 @@ func TestPeerManager_Ready(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) - sub := peerManager.Subscribe(ctx) + sub := peerManager.Subscribe(ctx, "p2p") // Connecting to a should still have it as status down.
added, err := peerManager.Add(a) @@ -1302,10 +1372,10 @@ func TestPeerManager_Ready_Channels(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - pm, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + pm, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) - sub := pm.Subscribe(ctx) + sub := pm.Subscribe(ctx, "p2p") a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} added, err := pm.Add(a) @@ -1328,7 +1398,7 @@ func TestPeerManager_EvictNext(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) added, err := peerManager.Add(a) @@ -1364,7 +1434,7 @@ func TestPeerManager_EvictNext_WakeOnError(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) added, err := peerManager.Add(a) @@ -1394,7 +1464,7 @@ func TestPeerManager_EvictNext_WakeOnUpgradeDialed(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 1, MaxConnectedUpgrade: 1, PeerScores: map[types.NodeID]p2p.PeerScore{b.NodeID: p2p.PeerScore(1)}, @@ -1434,7 +1504,7 @@ func TestPeerManager_EvictNext_WakeOnUpgradeAccepted(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} b := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("b", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MaxConnected: 1, MaxConnectedUpgrade: 1, PeerScores: map[types.NodeID]p2p.PeerScore{ @@ -1469,7 +1539,7 @@ func TestPeerManager_TryEvictNext(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) added, err := peerManager.Add(a) @@ -1503,15 +1573,16 @@ func TestPeerManager_TryEvictNext(t *testing.T) { } func TestPeerManager_Disconnected(t *testing.T) { + ctx := context.TODO() a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - sub := peerManager.Subscribe(ctx) + sub := peerManager.Subscribe(ctx, "p2p") // Disconnecting an unknown peer does nothing. 
peerManager.Disconnected(ctx, a.NodeID) @@ -1562,7 +1633,7 @@ func TestPeerManager_Errored(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // Erroring an unknown peer does nothing. @@ -1601,11 +1672,11 @@ func TestPeerManager_Subscribe(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) // This tests all subscription events for full peer lifecycles. - sub := peerManager.Subscribe(ctx) + sub := peerManager.Subscribe(ctx, "p2p") added, err := peerManager.Add(a) require.NoError(t, err) @@ -1662,10 +1733,10 @@ func TestPeerManager_Subscribe_Close(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) - sub := peerManager.Subscribe(ctx) + sub := peerManager.Subscribe(ctx, "p2p") added, err := peerManager.Add(a) require.NoError(t, err) @@ -1679,6 +1750,7 @@ func TestPeerManager_Subscribe_Close(t *testing.T) { // Closing the subscription should not send us the disconnected update. cancel() + time.Sleep(50 * time.Millisecond) peerManager.Disconnected(ctx, a.NodeID) require.Empty(t, sub.Updates()) } @@ -1691,15 +1763,15 @@ func TestPeerManager_Subscribe_Broadcast(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) s2ctx, s2cancel := context.WithCancel(ctx) defer s2cancel() - s1 := peerManager.Subscribe(ctx) - s2 := peerManager.Subscribe(s2ctx) - s3 := peerManager.Subscribe(ctx) + s1 := peerManager.Subscribe(ctx, "p2p") + s2 := peerManager.Subscribe(s2ctx, "p2p") + s3 := peerManager.Subscribe(ctx, "p2p") // Connecting to a peer should send updates on all subscriptions. added, err := peerManager.Add(a) @@ -1739,14 +1811,14 @@ func TestPeerManager_Close(t *testing.T) { a := p2p.NodeAddress{Protocol: "memory", NodeID: types.NodeID(strings.Repeat("a", 40))} - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ MinRetryTime: 10 * time.Second, }) require.NoError(t, err) // This subscription isn't closed, but PeerManager.Close() // should reap the spawned goroutine. - _ = peerManager.Subscribe(ctx) + _ = peerManager.Subscribe(ctx, "p2p") // This dial failure will start a retry timer for 10 seconds, which // should be reaped. @@ -1774,7 +1846,8 @@ func TestPeerManager_Advertise(t *testing.T) { dID := types.NodeID(strings.Repeat("d", 40)) // Create an initial peer manager and add the peers. 
- peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + ctx := context.TODO() + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ PeerScores: map[types.NodeID]p2p.PeerScore{aID: 3, bID: 2, cID: 1}, }) require.NoError(t, err) @@ -1827,12 +1900,13 @@ func TestPeerManager_Advertise(t *testing.T) { } func TestPeerManager_Advertise_Self(t *testing.T) { + ctx := context.TODO() dID := types.NodeID(strings.Repeat("d", 40)) self := p2p.NodeAddress{Protocol: "tcp", NodeID: selfID, Hostname: "2001:db8::1", Port: 26657} // Create a peer manager with SelfAddress defined. - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{ SelfAddress: self, }) require.NoError(t, err) diff --git a/internal/p2p/pex/reactor.go b/internal/p2p/pex/reactor.go index 24d3b7492e..bc9bc97826 100644 --- a/internal/p2p/pex/reactor.go +++ b/internal/p2p/pex/reactor.go @@ -134,7 +134,7 @@ func (r *Reactor) OnStart(ctx context.Context) error { return err } - peerUpdates := r.peerEvents(ctx) + peerUpdates := r.peerEvents(ctx, "pex") go r.processPexCh(ctx, channel) go r.processPeerUpdates(ctx, peerUpdates) return nil diff --git a/internal/p2p/pex/reactor_test.go b/internal/p2p/pex/reactor_test.go index 9257a317b4..37450deb9f 100644 --- a/internal/p2p/pex/reactor_test.go +++ b/internal/p2p/pex/reactor_test.go @@ -209,7 +209,7 @@ func TestReactorSmallPeerStoreInALargeNetwork(t *testing.T) { // test that all nodes reach full capacity for _, nodeID := range testNet.nodes { require.Eventually(t, func() bool { - // nolint:scopelint + //nolint:scopelint return testNet.network.Nodes[nodeID].PeerManager.PeerRatio() >= 0.9 }, longWait, checkFrequency, "peer ratio is: %f", testNet.network.Nodes[nodeID].PeerManager.PeerRatio()) @@ -298,15 +298,16 @@ func setupSingle(ctx context.Context, t *testing.T) *singleTestReactor { ) peerCh := make(chan p2p.PeerUpdate, chBuf) - peerUpdates := p2p.NewPeerUpdates(peerCh, chBuf) - peerManager, err := p2p.NewPeerManager(nodeID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerUpdates := p2p.NewPeerUpdates(peerCh, chBuf, "") + peerManager, err := p2p.NewPeerManager(ctx, nodeID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) + defer peerManager.Close() chCreator := func(context.Context, *p2p.ChannelDescriptor) (p2p.Channel, error) { return pexCh, nil } - reactor := pex.NewReactor(log.NewNopLogger(), peerManager, chCreator, func(_ context.Context) *p2p.PeerUpdates { return peerUpdates }) + reactor := pex.NewReactor(log.NewNopLogger(), peerManager, chCreator, func(context.Context, string) *p2p.PeerUpdates { return peerUpdates }) require.NoError(t, reactor.Start(ctx)) t.Cleanup(reactor.Wait) @@ -388,7 +389,7 @@ func setupNetwork(ctx context.Context, t *testing.T, opts testOptions) *reactorT nodeID := nodeID rts.peerChans[nodeID] = make(chan p2p.PeerUpdate, chBuf) - rts.peerUpdates[nodeID] = p2p.NewPeerUpdates(rts.peerChans[nodeID], chBuf) + rts.peerUpdates[nodeID] = p2p.NewPeerUpdates(rts.peerChans[nodeID], chBuf, "pex") rts.network.Nodes[nodeID].PeerManager.Register(ctx, rts.peerUpdates[nodeID]) chCreator := func(context.Context, *p2p.ChannelDescriptor) (p2p.Channel, error) { @@ -403,7 +404,7 @@ func setupNetwork(ctx context.Context, t *testing.T, opts testOptions) *reactorT rts.logger.With("nodeID", nodeID), rts.network.Nodes[nodeID].PeerManager, chCreator, - func(_ context.Context) 
*p2p.PeerUpdates { return rts.peerUpdates[nodeID] }, + func(context.Context, string) *p2p.PeerUpdates { return rts.peerUpdates[nodeID] }, ) } rts.nodes = append(rts.nodes, nodeID) @@ -449,7 +450,7 @@ func (r *reactorTestSuite) addNodes(ctx context.Context, t *testing.T, nodes int nodeID := node.NodeID r.pexChannels[nodeID] = node.MakeChannelNoCleanup(ctx, t, pex.ChannelDescriptor()) r.peerChans[nodeID] = make(chan p2p.PeerUpdate, r.opts.BufferSize) - r.peerUpdates[nodeID] = p2p.NewPeerUpdates(r.peerChans[nodeID], r.opts.BufferSize) + r.peerUpdates[nodeID] = p2p.NewPeerUpdates(r.peerChans[nodeID], r.opts.BufferSize, "pex") r.network.Nodes[nodeID].PeerManager.Register(ctx, r.peerUpdates[nodeID]) chCreator := func(context.Context, *p2p.ChannelDescriptor) (p2p.Channel, error) { @@ -460,7 +461,7 @@ func (r *reactorTestSuite) addNodes(ctx context.Context, t *testing.T, nodes int r.logger.With("nodeID", nodeID), r.network.Nodes[nodeID].PeerManager, chCreator, - func(_ context.Context) *p2p.PeerUpdates { return r.peerUpdates[nodeID] }, + func(context.Context, string) *p2p.PeerUpdates { return r.peerUpdates[nodeID] }, ) r.nodes = append(r.nodes, nodeID) r.total++ @@ -566,7 +567,7 @@ func (r *reactorTestSuite) listenForPeerUpdate( waitPeriod time.Duration, ) { on, with := r.checkNodePair(t, onNode, withNode) - sub := r.network.Nodes[on].PeerManager.Subscribe(ctx) + sub := r.network.Nodes[on].PeerManager.Subscribe(ctx, "pex") timesUp := time.After(waitPeriod) for { select { @@ -674,8 +675,8 @@ func (r *reactorTestSuite) connectPeers(ctx context.Context, t *testing.T, sourc return } - sourceSub := n1.PeerManager.Subscribe(ctx) - targetSub := n2.PeerManager.Subscribe(ctx) + sourceSub := n1.PeerManager.Subscribe(ctx, "pex") + targetSub := n2.PeerManager.Subscribe(ctx, "pex") sourceAddress := n1.NodeAddress targetAddress := n2.NodeAddress diff --git a/internal/p2p/router_test.go b/internal/p2p/router_test.go index d29fe234a3..8ff6d1a215 100644 --- a/internal/p2p/router_test.go +++ b/internal/p2p/router_test.go @@ -107,7 +107,7 @@ func TestRouter_Channel_Basic(t *testing.T) { defer cancel() // Set up a router with no transports (so no peers). - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) testnet := p2ptest.MakeNetwork(ctx, t, p2ptest.NetworkOptions{NumNodes: 1}) @@ -418,10 +418,10 @@ func TestRouter_AcceptPeers(t *testing.T) { mockTransport.On("Listen", mock.Anything).Return(nil) // Set up and start the router. - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) - sub := peerManager.Subscribe(ctx) + sub := peerManager.Subscribe(ctx, "p2p") router, err := p2p.NewRouter( log.NewNopLogger(), @@ -482,7 +482,7 @@ func TestRouter_AcceptPeers_Errors(t *testing.T) { mockTransport.On("Listen", mock.Anything).Return(nil) // Set up and start the router. - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) router, err := p2p.NewRouter( @@ -535,7 +535,7 @@ func TestRouter_AcceptPeers_HeadOfLineBlocking(t *testing.T) { mockTransport.On("Listen", mock.Anything).Return(nil) // Set up and start the router. 
- peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) router, err := p2p.NewRouter( @@ -633,13 +633,13 @@ func TestRouter_DialPeers(t *testing.T) { } // Set up and start the router. - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) added, err := peerManager.Add(address) require.NoError(t, err) require.True(t, added) - sub := peerManager.Subscribe(ctx) + sub := peerManager.Subscribe(ctx, "p2p") router, err := p2p.NewRouter( log.NewNopLogger(), @@ -711,7 +711,7 @@ func TestRouter_DialPeers_Parallel(t *testing.T) { } // Set up and start the router. - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) added, err := peerManager.Add(a) @@ -794,10 +794,10 @@ func TestRouter_EvictPeers(t *testing.T) { mockTransport.On("Listen", mock.Anything).Return(nil) // Set up and start the router. - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) - sub := peerManager.Subscribe(ctx) + sub := peerManager.Subscribe(ctx, "p2p") router, err := p2p.NewRouter( log.NewNopLogger(), @@ -862,7 +862,7 @@ func TestRouter_ChannelCompatability(t *testing.T) { mockTransport.On("Listen", mock.Anything).Return(nil) // Set up and start the router. - peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) router, err := p2p.NewRouter( @@ -915,10 +915,10 @@ func TestRouter_DontSendOnInvalidChannel(t *testing.T) { mockTransport.On("Listen", mock.Anything).Return(nil) // Set up and start the router. 
- peerManager, err := p2p.NewPeerManager(selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) + peerManager, err := p2p.NewPeerManager(ctx, selfID, dbm.NewMemDB(), p2p.PeerManagerOptions{}) require.NoError(t, err) - sub := peerManager.Subscribe(ctx) + sub := peerManager.Subscribe(ctx, "p2p") router, err := p2p.NewRouter( log.NewNopLogger(), diff --git a/internal/statesync/dispatcher.go b/internal/statesync/dispatcher.go index b23346d5b5..3a7fef5dd1 100644 --- a/internal/statesync/dispatcher.go +++ b/internal/statesync/dispatcher.go @@ -9,6 +9,7 @@ import ( sync "github.com/sasha-s/go-deadlock" "github.com/tendermint/tendermint/internal/p2p" + "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/light/provider" ssproto "github.com/tendermint/tendermint/proto/tendermint/statesync" tmproto "github.com/tendermint/tendermint/proto/tendermint/types" @@ -28,14 +29,15 @@ var ( type Dispatcher struct { // the channel with which to send light block requests on requestCh p2p.Channel - - mtx sync.Mutex + logger log.Logger + mtx sync.Mutex // all pending calls that have been dispatched and are awaiting an answer calls map[types.NodeID]chan *types.LightBlock } -func NewDispatcher(requestChannel p2p.Channel) *Dispatcher { +func NewDispatcher(requestChannel p2p.Channel, logger log.Logger) *Dispatcher { return &Dispatcher{ + logger: logger.With("module", "lb-dispatcher"), requestCh: requestChannel, calls: make(map[types.NodeID]chan *types.LightBlock), } @@ -65,11 +67,17 @@ func (d *Dispatcher) LightBlock(ctx context.Context, height int64, peer types.No start := time.Now() select { case resp := <-callCh: - fmt.Printf("dispatcher LightBlock took %s\n", time.Since(start).String()) + d.logger.Debug("received light-block", + "height", height, + "took", time.Since(start).String(), + ) return resp, nil case <-ctx.Done(): - fmt.Printf("dispatcher LightBlock ctx done after %s\n", time.Since(start).String()) + d.logger.Debug("failed to get a light-block", + "height", height, + "took", time.Since(start).String(), + ) return nil, ctx.Err() } } diff --git a/internal/statesync/dispatcher_test.go b/internal/statesync/dispatcher_test.go index 298b359bb0..88add1e06b 100644 --- a/internal/statesync/dispatcher_test.go +++ b/internal/statesync/dispatcher_test.go @@ -7,14 +7,14 @@ import ( "testing" "time" - sync "github.com/sasha-s/go-deadlock" - "github.com/fortytw2/leaktest" + sync "github.com/sasha-s/go-deadlock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/tendermint/tendermint/internal/p2p" "github.com/tendermint/tendermint/internal/test/factory" + "github.com/tendermint/tendermint/libs/log" ssproto "github.com/tendermint/tendermint/proto/tendermint/statesync" "github.com/tendermint/tendermint/types" ) @@ -43,7 +43,7 @@ func TestDispatcherBasic(t *testing.T) { chans, ch := testChannel(100) - d := NewDispatcher(ch) + d := NewDispatcher(ch, log.NewTestingLogger(t)) go handleRequests(ctx, t, d, chans.Out) peers := createPeerSet(numPeers) @@ -75,7 +75,7 @@ func TestDispatcherReturnsNoBlock(t *testing.T) { chans, ch := testChannel(100) - d := NewDispatcher(ch) + d := NewDispatcher(ch, log.NewTestingLogger(t)) peer := factory.NodeID(t, "a") @@ -99,7 +99,7 @@ func TestDispatcherTimeOutWaitingOnLightBlock(t *testing.T) { defer cancel() _, ch := testChannel(100) - d := NewDispatcher(ch) + d := NewDispatcher(ch, log.NewTestingLogger(t)) peer := factory.NodeID(t, "a") ctx, cancelFunc := context.WithTimeout(ctx, 10*time.Millisecond) @@ -122,7 +122,7 @@ func 
TestDispatcherProviders(t *testing.T) { chans, ch := testChannel(100) - d := NewDispatcher(ch) + d := NewDispatcher(ch, log.NewTestingLogger(t)) go handleRequests(ctx, t, d, chans.Out) peers := createPeerSet(5) diff --git a/internal/statesync/reactor.go b/internal/statesync/reactor.go index d422b02697..ed52939abb 100644 --- a/internal/statesync/reactor.go +++ b/internal/statesync/reactor.go @@ -274,7 +274,7 @@ func (r *Reactor) OnStart(ctx context.Context) error { metrics: r.metrics, } } - r.dispatcher = NewDispatcher(blockCh) + r.dispatcher = NewDispatcher(blockCh, r.logger) r.requestSnaphot = func() error { // request snapshots from all currently connected peers return snapshotCh.Send(ctx, p2p.Envelope{ @@ -324,7 +324,7 @@ func (r *Reactor) OnStart(ctx context.Context) error { LightBlockChannel: blockCh, ParamsChannel: paramsCh, }) - go r.processPeerUpdates(ctx, r.peerEvents(ctx)) + go r.processPeerUpdates(ctx, r.peerEvents(ctx, "statesync")) if r.needsStateSync { r.logger.Info("starting state sync") diff --git a/internal/statesync/reactor_test.go b/internal/statesync/reactor_test.go index 4dca531991..610a06e030 100644 --- a/internal/statesync/reactor_test.go +++ b/internal/statesync/reactor_test.go @@ -111,7 +111,7 @@ func setup( } rts.peerUpdateCh = make(chan p2p.PeerUpdate, chBuf) - rts.peerUpdates = p2p.NewPeerUpdates(rts.peerUpdateCh, int(chBuf)) + rts.peerUpdates = p2p.NewPeerUpdates(rts.peerUpdateCh, int(chBuf), "statesync") rts.snapshotChannel = p2p.NewChannel( SnapshotChannel, @@ -177,7 +177,7 @@ func setup( logger.With("component", "reactor"), conn, chCreator, - func(context.Context) *p2p.PeerUpdates { return rts.peerUpdates }, + func(context.Context, string) *p2p.PeerUpdates { return rts.peerUpdates }, rts.stateStore, rts.blockStore, "", diff --git a/libs/log/default.go b/libs/log/default.go index 40c51f1190..288de90a6e 100644 --- a/libs/log/default.go +++ b/libs/log/default.go @@ -74,6 +74,10 @@ func (l defaultLogger) Debug(msg string, keyVals ...interface{}) { l.Logger.Debug().Fields(getLogFields(keyVals...)).Msg(msg) } +func (l defaultLogger) Trace(msg string, keyVals ...interface{}) { + l.Logger.Trace().Fields(getLogFields(keyVals...)).Msg(msg) +} + func (l defaultLogger) With(keyVals ...interface{}) Logger { return &defaultLogger{Logger: l.Logger.With().Fields(getLogFields(keyVals...)).Logger()} } diff --git a/libs/log/logger.go b/libs/log/logger.go index 9913983ba7..3d41660aec 100644 --- a/libs/log/logger.go +++ b/libs/log/logger.go @@ -23,6 +23,7 @@ const ( LogFormatJSON string = "json" // Supported loging levels + LogLevelTrace = "trace" LogLevelDebug = "debug" LogLevelInfo = "info" LogLevelWarn = "warn" @@ -31,6 +32,7 @@ const ( // Logger defines a generic logging interface compatible with Tendermint. 
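Alongside the dispatcher logging changes above, the logger gains a trace level: defaultLogger implements Trace, LogLevelTrace is added, and the Logger interface in the hunk that follows declares the method. A test-style sketch of emitting trace output, with illustrative key-vals:

```go
package log_sketch

import (
	"testing"

	"github.com/tendermint/tendermint/libs/log"
)

// exampleTrace shows the new Trace method, which mirrors Debug/Info/Error and
// is emitted only when the configured log level is set to trace.
func exampleTrace(t *testing.T) {
	logger := log.NewTestingLogger(t).With("module", "dispatcher")
	logger.Trace("sent light-block request", "height", 100, "peer", "validator01")
}
```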
type Logger interface { + Trace(msg string, keyVals ...interface{}) Debug(msg string, keyVals ...interface{}) Info(msg string, keyVals ...interface{}) Error(msg string, keyVals ...interface{}) diff --git a/node/node.go b/node/node.go index d8583f292b..40fe0c446c 100644 --- a/node/node.go +++ b/node/node.go @@ -95,6 +95,7 @@ func newDefaultNode( } if cfg.Mode == config.ModeSeed { return makeSeedNode( + ctx, logger, cfg, config.DefaultDBProvider, @@ -225,7 +226,14 @@ func makeNode( weAreOnlyValidator := onlyValidatorIsUs(state, proTxHash) - peerManager, peerCloser, err := createPeerManager(cfg, dbProvider, nodeKey.ID, nodeMetrics.p2p) + peerManager, peerCloser, err := createPeerManager( + ctx, + cfg, + dbProvider, + nodeKey.ID, + nodeMetrics.p2p, + logger.With("module", "peermanager"), + ) closers = append(closers, peerCloser) if err != nil { return nil, combineCloseError( @@ -390,7 +398,12 @@ func makeNode( nodeMetrics.consensus.BlockSyncing.Set(1) } - node.services = append(node.services, pex.NewReactor(logger, peerManager, node.router.OpenChannel, peerManager.Subscribe)) + node.services = append(node.services, pex.NewReactor( + logger.With("module", "pex"), + peerManager, + node.router.OpenChannel, + peerManager.Subscribe), + ) // Set up state sync reactor, and schedule a sync if requested. // FIXME The way we do phased startups (e.g. replay -> block sync -> consensus) is very messy, @@ -775,9 +788,10 @@ func loadStateFromDBOrGenesisDocProvider(stateStore sm.Store, genDoc *types.Gene func getRouterConfig(conf *config.Config, appClient abciclient.Client) p2p.RouterOptions { opts := p2p.RouterOptions{ - QueueType: conf.P2P.QueueType, - HandshakeTimeout: conf.P2P.HandshakeTimeout, - DialTimeout: conf.P2P.DialTimeout, + QueueType: conf.P2P.QueueType, + HandshakeTimeout: conf.P2P.HandshakeTimeout, + DialTimeout: conf.P2P.DialTimeout, + IncomingConnectionWindow: conf.P2P.IncomingConnectionWindow, } if conf.FilterPeers && appClient != nil { diff --git a/node/node_test.go b/node/node_test.go index 7a7b67cbab..2800f3f0a4 100644 --- a/node/node_test.go +++ b/node/node_test.go @@ -636,6 +636,7 @@ func TestNodeNewSeedNode(t *testing.T) { logger := log.NewNopLogger() ns, err := makeSeedNode( + ctx, logger, cfg, config.DefaultDBProvider, diff --git a/node/public.go b/node/public.go index dbdd20d6ba..66b7492700 100644 --- a/node/public.go +++ b/node/public.go @@ -63,7 +63,7 @@ func New( config.DefaultDBProvider, logger) case config.ModeSeed: - return makeSeedNode(logger, conf, config.DefaultDBProvider, nodeKey, genProvider) + return makeSeedNode(ctx, logger, conf, config.DefaultDBProvider, nodeKey, genProvider) default: return nil, fmt.Errorf("%q is not a valid mode", conf.Mode) } diff --git a/node/seed.go b/node/seed.go index 194bf4dbcb..bd9b37cf3a 100644 --- a/node/seed.go +++ b/node/seed.go @@ -38,6 +38,7 @@ type seedNodeImpl struct { // makeSeedNode returns a new seed node, containing only p2p, pex reactor func makeSeedNode( + ctx context.Context, logger log.Logger, cfg *config.Config, dbProvider config.DBProvider, @@ -62,7 +63,7 @@ func makeSeedNode( // Setup Transport and Switch. 
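getRouterConfig above now copies IncomingConnectionWindow from the P2P config into the router options. A sketch of the resulting RouterOptions shape; the concrete durations and queue type are illustrative, not Tenderdash defaults, and the semantics of IncomingConnectionWindow are taken from the config field it mirrors:

```go
package router_sketch

import (
	"time"

	"github.com/tendermint/tendermint/internal/p2p"
)

// exampleRouterOptions mirrors the fields populated by getRouterConfig,
// including the new IncomingConnectionWindow knob.
func exampleRouterOptions() p2p.RouterOptions {
	return p2p.RouterOptions{
		QueueType:                "fifo",
		HandshakeTimeout:         20 * time.Second,
		DialTimeout:              3 * time.Second,
		IncomingConnectionWindow: 10 * time.Second,
	}
}
```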
p2pMetrics := p2p.PrometheusMetrics(cfg.Instrumentation.Namespace, "chain_id", genDoc.ChainID) - peerManager, closer, err := createPeerManager(cfg, dbProvider, nodeKey.ID, p2pMetrics) + peerManager, closer, err := createPeerManager(ctx, cfg, dbProvider, nodeKey.ID, p2pMetrics, logger) if err != nil { return nil, combineCloseError( fmt.Errorf("failed to create peer manager: %w", err), diff --git a/node/setup.go b/node/setup.go index 8bf57ed0f8..7e26b18938 100644 --- a/node/setup.go +++ b/node/setup.go @@ -35,7 +35,7 @@ import ( "github.com/tendermint/tendermint/types" "github.com/tendermint/tendermint/version" - _ "net/http/pprof" // nolint: gosec // securely exposed on separate, optional port + _ "net/http/pprof" //nolint: gosec // securely exposed on separate, optional port ) type closer func() error @@ -193,10 +193,12 @@ func createEvidenceReactor( } func createPeerManager( + ctx context.Context, cfg *config.Config, dbProvider config.DBProvider, nodeID types.NodeID, metrics *p2p.Metrics, + logger log.Logger, ) (*p2p.PeerManager, closer, error) { selfAddr, err := p2p.ParseNodeAddress(nodeID.AddressString(cfg.P2P.ExternalAddress)) @@ -229,18 +231,19 @@ func createPeerManager( maxUpgradeConns := uint16(4) options := p2p.PeerManagerOptions{ - SelfAddress: selfAddr, - MaxConnected: maxConns, - MaxOutgoingConnections: maxOutgoingConns, - MaxConnectedUpgrade: maxUpgradeConns, - DisconnectCooldownPeriod: 2 * time.Second, - MaxPeers: maxUpgradeConns + 4*maxConns, - MinRetryTime: 250 * time.Millisecond, - MaxRetryTime: 30 * time.Minute, - MaxRetryTimePersistent: 5 * time.Minute, - RetryTimeJitter: 5 * time.Second, - PrivatePeers: privatePeerIDs, - Metrics: metrics, + SelfAddress: selfAddr, + MaxConnected: maxConns, + MaxOutgoingConnections: maxOutgoingConns, + MaxIncomingConnectionTime: cfg.P2P.MaxIncomingConnectionTime, + MaxConnectedUpgrade: maxUpgradeConns, + DisconnectCooldownPeriod: 2 * time.Second, + MaxPeers: maxUpgradeConns + 4*maxConns, + MinRetryTime: 250 * time.Millisecond, + MaxRetryTime: 30 * time.Minute, + MaxRetryTimePersistent: 5 * time.Minute, + RetryTimeJitter: 5 * time.Second, + PrivatePeers: privatePeerIDs, + Metrics: metrics, } peers := []p2p.NodeAddress{} @@ -267,18 +270,22 @@ func createPeerManager( return nil, func() error { return nil }, fmt.Errorf("unable to initialize peer store: %w", err) } - peerManager, err := p2p.NewPeerManager(nodeID, peerDB, options) + peerManager, err := p2p.NewPeerManager(ctx, nodeID, peerDB, options) if err != nil { return nil, peerDB.Close, fmt.Errorf("failed to create peer manager: %w", err) } - + peerManager.SetLogger(logger.With("module", "peermanager")) + closer := func() error { + peerManager.Close() + return peerDB.Close() + } for _, peer := range peers { if _, err := peerManager.Add(peer); err != nil { - return nil, peerDB.Close, fmt.Errorf("failed to add peer %q: %w", peer, err) + return nil, closer, fmt.Errorf("failed to add peer %q: %w", peer, err) } } - return peerManager, peerDB.Close, nil + return peerManager, closer, nil } func createRouter( diff --git a/scripts/release/release.sh b/scripts/release/release.sh index f6a3ceaab8..2a9dbbd45c 100755 --- a/scripts/release/release.sh +++ b/scripts/release/release.sh @@ -233,8 +233,9 @@ function createRelease() { --draft \ --title "v${NEW_PACKAGE_VERSION}" \ --generate-notes \ - $gh_args \ - "v${NEW_PACKAGE_VERSION}" + --target "${TARGET_BRANCH}" \ + ${gh_args} \ + "v${NEW_PACKAGE_VERSION}" } function deleteRelease() { @@ -279,7 +280,14 @@ PR_URL="$(getPrURL)" success "New release branch 
${RELEASE_BRANCH} for ${NEW_PACKAGE_VERSION} prepared successfully." success "Release PR: ${PR_URL}" -success "Please review it, merge and create a release in Github." + +success "Please review it and merge." + +if [[ "${RELEASE_TYPE}" = "prerelease" ]] ; then + success "NOTE: Use 'squash and merge' approach." +else + success "NOTE: Use 'create merge commit' approach." +fi waitForMerge diff --git a/test/e2e/networks/dashcore.toml b/test/e2e/networks/dashcore.toml index bfb2724e65..e403206c16 100644 --- a/test/e2e/networks/dashcore.toml +++ b/test/e2e/networks/dashcore.toml @@ -37,6 +37,12 @@ validator05 = 100 [node.seed01] mode = "seed" perturb = ["restart"] +persistent_peers = ["validator01"] + +p2p_max_connections = 4 +p2p_max_outgoing_connections = 2 +p2p_max_incoming_connection_time = "5s" +p2p_incoming_connection_window = "10s" [node.validator01] seeds = ["seed01"] diff --git a/test/e2e/networks/rotate.toml b/test/e2e/networks/rotate.toml index fe230a2a35..2c4e881dcb 100644 --- a/test/e2e/networks/rotate.toml +++ b/test/e2e/networks/rotate.toml @@ -49,6 +49,12 @@ validator09 = 100 [node.seed01] mode = "seed" perturb = ["restart"] +persistent_peers = ["validator01"] + +p2p_max_connections = 4 +p2p_max_outgoing_connections = 2 +p2p_max_incoming_connection_time = "5s" +p2p_incoming_connection_window = "10s" [node.validator01] seeds = ["seed01"] diff --git a/test/e2e/pkg/exec/exec.go b/test/e2e/pkg/exec/exec.go index 9dcd793844..6a00afc88f 100644 --- a/test/e2e/pkg/exec/exec.go +++ b/test/e2e/pkg/exec/exec.go @@ -1,15 +1,18 @@ package exec import ( + "bytes" "context" "fmt" + "io" "os" osexec "os/exec" + "time" ) // Command executes a shell command. func Command(ctx context.Context, args ...string) error { - // nolint: gosec + //nolint: gosec // G204: Subprocess launched with a potential tainted input or cmd arguments cmd := osexec.CommandContext(ctx, args[0], args[1:]...) out, err := cmd.CombinedOutput() @@ -25,10 +28,49 @@ func Command(ctx context.Context, args ...string) error { // CommandVerbose executes a shell command while displaying its output. func CommandVerbose(ctx context.Context, args ...string) error { - // nolint: gosec + //nolint: gosec // G204: Subprocess launched with a potential tainted input or cmd arguments cmd := osexec.CommandContext(ctx, args[0], args[1:]...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr + now := time.Now() + cmd.Stdout = &tsWriter{out: os.Stdout, start: now} + cmd.Stderr = &tsWriter{out: os.Stderr, start: now} return cmd.Run() } + +// tsWriter prepends each item in written data with current timestamp. 
+// It is used mainly to add info about execution time to output of `e2e runner test` +type tsWriter struct { + out io.Writer + start time.Time + tsAdded bool // tsAdded is true if timestamp was already added to current line +} + +// Write implements io.Writer +func (w *tsWriter) Write(p []byte) (n int, err error) { + for n = 0; n < len(p); { + if !w.tsAdded { + took := time.Since(w.start) + ts := fmt.Sprintf("%09.5fs ", took.Seconds()) + if _, err := w.out.Write([]byte(ts)); err != nil { + return n, err + } + w.tsAdded = true + } + + index := bytes.IndexByte(p[n:], '\n') + if index < 0 { + // not found + index = len(p) - 1 - n + } else { + // we have \n, let's add timestamp in next loop + w.tsAdded = false + } + w, err := w.out.Write(p[n : n+index+1]) + n += w + if err != nil { + return n, err + } + } + + return n, nil +} diff --git a/test/e2e/pkg/exec/exec_test.go b/test/e2e/pkg/exec/exec_test.go new file mode 100644 index 0000000000..7fa03416e6 --- /dev/null +++ b/test/e2e/pkg/exec/exec_test.go @@ -0,0 +1,80 @@ +package exec + +import ( + "bytes" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +const timePrefixLen = 11 + +func TestTSWriter(t *testing.T) { + // length of time added to output + type testCase struct { + input []byte + expectLen int + } + + testCases := []testCase{ + {nil, 0}, + {[]byte{}, 0}, + {[]byte{'\n'}, 1}, + {[]byte("hi"), timePrefixLen + 2}, + {[]byte("hi\n"), timePrefixLen + 3}, + {[]byte("test\nnew\nlines\n\n\nWonder if it will work"), timePrefixLen * 6}, + } + + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + buf := bytes.Buffer{} + + writer := &tsWriter{out: &buf, start: time.Now()} + _, err := writer.Write(tc.input) + assert.NoError(t, err) + + out := buf.Bytes() + newlines := bytes.Count(tc.input, []byte{'\n'}) + if len(tc.input) > 0 && tc.input[len(tc.input)-1] != '\n' { + //Add initial new line. + // We don't add it if last char is a new line, as it will only switch the flag to add prefix in next Write() + newlines++ + } + assert.Len(t, out, len(tc.input)+newlines*timePrefixLen, "new lines: %d", newlines) + }) + } +} + +func TestTSWriterMultiline(t *testing.T) { + tc := [][]byte{ + []byte("Hi\n"), + []byte("My name is "), + []byte("John Doe."), + []byte("\n"), + []byte("\n"), + []byte("I like drinking coffee.\n"), + []byte("\n"), + {}, + []byte("\n\n\n"), + []byte("This "), + nil, + []byte("is "), + []byte("all "), + []byte("\nfor today."), + nil, + {}, + } + expectLen := 76 + 10*timePrefixLen + + buf := bytes.Buffer{} + + writer := &tsWriter{out: &buf, start: time.Now()} + for _, item := range tc { + _, err := writer.Write(item) + assert.NoError(t, err) + } + out := buf.Bytes() + assert.Len(t, out, expectLen) + t.Log("\n" + string(out)) +} diff --git a/test/e2e/pkg/manifest.go b/test/e2e/pkg/manifest.go index 888dd592b0..cc768f8784 100644 --- a/test/e2e/pkg/manifest.go +++ b/test/e2e/pkg/manifest.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "sort" + "time" "github.com/BurntSushi/toml" @@ -165,6 +166,11 @@ type ManifestNode struct { // SnapshotInterval and EvidenceAgeHeight. 
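The tsWriter added in the exec.go hunk above prefixes every output line with the elapsed time since the command started, using the 11-character "%09.5fs " stamp that timePrefixLen in exec_test.go accounts for. A minimal in-package sketch of that behaviour, assuming test-code access to the unexported type; the sample output is approximate:

```go
package exec

import (
	"bytes"
	"time"
)

// exampleTSWriter is a sketch (not part of the change) showing the output
// framing: each written line is prefixed with seconds elapsed since start.
func exampleTSWriter() string {
	buf := &bytes.Buffer{}
	w := &tsWriter{out: buf, start: time.Now()}
	_, _ = w.Write([]byte("building image\ndone\n"))
	// buf now contains something like:
	//   000.00001s building image
	//   000.00012s done
	return buf.String()
}
```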
RetainBlocks uint64 `toml:"retain_blocks"` + P2PMaxConnections uint16 `toml:"p2p_max_connections"` + P2PMaxOutgoingConnections uint16 `toml:"p2p_max_outgoing_connections"` + P2PMaxIncomingConnectionTime time.Duration `toml:"p2p_max_incoming_connection_time"` + P2PIncomingConnectionWindow time.Duration `toml:"p2p_incoming_connection_window"` + // Perturb lists perturbations to apply to the node after it has been // started and synced with the network: // diff --git a/test/e2e/pkg/testnet.go b/test/e2e/pkg/testnet.go index 370636e195..6f04e36c75 100644 --- a/test/e2e/pkg/testnet.go +++ b/test/e2e/pkg/testnet.go @@ -105,29 +105,33 @@ type Testnet struct { // Node represents a Tenderdash node in a testnet. type Node struct { - Name string - Testnet *Testnet - Mode Mode - PrivvalKeys map[string]crypto.QuorumKeys - PrivvalUpdateHeights map[string]crypto.QuorumHash - NodeKey crypto.PrivKey - ProTxHash crypto.ProTxHash - IP net.IP - ProxyPort uint32 - StartAt int64 - Mempool string - StateSync string - Database string - PrivvalProtocol Protocol - PersistInterval uint64 - SnapshotInterval uint64 - RetainBlocks uint64 - Seeds []*Node - PersistentPeers []*Node - Perturbations []Perturbation - LogLevel string - QueueType string - HasStarted bool + Name string + Testnet *Testnet + Mode Mode + PrivvalKeys map[string]crypto.QuorumKeys + PrivvalUpdateHeights map[string]crypto.QuorumHash + NodeKey crypto.PrivKey + ProTxHash crypto.ProTxHash + IP net.IP + ProxyPort uint32 + StartAt int64 + Mempool string + StateSync string + Database string + PrivvalProtocol Protocol + PersistInterval uint64 + SnapshotInterval uint64 + RetainBlocks uint64 + P2PMaxConnections uint16 + P2PMaxOutgoingConnections uint16 + P2PMaxIncomingConnectionTime time.Duration + P2PIncomingConnectionWindow time.Duration + Seeds []*Node + PersistentPeers []*Node + Perturbations []Perturbation + LogLevel string + QueueType string + HasStarted bool } // LoadTestnet loads a testnet from a manifest file, using the filename to @@ -274,6 +278,19 @@ func LoadTestnet(file string) (*Testnet, error) { if nodeManifest.PersistInterval != nil { node.PersistInterval = *nodeManifest.PersistInterval } + if nodeManifest.P2PMaxConnections > 0 { + node.P2PMaxConnections = nodeManifest.P2PMaxConnections + } + if nodeManifest.P2PMaxOutgoingConnections > 0 { + node.P2PMaxOutgoingConnections = nodeManifest.P2PMaxOutgoingConnections + } + if nodeManifest.P2PMaxIncomingConnectionTime > 0 { + node.P2PMaxIncomingConnectionTime = nodeManifest.P2PMaxIncomingConnectionTime + } + if nodeManifest.P2PIncomingConnectionWindow > 0 { + node.P2PIncomingConnectionWindow = nodeManifest.P2PIncomingConnectionWindow + } + for _, p := range nodeManifest.Perturb { node.Perturbations = append(node.Perturbations, Perturbation(p)) } @@ -504,7 +521,7 @@ func (n Node) Validate(testnet Testnet) error { return fmt.Errorf("unsupported p2p queue type: %s", n.QueueType) } switch n.Database { - case "goleveldb", "cleveldb", "boltdb", "badgerdb": + case "goleveldb", "cleveldb", "boltdb", "badgerdb", "memdb": default: return fmt.Errorf("invalid database setting %q", n.Database) } diff --git a/test/e2e/runner/setup.go b/test/e2e/runner/setup.go index ce497c0057..4ba9cfb250 100644 --- a/test/e2e/runner/setup.go +++ b/test/e2e/runner/setup.go @@ -296,12 +296,20 @@ func MakeConfig(node *e2e.Node) (*config.Config, error) { } } - cfg.P2P.PersistentPeers = "" - for _, peer := range node.PersistentPeers { - if len(cfg.P2P.PersistentPeers) > 0 { - cfg.P2P.PersistentPeers += "," - } - 
cfg.P2P.PersistentPeers += peer.AddressP2P(true) + cfg.P2P.PersistentPeers = joinNodeP2PAddresses(node.PersistentPeers, true, ",") + cfg.P2P.BootstrapPeers = joinNodeP2PAddresses(node.Seeds, true, ",") + + if node.P2PMaxConnections > 0 { + cfg.P2P.MaxConnections = node.P2PMaxConnections + } + if node.P2PMaxOutgoingConnections > 0 { + cfg.P2P.MaxOutgoingConnections = node.P2PMaxOutgoingConnections + } + if node.P2PMaxIncomingConnectionTime > 0 { + cfg.P2P.MaxIncomingConnectionTime = node.P2PMaxIncomingConnectionTime + } + if node.P2PIncomingConnectionWindow > 0 { + cfg.P2P.IncomingConnectionWindow = node.P2PIncomingConnectionWindow } cfg.Instrumentation.Prometheus = true @@ -309,6 +317,14 @@ func MakeConfig(node *e2e.Node) (*config.Config, error) { return cfg, nil } +func joinNodeP2PAddresses(nodes []*e2e.Node, withID bool, sep string) string { + addresses := []string{} + for _, node := range nodes { + addresses = append(addresses, node.AddressP2P(withID)) + } + return strings.Join(addresses, sep) +} + // MakeAppConfig generates an ABCI application config for a node. func MakeAppConfig(node *e2e.Node) ([]byte, error) { cfg := map[string]interface{}{ diff --git a/third_party/bls-signatures/build.sh b/third_party/bls-signatures/build.sh index 7586d2d06b..cb5252952f 100755 --- a/third_party/bls-signatures/build.sh +++ b/third_party/bls-signatures/build.sh @@ -1,32 +1,32 @@ #!/bin/bash -set -e - SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" -SRC_PATH="$SCRIPT_PATH/src" -BUILD_PATH="$SCRIPT_PATH/build" +SRC_PATH="${SCRIPT_PATH}/src" +BUILD_PATH="${SCRIPT_PATH}/build" BLS_SM_PATH="third_party/bls-signatures/src" BLS_GIT_REPO="https://github.com/dashpay/bls-signatures.git" BLS_GIT_BRANCH="develop" -git submodule update --init $BLS_SM_PATH -if [ $? -ne 0 ]; then +set -ex + +if ! git submodule update --init "${BLS_SM_PATH}" ; then echo "It looks like this source code is not tracked by git." - echo "As a fallback scenario we will fetch \"$BLS_GIT_BRANCH\" branch \"$BLS_GIT_REPO\" library." + echo "As a fallback scenario we will fetch \"${BLS_GIT_BRANCH}\" branch \"${BLS_GIT_REPO}\" library." echo "We would recommend to clone of this project rather than using a release archive." - git clone --single-branch --branch $BLS_GIT_BRANCH $BLS_GIT_REPO $BLS_SM_PATH + rm -r "${BLS_SM_PATH}" || true + git clone --single-branch --branch "${BLS_GIT_BRANCH}" "${BLS_GIT_REPO}" "${BLS_SM_PATH}" fi # Create folders for source and build data -mkdir -p $BUILD_PATH +mkdir -p "${BUILD_PATH}" # Configurate the library build -cmake -B $BUILD_PATH -S $SRC_PATH +cmake -B "${BUILD_PATH}" -S "${SRC_PATH}" # Build the library -cmake --build $BUILD_PATH -- -j 6 +cmake --build "${BUILD_PATH}" -- -j 6 -mkdir -p $BUILD_PATH/src/bls-dash -cp -r $SRC_PATH/src/* $BUILD_PATH/src/bls-dash +mkdir -p "${BUILD_PATH}/src/bls-dash" +cp -r ${SRC_PATH}/src/* "${BUILD_PATH}/src/bls-dash" exit 0
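setup.go above also introduces joinNodeP2PAddresses, which MakeConfig uses to build both the PersistentPeers and the new BootstrapPeers strings. A short usage sketch, written as it would appear inside the e2e runner package where the e2e and config imports already exist; AddressP2P(true) is assumed to render each entry as "<node-id>@<ip>:<port>":

```go
// Sketch: join each peer's ID-qualified address with "," to produce the
// strings stored in the node's P2P config.
func examplePeerStrings(node *e2e.Node, cfg *config.Config) {
	cfg.P2P.PersistentPeers = joinNodeP2PAddresses(node.PersistentPeers, true, ",")
	cfg.P2P.BootstrapPeers = joinNodeP2PAddresses(node.Seeds, true, ",")
}
```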