Skip to content

Commit

Permalink
op-challenger: Fix highest acted L1 block metric for completed games (e…
Browse files Browse the repository at this point in the history
…thereum-optimism#10549)

* op-challenger: Fix highest acted L1 block metric for completed games

Adds an e2e test to prove it really does work.

* op-challenger: Add more context to comment.
  • Loading branch information
ajsutton authored May 15, 2024
1 parent fa88d9c commit 27d2f16
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 23 deletions.
5 changes: 3 additions & 2 deletions op-challenger/challenger.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package op_challenger
import (
"context"

"github.com/ethereum-optimism/optimism/op-challenger/metrics"
"github.com/ethereum/go-ethereum/log"

"github.com/ethereum-optimism/optimism/op-challenger/config"
Expand All @@ -11,9 +12,9 @@ import (
)

// Main is the programmatic entry-point for running op-challenger with a given configuration.
func Main(ctx context.Context, logger log.Logger, cfg *config.Config) (cliapp.Lifecycle, error) {
func Main(ctx context.Context, logger log.Logger, cfg *config.Config, m metrics.Metricer) (cliapp.Lifecycle, error) {
if err := cfg.Check(); err != nil {
return nil, err
}
return game.NewService(ctx, logger, cfg)
return game.NewService(ctx, logger, cfg, m)
}
3 changes: 2 additions & 1 deletion op-challenger/challenger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ import (
"testing"

"github.com/ethereum-optimism/optimism/op-challenger/config"
"github.com/ethereum-optimism/optimism/op-challenger/metrics"
"github.com/ethereum-optimism/optimism/op-service/testlog"
"github.com/ethereum/go-ethereum/log"
"github.com/stretchr/testify/require"
)

func TestMainShouldReturnErrorWhenConfigInvalid(t *testing.T) {
cfg := &config.Config{}
app, err := Main(context.Background(), testlog.Logger(t, log.LevelInfo), cfg)
app, err := Main(context.Background(), testlog.Logger(t, log.LevelInfo), cfg, metrics.NoopMetrics)
require.ErrorIs(t, err, cfg.Check())
require.Nil(t, app)
}
5 changes: 4 additions & 1 deletion op-challenger/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"os"

"github.com/ethereum-optimism/optimism/op-challenger/metrics"
"github.com/urfave/cli/v2"

"github.com/ethereum/go-ethereum/log"
Expand All @@ -29,7 +30,9 @@ var VersionWithMeta = opservice.FormatVersion(version.Version, GitCommit, GitDat
func main() {
args := os.Args
ctx := opio.WithInterruptBlocker(context.Background())
if err := run(ctx, args, challenger.Main); err != nil {
if err := run(ctx, args, func(ctx context.Context, l log.Logger, config *config.Config) (cliapp.Lifecycle, error) {
return challenger.Main(ctx, l, config, metrics.NewMetrics())
}); err != nil {
log.Crit("Application failed", "err", err)
}
}
Expand Down
2 changes: 1 addition & 1 deletion op-challenger/game/scheduler/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,12 @@ func (c *coordinator) createJob(ctx context.Context, game types.GameMetadata, bl
state.player = player
state.status = player.Status()
}
state.inflight = true
if state.status != types.GameStatusInProgress {
c.logger.Debug("Not rescheduling resolved game", "game", game.Proxy, "status", state.status)
state.lastProcessedBlockNum = blockNumber
return nil, nil
}
state.inflight = true
return newJob(blockNumber, game.Proxy, state.player, state.status), nil
}

Expand Down
48 changes: 41 additions & 7 deletions op-challenger/game/scheduler/coordinator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,29 +204,63 @@ func TestDeleteDataForResolvedGames(t *testing.T) {

require.True(t, disk.gameDirExists[gameAddr1], "game 1 data should be preserved (not resolved)")
require.False(t, disk.gameDirExists[gameAddr2], "game 2 data should be deleted")
require.True(t, disk.gameDirExists[gameAddr3], "game 3 data should be preserved (inflight)")
// Game 3 never got marked as in-flight because it was already resolved so got skipped.
// We shouldn't be able to have a known-resolved game that is also in-flight because we always skip processing it.
require.False(t, disk.gameDirExists[gameAddr3], "game 3 data should be deleted")
}

func TestSchedule_RecordActedL1Block(t *testing.T) {
c, workQueue, _, _, _, _ := setupCoordinatorTest(t, 10)
gameAddr3 := common.Address{0xcc}
gameAddr1 := common.Address{0xaa}
gameAddr2 := common.Address{0xcc}
ctx := context.Background()

// The first game should be tracked
require.NoError(t, c.schedule(ctx, asGames(gameAddr3), 1))
require.NoError(t, c.schedule(ctx, asGames(gameAddr1, gameAddr2), 1))

// Process the result
require.Len(t, workQueue, 1)
require.Len(t, workQueue, 2)
j := <-workQueue
require.Equal(t, gameAddr1, j.addr)
j.status = types.GameStatusDefenderWon
require.NoError(t, c.processResult(j))
j = <-workQueue
require.Equal(t, gameAddr2, j.addr)
j.status = types.GameStatusInProgress
require.NoError(t, c.processResult(j))

// Schedule another block
require.NoError(t, c.schedule(ctx, asGames(gameAddr1, gameAddr2), 2))

// Process the result (only the in-progress game gets rescheduled)
require.Len(t, workQueue, 1)
j = <-workQueue
require.Equal(t, gameAddr2, j.addr)
require.Equal(t, uint64(2), j.block)
j.status = types.GameStatusInProgress
require.NoError(t, c.processResult(j))

// Schedule a third block
require.NoError(t, c.schedule(ctx, asGames(gameAddr1, gameAddr2), 3))

// Process the result (only the in-progress game gets rescheduled)
// This is deliberately done a third time, because there was actually a bug where it worked for the first two
// cycles and failed on the third. This was because the first cycle the game status was unknown so it was processed
// the second cycle was the first time the game was known to be complete so was skipped but crucially it left it
// marked as in-flight. On the third update the was incorrectly skipped as in-flight and the l1 block number
// wasn't updated. From then on the block number would never be updated.
require.Len(t, workQueue, 1)
j = <-workQueue
require.Equal(t, gameAddr2, j.addr)
require.Equal(t, uint64(3), j.block)
j.status = types.GameStatusInProgress
require.NoError(t, c.processResult(j))

// Schedule so that the metric is updated
require.NoError(t, c.schedule(ctx, asGames(gameAddr3), 2))
require.NoError(t, c.schedule(ctx, asGames(gameAddr1, gameAddr2), 4))

// Verify that the block number is recorded by the metricer as acted upon
// The one game is now complete so its block number is updated immediately because it doesn't get scheduled
require.Equal(t, uint64(2), c.m.(*stubSchedulerMetrics).actedL1Blocks)
require.Equal(t, uint64(3), c.m.(*stubSchedulerMetrics).actedL1Blocks)
}

func TestSchedule_RecordActedL1BlockMultipleGames(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions op-challenger/game/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ type Service struct {
}

// NewService creates a new Service.
func NewService(ctx context.Context, logger log.Logger, cfg *config.Config) (*Service, error) {
func NewService(ctx context.Context, logger log.Logger, cfg *config.Config, m metrics.Metricer) (*Service, error) {
s := &Service{
systemClock: clock.SystemClock,
l1Clock: clock.NewSimpleClock(),
logger: logger,
metrics: metrics.NewMetrics(),
metrics: m,
}

if err := s.initFromConfig(ctx, cfg); err != nil {
Expand Down
35 changes: 26 additions & 9 deletions op-e2e/e2eutils/challenger/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"time"

"github.com/ethereum-optimism/optimism/op-service/metrics"
"github.com/ethereum/go-ethereum/ethclient"
"github.com/stretchr/testify/require"

"github.com/ethereum/go-ethereum/common"
Expand Down Expand Up @@ -39,15 +40,17 @@ type Helper struct {
require *require.Assertions
dir string
chl cliapp.Lifecycle
metrics *CapturingMetrics
}

func NewHelper(log log.Logger, t *testing.T, require *require.Assertions, dir string, chl cliapp.Lifecycle) *Helper {
func NewHelper(log log.Logger, t *testing.T, require *require.Assertions, dir string, chl cliapp.Lifecycle, m *CapturingMetrics) *Helper {
return &Helper{
log: log,
t: t,
require: require,
dir: dir,
chl: chl,
metrics: m,
}
}

Expand Down Expand Up @@ -134,17 +137,13 @@ func NewChallenger(t *testing.T, ctx context.Context, sys EndpointProvider, name
log := testlog.Logger(t, log.LevelDebug).New("role", name)
log.Info("Creating challenger")
cfg := NewChallengerConfig(t, sys, "sequencer", options...)
chl, err := challenger.Main(ctx, log, cfg)
cfg.MetricsConfig.Enabled = false // Don't start the metrics server
m := NewCapturingMetrics()
chl, err := challenger.Main(ctx, log, cfg, m)
require.NoError(t, err, "must init challenger")
require.NoError(t, chl.Start(ctx), "must start challenger")

return &Helper{
log: log,
t: t,
require: require.New(t),
dir: cfg.Datadir,
chl: chl,
}
return NewHelper(log, t, require.New(t), cfg.Datadir, chl, m)
}

func NewChallengerConfig(t *testing.T, sys EndpointProvider, l2NodeName string, options ...Option) *config.Config {
Expand Down Expand Up @@ -234,3 +233,21 @@ func (h *Helper) WaitForGameDataDeletion(ctx context.Context, games ...GameAddr)
func (h *Helper) gameDataDir(addr common.Address) string {
return filepath.Join(h.dir, "game-"+addr.Hex())
}

func (h *Helper) WaitL1HeadActedOn(ctx context.Context, client *ethclient.Client) {
l1Head, err := client.BlockNumber(ctx)
h.require.NoError(err)
h.WaitForHighestActedL1Block(ctx, l1Head)
}

func (h *Helper) WaitForHighestActedL1Block(ctx context.Context, head uint64) {
timedCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
var actual uint64
err := wait.For(timedCtx, time.Second, func() (bool, error) {
actual = h.metrics.HighestActedL1Block.Load()
h.log.Info("Waiting for highest acted L1 block", "target", head, "actual", actual)
return actual >= head, nil
})
h.require.NoErrorf(err, "Highest acted L1 block did not reach %v, was: %v", head, actual)
}
23 changes: 23 additions & 0 deletions op-e2e/e2eutils/challenger/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package challenger

import (
"sync/atomic"

"github.com/ethereum-optimism/optimism/op-challenger/metrics"
)

type CapturingMetrics struct {
metrics.NoopMetricsImpl

HighestActedL1Block atomic.Uint64
}

func NewCapturingMetrics() *CapturingMetrics {
return &CapturingMetrics{}
}

var _ metrics.Metricer = (*CapturingMetrics)(nil)

func (c *CapturingMetrics) RecordActedL1Block(block uint64) {
c.HighestActedL1Block.Store(block)
}
24 changes: 24 additions & 0 deletions op-e2e/faultproofs/output_alphabet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,27 @@ func TestOutputAlphabetGame_FreeloaderEarnsNothing(t *testing.T) {
amt := game.Credit(ctx, freeloaderOpts.From)
require.True(t, amt.BitLen() == 0, "freeloaders should not be rewarded")
}

func TestHighestActedL1BlockMetric(t *testing.T) {
op_e2e.InitParallel(t)
ctx := context.Background()
sys, l1Client := StartFaultDisputeSystem(t)
t.Cleanup(sys.Close)

disputeGameFactory := disputegame.NewFactoryHelper(t, ctx, sys)
honestChallenger := disputeGameFactory.StartChallenger(ctx, "Honest", challenger.WithAlphabet(), challenger.WithPrivKey(sys.Cfg.Secrets.Alice))

game1 := disputeGameFactory.StartOutputAlphabetGame(ctx, "sequencer", 1, common.Hash{0xaa})
sys.AdvanceTime(game1.MaxClockDuration(ctx))
require.NoError(t, wait.ForNextBlock(ctx, l1Client))

game1.WaitForGameStatus(ctx, disputegame.StatusDefenderWins)

disputeGameFactory.StartOutputAlphabetGame(ctx, "sequencer", 2, common.Hash{0xaa})
disputeGameFactory.StartOutputAlphabetGame(ctx, "sequencer", 3, common.Hash{0xaa})

honestChallenger.WaitL1HeadActedOn(ctx, l1Client)

require.NoError(t, wait.ForNextBlock(ctx, l1Client))
honestChallenger.WaitL1HeadActedOn(ctx, l1Client)
}

0 comments on commit 27d2f16

Please sign in to comment.