From ddd1e3e7573a3c2e31fd8364f68671e55d466e5c Mon Sep 17 00:00:00 2001 From: Diego Ximenes Date: Mon, 20 Jan 2025 16:31:41 -0300 Subject: [PATCH 1/4] Flushed TrieDB during maintenance --- arbnode/maintenance.go | 17 ++++++---- execution/gethexec/executionengine.go | 5 +++ execution/gethexec/node.go | 6 +++- execution/interface.go | 2 +- system_tests/maintenance_test.go | 46 +++++++++++++++++++++++++++ 5 files changed, 67 insertions(+), 9 deletions(-) create mode 100644 system_tests/maintenance_test.go diff --git a/arbnode/maintenance.go b/arbnode/maintenance.go index 5e4e56b577..d7bfba686f 100644 --- a/arbnode/maintenance.go +++ b/arbnode/maintenance.go @@ -36,8 +36,9 @@ type MaintenanceRunner struct { } type MaintenanceConfig struct { - TimeOfDay string `koanf:"time-of-day" reload:"hot"` - Lock redislock.SimpleCfg `koanf:"lock" reload:"hot"` + TimeOfDay string `koanf:"time-of-day" reload:"hot"` + Lock redislock.SimpleCfg `koanf:"lock" reload:"hot"` + TrieDBCapLimit int64 `koanf:"triedb-cap-limit" reload:"hot"` // Generated: the minutes since start of UTC day to compact at minutesAfterMidnight int @@ -75,12 +76,14 @@ func (c *MaintenanceConfig) Validate() error { func MaintenanceConfigAddOptions(prefix string, f *flag.FlagSet) { f.String(prefix+".time-of-day", DefaultMaintenanceConfig.TimeOfDay, "UTC 24-hour time of day to run maintenance (currently only db compaction) at (e.g. 15:00)") + f.Int(prefix+".triedb-cap-limit", int(DefaultMaintenanceConfig.TrieDBCapLimit), "amount of memory in bytes to be used in the TrieDB Cap operation") redislock.AddConfigOptions(prefix+".lock", f) } var DefaultMaintenanceConfig = MaintenanceConfig{ - TimeOfDay: "", - Lock: redislock.DefaultCfg, + TimeOfDay: "", + TrieDBCapLimit: 100 * 1024 * 1024, + Lock: redislock.DefaultCfg, minutesAfterMidnight: 0, } @@ -171,7 +174,7 @@ func (mr *MaintenanceRunner) maybeRunMaintenance(ctx context.Context) time.Durat } func (mr *MaintenanceRunner) runMaintenance() { - log.Info("Compacting databases (this may take a while...)") + log.Info("Compacting databases and flushing triedb to disk (this may take a while...)") results := make(chan error, len(mr.dbs)) expected := 0 for _, db := range mr.dbs { @@ -183,7 +186,7 @@ func (mr *MaintenanceRunner) runMaintenance() { } expected++ go func() { - results <- mr.exec.Maintenance() + results <- mr.exec.Maintenance(mr.config().TrieDBCapLimit) }() for i := 0; i < expected; i++ { err := <-results @@ -191,5 +194,5 @@ func (mr *MaintenanceRunner) runMaintenance() { log.Warn("maintenance error", "err", err) } } - log.Info("Done compacting databases") + log.Info("Done compacting databases and flushing triedb to disk") } diff --git a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index e606027419..9c5815a58e 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -29,6 +29,7 @@ import ( "github.com/google/uuid" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" @@ -1009,3 +1010,7 @@ func (s *ExecutionEngine) Start(ctx_in context.Context) { }) } } + +func (s *ExecutionEngine) Maintenance(capLimit int64) error { + return s.bc.FlushTrieDB(&s.createBlocksMutex, common.StorageSize(capLimit)) +} diff --git a/execution/gethexec/node.go b/execution/gethexec/node.go index 5030de0cfa..044ddab7a1 100644 --- a/execution/gethexec/node.go +++ b/execution/gethexec/node.go @@ -453,7 +453,11 @@ func (n *ExecutionNode) MessageIndexToBlockNumber(messageNum arbutil.MessageInde return n.ExecEngine.MessageIndexToBlockNumber(messageNum) } -func (n *ExecutionNode) Maintenance() error { +func (n *ExecutionNode) Maintenance(capLimit int64) error { + err := n.ExecEngine.Maintenance(capLimit) + if err != nil { + return err + } return n.ChainDB.Compact(nil, nil) } diff --git a/execution/interface.go b/execution/interface.go index c0aa71c146..c6e030835d 100644 --- a/execution/interface.go +++ b/execution/interface.go @@ -68,7 +68,7 @@ type FullExecutionClient interface { Start(ctx context.Context) error StopAndWait() - Maintenance() error + Maintenance(capLimit int64) error ArbOSVersionForMessageNumber(messageNum arbutil.MessageIndex) (uint64, error) } diff --git a/system_tests/maintenance_test.go b/system_tests/maintenance_test.go new file mode 100644 index 0000000000..ad591ca9ce --- /dev/null +++ b/system_tests/maintenance_test.go @@ -0,0 +1,46 @@ +// Copyright 2021-2025, Offchain Labs, Inc. +// For license information, see https://github.com/OffchainLabs/nitro/blob/master/LICENSE + +package arbtest + +import ( + "context" + "fmt" + "math/big" + "testing" +) + +func TestMaintenance(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + builder := NewNodeBuilder(ctx).DefaultConfig(t, false) + cleanup := builder.Build(t) + defer cleanup() + + numberOfTransfers := 10 + for i := 2; i < 3+numberOfTransfers; i++ { + account := fmt.Sprintf("User%d", i) + builder.L2Info.GenerateAccount(account) + + tx := builder.L2Info.PrepareTx("Owner", account, builder.L2Info.TransferGas, big.NewInt(1e12), nil) + err := builder.L2.Client.SendTransaction(ctx, tx) + Require(t, err) + _, err = builder.L2.EnsureTxSucceeded(tx) + Require(t, err) + } + + err := builder.L2.ExecNode.Maintenance(100 * 1024 * 1024) + Require(t, err) + + for i := 2; i < 3+numberOfTransfers; i++ { + account := fmt.Sprintf("User%d", i) + balance, err := builder.L2.Client.BalanceAt(ctx, builder.L2Info.GetAddress(account), nil) + Require(t, err) + if balance.Cmp(big.NewInt(int64(1e12))) != 0 { + t.Fatal("Unexpected balance:", balance, "for account:", account) + } + } +} From e9bda8dae0388b3b15d333a20eef9bb5b4b8d7cc Mon Sep 17 00:00:00 2001 From: Diego Ximenes Date: Fri, 24 Jan 2025 10:23:30 -0300 Subject: [PATCH 2/4] Moves maintenance trie cap limit config to execution config --- arbnode/maintenance.go | 13 +++++-------- execution/gethexec/blockchain.go | 3 +++ execution/gethexec/executionengine.go | 2 +- execution/gethexec/node.go | 5 +++-- execution/interface.go | 2 +- system_tests/maintenance_test.go | 2 +- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arbnode/maintenance.go b/arbnode/maintenance.go index d7bfba686f..479188656d 100644 --- a/arbnode/maintenance.go +++ b/arbnode/maintenance.go @@ -36,9 +36,8 @@ type MaintenanceRunner struct { } type MaintenanceConfig struct { - TimeOfDay string `koanf:"time-of-day" reload:"hot"` - Lock redislock.SimpleCfg `koanf:"lock" reload:"hot"` - TrieDBCapLimit int64 `koanf:"triedb-cap-limit" reload:"hot"` + TimeOfDay string `koanf:"time-of-day" reload:"hot"` + Lock redislock.SimpleCfg `koanf:"lock" reload:"hot"` // Generated: the minutes since start of UTC day to compact at minutesAfterMidnight int @@ -76,14 +75,12 @@ func (c *MaintenanceConfig) Validate() error { func MaintenanceConfigAddOptions(prefix string, f *flag.FlagSet) { f.String(prefix+".time-of-day", DefaultMaintenanceConfig.TimeOfDay, "UTC 24-hour time of day to run maintenance (currently only db compaction) at (e.g. 15:00)") - f.Int(prefix+".triedb-cap-limit", int(DefaultMaintenanceConfig.TrieDBCapLimit), "amount of memory in bytes to be used in the TrieDB Cap operation") redislock.AddConfigOptions(prefix+".lock", f) } var DefaultMaintenanceConfig = MaintenanceConfig{ - TimeOfDay: "", - TrieDBCapLimit: 100 * 1024 * 1024, - Lock: redislock.DefaultCfg, + TimeOfDay: "", + Lock: redislock.DefaultCfg, minutesAfterMidnight: 0, } @@ -186,7 +183,7 @@ func (mr *MaintenanceRunner) runMaintenance() { } expected++ go func() { - results <- mr.exec.Maintenance(mr.config().TrieDBCapLimit) + results <- mr.exec.Maintenance() }() for i := 0; i < expected; i++ { err := <-results diff --git a/execution/gethexec/blockchain.go b/execution/gethexec/blockchain.go index 53b494a3c2..9daf76ea78 100644 --- a/execution/gethexec/blockchain.go +++ b/execution/gethexec/blockchain.go @@ -33,6 +33,7 @@ type CachingConfig struct { TrieTimeLimit time.Duration `koanf:"trie-time-limit"` TrieDirtyCache int `koanf:"trie-dirty-cache"` TrieCleanCache int `koanf:"trie-clean-cache"` + TrieCapLimit uint32 `koanf:"trie-cap-limit"` SnapshotCache int `koanf:"snapshot-cache"` DatabaseCache int `koanf:"database-cache"` SnapshotRestoreGasLimit uint64 `koanf:"snapshot-restore-gas-limit"` @@ -53,6 +54,7 @@ func CachingConfigAddOptions(prefix string, f *flag.FlagSet) { f.Int(prefix+".trie-clean-cache", DefaultCachingConfig.TrieCleanCache, "amount of memory in megabytes to cache unchanged state trie nodes with") f.Int(prefix+".snapshot-cache", DefaultCachingConfig.SnapshotCache, "amount of memory in megabytes to cache state snapshots with") f.Int(prefix+".database-cache", DefaultCachingConfig.DatabaseCache, "amount of memory in megabytes to cache database contents with") + f.Uint32(prefix+".trie-cap-limit", DefaultCachingConfig.TrieCapLimit, "amount of memory in megabytes to be used in the TrieDB Cap operation during maintenance") f.Uint64(prefix+".snapshot-restore-gas-limit", DefaultCachingConfig.SnapshotRestoreGasLimit, "maximum gas rolled back to recover snapshot") f.Uint32(prefix+".max-number-of-blocks-to-skip-state-saving", DefaultCachingConfig.MaxNumberOfBlocksToSkipStateSaving, "maximum number of blocks to skip state saving to persistent storage (archive node only) -- warning: this option seems to cause issues") f.Uint64(prefix+".max-amount-of-gas-to-skip-state-saving", DefaultCachingConfig.MaxAmountOfGasToSkipStateSaving, "maximum amount of gas in blocks to skip saving state to Persistent storage (archive node only) -- warning: this option seems to cause issues") @@ -74,6 +76,7 @@ var DefaultCachingConfig = CachingConfig{ TrieTimeLimit: time.Hour, TrieDirtyCache: 1024, TrieCleanCache: 600, + TrieCapLimit: 100 * 1024 * 1024, SnapshotCache: 400, DatabaseCache: 2048, SnapshotRestoreGasLimit: 300_000_000_000, diff --git a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index 9c5815a58e..b67dff6a83 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -1011,6 +1011,6 @@ func (s *ExecutionEngine) Start(ctx_in context.Context) { } } -func (s *ExecutionEngine) Maintenance(capLimit int64) error { +func (s *ExecutionEngine) Maintenance(capLimit uint64) error { return s.bc.FlushTrieDB(&s.createBlocksMutex, common.StorageSize(capLimit)) } diff --git a/execution/gethexec/node.go b/execution/gethexec/node.go index 044ddab7a1..d62bfdc963 100644 --- a/execution/gethexec/node.go +++ b/execution/gethexec/node.go @@ -453,8 +453,9 @@ func (n *ExecutionNode) MessageIndexToBlockNumber(messageNum arbutil.MessageInde return n.ExecEngine.MessageIndexToBlockNumber(messageNum) } -func (n *ExecutionNode) Maintenance(capLimit int64) error { - err := n.ExecEngine.Maintenance(capLimit) +func (n *ExecutionNode) Maintenance() error { + trieCapLimitBytes := 1024 * uint64(n.ConfigFetcher().Caching.TrieCapLimit) + err := n.ExecEngine.Maintenance(trieCapLimitBytes) if err != nil { return err } diff --git a/execution/interface.go b/execution/interface.go index c6e030835d..c0aa71c146 100644 --- a/execution/interface.go +++ b/execution/interface.go @@ -68,7 +68,7 @@ type FullExecutionClient interface { Start(ctx context.Context) error StopAndWait() - Maintenance(capLimit int64) error + Maintenance() error ArbOSVersionForMessageNumber(messageNum arbutil.MessageIndex) (uint64, error) } diff --git a/system_tests/maintenance_test.go b/system_tests/maintenance_test.go index ad591ca9ce..a1f36bbcfc 100644 --- a/system_tests/maintenance_test.go +++ b/system_tests/maintenance_test.go @@ -32,7 +32,7 @@ func TestMaintenance(t *testing.T) { Require(t, err) } - err := builder.L2.ExecNode.Maintenance(100 * 1024 * 1024) + err := builder.L2.ExecNode.Maintenance() Require(t, err) for i := 2; i < 3+numberOfTransfers; i++ { From 9663e83b8d26189bbf1eb53cbe90e5da8320f00e Mon Sep 17 00:00:00 2001 From: Diego Ximenes Date: Fri, 24 Jan 2025 10:47:04 -0300 Subject: [PATCH 3/4] Fixes units related to trie cap limit config --- execution/gethexec/blockchain.go | 2 +- execution/gethexec/node.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/execution/gethexec/blockchain.go b/execution/gethexec/blockchain.go index 9daf76ea78..64b6a626de 100644 --- a/execution/gethexec/blockchain.go +++ b/execution/gethexec/blockchain.go @@ -76,7 +76,7 @@ var DefaultCachingConfig = CachingConfig{ TrieTimeLimit: time.Hour, TrieDirtyCache: 1024, TrieCleanCache: 600, - TrieCapLimit: 100 * 1024 * 1024, + TrieCapLimit: 100, SnapshotCache: 400, DatabaseCache: 2048, SnapshotRestoreGasLimit: 300_000_000_000, diff --git a/execution/gethexec/node.go b/execution/gethexec/node.go index d62bfdc963..5336d66d1b 100644 --- a/execution/gethexec/node.go +++ b/execution/gethexec/node.go @@ -29,6 +29,7 @@ import ( "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/execution" "github.com/offchainlabs/nitro/solgen/go/precompilesgen" + "github.com/offchainlabs/nitro/util/arbmath" "github.com/offchainlabs/nitro/util/dbutil" "github.com/offchainlabs/nitro/util/headerreader" ) @@ -454,7 +455,7 @@ func (n *ExecutionNode) MessageIndexToBlockNumber(messageNum arbutil.MessageInde } func (n *ExecutionNode) Maintenance() error { - trieCapLimitBytes := 1024 * uint64(n.ConfigFetcher().Caching.TrieCapLimit) + trieCapLimitBytes := arbmath.SaturatingUMul(uint64(n.ConfigFetcher().Caching.TrieCapLimit), 1024*1024) err := n.ExecEngine.Maintenance(trieCapLimitBytes) if err != nil { return err From 8dd3db4f6f370e1ccca63b61d6b922d6552698ef Mon Sep 17 00:00:00 2001 From: Diego Ximenes Date: Fri, 24 Jan 2025 17:04:46 -0300 Subject: [PATCH 4/4] Fixes mutex in FlushTrieDB --- execution/gethexec/executionengine.go | 4 +++- go-ethereum | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index b67dff6a83..5b512c1360 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -1012,5 +1012,7 @@ func (s *ExecutionEngine) Start(ctx_in context.Context) { } func (s *ExecutionEngine) Maintenance(capLimit uint64) error { - return s.bc.FlushTrieDB(&s.createBlocksMutex, common.StorageSize(capLimit)) + s.createBlocksMutex.Lock() + defer s.createBlocksMutex.Unlock() + return s.bc.FlushTrieDB(common.StorageSize(capLimit)) } diff --git a/go-ethereum b/go-ethereum index 5a7010a057..cbb47d194b 160000 --- a/go-ethereum +++ b/go-ethereum @@ -1 +1 @@ -Subproject commit 5a7010a057c6539a3bb154d15a47c015a96b1ef8 +Subproject commit cbb47d194bd2e87322d35b09af36a83320345814